[med-svn] [python-ruffus] 01/02: Imported Upstream version 2.5
Andreas Tille
tille at debian.org
Sun Feb 1 14:22:37 UTC 2015
This is an automated email from the git hooks/post-receive script.
tille pushed a commit to branch master
in repository python-ruffus.
commit 8cbe49cfda3d4c00932e25b6bad53c2c9a7de4c4
Author: Andreas Tille <tille at debian.org>
Date: Sun Feb 1 15:16:19 2015 +0100
Imported Upstream version 2.5
---
CHANGES.TXT | 742 +
LICENSE.TXT | 17 +
PKG-INFO | 151 +
USAGE.TXT | 81 +
doc/Makefile | 184 +
doc/_build/doctrees/cheatsheet.doctree | Bin 0 -> 26861 bytes
doc/_build/doctrees/contents.doctree | Bin 0 -> 16669 bytes
doc/_build/doctrees/decorators/active_if.doctree | Bin 0 -> 17494 bytes
.../doctrees/decorators/check_if_uptodate.doctree | Bin 0 -> 14314 bytes
doc/_build/doctrees/decorators/collate.doctree | Bin 0 -> 42503 bytes
doc/_build/doctrees/decorators/collate_ex.doctree | Bin 0 -> 49722 bytes
.../doctrees/decorators/combinations.doctree | Bin 0 -> 32943 bytes
.../combinations_with_replacement.doctree | Bin 0 -> 33936 bytes
doc/_build/doctrees/decorators/decorators.doctree | Bin 0 -> 179554 bytes
doc/_build/doctrees/decorators/files.doctree | Bin 0 -> 38162 bytes
doc/_build/doctrees/decorators/files_ex.doctree | Bin 0 -> 16801 bytes
doc/_build/doctrees/decorators/files_re.doctree | Bin 0 -> 36533 bytes
doc/_build/doctrees/decorators/follows.doctree | Bin 0 -> 21832 bytes
doc/_build/doctrees/decorators/graphviz.doctree | Bin 0 -> 24630 bytes
.../doctrees/decorators/indicator_objects.doctree | Bin 0 -> 106157 bytes
doc/_build/doctrees/decorators/jobs_limit.doctree | Bin 0 -> 17518 bytes
doc/_build/doctrees/decorators/merge.doctree | Bin 0 -> 19164 bytes
doc/_build/doctrees/decorators/mkdir.doctree | Bin 0 -> 56201 bytes
doc/_build/doctrees/decorators/originate.doctree | Bin 0 -> 17280 bytes
doc/_build/doctrees/decorators/parallel.doctree | Bin 0 -> 18153 bytes
.../doctrees/decorators/permutations.doctree | Bin 0 -> 31643 bytes
doc/_build/doctrees/decorators/posttask.doctree | Bin 0 -> 17303 bytes
doc/_build/doctrees/decorators/product.doctree | Bin 0 -> 40049 bytes
doc/_build/doctrees/decorators/split.doctree | Bin 0 -> 23865 bytes
doc/_build/doctrees/decorators/subdivide.doctree | Bin 0 -> 52137 bytes
doc/_build/doctrees/decorators/transform.doctree | Bin 0 -> 51924 bytes
.../doctrees/decorators/transform_ex.doctree | Bin 0 -> 62008 bytes
doc/_build/doctrees/design.doctree | Bin 0 -> 87531 bytes
.../doctrees/drmaa_wrapper_functions.doctree | Bin 0 -> 53028 bytes
doc/_build/doctrees/environment.pickle | Bin 0 -> 1958126 bytes
.../doctrees/examples/bioinformatics/index.doctree | Bin 0 -> 50804 bytes
.../examples/bioinformatics/part1_code.doctree | Bin 0 -> 7295 bytes
.../doctrees/examples/bioinformatics/part2.doctree | Bin 0 -> 30352 bytes
.../examples/bioinformatics/part2_code.doctree | Bin 0 -> 22167 bytes
.../doctrees/examples/paired_end_data.py.doctree | Bin 0 -> 15954 bytes
doc/_build/doctrees/faq.doctree | Bin 0 -> 148069 bytes
doc/_build/doctrees/gallery.doctree | Bin 0 -> 12964 bytes
doc/_build/doctrees/glossary.doctree | Bin 0 -> 16207 bytes
doc/_build/doctrees/history.doctree | Bin 0 -> 174722 bytes
doc/_build/doctrees/implementation_notes.doctree | Bin 0 -> 92811 bytes
doc/_build/doctrees/installation.doctree | Bin 0 -> 16496 bytes
doc/_build/doctrees/pipeline_functions.doctree | Bin 0 -> 227564 bytes
doc/_build/doctrees/proxy_logger.doctree | Bin 0 -> 44070 bytes
doc/_build/doctrees/recipes.doctree | Bin 0 -> 3529 bytes
.../doctrees/refactoring_ruffus_notes.doctree | Bin 0 -> 2680 bytes
doc/_build/doctrees/task.doctree | Bin 0 -> 95778 bytes
doc/_build/doctrees/todo.doctree | Bin 0 -> 101075 bytes
.../tutorials/manual/advanced_transform.doctree | Bin 0 -> 60048 bytes
.../tutorials/manual/check_if_uptodate.doctree | Bin 0 -> 25728 bytes
.../doctrees/tutorials/manual/collate.doctree | Bin 0 -> 24757 bytes
.../doctrees/tutorials/manual/dependencies.doctree | Bin 0 -> 36904 bytes
.../tutorials/manual/dependencies_code.doctree | Bin 0 -> 9238 bytes
.../doctrees/tutorials/manual/exceptions.doctree | Bin 0 -> 23390 bytes
doc/_build/doctrees/tutorials/manual/files.doctree | Bin 0 -> 46234 bytes
.../doctrees/tutorials/manual/files_re.doctree | Bin 0 -> 31784 bytes
.../doctrees/tutorials/manual/follows.doctree | Bin 0 -> 41258 bytes
.../doctrees/tutorials/manual/jobs_limit.doctree | Bin 0 -> 23537 bytes
.../doctrees/tutorials/manual/logging.doctree | Bin 0 -> 41942 bytes
.../doctrees/tutorials/manual/logging_code.doctree | Bin 0 -> 16356 bytes
.../doctrees/tutorials/manual/manual_code.doctree | Bin 0 -> 4810 bytes
.../tutorials/manual/manual_contents.doctree | Bin 0 -> 15027 bytes
.../tutorials/manual/manual_introduction.doctree | Bin 0 -> 37475 bytes
doc/_build/doctrees/tutorials/manual/merge.doctree | Bin 0 -> 34249 bytes
.../doctrees/tutorials/manual/onthefly.doctree | Bin 0 -> 31178 bytes
.../tutorials/manual/onthefly_code.doctree | Bin 0 -> 29632 bytes
.../doctrees/tutorials/manual/parallel.doctree | Bin 0 -> 22117 bytes
.../tutorials/manual/parallel_processing.doctree | Bin 0 -> 22097 bytes
.../doctrees/tutorials/manual/posttask.doctree | Bin 0 -> 28855 bytes
doc/_build/doctrees/tutorials/manual/split.doctree | Bin 0 -> 48227 bytes
.../manual/tasks_and_globs_in_inputs.doctree | Bin 0 -> 56335 bytes
.../tutorials/manual/tasks_as_recipes.doctree | Bin 0 -> 50452 bytes
.../manual/tracing_pipeline_parameters.doctree | Bin 0 -> 30041 bytes
.../doctrees/tutorials/manual/transform.doctree | Bin 0 -> 47369 bytes
.../tutorials/manual/transform_code.doctree | Bin 0 -> 10794 bytes
.../tutorials/new_tutorial/active_if.doctree | Bin 0 -> 31655 bytes
.../new_tutorial/check_if_uptodate.doctree | Bin 0 -> 29707 bytes
.../tutorials/new_tutorial/checkpointing.doctree | Bin 0 -> 80834 bytes
.../new_tutorial/checkpointing_code.doctree | Bin 0 -> 17835 bytes
.../tutorials/new_tutorial/combinatorics.doctree | Bin 0 -> 81474 bytes
.../new_tutorial/combinatorics_code.doctree | Bin 0 -> 40931 bytes
.../tutorials/new_tutorial/command_line.doctree | Bin 0 -> 56086 bytes
.../new_tutorial/decorators_compendium.doctree | Bin 0 -> 79327 bytes
.../tutorials/new_tutorial/dependencies.doctree | Bin 0 -> 34782 bytes
.../new_tutorial/deprecated_files.doctree | Bin 0 -> 52457 bytes
.../new_tutorial/deprecated_files_re.doctree | Bin 0 -> 36069 bytes
.../tutorials/new_tutorial/exceptions.doctree | Bin 0 -> 36913 bytes
.../new_tutorial/flowchart_colours.doctree | Bin 0 -> 27855 bytes
.../new_tutorial/flowchart_colours_code.doctree | Bin 0 -> 40341 bytes
.../doctrees/tutorials/new_tutorial/inputs.doctree | Bin 0 -> 54503 bytes
.../tutorials/new_tutorial/inputs_code.doctree | Bin 0 -> 41199 bytes
.../tutorials/new_tutorial/introduction.doctree | Bin 0 -> 81008 bytes
.../new_tutorial/introduction_code.doctree | Bin 0 -> 23035 bytes
.../new_tutorial/list_of_ruffus_names.doctree | Bin 0 -> 55484 bytes
.../tutorials/new_tutorial/logging.doctree | Bin 0 -> 53221 bytes
.../tutorials/new_tutorial/logging_code.doctree | Bin 0 -> 19233 bytes
.../tutorials/new_tutorial/manual_contents.doctree | Bin 0 -> 53551 bytes
.../doctrees/tutorials/new_tutorial/merge.doctree | Bin 0 -> 37820 bytes
.../tutorials/new_tutorial/merge_code.doctree | Bin 0 -> 26126 bytes
.../doctrees/tutorials/new_tutorial/mkdir.doctree | Bin 0 -> 39407 bytes
.../tutorials/new_tutorial/mkdir_code.doctree | Bin 0 -> 28438 bytes
.../tutorials/new_tutorial/multiprocessing.doctree | Bin 0 -> 78099 bytes
.../new_tutorial/multiprocessing_code.doctree | Bin 0 -> 32920 bytes
.../tutorials/new_tutorial/onthefly.doctree | Bin 0 -> 42962 bytes
.../tutorials/new_tutorial/onthefly_code.doctree | Bin 0 -> 42438 bytes
.../tutorials/new_tutorial/originate.doctree | Bin 0 -> 26038 bytes
.../tutorials/new_tutorial/originate_code.doctree | Bin 0 -> 22062 bytes
.../new_tutorial/output_file_names.doctree | Bin 0 -> 127227 bytes
.../new_tutorial/output_file_names_code.doctree | Bin 0 -> 40188 bytes
.../tutorials/new_tutorial/parallel.doctree | Bin 0 -> 25669 bytes
.../new_tutorial/pipeline_printout.doctree | Bin 0 -> 56964 bytes
.../new_tutorial/pipeline_printout_code.doctree | Bin 0 -> 33261 bytes
.../new_tutorial/pipeline_printout_graph.doctree | Bin 0 -> 40189 bytes
.../pipeline_printout_graph_code.doctree | Bin 0 -> 26425 bytes
.../tutorials/new_tutorial/posttask.doctree | Bin 0 -> 33812 bytes
.../doctrees/tutorials/new_tutorial/split.doctree | Bin 0 -> 55538 bytes
.../tutorials/new_tutorial/split_code.doctree | Bin 0 -> 23910 bytes
.../new_tutorial/subdivide_collate.doctree | Bin 0 -> 50447 bytes
.../new_tutorial/subdivide_collate_code.doctree | Bin 0 -> 32043 bytes
.../tutorials/new_tutorial/transform.doctree | Bin 0 -> 56788 bytes
.../tutorials/new_tutorial/transform_code.doctree | Bin 0 -> 23719 bytes
.../new_tutorial/transform_in_parallel.doctree | Bin 0 -> 84548 bytes
.../transform_in_parallel_code.doctree | Bin 0 -> 47182 bytes
.../simple_tutorial/simple_tutorial.doctree | Bin 0 -> 15754 bytes
.../simple_tutorial/simple_tutorial_code.doctree | Bin 0 -> 9897 bytes
.../simple_tutorial/step1_follows.doctree | Bin 0 -> 53784 bytes
.../tutorials/simple_tutorial/step2.doctree | Bin 0 -> 89864 bytes
.../tutorials/simple_tutorial/step2_code.doctree | Bin 0 -> 10023 bytes
.../simple_tutorial/step3_run_pipeline.doctree | Bin 0 -> 22943 bytes
.../step3_run_pipeline_code.doctree | Bin 0 -> 13370 bytes
.../step4_run_pipeline_graphically.doctree | Bin 0 -> 15055 bytes
.../step4_run_pipeline_graphically_code.doctree | Bin 0 -> 13318 bytes
.../tutorials/simple_tutorial/step5_split.doctree | Bin 0 -> 27330 bytes
.../simple_tutorial/step5_split_code.doctree | Bin 0 -> 10589 bytes
.../simple_tutorial/step6_transform.doctree | Bin 0 -> 19728 bytes
.../simple_tutorial/step6_transform_code.doctree | Bin 0 -> 12263 bytes
.../tutorials/simple_tutorial/step7_merge.doctree | Bin 0 -> 17279 bytes
.../simple_tutorial/step7_merge_code.doctree | Bin 0 -> 14635 bytes
.../simple_tutorial/step8_posttask.doctree | Bin 0 -> 24607 bytes
.../simple_tutorial/step8_posttask_code.doctree | Bin 0 -> 16609 bytes
doc/_build/doctrees/why_ruffus.doctree | Bin 0 -> 13933 bytes
doc/_build/html/.buildinfo | 4 +
doc/_build/html/BingSiteAuth.xml | 4 +
.../html/_downloads/flowchart_colour_schemes.svg | 895 +
.../html/_downloads/gallery_big_pipeline.svg | 2699 +++
doc/_build/html/_downloads/gallery_dless.svg | 197 +
doc/_build/html/_downloads/gallery_rna_seq.svg | 672 +
.../html/_downloads/gallery_snp_annotation.svg | 470 +
.../gallery_snp_annotation_consequences.svg | 471 +
doc/_build/html/_downloads/play_with_colours.py | 268 +
doc/_build/html/_downloads/ruffus.pdf | Bin 0 -> 4513083 bytes
doc/_build/html/_images/bestiary_combinatorics.png | Bin 0 -> 185818 bytes
doc/_build/html/_images/bestiary_decorators.png | Bin 0 -> 246675 bytes
doc/_build/html/_images/bestiary_transform.png | Bin 0 -> 105988 bytes
.../html/_images/examples_bioinformatics_error.png | Bin 0 -> 66668 bytes
.../html/_images/examples_bioinformatics_merge.jpg | Bin 0 -> 51653 bytes
.../_images/examples_bioinformatics_pipeline.jpg | Bin 0 -> 53868 bytes
.../html/_images/examples_bioinformatics_split.jpg | Bin 0 -> 62009 bytes
.../_images/examples_bioinformatics_transform.jpg | Bin 0 -> 53189 bytes
.../html/_images/flowchart_colour_schemes.png | Bin 0 -> 107483 bytes
doc/_build/html/_images/front_page_flowchart.png | Bin 0 -> 24437 bytes
doc/_build/html/_images/gallery_big_pipeline.png | Bin 0 -> 1010747 bytes
doc/_build/html/_images/gallery_dless.png | Bin 0 -> 110281 bytes
doc/_build/html/_images/gallery_rna_seq.png | Bin 0 -> 128916 bytes
doc/_build/html/_images/gallery_snp_annotation.png | Bin 0 -> 133568 bytes
.../gallery_snp_annotation_consequences.png | Bin 0 -> 136378 bytes
doc/_build/html/_images/history_html_flowchart.png | Bin 0 -> 126287 bytes
doc/_build/html/_images/history_html_flowchart.svg | 269 +
.../html/_images/history_html_flowchart1.png | Bin 0 -> 126287 bytes
.../html/_images/history_html_flowchart2.png | Bin 0 -> 126287 bytes
doc/_build/html/_images/jobs_limit.png | Bin 0 -> 236324 bytes
doc/_build/html/_images/jobs_limit2.png | Bin 0 -> 216405 bytes
doc/_build/html/_images/logo.jpg | Bin 0 -> 86526 bytes
.../_images/manual_dependencies_flowchart1.png | Bin 0 -> 14540 bytes
.../_images/manual_dependencies_flowchart2.png | Bin 0 -> 16504 bytes
.../_images/manual_dependencies_flowchart3.png | Bin 0 -> 20983 bytes
.../manual_dependencies_flowchart_intro.png | Bin 0 -> 30945 bytes
.../manual_dependencies_flowchart_intro1.png | Bin 0 -> 30945 bytes
doc/_build/html/_images/manual_exceptions.png | Bin 0 -> 34800 bytes
doc/_build/html/_images/manual_follows1.png | Bin 0 -> 10161 bytes
.../html/_images/manual_split_merge_example.jpg | Bin 0 -> 60508 bytes
doc/_build/html/_images/manual_transform.png | Bin 0 -> 29327 bytes
.../_images/manual_transform_complex_outputs.png | Bin 0 -> 18713 bytes
doc/_build/html/_images/pretty_flowchart.png | Bin 0 -> 23259 bytes
.../_images/simple_tutorial_complex_flowchart.png | Bin 0 -> 78472 bytes
.../_images/simple_tutorial_complex_flowchart1.png | Bin 0 -> 78472 bytes
.../simple_tutorial_complex_flowchart_error.png | Bin 0 -> 64334 bytes
.../simple_tutorial_complex_flowchart_error1.png | Bin 0 -> 64334 bytes
.../_images/simple_tutorial_decorator_syntax.png | Bin 0 -> 13200 bytes
.../html/_images/simple_tutorial_hello_world.png | Bin 0 -> 7447 bytes
.../_images/simple_tutorial_hello_world_output.png | Bin 0 -> 9761 bytes
.../html/_images/simple_tutorial_intro_follows.png | Bin 0 -> 26312 bytes
doc/_build/html/_images/simple_tutorial_merge1.png | Bin 0 -> 29639 bytes
doc/_build/html/_images/simple_tutorial_merge2.png | Bin 0 -> 10077 bytes
.../_images/simple_tutorial_pipeline_printout1.png | Bin 0 -> 32970 bytes
.../simple_tutorial_pipeline_printout11.png | Bin 0 -> 32970 bytes
.../_images/simple_tutorial_pipeline_printout2.png | Bin 0 -> 108504 bytes
.../simple_tutorial_pipeline_printout21.png | Bin 0 -> 108504 bytes
.../_images/simple_tutorial_pipeline_printout3.png | Bin 0 -> 114387 bytes
.../simple_tutorial_pipeline_printout31.png | Bin 0 -> 114387 bytes
.../html/_images/simple_tutorial_posttask.png | Bin 0 -> 23863 bytes
doc/_build/html/_images/simple_tutorial_split.png | Bin 0 -> 30237 bytes
doc/_build/html/_images/simple_tutorial_split1.png | Bin 0 -> 30237 bytes
.../html/_images/simple_tutorial_stage4_after.png | Bin 0 -> 16750 bytes
.../html/_images/simple_tutorial_stage4_before.png | Bin 0 -> 13502 bytes
.../html/_images/simple_tutorial_stage5_after.png | Bin 0 -> 16256 bytes
.../html/_images/simple_tutorial_stage5_before.png | Bin 0 -> 16498 bytes
.../_images/simple_tutorial_stage5_flowchart.png | Bin 0 -> 26034 bytes
doc/_build/html/_images/simple_tutorial_step4.png | Bin 0 -> 8271 bytes
doc/_build/html/_images/simple_tutorial_step5.png | Bin 0 -> 31481 bytes
.../_images/simple_tutorial_step5_sans_key.png | Bin 0 -> 25436 bytes
.../html/_images/simple_tutorial_transform.png | Bin 0 -> 39019 bytes
.../html/_images/simple_tutorial_transform1.png | Bin 0 -> 39019 bytes
...mple_tutorial_zoo_animals_formatter_example.jpg | Bin 0 -> 115697 bytes
.../_images/theoretical_pipeline_schematic.png | Bin 0 -> 13895 bytes
.../html/_images/transform_1_to_1_example.png | Bin 0 -> 108323 bytes
doc/_build/html/_images/tutorial_key.jpg | Bin 0 -> 76358 bytes
doc/_build/html/_images/tutorial_key.png | Bin 0 -> 48636 bytes
doc/_build/html/_images/tutorial_ruffus_files.jpg | Bin 0 -> 485002 bytes
.../_images/tutorial_step1_decorator_syntax.png | Bin 0 -> 11204 bytes
doc/_build/html/_images/wikimedia_bandedkrait.jpg | Bin 0 -> 170650 bytes
doc/_build/html/_images/wikimedia_cyl_ruffus.jpg | Bin 0 -> 104896 bytes
doc/_build/html/_modules/index.html | 163 +
doc/_build/html/_modules/ruffus/proxy_logger.html | 560 +
doc/_build/html/_modules/ruffus/task.html | 3953 ++++
doc/_build/html/_sources/cheatsheet.txt | 85 +
doc/_build/html/_sources/contents.txt | 197 +
doc/_build/html/_sources/decorators/active_if.txt | 110 +
.../html/_sources/decorators/check_if_uptodate.txt | 68 +
doc/_build/html/_sources/decorators/collate.txt | 154 +
doc/_build/html/_sources/decorators/collate_ex.txt | 145 +
.../html/_sources/decorators/combinations.txt | 153 +
.../decorators/combinations_with_replacement.txt | 157 +
doc/_build/html/_sources/decorators/decorators.txt | 296 +
doc/_build/html/_sources/decorators/files.txt | 155 +
doc/_build/html/_sources/decorators/files_ex.txt | 77 +
doc/_build/html/_sources/decorators/files_re.txt | 130 +
doc/_build/html/_sources/decorators/follows.txt | 82 +
doc/_build/html/_sources/decorators/graphviz.txt | 92 +
.../html/_sources/decorators/indicator_objects.txt | 547 +
doc/_build/html/_sources/decorators/jobs_limit.txt | 73 +
doc/_build/html/_sources/decorators/merge.txt | 64 +
doc/_build/html/_sources/decorators/mkdir.txt | 220 +
doc/_build/html/_sources/decorators/originate.txt | 79 +
doc/_build/html/_sources/decorators/parallel.txt | 81 +
.../html/_sources/decorators/permutations.txt | 158 +
doc/_build/html/_sources/decorators/posttask.txt | 70 +
doc/_build/html/_sources/decorators/product.txt | 192 +
doc/_build/html/_sources/decorators/split.txt | 92 +
doc/_build/html/_sources/decorators/subdivide.txt | 189 +
doc/_build/html/_sources/decorators/transform.txt | 176 +
.../html/_sources/decorators/transform_ex.txt | 190 +
doc/_build/html/_sources/design.txt | 304 +
.../html/_sources/drmaa_wrapper_functions.txt | 234 +
.../_sources/examples/bioinformatics/index.txt | 290 +
.../examples/bioinformatics/part1_code.txt | 70 +
.../_sources/examples/bioinformatics/part2.txt | 152 +
.../examples/bioinformatics/part2_code.txt | 267 +
.../html/_sources/examples/paired_end_data.py.txt | 122 +
doc/_build/html/_sources/faq.txt | 980 +
doc/_build/html/_sources/gallery.txt | 63 +
doc/_build/html/_sources/glossary.txt | 81 +
doc/_build/html/_sources/history.txt | 733 +
doc/_build/html/_sources/implementation_notes.txt | 437 +
doc/_build/html/_sources/installation.txt | 79 +
doc/_build/html/_sources/pipeline_functions.txt | 689 +
doc/_build/html/_sources/proxy_logger.txt | 24 +
doc/_build/html/_sources/recipes.txt | 9 +
.../html/_sources/refactoring_ruffus_notes.txt | 5 +
doc/_build/html/_sources/task.txt | 138 +
doc/_build/html/_sources/todo.txt | 500 +
.../tutorials/manual/advanced_transform.txt | 347 +
.../tutorials/manual/check_if_uptodate.txt | 89 +
.../html/_sources/tutorials/manual/collate.txt | 95 +
.../_sources/tutorials/manual/dependencies.txt | 158 +
.../tutorials/manual/dependencies_code.txt | 97 +
.../html/_sources/tutorials/manual/exceptions.txt | 82 +
.../html/_sources/tutorials/manual/files.txt | 218 +
.../html/_sources/tutorials/manual/files_re.txt | 135 +
.../html/_sources/tutorials/manual/follows.txt | 193 +
.../html/_sources/tutorials/manual/jobs_limit.txt | 80 +
.../html/_sources/tutorials/manual/logging.txt | 185 +
.../_sources/tutorials/manual/logging_code.txt | 187 +
.../html/_sources/tutorials/manual/manual_code.txt | 12 +
.../_sources/tutorials/manual/manual_contents.txt | 30 +
.../tutorials/manual/manual_introduction.txt | 224 +
.../html/_sources/tutorials/manual/merge.txt | 102 +
.../html/_sources/tutorials/manual/onthefly.txt | 138 +
.../_sources/tutorials/manual/onthefly_code.txt | 320 +
.../html/_sources/tutorials/manual/parallel.txt | 65 +
.../tutorials/manual/parallel_processing.txt | 52 +
.../html/_sources/tutorials/manual/posttask.txt | 110 +
.../html/_sources/tutorials/manual/split.txt | 177 +
.../tutorials/manual/tasks_and_globs_in_inputs.txt | 265 +
.../_sources/tutorials/manual/tasks_as_recipes.txt | 192 +
.../manual/tracing_pipeline_parameters.txt | 115 +
.../html/_sources/tutorials/manual/transform.txt | 194 +
.../_sources/tutorials/manual/transform_code.txt | 68 +
.../_sources/tutorials/new_tutorial/active_if.txt | 149 +
.../tutorials/new_tutorial/check_if_uptodate.txt | 89 +
.../tutorials/new_tutorial/checkpointing.txt | 400 +
.../tutorials/new_tutorial/checkpointing_code.txt | 23 +
.../tutorials/new_tutorial/combinatorics.txt | 442 +
.../tutorials/new_tutorial/combinatorics_code.txt | 308 +
.../tutorials/new_tutorial/command_line.txt | 352 +
.../new_tutorial/decorators_compendium.txt | 154 +
.../tutorials/new_tutorial/dependencies.txt | 110 +
.../tutorials/new_tutorial/deprecated_files.txt | 238 +
.../tutorials/new_tutorial/deprecated_files_re.txt | 145 +
.../_sources/tutorials/new_tutorial/exceptions.txt | 191 +
.../tutorials/new_tutorial/flowchart_colours.txt | 61 +
.../new_tutorial/flowchart_colours_code.txt | 288 +
.../_sources/tutorials/new_tutorial/inputs.txt | 239 +
.../tutorials/new_tutorial/inputs_code.txt | 229 +
.../tutorials/new_tutorial/introduction.txt | 399 +
.../tutorials/new_tutorial/introduction_code.txt | 94 +
.../new_tutorial/list_of_ruffus_names.txt | 77 +
.../_sources/tutorials/new_tutorial/logging.txt | 221 +
.../tutorials/new_tutorial/logging_code.txt | 55 +
.../tutorials/new_tutorial/manual_contents.txt | 64 +
.../html/_sources/tutorials/new_tutorial/merge.txt | 140 +
.../_sources/tutorials/new_tutorial/merge_code.txt | 147 +
.../html/_sources/tutorials/new_tutorial/mkdir.txt | 152 +
.../_sources/tutorials/new_tutorial/mkdir_code.txt | 113 +
.../tutorials/new_tutorial/multiprocessing.txt | 293 +
.../new_tutorial/multiprocessing_code.txt | 183 +
.../_sources/tutorials/new_tutorial/onthefly.txt | 192 +
.../tutorials/new_tutorial/onthefly_code.txt | 328 +
.../_sources/tutorials/new_tutorial/originate.txt | 92 +
.../tutorials/new_tutorial/originate_code.txt | 71 +
.../tutorials/new_tutorial/output_file_names.txt | 560 +
.../new_tutorial/output_file_names_code.txt | 248 +
.../_sources/tutorials/new_tutorial/parallel.txt | 63 +
.../tutorials/new_tutorial/pipeline_printout.txt | 215 +
.../new_tutorial/pipeline_printout_code.txt | 203 +
.../new_tutorial/pipeline_printout_graph.txt | 170 +
.../new_tutorial/pipeline_printout_graph_code.txt | 109 +
.../_sources/tutorials/new_tutorial/posttask.txt | 122 +
.../html/_sources/tutorials/new_tutorial/split.txt | 233 +
.../_sources/tutorials/new_tutorial/split_code.txt | 115 +
.../tutorials/new_tutorial/subdivide_collate.txt | 234 +
.../new_tutorial/subdivide_collate_code.txt | 155 +
.../_sources/tutorials/new_tutorial/transform.txt | 194 +
.../tutorials/new_tutorial/transform_code.txt | 99 +
.../new_tutorial/transform_in_parallel.txt | 394 +
.../new_tutorial/transform_in_parallel_code.txt | 366 +
.../tutorials/simple_tutorial/simple_tutorial.txt | 71 +
.../simple_tutorial/simple_tutorial_code.txt | 34 +
.../tutorials/simple_tutorial/step1_follows.txt | 258 +
.../_sources/tutorials/simple_tutorial/step2.txt | 517 +
.../tutorials/simple_tutorial/step2_code.txt | 72 +
.../simple_tutorial/step3_run_pipeline.txt | 161 +
.../simple_tutorial/step3_run_pipeline_code.txt | 112 +
.../step4_run_pipeline_graphically.txt | 71 +
.../step4_run_pipeline_graphically_code.txt | 104 +
.../tutorials/simple_tutorial/step5_split.txt | 112 +
.../tutorials/simple_tutorial/step5_split_code.txt | 81 +
.../tutorials/simple_tutorial/step6_transform.txt | 89 +
.../simple_tutorial/step6_transform_code.txt | 104 +
.../tutorials/simple_tutorial/step7_merge.txt | 80 +
.../tutorials/simple_tutorial/step7_merge_code.txt | 138 +
.../tutorials/simple_tutorial/step8_posttask.txt | 119 +
.../simple_tutorial/step8_posttask_code.txt | 162 +
doc/_build/html/_sources/why_ruffus.txt | 37 +
doc/_build/html/_static/ajax-loader.gif | Bin 0 -> 673 bytes
doc/_build/html/_static/basic.css | 540 +
doc/_build/html/_static/comment-bright.png | Bin 0 -> 3500 bytes
doc/_build/html/_static/comment-close.png | Bin 0 -> 3578 bytes
doc/_build/html/_static/comment.png | Bin 0 -> 3445 bytes
doc/_build/html/_static/default.css | 256 +
doc/_build/html/_static/doctools.js | 235 +
doc/_build/html/_static/down-pressed.png | Bin 0 -> 368 bytes
doc/_build/html/_static/down.png | Bin 0 -> 363 bytes
.../_static/example_scripts/complicated_example.py | 527 +
.../example_scripts/intermediate_example.py | 313 +
.../_static/example_scripts/play_with_colours.py | 268 +
.../_static/example_scripts/ruffus_template.py | 270 +
doc/_build/html/_static/example_scripts/simpler.py | 260 +
doc/_build/html/_static/file.png | Bin 0 -> 392 bytes
doc/_build/html/_static/jquery.js | 4 +
doc/_build/html/_static/minus.png | Bin 0 -> 199 bytes
doc/_build/html/_static/plus.png | Bin 0 -> 199 bytes
doc/_build/html/_static/pygments.css | 62 +
doc/_build/html/_static/ruffus.css | 327 +
doc/_build/html/_static/ruffus.pdf | Bin 0 -> 4513083 bytes
doc/_build/html/_static/searchtools.js | 622 +
doc/_build/html/_static/sidebar.js | 159 +
doc/_build/html/_static/underscore.js | 31 +
doc/_build/html/_static/up-pressed.png | Bin 0 -> 372 bytes
doc/_build/html/_static/up.png | Bin 0 -> 363 bytes
doc/_build/html/_static/websupport.js | 808 +
doc/_build/html/cheatsheet.html | 283 +
doc/_build/html/contents.html | 691 +
doc/_build/html/decorators/active_if.html | 295 +
doc/_build/html/decorators/check_if_uptodate.html | 256 +
doc/_build/html/decorators/collate.html | 362 +
doc/_build/html/decorators/collate_ex.html | 363 +
doc/_build/html/decorators/combinations.html | 351 +
.../decorators/combinations_with_replacement.html | 355 +
doc/_build/html/decorators/decorators.html | 750 +
doc/_build/html/decorators/files.html | 364 +
doc/_build/html/decorators/files_ex.html | 264 +
doc/_build/html/decorators/files_re.html | 344 +
doc/_build/html/decorators/follows.html | 274 +
doc/_build/html/decorators/graphviz.html | 278 +
doc/_build/html/decorators/indicator_objects.html | 688 +
doc/_build/html/decorators/jobs_limit.html | 267 +
doc/_build/html/decorators/merge.html | 269 +
doc/_build/html/decorators/mkdir.html | 422 +
doc/_build/html/decorators/originate.html | 275 +
doc/_build/html/decorators/parallel.html | 271 +
doc/_build/html/decorators/permutations.html | 356 +
doc/_build/html/decorators/posttask.html | 262 +
doc/_build/html/decorators/product.html | 393 +
doc/_build/html/decorators/split.html | 295 +
doc/_build/html/decorators/subdivide.html | 402 +
doc/_build/html/decorators/transform.html | 381 +
doc/_build/html/decorators/transform_ex.html | 406 +
doc/_build/html/design.html | 503 +
doc/_build/html/drmaa_wrapper_functions.html | 407 +
doc/_build/html/examples/bioinformatics/index.html | 483 +
.../html/examples/bioinformatics/part1_code.html | 249 +
doc/_build/html/examples/bioinformatics/part2.html | 347 +
.../html/examples/bioinformatics/part2_code.html | 445 +
doc/_build/html/examples/paired_end_data.py.html | 305 +
doc/_build/html/faq.html | 1115 +
doc/_build/html/gallery.html | 236 +
doc/_build/html/genindex.html | 2200 ++
doc/_build/html/glossary.html | 247 +
doc/_build/html/googlef47546b8526ae0ed.html | 1 +
doc/_build/html/history.html | 1008 +
doc/_build/html/implementation_notes.html | 623 +
doc/_build/html/index.html | 313 +
doc/_build/html/installation.html | 265 +
doc/_build/html/objects.inv | Bin 0 -> 4994 bytes
doc/_build/html/pipeline_functions.html | 991 +
doc/_build/html/proxy_logger.html | 407 +
doc/_build/html/recipes.html | 199 +
doc/_build/html/refactoring_ruffus_notes.html | 187 +
doc/_build/html/search.html | 177 +
doc/_build/html/searchindex.js | 1 +
doc/_build/html/task.html | 516 +
doc/_build/html/todo.html | 674 +
.../html/tutorials/manual/advanced_transform.html | 517 +
.../html/tutorials/manual/check_if_uptodate.html | 269 +
doc/_build/html/tutorials/manual/collate.html | 269 +
doc/_build/html/tutorials/manual/dependencies.html | 335 +
.../html/tutorials/manual/dependencies_code.html | 289 +
doc/_build/html/tutorials/manual/exceptions.html | 255 +
doc/_build/html/tutorials/manual/files.html | 397 +
doc/_build/html/tutorials/manual/files_re.html | 311 +
doc/_build/html/tutorials/manual/follows.html | 363 +
doc/_build/html/tutorials/manual/jobs_limit.html | 262 +
doc/_build/html/tutorials/manual/logging.html | 375 +
doc/_build/html/tutorials/manual/logging_code.html | 376 +
doc/_build/html/tutorials/manual/manual_code.html | 193 +
.../html/tutorials/manual/manual_contents.html | 213 +
.../html/tutorials/manual/manual_introduction.html | 365 +
doc/_build/html/tutorials/manual/merge.html | 291 +
doc/_build/html/tutorials/manual/onthefly.html | 324 +
.../html/tutorials/manual/onthefly_code.html | 507 +
doc/_build/html/tutorials/manual/parallel.html | 248 +
.../html/tutorials/manual/parallel_processing.html | 239 +
doc/_build/html/tutorials/manual/posttask.html | 288 +
doc/_build/html/tutorials/manual/split.html | 352 +
.../manual/tasks_and_globs_in_inputs.html | 418 +
.../html/tutorials/manual/tasks_as_recipes.html | 353 +
.../manual/tracing_pipeline_parameters.html | 271 +
doc/_build/html/tutorials/manual/transform.html | 395 +
.../html/tutorials/manual/transform_code.html | 261 +
.../html/tutorials/new_tutorial/active_if.html | 330 +
.../tutorials/new_tutorial/check_if_uptodate.html | 267 +
.../html/tutorials/new_tutorial/checkpointing.html | 538 +
.../tutorials/new_tutorial/checkpointing_code.html | 211 +
.../html/tutorials/new_tutorial/combinatorics.html | 618 +
.../tutorials/new_tutorial/combinatorics_code.html | 488 +
.../html/tutorials/new_tutorial/command_line.html | 515 +
.../new_tutorial/decorators_compendium.html | 364 +
.../html/tutorials/new_tutorial/dependencies.html | 289 +
.../tutorials/new_tutorial/deprecated_files.html | 411 +
.../new_tutorial/deprecated_files_re.html | 319 +
.../html/tutorials/new_tutorial/exceptions.html | 359 +
.../tutorials/new_tutorial/flowchart_colours.html | 241 +
.../new_tutorial/flowchart_colours_code.html | 473 +
doc/_build/html/tutorials/new_tutorial/inputs.html | 426 +
.../html/tutorials/new_tutorial/inputs_code.html | 409 +
.../html/tutorials/new_tutorial/introduction.html | 554 +
.../tutorials/new_tutorial/introduction_code.html | 285 +
.../new_tutorial/list_of_ruffus_names.html | 288 +
.../html/tutorials/new_tutorial/logging.html | 383 +
.../html/tutorials/new_tutorial/logging_code.html | 243 +
.../tutorials/new_tutorial/manual_contents.html | 244 +
doc/_build/html/tutorials/new_tutorial/merge.html | 298 +
.../html/tutorials/new_tutorial/merge_code.html | 335 +
doc/_build/html/tutorials/new_tutorial/mkdir.html | 334 +
.../html/tutorials/new_tutorial/mkdir_code.html | 300 +
.../tutorials/new_tutorial/multiprocessing.html | 445 +
.../new_tutorial/multiprocessing_code.html | 365 +
.../html/tutorials/new_tutorial/onthefly.html | 363 +
.../html/tutorials/new_tutorial/onthefly_code.html | 514 +
.../html/tutorials/new_tutorial/originate.html | 265 +
.../tutorials/new_tutorial/originate_code.html | 260 +
.../tutorials/new_tutorial/output_file_names.html | 697 +
.../new_tutorial/output_file_names_code.html | 432 +
.../html/tutorials/new_tutorial/parallel.html | 246 +
.../tutorials/new_tutorial/pipeline_printout.html | 381 +
.../new_tutorial/pipeline_printout_code.html | 396 +
.../new_tutorial/pipeline_printout_graph.html | 341 +
.../new_tutorial/pipeline_printout_graph_code.html | 364 +
.../html/tutorials/new_tutorial/posttask.html | 299 +
doc/_build/html/tutorials/new_tutorial/split.html | 393 +
.../html/tutorials/new_tutorial/split_code.html | 303 +
.../tutorials/new_tutorial/subdivide_collate.html | 397 +
.../new_tutorial/subdivide_collate_code.html | 341 +
.../html/tutorials/new_tutorial/transform.html | 375 +
.../tutorials/new_tutorial/transform_code.html | 289 +
.../new_tutorial/transform_in_parallel.html | 530 +
.../new_tutorial/transform_in_parallel_code.html | 561 +
.../tutorials/simple_tutorial/simple_tutorial.html | 252 +
.../simple_tutorial/simple_tutorial_code.html | 213 +
.../tutorials/simple_tutorial/step1_follows.html | 395 +
.../html/tutorials/simple_tutorial/step2.html | 689 +
.../html/tutorials/simple_tutorial/step2_code.html | 264 +
.../simple_tutorial/step3_run_pipeline.html | 262 +
.../simple_tutorial/step3_run_pipeline_code.html | 305 +
.../step4_run_pipeline_graphically.html | 266 +
.../step4_run_pipeline_graphically_code.html | 302 +
.../tutorials/simple_tutorial/step5_split.html | 297 +
.../simple_tutorial/step5_split_code.html | 277 +
.../tutorials/simple_tutorial/step6_transform.html | 258 +
.../simple_tutorial/step6_transform_code.html | 297 +
.../tutorials/simple_tutorial/step7_merge.html | 244 +
.../simple_tutorial/step7_merge_code.html | 332 +
.../tutorials/simple_tutorial/step8_posttask.html | 292 +
.../simple_tutorial/step8_posttask_code.html | 343 +
doc/_build/html/why_ruffus.html | 221 +
doc/_build/latex/Makefile | 66 +
doc/_build/latex/bestiary_combinatorics.png | Bin 0 -> 185818 bytes
doc/_build/latex/bestiary_decorators.png | Bin 0 -> 246675 bytes
doc/_build/latex/bestiary_transform.png | Bin 0 -> 105988 bytes
doc/_build/latex/examples_bioinformatics_error.png | Bin 0 -> 66668 bytes
doc/_build/latex/examples_bioinformatics_merge.jpg | Bin 0 -> 51653 bytes
.../latex/examples_bioinformatics_pipeline.jpg | Bin 0 -> 53868 bytes
doc/_build/latex/examples_bioinformatics_split.jpg | Bin 0 -> 62009 bytes
.../latex/examples_bioinformatics_transform.jpg | Bin 0 -> 53189 bytes
doc/_build/latex/flowchart_colour_schemes.png | Bin 0 -> 107483 bytes
doc/_build/latex/fncychap.sty | 683 +
doc/_build/latex/front_page_flowchart.png | Bin 0 -> 24437 bytes
doc/_build/latex/gallery_big_pipeline.png | Bin 0 -> 1010747 bytes
doc/_build/latex/gallery_dless.png | Bin 0 -> 110281 bytes
doc/_build/latex/gallery_rna_seq.png | Bin 0 -> 128916 bytes
doc/_build/latex/gallery_snp_annotation.png | Bin 0 -> 133568 bytes
.../latex/gallery_snp_annotation_consequences.png | Bin 0 -> 136378 bytes
doc/_build/latex/history_html_flowchart.png | Bin 0 -> 126287 bytes
doc/_build/latex/history_html_flowchart1.png | Bin 0 -> 126287 bytes
doc/_build/latex/history_html_flowchart2.png | Bin 0 -> 126287 bytes
doc/_build/latex/jobs_limit.png | Bin 0 -> 236324 bytes
doc/_build/latex/logo.jpg | Bin 0 -> 86526 bytes
.../latex/manual_dependencies_flowchart_intro.png | Bin 0 -> 30945 bytes
.../latex/manual_dependencies_flowchart_intro1.png | Bin 0 -> 30945 bytes
doc/_build/latex/manual_split_merge_example.jpg | Bin 0 -> 60508 bytes
doc/_build/latex/pretty_flowchart.png | Bin 0 -> 23259 bytes
doc/_build/latex/python.ist | 11 +
doc/_build/latex/ruffus.aux | 1937 ++
doc/_build/latex/ruffus.idx | 270 +
doc/_build/latex/ruffus.log | 3488 +++
doc/_build/latex/ruffus.out | 77 +
doc/_build/latex/ruffus.pdf | Bin 0 -> 4513083 bytes
doc/_build/latex/ruffus.tex | 21827 +++++++++++++++++++
doc/_build/latex/ruffus.toc | 544 +
.../latex/simple_tutorial_complex_flowchart.png | Bin 0 -> 78472 bytes
.../simple_tutorial_complex_flowchart_error.png | Bin 0 -> 64334 bytes
doc/_build/latex/simple_tutorial_stage5_after.png | Bin 0 -> 16256 bytes
doc/_build/latex/simple_tutorial_stage5_before.png | Bin 0 -> 16498 bytes
.../latex/simple_tutorial_stage5_flowchart.png | Bin 0 -> 26034 bytes
...mple_tutorial_zoo_animals_formatter_example.jpg | Bin 0 -> 115697 bytes
doc/_build/latex/sphinx.sty | 520 +
doc/_build/latex/sphinxhowto.cls | 104 +
doc/_build/latex/sphinxmanual.cls | 147 +
doc/_build/latex/tabulary.sty | 452 +
.../latex/theoretical_pipeline_schematic.png | Bin 0 -> 13895 bytes
doc/_build/latex/transform_1_to_1_example.png | Bin 0 -> 108323 bytes
doc/_build/latex/tutorial_key.png | Bin 0 -> 48636 bytes
doc/_build/latex/tutorial_ruffus_files.jpg | Bin 0 -> 485002 bytes
.../latex/tutorial_step1_decorator_syntax.png | Bin 0 -> 11204 bytes
doc/_build/latex/wikimedia_bandedkrait.jpg | Bin 0 -> 170650 bytes
doc/_build/latex/wikimedia_cyl_ruffus.jpg | Bin 0 -> 104896 bytes
doc/_templates/index.html | 160 +
doc/_templates/layout.html | 76 +
doc/cheatsheet.rst | 85 +
doc/complex_dags/dot/all.dot | 83 +
doc/complex_dags/dot/non_dag.dot | 86 +
doc/complex_dags/dot/task17.dot | 83 +
doc/complex_dags/dot/task17_from_task9.dot | 83 +
doc/complex_dags/dot/task25_from_task9.dot | 83 +
doc/complex_dags/jpg/all.jpg | Bin 0 -> 84488 bytes
doc/complex_dags/jpg/all_sm.jpg | Bin 0 -> 7617 bytes
doc/complex_dags/jpg/non_dag.jpg | Bin 0 -> 90676 bytes
doc/complex_dags/jpg/non_dag_sm.jpg | Bin 0 -> 8469 bytes
doc/complex_dags/jpg/task17.jpg | Bin 0 -> 78341 bytes
doc/complex_dags/jpg/task17_from_task9.jpg | Bin 0 -> 79455 bytes
doc/complex_dags/jpg/task17_from_task9_sm.jpg | Bin 0 -> 6895 bytes
doc/complex_dags/jpg/task17_sm.jpg | Bin 0 -> 6630 bytes
doc/complex_dags/jpg/task25_from_task9.jpg | Bin 0 -> 85116 bytes
doc/complex_dags/jpg/task25_from_task9_sm.jpg | Bin 0 -> 7597 bytes
doc/complex_dags/png/all.png | Bin 0 -> 126202 bytes
doc/complex_dags/png/non_dag.png | Bin 0 -> 115041 bytes
doc/complex_dags/png/task17.png | Bin 0 -> 138379 bytes
doc/complex_dags/png/task17_from_task9.png | Bin 0 -> 139735 bytes
doc/complex_dags/png/task25_from_task9.png | Bin 0 -> 129033 bytes
doc/complex_dags/svg/all.svg | 373 +
doc/complex_dags/svg/non_dag.svg | 380 +
doc/complex_dags/svg/task17.svg | 378 +
doc/complex_dags/svg/task17_from_task9.svg | 381 +
doc/complex_dags/svg/task25_from_task9.svg | 376 +
doc/conf.py | 329 +
doc/contents.rst | 197 +
doc/decorators/active_if.rst | 110 +
doc/decorators/check_if_uptodate.rst | 68 +
doc/decorators/collate.rst | 154 +
doc/decorators/collate_ex.rst | 145 +
doc/decorators/combinations.rst | 153 +
doc/decorators/combinations_with_replacement.rst | 157 +
doc/decorators/decorators.rst | 296 +
doc/decorators/files.rst | 155 +
doc/decorators/files_ex.rst | 77 +
doc/decorators/files_re.rst | 130 +
doc/decorators/follows.rst | 82 +
doc/decorators/graphviz.rst | 92 +
doc/decorators/indicator_objects.rst | 547 +
doc/decorators/jobs_limit.rst | 73 +
doc/decorators/merge.rst | 64 +
doc/decorators/mkdir.rst | 220 +
doc/decorators/originate.rst | 79 +
doc/decorators/parallel.rst | 81 +
doc/decorators/permutations.rst | 158 +
doc/decorators/posttask.rst | 70 +
doc/decorators/product.rst | 192 +
doc/decorators/split.rst | 92 +
doc/decorators/subdivide.rst | 189 +
doc/decorators/todo.sphinx | 40 +
doc/decorators/transform.rst | 176 +
doc/decorators/transform_ex.rst | 190 +
doc/design.rst | 304 +
doc/drmaa_wrapper_functions.rst | 234 +
doc/examples/bioinformatics/index.rst | 290 +
doc/examples/bioinformatics/part1_code.rst | 70 +
doc/examples/bioinformatics/part2.rst | 152 +
doc/examples/bioinformatics/part2_code.rst | 267 +
doc/examples/paired_end_data.py.rst | 122 +
doc/faq.rst | 980 +
doc/gallery.rst | 63 +
doc/global.inc | 4 +
doc/glossary.rst | 81 +
doc/history.rst | 733 +
doc/images/bestiary_combinatorics.png | Bin 0 -> 185818 bytes
doc/images/bestiary_decorators.png | Bin 0 -> 246675 bytes
doc/images/bestiary_transform.png | Bin 0 -> 105988 bytes
doc/images/colour_schemes.svg | 799 +
doc/images/complete.svg | 144 +
doc/images/complex_conceptual.jpg | Bin 0 -> 90369 bytes
doc/images/complex_file_dag.jpg | Bin 0 -> 168939 bytes
doc/images/complex_ruffus.jpg | Bin 0 -> 187909 bytes
doc/images/design.file_based_workflow.dot | 59 +
doc/images/design.file_based_workflow.png | Bin 0 -> 20956 bytes
doc/images/design.task_based_workflow.dot | 19 +
doc/images/design.task_based_workflow.png | Bin 0 -> 17307 bytes
doc/images/examples_bioinformatics_before.jpg | Bin 0 -> 21238 bytes
doc/images/examples_bioinformatics_complete.jpg | Bin 0 -> 17064 bytes
doc/images/examples_bioinformatics_error.png | Bin 0 -> 66668 bytes
doc/images/examples_bioinformatics_merge.jpg | Bin 0 -> 51653 bytes
doc/images/examples_bioinformatics_pipeline.jpg | Bin 0 -> 53868 bytes
doc/images/examples_bioinformatics_split.jpg | Bin 0 -> 62009 bytes
doc/images/examples_bioinformatics_transform.jpg | Bin 0 -> 53189 bytes
doc/images/flowchart_colour_schemes.png | Bin 0 -> 107483 bytes
doc/images/flowchart_colour_schemes.svg | 895 +
doc/images/front_page_flowchart.png | Bin 0 -> 24437 bytes
doc/images/gallery/gallery_big_pipeline.png | Bin 0 -> 1010747 bytes
doc/images/gallery/gallery_big_pipeline.svg | 2699 +++
doc/images/gallery/gallery_dless.png | Bin 0 -> 110281 bytes
doc/images/gallery/gallery_dless.py | 531 +
doc/images/gallery/gallery_dless.svg | 197 +
doc/images/gallery/gallery_rna_seq.png | Bin 0 -> 128916 bytes
doc/images/gallery/gallery_rna_seq.svg | 672 +
doc/images/gallery/gallery_snp_annotation.png | Bin 0 -> 133568 bytes
doc/images/gallery/gallery_snp_annotation.svg | 470 +
.../gallery_snp_annotation_consequences.png | Bin 0 -> 136378 bytes
.../gallery_snp_annotation_consequences.svg | 471 +
doc/images/history_html_flowchart.png | Bin 0 -> 126287 bytes
doc/images/history_html_flowchart.svg | 269 +
doc/images/jobs_limit.png | Bin 0 -> 236324 bytes
doc/images/jobs_limit2.png | Bin 0 -> 216405 bytes
doc/images/logo.jpg | Bin 0 -> 86526 bytes
doc/images/manual_dependencies_flowchart.png.py | 95 +
doc/images/manual_dependencies_flowchart1.dot | 26 +
doc/images/manual_dependencies_flowchart1.png | Bin 0 -> 14540 bytes
doc/images/manual_dependencies_flowchart2.dot | 20 +
doc/images/manual_dependencies_flowchart2.png | Bin 0 -> 16504 bytes
doc/images/manual_dependencies_flowchart3.dot | 20 +
doc/images/manual_dependencies_flowchart3.png | Bin 0 -> 20983 bytes
doc/images/manual_dependencies_flowchart4.dot | 30 +
doc/images/manual_dependencies_flowchart4.png | Bin 0 -> 23442 bytes
doc/images/manual_dependencies_flowchart_intro.png | Bin 0 -> 30945 bytes
.../manual_dependencies_flowchart_intro.png.py | 98 +
.../manual_dependencies_flowchart_legend.dot | 30 +
.../manual_dependencies_flowchart_legend.png | Bin 0 -> 28596 bytes
doc/images/manual_exceptions.png | Bin 0 -> 34800 bytes
doc/images/manual_follows1.jpg | Bin 0 -> 15882 bytes
doc/images/manual_follows1.png | Bin 0 -> 10161 bytes
doc/images/manual_follows1.png.py | 19 +
doc/images/manual_split_merge_example.jpg | Bin 0 -> 60508 bytes
doc/images/manual_transform.png | Bin 0 -> 29327 bytes
doc/images/manual_transform_complex_outputs.png | Bin 0 -> 18713 bytes
doc/images/pretty_flowchart.png | Bin 0 -> 23259 bytes
doc/images/pretty_flowchart.png.py | 46 +
doc/images/simple_tutorial_complex_flowchart.dot | 67 +
doc/images/simple_tutorial_complex_flowchart.png | Bin 0 -> 78472 bytes
doc/images/simple_tutorial_complex_flowchart.py | 437 +
.../simple_tutorial_complex_flowchart_error.dot | 70 +
.../simple_tutorial_complex_flowchart_error.png | Bin 0 -> 64334 bytes
...e_tutorial_complex_flowchart_error_with_key.png | Bin 0 -> 70694 bytes
.../simple_tutorial_complex_flowchart_with_key.png | Bin 0 -> 77870 bytes
doc/images/simple_tutorial_decorator_syntax.png | Bin 0 -> 13200 bytes
doc/images/simple_tutorial_files1.png | Bin 0 -> 21429 bytes
doc/images/simple_tutorial_files2.png | Bin 0 -> 12967 bytes
doc/images/simple_tutorial_files3.png | Bin 0 -> 24938 bytes
doc/images/simple_tutorial_files4.png | Bin 0 -> 19949 bytes
doc/images/simple_tutorial_files5.png | Bin 0 -> 24862 bytes
doc/images/simple_tutorial_flowchart_legend.dot | 24 +
doc/images/simple_tutorial_flowchart_legend.png | Bin 0 -> 44724 bytes
doc/images/simple_tutorial_follows.png | Bin 0 -> 18415 bytes
doc/images/simple_tutorial_hello_world.png | Bin 0 -> 7447 bytes
doc/images/simple_tutorial_hello_world_output.png | Bin 0 -> 9761 bytes
doc/images/simple_tutorial_intro_follows.png | Bin 0 -> 26312 bytes
doc/images/simple_tutorial_merge1.png | Bin 0 -> 29639 bytes
doc/images/simple_tutorial_merge2.png | Bin 0 -> 10077 bytes
doc/images/simple_tutorial_pipeline_printout1.png | Bin 0 -> 32970 bytes
doc/images/simple_tutorial_pipeline_printout2.png | Bin 0 -> 108504 bytes
doc/images/simple_tutorial_pipeline_printout3.png | Bin 0 -> 114387 bytes
doc/images/simple_tutorial_posttask.png | Bin 0 -> 23863 bytes
doc/images/simple_tutorial_split.png | Bin 0 -> 30237 bytes
doc/images/simple_tutorial_stage4_after.png | Bin 0 -> 16750 bytes
doc/images/simple_tutorial_stage4_before.png | Bin 0 -> 13502 bytes
doc/images/simple_tutorial_stage5_after.png | Bin 0 -> 16256 bytes
doc/images/simple_tutorial_stage5_before.png | Bin 0 -> 16498 bytes
doc/images/simple_tutorial_stage5_flowchart.png | Bin 0 -> 26034 bytes
doc/images/simple_tutorial_step2_ex1.png | Bin 0 -> 34182 bytes
doc/images/simple_tutorial_step2_ex2.png | Bin 0 -> 12941 bytes
doc/images/simple_tutorial_step3 copy.png | Bin 0 -> 14791 bytes
doc/images/simple_tutorial_step3.jpg | Bin 0 -> 40335 bytes
doc/images/simple_tutorial_step4.png | Bin 0 -> 8271 bytes
doc/images/simple_tutorial_step5.png | Bin 0 -> 31481 bytes
doc/images/simple_tutorial_step5_sans_key.png | Bin 0 -> 25436 bytes
doc/images/simple_tutorial_transform.png | Bin 0 -> 39019 bytes
...mple_tutorial_zoo_animals_formatter_example.jpg | Bin 0 -> 115697 bytes
doc/images/src/Backup_of_complex_file_dag.cdr | Bin 0 -> 32325 bytes
doc/images/src/Backup_of_complex_pipeline.cdr | Bin 0 -> 43907 bytes
doc/images/src/complex_conceptual.cdr | Bin 0 -> 22310 bytes
doc/images/src/complex_file_dag.cdr | Bin 0 -> 44056 bytes
doc/images/src/complex_pipeline.cdr | Bin 0 -> 24907 bytes
doc/images/src/key.cdr | Bin 0 -> 21879 bytes
doc/images/theoretical_pipeline_schematic.png | Bin 0 -> 13895 bytes
doc/images/transform_1_to_1_example.png | Bin 0 -> 108323 bytes
doc/images/tutorial_complete.jpg | Bin 0 -> 12008 bytes
doc/images/tutorial_force_from_task1.jpg | Bin 0 -> 16205 bytes
doc/images/tutorial_four_stage_pipeline.jpg | Bin 0 -> 15302 bytes
doc/images/tutorial_key.jpg | Bin 0 -> 76358 bytes
doc/images/tutorial_key.png | Bin 0 -> 48636 bytes
doc/images/tutorial_maximal_mode.jpg | Bin 0 -> 14882 bytes
doc/images/tutorial_minimal_mode.jpg | Bin 0 -> 12303 bytes
doc/images/tutorial_pipeline_key.jpg | Bin 0 -> 76358 bytes
doc/images/tutorial_ruffus_files.jpg | Bin 0 -> 485002 bytes
doc/images/tutorial_step1_decorator_syntax.png | Bin 0 -> 11204 bytes
doc/images/tutorial_step1_follows.png | Bin 0 -> 31160 bytes
doc/images/web_front_page.py | 347 +
doc/images/wikimedia_bandedkrait.jpg | Bin 0 -> 170650 bytes
doc/images/wikimedia_cyl_ruffus.jpg | Bin 0 -> 104896 bytes
doc/implementation_notes.rst | 442 +
doc/installation.rst | 79 +
doc/make.bat | 112 +
doc/manual_follows1.png | Bin 0 -> 10161 bytes
doc/pipeline_functions.rst | 689 +
doc/propset | 5 +
doc/proxy_logger.rst | 24 +
doc/regenerate_figures | 6 +
.../example_scripts/complicated_example.py | 527 +
.../example_scripts/intermediate_example.py | 313 +
.../example_scripts/play_with_colours.py | 268 +
doc/static_data/example_scripts/ruffus_template.py | 270 +
doc/static_data/example_scripts/simpler.py | 260 +
doc/static_data/ruffus.css | 327 +
doc/static_data/ruffus.pdf | Bin 0 -> 4513083 bytes
doc/task.rst | 138 +
doc/todo.rst | 500 +
doc/tutorials/new_tutorial/active_if.rst | 149 +
doc/tutorials/new_tutorial/check_if_uptodate.rst | 89 +
doc/tutorials/new_tutorial/checkpointing.rst | 400 +
doc/tutorials/new_tutorial/checkpointing_code.rst | 23 +
doc/tutorials/new_tutorial/combinatorics.rst | 442 +
doc/tutorials/new_tutorial/combinatorics_code.rst | 308 +
doc/tutorials/new_tutorial/command_line.rst | 352 +
.../new_tutorial/decorators_compendium.rst | 154 +
doc/tutorials/new_tutorial/dependencies.rst | 110 +
doc/tutorials/new_tutorial/deprecated_files.rst | 238 +
doc/tutorials/new_tutorial/deprecated_files_re.rst | 145 +
doc/tutorials/new_tutorial/exceptions.rst | 191 +
doc/tutorials/new_tutorial/flowchart_colours.rst | 61 +
.../new_tutorial/flowchart_colours_code.rst | 288 +
doc/tutorials/new_tutorial/inputs.rst | 239 +
doc/tutorials/new_tutorial/inputs_code.rst | 229 +
doc/tutorials/new_tutorial/introduction.rst | 399 +
doc/tutorials/new_tutorial/introduction_code.rst | 94 +
.../new_tutorial/list_of_ruffus_names.rst | 77 +
doc/tutorials/new_tutorial/logging.rst | 221 +
doc/tutorials/new_tutorial/logging_code.rst | 55 +
.../new_tutorial/manual_chapter_numbers.inc | 30 +
doc/tutorials/new_tutorial/manual_contents.rst | 64 +
doc/tutorials/new_tutorial/merge.rst | 140 +
doc/tutorials/new_tutorial/merge_code.rst | 147 +
doc/tutorials/new_tutorial/mkdir.rst | 152 +
doc/tutorials/new_tutorial/mkdir_code.rst | 113 +
doc/tutorials/new_tutorial/multiprocessing.rst | 293 +
.../new_tutorial/multiprocessing_code.rst | 183 +
doc/tutorials/new_tutorial/onthefly.rst | 192 +
doc/tutorials/new_tutorial/onthefly_code.rst | 328 +
doc/tutorials/new_tutorial/originate.rst | 92 +
doc/tutorials/new_tutorial/originate_code.rst | 71 +
doc/tutorials/new_tutorial/output_file_names.rst | 560 +
.../new_tutorial/output_file_names_code.rst | 248 +
doc/tutorials/new_tutorial/parallel.rst | 63 +
doc/tutorials/new_tutorial/pipeline_printout.rst | 215 +
.../new_tutorial/pipeline_printout_code.rst | 203 +
.../new_tutorial/pipeline_printout_graph.rst | 170 +
.../new_tutorial/pipeline_printout_graph_code.rst | 109 +
doc/tutorials/new_tutorial/posttask.rst | 122 +
doc/tutorials/new_tutorial/split.rst | 233 +
doc/tutorials/new_tutorial/split_code.rst | 115 +
doc/tutorials/new_tutorial/subdivide_collate.rst | 234 +
.../new_tutorial/subdivide_collate_code.rst | 155 +
doc/tutorials/new_tutorial/transform.rst | 194 +
doc/tutorials/new_tutorial/transform_code.rst | 99 +
.../new_tutorial/transform_in_parallel.rst | 394 +
.../new_tutorial/transform_in_parallel_code.rst | 366 +
doc/why_ruffus.rst | 37 +
ruffus/__init__.py | 38 +
ruffus/adjacent_pairs_iterate.py | 68 +
ruffus/cmdline.py | 827 +
ruffus/combinatorics.py | 82 +
ruffus/dbdict.py | 459 +
ruffus/drmaa_wrapper.py | 462 +
ruffus/file_name_parameters.py | 1399 ++
ruffus/graph.py | 1151 +
ruffus/print_dependencies.py | 623 +
ruffus/proxy_logger.py | 397 +
ruffus/ruffus_exceptions.py | 346 +
ruffus/ruffus_utility.py | 1434 ++
ruffus/ruffus_version.py | 27 +
ruffus/task.py | 4146 ++++
.../auto_generated_pipeline_examples/parallel.py | 228 +
.../auto_generated_pipeline_examples/simple.py | 253 +
.../auto_generated_pipeline_examples/simpler.py | 269 +
ruffus/test/complicated_example.py | 531 +
.../create_test_script_from_dependency_tree.py | 332 +
ruffus/test/draw_specified_dependency_tree.py | 208 +
ruffus/test/five_second.py | 17 +
ruffus/test/play_with_colours.py | 282 +
ruffus/test/qrsh_workaround.py | 50 +
ruffus/test/simpler.py | 234 +
ruffus/test/simpler_at_runtime.py | 253 +
ruffus/test/simpler_with_shared_logging.py | 322 +
ruffus/test/test_N_x_M_and_collate.py | 425 +
ruffus/test/test_active_if.py | 218 +
ruffus/test/test_branching_dependencies.py | 452 +
ruffus/test/test_cmdline.py | 276 +
ruffus/test/test_collate.py | 257 +
ruffus/test/test_combinatorics.py | 571 +
ruffus/test/test_ctrl_c_exceptions.py | 94 +
ruffus/test/test_drmaa.py | 9 +
ruffus/test/test_empty_files_decorator.py | 214 +
ruffus/test/test_exceptions.py | 59 +
ruffus/test/test_file_name_parameters.py | 1518 ++
ruffus/test/test_files_decorator.py | 277 +
ruffus/test/test_files_post_merge.py | 299 +
ruffus/test/test_filesre_combine.py | 242 +
ruffus/test/test_filesre_split_and_combine.py | 296 +
ruffus/test/test_follows_mkdir.py | 203 +
ruffus/test/test_graphviz.py | 229 +
..._inputs_with_multiple_args_raising_exception.py | 196 +
ruffus/test/test_job_completion_checksums.py | 362 +
ruffus/test/test_job_history_with_exceptions.py | 235 +
ruffus/test/test_mkdir.py | 115 +
ruffus/test/test_pausing.py | 386 +
ruffus/test/test_regex_error_messages.py | 444 +
ruffus/test/test_ruffus_utility.py | 965 +
ruffus/test/test_softlink_uptodate.py | 156 +
ruffus/test/test_split_and_combine.py | 278 +
ruffus/test/test_split_regex_and_collate.py | 218 +
ruffus/test/test_task_file_dependencies.py | 149 +
ruffus/test/test_task_misc.py | 61 +
ruffus/test/test_transform_add_inputs.py | 232 +
ruffus/test/test_transform_inputs.py | 217 +
ruffus/test/test_transform_with_no_re_matches.py | 220 +
ruffus/test/test_tutorial7.py | 124 +
ruffus/test/test_unicode_filenames.py | 217 +
ruffus/test/test_verbosity.py | 137 +
setup.py | 202 +
910 files changed, 183062 insertions(+)
diff --git a/CHANGES.TXT b/CHANGES.TXT
new file mode 100644
index 0000000..85bd8d6
--- /dev/null
+++ b/CHANGES.TXT
@@ -0,0 +1,742 @@
+= v. 2.4.1=
+ 2014-04-26
+ * Breaking changes to drmaa API suggested by Bernie Pope to ensure portability across different drmaa implementations (SGE, SLURM etc.)
+= v. 2.4=
+ 2014-04-03
+ ============================================================================================================================================================
+ Additions to ``ruffus`` namespace
+ ============================================================================================================================================================
+
+ * :ref:`formatter() <new_manual.formatter>` (:ref:`syntax <decorators.formatter>`)
+ * :ref:`originate() <new_manual.originate>` (:ref:`syntax <decorators.originate>`)
+ * :ref:`subdivide() <new_manual.subdivide>` (:ref:`syntax <decorators.subdivide>`)
+
+ ============================================================================================================================================================
+ Installation: use pip
+ ============================================================================================================================================================
+
+ ::
+
+ sudo pip install ruffus --upgrade
+
+ ============================================================================================================================================================
+ 1) Command Line support
+ ============================================================================================================================================================
+
+ The optional ``Ruffus.cmdline`` module provides support for a set of common command
+ line arguments which make writing *Ruffus* pipelines much more pleasant.
+ See :ref:`manual <new_manual.cmdline>`
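+
+ A minimal sketch of the usual entry point, assuming the ``cmdline.get_argparse()`` and ``cmdline.run()`` helpers described in the manual (the ``description`` text and placeholder comments are illustrative only)::
+
+     import ruffus.cmdline as cmdline
+
+     parser = cmdline.get_argparse(description="What does this pipeline do?")
+     options = parser.parse_args()
+
+     #   ... Ruffus task functions decorated with @transform / @originate etc. go here ...
+
+     #   dispatches to pipeline_run / pipeline_printout / pipeline_printout_graph
+     #   according to the chosen command line options
+     cmdline.run(options)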
+
+ ============================================================================================================================================================
+ 2) Checkpointing
+ ============================================================================================================================================================
+
+ * Contributed by **Jake Biesinger**
+ * See :ref:`Manual <new_manual.checkpointing>`
+ * Uses a fault resistant sqlite database file to log i/o files, and additional checksums
+ * defaults to checking file timestamps, which are stored in a history file in the current working directory (``ruffus_utility.RUFFUS_HISTORY_FILE = '.ruffus_history.sqlite'``)
+ * :ref:`pipeline_run(..., checksum_level = N, ...) <pipeline_functions.pipeline_run>`
+
+ * level 0 = CHECKSUM_FILE_TIMESTAMPS : Classic mode. Use only file timestamps (no checksum file will be created)
+ * level 1 = CHECKSUM_HISTORY_TIMESTAMPS : Also store timestamps in a database after successful job completion
+ * level 2 = CHECKSUM_FUNCTIONS : As above, plus a checksum of the pipeline function body
+ * level 3 = CHECKSUM_FUNCTIONS_AND_PARAMS : As above, plus a checksum of the pipeline function default arguments and the additional arguments passed in by task decorators
+
+ * defaults to level 1
+
+ * Can speed up trivial tasks: Previously Ruffus always added an extra 1 second pause between tasks
+ to guard against file systems (Ext3, FAT, some NFS) with low timestamp granularity.
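+
+ A minimal sketch of choosing a checksum level when running the pipeline; the numeric value corresponds to the constants listed above::
+
+     from ruffus import pipeline_run
+
+     #   level 2: re-run a job if its timestamps, history or the task function body have changed
+     pipeline_run(checksum_level=2)
+
+     #   checksum_level=0 restores the classic timestamp-only behaviour (no history file is written)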
+
+
+ ============================================================================================================================================================
+ 3) :ref:`subdivide() <new_manual.subdivide>` (:ref:`syntax <decorators.subdivide>`)
+ ============================================================================================================================================================
+
+ * Takes a list of input jobs (like :ref:`@transform <decorators.transform>`) but further splits each into multiple jobs, i.e. it is a **many->even more** relationship
+ * synonym for the deprecated ``@split(..., regex(), ...)``
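+
+ A minimal sketch; the ``*.big_input`` glob, the ``.chunk`` suffix and the one-chunk-per-line split are illustrative only::
+
+     from ruffus import subdivide, formatter
+
+     @subdivide("*.big_input",            # each matching file is one input job...
+                formatter(),
+                "{basename[0]}.*.chunk",  # ...split into an indeterminate number of chunk files
+                "{basename[0]}")          # extra argument: the output file name stem
+     def split_into_chunks(input_file, output_files, output_stem):
+         with open(input_file) as ii:
+             for i, line in enumerate(ii):
+                 with open("%s.%d.chunk" % (output_stem, i), "w") as oo:
+                     oo.write(line)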
+
+ ========================================================================================================================================================================================================================================================================================================================
+ 4) :ref:`mkdir() <new_manual.mkdir>` (:ref:`syntax <decorators.mkdir>`) with :ref:`formatter() <new_manual.formatter>`, :ref:`suffix() <decorators.suffix>` and :ref:`regex() <decorators.regex>`
+ ========================================================================================================================================================================================================================================================================================================================
+
+ * allows directories to be created depending on runtime parameters or the output of previous tasks
+ * behaves just like :ref:`@transform <decorators.transform>` but with its own (internal) function which does the actual work of making a directory
+ * Previous behavior is retained: ``mkdir`` continues to work seamlessly inside :ref:`@follows <decorators.follows>`
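+
+ A minimal sketch stacking ``@mkdir`` above ``@transform`` so that each per-sample directory exists before the job that writes into it runs; the ``*.fastq`` glob and the line-counting body are illustrative only::
+
+     from ruffus import transform, mkdir, formatter
+
+     @mkdir("*.fastq", formatter(), "{basename[0]}.dir")
+     @transform("*.fastq", formatter(), "{basename[0]}.dir/{basename[0]}.counts")
+     def count_reads(input_file, output_file):
+         with open(input_file) as ii, open(output_file, "w") as oo:
+             oo.write("%d\n" % sum(1 for _ in ii))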
+
+ ============================================================================================================================================================
+ 5) :ref:`originate() <new_manual.originate>` (:ref:`syntax <decorators.originate>`)
+ ============================================================================================================================================================
+
+ * Generates output files without dependencies from scratch (*ex nihilo*!)
+ * For the first step in a pipeline (see the sketch after this list)
+ * Task function obviously only takes output and not input parameters. (There *are* no inputs!)
+ * synonym for :ref:`@split(None,...) <decorators.split>`
+ * See :ref:`Summary <decorators.originate>` / :ref:`Manual <new_manual.originate>`
+
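+ A minimal sketch::
+
+     @originate(["a.start", "b.start", "c.start"])
+     def create_initial_files(output_file):
+         # no input parameters: the output is created from scratch
+         open(output_file, "w").close()
+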
+ ========================================================================================================================================================================================================================================================================================================================
+ 6) New flexible :ref:`formatter() <new_manual.formatter>` (:ref:`syntax <decorators.formatter>`) alternative to :ref:`regex() <decorators.regex>` & :ref:`suffix() <decorators.suffix>`
+ ========================================================================================================================================================================================================================================================================================================================
+
+ * Easy manipulation of path subcomponents in the style of `os.path.split() <http://docs.python.org/2/library/os.path.html#os.path.split>`__
+ * Regular expressions are no longer necessary for path manipulation
+ * Familiar python syntax
+ * Optional regular expression matches
+ * Can refer to any file in the list of N input files (not only the first file, as with ``regex(...)``)
+ * Can even refer to individual letters within a match (see the sketch after this list)
+
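+ A minimal sketch (the task and file names are illustrative)::
+
+     #   "/data/run1/sample1.fastq.gz"  ->  "/analysis/sample1.matches"
+     @transform(previous_task,
+                formatter(r".+/(?P<SAMPLE>[^/]+)\.fastq\.gz$"),   # optional regular expression
+                "/analysis/{SAMPLE[0]}.matches")                  # named group from the first input file
+     def map_reads(input_file, output_file):
+         pass
+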
+ ============================================================================================================================================================
+ 7) Combinatorics (all vs. all decorators)
+ ============================================================================================================================================================
+
+ * :ref:`@product <new_manual.product>` (See `itertools.product <http://docs.python.org/2/library/itertools.html#itertools.product>`__)
+ * :ref:`@permutations <new_manual.permutations>` (See `itertools.permutations <http://docs.python.org/2/library/itertools.html#itertools.permutations>`__)
+ * :ref:`@combinations <new_manual.combinations>` (See `itertools.combinations <http://docs.python.org/2/library/itertools.html#itertools.combinations>`__)
+ * :ref:`@combinations_with_replacement <new_manual.combinations_with_replacement>` (See `itertools.combinations_with_replacement <http://docs.python.org/2/library/itertools.html#itertools.combinations_with_replacement>`__)
+ * in optional :ref:`combinatorics <new_manual.combinatorics>` module
+ * Only :ref:`formatter() <new_manual.formatter>` provides the necessary flexibility to construct the output. (:ref:`suffix() <decorators.suffix>` and :ref:`regex() <decorators.regex>` are not supported.)
+ * See :ref:`Summary <decorators.combinatorics>` / :ref:`Manual <new_manual.combinatorics>` (a minimal ``@product`` sketch follows this list)
+
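+ A minimal ``@product`` sketch (the wildcards and task name are illustrative); the first index in each
+ substitution selects the input set, the second the file within that set::
+
+     from ruffus import *
+     from ruffus.combinatorics import *
+
+     @product("*.design",   formatter(),
+              "*.protocol", formatter(),
+              "{basename[0][0]}_vs_{basename[1][0]}.result")
+     def all_vs_all(input_files, output_file):
+         pass
+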
+
+
+ ============================================================================================================================================================
+ 8) drmaa support and multithreading
+ ============================================================================================================================================================
+
+ * :ref:`ruffus.drmaa_wrapper.run_job() <new_manual.ruffus.drmaa_wrapper.run_job>` (:ref:`syntax <drmaa_wrapper.run_job>`)
+ * Optional helper module allows jobs to dispatch work to a computational cluster and wait until it completes.
+ * Requires ``pipeline_run(..., multithread = N)`` rather than ``multiprocess`` (see the sketch after this list)
+
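+ A minimal sketch (the command line and keyword values are illustrative; the shared drmaa session is set up once per pipeline)::
+
+     from ruffus.drmaa_wrapper import run_job, error_drmaa_job
+     import drmaa
+     my_drmaa_session = drmaa.Session()
+     my_drmaa_session.initialize()
+
+     @transform(previous_task, suffix(".in"), ".out")
+     def heavy_lifting(input_file, output_file):
+         stdout_res, stderr_res = run_job("my_program %s > %s" % (input_file, output_file),
+                                          job_name      = "heavy_lifting",
+                                          drmaa_session = my_drmaa_session)
+
+     pipeline_run(multithread = 5)
+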
+ ============================================================================================================================================================
+ 9) ``pipeline_run(...)`` and exceptions
+ ============================================================================================================================================================
+ See :ref:`Manual <new_manual.exceptions>`
+
+ * Optionally terminate pipeline after first exception
+ * Display exceptions without delay
+
+
+ ============================================================================================================================================================
+ 10) Miscellaneous
+ ============================================================================================================================================================
+
+ Better error messages for ``formatter()``, ``suffix()`` and ``regex()`` when using ``pipeline_printout(..., verbose >= 3, ...)``
+ * Error messages now show the mismatching regular expression and the offending file name
+ * Wrong capture group names or out-of-range indices raise an informative exception
+
+= v. 2.3=
+ _03/October/2011_
+ * ``@active_if`` turns off tasks at runtime
+ The design and initial implementation were contributed by Jacob Biesinger.
+
+ Takes one or more parameters which can be either booleans or functions or callable objects which return True / False::
+
+ run_if_true_1 = True
+ run_if_true_2 = False
+
+ @active_if(run_if_true_1, lambda: run_if_true_2)
+ def this_task_might_be_inactive():
+ pass
+
+ The expressions inside @active_if are evaluated each time
+ ``pipeline_run``, ``pipeline_printout`` or ``pipeline_printout_graph`` is called.
+
+ Dormant tasks behave as if they are up to date and have no output.
+
+ * Command line parsing
+ * Supports both argparse (python 2.7) and optparse (python 2.6):
+ * ``Ruffus.cmdline`` module is optional.
+ * See :ref:`manual <new_manual.cmdline>`
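+ A minimal sketch of the boilerplate this removes (assuming the standard defaults)::
+
+     from ruffus import *
+
+     parser  = cmdline.get_argparse(description = "My pipeline")
+     options = parser.parse_args()
+
+     #  <pipeline task definitions go here>
+
+     cmdline.run(options)
+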
+ * Optionally terminate pipeline after first exception
+ To have all exceptions interrupt immediately::
+
+ pipeline_run(..., exceptions_terminate_immediately = True)
+
+ By default ruffus accumulates ``NN`` errors before interrupting the pipeline prematurely. ``NN`` is the specified parallelism for ``pipeline_run(..., multiprocess = NN)``.
+
+ Otherwise, a pipeline will only be interrupted immediately if exceptions of type ``ruffus.JobSignalledBreak`` are thrown.
+
+ * Display exceptions without delay
+
+ By default, Ruffus re-throws exceptions in ensemble after pipeline termination.
+
+ To see exceptions as they occur::
+
+ pipeline_run(..., log_exceptions = True)
+
+ ``logger.error(...)`` will be invoked with the string representation of each exception and the associated stack trace.
+
+ The default logger prints to sys.stderr, but this can be changed to any class from the logging module or compatible object via ``pipeline_run(..., logger = ???)``
+
+ * Improved ``pipeline_printout()``
+
+ * `@split` operations now show the 1->many output in pipeline_printout
+
+ This makes it clearer that ``@split`` is creating multiple output parameters (rather than a single output parameter consisting of a list)::
+
+ Task = split_animals
+ Job = [None
+ -> cows
+ -> horses
+ -> pigs
+ , any_extra_parameters]
+ * File dates and times are displayed in human-readable form, and out-of-date files are flagged with asterisks.
+
+
+
+= v. 2.2=
+ _21/July/2010_
+ * Simplifying **@transform** syntax with **suffix(...)**
+
+ Regular expressions within ruffus are very powerful, and can allow files to be moved
+ from one directory to another and renamed at will.
+
+ However, using consistent file extensions and
+ ``@transform(..., suffix(...))`` makes the code much simpler and easier to read.
+
+ Previously, ``suffix(...)`` did not cooperate well with ``inputs(...)``.
+ For example, finding the corresponding header file (".h") for the matching input
+ required a complicated ``regex(...)`` regular expression and ``inputs(...)``. This simple case,
+ e.g. matching "something.c" with "something.h", is now much easier in Ruffus.
+
+
+ For example:
+ ::
+
+ source_files = ["something.c", "more_code.c"]
+ @transform(source_files, suffix(".c"), add_inputs(r"\1.h", "common.h"), ".o")
+ def compile(input_files, output_file):
+ ( source_file,
+ header_file,
+ common_header) = input_files
+ # call compiler to make object file
+
+ This is equivalent to calling:
+
+ ::
+
+ compile(["something.c", "something.h", "common.h"], "something.o")
+ compile(["more_code.c", "more_code.h", "common.h"], "more_code.o")
+
+ The ``\1`` matches everything *but* the suffix and will be applied to both ``glob``\ s and file names.
+
+ For simplicity and compatibility with previous versions, there is always an implied r"\1" before
+ the output parameters. I.e. output parameter strings are *always* substituted.
+
+
+ * Tasks and glob in **inputs(...)** and **add_inputs(...)**
+
+ ``glob``\ s and tasks can be added as the prerequisites / input files using
+ ``inputs(...)`` and ``add_inputs(...)``. ``glob`` expansions will take place when the task
+ is run.
+
+ * Advanced form of **@split** with **regex**:
+
+ The standard ``@split`` divides one set of inputs into multiple outputs (the number of which
+ can be determined at runtime).
+
+ This is a ``one->many`` operation.
+
+
+ An advanced form of ``@split`` has been added which can split each of several files further.
+
+ In other words, this is a ``many->"many more"`` operation.
+
+ For example, given three starting files:
+ ::
+
+ original_files = ["original_0.file",
+ "original_1.file",
+ "original_2.file"]
+ We can split each into its own set of sub-sections:
+ ::
+
+     @split(original_files,
+            regex(r"original_(\d+)\.file"),   # match the original files
+            r"files.split.\1.*.fa",           # glob pattern for the output files
+            r"\1")                            # index of the original file
+     def split_files(input_file, output_files, original_index):
+         """
+         Code to split each input_file
+             "original_0.file" -> "files.split.0.*.fa"
+             "original_1.file" -> "files.split.1.*.fa"
+             "original_2.file" -> "files.split.2.*.fa"
+         """
+
+
+ This is, conceptually, the reverse of the @collate(...) decorator
+
+ * Ruffus will complain about unescaped regular expression special characters:
+
+ Ruffus uses "\\1" and "\\2" in regular expression substitutions. Even seasoned python
+ users may not remember that these have to be 'escaped' in strings. The best option is
+ to use 'raw' python strings e.g.
+
+ ::
+
+ r"\1_substitutes\2correctly\3four\4times"
+
+ Ruffus will throw an exception if it sees an unescaped "\\1" or "\\2" in a file name,
+ which should catch most of these bugs.
+
+ * Prettier output from *pipeline_printout_graph*
+
+ Changed to nicer colours, symbols etc. for a more professional look.
+ @split and @merge tasks now look different from @transform.
+ Colours, size and resolution are now fully customisable::
+
+     pipeline_printout_graph( # ...
+                              user_colour_scheme = {
+                                                     "colour_scheme_index" : 1,
+                                                     "Task to run"         : {"fillcolor" : "blue"},
+                                                   },
+                              pipeline_name = "My flowchart",
+                              size          = (11, 8),
+                              dpi           = 120)
+
+ An SVG bug in Firefox has been worked around so that font sizes are displayed correctly.
+
+
+= v. 2.1.1=
+ _12/March/2010_
+ * **@transform(.., add_inputs(...))**
+ ``add_inputs(...)`` allows the addition of extra input dependencies / parameters for each job.
+
+ Unlike ``inputs(...)``, the original input parameter is retained:
+ ::
+
+ from ruffus import *
+ @transform(["a.input", "b.input"], suffix(".input"), add_inputs("just.1.more","just.2.more"), ".output")
+ def task(i, o):
+ ""
+
+ Produces:
+ ::
+
+ Job = [[a.input, just.1.more, just.2.more] ->a.output]
+ Job = [[b.input, just.1.more, just.2.more] ->b.output]
+
+
+ Like ``inputs``, ``add_inputs`` accepts strings, tasks and ``glob``\ s.
+ This minor syntactic change promises to add much clarity to Ruffus code.
+ ``add_inputs()`` is available for ``@transform``, ``@collate`` and ``@split``.
+
+
+
+= v. 2.1.0=
+ _2/March/2010_
+ * **@jobs_limit**
+ Some tasks are resource intensive and too many jobs should not be run at the
+ same time. Examples include disk intensive operations such as unzipping, or
+ downloading from FTP sites.
+
+ Adding::
+
+ @jobs_limit(4)
+ @transform(new_data_list, suffix(".big_data.gz"), ".big_data")
+ def unzip(i, o):
+ "unzip code goes here"
+
+ would limit the unzip operation to 4 jobs at a time, even if the rest of the
+ pipeline runs highly in parallel.
+
+ (Thanks to Rob Young for suggesting this.)
+
+= v. 2.0.10=
+ _27/February/2010_
+ * **touch_files_only** option for **pipeline_run**
+
+ When the pipeline runs, task functions will not be run. Instead, the output files for
+ each job (in each task) will be ``touch``\ -ed if necessary.
+ This can be useful for simulating a pipeline run so that all files look as
+ if they are up-to-date.
+
+ Caveats:
+
+ * This may not work correctly where output files are only determined at runtime, e.g. with **@split**
+ * Only the output from pipelined jobs which are currently out-of-date will be ``touch``\ -ed.
+ In other words, the pipeline runs *as normal*; the only difference is that the
+ output files are ``touch``\ -ed instead of being created by the python task functions
+ which would otherwise have been called.
+
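+ For example, to mark every out-of-date file as up to date without doing any real work (a minimal sketch)::
+
+     pipeline_run(touch_files_only = True)
+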
+ * Parameter substitution for **inputs(...)**
+
+ The **inputs(...)** parameter in **@transform** and **@collate** can now take tasks and ``glob``\ s,
+ and these will be expanded appropriately (after regular expression replacement).
+
+ For example::
+
+ @transform("dir/a.input", regex(r"(.*)\/(.+).input"),
+ inputs((r"\1/\2.other", r"\1/*.more")), r"elsewhere/\2.output")
+ def task1(i, o):
+ """
+ Some pipeline task
+ """
+
+ Is equivalent to calling::
+
+ task1(("dir/a.other", "dir/1.more", "dir/2.more"), "elsewhere/a.output")
+
+
+ Here::
+
+ r"\1/*.more"
+
+ is first converted to::
+
+ r"dir/*.more"
+
+ which matches::
+
+ "dir/1.more"
+ "dir/2.more"
+
+
+
+= v. 2.0.9=
+ _25/February/2010_
+ * Better display of logging output
+ * Advanced form of **@split**
+ This is an experimental feature.
+
+ Hitherto, **@split** only took 1 set of inputs (tasks/files/``glob``\ s) and split these
+ into an indeterminate number of outputs.
+
+ This is a one->many operation.
+
+ Sometimes it is desirable to take multiple input files, and split each of them further.
+
+ This is a many->many (more) operation.
+
+ It is possible to hack something together using **@transform** but downstream tasks would not
+ be aware that each job in **@transform** produces multiple outputs (rather than one input,
+ one output per job).
+
+ The syntax looks like::
+
+ @split(get_files, regex(r"(.+).original"), r"\1.*.split")
+ def split_files(i, o):
+ pass
+
+ If ``get_files()`` returned ``A.original``, ``B.original`` and ``C.original``,
+ ``split_files()`` might lead to the following operations::
+
+     A.original
+         -> A.1.split
+         -> A.2.split
+         -> A.3.split
+     B.original
+         -> B.1.split
+         -> B.2.split
+     C.original
+         -> C.1.split
+         -> C.2.split
+         -> C.3.split
+         -> C.4.split
+         -> C.5.split
+
+ Note that each input (``A/B/C.original``) can produce a number of outputs, the exact
+ number of which does not have to be pre-determined.
+ This is similar to **@split**.
+
+ Tasks following ``split_files`` will have ten inputs, corresponding to each of the
+ outputs from ``split_files``.
+
+ If **@transform** was used instead of **@split**, then tasks following ``split_files``
+ would only have 3 inputs.
+
+
+
+
+= v. 2.0.8=
+ _22/January/2010_
+ * File names can be in unicode
+ * File systems with 1 second timestamp granularity no longer cause problems.
+ * Now accepts unicode file names:
+ Change `isinstance(x,str)` to `isinstance(x, basestring)`
+ (Thanks to P.J. Davis for contributing this.)
+ * inputs(...) now raises an exception when passed multiple arguments.
+ If the input parameter is being passed a tuple, add an extra set of enclosing
+ brackets. Documentation updated accordingly.
+ (Thanks to Z. Harris for spotting this.)
+ * Tasks where regular expressions are incorrectly specified are a great source of frustration
+ and puzzlement.
+ Now, if no regular expression matches occur, a warning is printed.
+ (Thanks to C. Nellaker for suggesting this.)
+
+= v. 2.0.7=
+ _11/December/2009_
+ * Fixed: graph printout blew up because of a missing run time data error
+ (Thanks to A. Heger for reporting this!)
+
+
+= v. 2.0.6=
+ _10/December/2009_
+ * several minor bugs
+ * better error messages when encountering decorator problems while checking if the pipeline is up to date
+ * Exception when output specifications in @split were expanded (unnecessarily) in logging.
+ (Thanks to N. Spies for reporting this!)
+
+= v. 2.0.4=
+ _22/November/2009_
+ * Bug Fix
+
+ * task.get_job_names() dies for jobs with no parameters
+ * JobSignalledBreak was not exported
+
+= v. 2.0.3=
+ _18/November/2009_
+ * @transform accepts single file names. Thanks Chris N.
+
+= v. 2.0.2=
+ _18/November/2009_
+ * pipeline_printout output much prettier
+ * pipeline_run at high verbose levels
+ Shows which tasks are being checked
+ to see if they are up-to-date or not
+ * New tutorial / manual
+ * pretty code figures
+
+= v. 2.0.1=
+ _18/November/2009_
+ All unit tests passed
+ * Numerous bugs to do with ordering of glob / job output consistency
+
+= v. 2.0.1 beta4=
+ _16/November/2009_
+ * Fixed problems with tasks depending on @split
+
+= v. 2.0 beta=
+ _30/October/2009_
+ With the experience and feedback over the past few months, I have reworked **Ruffus**
+ completely, mainly to make the syntax more transparent, with fewer gotchas.
+ Previous limitations to do with parameters have been removed.
+ The experience with what *Ruffus* calls "Indicator Objects" has been very positive
+ and there are more of them.
+ These are dummy classes with obvious names like "regex" or "suffix" which indicate the
+ type of optional parameters much like named parameters.
+
+ * Revamped documentation:
+
+ * Rewritten tutorial
+ * Comprehensive manual
+ * New syntax help
+
+ * Major redesign. New decorators include
+
+ * :ref:`@split <manual.split>`
+ * :ref:`@transform <manual.transform>`
+ * :ref:`@merge <manual.merge>`
+ * :ref:`@collate <manual.collate>`
+
+ * Major redesign. Decorator *inputs* can mix
+
+ * Output from previous tasks
+ * |glob|_ patterns e.g. ``*.txt``
+ * Files names
+ * Any other data type
+
+ * Deprecated Decorators:
+
+ * @files_re
+
+ Functionality is divided among the new decorators
+
+ * New Features
+
+ * Files can be chained from task to task; implicit dependencies are inferred automatically
+ * Limitations on parameters removed. Any degree of nesting is allowed.
+ * Strings containing glob letters ``[]?*`` are automatically inferred as globs and expanded
+ * Input and output parameters containing strings are assumed to be filenames, whatever nested data structures they are found in
+
+ * Documentation
+
+ * New documentation almost complete
+ * New Simplified 7 step tutorial
+ * New manual work in progress
+
+ * Bug Fix
+
+ * Scheduling errors
+
+= v. 1.1.4=
+ _15/October/2009_
+
+ * New Feature
+
+ * Tasks can get their input by automatically chaining to the output from one or more parent tasks using the `@files_re` decorator
+ * Added example showing how files can be split up into multiple jobs and then recombined
+ # Run `test/test_filesre_split_and_combine.py` with `-v|--verbose` `-s|--start_again`
+ # Run with `-D|--debug` to test.
+ * Documentation to follow
+
+ * Bug Fix
+
+ * Scheduling race conditions
+
+= v. 1.1.3=
+ _14/October/2009_
+
+ * Bug Fix
+
+ * Minor (but show stopping) bug in task.generic_job_descriptor
+
+= v. 1.1.2=
+ _9/October/2009_
+
+ * Bug Fix
+
+ * Nasty (long-standing) bug where single-job tasks decorated only with `@follows(mkdir(...))` were caught in an infinite loop
+
+ * Code Changes
+
+ * Add example of combining multiple input files depending on a regular expression pattern.
+ # Run `test/test_filesre_combine.py` with -v (verbose)
+ # Run with -D (debug) to test.
+
+
+
+= v. 1.1.1=
+ _8/October/2009_
+
+ * New Feature
+
+ * _Combine multiple input files using a regular expression_
+ * Added `combine` syntax to `@files_re` decorators:
+ * Documentation to follow...
+ * Example from `src/ruffus/test/test_branching_dependencies.py`:
+ ::
+
+         @files_re('*.*', r'(.*/)(.*\.[345]$)', combine(r'\1\2'), r'\1final.6')
+         def test(input_files, output_files):
+             pass
+
+ * will take all files in the current directory
+ * will identify files which end in `.3`, `.4` and `.5` as input files
+ * will use `final.6` as the output file
+ * `input_files == [a.3, a.4, b.3, b.5]` (for example)
+ * `output_files == [final.6]`
+
+ * Bug Fix
+
+ * All (known) bugs for running jobs from independent tasks in parallel
+
+
+
+= v. 1.0.9=
+ _8/October/2009_
+
+ * New Feature
+
+ _Multitasking independent tasks_
+ * In a major piece of retooling, jobs from independent tasks which do not depend on each other will be run in parallel.
+ * This involves major changes to the scheduling code.
+ * Please contact me asap if anything breaks.
+
+ * Code Changes
+ * Add example of independent tasks running concurrently in
+ `test/test_branching_dependencies.py`
+ * Run with -v (verbose) and -j 1 or -j 10 to show the indeterminacy of multiprocessing.
+ * Run with -D (debug) to test.
+
+= v. 1.0.8=
+ _12/August/2009_
+
+ * Documentation
+
+ * Errors fixed. Thanks to Matteo Bertini!
+
+ * Code Changes
+
+ * Added functions which print out job parameters more prettily.
+ * `task.shorten_filenames_encoder`
+ * `task.ignore_unknown_encoder`
+ * Parameters which look like file paths will only have the file part printed
+ (i.e. `"/a/b/c" -> 'c'`)
+ * Test scripts `simpler_with_shared_logging.py` and `test_follows_mkdir.py`
+ have been changed to test for this.
+
+
+= v. 1.0.7=
+ _17/June/2009_
+
+ * Code Changes
+
+ * Added `proxy_logger` module for accessing a shared log across multiple jobs in
+ different processes.
+
+= v. 1.0.6=
+ _12/June/2009_
+
+ * Bug Fix
+
+ * _Ruffus_ version module (`ruffus_version.py`) links fixed.
+ Soft links in Linux do not travel well.
+ * `mkdir` can now take a list of strings
+ (added a test case)
+
+ * Documentation
+
+ * Added history of changes
+
+= v. 1.0.5=
+ _11/June/2009_
+
+ * Bug Fix
+
+ * Changed "graph_printout" to `pipeline_printout_graph` in documentation.
+ This function had been renamed in the code but not in the documentation :-(
+
+ * Documentation
+
+ * Added example for sharing / synchronising data between jobs.
+ This shows how different jobs can write to a common log file while still leveraging the full power of _ruffus_.
+
+
+ * Code Changes
+
+ * The graph and print_dependencies modules are no longer exported by default from task.
+ Please email me if this breaks anything.
+ * More informative error message when referring to unadorned (without _Ruffus_ decorators) python functions as pipelined Tasks
+ * Added Ruffus version module `ruffus_version.py`
+
+
+
+= v. 1.0.4=
+ _05/June/2009_
+ * Bug fix:
+
+ * `task.task_names_to_tasks` did not include tasks specified by function rather than name
+ * `task.run_all_jobs_in_task` did not work properly without multiprocessing (# of jobs = 1)
+ * `task.pipeline_run` only uses multiprocessing pools if `multiprocess` (# of jobs) > 1
+
+ * Changes to allow python 2.4/2.5 to run
+
+ * `setup.py` changed to remove dependency
+ * `simplejson` can be loaded instead of python 2.6 `json` module
+ * Changed `NamedTemporaryFile` to `mkstemp` because the `delete` parameter is not available before python 2.6
+
+ * Windows programs
+ It is necessary to protect the "entry point" of the program under Windows.
+ Otherwise, new processes will be created recursively, like the magician's apprentice (see the sketch below).
+ See: http://docs.python.org/library/multiprocessing.html#multiprocessing-programming
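+
+ A minimal sketch of the guard (the pipeline itself is hypothetical)::
+
+     from ruffus import *
+
+     #  <pipeline tasks defined at module level>
+
+     if __name__ == "__main__":
+         # protect the entry point so that worker processes do not
+         # re-run pipeline_run() when the module is re-imported
+         pipeline_run(multiprocess = 4)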
+
+= v. 1.0.3=
+ _04/June/2009_
+ * Documentation
+
+
+ Including SGE `qrsh` workaround in FAQ.
+
+= v. 1.0.1=
+ _22/May/2009_
+ * Add simple tutorial.
+
+
+ No major bugs so far...!!
+
+= v. 1.0.0 beta =
+ _28/April/2009_
+
+ Initial Release in Oxford
+
diff --git a/LICENSE.TXT b/LICENSE.TXT
new file mode 100644
index 0000000..3c55ff0
--- /dev/null
+++ b/LICENSE.TXT
@@ -0,0 +1,17 @@
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
diff --git a/PKG-INFO b/PKG-INFO
new file mode 100644
index 0000000..2733964
--- /dev/null
+++ b/PKG-INFO
@@ -0,0 +1,151 @@
+Metadata-Version: 1.1
+Name: ruffus
+Version: 2.5
+Summary: Light-weight Python Computational Pipeline Management
+Home-page: http://www.ruffus.org.uk
+Author: Leo Goodstadt
+Author-email: ruffus_lib at llew.org.uk
+License: MIT
+Download-URL: https://pypi.python.org/pypi/ruffus
+Description:
+ ***************************************
+ Overview
+ ***************************************
+
+
+ The Ruffus module is a lightweight way to add support
+ for running computational pipelines.
+
+ Computational pipelines are often conceptually quite simple, especially
+ if we break down the process into simple stages, or separate **tasks**.
+
+ Each stage or **task** in a computational pipeline is represented by a python function.
+ Each python function can be called in parallel to run multiple **jobs**.
+
+ Ruffus was originally designed for use in bioinformatics to analyse multiple genome
+ data sets.
+
+ ***************************************
+ Documentation
+ ***************************************
+
+ Ruffus documentation can be found `here <http://www.ruffus.org.uk>`__ ,
+ with `download notes <http://www.ruffus.org.uk/installation.html>`__ ,
+ a `tutorial <http://www.ruffus.org.uk/tutorials/new_tutorial/introduction.html>`__ and
+ an `in-depth manual <http://www.ruffus.org.uk/tutorials/new_tutorial/manual_contents.html>`__ .
+
+
+ ***************************************
+ Background
+ ***************************************
+
+ The purpose of a pipeline is to determine automatically which parts of a multi-stage
+ process need to be run, and in what order, to reach an objective ("targets").
+
+ Computational pipelines, especially for analysing large scientific datasets, are
+ in widespread use.
+ However, even a conceptually simple series of steps can be difficult to set up and
+ maintain.
+
+ ***************************************
+ Design
+ ***************************************
+ The ruffus module has the following design goals:
+
+ * Lightweight
+ * Scalable / Flexible / Powerful
+ * Standard Python
+ * Unintrusive
+ * As simple as possible
+
+ ***************************************
+ Features
+ ***************************************
+
+ Automatic support for
+
+ * Managing dependencies
+ * Parallel jobs, including dispatching work to computational clusters
+ * Re-starting from arbitrary points, especially after errors (checkpointing)
+ * Display of the pipeline as a flowchart
+ * Managing complex pipeline topologies
+
+
+ ***************************************
+ A Simple example
+ ***************************************
+
+ Use the **@follows(...)** python decorator before the function definitions::
+
+ from ruffus import *
+ import sys
+
+ def first_task():
+ print "First task"
+
+ @follows(first_task)
+ def second_task():
+ print "Second task"
+
+ @follows(second_task)
+ def final_task():
+ print "Final task"
+
+
+
+
+ The ``@follows`` decorator indicates that the ``first_task`` function precedes ``second_task`` in
+ the pipeline.
+
+ The canonical Ruffus decorator is ``@transform``, which **transforms** data flowing down a
+ computational pipeline from one stage to the next.
+
+ ********
+ Usage
+ ********
+
+ Each stage or **task** in a computational pipeline is represented by a python function.
+ Each python function can be called in parallel to run multiple **jobs**.
+
+ 1. Import module::
+
+ import ruffus
+
+
+ 2. Annotate functions with python decorators
+
+ 3. Print dependency graph if necessary
+
+ - For a graphical flowchart in ``jpg``, ``svg``, ``dot``, ``png``, ``ps``, ``gif`` formats::
+
+ pipeline_printout_graph ("flowchart.svg")
+
+ This requires ``dot`` to be installed
+
+ - For a text printout of all jobs ::
+
+ pipeline_printout(sys.stdout)
+
+
+ 4. Run the pipeline::
+
+ pipeline_run()
+
+
+
+Keywords: make task pipeline parallel bioinformatics science
+Platform: UNKNOWN
+Classifier: Intended Audience :: End Users/Desktop
+Classifier: Development Status :: 5 - Production/Stable
+Classifier: Intended Audience :: Developers
+Classifier: Intended Audience :: Science/Research
+Classifier: Intended Audience :: Information Technology
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Programming Language :: Python
+Classifier: Topic :: Scientific/Engineering
+Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
+Classifier: Topic :: System :: Distributed Computing
+Classifier: Topic :: Software Development :: Build Tools
+Classifier: Topic :: Software Development :: Build Tools
+Classifier: Topic :: Software Development :: Libraries
+Classifier: Environment :: Console
diff --git a/USAGE.TXT b/USAGE.TXT
new file mode 100644
index 0000000..ea37e55
--- /dev/null
+++ b/USAGE.TXT
@@ -0,0 +1,81 @@
+Each stage or task in a computational pipeline is represented by a python function.
+Each python function can be called in parallel to run multiple jobs.
+
+1. Import module::
+
+ from ruffus import *
+
+
+2. Annotate functions with python decorators
+
+ e.g.::
+
+ from ruffus import *
+ import sys
+
+ def first_task():
+ print "First task"
+
+ @follows(first_task)
+ def second_task():
+ print "Second task"
+
+ @follows(second_task)
+ def final_task():
+ print "Final task"
+
+ Examples of decorators:
+
+ +------------------------+-------------------------------------+-----------------------------------------------------------------------------------------------------+
+ | Decorator | Purpose | Example |
+ +========================+=====================================+=====================================================================================================+
+ |**@follows** | - Indicate task dependency | ``@follows(task1, "task2")`` |
+ | | | |
+ | | - mkdir prerequisite shorthand | ``@follows(task1, mkdir("my/directory/for/results"))`` |
+ +------------------------+-------------------------------------+-----------------------------------------------------------------------------------------------------+
+ |**@files** | - I/O parameters | ``@files(parameter_list)`` |
+ | | | |
+ | | - skips up-to-date jobs | ``@files(parameter_generating_function)`` |
+ | | | |
+ | | | ``@files(input, output, other_params_for_a_single_job)`` |
+ +------------------------+-------------------------------------+-----------------------------------------------------------------------------------------------------+
+ |**@split** | - Splits a single input into | ``@split ( tasks_or_file_names, output_files, [extra_parameters,...] )`` |
+ | | multiple output | |
+ | | - Globs in output can specify an | |
+ | | indeterminate number of files. | |
+ +------------------------+-------------------------------------+-----------------------------------------------------------------------------------------------------+
+ |**@transform** | - Applies the task function to | ``@transform ( tasks_or_file_names, suffix(suffix_string), output_pattern, [extra_parameters,..] )``|
+ | | transform input data to output. | |
+ | | | ``@transform ( tasks_or_file_names, regex(regex_pattern), output_pattern, [extra_parameters,...] )``|
+ +------------------------+-------------------------------------+-----------------------------------------------------------------------------------------------------+
+ |**@merge** | - Merges multiple input | ``@merge (tasks_or_file_names, output, [extra_parameters,...] )`` |
+ | | into a single output. | |
+ +------------------------+-------------------------------------+-----------------------------------------------------------------------------------------------------+
+ |**@collate** | - Groups together sets of input | ``@collate ( tasks_or_file_names, regex(matching_regex), output_pattern, [extra_parameters,...] )`` |
+ | | into a few outputs | |
+ +------------------------+-------------------------------------+-----------------------------------------------------------------------------------------------------+
+ |**@posttask** | - Call function after task | ``@posttask(signal_task_completion_function)`` |
+ | | | |
+ | | - touch file shorthand | ``@posttask(touch_file("task1.completed")`` |
+ +------------------------+-------------------------------------+-----------------------------------------------------------------------------------------------------+
+
+3. Print dependency graph if necessary
+
+ - For a graphical flowchart in ``jpg``, ``svg``, ``dot``, ``png``, ``ps``, ``gif`` formats::
+
+    pipeline_printout_graph ( open("flowchart.svg", "w"),
+                              "svg",
+                              list_of_target_tasks)
+
+ This requires ``dot`` to be installed
+
+ - For a text printout of all jobs ::
+
+ pipeline_printout(sys.stdout, list_of_target_tasks)
+
+
+4. Run the pipeline::
+
+ pipeline_run(list_of_target_tasks, [list_of_tasks_forced_to_rerun, multiprocess = N_PARALLEL_JOBS])
+
+
diff --git a/doc/Makefile b/doc/Makefile
new file mode 100644
index 0000000..322cc5b
--- /dev/null
+++ b/doc/Makefile
@@ -0,0 +1,184 @@
+# Makefile for Sphinx documentation
+#
+
+# You can set these variables from the command line.
+SPHINXOPTS =
+SPHINXBUILD = sphinx-build
+PAPER =
+BUILDDIR = _build
+
+# User-friendly check for sphinx-build
+ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1)
+$(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/)
+endif
+
+# Internal variables.
+PAPEROPT_a4 = -D latex_paper_size=a4
+PAPEROPT_letter = -D latex_paper_size=letter
+ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
+# the i18n builder cannot share the environment and doctrees with the others
+I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
+
+.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext
+
+help:
+ @echo "Please use \`make <target>' where <target> is one of"
+ @echo " html to make standalone HTML files"
+ @echo " dirhtml to make HTML files named index.html in directories"
+ @echo " singlehtml to make a single large HTML file"
+ @echo " pickle to make pickle files"
+ @echo " json to make JSON files"
+ @echo " htmlhelp to make HTML files and a HTML help project"
+ @echo " qthelp to make HTML files and a qthelp project"
+ @echo " devhelp to make HTML files and a Devhelp project"
+ @echo " epub to make an epub"
+ @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
+ @echo " latexpdf to make LaTeX files and run them through pdflatex"
+ @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx"
+ @echo " text to make text files"
+ @echo " man to make manual pages"
+ @echo " texinfo to make Texinfo files"
+ @echo " info to make Texinfo files and run them through makeinfo"
+ @echo " gettext to make PO message catalogs"
+ @echo " changes to make an overview of all changed/added/deprecated items"
+ @echo " xml to make Docutils-native XML files"
+ @echo " pseudoxml to make pseudoxml-XML files for display purposes"
+ @echo " linkcheck to check all external links for integrity"
+ @echo " doctest to run all doctests embedded in the documentation (if enabled)"
+
+clean:
+ rm -rf $(BUILDDIR)/*
+
+html:
+ $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
+ @echo
+ @echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
+
+htmlsync:
+ $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
+ @echo
+ rsync -t --delete --recursive _build/html/* mus:/home/lg/public_html/oss/ruffus
+ rsync -t --delete --recursive _build/html/* u36264041 at www.llewgoodstadt.org.uk:/kunden/homepages/33/d100248119/htdocs/ruffus
+ @echo "Build finished. The HTML pages are in $(BUILDDIR)/html and copied to the test web directory."
+
+dirhtml:
+ $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
+ @echo
+ @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
+
+singlehtml:
+ $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
+ @echo
+ @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."
+
+pickle:
+ $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
+ @echo
+ @echo "Build finished; now you can process the pickle files."
+
+json:
+ $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
+ @echo
+ @echo "Build finished; now you can process the JSON files."
+
+htmlhelp:
+ $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
+ @echo
+ @echo "Build finished; now you can run HTML Help Workshop with the" \
+ ".hhp project file in $(BUILDDIR)/htmlhelp."
+
+qthelp:
+ $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
+ @echo
+ @echo "Build finished; now you can run "qcollectiongenerator" with the" \
+ ".qhcp project file in $(BUILDDIR)/qthelp, like this:"
+ @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/ruffus.qhcp"
+ @echo "To view the help file:"
+ @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/ruffus.qhc"
+
+devhelp:
+ $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
+ @echo
+ @echo "Build finished."
+ @echo "To view the help file:"
+ @echo "# mkdir -p $$HOME/.local/share/devhelp/ruffus"
+ @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/ruffus"
+ @echo "# devhelp"
+
+epub:
+ $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub
+ @echo
+ @echo "Build finished. The epub file is in $(BUILDDIR)/epub."
+
+latex:
+ $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
+ @echo
+ @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
+ @echo "Run \`make' in that directory to run these through (pdf)latex" \
+ "(use \`make latexpdf' here to do that automatically)."
+
+latexpdf:
+ $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
+ @echo "Running LaTeX files through pdflatex..."
+ $(MAKE) -C $(BUILDDIR)/latex all-pdf
+ @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
+
+latexpdfja:
+ $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
+ @echo "Running LaTeX files through platex and dvipdfmx..."
+ $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja
+ @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
+
+text:
+ $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
+ @echo
+ @echo "Build finished. The text files are in $(BUILDDIR)/text."
+
+man:
+ $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
+ @echo
+ @echo "Build finished. The manual pages are in $(BUILDDIR)/man."
+
+texinfo:
+ $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
+ @echo
+ @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
+ @echo "Run \`make' in that directory to run these through makeinfo" \
+ "(use \`make info' here to do that automatically)."
+
+info:
+ $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
+ @echo "Running Texinfo files through makeinfo..."
+ make -C $(BUILDDIR)/texinfo info
+ @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."
+
+gettext:
+ $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale
+ @echo
+ @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."
+
+changes:
+ $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
+ @echo
+ @echo "The overview file is in $(BUILDDIR)/changes."
+
+linkcheck:
+ $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
+ @echo
+ @echo "Link check complete; look for any errors in the above output " \
+ "or in $(BUILDDIR)/linkcheck/output.txt."
+
+doctest:
+ $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
+ @echo "Testing of doctests in the sources finished, look at the " \
+ "results in $(BUILDDIR)/doctest/output.txt."
+
+xml:
+ $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml
+ @echo
+ @echo "Build finished. The XML files are in $(BUILDDIR)/xml."
+
+pseudoxml:
+ $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml
+ @echo
+ @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml."
diff --git a/doc/_build/doctrees/cheatsheet.doctree b/doc/_build/doctrees/cheatsheet.doctree
new file mode 100644
index 0000000..58a84d3
Binary files /dev/null and b/doc/_build/doctrees/cheatsheet.doctree differ
diff --git a/doc/_build/doctrees/contents.doctree b/doc/_build/doctrees/contents.doctree
new file mode 100644
index 0000000..1cae842
Binary files /dev/null and b/doc/_build/doctrees/contents.doctree differ
diff --git a/doc/_build/doctrees/decorators/active_if.doctree b/doc/_build/doctrees/decorators/active_if.doctree
new file mode 100644
index 0000000..dc70fc8
Binary files /dev/null and b/doc/_build/doctrees/decorators/active_if.doctree differ
diff --git a/doc/_build/doctrees/decorators/check_if_uptodate.doctree b/doc/_build/doctrees/decorators/check_if_uptodate.doctree
new file mode 100644
index 0000000..64dd636
Binary files /dev/null and b/doc/_build/doctrees/decorators/check_if_uptodate.doctree differ
diff --git a/doc/_build/doctrees/decorators/collate.doctree b/doc/_build/doctrees/decorators/collate.doctree
new file mode 100644
index 0000000..52c84c3
Binary files /dev/null and b/doc/_build/doctrees/decorators/collate.doctree differ
diff --git a/doc/_build/doctrees/decorators/collate_ex.doctree b/doc/_build/doctrees/decorators/collate_ex.doctree
new file mode 100644
index 0000000..f555890
Binary files /dev/null and b/doc/_build/doctrees/decorators/collate_ex.doctree differ
diff --git a/doc/_build/doctrees/decorators/combinations.doctree b/doc/_build/doctrees/decorators/combinations.doctree
new file mode 100644
index 0000000..aef231a
Binary files /dev/null and b/doc/_build/doctrees/decorators/combinations.doctree differ
diff --git a/doc/_build/doctrees/decorators/combinations_with_replacement.doctree b/doc/_build/doctrees/decorators/combinations_with_replacement.doctree
new file mode 100644
index 0000000..f24a6e7
Binary files /dev/null and b/doc/_build/doctrees/decorators/combinations_with_replacement.doctree differ
diff --git a/doc/_build/doctrees/decorators/decorators.doctree b/doc/_build/doctrees/decorators/decorators.doctree
new file mode 100644
index 0000000..a820786
Binary files /dev/null and b/doc/_build/doctrees/decorators/decorators.doctree differ
diff --git a/doc/_build/doctrees/decorators/files.doctree b/doc/_build/doctrees/decorators/files.doctree
new file mode 100644
index 0000000..a7186d3
Binary files /dev/null and b/doc/_build/doctrees/decorators/files.doctree differ
diff --git a/doc/_build/doctrees/decorators/files_ex.doctree b/doc/_build/doctrees/decorators/files_ex.doctree
new file mode 100644
index 0000000..a0754d1
Binary files /dev/null and b/doc/_build/doctrees/decorators/files_ex.doctree differ
diff --git a/doc/_build/doctrees/decorators/files_re.doctree b/doc/_build/doctrees/decorators/files_re.doctree
new file mode 100644
index 0000000..6c775bd
Binary files /dev/null and b/doc/_build/doctrees/decorators/files_re.doctree differ
diff --git a/doc/_build/doctrees/decorators/follows.doctree b/doc/_build/doctrees/decorators/follows.doctree
new file mode 100644
index 0000000..ecc0040
Binary files /dev/null and b/doc/_build/doctrees/decorators/follows.doctree differ
diff --git a/doc/_build/doctrees/decorators/graphviz.doctree b/doc/_build/doctrees/decorators/graphviz.doctree
new file mode 100644
index 0000000..4682e96
Binary files /dev/null and b/doc/_build/doctrees/decorators/graphviz.doctree differ
diff --git a/doc/_build/doctrees/decorators/indicator_objects.doctree b/doc/_build/doctrees/decorators/indicator_objects.doctree
new file mode 100644
index 0000000..b8ea5d1
Binary files /dev/null and b/doc/_build/doctrees/decorators/indicator_objects.doctree differ
diff --git a/doc/_build/doctrees/decorators/jobs_limit.doctree b/doc/_build/doctrees/decorators/jobs_limit.doctree
new file mode 100644
index 0000000..50345f8
Binary files /dev/null and b/doc/_build/doctrees/decorators/jobs_limit.doctree differ
diff --git a/doc/_build/doctrees/decorators/merge.doctree b/doc/_build/doctrees/decorators/merge.doctree
new file mode 100644
index 0000000..ea6cb8e
Binary files /dev/null and b/doc/_build/doctrees/decorators/merge.doctree differ
diff --git a/doc/_build/doctrees/decorators/mkdir.doctree b/doc/_build/doctrees/decorators/mkdir.doctree
new file mode 100644
index 0000000..5c26a2a
Binary files /dev/null and b/doc/_build/doctrees/decorators/mkdir.doctree differ
diff --git a/doc/_build/doctrees/decorators/originate.doctree b/doc/_build/doctrees/decorators/originate.doctree
new file mode 100644
index 0000000..efe2bee
Binary files /dev/null and b/doc/_build/doctrees/decorators/originate.doctree differ
diff --git a/doc/_build/doctrees/decorators/parallel.doctree b/doc/_build/doctrees/decorators/parallel.doctree
new file mode 100644
index 0000000..c2c84f1
Binary files /dev/null and b/doc/_build/doctrees/decorators/parallel.doctree differ
diff --git a/doc/_build/doctrees/decorators/permutations.doctree b/doc/_build/doctrees/decorators/permutations.doctree
new file mode 100644
index 0000000..870c60b
Binary files /dev/null and b/doc/_build/doctrees/decorators/permutations.doctree differ
diff --git a/doc/_build/doctrees/decorators/posttask.doctree b/doc/_build/doctrees/decorators/posttask.doctree
new file mode 100644
index 0000000..ebc3915
Binary files /dev/null and b/doc/_build/doctrees/decorators/posttask.doctree differ
diff --git a/doc/_build/doctrees/decorators/product.doctree b/doc/_build/doctrees/decorators/product.doctree
new file mode 100644
index 0000000..750a26c
Binary files /dev/null and b/doc/_build/doctrees/decorators/product.doctree differ
diff --git a/doc/_build/doctrees/decorators/split.doctree b/doc/_build/doctrees/decorators/split.doctree
new file mode 100644
index 0000000..c8a148e
Binary files /dev/null and b/doc/_build/doctrees/decorators/split.doctree differ
diff --git a/doc/_build/doctrees/decorators/subdivide.doctree b/doc/_build/doctrees/decorators/subdivide.doctree
new file mode 100644
index 0000000..835c906
Binary files /dev/null and b/doc/_build/doctrees/decorators/subdivide.doctree differ
diff --git a/doc/_build/doctrees/decorators/transform.doctree b/doc/_build/doctrees/decorators/transform.doctree
new file mode 100644
index 0000000..99452e0
Binary files /dev/null and b/doc/_build/doctrees/decorators/transform.doctree differ
diff --git a/doc/_build/doctrees/decorators/transform_ex.doctree b/doc/_build/doctrees/decorators/transform_ex.doctree
new file mode 100644
index 0000000..f9e50b8
Binary files /dev/null and b/doc/_build/doctrees/decorators/transform_ex.doctree differ
diff --git a/doc/_build/doctrees/design.doctree b/doc/_build/doctrees/design.doctree
new file mode 100644
index 0000000..c04c6f4
Binary files /dev/null and b/doc/_build/doctrees/design.doctree differ
diff --git a/doc/_build/doctrees/drmaa_wrapper_functions.doctree b/doc/_build/doctrees/drmaa_wrapper_functions.doctree
new file mode 100644
index 0000000..66b94ea
Binary files /dev/null and b/doc/_build/doctrees/drmaa_wrapper_functions.doctree differ
diff --git a/doc/_build/doctrees/environment.pickle b/doc/_build/doctrees/environment.pickle
new file mode 100644
index 0000000..fcf7ac5
Binary files /dev/null and b/doc/_build/doctrees/environment.pickle differ
diff --git a/doc/_build/doctrees/examples/bioinformatics/index.doctree b/doc/_build/doctrees/examples/bioinformatics/index.doctree
new file mode 100644
index 0000000..58eac1e
Binary files /dev/null and b/doc/_build/doctrees/examples/bioinformatics/index.doctree differ
diff --git a/doc/_build/doctrees/examples/bioinformatics/part1_code.doctree b/doc/_build/doctrees/examples/bioinformatics/part1_code.doctree
new file mode 100644
index 0000000..9d26f12
Binary files /dev/null and b/doc/_build/doctrees/examples/bioinformatics/part1_code.doctree differ
diff --git a/doc/_build/doctrees/examples/bioinformatics/part2.doctree b/doc/_build/doctrees/examples/bioinformatics/part2.doctree
new file mode 100644
index 0000000..0697d16
Binary files /dev/null and b/doc/_build/doctrees/examples/bioinformatics/part2.doctree differ
diff --git a/doc/_build/doctrees/examples/bioinformatics/part2_code.doctree b/doc/_build/doctrees/examples/bioinformatics/part2_code.doctree
new file mode 100644
index 0000000..39eb3eb
Binary files /dev/null and b/doc/_build/doctrees/examples/bioinformatics/part2_code.doctree differ
diff --git a/doc/_build/doctrees/examples/paired_end_data.py.doctree b/doc/_build/doctrees/examples/paired_end_data.py.doctree
new file mode 100644
index 0000000..2232453
Binary files /dev/null and b/doc/_build/doctrees/examples/paired_end_data.py.doctree differ
diff --git a/doc/_build/doctrees/faq.doctree b/doc/_build/doctrees/faq.doctree
new file mode 100644
index 0000000..510b98d
Binary files /dev/null and b/doc/_build/doctrees/faq.doctree differ
diff --git a/doc/_build/doctrees/gallery.doctree b/doc/_build/doctrees/gallery.doctree
new file mode 100644
index 0000000..70a44cd
Binary files /dev/null and b/doc/_build/doctrees/gallery.doctree differ
diff --git a/doc/_build/doctrees/glossary.doctree b/doc/_build/doctrees/glossary.doctree
new file mode 100644
index 0000000..26fef6b
Binary files /dev/null and b/doc/_build/doctrees/glossary.doctree differ
diff --git a/doc/_build/doctrees/history.doctree b/doc/_build/doctrees/history.doctree
new file mode 100644
index 0000000..f20f456
Binary files /dev/null and b/doc/_build/doctrees/history.doctree differ
diff --git a/doc/_build/doctrees/implementation_notes.doctree b/doc/_build/doctrees/implementation_notes.doctree
new file mode 100644
index 0000000..2d2f221
Binary files /dev/null and b/doc/_build/doctrees/implementation_notes.doctree differ
diff --git a/doc/_build/doctrees/installation.doctree b/doc/_build/doctrees/installation.doctree
new file mode 100644
index 0000000..0827cad
Binary files /dev/null and b/doc/_build/doctrees/installation.doctree differ
diff --git a/doc/_build/doctrees/pipeline_functions.doctree b/doc/_build/doctrees/pipeline_functions.doctree
new file mode 100644
index 0000000..e1b73fa
Binary files /dev/null and b/doc/_build/doctrees/pipeline_functions.doctree differ
diff --git a/doc/_build/doctrees/proxy_logger.doctree b/doc/_build/doctrees/proxy_logger.doctree
new file mode 100644
index 0000000..06439ac
Binary files /dev/null and b/doc/_build/doctrees/proxy_logger.doctree differ
diff --git a/doc/_build/doctrees/recipes.doctree b/doc/_build/doctrees/recipes.doctree
new file mode 100644
index 0000000..c02ea8c
Binary files /dev/null and b/doc/_build/doctrees/recipes.doctree differ
diff --git a/doc/_build/doctrees/refactoring_ruffus_notes.doctree b/doc/_build/doctrees/refactoring_ruffus_notes.doctree
new file mode 100644
index 0000000..0a21e9b
Binary files /dev/null and b/doc/_build/doctrees/refactoring_ruffus_notes.doctree differ
diff --git a/doc/_build/doctrees/task.doctree b/doc/_build/doctrees/task.doctree
new file mode 100644
index 0000000..bb24a2f
Binary files /dev/null and b/doc/_build/doctrees/task.doctree differ
diff --git a/doc/_build/doctrees/todo.doctree b/doc/_build/doctrees/todo.doctree
new file mode 100644
index 0000000..feeadce
Binary files /dev/null and b/doc/_build/doctrees/todo.doctree differ
diff --git a/doc/_build/doctrees/tutorials/manual/advanced_transform.doctree b/doc/_build/doctrees/tutorials/manual/advanced_transform.doctree
new file mode 100644
index 0000000..63aeb26
Binary files /dev/null and b/doc/_build/doctrees/tutorials/manual/advanced_transform.doctree differ
diff --git a/doc/_build/doctrees/tutorials/manual/check_if_uptodate.doctree b/doc/_build/doctrees/tutorials/manual/check_if_uptodate.doctree
new file mode 100644
index 0000000..19193f1
Binary files /dev/null and b/doc/_build/doctrees/tutorials/manual/check_if_uptodate.doctree differ
diff --git a/doc/_build/doctrees/tutorials/manual/collate.doctree b/doc/_build/doctrees/tutorials/manual/collate.doctree
new file mode 100644
index 0000000..8915349
Binary files /dev/null and b/doc/_build/doctrees/tutorials/manual/collate.doctree differ
diff --git a/doc/_build/doctrees/tutorials/manual/dependencies.doctree b/doc/_build/doctrees/tutorials/manual/dependencies.doctree
new file mode 100644
index 0000000..ea6a6af
Binary files /dev/null and b/doc/_build/doctrees/tutorials/manual/dependencies.doctree differ
diff --git a/doc/_build/doctrees/tutorials/manual/dependencies_code.doctree b/doc/_build/doctrees/tutorials/manual/dependencies_code.doctree
new file mode 100644
index 0000000..4c60170
Binary files /dev/null and b/doc/_build/doctrees/tutorials/manual/dependencies_code.doctree differ
diff --git a/doc/_build/doctrees/tutorials/manual/exceptions.doctree b/doc/_build/doctrees/tutorials/manual/exceptions.doctree
new file mode 100644
index 0000000..a0f7d7b
Binary files /dev/null and b/doc/_build/doctrees/tutorials/manual/exceptions.doctree differ
diff --git a/doc/_build/doctrees/tutorials/manual/files.doctree b/doc/_build/doctrees/tutorials/manual/files.doctree
new file mode 100644
index 0000000..7242a67
Binary files /dev/null and b/doc/_build/doctrees/tutorials/manual/files.doctree differ
diff --git a/doc/_build/doctrees/tutorials/manual/files_re.doctree b/doc/_build/doctrees/tutorials/manual/files_re.doctree
new file mode 100644
index 0000000..18c8a8c
Binary files /dev/null and b/doc/_build/doctrees/tutorials/manual/files_re.doctree differ
diff --git a/doc/_build/doctrees/tutorials/manual/follows.doctree b/doc/_build/doctrees/tutorials/manual/follows.doctree
new file mode 100644
index 0000000..b4db9c3
Binary files /dev/null and b/doc/_build/doctrees/tutorials/manual/follows.doctree differ
diff --git a/doc/_build/doctrees/tutorials/manual/jobs_limit.doctree b/doc/_build/doctrees/tutorials/manual/jobs_limit.doctree
new file mode 100644
index 0000000..27c5632
Binary files /dev/null and b/doc/_build/doctrees/tutorials/manual/jobs_limit.doctree differ
diff --git a/doc/_build/doctrees/tutorials/manual/logging.doctree b/doc/_build/doctrees/tutorials/manual/logging.doctree
new file mode 100644
index 0000000..efca5ed
Binary files /dev/null and b/doc/_build/doctrees/tutorials/manual/logging.doctree differ
diff --git a/doc/_build/doctrees/tutorials/manual/logging_code.doctree b/doc/_build/doctrees/tutorials/manual/logging_code.doctree
new file mode 100644
index 0000000..7fcf576
Binary files /dev/null and b/doc/_build/doctrees/tutorials/manual/logging_code.doctree differ
diff --git a/doc/_build/doctrees/tutorials/manual/manual_code.doctree b/doc/_build/doctrees/tutorials/manual/manual_code.doctree
new file mode 100644
index 0000000..4ca6fc2
Binary files /dev/null and b/doc/_build/doctrees/tutorials/manual/manual_code.doctree differ
diff --git a/doc/_build/doctrees/tutorials/manual/manual_contents.doctree b/doc/_build/doctrees/tutorials/manual/manual_contents.doctree
new file mode 100644
index 0000000..278a2c7
Binary files /dev/null and b/doc/_build/doctrees/tutorials/manual/manual_contents.doctree differ
diff --git a/doc/_build/doctrees/tutorials/manual/manual_introduction.doctree b/doc/_build/doctrees/tutorials/manual/manual_introduction.doctree
new file mode 100644
index 0000000..849230e
Binary files /dev/null and b/doc/_build/doctrees/tutorials/manual/manual_introduction.doctree differ
diff --git a/doc/_build/doctrees/tutorials/manual/merge.doctree b/doc/_build/doctrees/tutorials/manual/merge.doctree
new file mode 100644
index 0000000..40920aa
Binary files /dev/null and b/doc/_build/doctrees/tutorials/manual/merge.doctree differ
diff --git a/doc/_build/doctrees/tutorials/manual/onthefly.doctree b/doc/_build/doctrees/tutorials/manual/onthefly.doctree
new file mode 100644
index 0000000..ccf0a5d
Binary files /dev/null and b/doc/_build/doctrees/tutorials/manual/onthefly.doctree differ
diff --git a/doc/_build/doctrees/tutorials/manual/onthefly_code.doctree b/doc/_build/doctrees/tutorials/manual/onthefly_code.doctree
new file mode 100644
index 0000000..57e9769
Binary files /dev/null and b/doc/_build/doctrees/tutorials/manual/onthefly_code.doctree differ
diff --git a/doc/_build/doctrees/tutorials/manual/parallel.doctree b/doc/_build/doctrees/tutorials/manual/parallel.doctree
new file mode 100644
index 0000000..4a3f6e7
Binary files /dev/null and b/doc/_build/doctrees/tutorials/manual/parallel.doctree differ
diff --git a/doc/_build/doctrees/tutorials/manual/parallel_processing.doctree b/doc/_build/doctrees/tutorials/manual/parallel_processing.doctree
new file mode 100644
index 0000000..327fefd
Binary files /dev/null and b/doc/_build/doctrees/tutorials/manual/parallel_processing.doctree differ
diff --git a/doc/_build/doctrees/tutorials/manual/posttask.doctree b/doc/_build/doctrees/tutorials/manual/posttask.doctree
new file mode 100644
index 0000000..eb6c45b
Binary files /dev/null and b/doc/_build/doctrees/tutorials/manual/posttask.doctree differ
diff --git a/doc/_build/doctrees/tutorials/manual/split.doctree b/doc/_build/doctrees/tutorials/manual/split.doctree
new file mode 100644
index 0000000..dd79aad
Binary files /dev/null and b/doc/_build/doctrees/tutorials/manual/split.doctree differ
diff --git a/doc/_build/doctrees/tutorials/manual/tasks_and_globs_in_inputs.doctree b/doc/_build/doctrees/tutorials/manual/tasks_and_globs_in_inputs.doctree
new file mode 100644
index 0000000..e37dcd6
Binary files /dev/null and b/doc/_build/doctrees/tutorials/manual/tasks_and_globs_in_inputs.doctree differ
diff --git a/doc/_build/doctrees/tutorials/manual/tasks_as_recipes.doctree b/doc/_build/doctrees/tutorials/manual/tasks_as_recipes.doctree
new file mode 100644
index 0000000..0a2bf6a
Binary files /dev/null and b/doc/_build/doctrees/tutorials/manual/tasks_as_recipes.doctree differ
diff --git a/doc/_build/doctrees/tutorials/manual/tracing_pipeline_parameters.doctree b/doc/_build/doctrees/tutorials/manual/tracing_pipeline_parameters.doctree
new file mode 100644
index 0000000..60780d7
Binary files /dev/null and b/doc/_build/doctrees/tutorials/manual/tracing_pipeline_parameters.doctree differ
diff --git a/doc/_build/doctrees/tutorials/manual/transform.doctree b/doc/_build/doctrees/tutorials/manual/transform.doctree
new file mode 100644
index 0000000..f49b431
Binary files /dev/null and b/doc/_build/doctrees/tutorials/manual/transform.doctree differ
diff --git a/doc/_build/doctrees/tutorials/manual/transform_code.doctree b/doc/_build/doctrees/tutorials/manual/transform_code.doctree
new file mode 100644
index 0000000..0a66cda
Binary files /dev/null and b/doc/_build/doctrees/tutorials/manual/transform_code.doctree differ
diff --git a/doc/_build/doctrees/tutorials/new_tutorial/active_if.doctree b/doc/_build/doctrees/tutorials/new_tutorial/active_if.doctree
new file mode 100644
index 0000000..6384caa
Binary files /dev/null and b/doc/_build/doctrees/tutorials/new_tutorial/active_if.doctree differ
diff --git a/doc/_build/doctrees/tutorials/new_tutorial/check_if_uptodate.doctree b/doc/_build/doctrees/tutorials/new_tutorial/check_if_uptodate.doctree
new file mode 100644
index 0000000..579d258
Binary files /dev/null and b/doc/_build/doctrees/tutorials/new_tutorial/check_if_uptodate.doctree differ
diff --git a/doc/_build/doctrees/tutorials/new_tutorial/checkpointing.doctree b/doc/_build/doctrees/tutorials/new_tutorial/checkpointing.doctree
new file mode 100644
index 0000000..186d489
Binary files /dev/null and b/doc/_build/doctrees/tutorials/new_tutorial/checkpointing.doctree differ
diff --git a/doc/_build/doctrees/tutorials/new_tutorial/checkpointing_code.doctree b/doc/_build/doctrees/tutorials/new_tutorial/checkpointing_code.doctree
new file mode 100644
index 0000000..37deca0
Binary files /dev/null and b/doc/_build/doctrees/tutorials/new_tutorial/checkpointing_code.doctree differ
diff --git a/doc/_build/doctrees/tutorials/new_tutorial/combinatorics.doctree b/doc/_build/doctrees/tutorials/new_tutorial/combinatorics.doctree
new file mode 100644
index 0000000..0614225
Binary files /dev/null and b/doc/_build/doctrees/tutorials/new_tutorial/combinatorics.doctree differ
diff --git a/doc/_build/doctrees/tutorials/new_tutorial/combinatorics_code.doctree b/doc/_build/doctrees/tutorials/new_tutorial/combinatorics_code.doctree
new file mode 100644
index 0000000..da54ee0
Binary files /dev/null and b/doc/_build/doctrees/tutorials/new_tutorial/combinatorics_code.doctree differ
diff --git a/doc/_build/doctrees/tutorials/new_tutorial/command_line.doctree b/doc/_build/doctrees/tutorials/new_tutorial/command_line.doctree
new file mode 100644
index 0000000..8212a84
Binary files /dev/null and b/doc/_build/doctrees/tutorials/new_tutorial/command_line.doctree differ
diff --git a/doc/_build/doctrees/tutorials/new_tutorial/decorators_compendium.doctree b/doc/_build/doctrees/tutorials/new_tutorial/decorators_compendium.doctree
new file mode 100644
index 0000000..a7439f1
Binary files /dev/null and b/doc/_build/doctrees/tutorials/new_tutorial/decorators_compendium.doctree differ
diff --git a/doc/_build/doctrees/tutorials/new_tutorial/dependencies.doctree b/doc/_build/doctrees/tutorials/new_tutorial/dependencies.doctree
new file mode 100644
index 0000000..35047a0
Binary files /dev/null and b/doc/_build/doctrees/tutorials/new_tutorial/dependencies.doctree differ
diff --git a/doc/_build/doctrees/tutorials/new_tutorial/deprecated_files.doctree b/doc/_build/doctrees/tutorials/new_tutorial/deprecated_files.doctree
new file mode 100644
index 0000000..88e2056
Binary files /dev/null and b/doc/_build/doctrees/tutorials/new_tutorial/deprecated_files.doctree differ
diff --git a/doc/_build/doctrees/tutorials/new_tutorial/deprecated_files_re.doctree b/doc/_build/doctrees/tutorials/new_tutorial/deprecated_files_re.doctree
new file mode 100644
index 0000000..e1c7d57
Binary files /dev/null and b/doc/_build/doctrees/tutorials/new_tutorial/deprecated_files_re.doctree differ
diff --git a/doc/_build/doctrees/tutorials/new_tutorial/exceptions.doctree b/doc/_build/doctrees/tutorials/new_tutorial/exceptions.doctree
new file mode 100644
index 0000000..6fd585b
Binary files /dev/null and b/doc/_build/doctrees/tutorials/new_tutorial/exceptions.doctree differ
diff --git a/doc/_build/doctrees/tutorials/new_tutorial/flowchart_colours.doctree b/doc/_build/doctrees/tutorials/new_tutorial/flowchart_colours.doctree
new file mode 100644
index 0000000..8920902
Binary files /dev/null and b/doc/_build/doctrees/tutorials/new_tutorial/flowchart_colours.doctree differ
diff --git a/doc/_build/doctrees/tutorials/new_tutorial/flowchart_colours_code.doctree b/doc/_build/doctrees/tutorials/new_tutorial/flowchart_colours_code.doctree
new file mode 100644
index 0000000..110f472
Binary files /dev/null and b/doc/_build/doctrees/tutorials/new_tutorial/flowchart_colours_code.doctree differ
diff --git a/doc/_build/doctrees/tutorials/new_tutorial/inputs.doctree b/doc/_build/doctrees/tutorials/new_tutorial/inputs.doctree
new file mode 100644
index 0000000..12295dc
Binary files /dev/null and b/doc/_build/doctrees/tutorials/new_tutorial/inputs.doctree differ
diff --git a/doc/_build/doctrees/tutorials/new_tutorial/inputs_code.doctree b/doc/_build/doctrees/tutorials/new_tutorial/inputs_code.doctree
new file mode 100644
index 0000000..3cfbfa4
Binary files /dev/null and b/doc/_build/doctrees/tutorials/new_tutorial/inputs_code.doctree differ
diff --git a/doc/_build/doctrees/tutorials/new_tutorial/introduction.doctree b/doc/_build/doctrees/tutorials/new_tutorial/introduction.doctree
new file mode 100644
index 0000000..ea89e15
Binary files /dev/null and b/doc/_build/doctrees/tutorials/new_tutorial/introduction.doctree differ
diff --git a/doc/_build/doctrees/tutorials/new_tutorial/introduction_code.doctree b/doc/_build/doctrees/tutorials/new_tutorial/introduction_code.doctree
new file mode 100644
index 0000000..744c2fc
Binary files /dev/null and b/doc/_build/doctrees/tutorials/new_tutorial/introduction_code.doctree differ
diff --git a/doc/_build/doctrees/tutorials/new_tutorial/list_of_ruffus_names.doctree b/doc/_build/doctrees/tutorials/new_tutorial/list_of_ruffus_names.doctree
new file mode 100644
index 0000000..c703d55
Binary files /dev/null and b/doc/_build/doctrees/tutorials/new_tutorial/list_of_ruffus_names.doctree differ
diff --git a/doc/_build/doctrees/tutorials/new_tutorial/logging.doctree b/doc/_build/doctrees/tutorials/new_tutorial/logging.doctree
new file mode 100644
index 0000000..5dc397a
Binary files /dev/null and b/doc/_build/doctrees/tutorials/new_tutorial/logging.doctree differ
diff --git a/doc/_build/doctrees/tutorials/new_tutorial/logging_code.doctree b/doc/_build/doctrees/tutorials/new_tutorial/logging_code.doctree
new file mode 100644
index 0000000..ec5db4b
Binary files /dev/null and b/doc/_build/doctrees/tutorials/new_tutorial/logging_code.doctree differ
diff --git a/doc/_build/doctrees/tutorials/new_tutorial/manual_contents.doctree b/doc/_build/doctrees/tutorials/new_tutorial/manual_contents.doctree
new file mode 100644
index 0000000..31a390c
Binary files /dev/null and b/doc/_build/doctrees/tutorials/new_tutorial/manual_contents.doctree differ
diff --git a/doc/_build/doctrees/tutorials/new_tutorial/merge.doctree b/doc/_build/doctrees/tutorials/new_tutorial/merge.doctree
new file mode 100644
index 0000000..de279f0
Binary files /dev/null and b/doc/_build/doctrees/tutorials/new_tutorial/merge.doctree differ
diff --git a/doc/_build/doctrees/tutorials/new_tutorial/merge_code.doctree b/doc/_build/doctrees/tutorials/new_tutorial/merge_code.doctree
new file mode 100644
index 0000000..3a59228
Binary files /dev/null and b/doc/_build/doctrees/tutorials/new_tutorial/merge_code.doctree differ
diff --git a/doc/_build/doctrees/tutorials/new_tutorial/mkdir.doctree b/doc/_build/doctrees/tutorials/new_tutorial/mkdir.doctree
new file mode 100644
index 0000000..52c2a5d
Binary files /dev/null and b/doc/_build/doctrees/tutorials/new_tutorial/mkdir.doctree differ
diff --git a/doc/_build/doctrees/tutorials/new_tutorial/mkdir_code.doctree b/doc/_build/doctrees/tutorials/new_tutorial/mkdir_code.doctree
new file mode 100644
index 0000000..0011366
Binary files /dev/null and b/doc/_build/doctrees/tutorials/new_tutorial/mkdir_code.doctree differ
diff --git a/doc/_build/doctrees/tutorials/new_tutorial/multiprocessing.doctree b/doc/_build/doctrees/tutorials/new_tutorial/multiprocessing.doctree
new file mode 100644
index 0000000..da353ff
Binary files /dev/null and b/doc/_build/doctrees/tutorials/new_tutorial/multiprocessing.doctree differ
diff --git a/doc/_build/doctrees/tutorials/new_tutorial/multiprocessing_code.doctree b/doc/_build/doctrees/tutorials/new_tutorial/multiprocessing_code.doctree
new file mode 100644
index 0000000..3f02595
Binary files /dev/null and b/doc/_build/doctrees/tutorials/new_tutorial/multiprocessing_code.doctree differ
diff --git a/doc/_build/doctrees/tutorials/new_tutorial/onthefly.doctree b/doc/_build/doctrees/tutorials/new_tutorial/onthefly.doctree
new file mode 100644
index 0000000..4fab074
Binary files /dev/null and b/doc/_build/doctrees/tutorials/new_tutorial/onthefly.doctree differ
diff --git a/doc/_build/doctrees/tutorials/new_tutorial/onthefly_code.doctree b/doc/_build/doctrees/tutorials/new_tutorial/onthefly_code.doctree
new file mode 100644
index 0000000..04e5ce2
Binary files /dev/null and b/doc/_build/doctrees/tutorials/new_tutorial/onthefly_code.doctree differ
diff --git a/doc/_build/doctrees/tutorials/new_tutorial/originate.doctree b/doc/_build/doctrees/tutorials/new_tutorial/originate.doctree
new file mode 100644
index 0000000..e9662bd
Binary files /dev/null and b/doc/_build/doctrees/tutorials/new_tutorial/originate.doctree differ
diff --git a/doc/_build/doctrees/tutorials/new_tutorial/originate_code.doctree b/doc/_build/doctrees/tutorials/new_tutorial/originate_code.doctree
new file mode 100644
index 0000000..7657d10
Binary files /dev/null and b/doc/_build/doctrees/tutorials/new_tutorial/originate_code.doctree differ
diff --git a/doc/_build/doctrees/tutorials/new_tutorial/output_file_names.doctree b/doc/_build/doctrees/tutorials/new_tutorial/output_file_names.doctree
new file mode 100644
index 0000000..d8db91e
Binary files /dev/null and b/doc/_build/doctrees/tutorials/new_tutorial/output_file_names.doctree differ
diff --git a/doc/_build/doctrees/tutorials/new_tutorial/output_file_names_code.doctree b/doc/_build/doctrees/tutorials/new_tutorial/output_file_names_code.doctree
new file mode 100644
index 0000000..d482777
Binary files /dev/null and b/doc/_build/doctrees/tutorials/new_tutorial/output_file_names_code.doctree differ
diff --git a/doc/_build/doctrees/tutorials/new_tutorial/parallel.doctree b/doc/_build/doctrees/tutorials/new_tutorial/parallel.doctree
new file mode 100644
index 0000000..467491d
Binary files /dev/null and b/doc/_build/doctrees/tutorials/new_tutorial/parallel.doctree differ
diff --git a/doc/_build/doctrees/tutorials/new_tutorial/pipeline_printout.doctree b/doc/_build/doctrees/tutorials/new_tutorial/pipeline_printout.doctree
new file mode 100644
index 0000000..3cc5904
Binary files /dev/null and b/doc/_build/doctrees/tutorials/new_tutorial/pipeline_printout.doctree differ
diff --git a/doc/_build/doctrees/tutorials/new_tutorial/pipeline_printout_code.doctree b/doc/_build/doctrees/tutorials/new_tutorial/pipeline_printout_code.doctree
new file mode 100644
index 0000000..1991da7
Binary files /dev/null and b/doc/_build/doctrees/tutorials/new_tutorial/pipeline_printout_code.doctree differ
diff --git a/doc/_build/doctrees/tutorials/new_tutorial/pipeline_printout_graph.doctree b/doc/_build/doctrees/tutorials/new_tutorial/pipeline_printout_graph.doctree
new file mode 100644
index 0000000..e934347
Binary files /dev/null and b/doc/_build/doctrees/tutorials/new_tutorial/pipeline_printout_graph.doctree differ
diff --git a/doc/_build/doctrees/tutorials/new_tutorial/pipeline_printout_graph_code.doctree b/doc/_build/doctrees/tutorials/new_tutorial/pipeline_printout_graph_code.doctree
new file mode 100644
index 0000000..004170c
Binary files /dev/null and b/doc/_build/doctrees/tutorials/new_tutorial/pipeline_printout_graph_code.doctree differ
diff --git a/doc/_build/doctrees/tutorials/new_tutorial/posttask.doctree b/doc/_build/doctrees/tutorials/new_tutorial/posttask.doctree
new file mode 100644
index 0000000..fb050ef
Binary files /dev/null and b/doc/_build/doctrees/tutorials/new_tutorial/posttask.doctree differ
diff --git a/doc/_build/doctrees/tutorials/new_tutorial/split.doctree b/doc/_build/doctrees/tutorials/new_tutorial/split.doctree
new file mode 100644
index 0000000..ae0d0f4
Binary files /dev/null and b/doc/_build/doctrees/tutorials/new_tutorial/split.doctree differ
diff --git a/doc/_build/doctrees/tutorials/new_tutorial/split_code.doctree b/doc/_build/doctrees/tutorials/new_tutorial/split_code.doctree
new file mode 100644
index 0000000..246e2b3
Binary files /dev/null and b/doc/_build/doctrees/tutorials/new_tutorial/split_code.doctree differ
diff --git a/doc/_build/doctrees/tutorials/new_tutorial/subdivide_collate.doctree b/doc/_build/doctrees/tutorials/new_tutorial/subdivide_collate.doctree
new file mode 100644
index 0000000..e6709e4
Binary files /dev/null and b/doc/_build/doctrees/tutorials/new_tutorial/subdivide_collate.doctree differ
diff --git a/doc/_build/doctrees/tutorials/new_tutorial/subdivide_collate_code.doctree b/doc/_build/doctrees/tutorials/new_tutorial/subdivide_collate_code.doctree
new file mode 100644
index 0000000..3f48d27
Binary files /dev/null and b/doc/_build/doctrees/tutorials/new_tutorial/subdivide_collate_code.doctree differ
diff --git a/doc/_build/doctrees/tutorials/new_tutorial/transform.doctree b/doc/_build/doctrees/tutorials/new_tutorial/transform.doctree
new file mode 100644
index 0000000..f144023
Binary files /dev/null and b/doc/_build/doctrees/tutorials/new_tutorial/transform.doctree differ
diff --git a/doc/_build/doctrees/tutorials/new_tutorial/transform_code.doctree b/doc/_build/doctrees/tutorials/new_tutorial/transform_code.doctree
new file mode 100644
index 0000000..2234546
Binary files /dev/null and b/doc/_build/doctrees/tutorials/new_tutorial/transform_code.doctree differ
diff --git a/doc/_build/doctrees/tutorials/new_tutorial/transform_in_parallel.doctree b/doc/_build/doctrees/tutorials/new_tutorial/transform_in_parallel.doctree
new file mode 100644
index 0000000..1511808
Binary files /dev/null and b/doc/_build/doctrees/tutorials/new_tutorial/transform_in_parallel.doctree differ
diff --git a/doc/_build/doctrees/tutorials/new_tutorial/transform_in_parallel_code.doctree b/doc/_build/doctrees/tutorials/new_tutorial/transform_in_parallel_code.doctree
new file mode 100644
index 0000000..19fbe1f
Binary files /dev/null and b/doc/_build/doctrees/tutorials/new_tutorial/transform_in_parallel_code.doctree differ
diff --git a/doc/_build/doctrees/tutorials/simple_tutorial/simple_tutorial.doctree b/doc/_build/doctrees/tutorials/simple_tutorial/simple_tutorial.doctree
new file mode 100644
index 0000000..0a1b3e2
Binary files /dev/null and b/doc/_build/doctrees/tutorials/simple_tutorial/simple_tutorial.doctree differ
diff --git a/doc/_build/doctrees/tutorials/simple_tutorial/simple_tutorial_code.doctree b/doc/_build/doctrees/tutorials/simple_tutorial/simple_tutorial_code.doctree
new file mode 100644
index 0000000..4bfc004
Binary files /dev/null and b/doc/_build/doctrees/tutorials/simple_tutorial/simple_tutorial_code.doctree differ
diff --git a/doc/_build/doctrees/tutorials/simple_tutorial/step1_follows.doctree b/doc/_build/doctrees/tutorials/simple_tutorial/step1_follows.doctree
new file mode 100644
index 0000000..a815bfa
Binary files /dev/null and b/doc/_build/doctrees/tutorials/simple_tutorial/step1_follows.doctree differ
diff --git a/doc/_build/doctrees/tutorials/simple_tutorial/step2.doctree b/doc/_build/doctrees/tutorials/simple_tutorial/step2.doctree
new file mode 100644
index 0000000..a0862ef
Binary files /dev/null and b/doc/_build/doctrees/tutorials/simple_tutorial/step2.doctree differ
diff --git a/doc/_build/doctrees/tutorials/simple_tutorial/step2_code.doctree b/doc/_build/doctrees/tutorials/simple_tutorial/step2_code.doctree
new file mode 100644
index 0000000..cce8939
Binary files /dev/null and b/doc/_build/doctrees/tutorials/simple_tutorial/step2_code.doctree differ
diff --git a/doc/_build/doctrees/tutorials/simple_tutorial/step3_run_pipeline.doctree b/doc/_build/doctrees/tutorials/simple_tutorial/step3_run_pipeline.doctree
new file mode 100644
index 0000000..f589c26
Binary files /dev/null and b/doc/_build/doctrees/tutorials/simple_tutorial/step3_run_pipeline.doctree differ
diff --git a/doc/_build/doctrees/tutorials/simple_tutorial/step3_run_pipeline_code.doctree b/doc/_build/doctrees/tutorials/simple_tutorial/step3_run_pipeline_code.doctree
new file mode 100644
index 0000000..2e1c449
Binary files /dev/null and b/doc/_build/doctrees/tutorials/simple_tutorial/step3_run_pipeline_code.doctree differ
diff --git a/doc/_build/doctrees/tutorials/simple_tutorial/step4_run_pipeline_graphically.doctree b/doc/_build/doctrees/tutorials/simple_tutorial/step4_run_pipeline_graphically.doctree
new file mode 100644
index 0000000..9e03541
Binary files /dev/null and b/doc/_build/doctrees/tutorials/simple_tutorial/step4_run_pipeline_graphically.doctree differ
diff --git a/doc/_build/doctrees/tutorials/simple_tutorial/step4_run_pipeline_graphically_code.doctree b/doc/_build/doctrees/tutorials/simple_tutorial/step4_run_pipeline_graphically_code.doctree
new file mode 100644
index 0000000..59aa42a
Binary files /dev/null and b/doc/_build/doctrees/tutorials/simple_tutorial/step4_run_pipeline_graphically_code.doctree differ
diff --git a/doc/_build/doctrees/tutorials/simple_tutorial/step5_split.doctree b/doc/_build/doctrees/tutorials/simple_tutorial/step5_split.doctree
new file mode 100644
index 0000000..947409e
Binary files /dev/null and b/doc/_build/doctrees/tutorials/simple_tutorial/step5_split.doctree differ
diff --git a/doc/_build/doctrees/tutorials/simple_tutorial/step5_split_code.doctree b/doc/_build/doctrees/tutorials/simple_tutorial/step5_split_code.doctree
new file mode 100644
index 0000000..1be9f24
Binary files /dev/null and b/doc/_build/doctrees/tutorials/simple_tutorial/step5_split_code.doctree differ
diff --git a/doc/_build/doctrees/tutorials/simple_tutorial/step6_transform.doctree b/doc/_build/doctrees/tutorials/simple_tutorial/step6_transform.doctree
new file mode 100644
index 0000000..0a1e112
Binary files /dev/null and b/doc/_build/doctrees/tutorials/simple_tutorial/step6_transform.doctree differ
diff --git a/doc/_build/doctrees/tutorials/simple_tutorial/step6_transform_code.doctree b/doc/_build/doctrees/tutorials/simple_tutorial/step6_transform_code.doctree
new file mode 100644
index 0000000..3898372
Binary files /dev/null and b/doc/_build/doctrees/tutorials/simple_tutorial/step6_transform_code.doctree differ
diff --git a/doc/_build/doctrees/tutorials/simple_tutorial/step7_merge.doctree b/doc/_build/doctrees/tutorials/simple_tutorial/step7_merge.doctree
new file mode 100644
index 0000000..c7b30d0
Binary files /dev/null and b/doc/_build/doctrees/tutorials/simple_tutorial/step7_merge.doctree differ
diff --git a/doc/_build/doctrees/tutorials/simple_tutorial/step7_merge_code.doctree b/doc/_build/doctrees/tutorials/simple_tutorial/step7_merge_code.doctree
new file mode 100644
index 0000000..924f724
Binary files /dev/null and b/doc/_build/doctrees/tutorials/simple_tutorial/step7_merge_code.doctree differ
diff --git a/doc/_build/doctrees/tutorials/simple_tutorial/step8_posttask.doctree b/doc/_build/doctrees/tutorials/simple_tutorial/step8_posttask.doctree
new file mode 100644
index 0000000..d3fefa8
Binary files /dev/null and b/doc/_build/doctrees/tutorials/simple_tutorial/step8_posttask.doctree differ
diff --git a/doc/_build/doctrees/tutorials/simple_tutorial/step8_posttask_code.doctree b/doc/_build/doctrees/tutorials/simple_tutorial/step8_posttask_code.doctree
new file mode 100644
index 0000000..18d9a31
Binary files /dev/null and b/doc/_build/doctrees/tutorials/simple_tutorial/step8_posttask_code.doctree differ
diff --git a/doc/_build/doctrees/why_ruffus.doctree b/doc/_build/doctrees/why_ruffus.doctree
new file mode 100644
index 0000000..5c10aa6
Binary files /dev/null and b/doc/_build/doctrees/why_ruffus.doctree differ
diff --git a/doc/_build/html/.buildinfo b/doc/_build/html/.buildinfo
new file mode 100644
index 0000000..d8efc41
--- /dev/null
+++ b/doc/_build/html/.buildinfo
@@ -0,0 +1,4 @@
+# Sphinx build info version 1
+# This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
+config: dcf33159fa537344d6e233d8d96e8dd5
+tags: a205e9ed8462ae86fdd2f73488852ba9
diff --git a/doc/_build/html/BingSiteAuth.xml b/doc/_build/html/BingSiteAuth.xml
new file mode 100644
index 0000000..1c214f5
--- /dev/null
+++ b/doc/_build/html/BingSiteAuth.xml
@@ -0,0 +1,4 @@
+<?xml version="1.0"?>
+<users>
+ <user>EA4177F8D753421998FE988997062AFA</user>
+</users>
\ No newline at end of file
diff --git a/doc/_build/html/_downloads/flowchart_colour_schemes.svg b/doc/_build/html/_downloads/flowchart_colour_schemes.svg
new file mode 100644
index 0000000..0b4d872
--- /dev/null
+++ b/doc/_build/html/_downloads/flowchart_colour_schemes.svg
@@ -0,0 +1,895 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.0//EN"
+ "http://www.w3.org/TR/2001/REC-SVG-20010904/DTD/svg10.dtd" [
+ <!ATTLIST svg xmlns:xlink CDATA #FIXED "http://www.w3.org/1999/xlink">
+]>
+<!-- Generated by Graphviz version 2.20.2 (Mon Aug 4 08:59:22 UTC 2008)
+ For user: (lg) Leo Goodstadt -->
+<!-- Title: Colour schemes Pages: 1 -->
+<svg width="792pt" height="283pt"
+ viewBox="0.00 0.00 792.00 283.32" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<g id="graph0" class="graph" transform="scale(0.402439 0.402439) rotate(0) translate(4 700)">
+<title>Colour schemes</title>
+<polygon style="fill:white;stroke:white;" points="-4,4 -4,-700 1964,-700 1964,4 -4,4"/>
+<g id="cluster2" class="cluster"><title>clusterkey0</title>
+<polygon style="fill:#f6f4f4;stroke:#f6f4f4;" points="8,-16 8,-688 244,-688 244,-16 8,-16"/>
+<text text-anchor="middle" x="126" y="-656.892" style="font-family:Times New Roman;font-size:30.0px;">Colour Scheme 0</text>
+</g>
+<g id="cluster3" class="cluster"><title>clusterkey1</title>
+<polygon style="fill:#f6f4f4;stroke:#f6f4f4;" points="252,-16 252,-688 488,-688 488,-16 252,-16"/>
+<text text-anchor="middle" x="370" y="-656.892" style="font-family:Times New Roman;font-size:30.0px;">Colour Scheme 1</text>
+</g>
+<g id="cluster4" class="cluster"><title>clusterkey2</title>
+<polygon style="fill:#f6f4f4;stroke:#f6f4f4;" points="496,-16 496,-688 732,-688 732,-16 496,-16"/>
+<text text-anchor="middle" x="614" y="-656.892" style="font-family:Times New Roman;font-size:30.0px;">Colour Scheme 2</text>
+</g>
+<g id="cluster5" class="cluster"><title>clusterkey3</title>
+<polygon style="fill:#f6f4f4;stroke:#f6f4f4;" points="740,-16 740,-688 976,-688 976,-16 740,-16"/>
+<text text-anchor="middle" x="858" y="-656.892" style="font-family:Times New Roman;font-size:30.0px;">Colour Scheme 3</text>
+</g>
+<g id="cluster6" class="cluster"><title>clusterkey4</title>
+<polygon style="fill:#f6f4f4;stroke:#f6f4f4;" points="984,-16 984,-688 1220,-688 1220,-16 984,-16"/>
+<text text-anchor="middle" x="1102" y="-656.892" style="font-family:Times New Roman;font-size:30.0px;">Colour Scheme 4</text>
+</g>
+<g id="cluster7" class="cluster"><title>clusterkey5</title>
+<polygon style="fill:#f6f4f4;stroke:#f6f4f4;" points="1228,-16 1228,-688 1464,-688 1464,-16 1228,-16"/>
+<text text-anchor="middle" x="1346" y="-656.892" style="font-family:Times New Roman;font-size:30.0px;">Colour Scheme 5</text>
+</g>
+<g id="cluster8" class="cluster"><title>clusterkey6</title>
+<polygon style="fill:#f6f4f4;stroke:#f6f4f4;" points="1472,-16 1472,-688 1708,-688 1708,-16 1472,-16"/>
+<text text-anchor="middle" x="1590" y="-656.892" style="font-family:Times New Roman;font-size:30.0px;">Colour Scheme 6</text>
+</g>
+<g id="cluster9" class="cluster"><title>clusterkey7</title>
+<polygon style="fill:#f6f4f4;stroke:#f6f4f4;" points="1716,-16 1716,-688 1952,-688 1952,-16 1716,-16"/>
+<text text-anchor="middle" x="1834" y="-656.892" style="font-family:Times New Roman;font-size:30.0px;">Colour Scheme 7</text>
+</g>
+<!-- k1_0 -->
+<g id="node2" class="node"><title>k1_0</title>
+<polygon style="fill:#ff3232;stroke:white;" points="194,-637.5 62,-637.5 58,-633.5 58,-586.5 190,-586.5 194,-590.5 194,-637.5"/>
+<polyline style="fill:none;stroke:white;" points="190,-633.5 58,-633.5 "/>
+<polyline style="fill:none;stroke:white;" points="190,-633.5 190,-586.5 "/>
+<polyline style="fill:none;stroke:white;" points="190,-633.5 194,-637.5 "/>
+<text text-anchor="middle" x="126" y="-605.392" style="font-family:Times New Roman;font-size:20.0px;fill:white;">Vicious cycle</text>
+</g>
+<!-- k2_0 -->
+<g id="node3" class="node"><title>k2_0</title>
+<polygon style="fill:white;stroke:gray;" points="193,-563.5 63,-563.5 59,-559.5 59,-512.5 189,-512.5 193,-516.5 193,-563.5"/>
+<polyline style="fill:none;stroke:gray;" points="189,-559.5 59,-559.5 "/>
+<polyline style="fill:none;stroke:gray;" points="189,-559.5 189,-512.5 "/>
+<polyline style="fill:none;stroke:gray;" points="189,-559.5 193,-563.5 "/>
+<text text-anchor="middle" x="126" y="-531.392" style="font-family:Times New Roman;font-size:20.0px;fill:gray;">Down stream</text>
+</g>
+<!-- k1_0->k2_0 -->
+<g id="edge3" class="edge"><title>k1_0->k2_0</title>
+<path style="fill:none;stroke:#ff3232;" d="M120,-586C119,-582 119,-578 119,-574"/>
+<polygon style="fill:#ff3232;stroke:#ff3232;" points="122.488,-574.299 120,-564 115.522,-573.602 122.488,-574.299"/>
+</g>
+<!-- k2_0->k1_0 -->
+<g id="edge5" class="edge"><title>k2_0->k1_0</title>
+<path style="fill:none;stroke:#ff3232;" d="M132,-564C133,-568 133,-572 133,-576"/>
+<polygon style="fill:#ff3232;stroke:#ff3232;" points="129.512,-575.701 132,-586 136.478,-576.398 129.512,-575.701"/>
+</g>
+<!-- k3_0 -->
+<g id="node6" class="node"><title>k3_0</title>
+<polygon style="fill:#b8cc6e;stroke:#006000;" points="202,-489.5 54,-489.5 50,-485.5 50,-438.5 198,-438.5 202,-442.5 202,-489.5"/>
+<polyline style="fill:none;stroke:#006000;" points="198,-485.5 50,-485.5 "/>
+<polyline style="fill:none;stroke:#006000;" points="198,-485.5 198,-438.5 "/>
+<polyline style="fill:none;stroke:#006000;" points="198,-485.5 202,-489.5 "/>
+<text text-anchor="middle" x="126" y="-457.392" style="font-family:Times New Roman;font-size:20.0px;fill:#006000;">Up-to-date task</text>
+</g>
+<!-- k2_0->k3_0 -->
+<g id="edge7" class="edge"><title>k2_0->k3_0</title>
+<path style="fill:none;stroke:gray;" d="M126,-512C126,-508 126,-504 126,-500"/>
+<polygon style="fill:gray;stroke:gray;" points="129.5,-500 126,-490 122.5,-500 129.5,-500"/>
+</g>
+<!-- k4_0 -->
+<g id="node8" class="node"><title>k4_0</title>
+<polygon style="fill:none;stroke:black;" points="236,-415.5 20,-415.5 16,-411.5 16,-364.5 232,-364.5 236,-368.5 236,-415.5"/>
+<polyline style="fill:none;stroke:black;" points="232,-411.5 16,-411.5 "/>
+<polyline style="fill:none;stroke:black;" points="232,-411.5 232,-364.5 "/>
+<polyline style="fill:none;stroke:black;" points="232,-411.5 236,-415.5 "/>
+<text text-anchor="middle" x="126" y="-383.392" style="font-family:Times New Roman;font-size:20.0px;">Explicitly specified task</text>
+</g>
+<!-- k3_0->k4_0 -->
+<g id="edge9" class="edge"><title>k3_0->k4_0</title>
+<path style="fill:none;stroke:gray;" d="M126,-438C126,-434 126,-430 126,-426"/>
+<polygon style="fill:gray;stroke:gray;" points="129.5,-426 126,-416 122.5,-426 129.5,-426"/>
+</g>
+<!-- k5_0 -->
+<g id="node10" class="node"><title>k5_0</title>
+<polygon style="fill:#ebf3ff;stroke:#0044a0;" points="184,-341.5 72,-341.5 68,-337.5 68,-290.5 180,-290.5 184,-294.5 184,-341.5"/>
+<polyline style="fill:none;stroke:#0044a0;" points="180,-337.5 68,-337.5 "/>
+<polyline style="fill:none;stroke:#0044a0;" points="180,-337.5 180,-290.5 "/>
+<polyline style="fill:none;stroke:#0044a0;" points="180,-337.5 184,-341.5 "/>
+<text text-anchor="middle" x="126" y="-309.392" style="font-family:Times New Roman;font-size:20.0px;fill:#0044a0;">Task to run</text>
+</g>
+<!-- k4_0->k5_0 -->
+<g id="edge11" class="edge"><title>k4_0->k5_0</title>
+<path style="fill:none;stroke:#0044a0;" d="M126,-364C126,-360 126,-356 126,-352"/>
+<polygon style="fill:#0044a0;stroke:#0044a0;" points="129.5,-352 126,-342 122.5,-352 129.5,-352"/>
+</g>
+<!-- k6_0 -->
+<g id="node12" class="node"><title>k6_0</title>
+<polygon style="fill:none;stroke:#0044a0;stroke-dasharray:5,2;" points="202,-268 54,-268 50,-264 50,-194 198,-194 202,-198 202,-268"/>
+<polyline style="fill:none;stroke:#0044a0;stroke-dasharray:5,2;" points="198,-264 50,-264 "/>
+<polyline style="fill:none;stroke:#0044a0;stroke-dasharray:5,2;" points="198,-264 198,-194 "/>
+<polyline style="fill:none;stroke:#0044a0;stroke-dasharray:5,2;" points="198,-264 202,-268 "/>
+<text text-anchor="middle" x="126" y="-235.892" style="font-family:Times New Roman;font-size:20.0px;fill:#0044a0;">Up-to-date task</text>
+<text text-anchor="middle" x="126" y="-212.892" style="font-family:Times New Roman;font-size:20.0px;fill:#0044a0;">forced to rerun</text>
+</g>
+<!-- k5_0->k6_0 -->
+<g id="edge13" class="edge"><title>k5_0->k6_0</title>
+<path style="fill:none;stroke:#0044a0;" d="M126,-290C126,-286 126,-282 126,-278"/>
+<polygon style="fill:#0044a0;stroke:#0044a0;" points="129.5,-278 126,-268 122.5,-278 129.5,-278"/>
+</g>
+<!-- k7_0 -->
+<g id="node14" class="node"><title>k7_0</title>
+<polygon style="fill:#efa03b;stroke:#006000;" points="186,-172 70,-172 66,-168 66,-98 182,-98 186,-102 186,-172"/>
+<polyline style="fill:none;stroke:#006000;" points="182,-168 66,-168 "/>
+<polyline style="fill:none;stroke:#006000;" points="182,-168 182,-98 "/>
+<polyline style="fill:none;stroke:#006000;" points="182,-168 186,-172 "/>
+<text text-anchor="middle" x="126" y="-139.892" style="font-family:Times New Roman;font-size:20.0px;fill:#006000;">Up-to-date</text>
+<text text-anchor="middle" x="126" y="-116.892" style="font-family:Times New Roman;font-size:20.0px;fill:#006000;">Final target</text>
+</g>
+<!-- k6_0->k7_0 -->
+<g id="edge15" class="edge"><title>k6_0->k7_0</title>
+<path style="fill:none;stroke:#0044a0;" d="M126,-194C126,-190 126,-186 126,-182"/>
+<polygon style="fill:#0044a0;stroke:#0044a0;" points="129.5,-182 126,-172 122.5,-182 129.5,-182"/>
+</g>
+<!-- k8_0 -->
+<g id="node16" class="node"><title>k8_0</title>
+<polygon style="fill:#efa03b;stroke:black;" points="186,-75.5 70,-75.5 66,-71.5 66,-24.5 182,-24.5 186,-28.5 186,-75.5"/>
+<polyline style="fill:none;stroke:black;" points="182,-71.5 66,-71.5 "/>
+<polyline style="fill:none;stroke:black;" points="182,-71.5 182,-24.5 "/>
+<polyline style="fill:none;stroke:black;" points="182,-71.5 186,-75.5 "/>
+<text text-anchor="middle" x="126" y="-43.392" style="font-family:Times New Roman;font-size:20.0px;">Final target</text>
+</g>
+<!-- k7_0->k8_0 -->
+<g id="edge17" class="edge"><title>k7_0->k8_0</title>
+<path style="fill:none;stroke:gray;" d="M126,-98C126,-94 126,-90 126,-86"/>
+<polygon style="fill:gray;stroke:gray;" points="129.5,-86 126,-76 122.5,-86 129.5,-86"/>
+</g>
+<!-- k1_1 -->
+<g id="node19" class="node"><title>k1_1</title>
+<polygon style="fill:#d93611;stroke:white;" points="438,-637.5 306,-637.5 302,-633.5 302,-586.5 434,-586.5 438,-590.5 438,-637.5"/>
+<polyline style="fill:none;stroke:white;" points="434,-633.5 302,-633.5 "/>
+<polyline style="fill:none;stroke:white;" points="434,-633.5 434,-586.5 "/>
+<polyline style="fill:none;stroke:white;" points="434,-633.5 438,-637.5 "/>
+<text text-anchor="middle" x="370" y="-605.392" style="font-family:Times New Roman;font-size:20.0px;fill:white;">Vicious cycle</text>
+</g>
+<!-- k2_1 -->
+<g id="node20" class="node"><title>k2_1</title>
+<polygon style="fill:white;stroke:gray;" points="437,-563.5 307,-563.5 303,-559.5 303,-512.5 433,-512.5 437,-516.5 437,-563.5"/>
+<polyline style="fill:none;stroke:gray;" points="433,-559.5 303,-559.5 "/>
+<polyline style="fill:none;stroke:gray;" points="433,-559.5 433,-512.5 "/>
+<polyline style="fill:none;stroke:gray;" points="433,-559.5 437,-563.5 "/>
+<text text-anchor="middle" x="370" y="-531.392" style="font-family:Times New Roman;font-size:20.0px;fill:gray;">Down stream</text>
+</g>
+<!-- k1_1->k2_1 -->
+<g id="edge20" class="edge"><title>k1_1->k2_1</title>
+<path style="fill:none;stroke:#d93611;" d="M364,-586C363,-582 363,-578 363,-574"/>
+<polygon style="fill:#d93611;stroke:#d93611;" points="366.488,-574.299 364,-564 359.522,-573.602 366.488,-574.299"/>
+</g>
+<!-- k2_1->k1_1 -->
+<g id="edge22" class="edge"><title>k2_1->k1_1</title>
+<path style="fill:none;stroke:#d93611;" d="M376,-564C377,-568 377,-572 377,-576"/>
+<polygon style="fill:#d93611;stroke:#d93611;" points="373.512,-575.701 376,-586 380.478,-576.398 373.512,-575.701"/>
+</g>
+<!-- k3_1 -->
+<g id="node23" class="node"><title>k3_1</title>
+<polygon style="fill:#9ed983;stroke:#4b8c2e;" points="446,-489.5 298,-489.5 294,-485.5 294,-438.5 442,-438.5 446,-442.5 446,-489.5"/>
+<polyline style="fill:none;stroke:#4b8c2e;" points="442,-485.5 294,-485.5 "/>
+<polyline style="fill:none;stroke:#4b8c2e;" points="442,-485.5 442,-438.5 "/>
+<polyline style="fill:none;stroke:#4b8c2e;" points="442,-485.5 446,-489.5 "/>
+<text text-anchor="middle" x="370" y="-457.392" style="font-family:Times New Roman;font-size:20.0px;fill:#4b8c2e;">Up-to-date task</text>
+</g>
+<!-- k2_1->k3_1 -->
+<g id="edge24" class="edge"><title>k2_1->k3_1</title>
+<path style="fill:none;stroke:gray;" d="M370,-512C370,-508 370,-504 370,-500"/>
+<polygon style="fill:gray;stroke:gray;" points="373.5,-500 370,-490 366.5,-500 373.5,-500"/>
+</g>
+<!-- k4_1 -->
+<g id="node25" class="node"><title>k4_1</title>
+<polygon style="fill:none;stroke:black;" points="480,-415.5 264,-415.5 260,-411.5 260,-364.5 476,-364.5 480,-368.5 480,-415.5"/>
+<polyline style="fill:none;stroke:black;" points="476,-411.5 260,-411.5 "/>
+<polyline style="fill:none;stroke:black;" points="476,-411.5 476,-364.5 "/>
+<polyline style="fill:none;stroke:black;" points="476,-411.5 480,-415.5 "/>
+<text text-anchor="middle" x="370" y="-383.392" style="font-family:Times New Roman;font-size:20.0px;">Explicitly specified task</text>
+</g>
+<!-- k3_1->k4_1 -->
+<g id="edge26" class="edge"><title>k3_1->k4_1</title>
+<path style="fill:none;stroke:gray;" d="M370,-438C370,-434 370,-430 370,-426"/>
+<polygon style="fill:gray;stroke:gray;" points="373.5,-426 370,-416 366.5,-426 373.5,-426"/>
+</g>
+<!-- k5_1 -->
+<g id="node27" class="node"><title>k5_1</title>
+<polygon style="fill:none;stroke:#000ddf;" points="428,-341.5 316,-341.5 312,-337.5 312,-290.5 424,-290.5 428,-294.5 428,-341.5"/>
+<polyline style="fill:none;stroke:#000ddf;" points="424,-337.5 312,-337.5 "/>
+<polyline style="fill:none;stroke:#000ddf;" points="424,-337.5 424,-290.5 "/>
+<polyline style="fill:none;stroke:#000ddf;" points="424,-337.5 428,-341.5 "/>
+<text text-anchor="middle" x="370" y="-309.392" style="font-family:Times New Roman;font-size:20.0px;fill:#000ddf;">Task to run</text>
+</g>
+<!-- k4_1->k5_1 -->
+<g id="edge28" class="edge"><title>k4_1->k5_1</title>
+<path style="fill:none;stroke:#000ddf;" d="M370,-364C370,-360 370,-356 370,-352"/>
+<polygon style="fill:#000ddf;stroke:#000ddf;" points="373.5,-352 370,-342 366.5,-352 373.5,-352"/>
+</g>
+<!-- k6_1 -->
+<g id="node29" class="node"><title>k6_1</title>
+<polygon style="fill:none;stroke:#000ddf;stroke-dasharray:5,2;" points="446,-268 298,-268 294,-264 294,-194 442,-194 446,-198 446,-268"/>
+<polyline style="fill:none;stroke:#000ddf;stroke-dasharray:5,2;" points="442,-264 294,-264 "/>
+<polyline style="fill:none;stroke:#000ddf;stroke-dasharray:5,2;" points="442,-264 442,-194 "/>
+<polyline style="fill:none;stroke:#000ddf;stroke-dasharray:5,2;" points="442,-264 446,-268 "/>
+<text text-anchor="middle" x="370" y="-235.892" style="font-family:Times New Roman;font-size:20.0px;fill:#000ddf;">Up-to-date task</text>
+<text text-anchor="middle" x="370" y="-212.892" style="font-family:Times New Roman;font-size:20.0px;fill:#000ddf;">forced to rerun</text>
+</g>
+<!-- k5_1->k6_1 -->
+<g id="edge30" class="edge"><title>k5_1->k6_1</title>
+<path style="fill:none;stroke:#000ddf;" d="M370,-290C370,-286 370,-282 370,-278"/>
+<polygon style="fill:#000ddf;stroke:#000ddf;" points="373.5,-278 370,-268 366.5,-278 373.5,-278"/>
+</g>
+<!-- k7_1 -->
+<g id="node31" class="node"><title>k7_1</title>
+<polygon style="fill:#d98100;stroke:#d9d911;" points="430,-172 314,-172 310,-168 310,-98 426,-98 430,-102 430,-172"/>
+<polyline style="fill:none;stroke:#d9d911;" points="426,-168 310,-168 "/>
+<polyline style="fill:none;stroke:#d9d911;" points="426,-168 426,-98 "/>
+<polyline style="fill:none;stroke:#d9d911;" points="426,-168 430,-172 "/>
+<text text-anchor="middle" x="370" y="-139.892" style="font-family:Times New Roman;font-size:20.0px;fill:#d9d911;">Up-to-date</text>
+<text text-anchor="middle" x="370" y="-116.892" style="font-family:Times New Roman;font-size:20.0px;fill:#d9d911;">Final target</text>
+</g>
+<!-- k6_1->k7_1 -->
+<g id="edge32" class="edge"><title>k6_1->k7_1</title>
+<path style="fill:none;stroke:#000ddf;" d="M370,-194C370,-190 370,-186 370,-182"/>
+<polygon style="fill:#000ddf;stroke:#000ddf;" points="373.5,-182 370,-172 366.5,-182 373.5,-182"/>
+</g>
+<!-- k8_1 -->
+<g id="node33" class="node"><title>k8_1</title>
+<polygon style="fill:#d98100;stroke:black;" points="430,-75.5 314,-75.5 310,-71.5 310,-24.5 426,-24.5 430,-28.5 430,-75.5"/>
+<polyline style="fill:none;stroke:black;" points="426,-71.5 310,-71.5 "/>
+<polyline style="fill:none;stroke:black;" points="426,-71.5 426,-24.5 "/>
+<polyline style="fill:none;stroke:black;" points="426,-71.5 430,-75.5 "/>
+<text text-anchor="middle" x="370" y="-43.392" style="font-family:Times New Roman;font-size:20.0px;">Final target</text>
+</g>
+<!-- k7_1->k8_1 -->
+<g id="edge34" class="edge"><title>k7_1->k8_1</title>
+<path style="fill:none;stroke:gray;" d="M370,-98C370,-94 370,-90 370,-86"/>
+<polygon style="fill:gray;stroke:gray;" points="373.5,-86 370,-76 366.5,-86 373.5,-86"/>
+</g>
+<!-- k1_2 -->
+<g id="node36" class="node"><title>k1_2</title>
+<polygon style="fill:#a54a64;stroke:white;" points="682,-637.5 550,-637.5 546,-633.5 546,-586.5 678,-586.5 682,-590.5 682,-637.5"/>
+<polyline style="fill:none;stroke:white;" points="678,-633.5 546,-633.5 "/>
+<polyline style="fill:none;stroke:white;" points="678,-633.5 678,-586.5 "/>
+<polyline style="fill:none;stroke:white;" points="678,-633.5 682,-637.5 "/>
+<text text-anchor="middle" x="614" y="-605.392" style="font-family:Times New Roman;font-size:20.0px;fill:white;">Vicious cycle</text>
+</g>
+<!-- k2_2 -->
+<g id="node37" class="node"><title>k2_2</title>
+<polygon style="fill:white;stroke:gray;" points="681,-563.5 551,-563.5 547,-559.5 547,-512.5 677,-512.5 681,-516.5 681,-563.5"/>
+<polyline style="fill:none;stroke:gray;" points="677,-559.5 547,-559.5 "/>
+<polyline style="fill:none;stroke:gray;" points="677,-559.5 677,-512.5 "/>
+<polyline style="fill:none;stroke:gray;" points="677,-559.5 681,-563.5 "/>
+<text text-anchor="middle" x="614" y="-531.392" style="font-family:Times New Roman;font-size:20.0px;fill:gray;">Down stream</text>
+</g>
+<!-- k1_2->k2_2 -->
+<g id="edge37" class="edge"><title>k1_2->k2_2</title>
+<path style="fill:none;stroke:#a54a64;" d="M608,-586C607,-582 607,-578 607,-574"/>
+<polygon style="fill:#a54a64;stroke:#a54a64;" points="610.488,-574.299 608,-564 603.522,-573.602 610.488,-574.299"/>
+</g>
+<!-- k2_2->k1_2 -->
+<g id="edge39" class="edge"><title>k2_2->k1_2</title>
+<path style="fill:none;stroke:#a54a64;" d="M620,-564C621,-568 621,-572 621,-576"/>
+<polygon style="fill:#a54a64;stroke:#a54a64;" points="617.512,-575.701 620,-586 624.478,-576.398 617.512,-575.701"/>
+</g>
+<!-- k3_2 -->
+<g id="node40" class="node"><title>k3_2</title>
+<polygon style="fill:#99d1c1;stroke:#4a92a5;" points="690,-489.5 542,-489.5 538,-485.5 538,-438.5 686,-438.5 690,-442.5 690,-489.5"/>
+<polyline style="fill:none;stroke:#4a92a5;" points="686,-485.5 538,-485.5 "/>
+<polyline style="fill:none;stroke:#4a92a5;" points="686,-485.5 686,-438.5 "/>
+<polyline style="fill:none;stroke:#4a92a5;" points="686,-485.5 690,-489.5 "/>
+<text text-anchor="middle" x="614" y="-457.392" style="font-family:Times New Roman;font-size:20.0px;fill:#4a92a5;">Up-to-date task</text>
+</g>
+<!-- k2_2->k3_2 -->
+<g id="edge41" class="edge"><title>k2_2->k3_2</title>
+<path style="fill:none;stroke:gray;" d="M614,-512C614,-508 614,-504 614,-500"/>
+<polygon style="fill:gray;stroke:gray;" points="617.5,-500 614,-490 610.5,-500 617.5,-500"/>
+</g>
+<!-- k4_2 -->
+<g id="node42" class="node"><title>k4_2</title>
+<polygon style="fill:none;stroke:black;" points="724,-415.5 508,-415.5 504,-411.5 504,-364.5 720,-364.5 724,-368.5 724,-415.5"/>
+<polyline style="fill:none;stroke:black;" points="720,-411.5 504,-411.5 "/>
+<polyline style="fill:none;stroke:black;" points="720,-411.5 720,-364.5 "/>
+<polyline style="fill:none;stroke:black;" points="720,-411.5 724,-415.5 "/>
+<text text-anchor="middle" x="614" y="-383.392" style="font-family:Times New Roman;font-size:20.0px;">Explicitly specified task</text>
+</g>
+<!-- k3_2->k4_2 -->
+<g id="edge43" class="edge"><title>k3_2->k4_2</title>
+<path style="fill:none;stroke:gray;" d="M614,-438C614,-434 614,-430 614,-426"/>
+<polygon style="fill:gray;stroke:gray;" points="617.5,-426 614,-416 610.5,-426 617.5,-426"/>
+</g>
+<!-- k5_2 -->
+<g id="node44" class="node"><title>k5_2</title>
+<polygon style="fill:none;stroke:#4a64a5;" points="672,-341.5 560,-341.5 556,-337.5 556,-290.5 668,-290.5 672,-294.5 672,-341.5"/>
+<polyline style="fill:none;stroke:#4a64a5;" points="668,-337.5 556,-337.5 "/>
+<polyline style="fill:none;stroke:#4a64a5;" points="668,-337.5 668,-290.5 "/>
+<polyline style="fill:none;stroke:#4a64a5;" points="668,-337.5 672,-341.5 "/>
+<text text-anchor="middle" x="614" y="-309.392" style="font-family:Times New Roman;font-size:20.0px;fill:#4a64a5;">Task to run</text>
+</g>
+<!-- k4_2->k5_2 -->
+<g id="edge45" class="edge"><title>k4_2->k5_2</title>
+<path style="fill:none;stroke:#4a64a5;" d="M614,-364C614,-360 614,-356 614,-352"/>
+<polygon style="fill:#4a64a5;stroke:#4a64a5;" points="617.5,-352 614,-342 610.5,-352 617.5,-352"/>
+</g>
+<!-- k6_2 -->
+<g id="node46" class="node"><title>k6_2</title>
+<polygon style="fill:none;stroke:#4a64a5;stroke-dasharray:5,2;" points="690,-268 542,-268 538,-264 538,-194 686,-194 690,-198 690,-268"/>
+<polyline style="fill:none;stroke:#4a64a5;stroke-dasharray:5,2;" points="686,-264 538,-264 "/>
+<polyline style="fill:none;stroke:#4a64a5;stroke-dasharray:5,2;" points="686,-264 686,-194 "/>
+<polyline style="fill:none;stroke:#4a64a5;stroke-dasharray:5,2;" points="686,-264 690,-268 "/>
+<text text-anchor="middle" x="614" y="-235.892" style="font-family:Times New Roman;font-size:20.0px;fill:#4a64a5;">Up-to-date task</text>
+<text text-anchor="middle" x="614" y="-212.892" style="font-family:Times New Roman;font-size:20.0px;fill:#4a64a5;">forced to rerun</text>
+</g>
+<!-- k5_2->k6_2 -->
+<g id="edge47" class="edge"><title>k5_2->k6_2</title>
+<path style="fill:none;stroke:#4a64a5;" d="M614,-290C614,-286 614,-282 614,-278"/>
+<polygon style="fill:#4a64a5;stroke:#4a64a5;" points="617.5,-278 614,-268 610.5,-278 617.5,-278"/>
+</g>
+<!-- k7_2 -->
+<g id="node48" class="node"><title>k7_2</title>
+<polygon style="fill:#d2c24a;stroke:#4a92a5;" points="674,-172 558,-172 554,-168 554,-98 670,-98 674,-102 674,-172"/>
+<polyline style="fill:none;stroke:#4a92a5;" points="670,-168 554,-168 "/>
+<polyline style="fill:none;stroke:#4a92a5;" points="670,-168 670,-98 "/>
+<polyline style="fill:none;stroke:#4a92a5;" points="670,-168 674,-172 "/>
+<text text-anchor="middle" x="614" y="-139.892" style="font-family:Times New Roman;font-size:20.0px;fill:#4a92a5;">Up-to-date</text>
+<text text-anchor="middle" x="614" y="-116.892" style="font-family:Times New Roman;font-size:20.0px;fill:#4a92a5;">Final target</text>
+</g>
+<!-- k6_2->k7_2 -->
+<g id="edge49" class="edge"><title>k6_2->k7_2</title>
+<path style="fill:none;stroke:#4a64a5;" d="M614,-194C614,-190 614,-186 614,-182"/>
+<polygon style="fill:#4a64a5;stroke:#4a64a5;" points="617.5,-182 614,-172 610.5,-182 617.5,-182"/>
+</g>
+<!-- k8_2 -->
+<g id="node50" class="node"><title>k8_2</title>
+<polygon style="fill:#d2c24a;stroke:black;" points="674,-75.5 558,-75.5 554,-71.5 554,-24.5 670,-24.5 674,-28.5 674,-75.5"/>
+<polyline style="fill:none;stroke:black;" points="670,-71.5 554,-71.5 "/>
+<polyline style="fill:none;stroke:black;" points="670,-71.5 670,-24.5 "/>
+<polyline style="fill:none;stroke:black;" points="670,-71.5 674,-75.5 "/>
+<text text-anchor="middle" x="614" y="-43.392" style="font-family:Times New Roman;font-size:20.0px;">Final target</text>
+</g>
+<!-- k7_2->k8_2 -->
+<g id="edge51" class="edge"><title>k7_2->k8_2</title>
+<path style="fill:none;stroke:gray;" d="M614,-98C614,-94 614,-90 614,-86"/>
+<polygon style="fill:gray;stroke:gray;" points="617.5,-86 614,-76 610.5,-86 617.5,-86"/>
+</g>
+<!-- k1_3 -->
+<g id="node53" class="node"><title>k1_3</title>
+<polygon style="fill:#ff3e68;stroke:white;" points="926,-637.5 794,-637.5 790,-633.5 790,-586.5 922,-586.5 926,-590.5 926,-637.5"/>
+<polyline style="fill:none;stroke:white;" points="922,-633.5 790,-633.5 "/>
+<polyline style="fill:none;stroke:white;" points="922,-633.5 922,-586.5 "/>
+<polyline style="fill:none;stroke:white;" points="922,-633.5 926,-637.5 "/>
+<text text-anchor="middle" x="858" y="-605.392" style="font-family:Times New Roman;font-size:20.0px;fill:white;">Vicious cycle</text>
+</g>
+<!-- k2_3 -->
+<g id="node54" class="node"><title>k2_3</title>
+<polygon style="fill:white;stroke:gray;" points="925,-563.5 795,-563.5 791,-559.5 791,-512.5 921,-512.5 925,-516.5 925,-563.5"/>
+<polyline style="fill:none;stroke:gray;" points="921,-559.5 791,-559.5 "/>
+<polyline style="fill:none;stroke:gray;" points="921,-559.5 921,-512.5 "/>
+<polyline style="fill:none;stroke:gray;" points="921,-559.5 925,-563.5 "/>
+<text text-anchor="middle" x="858" y="-531.392" style="font-family:Times New Roman;font-size:20.0px;fill:gray;">Down stream</text>
+</g>
+<!-- k1_3->k2_3 -->
+<g id="edge54" class="edge"><title>k1_3->k2_3</title>
+<path style="fill:none;stroke:#ff3e68;" d="M852,-586C851,-582 851,-578 851,-574"/>
+<polygon style="fill:#ff3e68;stroke:#ff3e68;" points="854.488,-574.299 852,-564 847.522,-573.602 854.488,-574.299"/>
+</g>
+<!-- k2_3->k1_3 -->
+<g id="edge56" class="edge"><title>k2_3->k1_3</title>
+<path style="fill:none;stroke:#ff3e68;" d="M864,-564C865,-568 865,-572 865,-576"/>
+<polygon style="fill:#ff3e68;stroke:#ff3e68;" points="861.512,-575.701 864,-586 868.478,-576.398 861.512,-575.701"/>
+</g>
+<!-- k3_3 -->
+<g id="node57" class="node"><title>k3_3</title>
+<polygon style="fill:#c9d787;stroke:#7d8a2e;" points="934,-489.5 786,-489.5 782,-485.5 782,-438.5 930,-438.5 934,-442.5 934,-489.5"/>
+<polyline style="fill:none;stroke:#7d8a2e;" points="930,-485.5 782,-485.5 "/>
+<polyline style="fill:none;stroke:#7d8a2e;" points="930,-485.5 930,-438.5 "/>
+<polyline style="fill:none;stroke:#7d8a2e;" points="930,-485.5 934,-489.5 "/>
+<text text-anchor="middle" x="858" y="-457.392" style="font-family:Times New Roman;font-size:20.0px;fill:#7d8a2e;">Up-to-date task</text>
+</g>
+<!-- k2_3->k3_3 -->
+<g id="edge58" class="edge"><title>k2_3->k3_3</title>
+<path style="fill:none;stroke:gray;" d="M858,-512C858,-508 858,-504 858,-500"/>
+<polygon style="fill:gray;stroke:gray;" points="861.5,-500 858,-490 854.5,-500 861.5,-500"/>
+</g>
+<!-- k4_3 -->
+<g id="node59" class="node"><title>k4_3</title>
+<polygon style="fill:none;stroke:black;" points="968,-415.5 752,-415.5 748,-411.5 748,-364.5 964,-364.5 968,-368.5 968,-415.5"/>
+<polyline style="fill:none;stroke:black;" points="964,-411.5 748,-411.5 "/>
+<polyline style="fill:none;stroke:black;" points="964,-411.5 964,-364.5 "/>
+<polyline style="fill:none;stroke:black;" points="964,-411.5 968,-415.5 "/>
+<text text-anchor="middle" x="858" y="-383.392" style="font-family:Times New Roman;font-size:20.0px;">Explicitly specified task</text>
+</g>
+<!-- k3_3->k4_3 -->
+<g id="edge60" class="edge"><title>k3_3->k4_3</title>
+<path style="fill:none;stroke:gray;" d="M858,-438C858,-434 858,-430 858,-426"/>
+<polygon style="fill:gray;stroke:gray;" points="861.5,-426 858,-416 854.5,-426 861.5,-426"/>
+</g>
+<!-- k5_3 -->
+<g id="node61" class="node"><title>k5_3</title>
+<polygon style="fill:none;stroke:#bfb5ff;" points="916,-341.5 804,-341.5 800,-337.5 800,-290.5 912,-290.5 916,-294.5 916,-341.5"/>
+<polyline style="fill:none;stroke:#bfb5ff;" points="912,-337.5 800,-337.5 "/>
+<polyline style="fill:none;stroke:#bfb5ff;" points="912,-337.5 912,-290.5 "/>
+<polyline style="fill:none;stroke:#bfb5ff;" points="912,-337.5 916,-341.5 "/>
+<text text-anchor="middle" x="858" y="-309.392" style="font-family:Times New Roman;font-size:20.0px;fill:#bfb5ff;">Task to run</text>
+</g>
+<!-- k4_3->k5_3 -->
+<g id="edge62" class="edge"><title>k4_3->k5_3</title>
+<path style="fill:none;stroke:#bfb5ff;" d="M858,-364C858,-360 858,-356 858,-352"/>
+<polygon style="fill:#bfb5ff;stroke:#bfb5ff;" points="861.5,-352 858,-342 854.5,-352 861.5,-352"/>
+</g>
+<!-- k6_3 -->
+<g id="node63" class="node"><title>k6_3</title>
+<polygon style="fill:none;stroke:#bfb5ff;stroke-dasharray:5,2;" points="934,-268 786,-268 782,-264 782,-194 930,-194 934,-198 934,-268"/>
+<polyline style="fill:none;stroke:#bfb5ff;stroke-dasharray:5,2;" points="930,-264 782,-264 "/>
+<polyline style="fill:none;stroke:#bfb5ff;stroke-dasharray:5,2;" points="930,-264 930,-194 "/>
+<polyline style="fill:none;stroke:#bfb5ff;stroke-dasharray:5,2;" points="930,-264 934,-268 "/>
+<text text-anchor="middle" x="858" y="-235.892" style="font-family:Times New Roman;font-size:20.0px;fill:#bfb5ff;">Up-to-date task</text>
+<text text-anchor="middle" x="858" y="-212.892" style="font-family:Times New Roman;font-size:20.0px;fill:#bfb5ff;">forced to rerun</text>
+</g>
+<!-- k5_3->k6_3 -->
+<g id="edge64" class="edge"><title>k5_3->k6_3</title>
+<path style="fill:none;stroke:#bfb5ff;" d="M858,-290C858,-286 858,-282 858,-278"/>
+<polygon style="fill:#bfb5ff;stroke:#bfb5ff;" points="861.5,-278 858,-268 854.5,-278 861.5,-278"/>
+</g>
+<!-- k7_3 -->
+<g id="node65" class="node"><title>k7_3</title>
+<polygon style="fill:#fff1dc;stroke:#7d8a2e;" points="918,-172 802,-172 798,-168 798,-98 914,-98 918,-102 918,-172"/>
+<polyline style="fill:none;stroke:#7d8a2e;" points="914,-168 798,-168 "/>
+<polyline style="fill:none;stroke:#7d8a2e;" points="914,-168 914,-98 "/>
+<polyline style="fill:none;stroke:#7d8a2e;" points="914,-168 918,-172 "/>
+<text text-anchor="middle" x="858" y="-139.892" style="font-family:Times New Roman;font-size:20.0px;fill:#7d8a2e;">Up-to-date</text>
+<text text-anchor="middle" x="858" y="-116.892" style="font-family:Times New Roman;font-size:20.0px;fill:#7d8a2e;">Final target</text>
+</g>
+<!-- k6_3->k7_3 -->
+<g id="edge66" class="edge"><title>k6_3->k7_3</title>
+<path style="fill:none;stroke:#bfb5ff;" d="M858,-194C858,-190 858,-186 858,-182"/>
+<polygon style="fill:#bfb5ff;stroke:#bfb5ff;" points="861.5,-182 858,-172 854.5,-182 861.5,-182"/>
+</g>
+<!-- k8_3 -->
+<g id="node67" class="node"><title>k8_3</title>
+<polygon style="fill:#fff1dc;stroke:black;" points="918,-75.5 802,-75.5 798,-71.5 798,-24.5 914,-24.5 918,-28.5 918,-75.5"/>
+<polyline style="fill:none;stroke:black;" points="914,-71.5 798,-71.5 "/>
+<polyline style="fill:none;stroke:black;" points="914,-71.5 914,-24.5 "/>
+<polyline style="fill:none;stroke:black;" points="914,-71.5 918,-75.5 "/>
+<text text-anchor="middle" x="858" y="-43.392" style="font-family:Times New Roman;font-size:20.0px;">Final target</text>
+</g>
+<!-- k7_3->k8_3 -->
+<g id="edge68" class="edge"><title>k7_3->k8_3</title>
+<path style="fill:none;stroke:gray;" d="M858,-98C858,-94 858,-90 858,-86"/>
+<polygon style="fill:gray;stroke:gray;" points="861.5,-86 858,-76 854.5,-86 861.5,-86"/>
+</g>
+<!-- k1_4 -->
+<g id="node70" class="node"><title>k1_4</title>
+<polygon style="fill:#f54f29;stroke:white;" points="1170,-637.5 1038,-637.5 1034,-633.5 1034,-586.5 1166,-586.5 1170,-590.5 1170,-637.5"/>
+<polyline style="fill:none;stroke:white;" points="1166,-633.5 1034,-633.5 "/>
+<polyline style="fill:none;stroke:white;" points="1166,-633.5 1166,-586.5 "/>
+<polyline style="fill:none;stroke:white;" points="1166,-633.5 1170,-637.5 "/>
+<text text-anchor="middle" x="1102" y="-605.392" style="font-family:Times New Roman;font-size:20.0px;fill:white;">Vicious cycle</text>
+</g>
+<!-- k2_4 -->
+<g id="node71" class="node"><title>k2_4</title>
+<polygon style="fill:white;stroke:gray;" points="1169,-563.5 1039,-563.5 1035,-559.5 1035,-512.5 1165,-512.5 1169,-516.5 1169,-563.5"/>
+<polyline style="fill:none;stroke:gray;" points="1165,-559.5 1035,-559.5 "/>
+<polyline style="fill:none;stroke:gray;" points="1165,-559.5 1165,-512.5 "/>
+<polyline style="fill:none;stroke:gray;" points="1165,-559.5 1169,-563.5 "/>
+<text text-anchor="middle" x="1102" y="-531.392" style="font-family:Times New Roman;font-size:20.0px;fill:gray;">Down stream</text>
+</g>
+<!-- k1_4->k2_4 -->
+<g id="edge71" class="edge"><title>k1_4->k2_4</title>
+<path style="fill:none;stroke:#f54f29;" d="M1096,-586C1095,-582 1095,-578 1095,-574"/>
+<polygon style="fill:#f54f29;stroke:#f54f29;" points="1098.49,-574.299 1096,-564 1091.52,-573.602 1098.49,-574.299"/>
+</g>
+<!-- k2_4->k1_4 -->
+<g id="edge73" class="edge"><title>k2_4->k1_4</title>
+<path style="fill:none;stroke:#f54f29;" d="M1108,-564C1109,-568 1109,-572 1109,-576"/>
+<polygon style="fill:#f54f29;stroke:#f54f29;" points="1105.51,-575.701 1108,-586 1112.48,-576.398 1105.51,-575.701"/>
+</g>
+<!-- k3_4 -->
+<g id="node74" class="node"><title>k3_4</title>
+<polygon style="fill:#b8cc6e;stroke:#4b6000;" points="1178,-489.5 1030,-489.5 1026,-485.5 1026,-438.5 1174,-438.5 1178,-442.5 1178,-489.5"/>
+<polyline style="fill:none;stroke:#4b6000;" points="1174,-485.5 1026,-485.5 "/>
+<polyline style="fill:none;stroke:#4b6000;" points="1174,-485.5 1174,-438.5 "/>
+<polyline style="fill:none;stroke:#4b6000;" points="1174,-485.5 1178,-489.5 "/>
+<text text-anchor="middle" x="1102" y="-457.392" style="font-family:Times New Roman;font-size:20.0px;fill:#4b6000;">Up-to-date task</text>
+</g>
+<!-- k2_4->k3_4 -->
+<g id="edge75" class="edge"><title>k2_4->k3_4</title>
+<path style="fill:none;stroke:gray;" d="M1102,-512C1102,-508 1102,-504 1102,-500"/>
+<polygon style="fill:gray;stroke:gray;" points="1105.5,-500 1102,-490 1098.5,-500 1105.5,-500"/>
+</g>
+<!-- k4_4 -->
+<g id="node76" class="node"><title>k4_4</title>
+<polygon style="fill:none;stroke:black;" points="1212,-415.5 996,-415.5 992,-411.5 992,-364.5 1208,-364.5 1212,-368.5 1212,-415.5"/>
+<polyline style="fill:none;stroke:black;" points="1208,-411.5 992,-411.5 "/>
+<polyline style="fill:none;stroke:black;" points="1208,-411.5 1208,-364.5 "/>
+<polyline style="fill:none;stroke:black;" points="1208,-411.5 1212,-415.5 "/>
+<text text-anchor="middle" x="1102" y="-383.392" style="font-family:Times New Roman;font-size:20.0px;">Explicitly specified task</text>
+</g>
+<!-- k3_4->k4_4 -->
+<g id="edge77" class="edge"><title>k3_4->k4_4</title>
+<path style="fill:none;stroke:gray;" d="M1102,-438C1102,-434 1102,-430 1102,-426"/>
+<polygon style="fill:gray;stroke:gray;" points="1105.5,-426 1102,-416 1098.5,-426 1105.5,-426"/>
+</g>
+<!-- k5_4 -->
+<g id="node78" class="node"><title>k5_4</title>
+<polygon style="fill:none;stroke:#004460;" points="1160,-341.5 1048,-341.5 1044,-337.5 1044,-290.5 1156,-290.5 1160,-294.5 1160,-341.5"/>
+<polyline style="fill:none;stroke:#004460;" points="1156,-337.5 1044,-337.5 "/>
+<polyline style="fill:none;stroke:#004460;" points="1156,-337.5 1156,-290.5 "/>
+<polyline style="fill:none;stroke:#004460;" points="1156,-337.5 1160,-341.5 "/>
+<text text-anchor="middle" x="1102" y="-309.392" style="font-family:Times New Roman;font-size:20.0px;fill:#004460;">Task to run</text>
+</g>
+<!-- k4_4->k5_4 -->
+<g id="edge79" class="edge"><title>k4_4->k5_4</title>
+<path style="fill:none;stroke:#004460;" d="M1102,-364C1102,-360 1102,-356 1102,-352"/>
+<polygon style="fill:#004460;stroke:#004460;" points="1105.5,-352 1102,-342 1098.5,-352 1105.5,-352"/>
+</g>
+<!-- k6_4 -->
+<g id="node80" class="node"><title>k6_4</title>
+<polygon style="fill:none;stroke:#004460;stroke-dasharray:5,2;" points="1178,-268 1030,-268 1026,-264 1026,-194 1174,-194 1178,-198 1178,-268"/>
+<polyline style="fill:none;stroke:#004460;stroke-dasharray:5,2;" points="1174,-264 1026,-264 "/>
+<polyline style="fill:none;stroke:#004460;stroke-dasharray:5,2;" points="1174,-264 1174,-194 "/>
+<polyline style="fill:none;stroke:#004460;stroke-dasharray:5,2;" points="1174,-264 1178,-268 "/>
+<text text-anchor="middle" x="1102" y="-235.892" style="font-family:Times New Roman;font-size:20.0px;fill:#004460;">Up-to-date task</text>
+<text text-anchor="middle" x="1102" y="-212.892" style="font-family:Times New Roman;font-size:20.0px;fill:#004460;">forced to rerun</text>
+</g>
+<!-- k5_4->k6_4 -->
+<g id="edge81" class="edge"><title>k5_4->k6_4</title>
+<path style="fill:none;stroke:#004460;" d="M1102,-290C1102,-286 1102,-282 1102,-278"/>
+<polygon style="fill:#004460;stroke:#004460;" points="1105.5,-278 1102,-268 1098.5,-278 1105.5,-278"/>
+</g>
+<!-- k7_4 -->
+<g id="node82" class="node"><title>k7_4</title>
+<polygon style="fill:#fff0a3;stroke:#4b6000;" points="1162,-172 1046,-172 1042,-168 1042,-98 1158,-98 1162,-102 1162,-172"/>
+<polyline style="fill:none;stroke:#4b6000;" points="1158,-168 1042,-168 "/>
+<polyline style="fill:none;stroke:#4b6000;" points="1158,-168 1158,-98 "/>
+<polyline style="fill:none;stroke:#4b6000;" points="1158,-168 1162,-172 "/>
+<text text-anchor="middle" x="1102" y="-139.892" style="font-family:Times New Roman;font-size:20.0px;fill:#4b6000;">Up-to-date</text>
+<text text-anchor="middle" x="1102" y="-116.892" style="font-family:Times New Roman;font-size:20.0px;fill:#4b6000;">Final target</text>
+</g>
+<!-- k6_4->k7_4 -->
+<g id="edge83" class="edge"><title>k6_4->k7_4</title>
+<path style="fill:none;stroke:#004460;" d="M1102,-194C1102,-190 1102,-186 1102,-182"/>
+<polygon style="fill:#004460;stroke:#004460;" points="1105.5,-182 1102,-172 1098.5,-182 1105.5,-182"/>
+</g>
+<!-- k8_4 -->
+<g id="node84" class="node"><title>k8_4</title>
+<polygon style="fill:#fff0a3;stroke:black;" points="1162,-75.5 1046,-75.5 1042,-71.5 1042,-24.5 1158,-24.5 1162,-28.5 1162,-75.5"/>
+<polyline style="fill:none;stroke:black;" points="1158,-71.5 1042,-71.5 "/>
+<polyline style="fill:none;stroke:black;" points="1158,-71.5 1158,-24.5 "/>
+<polyline style="fill:none;stroke:black;" points="1158,-71.5 1162,-75.5 "/>
+<text text-anchor="middle" x="1102" y="-43.392" style="font-family:Times New Roman;font-size:20.0px;">Final target</text>
+</g>
+<!-- k7_4->k8_4 -->
+<g id="edge85" class="edge"><title>k7_4->k8_4</title>
+<path style="fill:none;stroke:gray;" d="M1102,-98C1102,-94 1102,-90 1102,-86"/>
+<polygon style="fill:gray;stroke:gray;" points="1105.5,-86 1102,-76 1098.5,-86 1105.5,-86"/>
+</g>
+<!-- k1_5 -->
+<g id="node87" class="node"><title>k1_5</title>
+<polygon style="fill:#ff0000;stroke:white;" points="1414,-637.5 1282,-637.5 1278,-633.5 1278,-586.5 1410,-586.5 1414,-590.5 1414,-637.5"/>
+<polyline style="fill:none;stroke:white;" points="1410,-633.5 1278,-633.5 "/>
+<polyline style="fill:none;stroke:white;" points="1410,-633.5 1410,-586.5 "/>
+<polyline style="fill:none;stroke:white;" points="1410,-633.5 1414,-637.5 "/>
+<text text-anchor="middle" x="1346" y="-605.392" style="font-family:Times New Roman;font-size:20.0px;fill:white;">Vicious cycle</text>
+</g>
+<!-- k2_5 -->
+<g id="node88" class="node"><title>k2_5</title>
+<polygon style="fill:white;stroke:gray;" points="1413,-563.5 1283,-563.5 1279,-559.5 1279,-512.5 1409,-512.5 1413,-516.5 1413,-563.5"/>
+<polyline style="fill:none;stroke:gray;" points="1409,-559.5 1279,-559.5 "/>
+<polyline style="fill:none;stroke:gray;" points="1409,-559.5 1409,-512.5 "/>
+<polyline style="fill:none;stroke:gray;" points="1409,-559.5 1413,-563.5 "/>
+<text text-anchor="middle" x="1346" y="-531.392" style="font-family:Times New Roman;font-size:20.0px;fill:gray;">Down stream</text>
+</g>
+<!-- k1_5->k2_5 -->
+<g id="edge88" class="edge"><title>k1_5->k2_5</title>
+<path style="fill:none;stroke:#ff0000;" d="M1340,-586C1339,-582 1339,-578 1339,-574"/>
+<polygon style="fill:#ff0000;stroke:#ff0000;" points="1342.49,-574.299 1340,-564 1335.52,-573.602 1342.49,-574.299"/>
+</g>
+<!-- k2_5->k1_5 -->
+<g id="edge90" class="edge"><title>k2_5->k1_5</title>
+<path style="fill:none;stroke:#ff0000;" d="M1352,-564C1353,-568 1353,-572 1353,-576"/>
+<polygon style="fill:#ff0000;stroke:#ff0000;" points="1349.51,-575.701 1352,-586 1356.48,-576.398 1349.51,-575.701"/>
+</g>
+<!-- k3_5 -->
+<g id="node91" class="node"><title>k3_5</title>
+<polygon style="fill:#44ff44;stroke:#007700;" points="1422,-489.5 1274,-489.5 1270,-485.5 1270,-438.5 1418,-438.5 1422,-442.5 1422,-489.5"/>
+<polyline style="fill:none;stroke:#007700;" points="1418,-485.5 1270,-485.5 "/>
+<polyline style="fill:none;stroke:#007700;" points="1418,-485.5 1418,-438.5 "/>
+<polyline style="fill:none;stroke:#007700;" points="1418,-485.5 1422,-489.5 "/>
+<text text-anchor="middle" x="1346" y="-457.392" style="font-family:Times New Roman;font-size:20.0px;fill:#007700;">Up-to-date task</text>
+</g>
+<!-- k2_5->k3_5 -->
+<g id="edge92" class="edge"><title>k2_5->k3_5</title>
+<path style="fill:none;stroke:gray;" d="M1346,-512C1346,-508 1346,-504 1346,-500"/>
+<polygon style="fill:gray;stroke:gray;" points="1349.5,-500 1346,-490 1342.5,-500 1349.5,-500"/>
+</g>
+<!-- k4_5 -->
+<g id="node93" class="node"><title>k4_5</title>
+<polygon style="fill:none;stroke:black;" points="1456,-415.5 1240,-415.5 1236,-411.5 1236,-364.5 1452,-364.5 1456,-368.5 1456,-415.5"/>
+<polyline style="fill:none;stroke:black;" points="1452,-411.5 1236,-411.5 "/>
+<polyline style="fill:none;stroke:black;" points="1452,-411.5 1452,-364.5 "/>
+<polyline style="fill:none;stroke:black;" points="1452,-411.5 1456,-415.5 "/>
+<text text-anchor="middle" x="1346" y="-383.392" style="font-family:Times New Roman;font-size:20.0px;">Explicitly specified task</text>
+</g>
+<!-- k3_5->k4_5 -->
+<g id="edge94" class="edge"><title>k3_5->k4_5</title>
+<path style="fill:none;stroke:gray;" d="M1346,-438C1346,-434 1346,-430 1346,-426"/>
+<polygon style="fill:gray;stroke:gray;" points="1349.5,-426 1346,-416 1342.5,-426 1349.5,-426"/>
+</g>
+<!-- k5_5 -->
+<g id="node95" class="node"><title>k5_5</title>
+<polygon style="fill:#aabbff;stroke:#1122ff;" points="1404,-341.5 1292,-341.5 1288,-337.5 1288,-290.5 1400,-290.5 1404,-294.5 1404,-341.5"/>
+<polyline style="fill:none;stroke:#1122ff;" points="1400,-337.5 1288,-337.5 "/>
+<polyline style="fill:none;stroke:#1122ff;" points="1400,-337.5 1400,-290.5 "/>
+<polyline style="fill:none;stroke:#1122ff;" points="1400,-337.5 1404,-341.5 "/>
+<text text-anchor="middle" x="1346" y="-309.392" style="font-family:Times New Roman;font-size:20.0px;fill:#1122ff;">Task to run</text>
+</g>
+<!-- k4_5->k5_5 -->
+<g id="edge96" class="edge"><title>k4_5->k5_5</title>
+<path style="fill:none;stroke:#1122ff;" d="M1346,-364C1346,-360 1346,-356 1346,-352"/>
+<polygon style="fill:#1122ff;stroke:#1122ff;" points="1349.5,-352 1346,-342 1342.5,-352 1349.5,-352"/>
+</g>
+<!-- k6_5 -->
+<g id="node97" class="node"><title>k6_5</title>
+<polygon style="fill:none;stroke:#1122ff;stroke-dasharray:5,2;" points="1422,-268 1274,-268 1270,-264 1270,-194 1418,-194 1422,-198 1422,-268"/>
+<polyline style="fill:none;stroke:#1122ff;stroke-dasharray:5,2;" points="1418,-264 1270,-264 "/>
+<polyline style="fill:none;stroke:#1122ff;stroke-dasharray:5,2;" points="1418,-264 1418,-194 "/>
+<polyline style="fill:none;stroke:#1122ff;stroke-dasharray:5,2;" points="1418,-264 1422,-268 "/>
+<text text-anchor="middle" x="1346" y="-235.892" style="font-family:Times New Roman;font-size:20.0px;fill:#1122ff;">Up-to-date task</text>
+<text text-anchor="middle" x="1346" y="-212.892" style="font-family:Times New Roman;font-size:20.0px;fill:#1122ff;">forced to rerun</text>
+</g>
+<!-- k5_5->k6_5 -->
+<g id="edge98" class="edge"><title>k5_5->k6_5</title>
+<path style="fill:none;stroke:#1122ff;" d="M1346,-290C1346,-286 1346,-282 1346,-278"/>
+<polygon style="fill:#1122ff;stroke:#1122ff;" points="1349.5,-278 1346,-268 1342.5,-278 1349.5,-278"/>
+</g>
+<!-- k7_5 -->
+<g id="node99" class="node"><title>k7_5</title>
+<polygon style="fill:#efa03b;stroke:#ffcc3b;" points="1406,-172 1290,-172 1286,-168 1286,-98 1402,-98 1406,-102 1406,-172"/>
+<polyline style="fill:none;stroke:#ffcc3b;" points="1402,-168 1286,-168 "/>
+<polyline style="fill:none;stroke:#ffcc3b;" points="1402,-168 1402,-98 "/>
+<polyline style="fill:none;stroke:#ffcc3b;" points="1402,-168 1406,-172 "/>
+<text text-anchor="middle" x="1346" y="-139.892" style="font-family:Times New Roman;font-size:20.0px;fill:#ffcc3b;">Up-to-date</text>
+<text text-anchor="middle" x="1346" y="-116.892" style="font-family:Times New Roman;font-size:20.0px;fill:#ffcc3b;">Final target</text>
+</g>
+<!-- k6_5->k7_5 -->
+<g id="edge100" class="edge"><title>k6_5->k7_5</title>
+<path style="fill:none;stroke:#1122ff;" d="M1346,-194C1346,-190 1346,-186 1346,-182"/>
+<polygon style="fill:#1122ff;stroke:#1122ff;" points="1349.5,-182 1346,-172 1342.5,-182 1349.5,-182"/>
+</g>
+<!-- k8_5 -->
+<g id="node101" class="node"><title>k8_5</title>
+<polygon style="fill:#efa03b;stroke:black;" points="1406,-75.5 1290,-75.5 1286,-71.5 1286,-24.5 1402,-24.5 1406,-28.5 1406,-75.5"/>
+<polyline style="fill:none;stroke:black;" points="1402,-71.5 1286,-71.5 "/>
+<polyline style="fill:none;stroke:black;" points="1402,-71.5 1402,-24.5 "/>
+<polyline style="fill:none;stroke:black;" points="1402,-71.5 1406,-75.5 "/>
+<text text-anchor="middle" x="1346" y="-43.392" style="font-family:Times New Roman;font-size:20.0px;">Final target</text>
+</g>
+<!-- k7_5->k8_5 -->
+<g id="edge102" class="edge"><title>k7_5->k8_5</title>
+<path style="fill:none;stroke:gray;" d="M1346,-98C1346,-94 1346,-90 1346,-86"/>
+<polygon style="fill:gray;stroke:gray;" points="1349.5,-86 1346,-76 1342.5,-86 1349.5,-86"/>
+</g>
+<!-- k1_6 -->
+<g id="node104" class="node"><title>k1_6</title>
+<polygon style="fill:#ff3232;stroke:white;" points="1658,-637.5 1526,-637.5 1522,-633.5 1522,-586.5 1654,-586.5 1658,-590.5 1658,-637.5"/>
+<polyline style="fill:none;stroke:white;" points="1654,-633.5 1522,-633.5 "/>
+<polyline style="fill:none;stroke:white;" points="1654,-633.5 1654,-586.5 "/>
+<polyline style="fill:none;stroke:white;" points="1654,-633.5 1658,-637.5 "/>
+<text text-anchor="middle" x="1590" y="-605.392" style="font-family:Times New Roman;font-size:20.0px;">Vicious cycle</text>
+</g>
+<!-- k2_6 -->
+<g id="node105" class="node"><title>k2_6</title>
+<polygon style="fill:white;stroke:gray;" points="1657,-563.5 1527,-563.5 1523,-559.5 1523,-512.5 1653,-512.5 1657,-516.5 1657,-563.5"/>
+<polyline style="fill:none;stroke:gray;" points="1653,-559.5 1523,-559.5 "/>
+<polyline style="fill:none;stroke:gray;" points="1653,-559.5 1653,-512.5 "/>
+<polyline style="fill:none;stroke:gray;" points="1653,-559.5 1657,-563.5 "/>
+<text text-anchor="middle" x="1590" y="-531.392" style="font-family:Times New Roman;font-size:20.0px;fill:gray;">Down stream</text>
+</g>
+<!-- k1_6->k2_6 -->
+<g id="edge105" class="edge"><title>k1_6->k2_6</title>
+<path style="fill:none;stroke:#ff3232;" d="M1584,-586C1583,-582 1583,-578 1583,-574"/>
+<polygon style="fill:#ff3232;stroke:#ff3232;" points="1586.49,-574.299 1584,-564 1579.52,-573.602 1586.49,-574.299"/>
+</g>
+<!-- k2_6->k1_6 -->
+<g id="edge107" class="edge"><title>k2_6->k1_6</title>
+<path style="fill:none;stroke:#ff3232;" d="M1596,-564C1597,-568 1597,-572 1597,-576"/>
+<polygon style="fill:#ff3232;stroke:#ff3232;" points="1593.51,-575.701 1596,-586 1600.48,-576.398 1593.51,-575.701"/>
+</g>
+<!-- k3_6 -->
+<g id="node108" class="node"><title>k3_6</title>
+<polygon style="fill:#6cb924;stroke:black;" points="1666,-489.5 1518,-489.5 1514,-485.5 1514,-438.5 1662,-438.5 1666,-442.5 1666,-489.5"/>
+<polyline style="fill:none;stroke:black;" points="1662,-485.5 1514,-485.5 "/>
+<polyline style="fill:none;stroke:black;" points="1662,-485.5 1662,-438.5 "/>
+<polyline style="fill:none;stroke:black;" points="1662,-485.5 1666,-489.5 "/>
+<text text-anchor="middle" x="1590" y="-457.392" style="font-family:Times New Roman;font-size:20.0px;">Up-to-date task</text>
+</g>
+<!-- k2_6->k3_6 -->
+<g id="edge109" class="edge"><title>k2_6->k3_6</title>
+<path style="fill:none;stroke:gray;" d="M1590,-512C1590,-508 1590,-504 1590,-500"/>
+<polygon style="fill:gray;stroke:gray;" points="1593.5,-500 1590,-490 1586.5,-500 1593.5,-500"/>
+</g>
+<!-- k4_6 -->
+<g id="node110" class="node"><title>k4_6</title>
+<polygon style="fill:none;stroke:black;" points="1700,-415.5 1484,-415.5 1480,-411.5 1480,-364.5 1696,-364.5 1700,-368.5 1700,-415.5"/>
+<polyline style="fill:none;stroke:black;" points="1696,-411.5 1480,-411.5 "/>
+<polyline style="fill:none;stroke:black;" points="1696,-411.5 1696,-364.5 "/>
+<polyline style="fill:none;stroke:black;" points="1696,-411.5 1700,-415.5 "/>
+<text text-anchor="middle" x="1590" y="-383.392" style="font-family:Times New Roman;font-size:20.0px;">Explicitly specified task</text>
+</g>
+<!-- k3_6->k4_6 -->
+<g id="edge111" class="edge"><title>k3_6->k4_6</title>
+<path style="fill:none;stroke:gray;" d="M1590,-438C1590,-434 1590,-430 1590,-426"/>
+<polygon style="fill:gray;stroke:gray;" points="1593.5,-426 1590,-416 1586.5,-426 1593.5,-426"/>
+</g>
+<!-- k5_6 -->
+<g id="node112" class="node"><title>k5_6</title>
+<polygon style="fill:#5f52ee;stroke:#0044a0;" points="1648,-341.5 1536,-341.5 1532,-337.5 1532,-290.5 1644,-290.5 1648,-294.5 1648,-341.5"/>
+<polyline style="fill:none;stroke:#0044a0;" points="1644,-337.5 1532,-337.5 "/>
+<polyline style="fill:none;stroke:#0044a0;" points="1644,-337.5 1644,-290.5 "/>
+<polyline style="fill:none;stroke:#0044a0;" points="1644,-337.5 1648,-341.5 "/>
+<text text-anchor="middle" x="1590" y="-309.392" style="font-family:Times New Roman;font-size:20.0px;fill:lightgrey;">Task to run</text>
+</g>
+<!-- k4_6->k5_6 -->
+<g id="edge113" class="edge"><title>k4_6->k5_6</title>
+<path style="fill:none;stroke:#0044a0;" d="M1590,-364C1590,-360 1590,-356 1590,-352"/>
+<polygon style="fill:#0044a0;stroke:#0044a0;" points="1593.5,-352 1590,-342 1586.5,-352 1593.5,-352"/>
+</g>
+<!-- k6_6 -->
+<g id="node114" class="node"><title>k6_6</title>
+<polygon style="fill:none;stroke:#0044a0;stroke-dasharray:5,2;" points="1666,-268 1518,-268 1514,-264 1514,-194 1662,-194 1666,-198 1666,-268"/>
+<polyline style="fill:none;stroke:#0044a0;stroke-dasharray:5,2;" points="1662,-264 1514,-264 "/>
+<polyline style="fill:none;stroke:#0044a0;stroke-dasharray:5,2;" points="1662,-264 1662,-194 "/>
+<polyline style="fill:none;stroke:#0044a0;stroke-dasharray:5,2;" points="1662,-264 1666,-268 "/>
+<text text-anchor="middle" x="1590" y="-235.892" style="font-family:Times New Roman;font-size:20.0px;fill:#0044a0;">Up-to-date task</text>
+<text text-anchor="middle" x="1590" y="-212.892" style="font-family:Times New Roman;font-size:20.0px;fill:#0044a0;">forced to rerun</text>
+</g>
+<!-- k5_6->k6_6 -->
+<g id="edge115" class="edge"><title>k5_6->k6_6</title>
+<path style="fill:none;stroke:#0044a0;" d="M1590,-290C1590,-286 1590,-282 1590,-278"/>
+<polygon style="fill:#0044a0;stroke:#0044a0;" points="1593.5,-278 1590,-268 1586.5,-278 1593.5,-278"/>
+</g>
+<!-- k7_6 -->
+<g id="node116" class="node"><title>k7_6</title>
+<polygon style="fill:#ece116;stroke:black;" points="1650,-172 1534,-172 1530,-168 1530,-98 1646,-98 1650,-102 1650,-172"/>
+<polyline style="fill:none;stroke:black;" points="1646,-168 1530,-168 "/>
+<polyline style="fill:none;stroke:black;" points="1646,-168 1646,-98 "/>
+<polyline style="fill:none;stroke:black;" points="1646,-168 1650,-172 "/>
+<text text-anchor="middle" x="1590" y="-139.892" style="font-family:Times New Roman;font-size:20.0px;fill:#efa03b;">Up-to-date</text>
+<text text-anchor="middle" x="1590" y="-116.892" style="font-family:Times New Roman;font-size:20.0px;fill:#efa03b;">Final target</text>
+</g>
+<!-- k6_6->k7_6 -->
+<g id="edge117" class="edge"><title>k6_6->k7_6</title>
+<path style="fill:none;stroke:#0044a0;" d="M1590,-194C1590,-190 1590,-186 1590,-182"/>
+<polygon style="fill:#0044a0;stroke:#0044a0;" points="1593.5,-182 1590,-172 1586.5,-182 1593.5,-182"/>
+</g>
+<!-- k8_6 -->
+<g id="node118" class="node"><title>k8_6</title>
+<polygon style="fill:#ece116;stroke:black;" points="1650,-75.5 1534,-75.5 1530,-71.5 1530,-24.5 1646,-24.5 1650,-28.5 1650,-75.5"/>
+<polyline style="fill:none;stroke:black;" points="1646,-71.5 1530,-71.5 "/>
+<polyline style="fill:none;stroke:black;" points="1646,-71.5 1646,-24.5 "/>
+<polyline style="fill:none;stroke:black;" points="1646,-71.5 1650,-75.5 "/>
+<text text-anchor="middle" x="1590" y="-43.392" style="font-family:Times New Roman;font-size:20.0px;">Final target</text>
+</g>
+<!-- k7_6->k8_6 -->
+<g id="edge119" class="edge"><title>k7_6->k8_6</title>
+<path style="fill:none;stroke:gray;" d="M1590,-98C1590,-94 1590,-90 1590,-86"/>
+<polygon style="fill:gray;stroke:gray;" points="1593.5,-86 1590,-76 1586.5,-86 1593.5,-86"/>
+</g>
+<!-- k1_7 -->
+<g id="node121" class="node"><title>k1_7</title>
+<polygon style="fill:#b9495e;stroke:white;" points="1902,-637.5 1770,-637.5 1766,-633.5 1766,-586.5 1898,-586.5 1902,-590.5 1902,-637.5"/>
+<polyline style="fill:none;stroke:white;" points="1898,-633.5 1766,-633.5 "/>
+<polyline style="fill:none;stroke:white;" points="1898,-633.5 1898,-586.5 "/>
+<polyline style="fill:none;stroke:white;" points="1898,-633.5 1902,-637.5 "/>
+<text text-anchor="middle" x="1834" y="-605.392" style="font-family:Times New Roman;font-size:20.0px;fill:white;">Vicious cycle</text>
+</g>
+<!-- k2_7 -->
+<g id="node122" class="node"><title>k2_7</title>
+<polygon style="fill:white;stroke:gray;" points="1901,-563.5 1771,-563.5 1767,-559.5 1767,-512.5 1897,-512.5 1901,-516.5 1901,-563.5"/>
+<polyline style="fill:none;stroke:gray;" points="1897,-559.5 1767,-559.5 "/>
+<polyline style="fill:none;stroke:gray;" points="1897,-559.5 1897,-512.5 "/>
+<polyline style="fill:none;stroke:gray;" points="1897,-559.5 1901,-563.5 "/>
+<text text-anchor="middle" x="1834" y="-531.392" style="font-family:Times New Roman;font-size:20.0px;fill:gray;">Down stream</text>
+</g>
+<!-- k1_7->k2_7 -->
+<g id="edge122" class="edge"><title>k1_7->k2_7</title>
+<path style="fill:none;stroke:#b9495e;" d="M1828,-586C1827,-582 1827,-578 1827,-574"/>
+<polygon style="fill:#b9495e;stroke:#b9495e;" points="1830.49,-574.299 1828,-564 1823.52,-573.602 1830.49,-574.299"/>
+</g>
+<!-- k2_7->k1_7 -->
+<g id="edge124" class="edge"><title>k2_7->k1_7</title>
+<path style="fill:none;stroke:#b9495e;" d="M1840,-564C1841,-568 1841,-572 1841,-576"/>
+<polygon style="fill:#b9495e;stroke:#b9495e;" points="1837.51,-575.701 1840,-586 1844.48,-576.398 1837.51,-575.701"/>
+</g>
+<!-- k3_7 -->
+<g id="node125" class="node"><title>k3_7</title>
+<polygon style="fill:#d3fae3;stroke:#87b379;" points="1910,-489.5 1762,-489.5 1758,-485.5 1758,-438.5 1906,-438.5 1910,-442.5 1910,-489.5"/>
+<polyline style="fill:none;stroke:#87b379;" points="1906,-485.5 1758,-485.5 "/>
+<polyline style="fill:none;stroke:#87b379;" points="1906,-485.5 1906,-438.5 "/>
+<polyline style="fill:none;stroke:#87b379;" points="1906,-485.5 1910,-489.5 "/>
+<text text-anchor="middle" x="1834" y="-457.392" style="font-family:Times New Roman;font-size:20.0px;fill:#87b379;">Up-to-date task</text>
+</g>
+<!-- k2_7->k3_7 -->
+<g id="edge126" class="edge"><title>k2_7->k3_7</title>
+<path style="fill:none;stroke:gray;" d="M1834,-512C1834,-508 1834,-504 1834,-500"/>
+<polygon style="fill:gray;stroke:gray;" points="1837.5,-500 1834,-490 1830.5,-500 1837.5,-500"/>
+</g>
+<!-- k4_7 -->
+<g id="node127" class="node"><title>k4_7</title>
+<polygon style="fill:none;stroke:black;" points="1944,-415.5 1728,-415.5 1724,-411.5 1724,-364.5 1940,-364.5 1944,-368.5 1944,-415.5"/>
+<polyline style="fill:none;stroke:black;" points="1940,-411.5 1724,-411.5 "/>
+<polyline style="fill:none;stroke:black;" points="1940,-411.5 1940,-364.5 "/>
+<polyline style="fill:none;stroke:black;" points="1940,-411.5 1944,-415.5 "/>
+<text text-anchor="middle" x="1834" y="-383.392" style="font-family:Times New Roman;font-size:20.0px;">Explicitly specified task</text>
+</g>
+<!-- k3_7->k4_7 -->
+<g id="edge128" class="edge"><title>k3_7->k4_7</title>
+<path style="fill:none;stroke:gray;" d="M1834,-438C1834,-434 1834,-430 1834,-426"/>
+<polygon style="fill:gray;stroke:gray;" points="1837.5,-426 1834,-416 1830.5,-426 1837.5,-426"/>
+</g>
+<!-- k5_7 -->
+<g id="node129" class="node"><title>k5_7</title>
+<polygon style="fill:none;stroke:#87bae4;" points="1892,-341.5 1780,-341.5 1776,-337.5 1776,-290.5 1888,-290.5 1892,-294.5 1892,-341.5"/>
+<polyline style="fill:none;stroke:#87bae4;" points="1888,-337.5 1776,-337.5 "/>
+<polyline style="fill:none;stroke:#87bae4;" points="1888,-337.5 1888,-290.5 "/>
+<polyline style="fill:none;stroke:#87bae4;" points="1888,-337.5 1892,-341.5 "/>
+<text text-anchor="middle" x="1834" y="-309.392" style="font-family:Times New Roman;font-size:20.0px;fill:#87bae4;">Task to run</text>
+</g>
+<!-- k4_7->k5_7 -->
+<g id="edge130" class="edge"><title>k4_7->k5_7</title>
+<path style="fill:none;stroke:#87bae4;" d="M1834,-364C1834,-360 1834,-356 1834,-352"/>
+<polygon style="fill:#87bae4;stroke:#87bae4;" points="1837.5,-352 1834,-342 1830.5,-352 1837.5,-352"/>
+</g>
+<!-- k6_7 -->
+<g id="node131" class="node"><title>k6_7</title>
+<polygon style="fill:none;stroke:#87bae4;stroke-dasharray:5,2;" points="1910,-268 1762,-268 1758,-264 1758,-194 1906,-194 1910,-198 1910,-268"/>
+<polyline style="fill:none;stroke:#87bae4;stroke-dasharray:5,2;" points="1906,-264 1758,-264 "/>
+<polyline style="fill:none;stroke:#87bae4;stroke-dasharray:5,2;" points="1906,-264 1906,-194 "/>
+<polyline style="fill:none;stroke:#87bae4;stroke-dasharray:5,2;" points="1906,-264 1910,-268 "/>
+<text text-anchor="middle" x="1834" y="-235.892" style="font-family:Times New Roman;font-size:20.0px;fill:#87bae4;">Up-to-date task</text>
+<text text-anchor="middle" x="1834" y="-212.892" style="font-family:Times New Roman;font-size:20.0px;fill:#87bae4;">forced to rerun</text>
+</g>
+<!-- k5_7->k6_7 -->
+<g id="edge132" class="edge"><title>k5_7->k6_7</title>
+<path style="fill:none;stroke:#87bae4;" d="M1834,-290C1834,-286 1834,-282 1834,-278"/>
+<polygon style="fill:#87bae4;stroke:#87bae4;" points="1837.5,-278 1834,-268 1830.5,-278 1837.5,-278"/>
+</g>
+<!-- k7_7 -->
+<g id="node133" class="node"><title>k7_7</title>
+<polygon style="fill:#fdba40;stroke:#87b379;" points="1894,-172 1778,-172 1774,-168 1774,-98 1890,-98 1894,-102 1894,-172"/>
+<polyline style="fill:none;stroke:#87b379;" points="1890,-168 1774,-168 "/>
+<polyline style="fill:none;stroke:#87b379;" points="1890,-168 1890,-98 "/>
+<polyline style="fill:none;stroke:#87b379;" points="1890,-168 1894,-172 "/>
+<text text-anchor="middle" x="1834" y="-139.892" style="font-family:Times New Roman;font-size:20.0px;fill:#87b379;">Up-to-date</text>
+<text text-anchor="middle" x="1834" y="-116.892" style="font-family:Times New Roman;font-size:20.0px;fill:#87b379;">Final target</text>
+</g>
+<!-- k6_7->k7_7 -->
+<g id="edge134" class="edge"><title>k6_7->k7_7</title>
+<path style="fill:none;stroke:#87bae4;" d="M1834,-194C1834,-190 1834,-186 1834,-182"/>
+<polygon style="fill:#87bae4;stroke:#87bae4;" points="1837.5,-182 1834,-172 1830.5,-182 1837.5,-182"/>
+</g>
+<!-- k8_7 -->
+<g id="node135" class="node"><title>k8_7</title>
+<polygon style="fill:#fdba40;stroke:black;" points="1894,-75.5 1778,-75.5 1774,-71.5 1774,-24.5 1890,-24.5 1894,-28.5 1894,-75.5"/>
+<polyline style="fill:none;stroke:black;" points="1890,-71.5 1774,-71.5 "/>
+<polyline style="fill:none;stroke:black;" points="1890,-71.5 1890,-24.5 "/>
+<polyline style="fill:none;stroke:black;" points="1890,-71.5 1894,-75.5 "/>
+<text text-anchor="middle" x="1834" y="-43.392" style="font-family:Times New Roman;font-size:20.0px;">Final target</text>
+</g>
+<!-- k7_7->k8_7 -->
+<g id="edge136" class="edge"><title>k7_7->k8_7</title>
+<path style="fill:none;stroke:gray;" d="M1834,-98C1834,-94 1834,-90 1834,-86"/>
+<polygon style="fill:gray;stroke:gray;" points="1837.5,-86 1834,-76 1830.5,-86 1837.5,-86"/>
+</g>
+</g>
+</svg>
diff --git a/doc/_build/html/_downloads/gallery_big_pipeline.svg b/doc/_build/html/_downloads/gallery_big_pipeline.svg
new file mode 100644
index 0000000..49ca7ed
--- /dev/null
+++ b/doc/_build/html/_downloads/gallery_big_pipeline.svg
@@ -0,0 +1,2699 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.0//EN"
+ "http://www.w3.org/TR/2001/REC-SVG-20010904/DTD/svg10.dtd" [
+ <!ATTLIST svg xmlns:xlink CDATA #FIXED "http://www.w3.org/1999/xlink">
+]>
+<!-- Generated by Graphviz version 2.20.2 (Mon Aug 4 08:59:22 UTC 2008)
+ For user: (andreas) Andreas Heger -->
+<!-- Title: tree Pages: 1 -->
+<svg width="4320pt" height="480pt"
+ viewBox="0.00 0.00 432.00 48.31" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<g id="graph0" class="graph" transform="scale(0.0388979 0.0388979) rotate(0) translate(4 1238)">
+<title>tree</title>
+<polygon style="fill:white;stroke:white;" points="-4,4 -4,-1238 11102,-1238 11102,4 -4,4"/>
+<g id="cluster2" class="cluster"><title>clustertasks</title>
+<polygon style="fill:none;stroke:black;" points="12,-16 12,-1218 10914,-1218 10914,-16 12,-16"/>
+<text text-anchor="middle" x="5463" y="-1187" style="font-family:Times New Roman;font-size:30.00;">Pipeline:</text>
+</g>
+<g id="cluster3" class="cluster"><title>clusterkey</title>
+<polygon style="fill:#e5e5e5;stroke:#e5e5e5;" points="10922,-872 10922,-1226 11090,-1226 11090,-872 10922,-872"/>
+<text text-anchor="middle" x="11006" y="-1195" style="font-family:Times New Roman;font-size:30.00;">Key:</text>
+</g>
+<!-- t0 -->
+<g id="node2" class="node"><title>t0</title>
+<polygon style="fill:#90ee90;stroke:green;" points="1983,-1168 1861,-1168 1857,-1164 1857,-1132 1979,-1132 1983,-1136 1983,-1168"/>
+<polyline style="fill:none;stroke:green;" points="1979,-1164 1857,-1164 "/>
+<polyline style="fill:none;stroke:green;" points="1979,-1164 1979,-1132 "/>
+<polyline style="fill:none;stroke:green;" points="1979,-1164 1983,-1168 "/>
+<text text-anchor="middle" x="1920" y="-1143.5" style="font-family:Times New Roman;font-size:20.00;">indexGenome</text>
+</g>
+<!-- t181 -->
+<g id="node32" class="node"><title>t181</title>
+<polygon style="fill:none;stroke:blue;" points="1134,-312 1062,-312 1058,-308 1058,-276 1130,-276 1134,-280 1134,-312"/>
+<polyline style="fill:none;stroke:blue;" points="1130,-308 1058,-308 "/>
+<polyline style="fill:none;stroke:blue;" points="1130,-308 1130,-276 "/>
+<polyline style="fill:none;stroke:blue;" points="1130,-308 1134,-312 "/>
+<text text-anchor="middle" x="1096" y="-287.5" style="font-family:Times New Roman;font-size:20.00;fill:blue;">prepare</text>
+</g>
+<!-- t0->t181 -->
+<g id="edge383" class="edge"><title>t0->t181</title>
+<path style="fill:none;stroke:gray;" d="M1913,-1132C1903,-1102 1885,-1043 1885,-991 1885,-991 1885,-991 1885,-664 1885,-602 1902,-577 1865,-528 1798,-436 1744,-449 1634,-418 1555,-395 1520,-440 1450,-396 1435,-385 1444,-369 1428,-360 1376,-327 1208,-361 1150,-338 1139,-333 1129,-326 1120,-319"/>
+<polygon style="fill:gray;stroke:gray;" points="1122.55,-316.596 1113,-312 1117.6,-321.546 1122.55,-316.596"/>
+</g>
+<!-- t16 -->
+<g id="node34" class="node"><title>t16</title>
+<polygon style="fill:#90ee90;stroke:green;" points="5835,-1094 5737,-1094 5733,-1090 5733,-1058 5831,-1058 5835,-1062 5835,-1094"/>
+<polyline style="fill:none;stroke:green;" points="5831,-1090 5733,-1090 "/>
+<polyline style="fill:none;stroke:green;" points="5831,-1090 5831,-1058 "/>
+<polyline style="fill:none;stroke:green;" points="5831,-1090 5835,-1094 "/>
+<text text-anchor="middle" x="5784" y="-1069.5" style="font-family:Times New Roman;font-size:20.00;">buildBAM</text>
+</g>
+<!-- t0->t16 -->
+<g id="edge21" class="edge"><title>t0->t16</title>
+<path style="fill:none;stroke:gray;" d="M1983,-1149C2442,-1140 5269,-1086 5723,-1077"/>
+<polygon style="fill:gray;stroke:gray;" points="5723,-1080.5 5733,-1077 5723,-1073.5 5723,-1080.5"/>
+</g>
+<!-- t39 -->
+<g id="node3" class="node"><title>t39</title>
+<polygon style="fill:#90ee90;stroke:green;" points="2811,-396 2693,-396 2689,-392 2689,-360 2807,-360 2811,-364 2811,-396"/>
+<polyline style="fill:none;stroke:green;" points="2807,-392 2689,-392 "/>
+<polyline style="fill:none;stroke:green;" points="2807,-392 2807,-360 "/>
+<polyline style="fill:none;stroke:green;" points="2807,-392 2811,-396 "/>
+<text text-anchor="middle" x="2750" y="-371.5" style="font-family:Times New Roman;font-size:20.00;">buildGenome</text>
+</g>
+<!-- t39->t181 -->
+<g id="edge349" class="edge"><title>t39->t181</title>
+<path style="fill:none;stroke:gray;" d="M2689,-362C2684,-361 2680,-361 2675,-360 2591,-349 1228,-367 1148,-338 1137,-334 1128,-327 1119,-320"/>
+<polygon style="fill:gray;stroke:gray;" points="1121.22,-317.221 1112,-312 1115.95,-321.831 1121.22,-317.221"/>
+</g>
+<!-- t36 -->
+<g id="node4" class="node"><title>t36</title>
+<polygon style="fill:#90ee90;stroke:green;" points="7517,-480 7359,-480 7355,-476 7355,-444 7513,-444 7517,-448 7517,-480"/>
+<polyline style="fill:none;stroke:green;" points="7513,-476 7355,-476 "/>
+<polyline style="fill:none;stroke:green;" points="7513,-476 7513,-444 "/>
+<polyline style="fill:none;stroke:green;" points="7513,-476 7517,-480 "/>
+<text text-anchor="middle" x="7436" y="-455.5" style="font-family:Times New Roman;font-size:20.00;">buildGeneRegions</text>
+</g>
+<!-- t42 -->
+<g id="node20" class="node"><title>t42</title>
+<polygon style="fill:#90ee90;stroke:green;" points="10199,-396 10001,-396 9997,-392 9997,-360 10195,-360 10199,-364 10199,-396"/>
+<polyline style="fill:none;stroke:green;" points="10195,-392 9997,-392 "/>
+<polyline style="fill:none;stroke:green;" points="10195,-392 10195,-360 "/>
+<polyline style="fill:none;stroke:green;" points="10195,-392 10199,-396 "/>
+<text text-anchor="middle" x="10098" y="-371.5" style="font-family:Times New Roman;font-size:20.00;">exportReferenceAsBed</text>
+</g>
+<!-- t36->t42 -->
+<g id="edge339" class="edge"><title>t36->t42</title>
+<path style="fill:none;stroke:gray;" d="M7517,-461C7894,-458 9471,-443 9987,-396"/>
+<polygon style="fill:gray;stroke:gray;" points="9987.4,-399.478 9997,-395 9986.7,-392.512 9987.4,-399.478"/>
+</g>
+<!-- t36->t181 -->
+<g id="edge355" class="edge"><title>t36->t181</title>
+<path style="fill:none;stroke:gray;" d="M7355,-460C7088,-453 6213,-430 5489,-418 5459,-417 3311,-411 3284,-396 3269,-386 3280,-369 3264,-360 3214,-330 1219,-355 1163,-338 1149,-334 1137,-326 1125,-318"/>
+<polygon style="fill:gray;stroke:gray;" points="1127.1,-315.2 1117,-312 1122.9,-320.8 1127.1,-315.2"/>
+</g>
+<!-- t97 -->
+<g id="node160" class="node"><title>t97</title>
+<polygon style="fill:#90ee90;stroke:green;" points="8589,-396 8473,-396 8469,-392 8469,-360 8585,-360 8589,-364 8589,-396"/>
+<polyline style="fill:none;stroke:green;" points="8585,-392 8469,-392 "/>
+<polyline style="fill:none;stroke:green;" points="8585,-392 8585,-360 "/>
+<polyline style="fill:none;stroke:green;" points="8585,-392 8589,-396 "/>
+<text text-anchor="middle" x="8529" y="-371.5" style="font-family:Times New Roman;font-size:20.00;">annotateTSS</text>
+</g>
+<!-- t36->t97 -->
+<g id="edge143" class="edge"><title>t36->t97</title>
+<path style="fill:none;stroke:gray;" d="M7517,-459C7722,-450 8260,-426 8459,-396"/>
+<polygon style="fill:gray;stroke:gray;" points="8459.88,-399.393 8469,-394 8458.51,-392.529 8459.88,-399.393"/>
+</g>
+<!-- t167 -->
+<g id="node167" class="node"><title>t167</title>
+<polygon style="fill:#90ee90;stroke:green;" points="8889,-396 8611,-396 8607,-392 8607,-360 8885,-360 8889,-364 8889,-396"/>
+<polyline style="fill:none;stroke:green;" points="8885,-392 8607,-392 "/>
+<polyline style="fill:none;stroke:green;" points="8885,-392 8885,-360 "/>
+<polyline style="fill:none;stroke:green;" points="8885,-392 8889,-396 "/>
+<text text-anchor="middle" x="8748" y="-371.5" style="font-family:Times New Roman;font-size:20.00;">annotateTSSIntervalAssociations</text>
+</g>
+<!-- t36->t167 -->
+<g id="edge149" class="edge"><title>t36->t167</title>
+<path style="fill:none;stroke:gray;" d="M7517,-459C7705,-452 8185,-431 8597,-396"/>
+<polygon style="fill:gray;stroke:gray;" points="8597.4,-399.478 8607,-395 8596.7,-392.512 8597.4,-399.478"/>
+</g>
+<!-- t165 -->
+<g id="node171" class="node"><title>t165</title>
+<polygon style="fill:#90ee90;stroke:green;" points="8451,-396 8203,-396 8199,-392 8199,-360 8447,-360 8451,-364 8451,-396"/>
+<polyline style="fill:none;stroke:green;" points="8447,-392 8199,-392 "/>
+<polyline style="fill:none;stroke:green;" points="8447,-392 8447,-360 "/>
+<polyline style="fill:none;stroke:green;" points="8447,-392 8451,-396 "/>
+<text text-anchor="middle" x="8325" y="-371.5" style="font-family:Times New Roman;font-size:20.00;">annotateTSSIntervalDistance</text>
+</g>
+<!-- t36->t165 -->
+<g id="edge153" class="edge"><title>t36->t165</title>
+<path style="fill:none;stroke:gray;" d="M7517,-456C7655,-445 7942,-422 8185,-396 8186,-396 8188,-396 8189,-395"/>
+<polygon style="fill:gray;stroke:gray;" points="8189.4,-398.478 8199,-394 8188.7,-391.512 8189.4,-398.478"/>
+</g>
+<!-- t106 -->
+<g id="node178" class="node"><title>t106</title>
+<polygon style="fill:#90ee90;stroke:green;" points="9083,-396 8911,-396 8907,-392 8907,-360 9079,-360 9083,-364 9083,-396"/>
+<polyline style="fill:none;stroke:green;" points="9079,-392 8907,-392 "/>
+<polyline style="fill:none;stroke:green;" points="9079,-392 9079,-360 "/>
+<polyline style="fill:none;stroke:green;" points="9079,-392 9083,-396 "/>
+<text text-anchor="middle" x="8995" y="-371.5" style="font-family:Times New Roman;font-size:20.00;">buildIntervalCounts</text>
+</g>
+<!-- t36->t106 -->
+<g id="edge159" class="edge"><title>t36->t106</title>
+<path style="fill:none;stroke:gray;" d="M7517,-460C7780,-452 8607,-428 8897,-396"/>
+<polygon style="fill:gray;stroke:gray;" points="8897.4,-399.478 8907,-395 8896.7,-392.512 8897.4,-399.478"/>
+</g>
+<!-- t178 -->
+<g id="node186" class="node"><title>t178</title>
+<polygon style="fill:#90ee90;stroke:green;" points="9651,-396 9457,-396 9453,-392 9453,-360 9647,-360 9651,-364 9651,-396"/>
+<polyline style="fill:none;stroke:green;" points="9647,-392 9453,-392 "/>
+<polyline style="fill:none;stroke:green;" points="9647,-392 9647,-360 "/>
+<polyline style="fill:none;stroke:green;" points="9647,-392 9651,-396 "/>
+<text text-anchor="middle" x="9552" y="-371.5" style="font-family:Times New Roman;font-size:20.00;">importOverlapRegions</text>
+</g>
+<!-- t36->t178 -->
+<g id="edge167" class="edge"><title>t36->t178</title>
+<path style="fill:none;stroke:gray;" d="M7517,-461C7840,-456 9040,-434 9443,-396"/>
+<polygon style="fill:gray;stroke:gray;" points="9443.4,-399.478 9453,-395 9442.7,-392.512 9443.4,-399.478"/>
+</g>
+<!-- t114 -->
+<g id="node195" class="node"><title>t114</title>
+<polygon style="fill:#90ee90;stroke:green;" points="5889,-202 5683,-202 5679,-198 5679,-166 5885,-166 5889,-170 5889,-202"/>
+<polyline style="fill:none;stroke:green;" points="5885,-198 5679,-198 "/>
+<polyline style="fill:none;stroke:green;" points="5885,-198 5885,-166 "/>
+<polyline style="fill:none;stroke:green;" points="5885,-198 5889,-202 "/>
+<text text-anchor="middle" x="5784" y="-177.5" style="font-family:Times New Roman;font-size:20.00;">makeAnnotatorDistance</text>
+</g>
+<!-- t36->t114 -->
+<g id="edge175" class="edge"><title>t36->t114</title>
+<path style="fill:none;stroke:gray;" d="M7426,-444C7419,-431 7409,-412 7401,-396 7394,-380 7401,-369 7386,-360 7344,-331 6509,-370 6469,-338 6438,-312 6480,-275 6449,-250 6409,-216 6030,-235 5977,-228 5938,-223 5895,-213 5860,-204"/>
+<polygon style="fill:gray;stroke:gray;" points="5860.49,-200.529 5850,-202 5859.12,-207.393 5860.49,-200.529"/>
+</g>
+<!-- t117 -->
+<g id="node197" class="node"><title>t117</title>
+<polygon style="fill:#90ee90;stroke:green;" points="8161,-396 7925,-396 7921,-392 7921,-360 8157,-360 8161,-364 8161,-396"/>
+<polyline style="fill:none;stroke:green;" points="8157,-392 7921,-392 "/>
+<polyline style="fill:none;stroke:green;" points="8157,-392 8157,-360 "/>
+<polyline style="fill:none;stroke:green;" points="8157,-392 8161,-396 "/>
+<text text-anchor="middle" x="8041" y="-371.5" style="font-family:Times New Roman;font-size:20.00;">makeAnnotatorArchitecture</text>
+</g>
+<!-- t36->t117 -->
+<g id="edge177" class="edge"><title>t36->t117</title>
+<path style="fill:none;stroke:gray;" d="M7517,-451C7618,-437 7791,-412 7911,-396"/>
+<polygon style="fill:gray;stroke:gray;" points="7911.4,-399.478 7921,-395 7910.7,-392.512 7911.4,-399.478"/>
+</g>
+<!-- t130 -->
+<g id="node205" class="node"><title>t130</title>
+<polygon style="fill:#90ee90;stroke:green;" points="6728,-312 6482,-312 6478,-308 6478,-276 6724,-276 6728,-280 6728,-312"/>
+<polyline style="fill:none;stroke:green;" points="6724,-308 6478,-308 "/>
+<polyline style="fill:none;stroke:green;" points="6724,-308 6724,-276 "/>
+<polyline style="fill:none;stroke:green;" points="6724,-308 6728,-312 "/>
+<text text-anchor="middle" x="6603" y="-287.5" style="font-family:Times New Roman;font-size:20.00;">makeAnnotatorPromotorsGO</text>
+</g>
+<!-- t36->t130 -->
+<g id="edge185" class="edge"><title>t36->t130</title>
+<path style="fill:none;stroke:gray;" d="M7436,-444C7434,-420 7428,-379 7401,-360 7372,-338 6774,-344 6737,-338 6711,-334 6682,-324 6657,-316"/>
+<polygon style="fill:gray;stroke:gray;" points="6658.56,-312.863 6648,-312 6655.72,-319.26 6658.56,-312.863"/>
+</g>
+<!-- t131 -->
+<g id="node209" class="node"><title>t131</title>
+<polygon style="fill:#90ee90;stroke:green;" points="7034,-312 6750,-312 6746,-308 6746,-276 7030,-276 7034,-280 7034,-312"/>
+<polyline style="fill:none;stroke:green;" points="7030,-308 6746,-308 "/>
+<polyline style="fill:none;stroke:green;" points="7030,-308 7030,-276 "/>
+<polyline style="fill:none;stroke:green;" points="7030,-308 7034,-312 "/>
+<text text-anchor="middle" x="6890" y="-287.5" style="font-family:Times New Roman;font-size:20.00;">makeAnnotatorPromotorsGOSlim</text>
+</g>
+<!-- t36->t131 -->
+<g id="edge189" class="edge"><title>t36->t131</title>
+<path style="fill:none;stroke:gray;" d="M7517,-456C7642,-446 7868,-423 7892,-396 7903,-383 7904,-371 7892,-360 7860,-326 7090,-345 7043,-338 7013,-334 6979,-324 6951,-315"/>
+<polygon style="fill:gray;stroke:gray;" points="6951.58,-311.521 6941,-312 6949.57,-318.226 6951.58,-311.521"/>
+</g>
+<!-- t132 -->
+<g id="node213" class="node"><title>t132</title>
+<polygon style="fill:#90ee90;stroke:green;" points="4810,-312 4562,-312 4558,-308 4558,-276 4806,-276 4810,-280 4810,-312"/>
+<polyline style="fill:none;stroke:green;" points="4806,-308 4558,-308 "/>
+<polyline style="fill:none;stroke:green;" points="4806,-308 4806,-276 "/>
+<polyline style="fill:none;stroke:green;" points="4806,-308 4810,-312 "/>
+<text text-anchor="middle" x="4684" y="-287.5" style="font-family:Times New Roman;font-size:20.00;">makeAnnotatorTerritoriesGO</text>
+</g>
+<!-- t36->t132 -->
+<g id="edge193" class="edge"><title>t36->t132</title>
+<path style="fill:none;stroke:gray;" d="M7355,-461C7056,-456 6030,-435 5973,-396 5959,-386 5973,-369 5958,-360 5906,-324 4882,-348 4819,-338 4792,-334 4763,-324 4738,-316"/>
+<polygon style="fill:gray;stroke:gray;" points="4739.56,-312.863 4729,-312 4736.72,-319.26 4739.56,-312.863"/>
+</g>
+<!-- t133 -->
+<g id="node217" class="node"><title>t133</title>
+<polygon style="fill:#90ee90;stroke:green;" points="5116,-312 4832,-312 4828,-308 4828,-276 5112,-276 5116,-280 5116,-312"/>
+<polyline style="fill:none;stroke:green;" points="5112,-308 4828,-308 "/>
+<polyline style="fill:none;stroke:green;" points="5112,-308 5112,-276 "/>
+<polyline style="fill:none;stroke:green;" points="5112,-308 5116,-312 "/>
+<text text-anchor="middle" x="4972" y="-287.5" style="font-family:Times New Roman;font-size:20.00;">makeAnnotatorTerritoriesGOSlim</text>
+</g>
+<!-- t36->t133 -->
+<g id="edge197" class="edge"><title>t36->t133</title>
+<path style="fill:none;stroke:gray;" d="M7355,-460C7060,-453 6058,-428 5998,-396 5981,-386 5991,-369 5973,-360 5932,-337 5172,-345 5125,-338 5095,-334 5061,-324 5033,-315"/>
+<polygon style="fill:gray;stroke:gray;" points="5033.58,-311.521 5023,-312 5031.57,-318.226 5033.58,-311.521"/>
+</g>
+<!-- t118 -->
+<g id="node225" class="node"><title>t118</title>
+<polygon style="fill:#90ee90;stroke:green;" points="7651,-396 7459,-396 7455,-392 7455,-360 7647,-360 7651,-364 7651,-396"/>
+<polyline style="fill:none;stroke:green;" points="7647,-392 7455,-392 "/>
+<polyline style="fill:none;stroke:green;" points="7647,-392 7647,-360 "/>
+<polyline style="fill:none;stroke:green;" points="7647,-392 7651,-396 "/>
+<text text-anchor="middle" x="7553" y="-371.5" style="font-family:Times New Roman;font-size:20.00;">makeAnnotatorGWAS</text>
+</g>
+<!-- t36->t118 -->
+<g id="edge205" class="edge"><title>t36->t118</title>
+<path style="fill:none;stroke:gray;" d="M7461,-444C7477,-432 7500,-416 7519,-402"/>
+<polygon style="fill:gray;stroke:gray;" points="7521.1,-404.8 7527,-396 7516.9,-399.2 7521.1,-404.8"/>
+</g>
+<!-- t119 -->
+<g id="node229" class="node"><title>t119</title>
+<polygon style="fill:#90ee90;stroke:green;" points="7883,-396 7673,-396 7669,-392 7669,-360 7879,-360 7883,-364 7883,-396"/>
+<polyline style="fill:none;stroke:green;" points="7879,-392 7669,-392 "/>
+<polyline style="fill:none;stroke:green;" points="7879,-392 7879,-360 "/>
+<polyline style="fill:none;stroke:green;" points="7879,-392 7883,-396 "/>
+<text text-anchor="middle" x="7776" y="-371.5" style="font-family:Times New Roman;font-size:20.00;">makeAnnotatorSelection</text>
+</g>
+<!-- t36->t119 -->
+<g id="edge209" class="edge"><title>t36->t119</title>
+<path style="fill:none;stroke:gray;" d="M7509,-444C7563,-431 7636,-412 7693,-398"/>
+<polygon style="fill:gray;stroke:gray;" points="7693.88,-401.393 7703,-396 7692.51,-394.529 7693.88,-401.393"/>
+</g>
+<!-- t147 -->
+<g id="node249" class="node"><title>t147</title>
+<polygon style="fill:none;stroke:blue;" points="5737,-396 5417,-396 5413,-392 5413,-360 5733,-360 5737,-364 5737,-396"/>
+<polyline style="fill:none;stroke:blue;" points="5733,-392 5413,-392 "/>
+<polyline style="fill:none;stroke:blue;" points="5733,-392 5733,-360 "/>
+<polyline style="fill:none;stroke:blue;" points="5733,-392 5737,-396 "/>
+<text text-anchor="middle" x="5575" y="-371.5" style="font-family:Times New Roman;font-size:20.00;fill:blue;">makeAnnotatorArchitectureWithMotif</text>
+</g>
+<!-- t36->t147 -->
+<g id="edge233" class="edge"><title>t36->t147</title>
+<path style="fill:none;stroke:gray;" d="M7355,-461C7112,-456 6372,-440 5747,-396"/>
+<polygon style="fill:gray;stroke:gray;" points="5747.3,-392.512 5737,-395 5746.6,-399.478 5747.3,-392.512"/>
+</g>
+<!-- t148 -->
+<g id="node253" class="node"><title>t148</title>
+<polygon style="fill:none;stroke:blue;" points="5395,-396 5049,-396 5045,-392 5045,-360 5391,-360 5395,-364 5395,-396"/>
+<polyline style="fill:none;stroke:blue;" points="5391,-392 5045,-392 "/>
+<polyline style="fill:none;stroke:blue;" points="5391,-392 5391,-360 "/>
+<polyline style="fill:none;stroke:blue;" points="5391,-392 5395,-396 "/>
+<text text-anchor="middle" x="5220" y="-371.5" style="font-family:Times New Roman;font-size:20.00;fill:blue;">makeAnnotatorArchitectureWithoutMotif</text>
+</g>
+<!-- t36->t148 -->
+<g id="edge237" class="edge"><title>t36->t148</title>
+<path style="fill:none;stroke:gray;" d="M7355,-461C7081,-458 6170,-445 5405,-396"/>
+<polygon style="fill:gray;stroke:gray;" points="5405.3,-392.512 5395,-395 5404.6,-399.478 5405.3,-392.512"/>
+</g>
+<!-- t139 -->
+<g id="node284" class="node"><title>t139</title>
+<polygon style="fill:none;stroke:blue;" points="6771,-396 6573,-396 6569,-392 6569,-360 6767,-360 6771,-364 6771,-396"/>
+<polyline style="fill:none;stroke:blue;" points="6767,-392 6569,-392 "/>
+<polyline style="fill:none;stroke:blue;" points="6767,-392 6767,-360 "/>
+<polyline style="fill:none;stroke:blue;" points="6767,-392 6771,-396 "/>
+<text text-anchor="middle" x="6670" y="-371.5" style="font-family:Times New Roman;font-size:20.00;fill:blue;">makeAnnotatorROIGO</text>
+</g>
+<!-- t36->t139 -->
+<g id="edge277" class="edge"><title>t36->t139</title>
+<path style="fill:none;stroke:gray;" d="M7355,-455C7233,-445 6995,-424 6781,-396"/>
+<polygon style="fill:gray;stroke:gray;" points="6781.3,-392.512 6771,-395 6780.6,-399.478 6781.3,-392.512"/>
+</g>
+<!-- t138 -->
+<g id="node288" class="node"><title>t138</title>
+<polygon style="fill:none;stroke:blue;" points="6247,-396 6011,-396 6007,-392 6007,-360 6243,-360 6247,-364 6247,-396"/>
+<polyline style="fill:none;stroke:blue;" points="6243,-392 6007,-392 "/>
+<polyline style="fill:none;stroke:blue;" points="6243,-392 6243,-360 "/>
+<polyline style="fill:none;stroke:blue;" points="6243,-392 6247,-396 "/>
+<text text-anchor="middle" x="6127" y="-371.5" style="font-family:Times New Roman;font-size:20.00;fill:blue;">makeAnnotatorROIGOSlim</text>
+</g>
+<!-- t36->t138 -->
+<g id="edge281" class="edge"><title>t36->t138</title>
+<path style="fill:none;stroke:gray;" d="M7355,-460C7165,-454 6676,-436 6257,-396"/>
+<polygon style="fill:gray;stroke:gray;" points="6257.3,-392.512 6247,-395 6256.6,-399.478 6257.3,-392.512"/>
+</g>
+<!-- t141 -->
+<g id="node292" class="node"><title>t141</title>
+<polygon style="fill:none;stroke:blue;" points="7055,-396 6793,-396 6789,-392 6789,-360 7051,-360 7055,-364 7055,-396"/>
+<polyline style="fill:none;stroke:blue;" points="7051,-392 6789,-392 "/>
+<polyline style="fill:none;stroke:blue;" points="7051,-392 7051,-360 "/>
+<polyline style="fill:none;stroke:blue;" points="7051,-392 7055,-396 "/>
+<text text-anchor="middle" x="6922" y="-371.5" style="font-family:Times New Roman;font-size:20.00;fill:blue;">makeAnnotatorROIOverlapGO</text>
+</g>
+<!-- t36->t141 -->
+<g id="edge285" class="edge"><title>t36->t141</title>
+<path style="fill:none;stroke:gray;" d="M7355,-449C7271,-435 7138,-414 7042,-398"/>
+<polygon style="fill:gray;stroke:gray;" points="7042.49,-394.529 7032,-396 7041.12,-401.393 7042.49,-394.529"/>
+</g>
+<!-- t140 -->
+<g id="node296" class="node"><title>t140</title>
+<polygon style="fill:none;stroke:blue;" points="7377,-396 7077,-396 7073,-392 7073,-360 7373,-360 7377,-364 7377,-396"/>
+<polyline style="fill:none;stroke:blue;" points="7373,-392 7073,-392 "/>
+<polyline style="fill:none;stroke:blue;" points="7373,-392 7373,-360 "/>
+<polyline style="fill:none;stroke:blue;" points="7373,-392 7377,-396 "/>
+<text text-anchor="middle" x="7225" y="-371.5" style="font-family:Times New Roman;font-size:20.00;fill:blue;">makeAnnotatorROIOverlapGOSlim</text>
+</g>
+<!-- t36->t140 -->
+<g id="edge289" class="edge"><title>t36->t140</title>
+<path style="fill:none;stroke:gray;" d="M7391,-444C7359,-431 7314,-413 7280,-400"/>
+<polygon style="fill:gray;stroke:gray;" points="7281.56,-396.863 7271,-396 7278.72,-403.26 7281.56,-396.863"/>
+</g>
+<!-- t37 -->
+<g id="node5" class="node"><title>t37</title>
+<polygon style="fill:#90ee90;stroke:green;" points="2618,-480 2498,-480 2494,-476 2494,-444 2614,-444 2618,-448 2618,-480"/>
+<polyline style="fill:none;stroke:green;" points="2614,-476 2494,-476 "/>
+<polyline style="fill:none;stroke:green;" points="2614,-476 2614,-444 "/>
+<polyline style="fill:none;stroke:green;" points="2614,-476 2618,-480 "/>
+<text text-anchor="middle" x="2556" y="-455.5" style="font-family:Times New Roman;font-size:20.00;">buildGeneSet</text>
+</g>
+<!-- t38 -->
+<g id="node27" class="node"><title>t38</title>
+<polygon style="fill:#90ee90;stroke:green;" points="2631,-396 2485,-396 2481,-392 2481,-360 2627,-360 2631,-364 2631,-396"/>
+<polyline style="fill:none;stroke:green;" points="2627,-392 2481,-392 "/>
+<polyline style="fill:none;stroke:green;" points="2627,-392 2627,-360 "/>
+<polyline style="fill:none;stroke:green;" points="2627,-392 2631,-396 "/>
+<text text-anchor="middle" x="2556" y="-371.5" style="font-family:Times New Roman;font-size:20.00;">importGeneStats</text>
+</g>
+<!-- t37->t38 -->
+<g id="edge15" class="edge"><title>t37->t38</title>
+<path style="fill:none;stroke:gray;" d="M2556,-444C2556,-433 2556,-419 2556,-406"/>
+<polygon style="fill:gray;stroke:gray;" points="2559.5,-406 2556,-396 2552.5,-406 2559.5,-406"/>
+</g>
+<!-- t37->t181 -->
+<g id="edge353" class="edge"><title>t37->t181</title>
+<path style="fill:none;stroke:gray;" d="M2528,-444C2510,-432 2488,-415 2472,-396 2461,-382 2468,-369 2452,-360 2390,-322 1216,-363 1148,-338 1137,-334 1128,-327 1119,-320"/>
+<polygon style="fill:gray;stroke:gray;" points="1121.22,-317.221 1112,-312 1115.95,-321.831 1121.22,-317.221"/>
+</g>
+<!-- t176 -->
+<g id="node188" class="node"><title>t176</title>
+<polygon style="fill:#90ee90;stroke:green;" points="8328,-312 7984,-312 7980,-308 7980,-276 8324,-276 8328,-280 8328,-312"/>
+<polyline style="fill:none;stroke:green;" points="8324,-308 7980,-308 "/>
+<polyline style="fill:none;stroke:green;" points="8324,-308 8324,-276 "/>
+<polyline style="fill:none;stroke:green;" points="8324,-308 8328,-312 "/>
+<text text-anchor="middle" x="8154" y="-287.5" style="font-family:Times New Roman;font-size:20.00;">importOverlapRegionsOfInterestEnsembl</text>
+</g>
+<!-- t37->t176 -->
+<g id="edge169" class="edge"><title>t37->t176</title>
+<path style="fill:none;stroke:gray;" d="M2618,-450C2679,-440 2776,-424 2860,-418 2986,-408 7265,-429 7386,-396 7416,-387 7417,-369 7446,-360 7546,-330 7809,-349 7911,-338 7960,-332 8015,-323 8059,-314"/>
+<polygon style="fill:gray;stroke:gray;" points="8059.88,-317.393 8069,-312 8058.51,-310.529 8059.88,-317.393"/>
+</g>
+<!-- t32 -->
+<g id="node6" class="node"><title>t32</title>
+<polygon style="fill:#90ee90;stroke:green;" points="4626,-590 4484,-590 4480,-586 4480,-554 4622,-554 4626,-558 4626,-590"/>
+<polyline style="fill:none;stroke:green;" points="4622,-586 4480,-586 "/>
+<polyline style="fill:none;stroke:green;" points="4622,-586 4622,-554 "/>
+<polyline style="fill:none;stroke:green;" points="4622,-586 4626,-590 "/>
+<text text-anchor="middle" x="4553" y="-565.5" style="font-family:Times New Roman;font-size:20.00;">buildTranscripts</text>
+</g>
+<!-- t40 -->
+<g id="node7" class="node"><title>t40</title>
+<polygon style="fill:#90ee90;stroke:green;" points="4662,-480 4472,-480 4468,-476 4468,-444 4658,-444 4662,-448 4662,-480"/>
+<polyline style="fill:none;stroke:green;" points="4658,-476 4468,-476 "/>
+<polyline style="fill:none;stroke:green;" points="4658,-476 4658,-444 "/>
+<polyline style="fill:none;stroke:green;" points="4658,-476 4662,-480 "/>
+<text text-anchor="middle" x="4565" y="-455.5" style="font-family:Times New Roman;font-size:20.00;">buildPromotorRegions</text>
+</g>
+<!-- t32->t40 -->
+<g id="edge3" class="edge"><title>t32->t40</title>
+<path style="fill:none;stroke:gray;" d="M4555,-554C4557,-537 4560,-511 4562,-490"/>
+<polygon style="fill:gray;stroke:gray;" points="4565.49,-490.299 4563,-480 4558.52,-489.602 4565.49,-490.299"/>
+</g>
+<!-- t41 -->
+<g id="node9" class="node"><title>t41</title>
+<polygon style="fill:#90ee90;stroke:green;" points="4836,-480 4684,-480 4680,-476 4680,-444 4832,-444 4836,-448 4836,-480"/>
+<polyline style="fill:none;stroke:green;" points="4832,-476 4680,-476 "/>
+<polyline style="fill:none;stroke:green;" points="4832,-476 4832,-444 "/>
+<polyline style="fill:none;stroke:green;" points="4832,-476 4836,-480 "/>
+<text text-anchor="middle" x="4758" y="-455.5" style="font-family:Times New Roman;font-size:20.00;">buildTSSRegions</text>
+</g>
+<!-- t32->t41 -->
+<g id="edge5" class="edge"><title>t32->t41</title>
+<path style="fill:none;stroke:gray;" d="M4587,-554C4622,-535 4677,-506 4715,-485"/>
+<polygon style="fill:gray;stroke:gray;" points="4716.96,-487.916 4724,-480 4713.56,-481.797 4716.96,-487.916"/>
+</g>
+<!-- t34 -->
+<g id="node12" class="node"><title>t34</title>
+<polygon style="fill:#90ee90;stroke:green;" points="3027,-480 2873,-480 2869,-476 2869,-444 3023,-444 3027,-448 3027,-480"/>
+<polyline style="fill:none;stroke:green;" points="3023,-476 2869,-476 "/>
+<polyline style="fill:none;stroke:green;" points="3023,-476 3023,-444 "/>
+<polyline style="fill:none;stroke:green;" points="3023,-476 3027,-480 "/>
+<text text-anchor="middle" x="2948" y="-455.5" style="font-family:Times New Roman;font-size:20.00;">importTranscripts</text>
+</g>
+<!-- t32->t34 -->
+<g id="edge7" class="edge"><title>t32->t34</title>
+<path style="fill:none;stroke:gray;" d="M4480,-565C4372,-555 4166,-536 3989,-528 3884,-522 3139,-531 3036,-506 3019,-502 3001,-494 2986,-485"/>
+<polygon style="fill:gray;stroke:gray;" points="2987.44,-481.797 2977,-480 2984.04,-487.916 2987.44,-481.797"/>
+</g>
+<!-- t32->t181 -->
+<g id="edge363" class="edge"><title>t32->t181</title>
+<path style="fill:none;stroke:gray;" d="M4480,-565C4372,-555 4166,-536 3989,-528 3864,-521 2982,-537 2860,-506 2760,-479 2725,-472 2655,-396 2644,-383 2655,-369 2640,-360 2572,-313 1228,-366 1150,-338 1139,-334 1129,-327 1120,-319"/>
+<polygon style="fill:gray;stroke:gray;" points="1122.55,-316.596 1113,-312 1117.6,-321.546 1122.55,-316.596"/>
+</g>
+<!-- t40->t42 -->
+<g id="edge337" class="edge"><title>t40->t42</title>
+<path style="fill:none;stroke:gray;" d="M4599,-444C4619,-434 4646,-423 4671,-418 4814,-388 9690,-407 9987,-396"/>
+<polygon style="fill:gray;stroke:gray;" points="9987.4,-399.478 9997,-395 9986.7,-392.512 9987.4,-399.478"/>
+</g>
+<!-- t40->t181 -->
+<g id="edge347" class="edge"><title>t40->t181</title>
+<path style="fill:none;stroke:gray;" d="M4531,-444C4511,-434 4484,-423 4459,-418 4374,-400 2964,-434 2885,-396 2867,-386 2876,-369 2857,-360 2773,-317 1240,-370 1150,-338 1139,-334 1129,-327 1120,-319"/>
+<polygon style="fill:gray;stroke:gray;" points="1122.55,-316.596 1113,-312 1117.6,-321.546 1122.55,-316.596"/>
+</g>
+<!-- t41->t42 -->
+<g id="edge11" class="edge"><title>t41->t42</title>
+<path style="fill:none;stroke:gray;" d="M4836,-454C4943,-443 5139,-425 5307,-418 5824,-395 9426,-438 9987,-396"/>
+<polygon style="fill:gray;stroke:gray;" points="9987.4,-399.478 9997,-395 9986.7,-392.512 9987.4,-399.478"/>
+</g>
+<!-- t41->t181 -->
+<g id="edge345" class="edge"><title>t41->t181</title>
+<path style="fill:none;stroke:gray;" d="M4731,-444C4715,-434 4693,-423 4671,-418 4624,-406 2943,-421 2900,-396 2885,-386 2896,-369 2880,-360 2798,-310 1241,-370 1150,-338 1139,-334 1129,-327 1120,-319"/>
+<polygon style="fill:gray;stroke:gray;" points="1122.55,-316.596 1113,-312 1117.6,-321.546 1122.55,-316.596"/>
+</g>
+<!-- t35 -->
+<g id="node11" class="node"><title>t35</title>
+<polygon style="fill:#90ee90;stroke:green;" points="2443,-396 2327,-396 2323,-392 2323,-360 2439,-360 2443,-364 2443,-396"/>
+<polyline style="fill:none;stroke:green;" points="2439,-392 2323,-392 "/>
+<polyline style="fill:none;stroke:green;" points="2439,-392 2439,-360 "/>
+<polyline style="fill:none;stroke:green;" points="2439,-392 2443,-396 "/>
+<text text-anchor="middle" x="2383" y="-371.5" style="font-family:Times New Roman;font-size:20.00;">buildRepeats</text>
+</g>
+<!-- t35->t181 -->
+<g id="edge357" class="edge"><title>t35->t181</title>
+<path style="fill:none;stroke:gray;" d="M2323,-362C2318,-361 2314,-361 2309,-360 2246,-351 1209,-360 1148,-338 1137,-334 1128,-327 1119,-320"/>
+<polygon style="fill:gray;stroke:gray;" points="1121.22,-317.221 1112,-312 1115.95,-321.831 1121.22,-317.221"/>
+</g>
+<!-- t34->t181 -->
+<g id="edge359" class="edge"><title>t34->t181</title>
+<path style="fill:none;stroke:gray;" d="M2923,-444C2906,-431 2883,-413 2865,-396 2851,-381 2854,-368 2835,-360 2751,-319 1238,-369 1150,-338 1139,-334 1129,-327 1120,-319"/>
+<polygon style="fill:gray;stroke:gray;" points="1122.55,-316.596 1113,-312 1117.6,-321.546 1122.55,-316.596"/>
+</g>
+<!-- t8 -->
+<g id="node14" class="node"><title>t8</title>
+<polygon style="fill:#90ee90;stroke:green;" points="3105,-590 2895,-590 2891,-586 2891,-554 3101,-554 3105,-558 3105,-590"/>
+<polyline style="fill:none;stroke:green;" points="3101,-586 2891,-586 "/>
+<polyline style="fill:none;stroke:green;" points="3101,-586 3101,-554 "/>
+<polyline style="fill:none;stroke:green;" points="3101,-586 3105,-590 "/>
+<text text-anchor="middle" x="2998" y="-565.5" style="font-family:Times New Roman;font-size:20.00;">importRegionsOfInterest</text>
+</g>
+<!-- t14 -->
+<g id="node17" class="node"><title>t14</title>
+<polygon style="fill:#90ee90;stroke:green;" points="3257,-480 3049,-480 3045,-476 3045,-444 3253,-444 3257,-448 3257,-480"/>
+<polyline style="fill:none;stroke:green;" points="3253,-476 3045,-476 "/>
+<polyline style="fill:none;stroke:green;" points="3253,-476 3253,-444 "/>
+<polyline style="fill:none;stroke:green;" points="3253,-476 3257,-480 "/>
+<text text-anchor="middle" x="3151" y="-455.5" style="font-family:Times New Roman;font-size:20.00;">exportRegionsOfInterest</text>
+</g>
+<!-- t8->t14 -->
+<g id="edge335" class="edge"><title>t8->t14</title>
+<path style="fill:none;stroke:gray;" d="M3023,-554C3049,-536 3090,-507 3118,-486"/>
+<polygon style="fill:gray;stroke:gray;" points="3120.1,-488.8 3126,-480 3115.9,-483.2 3120.1,-488.8"/>
+</g>
+<!-- t12 -->
+<g id="node30" class="node"><title>t12</title>
+<polygon style="fill:#90ee90;stroke:green;" points="2310,-480 2050,-480 2046,-476 2046,-444 2306,-444 2310,-448 2310,-480"/>
+<polyline style="fill:none;stroke:green;" points="2306,-476 2046,-476 "/>
+<polyline style="fill:none;stroke:green;" points="2306,-476 2306,-444 "/>
+<polyline style="fill:none;stroke:green;" points="2306,-476 2310,-480 "/>
+<text text-anchor="middle" x="2178" y="-455.5" style="font-family:Times New Roman;font-size:20.00;">importRegionsOfInterestGenes</text>
+</g>
+<!-- t8->t12 -->
+<g id="edge343" class="edge"><title>t8->t12</title>
+<path style="fill:none;stroke:gray;" d="M2891,-560C2805,-551 2683,-538 2576,-528 2465,-517 2435,-527 2324,-506 2296,-501 2265,-491 2239,-483"/>
+<polygon style="fill:gray;stroke:gray;" points="2239.58,-479.521 2229,-480 2237.57,-486.226 2239.58,-479.521"/>
+</g>
+<!-- t8->t181 -->
+<g id="edge373" class="edge"><title>t8->t181</title>
+<path style="fill:none;stroke:gray;" d="M2933,-554C2907,-544 2879,-528 2860,-506 2850,-493 2834,-370 2820,-360 2784,-331 1194,-353 1150,-338 1139,-334 1129,-327 1120,-319"/>
+<polygon style="fill:gray;stroke:gray;" points="1122.55,-316.596 1113,-312 1117.6,-321.546 1122.55,-316.596"/>
+</g>
+<!-- t8->t178 -->
+<g id="edge483" class="edge"><title>t8->t178</title>
+<path style="fill:none;stroke:gray;" d="M3105,-564C3249,-553 3512,-535 3737,-528 5421,-473 5843,-559 7526,-506 8377,-479 8594,-490 9443,-396"/>
+<polygon style="fill:gray;stroke:gray;" points="9443.4,-399.478 9453,-395 9442.7,-392.512 9443.4,-399.478"/>
+</g>
+<!-- t10 -->
+<g id="node15" class="node"><title>t10</title>
+<polygon style="fill:#90ee90;stroke:green;" points="2110.05,-559.794 2178,-532.5 2245.95,-559.794 2245.89,-603.956 2110.11,-603.956 2110.05,-559.794"/>
+<polygon style="fill:none;stroke:green;" points="2106.05,-557.091 2178,-528.189 2249.95,-557.091 2249.88,-607.956 2106.12,-607.956 2106.05,-557.091"/>
+<text text-anchor="middle" x="2178" y="-565.5" style="font-family:Times New Roman;font-size:20.00;">importGWAS</text>
+</g>
+<!-- t10->t14 -->
+<g id="edge333" class="edge"><title>t10->t14</title>
+<path style="fill:none;stroke:gray;" d="M2239,-553C2271,-543 2311,-533 2348,-528 2500,-506 2887,-536 3036,-506 3059,-502 3083,-493 3104,-484"/>
+<polygon style="fill:gray;stroke:gray;" points="3105.28,-487.26 3113,-480 3102.44,-480.863 3105.28,-487.26"/>
+</g>
+<!-- t10->t12 -->
+<g id="edge341" class="edge"><title>t10->t12</title>
+<path style="fill:none;stroke:gray;" d="M2178,-528C2178,-515 2178,-502 2178,-490"/>
+<polygon style="fill:gray;stroke:gray;" points="2181.5,-490 2178,-480 2174.5,-490 2181.5,-490"/>
+</g>
+<!-- t10->t181 -->
+<g id="edge371" class="edge"><title>t10->t181</title>
+<path style="fill:none;stroke:gray;" d="M2119,-552C2092,-541 2061,-525 2037,-506 1990,-467 1988,-447 1954,-396 1945,-380 1950,-369 1934,-360 1860,-314 1232,-368 1150,-338 1139,-334 1129,-327 1120,-319"/>
+<polygon style="fill:gray;stroke:gray;" points="1122.55,-316.596 1113,-312 1117.6,-321.546 1122.55,-316.596"/>
+</g>
+<!-- t11 -->
+<g id="node16" class="node"><title>t11</title>
+<polygon style="fill:#90ee90;stroke:green;" points="2360.83,-559.794 2462,-532.5 2563.17,-559.794 2563.08,-603.956 2360.92,-603.956 2360.83,-559.794"/>
+<polygon style="fill:none;stroke:green;" points="2356.84,-556.728 2462,-528.358 2567.16,-556.728 2567.05,-607.956 2356.95,-607.956 2356.84,-556.728"/>
+<text text-anchor="middle" x="2462" y="-565.5" style="font-family:Times New Roman;font-size:20.00;">importMergedGWAS</text>
+</g>
+<!-- t11->t14 -->
+<g id="edge9" class="edge"><title>t11->t14</title>
+<path style="fill:none;stroke:gray;" d="M2567,-561C2720,-546 2992,-517 3036,-506 3058,-500 3082,-492 3102,-484"/>
+<polygon style="fill:gray;stroke:gray;" points="3103.28,-487.26 3111,-480 3100.44,-480.863 3103.28,-487.26"/>
+</g>
+<!-- t11->t12 -->
+<g id="edge17" class="edge"><title>t11->t12</title>
+<path style="fill:none;stroke:gray;" d="M2395,-546C2346,-527 2280,-502 2234,-484"/>
+<polygon style="fill:gray;stroke:gray;" points="2235.56,-480.863 2225,-480 2232.72,-487.26 2235.56,-480.863"/>
+</g>
+<!-- t14->t181 -->
+<g id="edge365" class="edge"><title>t14->t181</title>
+<path style="fill:none;stroke:gray;" d="M3114,-444C3092,-434 3063,-423 3036,-418 2956,-401 2373,-431 2299,-396 2281,-386 2288,-369 2269,-360 2157,-306 1267,-380 1150,-338 1139,-334 1129,-327 1120,-319"/>
+<polygon style="fill:gray;stroke:gray;" points="1122.55,-316.596 1113,-312 1117.6,-321.546 1122.55,-316.596"/>
+</g>
+<!-- t121 -->
+<g id="node221" class="node"><title>t121</title>
+<polygon style="fill:none;stroke:blue;" points="6551,-396 6269,-396 6265,-392 6265,-360 6547,-360 6551,-364 6551,-396"/>
+<polyline style="fill:none;stroke:blue;" points="6547,-392 6265,-392 "/>
+<polyline style="fill:none;stroke:blue;" points="6547,-392 6547,-360 "/>
+<polyline style="fill:none;stroke:blue;" points="6547,-392 6551,-396 "/>
+<text text-anchor="middle" x="6408" y="-371.5" style="font-family:Times New Roman;font-size:20.00;fill:blue;">makeAnnotatorRegionsOfInterest</text>
+</g>
+<!-- t14->t121 -->
+<g id="edge201" class="edge"><title>t14->t121</title>
+<path style="fill:none;stroke:gray;" d="M3257,-452C3374,-441 3569,-424 3736,-418 4293,-394 5682,-431 6255,-396"/>
+<polygon style="fill:gray;stroke:gray;" points="6255.4,-399.478 6265,-395 6254.7,-392.512 6255.4,-399.478"/>
+</g>
+<!-- t160 -->
+<g id="node236" class="node"><title>t160</title>
+<polygon style="fill:none;stroke:blue;" points="3609,-396 3297,-396 3293,-392 3293,-360 3605,-360 3609,-364 3609,-396"/>
+<polyline style="fill:none;stroke:blue;" points="3605,-392 3293,-392 "/>
+<polyline style="fill:none;stroke:blue;" points="3605,-392 3605,-360 "/>
+<polyline style="fill:none;stroke:blue;" points="3605,-392 3609,-396 "/>
+<text text-anchor="middle" x="3451" y="-371.5" style="font-family:Times New Roman;font-size:20.00;fill:blue;">makeAnnotatorRegionsOfInterestTop</text>
+</g>
+<!-- t14->t160 -->
+<g id="edge217" class="edge"><title>t14->t160</title>
+<path style="fill:none;stroke:gray;" d="M3215,-444C3263,-431 3327,-413 3376,-399"/>
+<polygon style="fill:gray;stroke:gray;" points="3377.43,-402.226 3386,-396 3375.42,-395.521 3377.43,-402.226"/>
+</g>
+<!-- t161 -->
+<g id="node240" class="node"><title>t161</title>
+<polygon style="fill:none;stroke:blue;" points="3255,-396 2913,-396 2909,-392 2909,-360 3251,-360 3255,-364 3255,-396"/>
+<polyline style="fill:none;stroke:blue;" points="3251,-392 2909,-392 "/>
+<polyline style="fill:none;stroke:blue;" points="3251,-392 3251,-360 "/>
+<polyline style="fill:none;stroke:blue;" points="3251,-392 3255,-396 "/>
+<text text-anchor="middle" x="3082" y="-371.5" style="font-family:Times New Roman;font-size:20.00;fill:blue;">makeAnnotatorRegionsOfInterestBottom</text>
+</g>
+<!-- t14->t161 -->
+<g id="edge221" class="edge"><title>t14->t161</title>
+<path style="fill:none;stroke:gray;" d="M3136,-444C3127,-432 3114,-417 3103,-404"/>
+<polygon style="fill:gray;stroke:gray;" points="3105.8,-401.9 3097,-396 3100.2,-406.1 3105.8,-401.9"/>
+</g>
+<!-- t151 -->
+<g id="node265" class="node"><title>t151</title>
+<polygon style="fill:none;stroke:blue;" points="4411,-396 4045,-396 4041,-392 4041,-360 4407,-360 4411,-364 4411,-396"/>
+<polyline style="fill:none;stroke:blue;" points="4407,-392 4041,-392 "/>
+<polyline style="fill:none;stroke:blue;" points="4407,-392 4407,-360 "/>
+<polyline style="fill:none;stroke:blue;" points="4407,-392 4411,-396 "/>
+<text text-anchor="middle" x="4226" y="-371.5" style="font-family:Times New Roman;font-size:20.00;fill:blue;">makeAnnotatorRegionsOfInterestWithMotif</text>
+</g>
+<!-- t14->t151 -->
+<g id="edge249" class="edge"><title>t14->t151</title>
+<path style="fill:none;stroke:gray;" d="M3257,-454C3374,-445 3569,-430 3736,-418 3865,-408 3899,-406 4031,-396"/>
+<polygon style="fill:gray;stroke:gray;" points="4031.4,-399.478 4041,-395 4030.7,-392.512 4031.4,-399.478"/>
+</g>
+<!-- t152 -->
+<g id="node269" class="node"><title>t152</title>
+<polygon style="fill:none;stroke:blue;" points="4023,-396 3631,-396 3627,-392 3627,-360 4019,-360 4023,-364 4023,-396"/>
+<polyline style="fill:none;stroke:blue;" points="4019,-392 3627,-392 "/>
+<polyline style="fill:none;stroke:blue;" points="4019,-392 4019,-360 "/>
+<polyline style="fill:none;stroke:blue;" points="4019,-392 4023,-396 "/>
+<text text-anchor="middle" x="3825" y="-371.5" style="font-family:Times New Roman;font-size:20.00;fill:blue;">makeAnnotatorRegionsOfInterestWithoutMotif</text>
+</g>
+<!-- t14->t152 -->
+<g id="edge253" class="edge"><title>t14->t152</title>
+<path style="fill:none;stroke:gray;" d="M3257,-449C3368,-435 3543,-413 3670,-397"/>
+<polygon style="fill:gray;stroke:gray;" points="3670.4,-400.478 3680,-396 3669.7,-393.512 3670.4,-400.478"/>
+</g>
+<!-- t13 -->
+<g id="node19" class="node"><title>t13</title>
+<polygon style="fill:#90ee90;stroke:green;" points="3749.02,-449.794 3854,-422.5 3958.98,-449.794 3958.88,-493.956 3749.12,-493.956 3749.02,-449.794"/>
+<polygon style="fill:none;stroke:green;" points="3745.03,-446.699 3854,-418.368 3962.97,-446.699 3962.85,-497.956 3745.15,-497.956 3745.03,-446.699"/>
+<text text-anchor="middle" x="3854" y="-455.5" style="font-family:Times New Roman;font-size:20.00;">importSNPsOfInterest</text>
+</g>
+<!-- t13->t181 -->
+<g id="edge367" class="edge"><title>t13->t181</title>
+<path style="fill:none;stroke:gray;" d="M3745,-452C3627,-442 3433,-427 3266,-418 3201,-414 2738,-427 2680,-396 2663,-386 2673,-369 2655,-360 2582,-320 1229,-366 1150,-338 1139,-334 1129,-327 1120,-319"/>
+<polygon style="fill:gray;stroke:gray;" points="1122.55,-316.596 1113,-312 1117.6,-321.546 1122.55,-316.596"/>
+</g>
+<!-- t179 -->
+<g id="node182" class="node"><title>t179</title>
+<polygon style="fill:#90ee90;stroke:green;" points="9435,-396 9269,-396 9265,-392 9265,-360 9431,-360 9435,-364 9435,-396"/>
+<polyline style="fill:none;stroke:green;" points="9431,-392 9265,-392 "/>
+<polyline style="fill:none;stroke:green;" points="9431,-392 9431,-360 "/>
+<polyline style="fill:none;stroke:green;" points="9431,-392 9435,-396 "/>
+<text text-anchor="middle" x="9350" y="-371.5" style="font-family:Times New Roman;font-size:20.00;">buildSNPCoverage</text>
+</g>
+<!-- t13->t179 -->
+<g id="edge481" class="edge"><title>t13->t179</title>
+<path style="fill:none;stroke:gray;" d="M3959,-446C4029,-435 4124,-423 4207,-418 4764,-382 8652,-449 9255,-396"/>
+<polygon style="fill:gray;stroke:gray;" points="9255.4,-399.478 9265,-395 9254.7,-392.512 9255.4,-399.478"/>
+</g>
+<!-- t42->t181 -->
+<g id="edge19" class="edge"><title>t42->t181</title>
+<path style="fill:none;stroke:gray;" d="M9997,-361C9994,-361 9991,-360 9988,-360 9866,-349 1281,-373 1163,-338 1149,-334 1137,-326 1125,-318"/>
+<polygon style="fill:gray;stroke:gray;" points="1127.1,-315.2 1117,-312 1122.9,-320.8 1127.1,-315.2"/>
+</g>
+<!-- t60 -->
+<g id="node141" class="node"><title>t60</title>
+<polygon style="fill:#90ee90;stroke:green;" points="10088.2,-281.794 10154,-254.5 10219.8,-281.794 10219.7,-325.956 10088.3,-325.956 10088.2,-281.794"/>
+<polygon style="fill:none;stroke:green;" points="10084.2,-279.124 10154,-250.169 10223.8,-279.124 10223.7,-329.956 10084.3,-329.956 10084.2,-279.124"/>
+<text text-anchor="middle" x="10154" y="-287.5" style="font-family:Times New Roman;font-size:20.00;">buildOverlap</text>
+</g>
+<!-- t42->t60 -->
+<g id="edge125" class="edge"><title>t42->t60</title>
+<path style="fill:none;stroke:gray;" d="M10110,-360C10114,-354 10119,-346 10124,-339"/>
+<polygon style="fill:gray;stroke:gray;" points="10127.4,-340.262 10130,-330 10121.5,-336.379 10127.4,-340.262"/>
+</g>
+<!-- t62 -->
+<g id="node145" class="node"><title>t62</title>
+<polygon style="fill:#90ee90;stroke:green;" points="10693.9,-281.794 10788,-254.5 10882.1,-281.794 10882,-325.956 10694,-325.956 10693.9,-281.794"/>
+<polygon style="fill:none;stroke:green;" points="10689.9,-278.787 10788,-250.337 10886.1,-278.787 10886,-329.956 10690,-329.956 10689.9,-278.787"/>
+<text text-anchor="middle" x="10788" y="-287.5" style="font-family:Times New Roman;font-size:20.00;">buildUCSCOverlap</text>
+</g>
+<!-- t42->t62 -->
+<g id="edge129" class="edge"><title>t42->t62</title>
+<path style="fill:none;stroke:gray;" d="M10199,-368C10234,-365 10273,-362 10308,-360 10474,-348 10520,-374 10681,-338 10687,-337 10693,-335 10698,-333"/>
+<polygon style="fill:gray;stroke:gray;" points="10699.4,-336.226 10708,-330 10697.4,-329.521 10699.4,-336.226"/>
+</g>
+<!-- t3 -->
+<g id="node22" class="node"><title>t3</title>
+<polygon style="fill:#90ee90;stroke:green;" points="3980,-590 3750,-590 3746,-586 3746,-554 3976,-554 3980,-558 3980,-590"/>
+<polyline style="fill:none;stroke:green;" points="3976,-586 3746,-586 "/>
+<polyline style="fill:none;stroke:green;" points="3976,-586 3976,-554 "/>
+<polyline style="fill:none;stroke:green;" points="3976,-586 3980,-590 "/>
+<text text-anchor="middle" x="3863" y="-565.5" style="font-family:Times New Roman;font-size:20.00;">importUCSCEncodeTracks</text>
+</g>
+<!-- t5 -->
+<g id="node23" class="node"><title>t5</title>
+<polygon style="fill:#90ee90;stroke:green;" points="4450,-480 4220,-480 4216,-476 4216,-444 4446,-444 4450,-448 4450,-480"/>
+<polyline style="fill:none;stroke:green;" points="4446,-476 4216,-476 "/>
+<polyline style="fill:none;stroke:green;" points="4446,-476 4446,-444 "/>
+<polyline style="fill:none;stroke:green;" points="4446,-476 4450,-480 "/>
+<text text-anchor="middle" x="4333" y="-455.5" style="font-family:Times New Roman;font-size:20.00;">exportUCSCEncodeTracks</text>
+</g>
+<!-- t3->t5 -->
+<g id="edge13" class="edge"><title>t3->t5</title>
+<path style="fill:none;stroke:gray;" d="M3940,-554C4024,-535 4159,-503 4246,-482"/>
+<polygon style="fill:gray;stroke:gray;" points="4246.88,-485.393 4256,-480 4245.51,-478.529 4246.88,-485.393"/>
+</g>
+<!-- t3->t181 -->
+<g id="edge381" class="edge"><title>t3->t181</title>
+<path style="fill:none;stroke:gray;" d="M3746,-564C3599,-554 3338,-538 3114,-528 3045,-524 2553,-524 2485,-506 2405,-484 2395,-454 2319,-418 2298,-407 2288,-411 2269,-396 2254,-383 2262,-369 2244,-360 2191,-331 1208,-358 1150,-338 1139,-334 1129,-327 1120,-319"/>
+<polygon style="fill:gray;stroke:gray;" points="1122.55,-316.596 1113,-312 1117.6,-321.546 1122.55,-316.596"/>
+</g>
+<!-- t5->t181 -->
+<g id="edge379" class="edge"><title>t5->t181</title>
+<path style="fill:none;stroke:gray;" d="M4216,-444C4145,-434 4054,-423 3972,-418 3927,-414 2355,-418 2314,-396 2298,-386 2309,-369 2292,-360 2237,-328 1210,-359 1150,-338 1139,-334 1129,-327 1120,-319"/>
+<polygon style="fill:gray;stroke:gray;" points="1122.55,-316.596 1113,-312 1117.6,-321.546 1122.55,-316.596"/>
+</g>
+<!-- t5->t62 -->
+<g id="edge475" class="edge"><title>t5->t62</title>
+<path style="fill:none;stroke:gray;" d="M4374,-444C4398,-434 4430,-423 4459,-418 4502,-410 10633,-419 10668,-396 10683,-386 10672,-373 10683,-360 10691,-351 10699,-343 10709,-336"/>
+<polygon style="fill:gray;stroke:gray;" points="10711.1,-338.8 10717,-330 10706.9,-333.2 10711.1,-338.8"/>
+</g>
+<!-- t120 -->
+<g id="node201" class="node"><title>t120</title>
+<polygon style="fill:#90ee90;stroke:green;" points="5949,-396 5759,-396 5755,-392 5755,-360 5945,-360 5949,-364 5949,-396"/>
+<polyline style="fill:none;stroke:green;" points="5945,-392 5755,-392 "/>
+<polyline style="fill:none;stroke:green;" points="5945,-392 5945,-360 "/>
+<polyline style="fill:none;stroke:green;" points="5945,-392 5949,-396 "/>
+<text text-anchor="middle" x="5852" y="-371.5" style="font-family:Times New Roman;font-size:20.00;">makeAnnotatorTracks</text>
+</g>
+<!-- t5->t120 -->
+<g id="edge181" class="edge"><title>t5->t120</title>
+<path style="fill:none;stroke:gray;" d="M4374,-444C4398,-434 4430,-423 4459,-418 4597,-393 5564,-408 5745,-396"/>
+<polygon style="fill:gray;stroke:gray;" points="5745.4,-399.478 5755,-395 5744.7,-392.512 5745.4,-399.478"/>
+</g>
+<!-- t149 -->
+<g id="node257" class="node"><title>t149</title>
+<polygon style="fill:none;stroke:blue;" points="4707,-396 4433,-396 4429,-392 4429,-360 4703,-360 4707,-364 4707,-396"/>
+<polyline style="fill:none;stroke:blue;" points="4703,-392 4429,-392 "/>
+<polyline style="fill:none;stroke:blue;" points="4703,-392 4703,-360 "/>
+<polyline style="fill:none;stroke:blue;" points="4703,-392 4707,-396 "/>
+<text text-anchor="middle" x="4568" y="-371.5" style="font-family:Times New Roman;font-size:20.00;fill:blue;">makeAnnotatorTracksWithMotif</text>
+</g>
+<!-- t5->t149 -->
+<g id="edge241" class="edge"><title>t5->t149</title>
+<path style="fill:none;stroke:gray;" d="M4383,-444C4419,-431 4469,-413 4507,-399"/>
+<polygon style="fill:gray;stroke:gray;" points="4508.43,-402.226 4517,-396 4506.42,-395.521 4508.43,-402.226"/>
+</g>
+<!-- t150 -->
+<g id="node261" class="node"><title>t150</title>
+<polygon style="fill:#90ee90;stroke:green;" points="5027,-396 4729,-396 4725,-392 4725,-360 5023,-360 5027,-364 5027,-396"/>
+<polyline style="fill:none;stroke:green;" points="5023,-392 4725,-392 "/>
+<polyline style="fill:none;stroke:green;" points="5023,-392 5023,-360 "/>
+<polyline style="fill:none;stroke:green;" points="5023,-392 5027,-396 "/>
+<text text-anchor="middle" x="4876" y="-371.5" style="font-family:Times New Roman;font-size:20.00;">makeAnnotatorTracksWithoutMotif</text>
+</g>
+<!-- t5->t150 -->
+<g id="edge245" class="edge"><title>t5->t150</title>
+<path style="fill:none;stroke:gray;" d="M4376,-444C4400,-435 4431,-424 4459,-418 4569,-395 4600,-406 4715,-396"/>
+<polygon style="fill:gray;stroke:gray;" points="4715.4,-399.478 4725,-395 4714.7,-392.512 4715.4,-399.478"/>
+</g>
+<!-- t6 -->
+<g id="node25" class="node"><title>t6</title>
+<polygon style="fill:#90ee90;stroke:green;" points="1925,-396 1685,-396 1681,-392 1681,-360 1921,-360 1925,-364 1925,-396"/>
+<polyline style="fill:none;stroke:green;" points="1921,-392 1681,-392 "/>
+<polyline style="fill:none;stroke:green;" points="1921,-392 1921,-360 "/>
+<polyline style="fill:none;stroke:green;" points="1921,-392 1925,-396 "/>
+<text text-anchor="middle" x="1803" y="-371.5" style="font-family:Times New Roman;font-size:20.00;">importTranscriptInformation</text>
+</g>
+<!-- t6->t181 -->
+<g id="edge377" class="edge"><title>t6->t181</title>
+<path style="fill:none;stroke:gray;" d="M1681,-361C1678,-361 1675,-360 1672,-360 1614,-354 1203,-359 1148,-338 1137,-333 1128,-326 1119,-319"/>
+<polygon style="fill:gray;stroke:gray;" points="1121.55,-316.596 1112,-312 1116.6,-321.546 1121.55,-316.596"/>
+</g>
+<!-- t7 -->
+<g id="node26" class="node"><title>t7</title>
+<polygon style="fill:#90ee90;stroke:green;" points="1663,-396 1463,-396 1459,-392 1459,-360 1659,-360 1663,-364 1663,-396"/>
+<polyline style="fill:none;stroke:green;" points="1659,-392 1459,-392 "/>
+<polyline style="fill:none;stroke:green;" points="1659,-392 1659,-360 "/>
+<polyline style="fill:none;stroke:green;" points="1659,-392 1663,-396 "/>
+<text text-anchor="middle" x="1561" y="-371.5" style="font-family:Times New Roman;font-size:20.00;">importGeneInformation</text>
+</g>
+<!-- t7->t181 -->
+<g id="edge375" class="edge"><title>t7->t181</title>
+<path style="fill:none;stroke:gray;" d="M1459,-361C1454,-361 1450,-360 1445,-360 1380,-352 1210,-363 1148,-338 1137,-333 1128,-326 1119,-319"/>
+<polygon style="fill:gray;stroke:gray;" points="1121.55,-316.596 1112,-312 1116.6,-321.546 1121.55,-316.596"/>
+</g>
+<!-- t38->t181 -->
+<g id="edge351" class="edge"><title>t38->t181</title>
+<path style="fill:none;stroke:gray;" d="M2481,-362C2476,-361 2471,-360 2467,-360 2395,-352 1217,-363 1148,-338 1137,-334 1128,-327 1119,-320"/>
+<polygon style="fill:gray;stroke:gray;" points="1121.22,-317.221 1112,-312 1115.95,-321.831 1121.22,-317.221"/>
+</g>
+<!-- t33 -->
+<g id="node29" class="node"><title>t33</title>
+<polygon style="fill:#90ee90;stroke:green;" points="2225,-396 1967,-396 1963,-392 1963,-360 2221,-360 2225,-364 2225,-396"/>
+<polyline style="fill:none;stroke:green;" points="2221,-392 1963,-392 "/>
+<polyline style="fill:none;stroke:green;" points="2221,-392 2221,-360 "/>
+<polyline style="fill:none;stroke:green;" points="2221,-392 2225,-396 "/>
+<text text-anchor="middle" x="2094" y="-371.5" style="font-family:Times New Roman;font-size:20.00;">buildAnnotatorGeneTerritories</text>
+</g>
+<!-- t33->t181 -->
+<g id="edge361" class="edge"><title>t33->t181</title>
+<path style="fill:none;stroke:gray;" d="M1963,-361C1958,-361 1954,-360 1949,-360 1861,-353 1232,-369 1148,-338 1137,-334 1128,-327 1119,-320"/>
+<polygon style="fill:gray;stroke:gray;" points="1121.22,-317.221 1112,-312 1115.95,-321.831 1121.22,-317.221"/>
+</g>
+<!-- t12->t181 -->
+<g id="edge369" class="edge"><title>t12->t181</title>
+<path style="fill:none;stroke:gray;" d="M2201,-444C2225,-421 2259,-384 2234,-360 2192,-316 1205,-359 1148,-338 1137,-334 1128,-327 1119,-320"/>
+<polygon style="fill:gray;stroke:gray;" points="1121.22,-317.221 1112,-312 1115.95,-321.831 1121.22,-317.221"/>
+</g>
+<!-- t191 -->
+<g id="node314" class="node"><title>t191</title>
+<polygon style="fill:#fff68f;stroke:black;" points="3551,-60 3501,-60 3497,-56 3497,-24 3547,-24 3551,-28 3551,-60"/>
+<polyline style="fill:none;stroke:black;" points="3547,-56 3497,-56 "/>
+<polyline style="fill:none;stroke:black;" points="3547,-56 3547,-24 "/>
+<polyline style="fill:none;stroke:black;" points="3547,-56 3551,-60 "/>
+<text text-anchor="middle" x="3524" y="-35.5" style="font-family:Times New Roman;font-size:20.00;">full</text>
+</g>
+<!-- t181->t191 -->
+<g id="edge331" class="edge"><title>t181->t191</title>
+<path style="fill:none;stroke:blue;" d="M1113,-276C1123,-266 1136,-255 1150,-250 1206,-227 1368,-257 1420,-228 1462,-204 1441,-163 1483,-140 1575,-90 3218,-49 3487,-43"/>
+<polygon style="fill:blue;stroke:blue;" points="3487,-46.5001 3497,-43 3487,-39.5001 3487,-46.5001"/>
+</g>
+<!-- t17 -->
+<g id="node36" class="node"><title>t17</title>
+<polygon style="fill:#90ee90;stroke:green;" points="9753,-1009 9617,-1009 9613,-1005 9613,-973 9749,-973 9753,-977 9753,-1009"/>
+<polyline style="fill:none;stroke:green;" points="9749,-1005 9613,-1005 "/>
+<polyline style="fill:none;stroke:green;" points="9749,-1005 9749,-973 "/>
+<polyline style="fill:none;stroke:green;" points="9749,-1005 9753,-1009 "/>
+<text text-anchor="middle" x="9683" y="-984.5" style="font-family:Times New Roman;font-size:20.00;">buildBAMStats</text>
+</g>
+<!-- t16->t17 -->
+<g id="edge23" class="edge"><title>t16->t17</title>
+<path style="fill:none;stroke:gray;" d="M5835,-1075C6252,-1066 9086,-1004 9603,-993"/>
+<polygon style="fill:gray;stroke:gray;" points="9603,-996.5 9613,-993 9603,-989.5 9603,-996.5"/>
+</g>
+<!-- t18 -->
+<g id="node38" class="node"><title>t18</title>
+<polygon style="fill:#90ee90;stroke:green;" points="10048,-924 9810,-924 9806,-920 9806,-888 10044,-888 10048,-892 10048,-924"/>
+<polyline style="fill:none;stroke:green;" points="10044,-920 9806,-920 "/>
+<polyline style="fill:none;stroke:green;" points="10044,-920 10044,-888 "/>
+<polyline style="fill:none;stroke:green;" points="10044,-920 10048,-924 "/>
+<text text-anchor="middle" x="9927" y="-899.5" style="font-family:Times New Roman;font-size:20.00;">normalizeBAMPerReplicate</text>
+</g>
+<!-- t17->t18 -->
+<g id="edge25" class="edge"><title>t17->t18</title>
+<path style="fill:none;stroke:gray;" d="M9735,-973C9773,-960 9825,-942 9865,-927"/>
+<polygon style="fill:gray;stroke:gray;" points="9866.43,-930.226 9875,-924 9864.42,-923.521 9866.43,-930.226"/>
+</g>
+<!-- t20 -->
+<g id="node40" class="node"><title>t20</title>
+<polygon style="fill:#90ee90;stroke:green;" points="9788,-924 9582,-924 9578,-920 9578,-888 9784,-888 9788,-892 9788,-924"/>
+<polyline style="fill:none;stroke:green;" points="9784,-920 9578,-920 "/>
+<polyline style="fill:none;stroke:green;" points="9784,-920 9784,-888 "/>
+<polyline style="fill:none;stroke:green;" points="9784,-920 9788,-924 "/>
+<text text-anchor="middle" x="9683" y="-899.5" style="font-family:Times New Roman;font-size:20.00;">normalizeBAMControls</text>
+</g>
+<!-- t17->t20 -->
+<g id="edge27" class="edge"><title>t17->t20</title>
+<path style="fill:none;stroke:gray;" d="M9683,-973C9683,-962 9683,-947 9683,-934"/>
+<polygon style="fill:gray;stroke:gray;" points="9686.5,-934 9683,-924 9679.5,-934 9686.5,-934"/>
+</g>
+<!-- t21 -->
+<g id="node42" class="node"><title>t21</title>
+<polygon style="fill:#90ee90;stroke:green;" points="9917,-858 9823,-858 9819,-854 9819,-822 9913,-822 9917,-826 9917,-858"/>
+<polyline style="fill:none;stroke:green;" points="9913,-854 9819,-854 "/>
+<polyline style="fill:none;stroke:green;" points="9913,-854 9913,-822 "/>
+<polyline style="fill:none;stroke:green;" points="9913,-854 9917,-858 "/>
+<text text-anchor="middle" x="9868" y="-833.5" style="font-family:Times New Roman;font-size:20.00;">runMACS</text>
+</g>
+<!-- t18->t21 -->
+<g id="edge385" class="edge"><title>t18->t21</title>
+<path style="fill:none;stroke:gray;" d="M9911,-888C9905,-881 9898,-873 9891,-866"/>
+<polygon style="fill:gray;stroke:gray;" points="9893.22,-863.221 9884,-858 9887.95,-867.831 9893.22,-863.221"/>
+</g>
+<!-- t29 -->
+<g id="node54" class="node"><title>t29</title>
+<polygon style="fill:#90ee90;stroke:green;" points="10462,-312 10246,-312 10242,-308 10242,-276 10458,-276 10462,-280 10462,-312"/>
+<polyline style="fill:none;stroke:green;" points="10458,-308 10242,-308 "/>
+<polyline style="fill:none;stroke:green;" points="10458,-308 10458,-276 "/>
+<polyline style="fill:none;stroke:green;" points="10458,-308 10462,-312 "/>
+<text text-anchor="middle" x="10352" y="-287.5" style="font-family:Times New Roman;font-size:20.00;">importCombinedIntervals</text>
+</g>
+<!-- t18->t29 -->
+<g id="edge393" class="edge"><title>t18->t29</title>
+<path style="fill:none;stroke:gray;" d="M9939,-888C9957,-860 9987,-806 9987,-756 9987,-756 9987,-756 9987,-664 9987,-554 9930,-496 10006,-418 10040,-384 10175,-413 10218,-396 10242,-386 10243,-375 10263,-360 10282,-346 10302,-330 10319,-318"/>
+<polygon style="fill:gray;stroke:gray;" points="10321.1,-320.8 10327,-312 10316.9,-315.2 10321.1,-320.8"/>
+</g>
+<!-- t20->t21 -->
+<g id="edge29" class="edge"><title>t20->t21</title>
+<path style="fill:none;stroke:gray;" d="M9734,-888C9757,-879 9785,-870 9809,-861"/>
+<polygon style="fill:gray;stroke:gray;" points="9811.02,-863.964 9819,-857 9808.42,-857.464 9811.02,-863.964"/>
+</g>
+<!-- t22 -->
+<g id="node44" class="node"><title>t22</title>
+<polygon style="fill:#90ee90;stroke:green;" points="10147,-774 10027,-774 10023,-770 10023,-738 10143,-738 10147,-742 10147,-774"/>
+<polyline style="fill:none;stroke:green;" points="10143,-770 10023,-770 "/>
+<polyline style="fill:none;stroke:green;" points="10143,-770 10143,-738 "/>
+<polyline style="fill:none;stroke:green;" points="10143,-770 10147,-774 "/>
+<text text-anchor="middle" x="10085" y="-749.5" style="font-family:Times New Roman;font-size:20.00;">importMACS</text>
+</g>
+<!-- t21->t22 -->
+<g id="edge31" class="edge"><title>t21->t22</title>
+<path style="fill:none;stroke:gray;" d="M9915,-822C9948,-809 9993,-791 10028,-778"/>
+<polygon style="fill:gray;stroke:gray;" points="10030,-780.964 10038,-774 10027.4,-774.464 10030,-780.964"/>
+</g>
+<!-- t23 -->
+<g id="node56" class="node"><title>t23</title>
+<polygon style="fill:#90ee90;stroke:green;" points="9782.04,-743.794 9868,-716.5 9953.96,-743.794 9953.87,-787.956 9782.13,-787.956 9782.04,-743.794"/>
+<polygon style="fill:none;stroke:green;" points="9778.06,-740.864 9868,-712.305 9957.94,-740.864 9957.85,-791.956 9778.15,-791.956 9778.06,-740.864"/>
+<text text-anchor="middle" x="9868" y="-749.5" style="font-family:Times New Roman;font-size:20.00;">summarizeMACS</text>
+</g>
+<!-- t21->t23 -->
+<g id="edge43" class="edge"><title>t21->t23</title>
+<path style="fill:none;stroke:gray;" d="M9868,-822C9868,-816 9868,-809 9868,-802"/>
+<polygon style="fill:gray;stroke:gray;" points="9871.5,-802 9868,-792 9864.5,-802 9871.5,-802"/>
+</g>
+<!-- t21->t179 -->
+<g id="edge163" class="edge"><title>t21->t179</title>
+<path style="fill:none;stroke:gray;" d="M9819,-832C9698,-812 9396,-753 9396,-664 9396,-664 9396,-664 9396,-572 9396,-511 9374,-443 9361,-406"/>
+<polygon style="fill:gray;stroke:gray;" points="9363.96,-403.985 9357,-396 9357.46,-406.585 9363.96,-403.985"/>
+</g>
+<!-- t25 -->
+<g id="node46" class="node"><title>t25</title>
+<polygon style="fill:#90ee90;stroke:green;" points="10198,-682 10010,-682 10006,-678 10006,-646 10194,-646 10198,-650 10198,-682"/>
+<polyline style="fill:none;stroke:green;" points="10194,-678 10006,-678 "/>
+<polyline style="fill:none;stroke:green;" points="10194,-678 10194,-646 "/>
+<polyline style="fill:none;stroke:green;" points="10194,-678 10198,-682 "/>
+<text text-anchor="middle" x="10102" y="-657.5" style="font-family:Times New Roman;font-size:20.00;">exportIntervalsAsBed</text>
+</g>
+<!-- t22->t25 -->
+<g id="edge33" class="edge"><title>t22->t25</title>
+<path style="fill:none;stroke:gray;" d="M10088,-738C10090,-725 10093,-707 10097,-692"/>
+<polygon style="fill:gray;stroke:gray;" points="10100.5,-692.492 10099,-682 10093.6,-691.119 10100.5,-692.492"/>
+</g>
+<!-- t26 -->
+<g id="node48" class="node"><title>t26</title>
+<polygon style="fill:#90ee90;stroke:green;" points="10264,-590 10102,-590 10098,-586 10098,-554 10260,-554 10264,-558 10264,-590"/>
+<polyline style="fill:none;stroke:green;" points="10260,-586 10098,-586 "/>
+<polyline style="fill:none;stroke:green;" points="10260,-586 10260,-554 "/>
+<polyline style="fill:none;stroke:green;" points="10260,-586 10264,-590 "/>
+<text text-anchor="middle" x="10181" y="-565.5" style="font-family:Times New Roman;font-size:20.00;">combineReplicates</text>
+</g>
+<!-- t25->t26 -->
+<g id="edge35" class="edge"><title>t25->t26</title>
+<path style="fill:none;stroke:gray;" d="M10118,-646C10130,-632 10146,-613 10159,-598"/>
+<polygon style="fill:gray;stroke:gray;" points="10162,-599.831 10166,-590 10156.8,-595.221 10162,-599.831"/>
+</g>
+<!-- t27 -->
+<g id="node50" class="node"><title>t27</title>
+<polygon style="fill:#90ee90;stroke:green;" points="10206,-480 10040,-480 10036,-476 10036,-444 10202,-444 10206,-448 10206,-480"/>
+<polyline style="fill:none;stroke:green;" points="10202,-476 10036,-476 "/>
+<polyline style="fill:none;stroke:green;" points="10202,-476 10202,-444 "/>
+<polyline style="fill:none;stroke:green;" points="10202,-476 10206,-480 "/>
+<text text-anchor="middle" x="10121" y="-455.5" style="font-family:Times New Roman;font-size:20.00;">combineConditions</text>
+</g>
+<!-- t25->t27 -->
+<g id="edge387" class="edge"><title>t25->t27</title>
+<path style="fill:none;stroke:gray;" d="M10096,-646C10093,-637 10090,-626 10089,-616 10084,-577 10081,-566 10089,-528 10092,-515 10098,-501 10105,-489"/>
+<polygon style="fill:gray;stroke:gray;" points="10108.2,-490.441 10110,-480 10102.1,-487.042 10108.2,-490.441"/>
+</g>
+<!-- t31 -->
+<g id="node131" class="node"><title>t31</title>
+<polygon style="fill:#90ee90;stroke:green;" points="10432,-590 10316,-590 10312,-586 10312,-554 10428,-554 10432,-558 10432,-590"/>
+<polyline style="fill:none;stroke:green;" points="10428,-586 10312,-586 "/>
+<polyline style="fill:none;stroke:green;" points="10428,-586 10428,-554 "/>
+<polyline style="fill:none;stroke:green;" points="10428,-586 10432,-590 "/>
+<text text-anchor="middle" x="10372" y="-565.5" style="font-family:Times New Roman;font-size:20.00;">makeMerged</text>
+</g>
+<!-- t25->t31 -->
+<g id="edge115" class="edge"><title>t25->t31</title>
+<path style="fill:none;stroke:gray;" d="M10174,-646C10205,-638 10241,-627 10273,-616 10291,-609 10309,-602 10325,-594"/>
+<polygon style="fill:gray;stroke:gray;" points="10326.3,-597.26 10334,-590 10323.4,-590.863 10326.3,-597.26"/>
+</g>
+<!-- t25->t60 -->
+<g id="edge467" class="edge"><title>t25->t60</title>
+<path style="fill:none;stroke:gray;" d="M10084,-646C10041,-602 9944,-487 10006,-418 10039,-382 10191,-431 10223,-396 10239,-378 10228,-356 10210,-337"/>
+<polygon style="fill:gray;stroke:gray;" points="10212.5,-334.596 10203,-330 10207.6,-339.546 10212.5,-334.596"/>
+</g>
+<!-- t26->t27 -->
+<g id="edge37" class="edge"><title>t26->t27</title>
+<path style="fill:none;stroke:gray;" d="M10171,-554C10161,-537 10147,-510 10136,-489"/>
+<polygon style="fill:gray;stroke:gray;" points="10138.9,-487.042 10131,-480 10132.8,-490.441 10138.9,-487.042"/>
+</g>
+<!-- t26->t29 -->
+<g id="edge391" class="edge"><title>t26->t29</title>
+<path style="fill:none;stroke:gray;" d="M10098,-556C10071,-546 10043,-530 10027,-506 10006,-473 10001,-446 10027,-418 10063,-379 10219,-424 10263,-396 10279,-385 10272,-374 10283,-360 10295,-345 10309,-330 10322,-319"/>
+<polygon style="fill:gray;stroke:gray;" points="10324.8,-321.219 10330,-312 10320.2,-315.951 10324.8,-321.219"/>
+</g>
+<!-- t26->t62 -->
+<g id="edge473" class="edge"><title>t26->t62</title>
+<path style="fill:none;stroke:gray;" d="M10192,-554C10200,-541 10209,-523 10215,-506 10229,-468 10205,-443 10235,-418 10275,-384 10656,-420 10701,-396 10719,-386 10716,-375 10728,-360 10734,-353 10741,-345 10747,-338"/>
+<polygon style="fill:gray;stroke:gray;" points="10750,-339.831 10754,-330 10744.8,-335.221 10750,-339.831"/>
+</g>
+<!-- t28 -->
+<g id="node52" class="node"><title>t28</title>
+<polygon style="fill:#90ee90;stroke:green;" points="10659,-396 10521,-396 10517,-392 10517,-360 10655,-360 10659,-364 10659,-396"/>
+<polyline style="fill:none;stroke:green;" points="10655,-392 10517,-392 "/>
+<polyline style="fill:none;stroke:green;" points="10655,-392 10655,-360 "/>
+<polyline style="fill:none;stroke:green;" points="10655,-392 10659,-396 "/>
+<text text-anchor="middle" x="10588" y="-371.5" style="font-family:Times New Roman;font-size:20.00;">combineUnstim</text>
+</g>
+<!-- t27->t28 -->
+<g id="edge39" class="edge"><title>t27->t28</title>
+<path style="fill:none;stroke:gray;" d="M10154,-444C10173,-434 10197,-423 10220,-418 10342,-387 10380,-414 10507,-396"/>
+<polygon style="fill:gray;stroke:gray;" points="10507.9,-399.393 10517,-394 10506.5,-392.529 10507.9,-399.393"/>
+</g>
+<!-- t27->t29 -->
+<g id="edge389" class="edge"><title>t27->t29</title>
+<path style="fill:none;stroke:gray;" d="M10160,-444C10178,-435 10200,-426 10220,-418 10248,-407 10260,-414 10283,-396 10285,-394 10315,-350 10334,-321"/>
+<polygon style="fill:gray;stroke:gray;" points="10337.4,-322.262 10340,-312 10331.5,-318.379 10337.4,-322.262"/>
+</g>
+<!-- t27->t62 -->
+<g id="edge471" class="edge"><title>t27->t62</title>
+<path style="fill:none;stroke:gray;" d="M10154,-444C10173,-434 10197,-423 10220,-418 10321,-394 10591,-441 10683,-396 10702,-386 10699,-374 10713,-360 10721,-352 10729,-345 10738,-337"/>
+<polygon style="fill:gray;stroke:gray;" points="10740.4,-339.546 10745,-330 10735.5,-334.596 10740.4,-339.546"/>
+</g>
+<!-- t28->t29 -->
+<g id="edge41" class="edge"><title>t28->t29</title>
+<path style="fill:none;stroke:gray;" d="M10535,-360C10515,-353 10492,-345 10471,-338 10451,-331 10429,-323 10409,-315"/>
+<polygon style="fill:gray;stroke:gray;" points="10410.6,-311.842 10400,-312 10408.4,-318.483 10410.6,-311.842"/>
+</g>
+<!-- t28->t62 -->
+<g id="edge469" class="edge"><title>t28->t62</title>
+<path style="fill:none;stroke:gray;" d="M10631,-360C10649,-352 10671,-343 10693,-334"/>
+<polygon style="fill:gray;stroke:gray;" points="10694.3,-337.26 10702,-330 10691.4,-330.863 10694.3,-337.26"/>
+</g>
+<!-- t30 -->
+<g id="node60" class="node"><title>t30</title>
+<polygon style="fill:none;stroke:blue;" points="10128,-202 10006,-202 10002,-198 10002,-166 10124,-166 10128,-170 10128,-202"/>
+<polyline style="fill:none;stroke:blue;" points="10124,-198 10002,-198 "/>
+<polyline style="fill:none;stroke:blue;" points="10124,-198 10124,-166 "/>
+<polyline style="fill:none;stroke:blue;" points="10124,-198 10128,-202 "/>
+<text text-anchor="middle" x="10065" y="-177.5" style="font-family:Times New Roman;font-size:20.00;fill:blue;">buildIntervals</text>
+</g>
+<!-- t29->t30 -->
+<g id="edge47" class="edge"><title>t29->t30</title>
+<path style="fill:none;stroke:gray;" d="M10305,-276C10255,-257 10176,-227 10121,-206"/>
+<polygon style="fill:gray;stroke:gray;" points="10122.6,-202.863 10112,-202 10119.7,-209.26 10122.6,-202.863"/>
+</g>
+<!-- t24 -->
+<g id="node58" class="node"><title>t24</title>
+<polygon style="fill:#90ee90;stroke:green;" points="9968,-682 9772,-682 9768,-678 9768,-646 9964,-646 9968,-650 9968,-682"/>
+<polyline style="fill:none;stroke:green;" points="9964,-678 9768,-678 "/>
+<polyline style="fill:none;stroke:green;" points="9964,-678 9964,-646 "/>
+<polyline style="fill:none;stroke:green;" points="9964,-678 9968,-682 "/>
+<text text-anchor="middle" x="9868" y="-657.5" style="font-family:Times New Roman;font-size:20.00;">importMACSSummary</text>
+</g>
+<!-- t23->t24 -->
+<g id="edge45" class="edge"><title>t23->t24</title>
+<path style="fill:none;stroke:gray;" d="M9868,-712C9868,-706 9868,-699 9868,-692"/>
+<polygon style="fill:gray;stroke:gray;" points="9871.5,-692 9868,-682 9864.5,-692 9871.5,-692"/>
+</g>
+<!-- t24->t30 -->
+<g id="edge395" class="edge"><title>t24->t30</title>
+<path style="fill:none;stroke:gray;" d="M9870,-646C9878,-599 9904,-474 9983,-418 10025,-388 10175,-433 10208,-396 10219,-384 10219,-372 10208,-360 10170,-314 10116,-382 10075,-338 10044,-304 10050,-246 10058,-212"/>
+<polygon style="fill:gray;stroke:gray;" points="10061.5,-212.492 10060,-202 10054.6,-211.119 10061.5,-212.492"/>
+</g>
+<!-- t182 -->
+<g id="node63" class="node"><title>t182</title>
+<polygon style="fill:none;stroke:blue;" points="9700,-118 9620,-118 9616,-114 9616,-82 9696,-82 9700,-86 9700,-118"/>
+<polyline style="fill:none;stroke:blue;" points="9696,-114 9616,-114 "/>
+<polyline style="fill:none;stroke:blue;" points="9696,-114 9696,-82 "/>
+<polyline style="fill:none;stroke:blue;" points="9696,-114 9700,-118 "/>
+<text text-anchor="middle" x="9658" y="-93.5" style="font-family:Times New Roman;font-size:20.00;fill:blue;">intervals</text>
+</g>
+<!-- t30->t182 -->
+<g id="edge49" class="edge"><title>t30->t182</title>
+<path style="fill:none;stroke:blue;" d="M10002,-171C9922,-154 9785,-126 9710,-111"/>
+<polygon style="fill:blue;stroke:blue;" points="9710.49,-107.529 9700,-109 9709.12,-114.393 9710.49,-107.529"/>
+</g>
+<!-- t15 -->
+<g id="node62" class="node"><title>t15</title>
+<polygon style="fill:#90ee90;stroke:green;" points="9768,-202 9552,-202 9548,-198 9548,-166 9764,-166 9768,-170 9768,-202"/>
+<polyline style="fill:none;stroke:green;" points="9764,-198 9548,-198 "/>
+<polyline style="fill:none;stroke:green;" points="9764,-198 9764,-166 "/>
+<polyline style="fill:none;stroke:green;" points="9764,-198 9768,-202 "/>
+<text text-anchor="middle" x="9658" y="-177.5" style="font-family:Times New Roman;font-size:20.00;">importReferenceIntervals</text>
+</g>
+<!-- t15->t182 -->
+<g id="edge397" class="edge"><title>t15->t182</title>
+<path style="fill:none;stroke:gray;" d="M9658,-166C9658,-155 9658,-141 9658,-128"/>
+<polygon style="fill:gray;stroke:gray;" points="9661.5,-128 9658,-118 9654.5,-128 9661.5,-128"/>
+</g>
+<!-- t182->t191 -->
+<g id="edge329" class="edge"><title>t182->t191</title>
+<path style="fill:none;stroke:blue;" d="M9616,-100C9102,-95 4048,-47 3561,-42"/>
+<polygon style="fill:blue;stroke:blue;" points="3561,-38.5001 3551,-42 3561,-45.5001 3561,-38.5001"/>
+</g>
+<!-- t73 -->
+<g id="node65" class="node"><title>t73</title>
+<polygon style="fill:#90ee90;stroke:green;" points="1826,-590 1720,-590 1716,-586 1716,-554 1822,-554 1826,-558 1826,-590"/>
+<polyline style="fill:none;stroke:green;" points="1822,-586 1716,-586 "/>
+<polyline style="fill:none;stroke:green;" points="1822,-586 1822,-554 "/>
+<polyline style="fill:none;stroke:green;" points="1822,-586 1826,-590 "/>
+<text text-anchor="middle" x="1771" y="-565.5" style="font-family:Times New Roman;font-size:20.00;">makeMotifs</text>
+</g>
+<!-- t76 -->
+<g id="node69" class="node"><title>t76</title>
+<polygon style="fill:#90ee90;stroke:green;" points="1214,-480 1104,-480 1100,-476 1100,-444 1210,-444 1214,-448 1214,-480"/>
+<polyline style="fill:none;stroke:green;" points="1210,-476 1100,-476 "/>
+<polyline style="fill:none;stroke:green;" points="1210,-476 1210,-444 "/>
+<polyline style="fill:none;stroke:green;" points="1210,-476 1214,-480 "/>
+<text text-anchor="middle" x="1157" y="-455.5" style="font-family:Times New Roman;font-size:20.00;">runTomTom</text>
+</g>
+<!-- t73->t76 -->
+<g id="edge399" class="edge"><title>t73->t76</title>
+<path style="fill:none;stroke:gray;" d="M1744,-554C1728,-544 1707,-533 1687,-528 1518,-481 1465,-538 1292,-506 1266,-501 1237,-492 1213,-483"/>
+<polygon style="fill:gray;stroke:gray;" points="1213.58,-479.521 1203,-480 1211.57,-486.226 1213.58,-479.521"/>
+</g>
+<!-- t185 -->
+<g id="node80" class="node"><title>t185</title>
+<polygon style="fill:none;stroke:blue;" points="744,-312 632,-312 628,-308 628,-276 740,-276 744,-280 744,-312"/>
+<polyline style="fill:none;stroke:blue;" points="740,-308 628,-308 "/>
+<polyline style="fill:none;stroke:blue;" points="740,-308 740,-276 "/>
+<polyline style="fill:none;stroke:blue;" points="740,-308 744,-312 "/>
+<text text-anchor="middle" x="686" y="-287.5" style="font-family:Times New Roman;font-size:20.00;fill:blue;">build_motifs</text>
+</g>
+<!-- t73->t185 -->
+<g id="edge411" class="edge"><title>t73->t185</title>
+<path style="fill:none;stroke:gray;" d="M1744,-554C1728,-544 1707,-533 1687,-528 1554,-490 1509,-546 1376,-506 1301,-483 1299,-440 1223,-418 1138,-392 896,-444 820,-396 805,-386 815,-372 801,-360 784,-343 774,-349 753,-338 742,-332 731,-324 721,-318"/>
+<polygon style="fill:gray;stroke:gray;" points="722.262,-314.635 712,-312 718.379,-320.459 722.262,-314.635"/>
+</g>
+<!-- t79 -->
+<g id="node82" class="node"><title>t79</title>
+<polygon style="fill:#90ee90;stroke:green;" points="1040,-312 946,-312 942,-308 942,-276 1036,-276 1040,-280 1040,-312"/>
+<polyline style="fill:none;stroke:green;" points="1036,-308 942,-308 "/>
+<polyline style="fill:none;stroke:green;" points="1036,-308 1036,-276 "/>
+<polyline style="fill:none;stroke:green;" points="1036,-308 1040,-312 "/>
+<text text-anchor="middle" x="991" y="-287.5" style="font-family:Times New Roman;font-size:20.00;">runMAST</text>
+</g>
+<!-- t73->t79 -->
+<g id="edge417" class="edge"><title>t73->t79</title>
+<path style="fill:none;stroke:gray;" d="M1744,-554C1731,-545 1715,-536 1700,-528 1578,-461 1536,-466 1415,-396 1392,-382 1391,-369 1365,-360 1233,-312 1181,-387 1049,-338 1037,-333 1026,-326 1017,-319"/>
+<polygon style="fill:gray;stroke:gray;" points="1019.55,-316.596 1010,-312 1014.6,-321.546 1019.55,-316.596"/>
+</g>
+<!-- t91 -->
+<g id="node86" class="node"><title>t91</title>
+<polygon style="fill:none;stroke:blue;" points="924,-312 766,-312 762,-308 762,-276 920,-276 924,-280 924,-312"/>
+<polyline style="fill:none;stroke:blue;" points="920,-308 762,-308 "/>
+<polyline style="fill:none;stroke:blue;" points="920,-308 920,-276 "/>
+<polyline style="fill:none;stroke:blue;" points="920,-308 924,-312 "/>
+<text text-anchor="middle" x="843" y="-287.5" style="font-family:Times New Roman;font-size:20.00;fill:blue;">runGLAM2SCAN</text>
+</g>
+<!-- t73->t91 -->
+<g id="edge423" class="edge"><title>t73->t91</title>
+<path style="fill:none;stroke:gray;" d="M1749,-554C1735,-544 1718,-533 1700,-528 1597,-493 1553,-556 1456,-506 1400,-476 1398,-450 1365,-396 1357,-381 1365,-369 1350,-360 1313,-333 978,-349 933,-338 915,-333 897,-325 882,-317"/>
+<polygon style="fill:gray;stroke:gray;" points="883.441,-313.797 873,-312 880.042,-319.916 883.441,-313.797"/>
+</g>
+<!-- t58 -->
+<g id="node66" class="node"><title>t58</title>
+<polygon style="fill:#90ee90;stroke:green;" points="1760,-682 1568,-682 1564,-678 1564,-646 1756,-646 1760,-650 1760,-682"/>
+<polyline style="fill:none;stroke:green;" points="1756,-678 1564,-678 "/>
+<polyline style="fill:none;stroke:green;" points="1756,-678 1756,-646 "/>
+<polyline style="fill:none;stroke:green;" points="1756,-678 1760,-682 "/>
+<text text-anchor="middle" x="1662" y="-657.5" style="font-family:Times New Roman;font-size:20.00;">exportMotifSequences</text>
+</g>
+<!-- t74 -->
+<g id="node67" class="node"><title>t74</title>
+<polygon style="fill:#90ee90;stroke:green;" points="1678,-590 1582,-590 1578,-586 1578,-554 1674,-554 1678,-558 1678,-590"/>
+<polyline style="fill:none;stroke:green;" points="1674,-586 1578,-586 "/>
+<polyline style="fill:none;stroke:green;" points="1674,-586 1674,-554 "/>
+<polyline style="fill:none;stroke:green;" points="1674,-586 1678,-590 "/>
+<text text-anchor="middle" x="1628" y="-565.5" style="font-family:Times New Roman;font-size:20.00;">runMEME</text>
+</g>
+<!-- t58->t74 -->
+<g id="edge51" class="edge"><title>t58->t74</title>
+<path style="fill:none;stroke:gray;" d="M1655,-646C1650,-633 1643,-615 1638,-600"/>
+<polygon style="fill:gray;stroke:gray;" points="1641.23,-598.573 1635,-590 1634.52,-600.584 1641.23,-598.573"/>
+</g>
+<!-- t75 -->
+<g id="node75" class="node"><title>t75</title>
+<polygon style="fill:#90ee90;stroke:green;" points="939,-396 833,-396 829,-392 829,-360 935,-360 939,-364 939,-396"/>
+<polyline style="fill:none;stroke:green;" points="935,-392 829,-392 "/>
+<polyline style="fill:none;stroke:green;" points="935,-392 935,-360 "/>
+<polyline style="fill:none;stroke:green;" points="935,-392 939,-396 "/>
+<text text-anchor="middle" x="884" y="-371.5" style="font-family:Times New Roman;font-size:20.00;">runGLAM2</text>
+</g>
+<!-- t58->t75 -->
+<g id="edge59" class="edge"><title>t58->t75</title>
+<path style="fill:none;stroke:gray;" d="M1674,-646C1691,-617 1717,-560 1687,-528 1662,-501 1391,-517 1355,-506 1289,-483 1290,-441 1223,-418 1111,-378 1071,-415 949,-396"/>
+<polygon style="fill:gray;stroke:gray;" points="949.492,-392.529 939,-394 948.119,-399.393 949.492,-392.529"/>
+</g>
+<!-- t80 -->
+<g id="node78" class="node"><title>t80</title>
+<polygon style="fill:#90ee90;stroke:green;" points="1540,-590 1388,-590 1384,-586 1384,-554 1536,-554 1540,-558 1540,-590"/>
+<polyline style="fill:none;stroke:green;" points="1536,-586 1384,-586 "/>
+<polyline style="fill:none;stroke:green;" points="1536,-586 1536,-554 "/>
+<polyline style="fill:none;stroke:green;" points="1536,-586 1540,-590 "/>
+<text text-anchor="middle" x="1462" y="-565.5" style="font-family:Times New Roman;font-size:20.00;">runBioProspector</text>
+</g>
+<!-- t58->t80 -->
+<g id="edge61" class="edge"><title>t58->t80</title>
+<path style="fill:none;stroke:gray;" d="M1621,-646C1601,-637 1576,-626 1554,-616 1539,-609 1523,-602 1509,-594"/>
+<polygon style="fill:gray;stroke:gray;" points="1510.56,-590.863 1500,-590 1507.72,-597.26 1510.56,-590.863"/>
+</g>
+<!-- t58->t185 -->
+<g id="edge415" class="edge"><title>t58->t185</title>
+<path style="fill:none;stroke:gray;" d="M1609,-646C1594,-639 1579,-629 1569,-616 1545,-584 1580,-553 1549,-528 1508,-493 1107,-540 1066,-506 1036,-480 1079,-444 1048,-418 1012,-386 645,-431 611,-396 588,-371 619,-339 648,-318"/>
+<polygon style="fill:gray;stroke:gray;" points="650.621,-320.459 657,-312 646.738,-314.635 650.621,-320.459"/>
+</g>
+<!-- t58->t79 -->
+<g id="edge421" class="edge"><title>t58->t79</title>
+<path style="fill:none;stroke:gray;" d="M1760,-650C1800,-642 1838,-631 1850,-616 1874,-585 1874,-559 1850,-528 1828,-498 1805,-516 1769,-506 1620,-461 1565,-483 1435,-396 1418,-384 1424,-369 1405,-360 1335,-324 1124,-365 1049,-338 1037,-333 1026,-326 1017,-319"/>
+<polygon style="fill:gray;stroke:gray;" points="1019.55,-316.596 1010,-312 1014.6,-321.546 1019.55,-316.596"/>
+</g>
+<!-- t58->t91 -->
+<g id="edge427" class="edge"><title>t58->t91</title>
+<path style="fill:none;stroke:gray;" d="M1760,-647C1794,-639 1825,-629 1835,-616 1847,-600 1849,-542 1835,-528 1808,-498 1507,-527 1473,-506 1436,-482 1453,-453 1426,-418 1421,-410 1373,-363 1364,-360 1276,-322 1026,-361 933,-338 915,-333 897,-325 882,-317"/>
+<polygon style="fill:gray;stroke:gray;" points="883.441,-313.797 873,-312 880.042,-319.916 883.441,-313.797"/>
+</g>
+<!-- t74->t76 -->
+<g id="edge53" class="edge"><title>t74->t76</title>
+<path style="fill:none;stroke:gray;" d="M1608,-554C1596,-544 1580,-533 1564,-528 1442,-484 1399,-534 1272,-506 1250,-501 1226,-492 1205,-484"/>
+<polygon style="fill:gray;stroke:gray;" points="1206.56,-480.863 1196,-480 1203.72,-487.26 1206.56,-480.863"/>
+</g>
+<!-- t74->t185 -->
+<g id="edge409" class="edge"><title>t74->t185</title>
+<path style="fill:none;stroke:gray;" d="M1608,-554C1596,-544 1580,-533 1564,-528 1515,-511 1133,-538 1091,-506 1060,-480 1098,-443 1066,-418 1028,-387 657,-431 622,-396 600,-372 628,-341 653,-319"/>
+<polygon style="fill:gray;stroke:gray;" points="655.779,-321.219 661,-312 651.169,-315.951 655.779,-321.219"/>
+</g>
+<!-- t77 -->
+<g id="node71" class="node"><title>t77</title>
+<polygon style="fill:#90ee90;stroke:green;" points="771,-396 635,-396 631,-392 631,-360 767,-360 771,-364 771,-396"/>
+<polyline style="fill:none;stroke:green;" points="767,-392 631,-392 "/>
+<polyline style="fill:none;stroke:green;" points="767,-392 767,-360 "/>
+<polyline style="fill:none;stroke:green;" points="767,-392 771,-396 "/>
+<text text-anchor="middle" x="701" y="-371.5" style="font-family:Times New Roman;font-size:20.00;">importTomTom</text>
+</g>
+<!-- t76->t77 -->
+<g id="edge55" class="edge"><title>t76->t77</title>
+<path style="fill:none;stroke:gray;" d="M1135,-444C1121,-434 1104,-423 1086,-418 959,-376 918,-414 785,-396 784,-396 782,-395 781,-395"/>
+<polygon style="fill:gray;stroke:gray;" points="781.299,-391.512 771,-394 780.602,-398.478 781.299,-391.512"/>
+</g>
+<!-- t78 -->
+<g id="node73" class="node"><title>t78</title>
+<polygon style="fill:#90ee90;stroke:green;" points="1065,-396 961,-396 957,-392 957,-360 1061,-360 1065,-364 1065,-396"/>
+<polyline style="fill:none;stroke:green;" points="1061,-392 957,-392 "/>
+<polyline style="fill:none;stroke:green;" points="1061,-392 1061,-360 "/>
+<polyline style="fill:none;stroke:green;" points="1061,-392 1065,-396 "/>
+<text text-anchor="middle" x="1011" y="-371.5" style="font-family:Times New Roman;font-size:20.00;">filterMotifs</text>
+</g>
+<!-- t76->t78 -->
+<g id="edge57" class="edge"><title>t76->t78</title>
+<path style="fill:none;stroke:gray;" d="M1129,-444C1116,-436 1101,-426 1086,-418 1076,-413 1066,-407 1056,-401"/>
+<polygon style="fill:gray;stroke:gray;" points="1057.44,-397.797 1047,-396 1054.04,-403.916 1057.44,-397.797"/>
+</g>
+<!-- t76->t185 -->
+<g id="edge405" class="edge"><title>t76->t185</title>
+<path style="fill:none;stroke:gray;" d="M1135,-444C1121,-434 1104,-423 1086,-418 1027,-398 860,-427 805,-396 789,-386 794,-373 780,-360 770,-350 745,-333 723,-318"/>
+<polygon style="fill:gray;stroke:gray;" points="725.1,-315.2 715,-312 720.9,-320.8 725.1,-315.2"/>
+</g>
+<!-- t77->t185 -->
+<g id="edge403" class="edge"><title>t77->t185</title>
+<path style="fill:none;stroke:gray;" d="M698,-360C696,-349 693,-334 691,-322"/>
+<polygon style="fill:gray;stroke:gray;" points="694.393,-321.119 689,-312 687.529,-322.492 694.393,-321.119"/>
+</g>
+<!-- t78->t185 -->
+<g id="edge401" class="edge"><title>t78->t185</title>
+<path style="fill:none;stroke:gray;" d="M957,-362C954,-361 951,-361 948,-360 863,-343 836,-367 753,-338 740,-333 727,-325 716,-318"/>
+<polygon style="fill:gray;stroke:gray;" points="718.1,-315.2 708,-312 713.9,-320.8 718.1,-315.2"/>
+</g>
+<!-- t78->t79 -->
+<g id="edge65" class="edge"><title>t78->t79</title>
+<path style="fill:none;stroke:gray;" d="M1007,-360C1005,-349 1001,-334 998,-322"/>
+<polygon style="fill:gray;stroke:gray;" points="1001.23,-320.573 995,-312 994.521,-322.584 1001.23,-320.573"/>
+</g>
+<!-- t75->t185 -->
+<g id="edge407" class="edge"><title>t75->t185</title>
+<path style="fill:none;stroke:gray;" d="M829,-363C800,-354 767,-344 753,-338 741,-333 730,-325 719,-318"/>
+<polygon style="fill:gray;stroke:gray;" points="721.1,-315.2 711,-312 716.9,-320.8 721.1,-315.2"/>
+</g>
+<!-- t75->t91 -->
+<g id="edge69" class="edge"><title>t75->t91</title>
+<path style="fill:none;stroke:gray;" d="M875,-360C869,-349 863,-334 856,-321"/>
+<polygon style="fill:gray;stroke:gray;" points="859.26,-319.717 852,-312 852.863,-322.56 859.26,-319.717"/>
+</g>
+<!-- t59 -->
+<g id="node77" class="node"><title>t59</title>
+<polygon style="fill:#90ee90;stroke:green;" points="1341,-396 1087,-396 1083,-392 1083,-360 1337,-360 1341,-364 1341,-396"/>
+<polyline style="fill:none;stroke:green;" points="1337,-392 1083,-392 "/>
+<polyline style="fill:none;stroke:green;" points="1337,-392 1337,-360 "/>
+<polyline style="fill:none;stroke:green;" points="1337,-392 1341,-396 "/>
+<text text-anchor="middle" x="1212" y="-371.5" style="font-family:Times New Roman;font-size:20.00;">exportMotifControlSequences</text>
+</g>
+<!-- t59->t185 -->
+<g id="edge413" class="edge"><title>t59->t185</title>
+<path style="fill:none;stroke:gray;" d="M1083,-361C1080,-361 1077,-360 1074,-360 1003,-352 821,-360 753,-338 740,-333 727,-326 716,-318"/>
+<polygon style="fill:gray;stroke:gray;" points="718.1,-315.2 708,-312 713.9,-320.8 718.1,-315.2"/>
+</g>
+<!-- t59->t79 -->
+<g id="edge419" class="edge"><title>t59->t79</title>
+<path style="fill:none;stroke:gray;" d="M1111,-360C1090,-354 1068,-347 1049,-338 1038,-333 1028,-326 1019,-319"/>
+<polygon style="fill:gray;stroke:gray;" points="1020.83,-315.951 1011,-312 1016.22,-321.219 1020.83,-315.951"/>
+</g>
+<!-- t59->t91 -->
+<g id="edge425" class="edge"><title>t59->t91</title>
+<path style="fill:none;stroke:gray;" d="M1083,-360C1014,-350 941,-340 933,-338 916,-332 899,-324 884,-317"/>
+<polygon style="fill:gray;stroke:gray;" points="885.441,-313.797 875,-312 882.042,-319.916 885.441,-313.797"/>
+</g>
+<!-- t80->t185 -->
+<g id="edge63" class="edge"><title>t80->t185</title>
+<path style="fill:none;stroke:gray;" d="M1432,-554C1415,-544 1392,-533 1370,-528 1303,-510 1112,-548 1056,-506 1024,-480 1059,-442 1026,-418 988,-388 621,-431 587,-396 576,-384 580,-374 587,-360 591,-354 621,-334 647,-317"/>
+<polygon style="fill:gray;stroke:gray;" points="648.958,-319.916 656,-312 645.559,-313.797 648.958,-319.916"/>
+</g>
+<!-- t185->t191 -->
+<g id="edge325" class="edge"><title>t185->t191</title>
+<path style="fill:none;stroke:blue;" d="M707,-276C720,-266 737,-255 753,-250 808,-231 969,-265 1012,-228 1043,-201 1004,-168 1032,-140 1109,-62 1163,-95 1271,-82 1500,-52 3210,-43 3487,-42"/>
+<polygon style="fill:blue;stroke:blue;" points="3487,-45.5001 3497,-42 3487,-38.5001 3487,-45.5001"/>
+</g>
+<!-- t90 -->
+<g id="node84" class="node"><title>t90</title>
+<polygon style="fill:#90ee90;stroke:green;" points="1396,-202 1276,-202 1272,-198 1272,-166 1392,-166 1396,-170 1396,-202"/>
+<polyline style="fill:none;stroke:green;" points="1392,-198 1272,-198 "/>
+<polyline style="fill:none;stroke:green;" points="1392,-198 1392,-166 "/>
+<polyline style="fill:none;stroke:green;" points="1392,-198 1396,-202 "/>
+<text text-anchor="middle" x="1334" y="-177.5" style="font-family:Times New Roman;font-size:20.00;">importMAST</text>
+</g>
+<!-- t79->t90 -->
+<g id="edge67" class="edge"><title>t79->t90</title>
+<path style="fill:none;stroke:gray;" d="M1009,-276C1020,-266 1034,-255 1049,-250 1138,-214 1173,-258 1263,-228 1277,-223 1291,-216 1303,-208"/>
+<polygon style="fill:gray;stroke:gray;" points="1305.1,-210.8 1311,-202 1300.9,-205.2 1305.1,-210.8"/>
+</g>
+<!-- t186 -->
+<g id="node90" class="node"><title>t186</title>
+<polygon style="fill:none;stroke:blue;" points="1388,-118 1284,-118 1280,-114 1280,-82 1384,-82 1388,-86 1388,-118"/>
+<polyline style="fill:none;stroke:blue;" points="1384,-114 1280,-114 "/>
+<polyline style="fill:none;stroke:blue;" points="1384,-114 1384,-82 "/>
+<polyline style="fill:none;stroke:blue;" points="1384,-114 1388,-118 "/>
+<text text-anchor="middle" x="1334" y="-93.5" style="font-family:Times New Roman;font-size:20.00;fill:blue;">find_motifs</text>
+</g>
+<!-- t79->t186 -->
+<g id="edge431" class="edge"><title>t79->t186</title>
+<path style="fill:none;stroke:gray;" d="M1009,-276C1020,-266 1034,-255 1049,-250 1087,-236 1378,-256 1405,-228 1432,-199 1425,-173 1405,-140 1402,-134 1397,-129 1391,-124"/>
+<polygon style="fill:gray;stroke:gray;" points="1393.1,-121.2 1383,-118 1388.9,-126.8 1393.1,-121.2"/>
+</g>
+<!-- t90->t186 -->
+<g id="edge429" class="edge"><title>t90->t186</title>
+<path style="fill:none;stroke:gray;" d="M1334,-166C1334,-155 1334,-141 1334,-128"/>
+<polygon style="fill:gray;stroke:gray;" points="1337.5,-128 1334,-118 1330.5,-128 1337.5,-128"/>
+</g>
+<!-- t92 -->
+<g id="node88" class="node"><title>t92</title>
+<polygon style="fill:none;stroke:blue;" points="1254,-202 1070,-202 1066,-198 1066,-166 1250,-166 1254,-170 1254,-202"/>
+<polyline style="fill:none;stroke:blue;" points="1250,-198 1066,-198 "/>
+<polyline style="fill:none;stroke:blue;" points="1250,-198 1250,-166 "/>
+<polyline style="fill:none;stroke:blue;" points="1250,-198 1254,-202 "/>
+<text text-anchor="middle" x="1160" y="-177.5" style="font-family:Times New Roman;font-size:20.00;fill:blue;">importGLAM2SCAN</text>
+</g>
+<!-- t91->t92 -->
+<g id="edge71" class="edge"><title>t91->t92</title>
+<path style="fill:none;stroke:blue;" d="M874,-276C891,-267 913,-256 933,-250 985,-233 1001,-243 1052,-228 1072,-222 1093,-214 1112,-206"/>
+<polygon style="fill:blue;stroke:blue;" points="1113.28,-209.26 1121,-202 1110.44,-202.863 1113.28,-209.26"/>
+</g>
+<!-- t91->t186 -->
+<g id="edge75" class="edge"><title>t91->t186</title>
+<path style="fill:none;stroke:blue;" d="M875,-276C892,-267 913,-257 933,-250 976,-235 1000,-258 1032,-228 1062,-200 1027,-166 1057,-140 1073,-126 1193,-112 1270,-106"/>
+<polygon style="fill:blue;stroke:blue;" points="1270.4,-109.478 1280,-105 1269.7,-102.512 1270.4,-109.478"/>
+</g>
+<!-- t92->t186 -->
+<g id="edge73" class="edge"><title>t92->t186</title>
+<path style="fill:none;stroke:blue;" d="M1197,-166C1223,-153 1259,-136 1287,-122"/>
+<polygon style="fill:blue;stroke:blue;" points="1288.28,-125.26 1296,-118 1285.44,-118.863 1288.28,-125.26"/>
+</g>
+<!-- t186->t191 -->
+<g id="edge323" class="edge"><title>t186->t191</title>
+<path style="fill:none;stroke:blue;" d="M1388,-90C1407,-87 1429,-84 1449,-82 1659,-63 3223,-45 3487,-42"/>
+<polygon style="fill:blue;stroke:blue;" points="3487,-45.5001 3497,-42 3487,-38.5001 3487,-45.5001"/>
+</g>
+<!-- t44 -->
+<g id="node93" class="node"><title>t44</title>
+<polygon style="fill:#90ee90;stroke:green;" points="693,-924 451,-924 447,-920 447,-888 689,-888 693,-892 693,-924"/>
+<polyline style="fill:none;stroke:green;" points="689,-920 447,-920 "/>
+<polyline style="fill:none;stroke:green;" points="689,-920 689,-888 "/>
+<polyline style="fill:none;stroke:green;" points="689,-920 693,-924 "/>
+<text text-anchor="middle" x="570" y="-899.5" style="font-family:Times New Roman;font-size:20.00;">importAffymetrixAnnotation</text>
+</g>
+<!-- t45 -->
+<g id="node94" class="node"><title>t45</title>
+<polygon style="fill:#90ee90;stroke:green;" points="1105,-858 893,-858 889,-854 889,-822 1101,-822 1105,-826 1105,-858"/>
+<polyline style="fill:none;stroke:green;" points="1101,-854 889,-854 "/>
+<polyline style="fill:none;stroke:green;" points="1101,-854 1101,-822 "/>
+<polyline style="fill:none;stroke:green;" points="1101,-854 1105,-858 "/>
+<text text-anchor="middle" x="997" y="-833.5" style="font-family:Times New Roman;font-size:20.00;">buildProbeset2Transcript</text>
+</g>
+<!-- t44->t45 -->
+<g id="edge77" class="edge"><title>t44->t45</title>
+<path style="fill:none;stroke:gray;" d="M687,-888C747,-879 819,-867 879,-858"/>
+<polygon style="fill:gray;stroke:gray;" points="879.398,-861.478 889,-857 878.701,-854.512 879.398,-861.478"/>
+</g>
+<!-- t184 -->
+<g id="node128" class="node"><title>t184</title>
+<polygon style="fill:none;stroke:blue;" points="791,-118 695,-118 691,-114 691,-82 787,-82 791,-86 791,-118"/>
+<polyline style="fill:none;stroke:blue;" points="787,-114 691,-114 "/>
+<polyline style="fill:none;stroke:blue;" points="787,-114 787,-82 "/>
+<polyline style="fill:none;stroke:blue;" points="787,-114 791,-118 "/>
+<text text-anchor="middle" x="741" y="-93.5" style="font-family:Times New Roman;font-size:20.00;fill:blue;">expression</text>
+</g>
+<!-- t44->t184 -->
+<g id="edge461" class="edge"><title>t44->t184</title>
+<path style="fill:none;stroke:gray;" d="M447,-897C285,-882 21,-843 21,-756 21,-756 21,-756 21,-294 21,-225 -21,-188 26,-140 49,-116 512,-104 681,-101"/>
+<polygon style="fill:gray;stroke:gray;" points="681,-104.5 691,-101 681,-97.5001 681,-104.5"/>
+</g>
+<!-- t46 -->
+<g id="node96" class="node"><title>t46</title>
+<polygon style="fill:#90ee90;stroke:green;" points="1111,-774 887,-774 883,-770 883,-738 1107,-738 1111,-742 1111,-774"/>
+<polyline style="fill:none;stroke:green;" points="1107,-770 883,-770 "/>
+<polyline style="fill:none;stroke:green;" points="1107,-770 1107,-738 "/>
+<polyline style="fill:none;stroke:green;" points="1107,-770 1111,-774 "/>
+<text text-anchor="middle" x="997" y="-749.5" style="font-family:Times New Roman;font-size:20.00;">importProbeset2Transcript</text>
+</g>
+<!-- t45->t46 -->
+<g id="edge79" class="edge"><title>t45->t46</title>
+<path style="fill:none;stroke:gray;" d="M997,-822C997,-811 997,-797 997,-784"/>
+<polygon style="fill:gray;stroke:gray;" points="1000.5,-784 997,-774 993.5,-784 1000.5,-784"/>
+</g>
+<!-- t45->t184 -->
+<g id="edge459" class="edge"><title>t45->t184</title>
+<path style="fill:none;stroke:gray;" d="M1065,-822C1084,-816 1103,-809 1120,-800 1238,-740 1299,-736 1352,-616 1368,-580 1379,-556 1352,-528 1303,-474 1079,-556 1026,-506 998,-478 1045,-444 1016,-418 981,-384 611,-431 577,-396 566,-384 572,-375 577,-360 582,-348 590,-349 595,-338 614,-301 588,-275 619,-250 681,-199 922,-285 976,-228 1003,-199 1002,-169 976,-140 965,-127 867,-113 801,-106"/>
+<polygon style="fill:gray;stroke:gray;" points="801.299,-102.512 791,-105 800.602,-109.478 801.299,-102.512"/>
+</g>
+<!-- t49 -->
+<g id="node98" class="node"><title>t49</title>
+<polygon style="fill:#90ee90;stroke:green;" points="903.007,-670.774 763,-685.92 622.993,-670.774 623.124,-646.266 902.876,-646.266 903.007,-670.774"/>
+<polygon style="fill:none;stroke:green;" points="907.026,-674.362 763,-689.944 618.974,-674.362 619.145,-642.266 906.855,-642.266 907.026,-674.362"/>
+<text text-anchor="middle" x="763" y="-657.5" style="font-family:Times New Roman;font-size:20.00;">buildExpressionTracks</text>
+</g>
+<!-- t46->t49 -->
+<g id="edge81" class="edge"><title>t46->t49</title>
+<path style="fill:none;stroke:gray;" d="M951,-738C915,-723 864,-703 824,-688"/>
+<polygon style="fill:gray;stroke:gray;" points="825.56,-684.863 815,-684 822.717,-691.26 825.56,-684.863"/>
+</g>
+<!-- t46->t184 -->
+<g id="edge457" class="edge"><title>t46->t184</title>
+<path style="fill:none;stroke:gray;" d="M1071,-738C1196,-704 1427,-626 1335,-528 1287,-476 1072,-549 1016,-506 984,-480 1021,-443 988,-418 950,-388 581,-431 547,-396 536,-384 541,-374 547,-360 554,-345 566,-350 575,-338 599,-304 572,-275 604,-250 667,-201 906,-285 960,-228 987,-199 986,-170 960,-140 940,-116 859,-106 801,-103"/>
+<polygon style="fill:gray;stroke:gray;" points="801.299,-99.5125 791,-102 800.602,-106.478 801.299,-99.5125"/>
+</g>
+<!-- t50 -->
+<g id="node100" class="node"><title>t50</title>
+<polygon style="fill:#90ee90;stroke:green;" points="868,-590 662,-590 658,-586 658,-554 864,-554 868,-558 868,-590"/>
+<polyline style="fill:none;stroke:green;" points="864,-586 658,-586 "/>
+<polyline style="fill:none;stroke:green;" points="864,-586 864,-554 "/>
+<polyline style="fill:none;stroke:green;" points="864,-586 868,-590 "/>
+<text text-anchor="middle" x="763" y="-565.5" style="font-family:Times New Roman;font-size:20.00;">importExpressionTracks</text>
+</g>
+<!-- t49->t50 -->
+<g id="edge83" class="edge"><title>t49->t50</title>
+<path style="fill:none;stroke:gray;" d="M763,-642C763,-629 763,-614 763,-600"/>
+<polygon style="fill:gray;stroke:gray;" points="766.5,-600 763,-590 759.5,-600 766.5,-600"/>
+</g>
+<!-- t54 -->
+<g id="node102" class="node"><title>t54</title>
+<polygon style="fill:#90ee90;stroke:green;" points="1116,-590 890,-590 886,-586 886,-554 1112,-554 1116,-558 1116,-590"/>
+<polyline style="fill:none;stroke:green;" points="1112,-586 886,-586 "/>
+<polyline style="fill:none;stroke:green;" points="1112,-586 1112,-554 "/>
+<polyline style="fill:none;stroke:green;" points="1112,-586 1116,-590 "/>
+<text text-anchor="middle" x="1001" y="-565.5" style="font-family:Times New Roman;font-size:20.00;">importExpressionProbesets</text>
+</g>
+<!-- t49->t54 -->
+<g id="edge85" class="edge"><title>t49->t54</title>
+<path style="fill:none;stroke:gray;" d="M819,-642C857,-627 907,-608 944,-594"/>
+<polygon style="fill:gray;stroke:gray;" points="946.015,-596.964 954,-590 943.415,-590.464 946.015,-596.964"/>
+</g>
+<!-- t55 -->
+<g id="node104" class="node"><title>t55</title>
+<polygon style="fill:#90ee90;stroke:green;" points="1326,-590 1138,-590 1134,-586 1134,-554 1322,-554 1326,-558 1326,-590"/>
+<polyline style="fill:none;stroke:green;" points="1322,-586 1134,-586 "/>
+<polyline style="fill:none;stroke:green;" points="1322,-586 1322,-554 "/>
+<polyline style="fill:none;stroke:green;" points="1322,-586 1326,-590 "/>
+<text text-anchor="middle" x="1230" y="-565.5" style="font-family:Times New Roman;font-size:20.00;">importExpressionMap</text>
+</g>
+<!-- t49->t55 -->
+<g id="edge87" class="edge"><title>t49->t55</title>
+<path style="fill:none;stroke:gray;" d="M907,-654C974,-647 1055,-635 1125,-616 1145,-610 1166,-602 1184,-594"/>
+<polygon style="fill:gray;stroke:gray;" points="1185.28,-597.26 1193,-590 1182.44,-590.863 1185.28,-597.26"/>
+</g>
+<!-- t51 -->
+<g id="node106" class="node"><title>t51</title>
+<polygon style="fill:#90ee90;stroke:green;" points="44.2538,-559.794 172,-532.5 299.746,-559.794 299.627,-603.956 44.373,-603.956 44.2538,-559.794"/>
+<polygon style="fill:none;stroke:green;" points="40.2451,-556.56 172,-528.41 303.755,-556.56 303.616,-607.956 40.3838,-607.956 40.2451,-556.56"/>
+<text text-anchor="middle" x="172" y="-565.5" style="font-family:Times New Roman;font-size:20.00;">buildExpressionCorrelation</text>
+</g>
+<!-- t49->t51 -->
+<g id="edge89" class="edge"><title>t49->t51</title>
+<path style="fill:none;stroke:gray;" d="M619,-656C530,-649 414,-637 313,-616 305,-615 298,-613 291,-611"/>
+<polygon style="fill:gray;stroke:gray;" points="291.584,-607.521 281,-608 289.573,-614.226 291.584,-607.521"/>
+</g>
+<!-- t52 -->
+<g id="node108" class="node"><title>t52</title>
+<polygon style="fill:#90ee90;stroke:green;" points="325.802,-559.794 471,-532.5 616.198,-559.794 616.062,-603.956 325.938,-603.956 325.802,-559.794"/>
+<polygon style="fill:none;stroke:green;" points="321.805,-556.476 471,-528.43 620.195,-556.476 620.037,-607.956 321.963,-607.956 321.805,-556.476"/>
+<text text-anchor="middle" x="471" y="-565.5" style="font-family:Times New Roman;font-size:20.00;">buildExpressionFullCorrelation</text>
+</g>
+<!-- t49->t52 -->
+<g id="edge91" class="edge"><title>t49->t52</title>
+<path style="fill:none;stroke:gray;" d="M694,-642C664,-633 629,-622 595,-611"/>
+<polygon style="fill:gray;stroke:gray;" points="595.584,-607.521 585,-608 593.573,-614.226 595.584,-607.521"/>
+</g>
+<!-- t49->t184 -->
+<g id="edge455" class="edge"><title>t49->t184</title>
+<path style="fill:none;stroke:gray;" d="M684,-642C671,-636 658,-627 649,-616 624,-585 660,-554 629,-528 580,-485 385,-548 335,-506 305,-479 344,-445 315,-418 277,-380 247,-414 196,-396 146,-378 119,-383 91,-338 80,-319 76,-156 91,-140 112,-118 523,-106 681,-101"/>
+<polygon style="fill:gray;stroke:gray;" points="681,-104.5 691,-101 681,-97.5001 681,-104.5"/>
+</g>
+<!-- t102 -->
+<g id="node112" class="node"><title>t102</title>
+<polygon style="fill:#90ee90;stroke:green;" points="922,-480 648,-480 644,-476 644,-444 918,-444 922,-448 922,-480"/>
+<polyline style="fill:none;stroke:green;" points="918,-476 644,-476 "/>
+<polyline style="fill:none;stroke:green;" points="918,-476 918,-444 "/>
+<polyline style="fill:none;stroke:green;" points="918,-476 922,-480 "/>
+<text text-anchor="middle" x="783" y="-455.5" style="font-family:Times New Roman;font-size:20.00;">buildExpressionDifferencesSAM</text>
+</g>
+<!-- t50->t102 -->
+<g id="edge95" class="edge"><title>t50->t102</title>
+<path style="fill:none;stroke:gray;" d="M766,-554C769,-537 774,-511 778,-490"/>
+<polygon style="fill:gray;stroke:gray;" points="781.471,-490.492 780,-480 774.607,-489.119 781.471,-490.492"/>
+</g>
+<!-- t101 -->
+<g id="node114" class="node"><title>t101</title>
+<polygon style="fill:#90ee90;stroke:green;" points="626,-480 348,-480 344,-476 344,-444 622,-444 626,-448 626,-480"/>
+<polyline style="fill:none;stroke:green;" points="622,-476 344,-476 "/>
+<polyline style="fill:none;stroke:green;" points="622,-476 622,-444 "/>
+<polyline style="fill:none;stroke:green;" points="622,-476 626,-480 "/>
+<text text-anchor="middle" x="485" y="-455.5" style="font-family:Times New Roman;font-size:20.00;">buildExpressionDifferencesTTest</text>
+</g>
+<!-- t50->t101 -->
+<g id="edge97" class="edge"><title>t50->t101</title>
+<path style="fill:none;stroke:gray;" d="M717,-554C669,-535 592,-505 540,-484"/>
+<polygon style="fill:gray;stroke:gray;" points="541.56,-480.863 531,-480 538.717,-487.26 541.56,-480.863"/>
+</g>
+<!-- t50->t184 -->
+<g id="edge453" class="edge"><title>t50->t184</title>
+<path style="fill:none;stroke:gray;" d="M810,-554C859,-535 930,-507 931,-506 954,-473 958,-446 931,-418 897,-382 522,-431 487,-396 476,-384 480,-374 487,-360 495,-345 507,-351 516,-338 540,-304 512,-275 544,-250 609,-199 856,-287 912,-228 939,-199 937,-170 912,-140 898,-122 845,-112 801,-106"/>
+<polygon style="fill:gray;stroke:gray;" points="801.299,-102.512 791,-105 800.602,-109.478 801.299,-102.512"/>
+</g>
+<!-- t54->t184 -->
+<g id="edge447" class="edge"><title>t54->t184</title>
+<path style="fill:none;stroke:gray;" d="M994,-554C989,-541 982,-522 976,-506 962,-467 979,-442 945,-418 906,-388 541,-431 507,-396 496,-384 501,-374 507,-360 514,-345 526,-350 535,-338 559,-304 532,-275 564,-250 629,-200 873,-287 928,-228 955,-199 953,-170 928,-140 912,-119 850,-109 801,-104"/>
+<polygon style="fill:gray;stroke:gray;" points="801.299,-100.512 791,-103 800.602,-107.478 801.299,-100.512"/>
+</g>
+<!-- t55->t184 -->
+<g id="edge445" class="edge"><title>t55->t184</title>
+<path style="fill:none;stroke:gray;" d="M1193,-554C1173,-545 1148,-535 1125,-528 1070,-511 1041,-543 996,-506 965,-479 1000,-442 967,-418 929,-388 561,-431 527,-396 516,-384 521,-374 527,-360 534,-345 546,-350 555,-338 579,-304 552,-275 584,-250 648,-201 890,-286 944,-228 971,-199 969,-170 944,-140 926,-118 854,-108 801,-104"/>
+<polygon style="fill:gray;stroke:gray;" points="801.299,-100.512 791,-103 800.602,-107.478 801.299,-100.512"/>
+</g>
+<!-- t53 -->
+<g id="node110" class="node"><title>t53</title>
+<polygon style="fill:#90ee90;stroke:green;" points="306,-480 64,-480 60,-476 60,-444 302,-444 306,-448 306,-480"/>
+<polyline style="fill:none;stroke:green;" points="302,-476 60,-476 "/>
+<polyline style="fill:none;stroke:green;" points="302,-476 302,-444 "/>
+<polyline style="fill:none;stroke:green;" points="302,-476 306,-480 "/>
+<text text-anchor="middle" x="183" y="-455.5" style="font-family:Times New Roman;font-size:20.00;">importExpressionCorrelation</text>
+</g>
+<!-- t51->t53 -->
+<g id="edge433" class="edge"><title>t51->t53</title>
+<path style="fill:none;stroke:gray;" d="M176,-529C178,-516 179,-502 180,-490"/>
+<polygon style="fill:gray;stroke:gray;" points="183.488,-490.299 181,-480 176.522,-489.602 183.488,-490.299"/>
+</g>
+<!-- t51->t184 -->
+<g id="edge451" class="edge"><title>t51->t184</title>
+<path style="fill:none;stroke:gray;" d="M95,-545C78,-535 62,-522 51,-506 20,-458 41,-435 41,-378 41,-378 41,-378 41,-294 41,-225 6,-187 55,-140 78,-118 517,-106 681,-101"/>
+<polygon style="fill:gray;stroke:gray;" points="681,-104.5 691,-101 681,-97.5001 681,-104.5"/>
+</g>
+<!-- t52->t53 -->
+<g id="edge93" class="edge"><title>t52->t53</title>
+<path style="fill:none;stroke:gray;" d="M394,-543C346,-524 284,-501 240,-484"/>
+<polygon style="fill:gray;stroke:gray;" points="240.585,-480.464 230,-480 237.985,-486.964 240.585,-480.464"/>
+</g>
+<!-- t53->t184 -->
+<g id="edge449" class="edge"><title>t53->t184</title>
+<path style="fill:none;stroke:gray;" d="M160,-444C134,-422 93,-383 76,-338 61,-296 46,-172 76,-140 98,-118 521,-106 681,-101"/>
+<polygon style="fill:gray;stroke:gray;" points="681,-104.5 691,-101 681,-97.5001 681,-104.5"/>
+</g>
+<!-- t103 -->
+<g id="node116" class="node"><title>t103</title>
+<polygon style="fill:#90ee90;stroke:green;" points="453,-396 209,-396 205,-392 205,-360 449,-360 453,-364 453,-396"/>
+<polyline style="fill:none;stroke:green;" points="449,-392 205,-392 "/>
+<polyline style="fill:none;stroke:green;" points="449,-392 449,-360 "/>
+<polyline style="fill:none;stroke:green;" points="449,-392 453,-396 "/>
+<text text-anchor="middle" x="329" y="-371.5" style="font-family:Times New Roman;font-size:20.00;">importExpressionDifferences</text>
+</g>
+<!-- t102->t103 -->
+<g id="edge99" class="edge"><title>t102->t103</title>
+<path style="fill:none;stroke:gray;" d="M730,-444C703,-435 667,-425 635,-418 634,-417 545,-406 463,-395"/>
+<polygon style="fill:gray;stroke:gray;" points="463.299,-391.512 453,-394 462.602,-398.478 463.299,-391.512"/>
+</g>
+<!-- t101->t103 -->
+<g id="edge435" class="edge"><title>t101->t103</title>
+<path style="fill:none;stroke:gray;" d="M452,-444C429,-432 397,-414 372,-401"/>
+<polygon style="fill:gray;stroke:gray;" points="373.441,-397.797 363,-396 370.042,-403.916 373.441,-397.797"/>
+</g>
+<!-- t104 -->
+<g id="node118" class="node"><title>t104</title>
+<polygon style="fill:#90ee90;stroke:green;" points="406,-202 104,-202 100,-198 100,-166 402,-166 406,-170 406,-202"/>
+<polyline style="fill:none;stroke:green;" points="402,-198 100,-198 "/>
+<polyline style="fill:none;stroke:green;" points="402,-198 402,-166 "/>
+<polyline style="fill:none;stroke:green;" points="402,-198 406,-202 "/>
+<text text-anchor="middle" x="253" y="-177.5" style="font-family:Times New Roman;font-size:20.00;">exportDifferentiallyExpressedGenes</text>
+</g>
+<!-- t103->t104 -->
+<g id="edge101" class="edge"><title>t103->t104</title>
+<path style="fill:none;stroke:gray;" d="M205,-369C163,-363 124,-353 111,-338 86,-307 90,-282 111,-250 124,-230 145,-216 166,-206"/>
+<polygon style="fill:gray;stroke:gray;" points="167.283,-209.26 175,-202 164.44,-202.863 167.283,-209.26"/>
+</g>
+<!-- t172 -->
+<g id="node120" class="node"><title>t172</title>
+<polygon style="fill:#90ee90;stroke:green;" points="314,-312 124,-312 120,-308 120,-276 310,-276 314,-280 314,-312"/>
+<polyline style="fill:none;stroke:green;" points="310,-308 120,-308 "/>
+<polyline style="fill:none;stroke:green;" points="310,-308 310,-276 "/>
+<polyline style="fill:none;stroke:green;" points="310,-308 314,-312 "/>
+<text text-anchor="middle" x="217" y="-287.5" style="font-family:Times New Roman;font-size:20.00;">runExpressionGOSlim</text>
+</g>
+<!-- t103->t172 -->
+<g id="edge103" class="edge"><title>t103->t172</title>
+<path style="fill:none;stroke:gray;" d="M305,-360C289,-348 267,-332 249,-318"/>
+<polygon style="fill:gray;stroke:gray;" points="251.1,-315.2 241,-312 246.9,-320.8 251.1,-315.2"/>
+</g>
+<!-- t173 -->
+<g id="node122" class="node"><title>t173</title>
+<polygon style="fill:#90ee90;stroke:green;" points="490,-312 336,-312 332,-308 332,-276 486,-276 490,-280 490,-312"/>
+<polyline style="fill:none;stroke:green;" points="486,-308 332,-308 "/>
+<polyline style="fill:none;stroke:green;" points="486,-308 486,-276 "/>
+<polyline style="fill:none;stroke:green;" points="486,-308 490,-312 "/>
+<text text-anchor="middle" x="411" y="-287.5" style="font-family:Times New Roman;font-size:20.00;">runExpressionGO</text>
+</g>
+<!-- t103->t173 -->
+<g id="edge105" class="edge"><title>t103->t173</title>
+<path style="fill:none;stroke:gray;" d="M347,-360C359,-348 374,-332 386,-319"/>
+<polygon style="fill:gray;stroke:gray;" points="388.404,-321.546 393,-312 383.454,-316.596 388.404,-321.546"/>
+</g>
+<!-- t103->t184 -->
+<g id="edge443" class="edge"><title>t103->t184</title>
+<path style="fill:none;stroke:gray;" d="M442,-360C468,-354 490,-347 499,-338 529,-309 493,-275 524,-250 589,-198 841,-288 897,-228 924,-199 921,-170 897,-140 886,-124 840,-114 801,-108"/>
+<polygon style="fill:gray;stroke:gray;" points="801.492,-104.529 791,-106 800.119,-111.393 801.492,-104.529"/>
+</g>
+<!-- t104->t184 -->
+<g id="edge441" class="edge"><title>t104->t184</title>
+<path style="fill:none;stroke:gray;" d="M313,-166C345,-157 384,-147 420,-140 510,-122 617,-111 681,-105"/>
+<polygon style="fill:gray;stroke:gray;" points="681.398,-108.478 691,-104 680.701,-101.512 681.398,-108.478"/>
+</g>
+<!-- t175 -->
+<g id="node124" class="node"><title>t175</title>
+<polygon style="fill:none;stroke:blue;" points="666,-202 448,-202 444,-198 444,-166 662,-166 666,-170 666,-202"/>
+<polyline style="fill:none;stroke:blue;" points="662,-198 444,-198 "/>
+<polyline style="fill:none;stroke:blue;" points="662,-198 662,-166 "/>
+<polyline style="fill:none;stroke:blue;" points="662,-198 666,-202 "/>
+<text text-anchor="middle" x="555" y="-177.5" style="font-family:Times New Roman;font-size:20.00;fill:blue;">importExpressionGOSlim</text>
+</g>
+<!-- t172->t175 -->
+<g id="edge107" class="edge"><title>t172->t175</title>
+<path style="fill:none;stroke:gray;" d="M255,-276C275,-267 300,-257 323,-250 370,-235 384,-240 430,-228 453,-222 477,-214 499,-206"/>
+<polygon style="fill:gray;stroke:gray;" points="500.283,-209.26 508,-202 497.44,-202.863 500.283,-209.26"/>
+</g>
+<!-- t172->t184 -->
+<g id="edge439" class="edge"><title>t172->t184</title>
+<path style="fill:none;stroke:gray;" d="M256,-276C276,-267 300,-257 323,-250 363,-237 386,-257 415,-228 444,-199 406,-167 435,-140 453,-123 597,-110 681,-104"/>
+<polygon style="fill:gray;stroke:gray;" points="681.398,-107.478 691,-103 680.701,-100.512 681.398,-107.478"/>
+</g>
+<!-- t174 -->
+<g id="node126" class="node"><title>t174</title>
+<polygon style="fill:none;stroke:blue;" points="888,-202 708,-202 704,-198 704,-166 884,-166 888,-170 888,-202"/>
+<polyline style="fill:none;stroke:blue;" points="884,-198 704,-198 "/>
+<polyline style="fill:none;stroke:blue;" points="884,-198 884,-166 "/>
+<polyline style="fill:none;stroke:blue;" points="884,-198 888,-202 "/>
+<text text-anchor="middle" x="796" y="-177.5" style="font-family:Times New Roman;font-size:20.00;fill:blue;">importExpressionGO</text>
+</g>
+<!-- t173->t174 -->
+<g id="edge109" class="edge"><title>t173->t174</title>
+<path style="fill:none;stroke:gray;" d="M443,-276C460,-266 483,-256 504,-250 585,-227 610,-249 690,-228 710,-223 732,-214 751,-206"/>
+<polygon style="fill:gray;stroke:gray;" points="752.283,-209.26 760,-202 749.44,-202.863 752.283,-209.26"/>
+</g>
+<!-- t173->t184 -->
+<g id="edge437" class="edge"><title>t173->t184</title>
+<path style="fill:none;stroke:gray;" d="M443,-276C460,-266 483,-256 504,-250 541,-239 647,-253 675,-228 705,-201 675,-174 695,-140 698,-135 702,-130 707,-125"/>
+<polygon style="fill:gray;stroke:gray;" points="709.404,-127.546 714,-118 704.454,-122.596 709.404,-127.546"/>
+</g>
+<!-- t175->t184 -->
+<g id="edge111" class="edge"><title>t175->t184</title>
+<path style="fill:none;stroke:blue;" d="M595,-166C623,-153 662,-136 692,-122"/>
+<polygon style="fill:blue;stroke:blue;" points="693.283,-125.26 701,-118 690.44,-118.863 693.283,-125.26"/>
+</g>
+<!-- t174->t184 -->
+<g id="edge113" class="edge"><title>t174->t184</title>
+<path style="fill:none;stroke:blue;" d="M784,-166C777,-155 767,-139 758,-126"/>
+<polygon style="fill:blue;stroke:blue;" points="761.268,-124.625 753,-118 755.332,-128.335 761.268,-124.625"/>
+</g>
+<!-- t184->t191 -->
+<g id="edge327" class="edge"><title>t184->t191</title>
+<path style="fill:none;stroke:blue;" d="M791,-96C847,-92 941,-85 1022,-82 1532,-61 3214,-44 3487,-42"/>
+<polygon style="fill:blue;stroke:blue;" points="3487,-45.5001 3497,-42 3487,-38.5001 3487,-45.5001"/>
+</g>
+<!-- t69 -->
+<g id="node133" class="node"><title>t69</title>
+<polygon style="fill:#90ee90;stroke:green;" points="10456,-480 10248,-480 10244,-476 10244,-444 10452,-444 10456,-448 10456,-480"/>
+<polyline style="fill:none;stroke:green;" points="10452,-476 10244,-476 "/>
+<polyline style="fill:none;stroke:green;" points="10452,-476 10452,-444 "/>
+<polyline style="fill:none;stroke:green;" points="10452,-476 10456,-480 "/>
+<text text-anchor="middle" x="10350" y="-455.5" style="font-family:Times New Roman;font-size:20.00;">makePeakvalCorrelation</text>
+</g>
+<!-- t31->t69 -->
+<g id="edge117" class="edge"><title>t31->t69</title>
+<path style="fill:none;stroke:gray;" d="M10368,-554C10365,-537 10360,-511 10356,-490"/>
+<polygon style="fill:gray;stroke:gray;" points="10359.4,-489.119 10354,-480 10352.5,-490.492 10359.4,-489.119"/>
+</g>
+<!-- t70 -->
+<g id="node135" class="node"><title>t70</title>
+<polygon style="fill:#90ee90;stroke:green;" points="10682,-480 10478,-480 10474,-476 10474,-444 10678,-444 10682,-448 10682,-480"/>
+<polyline style="fill:none;stroke:green;" points="10678,-476 10474,-476 "/>
+<polyline style="fill:none;stroke:green;" points="10678,-476 10678,-444 "/>
+<polyline style="fill:none;stroke:green;" points="10678,-476 10682,-480 "/>
+<text text-anchor="middle" x="10578" y="-455.5" style="font-family:Times New Roman;font-size:20.00;">makeAvgvalCorrelation</text>
+</g>
+<!-- t31->t70 -->
+<g id="edge119" class="edge"><title>t31->t70</title>
+<path style="fill:none;stroke:gray;" d="M10406,-554C10442,-535 10497,-506 10535,-485"/>
+<polygon style="fill:gray;stroke:gray;" points="10537,-487.916 10544,-480 10533.6,-481.797 10537,-487.916"/>
+</g>
+<!-- t71 -->
+<g id="node137" class="node"><title>t71</title>
+<polygon style="fill:#90ee90;stroke:green;" points="10906,-480 10704,-480 10700,-476 10700,-444 10902,-444 10906,-448 10906,-480"/>
+<polyline style="fill:none;stroke:green;" points="10902,-476 10700,-476 "/>
+<polyline style="fill:none;stroke:green;" points="10902,-476 10902,-444 "/>
+<polyline style="fill:none;stroke:green;" points="10902,-476 10906,-480 "/>
+<text text-anchor="middle" x="10803" y="-455.5" style="font-family:Times New Roman;font-size:20.00;">makeLengthCorrelation</text>
+</g>
+<!-- t31->t71 -->
+<g id="edge121" class="edge"><title>t31->t71</title>
+<path style="fill:none;stroke:gray;" d="M10432,-563C10497,-552 10603,-532 10691,-506 10712,-500 10734,-492 10753,-484"/>
+<polygon style="fill:gray;stroke:gray;" points="10754.3,-487.26 10762,-480 10751.4,-480.863 10754.3,-487.26"/>
+</g>
+<!-- t72 -->
+<g id="node139" class="node"><title>t72</title>
+<polygon style="fill:#90ee90;stroke:green;" points="10897,-396 10741,-396 10737,-392 10737,-360 10893,-360 10897,-364 10897,-396"/>
+<polyline style="fill:none;stroke:green;" points="10893,-392 10737,-392 "/>
+<polyline style="fill:none;stroke:green;" points="10893,-392 10893,-360 "/>
+<polyline style="fill:none;stroke:green;" points="10893,-392 10897,-396 "/>
+<text text-anchor="middle" x="10817" y="-371.5" style="font-family:Times New Roman;font-size:20.00;">importCorrelation</text>
+</g>
+<!-- t69->t72 -->
+<g id="edge465" class="edge"><title>t69->t72</title>
+<path style="fill:none;stroke:gray;" d="M10389,-444C10411,-435 10439,-424 10465,-418 10578,-392 10609,-411 10723,-396 10724,-396 10726,-396 10727,-395"/>
+<polygon style="fill:gray;stroke:gray;" points="10727.4,-398.478 10737,-394 10726.7,-391.512 10727.4,-398.478"/>
+</g>
+<!-- t70->t72 -->
+<g id="edge463" class="edge"><title>t70->t72</title>
+<path style="fill:none;stroke:gray;" d="M10622,-444C10643,-436 10668,-426 10691,-418 10710,-411 10729,-405 10747,-399"/>
+<polygon style="fill:gray;stroke:gray;" points="10748.4,-402.226 10757,-396 10746.4,-395.521 10748.4,-402.226"/>
+</g>
+<!-- t71->t72 -->
+<g id="edge123" class="edge"><title>t71->t72</title>
+<path style="fill:none;stroke:gray;" d="M10806,-444C10808,-433 10810,-418 10812,-406"/>
+<polygon style="fill:gray;stroke:gray;" points="10815.5,-406.492 10814,-396 10808.6,-405.119 10815.5,-406.492"/>
+</g>
+<!-- t188 -->
+<g id="node154" class="node"><title>t188</title>
+<polygon style="fill:none;stroke:blue;" points="10627,-118 10529,-118 10525,-114 10525,-82 10623,-82 10627,-86 10627,-118"/>
+<polyline style="fill:none;stroke:blue;" points="10623,-114 10525,-114 "/>
+<polyline style="fill:none;stroke:blue;" points="10623,-114 10623,-82 "/>
+<polyline style="fill:none;stroke:blue;" points="10623,-114 10627,-118 "/>
+<text text-anchor="middle" x="10576" y="-93.5" style="font-family:Times New Roman;font-size:20.00;fill:blue;">correlation</text>
+</g>
+<!-- t72->t188 -->
+<g id="edge137" class="edge"><title>t72->t188</title>
+<path style="fill:none;stroke:gray;" d="M10872,-360C10881,-354 10889,-347 10895,-338 10942,-262 10938,-200 10873,-140 10857,-124 10719,-111 10637,-105"/>
+<polygon style="fill:gray;stroke:gray;" points="10637.3,-101.512 10627,-104 10636.6,-108.478 10637.3,-101.512"/>
+</g>
+<!-- t61 -->
+<g id="node143" class="node"><title>t61</title>
+<polygon style="fill:#90ee90;stroke:green;" points="10384,-202 10256,-202 10252,-198 10252,-166 10380,-166 10384,-170 10384,-202"/>
+<polyline style="fill:none;stroke:green;" points="10380,-198 10252,-198 "/>
+<polyline style="fill:none;stroke:green;" points="10380,-198 10380,-166 "/>
+<polyline style="fill:none;stroke:green;" points="10380,-198 10384,-202 "/>
+<text text-anchor="middle" x="10318" y="-177.5" style="font-family:Times New Roman;font-size:20.00;">importOverlap</text>
+</g>
+<!-- t60->t61 -->
+<g id="edge127" class="edge"><title>t60->t61</title>
+<path style="fill:none;stroke:gray;" d="M10195,-267C10222,-249 10257,-225 10283,-208"/>
+<polygon style="fill:gray;stroke:gray;" points="10285.1,-210.8 10291,-202 10280.9,-205.2 10285.1,-210.8"/>
+</g>
+<!-- t61->t188 -->
+<g id="edge479" class="edge"><title>t61->t188</title>
+<path style="fill:none;stroke:gray;" d="M10373,-166C10415,-152 10472,-134 10515,-120"/>
+<polygon style="fill:gray;stroke:gray;" points="10516.4,-123.226 10525,-117 10514.4,-116.521 10516.4,-123.226"/>
+</g>
+<!-- t63 -->
+<g id="node147" class="node"><title>t63</title>
+<polygon style="fill:#90ee90;stroke:green;" points="10864,-202 10682,-202 10678,-198 10678,-166 10860,-166 10864,-170 10864,-202"/>
+<polyline style="fill:none;stroke:green;" points="10860,-198 10678,-198 "/>
+<polyline style="fill:none;stroke:green;" points="10860,-198 10860,-166 "/>
+<polyline style="fill:none;stroke:green;" points="10860,-198 10864,-202 "/>
+<text text-anchor="middle" x="10771" y="-177.5" style="font-family:Times New Roman;font-size:20.00;">importUCSCOverlap</text>
+</g>
+<!-- t62->t63 -->
+<g id="edge131" class="edge"><title>t62->t63</title>
+<path style="fill:none;stroke:gray;" d="M10782,-252C10780,-239 10777,-225 10776,-212"/>
+<polygon style="fill:gray;stroke:gray;" points="10779.4,-211.119 10774,-202 10772.5,-212.492 10779.4,-211.119"/>
+</g>
+<!-- t63->t188 -->
+<g id="edge477" class="edge"><title>t63->t188</title>
+<path style="fill:none;stroke:gray;" d="M10729,-166C10699,-153 10659,-136 10628,-122"/>
+<polygon style="fill:gray;stroke:gray;" points="10628.6,-118.464 10618,-118 10626,-124.964 10628.6,-118.464"/>
+</g>
+<!-- t64 -->
+<g id="node149" class="node"><title>t64</title>
+<polygon style="fill:#90ee90;stroke:green;" points="10499,-396 10321,-396 10317,-392 10317,-360 10495,-360 10499,-364 10499,-396"/>
+<polyline style="fill:none;stroke:green;" points="10495,-392 10317,-392 "/>
+<polyline style="fill:none;stroke:green;" points="10495,-392 10495,-360 "/>
+<polyline style="fill:none;stroke:green;" points="10495,-392 10499,-396 "/>
+<text text-anchor="middle" x="10408" y="-371.5" style="font-family:Times New Roman;font-size:20.00;">makeReproducibility</text>
+</g>
+<!-- t65 -->
+<g id="node150" class="node"><title>t65</title>
+<polygon style="fill:#90ee90;stroke:green;" points="10672,-312 10484,-312 10480,-308 10480,-276 10668,-276 10672,-280 10672,-312"/>
+<polyline style="fill:none;stroke:green;" points="10668,-308 10480,-308 "/>
+<polyline style="fill:none;stroke:green;" points="10668,-308 10668,-276 "/>
+<polyline style="fill:none;stroke:green;" points="10668,-308 10672,-312 "/>
+<text text-anchor="middle" x="10576" y="-287.5" style="font-family:Times New Roman;font-size:20.00;">importReproducibility</text>
+</g>
+<!-- t64->t65 -->
+<g id="edge133" class="edge"><title>t64->t65</title>
+<path style="fill:none;stroke:gray;" d="M10444,-360C10469,-347 10503,-330 10531,-316"/>
+<polygon style="fill:gray;stroke:gray;" points="10532.3,-319.26 10540,-312 10529.4,-312.863 10532.3,-319.26"/>
+</g>
+<!-- t66 -->
+<g id="node152" class="node"><title>t66</title>
+<polygon style="fill:none;stroke:blue;" points="10643,-202 10513,-202 10509,-198 10509,-166 10639,-166 10643,-170 10643,-202"/>
+<polyline style="fill:none;stroke:blue;" points="10639,-198 10509,-198 "/>
+<polyline style="fill:none;stroke:blue;" points="10639,-198 10639,-166 "/>
+<polyline style="fill:none;stroke:blue;" points="10639,-198 10643,-202 "/>
+<text text-anchor="middle" x="10576" y="-177.5" style="font-family:Times New Roman;font-size:20.00;fill:blue;">reproducibility</text>
+</g>
+<!-- t65->t66 -->
+<g id="edge135" class="edge"><title>t65->t66</title>
+<path style="fill:none;stroke:gray;" d="M10576,-276C10576,-259 10576,-233 10576,-212"/>
+<polygon style="fill:gray;stroke:gray;" points="10579.5,-212 10576,-202 10572.5,-212 10579.5,-212"/>
+</g>
+<!-- t66->t188 -->
+<g id="edge139" class="edge"><title>t66->t188</title>
+<path style="fill:none;stroke:blue;" d="M10576,-166C10576,-155 10576,-141 10576,-128"/>
+<polygon style="fill:blue;stroke:blue;" points="10579.5,-128 10576,-118 10572.5,-128 10579.5,-128"/>
+</g>
+<!-- t188->t191 -->
+<g id="edge321" class="edge"><title>t188->t191</title>
+<path style="fill:none;stroke:blue;" d="M10525,-99C10391,-95 10019,-86 9709,-82 7128,-46 3939,-42 3561,-42"/>
+<polygon style="fill:blue;stroke:blue;" points="3561,-38.5001 3551,-42 3561,-45.5001 3561,-38.5001"/>
+</g>
+<!-- t93 -->
+<g id="node157" class="node"><title>t93</title>
+<polygon style="fill:#90ee90;stroke:green;" points="9979,-396 9829,-396 9825,-392 9825,-360 9975,-360 9979,-364 9979,-396"/>
+<polyline style="fill:none;stroke:green;" points="9975,-392 9825,-392 "/>
+<polyline style="fill:none;stroke:green;" points="9975,-392 9975,-360 "/>
+<polyline style="fill:none;stroke:green;" points="9975,-392 9979,-396 "/>
+<text text-anchor="middle" x="9902" y="-371.5" style="font-family:Times New Roman;font-size:20.00;">annotateIntervals</text>
+</g>
+<!-- t94 -->
+<g id="node158" class="node"><title>t94</title>
+<polygon style="fill:#90ee90;stroke:green;" points="10026,-312 9866,-312 9862,-308 9862,-276 10022,-276 10026,-280 10026,-312"/>
+<polyline style="fill:none;stroke:green;" points="10022,-308 9862,-308 "/>
+<polyline style="fill:none;stroke:green;" points="10022,-308 10022,-276 "/>
+<polyline style="fill:none;stroke:green;" points="10022,-308 10026,-312 "/>
+<text text-anchor="middle" x="9944" y="-287.5" style="font-family:Times New Roman;font-size:20.00;">importAnnotations</text>
+</g>
+<!-- t93->t94 -->
+<g id="edge141" class="edge"><title>t93->t94</title>
+<path style="fill:none;stroke:gray;" d="M9911,-360C9917,-349 9924,-334 9930,-321"/>
+<polygon style="fill:gray;stroke:gray;" points="9933.2,-322.441 9935,-312 9927.08,-319.042 9933.2,-322.441"/>
+</g>
+<!-- t189 -->
+<g id="node190" class="node"><title>t189</title>
+<polygon style="fill:none;stroke:blue;" points="9315,-202 9235,-202 9231,-198 9231,-166 9311,-166 9315,-170 9315,-202"/>
+<polyline style="fill:none;stroke:blue;" points="9311,-198 9231,-198 "/>
+<polyline style="fill:none;stroke:blue;" points="9311,-198 9311,-166 "/>
+<polyline style="fill:none;stroke:blue;" points="9311,-198 9315,-202 "/>
+<text text-anchor="middle" x="9273" y="-177.5" style="font-family:Times New Roman;font-size:20.00;fill:blue;">annotate</text>
+</g>
+<!-- t93->t189 -->
+<g id="edge517" class="edge"><title>t93->t189</title>
+<path style="fill:none;stroke:gray;" d="M9975,-360C10003,-352 10031,-343 10035,-338 10059,-306 10063,-278 10035,-250 10016,-230 9567,-230 9539,-228 9465,-220 9379,-205 9325,-194"/>
+<polygon style="fill:gray;stroke:gray;" points="9325.49,-190.529 9315,-192 9324.12,-197.393 9325.49,-190.529"/>
+</g>
+<!-- t94->t189 -->
+<g id="edge515" class="edge"><title>t94->t189</title>
+<path style="fill:none;stroke:gray;" d="M9915,-276C9897,-266 9875,-255 9853,-250 9718,-214 9678,-244 9539,-228 9465,-220 9379,-204 9325,-194"/>
+<polygon style="fill:gray;stroke:gray;" points="9325.49,-190.529 9315,-192 9324.12,-197.393 9325.49,-190.529"/>
+</g>
+<!-- t99 -->
+<g id="node162" class="node"><title>t99</title>
+<polygon style="fill:#90ee90;stroke:green;" points="8744,-312 8644,-312 8640,-308 8640,-276 8740,-276 8744,-280 8744,-312"/>
+<polyline style="fill:none;stroke:green;" points="8740,-308 8640,-308 "/>
+<polyline style="fill:none;stroke:green;" points="8740,-308 8740,-276 "/>
+<polyline style="fill:none;stroke:green;" points="8740,-308 8744,-312 "/>
+<text text-anchor="middle" x="8692" y="-287.5" style="font-family:Times New Roman;font-size:20.00;">importTSS</text>
+</g>
+<!-- t97->t99 -->
+<g id="edge145" class="edge"><title>t97->t99</title>
+<path style="fill:none;stroke:gray;" d="M8577,-360C8593,-353 8611,-346 8626,-338 8637,-332 8648,-325 8659,-318"/>
+<polygon style="fill:gray;stroke:gray;" points="8661.1,-320.8 8667,-312 8656.9,-315.2 8661.1,-320.8"/>
+</g>
+<!-- t97->t189 -->
+<g id="edge509" class="edge"><title>t97->t189</title>
+<path style="fill:none;stroke:gray;" d="M8583,-360C8594,-354 8604,-347 8611,-338 8636,-306 8601,-276 8631,-250 8675,-211 9076,-192 9221,-185"/>
+<polygon style="fill:gray;stroke:gray;" points="9221,-188.5 9231,-185 9221,-181.5 9221,-188.5"/>
+</g>
+<!-- t99->t189 -->
+<g id="edge505" class="edge"><title>t99->t189</title>
+<path style="fill:none;stroke:gray;" d="M8714,-276C8726,-266 8742,-256 8758,-250 8842,-218 9107,-195 9221,-188"/>
+<polygon style="fill:gray;stroke:gray;" points="9221.4,-191.478 9231,-187 9220.7,-184.512 9221.4,-191.478"/>
+</g>
+<!-- t98 -->
+<g id="node164" class="node"><title>t98</title>
+<polygon style="fill:#90ee90;stroke:green;" points="9247,-396 9105,-396 9101,-392 9101,-360 9243,-360 9247,-364 9247,-396"/>
+<polyline style="fill:none;stroke:green;" points="9243,-392 9101,-392 "/>
+<polyline style="fill:none;stroke:green;" points="9243,-392 9243,-360 "/>
+<polyline style="fill:none;stroke:green;" points="9243,-392 9247,-396 "/>
+<text text-anchor="middle" x="9174" y="-371.5" style="font-family:Times New Roman;font-size:20.00;">annotateRepeats</text>
+</g>
+<!-- t100 -->
+<g id="node165" class="node"><title>t100</title>
+<polygon style="fill:#90ee90;stroke:green;" points="9444,-312 9316,-312 9312,-308 9312,-276 9440,-276 9444,-280 9444,-312"/>
+<polyline style="fill:none;stroke:green;" points="9440,-308 9312,-308 "/>
+<polyline style="fill:none;stroke:green;" points="9440,-308 9440,-276 "/>
+<polyline style="fill:none;stroke:green;" points="9440,-308 9444,-312 "/>
+<text text-anchor="middle" x="9378" y="-287.5" style="font-family:Times New Roman;font-size:20.00;">importRepeats</text>
+</g>
+<!-- t98->t100 -->
+<g id="edge147" class="edge"><title>t98->t100</title>
+<path style="fill:none;stroke:gray;" d="M9237,-360C9257,-354 9279,-346 9298,-338 9312,-332 9326,-324 9339,-317"/>
+<polygon style="fill:gray;stroke:gray;" points="9340.96,-319.916 9348,-312 9337.56,-313.797 9340.96,-319.916"/>
+</g>
+<!-- t98->t189 -->
+<g id="edge507" class="edge"><title>t98->t189</title>
+<path style="fill:none;stroke:gray;" d="M9238,-360C9256,-353 9273,-346 9278,-338 9303,-300 9293,-245 9283,-212"/>
+<polygon style="fill:gray;stroke:gray;" points="9286.23,-210.573 9280,-202 9279.52,-212.584 9286.23,-210.573"/>
+</g>
+<!-- t100->t189 -->
+<g id="edge503" class="edge"><title>t100->t189</title>
+<path style="fill:none;stroke:gray;" d="M9361,-276C9343,-258 9317,-230 9297,-209"/>
+<polygon style="fill:gray;stroke:gray;" points="9299.55,-206.596 9290,-202 9294.6,-211.546 9299.55,-206.596"/>
+</g>
+<!-- t168 -->
+<g id="node169" class="node"><title>t168</title>
+<polygon style="fill:#90ee90;stroke:green;" points="9048,-312 8786,-312 8782,-308 8782,-276 9044,-276 9048,-280 9048,-312"/>
+<polyline style="fill:none;stroke:green;" points="9044,-308 8782,-308 "/>
+<polyline style="fill:none;stroke:green;" points="9044,-308 9044,-276 "/>
+<polyline style="fill:none;stroke:green;" points="9044,-308 9048,-312 "/>
+<text text-anchor="middle" x="8915" y="-287.5" style="font-family:Times New Roman;font-size:20.00;">importTSSIntervalAssociations</text>
+</g>
+<!-- t167->t168 -->
+<g id="edge151" class="edge"><title>t167->t168</title>
+<path style="fill:none;stroke:gray;" d="M8784,-360C8809,-347 8843,-330 8870,-317"/>
+<polygon style="fill:gray;stroke:gray;" points="8871.96,-319.916 8879,-312 8868.56,-313.797 8871.96,-319.916"/>
+</g>
+<!-- t167->t189 -->
+<g id="edge493" class="edge"><title>t167->t189</title>
+<path style="fill:none;stroke:gray;" d="M8750,-360C8755,-326 8765,-257 8773,-250 8805,-219 9099,-195 9221,-188"/>
+<polygon style="fill:gray;stroke:gray;" points="9221.4,-191.478 9231,-187 9220.7,-184.512 9221.4,-191.478"/>
+</g>
+<!-- t168->t189 -->
+<g id="edge491" class="edge"><title>t168->t189</title>
+<path style="fill:none;stroke:gray;" d="M8973,-276C8999,-268 9029,-258 9057,-250 9113,-232 9177,-213 9221,-200"/>
+<polygon style="fill:gray;stroke:gray;" points="9222.43,-203.226 9231,-197 9220.42,-196.521 9222.43,-203.226"/>
+</g>
+<!-- t166 -->
+<g id="node173" class="node"><title>t166</title>
+<polygon style="fill:#90ee90;stroke:green;" points="8602,-312 8370,-312 8366,-308 8366,-276 8598,-276 8602,-280 8602,-312"/>
+<polyline style="fill:none;stroke:green;" points="8598,-308 8366,-308 "/>
+<polyline style="fill:none;stroke:green;" points="8598,-308 8598,-276 "/>
+<polyline style="fill:none;stroke:green;" points="8598,-308 8602,-312 "/>
+<text text-anchor="middle" x="8484" y="-287.5" style="font-family:Times New Roman;font-size:20.00;">importTSSIntervalDistance</text>
+</g>
+<!-- t165->t166 -->
+<g id="edge155" class="edge"><title>t165->t166</title>
+<path style="fill:none;stroke:gray;" d="M8359,-360C8383,-348 8415,-330 8441,-317"/>
+<polygon style="fill:gray;stroke:gray;" points="8442.96,-319.916 8450,-312 8439.56,-313.797 8442.96,-319.916"/>
+</g>
+<!-- t165->t189 -->
+<g id="edge497" class="edge"><title>t165->t189</title>
+<path style="fill:none;stroke:gray;" d="M8331,-360C8333,-353 8335,-345 8337,-338 8348,-299 8327,-276 8357,-250 8390,-221 9032,-194 9221,-186"/>
+<polygon style="fill:gray;stroke:gray;" points="9221,-189.5 9231,-186 9221,-182.5 9221,-189.5"/>
+</g>
+<!-- t166->t189 -->
+<g id="edge495" class="edge"><title>t166->t189</title>
+<path style="fill:none;stroke:gray;" d="M8529,-276C8554,-267 8587,-256 8616,-250 8838,-202 9109,-189 9221,-185"/>
+<polygon style="fill:gray;stroke:gray;" points="9221,-188.5 9231,-185 9221,-181.5 9221,-188.5"/>
+</g>
+<!-- t95 -->
+<g id="node175" class="node"><title>t95</title>
+<polygon style="fill:#90ee90;stroke:green;" points="9807,-396 9673,-396 9669,-392 9669,-360 9803,-360 9807,-364 9807,-396"/>
+<polyline style="fill:none;stroke:green;" points="9803,-392 9669,-392 "/>
+<polyline style="fill:none;stroke:green;" points="9803,-392 9803,-360 "/>
+<polyline style="fill:none;stroke:green;" points="9803,-392 9807,-396 "/>
+<text text-anchor="middle" x="9738" y="-371.5" style="font-family:Times New Roman;font-size:20.00;">annotateTracks</text>
+</g>
+<!-- t96 -->
+<g id="node176" class="node"><title>t96</title>
+<polygon style="fill:#90ee90;stroke:green;" points="9844,-312 9726,-312 9722,-308 9722,-276 9840,-276 9844,-280 9844,-312"/>
+<polyline style="fill:none;stroke:green;" points="9840,-308 9722,-308 "/>
+<polyline style="fill:none;stroke:green;" points="9840,-308 9840,-276 "/>
+<polyline style="fill:none;stroke:green;" points="9840,-308 9844,-312 "/>
+<text text-anchor="middle" x="9783" y="-287.5" style="font-family:Times New Roman;font-size:20.00;">importTracks</text>
+</g>
+<!-- t95->t96 -->
+<g id="edge157" class="edge"><title>t95->t96</title>
+<path style="fill:none;stroke:gray;" d="M9748,-360C9754,-349 9762,-334 9768,-321"/>
+<polygon style="fill:gray;stroke:gray;" points="9771.2,-322.441 9773,-312 9765.08,-319.042 9771.2,-322.441"/>
+</g>
+<!-- t95->t189 -->
+<g id="edge513" class="edge"><title>t95->t189</title>
+<path style="fill:none;stroke:gray;" d="M9725,-360C9721,-353 9717,-345 9713,-338 9697,-300 9719,-276 9688,-250 9663,-227 9573,-233 9539,-228 9465,-216 9379,-202 9325,-193"/>
+<polygon style="fill:gray;stroke:gray;" points="9325.49,-189.529 9315,-191 9324.12,-196.393 9325.49,-189.529"/>
+</g>
+<!-- t96->t189 -->
+<g id="edge511" class="edge"><title>t96->t189</title>
+<path style="fill:none;stroke:gray;" d="M9758,-276C9744,-266 9726,-256 9708,-250 9637,-225 9614,-239 9539,-228 9465,-217 9379,-202 9325,-193"/>
+<polygon style="fill:gray;stroke:gray;" points="9325.49,-189.529 9315,-191 9324.12,-196.393 9325.49,-189.529"/>
+</g>
+<!-- t107 -->
+<g id="node180" class="node"><title>t107</title>
+<polygon style="fill:#90ee90;stroke:green;" points="9254,-312 9070,-312 9066,-308 9066,-276 9250,-276 9254,-280 9254,-312"/>
+<polyline style="fill:none;stroke:green;" points="9250,-308 9066,-308 "/>
+<polyline style="fill:none;stroke:green;" points="9250,-308 9250,-276 "/>
+<polyline style="fill:none;stroke:green;" points="9250,-308 9254,-312 "/>
+<text text-anchor="middle" x="9160" y="-287.5" style="font-family:Times New Roman;font-size:20.00;">importIntervalCounts</text>
+</g>
+<!-- t106->t107 -->
+<g id="edge161" class="edge"><title>t106->t107</title>
+<path style="fill:none;stroke:gray;" d="M9030,-360C9055,-347 9089,-330 9115,-317"/>
+<polygon style="fill:gray;stroke:gray;" points="9116.96,-319.916 9124,-312 9113.56,-313.797 9116.96,-319.916"/>
+</g>
+<!-- t106->t189 -->
+<g id="edge501" class="edge"><title>t106->t189</title>
+<path style="fill:none;stroke:gray;" d="M9083,-361C9086,-361 9089,-360 9092,-360 9130,-354 9237,-365 9263,-338 9279,-320 9278,-251 9276,-212"/>
+<polygon style="fill:gray;stroke:gray;" points="9279.48,-211.602 9275,-202 9272.51,-212.299 9279.48,-211.602"/>
+</g>
+<!-- t107->t189 -->
+<g id="edge499" class="edge"><title>t107->t189</title>
+<path style="fill:none;stroke:gray;" d="M9179,-276C9198,-258 9227,-230 9247,-209"/>
+<polygon style="fill:gray;stroke:gray;" points="9249.4,-211.546 9254,-202 9244.45,-206.596 9249.4,-211.546"/>
+</g>
+<!-- t180 -->
+<g id="node184" class="node"><title>t180</title>
+<polygon style="fill:#90ee90;stroke:green;" points="9664,-312 9486,-312 9482,-308 9482,-276 9660,-276 9664,-280 9664,-312"/>
+<polyline style="fill:none;stroke:green;" points="9660,-308 9482,-308 "/>
+<polyline style="fill:none;stroke:green;" points="9660,-308 9660,-276 "/>
+<polyline style="fill:none;stroke:green;" points="9660,-308 9664,-312 "/>
+<text text-anchor="middle" x="9573" y="-287.5" style="font-family:Times New Roman;font-size:20.00;">importSNPCoverage</text>
+</g>
+<!-- t179->t180 -->
+<g id="edge165" class="edge"><title>t179->t180</title>
+<path style="fill:none;stroke:gray;" d="M9406,-360C9425,-353 9448,-345 9468,-338 9486,-331 9506,-323 9523,-316"/>
+<polygon style="fill:gray;stroke:gray;" points="9524.28,-319.26 9532,-312 9521.44,-312.863 9524.28,-319.26"/>
+</g>
+<!-- t179->t189 -->
+<g id="edge485" class="edge"><title>t179->t189</title>
+<path style="fill:none;stroke:gray;" d="M9413,-360C9432,-353 9448,-346 9453,-338 9476,-305 9476,-282 9453,-250 9438,-229 9373,-208 9325,-196"/>
+<polygon style="fill:gray;stroke:gray;" points="9325.49,-192.529 9315,-194 9324.12,-199.393 9325.49,-192.529"/>
+</g>
+<!-- t180->t189 -->
+<g id="edge171" class="edge"><title>t180->t189</title>
+<path style="fill:none;stroke:gray;" d="M9533,-276C9513,-267 9490,-258 9468,-250 9420,-232 9364,-213 9325,-200"/>
+<polygon style="fill:gray;stroke:gray;" points="9325.58,-196.521 9315,-197 9323.57,-203.226 9325.58,-196.521"/>
+</g>
+<!-- t178->t189 -->
+<g id="edge487" class="edge"><title>t178->t189</title>
+<path style="fill:none;stroke:gray;" d="M9629,-360C9649,-354 9667,-346 9673,-338 9696,-306 9699,-279 9673,-250 9654,-226 9569,-232 9539,-228 9465,-216 9379,-201 9325,-193"/>
+<polygon style="fill:gray;stroke:gray;" points="9325.49,-189.529 9315,-191 9324.12,-196.393 9325.49,-189.529"/>
+</g>
+<!-- t176->t189 -->
+<g id="edge489" class="edge"><title>t176->t189</title>
+<path style="fill:none;stroke:gray;" d="M8219,-276C8255,-267 8301,-256 8342,-250 8675,-199 9078,-188 9221,-185"/>
+<polygon style="fill:gray;stroke:gray;" points="9221,-188.5 9231,-185 9221,-181.5 9221,-188.5"/>
+</g>
+<!-- t189->t191 -->
+<g id="edge319" class="edge"><title>t189->t191</title>
+<path style="fill:none;stroke:blue;" d="M9231,-182C9068,-176 8464,-155 7966,-140 6937,-109 6679,-104 5649,-82 4804,-63 3769,-46 3561,-42"/>
+<polygon style="fill:blue;stroke:blue;" points="3561,-38.5001 3551,-42 3561,-45.5001 3561,-38.5001"/>
+</g>
+<!-- t1 -->
+<g id="node192" class="node"><title>t1</title>
+<polygon style="fill:#90ee90;stroke:green;" points="5528,-590 5272,-590 5268,-586 5268,-554 5524,-554 5528,-558 5528,-590"/>
+<polyline style="fill:none;stroke:green;" points="5524,-586 5268,-586 "/>
+<polyline style="fill:none;stroke:green;" points="5524,-586 5524,-554 "/>
+<polyline style="fill:none;stroke:green;" points="5524,-586 5528,-590 "/>
+<text text-anchor="middle" x="5398" y="-565.5" style="font-family:Times New Roman;font-size:20.00;">buildGenomeGCSegmentation</text>
+</g>
+<!-- t2 -->
+<g id="node193" class="node"><title>t2</title>
+<polygon style="fill:#90ee90;stroke:green;" points="5480,-480 5320,-480 5316,-476 5316,-444 5476,-444 5480,-448 5480,-480"/>
+<polyline style="fill:none;stroke:green;" points="5476,-476 5316,-476 "/>
+<polyline style="fill:none;stroke:green;" points="5476,-476 5476,-444 "/>
+<polyline style="fill:none;stroke:green;" points="5476,-476 5480,-480 "/>
+<text text-anchor="middle" x="5398" y="-455.5" style="font-family:Times New Roman;font-size:20.00;">buildAnnotatorGC</text>
+</g>
+<!-- t1->t2 -->
+<g id="edge173" class="edge"><title>t1->t2</title>
+<path style="fill:none;stroke:gray;" d="M5398,-554C5398,-537 5398,-511 5398,-490"/>
+<polygon style="fill:gray;stroke:gray;" points="5401.5,-490 5398,-480 5394.5,-490 5401.5,-490"/>
+</g>
+<!-- t2->t117 -->
+<g id="edge519" class="edge"><title>t2->t117</title>
+<path style="fill:none;stroke:gray;" d="M5480,-460C5884,-452 7648,-414 7892,-396 7898,-395 7904,-395 7911,-394"/>
+<polygon style="fill:gray;stroke:gray;" points="7911.4,-397.478 7921,-393 7910.7,-390.512 7911.4,-397.478"/>
+</g>
+<!-- t2->t120 -->
+<g id="edge521" class="edge"><title>t2->t120</title>
+<path style="fill:none;stroke:gray;" d="M5480,-447C5555,-434 5665,-413 5745,-398"/>
+<polygon style="fill:gray;stroke:gray;" points="5745.88,-401.393 5755,-396 5744.51,-394.529 5745.88,-401.393"/>
+</g>
+<!-- t2->t121 -->
+<g id="edge523" class="edge"><title>t2->t121</title>
+<path style="fill:none;stroke:gray;" d="M5480,-457C5631,-447 5964,-425 6255,-396"/>
+<polygon style="fill:gray;stroke:gray;" points="6255.4,-399.478 6265,-395 6254.7,-392.512 6255.4,-399.478"/>
+</g>
+<!-- t2->t118 -->
+<g id="edge525" class="edge"><title>t2->t118</title>
+<path style="fill:none;stroke:gray;" d="M5480,-460C5810,-454 7035,-428 7421,-396 7429,-395 7437,-395 7445,-394"/>
+<polygon style="fill:gray;stroke:gray;" points="7445.4,-397.478 7455,-393 7444.7,-390.512 7445.4,-397.478"/>
+</g>
+<!-- t2->t119 -->
+<g id="edge527" class="edge"><title>t2->t119</title>
+<path style="fill:none;stroke:gray;" d="M5480,-461C5831,-456 7204,-435 7659,-396"/>
+<polygon style="fill:gray;stroke:gray;" points="7659.4,-399.478 7669,-395 7658.7,-392.512 7659.4,-399.478"/>
+</g>
+<!-- t2->t160 -->
+<g id="edge531" class="edge"><title>t2->t160</title>
+<path style="fill:none;stroke:gray;" d="M5316,-454C5208,-443 5013,-425 4845,-418 4304,-391 4165,-430 3619,-396"/>
+<polygon style="fill:gray;stroke:gray;" points="3619.3,-392.512 3609,-395 3618.6,-399.478 3619.3,-392.512"/>
+</g>
+<!-- t2->t161 -->
+<g id="edge533" class="edge"><title>t2->t161</title>
+<path style="fill:none;stroke:gray;" d="M5316,-454C5208,-443 5013,-425 4845,-418 4146,-385 3969,-434 3269,-396 3268,-396 3266,-396 3265,-396"/>
+<polygon style="fill:gray;stroke:gray;" points="3265.3,-392.512 3255,-395 3264.6,-399.478 3265.3,-392.512"/>
+</g>
+<!-- t2->t147 -->
+<g id="edge535" class="edge"><title>t2->t147</title>
+<path style="fill:none;stroke:gray;" d="M5436,-444C5463,-431 5499,-414 5528,-400"/>
+<polygon style="fill:gray;stroke:gray;" points="5529.28,-403.26 5537,-396 5526.44,-396.863 5529.28,-403.26"/>
+</g>
+<!-- t2->t148 -->
+<g id="edge537" class="edge"><title>t2->t148</title>
+<path style="fill:none;stroke:gray;" d="M5360,-444C5333,-431 5297,-414 5268,-400"/>
+<polygon style="fill:gray;stroke:gray;" points="5269.56,-396.863 5259,-396 5266.72,-403.26 5269.56,-396.863"/>
+</g>
+<!-- t2->t149 -->
+<g id="edge539" class="edge"><title>t2->t149</title>
+<path style="fill:none;stroke:gray;" d="M5316,-455C5190,-443 4940,-421 4717,-396"/>
+<polygon style="fill:gray;stroke:gray;" points="4717.3,-392.512 4707,-395 4716.6,-399.478 4717.3,-392.512"/>
+</g>
+<!-- t2->t150 -->
+<g id="edge541" class="edge"><title>t2->t150</title>
+<path style="fill:none;stroke:gray;" d="M5316,-449C5231,-435 5096,-414 4998,-398"/>
+<polygon style="fill:gray;stroke:gray;" points="4998.49,-394.529 4988,-396 4997.12,-401.393 4998.49,-394.529"/>
+</g>
+<!-- t2->t151 -->
+<g id="edge543" class="edge"><title>t2->t151</title>
+<path style="fill:none;stroke:gray;" d="M5316,-455C5208,-445 5013,-429 4845,-418 4660,-405 4611,-409 4421,-396"/>
+<polygon style="fill:gray;stroke:gray;" points="4421.3,-392.512 4411,-395 4420.6,-399.478 4421.3,-392.512"/>
+</g>
+<!-- t2->t152 -->
+<g id="edge545" class="edge"><title>t2->t152</title>
+<path style="fill:none;stroke:gray;" d="M5316,-454C5208,-444 5013,-427 4845,-418 4488,-398 4395,-416 4033,-396"/>
+<polygon style="fill:gray;stroke:gray;" points="4033.3,-392.512 4023,-395 4032.6,-399.478 4033.3,-392.512"/>
+</g>
+<!-- t190 -->
+<g id="node309" class="node"><title>t190</title>
+<polygon style="fill:none;stroke:blue;" points="5640,-118 5552,-118 5548,-114 5548,-82 5636,-82 5640,-86 5640,-118"/>
+<polyline style="fill:none;stroke:blue;" points="5636,-114 5548,-114 "/>
+<polyline style="fill:none;stroke:blue;" points="5636,-114 5636,-82 "/>
+<polyline style="fill:none;stroke:blue;" points="5636,-114 5640,-118 "/>
+<text text-anchor="middle" x="5594" y="-93.5" style="font-family:Times New Roman;font-size:20.00;fill:blue;">annotator</text>
+</g>
+<!-- t2->t190 -->
+<g id="edge577" class="edge"><title>t2->t190</title>
+<path style="fill:none;stroke:gray;" d="M5480,-460C5939,-450 8167,-399 8170,-396 8182,-384 8181,-371 8170,-360 8141,-326 8005,-367 7971,-338 7941,-311 7970,-285 7950,-250 7919,-191 7909,-166 7847,-140 7794,-116 5821,-127 5650,-118"/>
+<polygon style="fill:gray;stroke:gray;" points="5650.3,-114.512 5640,-117 5649.6,-121.478 5650.3,-114.512"/>
+</g>
+<!-- t114->t190 -->
+<g id="edge575" class="edge"><title>t114->t190</title>
+<path style="fill:none;stroke:gray;" d="M5743,-166C5714,-153 5675,-136 5644,-122"/>
+<polygon style="fill:gray;stroke:gray;" points="5645.56,-118.863 5635,-118 5642.72,-125.26 5645.56,-118.863"/>
+</g>
+<!-- t124 -->
+<g id="node199" class="node"><title>t124</title>
+<polygon style="fill:#90ee90;stroke:green;" points="7483.1,-171.794 7619,-144.5 7754.9,-171.794 7754.77,-215.956 7483.23,-215.956 7483.1,-171.794"/>
+<polygon style="fill:none;stroke:green;" points="7479.09,-168.519 7619,-140.42 7758.91,-168.519 7758.76,-219.956 7479.24,-219.956 7479.09,-168.519"/>
+<text text-anchor="middle" x="7619" y="-177.5" style="font-family:Times New Roman;font-size:20.00;">importAnnotatorArchitecture</text>
+</g>
+<!-- t117->t124 -->
+<g id="edge179" class="edge"><title>t117->t124</title>
+<path style="fill:none;stroke:gray;" d="M7968,-360C7956,-354 7945,-347 7936,-338 7909,-308 7941,-277 7911,-250 7889,-229 7829,-214 7769,-204"/>
+<polygon style="fill:gray;stroke:gray;" points="7769.49,-200.529 7759,-202 7768.12,-207.393 7769.49,-200.529"/>
+</g>
+<!-- t117->t190 -->
+<g id="edge573" class="edge"><title>t117->t190</title>
+<path style="fill:none;stroke:gray;" d="M7986,-360C7975,-354 7964,-347 7956,-338 7930,-307 7955,-282 7930,-250 7879,-179 7852,-166 7768,-140 7658,-105 5841,-137 5650,-118"/>
+<polygon style="fill:gray;stroke:gray;" points="5650.3,-114.512 5640,-117 5649.6,-121.478 5650.3,-114.512"/>
+</g>
+<!-- t124->t190 -->
+<g id="edge567" class="edge"><title>t124->t190</title>
+<path style="fill:none;stroke:gray;" d="M7518,-161C7478,-152 7431,-144 7388,-140 7212,-121 5936,-104 5650,-101"/>
+<polygon style="fill:gray;stroke:gray;" points="5650,-97.5001 5640,-101 5650,-104.5 5650,-97.5001"/>
+</g>
+<!-- t123 -->
+<g id="node203" class="node"><title>t123</title>
+<polygon style="fill:#90ee90;stroke:green;" points="4294.11,-171.794 4405,-144.5 4515.89,-171.794 4515.79,-215.956 4294.21,-215.956 4294.11,-171.794"/>
+<polygon style="fill:none;stroke:green;" points="4290.1,-168.661 4405,-140.381 4519.9,-168.661 4519.78,-219.956 4290.22,-219.956 4290.1,-168.661"/>
+<text text-anchor="middle" x="4405" y="-177.5" style="font-family:Times New Roman;font-size:20.00;">importAnnotatorTracks</text>
+</g>
+<!-- t120->t123 -->
+<g id="edge183" class="edge"><title>t120->t123</title>
+<path style="fill:none;stroke:gray;" d="M5755,-361C5752,-361 5749,-360 5746,-360 5713,-356 4562,-357 4534,-338 4501,-313 4530,-282 4504,-250 4497,-241 4489,-234 4480,-226"/>
+<polygon style="fill:gray;stroke:gray;" points="4482.1,-223.2 4472,-220 4477.9,-228.8 4482.1,-223.2"/>
+</g>
+<!-- t120->t190 -->
+<g id="edge571" class="edge"><title>t120->t190</title>
+<path style="fill:none;stroke:gray;" d="M5755,-361C5752,-361 5749,-360 5746,-360 5713,-356 4573,-361 4549,-338 4518,-306 4538,-168 4571,-140 4608,-109 5329,-101 5538,-100"/>
+<polygon style="fill:gray;stroke:gray;" points="5538,-103.5 5548,-100 5538,-96.5001 5538,-103.5"/>
+</g>
+<!-- t123->t190 -->
+<g id="edge569" class="edge"><title>t123->t190</title>
+<path style="fill:none;stroke:gray;" d="M4473,-157C4492,-150 4514,-144 4534,-140 4633,-121 5333,-106 5538,-101"/>
+<polygon style="fill:gray;stroke:gray;" points="5538,-104.5 5548,-101 5538,-97.5001 5538,-104.5"/>
+</g>
+<!-- t134 -->
+<g id="node207" class="node"><title>t134</title>
+<polygon style="fill:#90ee90;stroke:green;" points="5990.15,-171.794 6131,-144.5 6271.85,-171.794 6271.72,-215.956 5990.28,-215.956 5990.15,-171.794"/>
+<polygon style="fill:none;stroke:green;" points="5986.15,-168.494 6131,-140.426 6275.85,-168.494 6275.69,-219.956 5986.31,-219.956 5986.15,-168.494"/>
+<text text-anchor="middle" x="6131" y="-177.5" style="font-family:Times New Roman;font-size:20.00;">importAnnotatorPromotorsGO</text>
+</g>
+<!-- t130->t134 -->
+<g id="edge187" class="edge"><title>t130->t134</title>
+<path style="fill:none;stroke:gray;" d="M6552,-276C6526,-267 6494,-257 6464,-250 6405,-235 6389,-239 6329,-228 6315,-225 6301,-222 6286,-219"/>
+<polygon style="fill:gray;stroke:gray;" points="6286.49,-215.529 6276,-217 6285.12,-222.393 6286.49,-215.529"/>
+</g>
+<!-- t130->t190 -->
+<g id="edge565" class="edge"><title>t130->t190</title>
+<path style="fill:none;stroke:gray;" d="M6551,-276C6525,-267 6493,-257 6464,-250 6421,-238 6404,-250 6364,-228 6319,-201 6333,-162 6285,-140 6228,-113 5805,-103 5650,-101"/>
+<polygon style="fill:gray;stroke:gray;" points="5650,-97.5001 5640,-101 5650,-104.5 5650,-97.5001"/>
+</g>
+<!-- t134->t190 -->
+<g id="edge557" class="edge"><title>t134->t190</title>
+<path style="fill:none;stroke:gray;" d="M6021,-162C5983,-154 5939,-146 5898,-140 5812,-126 5712,-113 5650,-106"/>
+<polygon style="fill:gray;stroke:gray;" points="5650.3,-102.512 5640,-105 5649.6,-109.478 5650.3,-102.512"/>
+</g>
+<!-- t135 -->
+<g id="node211" class="node"><title>t135</title>
+<polygon style="fill:#90ee90;stroke:green;" points="6377.04,-171.794 6538,-144.5 6698.96,-171.794 6698.81,-215.956 6377.19,-215.956 6377.04,-171.794"/>
+<polygon style="fill:none;stroke:green;" points="6373.04,-168.415 6538,-140.443 6702.96,-168.415 6702.79,-219.956 6373.21,-219.956 6373.04,-168.415"/>
+<text text-anchor="middle" x="6538" y="-177.5" style="font-family:Times New Roman;font-size:20.00;">importAnnotatorPromotorsGOSlim</text>
+</g>
+<!-- t131->t135 -->
+<g id="edge191" class="edge"><title>t131->t135</title>
+<path style="fill:none;stroke:gray;" d="M6832,-276C6786,-262 6720,-241 6663,-223"/>
+<polygon style="fill:gray;stroke:gray;" points="6663.58,-219.521 6653,-220 6661.57,-226.226 6663.58,-219.521"/>
+</g>
+<!-- t131->t190 -->
+<g id="edge563" class="edge"><title>t131->t190</title>
+<path style="fill:none;stroke:gray;" d="M6876,-276C6850,-242 6786,-169 6712,-140 6663,-120 5869,-105 5650,-101"/>
+<polygon style="fill:gray;stroke:gray;" points="5650,-97.5001 5640,-101 5650,-104.5 5650,-97.5001"/>
+</g>
+<!-- t135->t190 -->
+<g id="edge555" class="edge"><title>t135->t190</title>
+<path style="fill:none;stroke:gray;" d="M6435,-158C6405,-151 6371,-144 6339,-140 6206,-121 5801,-107 5650,-101"/>
+<polygon style="fill:gray;stroke:gray;" points="5650,-97.5001 5640,-101 5650,-104.5 5650,-97.5001"/>
+</g>
+<!-- t136 -->
+<g id="node215" class="node"><title>t136</title>
+<polygon style="fill:#90ee90;stroke:green;" points="4584.12,-171.794 4726,-144.5 4867.88,-171.794 4867.75,-215.956 4584.25,-215.956 4584.12,-171.794"/>
+<polygon style="fill:none;stroke:green;" points="4580.11,-168.492 4726,-140.427 4871.89,-168.492 4871.74,-219.956 4580.26,-219.956 4580.11,-168.492"/>
+<text text-anchor="middle" x="4726" y="-177.5" style="font-family:Times New Roman;font-size:20.00;">importAnnotatorTerritoriesGO</text>
+</g>
+<!-- t132->t136 -->
+<g id="edge195" class="edge"><title>t132->t136</title>
+<path style="fill:none;stroke:gray;" d="M4691,-276C4696,-263 4702,-246 4708,-230"/>
+<polygon style="fill:gray;stroke:gray;" points="4711.54,-230.585 4712,-220 4705.04,-227.985 4711.54,-230.585"/>
+</g>
+<!-- t132->t190 -->
+<g id="edge561" class="edge"><title>t132->t190</title>
+<path style="fill:none;stroke:gray;" d="M4739,-276C4795,-258 4875,-231 4881,-228 4941,-197 4940,-162 5002,-140 5052,-121 5400,-107 5538,-102"/>
+<polygon style="fill:gray;stroke:gray;" points="5538,-105.5 5548,-102 5538,-98.5001 5538,-105.5"/>
+</g>
+<!-- t136->t190 -->
+<g id="edge553" class="edge"><title>t136->t190</title>
+<path style="fill:none;stroke:gray;" d="M4832,-161C4870,-153 4913,-145 4953,-140 5168,-112 5426,-104 5538,-101"/>
+<polygon style="fill:gray;stroke:gray;" points="5538,-104.5 5548,-101 5538,-97.5001 5538,-104.5"/>
+</g>
+<!-- t137 -->
+<g id="node219" class="node"><title>t137</title>
+<polygon style="fill:#90ee90;stroke:green;" points="5014.95,-171.794 5177,-144.5 5339.05,-171.794 5338.9,-215.956 5015.1,-215.956 5014.95,-171.794"/>
+<polygon style="fill:none;stroke:green;" points="5010.95,-168.411 5177,-140.444 5343.05,-168.411 5342.87,-219.956 5011.13,-219.956 5010.95,-168.411"/>
+<text text-anchor="middle" x="5177" y="-177.5" style="font-family:Times New Roman;font-size:20.00;">importAnnotatorTerritoriesGOSlim</text>
+</g>
+<!-- t133->t137 -->
+<g id="edge199" class="edge"><title>t133->t137</title>
+<path style="fill:none;stroke:gray;" d="M5006,-276C5032,-262 5068,-243 5101,-225"/>
+<polygon style="fill:gray;stroke:gray;" points="5102.96,-227.916 5110,-220 5099.56,-221.797 5102.96,-227.916"/>
+</g>
+<!-- t133->t190 -->
+<g id="edge559" class="edge"><title>t133->t190</title>
+<path style="fill:none;stroke:gray;" d="M5025,-276C5055,-267 5092,-256 5125,-250 5225,-231 5255,-257 5352,-228 5374,-221 5489,-158 5552,-123"/>
+<polygon style="fill:gray;stroke:gray;" points="5553.96,-125.916 5561,-118 5550.56,-119.797 5553.96,-125.916"/>
+</g>
+<!-- t137->t190 -->
+<g id="edge551" class="edge"><title>t137->t190</title>
+<path style="fill:none;stroke:gray;" d="M5286,-159C5314,-152 5345,-146 5374,-140 5430,-129 5493,-117 5538,-110"/>
+<polygon style="fill:gray;stroke:gray;" points="5538.88,-113.393 5548,-108 5537.51,-106.529 5538.88,-113.393"/>
+</g>
+<!-- t125 -->
+<g id="node223" class="node"><title>t125</title>
+<polygon style="fill:none;stroke:blue;stroke-dasharray:5,2;" points="7056.09,-281.794 7217,-254.5 7377.91,-281.794 7377.76,-325.956 7056.24,-325.956 7056.09,-281.794"/>
+<polygon style="fill:none;stroke:blue;stroke-dasharray:5,2;" points="7052.08,-278.417 7217,-250.443 7381.92,-278.417 7381.74,-329.956 7052.26,-329.956 7052.08,-278.417"/>
+<text text-anchor="middle" x="7217" y="-287.5" style="font-family:Times New Roman;font-size:20.00;fill:blue;">importAnnotatorRegionsOfInterest</text>
+</g>
+<!-- t121->t125 -->
+<g id="edge203" class="edge"><title>t121->t125</title>
+<path style="fill:none;stroke:blue;" d="M6551,-361C6554,-360 6557,-360 6560,-360 6775,-342 6831,-369 7043,-338 7054,-336 7065,-334 7076,-332"/>
+<polygon style="fill:blue;stroke:blue;" points="7076.88,-335.393 7086,-330 7075.51,-328.529 7076.88,-335.393"/>
+</g>
+<!-- t128 -->
+<g id="node233" class="node"><title>t128</title>
+<polygon style="fill:none;stroke:blue;" points="7379,-202 7223,-202 7219,-198 7219,-166 7375,-166 7379,-170 7379,-202"/>
+<polyline style="fill:none;stroke:blue;" points="7375,-198 7219,-198 "/>
+<polyline style="fill:none;stroke:blue;" points="7375,-198 7375,-166 "/>
+<polyline style="fill:none;stroke:blue;" points="7375,-198 7379,-202 "/>
+<text text-anchor="middle" x="7299" y="-177.5" style="font-family:Times New Roman;font-size:20.00;fill:blue;">annotator_regions</text>
+</g>
+<!-- t125->t128 -->
+<g id="edge215" class="edge"><title>t125->t128</title>
+<path style="fill:none;stroke:blue;" d="M7246,-255C7258,-240 7270,-224 7280,-210"/>
+<polygon style="fill:blue;stroke:blue;" points="7282.8,-212.1 7286,-202 7277.2,-207.9 7282.8,-212.1"/>
+</g>
+<!-- t126 -->
+<g id="node227" class="node"><title>t126</title>
+<polygon style="fill:#90ee90;stroke:green;" points="7404.02,-281.794 7516,-254.5 7627.98,-281.794 7627.88,-325.956 7404.12,-325.956 7404.02,-281.794"/>
+<polygon style="fill:none;stroke:green;" points="7400.01,-278.653 7516,-250.383 7631.99,-278.653 7631.87,-329.956 7400.13,-329.956 7400.01,-278.653"/>
+<text text-anchor="middle" x="7516" y="-287.5" style="font-family:Times New Roman;font-size:20.00;">importAnnotatorGWAS</text>
+</g>
+<!-- t118->t126 -->
+<g id="edge207" class="edge"><title>t118->t126</title>
+<path style="fill:none;stroke:gray;" d="M7545,-360C7542,-354 7539,-347 7536,-340"/>
+<polygon style="fill:gray;stroke:gray;" points="7538.96,-337.985 7532,-330 7532.46,-340.585 7538.96,-337.985"/>
+</g>
+<!-- t126->t128 -->
+<g id="edge529" class="edge"><title>t126->t128</title>
+<path style="fill:none;stroke:gray;" d="M7458,-265C7423,-247 7377,-224 7344,-207"/>
+<polygon style="fill:gray;stroke:gray;" points="7345.44,-203.797 7335,-202 7342.04,-209.916 7345.44,-203.797"/>
+</g>
+<!-- t127 -->
+<g id="node231" class="node"><title>t127</title>
+<polygon style="fill:#90ee90;stroke:green;" points="7654.17,-281.794 7776,-254.5 7897.83,-281.794 7897.71,-325.956 7654.29,-325.956 7654.17,-281.794"/>
+<polygon style="fill:none;stroke:green;" points="7650.18,-278.59 7776,-250.402 7901.82,-278.59 7901.69,-329.956 7650.31,-329.956 7650.18,-278.59"/>
+<text text-anchor="middle" x="7776" y="-287.5" style="font-family:Times New Roman;font-size:20.00;">importAnnotatorSelection</text>
+</g>
+<!-- t119->t127 -->
+<g id="edge211" class="edge"><title>t119->t127</title>
+<path style="fill:none;stroke:gray;" d="M7776,-360C7776,-354 7776,-347 7776,-340"/>
+<polygon style="fill:gray;stroke:gray;" points="7779.5,-340 7776,-330 7772.5,-340 7779.5,-340"/>
+</g>
+<!-- t127->t128 -->
+<g id="edge213" class="edge"><title>t127->t128</title>
+<path style="fill:none;stroke:gray;" d="M7702,-267C7683,-260 7661,-254 7641,-250 7567,-233 7546,-242 7470,-228 7438,-222 7402,-213 7372,-205"/>
+<polygon style="fill:gray;stroke:gray;" points="7372.58,-201.521 7362,-202 7370.57,-208.226 7372.58,-201.521"/>
+</g>
+<!-- t128->t190 -->
+<g id="edge315" class="edge"><title>t128->t190</title>
+<path style="fill:none;stroke:blue;" d="M7219,-177C7110,-167 6909,-150 6737,-140 6320,-117 5817,-105 5650,-101"/>
+<polygon style="fill:blue;stroke:blue;" points="5650,-97.5001 5640,-101 5650,-104.5 5650,-97.5001"/>
+</g>
+<!-- t162 -->
+<g id="node238" class="node"><title>t162</title>
+<polygon style="fill:none;stroke:blue;" points="1608.24,-281.794 1786,-254.5 1963.76,-281.794 1963.59,-325.956 1608.41,-325.956 1608.24,-281.794"/>
+<polygon style="fill:none;stroke:blue;" points="1604.23,-278.363 1786,-250.453 1967.77,-278.363 1967.58,-329.956 1604.42,-329.956 1604.23,-278.363"/>
+<text text-anchor="middle" x="1786" y="-287.5" style="font-family:Times New Roman;font-size:20.00;fill:blue;">importAnnotatorRegionsOfInterestTop</text>
+</g>
+<!-- t160->t162 -->
+<g id="edge219" class="edge"><title>t160->t162</title>
+<path style="fill:none;stroke:blue;" d="M3293,-361C3288,-361 3284,-360 3279,-360 2991,-342 2265,-373 1977,-338 1964,-336 1951,-334 1938,-332"/>
+<polygon style="fill:blue;stroke:blue;" points="1938.49,-328.529 1928,-330 1937.12,-335.393 1938.49,-328.529"/>
+</g>
+<!-- t164 -->
+<g id="node244" class="node"><title>t164</title>
+<polygon style="fill:none;stroke:blue;" points="1678,-202 1496,-202 1492,-198 1492,-166 1674,-166 1678,-170 1678,-202"/>
+<polyline style="fill:none;stroke:blue;" points="1674,-198 1492,-198 "/>
+<polyline style="fill:none;stroke:blue;" points="1674,-198 1674,-166 "/>
+<polyline style="fill:none;stroke:blue;" points="1674,-198 1678,-202 "/>
+<text text-anchor="middle" x="1585" y="-177.5" style="font-family:Times New Roman;font-size:20.00;fill:blue;">annotator_proportion</text>
+</g>
+<!-- t160->t164 -->
+<g id="edge231" class="edge"><title>t160->t164</title>
+<path style="fill:none;stroke:blue;" d="M3293,-361C3288,-361 3284,-360 3279,-360 3256,-358 1612,-354 1595,-338 1579,-322 1580,-252 1582,-212"/>
+<polygon style="fill:blue;stroke:blue;" points="1585.49,-212.299 1583,-202 1578.52,-211.602 1585.49,-212.299"/>
+</g>
+<!-- t162->t164 -->
+<g id="edge227" class="edge"><title>t162->t164</title>
+<path style="fill:none;stroke:blue;" d="M1724,-260C1692,-243 1655,-222 1627,-207"/>
+<polygon style="fill:blue;stroke:blue;" points="1628.44,-203.797 1618,-202 1625.04,-209.916 1628.44,-203.797"/>
+</g>
+<!-- t163 -->
+<g id="node242" class="node"><title>t163</title>
+<polygon style="fill:none;stroke:blue;" points="1175.97,-281.794 1369,-254.5 1562.03,-281.794 1561.85,-325.956 1176.15,-325.956 1175.97,-281.794"/>
+<polygon style="fill:none;stroke:blue;" points="1171.96,-278.32 1369,-250.46 1566.04,-278.32 1565.83,-329.956 1172.17,-329.956 1171.96,-278.32"/>
+<text text-anchor="middle" x="1369" y="-287.5" style="font-family:Times New Roman;font-size:20.00;fill:blue;">importAnnotatorRegionsOfInterestBottom</text>
+</g>
+<!-- t161->t163 -->
+<g id="edge223" class="edge"><title>t161->t163</title>
+<path style="fill:none;stroke:blue;" d="M2909,-361C2904,-360 2900,-360 2895,-360 2604,-343 1871,-370 1580,-338 1565,-336 1551,-334 1536,-332"/>
+<polygon style="fill:blue;stroke:blue;" points="1536.49,-328.529 1526,-330 1535.12,-335.393 1536.49,-328.529"/>
+</g>
+<!-- t161->t164 -->
+<g id="edge229" class="edge"><title>t161->t164</title>
+<path style="fill:none;stroke:blue;" d="M2909,-361C2904,-360 2900,-360 2895,-360 2871,-358 1180,-355 1163,-338 1150,-324 1150,-264 1163,-250 1186,-225 1427,-234 1459,-228 1484,-223 1510,-214 1532,-206"/>
+<polygon style="fill:blue;stroke:blue;" points="1534.02,-208.964 1542,-202 1531.42,-202.464 1534.02,-208.964"/>
+</g>
+<!-- t163->t164 -->
+<g id="edge225" class="edge"><title>t163->t164</title>
+<path style="fill:none;stroke:blue;" d="M1436,-260C1470,-243 1511,-222 1541,-207"/>
+<polygon style="fill:blue;stroke:blue;" points="1542.96,-209.916 1550,-202 1539.56,-203.797 1542.96,-209.916"/>
+</g>
+<!-- t164->t190 -->
+<g id="edge309" class="edge"><title>t164->t190</title>
+<path style="fill:none;stroke:blue;" d="M1678,-181C1926,-173 2624,-152 3204,-140 4138,-119 5274,-104 5538,-101"/>
+<polygon style="fill:blue;stroke:blue;" points="5538,-104.5 5548,-101 5538,-97.5001 5538,-104.5"/>
+</g>
+<!-- t153 -->
+<g id="node251" class="node"><title>t153</title>
+<polygon style="fill:none;stroke:blue;" points="4113.93,-281.794 4295,-254.5 4476.07,-281.794 4475.9,-325.956 4114.1,-325.956 4113.93,-281.794"/>
+<polygon style="fill:none;stroke:blue;" points="4109.92,-278.352 4295,-250.455 4480.08,-278.352 4479.88,-329.956 4110.12,-329.956 4109.92,-278.352"/>
+<text text-anchor="middle" x="4295" y="-287.5" style="font-family:Times New Roman;font-size:20.00;fill:blue;">importAnnotatorArchitectureWithMotif</text>
+</g>
+<!-- t147->t153 -->
+<g id="edge235" class="edge"><title>t147->t153</title>
+<path style="fill:none;stroke:blue;" d="M5413,-360C5410,-360 5407,-360 5404,-360 5001,-333 4896,-386 4494,-338 4481,-336 4467,-334 4453,-332"/>
+<polygon style="fill:blue;stroke:blue;" points="4453.49,-328.529 4443,-330 4452.12,-335.393 4453.49,-328.529"/>
+</g>
+<!-- t159 -->
+<g id="node273" class="node"><title>t159</title>
+<polygon style="fill:none;stroke:blue;" points="3365,-202 3217,-202 3213,-198 3213,-166 3361,-166 3365,-170 3365,-202"/>
+<polyline style="fill:none;stroke:blue;" points="3361,-198 3213,-198 "/>
+<polyline style="fill:none;stroke:blue;" points="3361,-198 3361,-166 "/>
+<polyline style="fill:none;stroke:blue;" points="3361,-198 3365,-202 "/>
+<text text-anchor="middle" x="3289" y="-177.5" style="font-family:Times New Roman;font-size:20.00;fill:blue;">annotator_motifs</text>
+</g>
+<!-- t147->t159 -->
+<g id="edge275" class="edge"><title>t147->t159</title>
+<path style="fill:none;stroke:blue;" d="M5413,-360C5410,-360 5407,-360 5404,-360 5380,-358 4523,-354 4504,-338 4474,-312 4519,-276 4489,-250 4469,-231 3647,-198 3375,-187"/>
+<polygon style="fill:blue;stroke:blue;" points="3375,-183.5 3365,-187 3375,-190.5 3375,-183.5"/>
+</g>
+<!-- t153->t159 -->
+<g id="edge265" class="edge"><title>t153->t159</title>
+<path style="fill:none;stroke:blue;" d="M4188,-267C4160,-260 4129,-254 4101,-250 3839,-210 3524,-193 3375,-187"/>
+<polygon style="fill:blue;stroke:blue;" points="3375,-183.5 3365,-187 3375,-190.5 3375,-183.5"/>
+</g>
+<!-- t154 -->
+<g id="node255" class="node"><title>t154</title>
+<polygon style="fill:none;stroke:blue;" points="3697.79,-281.794 3893,-254.5 4088.21,-281.794 4088.02,-325.956 3697.98,-325.956 3697.79,-281.794"/>
+<polygon style="fill:none;stroke:blue;" points="3693.79,-278.315 3893,-250.461 4092.21,-278.315 4092,-329.956 3694,-329.956 3693.79,-278.315"/>
+<text text-anchor="middle" x="3893" y="-287.5" style="font-family:Times New Roman;font-size:20.00;fill:blue;">importAnnotatorArchitectureWithoutMotif</text>
+</g>
+<!-- t148->t154 -->
+<g id="edge239" class="edge"><title>t148->t154</title>
+<path style="fill:none;stroke:blue;" d="M5045,-360C5042,-360 5039,-360 5036,-360 4622,-334 4514,-386 4101,-338 4087,-336 4073,-334 4058,-332"/>
+<polygon style="fill:blue;stroke:blue;" points="4058.49,-328.529 4048,-330 4057.12,-335.393 4058.49,-328.529"/>
+</g>
+<!-- t148->t159 -->
+<g id="edge273" class="edge"><title>t148->t159</title>
+<path style="fill:none;stroke:blue;" d="M5045,-360C5042,-360 5039,-360 5036,-360 4999,-357 3715,-361 3685,-338 3654,-312 3695,-277 3665,-250 3624,-211 3473,-195 3375,-189"/>
+<polygon style="fill:blue;stroke:blue;" points="3375.3,-185.512 3365,-188 3374.6,-192.478 3375.3,-185.512"/>
+</g>
+<!-- t154->t159 -->
+<g id="edge263" class="edge"><title>t154->t159</title>
+<path style="fill:none;stroke:blue;" d="M3771,-268C3741,-261 3710,-255 3680,-250 3575,-230 3454,-210 3375,-198"/>
+<polygon style="fill:blue;stroke:blue;" points="3375.49,-194.529 3365,-196 3374.12,-201.393 3375.49,-194.529"/>
+</g>
+<!-- t155 -->
+<g id="node259" class="node"><title>t155</title>
+<polygon style="fill:none;stroke:blue;" points="2953.93,-281.794 3110,-254.5 3266.07,-281.794 3265.92,-325.956 2954.08,-325.956 2953.93,-281.794"/>
+<polygon style="fill:none;stroke:blue;" points="2949.93,-278.433 3110,-250.44 3270.07,-278.433 3269.9,-329.956 2950.1,-329.956 2949.93,-278.433"/>
+<text text-anchor="middle" x="3110" y="-287.5" style="font-family:Times New Roman;font-size:20.00;fill:blue;">importAnnotatorTracksWithMotif</text>
+</g>
+<!-- t149->t155 -->
+<g id="edge243" class="edge"><title>t149->t155</title>
+<path style="fill:none;stroke:blue;" d="M4429,-361C4426,-360 4423,-360 4420,-360 4169,-342 3535,-371 3284,-338 3273,-336 3261,-334 3250,-332"/>
+<polygon style="fill:blue;stroke:blue;" points="3250.49,-328.529 3240,-330 3249.12,-335.393 3250.49,-328.529"/>
+</g>
+<!-- t149->t159 -->
+<g id="edge271" class="edge"><title>t149->t159</title>
+<path style="fill:none;stroke:blue;" d="M4429,-361C4426,-360 4423,-360 4420,-360 4380,-357 2970,-367 2941,-338 2928,-324 2929,-265 2941,-250 2974,-210 3111,-194 3203,-188"/>
+<polygon style="fill:blue;stroke:blue;" points="3203.4,-191.478 3213,-187 3202.7,-184.512 3203.4,-191.478"/>
+</g>
+<!-- t155->t159 -->
+<g id="edge261" class="edge"><title>t155->t159</title>
+<path style="fill:none;stroke:blue;" d="M3166,-260C3194,-243 3226,-223 3251,-207"/>
+<polygon style="fill:blue;stroke:blue;" points="3252.96,-209.916 3260,-202 3249.56,-203.797 3252.96,-209.916"/>
+</g>
+<!-- t156 -->
+<g id="node263" class="node"><title>t156</title>
+<polygon style="fill:#90ee90;stroke:green;" points="3311.85,-281.794 3482,-254.5 3652.15,-281.794 3651.99,-325.956 3312.01,-325.956 3311.85,-281.794"/>
+<polygon style="fill:none;stroke:green;" points="3307.84,-278.386 3482,-250.449 3656.16,-278.386 3655.97,-329.956 3308.03,-329.956 3307.84,-278.386"/>
+<text text-anchor="middle" x="3482" y="-287.5" style="font-family:Times New Roman;font-size:20.00;">importAnnotatorTracksWithoutMotif</text>
+</g>
+<!-- t150->t156 -->
+<g id="edge247" class="edge"><title>t150->t156</title>
+<path style="fill:none;stroke:gray;" d="M4725,-361C4722,-360 4719,-360 4716,-360 4485,-344 3901,-367 3670,-338 3657,-336 3645,-334 3632,-332"/>
+<polygon style="fill:gray;stroke:gray;" points="3632.49,-328.529 3622,-330 3631.12,-335.393 3632.49,-328.529"/>
+</g>
+<!-- t150->t159 -->
+<g id="edge549" class="edge"><title>t150->t159</title>
+<path style="fill:none;stroke:gray;" d="M4725,-361C4722,-360 4719,-360 4716,-360 4677,-357 3328,-365 3299,-338 3283,-322 3284,-252 3286,-212"/>
+<polygon style="fill:gray;stroke:gray;" points="3289.49,-212.299 3287,-202 3282.52,-211.602 3289.49,-212.299"/>
+</g>
+<!-- t156->t159 -->
+<g id="edge547" class="edge"><title>t156->t159</title>
+<path style="fill:none;stroke:gray;" d="M3422,-260C3392,-243 3356,-223 3330,-207"/>
+<polygon style="fill:gray;stroke:gray;" points="3331.44,-203.797 3321,-202 3328.04,-209.916 3331.44,-203.797"/>
+</g>
+<!-- t157 -->
+<g id="node267" class="node"><title>t157</title>
+<polygon style="fill:none;stroke:blue;" points="2475.92,-281.794 2682,-254.5 2888.08,-281.794 2887.89,-325.956 2476.11,-325.956 2475.92,-281.794"/>
+<polygon style="fill:none;stroke:blue;" points="2471.92,-278.29 2682,-250.465 2892.08,-278.29 2891.86,-329.956 2472.14,-329.956 2471.92,-278.29"/>
+<text text-anchor="middle" x="2682" y="-287.5" style="font-family:Times New Roman;font-size:20.00;fill:blue;">importAnnotatorRegionsOfInterestWithMotif</text>
+</g>
+<!-- t151->t157 -->
+<g id="edge251" class="edge"><title>t151->t157</title>
+<path style="fill:none;stroke:blue;" d="M4041,-360C4038,-360 4035,-360 4032,-360 3783,-345 3155,-364 2906,-338 2891,-336 2875,-334 2859,-332"/>
+<polygon style="fill:blue;stroke:blue;" points="2859.49,-328.529 2849,-330 2858.12,-335.393 2859.49,-328.529"/>
+</g>
+<!-- t151->t159 -->
+<g id="edge269" class="edge"><title>t151->t159</title>
+<path style="fill:none;stroke:blue;" d="M4041,-360C4038,-360 4035,-360 4032,-360 4002,-358 2948,-359 2926,-338 2899,-310 2901,-280 2926,-250 2961,-208 3107,-193 3203,-188"/>
+<polygon style="fill:blue;stroke:blue;" points="3203.4,-191.478 3213,-187 3202.7,-184.512 3203.4,-191.478"/>
+</g>
+<!-- t157->t159 -->
+<g id="edge259" class="edge"><title>t157->t159</title>
+<path style="fill:none;stroke:blue;" d="M2813,-268C2843,-262 2876,-255 2906,-250 3007,-231 3124,-211 3203,-198"/>
+<polygon style="fill:blue;stroke:blue;" points="3203.88,-201.393 3213,-196 3202.51,-194.529 3203.88,-201.393"/>
+</g>
+<!-- t158 -->
+<g id="node271" class="node"><title>t158</title>
+<polygon style="fill:none;stroke:blue;" points="1989.84,-281.794 2210,-254.5 2430.16,-281.794 2429.95,-325.956 1990.05,-325.956 1989.84,-281.794"/>
+<polygon style="fill:none;stroke:blue;" points="1985.83,-278.261 2210,-250.469 2434.17,-278.261 2433.93,-329.956 1986.07,-329.956 1985.83,-278.261"/>
+<text text-anchor="middle" x="2210" y="-287.5" style="font-family:Times New Roman;font-size:20.00;fill:blue;">importAnnotatorRegionsOfInterestWithoutMotif</text>
+</g>
+<!-- t152->t158 -->
+<g id="edge255" class="edge"><title>t152->t158</title>
+<path style="fill:none;stroke:blue;" d="M3627,-360C3624,-360 3621,-360 3618,-360 3359,-346 2707,-364 2448,-338 2431,-336 2415,-334 2397,-331"/>
+<polygon style="fill:blue;stroke:blue;" points="2397.3,-327.512 2387,-330 2396.6,-334.478 2397.3,-327.512"/>
+</g>
+<!-- t152->t159 -->
+<g id="edge267" class="edge"><title>t152->t159</title>
+<path style="fill:none;stroke:blue;" d="M3627,-360C3624,-360 3621,-360 3618,-360 3586,-358 2486,-360 2463,-338 2450,-324 2450,-264 2463,-250 2489,-223 2996,-197 3203,-187"/>
+<polygon style="fill:blue;stroke:blue;" points="3203,-190.5 3213,-187 3203,-183.5 3203,-190.5"/>
+</g>
+<!-- t158->t159 -->
+<g id="edge257" class="edge"><title>t158->t159</title>
+<path style="fill:none;stroke:blue;" d="M2343,-267C2377,-261 2414,-254 2448,-250 2723,-214 3050,-195 3203,-187"/>
+<polygon style="fill:blue;stroke:blue;" points="3203,-190.5 3213,-187 3203,-183.5 3203,-190.5"/>
+</g>
+<!-- t159->t190 -->
+<g id="edge311" class="edge"><title>t159->t190</title>
+<path style="fill:none;stroke:blue;" d="M3365,-180C3531,-172 3939,-152 4281,-140 4768,-121 5356,-106 5538,-101"/>
+<polygon style="fill:blue;stroke:blue;" points="5538,-104.5 5548,-101 5538,-97.5001 5538,-104.5"/>
+</g>
+<!-- t142 -->
+<g id="node286" class="node"><title>t142</title>
+<polygon style="fill:none;stroke:blue;" points="5474.24,-281.794 5589,-254.5 5703.76,-281.794 5703.65,-325.956 5474.35,-325.956 5474.24,-281.794"/>
+<polygon style="fill:none;stroke:blue;" points="5470.25,-278.632 5589,-250.389 5707.75,-278.632 5707.63,-329.956 5470.37,-329.956 5470.25,-278.632"/>
+<text text-anchor="middle" x="5589" y="-287.5" style="font-family:Times New Roman;font-size:20.00;fill:blue;">importAnnotatorROIGO</text>
+</g>
+<!-- t139->t142 -->
+<g id="edge279" class="edge"><title>t139->t142</title>
+<path style="fill:none;stroke:blue;" d="M6569,-361C6566,-361 6563,-360 6560,-360 6375,-342 5906,-370 5722,-338 5714,-337 5706,-335 5698,-333"/>
+<polygon style="fill:blue;stroke:blue;" points="5698.58,-329.521 5688,-330 5696.57,-336.226 5698.58,-329.521"/>
+</g>
+<!-- t146 -->
+<g id="node300" class="node"><title>t146</title>
+<polygon style="fill:none;stroke:blue;" points="5656,-202 5536,-202 5532,-198 5532,-166 5652,-166 5656,-170 5656,-202"/>
+<polyline style="fill:none;stroke:blue;" points="5652,-198 5532,-198 "/>
+<polyline style="fill:none;stroke:blue;" points="5652,-198 5652,-166 "/>
+<polyline style="fill:none;stroke:blue;" points="5652,-198 5656,-202 "/>
+<text text-anchor="middle" x="5594" y="-177.5" style="font-family:Times New Roman;font-size:20.00;fill:blue;">annotator_roi</text>
+</g>
+<!-- t139->t146 -->
+<g id="edge305" class="edge"><title>t139->t146</title>
+<path style="fill:none;stroke:blue;" d="M6569,-361C6566,-361 6563,-360 6560,-360 6530,-357 5483,-359 5461,-338 5448,-324 5451,-266 5461,-250 5475,-227 5499,-212 5522,-202"/>
+<polygon style="fill:blue;stroke:blue;" points="5523.43,-205.226 5532,-199 5521.42,-198.521 5523.43,-205.226"/>
+</g>
+<!-- t142->t146 -->
+<g id="edge299" class="edge"><title>t142->t146</title>
+<path style="fill:none;stroke:blue;" d="M5591,-251C5592,-238 5592,-224 5592,-212"/>
+<polygon style="fill:blue;stroke:blue;" points="5595.49,-212.299 5593,-202 5588.52,-211.602 5595.49,-212.299"/>
+</g>
+<!-- t143 -->
+<g id="node290" class="node"><title>t143</title>
+<polygon style="fill:none;stroke:blue;" points="5138.13,-281.794 5273,-254.5 5407.87,-281.794 5407.75,-325.956 5138.25,-325.956 5138.13,-281.794"/>
+<polygon style="fill:none;stroke:blue;" points="5134.13,-278.522 5273,-250.419 5411.87,-278.522 5411.72,-329.956 5134.28,-329.956 5134.13,-278.522"/>
+<text text-anchor="middle" x="5273" y="-287.5" style="font-family:Times New Roman;font-size:20.00;fill:blue;">importAnnotatorROIGOSlim</text>
+</g>
+<!-- t138->t143 -->
+<g id="edge283" class="edge"><title>t138->t143</title>
+<path style="fill:none;stroke:blue;" d="M6007,-361C6002,-361 5998,-360 5993,-360 5742,-338 5675,-378 5426,-338 5417,-337 5408,-334 5398,-332"/>
+<polygon style="fill:blue;stroke:blue;" points="5398.49,-328.529 5388,-330 5397.12,-335.393 5398.49,-328.529"/>
+</g>
+<!-- t138->t146 -->
+<g id="edge307" class="edge"><title>t138->t146</title>
+<path style="fill:none;stroke:blue;" d="M6007,-361C6002,-361 5998,-360 5993,-360 5963,-357 5468,-359 5446,-338 5419,-309 5425,-282 5446,-250 5463,-223 5494,-207 5522,-198"/>
+<polygon style="fill:blue;stroke:blue;" points="5523.43,-201.226 5532,-195 5521.42,-194.521 5523.43,-201.226"/>
+</g>
+<!-- t143->t146 -->
+<g id="edge297" class="edge"><title>t143->t146</title>
+<path style="fill:none;stroke:blue;" d="M5353,-267C5408,-248 5481,-223 5531,-205"/>
+<polygon style="fill:blue;stroke:blue;" points="5532.43,-208.226 5541,-202 5530.42,-201.521 5532.43,-208.226"/>
+</g>
+<!-- t144 -->
+<g id="node294" class="node"><title>t144</title>
+<polygon style="fill:none;stroke:blue;" points="5749.97,-281.794 5900,-254.5 6050.03,-281.794 6049.89,-325.956 5750.11,-325.956 5749.97,-281.794"/>
+<polygon style="fill:none;stroke:blue;" points="5745.96,-278.458 5900,-250.434 6054.04,-278.458 6053.88,-329.956 5746.12,-329.956 5745.96,-278.458"/>
+<text text-anchor="middle" x="5900" y="-287.5" style="font-family:Times New Roman;font-size:20.00;fill:blue;">importAnnotatorROIOverlapGO</text>
+</g>
+<!-- t141->t144 -->
+<g id="edge287" class="edge"><title>t141->t144</title>
+<path style="fill:none;stroke:blue;" d="M6789,-361C6786,-360 6783,-360 6780,-360 6465,-335 6382,-383 6068,-338 6057,-337 6047,-334 6037,-332"/>
+<polygon style="fill:blue;stroke:blue;" points="6037.49,-328.529 6027,-330 6036.12,-335.393 6037.49,-328.529"/>
+</g>
+<!-- t141->t146 -->
+<g id="edge301" class="edge"><title>t141->t146</title>
+<path style="fill:none;stroke:blue;" d="M6789,-361C6786,-360 6783,-360 6780,-360 6752,-357 5760,-356 5737,-338 5706,-312 5743,-281 5717,-250 5703,-232 5691,-238 5670,-228 5658,-222 5645,-214 5634,-207"/>
+<polygon style="fill:blue;stroke:blue;" points="5635.44,-203.797 5625,-202 5632.04,-209.916 5635.44,-203.797"/>
+</g>
+<!-- t144->t146 -->
+<g id="edge295" class="edge"><title>t144->t146</title>
+<path style="fill:none;stroke:blue;" d="M5802,-268C5751,-254 5694,-238 5670,-228 5657,-222 5643,-215 5631,-207"/>
+<polygon style="fill:blue;stroke:blue;" points="5632.44,-203.797 5622,-202 5629.04,-209.916 5632.44,-203.797"/>
+</g>
+<!-- t145 -->
+<g id="node298" class="node"><title>t145</title>
+<polygon style="fill:none;stroke:blue;" points="6095.8,-281.794 6266,-254.5 6436.2,-281.794 6436.04,-325.956 6095.96,-325.956 6095.8,-281.794"/>
+<polygon style="fill:none;stroke:blue;" points="6091.8,-278.385 6266,-250.449 6440.2,-278.385 6440.02,-329.956 6091.98,-329.956 6091.8,-278.385"/>
+<text text-anchor="middle" x="6266" y="-287.5" style="font-family:Times New Roman;font-size:20.00;fill:blue;">importAnnotatorROIOverlapGOSlim</text>
+</g>
+<!-- t140->t145 -->
+<g id="edge291" class="edge"><title>t140->t145</title>
+<path style="fill:none;stroke:blue;" d="M7073,-361C7070,-360 7067,-360 7064,-360 6794,-340 6723,-373 6454,-338 6442,-336 6430,-334 6418,-332"/>
+<polygon style="fill:blue;stroke:blue;" points="6418.49,-328.529 6408,-330 6417.12,-335.393 6418.49,-328.529"/>
+</g>
+<!-- t140->t146 -->
+<g id="edge303" class="edge"><title>t140->t146</title>
+<path style="fill:none;stroke:blue;" d="M7073,-361C7070,-360 7067,-360 7064,-360 7037,-358 6105,-355 6083,-338 6052,-312 6094,-275 6063,-250 6030,-221 5712,-240 5670,-228 5655,-223 5640,-216 5627,-208"/>
+<polygon style="fill:blue;stroke:blue;" points="5629.1,-205.2 5619,-202 5624.9,-210.8 5629.1,-205.2"/>
+</g>
+<!-- t145->t146 -->
+<g id="edge293" class="edge"><title>t145->t146</title>
+<path style="fill:none;stroke:blue;" d="M6164,-267C6136,-260 6106,-254 6078,-250 5989,-237 5758,-253 5670,-228 5655,-223 5640,-216 5627,-208"/>
+<polygon style="fill:blue;stroke:blue;" points="5629.1,-205.2 5619,-202 5624.9,-210.8 5629.1,-205.2"/>
+</g>
+<!-- t146->t190 -->
+<g id="edge313" class="edge"><title>t146->t190</title>
+<path style="fill:none;stroke:blue;" d="M5594,-166C5594,-155 5594,-141 5594,-128"/>
+<polygon style="fill:blue;stroke:blue;" points="5597.5,-128 5594,-118 5590.5,-128 5597.5,-128"/>
+</g>
+<!-- t190->t191 -->
+<g id="edge317" class="edge"><title>t190->t191</title>
+<path style="fill:none;stroke:blue;" d="M5548,-99C5269,-91 3813,-50 3561,-43"/>
+<polygon style="fill:blue;stroke:blue;" points="3561,-39.5001 3551,-43 3561,-46.5001 3561,-39.5001"/>
+</g>
+<!-- k1 -->
+<g id="node447" class="node"><title>k1</title>
+<polygon style="fill:#90ee90;stroke:green;" points="11082,-1175.5 10934,-1175.5 10930,-1171.5 10930,-1124.5 11078,-1124.5 11082,-1128.5 11082,-1175.5"/>
+<polyline style="fill:none;stroke:green;" points="11078,-1171.5 10930,-1171.5 "/>
+<polyline style="fill:none;stroke:green;" points="11078,-1171.5 11078,-1124.5 "/>
+<polyline style="fill:none;stroke:green;" points="11078,-1171.5 11082,-1175.5 "/>
+<text text-anchor="middle" x="11006" y="-1143.5" style="font-family:Times New Roman;font-size:20.00;">Up-to-date task</text>
+</g>
+<!-- k2 -->
+<g id="node448" class="node"><title>k2</title>
+<polygon style="fill:none;stroke:blue;" points="11065,-1101.5 10951,-1101.5 10947,-1097.5 10947,-1050.5 11061,-1050.5 11065,-1054.5 11065,-1101.5"/>
+<polyline style="fill:none;stroke:blue;" points="11061,-1097.5 10947,-1097.5 "/>
+<polyline style="fill:none;stroke:blue;" points="11061,-1097.5 11061,-1050.5 "/>
+<polyline style="fill:none;stroke:blue;" points="11061,-1097.5 11065,-1101.5 "/>
+<text text-anchor="middle" x="11006" y="-1069.5" style="font-family:Times New Roman;font-size:20.00;fill:blue;">Task to run</text>
+</g>
+<!-- k1->k2 -->
+<g id="edge580" class="edge"><title>k1->k2</title>
+<path style="fill:none;stroke:gray;" d="M11006,-1124C11006,-1120 11006,-1116 11006,-1112"/>
+<polygon style="fill:gray;stroke:gray;" points="11009.5,-1112 11006,-1102 11002.5,-1112 11009.5,-1112"/>
+</g>
+<!-- k3 -->
+<g id="node450" class="node"><title>k3</title>
+<polygon style="fill:none;stroke:blue;stroke-dasharray:5,2;" points="11082,-1028 10934,-1028 10930,-1024 10930,-954 11078,-954 11082,-958 11082,-1028"/>
+<polyline style="fill:none;stroke:blue;stroke-dasharray:5,2;" points="11078,-1024 10930,-1024 "/>
+<polyline style="fill:none;stroke:blue;stroke-dasharray:5,2;" points="11078,-1024 11078,-954 "/>
+<polyline style="fill:none;stroke:blue;stroke-dasharray:5,2;" points="11078,-1024 11082,-1028 "/>
+<text text-anchor="middle" x="11006" y="-996" style="font-family:Times New Roman;font-size:20.00;fill:blue;">Up-to-date task</text>
+<text text-anchor="middle" x="11006" y="-973" style="font-family:Times New Roman;font-size:20.00;fill:blue;">forced to rerun</text>
+</g>
+<!-- k2->k3 -->
+<g id="edge582" class="edge"><title>k2->k3</title>
+<path style="fill:none;stroke:blue;" d="M11006,-1050C11006,-1046 11006,-1042 11006,-1038"/>
+<polygon style="fill:blue;stroke:blue;" points="11009.5,-1038 11006,-1028 11002.5,-1038 11009.5,-1038"/>
+</g>
+<!-- k4 -->
+<g id="node452" class="node"><title>k4</title>
+<polygon style="fill:#fff68f;stroke:black;" points="11067,-931.5 10949,-931.5 10945,-927.5 10945,-880.5 11063,-880.5 11067,-884.5 11067,-931.5"/>
+<polyline style="fill:none;stroke:black;" points="11063,-927.5 10945,-927.5 "/>
+<polyline style="fill:none;stroke:black;" points="11063,-927.5 11063,-880.5 "/>
+<polyline style="fill:none;stroke:black;" points="11063,-927.5 11067,-931.5 "/>
+<text text-anchor="middle" x="11006" y="-899.5" style="font-family:Times New Roman;font-size:20.00;">Final target</text>
+</g>
+<!-- k3->k4 -->
+<g id="edge584" class="edge"><title>k3->k4</title>
+<path style="fill:none;stroke:blue;" d="M11006,-954C11006,-950 11006,-946 11006,-942"/>
+<polygon style="fill:blue;stroke:blue;" points="11009.5,-942 11006,-932 11002.5,-942 11009.5,-942"/>
+</g>
+</g>
+</svg>
diff --git a/doc/_build/html/_downloads/gallery_dless.svg b/doc/_build/html/_downloads/gallery_dless.svg
new file mode 100644
index 0000000..c1fcd49
--- /dev/null
+++ b/doc/_build/html/_downloads/gallery_dless.svg
@@ -0,0 +1,197 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.0//EN"
+ "http://www.w3.org/TR/2001/REC-SVG-20010904/DTD/svg10.dtd" [
+ <!ATTLIST svg xmlns:xlink CDATA #FIXED "http://www.w3.org/1999/xlink">
+]>
+<!-- Generated by Graphviz version 2.20.2 (Mon Aug 4 08:59:22 UTC 2008)
+ For user: (lg) Leo Goodstadt -->
+<!-- Title: dless2 Pages: 1 -->
+<svg width="834pt" height="480pt"
+ viewBox="0.00 0.00 834.00 480.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 476)">
+<title>dless2</title>
+<polygon style="fill:white;stroke:white;" points="-4,4 -4,-476 830,-476 830,4 -4,4"/>
+<g id="cluster2" class="cluster"><title>clustertasks</title>
+<polygon style="fill:none;stroke:black;" points="8,-16 8,-456 574,-456 574,-16 8,-16"/>
+<text text-anchor="middle" x="291" y="-424.892" style="font-family:Times New Roman;font-size:30.0px;fill:#ff3232;">dless2</text>
+</g>
+<g id="cluster3" class="cluster"><title>clusterkey1</title>
+<polygon style="fill:#f6f4f4;stroke:#f6f4f4;" points="582,-132 582,-464 818,-464 818,-132 582,-132"/>
+<text text-anchor="middle" x="700" y="-432.892" style="font-family:Times New Roman;font-size:30.0px;">Key:</text>
+</g>
+<!-- t1 -->
+<g id="node2" class="node"><title>t1</title>
+<polygon style="fill:#b8cc6e;stroke:#006000;" points="551,-406 289,-406 285,-402 285,-370 547,-370 551,-374 551,-406"/>
+<polyline style="fill:none;stroke:#006000;" points="547,-402 285,-402 "/>
+<polyline style="fill:none;stroke:#006000;" points="547,-402 547,-370 "/>
+<polyline style="fill:none;stroke:#006000;" points="547,-402 551,-406 "/>
+<text text-anchor="middle" x="418" y="-381.392" style="font-family:Times New Roman;font-size:20.0px;fill:#006000;">Make directories [DLESS, test]</text>
+</g>
+<!-- t0 -->
+<g id="node3" class="node"><title>t0</title>
+<polygon style="fill:#b8cc6e;stroke:#006000;" points="566,-332 274,-332 270,-328 270,-296 562,-296 566,-300 566,-332"/>
+<polyline style="fill:none;stroke:#006000;" points="562,-328 270,-328 "/>
+<polyline style="fill:none;stroke:#006000;" points="562,-328 562,-296 "/>
+<polyline style="fill:none;stroke:#006000;" points="562,-328 566,-332 "/>
+<text text-anchor="middle" x="418" y="-307.392" style="font-family:Times New Roman;font-size:20.0px;fill:#006000;">copy_maf_into_working_directory</text>
+</g>
+<!-- t1->t0 -->
+<g id="edge3" class="edge"><title>t1->t0</title>
+<path style="fill:none;stroke:gray;" d="M418,-370C418,-361 418,-352 418,-342"/>
+<polygon style="fill:gray;stroke:gray;" points="421.5,-342 418,-332 414.5,-342 421.5,-342"/>
+</g>
+<!-- t4 -->
+<g id="node5" class="node"><title>t4</title>
+<polygon style="fill:#b8cc6e;stroke:#006000;" points="566,-258 360,-258 356,-254 356,-222 562,-222 566,-226 566,-258"/>
+<polyline style="fill:none;stroke:#006000;" points="562,-254 356,-254 "/>
+<polyline style="fill:none;stroke:#006000;" points="562,-254 562,-222 "/>
+<polyline style="fill:none;stroke:#006000;" points="562,-254 566,-258 "/>
+<text text-anchor="middle" x="461" y="-233.392" style="font-family:Times New Roman;font-size:20.0px;fill:#006000;">project_maf_alignments</text>
+</g>
+<!-- t0->t4 -->
+<g id="edge5" class="edge"><title>t0->t4</title>
+<path style="fill:none;stroke:gray;" d="M429,-296C434,-287 440,-277 445,-267"/>
+<polygon style="fill:gray;stroke:gray;" points="448.203,-268.441 450,-258 442.084,-265.042 448.203,-268.441"/>
+</g>
+<!-- t6 -->
+<g id="node7" class="node"><title>t6</title>
+<polygon style="fill:none;stroke:black;" points="544,-184 382,-184 378,-180 378,-148 540,-148 544,-152 544,-184"/>
+<polyline style="fill:none;stroke:black;" points="540,-180 378,-180 "/>
+<polyline style="fill:none;stroke:black;" points="540,-180 540,-148 "/>
+<polyline style="fill:none;stroke:black;" points="540,-180 544,-184 "/>
+<text text-anchor="middle" x="461" y="-159.392" style="font-family:Times New Roman;font-size:20.0px;">convert_maf2fasta</text>
+</g>
+<!-- t4->t6 -->
+<g id="edge7" class="edge"><title>t4->t6</title>
+<path style="fill:none;stroke:gray;" d="M461,-222C461,-213 461,-204 461,-194"/>
+<polygon style="fill:gray;stroke:gray;" points="464.5,-194 461,-184 457.5,-194 464.5,-194"/>
+</g>
+<!-- t5 -->
+<g id="node12" class="node"><title>t5</title>
+<polygon style="fill:#ebf3ff;stroke:#0044a0;" points="352,-184 146,-184 142,-180 142,-148 348,-148 352,-152 352,-184"/>
+<polyline style="fill:none;stroke:#0044a0;" points="348,-180 142,-180 "/>
+<polyline style="fill:none;stroke:#0044a0;" points="348,-180 348,-148 "/>
+<polyline style="fill:none;stroke:#0044a0;" points="348,-180 352,-184 "/>
+<text text-anchor="middle" x="247" y="-159.392" style="font-family:Times New Roman;font-size:20.0px;fill:#0044a0;">generate_neutral_model</text>
+</g>
+<!-- t4->t5 -->
+<g id="edge11" class="edge"><title>t4->t5</title>
+<path style="fill:none;stroke:gray;" d="M409,-222C379,-211 341,-199 309,-187"/>
+<polygon style="fill:gray;stroke:gray;" points="309.584,-183.521 299,-184 307.573,-190.226 309.584,-183.521"/>
+</g>
+<!-- t7 -->
+<g id="node15" class="node"><title>t7</title>
+<polygon style="fill:#ebf3ff;stroke:#0044a0;" points="450,-118 254,-118 250,-114 250,-82 446,-82 450,-86 450,-118"/>
+<polyline style="fill:none;stroke:#0044a0;" points="446,-114 250,-114 "/>
+<polyline style="fill:none;stroke:#0044a0;" points="446,-114 446,-82 "/>
+<polyline style="fill:none;stroke:#0044a0;" points="446,-114 450,-118 "/>
+<text text-anchor="middle" x="350" y="-93.392" style="font-family:Times New Roman;font-size:20.0px;fill:#0044a0;">generate_indel_history</text>
+</g>
+<!-- t6->t7 -->
+<g id="edge15" class="edge"><title>t6->t7</title>
+<path style="fill:none;stroke:#0044a0;" d="M431,-148C418,-141 403,-131 390,-123"/>
+<polygon style="fill:#0044a0;stroke:#0044a0;" points="391.441,-119.797 381,-118 388.042,-125.916 391.441,-119.797"/>
+</g>
+<!-- t8 -->
+<g id="node18" class="node"><title>t8</title>
+<polygon style="fill:#efa03b;stroke:black;" points="396,-60 308,-60 304,-56 304,-24 392,-24 396,-28 396,-60"/>
+<polyline style="fill:none;stroke:black;" points="392,-56 304,-56 "/>
+<polyline style="fill:none;stroke:black;" points="392,-56 392,-24 "/>
+<polyline style="fill:none;stroke:black;" points="392,-56 396,-60 "/>
+<text text-anchor="middle" x="350" y="-35.392" style="font-family:Times New Roman;font-size:20.0px;">run_dless</text>
+</g>
+<!-- t6->t8 -->
+<g id="edge21" class="edge"><title>t6->t8</title>
+<path style="fill:none;stroke:#0044a0;" d="M466,-148C470,-129 473,-101 459,-82 452,-72 429,-64 406,-56"/>
+<polygon style="fill:#0044a0;stroke:#0044a0;" points="406.584,-52.5212 396,-53 404.573,-59.2259 406.584,-52.5212"/>
+</g>
+<!-- t3 -->
+<g id="node9" class="node"><title>t3</title>
+<polygon style="fill:#b8cc6e;stroke:#006000;" points="252,-332 26,-332 22,-328 22,-296 248,-296 252,-300 252,-332"/>
+<polyline style="fill:none;stroke:#006000;" points="248,-328 22,-328 "/>
+<polyline style="fill:none;stroke:#006000;" points="248,-328 248,-296 "/>
+<polyline style="fill:none;stroke:#006000;" points="248,-328 252,-332 "/>
+<text text-anchor="middle" x="137" y="-307.392" style="font-family:Times New Roman;font-size:20.0px;fill:#006000;">Make directories [DLESS]</text>
+</g>
+<!-- t2 -->
+<g id="node10" class="node"><title>t2</title>
+<polygon style="fill:#ebf3ff;stroke:#0044a0;" points="338,-258 20,-258 16,-254 16,-222 334,-222 338,-226 338,-258"/>
+<polyline style="fill:none;stroke:#0044a0;" points="334,-254 16,-254 "/>
+<polyline style="fill:none;stroke:#0044a0;" points="334,-254 334,-222 "/>
+<polyline style="fill:none;stroke:#0044a0;" points="334,-254 338,-258 "/>
+<text text-anchor="middle" x="177" y="-233.392" style="font-family:Times New Roman;font-size:20.0px;fill:#0044a0;">copy_repeats_into_working_directory</text>
+</g>
+<!-- t3->t2 -->
+<g id="edge9" class="edge"><title>t3->t2</title>
+<path style="fill:none;stroke:gray;" d="M147,-296C152,-287 157,-277 162,-267"/>
+<polygon style="fill:gray;stroke:gray;" points="165.203,-268.441 167,-258 159.084,-265.042 165.203,-268.441"/>
+</g>
+<!-- t2->t5 -->
+<g id="edge13" class="edge"><title>t2->t5</title>
+<path style="fill:none;stroke:#0044a0;" d="M194,-222C203,-212 213,-202 223,-192"/>
+<polygon style="fill:#0044a0;stroke:#0044a0;" points="226.049,-193.831 230,-184 220.781,-189.221 226.049,-193.831"/>
+</g>
+<!-- t5->t7 -->
+<g id="edge17" class="edge"><title>t5->t7</title>
+<path style="fill:none;stroke:#0044a0;" d="M275,-148C287,-141 301,-131 313,-123"/>
+<polygon style="fill:#0044a0;stroke:#0044a0;" points="314.958,-125.916 322,-118 311.559,-119.797 314.958,-125.916"/>
+</g>
+<!-- t5->t8 -->
+<g id="edge23" class="edge"><title>t5->t8</title>
+<path style="fill:none;stroke:#0044a0;" d="M240,-148C234,-129 228,-101 241,-82 248,-72 271,-63 294,-56"/>
+<polygon style="fill:#0044a0;stroke:#0044a0;" points="295.427,-59.2259 304,-53 293.416,-52.5212 295.427,-59.2259"/>
+</g>
+<!-- t7->t8 -->
+<g id="edge19" class="edge"><title>t7->t8</title>
+<path style="fill:none;stroke:#0044a0;" d="M350,-82C350,-78 350,-74 350,-70"/>
+<polygon style="fill:#0044a0;stroke:#0044a0;" points="353.5,-70 350,-60 346.5,-70 353.5,-70"/>
+</g>
+<!-- k1_1 -->
+<g id="node23" class="node"><title>k1_1</title>
+<polygon style="fill:#b8cc6e;stroke:#006000;" points="776,-413.5 628,-413.5 624,-409.5 624,-362.5 772,-362.5 776,-366.5 776,-413.5"/>
+<polyline style="fill:none;stroke:#006000;" points="772,-409.5 624,-409.5 "/>
+<polyline style="fill:none;stroke:#006000;" points="772,-409.5 772,-362.5 "/>
+<polyline style="fill:none;stroke:#006000;" points="772,-409.5 776,-413.5 "/>
+<text text-anchor="middle" x="700" y="-381.392" style="font-family:Times New Roman;font-size:20.0px;fill:#006000;">Up-to-date task</text>
+</g>
+<!-- k2_1 -->
+<g id="node24" class="node"><title>k2_1</title>
+<polygon style="fill:none;stroke:black;" points="810,-339.5 594,-339.5 590,-335.5 590,-288.5 806,-288.5 810,-292.5 810,-339.5"/>
+<polyline style="fill:none;stroke:black;" points="806,-335.5 590,-335.5 "/>
+<polyline style="fill:none;stroke:black;" points="806,-335.5 806,-288.5 "/>
+<polyline style="fill:none;stroke:black;" points="806,-335.5 810,-339.5 "/>
+<text text-anchor="middle" x="700" y="-307.392" style="font-family:Times New Roman;font-size:20.0px;">Explicitly specified task</text>
+</g>
+<!-- k1_1->k2_1 -->
+<g id="edge26" class="edge"><title>k1_1->k2_1</title>
+<path style="fill:none;stroke:gray;" d="M700,-362C700,-358 700,-354 700,-350"/>
+<polygon style="fill:gray;stroke:gray;" points="703.5,-350 700,-340 696.5,-350 703.5,-350"/>
+</g>
+<!-- k3_1 -->
+<g id="node26" class="node"><title>k3_1</title>
+<polygon style="fill:#ebf3ff;stroke:#0044a0;" points="758,-265.5 646,-265.5 642,-261.5 642,-214.5 754,-214.5 758,-218.5 758,-265.5"/>
+<polyline style="fill:none;stroke:#0044a0;" points="754,-261.5 642,-261.5 "/>
+<polyline style="fill:none;stroke:#0044a0;" points="754,-261.5 754,-214.5 "/>
+<polyline style="fill:none;stroke:#0044a0;" points="754,-261.5 758,-265.5 "/>
+<text text-anchor="middle" x="700" y="-233.392" style="font-family:Times New Roman;font-size:20.0px;fill:#0044a0;">Task to run</text>
+</g>
+<!-- k2_1->k3_1 -->
+<g id="edge28" class="edge"><title>k2_1->k3_1</title>
+<path style="fill:none;stroke:#0044a0;" d="M700,-288C700,-284 700,-280 700,-276"/>
+<polygon style="fill:#0044a0;stroke:#0044a0;" points="703.5,-276 700,-266 696.5,-276 703.5,-276"/>
+</g>
+<!-- k4_1 -->
+<g id="node28" class="node"><title>k4_1</title>
+<polygon style="fill:#efa03b;stroke:black;" points="760,-191.5 644,-191.5 640,-187.5 640,-140.5 756,-140.5 760,-144.5 760,-191.5"/>
+<polyline style="fill:none;stroke:black;" points="756,-187.5 640,-187.5 "/>
+<polyline style="fill:none;stroke:black;" points="756,-187.5 756,-140.5 "/>
+<polyline style="fill:none;stroke:black;" points="756,-187.5 760,-191.5 "/>
+<text text-anchor="middle" x="700" y="-159.392" style="font-family:Times New Roman;font-size:20.0px;">Final target</text>
+</g>
+<!-- k3_1->k4_1 -->
+<g id="edge30" class="edge"><title>k3_1->k4_1</title>
+<path style="fill:none;stroke:#0044a0;" d="M700,-214C700,-210 700,-206 700,-202"/>
+<polygon style="fill:#0044a0;stroke:#0044a0;" points="703.5,-202 700,-192 696.5,-202 703.5,-202"/>
+</g>
+</g>
+</svg>
diff --git a/doc/_build/html/_downloads/gallery_rna_seq.svg b/doc/_build/html/_downloads/gallery_rna_seq.svg
new file mode 100644
index 0000000..43a6e5b
--- /dev/null
+++ b/doc/_build/html/_downloads/gallery_rna_seq.svg
@@ -0,0 +1,672 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.0//EN"
+ "http://www.w3.org/TR/2001/REC-SVG-20010904/DTD/svg10.dtd" [
+ <!ATTLIST svg xmlns:xlink CDATA #FIXED "http://www.w3.org/1999/xlink">
+]>
+<!-- Generated by Graphviz version 2.20.2 (Mon Aug 4 08:59:22 UTC 2008)
+ For user: Christoffer Nellaker -->
+<!-- Title: tree Pages: 1 -->
+<svg width="347pt" height="576pt"
+ viewBox="0.00 0.00 346.75 576.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<g id="graph0" class="graph" transform="scale(0.358209 0.358209) rotate(0) translate(4 1604)">
+<title>RNASeq Pipeline</title>
+<polygon style="fill:white;stroke:white;" points="-4,4 -4,-1604 964,-1604 964,4 -4,4"/>
+<g id="cluster2" class="cluster"><title>clustertasks</title>
+<polygon style="fill:none;stroke:black;" points="8,-16 8,-1592 952,-1592 952,-16 8,-16"/>
+<text text-anchor="middle" x="480" y="-1561" style="font-family:Times New Roman;font-size:20.00pt;fill:red;"><tspan font-weight = "bold">RNASeq Pipeline:</tspan></text>
+</g>
+<!-- t1 -->
+<g id="node2" class="node"><title>t1</title>
+<polygon style="fill:#90ee90;stroke:green;" points="381,-1542 239,-1542 235,-1538 235,-1506 377,-1506 381,-1510 381,-1542"/>
+<polyline style="fill:none;stroke:green;" points="377,-1538 235,-1538 "/>
+<polyline style="fill:none;stroke:green;" points="377,-1538 377,-1506 "/>
+<polyline style="fill:none;stroke:green;" points="377,-1538 381,-1542 "/>
+<text text-anchor="middle" x="308" y="-1517.5" style="font-family:Times New Roman;font-size:20.00;">prepare_indexes</text>
+</g>
+<!-- t3 -->
+<g id="node3" class="node"><title>t3</title>
+<polygon style="fill:#90ee90;stroke:green;" points="454,-1484 276,-1484 272,-1480 272,-1448 450,-1448 454,-1452 454,-1484"/>
+<polyline style="fill:none;stroke:green;" points="450,-1480 272,-1480 "/>
+<polyline style="fill:none;stroke:green;" points="450,-1480 450,-1448 "/>
+<polyline style="fill:none;stroke:green;" points="450,-1480 454,-1484 "/>
+<text text-anchor="middle" x="363" y="-1459.5" style="font-family:Times New Roman;font-size:20.00;">symbolic_link_make</text>
+</g>
+<!-- t1->t3 -->
+<g id="edge3" class="edge"><title>t1->t3</title>
+<path style="fill:none;stroke:gray;" d="M325,-1506C329,-1501 334,-1496 339,-1491"/>
+<polygon style="fill:gray;stroke:gray;" points="341.404,-1493.55 346,-1484 336.454,-1488.6 341.404,-1493.55"/>
+</g>
+<!-- t2 -->
+<g id="node46" class="node"><title>t2</title>
+<polygon style="fill:#90ee90;stroke:green;" points="610,-814 420,-814 416,-810 416,-778 606,-778 610,-782 610,-814"/>
+<polyline style="fill:none;stroke:green;" points="606,-810 416,-810 "/>
+<polyline style="fill:none;stroke:green;" points="606,-810 606,-778 "/>
+<polyline style="fill:none;stroke:green;" points="606,-810 610,-814 "/>
+<text text-anchor="middle" x="513" y="-789.5" style="font-family:Times New Roman;font-size:20.00;">chromosome_size_list</text>
+</g>
+<!-- t1->t2 -->
+<g id="edge43" class="edge"><title>t1->t2</title>
+<path style="fill:none;stroke:gray;" d="M282,-1506C275,-1500 268,-1492 263,-1484 242,-1447 266,-1426 243,-1390 212,-1340 175,-1356 139,-1310 123,-1287 122,-1279 116,-1252 112,-1229 114,-1222 116,-1200 118,-1190 120,-1187 121,-1178 128,-1144 131,-1136 131,-1102 131,-1102 131,-1102 131,-920 131,-824 169,-882 344,-836 370,-829 398,-823 423,-816"/>
+<polygon style="fill:gray;stroke:gray;" points="423.881,-819.393 433,-814 422.508,-812.529 423.881,-819.393"/>
+</g>
+<!-- t4 -->
+<g id="node6" class="node"><title>t4</title>
+<polygon style="fill:#90ee90;stroke:green;" points="419,-1426 311,-1426 307,-1422 307,-1390 415,-1390 419,-1394 419,-1426"/>
+<polyline style="fill:none;stroke:green;" points="415,-1422 307,-1422 "/>
+<polyline style="fill:none;stroke:green;" points="415,-1422 415,-1390 "/>
+<polyline style="fill:none;stroke:green;" points="415,-1422 419,-1426 "/>
+<text text-anchor="middle" x="363" y="-1401.5" style="font-family:Times New Roman;font-size:20.00;">TopHat_run</text>
+</g>
+<!-- t3->t4 -->
+<g id="edge5" class="edge"><title>t3->t4</title>
+<path style="fill:none;stroke:gray;" d="M363,-1448C363,-1444 363,-1440 363,-1436"/>
+<polygon style="fill:gray;stroke:gray;" points="366.5,-1436 363,-1426 359.5,-1436 366.5,-1436"/>
+</g>
+<!-- t0 -->
+<g id="node5" class="node"><title>t0</title>
+<polygon style="fill:#90ee90;stroke:green;" points="226,-1484 42,-1484 38,-1480 38,-1448 222,-1448 226,-1452 226,-1484"/>
+<polyline style="fill:none;stroke:green;" points="222,-1480 38,-1480 "/>
+<polyline style="fill:none;stroke:green;" points="222,-1480 222,-1448 "/>
+<polyline style="fill:none;stroke:green;" points="222,-1480 226,-1484 "/>
+<text text-anchor="middle" x="132" y="-1459.5" style="font-family:Times New Roman;font-size:20.00;">export_to_fastq_files</text>
+</g>
+<!-- t0->t4 -->
+<g id="edge85" class="edge"><title>t0->t4</title>
+<path style="fill:none;stroke:gray;" d="M204,-1448C234,-1441 268,-1432 297,-1425"/>
+<polygon style="fill:gray;stroke:gray;" points="298.427,-1428.23 307,-1422 296.416,-1421.52 298.427,-1428.23"/>
+</g>
+<!-- t43 -->
+<g id="node50" class="node"><title>t43</title>
+<polygon style="fill:#90ee90;stroke:green;" points="234,-1426 20,-1426 16,-1422 16,-1390 230,-1390 234,-1394 234,-1426"/>
+<polyline style="fill:none;stroke:green;" points="230,-1422 16,-1422 "/>
+<polyline style="fill:none;stroke:green;" points="230,-1422 230,-1390 "/>
+<polyline style="fill:none;stroke:green;" points="230,-1422 234,-1426 "/>
+<text text-anchor="middle" x="125" y="-1401.5" style="font-family:Times New Roman;font-size:20.00;">prepare_fastq_for_velvet</text>
+</g>
+<!-- t0->t43 -->
+<g id="edge47" class="edge"><title>t0->t43</title>
+<path style="fill:none;stroke:gray;" d="M130,-1448C129,-1444 129,-1440 128,-1436"/>
+<polygon style="fill:gray;stroke:gray;" points="131.478,-1435.6 127,-1426 124.512,-1436.3 131.478,-1435.6"/>
+</g>
+<!-- t6 -->
+<g id="node8" class="node"><title>t6</title>
+<polygon style="fill:#90ee90;stroke:green;" points="458,-1368 272,-1368 268,-1364 268,-1332 454,-1332 458,-1336 458,-1368"/>
+<polyline style="fill:none;stroke:green;" points="454,-1364 268,-1364 "/>
+<polyline style="fill:none;stroke:green;" points="454,-1364 454,-1332 "/>
+<polyline style="fill:none;stroke:green;" points="454,-1364 458,-1368 "/>
+<text text-anchor="middle" x="363" y="-1343.5" style="font-family:Times New Roman;font-size:20.00;">tophatSAM_to_BAM</text>
+</g>
+<!-- t4->t6 -->
+<g id="edge7" class="edge"><title>t4->t6</title>
+<path style="fill:none;stroke:gray;" d="M363,-1390C363,-1386 363,-1382 363,-1378"/>
+<polygon style="fill:gray;stroke:gray;" points="366.5,-1378 363,-1368 359.5,-1378 366.5,-1378"/>
+</g>
+<!-- t46 -->
+<g id="node82" class="node"><title>t46</title>
+<polygon style="fill:none;stroke:blue;" points="451,-118 313,-118 309,-114 309,-82 447,-82 451,-86 451,-118"/>
+<polyline style="fill:none;stroke:blue;" points="447,-114 309,-114 "/>
+<polyline style="fill:none;stroke:blue;" points="447,-114 447,-82 "/>
+<polyline style="fill:none;stroke:blue;" points="447,-114 451,-118 "/>
+<text text-anchor="middle" x="380" y="-93.5" style="font-family:Times New Roman;font-size:20.00;fill:blue;">compile_results</text>
+</g>
+<!-- t4->t46 -->
+<g id="edge127" class="edge"><title>t4->t46</title>
+<path style="fill:none;stroke:gray;" d="M419,-1403C565,-1388 943,-1344 943,-1292 943,-1292 943,-1292 943,-216 943,-185 935,-172 848,-140 779,-114 576,-105 461,-101"/>
+<polygon style="fill:gray;stroke:gray;" points="461,-97.5001 451,-101 461,-104.5 461,-97.5001"/>
+</g>
+<!-- t7 -->
+<g id="node10" class="node"><title>t7</title>
+<polygon style="fill:#90ee90;stroke:green;" points="523,-1310 227,-1310 223,-1306 223,-1274 519,-1274 523,-1278 523,-1310"/>
+<polyline style="fill:none;stroke:green;" points="519,-1306 223,-1306 "/>
+<polyline style="fill:none;stroke:green;" points="519,-1306 519,-1274 "/>
+<polyline style="fill:none;stroke:green;" points="519,-1306 523,-1310 "/>
+<text text-anchor="middle" x="373" y="-1285.5" style="font-family:Times New Roman;font-size:20.00;">get_req_qnames_from_Tophat_run</text>
+</g>
+<!-- t6->t7 -->
+<g id="edge9" class="edge"><title>t6->t7</title>
+<path style="fill:none;stroke:gray;" d="M366,-1332C367,-1328 367,-1324 368,-1320"/>
+<polygon style="fill:gray;stroke:gray;" points="371.471,-1320.49 370,-1310 364.607,-1319.12 371.471,-1320.49"/>
+</g>
+<!-- t22 -->
+<g id="node60" class="node"><title>t22</title>
+<polygon style="fill:#90ee90;stroke:green;" points="786,-814 632,-814 628,-810 628,-778 782,-778 786,-782 786,-814"/>
+<polyline style="fill:none;stroke:green;" points="782,-810 628,-810 "/>
+<polyline style="fill:none;stroke:green;" points="782,-810 782,-778 "/>
+<polyline style="fill:none;stroke:green;" points="782,-810 786,-814 "/>
+<text text-anchor="middle" x="707" y="-789.5" style="font-family:Times New Roman;font-size:20.00;">uniques_repairing</text>
+</g>
+<!-- t6->t22 -->
+<g id="edge97" class="edge"><title>t6->t22</title>
+<path style="fill:none;stroke:gray;" d="M279,-1332C258,-1326 235,-1319 214,-1310 171,-1290 148,-1293 125,-1252 114,-1231 120,-1222 125,-1200 128,-1189 133,-1188 136,-1178 148,-1145 151,-1136 151,-1102 151,-1102 151,-1102 151,-920 151,-914 460,-848 618,-815"/>
+<polygon style="fill:gray;stroke:gray;" points="618.881,-818.393 628,-813 617.508,-811.529 618.881,-818.393"/>
+</g>
+<!-- t8 -->
+<g id="node12" class="node"><title>t8</title>
+<polygon style="fill:#90ee90;stroke:green;" points="607.975,-1232.77 373,-1247.92 138.025,-1232.77 138.244,-1208.27 607.756,-1208.27 607.975,-1232.77"/>
+<polygon style="fill:none;stroke:green;" points="612,-1236.52 373,-1251.93 134,-1236.52 134.288,-1204.27 611.712,-1204.27 612,-1236.52"/>
+<text text-anchor="middle" x="373" y="-1219.5" style="font-family:Times New Roman;font-size:20.00;">prepare_raw_files_of_unmapped_reads</text>
+</g>
+<!-- t7->t8 -->
+<g id="edge11" class="edge"><title>t7->t8</title>
+<path style="fill:none;stroke:gray;" d="M373,-1274C373,-1270 373,-1266 373,-1262"/>
+<polygon style="fill:gray;stroke:gray;" points="376.5,-1262 373,-1252 369.5,-1262 376.5,-1262"/>
+</g>
+<!-- t17 -->
+<g id="node23" class="node"><title>t17</title>
+<polygon style="fill:#90ee90;stroke:green;" points="797,-1004 503,-1004 499,-1000 499,-968 793,-968 797,-972 797,-1004"/>
+<polyline style="fill:none;stroke:green;" points="793,-1000 499,-1000 "/>
+<polyline style="fill:none;stroke:green;" points="793,-1000 793,-968 "/>
+<polyline style="fill:none;stroke:green;" points="793,-1000 797,-1004 "/>
+<text text-anchor="middle" x="648" y="-979.5" style="font-family:Times New Roman;font-size:20.00;">get_req_qnames_post_xenoMRNA</text>
+</g>
+<!-- t7->t17 -->
+<g id="edge91" class="edge"><title>t7->t17</title>
+<path style="fill:none;stroke:gray;" d="M523,-1275C571,-1268 613,-1260 621,-1252 650,-1222 631,-1201 631,-1160 631,-1160 631,-1160 631,-1102 631,-1072 637,-1038 642,-1014"/>
+<polygon style="fill:gray;stroke:gray;" points="645.471,-1014.49 644,-1004 638.607,-1013.12 645.471,-1014.49"/>
+</g>
+<!-- t14 -->
+<g id="node17" class="node"><title>t14</title>
+<polygon style="fill:#90ee90;stroke:green;" points="612,-1178 334,-1178 330,-1174 330,-1142 608,-1142 612,-1146 612,-1178"/>
+<polyline style="fill:none;stroke:green;" points="608,-1174 330,-1174 "/>
+<polyline style="fill:none;stroke:green;" points="608,-1174 608,-1142 "/>
+<polyline style="fill:none;stroke:green;" points="608,-1174 612,-1178 "/>
+<text text-anchor="middle" x="471" y="-1153.5" style="font-family:Times New Roman;font-size:20.00;">run_Bowtie_against_xenomRNA</text>
+</g>
+<!-- t8->t14 -->
+<g id="edge87" class="edge"><title>t8->t14</title>
+<path style="fill:none;stroke:gray;" d="M405,-1204C415,-1198 426,-1190 436,-1184"/>
+<polygon style="fill:gray;stroke:gray;" points="438.1,-1186.8 444,-1178 433.9,-1181.2 438.1,-1186.8"/>
+</g>
+<!-- t12 -->
+<g id="node14" class="node"><title>t12</title>
+<polygon style="fill:#90ee90;stroke:green;" points="906,-1310 672,-1310 668,-1306 668,-1274 902,-1274 906,-1278 906,-1310"/>
+<polyline style="fill:none;stroke:green;" points="902,-1306 668,-1306 "/>
+<polyline style="fill:none;stroke:green;" points="902,-1306 902,-1274 "/>
+<polyline style="fill:none;stroke:green;" points="902,-1306 906,-1310 "/>
+<text text-anchor="middle" x="787" y="-1285.5" style="font-family:Times New Roman;font-size:20.00;">prepare_xenoMrna_indexes</text>
+</g>
+<!-- t13 -->
+<g id="node15" class="node"><title>t13</title>
+<polygon style="fill:#90ee90;stroke:green;" points="924,-1244 654,-1244 650,-1240 650,-1208 920,-1208 924,-1212 924,-1244"/>
+<polyline style="fill:none;stroke:green;" points="920,-1240 650,-1240 "/>
+<polyline style="fill:none;stroke:green;" points="920,-1240 920,-1208 "/>
+<polyline style="fill:none;stroke:green;" points="920,-1240 924,-1244 "/>
+<text text-anchor="middle" x="787" y="-1219.5" style="font-family:Times New Roman;font-size:20.00;">xenoMrna_symbolic_link_make</text>
+</g>
+<!-- t12->t13 -->
+<g id="edge13" class="edge"><title>t12->t13</title>
+<path style="fill:none;stroke:gray;" d="M787,-1274C787,-1268 787,-1261 787,-1254"/>
+<polygon style="fill:gray;stroke:gray;" points="790.5,-1254 787,-1244 783.5,-1254 790.5,-1254"/>
+</g>
+<!-- t13->t14 -->
+<g id="edge15" class="edge"><title>t13->t14</title>
+<path style="fill:none;stroke:gray;" d="M701,-1208C659,-1200 611,-1189 568,-1180"/>
+<polygon style="fill:gray;stroke:gray;" points="568.492,-1176.53 558,-1178 567.119,-1183.39 568.492,-1176.53"/>
+</g>
+<!-- t13->t17 -->
+<g id="edge89" class="edge"><title>t13->t17</title>
+<path style="fill:none;stroke:gray;" d="M777,-1208C753,-1166 692,-1063 664,-1013"/>
+<polygon style="fill:gray;stroke:gray;" points="666.916,-1011.04 659,-1004 660.797,-1014.44 666.916,-1011.04"/>
+</g>
+<!-- t15 -->
+<g id="node19" class="node"><title>t15</title>
+<polygon style="fill:#90ee90;stroke:green;" points="600,-1120 372,-1120 368,-1116 368,-1084 596,-1084 600,-1088 600,-1120"/>
+<polyline style="fill:none;stroke:green;" points="596,-1116 368,-1116 "/>
+<polyline style="fill:none;stroke:green;" points="596,-1116 596,-1084 "/>
+<polyline style="fill:none;stroke:green;" points="596,-1116 600,-1120 "/>
+<text text-anchor="middle" x="484" y="-1095.5" style="font-family:Times New Roman;font-size:20.00;">xenoMrna_SAM_to_BAM</text>
+</g>
+<!-- t14->t15 -->
+<g id="edge17" class="edge"><title>t14->t15</title>
+<path style="fill:none;stroke:gray;" d="M475,-1142C476,-1138 477,-1134 478,-1130"/>
+<polygon style="fill:gray;stroke:gray;" points="481.471,-1130.49 480,-1120 474.607,-1129.12 481.471,-1130.49"/>
+</g>
+<!-- t16 -->
+<g id="node21" class="node"><title>t16</title>
+<polygon style="fill:#90ee90;stroke:green;" points="603,-1062 413,-1062 409,-1058 409,-1026 599,-1026 603,-1030 603,-1062"/>
+<polyline style="fill:none;stroke:green;" points="599,-1058 409,-1058 "/>
+<polyline style="fill:none;stroke:green;" points="599,-1058 599,-1026 "/>
+<polyline style="fill:none;stroke:green;" points="599,-1058 603,-1062 "/>
+<text text-anchor="middle" x="506" y="-1037.5" style="font-family:Times New Roman;font-size:20.00;">FilterxenoMrna_SAM</text>
+</g>
+<!-- t15->t16 -->
+<g id="edge19" class="edge"><title>t15->t16</title>
+<path style="fill:none;stroke:gray;" d="M491,-1084C492,-1080 494,-1076 495,-1072"/>
+<polygon style="fill:gray;stroke:gray;" points="498.536,-1072.58 499,-1062 492.036,-1069.98 498.536,-1072.58"/>
+</g>
+<!-- t16->t17 -->
+<g id="edge21" class="edge"><title>t16->t17</title>
+<path style="fill:none;stroke:gray;" d="M550,-1026C564,-1020 580,-1014 595,-1008"/>
+<polygon style="fill:gray;stroke:gray;" points="596.283,-1011.26 604,-1004 593.44,-1004.86 596.283,-1011.26"/>
+</g>
+<!-- t18 -->
+<g id="node25" class="node"><title>t18</title>
+<polygon style="fill:#90ee90;stroke:green;" points="902.126,-926.774 682,-941.92 461.874,-926.774 462.079,-902.266 901.921,-902.266 902.126,-926.774"/>
+<polygon style="fill:none;stroke:green;" points="906.148,-930.506 682,-945.93 457.852,-930.506 458.122,-898.266 905.878,-898.266 906.148,-930.506"/>
+<text text-anchor="middle" x="682" y="-913.5" style="font-family:Times New Roman;font-size:20.00;">prepare_raw_files_post_xenoMRNA</text>
+</g>
+<!-- t17->t18 -->
+<g id="edge23" class="edge"><title>t17->t18</title>
+<path style="fill:none;stroke:gray;" d="M657,-968C659,-964 662,-959 664,-955"/>
+<polygon style="fill:gray;stroke:gray;" points="667.203,-956.441 669,-946 661.084,-953.042 667.203,-956.441"/>
+</g>
+<!-- t21 -->
+<g id="node27" class="node"><title>t21</title>
+<polygon style="fill:#90ee90;stroke:green;" points="254,-814 40,-814 36,-810 36,-778 250,-778 254,-782 254,-814"/>
+<polyline style="fill:none;stroke:green;" points="250,-810 36,-810 "/>
+<polyline style="fill:none;stroke:green;" points="250,-810 250,-778 "/>
+<polyline style="fill:none;stroke:green;" points="250,-810 254,-814 "/>
+<text text-anchor="middle" x="145" y="-789.5" style="font-family:Times New Roman;font-size:20.00;">bowtie_run_multi_onehit</text>
+</g>
+<!-- t18->t21 -->
+<g id="edge25" class="edge"><title>t18->t21</title>
+<path style="fill:none;stroke:gray;" d="M588,-898C487,-875 331,-839 233,-816"/>
+<polygon style="fill:gray;stroke:gray;" points="233.492,-812.529 223,-814 232.119,-819.393 233.492,-812.529"/>
+</g>
+<!-- t20 -->
+<g id="node56" class="node"><title>t20</title>
+<polygon style="fill:#90ee90;stroke:green;" points="884,-748 730,-748 726,-744 726,-712 880,-712 884,-716 884,-748"/>
+<polyline style="fill:none;stroke:green;" points="880,-744 726,-744 "/>
+<polyline style="fill:none;stroke:green;" points="880,-744 880,-712 "/>
+<polyline style="fill:none;stroke:green;" points="880,-744 884,-748 "/>
+<text text-anchor="middle" x="805" y="-723.5" style="font-family:Times New Roman;font-size:20.00;">bowtie_run_multi</text>
+</g>
+<!-- t18->t20 -->
+<g id="edge53" class="edge"><title>t18->t20</title>
+<path style="fill:none;stroke:gray;" d="M692,-898C701,-879 717,-852 737,-836 758,-818 778,-835 795,-814 807,-798 809,-776 809,-758"/>
+<polygon style="fill:gray;stroke:gray;" points="812.478,-757.602 808,-748 805.512,-758.299 812.478,-757.602"/>
+</g>
+<!-- t19 -->
+<g id="node58" class="node"><title>t19</title>
+<polygon style="fill:#90ee90;stroke:green;" points="924,-872 750,-872 746,-868 746,-836 920,-836 924,-840 924,-872"/>
+<polyline style="fill:none;stroke:green;" points="920,-868 746,-868 "/>
+<polyline style="fill:none;stroke:green;" points="920,-868 920,-836 "/>
+<polyline style="fill:none;stroke:green;" points="920,-868 924,-872 "/>
+<text text-anchor="middle" x="835" y="-847.5" style="font-family:Times New Roman;font-size:20.00;">bowtie_run_uniques</text>
+</g>
+<!-- t18->t19 -->
+<g id="edge55" class="edge"><title>t18->t19</title>
+<path style="fill:none;stroke:gray;" d="M733,-898C749,-891 767,-883 783,-876"/>
+<polygon style="fill:gray;stroke:gray;" points="785.015,-878.964 793,-872 782.415,-872.464 785.015,-878.964"/>
+</g>
+<!-- t25 -->
+<g id="node30" class="node"><title>t25</title>
+<polygon style="fill:#90ee90;stroke:green;" points="454.138,-736.774 333,-751.92 211.862,-736.774 211.975,-712.266 454.025,-712.266 454.138,-736.774"/>
+<polygon style="fill:none;stroke:green;" points="458.138,-740.304 333,-755.951 207.862,-740.304 208.01,-708.266 457.99,-708.266 458.138,-740.304"/>
+<text text-anchor="middle" x="333" y="-723.5" style="font-family:Times New Roman;font-size:20.00;">FiltoutDualRepeats</text>
+</g>
+<!-- t21->t25 -->
+<g id="edge93" class="edge"><title>t21->t25</title>
+<path style="fill:none;stroke:gray;" d="M196,-778C218,-770 245,-761 268,-752"/>
+<polygon style="fill:gray;stroke:gray;" points="269.427,-755.226 278,-749 267.416,-748.521 269.427,-755.226"/>
+</g>
+<!-- t21->t46 -->
+<g id="edge121" class="edge"><title>t21->t46</title>
+<path style="fill:none;stroke:gray;" d="M91,-778C80,-772 70,-765 62,-756 34,-720 37,-701 37,-656 37,-656 37,-656 37,-216 37,-192 43,-176 125,-140 155,-126 236,-115 299,-108"/>
+<polygon style="fill:gray;stroke:gray;" points="299.398,-111.478 309,-107 298.701,-104.512 299.398,-111.478"/>
+</g>
+<!-- t24 -->
+<g id="node29" class="node"><title>t24</title>
+<polygon style="fill:#90ee90;stroke:green;" points="398,-814 276,-814 272,-810 272,-778 394,-778 398,-782 398,-814"/>
+<polyline style="fill:none;stroke:green;" points="394,-810 272,-810 "/>
+<polyline style="fill:none;stroke:green;" points="394,-810 394,-778 "/>
+<polyline style="fill:none;stroke:green;" points="394,-810 398,-814 "/>
+<text text-anchor="middle" x="335" y="-789.5" style="font-family:Times New Roman;font-size:20.00;">make_replists</text>
+</g>
+<!-- t24->t25 -->
+<g id="edge27" class="edge"><title>t24->t25</title>
+<path style="fill:none;stroke:gray;" d="M334,-778C334,-774 334,-770 334,-766"/>
+<polygon style="fill:gray;stroke:gray;" points="337.5,-766 334,-756 330.5,-766 337.5,-766"/>
+</g>
+<!-- t26 -->
+<g id="node32" class="node"><title>t26</title>
+<polygon style="fill:#90ee90;stroke:green;" points="308.068,-662.774 194,-677.92 79.932,-662.774 80.0385,-638.266 307.961,-638.266 308.068,-662.774"/>
+<polygon style="fill:none;stroke:green;" points="312.066,-666.278 194,-681.955 75.9337,-666.278 76.0727,-634.266 311.927,-634.266 312.066,-666.278"/>
+<text text-anchor="middle" x="194" y="-649.5" style="font-family:Times New Roman;font-size:20.00;">prepare_raw_files</text>
+</g>
+<!-- t25->t26 -->
+<g id="edge29" class="edge"><title>t25->t26</title>
+<path style="fill:none;stroke:gray;" d="M292,-708C277,-700 259,-691 243,-682"/>
+<polygon style="fill:gray;stroke:gray;" points="244.441,-678.797 234,-677 241.042,-684.916 244.441,-678.797"/>
+</g>
+<!-- t34 -->
+<g id="node64" class="node"><title>t34</title>
+<polygon style="fill:#90ee90;stroke:green;" points="665,-674 565,-674 561,-670 561,-638 661,-638 665,-642 665,-674"/>
+<polyline style="fill:none;stroke:green;" points="661,-670 561,-670 "/>
+<polyline style="fill:none;stroke:green;" points="661,-670 661,-638 "/>
+<polyline style="fill:none;stroke:green;" points="661,-670 665,-674 "/>
+<text text-anchor="middle" x="613" y="-649.5" style="font-family:Times New Roman;font-size:20.00;">fill_islands</text>
+</g>
+<!-- t25->t34 -->
+<g id="edge61" class="edge"><title>t25->t34</title>
+<path style="fill:none;stroke:gray;" d="M415,-708C458,-696 511,-682 551,-672"/>
+<polygon style="fill:gray;stroke:gray;" points="551.881,-675.393 561,-670 550.508,-668.529 551.881,-675.393"/>
+</g>
+<!-- t37 -->
+<g id="node70" class="node"><title>t37</title>
+<polygon style="fill:#90ee90;stroke:green;" points="794,-484 582,-484 578,-480 578,-448 790,-448 794,-452 794,-484"/>
+<polyline style="fill:none;stroke:green;" points="790,-480 578,-480 "/>
+<polyline style="fill:none;stroke:green;" points="790,-480 790,-448 "/>
+<polyline style="fill:none;stroke:green;" points="790,-480 794,-484 "/>
+<text text-anchor="middle" x="686" y="-459.5" style="font-family:Times New Roman;font-size:20.00;">Reallocate_Repeat_Ends</text>
+</g>
+<!-- t25->t37 -->
+<g id="edge107" class="edge"><title>t25->t37</title>
+<path style="fill:none;stroke:gray;" d="M353,-708C393,-666 486,-573 577,-514 592,-504 610,-495 626,-488"/>
+<polygon style="fill:gray;stroke:gray;" points="628.015,-490.964 636,-484 625.415,-484.464 628.015,-490.964"/>
+</g>
+<!-- t25->t46 -->
+<g id="edge119" class="edge"><title>t25->t46</title>
+<path style="fill:none;stroke:gray;" d="M331,-708C326,-643 311,-446 311,-282 311,-282 311,-282 311,-216 311,-181 334,-147 354,-125"/>
+<polygon style="fill:gray;stroke:gray;" points="356.404,-127.546 361,-118 351.454,-122.596 356.404,-127.546"/>
+</g>
+<!-- t27 -->
+<g id="node34" class="node"><title>t27</title>
+<polygon style="fill:#90ee90;stroke:green;" points="247,-608 135,-608 131,-604 131,-572 243,-572 247,-576 247,-608"/>
+<polyline style="fill:none;stroke:green;" points="243,-604 131,-604 "/>
+<polyline style="fill:none;stroke:green;" points="243,-604 243,-572 "/>
+<polyline style="fill:none;stroke:green;" points="243,-604 247,-608 "/>
+<text text-anchor="middle" x="189" y="-583.5" style="font-family:Times New Roman;font-size:20.00;">bowtie_run2</text>
+</g>
+<!-- t26->t27 -->
+<g id="edge31" class="edge"><title>t26->t27</title>
+<path style="fill:none;stroke:gray;" d="M192,-634C192,-629 192,-624 191,-618"/>
+<polygon style="fill:gray;stroke:gray;" points="194.478,-617.602 190,-608 187.512,-618.299 194.478,-617.602"/>
+</g>
+<!-- t28 -->
+<g id="node36" class="node"><title>t28</title>
+<polygon style="fill:#90ee90;stroke:green;" points="244,-550 134,-550 130,-546 130,-514 240,-514 244,-518 244,-550"/>
+<polyline style="fill:none;stroke:green;" points="240,-546 130,-546 "/>
+<polyline style="fill:none;stroke:green;" points="240,-546 240,-514 "/>
+<polyline style="fill:none;stroke:green;" points="240,-546 244,-550 "/>
+<text text-anchor="middle" x="187" y="-525.5" style="font-family:Times New Roman;font-size:20.00;">SAM2BAM</text>
+</g>
+<!-- t27->t28 -->
+<g id="edge33" class="edge"><title>t27->t28</title>
+<path style="fill:none;stroke:gray;" d="M188,-572C188,-568 188,-564 188,-560"/>
+<polygon style="fill:gray;stroke:gray;" points="191.5,-560 188,-550 184.5,-560 191.5,-560"/>
+</g>
+<!-- t29 -->
+<g id="node38" class="node"><title>t29</title>
+<polygon style="fill:#90ee90;stroke:green;" points="259.953,-472.774 185,-487.92 110.047,-472.774 110.117,-448.266 259.883,-448.266 259.953,-472.774"/>
+<polygon style="fill:none;stroke:green;" points="263.963,-476.044 185,-492.001 106.037,-476.044 106.128,-444.266 263.872,-444.266 263.963,-476.044"/>
+<text text-anchor="middle" x="185" y="-459.5" style="font-family:Times New Roman;font-size:20.00;">splitby_chr</text>
+</g>
+<!-- t28->t29 -->
+<g id="edge35" class="edge"><title>t28->t29</title>
+<path style="fill:none;stroke:gray;" d="M186,-514C186,-510 186,-506 186,-502"/>
+<polygon style="fill:gray;stroke:gray;" points="189.5,-502 186,-492 182.5,-502 189.5,-502"/>
+</g>
+<!-- t30 -->
+<g id="node40" class="node"><title>t30</title>
+<polygon style="fill:#90ee90;stroke:green;" points="292,-392 80,-392 76,-388 76,-356 288,-356 292,-360 292,-392"/>
+<polyline style="fill:none;stroke:green;" points="288,-388 76,-388 "/>
+<polyline style="fill:none;stroke:green;" points="288,-388 288,-356 "/>
+<polyline style="fill:none;stroke:green;" points="288,-388 292,-392 "/>
+<text text-anchor="middle" x="184" y="-367.5" style="font-family:Times New Roman;font-size:20.00;">create_by_chr_raw_files</text>
+</g>
+<!-- t29->t30 -->
+<g id="edge37" class="edge"><title>t29->t30</title>
+<path style="fill:none;stroke:gray;" d="M185,-444C184,-431 184,-416 184,-402"/>
+<polygon style="fill:gray;stroke:gray;" points="187.5,-402 184,-392 180.5,-402 187.5,-402"/>
+</g>
+<!-- t31 -->
+<g id="node42" class="node"><title>t31</title>
+<polygon style="fill:#90ee90;stroke:green;" points="270,-300 102,-300 98,-296 98,-264 266,-264 270,-268 270,-300"/>
+<polyline style="fill:none;stroke:green;" points="266,-296 98,-296 "/>
+<polyline style="fill:none;stroke:green;" points="266,-296 266,-264 "/>
+<polyline style="fill:none;stroke:green;" points="266,-296 270,-300 "/>
+<text text-anchor="middle" x="184" y="-275.5" style="font-family:Times New Roman;font-size:20.00;">run_by_chr_bowtie</text>
+</g>
+<!-- t30->t31 -->
+<g id="edge39" class="edge"><title>t30->t31</title>
+<path style="fill:none;stroke:gray;" d="M184,-356C184,-343 184,-325 184,-310"/>
+<polygon style="fill:gray;stroke:gray;" points="187.5,-310 184,-300 180.5,-310 187.5,-310"/>
+</g>
+<!-- t32 -->
+<g id="node44" class="node"><title>t32</title>
+<polygon style="fill:#90ee90;stroke:green;" points="287,-234 85,-234 81,-230 81,-198 283,-198 287,-202 287,-234"/>
+<polyline style="fill:none;stroke:green;" points="283,-230 81,-230 "/>
+<polyline style="fill:none;stroke:green;" points="283,-230 283,-198 "/>
+<polyline style="fill:none;stroke:green;" points="283,-230 287,-234 "/>
+<text text-anchor="middle" x="184" y="-209.5" style="font-family:Times New Roman;font-size:20.00;">by_chr_SAM_to_BAM</text>
+</g>
+<!-- t31->t32 -->
+<g id="edge41" class="edge"><title>t31->t32</title>
+<path style="fill:none;stroke:gray;" d="M184,-264C184,-258 184,-251 184,-244"/>
+<polygon style="fill:gray;stroke:gray;" points="187.5,-244 184,-234 180.5,-244 187.5,-244"/>
+</g>
+<!-- t33 -->
+<g id="node48" class="node"><title>t33</title>
+<polygon style="fill:#90ee90;stroke:green;" points="234,-176 138,-176 134,-172 134,-140 230,-140 234,-144 234,-176"/>
+<polyline style="fill:none;stroke:green;" points="230,-172 134,-172 "/>
+<polyline style="fill:none;stroke:green;" points="230,-172 230,-140 "/>
+<polyline style="fill:none;stroke:green;" points="230,-172 234,-176 "/>
+<text text-anchor="middle" x="184" y="-151.5" style="font-family:Times New Roman;font-size:20.00;">ccheckhits</text>
+</g>
+<!-- t32->t33 -->
+<g id="edge45" class="edge"><title>t32->t33</title>
+<path style="fill:none;stroke:gray;" d="M184,-198C184,-194 184,-190 184,-186"/>
+<polygon style="fill:gray;stroke:gray;" points="187.5,-186 184,-176 180.5,-186 187.5,-186"/>
+</g>
+<!-- t2->t33 -->
+<g id="edge95" class="edge"><title>t2->t33</title>
+<path style="fill:none;stroke:gray;" d="M416,-779C413,-779 410,-778 407,-778 244,-755 57,-820 57,-656 57,-656 57,-656 57,-282 57,-244 48,-226 72,-198 80,-189 102,-181 124,-173"/>
+<polygon style="fill:gray;stroke:gray;" points="125.427,-176.226 134,-170 123.416,-169.521 125.427,-176.226"/>
+</g>
+<!-- t23 -->
+<g id="node62" class="node"><title>t23</title>
+<polygon style="fill:#90ee90;stroke:green;" points="697,-748 533,-748 529,-744 529,-712 693,-712 697,-716 697,-748"/>
+<polyline style="fill:none;stroke:green;" points="693,-744 529,-744 "/>
+<polyline style="fill:none;stroke:green;" points="693,-744 693,-712 "/>
+<polyline style="fill:none;stroke:green;" points="693,-744 697,-748 "/>
+<text text-anchor="middle" x="613" y="-723.5" style="font-family:Times New Roman;font-size:20.00;">create_island_seed</text>
+</g>
+<!-- t2->t23 -->
+<g id="edge99" class="edge"><title>t2->t23</title>
+<path style="fill:none;stroke:gray;" d="M540,-778C551,-771 564,-762 576,-754"/>
+<polygon style="fill:gray;stroke:gray;" points="578.621,-756.459 585,-748 574.738,-750.635 578.621,-756.459"/>
+</g>
+<!-- t38 -->
+<g id="node76" class="node"><title>t38</title>
+<polygon style="fill:#90ee90;stroke:green;" points="333.879,-361.794 485,-334.5 636.121,-361.794 635.98,-405.956 334.02,-405.956 333.879,-361.794"/>
+<polygon style="fill:none;stroke:green;" points="329.868,-358.453 485,-330.435 640.132,-358.453 639.967,-409.956 330.033,-409.956 329.868,-358.453"/>
+<text text-anchor="middle" x="485" y="-367.5" style="font-family:Times New Roman;font-size:20.00;">make_karyotype_file_for_circos</text>
+</g>
+<!-- t2->t38 -->
+<g id="edge113" class="edge"><title>t2->t38</title>
+<path style="fill:none;stroke:gray;" d="M512,-778C507,-716 494,-513 488,-420"/>
+<polygon style="fill:gray;stroke:gray;" points="491.478,-419.602 487,-410 484.512,-420.299 491.478,-419.602"/>
+</g>
+<!-- t33->t46 -->
+<g id="edge117" class="edge"><title>t33->t46</title>
+<path style="fill:none;stroke:gray;" d="M234,-143C257,-136 285,-128 309,-121"/>
+<polygon style="fill:gray;stroke:gray;" points="310.427,-124.226 319,-118 308.416,-117.521 310.427,-124.226"/>
+</g>
+<!-- t44 -->
+<g id="node52" class="node"><title>t44</title>
+<polygon style="fill:#90ee90;stroke:green;" points="147,-1368 49,-1368 45,-1364 45,-1332 143,-1332 147,-1336 147,-1368"/>
+<polyline style="fill:none;stroke:green;" points="143,-1364 45,-1364 "/>
+<polyline style="fill:none;stroke:green;" points="143,-1364 143,-1332 "/>
+<polyline style="fill:none;stroke:green;" points="143,-1364 147,-1368 "/>
+<text text-anchor="middle" x="96" y="-1343.5" style="font-family:Times New Roman;font-size:20.00;">velvet_run</text>
+</g>
+<!-- t43->t44 -->
+<g id="edge49" class="edge"><title>t43->t44</title>
+<path style="fill:none;stroke:gray;" d="M116,-1390C114,-1386 111,-1382 109,-1377"/>
+<polygon style="fill:gray;stroke:gray;" points="112.26,-1375.72 105,-1368 105.863,-1378.56 112.26,-1375.72"/>
+</g>
+<!-- t45 -->
+<g id="node54" class="node"><title>t45</title>
+<polygon style="fill:#90ee90;stroke:green;" points="112,-1178 20,-1178 16,-1174 16,-1142 108,-1142 112,-1146 112,-1178"/>
+<polyline style="fill:none;stroke:green;" points="108,-1174 16,-1174 "/>
+<polyline style="fill:none;stroke:green;" points="108,-1174 108,-1142 "/>
+<polyline style="fill:none;stroke:green;" points="108,-1174 112,-1178 "/>
+<text text-anchor="middle" x="64" y="-1153.5" style="font-family:Times New Roman;font-size:20.00;">oases_run</text>
+</g>
+<!-- t44->t45 -->
+<g id="edge51" class="edge"><title>t44->t45</title>
+<path style="fill:none;stroke:gray;" d="M93,-1332C88,-1298 75,-1227 69,-1188"/>
+<polygon style="fill:gray;stroke:gray;" points="72.3933,-1187.12 67,-1178 65.5292,-1188.49 72.3933,-1187.12"/>
+</g>
+<!-- t45->t46 -->
+<g id="edge79" class="edge"><title>t45->t46</title>
+<path style="fill:none;stroke:gray;" d="M52,-1142C38,-1120 17,-1081 17,-1044 17,-1044 17,-1044 17,-216 17,-178 24,-161 54,-140 74,-126 208,-113 299,-106"/>
+<polygon style="fill:gray;stroke:gray;" points="299.398,-109.478 309,-105 298.701,-102.512 299.398,-109.478"/>
+</g>
+<!-- t20->t34 -->
+<g id="edge103" class="edge"><title>t20->t34</title>
+<path style="fill:none;stroke:gray;" d="M758,-712C731,-701 698,-689 669,-678"/>
+<polygon style="fill:gray;stroke:gray;" points="670.56,-674.863 660,-674 667.717,-681.26 670.56,-674.863"/>
+</g>
+<!-- t20->t46 -->
+<g id="edge123" class="edge"><title>t20->t46</title>
+<path style="fill:none;stroke:gray;" d="M829,-712C858,-687 903,-641 903,-590 903,-590 903,-590 903,-216 903,-171 608,-128 461,-109"/>
+<polygon style="fill:gray;stroke:gray;" points="461.299,-105.512 451,-108 460.602,-112.478 461.299,-105.512"/>
+</g>
+<!-- t19->t22 -->
+<g id="edge57" class="edge"><title>t19->t22</title>
+<path style="fill:none;stroke:gray;" d="M795,-836C783,-830 769,-824 756,-818"/>
+<polygon style="fill:gray;stroke:gray;" points="757.56,-814.863 747,-814 754.717,-821.26 757.56,-814.863"/>
+</g>
+<!-- t19->t46 -->
+<g id="edge125" class="edge"><title>t19->t46</title>
+<path style="fill:none;stroke:gray;" d="M859,-836C885,-814 923,-774 923,-730 923,-730 923,-730 923,-216 923,-158 995,-197 760,-140 706,-126 556,-113 461,-106"/>
+<polygon style="fill:gray;stroke:gray;" points="461.299,-102.512 451,-105 460.602,-109.478 461.299,-102.512"/>
+</g>
+<!-- t22->t23 -->
+<g id="edge59" class="edge"><title>t22->t23</title>
+<path style="fill:none;stroke:gray;" d="M681,-778C671,-771 658,-762 647,-754"/>
+<polygon style="fill:gray;stroke:gray;" points="649.1,-751.2 639,-748 644.9,-756.8 649.1,-751.2"/>
+</g>
+<!-- t23->t34 -->
+<g id="edge101" class="edge"><title>t23->t34</title>
+<path style="fill:none;stroke:gray;" d="M613,-712C613,-703 613,-694 613,-684"/>
+<polygon style="fill:gray;stroke:gray;" points="616.5,-684 613,-674 609.5,-684 616.5,-684"/>
+</g>
+<!-- t35 -->
+<g id="node66" class="node"><title>t35</title>
+<polygon style="fill:#90ee90;stroke:green;" points="864,-608 602,-608 598,-604 598,-572 860,-572 864,-576 864,-608"/>
+<polyline style="fill:none;stroke:green;" points="860,-604 598,-604 "/>
+<polyline style="fill:none;stroke:green;" points="860,-604 860,-572 "/>
+<polyline style="fill:none;stroke:green;" points="860,-604 864,-608 "/>
+<text text-anchor="middle" x="731" y="-583.5" style="font-family:Times New Roman;font-size:20.00;">make_fasta_for_RepeatMasker</text>
+</g>
+<!-- t34->t35 -->
+<g id="edge63" class="edge"><title>t34->t35</title>
+<path style="fill:none;stroke:gray;" d="M645,-638C659,-631 675,-621 689,-613"/>
+<polygon style="fill:gray;stroke:gray;" points="690.958,-615.916 698,-608 687.559,-609.797 690.958,-615.916"/>
+</g>
+<!-- t34->t37 -->
+<g id="edge105" class="edge"><title>t34->t37</title>
+<path style="fill:none;stroke:gray;" d="M602,-638C597,-629 592,-618 589,-608 580,-567 571,-548 593,-514 600,-504 610,-496 620,-489"/>
+<polygon style="fill:gray;stroke:gray;" points="621.958,-491.916 629,-484 618.559,-485.797 621.958,-491.916"/>
+</g>
+<!-- t39 -->
+<g id="node72" class="node"><title>t39</title>
+<polygon style="fill:none;stroke:blue;" points="884,-392 682,-392 678,-388 678,-356 880,-356 884,-360 884,-392"/>
+<polyline style="fill:none;stroke:blue;" points="880,-388 678,-388 "/>
+<polyline style="fill:none;stroke:blue;" points="880,-388 880,-356 "/>
+<polyline style="fill:none;stroke:blue;" points="880,-388 884,-392 "/>
+<text text-anchor="middle" x="781" y="-367.5" style="font-family:Times New Roman;font-size:20.00;fill:blue;">remove_simple_repeats</text>
+</g>
+<!-- t34->t39 -->
+<g id="edge109" class="edge"><title>t34->t39</title>
+<path style="fill:none;stroke:gray;" d="M665,-654C733,-650 847,-639 873,-608 926,-544 849,-445 806,-399"/>
+<polygon style="fill:gray;stroke:gray;" points="808.546,-396.596 799,-392 803.596,-401.546 808.546,-396.596"/>
+</g>
+<!-- t40 -->
+<g id="node74" class="node"><title>t40</title>
+<polygon style="fill:none;stroke:blue;" points="750.217,-288.774 659,-303.92 567.783,-288.774 567.868,-264.266 750.132,-264.266 750.217,-288.774"/>
+<polygon style="fill:none;stroke:blue;" points="754.229,-292.162 659,-307.975 563.771,-292.162 563.882,-260.266 754.118,-260.266 754.229,-292.162"/>
+<text text-anchor="middle" x="659" y="-275.5" style="font-family:Times New Roman;font-size:20.00;fill:blue;">report_islands</text>
+</g>
+<!-- t34->t40 -->
+<g id="edge111" class="edge"><title>t34->t40</title>
+<path style="fill:none;stroke:gray;" d="M597,-638C590,-629 583,-619 579,-608 554,-537 524,-499 569,-440 592,-410 626,-446 649,-418 662,-403 662,-353 661,-318"/>
+<polygon style="fill:gray;stroke:gray;" points="664.5,-318 661,-308 657.5,-318 664.5,-318"/>
+</g>
+<!-- t36 -->
+<g id="node68" class="node"><title>t36</title>
+<polygon style="fill:#90ee90;stroke:green;" points="852,-550 606,-550 602,-546 602,-514 848,-514 852,-518 852,-550"/>
+<polyline style="fill:none;stroke:green;" points="848,-546 602,-546 "/>
+<polyline style="fill:none;stroke:green;" points="848,-546 848,-514 "/>
+<polyline style="fill:none;stroke:green;" points="848,-546 852,-550 "/>
+<text text-anchor="middle" x="727" y="-525.5" style="font-family:Times New Roman;font-size:20.00;">run_RepeatMasker_on_reads</text>
+</g>
+<!-- t35->t36 -->
+<g id="edge65" class="edge"><title>t35->t36</title>
+<path style="fill:none;stroke:gray;" d="M730,-572C730,-568 730,-564 729,-560"/>
+<polygon style="fill:gray;stroke:gray;" points="732.478,-559.602 728,-550 725.512,-560.299 732.478,-559.602"/>
+</g>
+<!-- t36->t37 -->
+<g id="edge67" class="edge"><title>t36->t37</title>
+<path style="fill:none;stroke:gray;" d="M716,-514C712,-508 707,-500 703,-493"/>
+<polygon style="fill:gray;stroke:gray;" points="705.916,-491.042 698,-484 699.797,-494.441 705.916,-491.042"/>
+</g>
+<!-- t37->t39 -->
+<g id="edge69" class="edge"><title>t37->t39</title>
+<path style="fill:none;stroke:gray;" d="M705,-448C719,-434 738,-415 754,-399"/>
+<polygon style="fill:gray;stroke:gray;" points="756.779,-401.219 762,-392 752.169,-395.951 756.779,-401.219"/>
+</g>
+<!-- t37->t38 -->
+<g id="edge73" class="edge"><title>t37->t38</title>
+<path style="fill:none;stroke:gray;" d="M646,-448C626,-438 599,-426 573,-414"/>
+<polygon style="fill:gray;stroke:gray;" points="574.56,-410.863 564,-410 571.717,-417.26 574.56,-410.863"/>
+</g>
+<!-- t39->t40 -->
+<g id="edge71" class="edge"><title>t39->t40</title>
+<path style="fill:none;stroke:blue;" d="M757,-356C739,-343 715,-324 695,-309"/>
+<polygon style="fill:blue;stroke:blue;" points="697.1,-306.2 687,-303 692.9,-311.8 697.1,-306.2"/>
+</g>
+<!-- t41 -->
+<g id="node78" class="node"><title>t41</title>
+<polygon style="fill:none;stroke:blue;" points="585,-234 389,-234 385,-230 385,-198 581,-198 585,-202 585,-234"/>
+<polyline style="fill:none;stroke:blue;" points="581,-230 385,-230 "/>
+<polyline style="fill:none;stroke:blue;" points="581,-230 581,-198 "/>
+<polyline style="fill:none;stroke:blue;" points="581,-230 585,-234 "/>
+<text text-anchor="middle" x="485" y="-209.5" style="font-family:Times New Roman;font-size:20.00;fill:blue;">make_circos_conf_file</text>
+</g>
+<!-- t40->t41 -->
+<g id="edge75" class="edge"><title>t40->t41</title>
+<path style="fill:none;stroke:blue;" d="M602,-260C583,-253 561,-245 542,-238"/>
+<polygon style="fill:blue;stroke:blue;" points="543.56,-234.863 533,-234 540.717,-241.26 543.56,-234.863"/>
+</g>
+<!-- t38->t41 -->
+<g id="edge115" class="edge"><title>t38->t41</title>
+<path style="fill:none;stroke:gray;" d="M485,-330C485,-303 485,-268 485,-244"/>
+<polygon style="fill:gray;stroke:gray;" points="488.5,-244 485,-234 481.5,-244 488.5,-244"/>
+</g>
+<!-- t42 -->
+<g id="node80" class="node"><title>t42</title>
+<polygon style="fill:none;stroke:blue;stroke-dasharray:5,2;" points="531,-176 371,-176 367,-172 367,-140 527,-140 531,-144 531,-176"/>
+<polyline style="fill:none;stroke:blue;stroke-dasharray:5,2;" points="527,-172 367,-172 "/>
+<polyline style="fill:none;stroke:blue;stroke-dasharray:5,2;" points="527,-172 527,-140 "/>
+<polyline style="fill:none;stroke:blue;stroke-dasharray:5,2;" points="527,-172 531,-176 "/>
+<text text-anchor="middle" x="449" y="-151.5" style="font-family:Times New Roman;font-size:20.00;fill:blue;">run_circos_islands</text>
+</g>
+<!-- t41->t42 -->
+<g id="edge77" class="edge"><title>t41->t42</title>
+<path style="fill:none;stroke:blue;" d="M474,-198C472,-194 469,-189 466,-185"/>
+<polygon style="fill:blue;stroke:blue;" points="468.459,-182.379 460,-176 462.635,-186.262 468.459,-182.379"/>
+</g>
+<!-- t42->t46 -->
+<g id="edge81" class="edge"><title>t42->t46</title>
+<path style="fill:none;stroke:blue;" d="M428,-140C422,-135 416,-130 410,-125"/>
+<polygon style="fill:blue;stroke:blue;" points="411.831,-121.951 402,-118 407.221,-127.219 411.831,-121.951"/>
+</g>
+<!-- t47 -->
+<g id="node85" class="node"><title>t47</title>
+<polygon style="fill:#fff68f;stroke:black;" points="456,-60 308,-60 304,-56 304,-24 452,-24 456,-28 456,-60"/>
+<polyline style="fill:none;stroke:black;" points="452,-56 304,-56 "/>
+<polyline style="fill:none;stroke:black;" points="452,-56 452,-24 "/>
+<polyline style="fill:none;stroke:black;" points="452,-56 456,-60 "/>
+<text text-anchor="middle" x="380" y="-35.5" style="font-family:Times New Roman;font-size:20.00;">target_final_task</text>
+</g>
+<!-- t46->t47 -->
+<g id="edge83" class="edge"><title>t46->t47</title>
+<path style="fill:none;stroke:blue;" d="M380,-82C380,-78 380,-74 380,-70"/>
+<polygon style="fill:blue;stroke:blue;" points="383.5,-70 380,-60 376.5,-70 383.5,-70"/>
+</g>
+</g>
+</svg>
diff --git a/doc/_build/html/_downloads/gallery_snp_annotation.svg b/doc/_build/html/_downloads/gallery_snp_annotation.svg
new file mode 100644
index 0000000..9a7995c
--- /dev/null
+++ b/doc/_build/html/_downloads/gallery_snp_annotation.svg
@@ -0,0 +1,470 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.0//EN"
+ "http://www.w3.org/TR/2001/REC-SVG-20010904/DTD/svg10.dtd" [
+ <!ATTLIST svg xmlns:xlink CDATA #FIXED "http://www.w3.org/1999/xlink">
+]>
+<!-- Generated by Graphviz version 2.20.2 (Mon Aug 4 08:59:22 UTC 2008)
+ For user: Andreas Heger -->
+<!-- Title: tree Pages: 1 -->
+<svg width="864pt" height="420pt"
+ viewBox="0.00 0.00 432.00 203.22" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<g id="graph0" class="graph" transform="scale(0.311688 0.311688) rotate(0) translate(4 648)">
+<title>SNP Annotation Pipeline</title>
+<polygon style="fill:white;stroke:white;" points="-4,4 -4,-648 1382,-648 1382,4 -4,4"/>
+<g id="cluster2" class="cluster"><title>clustertasks</title>
+<polygon style="fill:none;stroke:black;" points="8,-16 8,-628 1194,-628 1194,-16 8,-16"/>
+<text text-anchor="middle" x="601" y="-597" style="font-family:Times New Roman;font-size:20.00pt;fill:red;"><tspan font-weight = "bold">SNP Annotation Pipeline:</tspan></text>
+</g>
+<g id="cluster3" class="cluster"><title>clusterkey</title>
+<polygon style="fill:#e5e5e5;stroke:#e5e5e5;" points="1202,-324 1202,-636 1370,-636 1370,-324 1202,-324"/>
+<text text-anchor="middle" x="1286" y="-605" style="font-family:Times New Roman;font-size:30.00;">Key:</text>
+</g>
+<!-- t8 -->
+<g id="node2" class="node"><title>t8</title>
+<polygon style="fill:#90ee90;stroke:green;" points="804,-486 662,-486 658,-482 658,-450 800,-450 804,-454 804,-486"/>
+<polyline style="fill:none;stroke:green;" points="800,-482 658,-482 "/>
+<polyline style="fill:none;stroke:green;" points="800,-482 800,-450 "/>
+<polyline style="fill:none;stroke:green;" points="800,-482 804,-486 "/>
+<text text-anchor="middle" x="731" y="-461.5" style="font-family:Times New Roman;font-size:20.00;">buildTranscripts</text>
+</g>
+<!-- t9 -->
+<g id="node3" class="node"><title>t9</title>
+<polygon style="fill:#90ee90;stroke:green;" points="812,-376 676,-376 672,-372 672,-340 808,-340 812,-344 812,-376"/>
+<polyline style="fill:none;stroke:green;" points="808,-372 672,-372 "/>
+<polyline style="fill:none;stroke:green;" points="808,-372 808,-340 "/>
+<polyline style="fill:none;stroke:green;" points="808,-372 812,-376 "/>
+<text text-anchor="middle" x="742" y="-351.5" style="font-family:Times New Roman;font-size:20.00;">loadTranscripts</text>
+</g>
+<!-- t8->t9 -->
+<g id="edge3" class="edge"><title>t8->t9</title>
+<path style="fill:none;stroke:gray;" d="M733,-450C735,-433 737,-407 739,-386"/>
+<polygon style="fill:gray;stroke:gray;" points="742.488,-386.299 740,-376 735.522,-385.602 742.488,-386.299"/>
+</g>
+<!-- t27 -->
+<g id="node41" class="node"><title>t27</title>
+<polygon style="fill:#90ee90;stroke:green;" points="654,-376 544,-376 540,-372 540,-340 650,-340 654,-344 654,-376"/>
+<polyline style="fill:none;stroke:green;" points="650,-372 540,-372 "/>
+<polyline style="fill:none;stroke:green;" points="650,-372 650,-340 "/>
+<polyline style="fill:none;stroke:green;" points="650,-372 654,-376 "/>
+<text text-anchor="middle" x="597" y="-351.5" style="font-family:Times New Roman;font-size:20.00;">buildAlleles</text>
+</g>
+<!-- t8->t27 -->
+<g id="edge69" class="edge"><title>t8->t27</title>
+<path style="fill:none;stroke:gray;" d="M709,-450C686,-432 652,-403 627,-382"/>
+<polygon style="fill:gray;stroke:gray;" points="629.1,-379.2 619,-376 624.9,-384.8 629.1,-379.2"/>
+</g>
+<!-- t64 -->
+<g id="node12" class="node"><title>t64</title>
+<polygon style="fill:none;stroke:blue;" points="906,-292 834,-292 830,-288 830,-256 902,-256 906,-260 906,-292"/>
+<polyline style="fill:none;stroke:blue;" points="902,-288 830,-288 "/>
+<polyline style="fill:none;stroke:blue;" points="902,-288 902,-256 "/>
+<polyline style="fill:none;stroke:blue;" points="902,-288 906,-292 "/>
+<text text-anchor="middle" x="868" y="-267.5" style="font-family:Times New Roman;font-size:20.00;fill:blue;">prepare</text>
+</g>
+<!-- t9->t64 -->
+<g id="edge53" class="edge"><title>t9->t64</title>
+<path style="fill:none;stroke:gray;" d="M769,-340C787,-328 812,-311 832,-298"/>
+<polygon style="fill:gray;stroke:gray;" points="834.621,-300.459 841,-292 830.738,-294.635 834.621,-300.459"/>
+</g>
+<!-- t11 -->
+<g id="node5" class="node"><title>t11</title>
+<polygon style="fill:#90ee90;stroke:green;" points="854,-578 632,-578 628,-574 628,-542 850,-542 854,-546 854,-578"/>
+<polyline style="fill:none;stroke:green;" points="850,-574 628,-574 "/>
+<polyline style="fill:none;stroke:green;" points="850,-574 850,-542 "/>
+<polyline style="fill:none;stroke:green;" points="850,-574 854,-578 "/>
+<text text-anchor="middle" x="741" y="-553.5" style="font-family:Times New Roman;font-size:20.00;">loadTranscriptInformation</text>
+</g>
+<!-- t11->t64 -->
+<g id="edge9" class="edge"><title>t11->t64</title>
+<path style="fill:none;stroke:gray;" d="M780,-542C793,-534 805,-524 813,-512 861,-436 810,-396 841,-314 843,-309 845,-305 848,-301"/>
+<polygon style="fill:gray;stroke:gray;" points="851.203,-302.441 853,-292 845.084,-299.042 851.203,-302.441"/>
+</g>
+<!-- t16 -->
+<g id="node15" class="node"><title>t16</title>
+<polygon style="fill:#90ee90;stroke:green;" points="476.809,-455.794 554,-428.5 631.191,-455.794 631.119,-499.956 476.881,-499.956 476.809,-455.794"/>
+<polygon style="fill:none;stroke:green;" points="472.804,-452.967 554,-424.257 635.196,-452.967 635.113,-503.956 472.887,-503.956 472.804,-452.967"/>
+<text text-anchor="middle" x="554" y="-461.5" style="font-family:Times New Roman;font-size:20.00;">buildSelenoList</text>
+</g>
+<!-- t11->t16 -->
+<g id="edge59" class="edge"><title>t11->t16</title>
+<path style="fill:none;stroke:gray;" d="M704,-542C684,-532 660,-520 636,-508"/>
+<polygon style="fill:gray;stroke:gray;" points="637.56,-504.863 627,-504 634.717,-511.26 637.56,-504.863"/>
+</g>
+<!-- t4 -->
+<g id="node6" class="node"><title>t4</title>
+<polygon style="fill:#90ee90;stroke:green;" points="1034,-578 876,-578 872,-574 872,-542 1030,-542 1034,-546 1034,-578"/>
+<polyline style="fill:none;stroke:green;" points="1030,-574 872,-574 "/>
+<polyline style="fill:none;stroke:green;" points="1030,-574 1030,-542 "/>
+<polyline style="fill:none;stroke:green;" points="1030,-574 1034,-578 "/>
+<text text-anchor="middle" x="953" y="-553.5" style="font-family:Times New Roman;font-size:20.00;">buildGeneRegions</text>
+</g>
+<!-- t5 -->
+<g id="node7" class="node"><title>t5</title>
+<polygon style="fill:#90ee90;stroke:green;" points="973,-486 871,-486 867,-482 867,-450 969,-450 973,-454 973,-486"/>
+<polyline style="fill:none;stroke:green;" points="969,-482 867,-482 "/>
+<polyline style="fill:none;stroke:green;" points="969,-482 969,-450 "/>
+<polyline style="fill:none;stroke:green;" points="969,-482 973,-486 "/>
+<text text-anchor="middle" x="920" y="-461.5" style="font-family:Times New Roman;font-size:20.00;">buildGenes</text>
+</g>
+<!-- t4->t5 -->
+<g id="edge5" class="edge"><title>t4->t5</title>
+<path style="fill:none;stroke:gray;" d="M946,-542C942,-529 935,-511 930,-496"/>
+<polygon style="fill:gray;stroke:gray;" points="933.226,-494.573 927,-486 926.521,-496.584 933.226,-494.573"/>
+</g>
+<!-- t7 -->
+<g id="node9" class="node"><title>t7</title>
+<polygon style="fill:#90ee90;stroke:green;" points="982,-376 854,-376 850,-372 850,-340 978,-340 982,-344 982,-376"/>
+<polyline style="fill:none;stroke:green;" points="978,-372 850,-372 "/>
+<polyline style="fill:none;stroke:green;" points="978,-372 978,-340 "/>
+<polyline style="fill:none;stroke:green;" points="978,-372 982,-376 "/>
+<text text-anchor="middle" x="916" y="-351.5" style="font-family:Times New Roman;font-size:20.00;">loadGeneStats</text>
+</g>
+<!-- t5->t7 -->
+<g id="edge7" class="edge"><title>t5->t7</title>
+<path style="fill:none;stroke:gray;" d="M919,-450C918,-433 917,-407 917,-386"/>
+<polygon style="fill:gray;stroke:gray;" points="920.5,-386 917,-376 913.5,-386 920.5,-386"/>
+</g>
+<!-- t7->t64 -->
+<g id="edge55" class="edge"><title>t7->t64</title>
+<path style="fill:none;stroke:gray;" d="M906,-340C899,-329 890,-314 883,-301"/>
+<polygon style="fill:gray;stroke:gray;" points="885.916,-299.042 878,-292 879.797,-302.441 885.916,-299.042"/>
+</g>
+<!-- t6 -->
+<g id="node11" class="node"><title>t6</title>
+<polygon style="fill:#90ee90;stroke:green;" points="1186,-376 1004,-376 1000,-372 1000,-340 1182,-340 1186,-344 1186,-376"/>
+<polyline style="fill:none;stroke:green;" points="1182,-372 1000,-372 "/>
+<polyline style="fill:none;stroke:green;" points="1182,-372 1182,-340 "/>
+<polyline style="fill:none;stroke:green;" points="1182,-372 1186,-376 "/>
+<text text-anchor="middle" x="1093" y="-351.5" style="font-family:Times New Roman;font-size:20.00;">loadGeneInformation</text>
+</g>
+<!-- t6->t64 -->
+<g id="edge57" class="edge"><title>t6->t64</title>
+<path style="fill:none;stroke:gray;" d="M1054,-340C1035,-331 1012,-322 991,-314 966,-304 939,-295 916,-288"/>
+<polygon style="fill:gray;stroke:gray;" points="916.584,-284.521 906,-285 914.573,-291.226 916.584,-284.521"/>
+</g>
+<!-- t68 -->
+<g id="node51" class="node"><title>t68</title>
+<polygon style="fill:#fff68f;stroke:black;" points="587,-60 537,-60 533,-56 533,-24 583,-24 587,-28 587,-60"/>
+<polyline style="fill:none;stroke:black;" points="583,-56 533,-56 "/>
+<polyline style="fill:none;stroke:black;" points="583,-56 583,-24 "/>
+<polyline style="fill:none;stroke:black;" points="583,-56 587,-60 "/>
+<text text-anchor="middle" x="560" y="-35.5" style="font-family:Times New Roman;font-size:20.00;">full</text>
+</g>
+<!-- t64->t68 -->
+<g id="edge51" class="edge"><title>t64->t68</title>
+<path style="fill:none;stroke:blue;" d="M862,-256C852,-228 829,-174 793,-140 738,-86 648,-59 597,-49"/>
+<polygon style="fill:blue;stroke:blue;" points="597.492,-45.5292 587,-47 596.119,-52.3933 597.492,-45.5292"/>
+</g>
+<!-- t15 -->
+<g id="node14" class="node"><title>t15</title>
+<polygon style="fill:#90ee90;stroke:green;" points="610,-578 468,-578 464,-574 464,-542 606,-542 610,-546 610,-578"/>
+<polyline style="fill:none;stroke:green;" points="606,-574 464,-574 "/>
+<polyline style="fill:none;stroke:green;" points="606,-574 606,-542 "/>
+<polyline style="fill:none;stroke:green;" points="606,-574 610,-578 "/>
+<text text-anchor="middle" x="537" y="-553.5" style="font-family:Times New Roman;font-size:20.00;">loadProteinStats</text>
+</g>
+<!-- t15->t16 -->
+<g id="edge11" class="edge"><title>t15->t16</title>
+<path style="fill:none;stroke:gray;" d="M540,-542C541,-534 544,-524 545,-514"/>
+<polygon style="fill:gray;stroke:gray;" points="548.471,-514.492 547,-504 541.607,-513.119 548.471,-514.492"/>
+</g>
+<!-- t25 -->
+<g id="node17" class="node"><title>t25</title>
+<polygon style="fill:#90ee90;stroke:green;" points="522,-376 410,-376 406,-372 406,-340 518,-340 522,-344 522,-376"/>
+<polyline style="fill:none;stroke:green;" points="518,-372 406,-372 "/>
+<polyline style="fill:none;stroke:green;" points="518,-372 518,-340 "/>
+<polyline style="fill:none;stroke:green;" points="518,-372 522,-376 "/>
+<text text-anchor="middle" x="464" y="-351.5" style="font-family:Times New Roman;font-size:20.00;">makeEffects</text>
+</g>
+<!-- t16->t25 -->
+<g id="edge13" class="edge"><title>t16->t25</title>
+<path style="fill:none;stroke:gray;" d="M526,-434C513,-418 498,-399 485,-384"/>
+<polygon style="fill:gray;stroke:gray;" points="487.8,-381.9 479,-376 482.2,-386.1 487.8,-381.9"/>
+</g>
+<!-- t16->t27 -->
+<g id="edge35" class="edge"><title>t16->t27</title>
+<path style="fill:none;stroke:gray;" d="M569,-430C575,-416 581,-399 586,-385"/>
+<polygon style="fill:gray;stroke:gray;" points="589.137,-386.56 590,-376 582.74,-383.717 589.137,-386.56"/>
+</g>
+<!-- t26 -->
+<g id="node19" class="node"><title>t26</title>
+<polygon style="fill:#90ee90;stroke:green;" points="554,-292 450,-292 446,-288 446,-256 550,-256 554,-260 554,-292"/>
+<polyline style="fill:none;stroke:green;" points="550,-288 446,-288 "/>
+<polyline style="fill:none;stroke:green;" points="550,-288 550,-256 "/>
+<polyline style="fill:none;stroke:green;" points="550,-288 554,-292 "/>
+<text text-anchor="middle" x="500" y="-267.5" style="font-family:Times New Roman;font-size:20.00;">loadEffects</text>
+</g>
+<!-- t25->t26 -->
+<g id="edge15" class="edge"><title>t25->t26</title>
+<path style="fill:none;stroke:gray;" d="M472,-340C477,-329 483,-314 488,-302"/>
+<polygon style="fill:gray;stroke:gray;" points="491.536,-302.585 492,-292 485.036,-299.985 491.536,-302.585"/>
+</g>
+<!-- t65 -->
+<g id="node21" class="node"><title>t65</title>
+<polygon style="fill:none;stroke:blue;" points="486,-234 366,-234 362,-230 362,-198 482,-198 486,-202 486,-234"/>
+<polyline style="fill:none;stroke:blue;" points="482,-230 362,-230 "/>
+<polyline style="fill:none;stroke:blue;" points="482,-230 482,-198 "/>
+<polyline style="fill:none;stroke:blue;" points="482,-230 486,-234 "/>
+<text text-anchor="middle" x="424" y="-209.5" style="font-family:Times New Roman;font-size:20.00;fill:blue;">consequences</text>
+</g>
+<!-- t25->t65 -->
+<g id="edge61" class="edge"><title>t25->t65</title>
+<path style="fill:none;stroke:gray;" d="M455,-340C450,-327 442,-308 437,-292 433,-277 430,-259 427,-244"/>
+<polygon style="fill:gray;stroke:gray;" points="430.478,-243.602 426,-234 423.512,-244.299 430.478,-243.602"/>
+</g>
+<!-- t26->t65 -->
+<g id="edge17" class="edge"><title>t26->t65</title>
+<path style="fill:none;stroke:gray;" d="M476,-256C470,-251 463,-246 456,-240"/>
+<polygon style="fill:gray;stroke:gray;" points="458.1,-237.2 448,-234 453.9,-242.8 458.1,-237.2"/>
+</g>
+<!-- t65->t68 -->
+<g id="edge49" class="edge"><title>t65->t68</title>
+<path style="fill:none;stroke:blue;" d="M445,-198C452,-191 460,-183 466,-176 497,-141 526,-96 544,-69"/>
+<polygon style="fill:blue;stroke:blue;" points="547.203,-70.4414 549,-60 541.084,-67.0418 547.203,-70.4414"/>
+</g>
+<!-- t49 -->
+<g id="node23" class="node"><title>t49</title>
+<polygon style="fill:#90ee90;stroke:green;" points="19.9571,-345.794 114,-318.5 208.043,-345.794 207.955,-389.956 20.0448,-389.956 19.9571,-345.794"/>
+<polygon style="fill:none;stroke:green;" points="15.9511,-342.791 114,-314.335 212.049,-342.791 211.947,-393.956 16.0528,-393.956 15.9511,-342.791"/>
+<text text-anchor="middle" x="114" y="-351.5" style="font-family:Times New Roman;font-size:20.00;">buildPolyphenInput</text>
+</g>
+<!-- t50 -->
+<g id="node24" class="node"><title>t50</title>
+<polygon style="fill:#90ee90;stroke:green;" points="214,-292 20,-292 16,-288 16,-256 210,-256 214,-260 214,-292"/>
+<polyline style="fill:none;stroke:green;" points="210,-288 16,-288 "/>
+<polyline style="fill:none;stroke:green;" points="210,-288 210,-256 "/>
+<polyline style="fill:none;stroke:green;" points="210,-288 214,-292 "/>
+<text text-anchor="middle" x="115" y="-267.5" style="font-family:Times New Roman;font-size:20.00;">buildPolyphenFeatures</text>
+</g>
+<!-- t49->t50 -->
+<g id="edge19" class="edge"><title>t49->t50</title>
+<path style="fill:none;stroke:gray;" d="M115,-314C115,-310 115,-306 115,-302"/>
+<polygon style="fill:gray;stroke:gray;" points="118.5,-302 115,-292 111.5,-302 118.5,-302"/>
+</g>
+<!-- t52 -->
+<g id="node30" class="node"><title>t52</title>
+<polygon style="fill:#90ee90;stroke:green;" points="457,-176 301,-176 297,-172 297,-140 453,-140 457,-144 457,-176"/>
+<polyline style="fill:none;stroke:green;" points="453,-172 297,-172 "/>
+<polyline style="fill:none;stroke:green;" points="453,-172 453,-140 "/>
+<polyline style="fill:none;stroke:green;" points="453,-172 457,-176 "/>
+<text text-anchor="middle" x="377" y="-151.5" style="font-family:Times New Roman;font-size:20.00;">loadPolyphenMap</text>
+</g>
+<!-- t49->t52 -->
+<g id="edge25" class="edge"><title>t49->t52</title>
+<path style="fill:none;stroke:gray;" d="M172,-331C192,-320 214,-307 232,-292 249,-278 249,-270 263,-256 290,-229 322,-202 346,-182"/>
+<polygon style="fill:gray;stroke:gray;" points="348.1,-184.8 354,-176 343.9,-179.2 348.1,-184.8"/>
+</g>
+<!-- t57 -->
+<g id="node35" class="node"><title>t57</title>
+<polygon style="fill:none;stroke:blue;" points="271,-234 173,-234 169,-230 169,-198 267,-198 271,-202 271,-234"/>
+<polyline style="fill:none;stroke:blue;" points="267,-230 169,-230 "/>
+<polyline style="fill:none;stroke:blue;" points="267,-230 267,-198 "/>
+<polyline style="fill:none;stroke:blue;" points="267,-230 271,-234 "/>
+<text text-anchor="middle" x="220" y="-209.5" style="font-family:Times New Roman;font-size:20.00;fill:blue;">runPanther</text>
+</g>
+<!-- t49->t57 -->
+<g id="edge63" class="edge"><title>t49->t57</title>
+<path style="fill:none;stroke:gray;" d="M179,-333C196,-323 213,-310 223,-292 231,-278 231,-259 228,-244"/>
+<polygon style="fill:gray;stroke:gray;" points="231.393,-243.119 226,-234 224.529,-244.492 231.393,-243.119"/>
+</g>
+<!-- t51 -->
+<g id="node26" class="node"><title>t51</title>
+<polygon style="fill:#90ee90;stroke:green;" points="149,-234 37,-234 33,-230 33,-198 145,-198 149,-202 149,-234"/>
+<polyline style="fill:none;stroke:green;" points="145,-230 33,-230 "/>
+<polyline style="fill:none;stroke:green;" points="145,-230 145,-198 "/>
+<polyline style="fill:none;stroke:green;" points="145,-230 149,-234 "/>
+<text text-anchor="middle" x="91" y="-209.5" style="font-family:Times New Roman;font-size:20.00;">runPolyphen</text>
+</g>
+<!-- t50->t51 -->
+<g id="edge21" class="edge"><title>t50->t51</title>
+<path style="fill:none;stroke:gray;" d="M108,-256C106,-252 104,-248 102,-243"/>
+<polygon style="fill:gray;stroke:gray;" points="105.26,-241.717 98,-234 98.8631,-244.56 105.26,-241.717"/>
+</g>
+<!-- t53 -->
+<g id="node28" class="node"><title>t53</title>
+<polygon style="fill:#90ee90;stroke:green;" points="151,-176 31,-176 27,-172 27,-140 147,-140 151,-144 151,-176"/>
+<polyline style="fill:none;stroke:green;" points="147,-172 27,-172 "/>
+<polyline style="fill:none;stroke:green;" points="147,-172 147,-140 "/>
+<polyline style="fill:none;stroke:green;" points="147,-172 151,-176 "/>
+<text text-anchor="middle" x="89" y="-151.5" style="font-family:Times New Roman;font-size:20.00;">loadPolyphen</text>
+</g>
+<!-- t51->t53 -->
+<g id="edge23" class="edge"><title>t51->t53</title>
+<path style="fill:none;stroke:gray;" d="M90,-198C90,-194 90,-190 90,-186"/>
+<polygon style="fill:gray;stroke:gray;" points="93.5001,-186 90,-176 86.5001,-186 93.5001,-186"/>
+</g>
+<!-- t67 -->
+<g id="node39" class="node"><title>t67</title>
+<polygon style="fill:none;stroke:blue;" points="336,-118 270,-118 266,-114 266,-82 332,-82 336,-86 336,-118"/>
+<polyline style="fill:none;stroke:blue;" points="332,-114 266,-114 "/>
+<polyline style="fill:none;stroke:blue;" points="332,-114 332,-82 "/>
+<polyline style="fill:none;stroke:blue;" points="332,-114 336,-118 "/>
+<text text-anchor="middle" x="301" y="-93.5" style="font-family:Times New Roman;font-size:20.00;fill:blue;">effects</text>
+</g>
+<!-- t53->t67 -->
+<g id="edge65" class="edge"><title>t53->t67</title>
+<path style="fill:none;stroke:gray;" d="M151,-141C185,-132 226,-121 256,-113"/>
+<polygon style="fill:gray;stroke:gray;" points="257.427,-116.226 266,-110 255.416,-109.521 257.427,-116.226"/>
+</g>
+<!-- t52->t67 -->
+<g id="edge67" class="edge"><title>t52->t67</title>
+<path style="fill:none;stroke:gray;" d="M353,-140C347,-135 340,-130 333,-124"/>
+<polygon style="fill:gray;stroke:gray;" points="335.1,-121.2 325,-118 330.9,-126.8 335.1,-121.2"/>
+</g>
+<!-- t12 -->
+<g id="node32" class="node"><title>t12</title>
+<polygon style="fill:#90ee90;stroke:green;" points="388,-376 234,-376 230,-372 230,-340 384,-340 388,-344 388,-376"/>
+<polyline style="fill:none;stroke:green;" points="384,-372 230,-372 "/>
+<polyline style="fill:none;stroke:green;" points="384,-372 384,-340 "/>
+<polyline style="fill:none;stroke:green;" points="384,-372 388,-376 "/>
+<text text-anchor="middle" x="309" y="-351.5" style="font-family:Times New Roman;font-size:20.00;">buildPeptideFasta</text>
+</g>
+<!-- t56 -->
+<g id="node33" class="node"><title>t56</title>
+<polygon style="fill:#90ee90;stroke:green;" points="408,-292 276,-292 272,-288 272,-256 404,-256 408,-260 408,-292"/>
+<polyline style="fill:none;stroke:green;" points="404,-288 272,-288 "/>
+<polyline style="fill:none;stroke:green;" points="404,-288 404,-256 "/>
+<polyline style="fill:none;stroke:green;" points="404,-288 408,-292 "/>
+<text text-anchor="middle" x="340" y="-267.5" style="font-family:Times New Roman;font-size:20.00;">preparePanther</text>
+</g>
+<!-- t12->t56 -->
+<g id="edge27" class="edge"><title>t12->t56</title>
+<path style="fill:none;stroke:gray;" d="M316,-340C320,-329 325,-314 330,-302"/>
+<polygon style="fill:gray;stroke:gray;" points="333.479,-302.584 333,-292 326.774,-300.573 333.479,-302.584"/>
+</g>
+<!-- t56->t57 -->
+<g id="edge29" class="edge"><title>t56->t57</title>
+<path style="fill:none;stroke:gray;" d="M303,-256C291,-250 278,-244 266,-238"/>
+<polygon style="fill:gray;stroke:gray;" points="267.56,-234.863 257,-234 264.717,-241.26 267.56,-234.863"/>
+</g>
+<!-- t58 -->
+<g id="node37" class="node"><title>t58</title>
+<polygon style="fill:none;stroke:blue;" points="279,-176 173,-176 169,-172 169,-140 275,-140 279,-144 279,-176"/>
+<polyline style="fill:none;stroke:blue;" points="275,-172 169,-172 "/>
+<polyline style="fill:none;stroke:blue;" points="275,-172 275,-140 "/>
+<polyline style="fill:none;stroke:blue;" points="275,-172 279,-176 "/>
+<text text-anchor="middle" x="224" y="-151.5" style="font-family:Times New Roman;font-size:20.00;fill:blue;">loadPanther</text>
+</g>
+<!-- t57->t58 -->
+<g id="edge31" class="edge"><title>t57->t58</title>
+<path style="fill:none;stroke:blue;" d="M221,-198C221,-194 221,-190 222,-186"/>
+<polygon style="fill:blue;stroke:blue;" points="225.488,-186.299 223,-176 218.522,-185.602 225.488,-186.299"/>
+</g>
+<!-- t58->t67 -->
+<g id="edge33" class="edge"><title>t58->t67</title>
+<path style="fill:none;stroke:blue;" d="M248,-140C254,-135 262,-129 269,-124"/>
+<polygon style="fill:blue;stroke:blue;" points="271.1,-126.8 277,-118 266.9,-121.2 271.1,-126.8"/>
+</g>
+<!-- t67->t68 -->
+<g id="edge45" class="edge"><title>t67->t68</title>
+<path style="fill:none;stroke:blue;" d="M336,-92C385,-81 473,-62 523,-50"/>
+<polygon style="fill:blue;stroke:blue;" points="523.881,-53.3933 533,-48 522.508,-46.5292 523.881,-53.3933"/>
+</g>
+<!-- t28 -->
+<g id="node43" class="node"><title>t28</title>
+<polygon style="fill:#90ee90;stroke:green;" points="680,-292 576,-292 572,-288 572,-256 676,-256 680,-260 680,-292"/>
+<polyline style="fill:none;stroke:green;" points="676,-288 572,-288 "/>
+<polyline style="fill:none;stroke:green;" points="676,-288 676,-256 "/>
+<polyline style="fill:none;stroke:green;" points="676,-288 680,-292 "/>
+<text text-anchor="middle" x="626" y="-267.5" style="font-family:Times New Roman;font-size:20.00;">loadAlleles</text>
+</g>
+<!-- t27->t28 -->
+<g id="edge37" class="edge"><title>t27->t28</title>
+<path style="fill:none;stroke:gray;" d="M603,-340C607,-329 612,-314 617,-302"/>
+<polygon style="fill:gray;stroke:gray;" points="620.479,-302.584 620,-292 613.774,-300.573 620.479,-302.584"/>
+</g>
+<!-- t66 -->
+<g id="node49" class="node"><title>t66</title>
+<polygon style="fill:none;stroke:blue;" points="672,-118 608,-118 604,-114 604,-82 668,-82 672,-86 672,-118"/>
+<polyline style="fill:none;stroke:blue;" points="668,-114 604,-114 "/>
+<polyline style="fill:none;stroke:blue;" points="668,-114 668,-82 "/>
+<polyline style="fill:none;stroke:blue;" points="668,-114 672,-118 "/>
+<text text-anchor="middle" x="638" y="-93.5" style="font-family:Times New Roman;font-size:20.00;fill:blue;">alleles</text>
+</g>
+<!-- t27->t66 -->
+<g id="edge75" class="edge"><title>t27->t66</title>
+<path style="fill:none;stroke:gray;" d="M623,-340C636,-332 650,-322 663,-314 724,-276 766,-295 801,-234 823,-196 805,-168 771,-140 746,-119 710,-109 682,-105"/>
+<polygon style="fill:gray;stroke:gray;" points="682.492,-101.529 672,-103 681.119,-108.393 682.492,-101.529"/>
+</g>
+<!-- t29 -->
+<g id="node45" class="node"><title>t29</title>
+<polygon style="fill:#90ee90;stroke:green;" points="792,-234 528,-234 524,-230 524,-198 788,-198 792,-202 792,-234"/>
+<polyline style="fill:none;stroke:green;" points="788,-230 524,-230 "/>
+<polyline style="fill:none;stroke:green;" points="788,-230 788,-198 "/>
+<polyline style="fill:none;stroke:green;" points="788,-230 792,-234 "/>
+<text text-anchor="middle" x="658" y="-209.5" style="font-family:Times New Roman;font-size:20.00;">summarizeAllelesPerTranscript</text>
+</g>
+<!-- t28->t29 -->
+<g id="edge39" class="edge"><title>t28->t29</title>
+<path style="fill:none;stroke:gray;" d="M636,-256C638,-252 641,-247 643,-243"/>
+<polygon style="fill:gray;stroke:gray;" points="646.203,-244.441 648,-234 640.084,-241.042 646.203,-244.441"/>
+</g>
+<!-- t28->t66 -->
+<g id="edge73" class="edge"><title>t28->t66</title>
+<path style="fill:none;stroke:gray;" d="M572,-257C546,-249 519,-239 515,-234 491,-199 491,-173 515,-140 525,-126 563,-115 594,-108"/>
+<polygon style="fill:gray;stroke:gray;" points="594.881,-111.393 604,-106 593.508,-104.529 594.881,-111.393"/>
+</g>
+<!-- t30 -->
+<g id="node47" class="node"><title>t30</title>
+<polygon style="fill:#90ee90;stroke:green;" points="752,-176 528,-176 524,-172 524,-140 748,-140 752,-144 752,-176"/>
+<polyline style="fill:none;stroke:green;" points="748,-172 524,-172 "/>
+<polyline style="fill:none;stroke:green;" points="748,-172 748,-140 "/>
+<polyline style="fill:none;stroke:green;" points="748,-172 752,-176 "/>
+<text text-anchor="middle" x="638" y="-151.5" style="font-family:Times New Roman;font-size:20.00;">summarizeAllelesPerGene</text>
+</g>
+<!-- t29->t30 -->
+<g id="edge41" class="edge"><title>t29->t30</title>
+<path style="fill:none;stroke:gray;" d="M652,-198C651,-194 649,-190 648,-186"/>
+<polygon style="fill:gray;stroke:gray;" points="650.964,-183.985 644,-176 644.464,-186.585 650.964,-183.985"/>
+</g>
+<!-- t29->t66 -->
+<g id="edge71" class="edge"><title>t29->t66</title>
+<path style="fill:none;stroke:gray;" d="M721,-198C740,-191 756,-184 761,-176 771,-162 771,-152 761,-140 752,-126 714,-115 682,-108"/>
+<polygon style="fill:gray;stroke:gray;" points="682.492,-104.529 672,-106 681.119,-111.393 682.492,-104.529"/>
+</g>
+<!-- t30->t66 -->
+<g id="edge43" class="edge"><title>t30->t66</title>
+<path style="fill:none;stroke:gray;" d="M638,-140C638,-136 638,-132 638,-128"/>
+<polygon style="fill:gray;stroke:gray;" points="641.5,-128 638,-118 634.5,-128 641.5,-128"/>
+</g>
+<!-- t66->t68 -->
+<g id="edge47" class="edge"><title>t66->t68</title>
+<path style="fill:none;stroke:blue;" d="M614,-82C608,-77 600,-71 593,-66"/>
+<polygon style="fill:blue;stroke:blue;" points="594.262,-62.6349 584,-60 590.379,-68.4592 594.262,-62.6349"/>
+</g>
+<!-- k1 -->
+<g id="node69" class="node"><title>k1</title>
+<polygon style="fill:#90ee90;stroke:green;" points="1362,-585.5 1214,-585.5 1210,-581.5 1210,-534.5 1358,-534.5 1362,-538.5 1362,-585.5"/>
+<polyline style="fill:none;stroke:green;" points="1358,-581.5 1210,-581.5 "/>
+<polyline style="fill:none;stroke:green;" points="1358,-581.5 1358,-534.5 "/>
+<polyline style="fill:none;stroke:green;" points="1358,-581.5 1362,-585.5 "/>
+<text text-anchor="middle" x="1286" y="-553.5" style="font-family:Times New Roman;font-size:20.00;">Up-to-date task</text>
+</g>
+<!-- k2 -->
+<g id="node70" class="node"><title>k2</title>
+<polygon style="fill:none;stroke:blue;" points="1345,-493.5 1231,-493.5 1227,-489.5 1227,-442.5 1341,-442.5 1345,-446.5 1345,-493.5"/>
+<polyline style="fill:none;stroke:blue;" points="1341,-489.5 1227,-489.5 "/>
+<polyline style="fill:none;stroke:blue;" points="1341,-489.5 1341,-442.5 "/>
+<polyline style="fill:none;stroke:blue;" points="1341,-489.5 1345,-493.5 "/>
+<text text-anchor="middle" x="1286" y="-461.5" style="font-family:Times New Roman;font-size:20.00;fill:blue;">Task to run</text>
+</g>
+<!-- k1->k2 -->
+<g id="edge78" class="edge"><title>k1->k2</title>
+<path style="fill:none;stroke:gray;" d="M1286,-534C1286,-524 1286,-514 1286,-504"/>
+<polygon style="fill:gray;stroke:gray;" points="1289.5,-504 1286,-494 1282.5,-504 1289.5,-504"/>
+</g>
+<!-- k3 -->
+<g id="node72" class="node"><title>k3</title>
+<polygon style="fill:#fff68f;stroke:black;" points="1347,-383.5 1229,-383.5 1225,-379.5 1225,-332.5 1343,-332.5 1347,-336.5 1347,-383.5"/>
+<polyline style="fill:none;stroke:black;" points="1343,-379.5 1225,-379.5 "/>
+<polyline style="fill:none;stroke:black;" points="1343,-379.5 1343,-332.5 "/>
+<polyline style="fill:none;stroke:black;" points="1343,-379.5 1347,-383.5 "/>
+<text text-anchor="middle" x="1286" y="-351.5" style="font-family:Times New Roman;font-size:20.00;">Final target</text>
+</g>
+<!-- k2->k3 -->
+<g id="edge80" class="edge"><title>k2->k3</title>
+<path style="fill:none;stroke:blue;" d="M1286,-442C1286,-428 1286,-410 1286,-394"/>
+<polygon style="fill:blue;stroke:blue;" points="1289.5,-394 1286,-384 1282.5,-394 1289.5,-394"/>
+</g>
+</g>
+</svg>
diff --git a/doc/_build/html/_downloads/gallery_snp_annotation_consequences.svg b/doc/_build/html/_downloads/gallery_snp_annotation_consequences.svg
new file mode 100644
index 0000000..c25b7fa
--- /dev/null
+++ b/doc/_build/html/_downloads/gallery_snp_annotation_consequences.svg
@@ -0,0 +1,471 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.0//EN"
+ "http://www.w3.org/TR/2001/REC-SVG-20010904/DTD/svg10.dtd" [
+ <!ATTLIST svg xmlns:xlink CDATA #FIXED "http://www.w3.org/1999/xlink">
+]>
+<!-- Generated by Graphviz version 2.20.2 (Mon Aug 4 08:59:22 UTC 2008)
+ For user: Andreas Heger -->
+<!-- Title: tree Pages: 1 -->
+<svg width="864pt" height="420pt"
+ viewBox="0.00 0.00 432.00 210.08" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<g id="graph0" class="graph" transform="scale(0.311688 0.311688) rotate(0) translate(4 670)">
+<title>SNP Annotation Pipeline (consequences)</title>
+<polygon style="fill:white;stroke:white;" points="-4,4 -4,-670 1382,-670 1382,4 -4,4"/>
+<g id="cluster2" class="cluster"><title>clustertasks</title>
+<polygon style="fill:none;stroke:black;" points="8,-16 8,-639 1194,-639 1194,-16 8,-16"/>
+<text text-anchor="middle" x="601" y="-608" style="font-family:Times New Roman;font-size:20.00pt;fill:red;"><tspan font-weight = "bold">SNP Annotation Pipeline (consequences):</tspan></text>
+</g>
+<g id="cluster3" class="cluster"><title>clusterkey</title>
+<polygon style="fill:#e5e5e5;stroke:#e5e5e5;" points="1202,-324 1202,-658 1370,-658 1370,-324 1202,-324"/>
+<text text-anchor="middle" x="1286" y="-627" style="font-family:Times New Roman;font-size:30.00;">Key:</text>
+</g>
+<!-- t11 -->
+<g id="node2" class="node"><title>t11</title>
+<polygon style="fill:#90ee90;stroke:green;" points="851,-589 629,-589 625,-585 625,-553 847,-553 851,-557 851,-589"/>
+<polyline style="fill:none;stroke:green;" points="847,-585 625,-585 "/>
+<polyline style="fill:none;stroke:green;" points="847,-585 847,-553 "/>
+<polyline style="fill:none;stroke:green;" points="847,-585 851,-589 "/>
+<text text-anchor="middle" x="738" y="-564.5" style="font-family:Times New Roman;font-size:20.00;">loadTranscriptInformation</text>
+</g>
+<!-- t16 -->
+<g id="node4" class="node"><title>t16</title>
+<polygon style="fill:#90ee90;stroke:green;" points="917.809,-455.794 995,-428.5 1072.19,-455.794 1072.12,-499.956 917.881,-499.956 917.809,-455.794"/>
+<polygon style="fill:none;stroke:green;" points="913.804,-452.967 995,-424.257 1076.2,-452.967 1076.11,-503.956 913.887,-503.956 913.804,-452.967"/>
+<text text-anchor="middle" x="995" y="-461.5" style="font-family:Times New Roman;font-size:20.00;">buildSelenoList</text>
+</g>
+<!-- t11->t16 -->
+<g id="edge25" class="edge"><title>t11->t16</title>
+<path style="fill:none;stroke:gray;" d="M783,-553C816,-539 864,-521 905,-504"/>
+<polygon style="fill:gray;stroke:gray;" points="906.283,-507.26 914,-500 903.44,-500.863 906.283,-507.26"/>
+</g>
+<!-- t64 -->
+<g id="node21" class="node"><title>t64</title>
+<polygon style="fill:white;stroke:gray;" points="672,-292 600,-292 596,-288 596,-256 668,-256 672,-260 672,-292"/>
+<polyline style="fill:none;stroke:gray;" points="668,-288 596,-288 "/>
+<polyline style="fill:none;stroke:gray;" points="668,-288 668,-256 "/>
+<polyline style="fill:none;stroke:gray;" points="668,-288 672,-292 "/>
+<text text-anchor="middle" x="634" y="-267.5" style="font-family:Times New Roman;font-size:20.00;fill:gray;">prepare</text>
+</g>
+<!-- t11->t64 -->
+<g id="edge19" class="edge"><title>t11->t64</title>
+<path style="fill:none;stroke:gray;" d="M707,-553C665,-526 592,-472 567,-402 555,-365 548,-348 567,-314 572,-305 579,-299 587,-293"/>
+<polygon style="fill:gray;stroke:gray;" points="588.958,-295.916 596,-288 585.559,-289.797 588.958,-295.916"/>
+</g>
+<!-- t15 -->
+<g id="node3" class="node"><title>t15</title>
+<polygon style="fill:#90ee90;stroke:green;" points="1068,-589 926,-589 922,-585 922,-553 1064,-553 1068,-557 1068,-589"/>
+<polyline style="fill:none;stroke:green;" points="1064,-585 922,-585 "/>
+<polyline style="fill:none;stroke:green;" points="1064,-585 1064,-553 "/>
+<polyline style="fill:none;stroke:green;" points="1064,-585 1068,-589 "/>
+<text text-anchor="middle" x="995" y="-564.5" style="font-family:Times New Roman;font-size:20.00;">loadProteinStats</text>
+</g>
+<!-- t15->t16 -->
+<g id="edge3" class="edge"><title>t15->t16</title>
+<path style="fill:none;stroke:gray;" d="M995,-553C995,-542 995,-528 995,-514"/>
+<polygon style="fill:gray;stroke:gray;" points="998.5,-514 995,-504 991.5,-514 998.5,-514"/>
+</g>
+<!-- t25 -->
+<g id="node6" class="node"><title>t25</title>
+<polygon style="fill:#90ee90;stroke:green;" points="1186,-376 1074,-376 1070,-372 1070,-340 1182,-340 1186,-344 1186,-376"/>
+<polyline style="fill:none;stroke:green;" points="1182,-372 1070,-372 "/>
+<polyline style="fill:none;stroke:green;" points="1182,-372 1182,-340 "/>
+<polyline style="fill:none;stroke:green;" points="1182,-372 1186,-376 "/>
+<text text-anchor="middle" x="1128" y="-351.5" style="font-family:Times New Roman;font-size:20.00;">makeEffects</text>
+</g>
+<!-- t16->t25 -->
+<g id="edge5" class="edge"><title>t16->t25</title>
+<path style="fill:none;stroke:gray;" d="M1032,-437C1053,-420 1079,-399 1098,-383"/>
+<polygon style="fill:gray;stroke:gray;" points="1100.78,-385.219 1106,-376 1096.17,-379.951 1100.78,-385.219"/>
+</g>
+<!-- t27 -->
+<g id="node49" class="node"><title>t27</title>
+<polygon style="fill:white;stroke:gray;" points="1052,-376 942,-376 938,-372 938,-340 1048,-340 1052,-344 1052,-376"/>
+<polyline style="fill:none;stroke:gray;" points="1048,-372 938,-372 "/>
+<polyline style="fill:none;stroke:gray;" points="1048,-372 1048,-340 "/>
+<polyline style="fill:none;stroke:gray;" points="1048,-372 1052,-376 "/>
+<text text-anchor="middle" x="995" y="-351.5" style="font-family:Times New Roman;font-size:20.00;fill:gray;">buildAlleles</text>
+</g>
+<!-- t16->t27 -->
+<g id="edge53" class="edge"><title>t16->t27</title>
+<path style="fill:none;stroke:gray;" d="M995,-424C995,-412 995,-398 995,-386"/>
+<polygon style="fill:gray;stroke:gray;" points="998.5,-386 995,-376 991.5,-386 998.5,-386"/>
+</g>
+<!-- t26 -->
+<g id="node8" class="node"><title>t26</title>
+<polygon style="fill:#90ee90;stroke:green;" points="1186,-292 1082,-292 1078,-288 1078,-256 1182,-256 1186,-260 1186,-292"/>
+<polyline style="fill:none;stroke:green;" points="1182,-288 1078,-288 "/>
+<polyline style="fill:none;stroke:green;" points="1182,-288 1182,-256 "/>
+<polyline style="fill:none;stroke:green;" points="1182,-288 1186,-292 "/>
+<text text-anchor="middle" x="1132" y="-267.5" style="font-family:Times New Roman;font-size:20.00;">loadEffects</text>
+</g>
+<!-- t25->t26 -->
+<g id="edge7" class="edge"><title>t25->t26</title>
+<path style="fill:none;stroke:gray;" d="M1129,-340C1130,-329 1130,-315 1130,-302"/>
+<polygon style="fill:gray;stroke:gray;" points="1133.49,-302.299 1131,-292 1126.52,-301.602 1133.49,-302.299"/>
+</g>
+<!-- t65 -->
+<g id="node10" class="node"><title>t65</title>
+<polygon style="fill:#fff68f;stroke:black;" points="1140,-234 1020,-234 1016,-230 1016,-198 1136,-198 1140,-202 1140,-234"/>
+<polyline style="fill:none;stroke:black;" points="1136,-230 1016,-230 "/>
+<polyline style="fill:none;stroke:black;" points="1136,-230 1136,-198 "/>
+<polyline style="fill:none;stroke:black;" points="1136,-230 1140,-234 "/>
+<text text-anchor="middle" x="1078" y="-209.5" style="font-family:Times New Roman;font-size:20.00;">consequences</text>
+</g>
+<!-- t25->t65 -->
+<g id="edge27" class="edge"><title>t25->t65</title>
+<path style="fill:none;stroke:gray;" d="M1106,-340C1092,-328 1077,-311 1069,-292 1063,-277 1065,-258 1068,-244"/>
+<polygon style="fill:gray;stroke:gray;" points="1071.48,-244.584 1071,-234 1064.77,-242.573 1071.48,-244.584"/>
+</g>
+<!-- t26->t65 -->
+<g id="edge9" class="edge"><title>t26->t65</title>
+<path style="fill:none;stroke:gray;" d="M1115,-256C1111,-252 1106,-247 1102,-242"/>
+<polygon style="fill:gray;stroke:gray;" points="1104.22,-239.221 1095,-234 1098.95,-243.831 1104.22,-239.221"/>
+</g>
+<!-- t68 -->
+<g id="node63" class="node"><title>t68</title>
+<polygon style="fill:white;stroke:gray;" points="765,-60 715,-60 711,-56 711,-24 761,-24 765,-28 765,-60"/>
+<polyline style="fill:none;stroke:gray;" points="761,-56 711,-56 "/>
+<polyline style="fill:none;stroke:gray;" points="761,-56 761,-24 "/>
+<polyline style="fill:none;stroke:gray;" points="761,-56 765,-60 "/>
+<text text-anchor="middle" x="738" y="-35.5" style="font-family:Times New Roman;font-size:20.00;fill:gray;">full</text>
+</g>
+<!-- t65->t68 -->
+<g id="edge71" class="edge"><title>t65->t68</title>
+<path style="fill:none;stroke:gray;" d="M1056,-198C1036,-182 1006,-158 977,-140 931,-110 919,-103 867,-82 837,-70 801,-59 775,-52"/>
+<polygon style="fill:gray;stroke:gray;" points="775.584,-48.5212 765,-49 773.573,-55.2259 775.584,-48.5212"/>
+</g>
+<!-- t8 -->
+<g id="node12" class="node"><title>t8</title>
+<polygon style="fill:white;stroke:gray;" points="809,-486 667,-486 663,-482 663,-450 805,-450 809,-454 809,-486"/>
+<polyline style="fill:none;stroke:gray;" points="805,-482 663,-482 "/>
+<polyline style="fill:none;stroke:gray;" points="805,-482 805,-450 "/>
+<polyline style="fill:none;stroke:gray;" points="805,-482 809,-486 "/>
+<text text-anchor="middle" x="736" y="-461.5" style="font-family:Times New Roman;font-size:20.00;fill:gray;">buildTranscripts</text>
+</g>
+<!-- t9 -->
+<g id="node13" class="node"><title>t9</title>
+<polygon style="fill:white;stroke:gray;" points="716,-376 580,-376 576,-372 576,-340 712,-340 716,-344 716,-376"/>
+<polyline style="fill:none;stroke:gray;" points="712,-372 576,-372 "/>
+<polyline style="fill:none;stroke:gray;" points="712,-372 712,-340 "/>
+<polyline style="fill:none;stroke:gray;" points="712,-372 716,-376 "/>
+<text text-anchor="middle" x="646" y="-351.5" style="font-family:Times New Roman;font-size:20.00;fill:gray;">loadTranscripts</text>
+</g>
+<!-- t8->t9 -->
+<g id="edge11" class="edge"><title>t8->t9</title>
+<path style="fill:none;stroke:gray;" d="M721,-450C707,-432 684,-405 667,-384"/>
+<polygon style="fill:gray;stroke:gray;" points="669.8,-381.9 661,-376 664.2,-386.1 669.8,-381.9"/>
+</g>
+<!-- t8->t27 -->
+<g id="edge51" class="edge"><title>t8->t27</title>
+<path style="fill:none;stroke:gray;" d="M802,-450C840,-439 888,-422 929,-402 941,-397 952,-389 963,-382"/>
+<polygon style="fill:gray;stroke:gray;" points="965.1,-384.8 971,-376 960.9,-379.2 965.1,-384.8"/>
+</g>
+<!-- t9->t64 -->
+<g id="edge17" class="edge"><title>t9->t64</title>
+<path style="fill:none;stroke:gray;" d="M643,-340C641,-329 640,-314 638,-302"/>
+<polygon style="fill:gray;stroke:gray;" points="641.478,-301.602 637,-292 634.512,-302.299 641.478,-301.602"/>
+</g>
+<!-- t4 -->
+<g id="node15" class="node"><title>t4</title>
+<polygon style="fill:white;stroke:gray;" points="553,-589 395,-589 391,-585 391,-553 549,-553 553,-557 553,-589"/>
+<polyline style="fill:none;stroke:gray;" points="549,-585 391,-585 "/>
+<polyline style="fill:none;stroke:gray;" points="549,-585 549,-553 "/>
+<polyline style="fill:none;stroke:gray;" points="549,-585 553,-589 "/>
+<text text-anchor="middle" x="472" y="-564.5" style="font-family:Times New Roman;font-size:20.00;fill:gray;">buildGeneRegions</text>
+</g>
+<!-- t5 -->
+<g id="node16" class="node"><title>t5</title>
+<polygon style="fill:white;stroke:gray;" points="525,-486 423,-486 419,-482 419,-450 521,-450 525,-454 525,-486"/>
+<polyline style="fill:none;stroke:gray;" points="521,-482 419,-482 "/>
+<polyline style="fill:none;stroke:gray;" points="521,-482 521,-450 "/>
+<polyline style="fill:none;stroke:gray;" points="521,-482 525,-486 "/>
+<text text-anchor="middle" x="472" y="-461.5" style="font-family:Times New Roman;font-size:20.00;fill:gray;">buildGenes</text>
+</g>
+<!-- t4->t5 -->
+<g id="edge13" class="edge"><title>t4->t5</title>
+<path style="fill:none;stroke:gray;" d="M472,-553C472,-537 472,-514 472,-496"/>
+<polygon style="fill:gray;stroke:gray;" points="475.5,-496 472,-486 468.5,-496 475.5,-496"/>
+</g>
+<!-- t7 -->
+<g id="node18" class="node"><title>t7</title>
+<polygon style="fill:white;stroke:gray;" points="538,-376 410,-376 406,-372 406,-340 534,-340 538,-344 538,-376"/>
+<polyline style="fill:none;stroke:gray;" points="534,-372 406,-372 "/>
+<polyline style="fill:none;stroke:gray;" points="534,-372 534,-340 "/>
+<polyline style="fill:none;stroke:gray;" points="534,-372 538,-376 "/>
+<text text-anchor="middle" x="472" y="-351.5" style="font-family:Times New Roman;font-size:20.00;fill:gray;">loadGeneStats</text>
+</g>
+<!-- t5->t7 -->
+<g id="edge15" class="edge"><title>t5->t7</title>
+<path style="fill:none;stroke:gray;" d="M472,-450C472,-433 472,-407 472,-386"/>
+<polygon style="fill:gray;stroke:gray;" points="475.5,-386 472,-376 468.5,-386 475.5,-386"/>
+</g>
+<!-- t7->t64 -->
+<g id="edge21" class="edge"><title>t7->t64</title>
+<path style="fill:none;stroke:gray;" d="M504,-340C518,-332 536,-322 552,-314 563,-308 575,-302 587,-297"/>
+<polygon style="fill:gray;stroke:gray;" points="588.958,-299.916 596,-292 585.559,-293.797 588.958,-299.916"/>
+</g>
+<!-- t6 -->
+<g id="node20" class="node"><title>t6</title>
+<polygon style="fill:white;stroke:gray;" points="920,-376 738,-376 734,-372 734,-340 916,-340 920,-344 920,-376"/>
+<polyline style="fill:none;stroke:gray;" points="916,-372 734,-372 "/>
+<polyline style="fill:none;stroke:gray;" points="916,-372 916,-340 "/>
+<polyline style="fill:none;stroke:gray;" points="916,-372 920,-376 "/>
+<text text-anchor="middle" x="827" y="-351.5" style="font-family:Times New Roman;font-size:20.00;fill:gray;">loadGeneInformation</text>
+</g>
+<!-- t6->t64 -->
+<g id="edge23" class="edge"><title>t6->t64</title>
+<path style="fill:none;stroke:gray;" d="M786,-340C755,-326 713,-309 681,-294"/>
+<polygon style="fill:gray;stroke:gray;" points="682.56,-290.863 672,-290 679.717,-297.26 682.56,-290.863"/>
+</g>
+<!-- t64->t68 -->
+<g id="edge69" class="edge"><title>t64->t68</title>
+<path style="fill:none;stroke:gray;" d="M640,-256C650,-230 668,-181 686,-140 697,-115 712,-88 723,-69"/>
+<polygon style="fill:gray;stroke:gray;" points="726.203,-70.4414 728,-60 720.084,-67.0418 726.203,-70.4414"/>
+</g>
+<!-- t49 -->
+<g id="node28" class="node"><title>t49</title>
+<polygon style="fill:white;stroke:gray;" points="19.9571,-345.794 114,-318.5 208.043,-345.794 207.955,-389.956 20.0448,-389.956 19.9571,-345.794"/>
+<polygon style="fill:none;stroke:gray;" points="15.9511,-342.791 114,-314.335 212.049,-342.791 211.947,-393.956 16.0528,-393.956 15.9511,-342.791"/>
+<text text-anchor="middle" x="114" y="-351.5" style="font-family:Times New Roman;font-size:20.00;fill:gray;">buildPolyphenInput</text>
+</g>
+<!-- t50 -->
+<g id="node29" class="node"><title>t50</title>
+<polygon style="fill:white;stroke:gray;" points="299,-292 105,-292 101,-288 101,-256 295,-256 299,-260 299,-292"/>
+<polyline style="fill:none;stroke:gray;" points="295,-288 101,-288 "/>
+<polyline style="fill:none;stroke:gray;" points="295,-288 295,-256 "/>
+<polyline style="fill:none;stroke:gray;" points="295,-288 299,-292 "/>
+<text text-anchor="middle" x="200" y="-267.5" style="font-family:Times New Roman;font-size:20.00;fill:gray;">buildPolyphenFeatures</text>
+</g>
+<!-- t49->t50 -->
+<g id="edge29" class="edge"><title>t49->t50</title>
+<path style="fill:none;stroke:gray;" d="M148,-324C156,-316 165,-307 174,-299"/>
+<polygon style="fill:gray;stroke:gray;" points="176.404,-301.546 181,-292 171.454,-296.596 176.404,-301.546"/>
+</g>
+<!-- t52 -->
+<g id="node35" class="node"><title>t52</title>
+<polygon style="fill:white;stroke:gray;" points="176,-176 20,-176 16,-172 16,-140 172,-140 176,-144 176,-176"/>
+<polyline style="fill:none;stroke:gray;" points="172,-172 16,-172 "/>
+<polyline style="fill:none;stroke:gray;" points="172,-172 172,-140 "/>
+<polyline style="fill:none;stroke:gray;" points="172,-172 176,-176 "/>
+<text text-anchor="middle" x="96" y="-151.5" style="font-family:Times New Roman;font-size:20.00;fill:gray;">loadPolyphenMap</text>
+</g>
+<!-- t49->t52 -->
+<g id="edge35" class="edge"><title>t49->t52</title>
+<path style="fill:none;stroke:gray;" d="M99,-319C96,-310 93,-301 92,-292 87,-256 89,-213 92,-186"/>
+<polygon style="fill:gray;stroke:gray;" points="95.4875,-186.299 93,-176 88.5222,-185.602 95.4875,-186.299"/>
+</g>
+<!-- t57 -->
+<g id="node40" class="node"><title>t57</title>
+<polygon style="fill:white;stroke:gray;" points="442,-234 344,-234 340,-230 340,-198 438,-198 442,-202 442,-234"/>
+<polyline style="fill:none;stroke:gray;" points="438,-230 340,-230 "/>
+<polyline style="fill:none;stroke:gray;" points="438,-230 438,-198 "/>
+<polyline style="fill:none;stroke:gray;" points="438,-230 442,-234 "/>
+<text text-anchor="middle" x="391" y="-209.5" style="font-family:Times New Roman;font-size:20.00;fill:gray;">runPanther</text>
+</g>
+<!-- t49->t57 -->
+<g id="edge39" class="edge"><title>t49->t57</title>
+<path style="fill:none;stroke:gray;" d="M174,-332C189,-325 206,-319 221,-314 259,-301 277,-316 308,-292 323,-280 316,-269 328,-256 334,-250 340,-245 347,-240"/>
+<polygon style="fill:gray;stroke:gray;" points="349.1,-242.8 355,-234 344.9,-237.2 349.1,-242.8"/>
+</g>
+<!-- t51 -->
+<g id="node31" class="node"><title>t51</title>
+<polygon style="fill:white;stroke:gray;" points="286,-234 174,-234 170,-230 170,-198 282,-198 286,-202 286,-234"/>
+<polyline style="fill:none;stroke:gray;" points="282,-230 170,-230 "/>
+<polyline style="fill:none;stroke:gray;" points="282,-230 282,-198 "/>
+<polyline style="fill:none;stroke:gray;" points="282,-230 286,-234 "/>
+<text text-anchor="middle" x="228" y="-209.5" style="font-family:Times New Roman;font-size:20.00;fill:gray;">runPolyphen</text>
+</g>
+<!-- t50->t51 -->
+<g id="edge31" class="edge"><title>t50->t51</title>
+<path style="fill:none;stroke:gray;" d="M209,-256C211,-252 213,-248 215,-243"/>
+<polygon style="fill:gray;stroke:gray;" points="218.137,-244.56 219,-234 211.74,-241.717 218.137,-244.56"/>
+</g>
+<!-- t53 -->
+<g id="node33" class="node"><title>t53</title>
+<polygon style="fill:white;stroke:gray;" points="318,-176 198,-176 194,-172 194,-140 314,-140 318,-144 318,-176"/>
+<polyline style="fill:none;stroke:gray;" points="314,-172 194,-172 "/>
+<polyline style="fill:none;stroke:gray;" points="314,-172 314,-140 "/>
+<polyline style="fill:none;stroke:gray;" points="314,-172 318,-176 "/>
+<text text-anchor="middle" x="256" y="-151.5" style="font-family:Times New Roman;font-size:20.00;fill:gray;">loadPolyphen</text>
+</g>
+<!-- t51->t53 -->
+<g id="edge33" class="edge"><title>t51->t53</title>
+<path style="fill:none;stroke:gray;" d="M237,-198C239,-194 241,-190 243,-185"/>
+<polygon style="fill:gray;stroke:gray;" points="246.137,-186.56 247,-176 239.74,-183.717 246.137,-186.56"/>
+</g>
+<!-- t67 -->
+<g id="node45" class="node"><title>t67</title>
+<polygon style="fill:white;stroke:gray;" points="359,-118 293,-118 289,-114 289,-82 355,-82 359,-86 359,-118"/>
+<polyline style="fill:none;stroke:gray;" points="355,-114 289,-114 "/>
+<polyline style="fill:none;stroke:gray;" points="355,-114 355,-82 "/>
+<polyline style="fill:none;stroke:gray;" points="355,-114 359,-118 "/>
+<text text-anchor="middle" x="324" y="-93.5" style="font-family:Times New Roman;font-size:20.00;fill:gray;">effects</text>
+</g>
+<!-- t53->t67 -->
+<g id="edge45" class="edge"><title>t53->t67</title>
+<path style="fill:none;stroke:gray;" d="M277,-140C283,-135 289,-130 295,-124"/>
+<polygon style="fill:gray;stroke:gray;" points="297.1,-126.8 303,-118 292.9,-121.2 297.1,-126.8"/>
+</g>
+<!-- t52->t67 -->
+<g id="edge47" class="edge"><title>t52->t67</title>
+<path style="fill:none;stroke:gray;" d="M167,-140C204,-131 248,-120 279,-111"/>
+<polygon style="fill:gray;stroke:gray;" points="279.881,-114.393 289,-109 278.508,-107.529 279.881,-114.393"/>
+</g>
+<!-- t12 -->
+<g id="node37" class="node"><title>t12</title>
+<polygon style="fill:white;stroke:gray;" points="388,-376 234,-376 230,-372 230,-340 384,-340 388,-344 388,-376"/>
+<polyline style="fill:none;stroke:gray;" points="384,-372 230,-372 "/>
+<polyline style="fill:none;stroke:gray;" points="384,-372 384,-340 "/>
+<polyline style="fill:none;stroke:gray;" points="384,-372 388,-376 "/>
+<text text-anchor="middle" x="309" y="-351.5" style="font-family:Times New Roman;font-size:20.00;fill:gray;">buildPeptideFasta</text>
+</g>
+<!-- t56 -->
+<g id="node38" class="node"><title>t56</title>
+<polygon style="fill:white;stroke:gray;" points="473,-292 341,-292 337,-288 337,-256 469,-256 473,-260 473,-292"/>
+<polyline style="fill:none;stroke:gray;" points="469,-288 337,-288 "/>
+<polyline style="fill:none;stroke:gray;" points="469,-288 469,-256 "/>
+<polyline style="fill:none;stroke:gray;" points="469,-288 473,-292 "/>
+<text text-anchor="middle" x="405" y="-267.5" style="font-family:Times New Roman;font-size:20.00;fill:gray;">preparePanther</text>
+</g>
+<!-- t12->t56 -->
+<g id="edge37" class="edge"><title>t12->t56</title>
+<path style="fill:none;stroke:gray;" d="M330,-340C343,-328 361,-312 376,-299"/>
+<polygon style="fill:gray;stroke:gray;" points="378.779,-301.219 384,-292 374.169,-295.951 378.779,-301.219"/>
+</g>
+<!-- t56->t57 -->
+<g id="edge41" class="edge"><title>t56->t57</title>
+<path style="fill:none;stroke:gray;" d="M401,-256C400,-252 399,-248 398,-244"/>
+<polygon style="fill:gray;stroke:gray;" points="401.393,-243.119 396,-234 394.529,-244.492 401.393,-243.119"/>
+</g>
+<!-- t58 -->
+<g id="node43" class="node"><title>t58</title>
+<polygon style="fill:white;stroke:gray;" points="446,-176 340,-176 336,-172 336,-140 442,-140 446,-144 446,-176"/>
+<polyline style="fill:none;stroke:gray;" points="442,-172 336,-172 "/>
+<polyline style="fill:none;stroke:gray;" points="442,-172 442,-140 "/>
+<polyline style="fill:none;stroke:gray;" points="442,-172 446,-176 "/>
+<text text-anchor="middle" x="391" y="-151.5" style="font-family:Times New Roman;font-size:20.00;fill:gray;">loadPanther</text>
+</g>
+<!-- t57->t58 -->
+<g id="edge43" class="edge"><title>t57->t58</title>
+<path style="fill:none;stroke:gray;" d="M391,-198C391,-194 391,-190 391,-186"/>
+<polygon style="fill:gray;stroke:gray;" points="394.5,-186 391,-176 387.5,-186 394.5,-186"/>
+</g>
+<!-- t58->t67 -->
+<g id="edge49" class="edge"><title>t58->t67</title>
+<path style="fill:none;stroke:gray;" d="M370,-140C364,-135 359,-130 353,-125"/>
+<polygon style="fill:gray;stroke:gray;" points="354.831,-121.951 345,-118 350.221,-127.219 354.831,-121.951"/>
+</g>
+<!-- t67->t68 -->
+<g id="edge73" class="edge"><title>t67->t68</title>
+<path style="fill:none;stroke:gray;" d="M359,-95C437,-84 621,-58 701,-47"/>
+<polygon style="fill:gray;stroke:gray;" points="701.398,-50.4778 711,-46 700.701,-43.5125 701.398,-50.4778"/>
+</g>
+<!-- t28 -->
+<g id="node52" class="node"><title>t28</title>
+<polygon style="fill:white;stroke:gray;" points="1040,-292 936,-292 932,-288 932,-256 1036,-256 1040,-260 1040,-292"/>
+<polyline style="fill:none;stroke:gray;" points="1036,-288 932,-288 "/>
+<polyline style="fill:none;stroke:gray;" points="1036,-288 1036,-256 "/>
+<polyline style="fill:none;stroke:gray;" points="1036,-288 1040,-292 "/>
+<text text-anchor="middle" x="986" y="-267.5" style="font-family:Times New Roman;font-size:20.00;fill:gray;">loadAlleles</text>
+</g>
+<!-- t27->t28 -->
+<g id="edge55" class="edge"><title>t27->t28</title>
+<path style="fill:none;stroke:gray;" d="M993,-340C991,-329 990,-315 989,-302"/>
+<polygon style="fill:gray;stroke:gray;" points="992.478,-301.602 988,-292 985.512,-302.299 992.478,-301.602"/>
+</g>
+<!-- t66 -->
+<g id="node58" class="node"><title>t66</title>
+<polygon style="fill:white;stroke:gray;" points="858,-118 794,-118 790,-114 790,-82 854,-82 858,-86 858,-118"/>
+<polyline style="fill:none;stroke:gray;" points="854,-114 790,-114 "/>
+<polyline style="fill:none;stroke:gray;" points="854,-114 854,-82 "/>
+<polyline style="fill:none;stroke:gray;" points="854,-114 858,-118 "/>
+<text text-anchor="middle" x="824" y="-93.5" style="font-family:Times New Roman;font-size:20.00;fill:gray;">alleles</text>
+</g>
+<!-- t27->t66 -->
+<g id="edge61" class="edge"><title>t27->t66</title>
+<path style="fill:none;stroke:gray;" d="M972,-340C959,-331 944,-321 929,-314 833,-266 764,-321 701,-234 689,-216 689,-156 701,-140 711,-126 749,-115 780,-108"/>
+<polygon style="fill:gray;stroke:gray;" points="780.881,-111.393 790,-106 779.508,-104.529 780.881,-111.393"/>
+</g>
+<!-- t29 -->
+<g id="node54" class="node"><title>t29</title>
+<polygon style="fill:white;stroke:gray;" points="978,-234 714,-234 710,-230 710,-198 974,-198 978,-202 978,-234"/>
+<polyline style="fill:none;stroke:gray;" points="974,-230 710,-230 "/>
+<polyline style="fill:none;stroke:gray;" points="974,-230 974,-198 "/>
+<polyline style="fill:none;stroke:gray;" points="974,-230 978,-234 "/>
+<text text-anchor="middle" x="844" y="-209.5" style="font-family:Times New Roman;font-size:20.00;fill:gray;">summarizeAllelesPerTranscript</text>
+</g>
+<!-- t28->t29 -->
+<g id="edge57" class="edge"><title>t28->t29</title>
+<path style="fill:none;stroke:gray;" d="M942,-256C928,-250 912,-244 897,-238"/>
+<polygon style="fill:gray;stroke:gray;" points="898.56,-234.863 888,-234 895.717,-241.26 898.56,-234.863"/>
+</g>
+<!-- t28->t66 -->
+<g id="edge63" class="edge"><title>t28->t66</title>
+<path style="fill:none;stroke:gray;" d="M987,-256C987,-249 987,-241 987,-234 988,-190 991,-168 957,-140 932,-119 896,-109 868,-105"/>
+<polygon style="fill:gray;stroke:gray;" points="868.492,-101.529 858,-103 867.119,-108.393 868.492,-101.529"/>
+</g>
+<!-- t30 -->
+<g id="node56" class="node"><title>t30</title>
+<polygon style="fill:white;stroke:gray;" points="938,-176 714,-176 710,-172 710,-140 934,-140 938,-144 938,-176"/>
+<polyline style="fill:none;stroke:gray;" points="934,-172 710,-172 "/>
+<polyline style="fill:none;stroke:gray;" points="934,-172 934,-140 "/>
+<polyline style="fill:none;stroke:gray;" points="934,-172 938,-176 "/>
+<text text-anchor="middle" x="824" y="-151.5" style="font-family:Times New Roman;font-size:20.00;fill:gray;">summarizeAllelesPerGene</text>
+</g>
+<!-- t29->t30 -->
+<g id="edge59" class="edge"><title>t29->t30</title>
+<path style="fill:none;stroke:gray;" d="M838,-198C837,-194 835,-190 834,-186"/>
+<polygon style="fill:gray;stroke:gray;" points="836.964,-183.985 830,-176 830.464,-186.585 836.964,-183.985"/>
+</g>
+<!-- t29->t66 -->
+<g id="edge65" class="edge"><title>t29->t66</title>
+<path style="fill:none;stroke:gray;" d="M907,-198C926,-191 942,-184 947,-176 957,-162 957,-152 947,-140 938,-126 900,-115 868,-108"/>
+<polygon style="fill:gray;stroke:gray;" points="868.492,-104.529 858,-106 867.119,-111.393 868.492,-104.529"/>
+</g>
+<!-- t30->t66 -->
+<g id="edge67" class="edge"><title>t30->t66</title>
+<path style="fill:none;stroke:gray;" d="M824,-140C824,-136 824,-132 824,-128"/>
+<polygon style="fill:gray;stroke:gray;" points="827.5,-128 824,-118 820.5,-128 827.5,-128"/>
+</g>
+<!-- t66->t68 -->
+<g id="edge75" class="edge"><title>t66->t68</title>
+<path style="fill:none;stroke:gray;" d="M797,-82C790,-77 781,-71 773,-66"/>
+<polygon style="fill:gray;stroke:gray;" points="775.1,-63.2 765,-60 770.9,-68.8 775.1,-63.2"/>
+</g>
+<!-- k1 -->
+<g id="node69" class="node"><title>k1</title>
+<polygon style="fill:white;stroke:gray;" points="1347,-608 1229,-608 1225,-604 1225,-534 1343,-534 1347,-538 1347,-608"/>
+<polyline style="fill:none;stroke:gray;" points="1343,-604 1225,-604 "/>
+<polyline style="fill:none;stroke:gray;" points="1343,-604 1343,-534 "/>
+<polyline style="fill:none;stroke:gray;" points="1343,-604 1347,-608 "/>
+<text text-anchor="middle" x="1286" y="-576" style="font-family:Times New Roman;font-size:20.00;fill:gray;">Up-to-date</text>
+<text text-anchor="middle" x="1286" y="-553" style="font-family:Times New Roman;font-size:20.00;fill:gray;">dependence</text>
+</g>
+<!-- k2 -->
+<g id="node70" class="node"><title>k2</title>
+<polygon style="fill:#90ee90;stroke:green;" points="1362,-493.5 1214,-493.5 1210,-489.5 1210,-442.5 1358,-442.5 1362,-446.5 1362,-493.5"/>
+<polyline style="fill:none;stroke:green;" points="1358,-489.5 1210,-489.5 "/>
+<polyline style="fill:none;stroke:green;" points="1358,-489.5 1358,-442.5 "/>
+<polyline style="fill:none;stroke:green;" points="1358,-489.5 1362,-493.5 "/>
+<text text-anchor="middle" x="1286" y="-461.5" style="font-family:Times New Roman;font-size:20.00;">Up-to-date task</text>
+</g>
+<!-- k1->k2 -->
+<g id="edge78" class="edge"><title>k1->k2</title>
+<path style="fill:none;stroke:gray;" d="M1286,-534C1286,-524 1286,-514 1286,-504"/>
+<polygon style="fill:gray;stroke:gray;" points="1289.5,-504 1286,-494 1282.5,-504 1289.5,-504"/>
+</g>
+<!-- k3 -->
+<g id="node72" class="node"><title>k3</title>
+<polygon style="fill:#fff68f;stroke:black;" points="1347,-383.5 1229,-383.5 1225,-379.5 1225,-332.5 1343,-332.5 1347,-336.5 1347,-383.5"/>
+<polyline style="fill:none;stroke:black;" points="1343,-379.5 1225,-379.5 "/>
+<polyline style="fill:none;stroke:black;" points="1343,-379.5 1343,-332.5 "/>
+<polyline style="fill:none;stroke:black;" points="1343,-379.5 1347,-383.5 "/>
+<text text-anchor="middle" x="1286" y="-351.5" style="font-family:Times New Roman;font-size:20.00;">Final target</text>
+</g>
+<!-- k2->k3 -->
+<g id="edge80" class="edge"><title>k2->k3</title>
+<path style="fill:none;stroke:gray;" d="M1286,-442C1286,-428 1286,-410 1286,-394"/>
+<polygon style="fill:gray;stroke:gray;" points="1289.5,-394 1286,-384 1282.5,-394 1289.5,-394"/>
+</g>
+</g>
+</svg>
diff --git a/doc/_build/html/_downloads/play_with_colours.py b/doc/_build/html/_downloads/play_with_colours.py
new file mode 100644
index 0000000..5054aa4
--- /dev/null
+++ b/doc/_build/html/_downloads/play_with_colours.py
@@ -0,0 +1,268 @@
+#!/usr/bin/env python
+"""
+
+ play_with_colours.py
+ [--log_file PATH]
+ [--verbose]
+
+"""
+
+################################################################################
+#
+# test
+#
+#
+# Copyright (c) 7/13/2010 Leo Goodstadt
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+#################################################################################
+
+import sys, os
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# options
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+
+from optparse import OptionParser
+import StringIO
+
+parser = OptionParser(version="%play_with_colours 1.0",
+ usage = "\n\n play_with_colours "
+ "--flowchart FILE [options] "
+ "[--colour_scheme_index INT ] "
+ "[--key_legend_in_graph]")
+
+#
+# pipeline
+#
+parser.add_option("--flowchart", dest="flowchart",
+ metavar="FILE",
+ type="string",
+ help="Don't actually run any commands; just print the pipeline "
+ "as a flowchart.")
+parser.add_option("--colour_scheme_index", dest="colour_scheme_index",
+ metavar="INTEGER",
+ type="int",
+ help="Index of colour scheme for flow chart.")
+parser.add_option("--key_legend_in_graph", dest="key_legend_in_graph",
+ action="store_true", default=False,
+ help="Print out legend and key for dependency graph.")
+
+(options, remaining_args) = parser.parse_args()
+if not options.flowchart:
+ raise Exception("Missing mandatory parameter: --flowchart.\n")
+
+
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# imports
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+from ruffus import *
+from ruffus.ruffus_exceptions import JobSignalledBreak
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Pipeline
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+
+#
+# up to date tasks
+#
+@check_if_uptodate (lambda : (False, ""))
+def Up_to_date_task1(infile, outfile):
+ pass
+
+@check_if_uptodate (lambda : (False, ""))
+@follows(Up_to_date_task1)
+def Up_to_date_task2(infile, outfile):
+ pass
+
+@check_if_uptodate (lambda : (False, ""))
+@follows(Up_to_date_task2)
+def Up_to_date_task3(infile, outfile):
+ pass
+
+
+@check_if_uptodate (lambda : (False, ""))
+@follows(Up_to_date_task3)
+def Up_to_date_final_target(infile, outfile):
+ pass
+
+
+#
+# Explicitly specified
+#
+@check_if_uptodate (lambda : (False, ""))
+@follows(Up_to_date_task1)
+def Explicitly_specified_task(infile, outfile):
+ pass
+
+
+
+#
+# Tasks to run
+#
+@follows(Explicitly_specified_task)
+def Task_to_run1(infile, outfile):
+ pass
+
+
+@follows(Task_to_run1)
+def Task_to_run2(infile, outfile):
+ pass
+
+@follows(Task_to_run2)
+def Task_to_run3(infile, outfile):
+ pass
+
+@check_if_uptodate (lambda : (False, ""))
+@follows(Task_to_run2)
+def Up_to_date_task_forced_to_rerun(infile, outfile):
+ pass
+
+
+#
+# Final target
+#
+@follows(Up_to_date_task_forced_to_rerun, Task_to_run3)
+def Final_target(infile, outfile):
+ pass
+
+#
+# Ignored downstream
+#
+@follows(Final_target)
+def Downstream_task1_ignored(infile, outfile):
+ pass
+
+@follows(Final_target)
+def Downstream_task2_ignored(infile, outfile):
+ pass
+
+
+
+
+
+
+
+
+
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Main logic
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+from collections import defaultdict
+custom_flow_chart_colour_scheme = defaultdict(dict)
+
+#
+# Base chart on this overall colour scheme index
+#
+custom_flow_chart_colour_scheme["colour_scheme_index"] = options.colour_scheme_index
+
+#
+# Overriding colours
+#
+if options.colour_scheme_index == None:
+ custom_flow_chart_colour_scheme["Vicious cycle"]["linecolor"] = '"#FF3232"'
+ custom_flow_chart_colour_scheme["Pipeline"]["fontcolor"] = '"#FF3232"'
+ custom_flow_chart_colour_scheme["Key"]["fontcolor"] = "black"
+ custom_flow_chart_colour_scheme["Key"]["fillcolor"] = '"#F6F4F4"'
+ custom_flow_chart_colour_scheme["Task to run"]["linecolor"] = '"#0044A0"'
+ custom_flow_chart_colour_scheme["Up-to-date"]["linecolor"] = "gray"
+ custom_flow_chart_colour_scheme["Final target"]["fillcolor"] = '"#EFA03B"'
+ custom_flow_chart_colour_scheme["Final target"]["fontcolor"] = "black"
+ custom_flow_chart_colour_scheme["Final target"]["color"] = "black"
+ custom_flow_chart_colour_scheme["Final target"]["dashed"] = 0
+ custom_flow_chart_colour_scheme["Vicious cycle"]["fillcolor"] = '"#FF3232"'
+ custom_flow_chart_colour_scheme["Vicious cycle"]["fontcolor"] = 'white'
+ custom_flow_chart_colour_scheme["Vicious cycle"]["color"] = "white"
+ custom_flow_chart_colour_scheme["Vicious cycle"]["dashed"] = 0
+ custom_flow_chart_colour_scheme["Up-to-date task"]["fillcolor"] = '"#B8CC6E"'
+ custom_flow_chart_colour_scheme["Up-to-date task"]["fontcolor"] = '"#006000"'
+ custom_flow_chart_colour_scheme["Up-to-date task"]["color"] = '"#006000"'
+ custom_flow_chart_colour_scheme["Up-to-date task"]["dashed"] = 0
+ custom_flow_chart_colour_scheme["Down stream"]["fillcolor"] = "white"
+ custom_flow_chart_colour_scheme["Down stream"]["fontcolor"] = "gray"
+ custom_flow_chart_colour_scheme["Down stream"]["color"] = "gray"
+ custom_flow_chart_colour_scheme["Down stream"]["dashed"] = 0
+ custom_flow_chart_colour_scheme["Explicitly specified task"]["fillcolor"] = "transparent"
+ custom_flow_chart_colour_scheme["Explicitly specified task"]["fontcolor"] = "black"
+ custom_flow_chart_colour_scheme["Explicitly specified task"]["color"] = "black"
+ custom_flow_chart_colour_scheme["Explicitly specified task"]["dashed"] = 0
+ custom_flow_chart_colour_scheme["Task to run"]["fillcolor"] = '"#EBF3FF"'
+ custom_flow_chart_colour_scheme["Task to run"]["fontcolor"] = '"#0044A0"'
+ custom_flow_chart_colour_scheme["Task to run"]["color"] = '"#0044A0"'
+ custom_flow_chart_colour_scheme["Task to run"]["dashed"] = 0
+ custom_flow_chart_colour_scheme["Up-to-date task forced to rerun"]["fillcolor"] = 'transparent'
+ custom_flow_chart_colour_scheme["Up-to-date task forced to rerun"]["fontcolor"] = '"#0044A0"'
+ custom_flow_chart_colour_scheme["Up-to-date task forced to rerun"]["color"] = '"#0044A0"'
+ custom_flow_chart_colour_scheme["Up-to-date task forced to rerun"]["dashed"] = 1
+ custom_flow_chart_colour_scheme["Up-to-date Final target"]["fillcolor"] = '"#EFA03B"'
+ custom_flow_chart_colour_scheme["Up-to-date Final target"]["fontcolor"] = '"#006000"'
+ custom_flow_chart_colour_scheme["Up-to-date Final target"]["color"] = '"#006000"'
+ custom_flow_chart_colour_scheme["Up-to-date Final target"]["dashed"] = 0
+
+if __name__ == '__main__':
+ pipeline_printout_graph (
+
+ open(options.flowchart, "w"),
+ # use flowchart file name extension to decide flowchart format
+ # e.g. svg, jpg etc.
+ os.path.splitext(options.flowchart)[1][1:],
+
+ # final targets
+ [Final_target, Up_to_date_final_target],
+
+ # Explicitly specified tasks
+ [Explicitly_specified_task],
+
+ # Do we want key legend
+ no_key_legend = not options.key_legend_in_graph,
+
+ # Print all the task types whether used or not
+ minimal_key_legend = False,
+
+ user_colour_scheme = custom_flow_chart_colour_scheme,
+ pipeline_name = "Colour schemes")
+
+
+
+
+
+
+
+
diff --git a/doc/_build/html/_downloads/ruffus.pdf b/doc/_build/html/_downloads/ruffus.pdf
new file mode 100644
index 0000000..68836b8
Binary files /dev/null and b/doc/_build/html/_downloads/ruffus.pdf differ
diff --git a/doc/_build/html/_images/bestiary_combinatorics.png b/doc/_build/html/_images/bestiary_combinatorics.png
new file mode 100644
index 0000000..da876ce
Binary files /dev/null and b/doc/_build/html/_images/bestiary_combinatorics.png differ
diff --git a/doc/_build/html/_images/bestiary_decorators.png b/doc/_build/html/_images/bestiary_decorators.png
new file mode 100644
index 0000000..a4bb53d
Binary files /dev/null and b/doc/_build/html/_images/bestiary_decorators.png differ
diff --git a/doc/_build/html/_images/bestiary_transform.png b/doc/_build/html/_images/bestiary_transform.png
new file mode 100644
index 0000000..b184bfc
Binary files /dev/null and b/doc/_build/html/_images/bestiary_transform.png differ
diff --git a/doc/_build/html/_images/examples_bioinformatics_error.png b/doc/_build/html/_images/examples_bioinformatics_error.png
new file mode 100644
index 0000000..469905e
Binary files /dev/null and b/doc/_build/html/_images/examples_bioinformatics_error.png differ
diff --git a/doc/_build/html/_images/examples_bioinformatics_merge.jpg b/doc/_build/html/_images/examples_bioinformatics_merge.jpg
new file mode 100644
index 0000000..a83a17f
Binary files /dev/null and b/doc/_build/html/_images/examples_bioinformatics_merge.jpg differ
diff --git a/doc/_build/html/_images/examples_bioinformatics_pipeline.jpg b/doc/_build/html/_images/examples_bioinformatics_pipeline.jpg
new file mode 100644
index 0000000..fdd3839
Binary files /dev/null and b/doc/_build/html/_images/examples_bioinformatics_pipeline.jpg differ
diff --git a/doc/_build/html/_images/examples_bioinformatics_split.jpg b/doc/_build/html/_images/examples_bioinformatics_split.jpg
new file mode 100644
index 0000000..4a9c428
Binary files /dev/null and b/doc/_build/html/_images/examples_bioinformatics_split.jpg differ
diff --git a/doc/_build/html/_images/examples_bioinformatics_transform.jpg b/doc/_build/html/_images/examples_bioinformatics_transform.jpg
new file mode 100644
index 0000000..7a5aac5
Binary files /dev/null and b/doc/_build/html/_images/examples_bioinformatics_transform.jpg differ
diff --git a/doc/_build/html/_images/flowchart_colour_schemes.png b/doc/_build/html/_images/flowchart_colour_schemes.png
new file mode 100644
index 0000000..a576cf9
Binary files /dev/null and b/doc/_build/html/_images/flowchart_colour_schemes.png differ
diff --git a/doc/_build/html/_images/front_page_flowchart.png b/doc/_build/html/_images/front_page_flowchart.png
new file mode 100644
index 0000000..2c0a8d2
Binary files /dev/null and b/doc/_build/html/_images/front_page_flowchart.png differ
diff --git a/doc/_build/html/_images/gallery_big_pipeline.png b/doc/_build/html/_images/gallery_big_pipeline.png
new file mode 100644
index 0000000..28da4be
Binary files /dev/null and b/doc/_build/html/_images/gallery_big_pipeline.png differ
diff --git a/doc/_build/html/_images/gallery_dless.png b/doc/_build/html/_images/gallery_dless.png
new file mode 100644
index 0000000..e05c9e2
Binary files /dev/null and b/doc/_build/html/_images/gallery_dless.png differ
diff --git a/doc/_build/html/_images/gallery_rna_seq.png b/doc/_build/html/_images/gallery_rna_seq.png
new file mode 100644
index 0000000..19f16ed
Binary files /dev/null and b/doc/_build/html/_images/gallery_rna_seq.png differ
diff --git a/doc/_build/html/_images/gallery_snp_annotation.png b/doc/_build/html/_images/gallery_snp_annotation.png
new file mode 100644
index 0000000..c0e7ba3
Binary files /dev/null and b/doc/_build/html/_images/gallery_snp_annotation.png differ
diff --git a/doc/_build/html/_images/gallery_snp_annotation_consequences.png b/doc/_build/html/_images/gallery_snp_annotation_consequences.png
new file mode 100644
index 0000000..8e223bd
Binary files /dev/null and b/doc/_build/html/_images/gallery_snp_annotation_consequences.png differ
diff --git a/doc/_build/html/_images/history_html_flowchart.png b/doc/_build/html/_images/history_html_flowchart.png
new file mode 100644
index 0000000..a939228
Binary files /dev/null and b/doc/_build/html/_images/history_html_flowchart.png differ
diff --git a/doc/_build/html/_images/history_html_flowchart.svg b/doc/_build/html/_images/history_html_flowchart.svg
new file mode 100644
index 0000000..268a187
--- /dev/null
+++ b/doc/_build/html/_images/history_html_flowchart.svg
@@ -0,0 +1,269 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"
+ "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
+<!-- Generated by graphviz version 2.36.0 (20140111.2315)
+ -->
+<!-- Title: Pipeline: Pages: 1 -->
+<svg width="673pt" height="633pt"
+ viewBox="0.00 0.00 673.00 633.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 629)">
+<title>Pipeline:</title>
+<polygon fill="white" stroke="none" points="-4,4 -4,-629 669,-629 669,4 -4,4"/>
+<g id="clust1" class="cluster"><title>clustertasks</title>
+<polygon fill="none" stroke="black" points="8,-8 8,-617 519,-617 519,-8 8,-8"/>
+<text text-anchor="middle" x="263.5" y="-589" font-family="Times,serif" font-size="30.00" fill="#ff3232">Pipeline:</text>
+</g>
+<g id="clust2" class="cluster"><title>clusterkey1</title>
+<polygon fill="#f6f4f4" stroke="#f6f4f4" points="527,-66 527,-610 657,-610 657,-66 527,-66"/>
+<text text-anchor="middle" x="592" y="-590" font-family="Times,serif" font-size="20.00">Key:</text>
+</g>
+<!-- t0 -->
+<g id="node1" class="node"><title>t0</title>
+<polygon fill="#b8cc6e" stroke="#006000" points="370.25,-568 215.75,-568 211.75,-564 211.75,-532 366.25,-532 370.25,-536 370.25,-568"/>
+<polyline fill="none" stroke="#006000" points="366.25,-564 211.75,-564 "/>
+<polyline fill="none" stroke="#006000" points="366.25,-564 366.25,-532 "/>
+<polyline fill="none" stroke="#006000" points="366.25,-564 370.25,-568 "/>
+<text text-anchor="middle" x="291" y="-545" font-family="Times,serif" font-size="20.00" fill="#006000">Up_to_date_task1</text>
+</g>
+<!-- t4 -->
+<g id="node2" class="node"><title>t4</title>
+<polygon fill="#b8cc6e" stroke="#006000" points="293.25,-454 82.75,-454 78.75,-450 78.75,-418 289.25,-418 293.25,-422 293.25,-454"/>
+<polyline fill="none" stroke="#006000" points="289.25,-450 78.75,-450 "/>
+<polyline fill="none" stroke="#006000" points="289.25,-450 289.25,-418 "/>
+<polyline fill="none" stroke="#006000" points="289.25,-450 293.25,-454 "/>
+<text text-anchor="middle" x="186" y="-431" font-family="Times,serif" font-size="20.00" fill="#006000">Explicitly_specified_task</text>
+</g>
+<!-- t0->t4 -->
+<g id="edge1" class="edge"><title>t0->t4</title>
+<path fill="none" stroke="gray" d="M275.053,-531.99C257.476,-513.241 229.048,-482.918 209.048,-461.585"/>
+<polygon fill="gray" stroke="gray" points="211.49,-459.072 202.097,-454.17 206.383,-463.86 211.49,-459.072"/>
+</g>
+<!-- t1 -->
+<g id="node8" class="node"><title>t1</title>
+<g id="a_node8"><a xlink:href="http://cnn.com" xlink:title="What is this?<BR/> What <FONT COLOR="red">is</FONT>this???">
+<polygon fill="#b8cc6e" stroke="#006000" points="466,-490 328,-490 328,-486 324,-486 324,-482 328,-482 328,-390 324,-390 324,-386 328,-386 328,-382 466,-382 466,-490"/>
+<polyline fill="none" stroke="#006000" points="328,-486 332,-486 332,-482 328,-482 "/>
+<polyline fill="none" stroke="#006000" points="328,-390 332,-390 332,-386 328,-386 "/>
+<polygon fill="none" stroke="#006000" points="470,-494 324,-494 324,-490 320,-490 320,-486 324,-486 324,-386 320,-386 320,-382 324,-382 324,-378 470,-378 470,-494"/>
+<polyline fill="none" stroke="#006000" points="324,-490 328,-490 328,-486 324,-486 "/>
+<polyline fill="none" stroke="#006000" points="324,-386 328,-386 328,-382 324,-382 "/>
+<polygon fill="none" stroke="#006000" points="474,-498 320,-498 320,-494 316,-494 316,-490 320,-490 320,-382 316,-382 316,-378 320,-378 320,-374 474,-374 474,-498"/>
+<polyline fill="none" stroke="#006000" points="320,-494 324,-494 324,-490 320,-490 "/>
+<polyline fill="none" stroke="#006000" points="320,-382 324,-382 324,-378 320,-378 "/>
+<polygon fill="none" stroke="#006000" points="478,-502 316,-502 316,-498 312,-498 312,-494 316,-494 316,-378 312,-378 312,-374 316,-374 316,-370 478,-370 478,-502"/>
+<polyline fill="none" stroke="#006000" points="316,-498 320,-498 320,-494 316,-494 "/>
+<polyline fill="none" stroke="#006000" points="316,-378 320,-378 320,-374 316,-374 "/>
+<polygon fill="none" stroke="#006000" points="482,-506 312,-506 312,-502 308,-502 308,-498 312,-498 312,-374 308,-374 308,-370 312,-370 312,-366 482,-366 482,-506"/>
+<polyline fill="none" stroke="#006000" points="312,-502 316,-502 316,-498 312,-498 "/>
+<polyline fill="none" stroke="#006000" points="312,-374 316,-374 316,-370 312,-370 "/>
+<text text-anchor="start" x="345.5" y="-441" font-family="Times,serif" font-size="20.00" fill="#006000">What is this?</text>
+<text text-anchor="start" x="336" y="-421" font-family="Times,serif" font-size="20.00" fill="#006000"> What </text>
+<text text-anchor="start" x="390" y="-421" font-family="Times,serif" font-size="20.00" fill="red">is</text>
+<text text-anchor="start" x="403" y="-421" font-family="Times,serif" font-size="20.00" fill="#006000">this???</text>
+</a>
+</g>
+</g>
+<!-- t0->t1 -->
+<g id="edge8" class="edge"><title>t0->t1</title>
+<path fill="none" stroke="gray" d="M307.099,-531.99C312.118,-526.687 318.013,-520.458 324.344,-513.768"/>
+<polygon fill="gray" stroke="gray" points="327.033,-516.019 331.365,-506.351 321.949,-511.208 327.033,-516.019"/>
+</g>
+<!-- t5 -->
+<g id="node3" class="node"><title>t5</title>
+<polygon fill="#ebf3ff" stroke="#0044a0" points="248.25,-340 127.75,-340 123.75,-336 123.75,-304 244.25,-304 248.25,-308 248.25,-340"/>
+<polyline fill="none" stroke="#0044a0" points="244.25,-336 123.75,-336 "/>
+<polyline fill="none" stroke="#0044a0" points="244.25,-336 244.25,-304 "/>
+<polyline fill="none" stroke="#0044a0" points="244.25,-336 248.25,-340 "/>
+<text text-anchor="middle" x="186" y="-317" font-family="Times,serif" font-size="20.00" fill="#0044a0">Task_to_run1</text>
+</g>
+<!-- t4->t5 -->
+<g id="edge2" class="edge"><title>t4->t5</title>
+<path fill="none" stroke="gray" d="M186,-417.99C186,-400.063 186,-371.555 186,-350.442"/>
+<polygon fill="gray" stroke="gray" points="189.5,-350.17 186,-340.17 182.5,-350.171 189.5,-350.17"/>
+</g>
+<!-- t6 -->
+<g id="node4" class="node"><title>t6</title>
+<polygon fill="#ebf3ff" stroke="#0044a0" points="248.25,-267 127.75,-267 123.75,-263 123.75,-231 244.25,-231 248.25,-235 248.25,-267"/>
+<polyline fill="none" stroke="#0044a0" points="244.25,-263 123.75,-263 "/>
+<polyline fill="none" stroke="#0044a0" points="244.25,-263 244.25,-231 "/>
+<polyline fill="none" stroke="#0044a0" points="244.25,-263 248.25,-267 "/>
+<text text-anchor="middle" x="186" y="-244" font-family="Times,serif" font-size="20.00" fill="#0044a0">Task_to_run2</text>
+</g>
+<!-- t5->t6 -->
+<g id="edge3" class="edge"><title>t5->t6</title>
+<path fill="none" stroke="#0044a0" d="M186,-303.813C186,-295.789 186,-286.047 186,-277.069"/>
+<polygon fill="#0044a0" stroke="#0044a0" points="189.5,-277.029 186,-267.029 182.5,-277.029 189.5,-277.029"/>
+</g>
+<!-- t8 -->
+<g id="node5" class="node"><title>t8</title>
+<polygon fill="none" stroke="#0044a0" stroke-dasharray="5,2" points="312.25,-187 29.75,-187 25.75,-183 25.75,-151 308.25,-151 312.25,-155 312.25,-187"/>
+<polyline fill="none" stroke="#0044a0" stroke-dasharray="5,2" points="308.25,-183 25.75,-183 "/>
+<polyline fill="none" stroke="#0044a0" stroke-dasharray="5,2" points="308.25,-183 308.25,-151 "/>
+<polyline fill="none" stroke="#0044a0" stroke-dasharray="5,2" points="308.25,-183 312.25,-187 "/>
+<text text-anchor="middle" x="169" y="-164" font-family="Times,serif" font-size="20.00" fill="#0044a0">Up_to_date_task_forced_to_rerun</text>
+</g>
+<!-- t6->t8 -->
+<g id="edge4" class="edge"><title>t6->t8</title>
+<path fill="none" stroke="#0044a0" d="M182.227,-230.689C180.092,-220.894 177.374,-208.422 174.958,-197.335"/>
+<polygon fill="#0044a0" stroke="#0044a0" points="178.312,-196.288 172.762,-187.262 171.472,-197.778 178.312,-196.288"/>
+</g>
+<!-- t7 -->
+<g id="node6" class="node"><title>t7</title>
+<polygon fill="#ebf3ff" stroke="#0044a0" points="455.25,-187 334.75,-187 330.75,-183 330.75,-151 451.25,-151 455.25,-155 455.25,-187"/>
+<polyline fill="none" stroke="#0044a0" points="451.25,-183 330.75,-183 "/>
+<polyline fill="none" stroke="#0044a0" points="451.25,-183 451.25,-151 "/>
+<polyline fill="none" stroke="#0044a0" points="451.25,-183 455.25,-187 "/>
+<text text-anchor="middle" x="393" y="-164" font-family="Times,serif" font-size="20.00" fill="#0044a0">Task_to_run3</text>
+</g>
+<!-- t6->t7 -->
+<g id="edge5" class="edge"><title>t6->t7</title>
+<path fill="none" stroke="#0044a0" d="M232.616,-230.946C258.658,-221.366 291.675,-209.132 321,-198 327.007,-195.72 333.287,-193.312 339.528,-190.905"/>
+<polygon fill="#0044a0" stroke="#0044a0" points="341.194,-194.014 349.257,-187.141 338.668,-187.485 341.194,-194.014"/>
+</g>
+<!-- t9 -->
+<g id="node7" class="node"><title>t9</title>
+<polygon fill="#efa03b" stroke="black" points="335.5,-114 228.5,-114 224.5,-110 224.5,-78 331.5,-78 335.5,-82 335.5,-114"/>
+<polyline fill="none" stroke="black" points="331.5,-110 224.5,-110 "/>
+<polyline fill="none" stroke="black" points="331.5,-110 331.5,-78 "/>
+<polyline fill="none" stroke="black" points="331.5,-110 335.5,-114 "/>
+<text text-anchor="middle" x="280" y="-91" font-family="Times,serif" font-size="20.00">Final_target</text>
+</g>
+<!-- t8->t9 -->
+<g id="edge6" class="edge"><title>t8->t9</title>
+<path fill="none" stroke="#0044a0" d="M195.587,-150.994C210.307,-141.579 228.791,-129.755 244.707,-119.575"/>
+<polygon fill="#0044a0" stroke="#0044a0" points="246.707,-122.45 253.245,-114.114 242.935,-116.554 246.707,-122.45"/>
+</g>
+<!-- t7->t9 -->
+<g id="edge7" class="edge"><title>t7->t9</title>
+<path fill="none" stroke="#0044a0" d="M365.934,-150.994C350.949,-141.579 332.132,-129.755 315.929,-119.575"/>
+<polygon fill="#0044a0" stroke="#0044a0" points="317.567,-116.47 307.237,-114.114 313.842,-122.397 317.567,-116.47"/>
+</g>
+<!-- t10 -->
+<g id="node11" class="node"><title>t10</title>
+<polygon fill="white" stroke="gray" points="254.25,-52 19.75,-52 15.75,-48 15.75,-16 250.25,-16 254.25,-20 254.25,-52"/>
+<polyline fill="none" stroke="gray" points="250.25,-48 15.75,-48 "/>
+<polyline fill="none" stroke="gray" points="250.25,-48 250.25,-16 "/>
+<polyline fill="none" stroke="gray" points="250.25,-48 254.25,-52 "/>
+<text text-anchor="middle" x="135" y="-29" font-family="Times,serif" font-size="20.00" fill="gray">Downstream_task1_ignored</text>
+</g>
+<!-- t9->t10 -->
+<g id="edge11" class="edge"><title>t9->t10</title>
+<path fill="none" stroke="gray" d="M238.837,-77.9669C222.306,-71.1265 203.083,-63.1724 185.703,-55.9807"/>
+<polygon fill="gray" stroke="gray" points="186.845,-52.6652 176.266,-52.0757 184.168,-59.1333 186.845,-52.6652"/>
+</g>
+<!-- t11 -->
+<g id="node12" class="node"><title>t11</title>
+<polygon fill="white" stroke="gray" points="511.25,-52 276.75,-52 272.75,-48 272.75,-16 507.25,-16 511.25,-20 511.25,-52"/>
+<polyline fill="none" stroke="gray" points="507.25,-48 272.75,-48 "/>
+<polyline fill="none" stroke="gray" points="507.25,-48 507.25,-16 "/>
+<polyline fill="none" stroke="gray" points="507.25,-48 511.25,-52 "/>
+<text text-anchor="middle" x="392" y="-29" font-family="Times,serif" font-size="20.00" fill="gray">Downstream_task2_ignored</text>
+</g>
+<!-- t9->t11 -->
+<g id="edge12" class="edge"><title>t9->t11</title>
+<path fill="none" stroke="gray" d="M311.795,-77.9669C323.973,-71.4432 338.041,-63.9065 350.963,-56.9843"/>
+<polygon fill="gray" stroke="gray" points="352.963,-59.8831 360.0px5,-52.0757 349.658,-53.7128 352.963,-59.8831"/>
+</g>
+<!-- t2 -->
+<g id="node9" class="node"><title>t2</title>
+<polygon fill="#b8cc6e" stroke="#006000" points="476.25,-340 321.75,-340 317.75,-336 317.75,-304 472.25,-304 476.25,-308 476.25,-340"/>
+<polyline fill="none" stroke="#006000" points="472.25,-336 317.75,-336 "/>
+<polyline fill="none" stroke="#006000" points="472.25,-336 472.25,-304 "/>
+<polyline fill="none" stroke="#006000" points="472.25,-336 476.25,-340 "/>
+<text text-anchor="middle" x="397" y="-317" font-family="Times,serif" font-size="20.00" fill="#006000">Up_to_date_task3</text>
+</g>
+<!-- t1->t2 -->
+<g id="edge9" class="edge"><title>t1->t2</title>
+<path fill="none" stroke="gray" d="M397,-365.982C397,-360.465 397,-355.126 397,-350.196"/>
+<polygon fill="gray" stroke="gray" points="400.5,-350.148 397,-340.148 393.5,-350.148 400.5,-350.148"/>
+</g>
+<!-- t3 -->
+<g id="node10" class="node"><title>t3</title>
+<polygon fill="#efa03b" stroke="#006000" points="501.25,-267 296.75,-267 292.75,-263 292.75,-231 497.25,-231 501.25,-235 501.25,-267"/>
+<polyline fill="none" stroke="#006000" points="497.25,-263 292.75,-263 "/>
+<polyline fill="none" stroke="#006000" points="497.25,-263 497.25,-231 "/>
+<polyline fill="none" stroke="#006000" points="497.25,-263 501.25,-267 "/>
+<text text-anchor="middle" x="397" y="-244" font-family="Times,serif" font-size="20.00" fill="#006000">Up_to_date_final_target</text>
+</g>
+<!-- t2->t3 -->
+<g id="edge10" class="edge"><title>t2->t3</title>
+<path fill="none" stroke="gray" d="M397,-303.813C397,-295.789 397,-286.047 397,-277.069"/>
+<polygon fill="gray" stroke="gray" points="400.5,-277.029 397,-267.029 393.5,-277.029 400.5,-277.029"/>
+</g>
+<!-- k1_1 -->
+<g id="node13" class="node"><title>k1_1</title>
+<polygon fill="white" stroke="gray" points="642.5,-571.5 545.5,-571.5 541.5,-567.5 541.5,-528.5 638.5,-528.5 642.5,-532.5 642.5,-571.5"/>
+<polyline fill="none" stroke="gray" points="638.5,-567.5 541.5,-567.5 "/>
+<polyline fill="none" stroke="gray" points="638.5,-567.5 638.5,-528.5 "/>
+<polyline fill="none" stroke="gray" points="638.5,-567.5 642.5,-571.5 "/>
+<text text-anchor="middle" x="592" y="-546.3" font-family="Times,serif" font-size="14.00" fill="gray">Down stream</text>
+</g>
+<!-- k2_1 -->
+<g id="node14" class="node"><title>k2_1</title>
+<polygon fill="#b8cc6e" stroke="#006000" points="648.25,-457.5 539.75,-457.5 535.75,-453.5 535.75,-414.5 644.25,-414.5 648.25,-418.5 648.25,-457.5"/>
+<polyline fill="none" stroke="#006000" points="644.25,-453.5 535.75,-453.5 "/>
+<polyline fill="none" stroke="#006000" points="644.25,-453.5 644.25,-414.5 "/>
+<polyline fill="none" stroke="#006000" points="644.25,-453.5 648.25,-457.5 "/>
+<text text-anchor="middle" x="592" y="-432.3" font-family="Times,serif" font-size="14.00" fill="#006000">Up-to-date task</text>
+</g>
+<!-- k1_1->k2_1 -->
+<g id="edge13" class="edge"><title>k1_1->k2_1</title>
+<path fill="none" stroke="gray" d="M592,-528.457C592,-511.513 592,-487.212 592,-467.917"/>
+<polygon fill="gray" stroke="gray" points="595.5,-467.792 592,-457.792 588.5,-467.792 595.5,-467.792"/>
+</g>
+<!-- k3_1 -->
+<g id="node15" class="node"><title>k3_1</title>
+<polygon fill="#ebf3ff" stroke="#0044a0" points="637.25,-343.5 550.75,-343.5 546.75,-339.5 546.75,-300.5 633.25,-300.5 637.25,-304.5 637.25,-343.5"/>
+<polyline fill="none" stroke="#0044a0" points="633.25,-339.5 546.75,-339.5 "/>
+<polyline fill="none" stroke="#0044a0" points="633.25,-339.5 633.25,-300.5 "/>
+<polyline fill="none" stroke="#0044a0" points="633.25,-339.5 637.25,-343.5 "/>
+<text text-anchor="middle" x="592" y="-318.3" font-family="Times,serif" font-size="14.00" fill="#0044a0">Task to run</text>
+</g>
+<!-- k2_1->k3_1 -->
+<g id="edge14" class="edge"><title>k2_1->k3_1</title>
+<path fill="none" stroke="gray" d="M592,-414.457C592,-397.513 592,-373.212 592,-353.917"/>
+<polygon fill="gray" stroke="gray" points="595.5,-353.792 592,-343.792 588.5,-353.792 595.5,-353.792"/>
+</g>
+<!-- k4_1 -->
+<g id="node16" class="node"><title>k4_1</title>
+<polygon fill="none" stroke="#0044a0" stroke-dasharray="5,2" points="648.25,-278 539.75,-278 535.75,-274 535.75,-220 644.25,-220 648.25,-224 648.25,-278"/>
+<polyline fill="none" stroke="#0044a0" stroke-dasharray="5,2" points="644.25,-274 535.75,-274 "/>
+<polyline fill="none" stroke="#0044a0" stroke-dasharray="5,2" points="644.25,-274 644.25,-220 "/>
+<polyline fill="none" stroke="#0044a0" stroke-dasharray="5,2" points="644.25,-274 648.25,-278 "/>
+<text text-anchor="middle" x="592" y="-252.8" font-family="Times,serif" font-size="14.00" fill="#0044a0">Up-to-date task</text>
+<text text-anchor="middle" x="592" y="-237.8" font-family="Times,serif" font-size="14.00" fill="#0044a0">forced to rerun</text>
+</g>
+<!-- k3_1->k4_1 -->
+<g id="edge15" class="edge"><title>k3_1->k4_1</title>
+<path fill="none" stroke="#0044a0" d="M592,-300.464C592,-296.656 592,-292.568 592,-288.426"/>
+<polygon fill="#0044a0" stroke="#0044a0" points="595.5,-288.149 592,-278.149 588.5,-288.149 595.5,-288.149"/>
+</g>
+<!-- k5_1 -->
+<g id="node17" class="node"><title>k5_1</title>
+<polygon fill="#efa03b" stroke="#006000" points="637.25,-198 550.75,-198 546.75,-194 546.75,-140 633.25,-140 637.25,-144 637.25,-198"/>
+<polyline fill="none" stroke="#006000" points="633.25,-194 546.75,-194 "/>
+<polyline fill="none" stroke="#006000" points="633.25,-194 633.25,-140 "/>
+<polyline fill="none" stroke="#006000" points="633.25,-194 637.25,-198 "/>
+<text text-anchor="middle" x="592" y="-172.8" font-family="Times,serif" font-size="14.00" fill="#006000">Up-to-date</text>
+<text text-anchor="middle" x="592" y="-157.8" font-family="Times,serif" font-size="14.00" fill="#006000">Final target</text>
+</g>
+<!-- k4_1->k5_1 -->
+<g id="edge16" class="edge"><title>k4_1->k5_1</title>
+<path fill="none" stroke="#0044a0" d="M592,-219.84C592,-216.085 592,-212.184 592,-208.292"/>
+<polygon fill="#0044a0" stroke="#0044a0" points="595.5,-208.118 592,-198.118 588.5,-208.118 595.5,-208.118"/>
+</g>
+<!-- k6_1 -->
+<g id="node18" class="node"><title>k6_1</title>
+<polygon fill="#efa03b" stroke="black" points="637.25,-117.5 550.75,-117.5 546.75,-113.5 546.75,-74.5 633.25,-74.5 637.25,-78.5 637.25,-117.5"/>
+<polyline fill="none" stroke="black" points="633.25,-113.5 546.75,-113.5 "/>
+<polyline fill="none" stroke="black" points="633.25,-113.5 633.25,-74.5 "/>
+<polyline fill="none" stroke="black" points="633.25,-113.5 637.25,-117.5 "/>
+<text text-anchor="middle" x="592" y="-92.3" font-family="Times,serif" font-size="14.00">Final target</text>
+</g>
+<!-- k5_1->k6_1 -->
+<g id="edge17" class="edge"><title>k5_1->k6_1</title>
+<path fill="none" stroke="gray" d="M592,-139.939C592,-135.985 592,-131.903 592,-127.9"/>
+<polygon fill="gray" stroke="gray" points="595.5,-127.631 592,-117.632 588.5,-127.632 595.5,-127.631"/>
+</g>
+</g>
+</svg>
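The flowchart above is the sort of dependency graph Ruffus draws for a pipeline: green boxes are up-to-date tasks, blue boxes are tasks that will run, the dashed box is an up-to-date task forced to rerun, and the orange boxes are final targets (the key on the right spells this out). A minimal sketch of how such an SVG might be generated is shown below; the task and file names are hypothetical, and only pipeline_printout_graph() and the decorators listed in the Quick Reference further down come from Ruffus itself::

    from ruffus import originate, transform, suffix, pipeline_printout_graph

    # Hypothetical two-step pipeline; a real pipeline would do actual work here.
    @originate(["start.stage1"])
    def task_to_run1(output_file):
        open(output_file, "w").close()

    @transform(task_to_run1, suffix(".stage1"), ".final")
    def final_target(input_file, output_file):
        open(output_file, "w").close()

    # Write the dependency graph as SVG.  forcedtorun_tasks marks tasks that are
    # otherwise up to date as needing to rerun (the dashed box above); the key
    # legend drawn on the right of the chart is produced by default.
    pipeline_printout_graph("flowchart.svg", "svg",
                            target_tasks=[final_target],
                            forcedtorun_tasks=[task_to_run1])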
diff --git a/doc/_build/html/_images/history_html_flowchart1.png b/doc/_build/html/_images/history_html_flowchart1.png
new file mode 100644
index 0000000..a939228
Binary files /dev/null and b/doc/_build/html/_images/history_html_flowchart1.png differ
diff --git a/doc/_build/html/_images/history_html_flowchart2.png b/doc/_build/html/_images/history_html_flowchart2.png
new file mode 100644
index 0000000..a939228
Binary files /dev/null and b/doc/_build/html/_images/history_html_flowchart2.png differ
diff --git a/doc/_build/html/_images/jobs_limit.png b/doc/_build/html/_images/jobs_limit.png
new file mode 100644
index 0000000..4caac7d
Binary files /dev/null and b/doc/_build/html/_images/jobs_limit.png differ
diff --git a/doc/_build/html/_images/jobs_limit2.png b/doc/_build/html/_images/jobs_limit2.png
new file mode 100644
index 0000000..9455627
Binary files /dev/null and b/doc/_build/html/_images/jobs_limit2.png differ
diff --git a/doc/_build/html/_images/logo.jpg b/doc/_build/html/_images/logo.jpg
new file mode 100644
index 0000000..4d4b1ab
Binary files /dev/null and b/doc/_build/html/_images/logo.jpg differ
diff --git a/doc/_build/html/_images/manual_dependencies_flowchart1.png b/doc/_build/html/_images/manual_dependencies_flowchart1.png
new file mode 100644
index 0000000..126851d
Binary files /dev/null and b/doc/_build/html/_images/manual_dependencies_flowchart1.png differ
diff --git a/doc/_build/html/_images/manual_dependencies_flowchart2.png b/doc/_build/html/_images/manual_dependencies_flowchart2.png
new file mode 100644
index 0000000..a1911f1
Binary files /dev/null and b/doc/_build/html/_images/manual_dependencies_flowchart2.png differ
diff --git a/doc/_build/html/_images/manual_dependencies_flowchart3.png b/doc/_build/html/_images/manual_dependencies_flowchart3.png
new file mode 100644
index 0000000..0015bc5
Binary files /dev/null and b/doc/_build/html/_images/manual_dependencies_flowchart3.png differ
diff --git a/doc/_build/html/_images/manual_dependencies_flowchart_intro.png b/doc/_build/html/_images/manual_dependencies_flowchart_intro.png
new file mode 100644
index 0000000..d95dfe9
Binary files /dev/null and b/doc/_build/html/_images/manual_dependencies_flowchart_intro.png differ
diff --git a/doc/_build/html/_images/manual_dependencies_flowchart_intro1.png b/doc/_build/html/_images/manual_dependencies_flowchart_intro1.png
new file mode 100644
index 0000000..d95dfe9
Binary files /dev/null and b/doc/_build/html/_images/manual_dependencies_flowchart_intro1.png differ
diff --git a/doc/_build/html/_images/manual_exceptions.png b/doc/_build/html/_images/manual_exceptions.png
new file mode 100644
index 0000000..f1dd840
Binary files /dev/null and b/doc/_build/html/_images/manual_exceptions.png differ
diff --git a/doc/_build/html/_images/manual_follows1.png b/doc/_build/html/_images/manual_follows1.png
new file mode 100644
index 0000000..583ad7f
Binary files /dev/null and b/doc/_build/html/_images/manual_follows1.png differ
diff --git a/doc/_build/html/_images/manual_split_merge_example.jpg b/doc/_build/html/_images/manual_split_merge_example.jpg
new file mode 100644
index 0000000..ab9e5bb
Binary files /dev/null and b/doc/_build/html/_images/manual_split_merge_example.jpg differ
diff --git a/doc/_build/html/_images/manual_transform.png b/doc/_build/html/_images/manual_transform.png
new file mode 100644
index 0000000..9d89086
Binary files /dev/null and b/doc/_build/html/_images/manual_transform.png differ
diff --git a/doc/_build/html/_images/manual_transform_complex_outputs.png b/doc/_build/html/_images/manual_transform_complex_outputs.png
new file mode 100644
index 0000000..0b00542
Binary files /dev/null and b/doc/_build/html/_images/manual_transform_complex_outputs.png differ
diff --git a/doc/_build/html/_images/pretty_flowchart.png b/doc/_build/html/_images/pretty_flowchart.png
new file mode 100644
index 0000000..200338a
Binary files /dev/null and b/doc/_build/html/_images/pretty_flowchart.png differ
diff --git a/doc/_build/html/_images/simple_tutorial_complex_flowchart.png b/doc/_build/html/_images/simple_tutorial_complex_flowchart.png
new file mode 100644
index 0000000..63b4d85
Binary files /dev/null and b/doc/_build/html/_images/simple_tutorial_complex_flowchart.png differ
diff --git a/doc/_build/html/_images/simple_tutorial_complex_flowchart1.png b/doc/_build/html/_images/simple_tutorial_complex_flowchart1.png
new file mode 100644
index 0000000..63b4d85
Binary files /dev/null and b/doc/_build/html/_images/simple_tutorial_complex_flowchart1.png differ
diff --git a/doc/_build/html/_images/simple_tutorial_complex_flowchart_error.png b/doc/_build/html/_images/simple_tutorial_complex_flowchart_error.png
new file mode 100644
index 0000000..cb1f604
Binary files /dev/null and b/doc/_build/html/_images/simple_tutorial_complex_flowchart_error.png differ
diff --git a/doc/_build/html/_images/simple_tutorial_complex_flowchart_error1.png b/doc/_build/html/_images/simple_tutorial_complex_flowchart_error1.png
new file mode 100644
index 0000000..cb1f604
Binary files /dev/null and b/doc/_build/html/_images/simple_tutorial_complex_flowchart_error1.png differ
diff --git a/doc/_build/html/_images/simple_tutorial_decorator_syntax.png b/doc/_build/html/_images/simple_tutorial_decorator_syntax.png
new file mode 100644
index 0000000..e8614f1
Binary files /dev/null and b/doc/_build/html/_images/simple_tutorial_decorator_syntax.png differ
diff --git a/doc/_build/html/_images/simple_tutorial_hello_world.png b/doc/_build/html/_images/simple_tutorial_hello_world.png
new file mode 100644
index 0000000..78aa28d
Binary files /dev/null and b/doc/_build/html/_images/simple_tutorial_hello_world.png differ
diff --git a/doc/_build/html/_images/simple_tutorial_hello_world_output.png b/doc/_build/html/_images/simple_tutorial_hello_world_output.png
new file mode 100644
index 0000000..c98ec63
Binary files /dev/null and b/doc/_build/html/_images/simple_tutorial_hello_world_output.png differ
diff --git a/doc/_build/html/_images/simple_tutorial_intro_follows.png b/doc/_build/html/_images/simple_tutorial_intro_follows.png
new file mode 100644
index 0000000..dc46350
Binary files /dev/null and b/doc/_build/html/_images/simple_tutorial_intro_follows.png differ
diff --git a/doc/_build/html/_images/simple_tutorial_merge1.png b/doc/_build/html/_images/simple_tutorial_merge1.png
new file mode 100644
index 0000000..fe1b6ab
Binary files /dev/null and b/doc/_build/html/_images/simple_tutorial_merge1.png differ
diff --git a/doc/_build/html/_images/simple_tutorial_merge2.png b/doc/_build/html/_images/simple_tutorial_merge2.png
new file mode 100644
index 0000000..04835b7
Binary files /dev/null and b/doc/_build/html/_images/simple_tutorial_merge2.png differ
diff --git a/doc/_build/html/_images/simple_tutorial_pipeline_printout1.png b/doc/_build/html/_images/simple_tutorial_pipeline_printout1.png
new file mode 100644
index 0000000..73c1cd9
Binary files /dev/null and b/doc/_build/html/_images/simple_tutorial_pipeline_printout1.png differ
diff --git a/doc/_build/html/_images/simple_tutorial_pipeline_printout11.png b/doc/_build/html/_images/simple_tutorial_pipeline_printout11.png
new file mode 100644
index 0000000..73c1cd9
Binary files /dev/null and b/doc/_build/html/_images/simple_tutorial_pipeline_printout11.png differ
diff --git a/doc/_build/html/_images/simple_tutorial_pipeline_printout2.png b/doc/_build/html/_images/simple_tutorial_pipeline_printout2.png
new file mode 100644
index 0000000..bfe04ad
Binary files /dev/null and b/doc/_build/html/_images/simple_tutorial_pipeline_printout2.png differ
diff --git a/doc/_build/html/_images/simple_tutorial_pipeline_printout21.png b/doc/_build/html/_images/simple_tutorial_pipeline_printout21.png
new file mode 100644
index 0000000..bfe04ad
Binary files /dev/null and b/doc/_build/html/_images/simple_tutorial_pipeline_printout21.png differ
diff --git a/doc/_build/html/_images/simple_tutorial_pipeline_printout3.png b/doc/_build/html/_images/simple_tutorial_pipeline_printout3.png
new file mode 100644
index 0000000..96e0343
Binary files /dev/null and b/doc/_build/html/_images/simple_tutorial_pipeline_printout3.png differ
diff --git a/doc/_build/html/_images/simple_tutorial_pipeline_printout31.png b/doc/_build/html/_images/simple_tutorial_pipeline_printout31.png
new file mode 100644
index 0000000..96e0343
Binary files /dev/null and b/doc/_build/html/_images/simple_tutorial_pipeline_printout31.png differ
diff --git a/doc/_build/html/_images/simple_tutorial_posttask.png b/doc/_build/html/_images/simple_tutorial_posttask.png
new file mode 100644
index 0000000..c5e75d5
Binary files /dev/null and b/doc/_build/html/_images/simple_tutorial_posttask.png differ
diff --git a/doc/_build/html/_images/simple_tutorial_split.png b/doc/_build/html/_images/simple_tutorial_split.png
new file mode 100644
index 0000000..2f9d8d5
Binary files /dev/null and b/doc/_build/html/_images/simple_tutorial_split.png differ
diff --git a/doc/_build/html/_images/simple_tutorial_split1.png b/doc/_build/html/_images/simple_tutorial_split1.png
new file mode 100644
index 0000000..2f9d8d5
Binary files /dev/null and b/doc/_build/html/_images/simple_tutorial_split1.png differ
diff --git a/doc/_build/html/_images/simple_tutorial_stage4_after.png b/doc/_build/html/_images/simple_tutorial_stage4_after.png
new file mode 100644
index 0000000..1095d57
Binary files /dev/null and b/doc/_build/html/_images/simple_tutorial_stage4_after.png differ
diff --git a/doc/_build/html/_images/simple_tutorial_stage4_before.png b/doc/_build/html/_images/simple_tutorial_stage4_before.png
new file mode 100644
index 0000000..1724e30
Binary files /dev/null and b/doc/_build/html/_images/simple_tutorial_stage4_before.png differ
diff --git a/doc/_build/html/_images/simple_tutorial_stage5_after.png b/doc/_build/html/_images/simple_tutorial_stage5_after.png
new file mode 100644
index 0000000..ac66986
Binary files /dev/null and b/doc/_build/html/_images/simple_tutorial_stage5_after.png differ
diff --git a/doc/_build/html/_images/simple_tutorial_stage5_before.png b/doc/_build/html/_images/simple_tutorial_stage5_before.png
new file mode 100644
index 0000000..96d5a4b
Binary files /dev/null and b/doc/_build/html/_images/simple_tutorial_stage5_before.png differ
diff --git a/doc/_build/html/_images/simple_tutorial_stage5_flowchart.png b/doc/_build/html/_images/simple_tutorial_stage5_flowchart.png
new file mode 100644
index 0000000..3714afb
Binary files /dev/null and b/doc/_build/html/_images/simple_tutorial_stage5_flowchart.png differ
diff --git a/doc/_build/html/_images/simple_tutorial_step4.png b/doc/_build/html/_images/simple_tutorial_step4.png
new file mode 100644
index 0000000..34a264a
Binary files /dev/null and b/doc/_build/html/_images/simple_tutorial_step4.png differ
diff --git a/doc/_build/html/_images/simple_tutorial_step5.png b/doc/_build/html/_images/simple_tutorial_step5.png
new file mode 100644
index 0000000..ce8b80e
Binary files /dev/null and b/doc/_build/html/_images/simple_tutorial_step5.png differ
diff --git a/doc/_build/html/_images/simple_tutorial_step5_sans_key.png b/doc/_build/html/_images/simple_tutorial_step5_sans_key.png
new file mode 100644
index 0000000..f14f775
Binary files /dev/null and b/doc/_build/html/_images/simple_tutorial_step5_sans_key.png differ
diff --git a/doc/_build/html/_images/simple_tutorial_transform.png b/doc/_build/html/_images/simple_tutorial_transform.png
new file mode 100644
index 0000000..6808632
Binary files /dev/null and b/doc/_build/html/_images/simple_tutorial_transform.png differ
diff --git a/doc/_build/html/_images/simple_tutorial_transform1.png b/doc/_build/html/_images/simple_tutorial_transform1.png
new file mode 100644
index 0000000..6808632
Binary files /dev/null and b/doc/_build/html/_images/simple_tutorial_transform1.png differ
diff --git a/doc/_build/html/_images/simple_tutorial_zoo_animals_formatter_example.jpg b/doc/_build/html/_images/simple_tutorial_zoo_animals_formatter_example.jpg
new file mode 100644
index 0000000..cfbfcc4
Binary files /dev/null and b/doc/_build/html/_images/simple_tutorial_zoo_animals_formatter_example.jpg differ
diff --git a/doc/_build/html/_images/theoretical_pipeline_schematic.png b/doc/_build/html/_images/theoretical_pipeline_schematic.png
new file mode 100644
index 0000000..a84cd9d
Binary files /dev/null and b/doc/_build/html/_images/theoretical_pipeline_schematic.png differ
diff --git a/doc/_build/html/_images/transform_1_to_1_example.png b/doc/_build/html/_images/transform_1_to_1_example.png
new file mode 100644
index 0000000..ed794d6
Binary files /dev/null and b/doc/_build/html/_images/transform_1_to_1_example.png differ
diff --git a/doc/_build/html/_images/tutorial_key.jpg b/doc/_build/html/_images/tutorial_key.jpg
new file mode 100644
index 0000000..961aaf4
Binary files /dev/null and b/doc/_build/html/_images/tutorial_key.jpg differ
diff --git a/doc/_build/html/_images/tutorial_key.png b/doc/_build/html/_images/tutorial_key.png
new file mode 100644
index 0000000..a65505b
Binary files /dev/null and b/doc/_build/html/_images/tutorial_key.png differ
diff --git a/doc/_build/html/_images/tutorial_ruffus_files.jpg b/doc/_build/html/_images/tutorial_ruffus_files.jpg
new file mode 100644
index 0000000..00afb51
Binary files /dev/null and b/doc/_build/html/_images/tutorial_ruffus_files.jpg differ
diff --git a/doc/_build/html/_images/tutorial_step1_decorator_syntax.png b/doc/_build/html/_images/tutorial_step1_decorator_syntax.png
new file mode 100644
index 0000000..0fc68ab
Binary files /dev/null and b/doc/_build/html/_images/tutorial_step1_decorator_syntax.png differ
diff --git a/doc/_build/html/_images/wikimedia_bandedkrait.jpg b/doc/_build/html/_images/wikimedia_bandedkrait.jpg
new file mode 100644
index 0000000..e227f02
Binary files /dev/null and b/doc/_build/html/_images/wikimedia_bandedkrait.jpg differ
diff --git a/doc/_build/html/_images/wikimedia_cyl_ruffus.jpg b/doc/_build/html/_images/wikimedia_cyl_ruffus.jpg
new file mode 100644
index 0000000..d60e3bc
Binary files /dev/null and b/doc/_build/html/_images/wikimedia_cyl_ruffus.jpg differ
diff --git a/doc/_build/html/_modules/index.html b/doc/_build/html/_modules/index.html
new file mode 100644
index 0000000..51a44eb
--- /dev/null
+++ b/doc/_build/html/_modules/index.html
@@ -0,0 +1,163 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>Overview: module code — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../_static/jquery.js"></script>
+ <script type="text/javascript" src="../_static/underscore.js"></script>
+ <script type="text/javascript" src="../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../index.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ Ruffus v. 2.5
+ <li><a href="../index.html">Home</a> | </li>
+ <li><a href="../contents.html">Contents</a> | </li>
+ <li><a href="../installation.html">Install</a> | </li>
+ <li><a href="../tutorials/new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../tutorials/new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../faq.html">FAQ</a> | </li>
+ <li><a href="../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../tutorials/new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../gallery.html">Gallery</a> | </li>
+ <li><a href="../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <h1>All modules for which code is available</h1>
+<ul><li><a href="ruffus/proxy_logger.html">ruffus.proxy_logger</a></li>
+<li><a href="ruffus/task.html">ruffus.task</a></li>
+</ul>
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="../decorators/decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="../decorators/originate.html">@originate</a> </li>
+ <li><a href="../decorators/split.html">@split</a> </li>
+ <li><a href="../decorators/transform.html">@transform</a> </li>
+ <li><a href="../decorators/merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="../decorators/decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="../decorators/subdivide.html">@subdivide</a> </li>
+ <li><a href="../decorators/transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="../decorators/collate.html">@collate</a> </li>
+ <li><a href="../decorators/collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="../decorators/graphviz.html">@graphviz</a> </li>
+ <li><a href="../decorators/mkdir.html">@mkdir</a> </li>
+ <li><a href="../decorators/follows.html">@follows / mkdir</a> </li>
+ <li><a href="../decorators/posttask.html">@posttask touch_file</a> </li>
+ <li><a href="../decorators/active_if.html">@active_if</a> </li>
+ <li><a href="../decorators/jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="../decorators/decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="../decorators/product.html">@product </a> </li>
+ <li><a href="../decorators/permutations.html">@permutations </a> </li>
+ <li><a href="../decorators/combinations.html">@combinations </a> </li>
+ <li><a href="../decorators/combinations_with_replacement.html">@combinations_ _with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="../decorators/decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="../decorators/files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="../decorators/parallel.html">@parallel</a> </li>
+ <li><a href="../decorators/check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="../decorators/indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../genindex.html" title="General Index"
+ >index</a></li>
+ Ruffus v. 2.5
+ <li><a href="../index.html">Home</a> | </li>
+ <li><a href="../contents.html">Contents</a> | </li>
+ <li><a href="../installation.html">Install</a> | </li>
+ <li><a href="../tutorials/new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../tutorials/new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../faq.html">FAQ</a> | </li>
+ <li><a href="../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../tutorials/new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../gallery.html">Gallery</a> | </li>
+ <li><a href="../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/_modules/ruffus/proxy_logger.html b/doc/_build/html/_modules/ruffus/proxy_logger.html
new file mode 100644
index 0000000..37f5217
--- /dev/null
+++ b/doc/_build/html/_modules/ruffus/proxy_logger.html
@@ -0,0 +1,560 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>ruffus.proxy_logger — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../../index.html" />
+ <link rel="up" title="Module code" href="../index.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="../../tutorials/new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../../tutorials/new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../../tutorials/new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ <li><a href="../index.html" accesskey="U">Module code</a> »</li>
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <h1>Source code for ruffus.proxy_logger</h1><div class="highlight"><pre>
+<span class="c">#!/usr/bin/env python</span>
+<span class="c">################################################################################</span>
+<span class="c">#</span>
+<span class="c"># proxy_logger.py</span>
+<span class="c">#</span>
+<span class="c">#</span>
+<span class="c"># Copyright (c) 10/9/2009 Leo Goodstadt</span>
+<span class="c">#</span>
+<span class="c"># Permission is hereby granted, free of charge, to any person obtaining a copy</span>
+<span class="c"># of this software and associated documentation files (the "Software"), to deal</span>
+<span class="c"># in the Software without restriction, including without limitation the rights</span>
+<span class="c"># to use, copy, modify, merge, publish, distribute, sublicense, and/or sell</span>
+<span class="c"># copies of the Software, and to permit persons to whom the Software is</span>
+<span class="c"># furnished to do so, subject to the following conditions:</span>
+<span class="c">#</span>
+<span class="c"># The above copyright notice and this permission notice shall be included in</span>
+<span class="c"># all copies or substantial portions of the Software.</span>
+<span class="c">#</span>
+<span class="c"># THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR</span>
+<span class="c"># IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,</span>
+<span class="c"># FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE</span>
+<span class="c"># AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER</span>
+<span class="c"># LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,</span>
+<span class="c"># OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN</span>
+<span class="c"># THE SOFTWARE.</span>
+<span class="c">#################################################################################</span>
+<span class="sd">"""</span>
+<span class="sd">****************************************************************************</span>
+<span class="sd">Create proxy for logging for use with multiprocessing</span>
+<span class="sd">****************************************************************************</span>
+
+<span class="sd">These can be safely sent (marshalled) across process boundaries</span>
+
+
+<span class="sd">===========</span>
+<span class="sd">Example 1</span>
+<span class="sd">===========</span>
+
+<span class="sd"> Set up logger from config file::</span>
+
+<span class="sd"> from proxy_logger import *</span>
+<span class="sd"> args={}</span>
+<span class="sd"> args["config_file"] = "/my/config/file"</span>
+
+<span class="sd"> (logger_proxy,</span>
+<span class="sd"> logging_mutex) = make_shared_logger_and_proxy (setup_std_shared_logger,</span>
+<span class="sd"> "my_logger", args)</span>
+
+
+<span class="sd">===========</span>
+<span class="sd">Example 2</span>
+<span class="sd">===========</span>
+
+<span class="sd"> Log to file ``"/my/lg.log"`` in the specified format (Time / Log name / Event type / Message).</span>
+
+<span class="sd"> Delay file creation until first log.</span>
+
+<span class="sd"> Only log ``Debug`` messages</span>
+
+<span class="sd"> Other alternatives for the logging threshold (``args["level"]``) include</span>
+
+<span class="sd"> * ``logging.DEBUG``</span>
+<span class="sd"> * ``logging.INFO``</span>
+<span class="sd"> * ``logging.WARNING``</span>
+<span class="sd"> * ``logging.ERROR``</span>
+<span class="sd"> * ``logging.CRITICAL``</span>
+
+<span class="sd"> ::</span>
+
+<span class="sd"> from proxy_logger import *</span>
+<span class="sd"> args={}</span>
+<span class="sd"> args["file_name"] = "/my/lg.log"</span>
+<span class="sd"> args["formatter"] = "%(asctime)s - %(name)s - %(levelname)6s - %(message)s"</span>
+<span class="sd"> args["delay"] = True</span>
+<span class="sd"> args["level"] = logging.DEBUG</span>
+
+<span class="sd"> (logger_proxy,</span>
+<span class="sd"> logging_mutex) = make_shared_logger_and_proxy (setup_std_shared_logger,</span>
+<span class="sd"> "my_logger", args)</span>
+
+<span class="sd">===========</span>
+<span class="sd">Example 3</span>
+<span class="sd">===========</span>
+
+<span class="sd"> Rotate log files every 20 Kb, with up to 10 backups.</span>
+<span class="sd"> ::</span>
+
+<span class="sd"> from proxy_logger import *</span>
+<span class="sd"> args={}</span>
+<span class="sd"> args["file_name"] = "/my/lg.log"</span>
+<span class="sd"> args["rotating"] = True</span>
+<span class="sd"> args["maxBytes"]=20000</span>
+<span class="sd"> args["backupCount"]=10</span>
+<span class="sd"> (logger_proxy,</span>
+<span class="sd"> logging_mutex) = make_shared_logger_and_proxy (setup_std_shared_logger,</span>
+<span class="sd"> "my_logger", args)</span>
+
+
+
+<span class="sd">==============</span>
+<span class="sd">To use:</span>
+<span class="sd">==============</span>
+
+<span class="sd"> ::</span>
+
+<span class="sd"> (logger_proxy,</span>
+<span class="sd"> logging_mutex) = make_shared_logger_and_proxy (setup_std_shared_logger,</span>
+<span class="sd"> "my_logger", args)</span>
+
+<span class="sd"> with logging_mutex:</span>
+<span class="sd"> my_log.debug('This is a debug message')</span>
+<span class="sd"> my_log.info('This is an info message')</span>
+<span class="sd"> my_log.warning('This is a warning message')</span>
+<span class="sd"> my_log.error('This is an error message')</span>
+<span class="sd"> my_log.critical('This is a critical error message')</span>
+<span class="sd"> my_log.log(logging.DEBUG, 'This is a debug message')</span>
+
+<span class="sd"> Note that the logging function ``exception()`` is not included because python</span>
+<span class="sd"> stack trace information is not well-marshalled</span>
+<span class="sd"> (`pickle <http://docs.python.org/library/pickle.html>`_\ d) across processes.</span>
+
+<span class="sd">"""</span>
+
+
+
+
+
+<span class="c">#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888</span>
+
+<span class="c"># imports</span>
+
+
+<span class="c">#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888</span>
+
+<span class="kn">import</span> <span class="nn">sys</span><span class="o">,</span><span class="nn">os</span>
+
+
+<span class="c">#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888</span>
+
+<span class="c"># Shared logging</span>
+
+
+<span class="c">#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888</span>
+
+<span class="kn">import</span> <span class="nn">multiprocessing</span>
+<span class="kn">import</span> <span class="nn">multiprocessing.managers</span>
+
+
+<span class="kn">import</span> <span class="nn">logging</span>
+<span class="kn">import</span> <span class="nn">logging.handlers</span>
+
+
+
+<span class="c">#</span>
+<span class="c"># setup_logger</span>
+<span class="c">#</span>
+<div class="viewcode-block" id="setup_std_shared_logger"><a class="viewcode-back" href="../../proxy_logger.html#ruffus.proxy_logger.setup_std_shared_logger">[docs]</a><span class="k">def</span> <span class="nf">setup_std_shared_logger</span><span class="p">(</span><span class="n">logger_name</span><span class="p">,</span> <span class="n">args</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> This function is a simple around wrapper around the python</span>
+<span class="sd"> `logging <http://docs.python.org/library/logging.html>`_ module.</span>
+
+<span class="sd"> This *logger_factory* example creates logging objects which can</span>
+<span class="sd"> then be managed by proxy via ``ruffus.proxy_logger.make_shared_logger_and_proxy()``</span>
+
+<span class="sd"> This can be:</span>
+
+<span class="sd"> * a `disk log file <http://docs.python.org/library/logging.html#filehandler>`_</span>
+<span class="sd"> * a automatically backed-up `(rotating) log <http://docs.python.org/library/logging.html#rotatingfilehandler>`_.</span>
+<span class="sd"> * any log specified in a `configuration file <http://docs.python.org/library/logging.html#configuration-file-format>`_</span>
+
+<span class="sd"> These are specified in the ``args`` dictionary forwarded by ``make_shared_logger_and_proxy()``</span>
+
+<span class="sd"> :param logger_name: name of log</span>
+<span class="sd"> :param args: a dictionary of parameters forwarded from ``make_shared_logger_and_proxy()``</span>
+
+<span class="sd"> Valid entries include:</span>
+
+<span class="sd"> .. describe:: "level"</span>
+
+<span class="sd"> Sets the `threshold <http://docs.python.org/library/logging.html#logging.Handler.setLevel>`_ for the logger.</span>
+
+<span class="sd"> .. describe:: "config_file"</span>
+
+<span class="sd"> The logging object is configured from this `configuration file <http://docs.python.org/library/logging.html#configuration-file-format>`_.</span>
+
+<span class="sd"> .. describe:: "file_name"</span>
+
+<span class="sd"> Sets disk log file name.</span>
+
+<span class="sd"> .. describe:: "rotating"</span>
+
+<span class="sd"> Chooses a `(rotating) log <http://docs.python.org/library/logging.html#rotatingfilehandler>`_.</span>
+
+<span class="sd"> .. describe:: "maxBytes"</span>
+
+<span class="sd"> Allows the file to rollover at a predetermined size</span>
+
+<span class="sd"> .. describe:: "backupCount"</span>
+
+<span class="sd"> If backupCount is non-zero, the system will save old log files by appending the extensions ``.1``, ``.2``, ``.3`` etc., to the filename.</span>
+
+<span class="sd"> .. describe:: "delay"</span>
+
+<span class="sd"> Defer file creation until the log is written to.</span>
+
+<span class="sd"> .. describe:: "formatter"</span>
+
+<span class="sd"> `Converts <http://docs.python.org/library/logging.html#formatter-objects>`_ the message to a logged entry string.</span>
+<span class="sd"> For example,</span>
+<span class="sd"> ::</span>
+
+<span class="sd"> "%(asctime)s - %(name)s - %(levelname)6s - %(message)s"</span>
+
+
+
+<span class="sd"> """</span>
+
+ <span class="c">#</span>
+ <span class="c"># Log file name with logger level</span>
+ <span class="c">#</span>
+ <span class="n">new_logger</span> <span class="o">=</span> <span class="n">logging</span><span class="o">.</span><span class="n">getLogger</span><span class="p">(</span><span class="n">logger_name</span><span class="p">)</span>
+ <span class="k">if</span> <span class="n">args</span><span class="o">.</span><span class="n">has_key</span><span class="p">(</span><span class="s">"level"</span><span class="p">):</span>
+ <span class="n">new_logger</span><span class="o">.</span><span class="n">setLevel</span><span class="p">(</span><span class="n">args</span><span class="p">[</span><span class="s">"level"</span><span class="p">])</span>
+
+ <span class="k">if</span> <span class="n">args</span><span class="o">.</span><span class="n">has_key</span><span class="p">(</span><span class="s">"config_file"</span><span class="p">):</span>
+ <span class="n">logging</span><span class="o">.</span><span class="n">config</span><span class="o">.</span><span class="n">fileConfig</span><span class="p">(</span><span class="n">args</span><span class="p">[</span><span class="s">"config_file"</span><span class="p">])</span>
+
+ <span class="k">else</span><span class="p">:</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="n">args</span><span class="o">.</span><span class="n">has_key</span><span class="p">(</span><span class="s">"file_name"</span><span class="p">):</span>
+ <span class="k">raise</span> <span class="ne">Exception</span><span class="p">(</span><span class="s">"Missing file name for log. Remember to set 'file_name'"</span><span class="p">)</span>
+ <span class="n">log_file_name</span> <span class="o">=</span> <span class="n">args</span><span class="p">[</span><span class="s">"file_name"</span><span class="p">]</span>
+
+ <span class="k">if</span> <span class="n">args</span><span class="o">.</span><span class="n">has_key</span><span class="p">(</span><span class="s">"rotating"</span><span class="p">):</span>
+ <span class="n">rotating_args</span> <span class="o">=</span> <span class="p">{}</span>
+ <span class="c"># override default</span>
+ <span class="n">rotating_args</span><span class="p">[</span><span class="s">"maxBytes"</span><span class="p">]</span><span class="o">=</span><span class="n">args</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s">"maxBytes"</span><span class="p">,</span> <span class="mi">100000</span><span class="p">)</span>
+ <span class="n">rotating_args</span><span class="p">[</span><span class="s">"backupCount"</span><span class="p">]</span><span class="o">=</span><span class="n">args</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s">"backupCount"</span><span class="p">,</span> <span class="mi">5</span><span class="p">)</span>
+ <span class="n">handler</span> <span class="o">=</span> <span class="n">logging</span><span class="o">.</span><span class="n">handlers</span><span class="o">.</span><span class="n">RotatingFileHandler</span><span class="p">(</span> <span class="n">log_file_name</span><span class="p">,</span> <span class="o">**</span><span class="n">rotating_args</span><span class="p">)</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="n">defer_loggin</span> <span class="o">=</span> <span class="n">args</span><span class="o">.</span><span class="n">has_key</span><span class="p">(</span><span class="s">"delay"</span><span class="p">)</span>
+ <span class="n">handler</span> <span class="o">=</span> <span class="n">logging</span><span class="o">.</span><span class="n">handlers</span><span class="o">.</span><span class="n">RotatingFileHandler</span><span class="p">(</span> <span class="n">log_file_name</span><span class="p">,</span> <span class="n">delay</span><span class="o">=</span><span class="n">defer_loggin</span><span class="p">)</span>
+
+ <span class="c"># %(name)s</span>
+ <span class="c"># %(levelno)s</span>
+ <span class="c"># %(levelname)s</span>
+ <span class="c"># %(pathname)s</span>
+ <span class="c"># %(filename)s</span>
+ <span class="c"># %(module)s</span>
+ <span class="c"># %(funcName)s</span>
+ <span class="c"># %(lineno)d</span>
+ <span class="c"># %(created)f</span>
+ <span class="c"># %(relativeCreated)d</span>
+ <span class="c"># %(asctime)s</span>
+ <span class="c"># %(msecs)d</span>
+ <span class="c"># %(thread)d</span>
+ <span class="c"># %(threadName)s</span>
+ <span class="c"># %(process)d</span>
+ <span class="c"># %(message)s</span>
+ <span class="c">#</span>
+ <span class="c"># E.g.: "%(asctime)s - %(name)s - %(levelname)6s - %(message)s"</span>
+ <span class="c">#</span>
+ <span class="k">if</span> <span class="n">args</span><span class="o">.</span><span class="n">has_key</span><span class="p">(</span><span class="s">"formatter"</span><span class="p">):</span>
+ <span class="n">my_formatter</span> <span class="o">=</span> <span class="n">logging</span><span class="o">.</span><span class="n">Formatter</span><span class="p">(</span><span class="n">args</span><span class="p">[</span><span class="s">"formatter"</span><span class="p">])</span>
+ <span class="n">handler</span><span class="o">.</span><span class="n">setFormatter</span><span class="p">(</span><span class="n">my_formatter</span><span class="p">)</span>
+
+ <span class="n">new_logger</span><span class="o">.</span><span class="n">addHandler</span><span class="p">(</span><span class="n">handler</span><span class="p">)</span>
+
+ <span class="c">#</span>
+ <span class="c"># This log object will be wrapped in proxy</span>
+ <span class="c">#</span>
+ <span class="k">return</span> <span class="n">new_logger</span>
+
+
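Called directly (rather than via make_shared_logger_and_proxy(), which normally invokes it for you in the manager's process), the factory above simply returns a standard logging.Logger. A sketch using the rotating-log settings from Example 3, with an illustrative file name::

    import logging
    from ruffus.proxy_logger import setup_std_shared_logger

    args = {"file_name": "/my/lg.log",
            "rotating": True,         # use a rotating log file
            "maxBytes": 20000,        # roll over every 20 Kb
            "backupCount": 10,        # keep up to 10 old log files
            "level": logging.INFO}

    my_logger = setup_std_shared_logger("my_logger", args)
    my_logger.info("logged directly, without the proxy machinery")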
+<span class="c">#</span>
+<span class="c"># Proxy object for logging</span>
+<span class="c"># Logging messages will be marshalled (forwarded) to the process where the</span>
+<span class="c"># shared log lives</span>
+<span class="c">#</span></div>
+<span class="k">class</span> <span class="nc">LoggerProxy</span><span class="p">(</span><span class="n">multiprocessing</span><span class="o">.</span><span class="n">managers</span><span class="o">.</span><span class="n">BaseProxy</span><span class="p">):</span>
+ <span class="k">def</span> <span class="nf">debug</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
+ <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_callmethod</span><span class="p">(</span><span class="s">'debug'</span><span class="p">,</span> <span class="n">args</span><span class="p">,</span> <span class="n">kwargs</span><span class="p">)</span>
+ <span class="k">def</span> <span class="nf">log</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
+ <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_callmethod</span><span class="p">(</span><span class="s">'log'</span><span class="p">,</span> <span class="n">args</span><span class="p">,</span> <span class="n">kwargs</span><span class="p">)</span>
+ <span class="k">def</span> <span class="nf">info</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
+ <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_callmethod</span><span class="p">(</span><span class="s">'info'</span><span class="p">,</span> <span class="n">args</span><span class="p">,</span> <span class="n">kwargs</span><span class="p">)</span>
+ <span class="k">def</span> <span class="nf">warning</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
+ <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_callmethod</span><span class="p">(</span><span class="s">'warning'</span><span class="p">,</span> <span class="n">args</span><span class="p">,</span> <span class="n">kwargs</span><span class="p">)</span>
+ <span class="k">def</span> <span class="nf">error</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
+ <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_callmethod</span><span class="p">(</span><span class="s">'error'</span><span class="p">,</span> <span class="n">args</span><span class="p">,</span> <span class="n">kwargs</span><span class="p">)</span>
+ <span class="k">def</span> <span class="nf">critical</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
+ <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_callmethod</span><span class="p">(</span><span class="s">'critical'</span><span class="p">,</span> <span class="n">args</span><span class="p">,</span> <span class="n">kwargs</span><span class="p">)</span>
+ <span class="k">def</span> <span class="nf">log</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
+ <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_callmethod</span><span class="p">(</span><span class="s">'log'</span><span class="p">,</span> <span class="n">args</span><span class="p">,</span> <span class="n">kwargs</span><span class="p">)</span>
+ <span class="k">def</span> <span class="nf">__str__</span> <span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="s">"<LoggingProxy>"</span>
+
+<span class="c">#</span>
+<span class="c"># Register the setup_logger function as a proxy for setup_logger</span>
+<span class="c">#</span>
+<span class="c"># We use SyncManager as a base class so we can get a lock proxy for synchronising</span>
+<span class="c"># logging later on</span>
+<span class="c">#</span>
+<span class="k">class</span> <span class="nc">LoggingManager</span><span class="p">(</span><span class="n">multiprocessing</span><span class="o">.</span><span class="n">managers</span><span class="o">.</span><span class="n">SyncManager</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Logging manager sets up its own process and will create the real Log object there</span>
+<span class="sd"> We refer to this (real) log via proxies</span>
+<span class="sd"> """</span>
+ <span class="k">pass</span>
+
+
+
+
+<div class="viewcode-block" id="make_shared_logger_and_proxy"><a class="viewcode-back" href="../../proxy_logger.html#ruffus.proxy_logger.make_shared_logger_and_proxy">[docs]</a><span class="k">def</span> <span class="nf">make_shared_logger_and_proxy</span> <span class="p">(</span><span class="n">logger_factory</span><span class="p">,</span> <span class="n">logger_name</span><span class="p">,</span> <span class="n">args</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Make a `logging <http://docs.python.org/library/logging.html>`_ object</span>
+<span class="sd"> called "\ ``logger_name``\ " by calling ``logger_factory``\ (``args``\ )</span>
+
+<span class="sd"> This function will return a proxy to the shared logger which can be copied to jobs</span>
+<span class="sd"> in other processes, as well as a mutex which can be used to prevent simultaneous logging</span>
+<span class="sd"> from happening.</span>
+
+<span class="sd"> :param logger_factory: functions which creates and returns an object with the</span>
+<span class="sd"> `logging <http://docs.python.org/library/logging.html>`_ interface.</span>
+<span class="sd"> ``setup_std_shared_logger()`` is one example of a logger factory.</span>
+<span class="sd"> :param logger_name: name of log</span>
+<span class="sd"> :param args: parameters passed (as a single argument) to ``logger_factory``</span>
+<span class="sd"> :returns: a proxy to the shared logger which can be copied to jobs in other processes</span>
+<span class="sd"> :returns: a mutex which can be used to prevent simultaneous logging from happening</span>
+
+<span class="sd"> """</span>
+ <span class="c">#</span>
+ <span class="c"># make shared log and proxy</span>
+ <span class="c">#</span>
+ <span class="n">manager</span> <span class="o">=</span> <span class="n">LoggingManager</span><span class="p">()</span>
+ <span class="n">manager</span><span class="o">.</span><span class="n">register</span><span class="p">(</span> <span class="s">'setup_logger'</span><span class="p">,</span>
+ <span class="n">logger_factory</span><span class="p">,</span>
+ <span class="n">proxytype</span><span class="o">=</span><span class="n">LoggerProxy</span><span class="p">,</span>
+ <span class="n">exposed</span> <span class="o">=</span> <span class="p">(</span> <span class="s">'critical'</span><span class="p">,</span> <span class="s">'log'</span><span class="p">,</span>
+ <span class="s">'info'</span><span class="p">,</span> <span class="s">'debug'</span><span class="p">,</span> <span class="s">'warning'</span><span class="p">,</span> <span class="s">'error'</span><span class="p">))</span>
+ <span class="n">manager</span><span class="o">.</span><span class="n">start</span><span class="p">()</span>
+ <span class="n">logger_proxy</span> <span class="o">=</span> <span class="n">manager</span><span class="o">.</span><span class="n">setup_logger</span><span class="p">(</span><span class="n">logger_name</span><span class="p">,</span> <span class="n">args</span><span class="p">)</span>
+
+ <span class="c">#</span>
+ <span class="c"># make sure we are not logging at the same time in different processes</span>
+ <span class="c">#</span>
+ <span class="n">logging_mutex</span> <span class="o">=</span> <span class="n">manager</span><span class="o">.</span><span class="n">Lock</span><span class="p">()</span>
+
+ <span class="k">return</span> <span class="n">logger_proxy</span><span class="p">,</span> <span class="n">logging_mutex</span>
+
+
+</div>
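+# ---------------------------------------------------------------------------
+# Editor's note -- illustrative usage sketch, not part of this module.
+# The proxy and mutex returned by make_shared_logger_and_proxy() are
+# picklable, so they can be passed as extra parameters to jobs running in
+# other processes and used there under the mutex. The ruffus decorator,
+# task and file names below are hypothetical / assumed:
+#
+#   (logger_proxy,
+#    logging_mutex) = make_shared_logger_and_proxy(setup_std_shared_logger,
+#                                                  "my_logger",
+#                                                  {"file_name": "/tmp/pipeline.log"})
+#
+#   @transform(starting_files, suffix(".input"), ".output",
+#              logger_proxy, logging_mutex)
+#   def run_job(input_file, output_file, logger_proxy, logging_mutex):
+#       with logging_mutex:
+#           logger_proxy.info("Processing " + input_file)
+# ---------------------------------------------------------------------------
+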
+<span class="kn">import</span> <span class="nn">unittest</span><span class="o">,</span> <span class="nn">os</span><span class="o">,</span><span class="nn">sys</span>
+<span class="kn">from</span> <span class="nn">proxy_logger</span> <span class="kn">import</span> <span class="o">*</span>
+<span class="kn">import</span> <span class="nn">traceback</span>
+
+
+<span class="k">class</span> <span class="nc">Test_Logging</span><span class="p">(</span><span class="n">unittest</span><span class="o">.</span><span class="n">TestCase</span><span class="p">):</span>
+
+
+
+ <span class="k">def</span> <span class="nf">test_rotating_log</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> test rotating via proxy</span>
+<span class="sd"> """</span>
+ <span class="nb">open</span><span class="p">(</span><span class="s">"/tmp/lg.log"</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
+ <span class="n">args</span><span class="o">=</span><span class="p">{}</span>
+ <span class="n">args</span><span class="p">[</span><span class="s">"file_name"</span><span class="p">]</span> <span class="o">=</span> <span class="s">"/tmp/lg.log"</span>
+ <span class="n">args</span><span class="p">[</span><span class="s">"rotating"</span><span class="p">]</span> <span class="o">=</span> <span class="bp">True</span>
+ <span class="n">args</span><span class="p">[</span><span class="s">"maxBytes"</span><span class="p">]</span><span class="o">=</span><span class="mi">20000</span>
+ <span class="n">args</span><span class="p">[</span><span class="s">"backupCount"</span><span class="p">]</span><span class="o">=</span><span class="mi">10</span>
+ <span class="c">#args["level"]= logging.INFO</span>
+ <span class="p">(</span><span class="n">my_log</span><span class="p">,</span>
+ <span class="n">logging_mutex</span><span class="p">)</span> <span class="o">=</span> <span class="n">make_shared_logger_and_proxy</span> <span class="p">(</span><span class="n">setup_std_shared_logger</span><span class="p">,</span>
+ <span class="s">"my_logger"</span><span class="p">,</span> <span class="n">args</span><span class="p">)</span>
+ <span class="k">with</span> <span class="n">logging_mutex</span><span class="p">:</span>
+ <span class="n">my_log</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s">'This is a debug message'</span><span class="p">)</span>
+ <span class="n">my_log</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s">'This is an info message'</span><span class="p">)</span>
+ <span class="n">my_log</span><span class="o">.</span><span class="n">warning</span><span class="p">(</span><span class="s">'This is a warning message'</span><span class="p">)</span>
+ <span class="n">my_log</span><span class="o">.</span><span class="n">error</span><span class="p">(</span><span class="s">'This is an error message'</span><span class="p">)</span>
+ <span class="n">my_log</span><span class="o">.</span><span class="n">critical</span><span class="p">(</span><span class="s">'This is a critical error message'</span><span class="p">)</span>
+ <span class="n">my_log</span><span class="o">.</span><span class="n">log</span><span class="p">(</span><span class="n">logging</span><span class="o">.</span><span class="n">ERROR</span><span class="p">,</span> <span class="s">'This is a debug message'</span><span class="p">)</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">assert_</span><span class="p">(</span><span class="nb">open</span><span class="p">(</span><span class="s">"/tmp/lg.log"</span><span class="p">)</span> <span class="o">.</span><span class="n">read</span><span class="p">()</span> <span class="o">==</span> \
+<span class="sd">"""This is a warning message</span>
+<span class="sd">This is an error message</span>
+<span class="sd">This is a critical error message</span>
+<span class="sd">This is a debug message</span>
+<span class="sd">"""</span><span class="p">)</span>
+
+
+<span class="c">#</span>
+<span class="c"># debug code not run if called as a module</span>
+<span class="c">#</span>
+<span class="k">if</span> <span class="n">__name__</span> <span class="o">==</span> <span class="s">'__main__'</span><span class="p">:</span>
+ <span class="k">if</span> <span class="n">sys</span><span class="o">.</span><span class="n">argv</span><span class="o">.</span><span class="n">count</span><span class="p">(</span><span class="s">"--debug"</span><span class="p">):</span>
+ <span class="n">sys</span><span class="o">.</span><span class="n">argv</span><span class="o">.</span><span class="n">remove</span><span class="p">(</span><span class="s">"--debug"</span><span class="p">)</span>
+ <span class="n">unittest</span><span class="o">.</span><span class="n">main</span><span class="p">()</span>
+</pre></div>
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="../../decorators/decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="../../decorators/originate.html">@originate</a> </li>
+ <li><a href="../../decorators/split.html">@split</a> </li>
+ <li><a href="../../decorators/transform.html">@transform</a> </li>
+ <li><a href="../../decorators/merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="../../decorators/subdivide.html">@subdivide</a> </li>
+ <li><a href="../../decorators/transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="../../decorators/collate.html">@collate</a> </li>
+ <li><a href="../../decorators/collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="../../decorators/graphviz.html">@graphviz</a> </li>
+ <li><a href="../../decorators/mkdir.html">@mkdir</a> </li>
+ <li><a href="../../decorators/follows.html">@follows / mkdir</a> </li>
+ <li><a href="../../decorators/posttask.html">@posttask touch_file</a> </li>
+ <li><a href="../../decorators/active_if.html">@active_if</a> </li>
+ <li><a href="../../decorators/jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="../../decorators/product.html">@product </a> </li>
+ <li><a href="../../decorators/permutations.html">@permutations </a> </li>
+ <li><a href="../../decorators/combinations.html">@combinations </a> </li>
+ <li><a href="../../decorators/combinations_with_replacement.html">@combinations_ _with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="../../decorators/decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="../../decorators/files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="../../decorators/parallel.html">@parallel</a> </li>
+ <li><a href="../../decorators/check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="../../decorators/indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ >index</a></li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="../../tutorials/new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../../tutorials/new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../../tutorials/new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ <li><a href="../index.html" >Module code</a> »</li>
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/_modules/ruffus/task.html b/doc/_build/html/_modules/ruffus/task.html
new file mode 100644
index 0000000..d31c4aa
--- /dev/null
+++ b/doc/_build/html/_modules/ruffus/task.html
@@ -0,0 +1,3953 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>ruffus.task — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../../index.html" />
+ <link rel="up" title="Module code" href="../index.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="../../tutorials/new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../../tutorials/new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../../tutorials/new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ <li><a href="../index.html" accesskey="U">Module code</a> »</li>
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <h1>Source code for ruffus.task</h1><div class="highlight"><pre>
+<span class="c">#!/usr/bin/env python</span>
+<span class="c">################################################################################</span>
+<span class="c">#</span>
+<span class="c">#</span>
+<span class="c"># task.py</span>
+<span class="c">#</span>
+<span class="c"># Copyright (c) 10/9/2009 Leo Goodstadt</span>
+<span class="c">#</span>
+<span class="c"># Permission is hereby granted, free of charge, to any person obtaining a copy</span>
+<span class="c"># of this software and associated documentation files (the "Software"), to deal</span>
+<span class="c"># in the Software without restriction, including without limitation the rights</span>
+<span class="c"># to use, copy, modify, merge, publish, distribute, sublicense, and/or sell</span>
+<span class="c"># copies of the Software, and to permit persons to whom the Software is</span>
+<span class="c"># furnished to do so, subject to the following conditions:</span>
+<span class="c">#</span>
+<span class="c"># The above copyright notice and this permission notice shall be included in</span>
+<span class="c"># all copies or substantial portions of the Software.</span>
+<span class="c">#</span>
+<span class="c"># THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR</span>
+<span class="c"># IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,</span>
+<span class="c"># FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE</span>
+<span class="c"># AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER</span>
+<span class="c"># LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,</span>
+<span class="c"># OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN</span>
+<span class="c"># THE SOFTWARE.</span>
+<span class="c">#################################################################################</span>
+<span class="sd">"""</span>
+
+<span class="sd">********************************************</span>
+<span class="sd">:mod:`ruffus.task` -- Overview</span>
+<span class="sd">********************************************</span>
+
+<span class="sd">.. moduleauthor:: Leo Goodstadt <ruffus at llew.org.uk></span>
+
+<span class="sd">Initial implementation of @active_if by Jacob Biesinger</span>
+
+<span class="sd">============================</span>
+<span class="sd">Decorator syntax:</span>
+<span class="sd">============================</span>
+
+<span class="sd"> Pipelined tasks are created by "decorating" a function with the following syntax::</span>
+
+<span class="sd"> def func_a():</span>
+<span class="sd"> pass</span>
+
+<span class="sd"> @follows(func_a)</span>
+<span class="sd"> def func_b ():</span>
+<span class="sd"> pass</span>
+
+
+<span class="sd"> Each task is a single function which is applied one or more times to a list of parameters</span>
+<span class="sd"> (typically input files to produce a list of output files).</span>
+
+<span class="sd"> Each of these is a separate, independent job (sharing the same code) which can be</span>
+<span class="sd"> run in parallel.</span>
+
+
+<span class="sd">============================</span>
+<span class="sd">Running the pipeline</span>
+<span class="sd">============================</span>
+<span class="sd"> To run the pipeline::</span>
+
+<span class="sd"> pipeline_run(target_tasks, forcedtorun_tasks = [], multiprocess = 1,</span>
+<span class="sd"> logger = stderr_logger,</span>
+<span class="sd"> gnu_make_maximal_rebuild_mode = True,</span>
+<span class="sd"> cleanup_log = "../cleanup.log")</span>
+
+<span class="sd"> pipeline_cleanup(cleanup_log = "../cleanup.log")</span>
+
+
+
+
+
+
+<span class="sd">"""</span>
+
+
+<span class="kn">from</span> <span class="nn">__future__</span> <span class="kn">import</span> <span class="n">with_statement</span>
+<span class="kn">import</span> <span class="nn">os</span><span class="o">,</span><span class="nn">sys</span><span class="o">,</span><span class="nn">copy</span><span class="o">,</span> <span class="nn">multiprocessing</span>
+<span class="c">#from collections import namedtuple</span>
+<span class="kn">import</span> <span class="nn">collections</span>
+
+<span class="c">#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888</span>
+
+<span class="c"># imports</span>
+
+
+<span class="c">#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888</span>
+<span class="kn">import</span> <span class="nn">logging</span>
+<span class="kn">import</span> <span class="nn">re</span>
+<span class="kn">from</span> <span class="nn">collections</span> <span class="kn">import</span> <span class="n">defaultdict</span>
+<span class="kn">from</span> <span class="nn">multiprocessing</span> <span class="kn">import</span> <span class="n">Pool</span>
+<span class="kn">from</span> <span class="nn">multiprocessing.pool</span> <span class="kn">import</span> <span class="n">ThreadPool</span>
+<span class="kn">import</span> <span class="nn">traceback</span>
+<span class="kn">import</span> <span class="nn">types</span>
+<span class="kn">from</span> <span class="nn">itertools</span> <span class="kn">import</span> <span class="n">imap</span>
+<span class="kn">import</span> <span class="nn">textwrap</span>
+<span class="kn">import</span> <span class="nn">time</span>
+<span class="kn">from</span> <span class="nn">multiprocessing.managers</span> <span class="kn">import</span> <span class="n">SyncManager</span>
+<span class="kn">from</span> <span class="nn">contextlib</span> <span class="kn">import</span> <span class="n">contextmanager</span>
+<span class="kn">import</span> <span class="nn">cPickle</span> <span class="kn">as</span> <span class="nn">pickle</span>
+<span class="kn">import</span> <span class="nn">dbdict</span>
+
+
+<span class="k">if</span> <span class="n">__name__</span> <span class="o">==</span> <span class="s">'__main__'</span><span class="p">:</span>
+ <span class="kn">import</span> <span class="nn">sys</span>
+ <span class="n">sys</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">insert</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span><span class="s">"."</span><span class="p">)</span>
+
+<span class="kn">from</span> <span class="nn">graph</span> <span class="kn">import</span> <span class="o">*</span>
+<span class="kn">from</span> <span class="nn">print_dependencies</span> <span class="kn">import</span> <span class="o">*</span>
+<span class="kn">from</span> <span class="nn">ruffus_exceptions</span> <span class="kn">import</span> <span class="o">*</span>
+<span class="kn">from</span> <span class="nn">ruffus_utility</span> <span class="kn">import</span> <span class="o">*</span>
+<span class="kn">from</span> <span class="nn">file_name_parameters</span> <span class="kn">import</span> <span class="o">*</span>
+
+
+<span class="c">#</span>
+<span class="c"># use simplejson in place of json for python < 2.6</span>
+<span class="c">#</span>
+<span class="k">try</span><span class="p">:</span>
+ <span class="kn">import</span> <span class="nn">json</span>
+<span class="k">except</span> <span class="ne">ImportError</span><span class="p">:</span>
+ <span class="kn">import</span> <span class="nn">simplejson</span>
+ <span class="n">json</span> <span class="o">=</span> <span class="n">simplejson</span>
+<span class="n">dumps</span> <span class="o">=</span> <span class="n">json</span><span class="o">.</span><span class="n">dumps</span>
+
+<span class="kn">import</span> <span class="nn">Queue</span>
+
+
+<span class="c">#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888</span>
+
+<span class="c">#</span>
+<span class="c"># light weight logging objects</span>
+<span class="c">#</span>
+<span class="c">#</span>
+<span class="c">#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888</span>
+<div class="viewcode-block" id="t_black_hole_logger"><a class="viewcode-back" href="../../task.html#ruffus.task.t_black_hole_logger">[docs]</a><span class="k">class</span> <span class="nc">t_black_hole_logger</span><span class="p">:</span>
+ <span class="sd">"""</span>
+<span class="sd"> Does nothing!</span>
+<span class="sd"> """</span>
+ <span class="k">def</span> <span class="nf">info</span> <span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">message</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
+ <span class="k">pass</span>
+ <span class="k">def</span> <span class="nf">debug</span> <span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">message</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
+ <span class="k">pass</span>
+ <span class="k">def</span> <span class="nf">warning</span> <span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">message</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
+ <span class="k">pass</span>
+ <span class="k">def</span> <span class="nf">error</span> <span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">message</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
+ <span class="k">pass</span>
+
+</div>
+<div class="viewcode-block" id="t_stderr_logger"><a class="viewcode-back" href="../../task.html#ruffus.task.t_stderr_logger">[docs]</a><span class="k">class</span> <span class="nc">t_stderr_logger</span><span class="p">:</span>
+ <span class="sd">"""</span>
+<span class="sd"> Everything to stderr</span>
+<span class="sd"> """</span>
+ <span class="k">def</span> <span class="nf">__init__</span> <span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">unique_prefix</span> <span class="o">=</span> <span class="s">""</span>
+ <span class="k">def</span> <span class="nf">add_unique_prefix</span> <span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="kn">import</span> <span class="nn">random</span>
+ <span class="n">random</span><span class="o">.</span><span class="n">seed</span><span class="p">()</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">unique_prefix</span><span class="o">=</span> <span class="nb">str</span><span class="p">(</span><span class="n">random</span><span class="o">.</span><span class="n">randint</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span><span class="mi">1000</span><span class="p">))</span> <span class="o">+</span> <span class="s">" "</span>
+ <span class="k">def</span> <span class="nf">info</span> <span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">message</span><span class="p">):</span>
+ <span class="n">sys</span><span class="o">.</span><span class="n">stderr</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">unique_prefix</span> <span class="o">+</span> <span class="n">message</span> <span class="o">+</span> <span class="s">"</span><span class="se">\n</span><span class="s">"</span><span class="p">)</span>
+ <span class="k">def</span> <span class="nf">warning</span> <span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">message</span><span class="p">):</span>
+ <span class="n">sys</span><span class="o">.</span><span class="n">stderr</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="s">"</span><span class="se">\n\n</span><span class="s">"</span> <span class="o">+</span> <span class="bp">self</span><span class="o">.</span><span class="n">unique_prefix</span> <span class="o">+</span> <span class="s">"WARNING:</span><span class="se">\n</span><span class="s"> "</span> <span [...]
+ <span class="k">def</span> <span class="nf">error</span> <span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">message</span><span class="p">):</span>
+ <span class="n">sys</span><span class="o">.</span><span class="n">stderr</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="s">"</span><span class="se">\n\n</span><span class="s">"</span> <span class="o">+</span> <span class="bp">self</span><span class="o">.</span><span class="n">unique_prefix</span> <span class="o">+</span> <span class="s">"ERROR:</span><span class="se">\n</span><span class="s"> "</span> <span cl [...]
+ <span class="k">def</span> <span class="nf">debug</span> <span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">message</span><span class="p">):</span>
+ <span class="n">sys</span><span class="o">.</span><span class="n">stderr</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">unique_prefix</span> <span class="o">+</span> <span class="n">message</span> <span class="o">+</span> <span class="s">"</span><span class="se">\n</span><span class="s">"</span><span class="p">)</span>
+</div>
+<span class="k">class</span> <span class="nc">t_stream_logger</span><span class="p">:</span>
+ <span class="sd">"""</span>
+<span class="sd"> Everything to stderr</span>
+<span class="sd"> """</span>
+ <span class="k">def</span> <span class="nf">__init__</span> <span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">stream</span><span class="p">):</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">stream</span> <span class="o">=</span> <span class="n">stream</span>
+ <span class="k">def</span> <span class="nf">info</span> <span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">message</span><span class="p">):</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">stream</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="n">message</span> <span class="o">+</span> <span class="s">"</span><span class="se">\n</span><span class="s">"</span><span class="p">)</span>
+ <span class="k">def</span> <span class="nf">warning</span> <span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">message</span><span class="p">):</span>
+ <span class="n">sys</span><span class="o">.</span><span class="n">stream</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="s">"</span><span class="se">\n\n</span><span class="s">WARNING:</span><span class="se">\n</span><span class="s"> "</span> <span class="o">+</span> <span class="n">message</span> <span class="o">+</span> <span class="s">"</span><span class="se">\n\n</span><span class="s">"</span><span class="p [...]
+ <span class="k">def</span> <span class="nf">error</span> <span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">message</span><span class="p">):</span>
+ <span class="n">sys</span><span class="o">.</span><span class="n">stream</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="s">"</span><span class="se">\n\n</span><span class="s">ERROR:</span><span class="se">\n</span><span class="s"> "</span> <span class="o">+</span> <span class="n">message</span> <span class="o">+</span> <span class="s">"</span><span class="se">\n\n</span><span class="s">"</span><span class="p"> [...]
+ <span class="k">def</span> <span class="nf">debug</span> <span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">message</span><span class="p">):</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">stream</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="n">message</span> <span class="o">+</span> <span class="s">"</span><span class="se">\n</span><span class="s">"</span><span class="p">)</span>
+
+<span class="n">black_hole_logger</span> <span class="o">=</span> <span class="n">t_black_hole_logger</span><span class="p">()</span>
+<span class="n">stderr_logger</span> <span class="o">=</span> <span class="n">t_stderr_logger</span><span class="p">()</span>
+
+<span class="k">class</span> <span class="nc">t_verbose_logger</span><span class="p">:</span>
+ <span class="k">def</span> <span class="nf">__init__</span> <span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">verbose</span><span class="p">,</span> <span class="n">logger</span><span class="p">,</span> <span class="n">runtime_data</span><span class="p">):</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">verbose</span> <span class="o">=</span> <span class="n">verbose</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">logger</span> <span class="o">=</span> <span class="n">logger</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">runtime_data</span> <span class="o">=</span> <span class="n">runtime_data</span>
+
+<span class="c">#_________________________________________________________________________________________</span>
+<span class="c">#</span>
+<span class="c"># logging helper function</span>
+<span class="c">#</span>
+<span class="c">#________________________________________________________________________________________</span>
+<span class="k">def</span> <span class="nf">log_at_level</span> <span class="p">(</span><span class="n">logger</span><span class="p">,</span> <span class="n">message_level</span><span class="p">,</span> <span class="n">verbose_level</span><span class="p">,</span> <span class="n">msg</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> writes to log if message_level > verbose level</span>
+<span class="sd"> """</span>
+ <span class="k">if</span> <span class="n">message_level</span> <span class="o"><=</span> <span class="n">verbose_level</span><span class="p">:</span>
+ <span class="n">logger</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="n">msg</span><span class="p">)</span>
+
+
+
+
+
+
+
+
+
+<span class="c">#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888</span>
+
+
+<span class="c"># queue management objects</span>
+
+<span class="c"># inserted into queue like job parameters to control multi-processing queue</span>
+
+<span class="c">#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888</span>
+
+<span class="c"># fake parameters to signal in queue</span>
+<span class="k">class</span> <span class="nc">all_tasks_complete</span><span class="p">:</span>
+ <span class="k">pass</span>
+
+<span class="k">class</span> <span class="nc">waiting_for_more_tasks_to_complete</span><span class="p">:</span>
+ <span class="k">pass</span>
+
+
+<span class="c">#</span>
+<span class="c"># synchronisation data</span>
+<span class="c">#</span>
+<span class="c">#SyncManager()</span>
+<span class="c">#syncmanager.start()</span>
+
+<span class="c">#</span>
+<span class="c"># do nothing semaphore</span>
+<span class="c">#</span>
+<span class="nd">@contextmanager</span>
+<span class="k">def</span> <span class="nf">do_nothing_semaphore</span><span class="p">():</span>
+ <span class="k">yield</span>
+
+
+
+
+
+
+
+
+
+
+
+
+<span class="c">#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888</span>
+
+<span class="c"># task_decorator</span>
+
+<span class="c">#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888</span>
+<span class="k">class</span> <span class="nc">task_decorator</span><span class="p">(</span><span class="nb">object</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Adds task to the "pipeline_task" attribute of this function but</span>
+<span class="sd"> otherwise leaves function untouched</span>
+<span class="sd"> """</span>
+ <span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="n">decoratorArgs</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> saves decorator arguments</span>
+<span class="sd"> """</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">args</span> <span class="o">=</span> <span class="n">decoratorArgs</span>
+
+ <span class="k">def</span> <span class="nf">__call__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">func</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> calls func in task with the same name as the class</span>
+<span class="sd"> """</span>
+ <span class="c"># add task as attribute of this function</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="nb">hasattr</span><span class="p">(</span><span class="n">func</span><span class="p">,</span> <span class="s">"pipeline_task"</span><span class="p">):</span>
+ <span class="n">func</span><span class="o">.</span><span class="n">pipeline_task</span> <span class="o">=</span> <span class="n">_task</span><span class="o">.</span><span class="n">create_task</span><span class="p">(</span><span class="n">func</span><span class="p">)</span>
+
+
+ <span class="c"># call the method called</span>
+ <span class="c"># "task.task_decorator"</span>
+ <span class="c"># where "task_decorator" is the name of this class</span>
+ <span class="n">decorator_function_name</span> <span class="o">=</span> <span class="s">"task_"</span> <span class="o">+</span> <span class="bp">self</span><span class="o">.</span><span class="n">__class__</span><span class="o">.</span><span class="n">__name__</span>
+ <span class="n">task_decorator_function</span> <span class="o">=</span> <span class="nb">getattr</span><span class="p">(</span><span class="n">func</span><span class="o">.</span><span class="n">pipeline_task</span><span class="p">,</span> <span class="n">decorator_function_name</span><span class="p">)</span>
+ <span class="n">task_decorator_function</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">args</span><span class="p">)</span>
+
+ <span class="c">#</span>
+ <span class="c"># don't change the function so we can call it unaltered</span>
+ <span class="c">#</span>
+ <span class="k">return</span> <span class="n">func</span>
+
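+# ---------------------------------------------------------------------------
+# Editor's note -- how the dispatch above plays out for the concrete
+# decorator subclasses defined below (illustrative, hypothetical task names):
+#
+#   @follows("earlier_task")      # __call__ looks up
+#   def later_task():             #     func.pipeline_task.task_follows
+#       pass                      # and passes it ("earlier_task",)
+# ---------------------------------------------------------------------------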
+
+<span class="c">#</span>
+<span class="c"># Basic decorators</span>
+<span class="c">#</span>
+<span class="k">class</span> <span class="nc">follows</span><span class="p">(</span><span class="n">task_decorator</span><span class="p">):</span>
+ <span class="k">pass</span>
+
+<span class="k">class</span> <span class="nc">files</span><span class="p">(</span><span class="n">task_decorator</span><span class="p">):</span>
+ <span class="k">pass</span>
+
+
+
+
+
+
+<span class="c">#</span>
+<span class="c"># Core</span>
+<span class="c">#</span>
+<span class="k">class</span> <span class="nc">split</span><span class="p">(</span><span class="n">task_decorator</span><span class="p">):</span>
+ <span class="k">pass</span>
+
+<span class="k">class</span> <span class="nc">transform</span><span class="p">(</span><span class="n">task_decorator</span><span class="p">):</span>
+ <span class="k">pass</span>
+
+<span class="k">class</span> <span class="nc">subdivide</span><span class="p">(</span><span class="n">task_decorator</span><span class="p">):</span>
+ <span class="k">pass</span>
+
+<span class="k">class</span> <span class="nc">originate</span><span class="p">(</span><span class="n">task_decorator</span><span class="p">):</span>
+ <span class="k">pass</span>
+
+<span class="k">class</span> <span class="nc">merge</span><span class="p">(</span><span class="n">task_decorator</span><span class="p">):</span>
+ <span class="k">pass</span>
+
+<span class="k">class</span> <span class="nc">posttask</span><span class="p">(</span><span class="n">task_decorator</span><span class="p">):</span>
+ <span class="k">pass</span>
+
+<span class="k">class</span> <span class="nc">jobs_limit</span><span class="p">(</span><span class="n">task_decorator</span><span class="p">):</span>
+ <span class="k">pass</span>
+
+
+<span class="c">#</span>
+<span class="c"># Advanced</span>
+<span class="c">#</span>
+<span class="k">class</span> <span class="nc">collate</span><span class="p">(</span><span class="n">task_decorator</span><span class="p">):</span>
+ <span class="k">pass</span>
+
+<span class="k">class</span> <span class="nc">active_if</span><span class="p">(</span><span class="n">task_decorator</span><span class="p">):</span>
+ <span class="k">pass</span>
+
+<span class="c">#</span>
+<span class="c"># Esoteric</span>
+<span class="c">#</span>
+<span class="k">class</span> <span class="nc">check_if_uptodate</span><span class="p">(</span><span class="n">task_decorator</span><span class="p">):</span>
+ <span class="k">pass</span>
+
+<span class="k">class</span> <span class="nc">parallel</span><span class="p">(</span><span class="n">task_decorator</span><span class="p">):</span>
+ <span class="k">pass</span>
+
+
+<span class="c">#</span>
+<span class="c"># Obsolete</span>
+<span class="c">#</span>
+<span class="k">class</span> <span class="nc">files_re</span><span class="p">(</span><span class="n">task_decorator</span><span class="p">):</span>
+ <span class="k">pass</span>
+
+
+
+
+
+<span class="c">#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888</span>
+
+<span class="c"># indicator objects</span>
+
+<span class="c">#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888</span>
+<span class="c">#_________________________________________________________________________________________</span>
+
+<span class="c"># mkdir</span>
+
+<span class="c">#_________________________________________________________________________________________</span>
+<span class="k">class</span> <span class="nc">mkdir</span><span class="p">(</span><span class="n">task_decorator</span><span class="p">):</span>
+ <span class="c">#def __init__ (self, *args):</span>
+ <span class="c"># self.args = args</span>
+ <span class="k">pass</span>
+
+<span class="c">#_________________________________________________________________________________________</span>
+
+<span class="c"># touch_file</span>
+
+<span class="c">#_________________________________________________________________________________________</span>
+<span class="k">class</span> <span class="nc">touch_file</span><span class="p">(</span><span class="nb">object</span><span class="p">):</span>
+ <span class="k">def</span> <span class="nf">__init__</span> <span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">):</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">args</span> <span class="o">=</span> <span class="n">args</span>
+
+
+
+<span class="c">#_________________________________________________________________________________________</span>
+
+<span class="c"># inputs</span>
+
+<span class="c">#_________________________________________________________________________________________</span>
+<span class="k">class</span> <span class="nc">inputs</span><span class="p">(</span><span class="nb">object</span><span class="p">):</span>
+ <span class="k">def</span> <span class="nf">__init__</span> <span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">):</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">args</span> <span class="o">=</span> <span class="n">args</span>
+
+<span class="c">#_________________________________________________________________________________________</span>
+
+<span class="c"># add_inputs</span>
+
+<span class="c">#_________________________________________________________________________________________</span>
+<span class="k">class</span> <span class="nc">add_inputs</span><span class="p">(</span><span class="nb">object</span><span class="p">):</span>
+ <span class="k">def</span> <span class="nf">__init__</span> <span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">):</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">args</span> <span class="o">=</span> <span class="n">args</span>
+
+<span class="c">#8888888888888888888888888888888888888888888888888888888888888888888888888888888888888</span>
+
+<span class="c"># job descriptors</span>
+
+<span class="c"># given parameters, returns strings describing job</span>
+<span class="c"># First returned parameter is string in strong form</span>
+<span class="c"># Second returned parameter is a list of strings for input, output and extra parameters</span>
+<span class="c"># intended to be reformatted with indentation</span>
+<span class="c"># main use in error logging</span>
+
+<span class="c">#8888888888888888888888888888888888888888888888888888888888888888888888888888888888888</span>
+<span class="k">def</span> <span class="nf">generic_job_descriptor</span> <span class="p">(</span><span class="n">param</span><span class="p">,</span> <span class="n">runtime_data</span><span class="p">):</span>
+ <span class="k">if</span> <span class="n">param</span> <span class="ow">in</span> <span class="p">([],</span> <span class="bp">None</span><span class="p">):</span>
+ <span class="n">m</span> <span class="o">=</span> <span class="s">"Job"</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="n">m</span> <span class="o">=</span> <span class="s">"Job = </span><span class="si">%s</span><span class="s">"</span> <span class="o">%</span> <span class="n">ignore_unknown_encoder</span><span class="p">(</span><span class="n">param</span><span class="p">)</span>
+
+ <span class="k">return</span> <span class="n">m</span><span class="p">,</span> <span class="p">[</span><span class="n">m</span><span class="p">]</span>
+
+<span class="k">def</span> <span class="nf">io_files_job_descriptor</span> <span class="p">(</span><span class="n">param</span><span class="p">,</span> <span class="n">runtime_data</span><span class="p">):</span>
+ <span class="n">extra_param</span> <span class="o">=</span> <span class="s">", "</span> <span class="o">+</span> <span class="n">shorten_filenames_encoder</span><span class="p">(</span><span class="n">param</span><span class="p">[</span><span class="mi">2</span><span class="p">:])[</span><span class="mi">1</span><span class="p">:</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span> <span class="k">if</span> <span class="nb">len</span><span class=" [...]
+ <span class="n">out_param</span> <span class="o">=</span> <span class="n">shorten_filenames_encoder</span><span class="p">(</span><span class="n">param</span><span class="p">[</span><span class="mi">1</span><span class="p">])</span> <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">param</span><span class="p">)</span> <span class="o">></span> <span class="mi">1</span> <span class="k">else</span> <span class="s">"??&qu [...]
+ <span class="n">in_param</span> <span class="o">=</span> <span class="n">shorten_filenames_encoder</span><span class="p">(</span><span class="n">param</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span> <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">param</span><span class="p">)</span> <span class="o">></span> <span class="mi">0</span> <span class="k">else</span> <span class="s">"??&qu [...]
+
+ <span class="k">return</span> <span class="p">(</span><span class="s">"Job = [</span><span class="si">%s</span><span class="s"> -> </span><span class="si">%s%s</span><span class="s">]"</span> <span class="o">%</span> <span class="p">(</span><span class="n">in_param</span><span class="p">,</span> <span class="n">out_param</span><span class="p">,</span> <span class="n">extra_param</span><span class="p">),</span>
+ <span class="p">[</span><span class="s">"Job = [</span><span class="si">%s</span><span class="s">"</span> <span class="o">%</span> <span class="n">in_param</span><span class="p">,</span> <span class="s">"-> "</span> <span class="o">+</span> <span class="n">out_param</span> <span class="o">+</span> <span class="n">extra_param</span> <span class="o">+</span> <span class="s">"]"</span><span class="p">])</span>
+
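+# For example (editor's illustration; the exact formatting depends on
+# shorten_filenames_encoder): for a job with param = (["a.txt"], "a.out"),
+# the first return value is a single summary string along the lines of
+#     Job = [["a.txt"] -> "a.out"]
+# and the second is the same information split into indentable fragments.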
+
+<span class="k">def</span> <span class="nf">io_files_one_to_many_job_descriptor</span> <span class="p">(</span><span class="n">param</span><span class="p">,</span> <span class="n">runtime_data</span><span class="p">):</span>
+
+ <span class="n">extra_param</span> <span class="o">=</span> <span class="s">", "</span> <span class="o">+</span> <span class="n">shorten_filenames_encoder</span><span class="p">(</span><span class="n">param</span><span class="p">[</span><span class="mi">2</span><span class="p">:])[</span><span class="mi">1</span><span class="p">:</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span> <span class="k">if</span> <span class="nb">len</span><span class=" [...]
+ <span class="n">out_param</span> <span class="o">=</span> <span class="n">shorten_filenames_encoder</span><span class="p">(</span><span class="n">param</span><span class="p">[</span><span class="mi">1</span><span class="p">])</span> <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">param</span><span class="p">)</span> <span class="o">></span> <span class="mi">1</span> <span class="k">else</span> <span class="s">"??&qu [...]
+ <span class="n">in_param</span> <span class="o">=</span> <span class="n">shorten_filenames_encoder</span><span class="p">(</span><span class="n">param</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span> <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">param</span><span class="p">)</span> <span class="o">></span> <span class="mi">0</span> <span class="k">else</span> <span class="s">"??&qu [...]
+
+ <span class="c"># start with input parameter</span>
+ <span class="n">ret_params</span> <span class="o">=</span> <span class="p">[</span><span class="s">"Job = [</span><span class="si">%s</span><span class="s">"</span> <span class="o">%</span> <span class="n">in_param</span><span class="p">]</span>
+
+ <span class="c"># add output parameter to list,</span>
+ <span class="c"># processing one by one if multiple output parameters</span>
+ <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">param</span><span class="p">)</span> <span class="o">></span> <span class="mi">1</span><span class="p">:</span>
+ <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">param</span><span class="p">[</span><span class="mi">1</span><span class="p">],</span> <span class="p">(</span><span class="nb">list</span><span class="p">,</span> <span class="nb">tuple</span><span class="p">)):</span>
+ <span class="n">ret_params</span><span class="o">.</span><span class="n">extend</span><span class="p">(</span><span class="s">"-> "</span> <span class="o">+</span> <span class="n">shorten_filenames_encoder</span><span class="p">(</span><span class="n">p</span><span class="p">)</span> <span class="k">for</span> <span class="n">p</span> <span class="ow">in</span> <span class="n">param</span><span class="p">[</span><span class="mi">1</span><span class="p">])</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="n">ret_params</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="s">"-> "</span> <span class="o">+</span> <span class="n">out_param</span><span class="p">)</span>
+
+ <span class="c"># add extra</span>
+ <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">param</span><span class="p">)</span> <span class="o">></span> <span class="mi">2</span> <span class="p">:</span>
+ <span class="n">ret_params</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="s">" , "</span> <span class="o">+</span> <span class="n">shorten_filenames_encoder</span><span class="p">(</span><span class="n">param</span><span class="p">[</span><span class="mi">2</span><span class="p">:])[</span><span class="mi">1</span><span class="p">:</span><span class="o">-</span><span class="mi">1</span><span class="p">])</span>
+
+ <span class="c"># add closing bracket</span>
+ <span class="n">ret_params</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span> <span class="o">+=</span><span class="s">"]"</span>
+
+ <span class="k">return</span> <span class="p">(</span><span class="s">"Job = [</span><span class="si">%s</span><span class="s"> -> </span><span class="si">%s%s</span><span class="s">]"</span> <span class="o">%</span> <span class="p">(</span><span class="n">in_param</span><span class="p">,</span> <span class="n">out_param</span><span class="p">,</span> <span class="n">extra_param</span><span class="p">),</span> <span class="n">ret_params</span><span class="p">)</span>
+
+
+<span class="k">def</span> <span class="nf">mkdir_job_descriptor</span> <span class="p">(</span><span class="n">param</span><span class="p">,</span> <span class="n">runtime_data</span><span class="p">):</span>
+ <span class="c"># input, output and parameters</span>
+ <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">param</span><span class="p">)</span> <span class="o">==</span> <span class="mi">1</span><span class="p">:</span>
+ <span class="n">m</span> <span class="o">=</span> <span class="s">"Make directories </span><span class="si">%s</span><span class="s">"</span> <span class="o">%</span> <span class="p">(</span><span class="n">shorten_filenames_encoder</span><span class="p">(</span><span class="n">param</span><span class="p">[</span><span class="mi">0</span><span class="p">]))</span>
+ <span class="k">elif</span> <span class="nb">len</span><span class="p">(</span><span class="n">param</span><span class="p">)</span> <span class="o">==</span> <span class="mi">2</span><span class="p">:</span>
+ <span class="n">m</span> <span class="o">=</span> <span class="s">"Make directories </span><span class="si">%s</span><span class="s">"</span> <span class="o">%</span> <span class="p">(</span><span class="n">shorten_filenames_encoder</span><span class="p">(</span><span class="n">param</span><span class="p">[</span><span class="mi">1</span><span class="p">]))</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="k">return</span> <span class="p">[],</span> <span class="p">[]</span>
+ <span class="k">return</span> <span class="n">m</span><span class="p">,</span> <span class="p">[</span><span class="n">m</span><span class="p">]</span>
+
+
+<span class="c">#8888888888888888888888888888888888888888888888888888888888888888888888888888888888888</span>
+
+<span class="c"># job wrappers</span>
+<span class="c"># registers files/directories for cleanup</span>
+
+<span class="c">#8888888888888888888888888888888888888888888888888888888888888888888888888888888888888</span>
+<span class="c">#_________________________________________________________________________________________</span>
+
+<span class="c"># generic job wrapper</span>
+
+<span class="c">#_________________________________________________________________________________________</span>
+<div class="viewcode-block" id="job_wrapper_generic"><a class="viewcode-back" href="../../task.html#ruffus.task.job_wrapper_generic">[docs]</a><span class="k">def</span> <span class="nf">job_wrapper_generic</span><span class="p">(</span><span class="n">param</span><span class="p">,</span> <span class="n">user_defined_work_func</span><span class="p">,</span> <span class="n">register_cleanup</span><span class="p">,</span> <span class="n">touch_files_only</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> run func</span>
+<span class="sd"> """</span>
+ <span class="k">assert</span><span class="p">(</span><span class="n">user_defined_work_func</span><span class="p">)</span>
+ <span class="k">return</span> <span class="n">user_defined_work_func</span><span class="p">(</span><span class="o">*</span><span class="n">param</span><span class="p">)</span>
+
+<span class="c">#_________________________________________________________________________________________</span>
+
+<span class="c"># job wrapper for all that deal with i/o files</span>
+
+<span class="c">#_________________________________________________________________________________________</span></div>
+<div class="viewcode-block" id="job_wrapper_io_files"><a class="viewcode-back" href="../../task.html#ruffus.task.job_wrapper_io_files">[docs]</a><span class="k">def</span> <span class="nf">job_wrapper_io_files</span><span class="p">(</span><span class="n">param</span><span class="p">,</span> <span class="n">user_defined_work_func</span><span class="p">,</span> <span class="n">register_cleanup</span><span class="p">,</span> <span class="n">touch_files_only</span><span class="p">,</span> < [...]
+ <span class="sd">"""</span>
+<span class="sd"> run func on any i/o if not up to date</span>
+<span class="sd"> """</span>
+ <span class="k">assert</span><span class="p">(</span><span class="n">user_defined_work_func</span><span class="p">)</span>
+
+ <span class="n">i</span><span class="p">,</span><span class="n">o</span> <span class="o">=</span> <span class="n">param</span><span class="p">[</span><span class="mi">0</span><span class="p">:</span><span class="mi">2</span><span class="p">]</span>
+
+ <span class="k">if</span> <span class="n">touch_files_only</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
+ <span class="c"># @originate only uses output files</span>
+ <span class="k">if</span> <span class="n">output_files_only</span><span class="p">:</span>
+ <span class="n">ret_val</span> <span class="o">=</span> <span class="n">user_defined_work_func</span><span class="p">(</span><span class="o">*</span><span class="p">(</span><span class="n">param</span><span class="p">[</span><span class="mi">1</span><span class="p">:]))</span>
+ <span class="c"># all other decorators</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="n">ret_val</span> <span class="o">=</span> <span class="n">user_defined_work_func</span><span class="p">(</span><span class="o">*</span><span class="n">param</span><span class="p">)</span>
+ <span class="k">elif</span> <span class="n">touch_files_only</span> <span class="o">==</span> <span class="mi">1</span><span class="p">:</span>
+ <span class="c">#job_history = dbdict.open(RUFFUS_HISTORY_FILE, picklevalues=True)</span>
+
+ <span class="c">#</span>
+ <span class="c"># touch files only</span>
+ <span class="c">#</span>
+ <span class="k">for</span> <span class="n">f</span> <span class="ow">in</span> <span class="n">get_strings_in_nested_sequence</span><span class="p">(</span><span class="n">o</span><span class="p">):</span>
+ <span class="c">#</span>
+ <span class="c"># race condition still possible...</span>
+ <span class="c">#</span>
+ <span class="k">with</span> <span class="nb">file</span><span class="p">(</span><span class="n">f</span><span class="p">,</span> <span class="s">'a'</span><span class="p">):</span>
+ <span class="n">os</span><span class="o">.</span><span class="n">utime</span><span class="p">(</span><span class="n">f</span><span class="p">,</span> <span class="bp">None</span><span class="p">)</span>
+ <span class="c">#if not os.path.exists(f):</span>
+ <span class="c"># open(f, 'w')</span>
+ <span class="c"># mtime = os.path.getmtime(f)</span>
+ <span class="c">#else:</span>
+ <span class="c"># os.utime(f, None)</span>
+ <span class="c"># mtime = os.path.getmtime(f)</span>
+
+
+ <span class="c">#chksum = JobHistoryChecksum(f, mtime, param[2:], user_defined_work_func.pipeline_task)</span>
+ <span class="c">#job_history[f] = chksum # update file times and job details in history</span>
+
+
+
+ <span class="c">#</span>
+ <span class="c"># register strings in output file for cleanup</span>
+ <span class="c">#</span>
+ <span class="k">for</span> <span class="n">f</span> <span class="ow">in</span> <span class="n">get_strings_in_nested_sequence</span><span class="p">(</span><span class="n">o</span><span class="p">):</span>
+ <span class="n">register_cleanup</span><span class="p">(</span><span class="n">f</span><span class="p">,</span> <span class="s">"file"</span><span class="p">)</span>
+
+
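The touch_files_only branch above bumps the modification time of every output file using the Python 2 only `file()` builtin. A Python-3-compatible way to get the same effect is sketched below; the helper name is illustrative and not part of the upstream diff.

    # Hedged sketch: "touch" output files so their timestamps count as up to date
    import os

    def touch_files(filenames):
        for f in filenames:
            # create the file if missing, then bump its modification time
            with open(f, 'a'):
                os.utime(f, None)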
+<span class="c">#_________________________________________________________________________________________</span>
+
+<span class="c"># job wrapper for all that only deals with output files</span>
+
+<span class="c">#_________________________________________________________________________________________</span></div>
+<span class="k">def</span> <span class="nf">job_wrapper_output_files</span><span class="p">(</span><span class="n">param</span><span class="p">,</span> <span class="n">user_defined_work_func</span><span class="p">,</span> <span class="n">register_cleanup</span><span class="p">,</span> <span class="n">touch_files_only</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> run func on any output file if not up to date</span>
+<span class="sd"> """</span>
+ <span class="n">job_wrapper_io_files</span><span class="p">(</span><span class="n">param</span><span class="p">,</span> <span class="n">user_defined_work_func</span><span class="p">,</span> <span class="n">register_cleanup</span><span class="p">,</span> <span class="n">touch_files_only</span><span class="p">,</span> <span class="n">output_files_only</span> <span class="o">=</span> <span class="bp">True</span><span class="p">)</span>
+
+
+<span class="c">#_________________________________________________________________________________________</span>
+
+<span class="c"># job wrapper for mkdir</span>
+
+<span class="c">#_________________________________________________________________________________________</span>
+<div class="viewcode-block" id="job_wrapper_mkdir"><a class="viewcode-back" href="../../task.html#ruffus.task.job_wrapper_mkdir">[docs]</a><span class="k">def</span> <span class="nf">job_wrapper_mkdir</span><span class="p">(</span><span class="n">param</span><span class="p">,</span> <span class="n">user_defined_work_func</span><span class="p">,</span> <span class="n">register_cleanup</span><span class="p">,</span> <span class="n">touch_files_only</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> make directories if not exists</span>
+<span class="sd"> """</span>
+ <span class="c">#</span>
+ <span class="c"># Just in case, swallow file exist errors because some other makedirs might be subpath</span>
+ <span class="c"># of this directory</span>
+ <span class="c"># Should not be necessary because of "sorted" in task_mkdir</span>
+ <span class="c">#</span>
+ <span class="c">#</span>
+ <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">param</span><span class="p">)</span> <span class="o">==</span> <span class="mi">1</span><span class="p">:</span>
+ <span class="n">dirs</span> <span class="o">=</span> <span class="n">param</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
+
+ <span class="c"># if there are two parameters, they are i/o, and the directories to be created are the output</span>
+ <span class="k">elif</span> <span class="nb">len</span><span class="p">(</span><span class="n">param</span><span class="p">)</span> <span class="o">==</span> <span class="mi">2</span><span class="p">:</span>
+ <span class="n">dirs</span> <span class="o">=</span> <span class="n">param</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="k">raise</span> <span class="ne">Exception</span><span class="p">(</span><span class="s">"Wrong number of arguments in mkdir check </span><span class="si">%s</span><span class="s">"</span> <span class="o">%</span> <span class="p">(</span><span class="n">param</span><span class="p">,))</span>
+
+ <span class="c"># get all file names in flat list</span>
+ <span class="n">dirs</span> <span class="o">=</span> <span class="n">get_strings_in_nested_sequence</span> <span class="p">(</span><span class="n">dirs</span><span class="p">)</span>
+
+ <span class="k">for</span> <span class="n">d</span> <span class="ow">in</span> <span class="n">dirs</span><span class="p">:</span>
+ <span class="k">try</span><span class="p">:</span>
+ <span class="n">os</span><span class="o">.</span><span class="n">makedirs</span><span class="p">(</span><span class="n">d</span><span class="p">)</span>
+ <span class="n">register_cleanup</span><span class="p">(</span><span class="n">d</span><span class="p">,</span> <span class="s">"makedirs"</span><span class="p">)</span>
+ <span class="k">except</span><span class="p">:</span>
+ <span class="c">#</span>
+ <span class="c"># ignore exception if exception == OSError / "File exists"</span>
+ <span class="c">#</span>
+ <span class="n">exceptionType</span><span class="p">,</span> <span class="n">exceptionValue</span><span class="p">,</span> <span class="n">exceptionTraceback</span> <span class="o">=</span> <span class="n">sys</span><span class="o">.</span><span class="n">exc_info</span><span class="p">()</span>
+ <span class="k">if</span> <span class="n">exceptionType</span> <span class="o">==</span> <span class="ne">OSError</span> <span class="ow">and</span> <span class="s">"File exists"</span> <span class="ow">in</span> <span class="nb">str</span><span class="p">(</span><span class="n">exceptionValue</span><span class="p">):</span>
+ <span class="k">continue</span>
+ <span class="k">raise</span>
+
+ <span class="c"># changed for compatibility with python 3.x</span>
+ <span class="c">#except OSError, e:</span>
+ <span class="c"># if "File exists" not in e:</span>
+ <span class="c"># raise</span>
+
+</div>
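job_wrapper_mkdir above tolerates already-existing directories by string-matching "File exists" in the exception text. An alternative sketch (not from the ruffus sources; assumes Python 2.6+ or 3.x) matches on errno instead, which does not depend on the exception message wording.

    # Hedged alternative sketch: ignore "already exists", re-raise anything else
    import errno
    import os

    def make_dirs_quietly(dirs):
        for d in dirs:
            try:
                os.makedirs(d)
            except OSError as e:
                if e.errno != errno.EEXIST:
                    raise

On Python 3.2+ the same behaviour is also available as os.makedirs(d, exist_ok=True).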
+<span class="n">JOB_ERROR</span> <span class="o">=</span> <span class="mi">0</span>
+<span class="n">JOB_SIGNALLED_BREAK</span> <span class="o">=</span> <span class="mi">1</span>
+<span class="n">JOB_UP_TO_DATE</span> <span class="o">=</span> <span class="mi">2</span>
+<span class="n">JOB_COMPLETED</span> <span class="o">=</span> <span class="mi">3</span>
+
+<span class="c">#_________________________________________________________________________________________</span>
+
+<span class="c"># t_job_result</span>
+<span class="c"># Previously a collections.namedtuple (introduced in python 2.6)</span>
+<span class="c"># Now using implementation from running</span>
+<span class="c"># t_job_result = namedtuple('t_job_result', 'task_name state job_name return_value exception', verbose =1)</span>
+<span class="c"># for compatibility with python 2.5</span>
+
+<span class="c">#_________________________________________________________________________________________</span>
+<span class="k">class</span> <span class="nc">t_job_result</span><span class="p">(</span><span class="nb">tuple</span><span class="p">):</span>
+ <span class="s">'t_job_result(task_name, state, job_name, return_value, exception, params)'</span>
+
+ <span class="n">__slots__</span> <span class="o">=</span> <span class="p">()</span>
+
+ <span class="n">fields</span> <span class="o">=</span> <span class="p">(</span><span class="s">'task_name'</span><span class="p">,</span> <span class="s">'state'</span><span class="p">,</span> <span class="s">'job_name'</span><span class="p">,</span> <span class="s">'return_value'</span><span class="p">,</span> <span class="s">'exception'</span><span class="p">,</span> <span class="s">'params'</span><span class="p">)</span>
+
+ <span class="k">def</span> <span class="nf">__new__</span><span class="p">(</span><span class="n">cls</span><span class="p">,</span> <span class="n">task_name</span><span class="p">,</span> <span class="n">state</span><span class="p">,</span> <span class="n">job_name</span><span class="p">,</span> <span class="n">return_value</span><span class="p">,</span> <span class="n">exception</span><span class="p">,</span> <span class="n">params</span><span class="p">):</span>
+ <span class="k">return</span> <span class="nb">tuple</span><span class="o">.</span><span class="n">__new__</span><span class="p">(</span><span class="n">cls</span><span class="p">,</span> <span class="p">(</span><span class="n">task_name</span><span class="p">,</span> <span class="n">state</span><span class="p">,</span> <span class="n">job_name</span><span class="p">,</span> <span class="n">return_value</span><span class="p">,</span> <span class="n">exception</span><span clas [...]
+
+ <span class="nd">@classmethod</span>
+ <span class="k">def</span> <span class="nf">make</span><span class="p">(</span><span class="n">cls</span><span class="p">,</span> <span class="n">iterable</span><span class="p">,</span> <span class="n">new</span><span class="o">=</span><span class="nb">tuple</span><span class="o">.</span><span class="n">__new__</span><span class="p">,</span> <span class="nb">len</span><span class="o">=</span><span class="nb">len</span><span class="p">):</span>
+ <span class="s">'Make a new t_job_result object from a sequence or iterable'</span>
+ <span class="n">result</span> <span class="o">=</span> <span class="n">new</span><span class="p">(</span><span class="n">cls</span><span class="p">,</span> <span class="n">iterable</span><span class="p">)</span>
+ <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">result</span><span class="p">)</span> <span class="o">!=</span> <span class="mi">6</span><span class="p">:</span>
+ <span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span><span class="s">'Expected 6 arguments, got </span><span class="si">%d</span><span class="s">'</span> <span class="o">%</span> <span class="nb">len</span><span class="p">(</span><span class="n">result</span><span class="p">))</span>
+ <span class="k">return</span> <span class="n">result</span>
+
+ <span class="k">def</span> <span class="nf">__repr__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="s">'t_job_result(task_name=</span><span class="si">%r</span><span class="s">, state=</span><span class="si">%r</span><span class="s">, job_name=</span><span class="si">%r</span><span class="s">, return_value=</span><span class="si">%r</span><span class="s">, exception=</span><span class="si">%r</span><span class="s">, params=</span><span class="si">%r</span><span class="s">)'</span> <span class="o">%</span> <span class="bp">s [...]
+
+ <span class="k">def</span> <span class="nf">asdict</span><span class="p">(</span><span class="n">t</span><span class="p">):</span>
+ <span class="s">'Return a new dict which maps field names to their values'</span>
+ <span class="k">return</span> <span class="p">{</span><span class="s">'task_name'</span><span class="p">:</span> <span class="n">t</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="s">'state'</span><span class="p">:</span> <span class="n">t</span><span class="p">[</span><span class="mi">1</span><span class="p">],</span> <span class="s">'job_name'</span><span class="p">:</span> <span class="n">t</span><span cl [...]
+
+ <span class="k">def</span> <span class="nf">replace</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">**</span><span class="n">kwds</span><span class="p">):</span>
+ <span class="s">'Return a new t_job_result object replacing specified fields with new values'</span>
+ <span class="n">result</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">make</span><span class="p">(</span><span class="nb">map</span><span class="p">(</span><span class="n">kwds</span><span class="o">.</span><span class="n">pop</span><span class="p">,</span> <span class="p">(</span><span class="s">'task_name'</span><span class="p">,</span> <span class="s">'state'</span><span class="p">,</span> <span class="s" [...]
+ <span class="k">if</span> <span class="n">kwds</span><span class="p">:</span>
+ <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s">'Got unexpected field names: </span><span class="si">%r</span><span class="s">'</span> <span class="o">%</span> <span class="n">kwds</span><span class="o">.</span><span class="n">keys</span><span class="p">())</span>
+ <span class="k">return</span> <span class="n">result</span>
+
+ <span class="k">def</span> <span class="nf">__getnewargs__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="nb">tuple</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span>
+
+ <span class="n">task_name</span> <span class="o">=</span> <span class="nb">property</span><span class="p">(</span><span class="n">itemgetter</span><span class="p">(</span><span class="mi">0</span><span class="p">))</span>
+ <span class="n">state</span> <span class="o">=</span> <span class="nb">property</span><span class="p">(</span><span class="n">itemgetter</span><span class="p">(</span><span class="mi">1</span><span class="p">))</span>
+ <span class="n">job_name</span> <span class="o">=</span> <span class="nb">property</span><span class="p">(</span><span class="n">itemgetter</span><span class="p">(</span><span class="mi">2</span><span class="p">))</span>
+ <span class="n">return_value</span><span class="o">=</span> <span class="nb">property</span><span class="p">(</span><span class="n">itemgetter</span><span class="p">(</span><span class="mi">3</span><span class="p">))</span>
+ <span class="n">exception</span> <span class="o">=</span> <span class="nb">property</span><span class="p">(</span><span class="n">itemgetter</span><span class="p">(</span><span class="mi">4</span><span class="p">))</span>
+ <span class="n">params</span> <span class="o">=</span> <span class="nb">property</span><span class="p">(</span><span class="n">itemgetter</span><span class="p">(</span><span class="mi">5</span><span class="p">))</span>
+
+
+
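As the comment above notes, t_job_result is a hand-expanded namedtuple kept for Python 2.5 compatibility. On Python 2.6+ or 3.x an equivalent definition collapses to a one-liner; the sketch below uses the six field names from the class above and is illustrative only.

    # Sketch: the collections.namedtuple equivalent of the hand-written class above
    from collections import namedtuple

    t_job_result_equivalent = namedtuple(
        't_job_result',
        'task_name state job_name return_value exception params')

    # r = t_job_result_equivalent('my_task', 3, 'job1', None, None, ())
    # r.state  # -> 3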
+<span class="c">#_________________________________________________________________________________________</span>
+
+<span class="c"># multiprocess_callback</span>
+<span class="c">#</span>
+<span class="c">#_________________________________________________________________________________________</span>
+<span class="k">def</span> <span class="nf">run_pooled_job_without_exceptions</span> <span class="p">(</span><span class="n">process_parameters</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> handles running jobs in parallel</span>
+<span class="sd"> Make sure exceptions are caught here:</span>
+<span class="sd"> Otherwise, these will kill the thread/process</span>
+<span class="sd"> return any exceptions which will be rethrown at the other end:</span>
+<span class="sd"> See RethrownJobError / run_all_jobs_in_task</span>
+<span class="sd"> """</span>
+
+ <span class="p">(</span><span class="n">param</span><span class="p">,</span> <span class="n">task_name</span><span class="p">,</span> <span class="n">job_name</span><span class="p">,</span> <span class="n">job_wrapper</span><span class="p">,</span> <span class="n">user_defined_work_func</span><span class="p">,</span>
+ <span class="n">job_limit_semaphore</span><span class="p">,</span> <span class="n">touch_files_only</span><span class="p">)</span> <span class="o">=</span> <span class="n">process_parameters</span>
+
+ <span class="c">##job_history = dbdict.open(RUFFUS_HISTORY_FILE, picklevalues=True)</span>
+ <span class="c">##outfile = param[1] if len(param) > 1 else None # mkdir has no output</span>
+ <span class="c">##if not isinstance(outfile, list):</span>
+ <span class="c">## outfile = [outfile]</span>
+ <span class="c">##for o in outfile:</span>
+ <span class="c">## job_history.pop(o, None) # remove outfile from history if it exists</span>
+
+ <span class="k">if</span> <span class="n">job_limit_semaphore</span> <span class="o">==</span> <span class="bp">None</span><span class="p">:</span>
+ <span class="n">job_limit_semaphore</span> <span class="o">=</span> <span class="n">do_nothing_semaphore</span><span class="p">()</span>
+
+ <span class="k">try</span><span class="p">:</span>
+ <span class="k">with</span> <span class="n">job_limit_semaphore</span><span class="p">:</span>
+ <span class="n">return_value</span> <span class="o">=</span> <span class="n">job_wrapper</span><span class="p">(</span><span class="n">param</span><span class="p">,</span> <span class="n">user_defined_work_func</span><span class="p">,</span> <span class="n">register_cleanup</span><span class="p">,</span> <span class="n">touch_files_only</span><span class="p">)</span>
+
+ <span class="c">#</span>
+ <span class="c"># ensure one second between jobs</span>
+ <span class="c">#</span>
+ <span class="c">#if one_second_per_job:</span>
+ <span class="c"># time.sleep(1.01)</span>
+ <span class="k">return</span> <span class="n">t_job_result</span><span class="p">(</span><span class="n">task_name</span><span class="p">,</span> <span class="n">JOB_COMPLETED</span><span class="p">,</span> <span class="n">job_name</span><span class="p">,</span> <span class="n">return_value</span><span class="p">,</span> <span class="bp">None</span><span class="p">,</span> <span class="n">param</span><span class="p">)</span>
+ <span class="k">except</span><span class="p">:</span>
+ <span class="c"># Wrap up one or more exceptions rethrown across process boundaries</span>
+ <span class="c">#</span>
+ <span class="c"># See multiprocessor.Server.handle_request/serve_client for an analogous function</span>
+ <span class="n">exceptionType</span><span class="p">,</span> <span class="n">exceptionValue</span><span class="p">,</span> <span class="n">exceptionTraceback</span> <span class="o">=</span> <span class="n">sys</span><span class="o">.</span><span class="n">exc_info</span><span class="p">()</span>
+ <span class="n">exception_stack</span> <span class="o">=</span> <span class="n">traceback</span><span class="o">.</span><span class="n">format_exc</span><span class="p">(</span><span class="n">exceptionTraceback</span><span class="p">)</span>
+ <span class="n">exception_name</span> <span class="o">=</span> <span class="n">exceptionType</span><span class="o">.</span><span class="n">__module__</span> <span class="o">+</span> <span class="s">'.'</span> <span class="o">+</span> <span class="n">exceptionType</span><span class="o">.</span><span class="n">__name__</span>
+ <span class="n">exception_value</span> <span class="o">=</span> <span class="nb">str</span><span class="p">(</span><span class="n">exceptionValue</span><span class="p">)</span>
+ <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">exception_value</span><span class="p">):</span>
+ <span class="n">exception_value</span> <span class="o">=</span> <span class="s">"(</span><span class="si">%s</span><span class="s">)"</span> <span class="o">%</span> <span class="n">exception_value</span>
+
+ <span class="k">if</span> <span class="n">exceptionType</span> <span class="o">==</span> <span class="n">JobSignalledBreak</span><span class="p">:</span>
+ <span class="n">job_state</span> <span class="o">=</span> <span class="n">JOB_SIGNALLED_BREAK</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="n">job_state</span> <span class="o">=</span> <span class="n">JOB_ERROR</span>
+ <span class="k">return</span> <span class="n">t_job_result</span><span class="p">(</span><span class="n">task_name</span><span class="p">,</span> <span class="n">job_state</span><span class="p">,</span> <span class="n">job_name</span><span class="p">,</span> <span class="bp">None</span><span class="p">,</span>
+ <span class="p">[</span><span class="n">task_name</span><span class="p">,</span>
+ <span class="n">job_name</span><span class="p">,</span>
+ <span class="n">exception_name</span><span class="p">,</span>
+ <span class="n">exception_value</span><span class="p">,</span>
+ <span class="n">exception_stack</span><span class="p">],</span> <span class="n">param</span><span class="p">)</span>
+
+
+
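run_pooled_job_without_exceptions catches every exception inside the worker and ships the exception name, message and formatted traceback back in the result, so the parent process can re-raise it instead of losing the job silently. A minimal standalone sketch of that pattern follows; the function names and result shape are hypothetical, not the ruffus implementation.

    # Minimal sketch of the "never let a pool worker raise" pattern
    import sys
    import traceback
    from multiprocessing import Pool

    def safe_call(args):
        func, params = args
        try:
            return ("OK", func(*params), None)
        except Exception:
            exc_type, exc_value, _ = sys.exc_info()
            # return the error details instead of letting the exception escape the worker
            return ("ERROR", None,
                    (exc_type.__name__, str(exc_value), traceback.format_exc()))

    def double_positive(x):
        if x < 0:
            raise ValueError("negative input")
        return x * 2

    if __name__ == "__main__":
        pool = Pool(2)
        for state, value, err in pool.map(safe_call,
                                          [(double_positive, (3,)),
                                           (double_positive, (-1,))]):
            if state == "ERROR":
                print("job failed:", err[0], err[1])
            else:
                print("job ok:", value)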
+<span class="c">#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888</span>
+
+<span class="c"># Helper function</span>
+
+<span class="c">#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888</span>
+
+
+<span class="c">#_________________________________________________________________________________________</span>
+
+<span class="c"># register_cleanup</span>
+
+<span class="c"># to do</span>
+
+<span class="c">#_________________________________________________________________________________________</span>
+<span class="k">def</span> <span class="nf">register_cleanup</span> <span class="p">(</span><span class="n">file_name</span><span class="p">,</span> <span class="n">operation</span><span class="p">):</span>
+ <span class="k">pass</span>
+
+<span class="c">#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888</span>
+
+<span class="c"># _task</span>
+
+<span class="c">#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888</span>
+<span class="k">class</span> <span class="nc">_task</span> <span class="p">(</span><span class="n">node</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> pipeline task</span>
+<span class="sd"> """</span>
+
+ <span class="n">action_names</span> <span class="o">=</span> <span class="p">[</span><span class="s">"unspecified"</span><span class="p">,</span>
+ <span class="s">"task"</span><span class="p">,</span>
+ <span class="s">"task_files_re"</span><span class="p">,</span>
+ <span class="s">"task_split"</span><span class="p">,</span>
+ <span class="s">"task_merge"</span><span class="p">,</span>
+ <span class="s">"task_transform"</span><span class="p">,</span>
+ <span class="s">"task_collate"</span><span class="p">,</span>
+ <span class="s">"task_files_func"</span><span class="p">,</span>
+ <span class="s">"task_files"</span><span class="p">,</span>
+ <span class="s">"task_mkdir"</span><span class="p">,</span>
+ <span class="s">"task_parallel"</span><span class="p">,</span>
+ <span class="s">"task_active_if"</span><span class="p">,</span>
+ <span class="s">"task_product"</span><span class="p">,</span>
+ <span class="s">"task_permutations"</span><span class="p">,</span>
+ <span class="s">"task_combinations"</span><span class="p">,</span>
+ <span class="s">"task_combinations_with_replacement"</span><span class="p">,</span>
+ <span class="s">"task_subdivide"</span><span class="p">,</span>
+ <span class="s">"task_originate"</span><span class="p">,</span>
+ <span class="p">]</span>
+ <span class="n">action_unspecified</span> <span class="o">=</span> <span class="mi">0</span>
+ <span class="n">action_task</span> <span class="o">=</span> <span class="mi">1</span>
+ <span class="n">action_task_files_re</span> <span class="o">=</span> <span class="mi">2</span>
+ <span class="n">action_task_split</span> <span class="o">=</span> <span class="mi">3</span>
+ <span class="n">action_task_merge</span> <span class="o">=</span> <span class="mi">4</span>
+ <span class="n">action_task_transform</span> <span class="o">=</span> <span class="mi">5</span>
+ <span class="n">action_task_collate</span> <span class="o">=</span> <span class="mi">6</span>
+ <span class="n">action_task_files_func</span> <span class="o">=</span> <span class="mi">7</span>
+ <span class="n">action_task_files</span> <span class="o">=</span> <span class="mi">8</span>
+ <span class="n">action_mkdir</span> <span class="o">=</span> <span class="mi">9</span>
+ <span class="n">action_parallel</span> <span class="o">=</span> <span class="mi">10</span>
+ <span class="n">action_active_if</span> <span class="o">=</span> <span class="mi">11</span>
+ <span class="n">action_task_product</span> <span class="o">=</span> <span class="mi">12</span>
+ <span class="n">action_task_permutations</span> <span class="o">=</span> <span class="mi">13</span>
+ <span class="n">action_task_combinations</span> <span class="o">=</span> <span class="mi">14</span>
+ <span class="n">action_task_combinations_with_replacement</span> <span class="o">=</span> <span class="mi">15</span>
+ <span class="n">action_task_subdivide</span> <span class="o">=</span> <span class="mi">16</span>
+ <span class="n">action_task_originate</span> <span class="o">=</span> <span class="mi">17</span>
+
+
+
+ <span class="n">multiple_jobs_outputs</span> <span class="o">=</span> <span class="mi">0</span>
+ <span class="n">single_job_single_output</span> <span class="o">=</span> <span class="mi">1</span>
+ <span class="n">job_single_matches_parent</span><span class="o">=</span> <span class="mi">2</span>
+
+ <span class="n">job_limit_semaphores</span> <span class="o">=</span> <span class="p">{}</span>
+
+
+
+ <span class="c">#_________________________________________________________________________________________</span>
+
+ <span class="c"># create_task / __init__</span>
+
+ <span class="c">#_________________________________________________________________________________________</span>
+ <span class="nd">@staticmethod</span>
+ <span class="k">def</span> <span class="nf">create_task</span><span class="p">(</span><span class="n">func</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Create task if the name as not been previously specified</span>
+<span class="sd"> Note that the task function may not have been created yet.</span>
+<span class="sd"> This allows us to create tasks and dependencies out of order</span>
+<span class="sd"> """</span>
+ <span class="n">func_name</span> <span class="o">=</span> <span class="n">func</span><span class="o">.</span><span class="n">__name__</span>
+ <span class="n">module_name</span> <span class="o">=</span> <span class="nb">str</span><span class="p">(</span><span class="n">func</span><span class="o">.</span><span class="n">__module__</span><span class="p">)</span>
+ <span class="n">task_name</span> <span class="o">=</span> <span class="n">module_name</span> <span class="o">+</span> <span class="s">"."</span> <span class="o">+</span> <span class="n">func_name</span>
+
+ <span class="c"># Link to existing dependency if task name has previously been specified</span>
+ <span class="k">if</span> <span class="n">node</span><span class="o">.</span><span class="n">is_node</span><span class="p">(</span><span class="n">task_name</span><span class="p">):</span>
+ <span class="n">t</span> <span class="o">=</span> <span class="n">node</span><span class="o">.</span><span class="n">lookup_node_from_name</span><span class="p">(</span><span class="n">task_name</span><span class="p">)</span>
+ <span class="k">if</span> <span class="n">t</span><span class="o">.</span><span class="n">user_defined_work_func</span> <span class="o">!=</span> <span class="bp">None</span><span class="p">:</span>
+ <span class="k">raise</span> <span class="n">error_duplicate_task_name</span><span class="p">(</span><span class="s">"Same task name </span><span class="si">%s</span><span class="s"> specified multiple times in the same module"</span> <span class="o">%</span> <span class="n">task_name</span><span class="p">)</span>
+ <span class="c"># otherwise create new</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="n">t</span> <span class="o">=</span> <span class="n">_task</span><span class="p">(</span><span class="n">module_name</span><span class="p">,</span> <span class="n">func_name</span><span class="p">)</span>
+
+ <span class="n">t</span><span class="o">.</span><span class="n">set_action_type</span> <span class="p">(</span><span class="n">_task</span><span class="o">.</span><span class="n">action_task</span><span class="p">)</span>
+ <span class="n">t</span><span class="o">.</span><span class="n">user_defined_work_func</span> <span class="o">=</span> <span class="n">func</span>
+ <span class="k">assert</span><span class="p">(</span><span class="n">t</span><span class="o">.</span><span class="n">_name</span> <span class="o">==</span> <span class="n">task_name</span><span class="p">)</span>
+ <span class="c"># convert description into one line</span>
+ <span class="k">if</span> <span class="n">func</span><span class="o">.</span><span class="n">__doc__</span><span class="p">:</span>
+ <span class="n">t</span><span class="o">.</span><span class="n">_description</span> <span class="o">=</span> <span class="n">re</span><span class="o">.</span><span class="n">sub</span><span class="p">(</span><span class="s">"</span><span class="se">\n</span><span class="s">\s+"</span><span class="p">,</span> <span class="s">" "</span><span class="p">,</span> <span class="n">func</span><span class="o">.</span><span class="n">__doc__</span><span cl [...]
+ <span class="k">else</span><span class="p">:</span>
+ <span class="n">t</span><span class="o">.</span><span class="n">_description</span> <span class="o">=</span> <span class="s">""</span>
+
+ <span class="k">return</span> <span class="n">t</span>
+
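create_task above keys tasks by "module.function" so that dependencies can be declared before the task function itself exists, and raises error_duplicate_task_name if the same function is decorated twice. The toy registry below sketches that create-or-link idea; every name in it is hypothetical and it is not the ruffus node machinery.

    # Toy sketch of a create-or-link-by-name task registry
    _registry = {}

    class DuplicateTaskName(Exception):
        pass

    def get_or_create_task(func):
        name = "%s.%s" % (func.__module__, func.__name__)
        node = _registry.get(name)
        if node is None:
            # may have been referenced earlier purely as a dependency
            node = {"name": name, "work_func": None}
            _registry[name] = node
        elif node["work_func"] is not None:
            raise DuplicateTaskName("Task %s defined more than once" % name)
        node["work_func"] = func
        return node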
+ <span class="c">#_________________________________________________________________________________________</span>
+
+ <span class="c"># get_action_name</span>
+
+ <span class="c">#_________________________________________________________________________________________</span>
+ <span class="k">def</span> <span class="nf">get_action_name</span> <span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="n">_task</span><span class="o">.</span><span class="n">action_names</span><span class="p">[</span><span class="bp">self</span><span class="o">.</span><span class="n">_action_type</span><span class="p">]</span>
+
+ <span class="c">#_________________________________________________________________________________________</span>
+
+ <span class="c"># __init__</span>
+
+ <span class="c">#_________________________________________________________________________________________</span>
+ <span class="k">def</span> <span class="nf">__init__</span> <span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">module_name</span><span class="p">,</span> <span class="n">func_name</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Does nothing because this might just be a dependency.</span>
+<span class="sd"> If it does not get initialised by a real task</span>
+<span class="sd"> (a task is depending on an unknown function/task),</span>
+<span class="sd"> throw an exception when running the pipeline</span>
+
+<span class="sd"> """</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_module_name</span> <span class="o">=</span> <span class="n">module_name</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_func_name</span> <span class="o">=</span> <span class="n">func_name</span>
+
+ <span class="n">node</span><span class="o">.</span><span class="n">__init__</span> <span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">module_name</span> <span class="o">+</span> <span class="s">"."</span> <span class="o">+</span> <span class="n">func_name</span><span class="p">)</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_action_type</span> <span class="o">=</span> <span class="n">_task</span><span class="o">.</span><span class="n">action_unspecified</span>
+
+ <span class="c"># Each task has its own checksum level</span>
+ <span class="c"># At the moment this is really so multiple pipelines in the same script can have</span>
+ <span class="c"># different checksum levels</span>
+ <span class="c"># Though set by pipeline_xxxx functions, have initial valid value so unit tests work :-|</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">checksum_level</span> <span class="o">=</span> <span class="n">CHECKSUM_FILE_TIMESTAMPS</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">param_generator_func</span> <span class="o">=</span> <span class="bp">None</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">needs_update_func</span> <span class="o">=</span> <span class="bp">None</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">job_wrapper</span> <span class="o">=</span> <span class="n">job_wrapper_generic</span>
+
+ <span class="c">#</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">job_descriptor</span> <span class="o">=</span> <span class="n">generic_job_descriptor</span>
+
+ <span class="c"># jobs which produce a single output.</span>
+ <span class="c"># special handling for task.get_output_files for dependency chaining</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_single_job_single_output</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">multiple_jobs_outputs</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">single_multi_io</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">many_to_many</span>
+
+ <span class="c"># function which is decorated and does the actual work</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">user_defined_work_func</span> <span class="o">=</span> <span class="bp">None</span>
+
+ <span class="c"># functions which will be called when task completes</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">posttask_functions</span> <span class="o">=</span> <span class="p">[]</span>
+
+ <span class="c"># give makedir automatically made parent tasks unique names</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">cnt_task_mkdir</span> <span class="o">=</span> <span class="mi">0</span>
+
+ <span class="c"># whether only task function itself knows what output it will produce</span>
+ <span class="c"># i.e. output is a glob or something similar</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">indeterminate_output</span> <span class="o">=</span> <span class="mi">0</span>
+
+ <span class="c"># cache output file names here</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">output_filenames</span> <span class="o">=</span> <span class="bp">None</span>
+
+ <span class="bp">self</span><span class="o">.</span><span class="n">semaphore_name</span> <span class="o">=</span> <span class="n">module_name</span> <span class="o">+</span> <span class="s">"."</span> <span class="o">+</span> <span class="n">func_name</span>
+
+ <span class="c"># do not test for whether task is active</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">active_if_checks</span> <span class="o">=</span> <span class="bp">None</span>
+
+ <span class="c"># extra flag for outputfiles</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">is_active</span> <span class="o">=</span> <span class="bp">True</span>
+
+
+
+ <span class="c">#_________________________________________________________________________________________</span>
+
+ <span class="c"># init_for_pipeline</span>
+
+ <span class="c">#_________________________________________________________________________________________</span>
+ <span class="k">def</span> <span class="nf">init_for_pipeline</span> <span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Initialize variables for pipeline run / printout</span>
+
+<span class="sd"> **********</span>
+<span class="sd"> BEWARE</span>
+<span class="sd"> **********</span>
+
+<span class="sd"> Because state is stored, ruffus is *not* reentrant.</span>
+
+<span class="sd"> **********</span>
+<span class="sd"> BEWARE</span>
+<span class="sd"> **********</span>
+<span class="sd"> """</span>
+
+ <span class="c"># cache output file names here</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">output_filenames</span> <span class="o">=</span> <span class="bp">None</span>
+
+
+ <span class="c">#_________________________________________________________________________________________</span>
+
+ <span class="c"># set_action_type</span>
+
+ <span class="c">#_________________________________________________________________________________________</span>
+ <span class="k">def</span> <span class="nf">set_action_type</span> <span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">new_action_type</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Save how this task</span>
+<span class="sd"> 1) tests whether it is up-to-date and</span>
+<span class="sd"> 2) handles input/output files</span>
+
+<span class="sd"> Checks that the task has not been defined with conflicting actions</span>
+
+<span class="sd"> """</span>
+ <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_action_type</span> <span class="ow">not</span> <span class="ow">in</span> <span class="p">(</span><span class="n">_task</span><span class="o">.</span><span class="n">action_unspecified</span><span class="p">,</span> <span class="n">_task</span><span class="o">.</span><span class="n">action_task</span><span class="p">):</span>
+ <span class="n">old_action</span> <span class="o">=</span> <span class="n">_task</span><span class="o">.</span><span class="n">action_names</span><span class="p">[</span><span class="bp">self</span><span class="o">.</span><span class="n">_action_type</span><span class="p">]</span>
+ <span class="n">new_action</span> <span class="o">=</span> <span class="n">_task</span><span class="o">.</span><span class="n">action_names</span><span class="p">[</span><span class="n">new_action_type</span><span class="p">]</span>
+ <span class="n">actions</span> <span class="o">=</span> <span class="s">" and "</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="nb">list</span><span class="p">(</span><span class="nb">set</span><span class="p">((</span><span class="n">old_action</span><span class="p">,</span> <span class="n">new_action</span><span class="p">))))</span>
+ <span class="n">task_name</span> <span class="o">=</span> <span class="s">"def </span><span class="si">%s</span><span class="s">(...)"</span> <span class="o">%</span> <span class="bp">self</span><span class="o">.</span><span class="n">_name</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s">"__main__."</span><span class="p">,</span> <span class="s">""</span><span class="p">)</span>
+ <span class="k">raise</span> <span class="n">error_decorator_args</span><span class="p">((</span><span class="s">" </span><span class="si">%s</span><span class="se">\n</span><span class="s"> has duplicate task specifications: (</span><span class="si">%s</span><span class="s">)</span><span class="se">\n</span><span class="s">"</span><span class="p">)</span> <span class="o">%</span>
+ <span class="p">(</span><span class="n">task_name</span><span class="p">,</span> <span class="n">actions</span><span class="p">))</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_action_type</span> <span class="o">=</span> <span class="n">new_action_type</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_action_type_desc</span> <span class="o">=</span> <span class="n">_task</span><span class="o">.</span><span class="n">action_names</span><span class="p">[</span><span class="n">new_action_type</span><span class="p">]</span>
+
+
+
+ <span class="c">#_________________________________________________________________________________________</span>
+
+ <span class="c"># get_job_name</span>
+
+ <span class="c">#_________________________________________________________________________________________</span>
+ <span class="k">def</span> <span class="nf">get_job_name</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">descriptive_param</span><span class="p">,</span> <span class="n">runtime_data</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Use job descriptor to return short name for job, including any parameters</span>
+
+<span class="sd"> runtime_data is not (yet) used but may be used to add context in future</span>
+<span class="sd"> """</span>
+ <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">job_descriptor</span><span class="p">(</span><span class="n">descriptive_param</span><span class="p">,</span> <span class="n">runtime_data</span><span class="p">)[</span><span class="mi">0</span><span class="p">]</span>
+
+
+ <span class="c">#_________________________________________________________________________________________</span>
+
+ <span class="c"># get_task_name</span>
+
+ <span class="c">#_________________________________________________________________________________________</span>
+ <span class="k">def</span> <span class="nf">get_task_name</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">in_func_format</span> <span class="o">=</span> <span class="bp">False</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Returns name of task function, removing __main__ namespace if necessary</span>
+
+<span class="sd"> if in_func_format is true, will return def task_func(...):</span>
+
+<span class="sd"> """</span>
+
+ <span class="n">task_name</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_name</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s">"__main__."</span><span class="p">,</span> <span class="s">""</span><span class="p">)</span>
+ <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_action_type</span> <span class="o">!=</span> <span class="n">_task</span><span class="o">.</span><span class="n">action_mkdir</span> <span class="ow">and</span> <span class="n">in_func_format</span><span class="p">:</span>
+ <span class="k">return</span> <span class="s">"def </span><span class="si">%s</span><span class="s">(...):"</span> <span class="o">%</span> <span class="n">task_name</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="k">return</span> <span class="n">task_name</span>
+
+
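get_task_name above strips the "__main__." prefix from the qualified name and, outside of mkdir tasks, can format the result as a function signature for use in messages. The tiny helper below restates that rule; it is illustrative only.

    # Illustrative restatement of the name-formatting rule used by get_task_name
    def display_task_name(qualified_name, in_func_format=False):
        name = qualified_name.replace("__main__.", "")
        return "def %s(...):" % name if in_func_format else name

    # display_task_name("__main__.align_reads")        -> "align_reads"
    # display_task_name("__main__.align_reads", True)  -> "def align_reads(...):"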
+
+ <span class="c">#_________________________________________________________________________________________</span>
+
+ <span class="c"># update_active_state</span>
+
+ <span class="c">#_________________________________________________________________________________________</span>
+ <span class="k">def</span> <span class="nf">update_active_state</span> <span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="c">#</span>
+ <span class="c"># If has an @active_if decorator, check if the task needs to be run</span>
+ <span class="c"># @active_if parameters may be call back functions or booleans</span>
+ <span class="c">#</span>
+ <span class="k">if</span> <span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">active_if_checks</span> <span class="o">!=</span> <span class="bp">None</span> <span class="ow">and</span>
+ <span class="nb">any</span><span class="p">(</span> <span class="ow">not</span> <span class="n">arg</span><span class="p">()</span> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">arg</span><span class="p">,</span> <span class="n">collections</span><span class="o">.</span><span class="n">Callable</span><span class="p">)</span> <span class="k">else</span> <span class="ow">not</span> <span class="n">arg</span>
+ <span class="k">for</span> <span class="n">arg</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">active_if_checks</span><span class="p">)):</span>
+ <span class="c"># flip is active to false.</span>
+ <span class="c"># ( get_output_files() will return empty if inactive )</span>
+ <span class="c"># Remember each iteration of pipeline_printout pipeline_run will have</span>
+ <span class="c"># another bite at changing this value</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">is_active</span> <span class="o">=</span> <span class="bp">False</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="c"># flip is active to True so that downstream dependencies will be correct</span>
+ <span class="c"># ( get_output_files() will return empty if inactive )</span>
+ <span class="c"># Remember each iteration of pipeline_printout pipeline_run will have</span>
+ <span class="c"># another bite at changing this value</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">is_active</span> <span class="o">=</span> <span class="bp">True</span>
+
+
+
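update_active_state treats each @active_if argument as either a plain boolean or a callable evaluated at run time, and the task stays active only if every check passes. A compact sketch of that evaluation rule is below; the function name is hypothetical.

    # Compact sketch of the @active_if evaluation rule
    def evaluate_active_checks(checks):
        # None means "no @active_if decorator": the task is always active
        if checks is None:
            return True
        # callables are evaluated now; other values are used as booleans
        return all(check() if callable(check) else check for check in checks)

    # evaluate_active_checks([True, lambda: 2 > 1])   -> True
    # evaluate_active_checks([lambda: False, True])   -> False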
+ <span class="c">#_________________________________________________________________________________________</span>
+
+ <span class="c"># printout</span>
+
+ <span class="c">#_________________________________________________________________________________________</span>
+ <span class="k">def</span> <span class="nf">printout</span> <span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">runtime_data</span><span class="p">,</span> <span class="n">force_rerun</span><span class="p">,</span> <span class="n">job_history</span><span class="p">,</span> <span class="n">verbose</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">indent</span> <span class="o">=</span> <span class="m [...]
+ <span class="sd">"""</span>
+<span class="sd"> Print out all jobs for this task</span>
+
+<span class="sd"> verbose = 1 : print task name</span>
+<span class="sd"> 2 : print task description if exists</span>
+<span class="sd"> 3 : print job names for jobs to be run</span>
+<span class="sd"> 4 : print job names for up-to- date jobs</span>
+<span class="sd"> """</span>
+
+ <span class="k">def</span> <span class="nf">get_job_names</span> <span class="p">(</span><span class="n">param</span><span class="p">,</span> <span class="n">indent_str</span><span class="p">):</span>
+ <span class="n">job_names</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">job_descriptor</span><span class="p">(</span><span class="n">param</span><span class="p">,</span> <span class="n">runtime_data</span><span class="p">)[</span><span class="mi">1</span><span class="p">]</span>
+ <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">job_names</span><span class="p">)</span> <span class="o">></span> <span class="mi">1</span><span class="p">:</span>
+ <span class="n">job_names</span> <span class="o">=</span> <span class="p">([</span><span class="n">indent_str</span> <span class="o">+</span> <span class="n">job_names</span><span class="p">[</span><span class="mi">0</span><span class="p">]]</span> <span class="o">+</span>
+ <span class="p">[</span><span class="n">indent_str</span> <span class="o">+</span> <span class="s">" "</span> <span class="o">+</span> <span class="n">jn</span> <span class="k">for</span> <span class="n">jn</span> <span class="ow">in</span> <span class="n">job_names</span><span class="p">[</span><span class="mi">1</span><span class="p">:]])</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="n">job_names</span> <span class="o">=</span> <span class="p">([</span><span class="n">indent_str</span> <span class="o">+</span> <span class="n">job_names</span><span class="p">[</span><span class="mi">0</span><span class="p">]])</span>
+ <span class="k">return</span> <span class="n">job_names</span>
+
+
+
+ <span class="k">if</span> <span class="ow">not</span> <span class="n">verbose</span><span class="p">:</span>
+ <span class="k">return</span> <span class="p">[]</span>
+
+ <span class="n">indent_str</span> <span class="o">=</span> <span class="s">' '</span> <span class="o">*</span> <span class="n">indent</span>
+
+ <span class="n">messages</span> <span class="o">=</span> <span class="p">[]</span>
+
+ <span class="n">messages</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="s">"Task = "</span> <span class="o">+</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_task_name</span><span class="p">()</span> <span class="o">+</span> <span class="p">(</span><span class="s">" >>Forced to rerun<<"</span> <span class="k">if</span> <span class="n">force_rerun</span> <span class=" [...]
+ <span class="k">if</span> <span class="n">verbose</span> <span class="o">==</span><span class="mi">1</span><span class="p">:</span>
+ <span class="k">return</span> <span class="n">messages</span>
+
+ <span class="k">if</span> <span class="n">verbose</span> <span class="o">>=</span> <span class="mi">2</span> <span class="ow">and</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_description</span><span class="p">):</span>
+ <span class="n">messages</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">indent_str</span> <span class="o">+</span> <span class="s">'"'</span> <span class="o">+</span> <span class="bp">self</span><span class="o">.</span><span class="n">_description</span> <span class="o">+</span> <span class="s">'"'</span><span class="p">)</span>
+
+ <span class="c">#</span>
+ <span class="c"># single job state</span>
+ <span class="c">#</span>
+ <span class="k">if</span> <span class="n">verbose</span> <span class="o">></span> <span class="mi">5</span><span class="p">:</span>
+ <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_single_job_single_output</span> <span class="o">==</span> <span class="bp">self</span><span class="o">.</span><span class="n">single_job_single_output</span><span class="p">:</span>
+ <span class="n">messages</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="s">" Single job single output"</span><span class="p">)</span>
+ <span class="k">elif</span> <span class="bp">self</span><span class="o">.</span><span class="n">_single_job_single_output</span> <span class="o">==</span> <span class="bp">self</span><span class="o">.</span><span class="n">multiple_jobs_outputs</span><span class="p">:</span>
+ <span class="n">messages</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="s">" Multiple jobs Multiple outputs"</span><span class="p">)</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="n">messages</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="s">" Single jobs status depends on </span><span class="si">%s</span><span class="s">"</span> <span class="o">%</span> <span class="bp">self</span><span class="o">.</span><span class="n">_single_job_single_output</span><span class="o">.</span><span class="n">_name</span><span class="p">)</span>
+
+
+ <span class="k">if</span> <span class="n">verbose</span> <span class="o"><=</span> <span class="mi">2</span> <span class="p">:</span>
+ <span class="k">return</span> <span class="n">messages</span>
+
+ <span class="c"># increase indent for jobs up to date status</span>
+ <span class="n">indent_str</span> <span class="o">+=</span> <span class="s">" "</span> <span class="o">*</span> <span class="mi">3</span>
+
+ <span class="c">#</span>
+ <span class="c"># If has an @active_if decorator, check if the task needs to be run</span>
+ <span class="c"># @active_if parameters may be call back functions or booleans</span>
+ <span class="c">#</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="bp">self</span><span class="o">.</span><span class="n">is_active</span><span class="p">:</span>
+ <span class="k">if</span> <span class="n">verbose</span> <span class="o"><=</span> <span class="mi">3</span><span class="p">:</span>
+ <span class="k">return</span> <span class="n">messages</span>
+ <span class="n">messages</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">indent_str</span> <span class="o">+</span> <span class="s">"Task is inactive"</span><span class="p">)</span>
+ <span class="c"># add spacer line</span>
+ <span class="n">messages</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="s">""</span><span class="p">)</span>
+ <span class="k">return</span> <span class="n">messages</span>
+
+ <span class="c">#</span>
+ <span class="c"># No parameters: just call task function</span>
+ <span class="c">#</span>
+ <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">param_generator_func</span> <span class="o">==</span> <span class="bp">None</span><span class="p">:</span>
+ <span class="k">if</span> <span class="n">verbose</span> <span class="o"><=</span> <span class="mi">3</span><span class="p">:</span>
+ <span class="k">return</span> <span class="n">messages</span>
+
+ <span class="c">#</span>
+ <span class="c"># needs update func = None: always needs update</span>
+ <span class="c">#</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="bp">self</span><span class="o">.</span><span class="n">needs_update_func</span><span class="p">:</span>
+ <span class="n">messages</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">indent_str</span> <span class="o">+</span> <span class="s">"Task needs update: No function to check if up-to-date or not"</span><span class="p">)</span>
+ <span class="k">return</span> <span class="n">messages</span>
+
+ <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">needs_update_func</span> <span class="o">==</span> <span class="n">needs_update_check_modify_time</span><span class="p">:</span>
+ <span class="n">needs_update</span><span class="p">,</span> <span class="n">msg</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">needs_update_func</span> <span class="p">(</span><span class="n">task</span><span class="o">=</span><span class="bp">self</span><span class="p">,</span> <span class="n">job_history</span> <span class="o">=</span> <span class="n">job_history</span><span class="p">)</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="n">needs_update</span><span class="p">,</span> <span class="n">msg</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">needs_update_func</span> <span class="p">()</span>
+
+ <span class="k">if</span> <span class="n">needs_update</span><span class="p">:</span>
+ <span class="n">messages</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">indent_str</span> <span class="o">+</span> <span class="s">"Task needs update: </span><span class="si">%s</span><span class="s">"</span> <span class="o">%</span> <span class="n">msg</span><span class="p">)</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="n">messages</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">indent_str</span> <span class="o">+</span> <span class="s">"Task up-to-date"</span><span class="p">)</span>
+
+ <span class="k">else</span><span class="p">:</span>
+ <span class="n">runtime_data</span><span class="p">[</span><span class="s">"MATCH_FAILURE"</span><span class="p">]</span> <span class="o">=</span> <span class="p">[]</span>
+ <span class="c">#</span>
+ <span class="c"># return messages description per job</span>
+ <span class="c">#</span>
+ <span class="n">cnt_jobs</span> <span class="o">=</span> <span class="mi">0</span>
+ <span class="k">for</span> <span class="n">param</span><span class="p">,</span> <span class="n">descriptive_param</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">param_generator_func</span><span class="p">(</span><span class="n">runtime_data</span><span class="p">):</span>
+ <span class="n">cnt_jobs</span> <span class="o">+=</span> <span class="mi">1</span>
+
+ <span class="c">#</span>
+ <span class="c"># needs update func = None: always needs update</span>
+ <span class="c">#</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="bp">self</span><span class="o">.</span><span class="n">needs_update_func</span><span class="p">:</span>
+ <span class="n">messages</span><span class="o">.</span><span class="n">extend</span><span class="p">(</span><span class="n">get_job_names</span> <span class="p">(</span><span class="n">descriptive_param</span><span class="p">,</span> <span class="n">indent_str</span><span class="p">))</span>
+ <span class="n">messages</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">indent_str</span> <span class="o">+</span> <span class="s">" Jobs needs update: No function to check if up-to-date or not"</span><span class="p">)</span>
+ <span class="k">continue</span>
+
+ <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">needs_update_func</span> <span class="o">==</span> <span class="n">needs_update_check_modify_time</span><span class="p">:</span>
+ <span class="n">needs_update</span><span class="p">,</span> <span class="n">msg</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">needs_update_func</span> <span class="p">(</span><span class="o">*</span><span class="n">param</span><span class="p">,</span> <span class="n">task</span><span class="o">=</span><span class="bp">self</span><span class="p">,</span> <span class="n">job_history</span> <span class="o">=</span> <s [...]
+ <span class="k">else</span><span class="p">:</span>
+ <span class="n">needs_update</span><span class="p">,</span> <span class="n">msg</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">needs_update_func</span> <span class="p">(</span><span class="o">*</span><span class="n">param</span><span class="p">)</span>
+
+ <span class="k">if</span> <span class="n">needs_update</span><span class="p">:</span>
+ <span class="n">messages</span><span class="o">.</span><span class="n">extend</span><span class="p">(</span><span class="n">get_job_names</span> <span class="p">(</span><span class="n">descriptive_param</span><span class="p">,</span> <span class="n">indent_str</span><span class="p">))</span>
+ <span class="n">per_job_messages</span> <span class="o">=</span> <span class="p">[(</span><span class="n">indent_str</span> <span class="o">+</span> <span class="n">s</span><span class="p">)</span> <span class="k">for</span> <span class="n">s</span> <span class="ow">in</span> <span class="p">(</span><span class="s">" Job needs update: </span><span class="si">%s</span><span class="s">"</span> <span class="o">%</span> <span class="n">msg</span><span class="p" [...]
+ <span class="n">messages</span><span class="o">.</span><span class="n">extend</span><span class="p">(</span><span class="n">per_job_messages</span><span class="p">)</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="k">if</span> <span class="n">verbose</span> <span class="o">></span> <span class="mi">4</span><span class="p">:</span>
+ <span class="n">messages</span><span class="o">.</span><span class="n">extend</span><span class="p">(</span><span class="n">get_job_names</span> <span class="p">(</span><span class="n">descriptive_param</span><span class="p">,</span> <span class="n">indent_str</span><span class="p">))</span>
+ <span class="n">messages</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">indent_str</span> <span class="o">+</span> <span class="s">" Job up-to-date"</span><span class="p">)</span>
+
+ <span class="k">if</span> <span class="n">cnt_jobs</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
+ <span class="n">messages</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">indent_str</span> <span class="o">+</span> <span class="s">"!!! No jobs for this task. "</span>
+ <span class="s">"Are you sure there is not an error in your "</span>
+ <span class="s">"code / regular expression?"</span><span class="p">)</span>
+ <span class="k">if</span> <span class="n">verbose</span> <span class="o">>=</span> <span class="mi">3</span> <span class="ow">or</span> <span class="p">(</span><span class="n">verbose</span> <span class="ow">and</span> <span class="n">cnt_jobs</span> <span class="o">==</span> <span class="mi">0</span><span class="p">):</span>
+ <span class="k">if</span> <span class="n">runtime_data</span> <span class="ow">and</span> <span class="s">"MATCH_FAILURE"</span> <span class="ow">in</span> <span class="n">runtime_data</span><span class="p">:</span>
+ <span class="k">for</span> <span class="n">s</span> <span class="ow">in</span> <span class="n">runtime_data</span><span class="p">[</span><span class="s">"MATCH_FAILURE"</span><span class="p">]:</span>
+ <span class="n">messages</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">indent_str</span> <span class="o">+</span> <span class="s">"Warning: File match failure: "</span> <span class="o">+</span> <span class="n">s</span><span class="p">)</span>
+ <span class="n">runtime_data</span><span class="p">[</span><span class="s">"MATCH_FAILURE"</span><span class="p">]</span> <span class="o">=</span> <span class="p">[]</span>
+ <span class="n">messages</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="s">""</span><span class="p">)</span>
+ <span class="k">return</span> <span class="n">messages</span>
+
+
+
+
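For orientation, the verbosity thresholds used above (1: task name only, 2: add the task description, 3: add per-job up-to-date details, above 4 to 5: job names and the single-job/multiple-job state) are the same levels a user passes to pipeline_printout. A minimal, self-contained sketch of exercising them; the task and file names are invented for illustration:

    import sys
    from ruffus import originate, transform, suffix, pipeline_printout

    @originate(["a.start", "b.start"])              # invented starting files
    def make_start(output_file):
        open(output_file, "w").close()

    @transform(make_start, suffix(".start"), ".finished")
    def finish(input_file, output_file):
        open(output_file, "w").close()

    if __name__ == "__main__":
        # verbose=1: task names only; verbose=3: per-job "needs update" reasons;
        # verbose > 5 also reports the single-job / multiple-job state seen above.
        pipeline_printout(sys.stdout, [finish], verbose=3)
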
+ <span class="c">#_____________________________________________________________________________________</span>
+
+ <span class="c"># signal</span>
+ <span class="c">#</span>
+ <span class="c"># returns whether up to date</span>
+ <span class="c">#</span>
+ <span class="c">#_____________________________________________________________________________________</span>
+ <span class="k">def</span> <span class="nf">signal</span> <span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">verbose_logger_job_history</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> If up to date: signal = true</span>
+<span class="sd"> If true, depth first search will not pass through this node</span>
+<span class="sd"> """</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="n">verbose_logger_job_history</span><span class="p">:</span>
+ <span class="k">raise</span> <span class="ne">Exception</span><span class="p">(</span><span class="s">"verbose_logger_job_history is None"</span><span class="p">)</span>
+
+ <span class="n">verbose_logger</span> <span class="o">=</span> <span class="n">verbose_logger_job_history</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
+ <span class="n">job_history</span> <span class="o">=</span> <span class="n">verbose_logger_job_history</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span>
+
+ <span class="k">try</span><span class="p">:</span>
+ <span class="n">logger</span> <span class="o">=</span> <span class="n">verbose_logger</span><span class="o">.</span><span class="n">logger</span>
+ <span class="n">verbose</span> <span class="o">=</span> <span class="n">verbose_logger</span><span class="o">.</span><span class="n">verbose</span>
+ <span class="n">runtime_data</span> <span class="o">=</span> <span class="n">verbose_logger</span><span class="o">.</span><span class="n">runtime_data</span>
+ <span class="n">log_at_level</span> <span class="p">(</span><span class="n">logger</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="n">verbose</span><span class="p">,</span>
+ <span class="s">" Task = "</span> <span class="o">+</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_task_name</span><span class="p">())</span>
+
+ <span class="c">#</span>
+ <span class="c"># If job is inactive, always consider it up-to-date</span>
+ <span class="c">#</span>
+ <span class="k">if</span> <span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">active_if_checks</span> <span class="o">!=</span> <span class="bp">None</span> <span class="ow">and</span>
+ <span class="nb">any</span><span class="p">(</span> <span class="ow">not</span> <span class="n">arg</span><span class="p">()</span> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">arg</span><span class="p">,</span> <span class="n">collections</span><span class="o">.</span><span class="n">Callable</span><span class="p">)</span> <span class="k">else</span> <span class="ow">not</span> <span class="n">arg</span>
+ <span class="k">for</span> <span class="n">arg</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">active_if_checks</span><span class="p">)):</span>
+ <span class="n">log_at_level</span> <span class="p">(</span><span class="n">logger</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="n">verbose</span><span class="p">,</span>
+ <span class="s">" Inactive task: treat as Up to date"</span><span class="p">)</span>
+ <span class="c">#print 'signaling that the inactive task is up to date'</span>
+ <span class="k">return</span> <span class="bp">True</span>
+
+ <span class="c">#</span>
+ <span class="c"># Always needs update if no way to check if up to date</span>
+ <span class="c">#</span>
+ <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">needs_update_func</span> <span class="o">==</span> <span class="bp">None</span><span class="p">:</span>
+ <span class="n">log_at_level</span> <span class="p">(</span><span class="n">logger</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="n">verbose</span><span class="p">,</span>
+ <span class="s">" No update function: treat as out of date"</span><span class="p">)</span>
+ <span class="k">return</span> <span class="bp">False</span>
+
+ <span class="c">#</span>
+ <span class="c"># if no parameters, just return the results of needs update</span>
+ <span class="c">#</span>
+ <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">param_generator_func</span> <span class="o">==</span> <span class="bp">None</span><span class="p">:</span>
+ <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">needs_update_func</span><span class="p">:</span>
+ <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">needs_update_func</span> <span class="o">==</span> <span class="n">needs_update_check_modify_time</span><span class="p">:</span>
+ <span class="n">needs_update</span><span class="p">,</span> <span class="n">msg</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">needs_update_func</span> <span class="p">(</span><span class="n">task</span><span class="o">=</span><span class="bp">self</span><span class="p">,</span> <span class="n">job_history</span> <span class="o">=</span> <span class="n">job_history</span><span class="p">)</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="n">needs_update</span><span class="p">,</span> <span class="n">msg</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">needs_update_func</span> <span class="p">()</span>
+ <span class="n">log_at_level</span> <span class="p">(</span><span class="n">logger</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="n">verbose</span><span class="p">,</span>
+ <span class="s">" Needs update = </span><span class="si">%s</span><span class="s">"</span> <span class="o">%</span> <span class="n">needs_update</span><span class="p">)</span>
+ <span class="k">return</span> <span class="ow">not</span> <span class="n">needs_update</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="k">return</span> <span class="bp">True</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="c">#</span>
+ <span class="c"># return not up to date if ANY jobs needs update</span>
+ <span class="c">#</span>
+ <span class="k">for</span> <span class="n">param</span><span class="p">,</span> <span class="n">descriptive_param</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">param_generator_func</span><span class="p">(</span><span class="n">runtime_data</span><span class="p">):</span>
+ <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">needs_update_func</span> <span class="o">==</span> <span class="n">needs_update_check_modify_time</span><span class="p">:</span>
+ <span class="n">needs_update</span><span class="p">,</span> <span class="n">msg</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">needs_update_func</span> <span class="p">(</span><span class="o">*</span><span class="n">param</span><span class="p">,</span> <span class="n">task</span><span class="o">=</span><span class="bp">self</span><span class="p">,</span> <span class="n">job_history</span> <span class="o">=</span [...]
+ <span class="k">else</span><span class="p">:</span>
+ <span class="n">needs_update</span><span class="p">,</span> <span class="n">msg</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">needs_update_func</span> <span class="p">(</span><span class="o">*</span><span class="n">param</span><span class="p">)</span>
+ <span class="k">if</span> <span class="n">needs_update</span><span class="p">:</span>
+ <span class="k">if</span> <span class="n">verbose</span> <span class="o">>=</span> <span class="mi">4</span><span class="p">:</span>
+ <span class="n">job_name</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_job_name</span><span class="p">(</span><span class="n">descriptive_param</span><span class="p">,</span> <span class="n">runtime_data</span><span class="p">)</span>
+ <span class="n">log_at_level</span> <span class="p">(</span><span class="n">logger</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="n">verbose</span><span class="p">,</span>
+ <span class="s">" Needing update:</span><span class="se">\n</span><span class="s"> </span><span class="si">%s</span><span class="s">"</span> <span class="o">%</span> <span class="n">job_name</span><span class="p">)</span>
+ <span class="k">return</span> <span class="bp">False</span>
+
+ <span class="c">#</span>
+ <span class="c"># Percolate warnings from parameter factories</span>
+ <span class="c">#</span>
+ <span class="k">if</span> <span class="p">(</span><span class="n">verbose</span> <span class="o">>=</span> <span class="mi">1</span> <span class="ow">and</span> <span class="s">"ruffus_WARNING"</span> <span class="ow">in</span> <span class="n">runtime_data</span> <span class="ow">and</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">param_generator_func</span> <span class="ow">in</span> <span class="n">runtime_data</span><span class="p">[</span><span class="s">"ruffus_WARNING"</span><span class="p">]):</span>
+ <span class="k">for</span> <span class="n">msg</span> <span class="ow">in</span> <span class="n">runtime_data</span><span class="p">[</span><span class="s">"ruffus_WARNING"</span><span class="p">][</span><span class="bp">self</span><span class="o">.</span><span class="n">param_generator_func</span><span class="p">]:</span>
+ <span class="n">logger</span><span class="o">.</span><span class="n">warning</span><span class="p">(</span><span class="s">" 'In Task </span><span class="si">%s</span><span class="s">' </span><span class="si">%s</span><span class="s"> "</span> <span class="o">%</span> <span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">get_task_name</span><span class="p">(</span><span class="bp">True</span><span class="p [...]
+
+
+ <span class="n">log_at_level</span> <span class="p">(</span><span class="n">logger</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="n">verbose</span><span class="p">,</span> <span class="s">" All jobs up to date"</span><span class="p">)</span>
+
+
+
+
+ <span class="k">return</span> <span class="bp">True</span>
+
+ <span class="c">#</span>
+ <span class="c"># removed for compatibility with python 3.x</span>
+ <span class="c">#</span>
+ <span class="c"># rethrow exception after adding task name</span>
+ <span class="c">#except error_task, inst:</span>
+ <span class="c"># inst.specify_task(self, "Exceptions in dependency checking")</span>
+ <span class="c"># raise</span>
+
+ <span class="k">except</span><span class="p">:</span>
+ <span class="n">exceptionType</span><span class="p">,</span> <span class="n">exceptionValue</span><span class="p">,</span> <span class="n">exceptionTraceback</span> <span class="o">=</span> <span class="n">sys</span><span class="o">.</span><span class="n">exc_info</span><span class="p">()</span>
+
+ <span class="c">#</span>
+ <span class="c"># rethrow exception after adding task name</span>
+ <span class="c">#</span>
+ <span class="k">if</span> <span class="n">exceptionType</span> <span class="o">==</span> <span class="n">error_task</span><span class="p">:</span>
+ <span class="n">exceptionValue</span><span class="o">.</span><span class="n">specify_task</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="s">"Exceptions in dependency checking"</span><span class="p">)</span>
+ <span class="k">raise</span>
+
+ <span class="n">exception_stack</span> <span class="o">=</span> <span class="n">traceback</span><span class="o">.</span><span class="n">format_exc</span><span class="p">(</span><span class="n">exceptionTraceback</span><span class="p">)</span>
+ <span class="n">exception_name</span> <span class="o">=</span> <span class="n">exceptionType</span><span class="o">.</span><span class="n">__module__</span> <span class="o">+</span> <span class="s">'.'</span> <span class="o">+</span> <span class="n">exceptionType</span><span class="o">.</span><span class="n">__name__</span>
+ <span class="n">exception_value</span> <span class="o">=</span> <span class="nb">str</span><span class="p">(</span><span class="n">exceptionValue</span><span class="p">)</span>
+ <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">exception_value</span><span class="p">):</span>
+ <span class="n">exception_value</span> <span class="o">=</span> <span class="s">"(</span><span class="si">%s</span><span class="s">)"</span> <span class="o">%</span> <span class="n">exception_value</span>
+ <span class="n">errt</span> <span class="o">=</span> <span class="n">RethrownJobError</span><span class="p">([(</span><span class="bp">self</span><span class="o">.</span><span class="n">_name</span><span class="p">,</span>
+ <span class="s">""</span><span class="p">,</span>
+ <span class="n">exception_name</span><span class="p">,</span>
+ <span class="n">exception_value</span><span class="p">,</span>
+ <span class="n">exception_stack</span><span class="p">)])</span>
+ <span class="n">errt</span><span class="o">.</span><span class="n">specify_task</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="s">"Exceptions generating parameters"</span><span class="p">)</span>
+ <span class="k">raise</span> <span class="n">errt</span>
+
+
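signal() above returns True when the task can be treated as up to date, so the depth-first search over the task graph stops at this node; by default the per-job decision is delegated to needs_update_check_modify_time. A rough standalone sketch of that style of check, not ruffus's actual implementation, assuming non-empty input and output lists:

    import os

    def outputs_out_of_date(input_files, output_files):
        """Return (needs_update, reason), in the spirit of a needs_update_func."""
        # any missing output means the job must run
        for output_file in output_files:
            if not os.path.exists(output_file):
                return True, "missing output %s" % output_file
        # otherwise compare newest input against oldest output
        newest_input = max(os.path.getmtime(f) for f in input_files)
        oldest_output = min(os.path.getmtime(f) for f in output_files)
        if newest_input > oldest_output:
            return True, "input files are newer than output files"
        return False, "up to date"
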
+
+ <span class="c">#_____________________________________________________________________________________</span>
+
+ <span class="c"># get_output_files</span>
+ <span class="c">#</span>
+ <span class="c">#</span>
+ <span class="c">#_____________________________________________________________________________________</span>
+ <span class="k">def</span> <span class="nf">get_output_files</span> <span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">do_not_expand_single_job_tasks</span><span class="p">,</span> <span class="n">runtime_data</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Cache output files</span>
+
+<span class="sd"> If flattened is True, returns file as a list of strings,</span>
+<span class="sd"> flattening any nested structures and discarding non string names</span>
+<span class="sd"> Normally returns a list with one item for each job or a just a list of file names.</span>
+<span class="sd"> For "single_job_single_output" i.e. @merge and @files with single jobs,</span>
+<span class="sd"> returns the output of a single job (i.e. can be a string)</span>
+<span class="sd"> """</span>
+
+ <span class="c">#</span>
+ <span class="c"># N.B. active_if_checks is called once per task</span>
+ <span class="c"># in make_job_parameter_generator() for consistency</span>
+ <span class="c">#</span>
+ <span class="c"># self.is_active can be set using self.active_if_checks in that function,</span>
+ <span class="c"># and therefore can be changed BETWEEN invocations of pipeline_run</span>
+ <span class="c">#</span>
+ <span class="c"># self.is_active is not used anywhere else</span>
+ <span class="c">#</span>
+ <span class="k">if</span> <span class="p">(</span><span class="ow">not</span> <span class="bp">self</span><span class="o">.</span><span class="n">is_active</span><span class="p">):</span>
+ <span class="k">return</span> <span class="p">[]</span>
+
+ <span class="c">#</span>
+ <span class="c"># This looks like the wrong place to flatten</span>
+ <span class="c">#</span>
+ <span class="n">flattened</span> <span class="o">=</span> <span class="bp">False</span>
+ <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">output_filenames</span> <span class="o">==</span> <span class="bp">None</span><span class="p">:</span>
+
+ <span class="bp">self</span><span class="o">.</span><span class="n">output_filenames</span> <span class="o">=</span> <span class="p">[]</span>
+
+ <span class="c"># skip tasks which don't have parameters</span>
+ <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">param_generator_func</span> <span class="o">!=</span> <span class="bp">None</span><span class="p">:</span>
+
+ <span class="n">cnt_jobs</span> <span class="o">=</span> <span class="mi">0</span>
+ <span class="k">for</span> <span class="n">param</span><span class="p">,</span> <span class="n">descriptive_param</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">param_generator_func</span><span class="p">(</span><span class="n">runtime_data</span><span class="p">):</span>
+ <span class="n">cnt_jobs</span> <span class="o">+=</span> <span class="mi">1</span>
+ <span class="c"># skip tasks which don't have output parameters</span>
+ <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">param</span><span class="p">)</span> <span class="o">>=</span> <span class="mi">2</span><span class="p">:</span>
+ <span class="c"># make sure each @split or @subdivide or @originate returns a list of jobs</span>
+ <span class="c"># i.e. each @split or @subdivide or @originate is always a ->many operation</span>
+ <span class="c"># even if len(many) can be 1 (or zero)</span>
+ <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">indeterminate_output</span> <span class="ow">and</span> <span class="ow">not</span> <span class="n">non_str_sequence</span><span class="p">(</span><span class="n">param</span><span class="p">[</span><span class="mi">1</span><span class="p">]):</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">output_filenames</span><span class="o">.</span><span class="n">append</span><span class="p">([</span><span class="n">param</span><span class="p">[</span><span class="mi">1</span><span class="p">]])</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">output_filenames</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">param</span><span class="p">[</span><span class="mi">1</span><span class="p">])</span>
+
+
+ <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_single_job_single_output</span> <span class="o">==</span> <span class="bp">self</span><span class="o">.</span><span class="n">single_job_single_output</span><span class="p">:</span>
+ <span class="k">if</span> <span class="n">cnt_jobs</span> <span class="o">></span> <span class="mi">1</span><span class="p">:</span>
+ <span class="k">raise</span> <span class="n">error_task_get_output</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span>
+ <span class="s">"Task which is supposed to produce a single output "</span>
+ <span class="s">"somehow has more than one job."</span><span class="p">)</span>
+
+ <span class="c">#</span>
+ <span class="c"># The output of @split should be treated as multiple jobs</span>
+ <span class="c">#</span>
+ <span class="c"># The output of @split is always a list of lists:</span>
+ <span class="c"># 1) There is a list of @split jobs</span>
+ <span class="c"># A) For advanced (regex) @split</span>
+ <span class="c"># this is a many -> many more operation</span>
+ <span class="c"># So len(list) == many (i.e. the number of jobs)</span>
+ <span class="c"># B) For normal @split</span>
+ <span class="c"># this is a 1 -> many operation</span>
+ <span class="c"># So len(list) = 1</span>
+ <span class="c">#</span>
+ <span class="c"># 2) The output of each @split job is a list</span>
+ <span class="c"># The items in this list of lists are each a job in subsequent tasks</span>
+ <span class="c">#</span>
+ <span class="c">#</span>
+ <span class="c"># So we need to concatenate these separate lists into a single list of output</span>
+ <span class="c">#</span>
+ <span class="c"># For example:</span>
+ <span class="c"># @split(["a.1", "b.1"], regex(r"(.)\.1"), r"\1.*.2")</span>
+ <span class="c"># def example(input, output):</span>
+ <span class="c"># # JOB 1</span>
+ <span class="c"># # a.1 -> a.i.2</span>
+ <span class="c"># # -> a.j.2</span>
+ <span class="c">#</span>
+ <span class="c"># # JOB 2</span>
+ <span class="c"># # b.1 -> b.i.2</span>
+ <span class="c"># # -> b.j.2</span>
+ <span class="c">#</span>
+ <span class="c"># output_filenames = [ [a.i.2, a.j.2], [b.i.2, b.j.2] ]</span>
+ <span class="c">#</span>
+ <span class="c"># we want [ a.i.2, a.j.2, b.i.2, b.j.2 ]</span>
+ <span class="c">#</span>
+ <span class="c"># This also works for simple @split</span>
+ <span class="c">#</span>
+ <span class="c"># @split("a.1", r"a.*.2")</span>
+ <span class="c"># def example(input, output):</span>
+ <span class="c"># # only job</span>
+ <span class="c"># # a.1 -> a.i.2</span>
+ <span class="c"># # -> a.j.2</span>
+ <span class="c">#</span>
+ <span class="c"># output_filenames = [ [a.i.2, a.j.2] ]</span>
+ <span class="c">#</span>
+ <span class="c"># we want [ a.i.2, a.j.2 ]</span>
+ <span class="c">#</span>
+ <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">output_filenames</span><span class="p">)</span> <span class="ow">and</span> <span class="bp">self</span><span class="o">.</span><span class="n">indeterminate_output</span><span class="p">:</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">output_filenames</span> <span class="o">=</span> <span class="nb">reduce</span><span class="p">(</span><span class="k">lambda</span> <span class="n">x</span><span class="p">,</span><span class="n">y</span><span class="p">:</span> <span class="n">x</span> <span class="o">+</span> <span class="n">y</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">output_ [...]
+
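The reduce() above concatenates the per-job output lists of an indeterminate (@split-style) task into the single flat list that downstream tasks see. The same flattening in isolation, using functools.reduce (the Python 3 spelling of the builtin used in this Python 2 era code):

    from functools import reduce

    # one inner list per @split job, as in the worked example above
    per_job_outputs = [["a.i.2", "a.j.2"], ["b.i.2", "b.j.2"]]
    flat = reduce(lambda x, y: x + y, per_job_outputs, [])
    assert flat == ["a.i.2", "a.j.2", "b.i.2", "b.j.2"]
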
+
+ <span class="k">if</span> <span class="n">flattened</span><span class="p">:</span>
+ <span class="c"># if single file name, return that</span>
+ <span class="c"># accepts unicode</span>
+ <span class="k">if</span> <span class="p">(</span><span class="n">do_not_expand_single_job_tasks</span> <span class="ow">and</span>
+ <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">output_filenames</span><span class="p">)</span> <span class="ow">and</span>
+ <span class="nb">isinstance</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">output_filenames</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="nb">basestring</span><span class="p">)):</span>
+ <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">output_filenames</span>
+ <span class="c"># if it is flattened, might as well sort it</span>
+ <span class="k">return</span> <span class="nb">sorted</span><span class="p">(</span><span class="n">get_strings_in_nested_sequence</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">output_filenames</span><span class="p">))</span>
+
+ <span class="k">else</span><span class="p">:</span>
+ <span class="c"># special handling for jobs which have a single task,</span>
+ <span class="k">if</span> <span class="p">(</span><span class="n">do_not_expand_single_job_tasks</span> <span class="ow">and</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_single_job_single_output</span> <span class="ow">and</span>
+ <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">output_filenames</span><span class="p">)</span> <span class="p">):</span>
+ <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">output_filenames</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
+
+ <span class="c">#</span>
+ <span class="c"># sort by jobs so it is just a weeny little bit more deterministic</span>
+ <span class="c">#</span>
+ <span class="k">return</span> <span class="nb">sorted</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">output_filenames</span><span class="p">)</span>
+
+
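The single_job_single_output branch above exists for decorators such as @merge, where the whole task is one job with one output, which get_output_files() can hand to downstream tasks unexpanded. A small sketch with invented file names:

    from ruffus import originate, merge

    @originate(["part.1", "part.2"])
    def make_parts(output_file):
        open(output_file, "w").close()

    # @merge is a many -> 1 operation: a single job and a single output
    @merge(make_parts, "summary.all")
    def summarise(input_files, output_file):
        with open(output_file, "w") as out:
            out.write("\n".join(input_files))
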
+
+ <span class="c">#_____________________________________________________________________________________</span>
+
+ <span class="c"># completed</span>
+ <span class="c">#</span>
+ <span class="c">#</span>
+ <span class="c">#_____________________________________________________________________________________</span>
+ <span class="k">def</span> <span class="nf">completed</span> <span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">logger</span><span class="p">,</span> <span class="n">jobs_uptodate</span> <span class="o">=</span> <span class="bp">False</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> called even when all jobs are up to date</span>
+<span class="sd"> """</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="bp">self</span><span class="o">.</span><span class="n">is_active</span><span class="p">:</span>
+ <span class="n">logger</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s">"Inactive Task = "</span> <span class="o">+</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_task_name</span><span class="p">())</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">output_filenames</span> <span class="o">=</span> <span class="bp">None</span>
+ <span class="k">return</span>
+
+ <span class="k">for</span> <span class="n">f</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">posttask_functions</span><span class="p">:</span>
+ <span class="n">f</span><span class="p">()</span>
+ <span class="k">if</span> <span class="n">jobs_uptodate</span><span class="p">:</span>
+ <span class="n">logger</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s">"Uptodate Task = "</span> <span class="o">+</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_task_name</span><span class="p">())</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="n">logger</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s">"Completed Task = "</span> <span class="o">+</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_task_name</span><span class="p">())</span>
+
+
+ <span class="c">#</span>
+ <span class="c"># indeterminate output. Check actual output again if some other task's job function depends on it</span>
+ <span class="c"># used for @split</span>
+ <span class="c">#</span>
+ <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">indeterminate_output</span><span class="p">:</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">output_filenames</span> <span class="o">=</span> <span class="bp">None</span>
+
+
+
+
+
+
+
+
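completed() above fires the task's registered post-task functions whether the jobs actually ran or were already up to date (unless the task is inactive). In user code these callbacks are attached with @posttask; touch_file is the stock helper for dropping a flag file. The names below are invented:

    from ruffus import originate, posttask, touch_file

    def notify():
        print("stage finished (also called when everything was up to date)")

    @posttask(notify, touch_file("stage.completed"))
    @originate(["x.data"])
    def make_data(output_file):
        open(output_file, "w").close()
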
+
+ <span class="c">#_________________________________________________________________________________________</span>
+
+ <span class="c"># handle_tasks_globs_in_inputs</span>
+
+ <span class="c">#_________________________________________________________________________________________</span>
+ <span class="k">def</span> <span class="nf">handle_tasks_globs_in_inputs</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">input_params</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Helper function for tasks which</span>
+<span class="sd"> 1) Notes globs and tasks</span>
+<span class="sd"> 2) Replaces tasks names and functions with actual tasks</span>
+<span class="sd"> 3) Adds task dependencies automatically via task_follows</span>
+<span class="sd"> """</span>
+ <span class="c">#</span>
+ <span class="c"># get list of function/function names and globs</span>
+ <span class="c">#</span>
+ <span class="n">function_or_func_names</span><span class="p">,</span> <span class="n">globs</span><span class="p">,</span> <span class="n">runtime_data_names</span> <span class="o">=</span> <span class="n">get_nested_tasks_or_globs</span><span class="p">(</span><span class="n">input_params</span><span class="p">)</span>
+
+ <span class="c">#</span>
+ <span class="c"># replace function / function names with tasks</span>
+ <span class="c">#</span>
+ <span class="n">tasks</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">task_follows</span><span class="p">(</span><span class="n">function_or_func_names</span><span class="p">)</span>
+ <span class="n">functions_to_tasks</span> <span class="o">=</span> <span class="nb">dict</span><span class="p">(</span><span class="nb">zip</span><span class="p">(</span><span class="n">function_or_func_names</span><span class="p">,</span> <span class="n">tasks</span><span class="p">))</span>
+ <span class="n">input_params</span> <span class="o">=</span> <span class="n">replace_func_names_with_tasks</span><span class="p">(</span><span class="n">input_params</span><span class="p">,</span> <span class="n">functions_to_tasks</span><span class="p">)</span>
+
+ <span class="k">return</span> <span class="n">t_params_tasks_globs_run_time_data</span><span class="p">(</span><span class="n">input_params</span><span class="p">,</span> <span class="n">tasks</span><span class="p">,</span> <span class="n">globs</span><span class="p">,</span> <span class="n">runtime_data_names</span><span class="p">)</span>
+
+
+
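handle_tasks_globs_in_inputs() is what lets an input parameter freely mix upstream task functions, glob patterns and literal file names: task references are replaced by the real task objects and registered as dependencies via task_follows. A typical mixed input, with invented names:

    from ruffus import originate, transform, suffix

    @originate(["sample.raw"])
    def acquire(output_file):
        open(output_file, "w").close()

    # The input list mixes an upstream task and a glob; naming `acquire`
    # here also makes it an implicit dependency of `clean`.
    @transform([acquire, "*.raw"], suffix(".raw"), ".clean")
    def clean(input_file, output_file):
        open(output_file, "w").close()
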
+
+
+ <span class="c">#8888888888888888888888888888888888888888888888888888888888888888888888888888888888888</span>
+
+ <span class="c"># task handlers</span>
+
+ <span class="c"># sets</span>
+ <span class="c"># 1) action_type</span>
+ <span class="c"># 2) param_generator_func</span>
+ <span class="c"># 3) needs_update_func</span>
+ <span class="c"># 4) job wrapper</span>
+
+
+ <span class="c">#8888888888888888888888888888888888888888888888888888888888888888888888888888888888888</span>
+ <span class="c">#_________________________________________________________________________________________</span>
+
+ <span class="c"># do_task_subdivide</span>
+
+ <span class="c">#_________________________________________________________________________________________</span>
+ <span class="k">def</span> <span class="nf">do_task_subdivide</span> <span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">orig_args</span><span class="p">,</span> <span class="n">decorator_name</span><span class="p">,</span> <span class="n">error_type</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> @subdivide and @split are synonyms</span>
+<span class="sd"> Common code here</span>
+<span class="sd"> """</span>
+
+ <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">orig_args</span><span class="p">)</span> <span class="o"><</span> <span class="mi">3</span><span class="p">:</span>
+ <span class="k">raise</span> <span class="n">error_type</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="s">"Too few arguments for </span><span class="si">%s</span><span class="s">"</span> <span class="o">%</span> <span class="n">decorator_name</span><span class="p">)</span>
+
+
+
+
+ <span class="c">#</span>
+ <span class="c"># replace function / function names with tasks</span>
+ <span class="c">#</span>
+ <span class="n">input_files_task_globs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">handle_tasks_globs_in_inputs</span><span class="p">(</span><span class="n">orig_args</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span>
+
+ <span class="c"># allows split to take a single file or task</span>
+ <span class="n">input_files_task_globs</span><span class="o">.</span><span class="n">single_file_to_list</span><span class="p">()</span>
+
+ <span class="c"># how to transform input to output file name</span>
+ <span class="n">file_names_transform</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">choose_file_names_transform</span> <span class="p">(</span><span class="n">orig_args</span><span class="p">[</span><span class="mi">1</span><span class="p">],</span> <span class="n">error_type</span><span class="p">,</span> <span class="n">decorator_name</span><span class="p">)</span>
+
+ <span class="n">orig_args</span> <span class="o">=</span> <span class="n">orig_args</span><span class="p">[</span><span class="mi">2</span><span class="p">:]</span>
+
+ <span class="c"># inputs can also be defined by pattern match</span>
+ <span class="n">extra_inputs</span><span class="p">,</span> <span class="n">replace_inputs</span><span class="p">,</span> <span class="n">output_pattern</span><span class="p">,</span> <span class="n">extra_params</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_extra_inputs_outputs_extra</span> <span class="p">(</span><span class="n">orig_args</span><span class="p">,</span> <span class="n">error_type</span><span class="p">,</ [...]
+
+ <span class="c">#</span>
+ <span class="c"># output globs will be replaced with files. But there should not be tasks here!</span>
+ <span class="c">#</span>
+ <span class="n">output_files_task_globs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">handle_tasks_globs_in_inputs</span><span class="p">(</span><span class="n">output_pattern</span><span class="p">)</span>
+ <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">output_files_task_globs</span><span class="o">.</span><span class="n">tasks</span><span class="p">):</span>
+ <span class="k">raise</span> <span class="n">error_type</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="p">(</span><span class="s">"</span><span class="si">%s</span><span class="s"> cannot output to another task. "</span>
+ <span class="s">"Do not include tasks in output parameters."</span><span class="p">)</span> <span class="o">%</span> <span class="n">decorator_name</span><span class="p">)</span>
+
+
+
+ <span class="bp">self</span><span class="o">.</span><span class="n">param_generator_func</span> <span class="o">=</span> <span class="n">subdivide_param_factory</span> <span class="p">(</span> <span class="n">input_files_task_globs</span><span class="p">,</span>
+ <span class="bp">False</span><span class="p">,</span> <span class="c"># flatten input</span>
+ <span class="n">file_names_transform</span><span class="p">,</span>
+ <span class="n">extra_inputs</span><span class="p">,</span>
+ <span class="n">replace_inputs</span><span class="p">,</span>
+ <span class="n">output_files_task_globs</span><span class="p">,</span>
+ <span class="o">*</span><span class="n">extra_params</span><span class="p">)</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">needs_update_func</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">needs_update_func</span> <span class="ow">or</span> <span class="n">needs_update_check_modify_time</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">job_wrapper</span> <span class="o">=</span> <span class="n">job_wrapper_io_files</span>
+ <span class="c">#self.job_descriptor = io_files_job_descriptor # (orig_args[0], output_runtime_data_names)</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">job_descriptor</span> <span class="o">=</span> <span class="n">io_files_one_to_many_job_descriptor</span>
+
+ <span class="c"># output is a glob</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">indeterminate_output</span> <span class="o">=</span> <span class="mi">2</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">single_multi_io</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">many_to_many</span>
+
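do_task_subdivide() backs @subdivide (and @split called with a regex): each input file is cut into an unknown number of pieces, so the output parameter is a pattern rather than a fixed list and indeterminate_output is set. A minimal sketch with invented file names:

    from ruffus import originate, subdivide, regex

    @originate(["a.big", "b.big"])
    def make_big(output_file):
        with open(output_file, "w") as out:
            out.write("1\n2\n3\n")

    # many -> many more: each *.big file yields an unknown number of *.N.piece files
    @subdivide(make_big, regex(r"(.+)\.big"), r"\1.*.piece")
    def cut_up(input_file, output_files):
        stem = input_file[:-len(".big")]
        with open(input_file) as infile:
            for i, line in enumerate(infile):
                with open("%s.%d.piece" % (stem, i), "w") as out:
                    out.write(line)
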
+ <span class="c">#_________________________________________________________________________________________</span>
+
+ <span class="c"># task_split</span>
+
+ <span class="c">#_________________________________________________________________________________________</span>
+ <span class="k">def</span> <span class="nf">do_task_simple_split</span> <span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">orig_args</span><span class="p">,</span> <span class="n">decorator_name</span><span class="p">,</span> <span class="n">error_type</span><span class="p">):</span>
+
+ <span class="c">#check enough arguments</span>
+ <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">orig_args</span><span class="p">)</span> <span class="o"><</span> <span class="mi">2</span><span class="p">:</span>
+ <span class="k">raise</span> <span class="n">error_type</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="s">"Too few arguments for </span><span class="si">%s</span><span class="s">"</span> <span class="o">%</span> <span class="n">decorator_name</span><span class="p">)</span>
+
+ <span class="c">#</span>
+ <span class="c"># replace function / function names with tasks</span>
+ <span class="c">#</span>
+ <span class="n">input_files_task_globs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">handle_tasks_globs_in_inputs</span><span class="p">(</span><span class="n">orig_args</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span>
+
+ <span class="c">#</span>
+ <span class="c"># replace output globs with files</span>
+ <span class="c">#</span>
+ <span class="n">output_files_task_globs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">handle_tasks_globs_in_inputs</span><span class="p">(</span><span class="n">orig_args</span><span class="p">[</span><span class="mi">1</span><span class="p">])</span>
+ <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">output_files_task_globs</span><span class="o">.</span><span class="n">tasks</span><span class="p">):</span>
+ <span class="k">raise</span> <span class="n">error_type</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="p">(</span><span class="s">"</span><span class="si">%s</span><span class="s"> cannot output to another task. "</span>
+ <span class="s">"Do not include tasks in output parameters."</span><span class="p">)</span> <span class="o">%</span> <span class="n">decorator_name</span><span class="p">)</span>
+
+ <span class="n">extra_params</span> <span class="o">=</span> <span class="n">orig_args</span><span class="p">[</span><span class="mi">2</span><span class="p">:]</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">param_generator_func</span> <span class="o">=</span> <span class="n">split_param_factory</span> <span class="p">(</span><span class="n">input_files_task_globs</span><span class="p">,</span> <span class="n">output_files_task_globs</span><span class="p">,</span> <span class="o">*</span><span class="n">extra_params</span><span class="p">)</span>
+
+
+ <span class="bp">self</span><span class="o">.</span><span class="n">needs_update_func</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">needs_update_func</span> <span class="ow">or</span> <span class="n">needs_update_check_modify_time</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">job_wrapper</span> <span class="o">=</span> <span class="n">job_wrapper_io_files</span>
+ <span class="c">#self.job_descriptor = io_files_job_descriptor# (orig_args[1], output_runtime_data_names)</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">job_descriptor</span> <span class="o">=</span> <span class="n">io_files_one_to_many_job_descriptor</span>
+
+ <span class="c"># output is a glob</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">indeterminate_output</span> <span class="o">=</span> <span class="mi">1</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">single_multi_io</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">one_to_many</span>
+
+
+
+ <span class="c">#_________________________________________________________________________________________</span>
+
+ <span class="c"># task_split</span>
+
+ <span class="c">#_________________________________________________________________________________________</span>
+ <span class="k">def</span> <span class="nf">task_split</span> <span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">orig_args</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Splits a single set of input files into multiple output file names,</span>
+<span class="sd"> where the number of output files may not be known beforehand.</span>
+<span class="sd"> """</span>
+ <span class="n">decorator_name</span> <span class="o">=</span> <span class="s">"@split"</span>
+ <span class="n">error_type</span> <span class="o">=</span> <span class="n">error_task_split</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">set_action_type</span> <span class="p">(</span><span class="n">_task</span><span class="o">.</span><span class="n">action_task_split</span><span class="p">)</span>
+
+ <span class="c">#</span>
+ <span class="c"># This is actually @subdivide</span>
+ <span class="c">#</span>
+ <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">orig_args</span><span class="p">[</span><span class="mi">1</span><span class="p">],</span> <span class="n">regex</span><span class="p">):</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">do_task_subdivide</span><span class="p">(</span><span class="n">orig_args</span><span class="p">,</span> <span class="n">decorator_name</span><span class="p">,</span> <span class="n">error_type</span><span class="p">)</span>
+
+ <span class="c">#</span>
+ <span class="c"># This is actually @split</span>
+ <span class="c">#</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">do_task_simple_split</span><span class="p">(</span><span class="n">orig_args</span><span class="p">,</span> <span class="n">decorator_name</span><span class="p">,</span> <span class="n">error_type</span><span class="p">)</span>
+
+
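task_split() above dispatches on the second argument: a regex() indicator routes to the @subdivide code path, anything else to the simple one-to-many form. Both spellings side by side, mirroring the worked example in the comments of get_output_files (file names invented; the bodies are stubs):

    from ruffus import split, regex

    # simple @split: 1 -> many, number of chunks unknown beforehand
    @split("everything.data", "chunk.*.data")
    def split_all(input_file, output_files):
        pass    # would write chunk.0.data, chunk.1.data, ...

    # @split with a regex is handled by do_task_subdivide: many -> many more
    @split(["a.1", "b.1"], regex(r"(.)\.1"), r"\1.*.2")
    def split_each(input_file, output_files):
        pass    # a.1 -> a.i.2, a.j.2 and b.1 -> b.i.2, b.j.2
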
+
+ <span class="c">#_________________________________________________________________________________________</span>
+
+ <span class="c"># task_originate</span>
+
+ <span class="c">#_________________________________________________________________________________________</span>
+ <span class="k">def</span> <span class="nf">task_originate</span> <span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">orig_args</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Splits out multiple output file names,</span>
+<span class="sd"> where the number of output files may or may not be known beforehand.</span>
+<span class="sd"> This is a synonym for @split(None,...)</span>
+<span class="sd"> """</span>
+ <span class="n">decorator_name</span> <span class="o">=</span> <span class="s">"@originate"</span>
+ <span class="n">error_type</span> <span class="o">=</span> <span class="n">error_task_originate</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">set_action_type</span> <span class="p">(</span><span class="n">_task</span><span class="o">.</span><span class="n">action_task_originate</span><span class="p">)</span>
+
+ <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">orig_args</span><span class="p">)</span> <span class="o"><</span> <span class="mi">1</span><span class="p">:</span>
+ <span class="k">raise</span> <span class="n">error_type</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="s">"</span><span class="si">%s</span><span class="s"> requires at least one argument"</span> <span class="o">%</span> <span class="n">decorator_name</span><span class="p">)</span>
+
+ <span class="n">output_params</span> <span class="o">=</span> <span class="n">orig_args</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
+
+ <span class="c"># make sure output_params is a list.</span>
+ <span class="c"># Each of these will be called as an output</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="n">non_str_sequence</span> <span class="p">(</span><span class="n">output_params</span><span class="p">):</span>
+ <span class="n">output_params</span> <span class="o">=</span> <span class="p">[</span><span class="n">output_params</span><span class="p">]</span>
+
+ <span class="c">#</span>
+ <span class="c"># output globs will be replaced with files. But there should not be tasks here!</span>
+ <span class="c">#</span>
+ <span class="n">list_output_files_task_globs</span> <span class="o">=</span> <span class="p">[</span><span class="bp">self</span><span class="o">.</span><span class="n">handle_tasks_globs_in_inputs</span><span class="p">(</span><span class="n">oo</span><span class="p">)</span> <span class="k">for</span> <span class="n">oo</span> <span class="ow">in</span> <span class="n">output_params</span><span class="p">]</span>
+ <span class="k">for</span> <span class="n">oftg</span> <span class="ow">in</span> <span class="n">list_output_files_task_globs</span><span class="p">:</span>
+ <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">oftg</span><span class="o">.</span><span class="n">tasks</span><span class="p">):</span>
+ <span class="k">raise</span> <span class="n">error_type</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="p">(</span><span class="s">"</span><span class="si">%s</span><span class="s"> cannot output to another task. "</span>
+ <span class="s">"Do not include tasks in output parameters."</span><span class="p">)</span> <span class="o">%</span> <span class="n">decorator_name</span><span class="p">)</span>
+
+ <span class="bp">self</span><span class="o">.</span><span class="n">param_generator_func</span> <span class="o">=</span> <span class="n">originate_param_factory</span> <span class="p">(</span><span class="n">list_output_files_task_globs</span><span class="p">,</span> <span class="n">orig_args</span><span class="p">[</span><span class="mi">1</span><span class="p">:])</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">needs_update_func</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">needs_update_func</span> <span class="ow">or</span> <span class="n">needs_update_check_modify_time</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">job_wrapper</span> <span class="o">=</span> <span class="n">job_wrapper_output_files</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">job_descriptor</span> <span class="o">=</span> <span class="n">io_files_one_to_many_job_descriptor</span>
+
+ <span class="c"># output is not a glob</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">indeterminate_output</span> <span class="o">=</span> <span class="mi">0</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">single_multi_io</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">many_to_many</span>
+
+
+
+
+
+
+
+ <span class="c">#_________________________________________________________________________________________</span>
+
+ <span class="c"># task_subdivide</span>
+
+ <span class="c">#_________________________________________________________________________________________</span>
+ <span class="k">def</span> <span class="nf">task_subdivide</span> <span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">orig_args</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Splits a single set of input files into multiple output file names,</span>
+<span class="sd"> where the number of output files may not be known beforehand.</span>
+<span class="sd"> """</span>
+ <span class="n">decorator_name</span> <span class="o">=</span> <span class="s">"@subdivide"</span>
+ <span class="n">error_type</span> <span class="o">=</span> <span class="n">error_task_subdivide</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">set_action_type</span> <span class="p">(</span><span class="n">_task</span><span class="o">.</span><span class="n">action_task_subdivide</span><span class="p">)</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">do_task_subdivide</span><span class="p">(</span><span class="n">orig_args</span><span class="p">,</span> <span class="n">decorator_name</span><span class="p">,</span> <span class="n">error_type</span><span class="p">)</span>
+
+ <span class="c">#_________________________________________________________________________________________</span>
+
+ <span class="c"># get_extra_inputs</span>
+
+ <span class="c">#_________________________________________________________________________________________</span>
+ <span class="k">def</span> <span class="nf">get_extra_inputs_outputs_extra</span> <span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">orig_args</span><span class="p">,</span> <span class="n">error_type</span><span class="p">,</span> <span class="n">decorator_name</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> shared code for subdivide, transform, product etc for parsing orig_args into</span>
+<span class="sd"> add_inputs/inputs, output, extra</span>
+<span class="sd"> """</span>
+
+ <span class="c">#</span>
+ <span class="c"># inputs can also be defined by pattern match</span>
+ <span class="c">#</span>
+ <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">orig_args</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="n">inputs</span><span class="p">):</span>
+ <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">orig_args</span><span class="p">)</span> <span class="o"><</span> <span class="mi">2</span><span class="p">:</span>
+ <span class="k">raise</span> <span class="n">error_type</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="s">"Too few arguments for </span><span class="si">%s</span><span class="s">"</span> <span class="o">%</span> <span class="n">decorator_name</span><span class="p">)</span>
+ <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">orig_args</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">args</span><span class="p">)</span> <span class="o">!=</span> <span class="mi">1</span><span class="p">:</span>
+ <span class="k">raise</span> <span class="n">error_task_transform_inputs_multiple_args</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span>
+ <span class="s">"inputs(...) expects only a single argument. "</span>
+ <span class="s">"This can be, for example, a file name, "</span>
+ <span class="s">"a regular expression pattern, or any "</span>
+ <span class="s">"nested structure. If the intention was to "</span>
+ <span class="s">"specify a tuple as the input parameter, "</span>
+ <span class="s">"please wrap the elements of the tuple "</span>
+ <span class="s">"in brackets in the decorator</span><span class="se">\n\n</span><span class="s">"</span>
+ <span class="s">"</span><span class="si">%s</span><span class="s">(..., inputs(...), ...)</span><span class="se">\n</span><span class="s">"</span> <span class="o">%</span> <span class="p">(</span><span class="n">decorator_name</span><span class="p">))</span>
+ <span class="n">replace_inputs</span> <span class="o">=</span> <span class="n">t_extra_inputs</span><span class="o">.</span><span class="n">REPLACE_INPUTS</span>
+ <span class="n">extra_inputs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">handle_tasks_globs_in_inputs</span><span class="p">(</span><span class="n">orig_args</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">args</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span>
+ <span class="n">output_pattern</span> <span class="o">=</span> <span class="n">orig_args</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span>
+ <span class="n">extra_params</span> <span class="o">=</span> <span class="n">orig_args</span><span class="p">[</span><span class="mi">2</span><span class="p">:]</span>
+ <span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">orig_args</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="n">add_inputs</span><span class="p">):</span>
+ <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">orig_args</span><span class="p">)</span> <span class="o"><</span> <span class="mi">2</span><span class="p">:</span>
+ <span class="k">raise</span> <span class="n">error_type</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="s">"Too few arguments for </span><span class="si">%s</span><span class="s">"</span> <span class="o">%</span> <span class="n">decorator_name</span><span class="p">)</span>
+ <span class="n">replace_inputs</span> <span class="o">=</span> <span class="n">t_extra_inputs</span><span class="o">.</span><span class="n">ADD_TO_INPUTS</span>
+ <span class="n">extra_inputs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">handle_tasks_globs_in_inputs</span><span class="p">(</span><span class="n">orig_args</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">args</span><span class="p">)</span>
+ <span class="n">output_pattern</span> <span class="o">=</span> <span class="n">orig_args</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span>
+ <span class="n">extra_params</span> <span class="o">=</span> <span class="n">orig_args</span><span class="p">[</span><span class="mi">2</span><span class="p">:]</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="n">replace_inputs</span> <span class="o">=</span> <span class="n">t_extra_inputs</span><span class="o">.</span><span class="n">KEEP_INPUTS</span>
+ <span class="n">extra_inputs</span> <span class="o">=</span> <span class="bp">None</span>
+ <span class="n">output_pattern</span> <span class="o">=</span> <span class="n">orig_args</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
+ <span class="n">extra_params</span> <span class="o">=</span> <span class="n">orig_args</span><span class="p">[</span><span class="mi">1</span><span class="p">:]</span>
+
+ <span class="k">return</span> <span class="n">extra_inputs</span><span class="p">,</span> <span class="n">replace_inputs</span><span class="p">,</span> <span class="n">output_pattern</span><span class="p">,</span> <span class="n">extra_params</span>
+
+ <span class="c">#_________________________________________________________________________________________</span>
+
+ <span class="c"># choose_file_names_transform</span>
+
+ <span class="c">#_________________________________________________________________________________________</span>
+ <span class="k">def</span> <span class="nf">choose_file_names_transform</span> <span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">file_name_transform_tag</span><span class="p">,</span> <span class="n">error_type</span><span class="p">,</span> <span class="n">decorator_name</span><span class="p">,</span> <span class="n">valid_tags</span> <span class="o">=</span> <span class="p">(</span><span class="n">regex</span><span class="p">,</span> <span [...]
+ <span class="sd">"""</span>
+<span class="sd"> shared code for subdivide, transform, product etc for choosing method for transform input file to output files</span>
+<span class="sd"> """</span>
+ <span class="n">valid_tag_names</span> <span class="o">=</span> <span class="p">[];</span>
+ <span class="c"># regular expression match</span>
+ <span class="k">if</span> <span class="p">(</span><span class="n">regex</span> <span class="ow">in</span> <span class="n">valid_tags</span><span class="p">):</span>
+ <span class="n">valid_tag_names</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="s">"regex()"</span><span class="p">)</span>
+ <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">file_name_transform_tag</span><span class="p">,</span> <span class="n">regex</span><span class="p">):</span>
+ <span class="k">return</span> <span class="n">t_regex_file_names_transform</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">file_name_transform_tag</span><span class="p">,</span> <span class="n">error_type</span><span class="p">,</span> <span class="n">decorator_name</span><span class="p">)</span>
+
+ <span class="c"># simulate end of string (suffix) match</span>
+ <span class="k">if</span> <span class="p">(</span><span class="n">suffix</span> <span class="ow">in</span> <span class="n">valid_tags</span><span class="p">):</span>
+ <span class="n">valid_tag_names</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="s">"suffix()"</span><span class="p">)</span>
+ <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">file_name_transform_tag</span><span class="p">,</span> <span class="n">suffix</span><span class="p">):</span>
+ <span class="k">return</span> <span class="n">t_suffix_file_names_transform</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">file_name_transform_tag</span><span class="p">,</span> <span class="n">error_type</span><span class="p">,</span> <span class="n">decorator_name</span><span class="p">)</span>
+
+ <span class="c"># new style string.format()</span>
+ <span class="k">if</span> <span class="p">(</span><span class="n">formatter</span> <span class="ow">in</span> <span class="n">valid_tags</span><span class="p">):</span>
+ <span class="n">valid_tag_names</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="s">"formatter()"</span><span class="p">)</span>
+ <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">file_name_transform_tag</span><span class="p">,</span> <span class="n">formatter</span><span class="p">):</span>
+ <span class="k">return</span> <span class="n">t_formatter_file_names_transform</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">file_name_transform_tag</span><span class="p">,</span> <span class="n">error_type</span><span class="p">,</span> <span class="n">decorator_name</span><span class="p">)</span>
+
+ <span class="k">raise</span> <span class="n">error_type</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="s">"</span><span class="si">%s</span><span class="s"> expects one of </span><span class="si">%s</span><span class="s"> as the second argument"</span> <span class="o">%</span> <span class="p">(</span><span class="n">decorator_name</span><span class="p">,</span> <span class="s">", "</span><span class="o">.</span> [...]
+
+
+ <span class="c">#_________________________________________________________________________________________</span>
+
+ <span class="c"># task_product</span>
+
+ <span class="c">#_________________________________________________________________________________________</span>
+ <span class="k">def</span> <span class="nf">task_product</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">orig_args</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> all versus all</span>
+<span class="sd"> """</span>
+ <span class="n">decorator_name</span> <span class="o">=</span> <span class="s">"@product"</span>
+ <span class="n">error_type</span> <span class="o">=</span> <span class="n">error_task_product</span>
+ <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">orig_args</span><span class="p">)</span> <span class="o"><</span> <span class="mi">3</span><span class="p">:</span>
+ <span class="k">raise</span> <span class="n">error_type</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="s">"Too few arguments for </span><span class="si">%s</span><span class="s">"</span> <span class="o">%</span> <span class="n">decorator_name</span><span class="p">)</span>
+
+ <span class="c">#</span>
+ <span class="c"># get all pairs of tasks / globs and formatter()</span>
+ <span class="c">#</span>
+ <span class="n">list_input_files_task_globs</span> <span class="o">=</span> <span class="p">[]</span>
+ <span class="n">list_formatter</span> <span class="o">=</span> <span class="p">[]</span>
+ <span class="k">while</span> <span class="nb">len</span><span class="p">(</span><span class="n">orig_args</span><span class="p">)</span> <span class="o">>=</span> <span class="mi">3</span><span class="p">:</span>
+ <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">orig_args</span><span class="p">[</span><span class="mi">1</span><span class="p">],</span> <span class="n">formatter</span><span class="p">):</span>
+ <span class="n">list_input_files_task_globs</span> <span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">orig_args</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span>
+ <span class="n">list_formatter</span> <span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">orig_args</span><span class="p">[</span><span class="mi">1</span><span class="p">])</span>
+ <span class="n">orig_args</span> <span class="o">=</span> <span class="n">orig_args</span><span class="p">[</span><span class="mi">2</span><span class="p">:]</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="k">break</span>
+
+ <span class="k">if</span> <span class="ow">not</span> <span class="nb">len</span><span class="p">(</span><span class="n">list_formatter</span><span class="p">):</span>
+ <span class="k">raise</span> <span class="n">error_task_product</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="s">"@product expects formatter() as the second argument"</span><span class="p">)</span>
+
+
+ <span class="bp">self</span><span class="o">.</span><span class="n">set_action_type</span> <span class="p">(</span><span class="n">_task</span><span class="o">.</span><span class="n">action_task_product</span><span class="p">)</span>
+
+ <span class="c">#</span>
+ <span class="c"># replace function / function names with tasks</span>
+ <span class="c">#</span>
+ <span class="n">list_input_files_task_globs</span> <span class="o">=</span> <span class="p">[</span><span class="bp">self</span><span class="o">.</span><span class="n">handle_tasks_globs_in_inputs</span><span class="p">(</span><span class="n">ii</span><span class="p">)</span> <span class="k">for</span> <span class="n">ii</span> <span class="ow">in</span> <span class="n">list_input_files_task_globs</span><span class="p">]</span>
+
+
+ <span class="c"># list of new style string.format()</span>
+ <span class="n">file_names_transform</span> <span class="o">=</span> <span class="n">t_nested_formatter_file_names_transform</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">list_formatter</span><span class="p">,</span> <span class="n">error_task_product</span><span class="p">,</span> <span class="n">decorator_name</span><span class="p">)</span>
+
+
+ <span class="c">#</span>
+ <span class="c"># inputs can also be defined by pattern match</span>
+ <span class="c">#</span>
+ <span class="n">extra_inputs</span><span class="p">,</span> <span class="n">replace_inputs</span><span class="p">,</span> <span class="n">output_pattern</span><span class="p">,</span> <span class="n">extra_params</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_extra_inputs_outputs_extra</span> <span class="p">(</span><span class="n">orig_args</span><span class="p">,</span> <span class="n">error_type</span><span class="p">,</ [...]
+
+ <span class="bp">self</span><span class="o">.</span><span class="n">param_generator_func</span> <span class="o">=</span> <span class="n">product_param_factory</span> <span class="p">(</span> <span class="n">list_input_files_task_globs</span><span class="p">,</span>
+ <span class="bp">False</span><span class="p">,</span> <span class="c"># flatten input</span>
+ <span class="n">file_names_transform</span><span class="p">,</span>
+ <span class="n">extra_inputs</span><span class="p">,</span>
+ <span class="n">replace_inputs</span><span class="p">,</span>
+ <span class="n">output_pattern</span><span class="p">,</span>
+ <span class="o">*</span><span class="n">extra_params</span><span class="p">)</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">needs_update_func</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">needs_update_func</span> <span class="ow">or</span> <span class="n">needs_update_check_modify_time</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">job_wrapper</span> <span class="o">=</span> <span class="n">job_wrapper_io_files</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">job_descriptor</span> <span class="o">=</span> <span class="n">io_files_job_descriptor</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">single_multi_io</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">many_to_many</span>
+
+
+ <span class="c">#_________________________________________________________________________________________</span>
+
+ <span class="c"># task_combinatorics</span>
+
+ <span class="c">#_________________________________________________________________________________________</span>
+ <span class="k">def</span> <span class="nf">task_combinatorics</span> <span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">orig_args</span><span class="p">,</span> <span class="n">combinatorics_type</span><span class="p">,</span> <span class="n">decorator_name</span><span class="p">,</span> <span class="n">error_type</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Common code for task_permutations, task_combinations_with_replacement, task_combinations</span>
+<span class="sd"> """</span>
+
+ <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">orig_args</span><span class="p">)</span> <span class="o"><</span> <span class="mi">4</span><span class="p">:</span>
+ <span class="k">raise</span> <span class="n">error_type</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="s">"Too few arguments for </span><span class="si">%s</span><span class="s">"</span> <span class="o">%</span> <span class="n">decorator_name</span><span class="p">)</span>
+
+
+ <span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">orig_args</span><span class="p">[</span><span class="mi">1</span><span class="p">],</span> <span class="n">formatter</span><span class="p">):</span>
+ <span class="k">raise</span> <span class="n">error_task_product</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="s">"</span><span class="si">%s</span><span class="s"> expects formatter() as the second argument"</span> <span class="o">%</span> <span class="n">decorator_name</span><span class="p">)</span>
+
+ <span class="c">#</span>
+ <span class="c"># replace function / function names with tasks</span>
+ <span class="c">#</span>
+ <span class="n">input_files_task_globs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">handle_tasks_globs_in_inputs</span><span class="p">(</span><span class="n">orig_args</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span>
+
+ <span class="n">k_tuple</span> <span class="o">=</span> <span class="n">orig_args</span><span class="p">[</span><span class="mi">2</span><span class="p">]</span>
+
+ <span class="c"># how to transform input to output file name: len(k-tuples) of (identical) formatters</span>
+ <span class="n">file_names_transform</span> <span class="o">=</span> <span class="n">t_nested_formatter_file_names_transform</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="p">[</span><span class="n">orig_args</span><span class="p">[</span><span class="mi">1</span><span class="p">]]</span> <span class="o">*</span> <span class="n">k_tuple</span><span class="p">,</span> <span class="n">error_type</span><span class="p">,</span> <span c [...]
+
+
+ <span class="bp">self</span><span class="o">.</span><span class="n">set_action_type</span> <span class="p">(</span><span class="n">_task</span><span class="o">.</span><span class="n">action_task_permutations</span><span class="p">)</span>
+
+ <span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">orig_args</span><span class="p">[</span><span class="mi">2</span><span class="p">],</span> <span class="nb">int</span><span class="p">):</span>
+ <span class="k">raise</span> <span class="n">error_task_product</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="s">"</span><span class="si">%s</span><span class="s"> expects an integer number as the third argument specifying the number of elements in each tuple."</span> <span class="o">%</span> <span class="n">decorator_name</span><span class="p">)</span>
+
+
+ <span class="n">orig_args</span> <span class="o">=</span> <span class="n">orig_args</span><span class="p">[</span><span class="mi">3</span><span class="p">:]</span>
+
+
+ <span class="c">#</span>
+ <span class="c"># inputs can also be defined by pattern match</span>
+ <span class="c">#</span>
+ <span class="n">extra_inputs</span><span class="p">,</span> <span class="n">replace_inputs</span><span class="p">,</span> <span class="n">output_pattern</span><span class="p">,</span> <span class="n">extra_params</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_extra_inputs_outputs_extra</span> <span class="p">(</span><span class="n">orig_args</span><span class="p">,</span> <span class="n">error_type</span><span class="p">,</ [...]
+
+ <span class="bp">self</span><span class="o">.</span><span class="n">param_generator_func</span> <span class="o">=</span> <span class="n">combinatorics_param_factory</span> <span class="p">(</span> <span class="n">input_files_task_globs</span><span class="p">,</span>
+ <span class="bp">False</span><span class="p">,</span> <span class="c"># flatten input</span>
+ <span class="n">combinatorics_type</span><span class="p">,</span>
+ <span class="n">k_tuple</span><span class="p">,</span>
+ <span class="n">file_names_transform</span><span class="p">,</span>
+ <span class="n">extra_inputs</span><span class="p">,</span>
+ <span class="n">replace_inputs</span><span class="p">,</span>
+ <span class="n">output_pattern</span><span class="p">,</span>
+ <span class="o">*</span><span class="n">extra_params</span><span class="p">)</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">needs_update_func</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">needs_update_func</span> <span class="ow">or</span> <span class="n">needs_update_check_modify_time</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">job_wrapper</span> <span class="o">=</span> <span class="n">job_wrapper_io_files</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">job_descriptor</span> <span class="o">=</span> <span class="n">io_files_job_descriptor</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">single_multi_io</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">many_to_many</span>
+
+ <span class="c">#_________________________________________________________________________________________</span>
+
+ <span class="c"># task_permutations</span>
+
+ <span class="c">#_________________________________________________________________________________________</span>
+ <span class="k">def</span> <span class="nf">task_permutations</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">orig_args</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> k-permutations of n</span>
+
+<span class="sd"> k-length tuples, all possible orderings, no self vs self</span>
+<span class="sd"> """</span>
+ <span class="n">decorator_name</span> <span class="o">=</span> <span class="s">"@permutations"</span>
+ <span class="n">error_type</span> <span class="o">=</span> <span class="n">error_task_permutations</span>
+ <span class="n">combinatorics_type</span> <span class="o">=</span> <span class="n">t_combinatorics_type</span><span class="o">.</span><span class="n">COMBINATORICS_PERMUTATIONS</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">task_combinatorics</span> <span class="p">(</span><span class="n">orig_args</span><span class="p">,</span> <span class="n">combinatorics_type</span><span class="p">,</span> <span class="n">decorator_name</span><span class="p">,</span> <span class="n">error_type</span><span class="p">)</span>
+
+
+ <span class="c">#_________________________________________________________________________________________</span>
+
+ <span class="c"># task_combinations</span>
+
+ <span class="c">#_________________________________________________________________________________________</span>
+ <span class="k">def</span> <span class="nf">task_combinations</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">orig_args</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> k-length tuples</span>
+<span class="sd"> Single (sorted) ordering, i.e. AB is the same as BA,</span>
+<span class="sd"> No repeats. No AA, BB</span>
+
+<span class="sd"> E.g.</span>
+<span class="sd"> combinations("ABCD", 3) = ['ABC', 'ABD', 'ACD', 'BCD']</span>
+<span class="sd"> combinations("ABCD", 2) = ['AB', 'AC', 'AD', 'BC', 'BD', 'CD']</span>
+<span class="sd"> """</span>
+ <span class="n">decorator_name</span> <span class="o">=</span> <span class="s">"@combinations"</span>
+ <span class="n">error_type</span> <span class="o">=</span> <span class="n">error_task_combinations</span>
+ <span class="n">combinatorics_type</span> <span class="o">=</span> <span class="n">t_combinatorics_type</span><span class="o">.</span><span class="n">COMBINATORICS_COMBINATIONS</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">task_combinatorics</span> <span class="p">(</span><span class="n">orig_args</span><span class="p">,</span> <span class="n">combinatorics_type</span><span class="p">,</span> <span class="n">decorator_name</span><span class="p">,</span> <span class="n">error_type</span><span class="p">)</span>
+
+
+ <span class="c">#_________________________________________________________________________________________</span>
+
+ <span class="c"># task_combinations_with_replacement</span>
+
+ <span class="c">#_________________________________________________________________________________________</span>
+ <span class="k">def</span> <span class="nf">task_combinations_with_replacement</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">orig_args</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> k-length tuples</span>
+<span class="sd"> Single (sorted) ordering, i.e. AB is the same as BA,</span>
+<span class="sd"> Repeats. AA, BB, AAC etc.</span>
+
+<span class="sd"> E.g.</span>
+<span class="sd"> combinations_with_replacement("ABCD", 3) = ['AAA', 'AAB', 'AAC', 'AAD',</span>
+<span class="sd"> 'ABB', 'ABC', 'ABD',</span>
+<span class="sd"> 'ACC', 'ACD',</span>
+<span class="sd"> 'ADD',</span>
+<span class="sd"> 'BBB', 'BBC', 'BBD',</span>
+<span class="sd"> 'BCC', 'BCD',</span>
+<span class="sd"> 'BDD',</span>
+<span class="sd"> 'CCC', 'CCD',</span>
+<span class="sd"> 'CDD',</span>
+<span class="sd"> 'DDD']</span>
+<span class="sd"> combinations_with_replacement("ABCD", 2) = ['AA', 'AB', 'AC', 'AD',</span>
+<span class="sd"> 'BB', 'BC', 'BD',</span>
+<span class="sd"> 'CC', 'CD',</span>
+<span class="sd"> 'DD']</span>
+
+<span class="sd"> """</span>
+ <span class="n">decorator_name</span> <span class="o">=</span> <span class="s">"@combinations_with_replacement"</span>
+ <span class="n">error_type</span> <span class="o">=</span> <span class="n">error_task_combinations_with_replacement</span>
+ <span class="n">combinatorics_type</span> <span class="o">=</span> <span class="n">t_combinatorics_type</span><span class="o">.</span><span class="n">COMBINATORICS_COMBINATIONS_WITH_REPLACEMENT</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">task_combinatorics</span> <span class="p">(</span><span class="n">orig_args</span><span class="p">,</span> <span class="n">combinatorics_type</span><span class="p">,</span> <span class="n">decorator_name</span><span class="p">,</span> <span class="n">error_type</span><span class="p">)</span>
+
+
+
+
+ <span class="c">#_________________________________________________________________________________________</span>
+
+ <span class="c"># task_transform</span>
+
+ <span class="c">#_________________________________________________________________________________________</span>
+ <span class="k">def</span> <span class="nf">task_transform</span> <span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">orig_args</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Merges multiple input files into a single output.</span>
+<span class="sd"> """</span>
+ <span class="n">decorator_name</span> <span class="o">=</span> <span class="s">"@transform"</span>
+ <span class="n">error_type</span> <span class="o">=</span> <span class="n">error_task_transform</span>
+ <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">orig_args</span><span class="p">)</span> <span class="o"><</span> <span class="mi">3</span><span class="p">:</span>
+ <span class="k">raise</span> <span class="n">error_type</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="s">"Too few arguments for </span><span class="si">%s</span><span class="s">"</span> <span class="o">%</span> <span class="n">decorator_name</span><span class="p">)</span>
+
+
+ <span class="bp">self</span><span class="o">.</span><span class="n">set_action_type</span> <span class="p">(</span><span class="n">_task</span><span class="o">.</span><span class="n">action_task_transform</span><span class="p">)</span>
+
+ <span class="c">#</span>
+ <span class="c"># replace function / function names with tasks</span>
+ <span class="c">#</span>
+ <span class="n">input_files_task_globs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">handle_tasks_globs_in_inputs</span><span class="p">(</span><span class="n">orig_args</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span>
+
+
+ <span class="c">#_________________________________________________________________________________</span>
+ <span class="c">#</span>
+ <span class="c"># single_job_single_output is bad policy. Can we remove it?</span>
+ <span class="c"># What does this actually mean in Ruffus semantics?</span>
+ <span class="c">#</span>
+ <span class="c">#</span>
+ <span class="c"># allows transform to take a single file or task</span>
+ <span class="k">if</span> <span class="n">input_files_task_globs</span><span class="o">.</span><span class="n">single_file_to_list</span><span class="p">():</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_single_job_single_output</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">single_job_single_output</span>
+
+ <span class="c">#</span>
+ <span class="c"># whether transform generates a list of jobs or not will depend on the parent task</span>
+ <span class="c">#</span>
+ <span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">input_files_task_globs</span><span class="o">.</span><span class="n">params</span><span class="p">,</span> <span class="n">_task</span><span class="p">):</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_single_job_single_output</span> <span class="o">=</span> <span class="n">input_files_task_globs</span><span class="o">.</span><span class="n">params</span>
+
+ <span class="c">#_________________________________________________________________________________</span>
+
+ <span class="c"># how to transform input to output file name</span>
+ <span class="n">file_names_transform</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">choose_file_names_transform</span> <span class="p">(</span><span class="n">orig_args</span><span class="p">[</span><span class="mi">1</span><span class="p">],</span> <span class="n">error_task_transform</span><span class="p">,</span> <span class="n">decorator_name</span><span class="p">)</span>
+
+ <span class="n">orig_args</span> <span class="o">=</span> <span class="n">orig_args</span><span class="p">[</span><span class="mi">2</span><span class="p">:]</span>
+
+
+ <span class="c">#</span>
+ <span class="c"># inputs can also be defined by pattern match</span>
+ <span class="c">#</span>
+ <span class="n">extra_inputs</span><span class="p">,</span> <span class="n">replace_inputs</span><span class="p">,</span> <span class="n">output_pattern</span><span class="p">,</span> <span class="n">extra_params</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_extra_inputs_outputs_extra</span> <span class="p">(</span><span class="n">orig_args</span><span class="p">,</span> <span class="n">error_type</span><span class="p">,</ [...]
+
+ <span class="bp">self</span><span class="o">.</span><span class="n">param_generator_func</span> <span class="o">=</span> <span class="n">transform_param_factory</span> <span class="p">(</span> <span class="n">input_files_task_globs</span><span class="p">,</span>
+ <span class="bp">False</span><span class="p">,</span> <span class="c"># flatten input</span>
+ <span class="n">file_names_transform</span><span class="p">,</span>
+ <span class="n">extra_inputs</span><span class="p">,</span>
+ <span class="n">replace_inputs</span><span class="p">,</span>
+ <span class="n">output_pattern</span><span class="p">,</span>
+ <span class="o">*</span><span class="n">extra_params</span><span class="p">)</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">needs_update_func</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">needs_update_func</span> <span class="ow">or</span> <span class="n">needs_update_check_modify_time</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">job_wrapper</span> <span class="o">=</span> <span class="n">job_wrapper_io_files</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">job_descriptor</span> <span class="o">=</span> <span class="n">io_files_job_descriptor</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">single_multi_io</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">many_to_many</span>
+
+ <span class="c">#_________________________________________________________________________________________</span>
+
+ <span class="c"># task_collate</span>
+
+ <span class="c">#_________________________________________________________________________________________</span>
+ <span class="k">def</span> <span class="nf">task_collate</span> <span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">orig_args</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Merges multiple input files into a single output.</span>
+<span class="sd"> """</span>
+ <span class="n">decorator_name</span> <span class="o">=</span> <span class="s">"@collate"</span>
+ <span class="n">error_type</span> <span class="o">=</span> <span class="n">error_task_collate</span>
+ <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">orig_args</span><span class="p">)</span> <span class="o"><</span> <span class="mi">3</span><span class="p">:</span>
+ <span class="k">raise</span> <span class="n">error_type</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="s">"Too few arguments for </span><span class="si">%s</span><span class="s">"</span> <span class="o">%</span> <span class="n">decorator_name</span><span class="p">)</span>
+
+ <span class="bp">self</span><span class="o">.</span><span class="n">set_action_type</span> <span class="p">(</span><span class="n">_task</span><span class="o">.</span><span class="n">action_task_collate</span><span class="p">)</span>
+
+ <span class="c">#</span>
+ <span class="c"># replace function / function names with tasks</span>
+ <span class="c">#</span>
+ <span class="n">input_files_task_globs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">handle_tasks_globs_in_inputs</span><span class="p">(</span><span class="n">orig_args</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span>
+
+
+ <span class="c"># how to transform input to output file name</span>
+ <span class="n">file_names_transform</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">choose_file_names_transform</span> <span class="p">(</span><span class="n">orig_args</span><span class="p">[</span><span class="mi">1</span><span class="p">],</span> <span class="n">error_task_collate</span><span class="p">,</span> <span class="n">decorator_name</span><span class="p">,</span> <span class="p">(</span><span class="n">regex</span>< [...]
+
+ <span class="n">orig_args</span> <span class="o">=</span> <span class="n">orig_args</span><span class="p">[</span><span class="mi">2</span><span class="p">:]</span>
+
+ <span class="c">#</span>
+ <span class="c"># inputs also defined by pattern match</span>
+ <span class="c">#</span>
+ <span class="n">extra_inputs</span><span class="p">,</span> <span class="n">replace_inputs</span><span class="p">,</span> <span class="n">output_pattern</span><span class="p">,</span> <span class="n">extra_params</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_extra_inputs_outputs_extra</span> <span class="p">(</span><span class="n">orig_args</span><span class="p">,</span> <span class="n">error_type</span><span class="p">,</ [...]
+
+ <span class="bp">self</span><span class="o">.</span><span class="n">single_multi_io</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">many_to_many</span>
+
+ <span class="bp">self</span><span class="o">.</span><span class="n">param_generator_func</span> <span class="o">=</span> <span class="n">collate_param_factory</span> <span class="p">(</span> <span class="n">input_files_task_globs</span><span class="p">,</span>
+ <span class="bp">False</span><span class="p">,</span> <span class="c"># flatten input</span>
+ <span class="n">file_names_transform</span><span class="p">,</span>
+ <span class="n">extra_inputs</span><span class="p">,</span>
+ <span class="n">replace_inputs</span><span class="p">,</span>
+ <span class="n">output_pattern</span><span class="p">,</span>
+ <span class="o">*</span><span class="n">extra_params</span><span class="p">)</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">needs_update_func</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">needs_update_func</span> <span class="ow">or</span> <span class="n">needs_update_check_modify_time</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">job_wrapper</span> <span class="o">=</span> <span class="n">job_wrapper_io_files</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">job_descriptor</span> <span class="o">=</span> <span class="n">io_files_job_descriptor</span>
+
+
+
+ <span class="c">#_________________________________________________________________________________________</span>
+
+ <span class="c"># task_merge</span>
+
+ <span class="c">#_________________________________________________________________________________________</span>
+ <span class="k">def</span> <span class="nf">task_merge</span> <span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">orig_args</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Merges multiple input files into a single output.</span>
+<span class="sd"> """</span>
+ <span class="c">#</span>
+ <span class="c"># check enough arguments</span>
+ <span class="c">#</span>
+ <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">orig_args</span><span class="p">)</span> <span class="o"><</span> <span class="mi">2</span><span class="p">:</span>
+ <span class="k">raise</span> <span class="n">error_task_merge</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="s">"Too few arguments for @merge"</span><span class="p">)</span>
+
+ <span class="bp">self</span><span class="o">.</span><span class="n">set_action_type</span> <span class="p">(</span><span class="n">_task</span><span class="o">.</span><span class="n">action_task_merge</span><span class="p">)</span>
+
+ <span class="c">#</span>
+ <span class="c"># replace function / function names with tasks</span>
+ <span class="c">#</span>
+ <span class="n">input_files_task_globs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">handle_tasks_globs_in_inputs</span><span class="p">(</span><span class="n">orig_args</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span>
+
+ <span class="n">extra_params</span> <span class="o">=</span> <span class="n">orig_args</span><span class="p">[</span><span class="mi">1</span><span class="p">:]</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">param_generator_func</span> <span class="o">=</span> <span class="n">merge_param_factory</span> <span class="p">(</span><span class="n">input_files_task_globs</span><span class="p">,</span>
+ <span class="o">*</span><span class="n">extra_params</span><span class="p">)</span>
+
+
+<span class="c"># self._single_job_single_output = self.multiple_jobs_outputs</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_single_job_single_output</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">single_job_single_output</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">single_multi_io</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">many_to_one</span>
+
+ <span class="bp">self</span><span class="o">.</span><span class="n">needs_update_func</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">needs_update_func</span> <span class="ow">or</span> <span class="n">needs_update_check_modify_time</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">job_wrapper</span> <span class="o">=</span> <span class="n">job_wrapper_io_files</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">job_descriptor</span> <span class="o">=</span> <span class="n">io_files_job_descriptor</span>
+
+ <span class="c">#_________________________________________________________________________________________</span>
+
+ <span class="c"># task_parallel</span>
+
+ <span class="c">#_________________________________________________________________________________________</span>
+ <span class="k">def</span> <span class="nf">task_parallel</span> <span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">orig_args</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> calls user function in parallel</span>
+<span class="sd"> with either each of a list of parameters</span>
+<span class="sd"> or using parameters generated by a custom function</span>
+<span class="sd"> """</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">set_action_type</span> <span class="p">(</span><span class="n">_task</span><span class="o">.</span><span class="n">action_parallel</span><span class="p">)</span>
+
+ <span class="c"># unmodified from __init__</span>
+ <span class="c">#</span>
+ <span class="c"># self.needs_update_func = None</span>
+ <span class="c"># self.job_wrapper = job_wrapper_generic</span>
+ <span class="c"># self.job_descriptor = io_files_job_descriptor</span>
+
+ <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">orig_args</span><span class="p">)</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
+ <span class="k">raise</span> <span class="n">error_task_parallel</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="s">"Too few arguments for @parallel"</span><span class="p">)</span>
+
+ <span class="c"># Use parameters generated by a custom function</span>
+ <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">orig_args</span><span class="p">)</span> <span class="o">==</span> <span class="mi">1</span> <span class="ow">and</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">orig_args</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="n">collections</span><span class="o">.</span><span class="n">Callable</span><span class="p"> [...]
+ <span class="c">#if len(orig_args) == 1 and type(orig_args[0]) == types.FunctionType:</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">param_generator_func</span> <span class="o">=</span> <span class="n">args_param_factory</span><span class="p">(</span><span class="n">orig_args</span><span class="p">[</span><span class="mi">0</span><span class="p">]())</span>
+
+ <span class="c"># list of params</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">orig_args</span><span class="p">)</span> <span class="o">></span> <span class="mi">1</span><span class="p">:</span>
+ <span class="c"># single jobs</span>
+ <span class="n">params</span> <span class="o">=</span> <span class="n">copy</span><span class="o">.</span><span class="n">copy</span><span class="p">([</span><span class="n">orig_args</span><span class="p">])</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_single_job_single_output</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">single_job_single_output</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="c"># multiple jobs with input/output parameters etc.</span>
+ <span class="n">params</span> <span class="o">=</span> <span class="n">copy</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">orig_args</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span>
+ <span class="n">check_parallel_parameters</span> <span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">params</span><span class="p">,</span> <span class="n">error_task_parallel</span><span class="p">)</span>
+
+ <span class="bp">self</span><span class="o">.</span><span class="n">param_generator_func</span> <span class="o">=</span> <span class="n">args_param_factory</span> <span class="p">(</span><span class="n">params</span><span class="p">)</span>
+
+
+
+ <span class="c">#_________________________________________________________________________________________</span>
+
+ <span class="c"># task_files</span>
+
+ <span class="c">#_________________________________________________________________________________________</span>
+ <span class="k">def</span> <span class="nf">task_files</span> <span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">orig_args</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> calls user function in parallel</span>
+<span class="sd"> with either each of a list of parameters</span>
+<span class="sd"> or using parameters generated by a custom function</span>
+
+<span class="sd"> In the parameter list,</span>
+<span class="sd"> The first two items of each set of parameters must</span>
+<span class="sd"> be input/output files or lists of files or Null</span>
+<span class="sd"> """</span>
+
+ <span class="bp">self</span><span class="o">.</span><span class="n">needs_update_func</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">needs_update_func</span> <span class="ow">or</span> <span class="n">needs_update_check_modify_time</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">job_wrapper</span> <span class="o">=</span> <span class="n">job_wrapper_io_files</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">job_descriptor</span> <span class="o">=</span> <span class="n">io_files_job_descriptor</span>
+
+ <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">orig_args</span><span class="p">)</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
+ <span class="k">raise</span> <span class="n">error_task_files</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="s">"Too few arguments for @files"</span><span class="p">)</span>
+
+ <span class="c"># Use parameters generated by a custom function</span>
+ <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">orig_args</span><span class="p">)</span> <span class="o">==</span> <span class="mi">1</span> <span class="ow">and</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">orig_args</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="n">collections</span><span class="o">.</span><span class="n">Callable</span><span class="p"> [...]
+ <span class="c">#if len(orig_args) == 1 and type(orig_args[0]) == types.FunctionType:</span>
+
+ <span class="bp">self</span><span class="o">.</span><span class="n">set_action_type</span> <span class="p">(</span><span class="n">_task</span><span class="o">.</span><span class="n">action_task_files_func</span><span class="p">)</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">param_generator_func</span> <span class="o">=</span> <span class="n">files_custom_generator_param_factory</span><span class="p">(</span><span class="n">orig_args</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span>
+
+ <span class="c"># assume</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">single_multi_io</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">many_to_many</span>
+
+ <span class="c"># Use parameters in supplied list</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">set_action_type</span> <span class="p">(</span><span class="n">_task</span><span class="o">.</span><span class="n">action_task_files</span><span class="p">)</span>
+
+ <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">orig_args</span><span class="p">)</span> <span class="o">></span> <span class="mi">1</span><span class="p">:</span>
+
+ <span class="c"># single jobs</span>
+ <span class="c"># This is true even if the previous task has multiple output</span>
+ <span class="c"># These will all be joined together at the hip (like @merge)</span>
+ <span class="c"># If you want different behavior, use @transform</span>
+ <span class="n">params</span> <span class="o">=</span> <span class="n">copy</span><span class="o">.</span><span class="n">copy</span><span class="p">([</span><span class="n">orig_args</span><span class="p">])</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_single_job_single_output</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">single_job_single_output</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">single_multi_io</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">one_to_one</span>
+
+
+ <span class="k">else</span><span class="p">:</span>
+
+ <span class="c"># multiple jobs with input/output parameters etc.</span>
+ <span class="n">params</span> <span class="o">=</span> <span class="n">copy</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">orig_args</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_single_job_single_output</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">multiple_jobs_outputs</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">single_multi_io</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">many_to_many</span>
+
+ <span class="n">check_files_io_parameters</span> <span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">params</span><span class="p">,</span> <span class="n">error_task_files</span><span class="p">)</span>
+
+ <span class="c">#</span>
+ <span class="c"># get list of function/function names and globs for all job params</span>
+ <span class="c">#</span>
+
+ <span class="c">#</span>
+ <span class="c"># replace function / function names with tasks</span>
+ <span class="c">#</span>
+ <span class="n">input_patterns</span> <span class="o">=</span> <span class="p">[</span><span class="n">j</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="k">for</span> <span class="n">j</span> <span class="ow">in</span> <span class="n">params</span><span class="p">]</span>
+ <span class="n">input_files_task_globs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">handle_tasks_globs_in_inputs</span><span class="p">(</span><span class="n">input_patterns</span><span class="p">)</span>
+
+
+ <span class="c">#</span>
+ <span class="c"># extra params</span>
+ <span class="c">#</span>
+ <span class="n">output_extra_params</span> <span class="o">=</span> <span class="p">[</span><span class="nb">tuple</span><span class="p">(</span><span class="n">j</span><span class="p">[</span><span class="mi">1</span><span class="p">:])</span> <span class="k">for</span> <span class="n">j</span> <span class="ow">in</span> <span class="n">params</span><span class="p">]</span>
+
+ <span class="bp">self</span><span class="o">.</span><span class="n">param_generator_func</span> <span class="o">=</span> <span class="n">files_param_factory</span> <span class="p">(</span><span class="n">input_files_task_globs</span><span class="p">,</span>
+ <span class="bp">False</span><span class="p">,</span> <span class="c"># flatten input</span>
+ <span class="bp">True</span><span class="p">,</span> <span class="c"># do_not_expand_single_job_tasks</span>
+ <span class="n">output_extra_params</span><span class="p">)</span>
+
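
For orientation, a minimal usage sketch of the @files decorator that task_files() parses above. The two forms correspond to the two branches: an explicit parameter list, or a single generator function. File names and the generator are illustrative only.

    from ruffus import files, pipeline_run

    # explicit parameter list: one job per [input, output, extras...] set
    @files([["a.input", "a.output"],
            ["b.input", "b.output"]])
    def convert(input_file, output_file):
        open(output_file, "w").write(open(input_file).read())

    # generator function: yields one parameter set per job
    def make_params():
        for i in range(3):
            yield "chunk%d.input" % i, "chunk%d.output" % i

    @files(make_params)
    def convert_chunks(input_file, output_file):
        open(output_file, "w").write(open(input_file).read())

    # pipeline_run([convert, convert_chunks])   # inputs must exist before running
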
+
+
+ <span class="c">#_________________________________________________________________________________________</span>
+
+ <span class="c"># task_files_re</span>
+
+ <span class="c">#_________________________________________________________________________________________</span>
+ <span class="k">def</span> <span class="nf">task_files_re</span> <span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">old_args</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> calls user function in parallel</span>
+<span class="sd"> with input_files, output_files, parameters</span>
+<span class="sd"> These needed to be generated on the fly by</span>
+<span class="sd"> getting all file names in the supplied list/glob pattern</span>
+<span class="sd"> There are two variations:</span>
+
+<span class="sd"> 1) inputfiles = all files in glob which match the regular expression</span>
+<span class="sd"> outputfile = generated from the replacement string</span>
+
+<span class="sd"> 2) inputfiles = all files in glob which match the regular expression and</span>
+<span class="sd"> generated from the "from" replacement string</span>
+<span class="sd"> outputfiles = all files in glob which match the regular expression and</span>
+<span class="sd"> generated from the "to" replacement string</span>
+<span class="sd"> """</span>
+ <span class="c">#</span>
+ <span class="c"># check enough arguments</span>
+ <span class="c">#</span>
+ <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">old_args</span><span class="p">)</span> <span class="o"><</span> <span class="mi">3</span><span class="p">:</span>
+ <span class="k">raise</span> <span class="n">error_task_files_re</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="s">"Too few arguments for @files_re"</span><span class="p">)</span>
+
+ <span class="bp">self</span><span class="o">.</span><span class="n">set_action_type</span> <span class="p">(</span><span class="n">_task</span><span class="o">.</span><span class="n">action_task_files_re</span><span class="p">)</span>
+
+ <span class="c"># check if parameters wrapped in combine</span>
+ <span class="n">combining_all_jobs</span><span class="p">,</span> <span class="n">orig_args</span> <span class="o">=</span> <span class="n">is_file_re_combining</span><span class="p">(</span><span class="n">old_args</span><span class="p">)</span>
+
+ <span class="c">#</span>
+ <span class="c"># replace function / function names with tasks</span>
+ <span class="c">#</span>
+ <span class="n">input_files_task_globs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">handle_tasks_globs_in_inputs</span><span class="p">(</span><span class="n">orig_args</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span>
+
+ <span class="n">file_names_transform</span> <span class="o">=</span> <span class="n">t_regex_file_names_transform</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">regex</span><span class="p">(</span><span class="n">orig_args</span><span class="p">[</span><span class="mi">1</span><span class="p">]),</span> <span class="n">error_task_files_re</span><span class="p">,</span> <span class="s">"@files_re"</span><span class="p"> [...]
+
+
+ <span class="c"># if the input file term is missing, just use the original</span>
+ <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">orig_args</span><span class="p">)</span> <span class="o">==</span> <span class="mi">3</span><span class="p">:</span>
+ <span class="n">extra_input_files_task_globs</span> <span class="o">=</span> <span class="bp">None</span>
+ <span class="n">output_and_extras</span> <span class="o">=</span> <span class="p">[</span><span class="n">orig_args</span><span class="p">[</span><span class="mi">2</span><span class="p">]]</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="n">extra_input_files_task_globs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">handle_tasks_globs_in_inputs</span><span class="p">(</span><span class="n">orig_args</span><span class="p">[</span><span class="mi">2</span><span class="p">])</span>
+ <span class="n">output_and_extras</span> <span class="o">=</span> <span class="n">orig_args</span><span class="p">[</span><span class="mi">3</span><span class="p">:]</span>
+
+
+ <span class="k">if</span> <span class="n">combining_all_jobs</span><span class="p">:</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">single_multi_io</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">many_to_many</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">param_generator_func</span> <span class="o">=</span> <span class="n">collate_param_factory</span> <span class="p">(</span><span class="n">input_files_task_globs</span><span class="p">,</span>
+ <span class="bp">False</span><span class="p">,</span> <span class="c"># flatten</span>
+ <span class="n">file_names_transform</span><span class="p">,</span>
+ <span class="n">extra_input_files_task_globs</span><span class="p">,</span>
+ <span class="n">t_extra_inputs</span><span class="o">.</span><span class="n">REPLACE_INPUTS</span><span class="p">,</span>
+ <span class="o">*</span><span class="n">output_and_extras</span><span class="p">)</span>
+ <span class="k">else</span><span class="p">:</span>
+
+ <span class="bp">self</span><span class="o">.</span><span class="n">single_multi_io</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">many_to_many</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">param_generator_func</span> <span class="o">=</span> <span class="n">transform_param_factory</span> <span class="p">(</span><span class="n">input_files_task_globs</span><span class="p">,</span>
+ <span class="bp">False</span><span class="p">,</span> <span class="c"># flatten</span>
+ <span class="n">file_names_transform</span><span class="p">,</span>
+ <span class="n">extra_input_files_task_globs</span><span class="p">,</span>
+ <span class="n">t_extra_inputs</span><span class="o">.</span><span class="n">REPLACE_INPUTS</span><span class="p">,</span>
+ <span class="o">*</span><span class="n">output_and_extras</span><span class="p">)</span>
+
+
+ <span class="bp">self</span><span class="o">.</span><span class="n">needs_update_func</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">needs_update_func</span> <span class="ow">or</span> <span class="n">needs_update_check_modify_time</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">job_wrapper</span> <span class="o">=</span> <span class="n">job_wrapper_io_files</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">job_descriptor</span> <span class="o">=</span> <span class="n">io_files_job_descriptor</span>
+
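
A hedged sketch of the legacy @files_re form handled by task_files_re() above: the glob supplies candidate inputs, the regular expression selects the matching files, and the replacement string builds each output name. Paths are examples only.

    from ruffus import files_re

    @files_re("*.input",          # glob for candidate input files
              r"(.+)\.input$",    # regex each file name must match
              r"\1.output")       # output name built from the replacement string
    def process(input_file, output_file):
        open(output_file, "w").write(open(input_file).read())
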
+
+
+ <span class="c">#_________________________________________________________________________________________</span>
+
+ <span class="c"># task_mkdir</span>
+
+ <span class="c"># only called within task_follows</span>
+
+ <span class="c">#_________________________________________________________________________________________</span>
+ <span class="k">def</span> <span class="nf">task_mkdir</span> <span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">orig_args</span><span class="p">):</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">cnt_task_mkdir</span> <span class="o">+=</span> <span class="mi">1</span>
+ <span class="c"># give unique name to this instance of mkdir</span>
+ <span class="n">unique_name</span> <span class="o">=</span> <span class="s">r"(mkdir </span><span class="si">%d</span><span class="s">) before "</span> <span class="o">%</span> <span class="bp">self</span><span class="o">.</span><span class="n">cnt_task_mkdir</span> <span class="o">+</span> <span class="bp">self</span><span class="o">.</span><span class="n">_name</span>
+ <span class="n">new_node</span> <span class="o">=</span> <span class="n">_task</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_module_name</span><span class="p">,</span> <span class="n">unique_name</span><span class="p">)</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">add_child</span><span class="p">(</span><span class="n">new_node</span><span class="p">)</span>
+ <span class="n">new_node</span><span class="o">.</span><span class="n">do_task_mkdir</span><span class="p">(</span><span class="n">orig_args</span><span class="p">)</span>
+ <span class="n">new_node</span><span class="o">.</span><span class="n">display_name</span> <span class="o">=</span> <span class="n">new_node</span><span class="o">.</span><span class="n">_description</span>
+
+
+ <span class="k">def</span> <span class="nf">do_task_mkdir</span> <span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">orig_args</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> list of directory names or a single argument which is aa list of directory names</span>
+<span class="sd"> Creates directory if missing</span>
+<span class="sd"> """</span>
+ <span class="n">decorator_name</span> <span class="o">=</span> <span class="s">"mkdir"</span>
+ <span class="n">error_type</span> <span class="o">=</span> <span class="n">error_task_mkdir</span>
+
+ <span class="c"># jump through hoops</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">set_action_type</span> <span class="p">(</span><span class="n">_task</span><span class="o">.</span><span class="n">action_mkdir</span><span class="p">)</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">needs_update_func</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">needs_update_func</span> <span class="ow">or</span> <span class="n">needs_update_check_directory_missing</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_description</span> <span class="o">=</span> <span class="s">"Make directories </span><span class="si">%s</span><span class="s">"</span> <span class="o">%</span> <span class="p">(</span><span class="n">shorten_filenames_encoder</span><span class="p">(</span><span class="n">orig_args</span><span class="p">))</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">job_wrapper</span> <span class="o">=</span> <span class="n">job_wrapper_mkdir</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">job_descriptor</span> <span class="o">=</span> <span class="n">mkdir_job_descriptor</span>
+
+ <span class="c"># doesn't have a real function</span>
+ <span class="c"># use job_wrapper just so it is not None</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">user_defined_work_func</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">job_wrapper</span>
+
+
+ <span class="c">#</span>
+ <span class="c"># @transform like behaviour with regex / suffix or formatter</span>
+ <span class="c">#</span>
+ <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">orig_args</span><span class="p">)</span> <span class="o">></span> <span class="mi">1</span> <span class="ow">and</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">orig_args</span><span class="p">[</span><span class="mi">1</span><span class="p">],</span> <span class="p">(</span><span class="n">formatter</span><span class="p">,</span> <span class="n">suffix [...]
+ <span class="bp">self</span><span class="o">.</span><span class="n">single_multi_io</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">many_to_many</span>
+
+ <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">orig_args</span><span class="p">)</span> <span class="o"><</span> <span class="mi">3</span><span class="p">:</span>
+ <span class="k">raise</span> <span class="n">error_type</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="s">"Too few arguments for </span><span class="si">%s</span><span class="s">"</span> <span class="o">%</span> <span class="n">decorator_name</span><span class="p">)</span>
+
+ <span class="c">#</span>
+ <span class="c"># replace function / function names with tasks</span>
+ <span class="c">#</span>
+ <span class="n">input_files_task_globs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">handle_tasks_globs_in_inputs</span><span class="p">(</span><span class="n">orig_args</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span>
+
+
+ <span class="c"># how to transform input to output file name</span>
+ <span class="n">file_names_transform</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">choose_file_names_transform</span> <span class="p">(</span><span class="n">orig_args</span><span class="p">[</span><span class="mi">1</span><span class="p">],</span> <span class="n">error_task_transform</span><span class="p">,</span> <span class="n">decorator_name</span><span class="p">)</span>
+
+ <span class="n">orig_args</span> <span class="o">=</span> <span class="n">orig_args</span><span class="p">[</span><span class="mi">2</span><span class="p">:]</span>
+
+ <span class="c">#</span>
+ <span class="c"># inputs can also be defined by pattern match</span>
+ <span class="c">#</span>
+ <span class="n">extra_inputs</span><span class="p">,</span> <span class="n">replace_inputs</span><span class="p">,</span> <span class="n">output_pattern</span><span class="p">,</span> <span class="n">extra_params</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_extra_inputs_outputs_extra</span> <span class="p">(</span><span class="n">orig_args</span><span class="p">,</span> <span class="n">error_type</span><span class="p" [...]
+
+ <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">extra_params</span><span class="p">):</span>
+ <span class="k">raise</span> <span class="n">error_type</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="s">"Too many arguments for </span><span class="si">%s</span><span class="s">"</span> <span class="o">%</span> <span class="n">decorator_name</span><span class="p">)</span>
+
+
+ <span class="bp">self</span><span class="o">.</span><span class="n">param_generator_func</span> <span class="o">=</span> <span class="n">transform_param_factory</span> <span class="p">(</span> <span class="n">input_files_task_globs</span><span class="p">,</span>
+ <span class="bp">False</span><span class="p">,</span> <span class="c"># flatten input</span>
+ <span class="n">file_names_transform</span><span class="p">,</span>
+ <span class="n">extra_inputs</span><span class="p">,</span>
+ <span class="n">replace_inputs</span><span class="p">,</span>
+ <span class="n">output_pattern</span><span class="p">,</span>
+ <span class="o">*</span><span class="n">extra_params</span><span class="p">)</span>
+
+ <span class="c">#</span>
+ <span class="c"># simple behaviour: just make directories in list of strings</span>
+ <span class="c">#</span>
+ <span class="c"># the mkdir decorator accepts one string, multiple strings or a list of strings</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">single_multi_io</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">one_to_one</span>
+
+ <span class="c">#</span>
+ <span class="c">#</span>
+ <span class="c">#</span>
+ <span class="c"># if a single argument collection of parameters, keep that as is</span>
+ <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">orig_args</span><span class="p">)</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
+ <span class="n">mkdir_params</span> <span class="o">=</span> <span class="p">[]</span>
+ <span class="k">elif</span> <span class="nb">len</span><span class="p">(</span><span class="n">orig_args</span><span class="p">)</span> <span class="o">></span> <span class="mi">1</span><span class="p">:</span>
+ <span class="n">mkdir_params</span> <span class="o">=</span> <span class="n">orig_args</span>
+ <span class="c"># len(orig_args) == 1: unpack orig_args[0]</span>
+ <span class="k">elif</span> <span class="n">non_str_sequence</span> <span class="p">(</span><span class="n">orig_args</span><span class="p">[</span><span class="mi">0</span><span class="p">]):</span>
+ <span class="n">mkdir_params</span> <span class="o">=</span> <span class="n">orig_args</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
+ <span class="c"># single string or other non collection types</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="n">mkdir_params</span> <span class="o">=</span> <span class="n">orig_args</span>
+
+ <span class="c"># all directories created in one job to reduce race conditions</span>
+ <span class="c"># so we are converting [a,b,c] into [ [(a, b,c)] ]</span>
+ <span class="c"># where orig_args = (a,b,c)</span>
+ <span class="c"># i.e. one job whose solitory argument is a tuple/list of directory names</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">param_generator_func</span> <span class="o">=</span> <span class="n">args_param_factory</span><span class="p">([[</span><span class="nb">sorted</span><span class="p">(</span><span class="n">mkdir_params</span><span class="p">)]])</span>
+
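
The two behaviours implemented in do_task_mkdir() above, sketched with made-up names: plain directory arguments (all created in a single job), or a @transform-like pattern using formatter/suffix/regex to derive one directory per input file.

    from ruffus import follows, mkdir, transform, formatter

    # simple form: one job creates every listed directory
    @follows(mkdir("working_dir", "working_dir/tmp"))
    def setup():
        pass

    # pattern form: one directory derived from each matching input file
    @mkdir("*.fastq", formatter(), "{basename[0]}.dir")
    @transform("*.fastq", formatter(), "{basename[0]}.dir/{basename[0]}.counts")
    def count_reads(input_file, output_file):
        open(output_file, "w").close()
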
+
+
+
+
+
+
+
+ <span class="c">#8888888888888888888888888888888888888888888888888888888888888888888888888888888888888</span>
+
+ <span class="c"># Other task handlers</span>
+
+
+
+ <span class="c">#8888888888888888888888888888888888888888888888888888888888888888888888888888888888888</span>
+
+
+
+
+
+
+ <span class="c">#_________________________________________________________________________________________</span>
+
+ <span class="c"># task_follows</span>
+
+ <span class="c">#_________________________________________________________________________________________</span>
+ <span class="k">def</span> <span class="nf">task_follows</span> <span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">args</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Saved decorator arguments should be:</span>
+<span class="sd"> (string/task,...)</span>
+<span class="sd"> """</span>
+ <span class="n">new_tasks</span> <span class="o">=</span> <span class="p">[]</span>
+ <span class="k">for</span> <span class="n">arg</span> <span class="ow">in</span> <span class="n">args</span><span class="p">:</span>
+ <span class="c">#</span>
+ <span class="c"># specified by string: unicode or otherwise</span>
+ <span class="c">#</span>
+ <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">arg</span><span class="p">,</span> <span class="nb">basestring</span><span class="p">):</span>
+ <span class="c"># string looks up to defined task, use that</span>
+ <span class="k">if</span> <span class="n">node</span><span class="o">.</span><span class="n">is_node</span><span class="p">(</span><span class="n">arg</span><span class="p">):</span>
+ <span class="n">arg</span> <span class="o">=</span> <span class="n">node</span><span class="o">.</span><span class="n">lookup_node_from_name</span><span class="p">(</span><span class="n">arg</span><span class="p">)</span>
+ <span class="c"># string looks up to defined task in main module, use that</span>
+ <span class="k">elif</span> <span class="n">node</span><span class="o">.</span><span class="n">is_node</span><span class="p">(</span><span class="s">"__main__."</span> <span class="o">+</span> <span class="n">arg</span><span class="p">):</span>
+ <span class="n">arg</span> <span class="o">=</span> <span class="n">node</span><span class="o">.</span><span class="n">lookup_node_from_name</span><span class="p">(</span><span class="s">"__main__."</span> <span class="o">+</span> <span class="n">arg</span><span class="p">)</span>
+
+ <span class="c">#</span>
+ <span class="c"># string does not look up to defined task: defer</span>
+ <span class="c">#</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="c"># no module: use same module as current task</span>
+ <span class="n">names</span> <span class="o">=</span> <span class="n">arg</span><span class="o">.</span><span class="n">rsplit</span><span class="p">(</span><span class="s">"."</span><span class="p">,</span> <span class="mi">2</span><span class="p">)</span>
+ <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">names</span><span class="p">)</span> <span class="o">==</span> <span class="mi">1</span><span class="p">:</span>
+ <span class="n">arg</span> <span class="o">=</span> <span class="n">_task</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_module_name</span><span class="p">,</span> <span class="n">arg</span><span class="p">)</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="n">arg</span> <span class="o">=</span> <span class="n">_task</span><span class="p">(</span><span class="o">*</span><span class="n">names</span><span class="p">)</span>
+
+ <span class="c">#</span>
+ <span class="c"># add dependency</span>
+ <span class="c"># duplicate dependencies are ignore automatically</span>
+ <span class="c">#</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">add_child</span><span class="p">(</span><span class="n">arg</span><span class="p">)</span>
+ <span class="n">new_tasks</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">arg</span><span class="p">)</span>
+
+
+ <span class="c">#</span>
+ <span class="c"># for mkdir, automatically generate task with unique name</span>
+ <span class="c">#</span>
+ <span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">arg</span><span class="p">,</span> <span class="n">mkdir</span><span class="p">):</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">cnt_task_mkdir</span> <span class="o">+=</span> <span class="mi">1</span>
+ <span class="c"># give unique name to this instance of mkdir</span>
+ <span class="n">unique_name</span> <span class="o">=</span> <span class="s">r"(mkdir </span><span class="si">%d</span><span class="s">) before "</span> <span class="o">%</span> <span class="bp">self</span><span class="o">.</span><span class="n">cnt_task_mkdir</span> <span class="o">+</span> <span class="bp">self</span><span class="o">.</span><span class="n">_name</span>
+ <span class="n">new_node</span> <span class="o">=</span> <span class="n">_task</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_module_name</span><span class="p">,</span> <span class="n">unique_name</span><span class="p">)</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">add_child</span><span class="p">(</span><span class="n">new_node</span><span class="p">)</span>
+ <span class="n">new_node</span><span class="o">.</span><span class="n">do_task_mkdir</span><span class="p">(</span><span class="n">arg</span><span class="o">.</span><span class="n">args</span><span class="p">)</span>
+ <span class="n">new_node</span><span class="o">.</span><span class="n">display_name</span> <span class="o">=</span> <span class="n">new_node</span><span class="o">.</span><span class="n">_description</span>
+ <span class="n">new_tasks</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">new_node</span><span class="p">)</span>
+
+
+
+
+ <span class="c">#</span>
+ <span class="c"># Is this a function?</span>
+ <span class="c"># Turn this function into a task</span>
+ <span class="c"># (add task as attribute of this function)</span>
+ <span class="c"># Add self as dependent</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="c">#if type(arg) != types.FunctionType:</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">arg</span><span class="p">,</span> <span class="n">collections</span><span class="o">.</span><span class="n">Callable</span><span class="p">):</span>
+
+ <span class="k">raise</span> <span class="n">error_decorator_args</span><span class="p">(</span><span class="s">"Dependencies must be functions or function names in "</span> <span class="o">+</span>
+ <span class="s">"@task_follows </span><span class="si">%s</span><span class="s">:</span><span class="se">\n</span><span class="s">[</span><span class="si">%s</span><span class="s">]"</span> <span class="o">%</span>
+ <span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_name</span><span class="p">,</span> <span class="nb">str</span><span class="p">(</span><span class="n">arg</span><span class="p">)))</span>
+
+ <span class="c"># add task as attribute of this function</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="nb">hasattr</span><span class="p">(</span><span class="n">arg</span><span class="p">,</span> <span class="s">"pipeline_task"</span><span class="p">):</span>
+ <span class="n">arg</span><span class="o">.</span><span class="n">pipeline_task</span> <span class="o">=</span> <span class="n">_task</span><span class="o">.</span><span class="n">create_task</span><span class="p">(</span><span class="n">arg</span><span class="p">)</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">add_child</span><span class="p">(</span><span class="n">arg</span><span class="o">.</span><span class="n">pipeline_task</span><span class="p">)</span>
+ <span class="n">new_tasks</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">arg</span><span class="o">.</span><span class="n">pipeline_task</span><span class="p">)</span>
+
+ <span class="k">return</span> <span class="n">new_tasks</span>
+
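
The dependency forms accepted by task_follows() above, in a short sketch; task and directory names are made up.

    from ruffus import follows, mkdir, files

    @files(None, "reference.idx")
    def prepare_reference(input_file, output_file):
        open(output_file, "w").close()

    @follows(prepare_reference,         # a pipelined function
             "prepare_reference",       # or its name as a string (duplicates are ignored)
             mkdir("alignments"))       # or a mkdir() indicator, run beforehand
    @files("reads.fq", "alignments/reads.sam")
    def align(input_file, output_file):
        open(output_file, "w").close()
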
+
+
+ <span class="c">#_________________________________________________________________________________________</span>
+
+ <span class="c"># task_check_if_uptodate</span>
+
+ <span class="c">#_________________________________________________________________________________________</span>
+ <span class="k">def</span> <span class="nf">task_check_if_uptodate</span> <span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">args</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Saved decorator arguments should be:</span>
+<span class="sd"> a function which takes the appropriate number of arguments for each job</span>
+<span class="sd"> """</span>
+ <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">args</span><span class="p">)</span> <span class="o">!=</span> <span class="mi">1</span> <span class="ow">or</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">args</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="n">collections</span><span class="o">.</span><span class="n">Callable</span [...]
+ <span class="c">#if len(args) != 1 or type(args[0]) != types.FunctionType:</span>
+ <span class="k">raise</span> <span class="n">error_decorator_args</span><span class="p">(</span><span class="s">"Expecting a single function in "</span> <span class="o">+</span>
+ <span class="s">"@task_check_if_uptodate </span><span class="si">%s</span><span class="s">:</span><span class="se">\n</span><span class="s">[</span><span class="si">%s</span><span class="s">]"</span> <span class="o">%</span>
+ <span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_name</span><span class="p">,</span> <span class="nb">str</span><span class="p">(</span><span class="n">args</span><span class="p">)))</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">needs_update_func</span> <span class="o">=</span> <span class="n">args</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
+
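
A sketch of the callback stored by task_check_if_uptodate() above: it receives the same parameters as each job and returns a (needs_update, reason) pair. The helper and file names below are hypothetical.

    import os
    from ruffus import check_if_uptodate, files

    def missing_output(input_file, output_file):
        if not os.path.exists(output_file):
            return True, "%s is missing" % output_file
        return False, "%s is up to date" % output_file

    @check_if_uptodate(missing_output)
    @files("data.txt", "data.summary")
    def summarise(input_file, output_file):
        open(output_file, "w").close()
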
+
+
+ <span class="c">#_________________________________________________________________________________________</span>
+
+ <span class="c"># task_posttask</span>
+
+ <span class="c">#_________________________________________________________________________________________</span>
+ <span class="k">def</span> <span class="nf">task_posttask</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">args</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Saved decorator arguments should be:</span>
+<span class="sd"> one or more functions which will be called if the task completes</span>
+<span class="sd"> """</span>
+ <span class="k">for</span> <span class="n">arg</span> <span class="ow">in</span> <span class="n">args</span><span class="p">:</span>
+ <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">arg</span><span class="p">,</span> <span class="n">touch_file</span><span class="p">):</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">posttask_functions</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">touch_file_factory</span> <span class="p">(</span><span class="n">arg</span><span class="o">.</span><span class="n">args</span><span class="p">,</span> <span class="n">register_cleanup</span><span class="p">))</span>
+ <span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">arg</span><span class="p">,</span> <span class="n">collections</span><span class="o">.</span><span class="n">Callable</span><span class="p">):</span>
+ <span class="c">#elif type(arg) == types.FunctionType:</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">posttask_functions</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">arg</span><span class="p">)</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="k">raise</span> <span class="n">PostTaskArgumentError</span><span class="p">(</span><span class="s">"Expecting simple functions or touch_file in "</span> <span class="o">+</span>
+ <span class="s">"@posttask(...)</span><span class="se">\n</span><span class="s"> Task = </span><span class="si">%s</span><span class="s">"</span> <span class="o">%</span>
+ <span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_name</span><span class="p">))</span>
+
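
The two argument types handled by task_posttask() above, with illustrative names: a plain callable, and a touch_file() indicator that stamps a flag file when the task completes.

    from ruffus import posttask, touch_file, files

    def announce_completion():
        print("clean_data finished")

    @posttask(announce_completion, touch_file("stage1.completed"))
    @files("raw.txt", "clean.txt")
    def clean_data(input_file, output_file):
        open(output_file, "w").write(open(input_file).read())
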
+ <span class="c">#_________________________________________________________________________________________</span>
+
+ <span class="c"># task_jobs_limit</span>
+
+ <span class="c">#_________________________________________________________________________________________</span>
+ <span class="k">def</span> <span class="nf">task_jobs_limit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">args</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Limit the number of concurrent jobs</span>
+<span class="sd"> """</span>
+ <span class="n">maximum_jobs</span><span class="p">,</span> <span class="n">name</span> <span class="o">=</span> <span class="p">(</span><span class="n">args</span> <span class="o">+</span> <span class="p">(</span><span class="bp">None</span><span class="p">,))[</span><span class="mi">0</span><span class="p">:</span><span class="mi">2</span><span class="p">]</span>
+ <span class="k">try</span><span class="p">:</span>
+ <span class="n">maximum_jobs_num</span> <span class="o">=</span> <span class="nb">int</span><span class="p">(</span><span class="n">maximum_jobs</span><span class="p">)</span>
+ <span class="k">assert</span><span class="p">(</span><span class="n">maximum_jobs_num</span> <span class="o">>=</span> <span class="mi">1</span><span class="p">)</span>
+ <span class="k">except</span><span class="p">:</span>
+ <span class="n">limit_name</span> <span class="o">=</span> <span class="s">", "</span> <span class="o">+</span> <span class="n">name</span> <span class="k">if</span> <span class="n">name</span> <span class="k">else</span> <span class="s">""</span>
+ <span class="k">raise</span> <span class="n">JobsLimitArgumentError</span><span class="p">((</span><span class="s">'In @jobs_limit(</span><span class="si">%s%s</span><span class="s">), the limit '</span>
+ <span class="s">'must be an integer number greater than or '</span>
+ <span class="s">'equal to 1'</span><span class="p">)</span> <span class="o">%</span>
+ <span class="p">(</span><span class="n">maximum_jobs_num</span><span class="p">,</span> <span class="n">limit_name</span><span class="p">))</span>
+ <span class="k">if</span> <span class="n">name</span> <span class="o">!=</span> <span class="bp">None</span><span class="p">:</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">semaphore_name</span> <span class="o">=</span> <span class="n">name</span>
+ <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">semaphore_name</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">job_limit_semaphores</span><span class="p">:</span>
+ <span class="n">curr_maximum_jobs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">job_limit_semaphores</span><span class="p">[</span><span class="bp">self</span><span class="o">.</span><span class="n">semaphore_name</span><span class="p">]</span>
+ <span class="k">if</span> <span class="n">curr_maximum_jobs</span> <span class="o">!=</span> <span class="n">maximum_jobs_num</span><span class="p">:</span>
+ <span class="k">raise</span> <span class="n">JobsLimitArgumentError</span><span class="p">((</span><span class="s">'@jobs_limit(</span><span class="si">%d</span><span class="s">, "</span><span class="si">%s</span><span class="s">") cannot '</span> <span class="o">+</span>
+ <span class="s">'re-defined with a different limit of </span><span class="si">%d</span><span class="s">'</span><span class="p">)</span> <span class="o">%</span>
+ <span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">semaphore_name</span><span class="p">,</span> <span class="n">curr_maximum_jobs</span><span class="p">,</span>
+ <span class="n">maximum_jobs_num</span><span class="p">))</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="c">#</span>
+ <span class="c"># save semaphore and limit</span>
+ <span class="c">#</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">job_limit_semaphores</span><span class="p">[</span><span class="bp">self</span><span class="o">.</span><span class="n">semaphore_name</span><span class="p">]</span> <span class="o">=</span> <span class="n">maximum_jobs_num</span>
+
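
A sketch of @jobs_limit as parsed above: the first argument is the maximum number of concurrent jobs, the optional second a semaphore name that can be shared between tasks. Names are illustrative.

    from ruffus import jobs_limit, transform, suffix

    @jobs_limit(2, "disk_intensive")     # at most 2 of these jobs run at once
    @transform("*.bam", suffix(".bam"), ".sorted.bam")
    def sort_alignments(input_file, output_file):
        open(output_file, "w").close()

    # pipeline_run([sort_alignments], multiprocess=8)  # other tasks may still use 8 workers
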
+
+ <span class="c">#_________________________________________________________________________________________</span>
+
+ <span class="c"># task_active_if</span>
+
+ <span class="c">#_________________________________________________________________________________________</span>
+ <span class="k">def</span> <span class="nf">task_active_if</span> <span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">active_if_checks</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> If any of active_checks is False or returns False, then the task is</span>
+<span class="sd"> marked as "inactive" and its outputs removed.</span>
+<span class="sd"> """</span>
+ <span class="c">#print 'job is active:', active_checks, [</span>
+ <span class="c"># arg() if isinstance(arg, collections.Callable) else arg</span>
+ <span class="c"># for arg in active_checks]</span>
+ <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">active_if_checks</span> <span class="o">==</span> <span class="bp">None</span><span class="p">:</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">active_if_checks</span> <span class="o">=</span> <span class="p">[]</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">active_if_checks</span><span class="o">.</span><span class="n">extend</span><span class="p">(</span><span class="n">active_if_checks</span><span class="p">)</span>
+ <span class="k">print</span> <span class="bp">self</span><span class="o">.</span><span class="n">active_if_checks</span>
+
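
A sketch of @active_if, whose arguments task_active_if() above accumulates: each argument may be a plain value or a callable, and callables are re-evaluated every time the pipeline is run. The flag below is illustrative.

    from ruffus import active_if, files

    run_optional_qc = False                  # e.g. set from a command-line option

    @active_if(lambda: run_optional_qc)      # task is skipped while this returns False
    @files("sample.bam", "sample.qc")
    def quality_control(input_file, output_file):
        open(output_file, "w").close()
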
+
+<span class="k">class</span> <span class="nc">task_encoder</span><span class="p">(</span><span class="n">json</span><span class="o">.</span><span class="n">JSONEncoder</span><span class="p">):</span>
+ <span class="k">def</span> <span class="nf">default</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">obj</span><span class="p">):</span>
+ <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">obj</span><span class="p">,</span> <span class="nb">set</span><span class="p">):</span>
+ <span class="k">return</span> <span class="nb">list</span><span class="p">(</span><span class="n">obj</span><span class="p">)</span>
+ <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">obj</span><span class="p">,</span> <span class="n">defaultdict</span><span class="p">):</span>
+ <span class="k">return</span> <span class="nb">dict</span><span class="p">(</span><span class="n">obj</span><span class="p">)</span>
+ <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">obj</span><span class="p">,</span> <span class="n">_task</span><span class="p">):</span>
+ <span class="k">return</span> <span class="n">obj</span><span class="o">.</span><span class="n">_name</span> <span class="c">#, _task.action_names[obj.action_task], obj._description]</span>
+ <span class="k">return</span> <span class="n">json</span><span class="o">.</span><span class="n">JSONEncoder</span><span class="o">.</span><span class="n">default</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">obj</span><span class="p">)</span>
+
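
For illustration, and assuming task_encoder can be imported from ruffus.task, this is the kind of conversion it performs when task state is dumped as JSON: sets become lists, defaultdicts become plain dicts, and _task objects are written out by name.

    import json
    from collections import defaultdict
    from ruffus.task import task_encoder    # assumed import path for this module

    counts = defaultdict(int)
    counts["completed_jobs"] += 3
    print(json.dumps({"tasks": set(["align", "sort"]), "counts": counts},
                     cls=task_encoder))
    # sets -> lists, defaultdict -> dict; _task instances would appear as their names
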
+
+
+
+
+
+
+<span class="c">#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888</span>
+
+<span class="c"># Functions</span>
+
+
+<span class="c">#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888</span>
+<span class="c">#_________________________________________________________________________________________</span>
+
+<span class="c"># link_task_names_to_functions</span>
+
+<span class="c">#_________________________________________________________________________________________</span>
+<span class="k">def</span> <span class="nf">link_task_names_to_functions</span> <span class="p">():</span>
+ <span class="sd">"""</span>
+<span class="sd"> Make sure all tasks in dependency list are linked to real functions</span>
+<span class="sd"> Call this before running anything else</span>
+<span class="sd"> """</span>
+
+ <span class="k">for</span> <span class="n">n</span> <span class="ow">in</span> <span class="n">node</span><span class="o">.</span><span class="n">_all_nodes</span><span class="p">:</span>
+ <span class="k">if</span> <span class="n">n</span><span class="o">.</span><span class="n">user_defined_work_func</span> <span class="o">==</span> <span class="bp">None</span><span class="p">:</span>
+ <span class="n">dependent_display_task_name</span> <span class="o">=</span> <span class="n">n</span><span class="o">.</span><span class="n">_inward</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">get_task_name</span><span class="p">()</span>
+ <span class="k">if</span> <span class="n">n</span><span class="o">.</span><span class="n">_module_name</span> <span class="ow">in</span> <span class="n">sys</span><span class="o">.</span><span class="n">modules</span><span class="p">:</span>
+ <span class="n">module</span> <span class="o">=</span> <span class="n">sys</span><span class="o">.</span><span class="n">modules</span><span class="p">[</span><span class="n">n</span><span class="o">.</span><span class="n">_module_name</span><span class="p">]</span>
+ <span class="k">if</span> <span class="nb">hasattr</span><span class="p">(</span><span class="n">module</span><span class="p">,</span> <span class="n">n</span><span class="o">.</span><span class="n">_func_name</span><span class="p">):</span>
+ <span class="n">n</span><span class="o">.</span><span class="n">user_defined_work_func</span> <span class="o">=</span> <span class="nb">getattr</span><span class="p">(</span><span class="n">module</span><span class="p">,</span> <span class="n">n</span><span class="o">.</span><span class="n">_func_name</span><span class="p">)</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="k">raise</span> <span class="n">error_decorator_args</span><span class="p">((</span><span class="s">"Module '</span><span class="si">%s</span><span class="s">' has no function '</span><span class="si">%s</span><span class="s">' in "</span> <span class="o">+</span>
+ <span class="s">"</span><span class="se">\n</span><span class="s">@task_follows('</span><span class="si">%s</span><span class="s">')</span><span class="se">\n</span><span class="s">def </span><span class="si">%s</span><span class="s">..."</span><span class="p">)</span> <span class="o">%</span>
+ <span class="p">(</span><span class="n">n</span><span class="o">.</span><span class="n">_module_name</span><span class="p">,</span> <span class="n">n</span><span class="o">.</span><span class="n">_func_name</span><span class="p">,</span> <span class="n">n</span><span class="o">.</span><span class="n">get_task_name</span><span class="p">(),</span> <span class="n">dependent_display_task_name</span><span class="p">))</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="k">raise</span> <span class="n">error_decorator_args</span><span class="p">(</span><span class="s">"Module '</span><span class="si">%s</span><span class="s">' not found in "</span> <span class="o">+</span>
+ <span class="s">"</span><span class="se">\n</span><span class="s">@task_follows('</span><span class="si">%s</span><span class="s">')</span><span class="se">\n</span><span class="s">def </span><span class="si">%s</span><span class="s">..."</span> <span class="o">%</span>
+ <span class="p">(</span><span class="n">n</span><span class="o">.</span><span class="n">_module_name</span><span class="p">,</span> <span class="n">n</span><span class="o">.</span><span class="n">get_task_name</span><span class="p">(),</span> <span class="n">dependent_display_task_name</span><span class="p">))</span>
+
+
+ <span class="c">#</span>
+ <span class="c"># some jobs single state status mirrors parent's state</span>
+ <span class="c"># and parent task not known until know</span>
+ <span class="c">#</span>
+ <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">n</span><span class="o">.</span><span class="n">_single_job_single_output</span><span class="p">,</span> <span class="n">_task</span><span class="p">):</span>
+ <span class="n">n</span><span class="o">.</span><span class="n">_single_job_single_output</span> <span class="o">=</span> <span class="n">n</span><span class="o">.</span><span class="n">_single_job_single_output</span><span class="o">.</span><span class="n">_single_job_single_output</span>
+
+<span class="c">#_________________________________________________________________________________________</span>
+
+<span class="c"># update_checksum_level_on_tasks</span>
+
+<span class="c">#_________________________________________________________________________________________</span>
+<span class="k">def</span> <span class="nf">update_checksum_level_on_tasks</span> <span class="p">(</span><span class="n">checksum_level</span><span class="p">):</span>
+ <span class="sd">"""Reset the checksum level for all tasks"""</span>
+ <span class="k">for</span> <span class="n">n</span> <span class="ow">in</span> <span class="n">node</span><span class="o">.</span><span class="n">_all_nodes</span><span class="p">:</span>
+ <span class="n">n</span><span class="o">.</span><span class="n">checksum_level</span> <span class="o">=</span> <span class="n">checksum_level</span>
+
+
+<span class="c">#_________________________________________________________________________________________</span>
+
+<span class="c"># update_active_states_for_all_tasks</span>
+
+<span class="c">#_________________________________________________________________________________________</span>
+<span class="k">def</span> <span class="nf">update_active_states_for_all_tasks</span> <span class="p">():</span>
+ <span class="sd">"""</span>
+
+<span class="sd"> @active_if decorated tasks can change their active state every time</span>
+<span class="sd"> pipeline_run / pipeline_printout / pipeline_printout_graph is called</span>
+
+<span class="sd"> update_active_states_for_all_tasks ()</span>
+
+<span class="sd"> """</span>
+ <span class="k">for</span> <span class="n">n</span> <span class="ow">in</span> <span class="n">node</span><span class="o">.</span><span class="n">_all_nodes</span><span class="p">:</span>
+ <span class="n">n</span><span class="o">.</span><span class="n">update_active_state</span><span class="p">()</span>
+
+<span class="c">#_________________________________________________________________________________________</span>
+
+<span class="c"># task_names_to_tasks</span>
+
+<span class="c">#_________________________________________________________________________________________</span>
+<span class="k">def</span> <span class="nf">task_names_to_tasks</span> <span class="p">(</span><span class="n">task_description</span><span class="p">,</span> <span class="n">task_names</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Given a list of task names, look up the corresponding tasks</span>
+<span class="sd"> Will just pass through if the task_name is already a task</span>
+<span class="sd"> """</span>
+
+ <span class="c">#</span>
+ <span class="c"># In case we are given a single item instead of a list</span>
+ <span class="c"># accepts unicode</span>
+ <span class="c">#</span>
+ <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">task_names</span><span class="p">,</span> <span class="nb">basestring</span><span class="p">)</span> <span class="ow">or</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">task_names</span><span class="p">,</span> <span class="n">collections</span><span class="o">.</span><span class="n">Callable</span><span class="p">):</span>
+ <span class="c">#if isinstance(task_names, basestring) or type(task_names) == types.FunctionType:</span>
+ <span class="n">task_names</span> <span class="o">=</span> <span class="p">[</span><span class="n">task_names</span><span class="p">]</span>
+
+ <span class="n">task_nodes</span> <span class="o">=</span> <span class="p">[]</span>
+ <span class="k">for</span> <span class="n">task_name</span> <span class="ow">in</span> <span class="n">task_names</span><span class="p">:</span>
+
+ <span class="c"># Is this already a function, don't do mapping if already is task</span>
+ <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">task_name</span><span class="p">,</span> <span class="n">collections</span><span class="o">.</span><span class="n">Callable</span><span class="p">):</span>
+ <span class="c">#if type(task_name) == types.FunctionType:</span>
+ <span class="k">if</span> <span class="nb">hasattr</span><span class="p">(</span><span class="n">task_name</span><span class="p">,</span> <span class="s">"pipeline_task"</span><span class="p">):</span>
+ <span class="n">task_nodes</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">task_name</span><span class="o">.</span><span class="n">pipeline_task</span><span class="p">)</span>
+ <span class="k">continue</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="c"># blow up for unwrapped function</span>
+ <span class="k">raise</span> <span class="n">error_function_is_not_a_task</span><span class="p">((</span><span class="s">"Function def </span><span class="si">%s</span><span class="s">(...): is not a pipelined task in ruffus."</span> <span class="o">%</span>
+ <span class="n">task_name</span><span class="o">.</span><span class="n">__name__</span><span class="p">)</span> <span class="o">+</span>
+ <span class="s">" To include this, this function needs to have a ruffus "</span><span class="o">+</span>
+ <span class="s">"decoration like '@parallel', '@files', or named as a dependent "</span><span class="o">+</span>
+ <span class="s">"of some other Ruffus task function via '@follows'."</span><span class="p">)</span>
+
+ <span class="c"># assumes is some kind of string</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="n">node</span><span class="o">.</span><span class="n">is_node</span><span class="p">(</span><span class="n">task_name</span><span class="p">):</span>
+ <span class="k">if</span> <span class="n">node</span><span class="o">.</span><span class="n">is_node</span><span class="p">(</span><span class="s">"__main__."</span> <span class="o">+</span> <span class="n">task_name</span><span class="p">):</span>
+ <span class="n">task_nodes</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">node</span><span class="o">.</span><span class="n">lookup_node_from_name</span><span class="p">(</span><span class="s">"__main__."</span> <span class="o">+</span> <span class="n">task_name</span><span class="p">))</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="k">raise</span> <span class="n">error_node_not_task</span><span class="p">(</span><span class="s">"</span><span class="si">%s</span><span class="s"> task '</span><span class="si">%s</span><span class="s">' is not a pipelined task in Ruffus. Have you mis-spelt the function name?"</span> <span class="o">%</span> <span class="p">(</span>
+ <span class="n">task_description</span><span class="p">,</span> <span class="n">task_name</span><span class="p">))</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="n">task_nodes</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">node</span><span class="o">.</span><span class="n">lookup_node_from_name</span><span class="p">(</span><span class="n">task_name</span><span class="p">))</span>
+ <span class="k">return</span> <span class="n">task_nodes</span>
+
+
+
+
+<span class="c">#_________________________________________________________________________________________</span>
+
+<span class="c"># pipeline_printout_in_dot_format</span>
+
+<span class="c">#_________________________________________________________________________________________</span>
+<div class="viewcode-block" id="pipeline_printout_graph"><a class="viewcode-back" href="../../task.html#ruffus.task.pipeline_printout_graph">[docs]</a><span class="k">def</span> <span class="nf">pipeline_printout_graph</span> <span class="p">(</span><span class="n">stream</span><span class="p">,</span>
+ <span class="n">output_format</span> <span class="o">=</span> <span class="bp">None</span><span class="p">,</span>
+ <span class="n">target_tasks</span> <span class="o">=</span> <span class="p">[],</span>
+ <span class="n">forcedtorun_tasks</span> <span class="o">=</span> <span class="p">[],</span>
+ <span class="n">draw_vertically</span> <span class="o">=</span> <span class="bp">True</span><span class="p">,</span>
+ <span class="n">ignore_upstream_of_target</span> <span class="o">=</span> <span class="bp">False</span><span class="p">,</span>
+ <span class="n">skip_uptodate_tasks</span> <span class="o">=</span> <span class="bp">False</span><span class="p">,</span>
+ <span class="n">gnu_make_maximal_rebuild_mode</span> <span class="o">=</span> <span class="bp">True</span><span class="p">,</span>
+ <span class="n">test_all_task_for_update</span> <span class="o">=</span> <span class="bp">True</span><span class="p">,</span>
+ <span class="n">no_key_legend</span> <span class="o">=</span> <span class="bp">False</span><span class="p">,</span>
+ <span class="n">minimal_key_legend</span> <span class="o">=</span> <span class="bp">True</span><span class="p">,</span>
+ <span class="n">user_colour_scheme</span> <span class="o">=</span> <span class="bp">None</span><span class="p">,</span>
+ <span class="n">pipeline_name</span> <span class="o">=</span> <span class="s">"Pipeline:"</span><span class="p">,</span>
+ <span class="n">size</span> <span class="o">=</span> <span class="p">(</span><span class="mi">11</span><span class="p">,</span><span class="mi">8</span><span class="p">),</span>
+ <span class="n">dpi</span> <span class="o">=</span> <span class="mi">120</span><span class="p">,</span>
+ <span class="n">runtime_data</span> <span class="o">=</span> <span class="bp">None</span><span class="p">,</span>
+ <span class="n">checksum_level</span> <span class="o">=</span> <span class="bp">None</span><span class="p">,</span>
+ <span class="n">history_file</span> <span class="o">=</span> <span class="bp">None</span><span class="p">):</span>
+ <span class="c"># Remember to add further extra parameters here to "extra_pipeline_printout_graph_options" inside cmdline.py</span>
+ <span class="c"># This will forward extra parameters from the command line to pipeline_printout_graph</span>
+ <span class="sd">"""</span>
+<span class="sd"> print out pipeline dependencies in various formats</span>
+
+<span class="sd"> :param stream: where to print to</span>
+<span class="sd"> :type stream: file-like object with ``write()`` function</span>
+<span class="sd"> :param output_format: ["dot", "jpg", "svg", "ps", "png"]. All but the first depends on the `dot <http://www.graphviz.org>`_ program.</span>
+<span class="sd"> :param target_tasks: targets task functions which will be run if they are out-of-date.</span>
+<span class="sd"> :param forcedtorun_tasks: task functions which will be run whether or not they are out-of-date.</span>
+<span class="sd"> :param draw_vertically: Top to bottom instead of left to right.</span>
+<span class="sd"> :param ignore_upstream_of_target: Don't draw upstream tasks of targets.</span>
+<span class="sd"> :param skip_uptodate_tasks: Don't draw up-to-date tasks if possible.</span>
+<span class="sd"> :param gnu_make_maximal_rebuild_mode: Defaults to re-running *all* out-of-date tasks. Runs minimal</span>
+<span class="sd"> set to build targets if set to ``True``. Use with caution.</span>
+<span class="sd"> :param test_all_task_for_update: Ask all task functions if they are up-to-date.</span>
+<span class="sd"> :param no_key_legend: Don't draw key/legend for graph.</span>
+<span class="sd"> :param checksum_level: Several options for checking up-to-dateness are available: Default is level 1.</span>
+<span class="sd"> level 0 : Use only file timestamps</span>
+<span class="sd"> level 1 : above, plus timestamp of successful job completion</span>
+<span class="sd"> level 2 : above, plus a checksum of the pipeline function body</span>
+<span class="sd"> level 3 : above, plus a checksum of the pipeline function default arguments and the additional arguments passed in by task decorators</span>
+<span class="sd"> """</span>
+
+
+ <span class="k">if</span> <span class="n">checksum_level</span> <span class="ow">is</span> <span class="bp">None</span><span class="p">:</span>
+ <span class="n">checksum_level</span> <span class="o">=</span> <span class="n">get_default_checksum_level</span><span class="p">()</span>
+
+ <span class="n">link_task_names_to_functions</span> <span class="p">()</span>
+ <span class="n">update_checksum_level_on_tasks</span> <span class="p">(</span><span class="n">checksum_level</span><span class="p">)</span>
+
+ <span class="c">#</span>
+ <span class="c"># @active_if decorated tasks can change their active state every time</span>
+ <span class="c"># pipeline_run / pipeline_printout / pipeline_printout_graph is called</span>
+ <span class="c">#</span>
+ <span class="n">update_active_states_for_all_tasks</span> <span class="p">()</span>
+
+ <span class="c">#</span>
+ <span class="c"># run time data</span>
+ <span class="c">#</span>
+ <span class="k">if</span> <span class="n">runtime_data</span> <span class="o">==</span> <span class="bp">None</span><span class="p">:</span>
+ <span class="n">runtime_data</span> <span class="o">=</span> <span class="p">{}</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">runtime_data</span><span class="p">,</span> <span class="nb">dict</span><span class="p">):</span>
+ <span class="k">raise</span> <span class="ne">Exception</span><span class="p">(</span><span class="s">"pipeline_run parameter runtime_data should be a dictionary of "</span>
+ <span class="s">"values passes to jobs at run time."</span><span class="p">)</span>
+
+ <span class="c">#</span>
+ <span class="c"># If we aren't using checksums, and history file hasn't been specified,</span>
+ <span class="c"># we might be a bit surprised to find Ruffus writing to a sqlite db anyway.</span>
+ <span class="c"># Let us just use a in memory db which will be thrown away</span>
+ <span class="c"># Of course, if history_file is specified, we presume you know what you are doing</span>
+ <span class="c">#</span>
+ <span class="k">if</span> <span class="n">checksum_level</span> <span class="o">==</span> <span class="n">CHECKSUM_FILE_TIMESTAMPS</span> <span class="ow">and</span> <span class="n">history_file</span> <span class="o">==</span> <span class="bp">None</span><span class="p">:</span>
+ <span class="n">history_file</span> <span class="o">=</span> <span class="s">':memory:'</span>
+
+ <span class="c">#</span>
+ <span class="c"># load previous job history if it exists, otherwise create an empty history</span>
+ <span class="c">#</span>
+ <span class="n">job_history</span> <span class="o">=</span> <span class="n">open_job_history</span> <span class="p">(</span><span class="n">history_file</span><span class="p">)</span>
+
+ <span class="c">#</span>
+ <span class="c"># target jobs</span>
+ <span class="c">#</span>
+ <span class="k">if</span> <span class="n">target_tasks</span> <span class="o">==</span> <span class="bp">None</span><span class="p">:</span>
+ <span class="n">target_tasks</span> <span class="o">=</span> <span class="p">[]</span>
+ <span class="k">if</span> <span class="n">forcedtorun_tasks</span> <span class="o">==</span> <span class="bp">None</span><span class="p">:</span>
+ <span class="n">forcedtorun_tasks</span> <span class="o">=</span> <span class="p">[]</span>
+ <span class="n">target_tasks</span> <span class="o">=</span> <span class="n">task_names_to_tasks</span> <span class="p">(</span><span class="s">"Target"</span><span class="p">,</span> <span class="n">target_tasks</span><span class="p">)</span>
+ <span class="n">forcedtorun_tasks</span> <span class="o">=</span> <span class="n">task_names_to_tasks</span> <span class="p">(</span><span class="s">"Forced to run"</span><span class="p">,</span> <span class="n">forcedtorun_tasks</span><span class="p">)</span>
+
+
+ <span class="p">(</span><span class="n">topological_sorted</span><span class="p">,</span> <span class="n">ignore_param1</span><span class="p">,</span> <span class="n">ignore_param2</span><span class="p">,</span>
+ <span class="n">ignore_param3</span><span class="p">)</span> <span class="o">=</span> <span class="n">topologically_sorted_nodes</span><span class="p">(</span><span class="n">target_tasks</span><span class="p">,</span> <span class="n">forcedtorun_tasks</span><span class="p">,</span>
+ <span class="n">gnu_make_maximal_rebuild_mode</span><span class="p">,</span>
+ <span class="n">extra_data_for_signal</span> <span class="o">=</span> <span class="p">[</span><span class="n">t_verbose_logger</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="bp">None</span><span class="p">,</span> <span class="n">runtime_data</span><span class="p">),</span> <span class="n">job_history</span><span class="p">])</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="nb">len</span><span class="p">(</span><span class="n">target_tasks</span><span class="p">):</span>
+ <span class="n">target_tasks</span> <span class="o">=</span> <span class="n">topological_sorted</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">:]</span>
+
+
+
+ <span class="c"># open file if (unicode?) string</span>
+ <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">stream</span><span class="p">,</span> <span class="nb">basestring</span><span class="p">):</span>
+ <span class="n">stream</span> <span class="o">=</span> <span class="nb">open</span><span class="p">(</span><span class="n">stream</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+
+ <span class="c"># derive format automatically from name</span>
+ <span class="k">if</span> <span class="n">output_format</span> <span class="o">==</span> <span class="bp">None</span><span class="p">:</span>
+ <span class="n">output_format</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">splitext</span><span class="p">(</span><span class="n">stream</span><span class="o">.</span><span class="n">name</span><span class="p">)[</span><span class="mi">1</span><span class="p">]</span><span class="o">.</span><span class="n">lstrip</span><span class="p">(</span><span class="s">"."</span> [...]
+
+
+
+ <span class="n">graph_printout</span> <span class="p">(</span> <span class="n">stream</span><span class="p">,</span>
+ <span class="n">output_format</span><span class="p">,</span>
+ <span class="n">target_tasks</span><span class="p">,</span>
+ <span class="n">forcedtorun_tasks</span><span class="p">,</span>
+ <span class="n">draw_vertically</span><span class="p">,</span>
+ <span class="n">ignore_upstream_of_target</span><span class="p">,</span>
+ <span class="n">skip_uptodate_tasks</span><span class="p">,</span>
+ <span class="n">gnu_make_maximal_rebuild_mode</span><span class="p">,</span>
+ <span class="n">test_all_task_for_update</span><span class="p">,</span>
+ <span class="n">no_key_legend</span><span class="p">,</span>
+ <span class="n">minimal_key_legend</span><span class="p">,</span>
+ <span class="n">user_colour_scheme</span><span class="p">,</span>
+ <span class="n">pipeline_name</span><span class="p">,</span>
+ <span class="n">size</span><span class="p">,</span>
+ <span class="n">dpi</span><span class="p">,</span>
+ <span class="n">extra_data_for_signal</span> <span class="o">=</span> <span class="p">[</span><span class="n">t_verbose_logger</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="bp">None</span><span class="p">,</span> <span class="n">runtime_data</span><span class="p">),</span> <span class="n">job_history</span><span class="p">])</span>
+
+
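# Illustrative usage sketch (not part of the upstream source): writing the
# dependency graph of a tiny two-task pipeline to an SVG file.  Rendering to
# "svg" assumes the graphviz "dot" program is installed; the file names used
# here are hypothetical.
from ruffus import originate, transform, suffix, pipeline_printout_graph

@originate(["a.start", "b.start"])
def create_inputs(output_file):
    open(output_file, "w").close()

@transform(create_inputs, suffix(".start"), ".finished")
def process(input_file, output_file):
    open(output_file, "w").close()

pipeline_printout_graph("flowchart.svg",   # stream: file name or open file
                        "svg",             # output_format
                        [process])         # target_tasks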
+
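# Illustrative sketch (not part of the upstream source): the ':memory:' default
# above puts the job history into a throw-away in-memory SQLite database when
# only file timestamps are checked and no history file was requested.  Minimal
# stand-in using only the sqlite3 standard library; the fallback file name is
# hypothetical.
import sqlite3

def open_history_db(history_file, use_timestamps_only):
    if history_file is None:
        history_file = ":memory:" if use_timestamps_only else ".job_history.sqlite"
    return sqlite3.connect(history_file)

db = open_history_db(None, use_timestamps_only=True)   # discarded on close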
+<span class="c">#_________________________________________________________________________________________</span>
+
+<span class="c"># pipeline_printout</span>
+
+<span class="c">#_________________________________________________________________________________________</span></div>
+<div class="viewcode-block" id="pipeline_printout"><a class="viewcode-back" href="../../task.html#ruffus.task.pipeline_printout">[docs]</a><span class="k">def</span> <span class="nf">pipeline_printout</span><span class="p">(</span> <span class="n">output_stream</span> <span class="o">=</span> <span class="bp">None</span><span class="p">,</span>
+ <span class="n">target_tasks</span> <span class="o">=</span> <span class="p">[],</span>
+ <span class="n">forcedtorun_tasks</span> <span class="o">=</span> <span class="p">[],</span>
+ <span class="n">verbose</span> <span class="o">=</span> <span class="mi">1</span><span class="p">,</span>
+ <span class="n">indent</span> <span class="o">=</span> <span class="mi">4</span><span class="p">,</span>
+ <span class="n">gnu_make_maximal_rebuild_mode</span> <span class="o">=</span> <span class="bp">True</span><span class="p">,</span>
+ <span class="n">wrap_width</span> <span class="o">=</span> <span class="mi">100</span><span class="p">,</span>
+ <span class="n">runtime_data</span> <span class="o">=</span> <span class="bp">None</span><span class="p">,</span>
+ <span class="n">checksum_level</span> <span class="o">=</span> <span class="bp">None</span><span class="p">,</span>
+ <span class="n">history_file</span> <span class="o">=</span> <span class="bp">None</span><span class="p">):</span>
+ <span class="c"># Remember to add further extra parameters here to "extra_pipeline_printout_options" inside cmdline.py</span>
+ <span class="c"># This will forward extra parameters from the command line to pipeline_printout</span>
+ <span class="sd">"""</span>
+<span class="sd"> Printouts the parts of the pipeline which will be run</span>
+
+<span class="sd"> Because the parameters of some jobs depend on the results of previous tasks, this function</span>
+<span class="sd"> produces only the current snap-shot of task jobs. In particular, tasks which generate</span>
+<span class="sd"> variable number of inputs into following tasks will not produce the full range of jobs.</span>
+
+<span class="sd"> ::</span>
+
+<span class="sd"> verbose = 0 : nothing</span>
+<span class="sd"> verbose = 1 : print task name</span>
+<span class="sd"> verbose = 2 : print task description if exists</span>
+<span class="sd"> verbose = 3 : print job names for jobs to be run</span>
+<span class="sd"> verbose = 4 : print list of up-to-date tasks and job names for jobs to be run</span>
+<span class="sd"> verbose = 5 : print job names for all jobs whether up-to-date or not</span>
+
+<span class="sd"> :param output_stream: where to print to</span>
+<span class="sd"> :type output_stream: file-like object with ``write()`` function</span>
+<span class="sd"> :param target_tasks: targets task functions which will be run if they are out-of-date</span>
+<span class="sd"> :param forcedtorun_tasks: task functions which will be run whether or not they are out-of-date</span>
+<span class="sd"> :param verbose: level 0 : nothing</span>
+<span class="sd"> level 1 : logs task names and warnings</span>
+<span class="sd"> level 2 : logs task description if exists</span>
+<span class="sd"> level 3 : logs job names for jobs to be run</span>
+<span class="sd"> level 4 : logs list of up-to-date tasks and job names for jobs to be run</span>
+<span class="sd"> level 5 : logs job names for all jobs whether up-to-date or not</span>
+<span class="sd"> level 10: logs messages useful only for debugging ruffus pipeline code</span>
+<span class="sd"> :param indent: How much indentation for pretty format.</span>
+<span class="sd"> :param gnu_make_maximal_rebuild_mode: Defaults to re-running *all* out-of-date tasks. Runs minimal</span>
+<span class="sd"> set to build targets if set to ``True``. Use with caution.</span>
+<span class="sd"> :param wrap_width: The maximum length of each line</span>
+<span class="sd"> :param runtime_data: Experimental feature for passing data to tasks at run time</span>
+<span class="sd"> :param checksum_level: Several options for checking up-to-dateness are available: Default is level 1.</span>
+<span class="sd"> level 0 : Use only file timestamps</span>
+<span class="sd"> level 1 : above, plus timestamp of successful job completion</span>
+<span class="sd"> level 2 : above, plus a checksum of the pipeline function body</span>
+<span class="sd"> level 3 : above, plus a checksum of the pipeline function default arguments and the additional arguments passed in by task decorators</span>
+<span class="sd"> """</span>
+ <span class="k">if</span> <span class="n">verbose</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
+ <span class="k">return</span>
+
+ <span class="k">if</span> <span class="n">output_stream</span> <span class="o">==</span> <span class="bp">None</span><span class="p">:</span>
+ <span class="kn">import</span> <span class="nn">sys</span>
+ <span class="n">output_stream</span> <span class="o">=</span> <span class="n">sys</span><span class="o">.</span><span class="n">stdout</span>
+
+
+ <span class="k">if</span> <span class="ow">not</span> <span class="nb">hasattr</span><span class="p">(</span><span class="n">output_stream</span><span class="p">,</span> <span class="s">"write"</span><span class="p">):</span>
+ <span class="k">raise</span> <span class="ne">Exception</span><span class="p">(</span><span class="s">"The first parameter to pipeline_printout needs to be an output file, e.g. sys.stdout and not </span><span class="si">%s</span><span class="s">"</span> <span class="o">%</span> <span class="nb">str</span><span class="p">(</span><span class="n">output_stream</span><span class="p">))</span>
+
+ <span class="k">if</span> <span class="n">runtime_data</span> <span class="o">==</span> <span class="bp">None</span><span class="p">:</span>
+ <span class="n">runtime_data</span> <span class="o">=</span> <span class="p">{}</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">runtime_data</span><span class="p">,</span> <span class="nb">dict</span><span class="p">):</span>
+ <span class="k">raise</span> <span class="ne">Exception</span><span class="p">(</span><span class="s">"pipeline_run parameter runtime_data should be a dictionary of "</span>
+ <span class="s">"values passes to jobs at run time."</span><span class="p">)</span>
+
+ <span class="k">if</span> <span class="n">checksum_level</span> <span class="ow">is</span> <span class="bp">None</span><span class="p">:</span>
+ <span class="n">checksum_level</span> <span class="o">=</span> <span class="n">get_default_checksum_level</span><span class="p">()</span>
+
+ <span class="n">link_task_names_to_functions</span> <span class="p">()</span>
+ <span class="n">update_checksum_level_on_tasks</span><span class="p">(</span><span class="n">checksum_level</span><span class="p">)</span>
+
+ <span class="c">#</span>
+ <span class="c"># @active_if decorated tasks can change their active state every time</span>
+ <span class="c"># pipeline_run / pipeline_printout / pipeline_printout_graph is called</span>
+ <span class="c">#</span>
+ <span class="n">update_active_states_for_all_tasks</span> <span class="p">()</span>
+
+ <span class="c">#</span>
+ <span class="c"># target jobs</span>
+ <span class="c">#</span>
+ <span class="n">target_tasks</span> <span class="o">=</span> <span class="n">task_names_to_tasks</span> <span class="p">(</span><span class="s">"Target"</span><span class="p">,</span> <span class="n">target_tasks</span><span class="p">)</span>
+ <span class="n">forcedtorun_tasks</span> <span class="o">=</span> <span class="n">task_names_to_tasks</span> <span class="p">(</span><span class="s">"Forced to run"</span><span class="p">,</span> <span class="n">forcedtorun_tasks</span><span class="p">)</span>
+
+ <span class="n">logging_strm</span> <span class="o">=</span> <span class="n">t_verbose_logger</span><span class="p">(</span><span class="n">verbose</span><span class="p">,</span> <span class="n">t_stream_logger</span><span class="p">(</span><span class="n">output_stream</span><span class="p">),</span> <span class="n">runtime_data</span><span class="p">)</span>
+
+ <span class="c">#</span>
+ <span class="c"># If we aren't using checksums, and history file hasn't been specified,</span>
+ <span class="c"># we might be a bit surprised to find Ruffus writing to a sqlite db anyway.</span>
+ <span class="c"># Let us just use a in memory db which will be thrown away</span>
+ <span class="c"># Of course, if history_file is specified, we presume you know what you are doing</span>
+ <span class="c">#</span>
+ <span class="k">if</span> <span class="n">checksum_level</span> <span class="o">==</span> <span class="n">CHECKSUM_FILE_TIMESTAMPS</span> <span class="ow">and</span> <span class="n">history_file</span> <span class="o">==</span> <span class="bp">None</span><span class="p">:</span>
+ <span class="n">history_file</span> <span class="o">=</span> <span class="s">':memory:'</span>
+
+ <span class="c">#</span>
+ <span class="c"># load previous job history if it exists, otherwise create an empty history</span>
+ <span class="c">#</span>
+ <span class="n">job_history</span> <span class="o">=</span> <span class="n">open_job_history</span> <span class="p">(</span><span class="n">history_file</span><span class="p">)</span>
+
+ <span class="p">(</span><span class="n">topological_sorted</span><span class="p">,</span>
+ <span class="n">self_terminated_nodes</span><span class="p">,</span>
+ <span class="n">dag_violating_edges</span><span class="p">,</span>
+ <span class="n">dag_violating_nodes</span><span class="p">)</span> <span class="o">=</span> <span class="n">topologically_sorted_nodes</span><span class="p">(</span><span class="n">target_tasks</span><span class="p">,</span> <span class="n">forcedtorun_tasks</span><span class="p">,</span>
+ <span class="n">gnu_make_maximal_rebuild_mode</span><span class="p">,</span>
+ <span class="n">extra_data_for_signal</span> <span class="o">=</span> <span class="p">[</span><span class="n">t_verbose_logger</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="bp">None</span><span class="p">,</span> <span class="n">runtime_data</span><span class="p">),</span> <span class="n">job_history</span><span class="p">])</span>
+
+
+ <span class="c">#</span>
+ <span class="c"># raise error if DAG violating nodes</span>
+ <span class="c">#</span>
+ <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">dag_violating_nodes</span><span class="p">):</span>
+ <span class="n">dag_violating_tasks</span> <span class="o">=</span> <span class="s">", "</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">t</span><span class="o">.</span><span class="n">_name</span> <span class="k">for</span> <span class="n">t</span> <span class="ow">in</span> <span class="n">dag_violating_nodes</span><span class="p">)</span>
+
+ <span class="n">e</span> <span class="o">=</span> <span class="n">error_circular_dependencies</span><span class="p">(</span><span class="s">"Circular dependencies found in the "</span>
+ <span class="s">"pipeline involving one or more of (</span><span class="si">%s</span><span class="s">)"</span> <span class="o">%</span>
+ <span class="p">(</span><span class="n">dag_violating_tasks</span><span class="p">))</span>
+ <span class="k">raise</span> <span class="n">e</span>
+
+ <span class="n">wrap_indent</span> <span class="o">=</span> <span class="s">" "</span> <span class="o">*</span> <span class="p">(</span><span class="n">indent</span> <span class="o">+</span> <span class="mi">11</span><span class="p">)</span>
+
+ <span class="c">#</span>
+ <span class="c"># Get updated nodes as all_nodes - nodes_to_run</span>
+ <span class="c">#</span>
+ <span class="k">if</span> <span class="n">verbose</span> <span class="o">>=</span> <span class="mi">4</span><span class="p">:</span>
+ <span class="p">(</span><span class="n">all_tasks</span><span class="p">,</span> <span class="n">ignore_param1</span><span class="p">,</span> <span class="n">ignore_param2</span><span class="p">,</span>
+ <span class="n">ignore_param3</span><span class="p">)</span> <span class="o">=</span> <span class="n">topologically_sorted_nodes</span><span class="p">(</span><span class="n">target_tasks</span><span class="p">,</span> <span class="bp">True</span><span class="p">,</span>
+ <span class="n">gnu_make_maximal_rebuild_mode</span><span class="p">,</span>
+ <span class="n">extra_data_for_signal</span> <span class="o">=</span> <span class="p">[</span><span class="n">t_verbose_logger</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="bp">None</span><span class="p">,</span> <span class="n">runtime_data</span><span class="p">),</span> <span class="n">job_history</span><span class="p">])</span>
+
+ <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">all_tasks</span><span class="p">)</span> <span class="o">></span> <span class="nb">len</span><span class="p">(</span><span class="n">topological_sorted</span><span class="p">):</span>
+ <span class="n">output_stream</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="s">"</span><span class="se">\n</span><span class="s">"</span> <span class="o">+</span> <span class="s">"_"</span> <span class="o">*</span> <span class="mi">40</span> <span class="o">+</span> <span class="s">"</span><span class="se">\n</span><span class="s">Tasks which are up-to-date:</span><span class="se">\n\n</span><span class [...]
+ <span class="n">pipelined_tasks_to_run</span> <span class="o">=</span> <span class="nb">set</span><span class="p">(</span><span class="n">topological_sorted</span><span class="p">)</span>
+
+ <span class="k">for</span> <span class="n">t</span> <span class="ow">in</span> <span class="n">all_tasks</span><span class="p">:</span>
+ <span class="k">if</span> <span class="n">t</span> <span class="ow">in</span> <span class="n">pipelined_tasks_to_run</span><span class="p">:</span>
+ <span class="k">continue</span>
+ <span class="n">messages</span> <span class="o">=</span> <span class="n">t</span><span class="o">.</span><span class="n">printout</span><span class="p">(</span><span class="n">runtime_data</span><span class="p">,</span> <span class="n">t</span> <span class="ow">in</span> <span class="n">forcedtorun_tasks</span><span class="p">,</span> <span class="n">job_history</span><span class="p">,</span> <span class="n">verbose</span><span class="p">,</span> <span class="n">indent</s [...]
+ <span class="k">for</span> <span class="n">m</span> <span class="ow">in</span> <span class="n">messages</span><span class="p">:</span>
+ <span class="n">output_stream</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="n">textwrap</span><span class="o">.</span><span class="n">fill</span><span class="p">(</span><span class="n">m</span><span class="p">,</span> <span class="n">subsequent_indent</span> <span class="o">=</span> <span class="n">wrap_indent</span><span class="p">,</span> <span class="n">width</span> <span class="o">=</span> <span class="n">wrap_width [...]
+
+ <span class="n">output_stream</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="s">"</span><span class="se">\n</span><span class="s">"</span> <span class="o">+</span> <span class="s">"_"</span> <span class="o">*</span> <span class="mi">40</span> <span class="o">+</span> <span class="s">"</span><span class="se">\n</span><span class="s">Tasks which will be run:</span><span class="se">\n\n</span><span class="s">" [...]
+ <span class="k">for</span> <span class="n">t</span> <span class="ow">in</span> <span class="n">topological_sorted</span><span class="p">:</span>
+ <span class="n">messages</span> <span class="o">=</span> <span class="n">t</span><span class="o">.</span><span class="n">printout</span><span class="p">(</span><span class="n">runtime_data</span><span class="p">,</span> <span class="n">t</span> <span class="ow">in</span> <span class="n">forcedtorun_tasks</span><span class="p">,</span> <span class="n">job_history</span><span class="p">,</span> <span class="n">verbose</span><span class="p">,</span> <span class="n">indent</span><spa [...]
+ <span class="k">for</span> <span class="n">m</span> <span class="ow">in</span> <span class="n">messages</span><span class="p">:</span>
+ <span class="n">output_stream</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="n">textwrap</span><span class="o">.</span><span class="n">fill</span><span class="p">(</span><span class="n">m</span><span class="p">,</span> <span class="n">subsequent_indent</span> <span class="o">=</span> <span class="n">wrap_indent</span><span class="p">,</span> <span class="n">width</span> <span class="o">=</span> <span class="n">wrap_width</span>< [...]
+
+ <span class="k">if</span> <span class="n">verbose</span><span class="p">:</span>
+ <span class="n">output_stream</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="s">"_"</span> <span class="o">*</span> <span class="mi">40</span> <span class="o">+</span> <span class="s">"</span><span class="se">\n</span><span class="s">"</span><span class="p">)</span>
+
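# Illustrative usage sketch (not part of the upstream source): previewing which
# jobs would be run, without running anything.  verbose=1 prints task names,
# verbose=3 also lists out-of-date jobs, verbose=5 lists every job.  The
# one-task pipeline below is hypothetical.
import sys
from ruffus import originate, pipeline_printout

@originate(["demo.out"])
def make_demo_file(output_file):
    open(output_file, "w").close()

pipeline_printout(sys.stdout, [make_demo_file], verbose=3)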
+<span class="c">#_________________________________________________________________________________________</span>
+
+<span class="c"># get_semaphore</span>
+
+<span class="c">#_________________________________________________________________________________________</span></div>
+<span class="k">def</span> <span class="nf">get_semaphore</span> <span class="p">(</span><span class="n">t</span><span class="p">,</span> <span class="n">job_limit_semaphores</span><span class="p">,</span> <span class="n">syncmanager</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> return semaphore to limit the number of concurrent jobs</span>
+<span class="sd"> """</span>
+ <span class="c">#</span>
+ <span class="c"># Is this task limited in the number of jobs?</span>
+ <span class="c">#</span>
+ <span class="k">if</span> <span class="n">t</span><span class="o">.</span><span class="n">semaphore_name</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">t</span><span class="o">.</span><span class="n">job_limit_semaphores</span><span class="p">:</span>
+ <span class="k">return</span> <span class="bp">None</span>
+
+
+ <span class="c">#</span>
+ <span class="c"># create semaphore if not yet created</span>
+ <span class="c">#</span>
+ <span class="k">if</span> <span class="n">t</span><span class="o">.</span><span class="n">semaphore_name</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">job_limit_semaphores</span><span class="p">:</span>
+ <span class="n">maximum_jobs_num</span> <span class="o">=</span> <span class="n">t</span><span class="o">.</span><span class="n">job_limit_semaphores</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">semaphore_name</span><span class="p">]</span>
+ <span class="n">job_limit_semaphores</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">semaphore_name</span><span class="p">]</span> <span class="o">=</span> <span class="n">syncmanager</span><span class="o">.</span><span class="n">BoundedSemaphore</span><span class="p">(</span><span class="n">maximum_jobs_num</span><span class="p">)</span>
+ <span class="k">return</span> <span class="n">job_limit_semaphores</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">semaphore_name</span><span class="p">]</span>
+
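# Illustrative sketch (not part of the upstream source): how a BoundedSemaphore
# obtained from a multiprocessing manager caps the number of jobs of one task
# that run at the same time, which is what @jobs_limit relies on.  The names
# run_one_job and N_CONCURRENT are hypothetical.
import multiprocessing, time

N_CONCURRENT = 2

def run_one_job(semaphore, job_number):
    with semaphore:                      # at most N_CONCURRENT workers inside
        time.sleep(0.1)                  # stand-in for real work
        print("job %d done" % job_number)

if __name__ == "__main__":
    syncmanager = multiprocessing.Manager()
    semaphore = syncmanager.BoundedSemaphore(N_CONCURRENT)
    jobs = [multiprocessing.Process(target=run_one_job, args=(semaphore, i))
            for i in range(6)]
    for p in jobs:
        p.start()
    for p in jobs:
        p.join()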
+<span class="c">#_________________________________________________________________________________________</span>
+<span class="c">#</span>
+<span class="c"># Parameter generator for all jobs / tasks</span>
+<span class="c">#</span>
+<span class="c">#________________________________________________________________________________________</span>
+<span class="k">def</span> <span class="nf">make_job_parameter_generator</span> <span class="p">(</span><span class="n">incomplete_tasks</span><span class="p">,</span> <span class="n">task_parents</span><span class="p">,</span> <span class="n">logger</span><span class="p">,</span> <span class="n">forcedtorun_tasks</span><span class="p">,</span>
+ <span class="n">task_with_completed_job_q</span><span class="p">,</span> <span class="n">runtime_data</span><span class="p">,</span> <span class="n">verbose</span><span class="p">,</span>
+ <span class="n">syncmanager</span><span class="p">,</span>
+ <span class="n">touch_files_only</span><span class="p">,</span> <span class="n">job_history</span><span class="p">):</span>
+
+ <span class="n">inprogress_tasks</span> <span class="o">=</span> <span class="nb">set</span><span class="p">()</span>
+ <span class="n">job_limit_semaphores</span> <span class="o">=</span> <span class="nb">dict</span><span class="p">()</span>
+
+ <span class="k">def</span> <span class="nf">parameter_generator</span><span class="p">():</span>
+ <span class="n">count_remaining_jobs</span> <span class="o">=</span> <span class="n">defaultdict</span><span class="p">(</span><span class="nb">int</span><span class="p">)</span>
+ <span class="n">log_at_level</span> <span class="p">(</span><span class="n">logger</span><span class="p">,</span> <span class="mi">10</span><span class="p">,</span> <span class="n">verbose</span><span class="p">,</span> <span class="s">" job_parameter_generator BEGIN"</span><span class="p">)</span>
+ <span class="k">while</span> <span class="nb">len</span><span class="p">(</span><span class="n">incomplete_tasks</span><span class="p">):</span>
+ <span class="n">cnt_jobs_created_for_all_tasks</span> <span class="o">=</span> <span class="mi">0</span>
+ <span class="n">cnt_tasks_processed</span> <span class="o">=</span> <span class="mi">0</span>
+
+ <span class="c">#</span>
+ <span class="c"># get rid of all completed tasks first</span>
+ <span class="c"># Completion is signalled from pipeline_run</span>
+ <span class="c">#</span>
+ <span class="k">while</span> <span class="bp">True</span><span class="p">:</span>
+ <span class="k">try</span><span class="p">:</span>
+ <span class="n">item</span> <span class="o">=</span> <span class="n">task_with_completed_job_q</span><span class="o">.</span><span class="n">get_nowait</span><span class="p">()</span>
+ <span class="n">job_completed_task</span><span class="p">,</span> <span class="n">job_completed_task_name</span><span class="p">,</span> <span class="n">job_completed_name</span> <span class="o">=</span> <span class="n">item</span>
+
+
+ <span class="k">if</span> <span class="ow">not</span> <span class="n">job_completed_task</span> <span class="ow">in</span> <span class="n">incomplete_tasks</span><span class="p">:</span>
+ <span class="k">raise</span> <span class="ne">Exception</span><span class="p">(</span><span class="s">"Last job </span><span class="si">%s</span><span class="s"> for </span><span class="si">%s</span><span class="s">. Missing from incomplete tasks in make_job_parameter_generator"</span> <span class="o">%</span> <span class="p">(</span><span class="n">job_completed_name</span><span class="p">,</span> <span class="n">job_completed_task_name</span><span clas [...]
+ <span class="n">count_remaining_jobs</span><span class="p">[</span><span class="n">job_completed_task</span><span class="p">]</span> <span class="o">=</span> <span class="n">count_remaining_jobs</span><span class="p">[</span><span class="n">job_completed_task</span><span class="p">]</span> <span class="o">-</span> <span class="mi">1</span>
+ <span class="c">#</span>
+ <span class="c"># This is bad: something has gone very wrong</span>
+ <span class="c">#</span>
+ <span class="k">if</span> <span class="n">count_remaining_jobs</span><span class="p">[</span><span class="n">t</span><span class="p">]</span> <span class="o"><</span> <span class="mi">0</span><span class="p">:</span>
+ <span class="k">raise</span> <span class="ne">Exception</span><span class="p">(</span><span class="s">"job </span><span class="si">%s</span><span class="s"> for </span><span class="si">%s</span><span class="s"> causes job count < 0."</span> <span class="o">%</span> <span class="p">(</span><span class="n">job_completed_name</span><span class="p">,</span> <span class="n">job_completed_task_name</span><span class="p">))</span>
+
+ <span class="c">#</span>
+ <span class="c"># This Task completed</span>
+ <span class="c">#</span>
+ <span class="k">if</span> <span class="n">count_remaining_jobs</span><span class="p">[</span><span class="n">job_completed_task</span><span class="p">]</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
+ <span class="n">log_at_level</span> <span class="p">(</span><span class="n">logger</span><span class="p">,</span> <span class="mi">10</span><span class="p">,</span> <span class="n">verbose</span><span class="p">,</span> <span class="s">" Last job for </span><span class="si">%s</span><span class="s">. Retired from incomplete tasks in pipeline_run "</span> <span class="o">%</span> <span class="n">job_completed_task</span><span class="o">.</span><span cla [...]
+ <span class="n">incomplete_tasks</span><span class="o">.</span><span class="n">remove</span><span class="p">(</span><span class="n">job_completed_task</span><span class="p">)</span>
+ <span class="n">job_completed_task</span><span class="o">.</span><span class="n">completed</span> <span class="p">(</span><span class="n">logger</span><span class="p">)</span>
+ <span class="k">except</span> <span class="n">Queue</span><span class="o">.</span><span class="n">Empty</span><span class="p">:</span>
+ <span class="k">break</span>
+
+ <span class="k">for</span> <span class="n">t</span> <span class="ow">in</span> <span class="nb">list</span><span class="p">(</span><span class="n">incomplete_tasks</span><span class="p">):</span>
+ <span class="c">#</span>
+ <span class="c"># wrap in execption handler so that we know which task exception</span>
+ <span class="c"># came from</span>
+ <span class="c">#</span>
+ <span class="k">try</span><span class="p">:</span>
+ <span class="n">log_at_level</span> <span class="p">(</span><span class="n">logger</span><span class="p">,</span> <span class="mi">10</span><span class="p">,</span> <span class="n">verbose</span><span class="p">,</span> <span class="s">" job_parameter_generator consider task = </span><span class="si">%s</span><span class="s">"</span> <span class="o">%</span> <span class="n">t</span><span class="o">.</span><span class="n">_name</span><span class="p">)</span>
+
+ <span class="c"># ignore tasks in progress</span>
+ <span class="k">if</span> <span class="n">t</span> <span class="ow">in</span> <span class="n">inprogress_tasks</span><span class="p">:</span>
+ <span class="k">continue</span>
+ <span class="n">log_at_level</span> <span class="p">(</span><span class="n">logger</span><span class="p">,</span> <span class="mi">10</span><span class="p">,</span> <span class="n">verbose</span><span class="p">,</span> <span class="s">" job_parameter_generator task </span><span class="si">%s</span><span class="s"> not in progress"</span> <span class="o">%</span> <span class="n">t</span><span class="o">.</span><span class="n">_name</span><span class="p">)</span>
+
+ <span class="c"># ignore tasks with incomplete dependencies</span>
+ <span class="n">incomplete_parent</span> <span class="o">=</span> <span class="bp">False</span>
+ <span class="k">for</span> <span class="n">parent</span> <span class="ow">in</span> <span class="n">task_parents</span><span class="p">[</span><span class="n">t</span><span class="p">]:</span>
+ <span class="k">if</span> <span class="n">parent</span> <span class="ow">in</span> <span class="n">incomplete_tasks</span><span class="p">:</span>
+ <span class="n">incomplete_parent</span> <span class="o">=</span> <span class="bp">True</span>
+ <span class="k">break</span>
+ <span class="k">if</span> <span class="n">incomplete_parent</span><span class="p">:</span>
+ <span class="k">continue</span>
+
+ <span class="n">log_at_level</span> <span class="p">(</span><span class="n">logger</span><span class="p">,</span> <span class="mi">10</span><span class="p">,</span> <span class="n">verbose</span><span class="p">,</span> <span class="s">" job_parameter_generator start task </span><span class="si">%s</span><span class="s"> (parents completed)"</span> <span class="o">%</span> <span class="n">t</span><span class="o">.</span><span class="n">_name</span><span cl [...]
+ <span class="n">force_rerun</span> <span class="o">=</span> <span class="n">t</span> <span class="ow">in</span> <span class="n">forcedtorun_tasks</span>
+ <span class="c">#</span>
+ <span class="c"># Only log active task</span>
+ <span class="c">#</span>
+ <span class="k">if</span> <span class="n">t</span><span class="o">.</span><span class="n">is_active</span><span class="p">:</span>
+ <span class="n">log_at_level</span> <span class="p">(</span><span class="n">logger</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="n">verbose</span><span class="p">,</span> <span class="s">"Task enters queue = "</span> <span class="o">+</span> <span class="n">t</span><span class="o">.</span><span class="n">get_task_name</span><span class="p">()</span> <span class="o">+</span> <span class="p">(</span><span cla [...]
+ <span class="n">log_at_level</span> <span class="p">(</span><span class="n">logger</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="n">verbose</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">_description</span><span class="p">)</span>
+ <span class="n">inprogress_tasks</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="n">t</span><span class="p">)</span>
+ <span class="n">cnt_tasks_processed</span> <span class="o">+=</span> <span class="mi">1</span>
+
+
+ <span class="c">#</span>
+ <span class="c"># Use output parameters actually generated by running task</span>
+ <span class="c">#</span>
+ <span class="n">t</span><span class="o">.</span><span class="n">output_filenames</span> <span class="o">=</span> <span class="p">[]</span>
+
+
+
+ <span class="c">#</span>
+ <span class="c"># If no parameters: just call task function (empty list)</span>
+ <span class="c">#</span>
+ <span class="c">#if (t.active_if_checks != None):</span>
+ <span class="c"># t.is_active = all(arg() if isinstance(arg, collections.Callable) else arg</span>
+ <span class="c"># for arg in t.active_if_checks)</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="n">t</span><span class="o">.</span><span class="n">is_active</span><span class="p">:</span>
+ <span class="n">parameters</span> <span class="o">=</span> <span class="p">[]</span>
+
+
+
+ <span class="c">#</span>
+ <span class="c"># If no parameters: just call task function (empty list)</span>
+ <span class="c">#</span>
+ <span class="k">elif</span> <span class="n">t</span><span class="o">.</span><span class="n">param_generator_func</span> <span class="o">==</span> <span class="bp">None</span><span class="p">:</span>
+ <span class="n">parameters</span> <span class="o">=</span> <span class="p">([[],</span> <span class="p">[]],)</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="n">parameters</span> <span class="o">=</span> <span class="n">t</span><span class="o">.</span><span class="n">param_generator_func</span><span class="p">(</span><span class="n">runtime_data</span><span class="p">)</span>
+
+ <span class="c">#</span>
+ <span class="c"># iterate through parameters</span>
+ <span class="c">#</span>
+ <span class="n">cnt_jobs_created</span> <span class="o">=</span> <span class="mi">0</span>
+ <span class="k">for</span> <span class="n">param</span><span class="p">,</span> <span class="n">descriptive_param</span> <span class="ow">in</span> <span class="n">parameters</span><span class="p">:</span>
+
+ <span class="c">#</span>
+ <span class="c"># save output even if uptodate</span>
+ <span class="c">#</span>
+ <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">param</span><span class="p">)</span> <span class="o">>=</span> <span class="mi">2</span><span class="p">:</span>
+ <span class="n">t</span><span class="o">.</span><span class="n">output_filenames</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">param</span><span class="p">[</span><span class="mi">1</span><span class="p">])</span>
+
+ <span class="n">job_name</span> <span class="o">=</span> <span class="n">t</span><span class="o">.</span><span class="n">get_job_name</span><span class="p">(</span><span class="n">descriptive_param</span><span class="p">,</span> <span class="n">runtime_data</span><span class="p">)</span>
+
+ <span class="c">#</span>
+ <span class="c"># don't run if up to date unless force to run</span>
+ <span class="c">#</span>
+ <span class="k">if</span> <span class="n">force_rerun</span><span class="p">:</span>
+ <span class="n">log_at_level</span> <span class="p">(</span><span class="n">logger</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="n">verbose</span><span class="p">,</span> <span class="s">" force task </span><span class="si">%s</span><span class="s"> to rerun "</span> <span class="o">%</span> <span class="n">job_name</span><span class="p">)</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="n">t</span><span class="o">.</span><span class="n">needs_update_func</span><span class="p">:</span>
+ <span class="n">log_at_level</span> <span class="p">(</span><span class="n">logger</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="n">verbose</span><span class="p">,</span> <span class="s">" </span><span class="si">%s</span><span class="s"> no function to check if up-to-date "</span> <span class="o">%</span> <span class="n">job_name</span><span class="p">)</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="c"># extra clunky hack to also pass task info--</span>
+ <span class="c"># makes sure that there haven't been code or arg changes</span>
+ <span class="k">if</span> <span class="n">t</span><span class="o">.</span><span class="n">needs_update_func</span> <span class="o">==</span> <span class="n">needs_update_check_modify_time</span><span class="p">:</span>
+ <span class="n">needs_update</span><span class="p">,</span> <span class="n">msg</span> <span class="o">=</span> <span class="n">t</span><span class="o">.</span><span class="n">needs_update_func</span> <span class="p">(</span><span class="o">*</span><span class="n">param</span><span class="p">,</span> <span class="n">task</span><span class="o">=</span><span class="n">t</span><span class="p">,</span> <span class="n">job_history</span> <span class="o">=</ [...]
+ <span class="k">else</span><span class="p">:</span>
+ <span class="n">needs_update</span><span class="p">,</span> <span class="n">msg</span> <span class="o">=</span> <span class="n">t</span><span class="o">.</span><span class="n">needs_update_func</span> <span class="p">(</span><span class="o">*</span><span class="n">param</span><span class="p">)</span>
+
+ <span class="k">if</span> <span class="ow">not</span> <span class="n">needs_update</span><span class="p">:</span>
+ <span class="n">log_at_level</span> <span class="p">(</span><span class="n">logger</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="n">verbose</span><span class="p">,</span> <span class="s">" </span><span class="si">%s</span><span class="s"> unnecessary: already up to date "</span> <span class="o">%</span> <span class="n">job_name</span><span class="p">)</span>
+ <span class="k">continue</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="n">log_at_level</span> <span class="p">(</span><span class="n">logger</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="n">verbose</span><span class="p">,</span> <span class="s">" </span><span class="si">%s</span><span class="s"> </span><span class="si">%s</span><span class="s"> "</span> <span class="o">%</span> <span class="p">(</span><span class="n">job_name</span><span class="p">, [...]
+
+ <span class="c">#</span>
+ <span class="c"># Clunky hack to make sure input files exists right before</span>
+ <span class="c"># job is called for better error messages</span>
+ <span class="c">#</span>
+ <span class="k">if</span> <span class="n">t</span><span class="o">.</span><span class="n">needs_update_func</span> <span class="o">==</span> <span class="n">needs_update_check_modify_time</span><span class="p">:</span>
+ <span class="n">check_input_files_exist</span> <span class="p">(</span><span class="o">*</span><span class="n">param</span><span class="p">)</span>
+
+ <span class="c"># pause for one second before first job of each tasks</span>
+ <span class="c"># @originate tasks do not need to pause, because they depend on nothing!</span>
+ <span class="k">if</span> <span class="n">cnt_jobs_created</span> <span class="o">==</span> <span class="mi">0</span> <span class="ow">and</span> <span class="n">touch_files_only</span> <span class="o"><</span> <span class="mi">2</span><span class="p">:</span>
+ <span class="k">if</span> <span class="s">"ONE_SECOND_PER_JOB"</span> <span class="ow">in</span> <span class="n">runtime_data</span> <span class="ow">and</span> <span class="n">runtime_data</span><span class="p">[</span><span class="s">"ONE_SECOND_PER_JOB"</span><span class="p">]</span> <span class="ow">and</span> <span class="n">t</span><span class="o">.</span><span class="n">_action_type</span> <span class="o">!=</span> <span class="n">_t [...]
+ <span class="n">log_at_level</span> <span class="p">(</span><span class="n">logger</span><span class="p">,</span> <span class="mi">10</span><span class="p">,</span> <span class="n">verbose</span><span class="p">,</span> <span class="s">" 1 second PAUSE in job_parameter_generator</span><span class="se">\n\n\n</span><span class="s">"</span><span class="p">)</span>
+ <span class="n">time</span><span class="o">.</span><span class="n">sleep</span><span class="p">(</span><span class="mf">1.01</span><span class="p">)</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="n">time</span><span class="o">.</span><span class="n">sleep</span><span class="p">(</span><span class="mf">0.1</span><span class="p">)</span>
+
+
+ <span class="n">count_remaining_jobs</span><span class="p">[</span><span class="n">t</span><span class="p">]</span> <span class="o">+=</span> <span class="mi">1</span>
+ <span class="n">cnt_jobs_created</span> <span class="o">+=</span> <span class="mi">1</span>
+ <span class="n">cnt_jobs_created_for_all_tasks</span> <span class="o">+=</span> <span class="mi">1</span>
+ <span class="k">yield</span> <span class="p">(</span><span class="n">param</span><span class="p">,</span>
+ <span class="n">t</span><span class="o">.</span><span class="n">_name</span><span class="p">,</span>
+ <span class="n">job_name</span><span class="p">,</span>
+ <span class="n">t</span><span class="o">.</span><span class="n">job_wrapper</span><span class="p">,</span>
+ <span class="n">t</span><span class="o">.</span><span class="n">user_defined_work_func</span><span class="p">,</span>
+ <span class="n">get_semaphore</span> <span class="p">(</span><span class="n">t</span><span class="p">,</span> <span class="n">job_limit_semaphores</span><span class="p">,</span> <span class="n">syncmanager</span><span class="p">),</span>
+ <span class="n">touch_files_only</span><span class="p">)</span>
+
+ <span class="c"># if no job came from this task, this task is complete</span>
+ <span class="c"># we need to retire it here instead of normal completion at end of job tasks</span>
+ <span class="c"># precisely because it created no jobs</span>
+ <span class="k">if</span> <span class="n">cnt_jobs_created</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
+ <span class="n">incomplete_tasks</span><span class="o">.</span><span class="n">remove</span><span class="p">(</span><span class="n">t</span><span class="p">)</span>
+ <span class="n">t</span><span class="o">.</span><span class="n">completed</span> <span class="p">(</span><span class="n">logger</span><span class="p">,</span> <span class="bp">True</span><span class="p">)</span>
+ <span class="n">log_at_level</span> <span class="p">(</span><span class="n">logger</span><span class="p">,</span> <span class="mi">10</span><span class="p">,</span> <span class="n">verbose</span><span class="p">,</span> <span class="s">" No jobs created for </span><span class="si">%s</span><span class="s">. Retired in parameter_generator "</span> <span class="o">%</span> <span class="n">t</span><span class="o">.</span><span class="n">_name</span><span [...]
+
+ <span class="c">#</span>
+ <span class="c"># Add extra warning if no regular expressions match:</span>
+ <span class="c"># This is a common class of frustrating errors</span>
+ <span class="c">#</span>
+ <span class="k">if</span> <span class="p">(</span><span class="n">verbose</span> <span class="o">>=</span> <span class="mi">1</span> <span class="ow">and</span> <span class="s">"ruffus_WARNING"</span> <span class="ow">in</span> <span class="n">runtime_data</span> <span class="ow">and</span>
+ <span class="n">t</span><span class="o">.</span><span class="n">param_generator_func</span> <span class="ow">in</span> <span class="n">runtime_data</span><span class="p">[</span><span class="s">"ruffus_WARNING"</span><span class="p">]):</span>
+ <span class="k">for</span> <span class="n">msg</span> <span class="ow">in</span> <span class="n">runtime_data</span><span class="p">[</span><span class="s">"ruffus_WARNING"</span><span class="p">][</span><span class="n">t</span><span class="o">.</span><span class="n">param_generator_func</span><span class="p">]:</span>
+ <span class="n">logger</span><span class="o">.</span><span class="n">warning</span><span class="p">(</span><span class="s">" 'In Task def </span><span class="si">%s</span><span class="s">(...):' </span><span class="si">%s</span><span class="s"> "</span> <span class="o">%</span> <span class="p">(</span><span class="n">t</span><span class="o">.</span><span class="n">get_task_name</span><span class="p">(),</span> <span class="n">msg</span [...]
+
+
+ <span class="c">#</span>
+ <span class="c"># GeneratorExit is thrown when this generator does not complete.</span>
+ <span class="c"># I.e. there is a break in the pipeline_run loop.</span>
+ <span class="c"># This happens where there are exceptions signalled from within a job</span>
+ <span class="c">#</span>
+ <span class="c"># This is not really an exception, more a way to exit the generator loop</span>
+ <span class="c"># asynchrononously so that cleanups can happen (e.g. the "with" statement</span>
+ <span class="c"># or finally.)</span>
+ <span class="c">#</span>
+ <span class="c"># We could write except Exception: below which will catch everything but</span>
+ <span class="c"># KeyboardInterrupt and StopIteration and GeneratorExit in python 2.6</span>
+ <span class="c">#</span>
+ <span class="c"># However, in python 2.5, GeneratorExit inherits from Exception. So</span>
+ <span class="c"># we explicitly catch and rethrow GeneratorExit.</span>
+ <span class="k">except</span> <span class="ne">GeneratorExit</span><span class="p">:</span>
+ <span class="k">raise</span>
+ <span class="k">except</span><span class="p">:</span>
+ <span class="n">exceptionType</span><span class="p">,</span> <span class="n">exceptionValue</span><span class="p">,</span> <span class="n">exceptionTraceback</span> <span class="o">=</span> <span class="n">sys</span><span class="o">.</span><span class="n">exc_info</span><span class="p">()</span>
+ <span class="n">exception_stack</span> <span class="o">=</span> <span class="n">traceback</span><span class="o">.</span><span class="n">format_exc</span><span class="p">(</span><span class="n">exceptionTraceback</span><span class="p">)</span>
+ <span class="n">exception_name</span> <span class="o">=</span> <span class="n">exceptionType</span><span class="o">.</span><span class="n">__module__</span> <span class="o">+</span> <span class="s">'.'</span> <span class="o">+</span> <span class="n">exceptionType</span><span class="o">.</span><span class="n">__name__</span>
+ <span class="n">exception_value</span> <span class="o">=</span> <span class="nb">str</span><span class="p">(</span><span class="n">exceptionValue</span><span class="p">)</span>
+ <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">exception_value</span><span class="p">):</span>
+ <span class="n">exception_value</span> <span class="o">=</span> <span class="s">"(</span><span class="si">%s</span><span class="s">)"</span> <span class="o">%</span> <span class="n">exception_value</span>
+ <span class="n">errt</span> <span class="o">=</span> <span class="n">RethrownJobError</span><span class="p">([(</span><span class="n">t</span><span class="o">.</span><span class="n">_name</span><span class="p">,</span>
+ <span class="s">""</span><span class="p">,</span>
+ <span class="n">exception_name</span><span class="p">,</span>
+ <span class="n">exception_value</span><span class="p">,</span>
+ <span class="n">exception_stack</span><span class="p">)])</span>
+ <span class="n">errt</span><span class="o">.</span><span class="n">specify_task</span><span class="p">(</span><span class="n">t</span><span class="p">,</span> <span class="s">"Exceptions generating parameters"</span><span class="p">)</span>
+ <span class="k">raise</span> <span class="n">errt</span>
+
+
+
+        <span class="c"># extra tests in case final tasks do not result in jobs</span>
+ <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">incomplete_tasks</span><span class="p">)</span> <span class="ow">and</span> <span class="p">(</span><span class="ow">not</span> <span class="n">cnt_tasks_processed</span> <span class="ow">or</span> <span class="n">cnt_jobs_created_for_all_tasks</span><span class="p">):</span>
+ <span class="n">log_at_level</span> <span class="p">(</span><span class="n">logger</span><span class="p">,</span> <span class="mi">10</span><span class="p">,</span> <span class="n">verbose</span><span class="p">,</span> <span class="s">" incomplete tasks = "</span> <span class="o">+</span>
+ <span class="s">","</span><span class="o">.</span><span class="n">join</span><span class="p">([</span><span class="n">t</span><span class="o">.</span><span class="n">_name</span> <span class="k">for</span> <span class="n">t</span> <span class="ow">in</span> <span class="n">incomplete_tasks</span><span class="p">]</span> <span class="p">))</span>
+ <span class="k">yield</span> <span class="n">waiting_for_more_tasks_to_complete</span><span class="p">()</span>
+
+ <span class="k">yield</span> <span class="n">all_tasks_complete</span><span class="p">()</span>
+ <span class="c"># This function is done</span>
+ <span class="n">log_at_level</span> <span class="p">(</span><span class="n">logger</span><span class="p">,</span> <span class="mi">10</span><span class="p">,</span> <span class="n">verbose</span><span class="p">,</span> <span class="s">" job_parameter_generator END"</span><span class="p">)</span>
+
+ <span class="k">return</span> <span class="n">parameter_generator</span>
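
A minimal, self-contained sketch (plain Python, not ruffus code; the function and message names are invented) of the catch-and-rethrow pattern explained in the GeneratorExit comment above: closing the generator early must propagate normally, while genuine errors raised while generating parameters are wrapped and re-raised.

    # Sketch only: mirrors the "except GeneratorExit: raise" idiom used above.
    def parameter_source():
        try:
            for i in range(5):
                yield i
        except GeneratorExit:
            # Raised when the consumer stops early (generator.close() or a break
            # in the consuming loop); re-raise so normal cleanup still happens.
            raise
        except:
            # A real error while generating parameters gets wrapped and re-raised.
            raise RuntimeError("Exceptions generating parameters")

    gen = parameter_source()
    print(next(gen))    # 0
    gen.close()         # throws GeneratorExit inside the generator; not an error
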
+
+
+
+<span class="c">#_________________________________________________________________________________________</span>
+<span class="c">#</span>
+<span class="c"># feed_job_params_to_process_pool</span>
+<span class="c">#</span>
+<span class="c">#</span>
+<span class="c">#________________________________________________________________________________________</span>
+<span class="k">def</span> <span class="nf">feed_job_params_to_process_pool_factory</span> <span class="p">(</span><span class="n">parameter_q</span><span class="p">,</span> <span class="n">logger</span><span class="p">,</span> <span class="n">verbose</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Process pool gets its parameters from this generator</span>
+<span class="sd"> Use factory function to save parameter_queue</span>
+<span class="sd"> """</span>
+ <span class="k">def</span> <span class="nf">feed_job_params_to_process_pool</span> <span class="p">():</span>
+ <span class="n">log_at_level</span> <span class="p">(</span><span class="n">logger</span><span class="p">,</span> <span class="mi">10</span><span class="p">,</span> <span class="n">verbose</span><span class="p">,</span> <span class="s">" Send param to Pooled Process START"</span><span class="p">)</span>
+ <span class="k">while</span> <span class="mi">1</span><span class="p">:</span>
+ <span class="n">log_at_level</span> <span class="p">(</span><span class="n">logger</span><span class="p">,</span> <span class="mi">10</span><span class="p">,</span> <span class="n">verbose</span><span class="p">,</span> <span class="s">" Get next parameter size = </span><span class="si">%d</span><span class="s">"</span> <span class="o">%</span>
+ <span class="n">parameter_q</span><span class="o">.</span><span class="n">qsize</span><span class="p">())</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="n">parameter_q</span><span class="o">.</span><span class="n">qsize</span><span class="p">():</span>
+ <span class="n">time</span><span class="o">.</span><span class="n">sleep</span><span class="p">(</span><span class="mf">0.1</span><span class="p">)</span>
+ <span class="n">param</span> <span class="o">=</span> <span class="n">parameter_q</span><span class="o">.</span><span class="n">get</span><span class="p">()</span>
+ <span class="n">log_at_level</span> <span class="p">(</span><span class="n">logger</span><span class="p">,</span> <span class="mi">10</span><span class="p">,</span> <span class="n">verbose</span><span class="p">,</span> <span class="s">" Get next parameter done"</span><span class="p">)</span>
+
+ <span class="c"># all tasks done</span>
+ <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">param</span><span class="p">,</span> <span class="n">all_tasks_complete</span><span class="p">):</span>
+ <span class="k">break</span>
+
+ <span class="n">log_at_level</span> <span class="p">(</span><span class="n">logger</span><span class="p">,</span> <span class="mi">10</span><span class="p">,</span> <span class="n">verbose</span><span class="p">,</span> <span class="s">" Send param to Pooled Process=>"</span> <span class="o">+</span> <span class="nb">str</span><span class="p">(</span><span class="n">param</span><span class="p">[</span><span class="mi">0</span><span class="p">]))</span>
+ <span class="k">yield</span> <span class="n">param</span>
+
+ <span class="n">log_at_level</span> <span class="p">(</span><span class="n">logger</span><span class="p">,</span> <span class="mi">10</span><span class="p">,</span> <span class="n">verbose</span><span class="p">,</span> <span class="s">" Send param to Pooled Process END"</span><span class="p">)</span>
+
+ <span class="c"># return generator</span>
+ <span class="k">return</span> <span class="n">feed_job_params_to_process_pool</span>
+
+<span class="c">#_________________________________________________________________________________________</span>
+<span class="c">#</span>
+<span class="c"># fill_queue_with_job_parameters</span>
+<span class="c">#</span>
+<span class="c">#________________________________________________________________________________________</span>
+<span class="k">def</span> <span class="nf">fill_queue_with_job_parameters</span> <span class="p">(</span><span class="n">job_parameters</span><span class="p">,</span> <span class="n">parameter_q</span><span class="p">,</span> <span class="n">POOL_SIZE</span><span class="p">,</span> <span class="n">logger</span><span class="p">,</span> <span class="n">verbose</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Ensures queue is filled with number of parameters > jobs / slots (POOL_SIZE)</span>
+<span class="sd"> """</span>
+ <span class="n">log_at_level</span> <span class="p">(</span><span class="n">logger</span><span class="p">,</span> <span class="mi">10</span><span class="p">,</span> <span class="n">verbose</span><span class="p">,</span> <span class="s">" fill_queue_with_job_parameters START"</span><span class="p">)</span>
+ <span class="k">for</span> <span class="n">param</span> <span class="ow">in</span> <span class="n">job_parameters</span><span class="p">:</span>
+
+ <span class="c"># stop if no more jobs available</span>
+ <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">param</span><span class="p">,</span> <span class="n">waiting_for_more_tasks_to_complete</span><span class="p">):</span>
+ <span class="n">log_at_level</span> <span class="p">(</span><span class="n">logger</span><span class="p">,</span> <span class="mi">10</span><span class="p">,</span> <span class="n">verbose</span><span class="p">,</span> <span class="s">" fill_queue_with_job_parameters WAITING for task to complete"</span><span class="p">)</span>
+ <span class="k">break</span>
+
+ <span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">param</span><span class="p">,</span> <span class="n">all_tasks_complete</span><span class="p">):</span>
+ <span class="n">log_at_level</span> <span class="p">(</span><span class="n">logger</span><span class="p">,</span> <span class="mi">10</span><span class="p">,</span> <span class="n">verbose</span><span class="p">,</span> <span class="s">" fill_queue_with_job_parameters=>"</span> <span class="o">+</span> <span class="nb">str</span><span class="p">(</span><span class="n">param</span><span class="p">[</span><span class="mi">0</span><span class="p">]))</span>
+
+ <span class="c"># put into queue</span>
+ <span class="n">parameter_q</span><span class="o">.</span><span class="n">put</span><span class="p">(</span><span class="n">param</span><span class="p">)</span>
+
+ <span class="c"># queue size needs to be at least 2 so that the parameter queue never consists of a single</span>
+        <span class="c"># waiting_for_more_tasks_to_complete entry, which would cause</span>
+        <span class="c"># an endless loop and everything to hang!</span>
+ <span class="k">if</span> <span class="n">parameter_q</span><span class="o">.</span><span class="n">qsize</span><span class="p">()</span> <span class="o">></span> <span class="n">POOL_SIZE</span> <span class="o">+</span> <span class="mi">1</span><span class="p">:</span>
+ <span class="k">break</span>
+ <span class="n">log_at_level</span> <span class="p">(</span><span class="n">logger</span><span class="p">,</span> <span class="mi">10</span><span class="p">,</span> <span class="n">verbose</span><span class="p">,</span> <span class="s">" fill_queue_with_job_parameters END"</span><span class="p">)</span>
+
+
+<span class="c">#</span>
+<span class="c"># How the job queue works:</span>
+<span class="c">#</span>
+<span class="c"># Main loop</span>
+<span class="c"># iterates pool.map using feed_job_params_to_process_pool()</span>
+<span class="c"># (calls parameter_q.get() until all_tasks_complete)</span>
+<span class="c">#</span>
+<span class="c"># if errors but want to finish tasks already in pipeine:</span>
+<span class="c"># parameter_q.put(all_tasks_complete())</span>
+<span class="c"># keep going</span>
+<span class="c"># else:</span>
+<span class="c">#</span>
+<span class="c"># loops through jobs until no more jobs in non-dependent tasks</span>
+<span class="c"># separate loop in generator so that list of incomplete_tasks does not</span>
+<span class="c"># get updated half way through</span>
+<span class="c"># causing race conditions</span>
+<span class="c">#</span>
+<span class="c"># parameter_q.put(param)</span>
+<span class="c"># until waiting_for_more_tasks_to_complete</span>
+<span class="c"># until queue is full (check *after*)</span>
+<span class="c">#</span>
+
+<span class="c">#_________________________________________________________________________________________</span>
+
+<span class="c"># pipeline_run</span>
+
+<span class="c">#_________________________________________________________________________________________</span>
+<div class="viewcode-block" id="pipeline_run"><a class="viewcode-back" href="../../task.html#ruffus.task.pipeline_run">[docs]</a><span class="k">def</span> <span class="nf">pipeline_run</span><span class="p">(</span><span class="n">target_tasks</span> <span class="o">=</span> <span class="p">[],</span>
+ <span class="n">forcedtorun_tasks</span> <span class="o">=</span> <span class="p">[],</span>
+ <span class="n">multiprocess</span> <span class="o">=</span> <span class="mi">1</span><span class="p">,</span>
+ <span class="n">logger</span> <span class="o">=</span> <span class="n">stderr_logger</span><span class="p">,</span>
+ <span class="n">gnu_make_maximal_rebuild_mode</span> <span class="o">=</span> <span class="bp">True</span><span class="p">,</span>
+ <span class="n">verbose</span> <span class="o">=</span> <span class="mi">1</span><span class="p">,</span>
+ <span class="n">runtime_data</span> <span class="o">=</span> <span class="bp">None</span><span class="p">,</span>
+ <span class="n">one_second_per_job</span> <span class="o">=</span> <span class="bp">None</span><span class="p">,</span>
+ <span class="n">touch_files_only</span> <span class="o">=</span> <span class="bp">False</span><span class="p">,</span>
+ <span class="n">exceptions_terminate_immediately</span> <span class="o">=</span> <span class="bp">False</span><span class="p">,</span>
+ <span class="n">log_exceptions</span> <span class="o">=</span> <span class="bp">False</span><span class="p">,</span>
+ <span class="n">checksum_level</span> <span class="o">=</span> <span class="bp">None</span><span class="p">,</span>
+ <span class="n">multithread</span> <span class="o">=</span> <span class="mi">0</span><span class="p">,</span>
+ <span class="n">history_file</span> <span class="o">=</span> <span class="bp">None</span><span class="p">):</span>
+ <span class="c"># Remember to add further extra parameters here to "extra_pipeline_run_options" inside cmdline.py</span>
+ <span class="c"># This will forward extra parameters from the command line to pipeline_run</span>
+ <span class="sd">"""</span>
+<span class="sd"> Run pipelines.</span>
+
+<span class="sd"> :param target_tasks: targets task functions which will be run if they are out-of-date</span>
+<span class="sd"> :param forcedtorun_tasks: task functions which will be run whether or not they are out-of-date</span>
+<span class="sd"> :param multiprocess: The number of concurrent jobs running on different processes.</span>
+<span class="sd"> :param multithread: The number of concurrent jobs running as different threads. If > 1, ruffus will use multithreading *instead of* multiprocessing (and ignore the multiprocess parameter). Using multi threading is particularly useful to manage high performance clusters which otherwise are prone to "processor storms" when large number of cores finish jobs at the same time. (Thanks Andreas Heger)</span>
+<span class="sd"> :param logger: Where progress will be logged. Defaults to stderr output.</span>
+<span class="sd"> :type logger: `logging <http://docs.python.org/library/logging.html>`_ objects</span>
+<span class="sd"> :param verbose: level 0 : nothing</span>
+<span class="sd"> level 1 : logs task names and warnings</span>
+<span class="sd"> level 2 : logs task description if exists</span>
+<span class="sd"> level 3 : logs job names for jobs to be run</span>
+<span class="sd"> level 4 : logs list of up-to-date tasks and job names for jobs to be run</span>
+<span class="sd"> level 5 : logs job names for all jobs whether up-to-date or not</span>
+<span class="sd"> level 10: logs messages useful only for debugging ruffus pipeline code</span>
+<span class="sd"> :param touch_files_only: Create or update input/output files only to simulate running the pipeline. Do not run jobs. If set to CHECKSUM_REGENERATE, will regenerate the checksum history file to reflect the existing i/o files on disk.</span>
+<span class="sd"> :param exceptions_terminate_immediately: Exceptions cause immediate termination</span>
+<span class="sd"> rather than waiting for N jobs to finish where N = multiprocess</span>
+<span class="sd"> :param log_exceptions: Print exceptions to the logger as soon as they occur.</span>
+<span class="sd"> :param checksum_level: Several options for checking up-to-dateness are available: Default is level 1.</span>
+<span class="sd"> level 0 : Use only file timestamps</span>
+<span class="sd"> level 1 : above, plus timestamp of successful job completion</span>
+<span class="sd"> level 2 : above, plus a checksum of the pipeline function body</span>
+<span class="sd"> level 3 : above, plus a checksum of the pipeline function default arguments and the additional arguments passed in by task decorators</span>
+<span class="sd"> :param history_file: The database file which stores checksums and file timestamps for input/output files.</span>
+<span class="sd"> :param one_second_per_job: To work around poor file timepstamp resolution for some file systems. Defaults to True if checksum_level is 0 forcing Tasks to take a minimum of 1 second to complete.</span>
+<span class="sd"> :param runtime_data: Experimental feature for passing data to tasks at run time</span>
+<span class="sd"> :param gnu_make_maximal_rebuild_mode: Defaults to re-running *all* out-of-date tasks. Runs minimal</span>
+<span class="sd"> set to build targets if set to ``True``. Use with caution.</span>
+<span class="sd"> """</span>
+ <span class="k">if</span> <span class="n">touch_files_only</span> <span class="o">==</span> <span class="bp">False</span><span class="p">:</span>
+ <span class="n">touch_files_only</span> <span class="o">=</span> <span class="mi">0</span>
+ <span class="k">elif</span> <span class="n">touch_files_only</span> <span class="o">==</span> <span class="bp">True</span><span class="p">:</span>
+ <span class="n">touch_files_only</span> <span class="o">=</span> <span class="mi">1</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="n">touch_files_only</span> <span class="o">=</span> <span class="mi">2</span>
+ <span class="c"># we are not running anything so do it as quickly as possible</span>
+ <span class="n">one_second_per_job</span> <span class="o">=</span> <span class="bp">False</span>
+
+ <span class="n">syncmanager</span> <span class="o">=</span> <span class="n">multiprocessing</span><span class="o">.</span><span class="n">Manager</span><span class="p">()</span>
+
+ <span class="k">if</span> <span class="n">runtime_data</span> <span class="o">==</span> <span class="bp">None</span><span class="p">:</span>
+ <span class="n">runtime_data</span> <span class="o">=</span> <span class="p">{}</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">runtime_data</span><span class="p">,</span> <span class="nb">dict</span><span class="p">):</span>
+ <span class="k">raise</span> <span class="ne">Exception</span><span class="p">(</span><span class="s">"pipeline_run parameter runtime_data should be a dictionary of "</span>
+                        <span class="s">"values passed to jobs at run time."</span><span class="p">)</span>
+
+
+ <span class="c">#</span>
+ <span class="c"># whether using multiprocessing or multithreading</span>
+ <span class="c">#</span>
+ <span class="k">if</span> <span class="n">multithread</span><span class="p">:</span>
+ <span class="n">pool</span> <span class="o">=</span> <span class="n">ThreadPool</span><span class="p">(</span><span class="n">multithread</span><span class="p">)</span>
+ <span class="n">parallelism</span> <span class="o">=</span> <span class="n">multithread</span>
+ <span class="k">elif</span> <span class="n">multiprocess</span> <span class="o">></span> <span class="mi">1</span><span class="p">:</span>
+ <span class="n">pool</span> <span class="o">=</span> <span class="n">Pool</span><span class="p">(</span><span class="n">multiprocess</span><span class="p">)</span>
+ <span class="n">parallelism</span> <span class="o">=</span> <span class="n">multiprocess</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="n">parallelism</span> <span class="o">=</span> <span class="mi">1</span>
+ <span class="n">pool</span> <span class="o">=</span> <span class="bp">None</span>
+
+ <span class="k">if</span> <span class="n">checksum_level</span> <span class="ow">is</span> <span class="bp">None</span><span class="p">:</span>
+ <span class="n">checksum_level</span> <span class="o">=</span> <span class="n">get_default_checksum_level</span><span class="p">()</span>
+
+ <span class="c">#</span>
+ <span class="c"># Supplement mtime with system clock if using CHECKSUM_HISTORY_TIMESTAMPS</span>
+ <span class="c"># we don't need to default to adding 1 second delays between jobs</span>
+ <span class="c">#</span>
+ <span class="k">if</span> <span class="n">one_second_per_job</span> <span class="o">==</span> <span class="bp">None</span><span class="p">:</span>
+ <span class="k">if</span> <span class="n">checksum_level</span> <span class="o">==</span> <span class="n">CHECKSUM_FILE_TIMESTAMPS</span><span class="p">:</span>
+            <span class="n">log_at_level</span> <span class="p">(</span><span class="n">logger</span><span class="p">,</span> <span class="mi">5</span><span class="p">,</span> <span class="n">verbose</span><span class="p">,</span> <span class="s">"   Checksums rely on FILE TIMESTAMPS only and we don't know the system file time resolution: Pause 1 second..."</span><span class="p">)</span>
+ <span class="n">runtime_data</span><span class="p">[</span><span class="s">"ONE_SECOND_PER_JOB"</span><span class="p">]</span> <span class="o">=</span> <span class="bp">True</span>
+ <span class="k">else</span><span class="p">:</span>
+            <span class="n">log_at_level</span> <span class="p">(</span><span class="n">logger</span><span class="p">,</span> <span class="mi">5</span><span class="p">,</span> <span class="n">verbose</span><span class="p">,</span> <span class="s">"   Checksums use calculated time as well: No 1 second pause..."</span><span class="p">)</span>
+ <span class="n">runtime_data</span><span class="p">[</span><span class="s">"ONE_SECOND_PER_JOB"</span><span class="p">]</span> <span class="o">=</span> <span class="bp">False</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="n">log_at_level</span> <span class="p">(</span><span class="n">logger</span><span class="p">,</span> <span class="mi">5</span><span class="p">,</span> <span class="n">verbose</span><span class="p">,</span> <span class="s">" One second per job specified to be </span><span class="si">%s</span><span class="s">"</span> <span class="o">%</span> <span class="n">one_second_per_job</span><span class="p">)</span>
+ <span class="n">runtime_data</span><span class="p">[</span><span class="s">"ONE_SECOND_PER_JOB"</span><span class="p">]</span> <span class="o">=</span> <span class="n">one_second_per_job</span>
+
+
+ <span class="k">if</span> <span class="n">verbose</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
+ <span class="n">logger</span> <span class="o">=</span> <span class="n">black_hole_logger</span>
+ <span class="k">elif</span> <span class="n">verbose</span> <span class="o">>=</span> <span class="mi">11</span><span class="p">:</span>
+ <span class="k">if</span> <span class="nb">hasattr</span><span class="p">(</span><span class="n">logger</span><span class="p">,</span> <span class="s">"add_unique_prefix"</span><span class="p">):</span>
+ <span class="n">logger</span><span class="o">.</span><span class="n">add_unique_prefix</span><span class="p">()</span>
+
+ <span class="k">if</span> <span class="n">touch_files_only</span> <span class="ow">and</span> <span class="n">verbose</span> <span class="o">>=</span> <span class="mi">1</span><span class="p">:</span>
+ <span class="n">logger</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s">"Touch output files instead of remaking them."</span><span class="p">)</span>
+
+ <span class="n">link_task_names_to_functions</span> <span class="p">()</span>
+ <span class="n">update_checksum_level_on_tasks</span> <span class="p">(</span><span class="n">checksum_level</span><span class="p">)</span>
+
+ <span class="c">#</span>
+ <span class="c"># If we aren't using checksums, and history file hasn't been specified,</span>
+ <span class="c"># we might be a bit surprised to find Ruffus writing to a sqlite db anyway.</span>
+    <span class="c"># Let us just use an in-memory db which will be thrown away</span>
+ <span class="c"># Of course, if history_file is specified, we presume you know what you are doing</span>
+ <span class="c">#</span>
+ <span class="k">if</span> <span class="n">checksum_level</span> <span class="o">==</span> <span class="n">CHECKSUM_FILE_TIMESTAMPS</span> <span class="ow">and</span> <span class="n">history_file</span> <span class="o">==</span> <span class="bp">None</span><span class="p">:</span>
+ <span class="n">history_file</span> <span class="o">=</span> <span class="s">':memory:'</span>
+
+ <span class="n">job_history</span> <span class="o">=</span> <span class="n">open_job_history</span> <span class="p">(</span><span class="n">history_file</span><span class="p">)</span>
+
+
+
+
+ <span class="c">#</span>
+ <span class="c"># @active_if decorated tasks can change their active state every time</span>
+ <span class="c"># pipeline_run / pipeline_printout / pipeline_printout_graph is called</span>
+ <span class="c">#</span>
+ <span class="n">update_active_states_for_all_tasks</span> <span class="p">()</span>
+
+
+ <span class="c">#</span>
+ <span class="c"># target jobs</span>
+ <span class="c">#</span>
+ <span class="n">target_tasks</span> <span class="o">=</span> <span class="n">task_names_to_tasks</span> <span class="p">(</span><span class="s">"Target"</span><span class="p">,</span> <span class="n">target_tasks</span><span class="p">)</span>
+ <span class="n">forcedtorun_tasks</span> <span class="o">=</span> <span class="n">task_names_to_tasks</span> <span class="p">(</span><span class="s">"Forced to run"</span><span class="p">,</span> <span class="n">forcedtorun_tasks</span><span class="p">)</span>
+
+
+ <span class="c">#</span>
+ <span class="c"># To update the checksum file, we force all tasks to rerun but then don't actually call the task function...</span>
+ <span class="c">#</span>
+    <span class="c"># So starting with target_tasks and forcedtorun_tasks, we harvest all upstream dependencies willy-nilly</span>
+ <span class="c"># and assign the results to forcedtorun_tasks</span>
+ <span class="c">#</span>
+ <span class="k">if</span> <span class="n">touch_files_only</span> <span class="o">==</span> <span class="mi">2</span><span class="p">:</span>
+ <span class="p">(</span><span class="n">forcedtorun_tasks</span><span class="p">,</span> <span class="n">ignore_param1</span><span class="p">,</span> <span class="n">ignore_param2</span><span class="p">,</span>
+ <span class="n">ignore_param3</span><span class="p">)</span> <span class="o">=</span> <span class="n">topologically_sorted_nodes</span><span class="p">(</span><span class="n">target_tasks</span> <span class="o">+</span> <span class="n">forcedtorun_tasks</span><span class="p">,</span> <span class="bp">True</span><span class="p">,</span>
+ <span class="n">gnu_make_maximal_rebuild_mode</span><span class="p">,</span>
+ <span class="n">extra_data_for_signal</span> <span class="o">=</span> <span class="p">[</span><span class="n">t_verbose_logger</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="bp">None</span><span class="p">,</span> <span class="n">runtime_data</span><span class="p">),</span> <span class="n">job_history</span><span class="p">])</span>
+
+
+
+ <span class="p">(</span><span class="n">topological_sorted</span><span class="p">,</span>
+ <span class="n">self_terminated_nodes</span><span class="p">,</span>
+ <span class="n">dag_violating_edges</span><span class="p">,</span>
+ <span class="n">dag_violating_nodes</span><span class="p">)</span> <span class="o">=</span> <span class="n">topologically_sorted_nodes</span><span class="p">(</span> <span class="n">target_tasks</span><span class="p">,</span> <span class="n">forcedtorun_tasks</span><span class="p">,</span>
+ <span class="n">gnu_make_maximal_rebuild_mode</span><span class="p">,</span>
+ <span class="n">extra_data_for_signal</span> <span class="o">=</span> <span class="p">[</span><span class="n">t_verbose_logger</span><span class="p">(</span><span class="n">verbose</span><span class="p">,</span> <span class="n">logger</span><span class="p">,</span> <span class="n">runtime_data</span><span class="p">),</span> <span class="n">job_history</span><span class="p">])</span>
+
+ <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">dag_violating_nodes</span><span class="p">):</span>
+ <span class="n">dag_violating_tasks</span> <span class="o">=</span> <span class="s">", "</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">t</span><span class="o">.</span><span class="n">_name</span> <span class="k">for</span> <span class="n">t</span> <span class="ow">in</span> <span class="n">dag_violating_nodes</span><span class="p">)</span>
+
+ <span class="n">e</span> <span class="o">=</span> <span class="n">error_circular_dependencies</span><span class="p">(</span><span class="s">"Circular dependencies found in the "</span>
+ <span class="s">"pipeline involving one or more of (</span><span class="si">%s</span><span class="s">)"</span> <span class="o">%</span>
+ <span class="p">(</span><span class="n">dag_violating_tasks</span><span class="p">))</span>
+ <span class="k">raise</span> <span class="n">e</span>
+
+
+
+ <span class="c">#</span>
+ <span class="c"># get dependencies. Only include tasks which will be run</span>
+ <span class="c">#</span>
+ <span class="n">incomplete_tasks</span> <span class="o">=</span> <span class="nb">set</span><span class="p">(</span><span class="n">topological_sorted</span><span class="p">)</span>
+ <span class="n">task_parents</span> <span class="o">=</span> <span class="n">defaultdict</span><span class="p">(</span><span class="nb">set</span><span class="p">)</span>
+ <span class="k">for</span> <span class="n">t</span> <span class="ow">in</span> <span class="n">incomplete_tasks</span><span class="p">:</span>
+ <span class="n">task_parents</span><span class="p">[</span><span class="n">t</span><span class="p">]</span> <span class="o">=</span> <span class="nb">set</span><span class="p">()</span>
+ <span class="k">for</span> <span class="n">parent</span> <span class="ow">in</span> <span class="n">t</span><span class="o">.</span><span class="n">_outward</span><span class="p">:</span>
+ <span class="k">if</span> <span class="n">parent</span> <span class="ow">in</span> <span class="n">incomplete_tasks</span><span class="p">:</span>
+ <span class="n">task_parents</span><span class="p">[</span><span class="n">t</span><span class="p">]</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="n">parent</span><span class="p">)</span>
+ <span class="c">#print json.dumps(task_parents.items(), indent=4, cls=task_encoder)</span>
+
+
+ <span class="c"># prepare tasks for pipeline run</span>
+ <span class="c"># **********</span>
+ <span class="c"># BEWARE</span>
+ <span class="c"># **********</span>
+ <span class="c">#</span>
+ <span class="c"># Because state is stored, ruffus is *not* reentrant.</span>
+ <span class="c">#</span>
+ <span class="c"># **********</span>
+ <span class="c"># BEWARE</span>
+ <span class="c"># **********</span>
+ <span class="k">for</span> <span class="n">t</span> <span class="ow">in</span> <span class="n">topological_sorted</span><span class="p">:</span>
+ <span class="n">t</span><span class="o">.</span><span class="n">init_for_pipeline</span><span class="p">()</span>
+
+
+ <span class="c">#</span>
+ <span class="c"># prime queue with initial set of job parameters</span>
+ <span class="c">#</span>
+ <span class="n">parameter_q</span> <span class="o">=</span> <span class="n">Queue</span><span class="o">.</span><span class="n">Queue</span><span class="p">()</span>
+ <span class="n">task_with_completed_job_q</span> <span class="o">=</span> <span class="n">Queue</span><span class="o">.</span><span class="n">Queue</span><span class="p">()</span>
+ <span class="n">parameter_generator</span> <span class="o">=</span> <span class="n">make_job_parameter_generator</span> <span class="p">(</span><span class="n">incomplete_tasks</span><span class="p">,</span> <span class="n">task_parents</span><span class="p">,</span>
+ <span class="n">logger</span><span class="p">,</span> <span class="n">forcedtorun_tasks</span><span class="p">,</span>
+ <span class="n">task_with_completed_job_q</span><span class="p">,</span>
+ <span class="n">runtime_data</span><span class="p">,</span> <span class="n">verbose</span><span class="p">,</span>
+ <span class="n">syncmanager</span><span class="p">,</span>
+ <span class="n">touch_files_only</span><span class="p">,</span> <span class="n">job_history</span><span class="p">)</span>
+ <span class="n">job_parameters</span> <span class="o">=</span> <span class="n">parameter_generator</span><span class="p">()</span>
+ <span class="n">fill_queue_with_job_parameters</span><span class="p">(</span><span class="n">job_parameters</span><span class="p">,</span> <span class="n">parameter_q</span><span class="p">,</span> <span class="n">parallelism</span><span class="p">,</span> <span class="n">logger</span><span class="p">,</span> <span class="n">verbose</span><span class="p">)</span>
+
+ <span class="c">#</span>
+ <span class="c"># N.B.</span>
+    <span class="c"># Handling keyboard interrupts may require special handling.</span>
+ <span class="c"># See http://stackoverflow.com/questions/1408356/keyboard-interrupts-with-pythons-multiprocessing-pool</span>
+ <span class="c">#</span>
+ <span class="c"># When waiting for a condition in threading.Condition.wait(), KeyboardInterrupt is never sent</span>
+ <span class="c"># unless a timeout is specified</span>
+ <span class="c">#</span>
+ <span class="c">#</span>
+ <span class="c">#</span>
+ <span class="c"># #</span>
+ <span class="c"># # whether using multiprocessing</span>
+ <span class="c"># #</span>
+ <span class="c"># pool = Pool(parallelism) if multiprocess > 1 else None</span>
+ <span class="c"># if pool:</span>
+ <span class="c"># pool_func = pool.imap_unordered</span>
+ <span class="c"># job_iterator_timeout = []</span>
+ <span class="c"># else:</span>
+ <span class="c"># pool_func = imap</span>
+ <span class="c"># job_iterator_timeout = [999999999999]</span>
+ <span class="c">#</span>
+ <span class="c">#</span>
+ <span class="c"># ....</span>
+ <span class="c">#</span>
+ <span class="c">#</span>
+ <span class="c"># it = pool_func(run_pooled_job_without_exceptions, feed_job_params_to_process_pool())</span>
+ <span class="c"># while 1:</span>
+ <span class="c"># try:</span>
+ <span class="c"># job_result = it.next(*job_iterator_timeout)</span>
+ <span class="c">#</span>
+ <span class="c"># ...</span>
+ <span class="c">#</span>
+ <span class="c"># except StopIteration:</span>
+ <span class="c"># break</span>
+
+
+
+
+ <span class="k">if</span> <span class="n">pool</span><span class="p">:</span>
+ <span class="n">pool_func</span> <span class="o">=</span> <span class="n">pool</span><span class="o">.</span><span class="n">imap_unordered</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="n">pool_func</span> <span class="o">=</span> <span class="n">imap</span>
+
+
+
+ <span class="n">feed_job_params_to_process_pool</span> <span class="o">=</span> <span class="n">feed_job_params_to_process_pool_factory</span> <span class="p">(</span><span class="n">parameter_q</span><span class="p">,</span> <span class="n">logger</span><span class="p">,</span> <span class="n">verbose</span><span class="p">)</span>
+
+ <span class="c">#</span>
+ <span class="c"># for each result from job</span>
+ <span class="c">#</span>
+ <span class="n">job_errors</span> <span class="o">=</span> <span class="n">RethrownJobError</span><span class="p">()</span>
+ <span class="n">tasks_with_errors</span> <span class="o">=</span> <span class="nb">set</span><span class="p">()</span>
+
+
+ <span class="c">#</span>
+ <span class="c"># job_result.job_name / job_result.return_value</span>
+ <span class="c"># Reserved for returning result from job...</span>
+ <span class="c"># How?</span>
+ <span class="c">#</span>
+ <span class="k">for</span> <span class="n">job_result</span> <span class="ow">in</span> <span class="n">pool_func</span><span class="p">(</span><span class="n">run_pooled_job_without_exceptions</span><span class="p">,</span> <span class="n">feed_job_params_to_process_pool</span><span class="p">()):</span>
+ <span class="n">t</span> <span class="o">=</span> <span class="n">node</span><span class="o">.</span><span class="n">lookup_node_from_name</span><span class="p">(</span><span class="n">job_result</span><span class="o">.</span><span class="n">task_name</span><span class="p">)</span>
+
+ <span class="c"># remove failed jobs from history-- their output is bogus now!</span>
+ <span class="k">if</span> <span class="n">job_result</span><span class="o">.</span><span class="n">state</span> <span class="ow">in</span> <span class="p">(</span><span class="n">JOB_ERROR</span><span class="p">,</span> <span class="n">JOB_SIGNALLED_BREAK</span><span class="p">):</span>
+
+ <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">job_result</span><span class="o">.</span><span class="n">params</span><span class="p">)</span> <span class="o">></span> <span class="mi">1</span><span class="p">:</span> <span class="c"># some jobs have no outputs</span>
+ <span class="n">output_file_name</span> <span class="o">=</span> <span class="n">job_result</span><span class="o">.</span><span class="n">params</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">output_file_name</span><span class="p">,</span> <span class="nb">list</span><span class="p">):</span> <span class="c"># some have multiple outputs from one job</span>
+ <span class="n">output_file_name</span> <span class="o">=</span> <span class="p">[</span><span class="n">output_file_name</span><span class="p">]</span>
+ <span class="c">#</span>
+                <span class="c"># N.B. output parameters are not necessarily all strings</span>
+ <span class="c">#</span>
+ <span class="k">for</span> <span class="n">o_f_n</span> <span class="ow">in</span> <span class="n">get_strings_in_nested_sequence</span><span class="p">(</span><span class="n">output_file_name</span><span class="p">):</span>
+ <span class="c">#</span>
+ <span class="c"># use paths relative to working directory</span>
+ <span class="c">#</span>
+ <span class="n">o_f_n</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">relpath</span><span class="p">(</span><span class="n">o_f_n</span><span class="p">)</span>
+ <span class="n">job_history</span><span class="o">.</span><span class="n">pop</span><span class="p">(</span><span class="n">o_f_n</span><span class="p">,</span> <span class="bp">None</span><span class="p">)</span> <span class="c"># remove outfile from history if it exists</span>
+
+ <span class="c"># only save poolsize number of errors</span>
+ <span class="k">if</span> <span class="n">job_result</span><span class="o">.</span><span class="n">state</span> <span class="o">==</span> <span class="n">JOB_ERROR</span><span class="p">:</span>
+ <span class="n">log_at_level</span> <span class="p">(</span><span class="n">logger</span><span class="p">,</span> <span class="mi">6</span><span class="p">,</span> <span class="n">verbose</span><span class="p">,</span> <span class="s">" Exception caught for </span><span class="si">%s</span><span class="s">"</span> <span class="o">%</span> <span class="n">job_result</span><span class="o">.</span><span class="n">job_name</span><span class="p">)</span>
+ <span class="n">job_errors</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">job_result</span><span class="o">.</span><span class="n">exception</span><span class="p">)</span>
+ <span class="n">tasks_with_errors</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="n">t</span><span class="p">)</span>
+
+ <span class="c">#</span>
+ <span class="c"># print to logger immediately</span>
+ <span class="c">#</span>
+ <span class="k">if</span> <span class="n">log_exceptions</span><span class="p">:</span>
+ <span class="n">log_at_level</span> <span class="p">(</span><span class="n">logger</span><span class="p">,</span> <span class="mi">6</span><span class="p">,</span> <span class="n">verbose</span><span class="p">,</span> <span class="s">" Log Exception"</span><span class="p">)</span>
+ <span class="n">logger</span><span class="o">.</span><span class="n">error</span><span class="p">(</span><span class="n">job_errors</span><span class="o">.</span><span class="n">get_nth_exception_str</span><span class="p">())</span>
+
+ <span class="c">#</span>
+ <span class="c"># break if too many errors</span>
+ <span class="c">#</span>
+ <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">job_errors</span><span class="p">)</span> <span class="o">>=</span> <span class="n">parallelism</span> <span class="ow">or</span> <span class="n">exceptions_terminate_immediately</span><span class="p">:</span>
+ <span class="n">log_at_level</span> <span class="p">(</span><span class="n">logger</span><span class="p">,</span> <span class="mi">6</span><span class="p">,</span> <span class="n">verbose</span><span class="p">,</span> <span class="s">" Break loop </span><span class="si">%s</span><span class="s"> </span><span class="si">%s</span><span class="s"> </span><span class="si">%s</span><span class="s"> "</span> <span class="o">%</span> <span class="p">(</span><span cl [...]
+ <span class="n">parameter_q</span><span class="o">.</span><span class="n">put</span><span class="p">(</span><span class="n">all_tasks_complete</span><span class="p">())</span>
+ <span class="k">break</span>
+
+
+ <span class="c"># break immediately if the user says stop</span>
+ <span class="k">elif</span> <span class="n">job_result</span><span class="o">.</span><span class="n">state</span> <span class="o">==</span> <span class="n">JOB_SIGNALLED_BREAK</span><span class="p">:</span>
+ <span class="n">job_errors</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">job_result</span><span class="o">.</span><span class="n">exception</span><span class="p">)</span>
+ <span class="n">job_errors</span><span class="o">.</span><span class="n">specify_task</span><span class="p">(</span><span class="n">t</span><span class="p">,</span> <span class="s">"Exceptions running jobs"</span><span class="p">)</span>
+ <span class="n">log_at_level</span> <span class="p">(</span><span class="n">logger</span><span class="p">,</span> <span class="mi">6</span><span class="p">,</span> <span class="n">verbose</span><span class="p">,</span> <span class="s">" Break loop JOB_SIGNALLED_BREAK </span><span class="si">%s</span><span class="s"> </span><span class="si">%s</span><span class="s"> "</span> <span class="o">%</span> <span class="p">(</span><span class="nb">len</span><span class="p" [...]
+ <span class="n">parameter_q</span><span class="o">.</span><span class="n">put</span><span class="p">(</span><span class="n">all_tasks_complete</span><span class="p">())</span>
+ <span class="k">break</span>
+
+ <span class="k">else</span><span class="p">:</span>
+ <span class="k">if</span> <span class="n">job_result</span><span class="o">.</span><span class="n">state</span> <span class="o">==</span> <span class="n">JOB_UP_TO_DATE</span><span class="p">:</span>
+ <span class="k">if</span> <span class="n">verbose</span> <span class="o">></span> <span class="mi">1</span><span class="p">:</span>
+ <span class="n">logger</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s">" </span><span class="si">%s</span><span class="s"> unnecessary: already up to date"</span> <span class="o">%</span> <span class="n">job_result</span><span class="o">.</span><span class="n">job_name</span><span class="p">)</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="k">if</span> <span class="n">verbose</span><span class="p">:</span>
+ <span class="n">logger</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s">" </span><span class="si">%s</span><span class="s"> completed"</span> <span class="o">%</span> <span class="n">job_result</span><span class="o">.</span><span class="n">job_name</span><span class="p">)</span>
+ <span class="c"># save this task name and the job (input and output files)</span>
+ <span class="c"># alternatively, we could just save the output file and its</span>
+ <span class="c"># completion time, or on the other end of the spectrum,</span>
+ <span class="c"># we could save a checksum of the function that generated</span>
+ <span class="c"># this file, something akin to:</span>
+ <span class="c"># chksum = md5.md5(marshal.dumps(t.user_defined_work_func.func_code.co_code))</span>
+ <span class="c"># we could even checksum the arguments to the function that</span>
+ <span class="c"># generated this file:</span>
+ <span class="c"># chksum2 = md5.md5(marshal.dumps(t.user_defined_work_func.func_defaults) +</span>
+ <span class="c"># marshal.dumps(t.args))</span>
+
+ <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">job_result</span><span class="o">.</span><span class="n">params</span><span class="p">)</span> <span class="o">></span> <span class="mi">1</span><span class="p">:</span> <span class="c"># some jobs have no outputs</span>
+ <span class="n">output_file_name</span> <span class="o">=</span> <span class="n">job_result</span><span class="o">.</span><span class="n">params</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">output_file_name</span><span class="p">,</span> <span class="nb">list</span><span class="p">):</span> <span class="c"># some have multiple outputs from one job</span>
+ <span class="n">output_file_name</span> <span class="o">=</span> <span class="p">[</span><span class="n">output_file_name</span><span class="p">]</span>
+ <span class="c">#</span>
+                <span class="c"># N.B. output parameters are not necessarily all strings</span>
+ <span class="c"># and not all files have been successfully created,</span>
+ <span class="c"># even though the task apparently completed properly!</span>
+ <span class="c">#</span>
+ <span class="k">for</span> <span class="n">o_f_n</span> <span class="ow">in</span> <span class="n">get_strings_in_nested_sequence</span><span class="p">(</span><span class="n">output_file_name</span><span class="p">):</span>
+ <span class="c">#</span>
+ <span class="c"># use paths relative to working directory</span>
+ <span class="c">#</span>
+ <span class="n">o_f_n</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">relpath</span><span class="p">(</span><span class="n">o_f_n</span><span class="p">)</span>
+ <span class="k">try</span><span class="p">:</span>
+ <span class="n">log_at_level</span> <span class="p">(</span><span class="n">logger</span><span class="p">,</span> <span class="mi">6</span><span class="p">,</span> <span class="n">verbose</span><span class="p">,</span> <span class="s">" Job History for : "</span> <span class="o">+</span> <span class="n">o_f_n</span><span class="p">)</span>
+ <span class="n">mtime</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">getmtime</span><span class="p">(</span><span class="n">o_f_n</span><span class="p">)</span>
+ <span class="c">#</span>
+                        <span class="c"># use the probably higher resolution time.time() instead of mtime,</span>
+                        <span class="c"># which might have 1 or 2 second resolution, unless there is</span>
+                        <span class="c"># clock skew and the file system time > system time</span>
+                        <span class="c"># (e.g. for network file systems)</span>
+ <span class="c">#</span>
+ <span class="n">epoch_seconds</span> <span class="o">=</span> <span class="n">time</span><span class="o">.</span><span class="n">time</span><span class="p">()</span>
+                        <span class="c"># Aargh. Go back to inserting one second between jobs</span>
+ <span class="k">if</span> <span class="n">epoch_seconds</span> <span class="o"><</span> <span class="n">mtime</span><span class="p">:</span>
+ <span class="k">if</span> <span class="n">one_second_per_job</span> <span class="o">==</span> <span class="bp">None</span> <span class="ow">and</span> <span class="ow">not</span> <span class="n">runtime_data</span><span class="p">[</span><span class="s">"ONE_SECOND_PER_JOB"</span><span class="p">]:</span>
+ <span class="n">log_at_level</span> <span class="p">(</span><span class="n">logger</span><span class="p">,</span> <span class="mi">6</span><span class="p">,</span> <span class="n">verbose</span><span class="p">,</span> <span class="s">" Switch to one second per job"</span><span class="p">)</span>
+ <span class="n">runtime_data</span><span class="p">[</span><span class="s">"ONE_SECOND_PER_JOB"</span><span class="p">]</span> <span class="o">=</span> <span class="bp">True</span>
+ <span class="k">elif</span> <span class="n">epoch_seconds</span> <span class="o">-</span> <span class="n">mtime</span> <span class="o"><</span> <span class="mf">1.1</span><span class="p">:</span>
+ <span class="n">mtime</span> <span class="o">=</span> <span class="n">epoch_seconds</span>
+ <span class="n">chksum</span> <span class="o">=</span> <span class="n">JobHistoryChecksum</span><span class="p">(</span><span class="n">o_f_n</span><span class="p">,</span> <span class="n">mtime</span><span class="p">,</span> <span class="n">job_result</span><span class="o">.</span><span class="n">params</span><span class="p">[</span><span class="mi">2</span><span class="p">:],</span> <span class="n">t</span><span class="p">)</span>
+ <span class="n">job_history</span><span class="p">[</span><span class="n">o_f_n</span><span class="p">]</span> <span class="o">=</span> <span class="n">chksum</span>
+ <span class="k">except</span><span class="p">:</span>
+ <span class="k">pass</span>
+
+ <span class="c">##for output_file_name in t.output_filenames:</span>
+ <span class="c">## # could use current time instead...</span>
+ <span class="c">## if not isinstance(output_file_name, list):</span>
+ <span class="c">## output_file_name = [output_file_name]</span>
+ <span class="c">## for o_f_n in output_file_name:</span>
+ <span class="c">## mtime = os.path.getmtime(o_f_n)</span>
+ <span class="c">## chksum = JobHistoryChecksum(o_f_n, mtime, job_result.params[2:], t)</span>
+ <span class="c">## job_history[o_f_n] = chksum</span>
+
+
+ <span class="c">#</span>
+ <span class="c"># signal completed task after checksumming</span>
+ <span class="c">#</span>
+ <span class="n">task_with_completed_job_q</span><span class="o">.</span><span class="n">put</span><span class="p">((</span><span class="n">t</span><span class="p">,</span> <span class="n">job_result</span><span class="o">.</span><span class="n">task_name</span><span class="p">,</span> <span class="n">job_result</span><span class="o">.</span><span class="n">job_name</span><span class="p">))</span>
+
+
+ <span class="c"># make sure queue is still full after each job is retired</span>
+        <span class="c"># do this after updating which jobs are incomplete</span>
+ <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">job_errors</span><span class="p">):</span>
+ <span class="c">#parameter_q.clear()</span>
+ <span class="c">#if len(job_errors) == 1 and not parameter_q._closed:</span>
+ <span class="n">parameter_q</span><span class="o">.</span><span class="n">put</span><span class="p">(</span><span class="n">all_tasks_complete</span><span class="p">())</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="n">fill_queue_with_job_parameters</span><span class="p">(</span><span class="n">job_parameters</span><span class="p">,</span> <span class="n">parameter_q</span><span class="p">,</span> <span class="n">parallelism</span><span class="p">,</span> <span class="n">logger</span><span class="p">,</span> <span class="n">verbose</span><span class="p">)</span>
+
+
+ <span class="n">syncmanager</span><span class="o">.</span><span class="n">shutdown</span><span class="p">()</span>
+
+
+ <span class="k">if</span> <span class="n">pool</span><span class="p">:</span>
+ <span class="n">pool</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
+ <span class="n">pool</span><span class="o">.</span><span class="n">terminate</span><span class="p">()</span>
+
+
+ <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">job_errors</span><span class="p">):</span>
+ <span class="k">raise</span> <span class="n">job_errors</span>
+
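
A short usage sketch of the options documented in the pipeline_run docstring above (the task, file names and history-file path are invented for the illustration):

    from ruffus import originate, pipeline_run

    @originate(["result.1", "result.2"])
    def make_results(output_file):
        open(output_file, "w").close()

    pipeline_run(target_tasks=[make_results],
                 multiprocess=4,       # up to 4 concurrent jobs in separate processes
                 verbose=3,            # log task names and the jobs to be run
                 checksum_level=1,     # file timestamps plus a record of job completion
                 history_file=".my_ruffus_history.sqlite")
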
+
+
+<span class="c"># use high resolution timestamps where available</span>
+<span class="c"># default in python 2.5 and greater</span>
+<span class="c"># N.B. File modify times / stat values have 1 second precision for many file systems</span>
+<span class="c"># and may not be accurate to boot, especially over the network.</span></div>
+<span class="n">os</span><span class="o">.</span><span class="n">stat_float_times</span><span class="p">(</span><span class="bp">True</span><span class="p">)</span>
+
+
+<span class="k">if</span> <span class="n">__name__</span> <span class="o">==</span> <span class="s">'__main__'</span><span class="p">:</span>
+ <span class="kn">import</span> <span class="nn">unittest</span>
+
+
+
+
+ <span class="c">#</span>
+ <span class="c"># debug parameter ignored if called as a module</span>
+ <span class="c">#</span>
+ <span class="k">if</span> <span class="n">sys</span><span class="o">.</span><span class="n">argv</span><span class="o">.</span><span class="n">count</span><span class="p">(</span><span class="s">"--debug"</span><span class="p">):</span>
+ <span class="n">sys</span><span class="o">.</span><span class="n">argv</span><span class="o">.</span><span class="n">remove</span><span class="p">(</span><span class="s">"--debug"</span><span class="p">)</span>
+ <span class="n">unittest</span><span class="o">.</span><span class="n">main</span><span class="p">()</span>
+</pre></div>
diff --git a/doc/_build/html/_sources/cheatsheet.txt b/doc/_build/html/_sources/cheatsheet.txt
new file mode 100644
index 0000000..aa5585d
--- /dev/null
+++ b/doc/_build/html/_sources/cheatsheet.txt
@@ -0,0 +1,85 @@
+.. include:: global.inc
+.. _cheat_sheet:
+
+
+#####################
+Cheat Sheet
+#####################
+
+The ``ruffus`` module is a lightweight way to add support
+for running computational pipelines.
+
+| Each stage or **task** in a computational pipeline is represented by a python function.
+| Each python function can be called in parallel to run multiple **jobs**.
+
+================================================
+1. Annotate functions with **Ruffus** decorators
+================================================
+
+
+******
+Core
+******
+.. csv-table::
+ :header: "Decorator", "Syntax"
+ :widths: 100, 600,1
+
+ "@originate (:ref:`Manual <new_manual.originate>`)
+ ", "
+ :ref:`@originate <decorators.originate>` ( ``output_files``, [``extra_parameters``,...] )
+ ", ""
+ "@split (:ref:`Manual <new_manual.split>`)
+ ", "
+ :ref:`@split <decorators.split>` ( ``tasks_or_file_names``, ``output_files``, [``extra_parameters``,...] )
+ ", ""
+ "@transform (:ref:`Manual <new_manual.transform>`)
+ ", "
+ | :ref:`@transform <decorators.transform>` ( ``tasks_or_file_names``, :ref:`suffix <decorators.transform.suffix_string>`\ *(*\ ``suffix_string``\ *)*\ , ``output_pattern``, [``extra_parameters``,...] )
+ | :ref:`@transform <decorators.transform>` ( ``tasks_or_file_names``, :ref:`regex <decorators.transform.matching_regex>`\ *(*\ ``regex_pattern``\ *)*\ , ``output_pattern``, [``extra_parameters``,...] )
+
+ ", ""
+ "@merge (:ref:`Manual <new_manual.merge>`)
+ ", "
+ :ref:`@merge <decorators.merge>` (``tasks_or_file_names``, ``output``, [``extra_parameters``,...] )
+ ", ""
+ "@posttask (:ref:`Manual <new_manual.posttask>`)
+ ", "
+ | :ref:`@posttask <decorators.posttask>` ( ``signal_task_completion_function`` )
+ | :ref:`@posttask <decorators.posttask>` (:ref:`touch_file <decorators.touch_file>`\ ( ``'task1.completed'`` ))
+
+ ", ""
+
+************************************************************************************************
+See :ref:`Decorators <decorators>` for a complete list of decorators
+************************************************************************************************
+
+
+
+================================================
+2. Print dependency graph if necessary
+================================================
+
+- For a graphical flowchart in ``jpg``, ``svg``, ``dot``, ``png``, ``ps``, ``gif`` formats::
+
+ pipeline_printout_graph ( "flowchart.svg")
+
+.. comment
+
+ This requires the `dot programme <http://www.graphviz.org/>`_ to be installed
+
+- For a text printout of all jobs ::
+
+ pipeline_printout()
+
+
+================================================
+3. Run the pipeline
+================================================
+
+::
+
+ pipeline_run(multiprocess = N_PARALLEL_JOBS)
+
+
+
+
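+A minimal sketch putting the three steps together (the file names ``a.start``, ``b.start``
+and ``summary.all`` are purely illustrative)::
+
+    from ruffus import *
+
+    @originate(['a.start', 'b.start'])
+    def create_initial_files(output_file):
+        open(output_file, "w").close()
+
+    @transform(create_initial_files, suffix(".start"), ".processed")
+    def process_file(input_file, output_file):
+        # copy the input to the output to mark the job as done
+        open(output_file, "w").write(open(input_file).read())
+
+    @merge(process_file, "summary.all")
+    def summarise(input_files, output_file):
+        open(output_file, "w").write("\n".join(input_files) + "\n")
+
+    pipeline_printout_graph("flowchart.svg")    # step 2 (optional)
+    pipeline_run(multiprocess = N_PARALLEL_JOBS)    # step 3
+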
diff --git a/doc/_build/html/_sources/contents.txt b/doc/_build/html/_sources/contents.txt
new file mode 100644
index 0000000..0e91439
--- /dev/null
+++ b/doc/_build/html/_sources/contents.txt
@@ -0,0 +1,197 @@
+.. include:: global.inc
+***************************
+**Ruffus** documentation
+***************************
+==========================================
+Start Here:
+==========================================
+.. toctree::
+ :maxdepth: 2
+
+ installation.rst
+ tutorials/new_tutorial/manual_contents.rst
+ tutorials/new_tutorial/introduction.rst
+ tutorials/new_tutorial/transform.rst
+ tutorials/new_tutorial/transform_in_parallel.rst
+ tutorials/new_tutorial/originate.rst
+ tutorials/new_tutorial/pipeline_printout.rst
+ tutorials/new_tutorial/command_line.rst
+ tutorials/new_tutorial/pipeline_printout_graph.rst
+ tutorials/new_tutorial/output_file_names.rst
+ tutorials/new_tutorial/mkdir.rst
+ tutorials/new_tutorial/checkpointing.rst
+ tutorials/new_tutorial/decorators_compendium.rst
+ tutorials/new_tutorial/split.rst
+ tutorials/new_tutorial/merge.rst
+ tutorials/new_tutorial/multiprocessing.rst
+ tutorials/new_tutorial/logging.rst
+ tutorials/new_tutorial/subdivide_collate.rst
+ tutorials/new_tutorial/combinatorics.rst
+ tutorials/new_tutorial/active_if.rst
+ tutorials/new_tutorial/posttask.rst
+ tutorials/new_tutorial/inputs.rst
+ tutorials/new_tutorial/onthefly.rst
+ tutorials/new_tutorial/parallel.rst
+ tutorials/new_tutorial/check_if_uptodate.rst
+ tutorials/new_tutorial/flowchart_colours.rst
+ tutorials/new_tutorial/dependencies.rst
+ tutorials/new_tutorial/exceptions.rst
+ tutorials/new_tutorial/list_of_ruffus_names.rst
+ tutorials/new_tutorial/deprecated_files.rst
+ tutorials/new_tutorial/deprecated_files_re.rst
+
+
+Example code for:
+
+.. toctree::
+ :maxdepth: 1
+
+ tutorials/new_tutorial/introduction_code.rst
+ tutorials/new_tutorial/transform_code.rst
+ tutorials/new_tutorial/transform_in_parallel_code.rst
+ tutorials/new_tutorial/originate_code.rst
+ tutorials/new_tutorial/pipeline_printout_code.rst
+ tutorials/new_tutorial/pipeline_printout_graph_code.rst
+ tutorials/new_tutorial/output_file_names_code.rst
+ tutorials/new_tutorial/mkdir_code.rst
+ tutorials/new_tutorial/checkpointing_code.rst
+ tutorials/new_tutorial/split_code.rst
+ tutorials/new_tutorial/merge_code.rst
+ tutorials/new_tutorial/multiprocessing_code.rst
+ tutorials/new_tutorial/logging_code.rst
+ tutorials/new_tutorial/subdivide_collate_code.rst
+ tutorials/new_tutorial/combinatorics_code.rst
+ tutorials/new_tutorial/inputs_code.rst
+ tutorials/new_tutorial/onthefly_code.rst
+ tutorials/new_tutorial/flowchart_colours_code.rst
+
+
+
+=====================
+Overview:
+=====================
+.. toctree::
+ :maxdepth: 2
+
+ cheatsheet.rst
+ pipeline_functions.rst
+ drmaa_wrapper_functions.rst
+ installation.rst
+ design.rst
+ Bugs and Updates <history>
+ Future plans <todo>
+ Implementation notes <implementation_notes.rst>
+ faq.rst
+ glossary.rst
+ gallery.rst
+ why_ruffus.rst
+
+=====================
+Examples
+=====================
+.. toctree::
+ :maxdepth: 2
+
+ examples/bioinformatics/index.rst
+ examples/bioinformatics/part2.rst
+ examples/bioinformatics/part1_code.rst
+ examples/bioinformatics/part2_code.rst
+ examples/paired_end_data.py.rst
+
+
+
+=====================
+Reference:
+=====================
+######################
+Decorators
+######################
+.. toctree::
+ :maxdepth: 1
+
+ decorators/decorators.rst
+ decorators/indicator_objects.rst
+
+
+.. topic::
+ Core
+
+ .. toctree::
+ :maxdepth: 1
+
+ decorators/originate.rst
+ decorators/split.rst
+ decorators/transform.rst
+ decorators/merge.rst
+
+.. topic::
+ For advanced users
+
+ .. toctree::
+ :maxdepth: 1
+
+ decorators/subdivide.rst
+ decorators/transform_ex.rst
+ decorators/collate.rst
+ decorators/collate_ex.rst
+ decorators/graphviz.rst
+ decorators/mkdir.rst
+ decorators/jobs_limit.rst
+ decorators/posttask.rst
+ decorators/active_if.rst
+ decorators/follows.rst
+
+.. topic::
+ Combinatorics
+
+ .. toctree::
+ :maxdepth: 1
+
+ decorators/product.rst
+ decorators/permutations.rst
+ decorators/combinations.rst
+ decorators/combinations_with_replacement.rst
+
+.. topic::
+ Esoteric
+
+ .. toctree::
+ :maxdepth: 1
+
+ decorators/files_ex.rst
+ decorators/check_if_uptodate.rst
+ decorators/parallel.rst
+
+.. topic::
+ Deprecated
+
+ .. toctree::
+ :maxdepth: 1
+
+ decorators/files.rst
+ decorators/files_re.rst
+
+
+######################
+Modules:
+######################
+
+.. toctree::
+ :maxdepth: 2
+
+ task.rst
+ proxy_logger.rst
+
+.. comment
+ graph.rst
+ print_dependencies.rst
+ adjacent_pairs_iterate.rst
+
+
+=====================
+Indices and tables
+=====================
+
+* :ref:`genindex`
+* :ref:`modindex`
+* :ref:`search`
diff --git a/doc/_build/html/_sources/decorators/active_if.txt b/doc/_build/html/_sources/decorators/active_if.txt
new file mode 100644
index 0000000..1b02e1f
--- /dev/null
+++ b/doc/_build/html/_sources/decorators/active_if.txt
@@ -0,0 +1,110 @@
+.. include:: ../global.inc
+.. _decorators.active_if:
+.. index::
+ pair: @active_if; Syntax
+
+.. seealso::
+
+ * :ref:`Decorators <decorators>` for more decorators
+ * More on @active_if in the ``Ruffus`` :ref:`Manual <new_manual.active_if>`
+
+
+############
+ at active_if
+############
+
+.. Comment. These are parameter names
+
+.. |on_or_off| replace:: `on_or_off`
+.. _on_or_off: `decorators.active_if.on_or_off`_
+
+***************************************************************************************************************************************************
+*@active_if*\ (on_or_off1, [on_or_off2,...])
+***************************************************************************************************************************************************
+ **Purpose:**
+
+    * Switches tasks on and off at run time, depending on the parameters given to ``@active_if``.
+    * Evaluated each time ``pipeline_run``, ``pipeline_printout`` or ``pipeline_printout_graph`` is called.
+    * The design and initial implementation were contributed by Jacob Biesinger.
+    * Dormant tasks behave as if they are up to date and have no output.
+
+ **Example**:
+
+ .. code-block:: python
+ :emphasize-lines: 20
+
+ from ruffus import *
+ run_if_true_1 = True
+ run_if_true_2 = False
+ run_if_true_3 = True
+
+
+ #
+ # task1
+ #
+ @originate(['a.foo', 'b.foo'])
+ def create_files(outfile):
+ """
+ create_files
+ """
+ open(outfile, "w").write(outfile + "\n")
+
+ #
+ # Only runs if all three run_if_true conditions are met
+ #
+ # @active_if determines if task is active
+ @active_if(run_if_true_1, lambda: run_if_true_2)
+ @active_if(run_if_true_3)
+ @transform(create_files, suffix(".foo"), ".bar")
+ def this_task_might_be_inactive(infile, outfile):
+ open(outfile, "w").write("%s -> %s\n" % (infile, outfile))
+
+
+ # @active_if switches off task because run_if_true_2 == False
+ pipeline_run(verbose = 3)
+
+ # @active_if switches on task because all run_if_true conditions are met
+ run_if_true_2 = True
+ pipeline_run(verbose = 3)
+
+
+ Produces the following output:
+
+ .. code-block:: pycon
+ :emphasize-lines: 1,13
+
+ >>> # @active_if switches off task "this_task_might_be_inactive" because run_if_true_2 == False
+ >>> pipeline_run(verbose = 3)
+
+ Task enters queue = create_files
+ create_files
+ Job = [None -> a.foo] Missing file [a.foo]
+ Job = [None -> b.foo] Missing file [b.foo]
+ Job = [None -> a.foo] completed
+ Job = [None -> b.foo] completed
+ Completed Task = create_files
+ Inactive Task = this_task_might_be_inactive
+
+ >>> # @active_if switches on task "this_task_might_be_inactive" because all run_if_true conditions are met
+ >>> run_if_true_2 = True
+ >>> pipeline_run(verbose = 3)
+
+ Task enters queue = this_task_might_be_inactive
+
+ Job = [a.foo -> a.bar] Missing file [a.bar]
+ Job = [b.foo -> b.bar] Missing file [b.bar]
+ Job = [a.foo -> a.bar] completed
+ Job = [b.foo -> b.bar] completed
+ Completed Task = this_task_might_be_inactive
+
+
+ **Parameters:**
+
+.. _decorators.active_if.on_or_off:
+
+ * *on_or_off*:
+       A comma-separated list of boolean conditions. These can be values, functions or callable objects which return True / False.
+
+       Multiple ``@active_if`` decorators can be stacked for clarity, as in the example above.
+
+
diff --git a/doc/_build/html/_sources/decorators/check_if_uptodate.txt b/doc/_build/html/_sources/decorators/check_if_uptodate.txt
new file mode 100644
index 0000000..893f430
--- /dev/null
+++ b/doc/_build/html/_sources/decorators/check_if_uptodate.txt
@@ -0,0 +1,68 @@
+.. include:: ../global.inc
+.. _decorators.check_if_uptodate:
+
+.. index::
+ pair: @check_if_uptodate; Syntax
+
+.. seealso::
+
+ * :ref:`Decorators <decorators>` for more decorators
+
+.. |dependency_checking_function| replace:: `dependency_checking_function`
+.. _dependency_checking_function: `decorators.check_if_uptodate.dependency_checking_function`_
+
+########################
+ at check_if_uptodate
+########################
+
+*******************************************************************************************
+*@check_if_uptodate* (|dependency_checking_function|_)
+*******************************************************************************************
+
+ **Purpose:**
+        Uses a custom function to decide whether a job is out of date and needs to be run.
+
+ Usually used in conjunction with :ref:`@parallel() <decorators.parallel>`
+
+ **Example**::
+
+ from ruffus import *
+ import os
+ def check_file_exists(input_file, output_file):
+ if not os.path.exists(output_file):
+ return True, "Missing file %s" % output_file
+ else:
+ return False, "File %s exists" % output_file
+
+ @parallel([[None, "a.1"]])
+ @check_if_uptodate(check_file_exists)
+ def create_if_necessary(input_file, output_file):
+ open(output_file, "w")
+
+ pipeline_run([create_if_necessary])
+
+ Is equivalent to::
+
+ from ruffus import *
+ @files(None, "a.1")
+ def create_if_necessary(input_file, output_file):
+ open(output_file, "w")
+
+ pipeline_run([create_if_necessary])
+
+ Both produce the same output::
+
+ Task = create_if_necessary
+ Job = [null, "a.1"] completed
+
+ **Parameters:**
+
+.. _decorators.check_if_uptodate.dependency_checking_function:
+
+    * *dependency_checking_function*:
+        Must return two values: whether the job needs to be run, and a message explaining why.
+
+        The *dependency_checking_function* must accept the same number of parameters as the
+        task function, e.g. ``input_file`` and ``output_file`` above.
+
+
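+    A minimal sketch (with hypothetical file names, not taken from the examples above) of a checking
+    function for a task that takes an extra parameter; the checker must accept the same three arguments::
+
+        from ruffus import *
+        import os
+
+        def check_newer_than_input(input_file, output_file, text):
+            # re-run if either file is missing or the output is older than the input
+            if not os.path.exists(output_file):
+                return True, "Missing file %s" % output_file
+            if not os.path.exists(input_file):
+                return True, "Missing input %s" % input_file
+            if os.path.getmtime(output_file) < os.path.getmtime(input_file):
+                return True, "%s is older than %s" % (output_file, input_file)
+            return False, "%s is up to date" % output_file
+
+        @parallel([["a.1", "a.2", "A file"]])
+        @check_if_uptodate(check_newer_than_input)
+        def process_file(input_file, output_file, text):
+            open(output_file, "w").write(open(input_file).read())
+
+        pipeline_run([process_file])
+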
diff --git a/doc/_build/html/_sources/decorators/collate.txt b/doc/_build/html/_sources/decorators/collate.txt
new file mode 100644
index 0000000..1121925
--- /dev/null
+++ b/doc/_build/html/_sources/decorators/collate.txt
@@ -0,0 +1,154 @@
+.. include:: ../global.inc
+.. _decorators.collate:
+.. index::
+ pair: @collate; Syntax
+
+.. seealso::
+
+ * :ref:`Decorators <decorators>` for more decorators
+
+########################
+ at collate
+########################
+
+.. |tasks_or_file_names| replace:: `tasks_or_file_names`
+.. _tasks_or_file_names: `decorators.collate.tasks_or_file_names`_
+.. |extra_parameters| replace:: `extra_parameters`
+.. _extra_parameters: `decorators.collate.extra_parameters`_
+.. |output_pattern| replace:: `output_pattern`
+.. _output_pattern: `decorators.collate.output_pattern`_
+.. |matching_regex| replace:: `matching_regex`
+.. _matching_regex: `decorators.collate.matching_regex`_
+.. |matching_formatter| replace:: `matching_formatter`
+.. _matching_formatter: `decorators.collate.matching_formatter`_
+
+
+********************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
+*@collate* ( |tasks_or_file_names|_, :ref:`regex<decorators.regex>`\ *(*\ |matching_regex|_\ *)* | :ref:`formatter<decorators.formatter>`\ *(*\ |matching_formatter|_\ *)*\, |output_pattern|_, [|extra_parameters|_,...] )
+********************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
+ **Purpose:**
+ Groups / collates sets of input files, each into a separate summary.
+
+ Only out of date tasks (comparing input and output files) will be run
+
+ Output file names and strings in the extra parameters
+ are determined from |tasks_or_file_names|_, i.e. from the output
+    of upstream tasks, or a list of file names.
+
+ String replacement occurs either through suffix matches via :ref:`suffix<decorators.suffix>` or
+ the :ref:`formatter<decorators.formatter>` or :ref:`regex<decorators.regex>` indicators.
+
+ ``@collate`` groups together all **Input** which result in identical **Output** and **extra**
+ parameters.
+
+ It is a **many to fewer** operation.
+
+
+ **Example**:
+ ``regex(r".*(\..+)"), "\1.summary"`` creates a separate summary file for each suffix::
+
+ animal_files = "a.fish", "b.fish", "c.mammals", "d.mammals"
+ # summarise by file suffix:
+ @collate(animal_files, regex(r"\.(.+)$"), r'\1.summary')
+ def summarize(infiles, summary_file):
+ pass
+
+ **Parameters:**
+
+
+.. _decorators.collate.tasks_or_file_names:
+
+ * *tasks_or_file_names*
+ can be a:
+
+ #. Task / list of tasks (as in the example above).
+ File names are taken from the output of the specified task(s)
+ #. (Nested) list of file name strings.
+ File names containing ``*[]?`` will be expanded as a |glob|_.
+ E.g.:``"a.*" => "a.1", "a.2"``
+
+
+.. _decorators.collate.matching_regex:
+
+ * *matching_regex*
+ is a python regular expression string, which must be wrapped in
+ a :ref:`regex<decorators.regex>` indicator object
+ See python `regular expression (re) <http://docs.python.org/library/re.html>`_
+ documentation for details of regular expression syntax
+
+.. _decorators.collate.matching_formatter:
+
+ * *matching_formatter*
+ a :ref:`formatter<decorators.formatter>` indicator object containing optionally
+ a python `regular expression (re) <http://docs.python.org/library/re.html>`_.
+
+
+.. _decorators.collate.output_pattern:
+
+ * *output_pattern*
+ Specifies the resulting output file name(s).
+
+.. _decorators.collate.extra_parameters:
+
+ * *extra_parameters*
+ Any extra parameters are passed verbatim to the task function
+
+ #. *outputs* and optional extra parameters are passed to the functions after string
+ substitution in any strings. Non-string values are passed through unchanged.
+ #. Each collate job consists of input files which are aggregated by string substitution
+ to a single set of output / extra parameter matches
+ #. In the above cases, ``a.fish`` and ``b.fish`` both produce ``fish.summary`` after regular
+       expression substitution, and are collated into a single job:
+ ``["a.fish", "b.fish" -> "fish.summary"]``
+       while ``c.mammals`` and ``d.mammals`` both produce ``mammals.summary`` and are collated into a separate job:
+ ``["c.mammals", "d.mammals" -> "mammals.summary"]``
+
+ **Example2**:
+
+ Suppose we had the following files::
+
+ cows.mammals.animal
+ horses.mammals.animal
+ sheep.mammals.animal
+
+ snake.reptile.animal
+ lizard.reptile.animal
+ crocodile.reptile.animal
+
+ pufferfish.fish.animal
+
+    and we wanted to end up with three different resulting outputs::
+
+        cows.mammals.animal
+        horses.mammals.animal
+ sheep.mammals.animal
+ -> mammals.results
+
+ snake.reptile.animal
+ lizard.reptile.animal
+ crocodile.reptile.animal
+ -> reptile.results
+
+ pufferfish.fish.animal
+ -> fish.results
+
+ This is the ``@collate`` code required::
+
+ animals = [ "cows.mammals.animal",
+ "horses.mammals.animal",
+ "sheep.mammals.animal",
+ "snake.reptile.animal",
+ "lizard.reptile.animal",
+ "crocodile.reptile.animal",
+ "pufferfish.fish.animal"]
+
+ @collate(animals, regex(r"(.+)\.(.+)\.animal"), r"\2.results")
+ # \1 = species [cow, horse]
+ # \2 = phylogenetics group [mammals, reptile, fish]
+ def summarize_animals_into_groups(species_file, result_file):
+ " ... more code here"
+ pass
+
+
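+    Under this grouping (a sketch derived from the regular expression above), **Ruffus** would
+    collate the inputs into three jobs, roughly equivalent to calling::
+
+        summarize_animals_into_groups(["cows.mammals.animal",
+                                       "horses.mammals.animal",
+                                       "sheep.mammals.animal"],     "mammals.results")
+        summarize_animals_into_groups(["snake.reptile.animal",
+                                       "lizard.reptile.animal",
+                                       "crocodile.reptile.animal"], "reptile.results")
+        summarize_animals_into_groups(["pufferfish.fish.animal"],   "fish.results")
+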
+
+See :ref:`@merge <decorators.merge>` for an alternative way to summarise files.
diff --git a/doc/_build/html/_sources/decorators/collate_ex.txt b/doc/_build/html/_sources/decorators/collate_ex.txt
new file mode 100644
index 0000000..50175f4
--- /dev/null
+++ b/doc/_build/html/_sources/decorators/collate_ex.txt
@@ -0,0 +1,145 @@
+.. include:: ../global.inc
+.. _decorators.collate_ex:
+.. index::
+ pair: @collate (Advanced Usage); Syntax
+ pair: @collate, inputs(...); Syntax
+ pair: @collate, add_inputs(...); Syntax
+
+.. seealso::
+
+ * :ref:`Decorators <decorators>` for more decorators
+
+####################################################
+ at collate with ``add_inputs`` and ``inputs``
+####################################################
+
+.. |tasks_or_file_names| replace:: `tasks_or_file_names`
+.. _tasks_or_file_names: `decorators.collate_ex.tasks_or_file_names`_
+.. |extra_parameters| replace:: `extra_parameters`
+.. _extra_parameters: `decorators.collate_ex.extra_parameters`_
+.. |output_pattern| replace:: `output_pattern`
+.. _output_pattern: `decorators.collate_ex.output_pattern`_
+.. |input_pattern_or_glob| replace:: `input_pattern_or_glob`
+.. _input_pattern_or_glob: `decorators.collate_ex.input_pattern_or_glob`_
+.. |matching_regex| replace:: `matching_regex`
+.. _matching_regex: `decorators.collate_ex.matching_regex`_
+.. |matching_formatter| replace:: `matching_formatter`
+.. _matching_formatter: `decorators.collate_ex.matching_formatter`_
+
+
+************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************ [...]
+*@collate* ( |tasks_or_file_names|_, :ref:`regex<decorators.regex>`\ *(*\ |matching_regex|_\ *)* | :ref:`formatter<decorators.formatter>`\ *(*\ |matching_formatter|_\ *)*\, [:ref:`inputs<decorators.inputs>`\ *(*\ |input_pattern_or_glob|_\ *)* | :ref:`add_inputs<decorators.add_inputs>`\ *(*\ |input_pattern_or_glob|_\ *)*\] , |output_pattern|_, [|extra_parameters|_,...] )
+************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************ [...]
+ **Purpose:**
+ Groups / collates sets of input files, each into a separate summary.
+
+ This variant of ``@collate`` allows additional inputs or dependencies to be added
+ dynamically to the task.
+
+ Output file names are determined from |tasks_or_file_names|_, i.e. from the output
+    of upstream tasks, or a list of file names.
+
+ This variant of ``@collate`` allows input file names to be derived in the same way.
+
+    :ref:`add_inputs<decorators.add_inputs>` nests the original input parameters in a list before adding additional dependencies.
+
+    :ref:`inputs<decorators.inputs>` replaces the original input parameters wholesale.
+
+ Only out of date tasks (comparing input and output files) will be run
+
+ **Example of** :ref:`add_inputs<decorators.add_inputs>`
+
+ ``regex(r".*(\..+)"), "\1.summary"`` creates a separate summary file for each suffix.
+ But we also add date of birth data for each species::
+
+ animal_files = "tuna.fish", "shark.fish", "dog.mammals", "cat.mammals"
+ # summarise by file suffix:
+ @collate(animal_files, regex(r".+\.(.+)$"), add_inputs(r"\1.date_of_birth"), r'\1.summary')
+ def summarize(infiles, summary_file):
+ pass
+
+ This results in the following equivalent function calls::
+
+ summarize([ ["shark.fish", "fish.date_of_birth" ],
+ ["tuna.fish", "fish.date_of_birth" ] ], "fish.summary")
+ summarize([ ["cat.mammals", "mammals.date_of_birth"],
+ ["dog.mammals", "mammals.date_of_birth"] ], "mammals.summary")
+
+    **Example of** :ref:`inputs<decorators.inputs>`
+
+        Using ``inputs(...)`` will summarise only the dates of birth for each species group::
+
+ animal_files = "tuna.fish", "shark.fish", "dog.mammals", "cat.mammals"
+ # summarise by file suffix:
+ @collate(animal_files, regex(r".+\.(.+)$"), inputs(r"\1.date_of_birth"), r'\1.summary')
+ def summarize(infiles, summary_file):
+ pass
+
+ This results in the following equivalent function calls::
+
+ summarize(["fish.date_of_birth" ], "fish.summary")
+ summarize(["mammals.date_of_birth"], "mammals.summary")
+
+ **Parameters:**
+
+
+.. _decorators.collate_ex.tasks_or_file_names:
+
+ * *tasks_or_file_names*
+ can be a:
+
+ #. Task / list of tasks (as in the example above).
+ File names are taken from the output of the specified task(s)
+ #. (Nested) list of file name strings.
+ File names containing ``*[]?`` will be expanded as a |glob|_.
+ E.g.:``"a.*" => "a.1", "a.2"``
+
+
+.. _decorators.collate_ex.matching_regex:
+
+ * *matching_regex*
+ is a python regular expression string, which must be wrapped in
+ a :ref:`regex<decorators.regex>` indicator object
+ See python `regular expression (re) <http://docs.python.org/library/re.html>`_
+ documentation for details of regular expression syntax
+
+.. _decorators.collate_ex.matching_formatter:
+
+ * *matching_formatter*
+ a :ref:`formatter<decorators.formatter>` indicator object containing optionally
+ a python `regular expression (re) <http://docs.python.org/library/re.html>`_.
+
+.. _decorators.collate_ex.input_pattern_or_glob:
+
+ * *input_pattern*
+ Specifies the resulting input(s) to each job.
+       Must be wrapped in an :ref:`inputs<decorators.inputs>` or an :ref:`add_inputs<decorators.add_inputs>` indicator object.
+
+ Can be a:
+
+ #. Task / list of tasks (as in the example above).
+ File names are taken from the output of the specified task(s)
+ #. (Nested) list of file name strings.
+ Strings will be subject to substitution.
+ File names containing ``*[]?`` will be expanded as a |glob|_.
+ E.g.:``"a.*" => "a.1", "a.2"``
+
+
+
+.. _decorators.collate_ex.output_pattern:
+
+ * *output_pattern*
+ Specifies the resulting output file name(s).
+
+.. _decorators.collate_ex.extra_parameters:
+
+ * *extra_parameters*
+ Any extra parameters are passed verbatim to the task function
+
+ #. *outputs* and optional extra parameters are passed to the functions after string
+ substitution in any strings. Non-string values are passed through unchanged.
+ #. Each collate job consists of input files which are aggregated by string substitution
+ to a single set of output / extra parameter matches
+
+
+See :ref:`@collate <decorators.collate>` for more straightforward ways to use collate.
diff --git a/doc/_build/html/_sources/decorators/combinations.txt b/doc/_build/html/_sources/decorators/combinations.txt
new file mode 100644
index 0000000..65f9ec9
--- /dev/null
+++ b/doc/_build/html/_sources/decorators/combinations.txt
@@ -0,0 +1,153 @@
+.. include:: ../global.inc
+.. _decorators.combinations:
+.. index::
+ pair: @combinations; Syntax
+
+.. seealso::
+
+ * :ref:`Decorators <decorators>` for more decorators
+
+########################
+ at combinations
+########################
+
+.. |tasks_or_file_names| replace:: `tasks_or_file_names`
+.. _tasks_or_file_names: `decorators.combinations.tasks_or_file_names`_
+.. |extra_parameters| replace:: `extra_parameters`
+.. _extra_parameters: `decorators.combinations.extra_parameters`_
+.. |output_pattern| replace:: `output_pattern`
+.. _output_pattern: `decorators.combinations.output_pattern`_
+.. |matching_formatter| replace:: `matching_formatter`
+.. _matching_formatter: `decorators.combinations.matching_formatter`_
+
+
+
+********************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
+*@combinations* ( |tasks_or_file_names|_, :ref:`formatter<decorators.formatter>`\ *(*\ |matching_formatter|_\ *)*\, |output_pattern|_, [|extra_parameters|_,...] )
+********************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
+ **Purpose:**
+
+ Generates the **combinations**, between all the elements of a set of **Input** (e.g. **A B C D**),
+ i.e. r-length tuples of *input* elements with no repeated elements (**A A**)
+ and where order of the tuples is irrelevant (either **A B** or **B A**, not both).
+
+ The effect is analogous to the python `itertools <http://docs.python.org/2/library/itertools.html#itertools.combinations>`__
+ function of the same name:
+
+ .. code-block:: pycon
+ :emphasize-lines: 2
+
+ >>> from itertools import combinations
+ >>> # combinations('ABCD', 3) --> ABC ABD ACD BCD
+ >>> [ "".join(a) for a in combinations("ABCD", 3)]
+ ['ABC', 'ABD', 'ACD', 'BCD']
+
+ Only out of date tasks (comparing input and output files) will be run
+
+ Output file names and strings in the extra parameters
+ are determined from |tasks_or_file_names|_, i.e. from the output
+    of upstream tasks, or a list of file names, after string replacement via
+ :ref:`formatter<decorators.formatter>`.
+
+ The replacement strings require an extra level of indirection to refer to
+ parsed components:
+
+ #. The first level refers to which *set* in each tuple of inputs.
+ #. The second level refers to which input file in any particular *set* of inputs.
+
+ **Example**:
+
+ Calculates the **@combinations** of **A,B,C,D** files
+
+ .. code-block:: python
+ :emphasize-lines: 13,17,20,25,28-30
+
+ from ruffus import *
+ from ruffus.combinatorics import *
+
+ # initial file pairs
+ @originate([ ['A.1_start', 'A.2_start'],
+ ['B.1_start', 'B.2_start'],
+ ['C.1_start', 'C.2_start'],
+ ['D.1_start', 'D.2_start']])
+ def create_initial_files_ABCD(output_files):
+ for output_file in output_files:
+ with open(output_file, "w") as oo: pass
+
+ # @combinations
+ @combinations(create_initial_files_ABCD, # Input
+ formatter(), # match input files
+
+ # tuple of 3 at a time
+ 3,
+
+ # Output Replacement string
+ "{path[0][0]}/"
+ "{basename[0][1]}_vs_"
+ "{basename[1][1]}_vs_"
+ "{basename[2][1]}.combinations",
+
+ # Extra parameter: path for 1st set of files, 1st file name
+ "{path[0][0]}",
+
+ # Extra parameter
+ ["{basename[0][0]}", # basename for 1st set of files, 1st file name
+ "{basename[1][0]}", # 2nd
+ "{basename[2][0]}", # 3rd
+ ])
+ def combinations_task(input_file, output_parameter, shared_path, basenames):
+ print " - ".join(basenames)
+
+
+ #
+ # Run
+ #
+ pipeline_run(verbose=0)
+
+
+ This results in:
+
+ .. code-block:: pycon
+
+ >>> pipeline_run(verbose=0)
+ A - B - C
+ A - B - D
+ A - C - D
+ B - C - D
+
+
+ **Parameters:**
+
+
+.. _decorators.combinations.tasks_or_file_names:
+
+ * *tasks_or_file_names*
+ can be a:
+
+ #. Task / list of tasks (as in the example above).
+ File names are taken from the output of the specified task(s)
+ #. (Nested) list of file name strings.
+ File names containing ``*[]?`` will be expanded as a |glob|_.
+ E.g.:``"a.*" => "a.1", "a.2"``
+
+
+.. _decorators.combinations.matching_formatter:
+
+ * *matching_formatter*
+ a :ref:`formatter<decorators.formatter>` indicator object containing optionally
+ a python `regular expression (re) <http://docs.python.org/library/re.html>`_.
+
+
+.. _decorators.combinations.output_pattern:
+
+ * *output_pattern*
+ Specifies the resulting output file name(s) after string
+ substitution
+
+
+.. _decorators.combinations.extra_parameters:
+
+ * *extra_parameters*
+ Optional extra parameters are passed to the functions after string
+ substitution
+
diff --git a/doc/_build/html/_sources/decorators/combinations_with_replacement.txt b/doc/_build/html/_sources/decorators/combinations_with_replacement.txt
new file mode 100644
index 0000000..fbb0daf
--- /dev/null
+++ b/doc/_build/html/_sources/decorators/combinations_with_replacement.txt
@@ -0,0 +1,157 @@
+.. include:: ../global.inc
+.. _decorators.combinations_with_replacement:
+.. index::
+ pair: @combinations_with_replacement; Syntax
+
+.. seealso::
+
+ * :ref:`Decorators <decorators>` for more decorators
+
+################################################
+ at combinations_with_replacement
+################################################
+
+.. |tasks_or_file_names| replace:: `tasks_or_file_names`
+.. _tasks_or_file_names: `decorators.combinations_with_replacement.tasks_or_file_names`_
+.. |extra_parameters| replace:: `extra_parameters`
+.. _extra_parameters: `decorators.combinations_with_replacement.extra_parameters`_
+.. |output_pattern| replace:: `output_pattern`
+.. _output_pattern: `decorators.combinations_with_replacement.output_pattern`_
+.. |matching_formatter| replace:: `matching_formatter`
+.. _matching_formatter: `decorators.combinations_with_replacement.matching_formatter`_
+
+
+
+********************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
+*@combinations_with_replacement* ( |tasks_or_file_names|_, :ref:`formatter<decorators.formatter>`\ *(*\ |matching_formatter|_\ *)*\, |output_pattern|_, [|extra_parameters|_,...] )
+********************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
+ **Purpose:**
+
+ Generates the **combinations_with_replacement**, between all the elements of a set of **Input** (e.g. **A B C D**),
+    i.e. r-length tuples of *input* elements which may include repeated elements (**A A**)
+ and where order of the tuples is irrelevant (either **A B** or **B A**, not both).
+
+ The effect is analogous to the python `itertools <http://docs.python.org/2/library/itertools.html#itertools.combinations_with_replacement>`__
+ function of the same name:
+
+ .. code-block:: pycon
+ :emphasize-lines: 2
+
+ >>> from itertools import combinations_with_replacement
+ >>> # combinations_with_replacement('ABCD', 2) --> AA AB AC AD BB BC BD CC CD DD
+ >>> [ "".join(a) for a in combinations_with_replacement('ABCD', 2)]
+ ['AA', 'AB', 'AC', 'AD', 'BB', 'BC', 'BD', 'CC', 'CD', 'DD']
+
+ Only out of date tasks (comparing input and output files) will be run
+
+ Output file names and strings in the extra parameters
+ are determined from |tasks_or_file_names|_, i.e. from the output
+    of upstream tasks, or a list of file names, after string replacement via
+ :ref:`formatter<decorators.formatter>`.
+
+ The replacement strings require an extra level of indirection to refer to
+ parsed components:
+
+ #. The first level refers to which *set* in each tuple of inputs.
+ #. The second level refers to which input file in any particular *set* of inputs.
+
+ **Example**:
+
+ Calculates the **@combinations_with_replacement** of **A,B,C,D** files
+
+ .. code-block:: python
+ :emphasize-lines: 13,17,20,25,28-30
+
+ from ruffus import *
+ from ruffus.combinatorics import *
+
+ # initial file pairs
+ @originate([ ['A.1_start', 'A.2_start'],
+ ['B.1_start', 'B.2_start'],
+ ['C.1_start', 'C.2_start'],
+ ['D.1_start', 'D.2_start']])
+ def create_initial_files_ABCD(output_files):
+ for output_file in output_files:
+ with open(output_file, "w") as oo: pass
+
+ # @combinations_with_replacement
+ @combinations_with_replacement(create_initial_files_ABCD, # Input
+ formatter(), # match input files
+
+ # tuple of 2 at a time
+ 2,
+
+ # Output Replacement string
+ "{path[0][0]}/"
+ "{basename[0][1]}_vs_"
+ "{basename[1][1]}.combinations_with_replacement",
+
+ # Extra parameter: path for 1st set of files, 1st file name
+ "{path[0][0]}",
+
+ # Extra parameter
+ ["{basename[0][0]}", # basename for 1st set of files, 1st file name
+ "{basename[1][0]}", # 2rd
+ ])
+ def combinations_with_replacement_task(input_file, output_parameter, shared_path, basenames):
+ print " - ".join(basenames)
+
+
+ #
+ # Run
+ #
+ pipeline_run(verbose=0)
+
+
+ This results in:
+
+ .. code-block:: pycon
+
+ >>> pipeline_run(verbose=0)
+ A - A
+ A - B
+ A - C
+ A - D
+ B - B
+ B - C
+ B - D
+ C - C
+ C - D
+ D - D
+
+
+ **Parameters:**
+
+
+.. _decorators.combinations_with_replacement.tasks_or_file_names:
+
+ * *tasks_or_file_names*
+ can be a:
+
+ #. Task / list of tasks (as in the example above).
+ File names are taken from the output of the specified task(s)
+ #. (Nested) list of file name strings.
+ File names containing ``*[]?`` will be expanded as a |glob|_.
+ E.g.:``"a.*" => "a.1", "a.2"``
+
+
+.. _decorators.combinations_with_replacement.matching_formatter:
+
+ * *matching_formatter*
+ a :ref:`formatter<decorators.formatter>` indicator object containing optionally
+ a python `regular expression (re) <http://docs.python.org/library/re.html>`_.
+
+
+.. _decorators.combinations_with_replacement.output_pattern:
+
+ * *output_pattern*
+ Specifies the resulting output file name(s) after string
+ substitution
+
+
+.. _decorators.combinations_with_replacement.extra_parameters:
+
+ * *extra_parameters*
+ Optional extra parameters are passed to the functions after string
+ substitution
+
diff --git a/doc/_build/html/_sources/decorators/decorators.txt b/doc/_build/html/_sources/decorators/decorators.txt
new file mode 100644
index 0000000..5d70df8
--- /dev/null
+++ b/doc/_build/html/_sources/decorators/decorators.txt
@@ -0,0 +1,296 @@
+.. include:: ../global.inc
+#######################
+Ruffus Decorators
+#######################
+
+.. seealso::
+ :ref:`Indicator objects <decorators.indicator_objects>`
+
+.. _decorators:
+
+
+=============================================
+*Core*
+=============================================
+.. csv-table::
+ :header: "Decorator", "Examples"
+ :widths: 400, 600,1
+
+ "**@originate** (:ref:`Summary <decorators.originate>` / :ref:`Manual <new_manual.originate>`)
+
+   - Creates (originates) a set of starting files without dependencies from scratch (*ex nihilo*!)
+ - Only called to create files which do not exist.
+   - Invoked once (one job is created) per item in the ``output_files`` list.
+
+ ", "
+ * :ref:`@originate <decorators.originate>` ( ``output_files``, [``extra_parameters``,...] )
+ \
+
+ ", ""
+ "**@split** (:ref:`Summary <decorators.split>` / :ref:`Manual <new_manual.split>`)
+
+   - Splits a single input into multiple outputs
+ - Globs in ``output`` can specify an indeterminate number of files.
+
+ ", "
+ * :ref:`@split <decorators.split>` ( ``tasks_or_file_names``, ``output_files``, [``extra_parameters``,...] )
+ \
+
+ ", ""
+ "**@transform** (:ref:`Summary <decorators.transform>` / :ref:`Manual <new_manual.transform>`)
+
+ - Applies the task function to transform input data to output.
+
+ ", "
+ * :ref:`@transform <decorators.transform>` ( ``tasks_or_file_names``, :ref:`suffix <decorators.transform.suffix_string>`\ *(*\ ``suffix_string``\ *)*\ , ``output_pattern``, [``extra_parameters``,...] )
+ \
+ * :ref:`@transform <decorators.transform>` ( ``tasks_or_file_names``, :ref:`regex <decorators.transform.matching_regex>`\ *(*\ ``regex_pattern``\ *)*\ , ``output_pattern``, [``extra_parameters``,...] )
+ \
+ * :ref:`@transform <decorators.transform>` ( ``tasks_or_file_names``, :ref:`formatter <decorators.transform.matching_formatter>`\ *(*\ ``regex_pattern``\ *)*\ , ``output_pattern``, [``extra_parameters``,...] )
+ \
+
+ ", ""
+ "**@merge** (:ref:`Summary <decorators.merge>` / :ref:`Manual <new_manual.merge>`)
+
+ - Merges multiple input files into a single output.
+
+ ", "
+ * :ref:`@merge <decorators.merge>` (``tasks_or_file_names``, ``output``, [``extra_parameters``,...] )
+ \
+
+ ", ""
+
+.. _decorators.combinatorics:
+
+=============================================
+*Combinatorics*
+=============================================
+.. csv-table::
+ :header: "Decorator", "Examples"
+ :widths: 400, 600,1
+
+ "**@product** (:ref:`Summary <decorators.product>` / :ref:`Manual <new_manual.product>`)
+
+ - Generates the **product**, i.e. all vs all comparisons, between sets of input files.
+ ", "
+ * :ref:`@product <decorators.product>` ( ``tasks_or_file_names``, :ref:`formatter <decorators.product.matching_formatter>` *([* ``regex_pattern`` *])* ,*[* ``tasks_or_file_names``, :ref:`formatter <decorators.product.matching_formatter>` *([* ``regex_pattern`` *]), ]*, ``output_pattern``, [``extra_parameters``,...] )
+ \
+
+ ", ""
+ "**@permutations** (:ref:`Summary <decorators.permutations>` / :ref:`Manual <new_manual.permutations>`)
+
+ - Generates the **permutations**, between all the elements of a set of **Input**
+ - Analogous to the python `itertools.permutations <http://docs.python.org/2/library/itertools.html#itertools.permutations>`__
+ - permutations('ABCD', 2) --> AB AC AD BA BC BD CA CB CD DA DB DC
+
+ ", "
+ * :ref:`@permutations <decorators.permutations>` ( ``tasks_or_file_names``, :ref:`formatter <decorators.product.matching_formatter>` *([* ``regex_pattern`` *])*, ``tuple_size``, ``output_pattern``, [``extra_parameters``,...] )
+ \
+ ", ""
+ "**@combinations** (:ref:`Summary <decorators.combinations>` / :ref:`Manual <new_manual.combinations>`)
+
+   - Analogous to the python `itertools.combinations <http://docs.python.org/2/library/itertools.html#itertools.combinations>`__
+   - combinations('ABCD', 3) --> ABC ABD ACD BCD
+ - Generates the **combinations**, between all the elements of a set of **Input**:
+ i.e. r-length tuples of *input* elements with no repeated elements (**A A**)
+ and where order of the tuples is irrelevant (either **A B** or **B A**, not both).
+
+ ", "
+   * :ref:`@combinations <decorators.combinations>` ( ``tasks_or_file_names``, :ref:`formatter <decorators.product.matching_formatter>` *([* ``regex_pattern`` *])*, ``tuple_size``, ``output_pattern``, [``extra_parameters``,...] )
+ \
+
+ ", ""
+ "**@combinations_with_replacement** (:ref:`Summary <decorators.combinations_with_replacement>` / :ref:`Manual <new_manual.combinations_with_replacement>`)
+
+   - Analogous to the python `itertools.combinations_with_replacement <http://docs.python.org/2/library/itertools.html#itertools.combinations_with_replacement>`__
+   - combinations_with_replacement('ABCD', 2) --> AA AB AC AD BB BC BD CC CD DD
+   - Generates the **combinations_with_replacement**, between all the elements of a set of **Input**:
+     i.e. r-length tuples of *input* elements which may include repeated elements (**A A**)
+ and where order of the tuples is irrelevant (either **A B** or **B A**, not both).
+
+ ", "
+   * :ref:`@combinations_with_replacement <decorators.combinations_with_replacement>` ( ``tasks_or_file_names``, :ref:`formatter <decorators.product.matching_formatter>` *([* ``regex_pattern`` *])*, ``tuple_size``, ``output_pattern``, [``extra_parameters``,...] )
+ \
+
+ ", ""
+
+=============================================
+*Advanced*
+=============================================
+ .. csv-table::
+ :header: "Decorator", "Examples"
+ :widths: 400, 600,1
+
+ "**@subdivide** (:ref:`Summary <decorators.subdivide>` / :ref:`Manual <new_manual.subdivide>`)
+ - Subdivides a set of *Inputs* each further into multiple *Outputs*.
+ - The number of files in each *Output* can be set at runtime by the use of globs.
+ - **Many to Even More** operator.
+   - The use of **split** as a synonym for **subdivide** is deprecated.
+
+ ", "
+ * :ref:`@subdivide <decorators.subdivide>` ( ``tasks_or_file_names``, :ref:`regex <decorators.subdivide.matching_regex>`\ *(*\ ``regex_pattern``\ *)*\ , [ :ref:`inputs <decorators.inputs>` | :ref:`add_inputs <decorators.add_inputs>`\ *(*\ ``input_pattern``\ *)*\ , ] ``output_pattern``, [``extra_parameters``,...] )
+ \
+ * :ref:`@subdivide <decorators.subdivide>` ( ``tasks_or_file_names``, :ref:`formatter <decorators.subdivide.matching_formatter>`\ *(*\ [``regex_pattern``] *)*\ , [ :ref:`inputs <decorators.inputs>` | :ref:`add_inputs <decorators.add_inputs>`\ *(*\ ``input_pattern``\ *)*\ , ] ``output_pattern``, [``extra_parameters``,...] )
+ \
+
+ ", ""
+ "**@transform** (:ref:`Summary <decorators.transform_ex>` / :ref:`Manual <new_manual.inputs>`)
+
+ - Infers input as well as output from regular expression substitutions
+ - Useful for adding additional file dependencies
+
+ ", "
+ * :ref:`@transform <decorators.transform_ex>` ( ``tasks_or_file_names``, :ref:`regex <decorators.transform.matching_regex>`\ *(*\ ``regex_pattern``\ *)*\ , [ :ref:`inputs <decorators.inputs>` | :ref:`add_inputs <decorators.add_inputs>`\ *(*\ ``input_pattern``\ *)*\ , ] ``output_pattern``, [``extra_parameters``,...] )
+ \
+ * :ref:`@transform <decorators.transform_ex>` ( ``tasks_or_file_names``, :ref:`formatter <decorators.transform.matching_formatter>`\ *(*\ ``regex_pattern``\ *)*\ , [ :ref:`inputs <decorators.inputs>` | :ref:`add_inputs <decorators.add_inputs>`\ *(*\ ``input_pattern``\ *)*\ , ] ``output_pattern``, [``extra_parameters``,...] )
+ \
+
+ ", ""
+ "**@collate** (:ref:`Summary <decorators.collate>` / :ref:`Manual <new_manual.collate>`)
+
+ - Groups multiple input files using regular expression matching
+ - Input resulting in the same output after substitution will be collated together.
+
+ ", "
+ * :ref:`@collate <decorators.collate>` (``tasks_or_file_names``, :ref:`regex <decorators.collate.matching_regex>`\ *(*\ ``regex_pattern``\ *)*\ , ``output_pattern``, [``extra_parameters``,...] )
+ \
+ * :ref:`@collate <decorators.collate_ex>` (``tasks_or_file_names``, :ref:`regex <decorators.collate_ex.matching_regex>`\ *(*\ ``regex_pattern``\ *)*\ , :ref:`inputs <decorators.inputs>` | :ref:`add_inputs <decorators.add_inputs>`\ *(*\ ``input_pattern``\ *)*\ , ``output_pattern``, [``extra_parameters``,...] )
+ \
+ * :ref:`@collate <decorators.collate>` (``tasks_or_file_names``, :ref:`formatter <decorators.collate.matching_formatter>`\ *(*\ ``formatter_pattern``\ *)*\ , ``output_pattern``, [``extra_parameters``,...] )
+ \
+ * :ref:`@collate <decorators.collate_ex>` (``tasks_or_file_names``, :ref:`formatter <decorators.collate_ex.matching_formatter>`\ *(*\ ``formatter_pattern``\ *)*\ , :ref:`inputs <decorators.inputs>` | :ref:`add_inputs <decorators.add_inputs>`\ *(*\ ``input_pattern``\ *)*\ , ``output_pattern``, [``extra_parameters``,...] )
+ \
+
+ ", ""
+ "**@follows** (:ref:`Summary <decorators.follows>` / :ref:`Manual <new_manual.follows>`)
+
+ - Indicates task dependency
+ - optional :ref:`mkdir <decorators.follows.directory_name>` prerequisite (:ref:`see Manual <new_manual.follows.mkdir>`)
+
+ ", "
+   * :ref:`@follows <decorators.follows>` ( ``task1``, ``'task2'`` )
+ \
+ * :ref:`@follows <decorators.follows>` ( ``task1``, :ref:`mkdir <decorators.follows.directory_name>`\ ( ``'my/directory/'`` ))
+ \
+
+ ", ""
+ "**@posttask** (:ref:`Summary <decorators.posttask>` / :ref:`Manual <new_manual.posttask>`)
+
+ - Calls function after task completes
+ - Optional :ref:`touch_file <decorators.posttask.file_name>` indicator (:ref:`Manual <new_manual.posttask.touch_file>`)
+
+ ", "
+ * :ref:`@posttask <decorators.posttask>` ( ``signal_task_completion_function`` )
+ \
+ * :ref:`@posttask <decorators.posttask>` (:ref:`touch_file <decorators.touch_file>`\ ( ``'task1.completed'`` ))
+ \
+
+ ", ""
+ "**@active_if** (:ref:`Summary <decorators.active_if>` / :ref:`Manual <new_manual.active_if>`)
+
+ - Switches tasks on and off at run time depending on its parameters
+ - Evaluated each time :ref:`pipeline_run(...) <pipeline_functions.pipeline_run>`, :ref:`pipeline_printout(...) <pipeline_functions.pipeline_printout>` or :ref:`pipeline_printout_graph(...) <pipeline_functions.pipeline_printout_graph>` is called.
+ - Dormant tasks behave as if they are up to date and have no output.
+
+ ", "
+ * :ref:`@active_if <decorators.active_if>` ( ``on_or_off1, [on_or_off2, ...]`` )
+ \
+
+ ", ""
+ "**@jobs_limit** (:ref:`Summary <decorators.jobs_limit>` / :ref:`Manual <new_manual.jobs_limit>`)
+
+ - Limits the amount of multiprocessing for the specified task
+ - Ensures that fewer than N jobs for this task are run in parallel
+ - Overrides ``multiprocess`` parameter in :ref:`pipeline_run(...) <pipeline_functions.pipeline_run>`
+ ", "
+ * :ref:`@jobs_limit <decorators.jobs_limit>` ( ``NUMBER_OF_JOBS_RUNNING_CONCURRENTLY`` )
+ \
+
+ ", ""
+ "**@mkdir** (:ref:`Summary <decorators.mkdir>` / :ref:`Manual <new_manual.mkdir>`)
+
+ - Generates paths for `os.makedirs <http://docs.python.org/2/library/os.html#os.makedirs>`__
+
+ ", "
+ * :ref:`@mkdir <decorators.mkdir>` ( ``tasks_or_file_names``, :ref:`suffix <decorators.mkdir.suffix_string>`\ *(*\ ``suffix_string``\ *)*\ , ``output_pattern`` )
+ \
+ * :ref:`@mkdir <decorators.mkdir>` ( ``tasks_or_file_names``, :ref:`regex <decorators.mkdir.matching_regex>`\ *(*\ ``regex_pattern``\ *)*\ , ``output_pattern`` )
+ \
+ * :ref:`@mkdir <decorators.mkdir>` ( ``tasks_or_file_names``, :ref:`formatter <decorators.mkdir.matching_formatter>`\ *(*\ ``regex_pattern``\ *)*\ , ``output_pattern``)
+ \
+
+ ", ""
+ "**@graphviz** (:ref:`Summary <decorators.graphviz>` / :ref:`Manual <new_manual.pipeline_printout_graph>`)
+
+ - Customise the graphic for each task in printed flowcharts
+
+ ", "
+ * :ref:`@graphviz <decorators.graphviz>` ( ``graphviz_parameter = XXX``, ``[graphviz_parameter2 = YYY ...]``)
+ \
+
+ ", ""
+
+
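+A short illustrative sketch (assuming the hypothetical input files ``a.input`` and ``b.input``
+already exist) combining several of the advanced decorators above::
+
+    from ruffus import *
+
+    # make sure the output directory exists, limit this task to 2 concurrent jobs,
+    # and touch a flag file once the whole task completes
+    @follows(mkdir("results"))
+    @jobs_limit(2)
+    @posttask(touch_file("results/stage1.completed"))
+    @transform(["a.input", "b.input"], regex(r"(.+)\.input"), r"results/\1.output")
+    def stage1(input_file, output_file):
+        open(output_file, "w").write(open(input_file).read())
+
+    pipeline_run(multiprocess = 8)
+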
+
+=============================================
+*Esoteric!*
+=============================================
+ .. csv-table::
+ :header: "Decorator", "Examples"
+ :widths: 400, 600,1
+
+ "**@files** (:ref:`Summary <decorators.files>` / :ref:`Manual <new_manual.deprecated_files>`)
+
+ - I/O parameters
+ - skips up-to-date jobs
+ - Should use :ref:`@transform <decorators.transform>` etc instead
+
+ ", "
+ * :ref:`@files <decorators.files>`\ ( ``parameter_list`` )
+ \
+ * :ref:`@files <decorators.files>`\ ( ``parameter_generating_function`` )
+ \
+ * :ref:`@files <decorators.files>` ( ``input_file``, ``output_file``, ``other_params``, ... )
+ \
+
+ ", ""
+ "**@parallel** (:ref:`Summary <decorators.parallel>` / :ref:`Manual <new_manual.deprecated_parallel>`)
+
+ - By default, does not check if jobs are up to date
+   - Best used in conjunction with :ref:`@check_if_uptodate <decorators.check_if_uptodate>`
+
+ ", "
+ * :ref:`@parallel <decorators.parallel>` ( ``parameter_list`` ) (:ref:`see Manual <new_manual.deprecated_parallel>`)
+ \
+ * :ref:`@parallel <decorators.parallel>` ( ``parameter_generating_function`` ) (:ref:`see Manual <new_manual.on_the_fly>`)
+ \
+
+ ", ""
+ "**@check_if_uptodate** (:ref:`Summary <decorators.check_if_uptodate>` / :ref:`Manual <new_manual.check_if_uptodate>`)
+
+ - Custom function to determine if jobs need to be run
+
+ ", "
+ * :ref:`@check_if_uptodate <decorators.check_if_uptodate>` ( ``is_task_up_to_date_function`` )
+ \
+
+ ", ""
+ ".. tip::
+ The use of this overly complicated function is discouraged.
+ **@files_re** (:ref:`Summary <decorators.files_re>`)
+
+ - I/O file names via regular
+ expressions
+ - start from lists of file names
+ or |glob|_ results
+ - skips up-to-date jobs
+ ", "
+ * :ref:`@files_re <decorators.files_re>` ( ``tasks_or_file_names``, ``matching_regex``, [``input_pattern``,] ``output_pattern``, ``...`` )
+ ``input_pattern``/``output_pattern`` are regex patterns
+ used to create input/output file names from the starting
+ list of either glob_str or file names
+
+ ", ""
+
diff --git a/doc/_build/html/_sources/decorators/files.txt b/doc/_build/html/_sources/decorators/files.txt
new file mode 100644
index 0000000..8f73ba5
--- /dev/null
+++ b/doc/_build/html/_sources/decorators/files.txt
@@ -0,0 +1,155 @@
+.. include:: ../global.inc
+.. _decorators.files:
+.. index::
+ pair: @files; Syntax
+
+.. seealso::
+
+ * :ref:`Decorators <decorators>` for more decorators
+
+
+.. |input| replace:: `input`
+.. _input: `decorators.files.input`_
+.. |input1| replace:: `input1`
+.. _input1: `decorators.files.input1`_
+.. |output| replace:: `output`
+.. _output: `decorators.files.output`_
+.. |output1| replace:: `output1`
+.. _output1: `decorators.files.output1`_
+.. |extra_parameters| replace:: `extra_parameters`
+.. _extra_parameters: `decorators.files.extra_parameters`_
+.. |extra_parameters1| replace:: `extra_parameters1`
+.. _extra_parameters1: `decorators.files.extra_parameters1`_
+
+
+########################
+ at files
+########################
+
+*******************************************************************************************
+*@files* (|input1|_, |output1|_, [|extra_parameters1|_, ...])
+*******************************************************************************************
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+ at files for single jobs
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+ **Purpose:**
+ Provides parameters to run a task.
+
+ The first two parameters in each set represent the input and output which are
+ used to see if the job is out of date and needs to be (re-)run.
+
+ By default, out of date checking uses input/output file timestamps.
+ (On some file systems, timestamps have a resolution in seconds.)
+ See :ref:`@check_if_uptodate() <decorators.check_if_uptodate>` for alternatives.
+
+
+ **Example**:
+ ::
+
+ from ruffus import *
+ @files('a.1', 'a.2', 'A file')
+ def transform_files(infile, outfile, text):
+ pass
+ pipeline_run([transform_files])
+
+ If ``a.2`` is missing or was created before ``a.1``, then the following will be called:
+ ::
+
+ transform_files('a.1', 'a.2', 'A file')
+
+ **Parameters:**
+
+.. _decorators.files.input1:
+
+ * *input*
+ Input file names
+
+
+.. _decorators.files.output1:
+
+ * *output*
+ Output file names
+
+
+.. _decorators.files.extra_parameters1:
+
+ * *extra_parameters*
+ optional ``extra_parameters`` are passed verbatim to each job.
+
+
+ **Checking if jobs are up to date:**
+ Strings in ``input`` and ``output`` (including in nested sequences) are interpreted as file names and
+ used to check if jobs are up-to-date.
+
+ See :ref:`above <decorators.files.check_up_to_date>` for more details
+
+
+*******************************************************************************************
+*@files* ( *((* |input|_, |output|_, [|extra_parameters|_,...] *), (...), ...)* )
+*******************************************************************************************
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+ at files in parallel
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+ **Purpose:**
+
+ Passes each set of parameters to separate jobs which can run in parallel
+
+ The first two parameters in each set represent the input and output which are
+ used to see if the job is out of date and needs to be (re-)run.
+
+ By default, out of date checking uses input/output file timestamps.
+ (On some file systems, timestamps have a resolution in seconds.)
+ See :ref:`@check_if_uptodate() <decorators.check_if_uptodate>` for alternatives.
+
+ **Example**:
+ ::
+
+ from ruffus import *
+ parameters = [
+ [ 'a.1', 'a.2', 'A file'], # 1st job
+ [ 'b.1', 'b.2', 'B file'], # 2nd job
+ ]
+
+ @files(parameters)
+ def parallel_io_task(infile, outfile, text):
+ pass
+ pipeline_run([parallel_io_task])
+
+ is the equivalent of calling:
+ ::
+
+ parallel_io_task('a.1', 'a.2', 'A file')
+ parallel_io_task('b.1', 'b.2', 'B file')
+
+ **Parameters:**
+
+.. _decorators.files.input:
+
+ * *input*
+ Input file names
+
+
+.. _decorators.files.output:
+
+ * *output*
+ Output file names
+
+
+.. _decorators.files.extra_parameters:
+
+ * *extra_parameters*
+ optional ``extra_parameters`` are passed verbatim to each job.
+
+.. _decorators.files.check_up_to_date:
+
+ **Checking if jobs are up to date:**
+ #. Strings in ``input`` and ``output`` (including in nested sequences) are interpreted as file names and
+ used to check if jobs are up-to-date.
+ #. In the absence of input files (e.g. ``input == None``), the job will run if any output file is missing.
+ #. In the absence of output files (e.g. ``output == None``), the job will always run.
+ #. If any of the output files is missing, the job will run.
+ #. If any of the input files is missing when the job is run, a
+ ``MissingInputFileError`` exception will be raised.
+
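+    A minimal sketch of these rules (the file name ``log.out`` is hypothetical, chosen
+    only for illustration)::
+
+        from ruffus import *
+
+        # no input (None): this job runs whenever "log.out" is missing
+        @files(None, "log.out")
+        def make_log(input_file, output_file):
+            open(output_file, "w").close()
+
+        # no output (None): this job re-runs on every pipeline_run()
+        @follows(make_log)
+        @files("log.out", None)
+        def always_run(input_file, output_file):
+            pass
+
+        pipeline_run([always_run])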
+
diff --git a/doc/_build/html/_sources/decorators/files_ex.txt b/doc/_build/html/_sources/decorators/files_ex.txt
new file mode 100644
index 0000000..d200d85
--- /dev/null
+++ b/doc/_build/html/_sources/decorators/files_ex.txt
@@ -0,0 +1,77 @@
+.. include:: ../global.inc
+.. _decorators.files_on_the_fly:
+.. index::
+ pair: @files (on-the-fly parameter generation); Syntax
+
+.. seealso::
+
+ * :ref:`Decorators <decorators>` for more decorators
+
+
+.. |custom_function| replace:: `custom_function`
+.. _custom_function: `decorators.files.custom_function`_
+
+
+################################################
+Generating parameters on the fly for @files
+################################################
+
+*******************************************************************************************
+*@files* (|custom_function|_)
+*******************************************************************************************
+ **Purpose:**
+
+ Uses a custom function to generate sets of parameters to separate jobs which can run in parallel.
+
+ The first two parameters in each set represent the input and output which are
+ used to see if the job is out of date and needs to be (re-)run.
+
+ By default, out of date checking uses input/output file timestamps.
+ (On some file systems, timestamps have a resolution in seconds.)
+ See :ref:`@check_if_uptodate() <decorators.check_if_uptodate>` for alternatives.
+
+ **Example**:
+ ::
+
+ from ruffus import *
+ def generate_parameters_on_the_fly():
+ parameters = [
+ ['input_file1', 'output_file1', 1, 2], # 1st job
+ ['input_file2', 'output_file2', 3, 4], # 2nd job
+ ['input_file3', 'output_file3', 5, 6], # 3rd job
+ ]
+ for job_parameters in parameters:
+ yield job_parameters
+
+ @files(generate_parameters_on_the_fly)
+ def parallel_io_task(input_file, output_file, param1, param2):
+ pass
+
+ pipeline_run([parallel_io_task])
+
+ is the equivalent of calling:
+ ::
+
+ parallel_io_task('input_file1', 'output_file1', 1, 2)
+ parallel_io_task('input_file2', 'output_file2', 3, 4)
+ parallel_io_task('input_file3', 'output_file3', 5, 6)
+
+
+ **Parameters:**
+
+
+.. _decorators.files.custom_function:
+
+ * *custom_function*:
+ Generator function which yields a complete set of parameters for one job at a time
+
+ **Checking if jobs are up to date:**
+ Strings in ``input`` and ``output`` (including in nested sequences) are interpreted as file names and
+ used to check if jobs are up-to-date.
+
+ See :ref:`above <decorators.files.check_up_to_date>` for more details
+
+
+
+
+
diff --git a/doc/_build/html/_sources/decorators/files_re.txt b/doc/_build/html/_sources/decorators/files_re.txt
new file mode 100644
index 0000000..8d3df68
--- /dev/null
+++ b/doc/_build/html/_sources/decorators/files_re.txt
@@ -0,0 +1,130 @@
+.. include:: ../global.inc
+.. _decorators.files_re:
+
+.. index::
+ pair: @files_re; Syntax
+
+.. seealso::
+
+ * :ref:`Decorators <decorators>` for more decorators
+
+
+########################
+ at files_re
+########################
+.. |tasks_or_file_names| replace:: `tasks_or_file_names`
+.. _tasks_or_file_names: `decorators.files_re.tasks_or_file_names`_
+.. |extra_parameters| replace:: `extra_parameters`
+.. _extra_parameters: `decorators.files_re.extra_parameters`_
+.. |output_pattern| replace:: `output_pattern`
+.. _output_pattern: `decorators.files_re.output_pattern`_
+.. |input_pattern| replace:: `input_pattern`
+.. _input_pattern: `decorators.files_re.input_pattern`_
+.. |matching_regex| replace:: `matching_regex`
+.. _matching_regex: `decorators.files_re.matching_regex`_
+
+*****************************************************************************************************************************************
+*@files_re* (|tasks_or_file_names|_, |matching_regex|_, [|input_pattern|_], |output_pattern|_, [|extra_parameters|_,...])
+*****************************************************************************************************************************************
+
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+Legacy design now deprecated. We suggest using :ref:`@transform() <decorators.transform>` instead
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+ **Purpose:**
+
+ All singing, all dancing decorator which can do everything that :ref:`@merge() <decorators.merge>` and
+ :ref:`@transform() <decorators.transform>` can do.
+
+ Applies the task function to transform data from input to output files.
+
+ Output file names are determined from |tasks_or_file_names|_, i.e. from the output
+ of specified tasks, or a list of file names, using regular expression pattern substitutions.
+
+ Only out of date tasks (comparing input and output files) will be run.
+
+ **Example**:
+ ::
+
+ from ruffus import *
+ #
+ # convert all files ending in ".1" into files ending in ".2"
+ #
+ @files_re('*.1', '(.*).1', r'\1.2')
+ def transform_func(infile, outfile):
+ open(outfile, "w").write(open(infile).read() + "\nconverted\n")
+
+ pipeline_run([transform_func])
+
+ If the following files are present ``a.1``, ``b.1``, ``c.1``, this will result in the following function calls:
+ ::
+
+ transform_func("a.1", "a.2")
+ transform_func("b.1", "b.2")
+ transform_func("c.1", "c.2")
+
+ **Parameters:**
+
+.. _decorators.files_re.tasks_or_file_names:
+
+ * *tasks_or_file_names*
+ can be a:
+
+ #. Task / list of tasks (as in the example above).
+ File names are taken from the output of the specified task(s)
+ #. (Nested) list of file name strings.
+ File names containing ``*[]?`` will be expanded as a |glob|_ .
+ E.g.:``"a.*" => "a.1", "a.2"``
+
+.. _decorators.files_re.matching_regex:
+
+ * *matching_regex*
+ a python regular expression string.
+
+ | See python `regular expression (re) <http://docs.python.org/library/re.html>`_ documentation for details of regular expression syntax
+ | Each output file name is created using regular expression substitution with |output_pattern|_
+
+.. _decorators.files_re.input_pattern:
+
+ * *input_pattern*
+ Optionally specifies the resulting input file name(s).
+
+.. _decorators.files_re.output_pattern:
+
+ * *output_pattern*
+ Specifies the resulting output file name(s).
+
+.. _decorators.files_re.extra_parameters:
+
+ * [*extra_parameters, ...*]
+ Any extra parameters are passed to the task function.
+
+ | Regular expression substitution is first applied to (even nested) string parameters.
+ | Other data types are passed verbatim.
+
+ For example:
+ ::
+
+ from ruffus import *
+ #
+ # convert all files ending in ".1" into files ending in ".2"
+ #
+ @files_re('*.1', '(.*).1', r'\1.2', [r'\1', 55], 17)
+ def transform_func(infile, outfile, extras, extra3):
+ extra1, extra2 = extras
+ open(outfile, "w").write(open(infile).read() + "\nconverted%s\n" % (extra1, extra2, extra3))
+
+ pipeline_run([transform_func])
+
+ If the following files are present ``a.1``, ``b.1``, ``c.1``, this will result in the following function calls:
+ ::
+
+ transform_func("a.1", "a.2", ["a", 55], 17)
+ transform_func("b.1", "b.2", ["b", 55], 17)
+ transform_func("c.1", "c.2", ["c", 55], 17)
+
+
+
+
+
+
diff --git a/doc/_build/html/_sources/decorators/follows.txt b/doc/_build/html/_sources/decorators/follows.txt
new file mode 100644
index 0000000..58e6d5c
--- /dev/null
+++ b/doc/_build/html/_sources/decorators/follows.txt
@@ -0,0 +1,82 @@
+.. include:: ../global.inc
+.. _decorators.follows:
+.. index::
+ pair: @follows; Syntax
+
+.. seealso::
+
+ * :ref:`Decorators <decorators>` for more decorators
+ * More on @follows in the ``Ruffus`` :ref:`Manual <new_manual.follows>`
+
+ .. note::
+
+ Only missing directories are created.
+
+ In other words, the same directory can be specified multiple times safely without, for example, being recreated repeatedly.
+ Sometimes, for pipelines with multiple entry points, this is the only way to make sure that certain working or output
+ directories are always created or available *before* the pipeline runs.
+
+
+############
+ at follows
+############
+
+.. _decorators.follows.mkdir:
+
+.. |task| replace:: `task`
+.. _task: `decorators.follows.task`_
+.. |task_name| replace:: `"task_name"`
+.. _task_name: `decorators.follows.task_name`_
+.. |directory_name| replace:: `directory_name`
+.. _directory_name: `decorators.follows.directory_name`_
+
+***************************************************************************************************************************************************
+*@follows*\ (|task|_ | |task_name|_ | :ref:`mkdir<decorators.mkdir>` (|directory_name|_), [more_tasks, ...])
+***************************************************************************************************************************************************
+ **Purpose:**
+
+ Indicates either
+
+ * task dependencies
+ * that the task requires a directory to be created first *if necessary*. (Existing directories will not be overwritten)
+
+
+ **Example**::
+
+ def task1():
+ print "doing task 1"
+
+ @follows(task1)
+ def task2():
+ print "doing task 2"
+
+
+ **Parameters:**
+
+.. _decorators.follows.task:
+
+ * *task*:
+ a task or list of tasks which have to be run **before** this function
+
+.. _decorators.follows.task_name:
+
+ * *"task_name"*:
+ Dependencies can be quoted function names.
+ Quoted function names allow dependencies to be added before the function is defined.
+
+ Functions in other modules need to be fully qualified.
+
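+       For example (a minimal sketch; the task and file names are made up, and
+       ``other_module.prepare_data`` stands in for a fully qualified task in another module)::
+
+           from ruffus import *
+
+           # "final_report" is quoted because it has not been defined yet;
+           # a task in another module would be written in full, e.g. "other_module.prepare_data"
+           @follows("final_report")
+           def analyse():
+               pass
+
+           @files(None, "report.txt")
+           def final_report(input_file, output_file):
+               open(output_file, "w").close()
+
+           pipeline_run([analyse])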
+
+.. _decorators.follows.directory_name:
+
+ * *directory_name*:
+ Directories which need to be created (*only if they don't exist*) before
+ the task is run can be specified via a ``mkdir`` indicator object:
+
+ ::
+
+ @follows(task_x, mkdir("/output/directory") ...)
+ def task():
+ pass
+
+
diff --git a/doc/_build/html/_sources/decorators/graphviz.txt b/doc/_build/html/_sources/decorators/graphviz.txt
new file mode 100644
index 0000000..ebda4df
--- /dev/null
+++ b/doc/_build/html/_sources/decorators/graphviz.txt
@@ -0,0 +1,92 @@
+.. include:: ../global.inc
+.. _decorators.graphviz:
+.. index::
+ pair: @graphviz; Syntax
+
+.. seealso::
+
+ * :ref:`Decorators <decorators>` for more decorators
+
+########################
+ at graphviz
+########################
+
+.. |graphviz_parameters| replace:: `graphviz_parameters`
+.. _graphviz_parameters: `decorators.graphviz.graphviz_parameters`_
+
+
+********************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
+ *@graphviz* ( |graphviz_parameters|_, ... )
+********************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
+ *Contributed by Sean Davis, with improved syntax via Jake Biesinger*
+
+ **Purpose:**
+ Customise the graphic for each task in printed flowcharts by adding
+ `graphviz attributes <http://www.graphviz.org/doc/info/attrs.html>`__,
+ (URL, shape, colour) to that node.
+
+ * This allows HTML formatting in the task names (using the ``label`` parameter as in the following example).
+ HTML labels **must** be enclosed in ``<`` and ``>``. E.g.
+
+ .. code-block:: python
+
+ label = "<Line <BR/> wrapped task_name()>"
+
+ * You can also opt to keep the task name and wrap it with a prefix and suffix:
+
+ .. code-block:: python
+
+ label_suffix = "??? ", label_prefix = ": What is this?"
+
+ * The ``URL`` attribute allows the generation of clickable svg, and also client / server
+ side image maps usable in web pages.
+ See `Graphviz documentation <http://www.graphviz.org/content/output-formats#dimap>`__
+
+
+ **Example**:
+ .. code-block:: python
+
+
+ @graphviz(URL='"http://cnn.com"', fillcolor = '"#FFCCCC"',
+ color = '"#FF0000"', pencolor='"#FF0000"', fontcolor='"#4B6000"',
+ label_suffix = "???", label_prefix = "What is this?<BR/> ",
+ label = "<What <FONT COLOR=\"red\">is</FONT>this>",
+ shape= "component", height = 1.5, peripheries = 5,
+ style="dashed")
+ def Up_to_date_task2(infile, outfile):
+ pass
+
+ # Can use dictionary if you wish...
+ graphviz_params = {"URL":"http://cnn.com", "fontcolor": '"#FF00FF"'}
+ @graphviz(**graphviz_params)
+ def myTask(input,output):
+ pass
+
+ .. **
+
+ .. image:: ../images/history_html_flowchart.png
+ :scale: 30
+
+ **Parameters:**
+
+
+.. _decorators.graphviz.graphviz_parameters:
+
+ * named *graphviz_parameters*
+
+ Including among others:
+
+ * URL (e.g. ``"www.ruffus.org.uk"``)
+ * fillcolor
+ * color
+ * pencolor
+ * fontcolor
+ * label_suffix (appended to task name)
+ * label_prefix (precedes task name)
+ * label (replaces task name)
+ * shape (e.g. ``"component", "box", "diamond", "doubleoctagon"`` etc., see `graphviz <http://www.graphviz.org/doc/info/shapes.html>`__ )
+ * height
+ * peripheries (Number of borders)
+ * style (e.g. ``"solid", "wedged", "dashed"`` etc., see `graphviz <http://www.graphviz.org/doc/info/attrs.html#k:style>`__ )
+
+ Colours may be specified as ``'"#FFCCCC"', 'red', 'red:blue', '/bugn9/7'`` etc. see `color names <http://www.graphviz.org/doc/info/attrs.html#k:color>`__ and `colour schemes <http://www.graphviz.org/doc/info/colors.html>`__
diff --git a/doc/_build/html/_sources/decorators/indicator_objects.txt b/doc/_build/html/_sources/decorators/indicator_objects.txt
new file mode 100644
index 0000000..e41cc4c
--- /dev/null
+++ b/doc/_build/html/_sources/decorators/indicator_objects.txt
@@ -0,0 +1,547 @@
+.. include:: ../global.inc
+
+
+.. seealso::
+ :ref:`Decorators <decorators>`
+
+.. index::
+ single: Indicator Object (Disambiguating parameters)
+
+.. _decorators.indicator_objects:
+
+
+########################
+Indicator Objects
+########################
+
+
+
+ How *ruffus* disambiguates certain parameters to decorators.
+
+ They are like `keyword arguments <http://docs.python.org/tutorial/controlflow.html#keyword-arguments>`_ in python: a little more verbose, but they make the syntax much simpler.
+
+ Indicator objects are also "self-documenting", so you can see
+ exactly what is happening.
+
+
+.. index::
+ pair: formatter; Indicator Object (Disambiguating parameters)
+
+.. _decorators.formatter:
+
+
+*********************************************
+*formatter*
+*********************************************
+
+ **formatter([** ``regex | None`` **, regex | None...])**
+
+ * The optional enclosed parameters are python regular expression strings
+ * Each regular expression matches a corresponding *Input* file name string
+ * *formatter* parses each file name string into path and regular expression components
+ * Parsing fails altogether if the regular expression is not matched
+
+ Path components include:
+
+ * ``basename``: The `base name <http://docs.python.org/2/library/os.path.html#os.path.basename>`__ *excluding* `extension <http://docs.python.org/2/library/os.path.html#os.path.splitext>`__, ``"file.name"``
+ * ``ext`` : The `extension <http://docs.python.org/2/library/os.path.html#os.path.splitext>`__, ``".ext"``
+ * ``path`` : The `dirname <http://docs.python.org/2/library/os.path.html#os.path.dirname>`__, ``"/directory/to/a"``
+ * ``subdir`` : A list of sub-directories in the ``path`` in reverse order, ``["a", "to", "directory", "/"]``
+ * ``subpath`` : A list of descending sub-paths in reverse order, ``["/directory/to/a", "/directory/to", "/directory", "/"]``
+
+ The replacement string refers to these components using python `string.format <http://docs.python.org/2/library/string.html#string-formatting>`__ style curly braces. ``{NAME}``
+
+ We refer to an element from the Nth input string by index, for example:
+
+ * ``"{ext[0]}"`` is the extension of the first input string.
+ * ``"{basename[1]}"`` is the basename of the second input string.
+ * ``"{basename[1][0:3]}"`` are the first three letters from the basename of the second input string.
+
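+    The ``{NAME[N]}`` syntax reads like ordinary python ``str.format`` indexing. A
+    stand-alone sketch with made-up values for the parsed components (this only
+    illustrates how the replacement strings read, not the *formatter* machinery itself):
+
+    .. code-block:: python
+
+        # made-up parsed components for two input file names
+        components = {"basename": ["file", "other"], "ext": [".txt", ".csv"]}
+
+        # "{ext[0]}"      -> extension of the first input string
+        # "{basename[1]}" -> basename of the second input string
+        print("{ext[0]} / {basename[1]}".format(**components))   # prints: .txt / other
+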
+ **Used by:**
+ * :ref:`@split <decorators.split>`
+ * :ref:`@transform <decorators.transform>`
+ * :ref:`@merge <decorators.merge>`
+ * :ref:`@subdivide <decorators.subdivide>`
+ * :ref:`@collate <decorators.collate>`
+ * :ref:`@product <decorators.product>`
+ * :ref:`@permutations <decorators.permutations>`
+ * :ref:`@combinations <decorators.combinations>`
+ * :ref:`@combinations_with_replacement <decorators.combinations_with_replacement>`
+
+ **@transform example**:
+
+ .. code-block:: python
+ :emphasize-lines: 14, 18,19
+
+ from ruffus import *
+
+ # create initial file pairs
+ @originate([ ['job1.a.start', 'job1.b.start'],
+ ['job2.a.start', 'job2.b.start'],
+ ['job3.a.start', 'job3.c.start'] ])
+ def create_initial_file_pairs(output_files):
+ for output_file in output_files:
+ with open(output_file, "w") as oo: pass
+
+
+ #---------------------------------------------------------------
+ #
+ # formatter
+ #
+ @transform(create_initial_file_pairs, # Input
+
+ formatter(".+/job(?P<JOBNUMBER>\d+).a.start", # Extract job number
+ ".+/job[123].b.start"), # Match only "b" files
+
+ ["{path[0]}/jobs{JOBNUMBER[0]}.output.a.1", # Replacement list
+ "{path[1]}/jobs{JOBNUMBER[0]}.output.b.1"])
+ def first_task(input_files, output_parameters):
+ print "input_parameters = ", input_files
+ print "output_parameters = ", output_parameters
+
+
+ #
+ # Run
+ #
+ pipeline_run(verbose=0)
+
+ This produces:
+
+ .. code-block:: pycon
+
+ input_parameters = ['job1.a.start',
+ 'job1.b.start']
+ output_parameters = ['/home/lg/src/temp/jobs1.output.a.1',
+ '/home/lg/src/temp/jobs1.output.b.1']
+
+ input_parameters = ['job2.a.start',
+ 'job2.b.start']
+ output_parameters = ['/home/lg/src/temp/jobs2.output.a.1',
+ '/home/lg/src/temp/jobs2.output.b.1']
+
+ **@permutations example**:
+
+ Combinatoric decorators such as :ref:`@product <decorators.product>` or
+ :ref:`@permutations <decorators.permutations>` behave much
+ like nested for loops in enumerating, combining, and permuting the original sets
+ of inputs.
+
+ The replacement strings require an extra level of indirection to refer to
+ parsed components:
+
+ .. code-block:: python
+ :emphasize-lines: 14, 18,19
+
+ from ruffus import *
+ from ruffus.combinatorics import *
+
+ # create initial files
+ @originate([ 'a.start', 'b.start', 'c.start'])
+ def create_initial_files(output_file):
+ with open(output_file, "w") as oo: pass
+
+
+ #---------------------------------------------------------------
+ #
+ # formatter
+ #
+ @permutations(create_initial_files, # Input
+
+ formatter("(.start)$"), # match input file in permutations
+ 2,
+
+ "{path[0][0]}/{basename[0][0]}_vs_{basename[1][0]}.product", # Output Replacement string
+ "{path[0][0]}", # path for 1st set of files, 1st file name
+ ["{basename[0][0]}", # basename for 1st set of files, 1st file name
+ "{basename[1][0]}"]) # basename for 2nd set of files, 1st file name
+ def product_task(input_file, output_parameter, shared_path, basenames):
+ print "input_parameter = ", input_file
+ print "output_parameter = ", output_parameter
+ print "shared_path = ", shared_path
+ print "basenames = ", basenames
+
+
+ #
+ # Run
+ #
+ pipeline_run(verbose=0)
+
+ This produces:
+
+ .. code-block:: pycon
+
+ >>> pipeline_run(verbose=0)
+ input_parameter = ('a.start', 'b.start')
+ output_parameter = /home/lg/src/oss/ruffus/a_vs_b.product
+ shared_path = /home/lg/src/oss/ruffus
+ basenames = ['a', 'b']
+
+ input_parameter = ('a.start', 'c.start')
+ output_parameter = /home/lg/src/oss/ruffus/a_vs_c.product
+ shared_path = /home/lg/src/oss/ruffus
+ basenames = ['a', 'c']
+
+ input_parameter = ('b.start', 'a.start')
+ output_parameter = /home/lg/src/oss/ruffus/b_vs_a.product
+ shared_path = /home/lg/src/oss/ruffus
+ basenames = ['b', 'a']
+
+ input_parameter = ('b.start', 'c.start')
+ output_parameter = /home/lg/src/oss/ruffus/b_vs_c.product
+ shared_path = /home/lg/src/oss/ruffus
+ basenames = ['b', 'c']
+
+ input_parameter = ('c.start', 'a.start')
+ output_parameter = /home/lg/src/oss/ruffus/c_vs_a.product
+ shared_path = /home/lg/src/oss/ruffus
+ basenames = ['c', 'a']
+
+ input_parameter = ('c.start', 'b.start')
+ output_parameter = /home/lg/src/oss/ruffus/c_vs_b.product
+ shared_path = /home/lg/src/oss/ruffus
+ basenames = ['c', 'b']
+
+
+
+.. index::
+ pair: suffix; Indicator Object (Disambiguating parameters)
+
+.. _decorators.suffix:
+
+
+*********************************************
+*suffix*
+*********************************************
+
+ **suffix(** ``string`` **)**
+
+ The enclosed parameter is a string which must match the end
+ of a file name *exactly*.
+
+
+ **Used by:**
+ * :ref:`@transform <decorators.transform>`
+
+ **Example**:
+ ::
+
+ #
+ # Transforms ``*.c`` to ``*.o``::
+ #
+ @transform(previous_task, suffix(".c"), ".o")
+ def compile(infile, outfile):
+ pass
+
+.. index::
+ pair: regex; Indicator Object (Disambiguating parameters)
+
+.. _decorators.regex:
+
+*********************************************
+*regex*
+*********************************************
+
+ **regex(** ``regular_expression`` **)**
+
+
+ The enclosed parameter is a python regular expression string,
+ which must be wrapped in a ``regex`` indicator object.
+
+ See python `regular expression (re) <http://docs.python.org/library/re.html>`_
+ documentation for details of regular expression syntax
+
+
+ **Used by:**
+
+ * :ref:`@transform <decorators.transform>`
+ * :ref:`@subdivide <decorators.subdivide>`
+ * :ref:`@collate <decorators.collate>`
+ * The deprecated :ref:`@files_re <decorators.files_re>`
+
+ **Example**:
+ ::
+
+ @transform(previous_task, regex(r".c$"), ".o")
+ def compile(infile, outfile):
+ pass
+
+.. index::
+ pair: add_inputs; Indicator Object (Adding additional input parameters)
+
+.. _decorators.add_inputs:
+
+***********************************************
+*add_inputs*
+***********************************************
+
+ **add_inputs(** ``input_file_pattern`` **)**
+
+ The enclosed parameter(s) are pattern strings or a nested structure which is added to the
+ input for each job.
+
+ **Used by:**
+ * :ref:`@transform <decorators.transform_ex>`
+ * :ref:`@collate <decorators.transform_ex>`
+ * :ref:`@subdivide <decorators.subdivide>`
+
+ **Example @transform with suffix(...)**
+
+ A common task in compiling C code is to include the corresponding header file for the source.
+ To compile ``*.c`` to ``*.o``, adding ``*.h`` and the common header ``universal.h``:
+
+ ::
+
+ @transform(["1.c", "2.c"], suffix(".c"), add_inputs([r"\1.h", "universal.h"]), ".o")
+ def compile(infile, outfile):
+ # do something here
+ pass
+
+ | The starting files names are ``1.c`` and ``2.c``.
+ | ``suffix(".c")`` matches ".c" so ``\1`` stands for the unmatched prefices ``"1"`` and ``"2"``
+
+ This will result in the following function calls:
+ ::
+
+ compile(["1.c", "1.h", "universal.h"], "1.o")
+ compile(["2.c", "2.h", "universal.h"], "2.o")
+
+
+ A string like ``universal.h`` in ``add_inputs`` will be added *as is*.
+ ``r"\1.h"``, however, performs suffix substitution, with the special form ``r"\1"`` matching everything up to the suffix.
+ Remember to 'escape' ``r"\1"`` otherwise Ruffus will complain and throw an ``Exception`` to remind you.
+ The most convenient way is to use a python "raw" string.
+
+ **Example of add_inputs(...) with regex(...)**
+
+ The suffix match (``suffix(...)``) is exactly equivalent to the following code using a regular expression (``regex(...)``):
+ ::
+
+ @transform(["1.c", "2.c"], regex(r"^(.+)\.c$"), add_inputs([r"\1.h", "universal.h"]), r"\1.o")
+ def compile(infile, outfile):
+ # do something here
+ pass
+
+ The ``suffix(..)`` code is much simpler but the regular expression allows more complex substitutions.
+
+ **add_inputs(...) preserves original inputs**
+
+ ``add_inputs`` nests the original input parameters in a list before adding additional dependencies.
+
+ This can be seen in the following example:
+ ::
+
+ @transform([ ["1.c", "A.c", 2]
+ ["2.c", "B.c", "C.c", 3]],
+ suffix(".c"), add_inputs([r"\1.h", "universal.h"]), ".o")
+ def compile(infile, outfile):
+ # do something here
+ pass
+
+ This will result in the following function calls:
+ ::
+
+ compile([["1.c", "A.c", 2], "1.h", "universal.h"], "1.o")
+ compile([["3.c", "B.c", "C.c", 3], "2.h", "universal.h"], "2.o")
+
+
+ The original parameters are retained unchanged as the first item in a list
+
+
+
+
+.. index::
+ pair: inputs; Indicator Object (Replacing input parameters)
+
+.. _decorators.inputs:
+
+***************************************
+*inputs*
+***************************************
+
+ **inputs(** ``input_file_pattern`` **)**
+
+ **Used by:**
+ * :ref:`@transform <decorators.transform_ex>`
+ * :ref:`@collate <decorators.transform_ex>`
+ * :ref:`@subdivide <decorators.subdivide>`
+
+ The enclosed single parameter is a pattern string or a nested structure which is
+ used to construct the input for each job.
+
+ If more than one argument is supplied to inputs, an exception will be raised.
+
+ Use a tuple or list (as in the following example) to send multiple input arguments to each job.
+
+ **Used by:**
+ * The advanced form of :ref:`@transform <decorators.transform_ex>`
+
+ **inputs(...) replaces original inputs**
+
+ ``inputs(...)`` allows the original input parameters to be replaced wholesale.
+
+ This can be seen in the following example:
+ ::
+
+ @transform([ ["1.c", "A.c", 2]
+ ["2.c", "B.c", "C.c", 3]],
+ suffix(".c"), inputs([r"\1.py", "docs.rst"]), ".pyc")
+ def compile(infile, outfile):
+ # do something here
+ pass
+
+ This will result in the following function calls:
+ ::
+
+ compile(["1.py", "docs.rst"], "1.pyc")
+ compile(["2.py", "docs.rst"], "2.pyc")
+
+ In this example, the corresponding python files have been sneakily substituted
+ without trace in the place of the C source files.
+
+
+.. index::
+ single: @follows; mkdir (Syntax)
+ single: mkdir; @follows (Syntax)
+ single: Indicator Object (Disambiguating parameters); mkdir
+
+.. _decorators.indicator_objects.mkdir:
+
+
+******************************************************************************************
+*mkdir*
+******************************************************************************************
+
+ **mkdir(** ``directory_name1`` **, [** ``directory_name2`` **, ...] )**
+
+ The enclosed parameter is a directory name or a sequence of directory names.
+ These directories will be created as part of the prerequisites of running a task.
+
+ **Used by:**
+ * :ref:`@follows <decorators.follows>`
+
+ **Example:**
+ ::
+
+ @follows(mkdir("/output/directory"))
+ def task():
+ pass
+
+
+.. index::
+ single: @posttask; touch_file (Syntax)
+ single: touch_file; @posttask (Syntax)
+ single: Indicator Object (Disambiguating parameters); touch_file
+
+.. _decorators.touch_file:
+
+
+******************************************************************************************
+*touch_file*
+******************************************************************************************
+
+ **touch_file(** ``file_name`` **)**
+
+ The enclosed parameter is a file name. This file will be ``touch``\ -ed after a
+ task is executed.
+
+ This will change the date/time stamp of the ``file_name`` to the current date/time.
+ If the file does not exist, an empty file will be created.
+
+
+ **Used by:**
+ * :ref:`@posttask <decorators.posttask>`
+
+ **Example:**
+ ::
+
+ @posttask(touch_file("task_completed.flag"))
+ @files(None, "a.1")
+ def do_task(input_file, output_file):
+ pass
+
+
+.. index::
+ pair: output_from; Indicator Object (Disambiguating parameters)
+
+.. _decorators.output_from:
+
+******************************************************************************************
+*output_from*
+******************************************************************************************
+
+ **output_from (** ``file_name_string1`` **[,** ``file_name_string2`` **, ...] )**
+
+ Indicates that any enclosed strings are not file names but refer to task functions.
+
+ **Used by:**
+ * :ref:`@split <decorators.split>`
+ * :ref:`@transform <decorators.transform>`
+ * :ref:`@merge <decorators.merge>`
+ * :ref:`@collate <decorators.collate>`
+ * :ref:`@subdivide <decorators.subdivide>`
+ * :ref:`@product <decorators.product>`
+ * :ref:`@permutations <decorators.permutations>`
+ * :ref:`@combinations <decorators.combinations>`
+ * :ref:`@combinations_with_replacement <decorators.combinations_with_replacement>`
+ * :ref:`@files <decorators.files>`
+
+ **Example:**
+ ::
+
+ @split(["a.file", ("b.file", output_from("task1", 76, "task2"))], "*.split")
+ def task2(input, output):
+ pass
+
+
+ is equivalent to:
+
+ ::
+
+ @split(["a.file", ("b.file", (task1, 76, task2))], "*.split")
+ def task2(input, output):
+ pass
+
+
+
+
+.. index::
+ single: @files_re; combine (Deprecated Syntax)
+ single: combine; @files_re (Deprecated Syntax)
+ single: Indicator Object (Disambiguating parameters); combine
+
+.. _decorators.combine:
+
+******************************************************************************************
+*combine*
+******************************************************************************************
+
+ **combine(** ``arguments`` **)**
+
+ .. warning::
+
+ This is deprecated syntax.
+
+ Please do not use!
+
+ :ref:`@merge <decorators.merge>` and :ref:`@collate <decorators.collate>` are more powerful
+ and have straightforward syntax.
+
+ Indicates that the *inputs* of :ref:`@files_re <decorators.files_re>` will be collated
+ or summarised into *outputs* by category. See the :ref:`Manual <new_manual.files_re.combine>` or
+ :ref:`@collate <new_manual.collate>` for examples.
+
+
+ **Used by:**
+ * :ref:`@files_re <new_manual.files_re.combine>`
+
+ **Example:**
+ ::
+
+ @files_re('*.animals', # inputs = all *.animal files
+ r'mammals.([^.]+)', # regular expression
+ combine(r'\1/animals.in_my_zoo'), # single output file per species
+ r'\1' ) # species name
+ def capture_mammals(infiles, outfile, species):
+ # summarise all animals of this species
+ ""
+
diff --git a/doc/_build/html/_sources/decorators/jobs_limit.txt b/doc/_build/html/_sources/decorators/jobs_limit.txt
new file mode 100644
index 0000000..1e0ee41
--- /dev/null
+++ b/doc/_build/html/_sources/decorators/jobs_limit.txt
@@ -0,0 +1,73 @@
+.. include:: ../global.inc
+.. _decorators.jobs_limit:
+.. index::
+ pair: @jobs_limit; Syntax
+
+.. seealso::
+
+ * :ref:`Decorators <decorators>` for more decorators
+
+
+########################
+ at jobs_limit
+########################
+
+.. |maximum_num_of_jobs| replace:: `maximum_num_of_jobs`
+.. _maximum_num_of_jobs: `decorators.jobs_limit.maximum_num_of_jobs`_
+.. |name| replace:: `name`
+.. _name: `decorators.jobs_limit.name`_
+
+*****************************************************************************************************************************************
+*@jobs_limit* ( |maximum_num_of_jobs|_, [ |name|_ ])
+*****************************************************************************************************************************************
+ **Purpose:**
+ | Manages the resources available for a task.
+ | Limits the number of concurrent jobs which can be run in parallel for this task
+ | Overrides the value for ``multiprocess`` in :ref:`pipeline_run <pipeline_functions.pipeline_run>`
+ | If an optional ``name`` is given, the same limit is shared across all tasks with the same ``@jobs_limit`` name.
+
+
+ **Parameters:**
+
+.. _decorators.jobs_limit.maximum_num_of_jobs:
+
+
+ * *maximum_num_of_jobs*
+ The maximum number of concurrent jobs for this task. Must be an integer number
+ greater than or equal to 1.
+
+.. _decorators.jobs_limit.name:
+
+ * *name*
+ Optional name for the limit. All tasks with the same name share the same limit if they
+ are running concurrently.
+
+ **Example**
+ ::
+
+ from ruffus import *
+
+ # make list of 10 files
+ @split(None, "*.stage1")
+ def make_files(input_file, output_files):
+ for i in range(10):
+ open("%d.stage1" % i, "w")
+
+ @jobs_limit(2)
+ @transform(make_files, suffix(".stage1"), ".stage2")
+ def stage1(input_file, output_file):
+ open(output_file, "w")
+
+ @transform(stage1, suffix(".stage2"), ".stage3")
+ def stage2(input_file, output_file):
+ open(output_file, "w")
+
+ pipeline_run([stage2], multiprocess = 5)
+
+ will run the 10 jobs of ``stage1`` 2 at a time, while ``stage2`` will
+ run 5 at a time (from ``multiprocess = 5``):
+
+ .. image:: ../images/jobs_limit.png
+
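+    A minimal sketch of the optional ``name`` parameter (the limit name ``"db"`` and the
+    file names are made up): both tasks share the same named limit, so at most 2 of
+    their jobs run concurrently in total, whatever ``multiprocess`` is set to.
+    ::
+
+        from ruffus import *
+
+        @jobs_limit(2, "db")
+        @originate(["a.load", "b.load", "c.load"])
+        def load_batch_one(output_file):
+            open(output_file, "w").close()
+
+        @jobs_limit(2, "db")
+        @originate(["d.load", "e.load", "f.load"])
+        def load_batch_two(output_file):
+            open(output_file, "w").close()
+
+        pipeline_run([load_batch_one, load_batch_two], multiprocess = 5)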
+
+
diff --git a/doc/_build/html/_sources/decorators/merge.txt b/doc/_build/html/_sources/decorators/merge.txt
new file mode 100644
index 0000000..bd85ee8
--- /dev/null
+++ b/doc/_build/html/_sources/decorators/merge.txt
@@ -0,0 +1,64 @@
+.. include:: ../global.inc
+.. _decorators.merge:
+.. index::
+ pair: @merge; Syntax
+
+.. seealso::
+
+ * :ref:`Decorators <decorators>` for more decorators
+
+.. |tasks_or_file_names| replace:: `tasks_or_file_names`
+.. _tasks_or_file_names: `decorators.merge.tasks_or_file_names`_
+.. |extra_parameters| replace:: `extra_parameters`
+.. _extra_parameters: `decorators.merge.extra_parameters`_
+.. |output_file| replace:: `output_file`
+.. _output_file: `decorators.merge.output_file`_
+
+########################
+ at merge
+########################
+
+************************************************************************************
+*@merge* ( |tasks_or_file_names|_, |output_file|_, [|extra_parameters|_,...] )
+************************************************************************************
+ **Purpose:**
+ Merges multiple input files into a single output.
+
+ Only out of date tasks (comparing input and output files) will be run
+
+ **Example**::
+
+ @merge(previous_task, 'all.summary')
+ def summarize(infiles, summary_file):
+ pass
+
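+    A more complete, runnable sketch (the file names are made up): the upstream task
+    creates three files and the ``@merge`` task concatenates them into one summary.
+    ::
+
+        from ruffus import *
+
+        @originate(["part.1.txt", "part.2.txt", "part.3.txt"])
+        def make_parts(output_file):
+            open(output_file, "w").write(output_file + "\n")
+
+        @merge(make_parts, "all.summary")
+        def summarize(input_files, summary_file):
+            # all upstream outputs arrive together as a single list of inputs
+            with open(summary_file, "w") as oo:
+                for input_file in input_files:
+                    oo.write(open(input_file).read())
+
+        pipeline_run([summarize])
+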
+ **Parameters:**
+
+
+.. _decorators.merge.tasks_or_file_names:
+
+ * *tasks_or_file_names*
+ can be a:
+
+ #. Task / list of tasks (as in the example above).
+ File names are taken from the output of the specified task(s)
+ #. (Nested) list of file name strings.
+ File names containing ``*[]?`` will be expanded as a |glob|_.
+ E.g.:``"a.*" => "a.1", "a.2"``
+
+
+.. _decorators.merge.output_file:
+
+ * *output_file*
+ Specifies the resulting output file name(s).
+
+.. _decorators.merge.extra_parameters:
+
+ * *extra_parameters, ...*
+ Any optional extra parameters are passed verbatim to the task function
+
+
+
+See :ref:`here <decorators.collate>` for more advanced uses of merging.
+
+
diff --git a/doc/_build/html/_sources/decorators/mkdir.txt b/doc/_build/html/_sources/decorators/mkdir.txt
new file mode 100644
index 0000000..b516b1a
--- /dev/null
+++ b/doc/_build/html/_sources/decorators/mkdir.txt
@@ -0,0 +1,220 @@
+.. include:: ../global.inc
+.. _decorators.mkdir:
+.. index::
+ pair: @mkdir; Syntax
+
+.. seealso::
+
+ * :ref:`Decorators <decorators>` for more decorators
+ * More on @mkdir in the ``Ruffus`` :ref:`Manual <new_manual.mkdir>`
+ * :ref:`@follows(mkdir("dir")) <decorators.follows>` specifies the creation of a *single* directory as a task pre-requisite.
+
+########################
+ at mkdir
+########################
+
+.. |tasks_or_file_names| replace:: `tasks_or_file_names`
+.. _tasks_or_file_names: `decorators.mkdir.tasks_or_file_names`_
+.. |output_pattern| replace:: `output_pattern`
+.. _output_pattern: `decorators.mkdir.output_pattern`_
+.. |matching_regex| replace:: `matching_regex`
+.. _matching_regex: `decorators.mkdir.matching_regex`_
+.. |matching_formatter| replace:: `matching_formatter`
+.. _matching_formatter: `decorators.mkdir.matching_formatter`_
+.. |suffix_string| replace:: `suffix_string`
+.. _suffix_string: `decorators.mkdir.suffix_string`_
+
+******************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
+*@mkdir* ( |tasks_or_file_names|_, :ref:`suffix<decorators.suffix>`\ *(*\ |suffix_string|_\ *)*\ | :ref:`regex<decorators.regex>`\ *(*\ |matching_regex|_\ *)* | :ref:`formatter<decorators.formatter>`\ *(*\ |matching_formatter|_\ *)*\, |output_pattern|_)
+******************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
+ **Purpose:**
+
+ * Prepares directories to receive *Output* files
+ * Used when *Output* path names are generated at runtime from *Inputs*. **mkdir** can make sure these runtime specified paths exist.
+ * Directory names are generated from **Input** using string substitution via :ref:`formatter() <decorators.formatter>`, :ref:`suffix() <decorators.suffix>` or :ref:`regex() <decorators.regex>`.
+ * Behaves essentially like ``@transform`` but with its own (internal) function which does the actual work of making a directory
+ * Does *not* invoke the host task function to which it is attached
+ * Makes specified directories using `os.makedirs <http://docs.python.org/2/library/os.html#os.makedirs>`__
+ * Multiple directories can be created in a list
+
+ .. note::
+
+ Only missing directories are created.
+
+ In other words, the same directory can be specified multiple times safely without, for example, being recreated repeatedly.
+
+ Sometimes, for pipelines with multiple entry points, this is the only way to make sure that certain working or output
+ directories are always created or available *before* the pipeline runs.
+
+ **Simple Example**
+
+ Creates multiple directories per job to hold the results of :ref:`@transform<decorators.transform>`
+
+ .. code-block:: python
+ :emphasize-lines: 10,20
+
+ from ruffus import *
+
+ # initial files
+ @originate([ 'A.start',
+ 'B.start'])
+ def create_initial_files(output_file):
+ with open(output_file, "w") as oo: pass
+
+
+ # create files without making directories -> ERROR
+ @transform( create_initial_files,
+ formatter(),
+ ["{path[0]}/{basename[0]}/processed.txt",
+ "{path[0]}/{basename[0]}.tmp/tmp.processed.txt"])
+ def create_files_without_mkdir(input_file, output_files):
+ open(output_files[0], "w")
+ open(output_files[1], "w")
+
+
+ # create files after making corresponding directories
+ @mkdir( create_initial_files,
+ formatter(),
+ ["{path[0]}/{basename[0]}", # create directory
+ "{path[0]}/{basename[0]}.tmp"]) # create directory.tmp
+ @transform( create_initial_files,
+ formatter(),
+ ["{path[0]}/{basename[0]}/processed.txt",
+ "{path[0]}/{basename[0]}.tmp/tmp.processed.txt"])
+ def create_files_with_mkdir(input_file, output_files):
+ open(output_files[0], "w")
+ open(output_files[1], "w")
+
+ pipeline_run([create_files_without_mkdir])
+ pipeline_run([create_files_with_mkdir])
+
+ Running without making the directories first gives errors:
+
+ .. code-block:: python
+ :emphasize-lines: 14-19
+
+ >>> pipeline_run([create_files_without_mkdir])
+ Job = [None -> A.start] completed
+ Job = [None -> B.start] completed
+ Completed Task = create_initial_files
+
+ Traceback (most recent call last):
+ File "<stdin>", line 1, in <module>
+ File "/usr/local/lib/python2.7/dist-packages/ruffus/task.py", line 3738, in pipeline_run
+ raise job_errors
+ ruffus.ruffus_exceptions.RethrownJobError:
+
+ Original exception:
+
+ >>> # Exception #1
+ >>> # 'exceptions.IOError([Errno 2] No such file or directory: 'A/processed.txt')' raised in ...
+ >>> # Task = def create_files_without_mkdir(...):
+ >>> # Job = [A.start -> [processed.txt, tmp.processed.txt]]
+
+
+ Running after making the directories first:
+
+ .. code-block:: python
+ :emphasize-lines: 15
+
+ >>> pipeline_run([create_files_with_mkdir])
+ Job = [None -> A.start] completed
+ Job = [None -> B.start] completed
+ Completed Task = create_initial_files
+ Make directories [A, A.tmp] completed
+ Make directories [B, B.tmp] completed
+ Completed Task = (mkdir 1) before create_files_with_mkdir
+ Job = [A.start -> [processed.txt, tmp.processed.txt]] completed
+ Job = [B.start -> [processed.txt, tmp.processed.txt]] completed
+ Completed Task = create_files_with_mkdir
+
+
+
+ **Escaping regular expression patterns**
+
+ A string like ``universal.h`` in ``add_inputs`` will be added *as is*.
+ ``r"\1.h"``, however, performs suffix substitution, with the special form ``r"\1"`` matching everything up to the suffix.
+ Remember to 'escape' ``r"\1"`` otherwise Ruffus will complain and throw an Exception to remind you.
+ The most convenient way is to use a python "raw" string.
+
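+    The point about raw strings is plain python behaviour, independent of *Ruffus*:
+    ::
+
+        # "\1" is the single control character chr(1);
+        # r"\1" is the two characters backslash + "1" that pattern substitution needs to see
+        print(len("\1"), len(r"\1"))    # 1 2   (shown as a tuple, "(1, 2)", under python 2)
+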
+ **Parameters:**
+
+.. _decorators.mkdir.tasks_or_file_names:
+
+ * *tasks_or_file_names*
+ can be a:
+
+ #. Task / list of tasks (as in the example above).
+ File names are taken from the output of the specified task(s)
+ #. (Nested) list of file name strings.
+ File names containing ``*[]?`` will be expanded as a |glob|_.
+ E.g.:``"a.*" => "a.1", "a.2"``
+
+.. _decorators.mkdir.suffix_string:
+
+ * *suffix_string*
+ must be wrapped in a :ref:`suffix<decorators.suffix>` indicator object.
+ The end of each input file name which matches ``suffix_string`` will be replaced by ``output_pattern``.
+
+ Input file names which do not match suffix_string will be ignored
+
+
+ The non-suffix part of the match can be referred to using the ``"\1"`` pattern. This
+ can be useful for putting the output in a different directory, for example::
+
+
+ @mkdir(["1.c", "2.c"], suffix(".c"), r"my_path/\1.o")
+ def compile(infile, outfile):
+ pass
+
+ This results in the following function calls:
+
+ ::
+
+ # 1.c -> my_path/1.o
+ # 2.c -> my_path/2.o
+ compile("1.c", "my_path/1.o")
+ compile("2.c", "my_path/2.o")
+
+ For convenience and visual clarity, the ``"\1"`` can be omitted from the output parameter.
+ However, the ``"\1"`` is mandatory for string substitutions in additional parameters, ::
+
+
+ @mkdir(["1.c", "2.c"], suffix(".c"), [r"\1.o", ".o"], "Compiling \1", "verbatim")
+ def compile(infile, outfile):
+ pass
+
+ Results in the following function calls:
+
+ ::
+
+ compile("1.c", ["1.o", "1.o"], "Compiling 1", "verbatim")
+ compile("2.c", ["2.o", "2.o"], "Compiling 2", "verbatim")
+
+ Since r"\1" is optional for the output parameter, ``"\1.o"`` and ``".o"`` are equivalent.
+ However, strings in other parameters which do not contain r"\1" will be included verbatim, much
+ like the string ``"verbatim"`` in the above example.
+
+
+
+
+.. _decorators.mkdir.matching_regex:
+
+ * *matching_regex*
+ is a python regular expression string, which must be wrapped in
+ a :ref:`regex<decorators.regex>` indicator object.
+ See python `regular expression (re) <http://docs.python.org/library/re.html>`_
+ documentation for details of regular expression syntax.
+ Each output file name is created using regular expression substitution with ``output_pattern``
+
+.. _decorators.mkdir.matching_formatter:
+
+ * *matching_formatter*
+ a :ref:`formatter<decorators.formatter>` indicator object containing optionally
+ a python `regular expression (re) <http://docs.python.org/library/re.html>`_.
+
+.. _decorators.mkdir.output_pattern:
+
+ * *output_pattern*
+ Specifies the resulting output file name(s).
+
diff --git a/doc/_build/html/_sources/decorators/originate.txt b/doc/_build/html/_sources/decorators/originate.txt
new file mode 100644
index 0000000..513a285
--- /dev/null
+++ b/doc/_build/html/_sources/decorators/originate.txt
@@ -0,0 +1,79 @@
+.. include:: ../global.inc
+.. _decorators.originate:
+.. index::
+ pair: @originate; Syntax
+
+.. seealso::
+
+ * :ref:`Decorators <decorators>` for more decorators
+
+########################
+ at originate
+########################
+
+.. |output_files| replace:: `output_files`
+.. _output_files: `decorators.originate.output_files`_
+.. |extra_parameters| replace:: `extra_parameters`
+.. _extra_parameters: `decorators.originate.extra_parameters`_
+
+
+***********************************************************************************************************************************************************
+*@originate* ( |output_files|_, [|extra_parameters|_,...] )
+***********************************************************************************************************************************************************
+ **Purpose:**
+ * Creates (originates) a set of starting files without dependencies from scratch (*ex nihilo*!)
+ * Only called to create files which do not exist.
+ * Invoked once (one job is created) per item in the ``output_files`` list.
+
+ .. note::
+
+ The first argument for the task function is the *Output*. There is by definition no
+ *Input* for ``@originate``
+
+ **Example**:
+
+ .. code-block:: python
+
+ from ruffus import *
+ @originate(["a", "b", "c", "d"], "extra")
+ def test(output_file, extra):
+ open(output_file, "w")
+
+ pipeline_run()
+
+ .. code-block:: pycon
+ :emphasize-lines: 8,11
+
+ >>> pipeline_run()
+ Job = [None -> a, extra] completed
+ Job = [None -> b, extra] completed
+ Job = [None -> c, extra] completed
+ Job = [None -> d, extra] completed
+ Completed Task = test
+
+ >>> # all files exist: nothing to do
+ >>> pipeline_run()
+
+ >>> # delete 'a' so that it is missing
+ >>> import os
+ >>> os.unlink("a")
+
+ >>> pipeline_run()
+ Job = [None -> a, extra] completed
+ Completed Task = test
+
+ **Parameters:**
+
+
+.. _decorators.originate.output_files:
+
+ * *output_files*
+ * Can be a single file name or a list of files
+ * Each item in the list is treated as the *Output* of a separate job
+
+
+.. _decorators.originate.extra_parameters:
+
+ * *extra_parameters*
+ Any extra parameters are passed verbatim to the task function
+
diff --git a/doc/_build/html/_sources/decorators/parallel.txt b/doc/_build/html/_sources/decorators/parallel.txt
new file mode 100644
index 0000000..f16cd48
--- /dev/null
+++ b/doc/_build/html/_sources/decorators/parallel.txt
@@ -0,0 +1,81 @@
+.. include:: ../global.inc
+.. _decorators.parallel:
+.. index::
+ pair: @parallel; Syntax
+
+.. seealso::
+
+ * :ref:`Decorators <decorators>` for more decorators
+
+
+########################
+ at parallel
+########################
+
+.. |job_params| replace:: `job_params`
+.. _job_params: `decorators.parallel.job_params`_
+.. |parameter_generating_function| replace:: `parameter_generating_function`
+.. _parameter_generating_function: `decorators.parallel.parameter_generating_function`_
+
+
+*****************************************************************************************************************************************
+*@parallel* ( [ [|job_params|_, ...], [|job_params|_, ...]...] | |parameter_generating_function|_)
+*****************************************************************************************************************************************
+ **Purpose:**
+ To apply the (task) function to a set of parameters in parallel without file dependency checking.
+
+ Most useful in conjunction with :ref:`@check_if_uptodate() <decorators.check_if_uptodate>`
+
+ **Example**::
+
+ from ruffus import *
+ import sys
+ parameters = [
+ ['A', 1, 2], # 1st job
+ ['B', 3, 4], # 2nd job
+ ['C', 5, 6], # 3rd job
+ ]
+ @parallel(parameters)
+ def parallel_task(name, param1, param2):
+ sys.stderr.write(" Parallel task %s: " % name)
+ sys.stderr.write("%d + %d = %d\\n" % (param1, param2, param1 + param2))
+
+ pipeline_run([parallel_task])
+
+ **Parameters:**
+
+
+.. _decorators.parallel.job_params:
+
+ * *job_params*:
+ Requires a sequence of parameters, one set for each job.
+
+ Each set of parameters can be one or more items in a sequence which will be passed to
+ the decorated task function iteratively (or in parallel)
+
+ For example::
+
+ parameters = [
+ ['A', 1, 2], # 1st job
+ ['B', 3, 4], # 2nd job
+ ['C', 5, 6], # 3rd job
+ ]
+ @parallel(parameters)
+ def parallel_task(name, param1, param2):
+ pass
+
+ Will result in the following function calls::
+
+ parallel_task('A', 1, 2)
+ parallel_task('B', 3, 4)
+ parallel_task('C', 5, 6)
+
+
+
+.. _decorators.parallel.parameter_generating_function:
+
+ * *parameter_generating_function*
+ #. A generator yielding sets of parameters (as above), one at a time, on the fly
+ #. A function returning a sequence of parameter sets, as above
+
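+       For example, a generator function can yield each parameter set on the fly
+       (a minimal sketch; the parameter values are made up)::
+
+           from ruffus import *
+
+           def generate_params_on_the_fly():
+               for name, param1, param2 in [("A", 1, 2), ("B", 3, 4)]:
+                   yield [name, param1, param2]
+
+           @parallel(generate_params_on_the_fly)
+           def parallel_task(name, param1, param2):
+               pass
+
+           pipeline_run([parallel_task])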
+
+
diff --git a/doc/_build/html/_sources/decorators/permutations.txt b/doc/_build/html/_sources/decorators/permutations.txt
new file mode 100644
index 0000000..1ef39a6
--- /dev/null
+++ b/doc/_build/html/_sources/decorators/permutations.txt
@@ -0,0 +1,158 @@
+.. include:: ../global.inc
+.. _decorators.permutations:
+.. index::
+ pair: @permutations; Syntax
+
+.. seealso::
+
+ * :ref:`Decorators <decorators>` for more decorators
+
+########################
+ at permutations
+########################
+
+.. |tasks_or_file_names| replace:: `tasks_or_file_names`
+.. _tasks_or_file_names: `decorators.permutations.tasks_or_file_names`_
+.. |extra_parameters| replace:: `extra_parameters`
+.. _extra_parameters: `decorators.permutations.extra_parameters`_
+.. |output_pattern| replace:: `output_pattern`
+.. _output_pattern: `decorators.permutations.output_pattern`_
+.. |matching_formatter| replace:: `matching_formatter`
+.. _matching_formatter: `decorators.permutations.matching_formatter`_
+
+
+
+********************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
+*@permutations* ( |tasks_or_file_names|_, :ref:`formatter<decorators.formatter>`\ *(*\ |matching_formatter|_\ *)*\, |output_pattern|_, [|extra_parameters|_,...] )
+********************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
+ **Purpose:**
+
+ Generates the **permutations** of all the elements of a set of **Input** files
+
+ The effect is analogous to the python `itertools <http://docs.python.org/2/library/itertools.html#itertools.permutations>`__
+ function of the same name:
+
+ .. code-block:: pycon
+ :emphasize-lines: 2
+
+ >>> from itertools import permutations
+ >>> # permutations('ABCD', 2) --> AB AC AD BA BC BD CA CB CD DA DB DC
+ >>> [ "".join(a) for a in permutations("ABCD", 2)]
+ ['AB', 'AC', 'AD', 'BA', 'BC', 'BD', 'CA', 'CB', 'CD', 'DA', 'DB', 'DC']
+
+ Only out of date tasks (comparing input and output files) will be run
+
+ Output file names and strings in the extra parameters
+ are determined from |tasks_or_file_names|_, i.e. from the output
+ of upstream tasks, or a list of file names, after string replacement via
+ :ref:`formatter<decorators.formatter>`.
+
+ The replacement strings require an extra level of indirection to refer to
+ parsed components:
+
+ #. The first level refers to which *set* in each tuple of inputs.
+ #. The second level refers to which input file in any particular *set* of inputs.
+
+ **Example**:
+
+ Calculates the **@permutations** of **A,B,C,D** files
+
+ .. code-block:: python
+ :emphasize-lines: 13,17,20,25,28-30
+
+ from ruffus import *
+ from ruffus.combinatorics import *
+
+ # initial file pairs
+ @originate([ ['A.1_start', 'A.2_start'],
+ ['B.1_start', 'B.2_start'],
+ ['C.1_start', 'C.2_start'],
+ ['D.1_start', 'D.2_start']])
+ def create_initial_files_ABCD(output_files):
+ for output_file in output_files:
+ with open(output_file, "w") as oo: pass
+
+ # @permutations
+ @permutations(create_initial_files_ABCD, # Input
+ formatter(), # match input files
+
+ # tuple of 2 at a time
+ 2,
+
+ # Output Replacement string
+ "{path[0][0]}/"
+ "{basename[0][1]}_vs_"
+ "{basename[1][1]}.permutations",
+
+ # Extra parameter: path for 1st set of files, 1st file name
+ "{path[0][0]}",
+
+ # Extra parameter
+ ["{basename[0][0]}", # basename for 1st set of files, 1st file name
+ "{basename[1][0]}", # 2nd
+ ])
+ def permutations_task(input_file, output_parameter, shared_path, basenames):
+ print " - ".join(basenames)
+
+
+ #
+ # Run
+ #
+ pipeline_run(verbose=0)
+
+
+ This results in:
+
+ .. code-block:: pycon
+
+ >>> pipeline_run(verbose=0)
+
+ A - B
+ A - C
+ A - D
+ B - A
+ B - C
+ B - D
+ C - A
+ C - B
+ C - D
+ D - A
+ D - B
+ D - C
+
+
+ **Parameters:**
+
+
+.. _decorators.permutations.tasks_or_file_names:
+
+ * *tasks_or_file_names*
+ can be a:
+
+ #. Task / list of tasks (as in the example above).
+ File names are taken from the output of the specified task(s)
+ #. (Nested) list of file name strings.
+ File names containing ``*[]?`` will be expanded as a |glob|_.
+ E.g.:``"a.*" => "a.1", "a.2"``
+
+
+.. _decorators.permutations.matching_formatter:
+
+ * *matching_formatter*
+ a :ref:`formatter<decorators.formatter>` indicator object containing optionally
+ a python `regular expression (re) <http://docs.python.org/library/re.html>`_.
+
+
+.. _decorators.permutations.output_pattern:
+
+ * *output_pattern*
+ Specifies the resulting output file name(s) after string
+ substitution
+
+
+.. _decorators.permutations.extra_parameters:
+
+ * *extra_parameters*
+ Optional extra parameters are passed to the functions after string
+ substitution
+
diff --git a/doc/_build/html/_sources/decorators/posttask.txt b/doc/_build/html/_sources/decorators/posttask.txt
new file mode 100644
index 0000000..e0a6a2f
--- /dev/null
+++ b/doc/_build/html/_sources/decorators/posttask.txt
@@ -0,0 +1,70 @@
+.. include:: ../global.inc
+.. _decorators.posttask:
+.. index::
+ pair: @posttask; Syntax
+
+.. seealso::
+
+ * :ref:`Decorators <decorators>` for more decorators
+
+.. |function| replace:: `function`
+.. _function: `decorators.posttask.function`_
+.. |file_name| replace:: `file_name`
+.. _file_name: `decorators.posttask.file_name`_
+
+########################
+ at posttask
+########################
+
+*****************************************************************************************************************************************
+*@posttask* (|function|_ | :ref:`touch_file<decorators.touch_file>`\ *(*\ |file_name|_\ *)*\)
+*****************************************************************************************************************************************
+ **Purpose:**
+ Calls functions to signal the completion of each task
+
+ **Example**::
+
+ from ruffus import *
+
+ def task_finished():
+ print "hooray"
+
+ @posttask(task_finished)
+ @files(None, "a.1")
+ def create_if_necessary(input_file, output_file):
+ open(output_file, "w")
+
+ pipeline_run([create_if_necessary])
+
+ **Parameters:**
+
+.. _decorators.posttask.function:
+
+ * *function*:
+        ``function()`` will be called when Ruffus passes through a task.
+
+ This may happen even if all of the jobs are up-to-date:
+       when an upstream task is out-of-date and execution passes through
+       this point in the pipeline.
+
+.. _decorators.posttask.file_name:
+
+ * *file_name*
+ Files to be ``touch``\ -ed after the task is executed.
+
+ This will change the date/time stamp of the ``file_name`` to the current date/time.
+ If the file does not exist, an empty file will be created.
+
+       Must be wrapped in a :ref:`touch_file<decorators.touch_file>` indicator object::
+
+ from ruffus import *
+
+ @posttask(touch_file("task_completed.flag"))
+ @files(None, "a.1")
+ def create_if_necessary(input_file, output_file):
+ open(output_file, "w")
+
+ pipeline_run([create_if_necessary])
+
+
+
diff --git a/doc/_build/html/_sources/decorators/product.txt b/doc/_build/html/_sources/decorators/product.txt
new file mode 100644
index 0000000..5c77af4
--- /dev/null
+++ b/doc/_build/html/_sources/decorators/product.txt
@@ -0,0 +1,192 @@
+.. include:: ../global.inc
+.. _decorators.product:
+.. index::
+ pair: @product; Syntax
+
+.. seealso::
+
+ * :ref:`Decorators <decorators>` for more decorators
+
+########################
+ at product
+########################
+
+.. |tasks_or_file_names| replace:: `tasks_or_file_names`
+.. _tasks_or_file_names: `decorators.product.tasks_or_file_names`_
+.. |extra_parameters| replace:: `extra_parameters`
+.. _extra_parameters: `decorators.product.extra_parameters`_
+.. |output_pattern| replace:: `output_pattern`
+.. _output_pattern: `decorators.product.output_pattern`_
+.. |matching_formatter| replace:: `matching_formatter`
+.. _matching_formatter: `decorators.product.matching_formatter`_
+
+
+
+********************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
+*@product* ( |tasks_or_file_names|_, :ref:`formatter<decorators.formatter>`\ *(*\ |matching_formatter|_\ *)*\, [|tasks_or_file_names|_, :ref:`formatter<decorators.formatter>`\ *(*\ |matching_formatter|_\ *)*\, ... ], |output_pattern|_, [|extra_parameters|_,...] )
+********************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
+ **Purpose:**
+
+ Generates the Cartesian **product**, i.e. all vs all comparisons, between sets of input files.
+
+ The effect is analogous to the python `itertools <http://docs.python.org/2/library/itertools.html#itertools.product>`__
+ function of the same name, i.e. a nested for loop.
+
+ .. code-block:: pycon
+ :emphasize-lines: 2
+
+ >>> from itertools import product
+ >>> # product('ABC', 'XYZ') --> AX AY AZ BX BY BZ CX CY CZ
+ >>> [ "".join(a) for a in product('ABC', 'XYZ')]
+ ['AX', 'AY', 'AZ', 'BX', 'BY', 'BZ', 'CX', 'CY', 'CZ']
+
+ Only out of date tasks (comparing input and output files) will be run
+
+ Output file names and strings in the extra parameters
+ are determined from |tasks_or_file_names|_, i.e. from the output
+    of upstream tasks, or a list of file names, after string replacement via
+ :ref:`formatter<decorators.formatter>`.
+
+ The replacement strings require an extra level of indirection to refer to
+ parsed components:
+
+ #. The first level refers to which *set* of inputs (e.g. **A,B** or **P,Q** or **X,Y**
+ in the following example.)
+ #. The second level refers to which input file in any particular *set* of inputs.
+
+ For example, ``'{basename[2][0]}'`` is the `basename <http://docs.python.org/2/library/os.path.html#os.path.basename>`__ for
+ * the third set of inputs (**X,Y**) and
+ * the first file name string in each **Input** of that set (``"x.1_start"`` and ``"y.1_start"``)
+
+ **Example**:
+
+ Calculates the **@product** of **A,B** and **P,Q** and **X, Y** files
+
+ .. code-block:: python
+ :emphasize-lines: 4,17,19,22,25,27,28,29,30,32,34,35,36
+
+ from ruffus import *
+ from ruffus.combinatorics import *
+
+ # Three sets of initial files
+ @originate([ 'a.start', 'b.start'])
+ def create_initial_files_ab(output_file):
+ with open(output_file, "w") as oo: pass
+
+ @originate([ 'p.start', 'q.start'])
+ def create_initial_files_pq(output_file):
+ with open(output_file, "w") as oo: pass
+
+ @originate([ ['x.1_start', 'x.2_start'],
+ ['y.1_start', 'y.2_start'] ])
+        def create_initial_files_xy(output_files):
+            for output_file in output_files:
+                with open(output_file, "w") as oo: pass
+
+ # @product
+ @product( create_initial_files_ab, # Input
+ formatter("(.start)$"), # match input file set # 1
+
+ create_initial_files_pq, # Input
+ formatter("(.start)$"), # match input file set # 2
+
+ create_initial_files_xy, # Input
+ formatter("(.start)$"), # match input file set # 3
+
+ "{path[0][0]}/" # Output Replacement string
+ "{basename[0][0]}_vs_" #
+ "{basename[1][0]}_vs_" #
+ "{basename[2][0]}.product", #
+
+ "{path[0][0]}", # Extra parameter: path for 1st set of files, 1st file name
+
+ ["{basename[0][0]}", # Extra parameter: basename for 1st set of files, 1st file name
+ "{basename[1][0]}", # 2nd
+ "{basename[2][0]}", # 3rd
+ ])
+ def product_task(input_file, output_parameter, shared_path, basenames):
+ print "# basenames = ", " ".join(basenames)
+ print "input_parameter = ", input_file
+ print "output_parameter = ", output_parameter, "\n"
+
+
+ #
+ # Run
+ #
+ pipeline_run(verbose=0)
+
+
+ This results in:
+
+ .. code-block:: pycon
+ :emphasize-lines: 2,6,10,14,18,22,26,30
+
+ >>> pipeline_run(verbose=0)
+
+ # basenames = a p x
+ input_parameter = ('a.start', 'p.start', 'x.start')
+ output_parameter = /home/lg/temp/a_vs_p_vs_x.product
+
+ # basenames = a p y
+ input_parameter = ('a.start', 'p.start', 'y.start')
+ output_parameter = /home/lg/temp/a_vs_p_vs_y.product
+
+ # basenames = a q x
+ input_parameter = ('a.start', 'q.start', 'x.start')
+ output_parameter = /home/lg/temp/a_vs_q_vs_x.product
+
+ # basenames = a q y
+ input_parameter = ('a.start', 'q.start', 'y.start')
+ output_parameter = /home/lg/temp/a_vs_q_vs_y.product
+
+ # basenames = b p x
+ input_parameter = ('b.start', 'p.start', 'x.start')
+ output_parameter = /home/lg/temp/b_vs_p_vs_x.product
+
+ # basenames = b p y
+ input_parameter = ('b.start', 'p.start', 'y.start')
+ output_parameter = /home/lg/temp/b_vs_p_vs_y.product
+
+ # basenames = b q x
+ input_parameter = ('b.start', 'q.start', 'x.start')
+ output_parameter = /home/lg/temp/b_vs_q_vs_x.product
+
+ # basenames = b q y
+ input_parameter = ('b.start', 'q.start', 'y.start')
+ output_parameter = /home/lg/temp/b_vs_q_vs_y.product
+
+
+ **Parameters:**
+
+
+.. _decorators.product.tasks_or_file_names:
+
+ * *tasks_or_file_names*
+ can be a:
+
+ #. Task / list of tasks (as in the example above).
+ File names are taken from the output of the specified task(s)
+ #. (Nested) list of file name strings.
+ File names containing ``*[]?`` will be expanded as a |glob|_.
+ E.g.:``"a.*" => "a.1", "a.2"``
+
+
+.. _decorators.product.matching_formatter:
+
+ * *matching_formatter*
+ a :ref:`formatter<decorators.formatter>` indicator object containing optionally
+ a python `regular expression (re) <http://docs.python.org/library/re.html>`_.
+
+
+.. _decorators.product.output_pattern:
+
+ * *output_pattern*
+ Specifies the resulting output file name(s) after string
+ substitution
+
+
+.. _decorators.product.extra_parameters:
+
+ * *extra_parameters*
+ Optional extra parameters are passed to the functions after string
+ substitution
+
diff --git a/doc/_build/html/_sources/decorators/split.txt b/doc/_build/html/_sources/decorators/split.txt
new file mode 100644
index 0000000..e0a6f5f
--- /dev/null
+++ b/doc/_build/html/_sources/decorators/split.txt
@@ -0,0 +1,92 @@
+.. include:: ../global.inc
+.. _decorators.split:
+.. index::
+ pair: @split; Syntax
+
+.. seealso::
+
+ * :ref:`Decorators <decorators>` for more decorators
+
+
+########################
+ at split
+########################
+
+.. |tasks_or_file_names| replace:: `tasks_or_file_names`
+.. _tasks_or_file_names: `decorators.split.tasks_or_file_names`_
+.. |extra_parameters| replace:: `extra_parameters`
+.. _extra_parameters: `decorators.split.extra_parameters`_
+.. |output_files| replace:: `output_files`
+.. _output_files: `decorators.split.output_files`_
+
+*****************************************************************************************************************************************
+*@split* ( |tasks_or_file_names|_, |output_files|_, [|extra_parameters|_,...] )
+*****************************************************************************************************************************************
+ **Purpose:**
+ | Splits a single set of input files into multiple output file names, where the number of
+ output files may not be known beforehand.
+ | Only out of date tasks (comparing input and output files) will be run
+
+ **Example**::
+
+ @split("big_file", '*.little_files')
+ def split_big_to_small(input_file, output_files):
+ print "input_file = %s" % input_file
+ print "output_file = %s" % output_file
+
+
+ will produce::
+
+ input_file = big_file
+ output_file = *.little_files
+
+
+ **Parameters:**
+
+.. _decorators.split.tasks_or_file_names:
+
+
+ * *tasks_or_file_names*
+ can be a:
+
+ #. (Nested) list of file name strings (as in the example above).
+
+ | File names containing ``*[]?`` will be expanded as a |glob|_.
+ | E.g.:``"a.*" => "a.1", "a.2"``
+
+ #. Task / list of tasks.
+
+ File names are taken from the output of the specified task(s)
+
+
+.. _decorators.split.output_files:
+
+ * *output_files*
+ Specifies the resulting output file name(s).
+
+ | These are used **only** to check if the task is up to date.
+ | Normally you would use either a |glob|_ (e.g. ``*.little_files`` as above) or a "sentinel file"
+ to indicate that the task has completed successfully.
+ | You can of course do both:
+
+ ::
+
+ @split("big_file", ["sentinel.file", "*.little_files"])
+ def split_big_to_small(input_file, output_files):
+ pass
+
+
+.. _decorators.split.extra_parameters:
+
+ * [*extra_parameters, ...*]
+ Any extra parameters are passed verbatim to the task function
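+
+       For example, a minimal sketch (the chunk size and the label below are made-up,
+       illustrative extra parameters)::
+
+           @split("big_file", "*.little_files", 1000, "first pass")
+           def split_big_to_small(input_file, output_files, lines_per_chunk, label):
+               pass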
+
+
+
+########################################################################
+ at split with ``regex(...)``, ``add_inputs`` and ``inputs``
+########################################################################
+
+ This deprecated syntax is a synonym for :ref:`@subdivide <decorators.subdivide>`.
+
diff --git a/doc/_build/html/_sources/decorators/subdivide.txt b/doc/_build/html/_sources/decorators/subdivide.txt
new file mode 100644
index 0000000..b8df277
--- /dev/null
+++ b/doc/_build/html/_sources/decorators/subdivide.txt
@@ -0,0 +1,189 @@
+.. include:: ../global.inc
+.. _decorators.subdivide:
+.. index::
+ pair: @subdivide; Syntax
+
+.. seealso::
+
+ * :ref:`Decorators <decorators>` for more decorators
+
+########################
+ at subdivide
+########################
+
+.. |tasks_or_file_names| replace:: `tasks_or_file_names`
+.. _tasks_or_file_names: `decorators.subdivide.tasks_or_file_names`_
+.. |extra_parameters| replace:: `extra_parameters`
+.. _extra_parameters: `decorators.subdivide.extra_parameters`_
+.. |output_pattern| replace:: `output_pattern`
+.. _output_pattern: `decorators.subdivide.output_pattern`_
+.. |matching_regex| replace:: `matching_regex`
+.. _matching_regex: `decorators.subdivide.matching_regex`_
+.. |matching_formatter| replace:: `matching_formatter`
+.. _matching_formatter: `decorators.subdivide.matching_formatter`_
+.. |input_pattern_or_glob| replace:: `input_pattern_or_glob`
+.. _input_pattern_or_glob: `decorators.subdivide.input_pattern_or_glob`_
+
+
+************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
+*@subdivide* ( |tasks_or_file_names|_, :ref:`regex<decorators.regex>`\ *(*\ |matching_regex|_\ *)* | :ref:`formatter<decorators.formatter>`\ *(*\ |matching_formatter|_\ *)*\, [ :ref:`inputs<decorators.inputs>` *(*\ |input_pattern_or_glob|_\ *)* | :ref:`add_inputs<decorators.add_inputs>` *(*\ |input_pattern_or_glob|_\ *)* ], |output_pattern|_, [|extra_parameters|_,...] )
+************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
+ **Purpose:**
+
+       * Subdivides each *Input* further into multiple *Outputs*.
+
+ * **Many to Even More** operator
+
+ * The number of files in each *Output* can be set at runtime by the use of globs
+
+ * Output file names are specified using the :ref:`formatter<decorators.formatter>` or :ref:`regex<decorators.regex>` indicators from |tasks_or_file_names|_, i.e. from the output
+ of specified tasks, or a list of file names, or a |glob|_ matching pattern.
+
+ * Additional inputs or dependencies can be added dynamically to the task:
+      :ref:`add_inputs<decorators.add_inputs>` nests the original input parameters in a list before adding additional dependencies.
+
+      :ref:`inputs<decorators.inputs>` replaces the original input parameters wholesale.
+
+ * Only out of date tasks (comparing input and output files) will be run.
+
+ .. note::
+
+            The use of **@split** as a synonym for **@subdivide** is deprecated.
+
+
+ **Example**:
+
+ .. code-block:: python
+ :emphasize-lines: 12,13,20
+
+ from ruffus import *
+ from random import randint
+        import os
+
+ @originate(['0.start', '1.start', '2.start'])
+ def create_files(output_file):
+ with open(output_file, "w"):
+ pass
+
+
+ #
+ # Subdivide each of 3 start files further into [NNN1, NNN2, NNN3] number of files
+ # where NNN1, NNN2, NNN3 are determined at run time
+ #
+ @subdivide(create_files, formatter(),
+ "{path[0]}/{basename[0]}.*.step1", # Output parameter: Glob matches any number of output file names
+ "{path[0]}/{basename[0]}") # Extra parameter: Append to this for output file names
+ def subdivide_files(input_file, output_files, output_file_name_root):
+ #
+ # IMPORTANT: cleanup rubbish from previous run first
+ #
+ for oo in output_files:
+ os.unlink(oo)
+ # The number of output files is decided at run time
+ number_of_output_files = randint(2,4)
+ for ii in range(number_of_output_files):
+ output_file_name = "{output_file_name_root}.{ii}.step1".format(**locals())
+ with open(output_file_name, "w"):
+ pass
+
+
+ #
+ # Each output of subdivide_files results in a separate job for downstream tasks
+ #
+ @transform(subdivide_files, suffix(".step1"), ".step2")
+ def analyse_files(input_file, output_file_name):
+ with open(output_file_name, "w"):
+ pass
+
+ pipeline_run()
+
+ .. comment **
+
+ The Ruffus printout shows how each of the jobs in ``subdivide_files()`` spawns
+ multiple *Output* leading to more jobs in ``analyse_files()``
+
+
+ .. code-block:: pycon
+
+ >>> pipeline_run()
+ Job = [None -> 0.start] completed
+ Job = [None -> 1.start] completed
+ Job = [None -> 2.start] completed
+ Completed Task = create_files
+ Job = [0.start -> 0.*.step1, 0] completed
+ Job = [1.start -> 1.*.step1, 1] completed
+ Job = [2.start -> 2.*.step1, 2] completed
+ Completed Task = subdivide_files
+ Job = [0.0.step1 -> 0.0.step2] completed
+ Job = [0.1.step1 -> 0.1.step2] completed
+ Job = [0.2.step1 -> 0.2.step2] completed
+ Job = [1.0.step1 -> 1.0.step2] completed
+ Job = [1.1.step1 -> 1.1.step2] completed
+ Job = [1.2.step1 -> 1.2.step2] completed
+ Job = [1.3.step1 -> 1.3.step2] completed
+ Job = [2.0.step1 -> 2.0.step2] completed
+ Job = [2.1.step1 -> 2.1.step2] completed
+ Job = [2.2.step1 -> 2.2.step2] completed
+ Job = [2.3.step1 -> 2.3.step2] completed
+ Completed Task = analyse_files
+
+
+
+
+ **Parameters:**
+
+
+.. _decorators.subdivide.tasks_or_file_names:
+
+ * *tasks_or_file_names*
+ can be a:
+
+ #. Task / list of tasks (as in the example above).
+ File names are taken from the output of the specified task(s)
+ #. (Nested) list of file name strings.
+ File names containing ``*[]?`` will be expanded as a |glob|_.
+ E.g.:``"a.*" => "a.1", "a.2"``
+
+
+.. _decorators.subdivide.matching_regex:
+
+ * *matching_regex*
+ is a python regular expression string, which must be wrapped in
+       a :ref:`regex<decorators.regex>` indicator object.
+       See python `regular expression (re) <http://docs.python.org/library/re.html>`_
+       documentation for details of regular expression syntax.
+
+.. _decorators.subdivide.matching_formatter:
+
+ * *matching_formatter*
+ a :ref:`formatter<decorators.formatter>` indicator object containing optionally
+ a python `regular expression (re) <http://docs.python.org/library/re.html>`_.
+
+.. _decorators.subdivide.output_pattern:
+
+ * *output_pattern*
+ Specifies the resulting output file name(s). Can include glob patterns.
+ Strings are subject to :ref:`regex<decorators.regex>` or :ref:`formatter<decorators.formatter>`
+ substitution.
+
+.. _decorators.subdivide.input_pattern_or_glob:
+
+ * *input_pattern*
+ Specifies the resulting input(s) to each job.
+      Must be wrapped in an :ref:`inputs<decorators.inputs>` or an :ref:`add_inputs<decorators.add_inputs>` indicator object.
+
+ Can be a:
+
+ #. Task / list of tasks (as in the example above).
+ File names are taken from the output of the specified task(s)
+ #. (Nested) list of file name strings.
+
+ Strings are subject to :ref:`regex<decorators.regex>` or :ref:`formatter<decorators.formatter>` substitution.
+
+
+.. _decorators.subdivide.extra_parameters:
+
+ * *extra_parameters*
+ Any extra parameters are consumed by the task function and not forwarded further down the pipeline.
+ Strings are subject to :ref:`regex<decorators.regex>` or :ref:`formatter<decorators.formatter>`
+ substitution.
diff --git a/doc/_build/html/_sources/decorators/transform.txt b/doc/_build/html/_sources/decorators/transform.txt
new file mode 100644
index 0000000..494674e
--- /dev/null
+++ b/doc/_build/html/_sources/decorators/transform.txt
@@ -0,0 +1,176 @@
+.. include:: ../global.inc
+.. _decorators.transform:
+.. index::
+ pair: @transform; Syntax
+
+.. seealso::
+
+ * :ref:`Decorators <decorators>` for more decorators
+
+########################
+ at transform
+########################
+
+.. |tasks_or_file_names| replace:: `tasks_or_file_names`
+.. _tasks_or_file_names: `decorators.transform.tasks_or_file_names`_
+.. |extra_parameters| replace:: `extra_parameters`
+.. _extra_parameters: `decorators.transform.extra_parameters`_
+.. |output_pattern| replace:: `output_pattern`
+.. _output_pattern: `decorators.transform.output_pattern`_
+.. |matching_regex| replace:: `matching_regex`
+.. _matching_regex: `decorators.transform.matching_regex`_
+.. |matching_formatter| replace:: `matching_formatter`
+.. _matching_formatter: `decorators.transform.matching_formatter`_
+.. |suffix_string| replace:: `suffix_string`
+.. _suffix_string: `decorators.transform.suffix_string`_
+
+******************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
+*@transform* ( |tasks_or_file_names|_, :ref:`suffix<decorators.suffix>`\ *(*\ |suffix_string|_\ *)*\ | :ref:`regex<decorators.regex>`\ *(*\ |matching_regex|_\ *)* | :ref:`formatter<decorators.formatter>`\ *(*\ |matching_formatter|_\ *)*\, |output_pattern|_, [|extra_parameters|_,...] )
+******************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
+ **Purpose:**
+ Applies the task function to transform data from input to output files.
+
+ Output file names are specified from |tasks_or_file_names|_, i.e. from the output
+ of specified tasks, or a list of file names, or a |glob|_ matching pattern.
+
+ String replacement occurs either through suffix matches via :ref:`suffix<decorators.suffix>` or
+ the :ref:`formatter<decorators.formatter>` or :ref:`regex<decorators.regex>` indicators.
+
+ Only out of date tasks (comparing input and output files) will be run
+
+ **Simple Example**
+
+ Transforms ``*.c`` to ``*.o``::
+
+ @transform(["1.c", "2.c"], suffix(".c"), ".o")
+ def compile(infile, outfile):
+ pass
+
+ Same example with a regular expression::
+
+ @transform(["1.c", "2.c"], regex(r".c$"), ".o")
+ def compile(infile, outfile):
+ pass
+
+ Both result in the following function calls:
+
+ ::
+
+ # 1.c -> 1.o
+ # 2.c -> 2.o
+ compile("1.c", "1.o")
+ compile("2.c", "2.o")
+
+
+ **Escaping regular expression patterns**
+
+       A string like ``universal.h`` in ``add_inputs`` will be added *as is*.
+ ``r"\1.h"``, however, performs suffix substitution, with the special form ``r"\1"`` matching everything up to the suffix.
+ Remember to 'escape' ``r"\1"`` otherwise Ruffus will complain and throw an Exception to remind you.
+ The most convenient way is to use a python "raw" string.
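+
+       For example, a minimal sketch of what python itself does to the two spellings (plain
+       python, independent of Ruffus)::
+
+           >>> "\1.h"         # "\1" is swallowed by python as an escape sequence
+           '\x01.h'
+           >>> r"\1.h"        # the raw string keeps the backslash for Ruffus to substitute
+           '\\1.h'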
+
+ **Parameters:**
+
+.. _decorators.transform.tasks_or_file_names:
+
+ * *tasks_or_file_names*
+ can be a:
+
+ #. Task / list of tasks (as in the example above).
+ File names are taken from the output of the specified task(s)
+ #. (Nested) list of file name strings.
+ File names containing ``*[]?`` will be expanded as a |glob|_.
+ E.g.:``"a.*" => "a.1", "a.2"``
+
+.. _decorators.transform.suffix_string:
+
+ * *suffix_string*
+ must be wrapped in a :ref:`suffix<decorators.suffix>` indicator object.
+ The end of each input file name which matches ``suffix_string`` will be replaced by ``output_pattern``.
+
+ Input file names which do not match suffix_string will be ignored
+
+
+ The non-suffix part of the match can be referred to using the ``"\1"`` pattern. This
+       can be useful for putting the output in a different directory, for example::
+
+
+ @transform(["1.c", "2.c"], suffix(".c"), r"my_path/\1.o")
+ def compile(infile, outfile):
+ pass
+
+ This results in the following function calls:
+
+ ::
+
+ # 1.c -> my_path/1.o
+ # 2.c -> my_path/2.o
+ compile("1.c", "my_path/1.o")
+ compile("2.c", "my_path/2.o")
+
+ For convenience and visual clarity, the ``"\1"`` can be omitted from the output parameter.
+       However, the ``"\1"`` is mandatory for string substitutions in additional parameters::
+
+
+           @transform(["1.c", "2.c"], suffix(".c"), [r"\1.o", ".o"], r"Compiling \1", "verbatim")
+ def compile(infile, outfile):
+ pass
+
+ Results in the following function calls:
+
+ ::
+
+ compile("1.c", ["1.o", "1.o"], "Compiling 1", "verbatim")
+ compile("2.c", ["2.o", "2.o"], "Compiling 2", "verbatim")
+
+       Since ``r"\1"`` is optional for the output parameter, ``"\1.o"`` and ``".o"`` are equivalent.
+       However, strings in other parameters which do not contain ``r"\1"`` will be included verbatim, much
+ like the string ``"verbatim"`` in the above example.
+
+
+
+
+.. _decorators.transform.matching_regex:
+
+ * *matching_regex*
+ is a python regular expression string, which must be wrapped in
+       a :ref:`regex<decorators.regex>` indicator object.
+       See python `regular expression (re) <http://docs.python.org/library/re.html>`_
+       documentation for details of regular expression syntax.
+       Each output file name is created using regular expression substitution with ``output_pattern``.
+
+.. _decorators.transform.matching_formatter:
+
+ * *matching_formatter*
+ a :ref:`formatter<decorators.formatter>` indicator object containing optionally
+ a python `regular expression (re) <http://docs.python.org/library/re.html>`_.
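+
+       For example, a minimal sketch (the ``obj`` output sub-directory is an illustrative
+       assumption; the available replacement fields are described in the
+       :ref:`formatter<decorators.formatter>` documentation)::
+
+           @transform(["1.c", "2.c"],
+                      formatter(r"\.c$"),        # optionally filter / parse input names
+                      "obj/{basename[0]}.o")     # rebuild output names from their parts
+           def compile(infile, outfile):
+               pass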
+
+.. _decorators.transform.output_pattern:
+
+ * *output_pattern*
+ Specifies the resulting output file name(s).
+
+.. _decorators.transform.extra_parameters:
+
+ * [*extra_parameters, ...*]
+ Any extra parameters are passed to the task function.
+
+       If ``regex(matching_regex)`` or ``formatter(...)`` is used, then substitution
+ is first applied to (even nested) string parameters. Other data types are passed
+ verbatim.
+
+ For example::
+
+ @transform(["a.c", "b.c"], regex(r"(.*).c"), r"\1.o", r"\1")
+ def compile(infile, outfile):
+ pass
+
+ will result in the following function calls::
+
+ compile("a.c", "a.o", "a")
+ compile("b.c", "b.o", "b")
+
+
+
+
+See :ref:`here <decorators.transform_ex>` for more advanced uses of transform.
diff --git a/doc/_build/html/_sources/decorators/transform_ex.txt b/doc/_build/html/_sources/decorators/transform_ex.txt
new file mode 100644
index 0000000..2436696
--- /dev/null
+++ b/doc/_build/html/_sources/decorators/transform_ex.txt
@@ -0,0 +1,190 @@
+.. include:: ../global.inc
+.. _decorators.transform_ex:
+.. index::
+ pair: @transform, inputs(...); Syntax
+ pair: @transform, add_inputs(...); Syntax
+
+
+.. seealso::
+
+ * :ref:`Decorators <decorators>` for more decorators
+
+####################################################
+ at transform with ``add_inputs`` and ``inputs``
+####################################################
+
+.. |tasks_or_file_names| replace:: `tasks_or_file_names`
+.. _tasks_or_file_names: `decorators.transform.tasks_or_file_names`_
+.. |extra_parameters| replace:: `extra_parameters`
+.. _extra_parameters: `decorators.transform.extra_parameters`_
+.. |output_pattern| replace:: `output_pattern`
+.. _output_pattern: `decorators.transform.output_pattern`_
+.. |input_pattern_or_glob| replace:: `input_pattern_or_glob`
+.. _input_pattern_or_glob: `decorators.transform.input_pattern_or_glob`_
+.. |matching_regex| replace:: `matching_regex`
+.. _matching_regex: `decorators.transform.matching_regex`_
+.. |matching_formatter| replace:: `matching_formatter`
+.. _matching_formatter: `decorators.transform.matching_formatter`_
+.. |suffix_string| replace:: `suffix_string`
+.. _suffix_string: `decorators.transform.suffix_string`_
+
+
+
+
+
+************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************ [...]
+*@transform* ( |tasks_or_file_names|_, :ref:`suffix<decorators.suffix>`\ *(*\ |suffix_string|_\ *)*\ | :ref:`regex<decorators.regex>`\ *(*\ |matching_regex|_\ *)* | :ref:`formatter<decorators.formatter>`\ *(*\ |matching_formatter|_\ *)*\, :ref:`inputs<decorators.inputs>` | :ref:`add_inputs<decorators.add_inputs>`\ *(*\ |input_pattern_or_glob|_\ *)*\ , |output_pattern|_, [|extra_parameters|_,...] )
+************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************ [...]
+ **Purpose:**
+ This variant of ``@transform`` allows additional inputs or dependencies to be added
+ dynamically to the task.
+
+ Output file names and strings in the extra parameters
+ are determined from |tasks_or_file_names|_, i.e. from the output
+       of upstream tasks, or a list of file names.
+
+ This variant of ``@transform`` allows input file names to be derived in the same way.
+
+ String replacement occurs either through suffix matches via :ref:`suffix<decorators.suffix>` or
+ the :ref:`formatter<decorators.formatter>` or :ref:`regex<decorators.regex>` indicators.
+
+       ``@transform`` is a **one to one** operation: each **Input** is transformed into a
+       corresponding **Output**.
+
+       :ref:`add_inputs<decorators.add_inputs>` nests the original input parameters in a list before adding additional dependencies.
+
+       :ref:`inputs<decorators.inputs>` replaces the original input parameters wholesale.
+
+ Only out of date tasks (comparing input and output files) will be run
+
+ **Example of** :ref:`add_inputs<decorators.add_inputs>`
+
+ A common task in compiling C code is to include the corresponding header file for the source.
+
+ To compile ``*.c`` to ``*.o``, adding ``*.h`` and the common header ``universal.h``:
+ ::
+
+ @transform(["1.c", "2.c"], suffix(".c"), add_inputs([r"\1.h", "universal.h"]), ".o")
+ def compile(infile, outfile):
+ pass
+
+        This will result in the following function calls:
+ ::
+
+ compile(["1.c", "1.h", "universal.h"], "1.o")
+ compile(["2.c", "2.h", "universal.h"], "2.o")
+
+ **Example of** :ref:`inputs<decorators.inputs>`
+
+       ``inputs(...)`` allows the original input parameters to be replaced wholesale.
+
+ This can be seen in the following example:
+ ::
+
+            @transform([ ["1.c", "A.c", 2],
+ ["2.c", "B.c", "C.c", 3]],
+ suffix(".c"), inputs([r"\1.py", "docs.rst"]), ".pyc")
+ def compile(infile, outfile):
+ pass
+
+        This will result in the following function calls:
+ ::
+
+ compile(["1.py", "docs.rst"], "1.pyc")
+ compile(["2.py", "docs.rst"], "2.pyc")
+
+
+
+ **Parameters:**
+
+.. _decorators.transform.tasks_or_file_names:
+
+ * *tasks_or_file_names*
+ can be a:
+
+ #. Task / list of tasks (as in the example above).
+ File names are taken from the output of the specified task(s)
+ #. (Nested) list of file name strings.
+ File names containing ``*[]?`` will be expanded as a |glob|_.
+ E.g.:``"a.*" => "a.1", "a.2"``
+
+.. _decorators.transform.suffix_string:
+
+ * *suffix_string*
+ must be wrapped in a :ref:`suffix<decorators.suffix>` indicator object.
+      The end of each file name which matches ``suffix_string`` will be replaced by ``output_pattern``.
+ Thus::
+
+ @transform(["a.c", "b.c"], suffix(".c"), ".o")
+ def compile(infile, outfile):
+ pass
+
+ will result in the following function calls::
+
+ compile("a.c", "a.o")
+ compile("b.c", "b.o")
+
+ File names which do not match suffix_string will be ignored
+
+.. _decorators.transform.matching_regex:
+
+ * *matching_regex*
+ is a python regular expression string, which must be wrapped in
+       a :ref:`regex<decorators.regex>` indicator object.
+       See python `regular expression (re) <http://docs.python.org/library/re.html>`_
+       documentation for details of regular expression syntax.
+       Each output file name is created using regular expression substitution with ``output_pattern``.
+
+.. _decorators.transform.matching_formatter:
+
+ * *matching_formatter*
+ a :ref:`formatter<decorators.formatter>` indicator object containing optionally
+ a python `regular expression (re) <http://docs.python.org/library/re.html>`_.
+
+.. _decorators.transform.input_pattern_or_glob:
+
+ * *input_pattern*
+ Specifies the resulting input(s) to each job.
+      Must be wrapped in an :ref:`inputs<decorators.inputs>` or an :ref:`add_inputs<decorators.add_inputs>` indicator object.
+
+ Can be a:
+
+ #. Task / list of tasks (as in the example above).
+ File names are taken from the output of the specified task(s)
+ #. (Nested) list of file name strings.
+ Strings will be subject to substitution.
+ File names containing ``*[]?`` will be expanded as a |glob|_.
+ E.g.:``"a.*" => "a.1", "a.2"``
+
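+      For example, a minimal sketch of adding the output of an upstream task to every job
+      (``build_index`` and its index file are illustrative assumptions, not taken from the
+      examples above)::
+
+          @originate(["genome.idx"])               # hypothetical upstream task
+          def build_index(output_file):
+              open(output_file, "w").close()
+
+          @transform(["1.fa", "2.fa"],
+                     suffix(".fa"),
+                     add_inputs(build_index),      # file names taken from build_index's output
+                     ".sam")
+          def align(input_files, output_file):
+              pass
+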
+
+
+.. _decorators.transform.output_pattern:
+
+ * *output_pattern*
+ Specifies the resulting output file name(s).
+
+.. _decorators.transform.extra_parameters:
+
+ * [*extra_parameters, ...*]
+ Any extra parameters are passed to the task function.
+
+ If the ``regex(...)`` or ``formatter(...)`` parameter is used, then substitution
+ is first applied to (even nested) string parameters. Other data types are passed
+ verbatim.
+
+ For example::
+
+ @transform(["a.c", "b.c"], regex(r"(.*).c"), inputs(r"\1.c", r"\1.h", "universal.h"), r"\1.o", r"\1")
+ def compile(infiles, outfile, file_name_root):
+ # do something here
+ pass
+
+ will result in the following function calls::
+
+ compile(["1.c", "1.h", "universal.h"], "1.o", "1")
+ compile(["2.c", "2.h", "universal.h"], "2.o", "2")
+
+
+See :ref:`here <decorators.transform>` for more straightforward ways to use transform.
diff --git a/doc/_build/html/_sources/design.txt b/doc/_build/html/_sources/design.txt
new file mode 100644
index 0000000..2dd0b12
--- /dev/null
+++ b/doc/_build/html/_sources/design.txt
@@ -0,0 +1,304 @@
+.. Design:
+
+.. include:: global.inc
+
+.. index::
+ pair: Design; Ruffus
+
+###############################
+Design & Architecture
+###############################
+
+ The *ruffus* module has the following design goals:
+
+    * Simple
+ * Intuitive
+ * Lightweight
+ * Unintrusive
+ * Flexible/Powerful
+
+
+ Computational pipelines, especially in science, are best thought of in terms of data
+ flowing through successive, dependent stages (**ruffus** calls these :term:`task`\ s).
+ Traditionally, files have been used to
+ link pipelined stages together. This means that computational pipelines can be managed
+ using traditional software construction (`build`) systems.
+
+=================================================
+`GNU Make`
+=================================================
+ The grand-daddy of these is UNIX `make <http://en.wikipedia.org/wiki/Make_(software)>`_.
+ `GNU make <http://www.gnu.org/software/make/>`_ is ubiquitous in the linux world for
+ installing and compiling software.
+ It has been widely used to build computational pipelines because it supports:
+
+ * Stopping and restarting computational processes
+ * Running multiple, even thousands of jobs in parallel
+
+.. _design.make_syntax_ugly:
+
+******************************************************
+Deficiencies of `make` / `gmake`
+******************************************************
+
+ However, make and `GNU make <http://www.gnu.org/software/make/>`_ use a specialised (domain-specific)
+    language, which has been much criticised for its poor support for modern
+    programming language features, such as variable scope, pattern matching and debugging.
+ Make scripts require large amounts of often obscure shell scripting
+ and makefiles can quickly become unmaintainable.
+
+.. _design.scons_and_rake:
+
+=================================================
+`Scons`, `Rake` and other `Make` alternatives
+=================================================
+
+ Many attempts have been made to produce a more modern version of make, with less of its
+ historical baggage. These include the Java-based `Apache ant <http://ant.apache.org/>`_ which is specified in xml.
+
+ More interesting are a new breed of build systems whose scripts are written in modern programming
+    languages, rather than a specially-invented "build" specification syntax.
+ These include the Python `scons <http://www.scons.org/>`_, Ruby `rake <http://rake.rubyforge.org/>`_ and
+ its python port `Smithy <http://packages.python.org/Smithy/>`_.
+
+    The great advantages are that computational pipelines do not need to be artificially parcelled out
+ between (the often second-class) workflow management code, and the logic which does the real computation
+ in the pipeline. It also means that workflow management can use all the standard language and library
+ features, for example, to read in directories, match file names using regular expressions and so on.
+
+ **Ruffus** is much like scons in that the modern dynamic programming language python is used seamlessly
+ throughout its pipeline scripts.
+
+.. _design.implicit_dependencies:
+
+**************************************************************************
+Implicit dependencies: disadvantages of `make` / `scons` / `rake`
+**************************************************************************
+
+ Although Python `scons <http://www.scons.org/>`_ and Ruby `rake <http://rake.rubyforge.org/>`_
+ are in many ways more powerful and easier to use for building software, they are still an
+ imperfect fit to the world of computational pipelines.
+
+ This is a result of the way dependencies are specified, an essential part of their design inherited
+ from `GNU make <http://www.gnu.org/software/make/>`_.
+
+ The order of operations in all of these tools is specified in a *declarative* rather than
+    *imperative* manner. This means that the sequence of steps that a build should take is
+    not spelled out explicitly and directly. Instead, recipes are provided for turning input files
+    of one type into another.
+
+ So, for example, knowing that ``a->b``, ``b->c``, ``c->d``, the build
+ system can infer how to get from ``a`` to ``d`` by performing the necessary operations in the correct order.
+
+ This is immensely powerful for three reasons:
+        #) The plumbing, such as dependency checking and passing output
+           from one stage to another, is handled automatically by the build system. (This is the whole point!)
+ #) The same *recipe* can be re-used at different points in the build.
+ #) | Intermediate files do not need to be retained.
+ | Given the automatic inference that ``a->b->c->d``,
+ we don't need to keep ``b`` and ``c`` files around once ``d`` has been produced.
+ |
+
+
+ The disadvantage is that because stages are specified only indirectly, in terms of
+ file name matches, the flow through a complex build or a pipeline can be difficult to trace, and nigh
+ impossible to debug when there are problems.
+
+
+.. _design.explicit_dependencies_in_ruffus:
+
+**************************************************************************
+Explicit dependencies in `Ruffus`
+**************************************************************************
+
+ **Ruffus** takes a different approach. The order of operations is specified explicitly rather than inferred
+ indirectly from the input and output types. So, for example, we would explicitly specify three successive and
+ linked operations ``a->b``, ``b->c``, ``c->d``. The build system knows that the operations always proceed in
+ this order.
+
+ Looking at a **Ruffus** script, it is always clear immediately what is the succession of computational steps
+ which will be taken.
+
+ **Ruffus** values clarity over syntactic cleverness.
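+
+    For example, a minimal sketch of the ``a->b->c->d`` chain above written as three explicitly
+    linked **Ruffus** stages (the file names and the empty task bodies are illustrative only)::
+
+        from ruffus import *
+
+        @transform("input.a", suffix(".a"), ".b")
+        def a_to_b(input_file, output_file):
+            open(output_file, "w").close()
+
+        @transform(a_to_b, suffix(".b"), ".c")
+        def b_to_c(input_file, output_file):
+            open(output_file, "w").close()
+
+        @transform(b_to_c, suffix(".c"), ".d")
+        def c_to_d(input_file, output_file):
+            open(output_file, "w").close()
+
+        pipeline_run([c_to_d])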
+
+.. _design.static_dependencies:
+
+**************************************************************************
+Static dependencies: What `make` / `scons` / `rake` can't do (easily)
+**************************************************************************
+
+ `GNU make <http://www.gnu.org/software/make/>`_, `scons <http://www.scons.org/>`_ and `rake <http://rake.rubyforge.org/>`_
+    work by inferring a static dependency (directed acyclic) graph between all the files which
+    are used by a computational pipeline. These tools locate the target that they are supposed
+    to build and work backward through the dependency graph from that target,
+    rebuilding anything that is out of date. This is perfect for building software,
+    where the list of data files can be computed **statically** at the beginning of the build.
+
+    This is not an ideal match for scientific computational pipelines because:
+
+ * | Though the *stages* of a pipeline (i.e. `compile` or `DNA alignment`) are
+ invariably well-specified in advance, the number of
+ operations (*job*\s) involved at each stage may not be.
+ |
+
+ * | A common approach is to break up large data sets into manageable chunks which
+ can be operated on in parallel in computational clusters or farms
+      (See `embarrassingly parallel problems <http://en.wikipedia.org/wiki/Embarrassingly_parallel>`_).
+ | This means that the number of parallel operations or jobs varies with the data (the number of manageable chunks),
+ and dependency trees cannot be calculated statically beforehand.
+ |
+
+ Computational pipelines require **dynamic** dependencies which are not calculated up-front, but
+ at each stage of the pipeline
+
+ This is a *known* issue with traditional build systems each of which has partial strategies to work around
+ this problem:
+
+ * gmake always builds the dependencies when first invoked, so dynamic dependencies require (complex!) recursive calls to gmake
+ * `Rake dependencies unknown prior to running tasks <http://objectmix.com/ruby/759716-rake-dependencies-unknown-prior-running-tasks-2.html>`_.
+ * `Scons: Using a Source Generator to Add Targets Dynamically <http://www.scons.org/wiki/DynamicSourceGenerator>`_
+
+
+ **Ruffus** explicitly and straightforwardly handles tasks which produce an indeterminate (i.e. runtime dependent)
+    number of outputs, using its **@split**, **@transform** and **@merge** decorators.
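+
+    For example, a minimal sketch (the file names, the chunk count and the empty task bodies
+    are illustrative only)::
+
+        from ruffus import *
+
+        @split("all_data.txt", "chunk.*.txt")
+        def split_data(input_file, output_files):
+            # the number of chunks is only decided here, at run time
+            for ii in range(3):
+                open("chunk.%d.txt" % ii, "w").close()
+
+        @transform(split_data, suffix(".txt"), ".processed")
+        def process_chunk(input_file, output_file):
+            open(output_file, "w").close()
+
+        @merge(process_chunk, "all_data.summary")
+        def summarise(input_files, output_file):
+            open(output_file, "w").close()
+
+        pipeline_run([summarise])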
+
+=============================================================================
+Managing pipelines stage-by-stage using **Ruffus**
+=============================================================================
+ **Ruffus** manages pipeline stages directly.
+
+ #) | The computational operations for each stage of the pipeline are written by you, in
+ separate python functions.
+ | (These correspond to `gmake pattern rules <http://www.gnu.org/software/make/manual/make.html#Pattern-Rules>`_)
+ |
+
+ #) | The dependencies between pipeline stages (python functions) are specified up-front.
+ | These can be displayed as a flow chart.
+
+ .. image:: images/front_page_flowchart.png
+
+ #) **Ruffus** makes sure pipeline stage functions are called in the right order,
+ with the right parameters, running in parallel using multiprocessing if necessary.
+
+ #) Data file timestamps can be used to automatically determine if all or any parts
+ of the pipeline are out-of-date and need to be rerun.
+
+ #) Separate pipeline stages, and operations within each pipeline stage,
+ can be run in parallel provided they are not inter-dependent.
+
+    Another way of looking at this is that **ruffus** reconstructs data file dependencies dynamically,
+    on the fly, when it gets to each stage of the pipeline, giving much more flexibility.
+
+**************************************************************************
+Disadvantages of the Ruffus design
+**************************************************************************
+ Are there any disadvantages to this trade-off for additional clarity?
+
+ #) Each pipeline stage needs to take the right input and output. For example if we specified the
+ steps in the wrong order: ``a->b``, ``c->d``, ``b->c``, then no useful output would be produced.
+ #) We cannot re-use the same recipes in different parts of the pipeline
+ #) Intermediate files need to be retained.
+
+
+ In our experience, it is always obvious when pipeline operations are in the wrong order, precisely because the
+    order of computation is the very essence of the design of each pipeline. Ruffus produces extra diagnostics when
+    no output is created in a pipeline stage (this usually happens with incorrectly specified regular expressions).
+
+ Re-use of recipes is as simple as an extra call to common function code.
+
+ Finally, some users have proposed future enhancements to **Ruffus** to handle unnecessary temporary / intermediate files.
+
+
+.. index::
+ pair: Design; Comparison of Ruffus with alternatives
+
+=================================================
+Alternatives to **Ruffus**
+=================================================
+
+ A comparison of more make-like tools is available from `Ian Holmes' group <http://biowiki.org/MakeComparison>`_.
+
+ Build systems include:
+
+ * `GNU make <http://www.gnu.org/software/make/>`_
+ * `scons <http://www.scons.org/>`_
+ * `ant <http://ant.apache.org/>`_
+ * `rake <http://rake.rubyforge.org/>`_
+
+    There are also complete workload management systems such as Condor.
+    Various bioinformatics pipelines are also available, including the one used by the
+    leading genome annotation website Ensembl, as well as Pegasys, GPIPE, Taverna, Wildfire, MOWserv,
+    Triana, Cyrille2 etc. These are all either hardwired to specific databases and tasks,
+    or have steep learning curves for both the scientist / developer and the IT system
+    administrators.
+
+ **Ruffus** is designed to be lightweight and unintrusive enough to use for writing pipelines
+ with just 10 lines of code.
+
+
+.. seealso::
+
+
+    **Bioinformatics workload management systems**
+
+ Condor:
+ http://www.cs.wisc.edu/condor/description.html
+
+ Ensembl Analysis pipeline:
+ http://www.ncbi.nlm.nih.gov/pubmed/15123589
+
+
+ Pegasys:
+ http://www.ncbi.nlm.nih.gov/pubmed/15096276
+
+ GPIPE:
+ http://www.biomedcentral.com/pubmed/15096276
+
+ Taverna:
+ http://www.ncbi.nlm.nih.gov/pubmed/15201187
+
+ Wildfire:
+ http://www.biomedcentral.com/pubmed/15788106
+
+ MOWserv:
+ http://www.biomedcentral.com/pubmed/16257987
+
+ Triana:
+ http://dx.doi.org/10.1007/s10723-005-9007-3
+
+ Cyrille2:
+ http://www.biomedcentral.com/1471-2105/9/96
+
+
+.. index::
+ single: Acknowledgements
+
+**************************************************
+Acknowledgements
+**************************************************
+ * Bruce Eckel's insightful article on
+ `A Decorator Based Build System <http://www.artima.com/weblogs/viewpost.jsp?thread=241209>`_
+ was the obvious inspiration for the use of decorators in *Ruffus*.
+
+      The rest of *Ruffus* uses a different approach. In particular:
+ #. *Ruffus* uses task-based not file-based dependencies
+ #. *Ruffus* tries to have minimal impact on the functions it decorates.
+
+ Bruce Eckel's design wraps functions in "rule" objects.
+
+      *Ruffus* tasks are added as attributes of the functions, which can still be
+ called normally. This is how *Ruffus* decorators can be layered in any order
+ onto the same task.
+
+ * Languages like c++ and Java would probably use a "mixin" approach.
+ Python's easy support for reflection and function references,
+ as well as the necessity of marshalling over process boundaries, dictated the
+ internal architecture of *Ruffus*.
+    * The `Boost Graph library <http://www.boost.org>`_ for textbook implementations of directed
+ graph traversals.
+ * `Graphviz <http://www.graphviz.org/>`_. Just works. Wonderful.
+ * Andreas Heger, Christoffer Nellåker and Grant Belgard for driving Ruffus towards
+ ever simpler syntax.
+
+
+
diff --git a/doc/_build/html/_sources/drmaa_wrapper_functions.txt b/doc/_build/html/_sources/drmaa_wrapper_functions.txt
new file mode 100644
index 0000000..5ad9ddc
--- /dev/null
+++ b/doc/_build/html/_sources/drmaa_wrapper_functions.txt
@@ -0,0 +1,234 @@
+.. include:: global.inc
+.. _drmaa_functions:
+
+.. comments: function name
+
+.. |run_job| replace:: `drmaa_wrapper.run_job`
+.. _run_job: `drmaa_wrapper.run_job`_
+
+.. comments: parameters
+
+.. |dw_cmd_str| replace:: `cmd_str`
+.. _dw_cmd_str: `drmaa_wrapper.run_job.cmd_str`_
+
+.. |dw_job_script_directory| replace:: `job_script_directory`
+.. _dw_job_script_directory: `drmaa_wrapper.run_job.job_script_directory`_
+.. |dw_job_environment| replace:: `job_environment`
+.. _dw_job_environment: `drmaa_wrapper.run_job.job_environment`_
+.. |dw_working_directory| replace:: `working_directory`
+.. _dw_working_directory: `drmaa_wrapper.run_job.working_directory`_
+.. |dw_retain_job_scripts| replace:: `retain_job_scripts`
+.. _dw_retain_job_scripts: `drmaa_wrapper.run_job.retain_job_scripts`_
+.. |dw_job_name| replace:: `job_name`
+.. _dw_job_name: `drmaa_wrapper.run_job.job_name`_
+.. |dw_job_other_options| replace:: `job_other_options`
+.. _dw_job_other_options: `drmaa_wrapper.run_job.job_other_options`_
+.. |dw_logger| replace:: `logger`
+.. _dw_logger: `drmaa_wrapper.run_job.logger`_
+.. |dw_drmaa_session| replace:: `drmaa_session`
+.. _dw_drmaa_session: `drmaa_wrapper.run_job.drmaa_session`_
+.. |dw_run_locally| replace:: `run_locally`
+.. _dw_run_locally: `drmaa_wrapper.run_job.run_locally`_
+.. |dw_output_files| replace:: `output_files`
+.. _dw_output_files: `drmaa_wrapper.run_job.output_files`_
+.. |dw_touch_only| replace:: `touch_only`
+.. _dw_touch_only: `drmaa_wrapper.run_job.touch_only`_
+
+
+################################################
+drmaa functions
+################################################
+
+    ``drmaa_wrapper`` is not exported automatically by ruffus and must be imported explicitly:
+
+ .. code-block:: python
+ :emphasize-lines: 1
+
+
+ # imported ruffus.drmaa_wrapper explicitly
+ from ruffus.drmaa_wrapper import run_job, error_drmaa_job
+
+.. _drmaa_wrapper.run_job:
+
+.. index::
+ single: drmaa ; run_job
+ pair: run_job; Run drmaa
+
+
+************************************************************************************************************************************************************************************************************************************************************************************
+*run_job*
+************************************************************************************************************************************************************************************************************************************************************************************
+**run_job** (|dw_cmd_str|_, |dw_job_name|_ = None, |dw_job_other_options|_ = None, |dw_job_script_directory|_ = None, |dw_job_environment|_ = None, |dw_working_directory|_ = None, |dw_logger|_ = None, |dw_drmaa_session|_ = None, |dw_retain_job_scripts|_ = False, |dw_run_locally|_ = False, |dw_output_files|_ = None, |dw_touch_only|_ = False)
+
+ **Purpose:**
+
+ ``ruffus.drmaa_wrapper.run_job`` dispatches a command with arguments to a cluster or Grid Engine node and waits for the command to complete.
+
+ It is the semantic equivalent of calling `os.system <http://docs.python.org/2/library/os.html#os.system>`__ or
+    `subprocess.check_output <http://docs.python.org/2/library/subprocess.html#subprocess.check_output>`__.
+
+ **Example**:
+
+ .. code-block:: python
+
+ from ruffus.drmaa_wrapper import run_job, error_drmaa_job
+ import drmaa
+ my_drmaa_session = drmaa.Session()
+ my_drmaa_session.initialize()
+
+ run_job("ls",
+ job_name = "test",
+ job_other_options="-P mott-flint.prja -q short.qa",
+ job_script_directory = "test_dir",
+ job_environment={ 'BASH_ENV' : '~/.bashrc' },
+ retain_job_scripts = True, drmaa_session=my_drmaa_session)
+ run_job("ls",
+ job_name = "test",
+ job_other_options="-P mott-flint.prja -q short.qa",
+ job_script_directory = "test_dir",
+ job_environment={ 'BASH_ENV' : '~/.bashrc' },
+ retain_job_scripts = True,
+ drmaa_session=my_drmaa_session,
+ working_directory = "/gpfs1/well/mott-flint/lg/src/oss/ruffus/doc")
+
+ #
+ # catch exceptions
+ #
+ try:
+ stdout_res, stderr_res = run_job(cmd,
+ job_name = job_name,
+ logger = logger,
+ drmaa_session = drmaa_session,
+ run_locally = options.local_run,
+ job_other_options = get_queue_name())
+
+ # relay all the stdout, stderr, drmaa output to diagnose failures
+ except error_drmaa_job as err:
+ raise Exception("\n".join(map(str,
+ ["Failed to run:",
+ cmd,
+ err,
+ stdout_res,
+ stderr_res])))
+
+ my_drmaa_session.exit()
+
+
+
+ **Parameters:**
+
+.. _drmaa_wrapper.run_job.cmd_str:
+
+ * *cmd_str*
+
+ The command which will be run remotely including all parameters
+
+.. _drmaa_wrapper.run_job.job_name:
+
+ * *job_name*
+
+ A descriptive name for the command. This will be displayed by `SGE qstat <http://gridscheduler.sourceforge.net/htmlman/htmlman1/qstat.html>`__, for example.
+ Defaults to "ruffus_job"
+
+.. _drmaa_wrapper.run_job.job_other_options:
+
+ * *job_other_options*
+
+ Other drmaa parameters can be passed verbatim as a string.
+
+ Examples for SGE include project name (``-P project_name``), parallel environment (``-pe parallel_environ``), account (``-A account_string``), resource (``-l resource=expression``),
+ queue name (``-q a_queue_name``), queue priority (``-p 15``).
+
+ These are parameters which you normally need to include when submitting jobs interactively, for example via
+ `SGE qsub <http://gridscheduler.sourceforge.net/htmlman/htmlman1/qsub.html>`__
+ or `SLURM <http://apps.man.poznan.pl/trac/slurm-drmaa/wiki/WikiStart#Nativespecification>`__ (`srun <https://computing.llnl.gov/linux/slurm/srun.html>`__)
+
+.. _drmaa_wrapper.run_job.job_script_directory:
+
+ * *job_script_directory*
+
+ The directory where drmaa temporary script files will be found. Defaults to the current working directory.
+
+
+.. _drmaa_wrapper.run_job.job_environment:
+
+ * *job_environment*
+
+ A dictionary of key / values with environment variables. E.g. ``"{'BASH_ENV': '~/.bashrc'}"``
+
+
+.. _drmaa_wrapper.run_job.working_directory:
+
+ * *working_directory*
+
+ * Sets the working directory.
+ * Should be a fully qualified path.
+ * Defaults to the current working directory.
+
+
+.. _drmaa_wrapper.run_job.retain_job_scripts:
+
+ * *retain_job_scripts*
+
+        Do not delete temporary script files containing drmaa commands. Useful for
+        debugging or running on the command line directly, and provides a useful record of the commands.
+
+.. _drmaa_wrapper.run_job.logger:
+
+ * *logger*
+
+ For logging messages indicating the progress of the pipeline in terms of tasks and jobs. Takes objects with the standard python
+ `logging <https://docs.python.org/2/library/logging.html>`__ module interface.
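+
+        For example, a minimal sketch using a standard ``logging`` logger (the job name is an
+        illustrative assumption; ``my_drmaa_session`` is the shared session from the example above):
+
+        .. code-block:: python
+
+            import logging
+            logging.basicConfig(level = logging.DEBUG)
+            my_logger = logging.getLogger("my_pipeline")
+
+            stdout_res, stderr_res = run_job("ls",
+                                             job_name      = "list_files",
+                                             logger        = my_logger,
+                                             drmaa_session = my_drmaa_session)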
+
+.. _drmaa_wrapper.run_job.drmaa_session:
+
+ * *drmaa_session*
+
+ A shared drmaa session created and managed separately.
+
+ In the main part of your **Ruffus** pipeline script somewhere there should be code looking like this:
+
+ .. code-block:: python
+
+ #
+ # start shared drmaa session for all jobs / tasks in pipeline
+ #
+ import drmaa
+ drmaa_session = drmaa.Session()
+ drmaa_session.initialize()
+
+
+ #
+ # pipeline functions
+ #
+
+ if __name__ == '__main__':
+ cmdline.run (options, multithread = options.jobs)
+ drmaa_session.exit()
+
+.. _drmaa_wrapper.run_job.run_locally:
+
+ * *run_locally*
+
+ Runs commands locally using the standard python `subprocess <https://docs.python.org/2/library/subprocess.html>`__ module
+        rather than dispatching remotely. This allows scripts to be debugged easily.
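+
+        For example, a minimal sketch (assuming that no drmaa session needs to be set up because
+        nothing is dispatched to the cluster):
+
+        .. code-block:: python
+
+            # runs "ls" on the local machine via subprocess
+            stdout_res, stderr_res = run_job("ls", run_locally = True)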
+
+.. _drmaa_wrapper.run_job.touch_only:
+
+ * *touch_only*
+
+ Create or update :ref:`Output files <drmaa_wrapper.run_job.output_files>`
+ only to simulate the running of the pipeline.
+ Does not dispatch commands remotely or locally. This is most useful to force a
+ pipeline to acknowledge that a particular part is now up-to-date.
+
+ See also: :ref:`pipeline_run(touch_files_only=True) <pipeline_functions.pipeline_run.touch_files_only>`
+
+
+.. _drmaa_wrapper.run_job.output_files:
+
+ * *output_files*
+
+ Output files which will be created or updated if :ref:`touch_only <drmaa_wrapper.run_job.touch_only>` ``=True``
+
+
diff --git a/doc/_build/html/_sources/examples/bioinformatics/index.txt b/doc/_build/html/_sources/examples/bioinformatics/index.txt
new file mode 100644
index 0000000..fe17943
--- /dev/null
+++ b/doc/_build/html/_sources/examples/bioinformatics/index.txt
@@ -0,0 +1,290 @@
+.. _examples_bioinformatics_part1:
+
+
+
+###################################################################
+Construction of a simple pipeline to run BLAST jobs
+###################################################################
+
+============
+Overview
+============
+
+ This is a simple example to illustrate the convenience **Ruffus**
+ brings to simple tasks in bioinformatics.
+
+ 1. **Split** a problem into multiple fragments that can be
+ 2. **Run in parallel** giving partial solutions that can be
+ 3. **Recombined** into the complete solution.
+
+ The example code runs a `ncbi <http://blast.ncbi.nlm.nih.gov/>`__
+ `blast <http://en.wikipedia.org/wiki/BLAST>`__ search for four sequences
+ against the human `refseq <http://en.wikipedia.org/wiki/RefSeq>`_ protein sequence database.
+
+ #. **Split** each of the four sequences into a separate file.
+    #. **Run in parallel** Blastall on each sequence file.
+ #. **Recombine** the BLAST results by simple concatenation.
+
+
+ In real life,
+
+ * `BLAST <http://blast.ncbi.nlm.nih.gov/>`__ already provides support for multiprocessing
+    * Sequence files would be split into much larger chunks, each with many sequences
+    * The jobs would be submitted to large computational farms (in our case, using the Sun Grid Engine).
+ * The High Scoring Pairs (HSPs) would be parsed / filtered / stored in your own formats.
+
+
+
+
+ .. note::
+
+ This bioinformatics example is intended to showcase *some* of the features of Ruffus.
+
+ #. See the :ref:`manual <new_manual.introduction>` to learn about the various features in Ruffus.
+
+
+========================
+Prerequisites
+========================
+
+-------------
+1. Ruffus
+-------------
+ To install Ruffus on most systems with python installed:
+
+ ::
+
+ easy_install -U ruffus
+
+ Otherwise, `download <http://code.google.com/p/ruffus/downloads/list>`_ Ruffus and run:
+
+ ::
+
+ tar -xvzf ruffus-xxx.tar.gz
+ cd ruffus-xxx
+        python setup.py install
+
+ where xxx is the latest Ruffus version.
+
+
+-------------
+2. BLAST
+-------------
+ This example assumes that the `BLAST <http://blast.ncbi.nlm.nih.gov/>`__ ``blastall`` and ``formatdb`` executables are
+ installed and on the search path. Otherwise download from `here <http://blast.ncbi.nlm.nih.gov/Blast.cgi?CMD=Web&PAGE_TYPE=BlastDocs&DOC_TYPE=Download>`_.
+
+
+---------------------------------------
+3. human refseq sequence database
+---------------------------------------
+
+ We also need to download the human refseq sequence file and format the ncbi database:
+
+ ::
+
+ wget ftp://ftp.ncbi.nih.gov/refseq/H_sapiens/mRNA_Prot/human.protein.faa.gz
+ gunzip human.protein.faa.gz
+
+ formatdb -i human.protein.faa
+
+---------------------------------------
+4. test sequences
+---------------------------------------
+ Query sequences in FASTA format can be found in `original.fa <../../_static/examples/bioinformatics/original.fa>`_
+
+
+=========================
+Code
+=========================
+    The code for this example can be found :ref:`here <examples_bioinformatics_part1_code>`, suitable for
+    pasting into the python command shell.
+
+
+================================================
+Step 1. Splitting up the query sequences
+================================================
+
+ We want each of our sequences in the query file `original.fa <../../_static/examples/bioinformatics/original.fa>`_ to be placed
+    in separate files named ``XXX.segment``, where ``XXX`` runs from 1 to the number of sequences.
+
+ ::
+
+ current_file_index = 0
+ for line in open("original.fa"):
+ # start a new file for each accession line
+ if line[0] == '>':
+ current_file_index += 1
+ current_file = open("%d.segment" % current_file_index, "w")
+ current_file.write(line)
+
+
+
+ To use this in a pipeline, we only need to wrap this in a function, "decorated" with the Ruffus
+ keyword :ref:`@split <new_manual.split>`:
+
+
+
+ .. image:: ../../images/examples_bioinformatics_split.jpg
+
+
+    | This indicates that we are splitting up the input file `original.fa <../../_static/examples/bioinformatics/original.fa>`_ into as many
+ ``*.segment`` files as it takes.
+ | The pipelined function itself takes two arguments, for the input and output.
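+
+    In outline (the full listing is linked above), the decorated function looks like this:
+
+    ::
+
+        @split("original.fa", "*.segment")
+        def splitFasta (seqFile, segments):
+            """Split sequence file into as many fragments as appropriate
+               depending on the size of the original file"""
+            current_file_index = 0
+            for line in open(seqFile):
+                # start a new file for each accession line
+                if line[0] == '>':
+                    current_file_index += 1
+                    current_file = open("%d.segment" % current_file_index, "w")
+                current_file.write(line)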
+
+    We shall see later that this simple :ref:`@split <new_manual.split>` decorator already gives all the benefits of:
+
+ * Dependency checking
+ * Flowchart printing
+
+================================================
+Step 2. Run BLAST jobs in parallel
+================================================
+
+ Assuming that blast is already installed, sequence matches can be found with this python
+ code:
+
+ ::
+
+ os.system("blastall -p blastp -d human.protein.faa -i 1.segment > 1.blastResult")
+
+    To pipeline this, we simply need to wrap it in a function, decorated with the **Ruffus**
+ keyword :ref:`@transform <new_manual.transform>`.
+
+ .. image:: ../../images/examples_bioinformatics_transform.jpg
+
+ This indicates that we are taking all the output files from the previous ``splitFasta``
+ operation (``*.segment``) and :ref:`@transform <new_manual.transform>`-ing each to a new file with the ``.blastResult``
+ suffix. Each of these transformation operations can run in parallel if specified.
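+
+    As in the full code listing, the wrapped function is only a few lines long:
+
+    ::
+
+        @transform(splitFasta, suffix(".segment"), ".blastResult")
+        def runBlast(seqFile, blastResultFile):
+            """Run blast"""
+            os.system("blastall -p blastp -d human.protein.faa -i %s > %s" %
+                      (seqFile, blastResultFile))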
+
+
+================================================
+Step 3. Combining BLAST results
+================================================
+
+ The following python code will concatenate the results together
+ ::
+
+ output_file = open("final.blast_results", "w")
+        for i in glob("*.blastResult"):
+ output_file.write(open(i).read())
+
+
+
+ To pipeline this, we need again to decorate with the **Ruffus** keyword :ref:`@merge <new_manual.merge>`.
+
+ .. image:: ../../images/examples_bioinformatics_merge.jpg
+
+ This indicates that we are taking all the output files from the previous ``runBlast``
+    operation (``*.blastResult``) and :ref:`@merge <new_manual.merge>`-ing them to the new file ``final.blast_results``.
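+
+    Again, as in the full code listing:
+
+    ::
+
+        @merge(runBlast, "final.blast_results")
+        def combineBlastResults (blastResultFiles, combinedBlastResultFile):
+            """Combine blast results"""
+            output_file = open(combinedBlastResultFile, "w")
+            for i in blastResultFiles:
+                output_file.write(open(i).read())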
+
+
+================================================
+Step 4. Running the pipeline
+================================================
+
+ We can run the completed pipeline using a maximum of 4 parallel processes by calling
+    :ref:`pipeline_run <pipeline_functions.pipeline_run>`:
+
+ ::
+
+ pipeline_run([combineBlastResults], verbose = 2, multiprocess = 4)
+
+
+ Though we have only asked Ruffus to run ``combineBlastResults``, it traces all the dependencies
+ of this task and runs all the necessary parts of the pipeline.
+
+
+ .. note ::
+
+ The full code for this example can be found :ref:`here <examples_bioinformatics_part1_code>`
+ suitable for pasting into the python command shell.
+
+ The ``verbose`` parameter causes the following output to be printed to stderr as the pipeline
+ runs:
+
+ ::
+
+ >>> pipeline_run([combineBlastResults], verbose = 2, multiprocess = 4)
+ Job = [original.fa -> *.segment] completed
+ Completed Task = splitFasta
+ Job = [1.segment -> 1.blastResult] completed
+ Job = [3.segment -> 3.blastResult] completed
+ Job = [2.segment -> 2.blastResult] completed
+ Job = [4.segment -> 4.blastResult] completed
+ Completed Task = runBlast
+ Job = [[1.blastResult, 2.blastResult, 3.blastResult, 4.blastResult] -> final.blast_results] completed
+ Completed Task = combineBlastResults
+
+
+================================================
+Step 5. Testing dependencies
+================================================
+
+ If we invoked :ref:`pipeline_run <pipeline_functions.pipeline_run>` again, nothing
+ further would happen because the
+ pipeline is now up-to-date. But what if the pipeline had not run to completion?
+
+ We can simulate the failure of one of the ``blastall`` jobs by deleting its results:
+
+ ::
+
+ os.unlink("4.blastResult")
+
+ Let us use the :ref:`pipeline_printout <pipeline_functions.pipeline_printout>`
+    function to print out the dependencies of the pipeline at a high ``verbose`` level, which
+ will show both complete and incomplete jobs:
+
+ ::
+
+ >>> import sys
+ >>> pipeline_printout(sys.stdout, [combineBlastResults], verbose = 4)
+
+ ________________________________________
+ Tasks which are up-to-date:
+
+ Task = splitFasta
+ "Split sequence file into as many fragments as appropriate depending on the size of
+ original_fasta"
+
+
+ ________________________________________
+ Tasks which will be run:
+
+ Task = runBlast
+ "Run blast"
+ Job = [4.segment
+ ->4.blastResult]
+ Job needs update: Missing file 4.blastResult
+
+ Task = combineBlastResults
+ "Combine blast results"
+ Job = [[1.blastResult, 2.blastResult, 3.blastResult, 4.blastResult]
+ ->final.blast_results]
+ Job needs update: Missing file 4.blastResult
+
+ ________________________________________
+
+ Only the parts of the pipeline which involve the missing BLAST result will be rerun.
+ We can confirm this by invoking the pipeline.
+
+ ::
+
+ >>> pipeline_run([combineBlastResults], verbose = 2, multiprocess = 4)
+
+ Job = [1.segment -> 1.blastResult] unnecessary: already up to date
+ Job = [2.segment -> 2.blastResult] unnecessary: already up to date
+ Job = [3.segment -> 3.blastResult] unnecessary: already up to date
+ Job = [4.segment -> 4.blastResult] completed
+ Completed Task = runBlast
+ Job = [[1.blastResult, 2.blastResult, 3.blastResult, 4.blastResult] -> final.blast_results] completed
+ Completed Task = combineBlastResults
+
+================================================
+What is next?
+================================================
+
+
+ In the :ref:`next (short) part <examples_bioinformatics_part2>`,
+ we shall add some standard (boilerplate) code to
+ turn this BLAST pipeline into a (slightly more) useful python program.
+
diff --git a/doc/_build/html/_sources/examples/bioinformatics/part1_code.txt b/doc/_build/html/_sources/examples/bioinformatics/part1_code.txt
new file mode 100644
index 0000000..c02232f
--- /dev/null
+++ b/doc/_build/html/_sources/examples/bioinformatics/part1_code.txt
@@ -0,0 +1,70 @@
+.. include:: ../../global.inc
+.. _examples_bioinformatics_part1_code:
+
+
+###################################################################
+Ruffus code
+###################################################################
+
+::
+
+ import os, sys
+
+ exe_path = os.path.split(os.path.abspath(sys.argv[0]))[0]
+ sys.path.insert(0, os.path.abspath(os.path.join(exe_path,"..", "..","..")))
+
+ from ruffus import *
+
+
+ original_fasta = "original.fa"
+ database_file = "human.protein.faa"
+
+ @split(original_fasta, "*.segment")
+ def splitFasta (seqFile, segments):
+ """Split sequence file into
+ as many fragments as appropriate
+ depending on the size of original_fasta"""
+ current_file_index = 0
+ for line in open(original_fasta):
+ #
+ # start a new file for each accession line
+ #
+ if line[0] == '>':
+ current_file_index += 1
+ current_file = open("%d.segment" % current_file_index, "w")
+ current_file.write(line)
+
+
+
+ @transform(splitFasta, suffix(".segment"), ".blastResult")
+ def runBlast(seqFile, blastResultFile):
+ """Run blast"""
+ os.system("blastall -p blastp -d %s -i %s > %s" %
+ (database_file, seqFile, blastResultFile))
+
+
+ @merge(runBlast, "final.blast_results")
+ def combineBlastResults (blastResultFiles, combinedBlastResultFile):
+ """Combine blast results"""
+ output_file = open(combinedBlastResultFile, "w")
+ for i in blastResultFiles:
+ output_file.write(open(i).read())
+
+
+ pipeline_run([combineBlastResults], verbose = 2, multiprocess = 4)
+
+
+ #
+    #   Simulate interruption of the pipeline by
+ # deleting the output of one of the BLAST jobs
+ #
+ os.unlink("4.blastResult")
+
+ pipeline_printout(sys.stdout, [combineBlastResults], verbose = 4)
+
+
+ #
+ # Re-running the pipeline
+ #
+ pipeline_run([combineBlastResults], verbose = 2, multiprocess = 4)
+
diff --git a/doc/_build/html/_sources/examples/bioinformatics/part2.txt b/doc/_build/html/_sources/examples/bioinformatics/part2.txt
new file mode 100644
index 0000000..30e1343
--- /dev/null
+++ b/doc/_build/html/_sources/examples/bioinformatics/part2.txt
@@ -0,0 +1,152 @@
+.. _examples_bioinformatics_part2:
+
+
+###################################################################
+Part 2: A slightly more practical pipeline to run blast jobs
+###################################################################
+
+============
+Overview
+============
+
+ :ref:`Previously <examples_bioinformatics_part1>`, we had built
+ a simple pipeline to split up a FASTA file of query sequences so
+ that these can be matched against a sequence database in parallel.
+
+ We shall wrap this code so that
+
+ * It is more robust to interruptions
+ * We can specify the file names on the command line
+
+==================================================================
+Step 1. Cleaning up any leftover junk from previous pipeline runs
+==================================================================
+
+ | We split up each of our sequences in the query file `original.fa <../../_static/examples/bioinformatics/original.fa>`_
+      into separate files named ``XXX.segment``, where ``XXX`` runs from 1 to the number of
+      sequences in the FASTA file.
+
+ | However, if we start with 6 sequences (giving ``1.segment`` ... ``6.segment``), and we
+      then edit `original.fa <../../_static/examples/bioinformatics/original.fa>`_
+      so that only 5 remain, the file ``6.segment`` would still be left
+ hanging around as an unwanted, extraneous and confusing orphan.
+
+ As a general rule, it is a good idea to clean up the results of a previous run in
+ a :ref:`@split <manual.split>` operation:
+
+ ::
+
+ @split("original.fa", "*.segment")
+ def splitFasta (seqFile, segments):
+
+ #
+ # Clean up any segment files from previous runs before creating new one
+ #
+ for i in glob.glob("*.segment"):
+ os.unlink(i)
+
+ # code as before...
+
+.. _examples_bioinformatics_part2.step2:
+
+===============================================================
+Step 2. Adding a "flag" file to mark successful completion
+===============================================================
+
+ When pipelined tasks are interrupted half way through an operation, the output may
+ only contain part of the results in an incomplete or inconsistent state.
+ There are three general options to deal with this:
+
+ #. Catch any interrupting conditions and delete the incomplete output
+ #. Tag successfully completed output with a special marker at the end of the file
+ #. Create an empty "flag" file whose only point is to signal success
+
+ Option (3) is the most reliable way and involves the least amount of work in Ruffus.
+ We add flag files with the suffix ``.blastSuccess`` for our parallel BLAST jobs:
+
+ ::
+
+ @transform(splitFasta, suffix(".segment"), [".blastResult", ".blastSuccess"])
+ def runBlast(seqFile, output_files):
+
+ blastResultFile, flag_file = output_files
+
+ #
+ # Existing code unchanged
+ #
+ os.system("blastall -p blastp -d human.protein.faa "+
+ "-i %s > %s" % (seqFile, blastResultFile))
+
+ #
+ # "touch" flag file to indicate success
+ #
+ open(flag_file, "w")
+
+
+==============================================================
+Step 3. Allowing the script to be invoked on the command line
+==============================================================
+
+ We allow the query sequence file, as well as the sequence database and end results
+ to be specified at runtime using the standard python `optparse <http://docs.python.org/library/optparse.html>`_ module.
+    We find this approach to run-time arguments generally useful for many Ruffus scripts.
+ The full code can be :ref:`viewed here <examples_bioinformatics_part2_code>` and
+ `downloaded from run_parallel_blast.py <../../_static/examples/bioinformatics/run_parallel_blast.py>`_.
+
+ The different options can be inspected by running the script with the ``--help`` or ``-h``
+ argument.
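+
+    For example, assuming the script has been saved as ``run_parallel_blast.py``, a typical
+    invocation (the file names here are only illustrative) might be:
+
+    ::
+
+        python run_parallel_blast.py -i original.fa -d human.protein.faa -j 4 -vvv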
+
+ The following options are useful for developing Ruffus scripts:
+
+ ::
+
+ --verbose | -v : Print more detailed messages for each additional verbose level.
+ E.g. run_parallel_blast --verbose --verbose --verbose ... (or -vvv)
+
+ --jobs | -j : Specifies the number of jobs (operations) to run in parallel.
+
+ --flowchart FILE : Print flowchart of the pipeline to FILE. Flowchart format
+ depends on extension. Alternatives include (".dot", ".jpg",
+ "*.svg", "*.png" etc). Formats other than ".dot" require
+ the dot program to be installed (http://www.graphviz.org/).
+
+ --just_print | -n Only print a trace (description) of the pipeline.
+ The level of detail is set by --verbose.
+
+
+============================================================
+Step 4. Printing out a flowchart for the pipeline
+============================================================
+    The ``--flowchart`` argument results in a call to ``pipeline_printout_graph(...)``.
+ This prints out a flowchart of the pipeline. Valid formats include ".dot", ".jpg", ".svg", ".png"
+ but all except for the first require the ``dot`` program to be installed
+ (http://www.graphviz.org/).
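+
+    Outside of the command line wrapper, the equivalent call (with an illustrative output file
+    name) looks like this:
+
+    ::
+
+        pipeline_printout_graph(open("flowchart.svg", "w"),
+                                "svg",   # output format, taken from the file extension in the full script
+                                [combineBlastResults],
+                                no_key_legend = True)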
+
+ The state of the pipeline is reflected in the flowchart:
+
+ .. image:: ../../images/examples_bioinformatics_pipeline.jpg
+
+
+============================================================
+Step 5. Errors
+============================================================
+ Because Ruffus scripts are just normal python functions, you can debug them using
+ your usual tools, or jump to the offending line(s) even when the pipeline is running in
+ parallel.
+
+    For example, this is what the error messages would look like if we had misspelt ``blastall`` as ``blastal``.
+ In :ref:`run_parallel_blast.py <examples_bioinformatics_part2_code>`,
+ python exceptions are raised if the ``blastall`` command fails.
+
+    Each of the exceptions for the parallel operations is printed out, with the
+    offending line (line 204) and the problem (``blastal`` not found)
+    highlighted in red.
+
+ .. image:: ../../images/examples_bioinformatics_error.png
+
+============================================================
+Step 6. Will it run?
+============================================================
+ The full code can be :ref:`viewed here <examples_bioinformatics_part2_code>` and
+ `downloaded from run_parallel_blast.py <../../_static/examples/bioinformatics/run_parallel_blast.py>`_.
+
diff --git a/doc/_build/html/_sources/examples/bioinformatics/part2_code.txt b/doc/_build/html/_sources/examples/bioinformatics/part2_code.txt
new file mode 100644
index 0000000..93b628d
--- /dev/null
+++ b/doc/_build/html/_sources/examples/bioinformatics/part2_code.txt
@@ -0,0 +1,267 @@
+.. include:: ../../global.inc
+.. _examples_bioinformatics_part2_code:
+
+
+###################################################################
+Ruffus code
+###################################################################
+
+::
+
+ #!/usr/bin/env python
+ """
+
+ run_parallel_blast.py
+ [--log_file PATH]
+ [--quiet]
+
+ """
+
+ ################################################################################
+ #
+ # run_parallel_blast
+ #
+ #
+ # Copyright (c) 4/21/2010 Leo Goodstadt
+ #
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
+ # of this software and associated documentation files (the "Software"), to deal
+ # in the Software without restriction, including without limitation the rights
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ # copies of the Software, and to permit persons to whom the Software is
+ # furnished to do so, subject to the following conditions:
+ #
+ # The above copyright notice and this permission notice shall be included in
+ # all copies or substantial portions of the Software.
+ #
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ # THE SOFTWARE.
+ #################################################################################
+ import os, sys
+ exe_path = os.path.split(os.path.abspath(sys.argv[0]))[0]
+ sys.path.insert(0,os.path.abspath(os.path.join(exe_path,"..", "..")))
+
+
+ #88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+ # options
+
+
+ #88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+ from optparse import OptionParser
+ import sys, os
+
+ exe_path = os.path.split(os.path.abspath(sys.argv[0]))[0]
+
+
+ parser = OptionParser(version="%prog 1.0", usage = "\n\n %prog --input_file QUERY_FASTA --database_file FASTA_DATABASE [more_options]")
+ parser.add_option("-i", "--input_file", dest="input_file",
+ metavar="FILE",
+ type="string",
+ help="Name and path of query sequence file in FASTA format. ")
+ parser.add_option("-d", "--database_file", dest="database_file",
+ metavar="FILE",
+ type="string",
+ help="Name and path of FASTA database to search. ")
+ parser.add_option("--result_file", dest="result_file",
+ metavar="FILE",
+ type="string",
+ default="final.blast_results",
+ help="Name and path of where the files should end up. ")
+ parser.add_option("-t", "--temp_directory", dest="temp_directory",
+ metavar="PATH",
+ type="string",
+ default="tmp",
+ help="Name and path of temporary directory where calculations "
+ "should take place. ")
+
+ #
+ # general options: verbosity / logging
+ #
+ parser.add_option("-v", "--verbose", dest = "verbose",
+ action="count", default=0,
+ help="Print more detailed messages for each additional verbose level."
+ " E.g. run_parallel_blast --verbose --verbose --verbose ... (or -vvv)")
+
+ #
+ # pipeline
+ #
+ parser.add_option("-j", "--jobs", dest="jobs",
+ default=1,
+ metavar="jobs",
+ type="int",
+ help="Specifies the number of jobs (operations) to run in parallel.")
+ parser.add_option("--flowchart", dest="flowchart",
+ metavar="FILE",
+ type="string",
+ help="Print flowchart of the pipeline to FILE. Flowchart format "
+ "depends on extension. Alternatives include ('.dot', '.jpg', "
+ "'*.svg', '*.png' etc). Formats other than '.dot' require "
+ "the dot program to be installed (http://www.graphviz.org/).")
+ parser.add_option("-n", "--just_print", dest="just_print",
+ action="store_true", default=False,
+ help="Only print a trace (description) of the pipeline. "
+ " The level of detail is set by --verbose.")
+
+ (options, remaining_args) = parser.parse_args()
+
+
+ if not options.flowchart:
+ if not options.database_file:
+ parser.error("\n\n\tMissing parameter --database_file FILE\n\n")
+ if not options.input_file:
+ parser.error("\n\n\tMissing parameter --input_file FILE\n\n")
+
+ #88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+ # imports
+
+
+ #88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+ from ruffus import *
+ import subprocess
+
+
+
+ #88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+ # Functions
+
+
+ #88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+ def run_cmd(cmd_str):
+ """
+ Throw exception if run command fails
+ """
+ process = subprocess.Popen(cmd_str, stdout = subprocess.PIPE,
+ stderr = subprocess.PIPE, shell = True)
+ stdout_str, stderr_str = process.communicate()
+ if process.returncode != 0:
+ raise Exception("Failed to run '%s'\n%s%sNon-zero exit status %s" %
+ (cmd_str, stdout_str, stderr_str, process.returncode))
+
+
+ #88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+ # Logger
+
+
+ #88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+ import logging
+ logger = logging.getLogger("run_parallel_blast")
+ #
+    #   We are interested in all messages
+ #
+ if options.verbose:
+ logger.setLevel(logging.DEBUG)
+ stderrhandler = logging.StreamHandler(sys.stderr)
+ stderrhandler.setFormatter(logging.Formatter(" %(message)s"))
+ stderrhandler.setLevel(logging.DEBUG)
+ logger.addHandler(stderrhandler)
+
+
+
+ #88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+ # Pipeline tasks
+
+
+ #88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+ original_fasta = options.input_file
+ database_file = options.database_file
+ temp_directory = options.temp_directory
+ result_file = options.result_file
+
+ @follows(mkdir(temp_directory))
+
+ @split(original_fasta, os.path.join(temp_directory, "*.segment"))
+ def splitFasta (seqFile, segments):
+ """Split sequence file into
+ as many fragments as appropriate
+ depending on the size of original_fasta"""
+ #
+ # Clean up any segment files from previous runs before creating new one
+ #
+ for i in segments:
+ os.unlink(i)
+ #
+ current_file_index = 0
+ for line in open(original_fasta):
+ #
+ # start a new file for each accession line
+ #
+ if line[0] == '>':
+ current_file_index += 1
+ file_name = "%d.segment" % current_file_index
+ file_path = os.path.join(temp_directory, file_name)
+ current_file = open(file_path, "w")
+ current_file.write(line)
+
+
+ @transform(splitFasta, suffix(".segment"), [".blastResult", ".blastSuccess"])
+ def runBlast(seqFile, output_files):
+ #
+ blastResultFile, flag_file = output_files
+ #
+        run_cmd("blastall -p blastp -d %s -i %s > %s" % (database_file, seqFile, blastResultFile))
+ #
+ # "touch" flag file to indicate success
+ #
+ open(flag_file, "w")
+
+
+ @merge(runBlast, result_file)
+ def combineBlastResults (blastResult_and_flag_Files, combinedBlastResultFile):
+ """Combine blast results"""
+ #
+ output_file = open(combinedBlastResultFile, "w")
+ for blastResult_file, flag_file in blastResult_and_flag_Files:
+ output_file.write(open(blastResult_file).read())
+
+
+
+
+
+
+
+
+
+ #88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+ # Print list of tasks
+
+ #88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+ if options.just_print:
+ pipeline_printout(sys.stdout, [combineBlastResults], verbose=options.verbose)
+
+
+ #88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+ # Print flowchart
+
+ #88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+ elif options.flowchart:
+ # use file extension for output format
+ output_format = os.path.splitext(options.flowchart)[1][1:]
+ pipeline_printout_graph (open(options.flowchart, "w"),
+ output_format,
+ [combineBlastResults],
+ no_key_legend = True)
+ #88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+ # Run Pipeline
+
+ #88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+ else:
+ pipeline_run([combineBlastResults], multiprocess = options.jobs,
+ logger = logger, verbose=options.verbose)
+
+
diff --git a/doc/_build/html/_sources/examples/paired_end_data.py.txt b/doc/_build/html/_sources/examples/paired_end_data.py.txt
new file mode 100644
index 0000000..cbf1b58
--- /dev/null
+++ b/doc/_build/html/_sources/examples/paired_end_data.py.txt
@@ -0,0 +1,122 @@
+.. _faq.paired_files.code:
+
+############################################################################################################################################################################################################
+Example code for :ref:`FAQ Good practices: "What is the best way of handling data in file pairs (or triplets etc.)?" <faq.paired_files>`
+############################################################################################################################################################################################################
+
+ .. seealso::
+
+ * :ref:`@collate <new_manual.collate>`
+
+
+ .. code-block:: python
+ :emphasize-lines: 10-21,29-31,40-43,70-74
+
+ #!/usr/bin/env python
+ import sys, os
+
+ from ruffus import *
+ import ruffus.cmdline as cmdline
+ from subprocess import check_call
+
+ parser = cmdline.get_argparse(description="Parimala's pipeline?")
+
+ # .
+ # Very flexible handling of input files .
+ # .
+ # input files can be specified flexibly as: .
+ # --input a.fastq b.fastq .
+ # --input a.fastq --input b.fastq .
+ # --input *.fastq --input other/*.fastq .
+ # --input "*.fastq" .
+ # .
+ # The last form is expanded in the script and avoids limitations on command .
+ # line lengths .
+ # .
+ parser.add_argument('-i', '--input', nargs='+', metavar="FILE", action="append", help = "Fastq files")
+
+ options = parser.parse_args()
+
+ # standard python logger which can be synchronised across concurrent Ruffus tasks
+ logger, logger_mutex = cmdline.setup_logging ("PARIMALA", options.log_file, options.verbose)
+
+ # .
+ # Useful code to turn input files into a flat list .
+ # .
+ from glob import glob
+ original_data_files = [fn for grouped in options.input for glob_spec in grouped for fn in glob(glob_spec)] if options.input else []
+ if not original_data_files:
+ original_data_files = [["C1W1_R1.fastq.gz", "C1W1_R2.fastq.gz"]]
+ #raise Exception ("No matching files specified with --input.")
+
+ # <<<---- pipelined functions go here
+
+ #_________________________________________________________________________________
+ # .
+ # Group together file pairs .
+ #_________________________________________________________________________________
+ @collate(original_data_files,
+ # match file name up to the "R1.fastq.gz"
+ formatter("([^/]+)R[12].fastq.gz$"),
+ # Create output parameter supplied to next task
+ ["{path[0]}/{1[0]}paired.R1.fastq.gz", # paired file 1
+ "{path[0]}/{1[0]}paired.R2.fastq.gz"], # paired file 2
+ # Extra parameters for our own convenience and use
+ ["{path[0]}/{1[0]}unpaired.R1.fastq.gz", # unpaired file 1
+ "{path[0]}/{1[0]}unpaired.R2.fastq.gz"], # unpaired file 2
+ logger, logger_mutex)
+ def trim_fastq(input_files, output_paired_files, discarded_unpaired_files, logger, logger_mutex):
+ if len(input_files) != 2:
+ raise Exception("One of read pairs %s missing" % (input_files,))
+ cmd = ("java -jar ~/SPRING-SUMMER_2014/Softwares/Trimmomatic/Trimmomatic-0.32/trimmomatic-0.32.jar "
+ " PE -phred33 "
+ " {input_files[0]} {input_files[1]} "
+ " {output_paired_files[0]} {output_paired_files[1]} "
+ " {discarded_unpaired_files[0]} {discarded_unpaired_files[1]} "
+ " LEADING:30 TRAILING:30 SLIDINGWINDOW:4:15 MINLEN:50 "
+ )
+
+ check_call(cmd.format(**locals()))
+
+ with logger_mutex:
+ logger.debug("Hooray trim_fastq worked")
+
+ #_________________________________________________________________________________
+ # .
+ # Each file pair now makes its way down the rest of the pipeline as .
+ # a couple .
+ #_________________________________________________________________________________
+ @transform(trim_fastq,
+ # regular expression match on first of pe files
+ formatter("([^/]+)paired.R1.fastq.gz$"),
+ # Output parameter supplied to next task
+                 "{path[0]}/{1[0]}.sam",
+
+ # Extra parameters for our own convenience and use
+ "{path[0]}/{1[0]}.pe_soap_pe", # soap intermediate file
+ "{path[0]}/{1[0]}.pe_soap_se", # soap intermediate file
+ logger, logger_mutex)
+ def align_seq(input_files, output_file, soap_pe_output_file, soap_se_output_file, logger, logger_mutex):
+ if len(input_files) != 2:
+ raise Exception("One of read pairs %s missing" % (input_files,))
+ cmd = ("~/SPRING-SUMMER_2014/Softwares/soap2.21release/soap "
+ " -a {input_files[0]} "
+ " -b {input_files[1]} "
+ " -D Y55_genome.fa.index* "
+ " -o {soap_pe_output_file} -2 {soap_se_output_file} -m 400 -x 600")
+
+ check_call(cmd.format(**locals()))
+
+
+ #Soap_to_sam
+ cmd = " perl ~/SPRING-SUMMER_2014/Softwares/soap2sam.pl -p {soap_pe_output_file} > {output_file}"
+
+ check_call(cmd.format(**locals()))
+
+
+ with logger_mutex:
+ logger.debug("Hooray align_seq worked")
+
+
+ cmdline.run (options)
+
diff --git a/doc/_build/html/_sources/faq.txt b/doc/_build/html/_sources/faq.txt
new file mode 100644
index 0000000..2b70f66
--- /dev/null
+++ b/doc/_build/html/_sources/faq.txt
@@ -0,0 +1,980 @@
+.. include:: global.inc
+#############
+FAQ
+#############
+
+**********************************************************
+Citations
+**********************************************************
+
+===============================================================
+Q. How should *Ruffus* be cited in academic publications?
+===============================================================
+
+ The official publication describing the original version of *Ruffus* is:
+
+ `Leo Goodstadt (2010) <http://bioinformatics.oxfordjournals.org/content/early/2010/09/16/bioinformatics.btq524>`_ : **Ruffus: a lightweight Python library for computational pipelines.** *Bioinformatics* 26(21): 2778-2779
+
+
+**********************************************************
+Good practices
+**********************************************************
+
+==================================================================================================================
+Q. What is the best way of keeping my data and workings separate?
+==================================================================================================================
+
+ It is good practice to run your pipeline in a temporary, "working" directory away from your original data.
+
+ The first step of your pipeline might be to make softlinks to your original data in your working directory.
+    Here is some example (relatively paranoid) code to do just this:
+
+ .. code-block:: python
+ :emphasize-lines: 3,5
+
+ def re_symlink (input_file, soft_link_name, logger, logging_mutex):
+ """
+ Helper function: relinks soft symbolic link if necessary
+ """
+        # Guard against soft linking to oneself: disastrous consequences include deleting the original files!!
+ if input_file == soft_link_name:
+ logger.debug("Warning: No symbolic link made. You are using the original data directory as the working directory.")
+ return
+ # Soft link already exists: delete for relink?
+ if os.path.lexists(soft_link_name):
+ # do not delete or overwrite real (non-soft link) file
+ if not os.path.islink(soft_link_name):
+ raise Exception("%s exists and is not a link" % soft_link_name)
+ try:
+ os.unlink(soft_link_name)
+ except:
+ with logging_mutex:
+ logger.debug("Can't unlink %s" % (soft_link_name))
+ with logging_mutex:
+ logger.debug("os.symlink(%s, %s)" % (input_file, soft_link_name))
+ #
+ # symbolic link relative to original directory so that the entire path
+        #   can be moved around without breaking everything
+ #
+ os.symlink( os.path.relpath(os.path.abspath(input_file),
+ os.path.abspath(os.path.dirname(soft_link_name))), soft_link_name)
+
+ #
+ # First task should soft link data to working directory
+ #
+ @jobs_limit(1)
+ @mkdir(options.working_dir)
+ @transform( input_files,
+ formatter(),
+ # move to working directory
+ os.path.join(options.working_dir, "{basename[0]}{ext[0]}"),
+ logger, logging_mutex
+ )
+ def soft_link_inputs_to_working_directory (input_file, soft_link_name, logger, logging_mutex):
+ """
+ Make soft link in working directory
+ """
+ with logging_mutex:
+ logger.info("Linking files %(input_file)s -> %(soft_link_name)s\n" % locals())
+ re_symlink(input_file, soft_link_name, logger, logging_mutex)
+
+
+.. _faq.paired_files:
+
+==================================================================================================================
+Q. What is the best way of handling data in file pairs (or triplets etc.)?
+==================================================================================================================
+
+
+ In Bioinformatics, DNA data often consists of only the nucleotide sequence at the two ends of larger fragments.
+ The `paired_end <http://www.illumina.com/technology/next-generation-sequencing/paired-end-sequencing_assay.ilmn>`__ or
+ `mate pair <http://en.wikipedia.org/wiki/Shotgun_sequencing#Whole_genome_shotgun_sequencing>`__ data frequently
+    consists of file pairs with conveniently related names such as "*.R1.fastq" and "*.R2.fastq".
+
+    At some point in the data pipeline, these file pairs or triplets must find each other and be analysed in the same job.
+
+    Provided these file pairs or triplets are named consistently, the easiest way to regroup them is to use the
+ Ruffus :ref:`@collate <new_manual.collate>` decorator. For example:
+
+
+ .. code-block:: python
+
+ @collate(original_data_files,
+
+ # match file name up to the "R1.fastq.gz"
+ formatter("([^/]+)R[12].fastq.gz$"),
+
+ # Create output parameter supplied to next task
+ "{path[0]}/{1[0]}.sam",
+ logger, logger_mutex)
+ def handle_paired_end(input_files, output_paired_files, logger, logger_mutex):
+ # check that we really have a pair of two files not an orphaned singleton
+ if len(input_files) != 2:
+ raise Exception("One of read pairs %s missing" % (input_files,))
+
+ # do stuff here
+
+
+
+ This (incomplete, untested) :ref:`example code <faq.paired_files.code>` shows what this would look like *in vivo*.
+
+
+
+**********************************************************
+General
+**********************************************************
+
+=========================================================
+Q. *Ruffus* won't create dependency graphs
+=========================================================
+
+ A. You need to have installed ``dot`` from `Graphviz <http://www.graphviz.org/>`_ to produce
+ pretty flowcharts likes this:
+
+ .. image:: images/pretty_flowchart.png
+
+
+
+
+=========================================================
+Q. *Ruffus* seems to be hanging in the same place
+=========================================================
+
+ A. If *ruffus* is interrupted, for example, by a Ctrl-C,
+ you will often find the following lines of code highlighted::
+
+ File "build/bdist.linux-x86_64/egg/ruffus/task.py", line 1904, in pipeline_run
+ File "build/bdist.linux-x86_64/egg/ruffus/task.py", line 1380, in run_all_jobs_in_task
+ File "/xxxx/python2.6/multiprocessing/pool.py", line 507, in next
+ self._cond.wait(timeout)
+ File "/xxxxx/python2.6/threading.py", line 237, in wait
+ waiter.acquire()
+
+    This is *not* where *ruffus* is hanging, but rather the boundary between the main programme process
+ and the sub-processes which run *ruffus* jobs in parallel.
+
+    This is naturally where broken execution threads wash up.
+
+
+
+
+=========================================================
+Q. Regular expression substitutions don't work
+=========================================================
+
+ A. If you are using the special regular expression forms ``"\1"``, ``"\2"`` etc.
+    to refer to matching groups, remember to 'escape' the substitution pattern string.
+ The best option is to use `'raw' python strings <http://docs.python.org/library/re.html>`_.
+ For example:
+
+ ::
+
+ r"\1_substitutes\2correctly\3four\4times"
+
+ Ruffus will throw an exception if it sees an unescaped ``"\1"`` or ``"\2"`` in a file name.
+
+========================================================================================
+Q. How to force a pipeline to appear up to date?
+========================================================================================
+
+ *I have made a trivial modification to one of my data files and now Ruffus wants to rerun my month long pipeline. How can I convince Ruffus that everything is fine and to leave things as they are?*
+
+ The standard way to do what you are trying to do is to touch all the files downstream...
+ That way the modification times of your analysis files would postdate your existing files.
+ You can do this manually but Ruffus also provides direct support:
+
+ .. code-block:: python
+
+ pipeline_run (touch_files_only = True)
+
+    ``pipeline_run`` will traverse your pipeline normally, stepping over up-to-date tasks and starting
+    with jobs which look out of date. However, none of your pipeline task functions
+    will actually be called; instead, each out-of-date file is `touch <https://en.wikipedia.org/wiki/Touch_(Unix)>`__-ed in
+    turn so that the file modification dates follow on successively.
+
+ See the documentation for :ref:`pipeline_run() <pipeline_functions.pipeline_run>`
+
+ It is even simpler if you are using the new Ruffus.cmdline support from version 2.4. You can just type
+
+ .. code-block:: bash
+
+ your script --touch_files_only [--other_options_of_your_own_etc]
+
+ See :ref:`command line <new_manual.cmdline>` documentation.
+
+========================================================================================
+Q. How can I use my own decorators with Ruffus?
+========================================================================================
+
+(Thanks to Radhouane Aniba for contributing to this answer.)
+
+A. With care! If the following two points are observed:
+
+____________________________________________________________________________________________________________________________________________________________________________________________________________________
+1. Use `@wraps <https://docs.python.org/2/library/functools.html#functools.wraps>`__ from ``functools`` or Michele Simionato's `decorator <https://pypi.python.org/pypi/decorator>`__ module
+____________________________________________________________________________________________________________________________________________________________________________________________________________________
+
+ These will automatically forward attributes from the task function correctly:
+
+    * ``__name__`` and ``__module__`` are used to identify functions uniquely in a Ruffus pipeline, and
+ * ``pipeline_task`` is used to hold per task data
+
+__________________________________________________________________________________________________________
+2. Always call Ruffus decorators first before your own decorators.
+__________________________________________________________________________________________________________
+
+ Otherwise, your decorator will be ignored.
+
+ So this works:
+
+ .. code-block:: python
+
+ @follows(prev_task)
+ @custom_decorator(something)
+ def test():
+ pass
+
+    This is a bit futile:
+
+ .. code-block:: python
+
+ # ignore @custom_decorator
+ @custom_decorator(something)
+ @follows(prev_task)
+ def test():
+ pass
+
+
+ This order dependency is an unfortunate quirk of how python decorators work. The last (rather futile)
+ piece of code is equivalent to:
+
+ .. code-block:: python
+
+ test = custom_decorator(something)(ruffus.follows(prev_task)(test))
+
+ Unfortunately, Ruffus has no idea that someone else (``custom_decorator``) is also modifying the ``test()`` function
+ after it (``ruffus.follows``) has had its go.
+
+
+
+_____________________________________________________
+Example decorator:
+_____________________________________________________
+
+ Let us look at a decorator to time jobs:
+
+ .. code-block:: python
+
+ import sys, time
+ def time_func_call(func, stream, *args, **kwargs):
+ """prints elapsed time to standard out, or any other file-like object with a .write() method.
+ """
+ start = time.time()
+ # Run the decorated function.
+ ret = func(*args, **kwargs)
+ # Stop the timer.
+ end = time.time()
+ elapsed = end - start
+ stream.write("{} took {} seconds\n".format(func.__name__, elapsed))
+ return ret
+
+
+ from ruffus import *
+ import sys
+ import time
+
+ @time_job(sys.stderr)
+ def first_task():
+ print "First task"
+
+
+ @follows(first_task)
+ @time_job(sys.stderr)
+ def second_task():
+ print "Second task"
+
+
+ @follows(second_task)
+ @time_job(sys.stderr)
+ def final_task():
+ print "Final task"
+
+ pipeline_run()
+
+
+ What would ``@time_job`` look like?
+
+__________________________________________________________________________________________________________
+1. Using functools `@wraps <https://docs.python.org/2/library/functools.html#functools.wraps>`__
+__________________________________________________________________________________________________________
+
+
+ .. code-block:: python
+
+ import functools
+ def time_job(stream=sys.stdout):
+ def actual_time_job(func):
+ @functools.wraps(func)
+ def wrapper(*args, **kwargs):
+ return time_func_call(func, stream, *args, **kwargs)
+ return wrapper
+ return actual_time_job
+
+__________________________________________________________________________________________________________
+2. Using Michele Simionato's `decorator <https://pypi.python.org/pypi/decorator>`__ module
+__________________________________________________________________________________________________________
+
+
+ .. code-block:: python
+
+ import decorator
+ def time_job(stream=sys.stdout):
+ def time_job(func, *args, **kwargs):
+ return time_func_call(func, stream, *args, **kwargs)
+ return decorator.decorator(time_job)
+
+
+_______________________________________________________________________________________________________________________________________________________________
+3. By hand, using a `callable object <https://docs.python.org/2/reference/datamodel.html#emulating-callable-objects>`__
+_______________________________________________________________________________________________________________________________________________________________
+
+
+ .. code-block:: python
+
+ class time_job(object):
+ def __init__(self, stream=sys.stdout):
+ self.stream = stream
+ def __call__(self, func):
+ def inner(*args, **kwargs):
+ return time_func_call(func, self.stream, *args, **kwargs)
+ # remember to forward __name__
+ inner.__name__ = func.__name__
+ inner.__module__ = func.__module__
+ inner.__doc__ = func.__doc__
+ if hasattr(func, "pipeline_task"):
+ inner.pipeline_task = func.pipeline_task
+ return inner
+
+
+
+
+
+========================================================================================
+Q. Can a task function in a *Ruffus* pipeline be called normally outside of Ruffus?
+========================================================================================
+ A. Yes. Most python decorators wrap themselves around a function. However, *Ruffus* leaves the
+ original function untouched and unwrapped. Instead, *Ruffus* adds a ``pipeline_task`` attribute
+ to the task function to signal that this is a pipelined function.
+
+ This means the original task function can be called just like any other python function.
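+
+    For example (a sketch using a made-up task):
+
+    .. code-block:: python
+
+        from ruffus import *
+
+        @transform(["a.txt"], suffix(".txt"), ".copy")
+        def copy_file(input_file, output_file):
+            open(output_file, "w").write(open(input_file).read())
+
+        # make a test input file, then call the task function directly,
+        # outside of pipeline_run(), like any other python function
+        open("b.txt", "w").write("some data\n")
+        copy_file("b.txt", "b.copy")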
+
+=====================================================================================================================
+Q. My *Ruffus* tasks create two files at a time. Why is the second one ignored in successive stages of my pipeline?
+=====================================================================================================================
+ *This is my code:*
+
+ ::
+
+ from ruffus import *
+ import sys
+ @transform("start.input", regex(".+"), ("first_output.txt", "second_output.txt"))
+ def task1(i,o):
+ pass
+
+ @transform(task1, suffix(".txt"), ".result")
+ def task2(i, o):
+ pass
+
+ pipeline_printout(sys.stdout, [task2], verbose=3)
+
+ ::
+
+ ________________________________________
+ Tasks which will be run:
+
+ Task = task1
+ Job = [start.input
+ ->[first_output.txt, second_output.txt]]
+
+ Task = task2
+ Job = [[first_output.txt, second_output.txt]
+ ->first_output.result]
+
+ ________________________________________
+
+ A: This code produces a single output of a tuple of 2 files. In fact, you want two
+ outputs, each consisting of 1 file.
+
+    You want a single job (single input) to produce multiple outputs (multiple jobs
+ in downstream tasks). This is a one-to-many operation which calls for
+ :ref:`@split <decorators.split>`:
+
+ ::
+
+ from ruffus import *
+ import sys
+ @split("start.input", ("first_output.txt", "second_output.txt"))
+ def task1(i,o):
+ pass
+
+ @transform(task1, suffix(".txt"), ".result")
+ def task2(i, o):
+ pass
+
+ pipeline_printout(sys.stdout, [task2], verbose=3)
+
+ ::
+
+ ________________________________________
+ Tasks which will be run:
+
+ Task = task1
+ Job = [start.input
+ ->[first_output.txt, second_output.txt]]
+
+ Task = task2
+ Job = [first_output.txt
+ ->first_output.result]
+ Job = [second_output.txt
+ ->second_output.result]
+
+ ________________________________________
+
+
+=======================================================================================
+Q. How can a *Ruffus* task produce output which goes off in different directions?
+=======================================================================================
+
+ A. As above, anytime there is a situation which requires a one-to-many operation, you should reach
+ for :ref:`@subdivide <decorators.subdivide>`. The advanced form takes a regular expression, making
+ it easier to produce multiple derivatives of the input file. The following example subdivides
+    *2* jobs each into *3*, so that the subsequent task will run *2* x *3* = *6* jobs.
+
+ ::
+
+ from ruffus import *
+ import sys
+ @subdivide(["1.input_file",
+ "2.input_file"],
+ regex(r"(.+).input_file"), # match file prefix
+ [r"\1.file_type1",
+ r"\1.file_type2",
+ r"\1.file_type3"])
+ def split_task(input, output):
+ pass
+
+
+ @transform(split_task, regex("(.+)"), r"\1.test")
+ def test_split_output(i, o):
+ pass
+
+ pipeline_printout(sys.stdout, [test_split_output], verbose = 3)
+
+    Each of the original 2 files has been split in three so that test_split_output will run
+ 6 jobs simultaneously.
+
+ ::
+
+ ________________________________________
+ Tasks which will be run:
+
+ Task = split_task
+ Job = [1.input_file ->[1.file_type1, 1.file_type2, 1.file_type3]]
+ Job = [2.input_file ->[2.file_type1, 2.file_type2, 2.file_type3]]
+
+ Task = test_split_output
+ Job = [1.file_type1 ->1.file_type1.test]
+ Job = [1.file_type2 ->1.file_type2.test]
+ Job = [1.file_type3 ->1.file_type3.test]
+ Job = [2.file_type1 ->2.file_type1.test]
+ Job = [2.file_type2 ->2.file_type2.test]
+ Job = [2.file_type3 ->2.file_type3.test]
+ ________________________________________
+
+
+
+=======================================================================================
+Q. Can I call extra code before each job?
+=======================================================================================
+ A. This is easily accomplished by hijacking the process
+ for checking if jobs are up to date or not (:ref:`@check_if_uptodate <decorators.check_if_uptodate>`):
+
+ ::
+
+ from ruffus import *
+ import sys
+
+ def run_this_before_each_job (*args):
+ print "Calling function before each job using these args", args
+ # Remember to delegate to the default *Ruffus* code for checking if
+ # jobs need to run.
+ return needs_update_check_modify_time(*args)
+
+ @check_if_uptodate(run_this_before_each_job)
+ @files([[None, "a.1"], [None, "b.1"]])
+ def task_func(input, output):
+ pass
+
+ pipeline_printout(sys.stdout, [task_func])
+
+ This results in:
+ ::
+
+ ________________________________________
+ >>> pipeline_run([task_func])
+ Calling function before each job using these args (None, 'a.1')
+ Calling function before each job using these args (None, 'a.1')
+ Calling function before each job using these args (None, 'b.1')
+ Job = [None -> a.1] completed
+ Job = [None -> b.1] completed
+ Completed Task = task_func
+
+ .. note::
+
+ Because ``run_this_before_each_job(...)`` is called whenever *Ruffus* checks to see if
+ a job is up to date or not, the function may be called twice for some jobs
+ (e.g. ``(None, 'a.1')`` above).
+
+
+=========================================================================================================
+Q. Does *Ruffus* allow checkpointing: to distinguish interrupted and completed results?
+=========================================================================================================
+
+_____________________________________________________
+A. Use the builtin sqlite checkpointing
+_____________________________________________________
+
+
+    By default, ``pipeline_run(...)`` will save the timestamps for output files from successfully run jobs to an sqlite database file (``.ruffus_history.sqlite``) in the current directory.
+
+ * If you are using ``Ruffus.cmdline``, you can change the checksum / timestamp database file name on the command line using ``--checksum_file_name NNNN``
+
+
+ The level of timestamping / checksumming can be set via the ``checksum_level`` parameter:
+
+ .. code-block:: python
+
+ pipeline_run(..., checksum_level = N, ...)
+
+ where the default is 1::
+
+ level 0 : Use only file timestamps
+ level 1 : above, plus timestamp of successful job completion
+ level 2 : above, plus a checksum of the pipeline function body
+ level 3 : above, plus a checksum of the pipeline function default arguments and the additional arguments passed in by task decorators
+
+_____________________________________________________
+A. Use a flag file
+_____________________________________________________
+
+ When gmake is interrupted, it will delete the target file it is updating so that the target is
+ remade from scratch when make is next run. Ruffus, by design, does not do this because, more often than
+    not, the partial / incomplete file may be useful, if only to reveal, for example, what might have caused an interrupting error
+ or exception. It also seems a bit too clever and underhand to go around the programmer's back to delete files...
+
+    A common *Ruffus* convention is to create an empty checkpoint or "flag" file whose sole purpose
+ is to record a modification-time and the successful completion of a job.
+
+    This is what a task with a completion flag would look like:
+
+ ::
+
+ #
+ # Assuming a pipelined task function named "stage1"
+ #
+ @transform(stage1, suffix(".stage1"), [".stage2", ".stage2_finished"] )
+ def stage2 (input_files, output_files):
+ task_output_file, flag_file = output_files
+ cmd = ("do_something2 %(input_file)s >| %(task_output_file)s ")
+ cmd = cmd % {
+ "input_file": input_files[0],
+ "task_output_file": task_output_file
+ }
+ if not os.system( cmd ):
+ #88888888888888888888888888888888888888888888888888888888888888888888888888888
+ #
+ # It worked: Create completion flag_file
+ #
+ open(flag_file, "w")
+ #
+ #88888888888888888888888888888888888888888888888888888888888888888888888888888
+
+
+    The flag files ``xxx.stage2_finished`` indicate that each job is finished. If a flag file is missing,
+    the corresponding ``xxx.stage2`` is only a partial, interrupted result.
+
+
+ The only thing to be aware of is that the flag file will appear in the list of inputs of the
+ downstream task, which should accordingly look like this:
+
+
+ ::
+
+ @transform(stage2, suffix(".stage2"), [".stage3", ".stage3_finished"] )
+ def stage3 (input_files, output_files):
+
+ #888888888888888888888888888888888888888888888888888888888888888888888888888888888
+ #
+ # Note that the first parameter is a LIST of input files, the last of which
+ # is the flag file from the previous task which we can ignore
+ #
+ input_file, previous_flag_file = input_files
+ #
+ #888888888888888888888888888888888888888888888888888888888888888888888888888888888
+ task_output_file, flag_file = output_files
+ cmd = ("do_something3 %(input_file)s >| %(task_output_file)s ")
+ cmd = cmd % {
+ "input_file": input_file,
+ "task_output_file": task_output_file
+ }
+ # completion flag file for this task
+ if not os.system( cmd ):
+ open(flag_file, "w")
+
+
+ The :ref:`Bioinformatics example<examples_bioinformatics_part2.step2>` contains :ref:`code <examples_bioinformatics_part2_code>` for checkpointing.
+
+
+_____________________________________________________
+A. Use a temp file
+_____________________________________________________
+
+ Thanks to Martin Goodson for suggesting this and providing an example. In his words:
+
+ "I normally use a decorator to create a temporary file which is only renamed after the task has completed without any problems. This seems a more elegant solution to the problem:"
+
+
+ .. code-block:: python
+
+        import os
+        from functools import wraps
+
+        def usetemp(task_func):
+ """ Decorate a function to write to a tmp file and then rename it. So half finished tasks cannot create up to date targets.
+ """
+ @wraps(task_func)
+ def wrapper_function(*args, **kwargs):
+ args=list(args)
+ outnames=args[1]
+ if not isinstance(outnames, basestring) and hasattr(outnames, '__getitem__'):
+ tmpnames=[str(x)+".tmp" for x in outnames]
+ args[1]=tmpnames
+ task_func(*args, **kwargs)
+ try:
+ for tmp, name in zip(tmpnames, outnames):
+ if os.path.exists(tmp):
+ os.rename(tmp, str(name))
+ except BaseException as e:
+ for name in outnames:
+ if os.path.exists(name):
+ os.remove(name)
+ raise (e)
+ else:
+ tmp=str(outnames)+'.tmp'
+ args[1]=tmp
+ task_func(*args, **kwargs)
+ os.rename(tmp, str(outnames))
+ return wrapper_function
+
+
+ Use like this:
+
+ .. code-block:: python
+
+     @files(None, 'client1.price')
+     @usetemp
+     def getusers(inputfile, outputname):
+         #**************************************************
+         # code goes here
+         # outputname now refers to temporary file
+         pass
+
+
+
+
+
+**********************************************************
+Windows
+**********************************************************
+
+=========================================================
+Q. Windows seems to spawn *ruffus* processes recursively
+=========================================================
+
+ A. It is necessary to protect the "entry point" of the program under Windows.
+ Otherwise, a new process will be started each time the main module is imported
+ by a new Python interpreter as an unintended side effect, causing a cascade
+ of new processes.
+
+ See: http://docs.python.org/library/multiprocessing.html#multiprocessing-programming
+
+ This code works::
+
+     if __name__ == '__main__':
+         try:
+             pipeline_run([parallel_task], multiprocess = 5)
+         except Exception as e:
+             print(e.args)
+
+
+
+**********************************************************
+Sun Grid Engine / PBS / SLURM etc
+**********************************************************
+
+==========================================================================================================================================
+Q. Can Ruffus be used to manage a cluster or grid based pipeline?
+==========================================================================================================================================
+ A. Some minimal modifications have to be made to your *Ruffus* script to allow it to submit jobs to a cluster.
+
+ See :ref:`ruffus.drmaa_wrapper <new_manual.ruffus.drmaa_wrapper.run_job>`
+
+ Thanks to Andreas Heger and others at CGAT and Bernie Pope for contributing ideas and code.
+
+
+==========================================================================================================================================
+Q. When I submit lots of jobs via Sun Grid Engine (SGE), the head node occasionally freezes and dies
+==========================================================================================================================================
+
+ A. You need to use multithreading rather than multiprocessing. See :ref:`ruffus.drmaa_wrapper <new_manual.ruffus.drmaa_wrapper.run_job>`
+
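+ A minimal sketch of the two answers above. The command string, file names and the ``bwa``
+ invocation are illustrative assumptions; see :ref:`ruffus.drmaa_wrapper <new_manual.ruffus.drmaa_wrapper.run_job>`
+ for the authoritative parameters.
+
+ .. code-block:: python
+
+     import drmaa
+     from ruffus import transform, suffix, pipeline_run
+     from ruffus.drmaa_wrapper import run_job, error_drmaa_job
+
+     # one drmaa session shared by all jobs
+     drmaa_session = drmaa.Session()
+     drmaa_session.initialize()
+
+     @transform(["a.fastq", "b.fastq"], suffix(".fastq"), ".sam")
+     def map_reads(input_file, output_file):
+         # hypothetical command; replace with your own cluster job
+         cmd = "bwa mem ref.fa %s > %s" % (input_file, output_file)
+         try:
+             stdout_res, stderr_res = run_job(cmd_str       = cmd,
+                                              job_name      = "map_reads",
+                                              drmaa_session = drmaa_session)
+         except error_drmaa_job as err:
+             raise Exception("Failed to run:\n%s\n%s" % (cmd, err))
+
+     if __name__ == '__main__':
+         # multithread, not multiprocess, so that drmaa does not overwhelm the head node
+         pipeline_run([map_reads], multithread = 5)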
+
+=====================================================================
+Q. Keeping large intermediate files
+=====================================================================
+
+ Sometimes pipelines create a large number of intermediate files which might not be needed later.
+
+ Unfortunately, the current design of *Ruffus* requires these files to hang around, otherwise the pipeline
+ will not know that it ran successfully.
+
+ We have some tentative plans to get around this but, in the meantime, Bernie Pope suggests
+ truncating intermediate files in place, preserving timestamps::
+
+
+     import os
+
+     # truncate a file to zero bytes, and preserve its original modification time
+     def zeroFile(file):
+         if os.path.exists(file):
+             # save the current time of the file
+             timeInfo = os.stat(file)
+             try:
+                 f = open(file, 'w')
+             except IOError:
+                 pass
+             else:
+                 f.truncate(0)
+                 f.close()
+                 # change the time of the file back to what it was
+                 os.utime(file, (timeInfo.st_atime, timeInfo.st_mtime))
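+
+ As a sketch of how this might be used (``big_intermediate_task`` and ``do_summary`` are
+ hypothetical stand-ins for your own tasks), the intermediate file can be truncated once a
+ downstream task has consumed it::
+
+     @transform(big_intermediate_task, suffix(".big_data"), ".summary")
+     def summarise(input_file, output_file):
+         do_summary(input_file, output_file)
+         # the intermediate file has been consumed: truncate it in place,
+         # keeping its timestamp so that the pipeline still looks up to date
+         zeroFile(input_file)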
+
+**********************************************************************************
+Sharing python objects between Ruffus processes running concurrently
+**********************************************************************************
+
+ The design of Ruffus envisages that much of the data flow in pipelines occurs in files but it is also possible to pass python objects in memory.
+
+ Ruffus uses the `multiprocessing <http://docs.python.org/2/library/multiprocessing.html>`_ module and much of the following is a summary of what is covered
+ in depth in the Python Standard Library `Documentation <http://docs.python.org/2/library/multiprocessing.html#sharing-state-between-processes>`_.
+
+ Running Ruffus using ``pipeline_run(..., multiprocess = NNN)`` where ``NNN`` > 1 runs each job concurrently on up to ``NNN`` separate local processes.
+ Each task function runs independently in a different python interpreter, possibly on a different CPU, in the most efficient way.
+ However, this does mean we have to pay some attention to how data is sent across process boundaries (unlike the situation with ``pipeline_run(..., multithread = NNN)`` ).
+
+ The python code and data which comprises your multitasking Ruffus job is sent to a separate process in three ways:
+
+ #. The python function code and data objects are `pickled <http://docs.python.org/2/library/pickle.html>`__, i.e. converted into a byte stream, by the master process and sent to the remote process
+ before being converted back into normal python objects (unpickling).
+ #. The parameters for your jobs, i.e. what Ruffus calls your task functions with, are separately `pickled <http://docs.python.org/2/library/pickle.html>`__ and sent to the remote process via
+ `multiprocessing.Queue <http://docs.python.org/2/library/multiprocessing.html#multiprocessing.Queue>`_
+ #. You can share and synchronise other data yourselves. The canonical example is the logger provided by ``Ruffus.cmdline.setup_logging``
+
+ .. note::
+
+ Check that your function code and data can be `pickled <http://docs.python.org/2/library/pickle.html#what-can-be-pickled-and-unpickled>`_.
+
+ Only functions, built-in functions and classes defined at the top level of a module are picklable.
+
+
+ The following answers are a short "how-to" for sharing and synchronising data yourselves.
+
+
+==============================================================================
+Can ordinary python objects be shared between processes?
+==============================================================================
+
+ A. Objects which can be `pickled <http://docs.python.org/2/library/pickle.html>`__ can be shared as is. These include
+
+ * numbers
+ * strings
+ * tuples, lists, sets, and dictionaries containing only objects which can be `pickled <http://docs.python.org/2/library/pickle.html>`__.
+
+ #. If these do not change during your pipeline, you can just use them without any further effort in your task.
+ #. If you need to use the value at the point when the task function is *called*, then you need to pass the python object as parameters to your task.
+ For example:
+
+ .. code-block:: python
+ :emphasize-lines: 1
+
+ # changing_list changes...
+ @transform(previous_task, suffix(".foo"), ".bar", changing_list)
+ def next_task(input_file, output_file, changing_list):
+ pass
+
+ #. If you need to use the value when the task function is *run* then see :ref:`the following answer. <how-about-synchronising-python-objects-in-real-time>`.
+
+
+================================================================================================
+Why am I getting ``PicklingError``?
+================================================================================================
+
+ What is happening? Didn't `Joan of Arc <https://en.wikipedia.org/wiki/Battle_of_the_Herrings>`_ solve this once and for all?
+
+ A. Some of the data or code in your function cannot be `pickled <http://docs.python.org/2/library/pickle.html>`__ and is being sent by python ``multiprocessing`` across process boundaries.
+
+
+ When you run your pipeline using multiprocess:
+
+ .. code-block:: python
+
+ pipeline_run([], verbose = 5, multiprocess = 5, logger = ruffusLoggerProxy)
+
+ You will get the following errors:
+
+ .. code-block:: python
+
+     Exception in thread Thread-2:
+     Traceback (most recent call last):
+       File "/path/to/python/python2.7/threading.py", line 808, in __bootstrap_inner
+         self.run()
+       File "/path/to/python/python2.7/threading.py", line 761, in run
+         self.__target(*self.__args, **self.__kwargs)
+       File "/path/to/python/python2.7/multiprocessing/pool.py", line 342, in _handle_tasks
+         put(task)
+     PicklingError: Can't pickle <type 'function'>: attribute lookup __builtin__.function failed
+
+
+ which go away when you set ``pipeline_run([], multiprocess = 1, ...)``
+
+
+
+
+ Unfortunately, pickling errors are particularly ill-served by standard python error messages. The only really good advice is to take the offending
+ code, try to `pickle <http://docs.python.org/2/library/pickle.html>`__ it yourself and narrow down the errors. Check your objects against the list
+ in the `pickle <http://docs.python.org/2/library/pickle.html#what-can-be-pickled-and-unpickled>`_ module.
+ Watch out especially for nested functions: these will have to be moved to file scope.
+ Other objects may have to be passed via a proxy (see below).
+
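+ As a rough sketch, you can check candidate objects (the task function itself, and any extra
+ parameters you pass to it) before handing them to the pipeline; the names below are hypothetical:
+
+ .. code-block:: python
+
+     import pickle
+
+     def assert_picklable(*objects):
+         # raise early, with a clearer message, if anything cannot be pickled
+         for obj in objects:
+             try:
+                 pickle.dumps(obj)
+             except (pickle.PicklingError, TypeError, AttributeError) as err:
+                 raise ValueError("Cannot pickle %r: %s" % (obj, err))
+
+     assert_picklable(my_task_function, my_extra_parameter)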
+
+.. _how-about-synchronising-python-objects-in-real-time:
+
+================================================================================================
+How about synchronising python objects in real time?
+================================================================================================
+
+ A. You can use managers and proxy objects from the `multiprocessing <http://docs.python.org/library/multiprocessing.html>`__ module.
+
+ The underlying python object would be owned and managed by a (hidden) server process. Other processes can access the shared objects transparently by using proxies. This is how the logger provided by
+ ``Ruffus.cmdline.setup_logging`` works:
+
+ .. code-block:: python
+
+ # optional logger which can be passed to ruffus tasks
+ logger, logger_mutex = cmdline.setup_logging (__name__, options.log_file, options.verbose)
+
+ ``logger`` is a proxy for the underlying python `logger <http://docs.python.org/2/library/logging.html>`__ object, and it can be shared freely between processes.
+
+ The best course is to pass ``logger`` as a parameter to a *Ruffus* task.
+
+ The only caveat is that we should make sure multiple jobs are not writing to the log at the same time. To synchronise logging, we use a proxy to a non-reentrant `multiprocessing.lock <http://docs.python.org/2/library/multiprocessing.html#multiprocessing.Lock>`_.
+
+ .. code-block:: python
+
+ logger, logger_mutex = cmdline.setup_logging (__name__, options.log_file, options.verbose)
+
+
+ @transform(previous_task, suffix(".foo"), ".bar", logger, logger_mutex)
+ def next_task(input_file, output_file, logger, logger_mutex):
+ with logger_mutex:
+ logger.info("We are in the middle of next_task: %s -> %s" % (input_file, output_file))
+
+
+==============================================================================
+Can I share and synchronise my own python classes via proxies?
+==============================================================================
+
+ A. `multiprocessing.managers.SyncManager <http://docs.python.org/2/library/multiprocessing.html#multiprocessing.managers.SyncManager>`__ provides out of the box support for lists, arrays and dicts etc.
+
+ Most of the time, we can use a "vanilla" manager provided by `multiprocessing.Manager() <http://docs.python.org/2/library/multiprocessing.html#multiprocessing.sharedctypes.multiprocessing.Manager>`_:
+
+ .. code-block:: python
+
+
+     import multiprocessing
+     manager = multiprocessing.Manager()
+
+     list_proxy = manager.list()
+     dict_proxy = manager.dict()
+     lock_proxy = manager.Lock()
+     namespace_proxy = manager.Namespace()
+     queue_proxy = manager.Queue()               # or manager.Queue(maxsize)
+     reentrant_lock_proxy = manager.RLock()
+     semaphore_proxy = manager.Semaphore()       # or manager.Semaphore(value)
+     char_array_proxy = manager.Array('c', b"hello")   # 'c' typecode exists in python2 only
+     integer_proxy = manager.Value('i', 6)
+
+     @transform(previous_task, suffix(".foo"), ".bar", lock_proxy, dict_proxy, list_proxy)
+     def next_task(input_file, output_file, lock_proxy, dict_proxy, list_proxy):
+         with lock_proxy:
+             list_proxy.append(3)
+             dict_proxy['a'] = 5
+
+
+ However, you can also create custom proxy classes for your own objects.
+
+ In this case you may need to derive from `multiprocessing.managers.SyncManager <http://docs.python.org/2/library/multiprocessing.html#multiprocessing.managers.SyncManager>`_
+ and register proxy functions. See ``Ruffus.proxy_logger`` for an example of how to do this.
+
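+ As a minimal sketch (the ``Counter`` class and its methods are purely illustrative), registering
+ your own class with a custom manager might look like this:
+
+ .. code-block:: python
+
+     from multiprocessing.managers import SyncManager
+
+     class Counter(object):
+         def __init__(self):
+             self.value = 0
+         def increment(self):
+             self.value += 1
+             return self.value
+
+     class MyManager(SyncManager):
+         pass
+
+     # expose Counter through the manager: method calls go via an automatically generated proxy
+     MyManager.register("Counter", Counter)
+
+     manager = MyManager()
+     manager.start()
+     counter_proxy = manager.Counter()
+
+     # counter_proxy can now be passed to Ruffus tasks as an extra parameter
+     counter_proxy.increment()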
+============================================================================================================================================================
+How do I send python objects back and forth without tangling myself in horrible synchronisation code?
+============================================================================================================================================================
+
+ A. Sharing python objects by passing messages is a much more modern and safer way to coordinate multitasking than using synchronization primitives like locks.
+
+ The python `multiprocessing <http://docs.python.org/2/library/multiprocessing.html#pipes-and-queues>`__ module provides support for passing python objects as messages between processes.
+ You can either use `pipes <http://docs.python.org/2/library/multiprocessing.html#multiprocessing.Pipe>`__
+ or `queues <http://docs.python.org/2/library/multiprocessing.html#multiprocessing.Queue>`__.
+ The idea is that one process pushes an object onto a `pipe <http://docs.python.org/2/library/multiprocessing.html#multiprocessing.Pipe>`__ or `queue <http://docs.python.org/2/library/multiprocessing.html#multiprocessing.Queue>`__
+ and another process pops it off at the other end. `Pipes <http://docs.python.org/2/library/multiprocessing.html#multiprocessing.Pipe>`__ have
+ only two ends, so `queues <http://docs.python.org/2/library/multiprocessing.html#multiprocessing.Queue>`__ are usually a better fit for sending data to multiple Ruffus jobs.
+
+ Proxies for `queues <http://docs.python.org/2/library/multiprocessing.html#multiprocessing.managers.SyncManager.Queue>`__ can be passed between processes as in the previous section.
+
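+ As a sketch (``a.foo`` and ``b.foo`` are assumed to exist already), a managed queue proxy can be
+ passed to each job just like any other extra parameter and drained in the master process:
+
+ .. code-block:: python
+
+     import multiprocessing
+     from ruffus import transform, suffix, pipeline_run
+
+     manager = multiprocessing.Manager()
+     result_queue = manager.Queue()
+
+     @transform(["a.foo", "b.foo"], suffix(".foo"), ".bar", result_queue)
+     def next_task(input_file, output_file, result_queue):
+         # do the real work here, then send a message back to the master process
+         result_queue.put((input_file, "finished"))
+         open(output_file, "w").close()
+
+     if __name__ == '__main__':
+         pipeline_run([next_task], multiprocess = 2)
+
+         # drain the queue in the master process
+         while not result_queue.empty():
+             print(result_queue.get())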
+
+==============================================================================
+How do I share large amounts of data efficiently across processes?
+==============================================================================
+
+ A. If it is really impractical to use data files on disk, you can put the data in shared memory.
+
+ It is possible to create shared objects using shared memory which can be inherited by child processes or passed as Ruffus parameters.
+ This is probably most efficiently done via the `array <http://docs.python.org/2/library/multiprocessing.html#multiprocessing.Array>`_
+ interface. Again, it is easy to create locks and proxies for synchronised access:
+
+
+ .. code-block:: python
+
+     import multiprocessing
+     from multiprocessing import Process, Lock
+     from multiprocessing.sharedctypes import Value, Array
+     from ctypes import Structure, c_double
+
+     manager = multiprocessing.Manager()
+
+     lock_proxy = manager.Lock()
+     int_array_proxy = manager.Array('i', [123] * 100)
+
+     @transform(previous_task, suffix(".foo"), ".bar", lock_proxy, int_array_proxy)
+     def next_task(input_file, output_file, lock_proxy, int_array_proxy):
+         with lock_proxy:
+             int_array_proxy[23] = 71
+
+
+
+
diff --git a/doc/_build/html/_sources/gallery.txt b/doc/_build/html/_sources/gallery.txt
new file mode 100644
index 0000000..90ff20a
--- /dev/null
+++ b/doc/_build/html/_sources/gallery.txt
@@ -0,0 +1,63 @@
+.. include:: global.inc
+
+.. image:: images/logo.jpg
+
+******************************************************
+Hall of Fame: User contributed flowcharts
+******************************************************
+Please contribute your own workflows in your favourite colours, with an (optional) short description,
+to email: ruffus_lib at llew.org.uk
+
+
+
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+RNASeq pipeline
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+ http://en.wikipedia.org/wiki/RNA-Seq
+
+ Mapping transcripts onto genomes using high-throughput sequencing technologies (:download:`svg <images/gallery/gallery_rna_seq.svg>`).
+
+ .. image:: images/gallery/gallery_rna_seq.png
+ :target: _downloads/gallery_rna_seq.svg
+
+
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+non-coding evolutionary constraints
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+ http://en.wikipedia.org/wiki/Noncoding_DNA
+
+ Non-protein coding evolutionary constraints in different species (:download:`svg <images/gallery/gallery_dless.svg>`).
+
+ .. image:: images/gallery/gallery_dless.png
+ :target: _downloads/gallery_dless.svg
+
+^^^^^^^^^^^^^^^^^
+SNP annotation
+^^^^^^^^^^^^^^^^^
+Predicting impact of different Single Nucleotide Polymorphisms
+
+http://en.wikipedia.org/wiki/Single-nucleotide_polymorphism
+
+Population variation across genomes (:download:`svg <images/gallery/gallery_snp_annotation.svg>`).
+
+.. image:: images/gallery/gallery_snp_annotation.png
+ :target: _downloads/gallery_snp_annotation.svg
+
+Using "pseudo" targets to run only part of the pipeline (:download:`svg <images/gallery/gallery_snp_annotation_consequences.svg>`).
+
+.. image:: images/gallery/gallery_snp_annotation_consequences.png
+ :target: _downloads/gallery_snp_annotation_consequences.svg
+
+
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+ChIP-Seq analysis
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Analysing DNA binding sites with ChIP-Seq
+http://en.wikipedia.org/wiki/Chip-Sequencing
+
+ (:download:`svg <images/gallery/gallery_big_pipeline.svg>`)
+
+ .. image:: images/gallery/gallery_big_pipeline.png
+ :target: _downloads/gallery_big_pipeline.svg
+
diff --git a/doc/_build/html/_sources/glossary.txt b/doc/_build/html/_sources/glossary.txt
new file mode 100644
index 0000000..9d86bab
--- /dev/null
+++ b/doc/_build/html/_sources/glossary.txt
@@ -0,0 +1,81 @@
+.. include:: global.inc
+****************
+Glossary
+****************
+.. _Glossary:
+
+.. _glossary.task:
+
+.. glossary::
+
+
+
+ task
+ A stage in a computational pipeline.
+
+ Each **task** in *ruffus* is represented by a python function.
+
+ For example, a task might be to find the products of sets of two numbers::
+
+ 4 x 5 = 20
+ 5 x 6 = 30
+ 2 x 7 = 14
+
+ job
+ Any number of operations which can be run in parallel and make up
+ the work in a stage of a computational pipeline.
+
+ Each **job** in *ruffus* is a separate call to the **task** function.
+
+ For example, if a task is to find products of numbers, each of these will be a separate job.
+
+ Job1::
+
+ 4 x 5 = 20
+
+ Job2::
+
+ 5 x 6 = 30
+
+ Job3::
+
+ 2 x 7 = 14
+
+ Jobs need not complete in order.
+
+
+ decorator
+ Ruffus decorators allow functions to be incorporated into a computational
+ pipeline, with automatic generation of parameters, dependency checking etc.,
+ without modifying any code within the function.
+ Quoting from the `python wiki <http://wiki.python.org/moin/PythonDecorators>`_:
+
+ A Python decorator is a specific change to the Python syntax that
+ allows us to more conveniently alter functions and methods.
+
+ Decorators dynamically alter the functionality of a function, method, or
+ class without having to directly use subclasses or change the source code
+ of the function being decorated.
+
+
+
+ generator
+ Python generators were introduced in python 2.2
+ (see `Charming Python: Iterators and simple generators <http://www.ibm.com/developerworks/library/l-pycon.html>`_).
+ They allow iterable data to be generated on the fly.
+
+ Ruffus asks for generators when you want to generate **job** parameters dynamically.
+
+ Each set of job parameters is returned by the ``yield`` keyword for
+ greater clarity. For example,::
+
+     def generate_job_parameters():
+
+         for file_index, file_name in enumerate(all_file_names):
+
+             # parameter for each job
+             yield file_index, file_name
+
+ Each job takes the parameters ``file_index`` and ``file_name``.
+
+
diff --git a/doc/_build/html/_sources/history.txt b/doc/_build/html/_sources/history.txt
new file mode 100644
index 0000000..c0a8f05
--- /dev/null
+++ b/doc/_build/html/_sources/history.txt
@@ -0,0 +1,733 @@
+.. include:: global.inc
+
+
+########################################
+Major Features added to Ruffus
+########################################
+
+.. note::
+
+ See :ref:`To do list <todo>` for future enhancements to Ruffus
+
+
+********************************************************************
+version 2.5RC
+********************************************************************
+
+ 31st July 2014: Release Candidate
+
+ 5th August 2014: Release
+
+============================================================================================================================================================
+1) Python3 compatibility (but at least python 2.6 is now required)
+============================================================================================================================================================
+
+ Ruffus v2.5 is now python3 compatible. This has required surprisingly many changes to the codebase. Please report any bugs to me.
+
+ .. note::
+
+ **Ruffus now requires at least python 2.6**
+
+ It proved to be impossible to support python 2.5 and python 3.x at the same time.
+
+============================================================================================================================================================
+2) Ctrl-C interrupts
+============================================================================================================================================================
+
+ Ruffus now mostly(!) terminates gracefully when interrupted by Ctrl-C.
+
+ Please send me bug reports with a minimal reproducible case when this doesn't work.
+
+ This means that, in general, if an ``Exception`` is thrown during your pipeline but you don't want to wait for the rest of the jobs to complete, you can still press Ctrl-C at any point.
+ Note that you may still need to clean up spawned processes, for example, using ``qdel`` if you are using ``Ruffus.drmaa_wrapper``
+
+============================================================================================================================================================
+3) Customising flowcharts in pipeline_printout_graph() with ``@graphviz``
+============================================================================================================================================================
+
+ *Contributed by Sean Davis, with improved syntax via Jake Biesinger*
+
+ The graphic for each task can have its own attributes (URL, shape, colour etc.) by adding
+ `graphviz attributes <http://www.graphviz.org/doc/info/attrs.html>`__
+ using the ``@graphviz`` decorator.
+
+ * This allows HTML formatting in the task names (using the ``label`` parameter as in the following example).
+ HTML labels **must** be enclosed in ``<`` and ``>``. E.g.
+
+ .. code-block:: python
+
+ label = "<Line <BR/> wrapped task_name()>"
+
+ * You can also opt to keep the task name and wrap it with a prefix and suffix:
+
+ .. code-block:: python
+
+ label_suffix = "??? ", label_prefix = ": What is this?"
+
+ * The ``URL`` attribute allows the generation of clickable svg, and also client / server
+ side image maps usable in web pages.
+ See `Graphviz documentation <http://www.graphviz.org/content/output-formats#dimap>`__
+
+
+ Example:
+
+ .. code-block:: python
+
+
+     @graphviz(URL='"http://cnn.com"', fillcolor = '"#FFCCCC"',
+               color = '"#FF0000"', pencolor='"#FF0000"', fontcolor='"#4B6000"',
+               label_suffix = "???", label_prefix = "What is this?<BR/> ",
+               label = "<What <FONT COLOR=\"red\">is</FONT>this>",
+               shape= "component", height = 1.5, peripheries = 5,
+               style="dashed")
+     def Up_to_date_task2(infile, outfile):
+         pass
+
+     # Can use dictionary if you wish...
+     graphviz_params = {"URL":"http://cnn.com", "fontcolor": '"#FF00FF"'}
+     @graphviz(**graphviz_params)
+     def myTask(input,output):
+         pass
+
+ .. **
+
+ .. image:: images/history_html_flowchart.png
+ :scale: 30
+
+
+============================================================================================================================================================
+4) Consistent verbosity levels
+============================================================================================================================================================
+
+ The verbosity levels are now more fine-grained and consistent between pipeline_printout and pipeline_run.
+ Note that at verbosity > 2, ``pipeline_run`` outputs lists of up-to-date tasks before running the pipeline.
+ Many users who defaulted to using a verbosity of 3 may want to move up to ``verbose = 4``.
+
+ * **level 0** : *Nothing*
+ * **level 1** : *Out-of-date Task names*
+ * **level 2** : *All Tasks (including any task function docstrings)*
+ * **level 3** : *Out-of-date Jobs in Out-of-date Tasks, no explanation*
+ * **level 4** : *Out-of-date Jobs in Out-of-date Tasks, with explanations and warnings*
+ * **level 5** : *All Jobs in Out-of-date Tasks, (include only list of up-to-date tasks)*
+ * **level 6** : *All jobs in All Tasks whether out of date or not*
+ * **level 10**: *Logs messages useful only for debugging ruffus pipeline code*
+
+ * Defaults to **level 4** for pipeline_printout: *Out of date jobs, with explanations and warnings*
+ * Defaults to **level 1** for pipeline_run: *Out-of-date Task names*
+
+============================================================================================================================================================
+5) Allow abbreviated paths from ``pipeline_run`` or ``pipeline_printout``
+============================================================================================================================================================
+
+ .. note ::
+
+ Please contact me with suggestions if you find the abbreviations useful but "aesthetically challenged"!
+
+ Some pipelines produce interminable lists of long filenames. It would be nice to be able to abbreviate this
+ to just enough information to follow the progress.
+
+ Ruffus now allows either
+ 1) Only the nth top level sub-directories to be included
+ 2) The message to be truncated to a specified number of characters (to fit on a line, for example)
+
+ Note that the number of characters specified is the separate length of the input and output parameters,
+ not the entire message. You may need to specify a smaller limit than you expect (e.g. ``60`` rather than ``80``).
+
+ .. code-block:: python
+
+ pipeline_printout(verbose_abbreviated_path = NNN)
+ pipeline_run(verbose_abbreviated_path = -MMM)
+
+
+ The ``verbose_abbreviated_path`` parameter restricts the length of input / output file paths to either
+
+ * NNN levels of nested paths
+ * A total of MMM characters, where MMM is specified by setting ``verbose_abbreviated_path`` to -MMM (i.e. negative values)
+
+ ``verbose_abbreviated_path`` defaults to ``2``
+
+
+ For example:
+
+ Given ``["aa/bb/cc/dddd.txt", "aaa/bbbb/cccc/eeed/eeee/ffff/gggg.txt"]``
+
+
+ .. code-block:: python
+ :emphasize-lines: 1,4,8,19
+
+ # Original relative paths
+ "[aa/bb/cc/dddd.txt, aaa/bbbb/cccc/eeed/eeee/ffff/gggg.txt]"
+
+ # Full abspath
+ verbose_abbreviated_path = 0
+ "[/test/ruffus/src/aa/bb/cc/dddd.txt, /test/ruffus/src/aaa/bbbb/cccc/eeed/eeee/ffff/gggg.txt]"
+
+ # Specified level of nested directories
+ verbose_abbreviated_path = 1
+ "[.../dddd.txt, .../gggg.txt]"
+
+ verbose_abbreviated_path = 2
+ "[.../cc/dddd.txt, .../ffff/gggg.txt]"
+
+ verbose_abbreviated_path = 3
+ "[.../bb/cc/dddd.txt, .../eeee/ffff/gggg.txt]"
+
+
+ # Truncated to MMM characters
+ verbose_abbreviated_path = -60
+ "<???> /bb/cc/dddd.txt, aaa/bbbb/cccc/eeed/eeee/ffff/gggg.txt]"
+
+
+ If you are using ``ruffus.cmdline``, the abbreviated path lengths can be specified on
+ the command line as an extension to the verbosity:
+
+ .. code-block:: bash
+ :emphasize-lines: 4,7
+
+ # verbosity of 4
+ yourscript.py --verbose 4
+
+ # display three levels of nested directories
+ yourscript.py --verbose 4:3
+
+ # restrict input and output parameters to 60 letters
+ yourscript.py --verbose 4:-60
+
+
+ The number after the colon is the abbreviated path length
+
+
+============================================================================================================================================================
+Other changes
+============================================================================================================================================================
+ * BUG FIX: Output producing wildcards was not saved in the checksum files!!!
+ * BUG FIX: @mkdir bug under Windows. Thanks to Sean Turley. (Aargh! Different exceptions are thrown in Windows vs. Linux for the same condition!)
+ * Added :ref:`pipeline_get_task_names(...) <pipeline_functions.pipeline_get_task_names>` which returns all task names as a list of strings. Thanks to Clare Sloggett
+
+
+********************************************************************
+version 2.4.1
+********************************************************************
+
+ 26th April 2014
+
+ * Breaking changes to drmaa API suggested by Bernie Pope to ensure portability across different drmaa implementations (SGE, SLURM etc.)
+
+********************************************************************
+version 2.4
+********************************************************************
+
+ 4th April 2014
+
+============================================================================================================================================================
+Additions to ``ruffus`` namespace
+============================================================================================================================================================
+
+ * :ref:`formatter() <new_manual.formatter>` (:ref:`syntax <decorators.formatter>`)
+ * :ref:`originate() <new_manual.originate>` (:ref:`syntax <decorators.originate>`)
+ * :ref:`subdivide() <new_manual.subdivide>` (:ref:`syntax <decorators.subdivide>`)
+
+============================================================================================================================================================
+Installation: use pip
+============================================================================================================================================================
+
+ ::
+
+ sudo pip install ruffus --upgrade
+
+============================================================================================================================================================
+1) Command Line support
+============================================================================================================================================================
+
+ The optional ``Ruffus.cmdline`` module provides support for a set of common command
+ line arguments which make writing *Ruffus* pipelines much more pleasant.
+ See :ref:`manual <new_manual.cmdline>`
+
+============================================================================================================================================================
+2) Check pointing
+============================================================================================================================================================
+
+ * Contributed by **Jake Biesinger**
+ * See :ref:`Manual <new_manual.checkpointing>`
+ * Uses a fault resistant sqlite database file to log i/o files, and additional checksums
+ * defaults to checking file timestamps stored in the current directory (``ruffus_utility.RUFFUS_HISTORY_FILE = '.ruffus_history.sqlite'``)
+ * :ref:`pipeline_run(..., checksum_level = N, ...) <pipeline_functions.pipeline_run>`
+
+ * level 0 = CHECKSUM_FILE_TIMESTAMPS : Classic mode. Use only file timestamps (no checksum file will be created)
+ * level 1 = CHECKSUM_HISTORY_TIMESTAMPS : Also store timestamps in a database after successful job completion
+ * level 2 = CHECKSUM_FUNCTIONS : As above, plus a checksum of the pipeline function body
+ * level 3 = CHECKSUM_FUNCTIONS_AND_PARAMS : As above, plus a checksum of the pipeline function default arguments and the additional arguments passed in by task decorators
+
+ * defaults to level 1
+
+ * Can speed up trivial tasks: Previously Ruffus always added an extra 1 second pause between tasks
+ to guard against file systems (Ext3, FAT, some NFS) with low timestamp granularity.
+
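+ For example, as a sketch (``final_task`` stands in for the last task of your own pipeline)::
+
+     # level 2: re-run jobs when the timestamps, the history database or the
+     # task function body show that the output is out of date
+     pipeline_run([final_task], checksum_level = 2)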
+
+============================================================================================================================================================
+3) :ref:`subdivide() <new_manual.subdivide>` (:ref:`syntax <decorators.subdivide>`)
+============================================================================================================================================================
+
+ * Takes a list of input jobs (like :ref:`@transform <decorators.transform>`) but further splits each one into multiple jobs, i.e. it is a **many->even more** relationship
+ * synonym for the deprecated ``@split(..., regex(), ...)``
+
+========================================================================================================================================================================================================================================================================================================================
+4) :ref:`mkdir() <new_manual.mkdir>` (:ref:`syntax <decorators.mkdir>`) with :ref:`formatter() <new_manual.formatter>`, :ref:`suffix() <decorators.suffix>` and :ref:`regex() <decorators.regex>`
+========================================================================================================================================================================================================================================================================================================================
+
+ * allows directories to be created depending on runtime parameters or the output of previous tasks
+ * behaves just like :ref:`@transform <decorators.transform>` but with its own (internal) function which does the actual work of making a directory
+ * Previous behaviour is retained: ``mkdir`` continues to work seamlessly inside :ref:`@follows <decorators.follows>`
+
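+ As an illustrative sketch only (``previous_task`` and the directory layout are assumptions; assuming ``from ruffus import *``)::
+
+     @mkdir(previous_task, formatter(), "{path[0]}/sorted")
+     @transform(previous_task, formatter(), "{path[0]}/sorted/{basename[0]}.sorted")
+     def sort_file(input_file, output_file):
+         pass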
+============================================================================================================================================================
+5) :ref:`originate() <new_manual.originate>` (:ref:`syntax <decorators.originate>`)
+============================================================================================================================================================
+
+ * Generates output files without dependencies from scratch (*ex nihilo*!)
+ * For first step in a pipeline
+ * Task function obviously only takes output and not input parameters. (There *are* no inputs!)
+ * synonym for :ref:`@split(None,...) <decorators.split>`
+ * See :ref:`Summary <decorators.originate>` / :ref:`Manual <new_manual.originate>`
+
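+ A small sketch (the output file names are illustrative; assuming ``from ruffus import *``)::
+
+     @originate(["first.start", "second.start"])
+     def create_initial_files(output_file):
+         # one job per output file; there are no inputs
+         with open(output_file, "w") as f:
+             f.write("seed data\n")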
+========================================================================================================================================================================================================================================================================================================================
+6) New flexible :ref:`formatter() <new_manual.formatter>` (:ref:`syntax <decorators.formatter>`) alternative to :ref:`regex() <decorators.regex>` & :ref:`suffix() <decorators.suffix>`
+========================================================================================================================================================================================================================================================================================================================
+
+ * Easy manipulation of path subcomponents in the style of `os.path.split() <http://docs.python.org/2/library/os.path.html#os.path.split>`__
+ * Regular expressions are no longer necessary for path manipulation
+ * Familiar python syntax
+ * Optional regular expression matches
+ * Can refer to any file in the list of N input files (not only the first file as for ``regex(...)``)
+ * Can even refer to individual letters within a match
+
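+ As a sketch (the regular expression, ``previous_task`` and the output template are illustrative; assuming ``from ruffus import *``)::
+
+     @transform(previous_task,
+                formatter(r"(?P<SAMPLE>[^/]+)\.foo$"),
+                # reuse the input directory and the named capture group
+                "{path[0]}/{SAMPLE[0]}.bar")
+     def next_task(input_file, output_file):
+         pass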
+============================================================================================================================================================
+7) Combinatorics (all vs. all decorators)
+============================================================================================================================================================
+
+ * :ref:`@product <new_manual.product>` (See `itertools.product <http://docs.python.org/2/library/itertools.html#itertools.product>`__)
+ * :ref:`@permutations <new_manual.permutations>` (See `itertools.permutations <http://docs.python.org/2/library/itertools.html#itertools.permutations>`__)
+ * :ref:`@combinations <new_manual.combinations>` (See `itertools.combinations <http://docs.python.org/2/library/itertools.html#itertools.combinations>`__)
+ * :ref:`@combinations_with_replacement <new_manual.combinations_with_replacement>` (See `itertools.combinations_with_replacement <http://docs.python.org/2/library/itertools.html#itertools.combinations_with_replacement>`__)
+ * in optional :ref:`combinatorics <new_manual.combinatorics>` module
+ * Only :ref:`formatter() <new_manual.formatter>` provides the necessary flexibility to construct the output. (:ref:`suffix() <decorators.suffix>` and :ref:`regex() <decorators.regex>` are not supported.)
+ * See :ref:`Summary <decorators.combinatorics>` / :ref:`Manual <new_manual.combinatorics>`
+
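+ As a sketch of ``@product`` (the file names and output template are illustrative; assuming ``from ruffus import *``)::
+
+     from ruffus.combinatorics import product
+
+     @product(["a.fasta", "b.fasta"], formatter(),
+              ["x.database", "y.database"], formatter(),
+              # one output per (fasta, database) pair: a.x.match, a.y.match, ...
+              "{basename[0][0]}.{basename[1][0]}.match")
+     def align(input_files, output_file):
+         pass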
+
+
+============================================================================================================================================================
+8) drmaa support and multithreading:
+============================================================================================================================================================
+
+ * :ref:`ruffus.drmaa_wrapper.run_job() <new_manual.ruffus.drmaa_wrapper.run_job>` (:ref:`syntax <drmaa_wrapper.run_job>`)
+ * Optional helper module allows jobs to dispatch work to a computational cluster and wait until it completes.
+ * Requires ``multithread`` rather than ``multiprocess``
+
+============================================================================================================================================================
+9) ``pipeline_run(...)`` and exceptions
+============================================================================================================================================================
+ See :ref:`Manual <new_manual.exceptions>`
+
+ * Optionally terminate pipeline after first exception
+ * Display exceptions without delay
+
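+ For example (using the parameters described under version 2.3 below; ``final_task`` is illustrative)::
+
+     # stop at the first error and log each exception as soon as it happens
+     pipeline_run([final_task],
+                  multiprocess = 5,
+                  exceptions_terminate_immediately = True,
+                  log_exceptions = True)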
+
+============================================================================================================================================================
+10) Miscellaneous
+============================================================================================================================================================
+
+ Better error messages for ``formatter()``, ``suffix()`` and ``regex()`` in ``pipeline_printout(..., verbose >= 3, ...)``:
+
+ * Error messages now show the mismatching regular expression and the offending file name
+ * Wrong capture group names or out-of-range indices raise an informative Exception
+
+********************************************************************
+version 2.3
+********************************************************************
+ 1st September, 2013
+
+ * ``@active_if`` turns off tasks at runtime
+ The Design and initial implementation were contributed by Jacob Biesinger
+
+ Takes one or more parameters which can be either booleans or functions or callable objects which return True / False::
+
+     run_if_true_1 = True
+     run_if_true_2 = False
+
+     @active_if(run_if_true_1, lambda: run_if_true_2)
+     def this_task_might_be_inactive():
+         pass
+
+ The expressions inside @active_if are evaluated each time
+ ``pipeline_run``, ``pipeline_printout`` or ``pipeline_printout_graph`` is called.
+
+ Dormant tasks behave as if they are up to date and have no output.
+
+ * Command line parsing
+ * Supports both argparse (python 2.7) and optparse (python 2.6):
+ * ``Ruffus.cmdline`` module is optional.
+ * See :ref:`manual <new_manual.cmdline>`
+ * Optionally terminate pipeline after first exception
+ To have all exceptions interrupt immediately::
+
+ pipeline_run(..., exceptions_terminate_immediately = True)
+
+ By default ruffus accumulates ``NN`` errors before interrupting the pipeline prematurely. ``NN`` is the specified parallelism for ``pipeline_run(..., multiprocess = NN)``.
+
+ Otherwise, a pipeline will only be interrupted immediately if exceptions of type ``ruffus.JobSignalledBreak`` are thrown.
+
+ * Display exceptions without delay
+
+ By default, Ruffus re-throws exceptions in ensemble after pipeline termination.
+
+ To see exceptions as they occur::
+
+ pipeline_run(..., log_exceptions = True)
+
+ ``logger.error(...)`` will be invoked with the string representation of each exception, and the associated stack trace.
+
+ The default logger prints to sys.stderr, but this can be changed to any class from the logging module or compatible object via ``pipeline_run(..., logger = ???)``
+
+ * Improved ``pipeline_printout()``
+
+ * `@split` operations now show the 1->many output in pipeline_printout
+
+ This makes it clearer that ``@split`` is creating multiple output parameters (rather than a single output parameter consisting of a list)::
+
+     Task = split_animals
+     Job  = [None
+             -> cows
+             -> horses
+             -> pigs
+             , any_extra_parameters]
+
+ * File date and time are displayed in human readable form and out of date files are flagged with asterisks.
+
+
+
+********************************************************************
+version 2.2
+********************************************************************
+ 22nd July, 2010
+
+ * Simplifying **@transform** syntax with **suffix(...)**
+
+ Regular expressions within ruffus are very powerful, and can allow files to be moved
+ from one directory to another and renamed at will.
+
+ However, using consistent file extensions and
+ ``@transform(..., suffix(...))`` makes the code much simpler and easier to read.
+
+ Previously, ``suffix(...)`` did not cooperate well with ``inputs(...)``.
+ For example, finding the corresponding header file (".h") for the matching input
+ required a complicated ``regex(...)`` regular expression and ``inputs(...)``. This simple case,
+ e.g. matching "something.c" with "something.h", is now much easier in Ruffus.
+
+
+ For example:
+ ::
+
+     source_files = ["something.c", "more_code.c"]
+     @transform(source_files, suffix(".c"), add_inputs(r"\1.h", "common.h"), ".o")
+     def compile(input_files, output_file):
+         ( source_file,
+           header_file,
+           common_header) = input_files
+         # call compiler to make object file
+
+ This is equivalent to calling:
+
+ ::
+
+ compile(["something.c", "something.h", "common.h"], "something.o")
+ compile(["more_code.c", "more_code.h", "common.h"], "more_code.o")
+
+ The ``\1`` matches everything *but* the suffix and will be applied to both ``glob``\ s and file names.
+
+ For simplicity and compatibility with previous versions, there is always an implied r"\1" before
+ the output parameters. I.e. output parameter strings are *always* substituted.
+
+
+ * Tasks and glob in **inputs(...)** and **add_inputs(...)**
+
+ ``glob``\ s and tasks can be added as the prerequisites / input files using
+ ``inputs(...)`` and ``add_inputs(...)``. ``glob`` expansions will take place when the task
+ is run.
+
+ * Advanced form of **@split** with **regex**:
+
+ The standard ``@split`` divides one set of inputs into multiple outputs (the number of which
+ can be determined at runtime).
+
+ This is a ``one->many`` operation.
+
+
+ An advanced form of ``@split`` has been added which can split each of several files further.
+
+ In other words, this is a ``many->"many more"`` operation.
+
+ For example, given three starting files:
+ ::
+
+     original_files = ["original_0.file",
+                       "original_1.file",
+                       "original_2.file"]
+ We can split each into its own set of sub-sections:
+ ::
+
+     @split(original_files,
+            regex(r"original_(\d+)\.file"),   # match starting files
+            r"files.split.\1.*.fa",           # glob pattern for the output
+            r"\1")                            # index of original file
+     def split_files(input_file, output_files, original_index):
+         """
+             Code to split each input_file
+                 "original_0.file" -> "files.split.0.*.fa"
+                 "original_1.file" -> "files.split.1.*.fa"
+                 "original_2.file" -> "files.split.2.*.fa"
+         """
+
+
+ This is, conceptually, the reverse of the @collate(...) decorator
+
+ * Ruffus will complain about unescaped regular expression special characters:
+
+ Ruffus uses "\\1" and "\\2" in regular expression substitutions. Even seasoned python
+ users may not remember that these have to be 'escaped' in strings. The best option is
+ to use 'raw' python strings e.g.
+
+ ::
+
+ r"\1_substitutes\2correctly\3four\4times"
+
+ Ruffus will throw an exception if it sees an unescaped "\\1" or "\\2" in a file name,
+ which should catch most of these bugs.
+
+ * Prettier output from *pipeline_printout_graph*
+
+ Changed to nicer colours, symbols etc. for a more professional look.
+ @split and @merge tasks now look different from @transform.
+ Colours, size and resolution are now fully customisable::
+
+     pipeline_printout_graph( # ...
+                              user_colour_scheme = {
+                                                     "colour_scheme_index" : 1,
+                                                     "Task to run" : {"fillcolor": "blue"},
+                                                   },
+                              pipeline_name = "My flowchart",
+                              size = (11, 8),
+                              dpi = 120)
+
+ An SVG bug in firefox has been worked around so that font sizes are displayed correctly.
+
+
+
+
+********************************************************************
+version 2.1.1
+********************************************************************
+ * **@transform(.., add_inputs(...))**
+ ``add_inputs(...)`` allows the addition of extra input dependencies / parameters for each job.
+
+ Unlike ``inputs(...)``, the original input parameter is retained:
+ ::
+
+     from ruffus import *
+     @transform(["a.input", "b.input"], suffix(".input"), add_inputs("just.1.more","just.2.more"), ".output")
+     def task(i, o):
+         ""
+
+ Produces:
+ ::
+
+ Job = [[a.input, just.1.more, just.2.more] ->a.output]
+ Job = [[b.input, just.1.more, just.2.more] ->b.output]
+
+
+ Like ``inputs``, ``add_inputs`` accepts strings, tasks and ``glob``\ s.
+ This minor syntactic change promises to add much clarity to Ruffus code.
+ ``add_inputs()`` is available for ``@transform``, ``@collate`` and ``@split``.
+
+
+********************************************************************
+version 2.1.0
+********************************************************************
+ * **@jobs_limit**
+ Some tasks are resource intensive and too many jobs should not be run at the
+ same time. Examples include disk intensive operations such as unzipping, or
+ downloading from FTP sites.
+
+ Adding::
+
+     @jobs_limit(4)
+     @transform(new_data_list, suffix(".big_data.gz"), ".big_data")
+     def unzip(i, o):
+         "unzip code goes here"
+
+ would limit the unzip operation to 4 jobs at a time, even if the rest of the
+ pipeline runs highly in parallel.
+
+ (Thanks to Rob Young for suggesting this.)
+
+********************************************************************
+version 2.0.10
+********************************************************************
+ * **touch_files_only** option for **pipeline_run**
+
+ When the pipeline runs, task functions will not be run. Instead, the output files for
+ each job (in each task) will be ``touch``\ -ed if necessary.
+ This can be useful for simulating a pipeline run so that all files look as
+ if they are up-to-date.
+
+ Caveats:
+
+ * This may not work correctly where output files are only determined at runtime, e.g. with **@split**
+ * Only the output from pipelined jobs which are currently out-of-date will be ``touch``\ -ed.
+ In other words, the pipeline runs *as normal*, the only difference is that the
+ output files are ``touch``\ -ed instead of being created by the python task functions
+ which would otherwise have been called.
+
+ * Parameter substitution for **inputs(...)**
+
+ The **inputs(...)** parameter in **@transform**, **@collate** can now take tasks and ``glob`` s,
+ and these will be expanded appropriately (after regular expression replacement).
+
+ For example::
+
+ @transform("dir/a.input", regex(r"(.*)\/(.+).input"),
+ inputs((r"\1/\2.other", r"\1/*.more")), r"elsewhere/\2.output")
+ def task1(i, o):
+ """
+ Some pipeline task
+ """
+
+ Is equivalent to calling::
+
+ task1(("dir/a.other", "dir/1.more", "dir/2.more"), "elsewhere/a.output")
+
+ \
+
+ Here::
+
+ r"\1/*.more"
+
+ is first converted to::
+
+ r"dir/*.more"
+
+ which matches::
+
+ "dir/1.more"
+ "dir/2.more"
+
+
+********************************************************************
+version 2.0.9
+********************************************************************
+
+ * Better display of logging output
+ * Advanced form of **@split**
+ This is an experimental feature.
+
+ Hitherto, **@split** only took one set of inputs (tasks/files/``glob`` s) and split these
+ into an indeterminate number of outputs.
+
+ This is a one->many operation.
+
+ Sometimes it is desirable to take multiple input files, and split each of them further.
+
+ This is a many->many (more) operation.
+
+ It is possible to hack something together using **@transform** but downstream tasks would not be
+ aware that each job in **@transform** produces multiple outputs (rather than one input,
+ one output per job).
+
+ The syntax looks like::
+
+     @split(get_files, regex(r"(.+).original"), r"\1.*.split")
+     def split_files(i, o):
+         pass
+
+ If ``get_files()`` returned ``A.original``, ``B.original`` and ``C.original``,
+ ``split_files()`` might lead to the following operations::
+
+     A.original
+         -> A.1.split
+         -> A.2.split
+         -> A.3.split
+     B.original
+         -> B.1.split
+         -> B.2.split
+     C.original
+         -> C.1.split
+         -> C.2.split
+         -> C.3.split
+         -> C.4.split
+         -> C.5.split
+
+ Note that each input (``A/B/C.original``) can produce a number of output files, the exact
+ number of which does not have to be pre-determined.
+ This is similar to **@split**
+
+ Tasks following ``split_files`` will have ten inputs corresponding to each of the
+ outputs from ``split_files``.
+
+ If **@transform** was used instead of **@split**, then tasks following ``split_files``
+ would only have 3 inputs.
+
+********************************************************************
+version 2.0.8
+********************************************************************
+
+ * File names can be in unicode
+ * File systems with 1 second timestamp granularity no longer cause problems.
+
+********************************************************************
+version 2.0.2
+********************************************************************
+
+ * Much prettier / more useful output from :ref:`pipeline_printout <pipeline_functions.pipeline_printout>`
+ * New tutorial / manual
+
+
+
+********************************************************************
+version 2.0
+********************************************************************
+ * Revamped documentation:
+
+ * Rewritten tutorial
+ * Comprehensive manual
+ * New syntax help
+
+ * Major redesign. New decorators include
+
+ * :ref:`@split <new_manual.split>`
+ * :ref:`@transform <new_manual.transform>`
+ * :ref:`@merge <new_manual.merge>`
+ * :ref:`@collate <new_manual.collate>`
+
+ * Major redesign. Decorator *inputs* can mix
+
+ * Output from previous tasks
+ * |glob|_ patterns e.g. ``*.txt``
+ * Files names
+ * Any other data type
+
+********************************************************************
+version 1.1.4
+********************************************************************
+ Tasks can get their input by automatically chaining to the output from one or more parent tasks using :ref:`@files_re <decorators.files_re>`
+
+********************************************************************
+version 1.0.7
+********************************************************************
+ Added `proxy_logger` module for accessing a shared log across multiple jobs in different processes.
+
+********************************************************************
+version 1.0
+********************************************************************
+
+ Initial Release in Oxford
+
+########################################
+Fixed Bugs
+########################################
+
+ Full list at `"Latest Changes wiki entry" <http://code.google.com/p/ruffus/wiki/LatestChanges>`_
diff --git a/doc/_build/html/_sources/implementation_notes.txt b/doc/_build/html/_sources/implementation_notes.txt
new file mode 100644
index 0000000..bba4374
--- /dev/null
+++ b/doc/_build/html/_sources/implementation_notes.txt
@@ -0,0 +1,437 @@
+##########################################
+Implementation Tips
+##########################################
+
+******************************************************************************
+Release
+******************************************************************************
+
+ * Change ``ruffus_version.py``
+
+ * Rebuild the pdf and copy it to ``doc/static_data``::
+
+     cd doc
+     make latexpdf
+     cp _build/latex/ruffus.pdf static_data
+
+ * Rebuild documentation::
+
+ make htmlsync
+
+ * tag git with, for example::
+
+ git tag -a v2.5RC -m "Version 2.5 Release Candidate"
+
+
+ * Upload to pypi::
+
+ python setup.py sdist --format=gztar upload
+
+******************************************************************************
+dbdict.py
+******************************************************************************
+
+ This is an sqlite backed dictionary originally written by Jacob Sondergaard and
+ contributed by Jake Biesinger who added automatic pickling of python objects.
+
+ The pickling code was refactored out by Leo Goodstadt into separate functions as
+ part of the preparation to make Ruffus python3 ready.
+
+ Python originally saved (pickled) objects as 7 bit ASCII strings. Later formats
+ (protocol = -1 selects the latest format) use 8 bit strings and are rather more efficient.
+
+ These then need to be saved as BLOBs in sqlite3 rather than as normal strings. We
+ can signal this by wrapping the pickled string in an object providing a "buffer interface".
+ This is ``buffer`` in python2.6/2.7 and ``memoryview`` in python3.
+
+ http://bugs.python.org/issue7723 suggests there is no portable python2/3 way to write
+ blobs to Sqlite without these two incompatible wrappers.
+ This would require conditional compilation:
+
+ .. code-block:: python
+
+     if sys.hexversion >= 0x03000000:
+         value = memoryview(pickle.dumps(value, protocol = -1))
+     else:
+         value = buffer(pickle.dumps(value, protocol = -1))
+
+
+ Despite the discussion on the bug report, sqlite3.Binary seems to work.
+ We shall see if this is portable to python3.
+
+******************************************************************************
+how to write new decorators
+******************************************************************************
+
+
+ New placeholder class. E.g. for ``@new_deco``
+
+ .. code-block:: python
+
+     class new_deco(task_decorator):
+         pass
+
+ Add to list of action names and ids:
+
+ .. code-block:: python
+
+     action_names = ["unspecified",
+                     ...
+                     "task_new_deco",
+                    ]
+
+     action_task_new_deco = 15
+
+ Add function:
+
+ .. code-block:: python
+
+     def task_new_deco (self, orig_args):
+
+
+ Add documentation to:
+
+ * decorators/NEW_DECORATOR.rst
+ * decorators/decorators.rst
+ * _templates/layout.html
+ * manual
+
+
+
+
+##########################################
+Implementation notes
+##########################################
+
+N.B. Remember to cite Jake Biesinger and see if he is interested in being a co-author if we ever resubmit the drastically changed version...
+He contributed checkpointing, travis and tox etc.
+
+.. _todo.misfeatures:
+
+********************************************************************************************************
+``Ctrl-C`` handling
+********************************************************************************************************
+
+ Pressing ``Ctrl-C`` left dangling processes in Ruffus 2.4 because ``KeyboardInterrupt`` does not play nicely with python ``multiprocessing.Pool``.
+ See http://stackoverflow.com/questions/1408356/keyboard-interrupts-with-pythons-multiprocessing-pool/1408476#1408476
+
+ http://bryceboe.com/2012/02/14/python-multiprocessing-pool-and-keyboardinterrupt-revisited/ provides a reimplementation of Pool which
+ however only works when you have a fixed number of jobs which should then run in parallel to completion. Ruffus is considerably more
+ complicated because we have a variable number of jobs completing and being submitted into the job queue at any one time. Think
+ of tasks stalling waiting for the dependent tasks to complete and then all the jobs of the task being released onto the queue
+
+ The solution is
+
+ #. Use a ``timeout`` parameter when using ``IMapIterator.next(timeout=None)`` to iterate through ``pool.imap_unordered`` because only timed ``condition`` s can be interrupted by signals...!!
+ #. This involves rewriting the ``for`` loop manually as a ``while`` loop
+ #. We use a timeout of ``99999999``, i.e. 3 years, which should be enough for any job to complete...
+ #. Googling after the fact, it looks like the galaxy guys (cool dudes or what) have written similar `code <https://galaxy-dist.readthedocs.org/en/latest/_modules/galaxy/objectstore/s3_multipart_upload.html>`__
+ #. ``next()`` for normal iterators do not take ``timeout`` as an extra parameter so we have to wrap next in a conditional :-(. The galaxy guys do a `shim <http://en.wikipedia.org/wiki/Shim_(computing)>`__ around ``next()`` but that is as much obsfucation as a simple if...
+ #. After jobs are interrupted by a signal, we rethrow with our own exception because we want something that inherits from ``Exception`` unlike ``KeyboardInterrupt``
+ #. When a signal happens, we need to immediately stop ``feed_job_params_to_process_pool()`` from sending more parameters into the job queue (``parameter_q``)
+ We use a proxy to a ``multiprocessing.Event`` (via ``syncmanager.Event()``). When ``death_event`` is set, all further processing stops...
+ #. We also signal that all jobs should finish by putting ``all_tasks_complete()`` into ``parameter_q`` but only ``death_event`` prevents jobs already in the queue from going through
+ #. Ater signalling, some of the child processes appear to be dead by the time we start cleaning up. ``pool.terminate()`` sometimes tries and fails to
+ re-connect to the the ``death_event`` proxy via sockets and throws an exception. We should really figure out a better solution but in the meantime
+ wrapping it in a ``try / except`` allows a clean exit.
+ #. If a vanilla exception is raised without multiprocessing running, we still need to first save the exception in ``job_errors`` (even if it is just one) before
+ cleaning up, because the cleaning up process may lead to further (ignored) exceptions which would overwrite the current exception when we need to rethrow it
+
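+ A stripped-down sketch of the interruptible loop described above (not the actual Ruffus
+ code: the worker function and pool size here are made up for illustration):
+
+ .. code-block:: python
+
+     import multiprocessing
+
+     def run_job(params):
+         return params
+
+     if __name__ == "__main__":
+         pool = multiprocessing.Pool(4)
+         job_iterator = pool.imap_unordered(run_job, range(10))
+         try:
+             while True:
+                 try:
+                     # a timed wait *can* be interrupted by Ctrl-C; an untimed next() cannot
+                     result = job_iterator.next(timeout = 99999999)
+                     # ... handle the completed job's result here ...
+                 except StopIteration:
+                     break
+             pool.close()
+             pool.join()
+         except KeyboardInterrupt:
+             pool.terminate()
+             pool.join()
+             # rethrow as something that inherits from Exception, unlike KeyboardInterrupt
+             raise Exception("Pipeline interrupted by Ctrl-C")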
+
+ Exceptions thrown in the middle of a multiprocessing / multithreading job appear to be handled gracefully.
+
+ For drmaa jobs, ``qdel`` may still be necessary.
+
+
+******************************************************************************
+Python3 compatibility
+******************************************************************************
+
+ Required extensive changes, especially in the unit test code.
+
+ Changes:
+
+ 1. ``sort`` in python3 does not order mixed types, i.e. ``int()``, ``list()`` and ``str()`` are incommensurate
+
+ * In ``task.get_output_files (...)``, sort after conversion to string
+
+ .. code-block:: python
+
+ sorted(self.output_filenames, key = lambda x: str(x))
+
+ * In ``file_name_parameters.py``: ``collate_param_factory (...)``: ``sort`` after conversion to string, then ``groupby`` without string conversion. This is
+ because we can't guarantee that two different objects do not have the same string representation, but ``groupby`` requires that similar things are adjacent...
+
+ In other words, the ``groupby`` key is a refinement of the ``sorted`` key.
+
+ .. code-block:: python
+
+ for output_extra_params, grouped_params in groupby(sorted(io_params_iter, key = get_output_extras_str), key = get_output_extras):
+ pass
+
+ 2. ``print()`` is a function
+
+ .. code-block:: python
+
+ from __future__ import print_function
+
+ 3. ``dict.iteritems()`` does not exist in python3 and must be rewritten as ``items()``, which returns a (copied) list only in python2; watch out wherever the extra copy might cause a performance bottleneck
+ 4. ``zip`` and ``map`` return iterators. Conditionally import in python2
+
+ .. code-block:: python
+
+ import sys
+ if sys.hexversion < 0x03000000:
+ from future_builtins import zip, map
+
+ 5. ``cPickle->pickle`` and ``cStringIO->io`` need to be conditionally imported
+
+ .. code-block:: python
+
+     try:                        # python2
+         import cPickle as pickle
+         import StringIO as io
+     except ImportError:         # python3
+         import pickle
+         import io
+
+
+ 6. ``map`` code can be changed to list comprehensions. Use ``2to3`` to do the heavy lifting
+
+ 7. All normal strings are unicode in python3; ``bytes`` has to be used for 8-bit char arrays.
+ Normally, this means that ``str`` "just works". However, to handle both 8-bit and unicode
+ strings in python2, we often need to check ``isinstance(xxx, basestring)``.
+
+ We need to conditionally define:
+
+ .. code-block:: python
+
+ if sys.hexversion >= 0x03000000:
+ # everything is unicode in python3
+ path_str_type = str
+ else:
+ path_str_type = basestring
+
+ # further down...
+ if isinstance(compiled_regex, path_str_type):
+ pass
+
+
+
+******************************************************************************
+Refactoring: parameter handling
+******************************************************************************
+
+ Though the code is still split in a not very sensible way between ``ruffus_utility.py``, ``file_name_parameters.py`` and ``task.py``,
+ some rationalisation has taken place, and comments have been added so that further refactoring will be easier.
+
+ Common code for::
+
+ file_name_parameters.split_ex_param_factory()
+ file_name_parameters.transform_param_factory()
+ file_name_parameters.collate_param_factory()
+
+ has been moved to ``file_name_parameters.yield_io_params_per_job()``
+
+
+ Unit tests have been added to ``test_file_name_parameters.py`` and ``test_ruffus_utility.py``.
+
+
+
+
+******************************************************************************
+``formatter``
+******************************************************************************
+ ``get_all_paths_components(paths, regex_str)`` in ``ruffus_utility.py``
+
+ Input file names are first flattened into a single list of files.
+ ``get_all_paths_components()`` returns both the regular expression matches and the breakdown of each path.
+
+ In case of name clashes, the categories higher in this list take priority:
+
+ 1) Captures by name
+ 2) Captures by index
+ 3) Path components:
+ 'ext' = extension with dot
+ 'basename' = file name without extension
+ 'path' = path before basename, not ending with slash
+ 'subdir' = list of directories starting with the most nested and ending with the root (if normalised)
+ 'subpath' = list of 'path' with successive directories removed starting with the most nested and ending with the root (if normalised)
+
+ E.g. ``name = '/a/b/c/sample1.bam'`` with ``formatter(r"(.*)(?P<id>\d+)\.(.+)")`` returns:
+
+ .. code-block:: python
+
+         {
+             0:          '/a/b/c/sample1.bam',   # entire match captured by index
+             1:          '/a/b/c/sample',        # captured by index
+             2:          'bam',                  # captured by index
+             'id':       '1',                    # captured by name
+             'ext':      '.bam',
+             'subdir':   ['c', 'b', 'a', '/'],
+             'subpath':  ['/a/b/c', '/a/b', '/a', '/'],
+             'path':     '/a/b/c',
+             'basename': 'sample1',
+         }
+
+
+ The code is in ``ruffus_utility.py``:
+
+ .. code-block:: python
+
+ results = get_all_paths_components(paths, regex_str)
+ string.format(results[2])
+
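+ A rough illustration (plain ``str.format``, not the exact Ruffus substitution machinery)
+ of how such a components dictionary can drive output file name substitution:
+
+ .. code-block:: python
+
+     components = {'path': '/a/b/c', 'basename': 'sample1', 'ext': '.bam', 'id': '1'}
+
+     # build an output name from the decomposed input path
+     output_pattern = "{path}/{basename}.sorted{ext}"
+     print(output_pattern.format(**components))   # -> /a/b/c/sample1.sorted.bam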
+
+ All the magic is hidden inside the black-box ``filename_transform`` classes:
+
+ .. code-block:: python
+
+
+ class t_suffix_filename_transform(t_filename_transform):
+ class t_regex_filename_transform(t_filename_transform):
+ class t_format_filename_transform(t_filename_transform):
+
+===================================================
+``formatter()``: ``regex()`` and ``suffix()``
+===================================================
+
+
+ The previous behaviour of ``regex()``, where mismatches fail even if no substitution is made, is retained by the use of ``re.subn()``.
+ This is a corner case, but I didn't want user code to break.
+
+ .. code-block:: python
+
+ # filter on ".txt"
+ input_filenames = ["a.wrong", "b.txt"]
+ regex("(.txt)$")
+
+ # fails, no substitution possible
+ r"\1"
+
+ # fails anyway even though the regular expression matches are not referenced...
+ r"output.filename"
+
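+ A small illustration (plain ``re.subn()``, not Ruffus code) of why ``re.subn()`` is useful here:
+ it reports how many substitutions were made, so a non-matching input file can be failed
+ even when the replacement string contains no back-references:
+
+ .. code-block:: python
+
+     import re
+
+     for input_filename in ["a.wrong", "b.txt"]:
+         new_name, n_subs = re.subn(r"(.txt)$", r"output.filename", input_filename)
+         if not n_subs:
+             print("%s does not match: the job fails" % input_filename)
+         else:
+             print("%s -> %s" % (input_filename, new_name))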
+
+************************************************************************************************************************************************************
+``@product()``
+************************************************************************************************************************************************************
+
+ * Use the combinatoric generators from itertools and keep that naming scheme (see the sketch after this section)
+ * Put all the new generators in a ``combinatorics`` submodule namespace to avoid breaking user code. (They can be imported if necessary.)
+ * Test code lives in test/test_combinatorics.py
+ * The ``repeat`` parameter of ``itertools.product()`` doesn't make sense for Ruffus and will not be used
+ * Flexible number of pairs of ``task`` / ``glob`` / file names + ``formatter()``
+ * Only ``formatter([OPTIONAL_REGEX])`` provides the necessary flexibility to construct the output, so we won't bother with suffix and regex
+
+ * Similar to ``@transform`` but with an extra level of nesting
+
+ Retain the same code for ``@product`` and ``@transform`` by adding an additional level of indirection:
+
+ * a generator wrapper around ``get_strings_in_nested_sequence`` to convert nested input parameters either to a single flat list of file names or to nested lists of file names
+
+ .. code-block:: python
+
+ file_name_parameters.input_param_to_file_name_list (input_params)
+ file_name_parameters.list_input_param_to_file_name_list (input_params)
+
+ * ``t_file_names_transform`` class which stores a list of regular expressions, one for each ``formatter()`` object corresponding to a single set of input parameters
+
+ .. code-block:: python
+
+ t_formatter_file_names_transform
+ t_nested_formatter_file_names_transform
+
+ * string substitution functions which will apply a list of ``formatter`` changes
+
+ .. code-block:: python
+
+ ruffus.utility.t_formatter_replace()
+ ruffus.utility.t_nested_formatter_replace()
+
+ * ``ruffus_utility.swap_doubly_nested_order()`` makes the syntax / implementation very orthogonal
+
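+ A hypothetical illustration (plain itertools, not Ruffus internals) of how ``itertools.product``
+ pairs up two sets of input files, giving one job per combination; the file names are made up:
+
+ .. code-block:: python
+
+     from itertools import product
+
+     dna_files    = ["a.fasta", "b.fasta"]
+     primer_files = ["x.primers", "y.primers"]
+
+     # one "job" per combination of inputs; Ruffus' @product would apply
+     # formatter() substitution here to build each output file name
+     for dna, primers in product(dna_files, primer_files):
+         output = "%s.%s.result" % (dna.split(".")[0], primers.split(".")[0])
+         print("%s + %s -> %s" % (dna, primers, output))
+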
+************************************************************************************************************************************************************
+``@permutations(...)``, ``@combinations(...)``, ``@combinations_with_replacement(...)``
+************************************************************************************************************************************************************
+
+ Similar to ``@product``, except that the extra level of nesting is self versus self
+ (see the sketch after this list).
+
+ Retain the same code as for ``@product``:
+
+ * forward to a single ``file_name_parameters.combinatorics_param_factory()``
+ * use ``combinatorics_type`` to dispatch to ``combinatorics.permutations``, ``combinatorics.combinations`` and ``combinatorics.combinations_with_replacement``
+ * use ``list_input_param_to_file_name_list`` from ``file_name_parameters.product_param_factory()``
+
+
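+ A small illustration (plain itertools, not Ruffus internals) of the three "self versus self"
+ generators over the same made-up input list:
+
+ .. code-block:: python
+
+     from itertools import (permutations, combinations,
+                            combinations_with_replacement)
+
+     files = ["a.bam", "b.bam", "c.bam"]
+
+     print(list(permutations(files, 2)))                   # ordered pairs, no self-pairing
+     print(list(combinations(files, 2)))                   # unordered pairs, no self-pairing
+     print(list(combinations_with_replacement(files, 2)))  # unordered pairs, self-pairing allowed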
+
+************************************************************************************************************************************************************
+drmaa alternatives
+************************************************************************************************************************************************************
+
+ Alternative, non-drmaa polling code at
+
+ https://github.com/bjpop/rubra/blob/master/rubra/cluster_job.py
+
+
+
+************************************************************************************************************************************************************
+Task completion monitoring
+************************************************************************************************************************************************************
+
+===================================================
+ How easy is it to abstract out the database?
+===================================================
+
+ * The database is Jacob Sondergaard's ``dbdict``, which is a nosql / key-value store wrapper around sqlite:
+
+ .. code-block:: python
+
+ job_history = dbdict.open(RUFFUS_HISTORY_FILE, picklevalues=True)
+
+ * The key is the output file name, so it is important not to confuse Ruffus by having different tasks generate the same output file!
+ * Is it possible to abstract this so that **jobs** get timestamped as well?
+ * If we should ever want to abstract out ``dbdict``, we need a similar key-value store class,
+ and we need to make sure that a single instance of ``dbdict`` is used throughout ``pipeline_run``, passed up
+ and down the function call chain. ``dbdict`` would then be drop-in replaceable by a custom (e.g. flat-file-based) alternative
+ (a rough sketch of the required interface follows below).
+
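+ A hypothetical sketch (the class name is illustrative, not an existing class) of the minimal
+ mapping interface such a drop-in ``dbdict`` replacement would have to provide:
+
+ .. code-block:: python
+
+     class flat_file_dbdict(object):
+         """Key-value store keyed by output file name; persistence omitted for brevity."""
+         def __init__(self, filename):
+             self.filename = filename
+             self._data = {}
+         def __getitem__(self, key):
+             return self._data[key]
+         def __setitem__(self, key, value):
+             self._data[key] = value
+         def __delitem__(self, key):
+             del self._data[key]
+         def __contains__(self, key):
+             return key in self._data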
+
+ To peek into the database:
+
+ .. code-block:: bash
+
+ $ sqlite3 .ruffus_history.sqlite
+ sqlite> .tables
+ data
+ sqlite> .schema data
+ CREATE TABLE data (key PRIMARY KEY,value);
+ sqlite> select key from data order by key;
+
+======================================================================================================
+ Can we query the database, get Job history / stats?
+======================================================================================================
+
+ Yes, if we write a function to read and dump the entire database, but this is only really useful together with timestamps and task names. See below.
+
+======================================================================================================
+ What are the run time performance implications?
+======================================================================================================
+
+ Should be fast: a single db connection is created and used inside ``pipeline_run``, ``pipeline_printout``, ``pipeline_printout_graph``
+
+
+
+===================================================
+ Avoid pauses between tasks
+===================================================
+
+ Storing job completion times in the history database allows Ruffus to avoid adding an extra 1 second pause between tasks to guard against file systems with low timestamp granularity.
+
+ * If the local system clock looks to be in sync with the underlying file system timestamps, the saved system time is used instead of file timestamps (a rough sketch of such a check follows below)
+
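+ A rough sketch (an assumption about how such a check could look, not the actual Ruffus code)
+ of comparing the local clock against the file system's timestamps:
+
+ .. code-block:: python
+
+     import os
+     import tempfile
+     import time
+
+     handle, tmp_name = tempfile.mkstemp()
+     os.close(handle)
+     try:
+         # how far apart are "now" and the freshly written file's mtime?
+         drift = abs(time.time() - os.path.getmtime(tmp_name))
+     finally:
+         os.unlink(tmp_name)
+
+     # if the drift is small, saved system times can stand in for file timestamps
+     print("clock vs. file system drift = %.3f seconds" % drift)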
+
+
+
+******************************************************************************************
+``@mkdir(...)``
+******************************************************************************************
+
+ * ``mkdir`` continues to work seamlessly inside ``@follows`` but also as its own decorator ``@mkdir``, thanks to the original happy orthogonal design
+ * fixed bug in checking so that Ruffus doesn't blow up if non-strings (e.g. numbers...) are in the output
+ * note: adding the decorator to a previously undecorated function might have unintended consequences: the undecorated function turns into a zombie.
+ * fixed ugly bug in ``pipeline_printout`` for printing single line output
+ * fixed description and printout indent
+
+
diff --git a/doc/_build/html/_sources/installation.txt b/doc/_build/html/_sources/installation.txt
new file mode 100644
index 0000000..f3ca116
--- /dev/null
+++ b/doc/_build/html/_sources/installation.txt
@@ -0,0 +1,79 @@
+.. include:: global.inc
+.. _Installation:
+
+************************************
+Installation
+************************************
+
+:mod:`Ruffus` is a lightweight python module for building computational pipelines.
+
+
+The easy way
+============
+
+ *Ruffus* is available as an
+ `easy-install <http://peak.telecommunity.com/DevCenter/EasyInstall>`_ -able package
+ on the `Python Package Index <http://pypi.python.org/pypi/Sphinx>`_.
+
+ ::
+
+ sudo pip install ruffus --upgrade
+
+ The following may also work for older installations:
+
+ #) Install setuptools::
+
+ wget peak.telecommunity.com/dist/ez_setup.py
+ sudo python ez_setup.py
+
+ #) Install *Ruffus* automatically::
+
+ easy_install -U ruffus
+
+
+The most up-to-date code:
+==============================
+ * `Download the latest sources <https://pypi.python.org/pypi/ruffus>`_ or
+
+ * Check out the latest code from Google using git::
+
+ git clone https://bunbun68@code.google.com/p/ruffus/ .
+
+ * Bleeding edge Ruffus development takes place on github::
+
+ git clone git@github.com:bunbun/ruffus.git .
+
+
+ * To install after downloading, change to the download directory and type::
+
+ python ./setup.py install
+
+
+======================
+Graphical flowcharts
+======================
+
+ **Ruffus** relies on the ``dot`` programme from `Graphviz <http://www.graphviz.org/>`_
+ ("Graph visualisation") to make pretty flowchart representations of your pipelines in multiple
+ graphical formats (e.g. ``png``, ``jpg``). The cross-platform Graphviz package can be
+ `downloaded here <http://www.graphviz.org/Download.php>`_ for Windows,
+ Linux, Macs and Solaris. Some Linux
+ distributions may include prebuilt packages.
+
+ For Fedora, try
+ ::
+
+ yum list 'graphviz*'
+
+ For Ubuntu / Debian, try
+ ::
+
+ sudo apt-get install graphviz
+
+
+
+
+
+
+
+
diff --git a/doc/_build/html/_sources/pipeline_functions.txt b/doc/_build/html/_sources/pipeline_functions.txt
new file mode 100644
index 0000000..fd267fb
--- /dev/null
+++ b/doc/_build/html/_sources/pipeline_functions.txt
@@ -0,0 +1,689 @@
+.. include:: global.inc
+.. _pipeline_functions:
+
+See :ref:`Decorators <decorators>` for more decorators
+
+.. |pipeline_run| replace:: `pipeline_run`
+.. _pipeline_run: `pipeline_functions.pipeline_run`_
+.. |pipeline_printout| replace:: `pipeline_printout`
+.. _pipeline_printout: `pipeline_functions.pipeline_printout`_
+.. |pipeline_printout_graph| replace:: `pipeline_printout_graph`
+.. _pipeline_printout_graph: `pipeline_functions.pipeline_printout_graph`_
+.. |pipeline_get_task_names| replace:: `pipeline_get_task_names`
+.. _pipeline_get_task_names: `pipeline_functions.pipeline_get_task_names`_
+
+
+.. |pr_target_tasks| replace:: `target_tasks`
+.. _pr_target_tasks: `pipeline_functions.pipeline_run.target_tasks`_
+.. |pr_forcedtorun_tasks| replace:: `forcedtorun_tasks`
+.. _pr_forcedtorun_tasks: `pipeline_functions.pipeline_run.forcedtorun_tasks`_
+.. |pr_multiprocess| replace:: `multiprocess`
+.. _pr_multiprocess: `pipeline_functions.pipeline_run.multiprocess`_
+.. |pr_logger| replace:: `logger`
+.. _pr_logger: `pipeline_functions.pipeline_run.logger`_
+.. |pr_gnu_make| replace:: `gnu_make_maximal_rebuild_mode`
+.. _pr_gnu_make: `pipeline_functions.pipeline_run.gnu_make`_
+.. |pr_verbose| replace:: `verbose`
+.. _pr_verbose: `pipeline_functions.pipeline_run.verbose`_
+.. |pr_runtime_data| replace:: `runtime_data`
+.. _pr_runtime_data: `pipeline_functions.pipeline_run.runtime_data`_
+.. |pr_one_second_per_job| replace:: `one_second_per_job`
+.. _pr_one_second_per_job: `pipeline_functions.pipeline_run.one_second_per_job`_
+.. |pr_touch_files_only| replace:: `touch_files_only`
+.. _pr_touch_files_only: `pipeline_functions.pipeline_run.touch_files_only`_
+
+.. |pr_exceptions_terminate_immediately| replace:: `exceptions_terminate_immediately`
+.. _pr_exceptions_terminate_immediately: `pipeline_functions.pipeline_run.exceptions_terminate_immediately`_
+.. |pr_log_exceptions| replace:: `log_exceptions`
+.. _pr_log_exceptions: `pipeline_functions.pipeline_run.log_exceptions`_
+.. |pr_multithread| replace:: `multithread`
+.. _pr_multithread: `pipeline_functions.pipeline_run.multithread`_
+.. |pr_checksum_level| replace:: `checksum_level`
+.. _pr_checksum_level: `pipeline_functions.pipeline_run.checksum_level`_
+.. |pr_history_file| replace:: `history_file`
+.. _pr_history_file: `pipeline_functions.pipeline_run.history_file`_
+.. |pr_verbose_abbreviated_path| replace:: `verbose_abbreviated_path`
+.. _pr_verbose_abbreviated_path: `pipeline_functions.pipeline_run.verbose_abbreviated_path`_
+
+
+.. |pp_output_stream| replace:: `output_stream`
+.. _pp_output_stream: `pipeline_functions.pipeline_printout.output_stream`_
+.. |pp_target_tasks| replace:: `target_tasks`
+.. _pp_target_tasks: `pipeline_functions.pipeline_printout.target_tasks`_
+.. |pp_forcedtorun_tasks| replace:: `forcedtorun_tasks`
+.. _pp_forcedtorun_tasks: `pipeline_functions.pipeline_printout.forcedtorun_tasks`_
+.. |pp_verbose| replace:: `verbose`
+.. _pp_verbose: `pipeline_functions.pipeline_printout.verbose`_
+.. |pp_indent| replace:: `indent`
+.. _pp_indent: `pipeline_functions.pipeline_printout.indent`_
+.. |pp_wrap_width| replace:: `wrap_width`
+.. _pp_wrap_width: `pipeline_functions.pipeline_printout.wrap_width`_
+.. |pp_gnu_make| replace:: `gnu_make_maximal_rebuild_mode`
+.. _pp_gnu_make: `pipeline_functions.pipeline_printout.gnu_make`_
+.. |pp_runtime_data| replace:: `runtime_data`
+.. _pp_runtime_data: `pipeline_functions.pipeline_printout.runtime_data`_
+.. |pp_checksum_level| replace:: `checksum_level`
+.. _pp_checksum_level: `pipeline_functions.pipeline_printout.checksum_level`_
+.. |pp_history_file| replace:: `history_file`
+.. _pp_history_file: `pipeline_functions.pipeline_printout.history_file`_
+.. |pp_verbose_abbreviated_path| replace:: `verbose_abbreviated_path`
+.. _pp_verbose_abbreviated_path: `pipeline_functions.pipeline_printout.verbose_abbreviated_path`_
+
+
+
+.. |ppg_stream| replace:: `stream`
+.. _ppg_stream: `pipeline_functions.pipeline_printout_graph.stream`_
+.. |ppg_output_format| replace:: `output_format`
+.. _ppg_output_format: `pipeline_functions.pipeline_printout_graph.output_format`_
+.. |ppg_target_tasks| replace:: `target_tasks`
+.. _ppg_target_tasks: `pipeline_functions.pipeline_printout_graph.target_tasks`_
+.. |ppg_forcedtorun_tasks| replace:: `forcedtorun_tasks`
+.. _ppg_forcedtorun_tasks: `pipeline_functions.pipeline_printout_graph.forcedtorun_tasks`_
+.. |ppg_draw_vertically| replace:: `draw_vertically`
+.. _ppg_draw_vertically: `pipeline_functions.pipeline_printout_graph.draw_vertically`_
+.. |ppg_ignore_upstream_of_target| replace:: `ignore_upstream_of_target`
+.. _ppg_ignore_upstream_of_target: `pipeline_functions.pipeline_printout_graph.ignore_upstream_of_target`_
+.. |ppg_skip_uptodate_tasks| replace:: `skip_uptodate_tasks`
+.. _ppg_skip_uptodate_tasks: `pipeline_functions.pipeline_printout_graph.skip_uptodate_tasks`_
+.. |ppg_gnu_make| replace:: `gnu_make_maximal_rebuild_mode`
+.. _ppg_gnu_make: `pipeline_functions.pipeline_printout_graph.gnu_make`_
+.. |ppg_test_all_task_for_update| replace:: `test_all_task_for_update`
+.. _ppg_test_all_task_for_update: `pipeline_functions.pipeline_printout_graph.test_all_task_for_update`_
+.. |ppg_no_key_legend| replace:: `no_key_legend`
+.. _ppg_no_key_legend: `pipeline_functions.pipeline_printout_graph.no_key_legend`_
+.. |ppg_minimal_key_legend| replace:: `minimal_key_legend`
+.. _ppg_minimal_key_legend: `pipeline_functions.pipeline_printout_graph.minimal_key_legend`_
+.. |ppg_pipeline_name| replace:: `pipeline_name`
+.. _ppg_pipeline_name: `pipeline_functions.pipeline_printout_graph.pipeline_name`_
+.. |ppg_user_colour_scheme| replace:: `user_colour_scheme`
+.. _ppg_user_colour_scheme: `pipeline_functions.pipeline_printout_graph.user_colour_scheme`_
+.. |ppg_size| replace:: `size`
+.. _ppg_size: `pipeline_functions.pipeline_printout_graph.size`_
+.. |ppg_dpi| replace:: `dpi`
+.. _ppg_dpi: `pipeline_functions.pipeline_printout_graph.dpi`_
+.. |ppg_runtime_data| replace:: `runtime_data`
+.. _ppg_runtime_data: `pipeline_functions.pipeline_printout_graph.runtime_data`_
+.. |ppg_checksum_level| replace:: `checksum_level`
+.. _ppg_checksum_level: `pipeline_functions.pipeline_printout_graph.checksum_level`_
+.. |ppg_history_file| replace:: `history_file`
+.. _ppg_history_file: `pipeline_functions.pipeline_printout_graph.history_file`_
+
+
+
+
+
+
+
+
+
+
+################################################
+Pipeline functions
+################################################
+
+ There are only four functions for **Ruffus** pipelines:
+
+ * |pipeline_run|_ executes a pipeline
+ * |pipeline_printout|_ prints a list of tasks and jobs which will be run in a pipeline
+ * |pipeline_printout_graph|_ prints a schematic flowchart of pipeline tasks in various graphical formats
+ * |pipeline_get_task_names|_ returns a list of all task names in the pipeline
+
+.. _pipeline_functions.pipeline_run:
+
+.. index::
+ single: pipeline functions; pipeline_run
+ pair: pipeline_run; Run pipeline
+
+**************************************************************************************************************************************************************************************
+*pipeline_run*
+**************************************************************************************************************************************************************************************
+**pipeline_run** ( |pr_target_tasks|_ = [], |pr_forcedtorun_tasks|_ = [], |pr_multiprocess|_ = 1, |pr_logger|_ = stderr_logger, |pr_gnu_make|_ = True, |pr_verbose|_ =1, |pr_runtime_data|_ = None, |pr_one_second_per_job|_ = True, |pr_touch_files_only|_ = False, |pr_exceptions_terminate_immediately|_ = None, |pr_log_exceptions|_ = None, |pr_history_file|_ = None, |pr_checksum_level|_ = None, |pr_multithread|_ = 0, |pr_verbose_abbreviated_path|_ = None)
+
+ **Purpose:**
+
+ Runs all specified pipelined functions if they or any antecedent tasks are
+ incomplete or out-of-date.
+
+ **Example**:
+
+ .. code-block:: python
+
+ #
+ # Run task2 whatever its state, and also task1 and antecedents if they are incomplete
+ # Do not log pipeline progress messages to stderr
+ #
+ pipeline_run([task1, task2], forcedtorun_tasks = [task2], logger = blackhole_logger)
+
+ **Parameters:**
+
+
+
+.. _pipeline_functions.pipeline_run.target_tasks:
+
+ * *target_tasks*
+ Pipeline functions and any necessary antecedents (specified implicitly or with :ref:`@follows <decorators.follows>`)
+ which should be invoked with the appropriate parameters if they are incomplete or out-of-date.
+
+.. _pipeline_functions.pipeline_run.forcedtorun_tasks:
+
+ * *forcedtorun_tasks*
+ Optional. These pipeline functions will be invoked regardless of their state.
+ Any antecedent tasks will also be executed if they are out-of-date or incomplete.
+
+.. _pipeline_functions.pipeline_run.multiprocess:
+
+ * *multiprocess*
+ Optional. The number of processes which should be dedicated to running in parallel independent
+ tasks and jobs within each task. If ``multiprocess`` is set to 1, the pipeline will
+ execute in the main process.
+
+.. _pipeline_functions.pipeline_run.multithread:
+
+ * *multithread*
+ Optional. The number of threads which should be dedicated to running in parallel independent
+ tasks and jobs within each task. Should be used only with drmaa. Otherwise the CPython `global interpreter lock (GIL) <https://wiki.python.org/moin/GlobalInterpreterLock>`__
+ will slow down your pipeline
+
+.. _pipeline_functions.pipeline_run.logger:
+
+ * *logger*
+ For logging messages indicating the progress of the pipeline in terms of tasks and jobs.
+ Defaults to outputting to sys.stderr.
+ Setting ``logger=blackhole_logger`` will prevent any logging output.
+
+.. _pipeline_functions.pipeline_run.gnu_make:
+
+ * *gnu_make_maximal_rebuild_mode*
+ .. warning ::
+ This is a dangerous option. Use rarely and with caution
+
+ Optional parameter governing how **Ruffus** determines which part of the pipeline is
+ out of date and needs to be re-run. If set to ``False``, **ruffus** will work back
+ from the ``target_tasks`` and only execute the pipeline after the first up-to-date
+ tasks that it encounters. For example, if there are five tasks:
+
+ ::
+
+ #
+ # task1 -> task2 -> task3 -> task4 -> task5
+ #
+ target_tasks = [task5]
+
+ If ``task3()`` is up-to-date, then only ``task4()`` and ``task5()`` will be run.
+ This will be the case even if ``task2()`` and ``task1()`` are incomplete.
+
+ This allows you to remove all intermediate results produced by ``task1 -> task3``.
+
+
+
+.. _pipeline_functions.pipeline_run.verbose:
+
+ * *verbose*
+ Optional parameter indicating the verbosity of the messages sent to ``logger``:
+ (Defaults to level 1 if unspecified)
+
+ * level **0** : *nothing*
+ * level **1** : *Out-of-date Task names*
+ * level **2** : *All Tasks (including any task function docstrings)*
+ * level **3** : *Out-of-date Jobs in Out-of-date Tasks, no explanation*
+ * level **4** : *Out-of-date Jobs in Out-of-date Tasks, with explanations and warnings*
+ * level **5** : *All Jobs in Out-of-date Tasks, (include only list of up-to-date tasks)*
+ * level **6** : *All jobs in All Tasks whether out of date or not*
+ * level **10**: *logs messages useful only for debugging ruffus pipeline code*
+
+
+ ``verbose >= 10`` are intended for debugging **Ruffus** by the developers and the details
+ are liable to change from release to release
+
+.. _pipeline_functions.pipeline_run.runtime_data:
+
+ * *runtime_data*
+ Experimental feature for passing data to tasks at run time
+
+.. _pipeline_functions.pipeline_run.one_second_per_job:
+
+ * *one_second_per_job*
+ To work around poor file timestamp resolution on some file systems.
+ Defaults to ``True`` if *checksum_level* is 0, forcing tasks to take a minimum of 1 second to complete.
+ If your file system has coarse-grained time stamps, you can turn on this delay
+ by setting *one_second_per_job* to ``True``
+
+.. _pipeline_functions.pipeline_run.touch_files_only:
+
+ * *touch_files_only*
+ Create or update output files only to simulate the running of the pipeline.
+ Does not invoke real task functions to run jobs. This is most useful to force a
+ pipeline to acknowledge that a particular part is now up-to-date.
+
+ This will not work properly if the identities of some files are not known beforehand,
+ and depend on run time. In other words, this is not recommended if ``@split`` or custom parameter generators are being used.
+
+
+
+.. _pipeline_functions.pipeline_run.exceptions_terminate_immediately:
+
+ * *exceptions_terminate_immediately*
+ Exceptions cause immediate termination of the pipeline.
+
+
+.. _pipeline_functions.pipeline_run.log_exceptions:
+
+ * *log_exceptions*
+ Print exceptions to the logger as soon as they occur.
+
+
+.. _pipeline_functions.pipeline_run.history_file:
+
+ * *history_file*
+ The database file which stores checksums and file timestamps for input/output files.
+ Defaults to ``.ruffus_history.sqlite`` if unspecified
+
+.. _pipeline_functions.pipeline_run.checksum_level:
+
+ * *checksum_level*
+ Several options for checking up-to-dateness are available: Default is level 1.
+
+ * level 0 : Use only file timestamps
+ * level 1 : above, plus timestamp of successful job completion
+ * level 2 : above, plus a checksum of the pipeline function body
+ * level 3 : above, plus a checksum of the pipeline function default arguments and the additional arguments passed in by task decorators
+
+.. _pipeline_functions.pipeline_run.verbose_abbreviated_path:
+
+ * *verbose_abbreviated_path*
+ Whether input and output paths are abbreviated. Defaults to 2 if unspecified
+
+ * level 0: The full (expanded, abspath) input or output path
+ * level > 1: The number of subdirectories to include. Abbreviated paths are prefixed with ``[,,,]/``
+ * level < 0: Input / Output parameters are truncated to ``MMM`` letters where ``verbose_abbreviated_path ==-MMM``. Subdirectories are first removed to see if this allows the paths to fit in the specified limit. Otherwise abbreviated paths are prefixed by ``<???>``
+
+
+
+
+
+
+
+
+.. _pipeline_functions.pipeline_printout:
+
+.. index::
+ single: pipeline functions; pipeline_printout
+ pair: pipeline_printout; Printout simulated run of the pipeline
+
+**********************************************************************************************************************************************************************************************************
+*pipeline_printout*
+**********************************************************************************************************************************************************************************************************
+**pipeline_printout** (|pp_output_stream|_ = sys.stdout, |pp_target_tasks|_ = [], |pp_forcedtorun_tasks|_ = [], |pp_verbose|_ = 1, |pp_indent|_ = 4, |pp_gnu_make|_ = True, |pp_wrap_width|_ = 100, |pp_runtime_data|_ = None, |pp_checksum_level|_ = None, |pp_history_file|_ = None, |pp_verbose_abbreviated_path|_ = None)
+
+ **Purpose:**
+
+ Prints out all the pipelined functions which will be invoked given specified ``target_tasks``
+ without actually running the pipeline. Because this is a simulation, some of the job
+ parameters may be incorrect. For example, the results of a :ref:`@split<new_manual.split>`
+ operation are not predetermined and will only be known after the pipelined function
+ splits up the original data. Parameters of all downstream pipelined functions will
+ be changed depending on this initial operation.
+
+ **Example**:
+ ::
+
+ #
+ # Simulate running task2 whatever its state, and also task1 and antecedents
+ # if they are incomplete
+ # Print out results to STDOUT
+ #
+ pipeline_printout(sys.stdout, [task1, task2], forcedtorun_tasks = [task2], verbose = 1)
+
+ **Parameters:**
+
+.. _pipeline_functions.pipeline_printout.output_stream:
+
+ * *output_stream*
+ Where to printout the results of simulating the running of the pipeline.
+
+.. _pipeline_functions.pipeline_printout.target_tasks:
+
+ * *target_tasks*
+ As in :ref:`pipeline_run<pipeline_functions.pipeline_run>`: Pipeline functions and any necessary antecedents (specified implicitly or with :ref:`@follows <decorators.follows>`)
+ which should be invoked with the appropriate parameters if they are incomplete or out-of-date.
+
+
+.. _pipeline_functions.pipeline_printout.forcedtorun_tasks:
+
+ * *forcedtorun_tasks*
+ As in :ref:`pipeline_run<pipeline_functions.pipeline_run>`: These pipeline functions will be invoked regardless of their state.
+ Any antecedent tasks will also be executed if they are out-of-date or incomplete.
+
+
+.. _pipeline_functions.pipeline_printout.verbose:
+
+ * *verbose*
+ Optional parameter indicating the verbosity of the messages sent to ``logger``:
+ (Defaults to level 4 if unspecified)
+
+ * level **0** : *nothing*
+ * level **1** : *Out-of-date Task names*
+ * level **2** : *All Tasks (including any task function docstrings)*
+ * level **3** : *Out-of-date Jobs in Out-of-date Tasks, no explanation*
+ * level **4** : *Out-of-date Jobs in Out-of-date Tasks, with explanations and warnings*
+ * level **5** : *All Jobs in Out-of-date Tasks, (include only list of up-to-date tasks)*
+ * level **6** : *All jobs in All Tasks whether out of date or not*
+ * level **10**: *logs messages useful only for debugging ruffus pipeline code*
+
+
+ ``verbose >= 10`` are intended for debugging **Ruffus** by the developers and the details
+ are liable to change from release to release
+
+.. _pipeline_functions.pipeline_printout.indent:
+
+ * *indent*
+ Optional parameter governing the indentation when printing out the component job
+ parameters of each task function.
+
+
+.. _pipeline_functions.pipeline_printout.gnu_make:
+
+ * *gnu_make_maximal_rebuild_mode*
+ .. warning ::
+ This is a dangerous option. Use rarely and with caution
+
+ See explanation in :ref:`pipeline_run <pipeline_functions.pipeline_run.gnu_make>`.
+
+.. _pipeline_functions.pipeline_printout.wrap_width:
+
+ * *wrap_width*
+ Optional parameter governing the length of each line before it starts wrapping
+ around.
+
+
+.. _pipeline_functions.pipeline_printout.runtime_data:
+
+ * *runtime_data*
+ Experimental feature for passing data to tasks at run time
+
+
+.. _pipeline_functions.pipeline_printout.history_file:
+
+ * *history_file*
+ The database file which stores checksums and file timestamps for input/output files.
+ Defaults to ``.ruffus_history.sqlite`` if unspecified
+
+.. _pipeline_functions.pipeline_printout.checksum_level:
+
+ * *checksum_level*
+ Several options for checking up-to-dateness are available: Default is level 1.
+
+ * level 0 : Use only file timestamps
+ * level 1 : above, plus timestamp of successful job completion
+ * level 2 : above, plus a checksum of the pipeline function body
+ * level 3 : above, plus a checksum of the pipeline function default arguments and the additional arguments passed in by task decorators
+
+
+.. _pipeline_functions.pipeline_printout.verbose_abbreviated_path:
+
+ * *verbose_abbreviated_path*
+ Whether input and output paths are abbreviated. Defaults to 2 if unspecified
+
+ * level 0: The full (expanded, abspath) input or output path
+ * level > 1: The number of subdirectories to include. Abbreviated paths are prefixed with ``[,,,]/``
+ * level < 0: Input / Output parameters are truncated to ``MMM`` letters where ``verbose_abbreviated_path ==-MMM``. Subdirectories are first removed to see if this allows the paths to fit in the specified limit. Otherwise abbreviated paths are prefixed by ``<???>``
+
+
+.. _pipeline_functions.pipeline_printout_graph:
+
+.. index::
+ single: pipeline functions; pipeline_printout_graph
+ pair: pipeline_printout_graph; print flowchart representation of pipeline functions
+
+
+
+
+
+************************************************************************************************************************************************************************************************************************************************************************************
+*pipeline_printout_graph*
+************************************************************************************************************************************************************************************************************************************************************************************
+
+**pipeline_printout_graph** (|ppg_stream|_, |ppg_output_format|_ = None, |ppg_target_tasks|_ = [], |ppg_forcedtorun_tasks|_ = [], |ppg_ignore_upstream_of_target|_ = False, |ppg_skip_uptodate_tasks|_ = False, |ppg_gnu_make|_ = True, |ppg_test_all_task_for_update|_ = True, |ppg_no_key_legend|_ = False, |ppg_minimal_key_legend|_ = True, |ppg_user_colour_scheme|_ = None, |ppg_pipeline_name|_ = "Pipeline", |ppg_size|_ = (11,8), |ppg_dpi|_ = 120, |ppg_runtime_data|_ = None, |ppg_checksum_leve [...]
+
+ **Purpose:**
+
+ Prints out flowchart of all the pipelined functions which will be invoked given specified ``target_tasks``
+ without actually running the pipeline.
+
+ See :ref:`Flowchart colours <new_manual.flowchart_colours>`
+
+ **Example**:
+ ::
+
+ pipeline_printout_graph("flowchart.jpg", "jpg", [task1, task16],
+ forcedtorun_tasks = [task2],
+ no_key_legend = True)
+
+ **Customising appearance:**
+
+ The :ref:`user_colour_scheme <pipeline_functions.pipeline_printout_graph.user_colour_scheme>` parameter can be used to change
+ flowchart colours. This allows the default :ref:`Colour Schemes <new_manual.flowchart_colours>`
+ to be set. An example of customising flowchart appearance is available :ref:`(see code) <new_manual.flowchart_colours.code>` .
+
+
+
+
+ **Parameters:**
+
+.. _pipeline_functions.pipeline_printout_graph.stream:
+
+ * *stream*
+ The file or file-like object to which the flowchart should be printed.
+ If a string is provided, it is assumed that this is the name of the output file
+ which will be opened automatically.
+
+
+.. _pipeline_functions.pipeline_printout_graph.output_format:
+
+ * *output_format*
+ If missing, defaults to the extension of the *stream* file name (e.g. ``jpg`` for ``a.jpg``)
+
+ | If the programme ``dot`` can be found on the execution path, this
+ can be any number of `formats <http://www.graphviz.org/doc/info/output.html>`_
+ supported by `Graphviz <http://www.graphviz.org/>`_, including, for example,
+ ``jpg``, ``png``, ``pdf``, ``svg`` etc.
+ | Otherwise, **ruffus** will only output without error in the `dot <http://en.wikipedia.org/wiki/DOT_language>`_ format, which
+ is a plain-text graph description language.
+
+.. _pipeline_functions.pipeline_printout_graph.target_tasks:
+
+ * *target_tasks*
+ As in :ref:`pipeline_run<pipeline_functions.pipeline_run>`: Pipeline functions and any necessary antecedents (specified implicitly or with :ref:`@follows <decorators.follows>`)
+ which should be invoked with the appropriate parameters if they are incomplete or out-of-date.
+
+
+.. _pipeline_functions.pipeline_printout_graph.forcedtorun_tasks:
+
+ * *forcedtorun_tasks*
+ As in :ref:`pipeline_run<pipeline_functions.pipeline_run>`: These pipeline functions will be invoked regardless of their state.
+ Any antecedent tasks will also be executed if they are out-of-date or incomplete.
+
+.. _pipeline_functions.pipeline_printout_graph.draw_vertically:
+
+ * *draw_vertically*
+ Draw flowchart in vertical orientation
+
+.. _pipeline_functions.pipeline_printout_graph.ignore_upstream_of_target:
+
+ * *ignore_upstream_of_target*
+ Start drawing flowchart from specified target tasks. Do not draw tasks which are
+ downstream (subsequent) to the targets.
+
+.. _pipeline_functions.pipeline_printout_graph.skip_uptodate_tasks:
+
+ * *skip_uptodate_tasks*
+ Do not draw up-to-date / completed tasks in the flowchart unless they
+ lie on the execution path of the pipeline.
+
+.. _pipeline_functions.pipeline_printout_graph.gnu_make:
+
+ * *gnu_make_maximal_rebuild_mode*
+ .. warning ::
+ This is a dangerous option. Use rarely and with caution
+
+ See explanation in :ref:`pipeline_run <pipeline_functions.pipeline_run.gnu_make>`.
+
+.. _pipeline_functions.pipeline_printout_graph.test_all_task_for_update:
+
+ * *test_all_task_for_update*
+ | Checks whether all tasks, including intermediate ones, are up-to-date or not. Normally **Ruffus** will
+ stop checking dependent tasks for completion or whether they are out-of-date once it has
+ discovered the maximal extent of the pipeline which has to be run.
+ | For displaying the flow of the pipeline, stopping early like this is not very informative.
+
+.. _pipeline_functions.pipeline_printout_graph.no_key_legend:
+
+ * *no_key_legend*
+ Do not include key legend explaining the colour scheme of the flowchart.
+
+
+.. _pipeline_functions.pipeline_printout_graph.minimal_key_legend:
+
+ * *minimal_key_legend*
+ Do not include unused task types in key legend.
+
+.. _pipeline_functions.pipeline_printout_graph.user_colour_scheme:
+
+ * *user_colour_scheme*
+ Dictionary specifying colour scheme for flowchart
+
+ See complete :ref:`list of Colour Schemes <new_manual.flowchart_colours>`.
+
+ | Colours can be names e.g. ``"black"`` or quoted hex e.g. ``'"#F6F4F4"'`` (note extra quotes)
+ | Default values will be used unless specified
+
+ .. csv-table::
+ :header: "key", "Subkey", ""
+
+ "
+ - ``'colour_scheme_index'`` ", "| index of default colour scheme,
+ | 0-7, defaults to 0 unless specified", ""
+ "
+ - ``'Final target'``
+ - ``'Explicitly specified task'``
+ - ``'Task to run'``
+ - ``'Down stream'``
+ - ``'Up-to-date Final target'``
+ - ``'Up-to-date task forced to rerun'``
+ - ``'Up-to-date task'``
+ - ``'Vicious cycle'``
+ ","
+ - ``'fillcolor'``
+ - ``'fontcolor'``
+ - ``'color'``
+ - ``'dashed'`` = ``0/1``
+ ", "Colours / attributes for each task type"
+ "
+ - ``'Vicious cycle'``
+ - ``'Task to run'``
+ - ``'Up-to-date'``", "- ``'linecolor'``", "Colours for arrows between tasks"
+ "- ``'Pipeline'``", "- ``'fontcolor'``","Flowchart title colour"
+ "- ``'Key'``", "
+ - ``'fontcolor'``
+ - ``'fillcolor'``", "Legend colours"
+
+ Example:
+
+ Use colour scheme index = 1
+ ::
+
+ pipeline_printout_graph ("flowchart.svg", "svg", [final_task],
+ user_colour_scheme = {
+ "colour_scheme_index" :1,
+ "Pipeline" :{"fontcolor" : '"#FF3232"' },
+ "Key" :{"fontcolor" : "Red",
+ "fillcolor" : '"#F6F4F4"' },
+ "Task to run" :{"linecolor" : '"#0044A0"' },
+ "Final target" :{"fillcolor" : '"#EFA03B"',
+ "fontcolor" : "black",
+ "dashed" : 0 }
+ })
+
+
+
+
+.. _pipeline_functions.pipeline_printout_graph.pipeline_name:
+
+ * *pipeline_name*
+ Specify title for flowchart
+
+.. _pipeline_functions.pipeline_printout_graph.size:
+
+ * *size*
+ Size in inches for flowchart
+
+.. _pipeline_functions.pipeline_printout_graph.dpi:
+
+ * *dpi*
+ Resolution in dots per inch. Ignored for svg output
+
+.. _pipeline_functions.pipeline_printout_graph.runtime_data:
+
+ * *runtime_data*
+ Experimental feature for passing data to tasks at run time
+
+.. _pipeline_functions.pipeline_printout_graph.history_file:
+
+ * *history_file*
+ The database file which stores checksums and file timestamps for input/output files.
+ Defaults to ``.ruffus_history.sqlite`` if unspecified
+
+.. _pipeline_functions.pipeline_printout_graph.checksum_level:
+
+ * *checksum_level*
+ Several options for checking up-to-dateness are available: Default is level 1.
+
+ * level 0 : Use only file timestamps
+ * level 1 : above, plus timestamp of successful job completion
+ * level 2 : above, plus a checksum of the pipeline function body
+ * level 3 : above, plus a checksum of the pipeline function default arguments and the additional arguments passed in by task decorators
+
+
+
+.. _pipeline_functions.pipeline_get_task_names:
+
+.. index::
+ single: pipeline functions; pipeline_get_task_names
+ pair: pipeline_get_task_names; print list of task names without running the pipeline
+
+
+**************************************************************************************************************************************************************************************
+*pipeline_get_task_names*
+**************************************************************************************************************************************************************************************
+**pipeline_get_task_names** ()
+
+ **Purpose:**
+
+ Returns a list of all task names in the pipeline without running the pipeline or checking to see if the tasks are connected correctly
+
+ **Example**:
+
+ Given:
+
+ .. code-block:: python
+
+ from ruffus import *
+
+ @originate([])
+ def create_data(output_files):
+ pass
+
+ @transform(create_data, suffix(".txt"), ".task1")
+ def task1(input_files, output_files):
+ pass
+
+ @transform(task1, suffix(".task1"), ".task2")
+ def task2(input_files, output_files):
+ pass
+
+ Produces a list of three task names:
+
+ .. code-block:: pycon
+
+ >>> pipeline_get_task_names ()
+ ['create_data', 'task1', 'task2']
diff --git a/doc/_build/html/_sources/proxy_logger.txt b/doc/_build/html/_sources/proxy_logger.txt
new file mode 100644
index 0000000..1e11a28
--- /dev/null
+++ b/doc/_build/html/_sources/proxy_logger.txt
@@ -0,0 +1,24 @@
+.. include:: global.inc
+#####################
+ruffus.proxy_logger
+#####################
+
+.. _proxy-logger:
+
+.. automodule:: ruffus.proxy_logger
+ :undoc-members:
+
+
+===========================
+Proxies for a log:
+===========================
+
+.. autofunction:: make_shared_logger_and_proxy
+
+===========================
+Create a logging object
+===========================
+
+
+.. autofunction:: setup_std_shared_logger
+
diff --git a/doc/_build/html/_sources/recipes.txt b/doc/_build/html/_sources/recipes.txt
new file mode 100644
index 0000000..b31b8df
--- /dev/null
+++ b/doc/_build/html/_sources/recipes.txt
@@ -0,0 +1,9 @@
+.. include:: global.inc
+************
+Recipes
+************
+
+^^^^^^^^^^^^^^^^^
+General
+^^^^^^^^^^^^^^^^^
+
diff --git a/doc/_build/html/_sources/refactoring_ruffus_notes.txt b/doc/_build/html/_sources/refactoring_ruffus_notes.txt
new file mode 100644
index 0000000..0148709
--- /dev/null
+++ b/doc/_build/html/_sources/refactoring_ruffus_notes.txt
@@ -0,0 +1,5 @@
+
+ Remember to cite Jake Biesinger and see if he is interested to be a co-author if we ever resubmit the drastically changed version...
+
+
+
diff --git a/doc/_build/html/_sources/task.txt b/doc/_build/html/_sources/task.txt
new file mode 100644
index 0000000..c84bee1
--- /dev/null
+++ b/doc/_build/html/_sources/task.txt
@@ -0,0 +1,138 @@
+.. include:: global.inc
+#####################
+ruffus.Task
+#####################
+
+.. automodule:: ruffus.task
+ :undoc-members:
+ :noindex:
+
+***************************************
+Decorators
+***************************************
+ Basic Task decorators are:
+
+ :ref:`@follows() <decorators.follows>`
+
+ and
+
+ :ref:`@files() <decorators.files>`
+
+ Task decorators include:
+
+ :ref:`@split() <decorators.files>`
+
+ :ref:`@transform() <decorators.files>`
+
+ :ref:`@merge() <decorators.files>`
+
+ :ref:`@posttask() <decorators.posttask>`
+
+ More advanced users may require:
+
+ :ref:`@transform() <decorators.transform_ex>`
+
+ :ref:`@collate() <decorators.collate>`
+
+ :ref:`@parallel() <decorators.parallel>`
+
+ :ref:`@check_if_uptodate() <decorators.check_if_uptodate>`
+
+ :ref:`@files_re() <decorators.files_re>`
+
+
+***************************************
+Pipeline functions
+***************************************
+========================
+pipeline_run
+========================
+.. autofunction:: pipeline_run (target_tasks, forcedtorun_tasks=[], multiprocess=1, logger=stderr_logger, gnu_make_maximal_rebuild_mode=True)
+
+========================
+pipeline_printout
+========================
+.. autofunction:: pipeline_printout
+
+========================
+pipeline_printout_graph
+========================
+.. autofunction:: pipeline_printout_graph
+
+
+.. ???
+
+
+***************************************
+Logging
+***************************************
+.. autoclass:: t_black_hole_logger
+.. autoclass:: t_stderr_logger
+
+.. ???
+
+
+***************************************
+Implementation:
+***************************************
+=================================
+Parameter factories:
+=================================
+.. autofunction:: merge_param_factory
+.. autofunction:: collate_param_factory
+.. autofunction:: transform_param_factory
+.. autofunction:: files_param_factory
+.. autofunction:: args_param_factory
+.. autofunction:: split_param_factory
+
+.. ???
+
+
+=================================
+Wrappers around jobs:
+=================================
+.. autofunction:: job_wrapper_generic
+.. autofunction:: job_wrapper_io_files
+.. autofunction:: job_wrapper_mkdir
+
+.. ???
+
+
+
+
+=================================
+Checking if job is update:
+=================================
+.. autofunction:: needs_update_check_modify_time
+.. autofunction:: needs_update_check_directory_missing
+
+.. ???
+
+
+***************************************
+Exceptions and Errors
+***************************************
+.. autoclass:: task_FilesArgumentsError
+.. autoclass:: task_FilesreArgumentsError
+.. autoclass:: JobSignalledBreak
+.. autoclass:: MissingInputFileError
+.. autoclass:: PostTaskArgumentError
+.. autoclass:: error_making_directory
+.. autoclass:: error_duplicate_task_name
+.. autoclass:: error_decorator_args
+.. autoclass:: error_task_name_lookup_failed
+.. autoclass:: error_task_decorator_takes_no_args
+.. autoclass:: error_function_is_not_a_task
+.. autoclass:: error_circular_dependencies
+.. autoclass:: error_not_a_directory
+.. autoclass:: error_missing_output
+.. autoclass:: error_job_signalled_interrupt
+
+
+
+.. ???
+
+
+
+
+
diff --git a/doc/_build/html/_sources/todo.txt b/doc/_build/html/_sources/todo.txt
new file mode 100644
index 0000000..678f521
--- /dev/null
+++ b/doc/_build/html/_sources/todo.txt
@@ -0,0 +1,500 @@
+.. include:: global.inc
+
+.. _todo:
+
+##########################################
+Future Changes to Ruffus
+##########################################
+
+ I would appreciate feedback and help on all these issues and on where next to take *ruffus*.
+
+
+ **Future Changes** are features where we more or less know where we are going and how to get there.
+
+ **Planned Improvements** describes features we would like in Ruffus but where the implementation
+ or syntax has not yet been (fully) worked out.
+
+ If you have suggestions or contributions, please either write to me ( ruffus_lib at llew.org.uk) or
+ send a pull request via the `git site <https://github.com/bunbun/ruffus>`__.
+
+
+.. _todo.inactive_tasks_in_pipeline_printout_graph:
+
+********************************************************************************************************
+Todo: pipeline_printout_graph should print inactive tasks
+********************************************************************************************************
+
+
+.. _todo.dynamic_strings:
+
+********************************************************************************************************
+Todo: Mark input strings as non-file names, and add support for dynamically returned parameters
+********************************************************************************************************
+
+ 1. Use indicator object.
+ 2. What is a good name? ``"output_from()"``, ``"NOT_FILE_NAME"`` :-)
+ 3. They will still participate in suffix, formatter and regex replacement
+
+ Bernie Pope suggests that we should generalise this:
+
+
+ If any object in the input parameters is a (non-list/tuple) class instance, check (getattr) whether it has a ``ruffus_params()`` function.
+ If it does, call it to obtain a list which is substituted in place.
+ If there are strings nested within, these will also take part in Ruffus string substitution.
+ Objects with ``ruffus_params()`` always "decay" to the results of the function call
+
+ ``output_from`` would be a simple wrapper which returns the internal string via ``ruffus_params()``
+
+ .. code-block:: python
+
+ class output_from (object):
+ def __init__(self, str):
+ self.str = str
+ def ruffus_params(self):
+ return [self.str]
+
+ Returning a list should be like wildcards and should not introduce an unnecessary level of indirection for output parameters, i.e. suffix(".txt") or formatter() / "{basename[0]}" should work.
+
+ Check!
+
+
+.. _todo.extra_parameters:
+
+********************************************************************************************************
+Todo: Allow "extra" parameters to be used in output substitution
+********************************************************************************************************
+
+ Formatter substitution can refer to the original elements in the input and extra parameters (without converting them to strings either). This refers to the original (nested) data structure.
+
+ This will allow normal python datatypes to be handed down and slipstreamed into a pipeline more easily.
+
+ The syntax would use Ruffus (> version 2.4) formatter:
+
+ .. code-block:: python
+ :emphasize-lines: 2,3
+
+ @transform( ..., formatter(), [
+ "{EXTRAS[0][1][3]}", # EXTRAS
+ "{INPUTS[1][2]}"],...) # INPUTS
+ def taskfunc():
+ pass
+
+ ``EXTRAS`` and ``INPUTS`` indicate that we are referring to the input and extra parameters.
+
+ These are the full (nested) parameters in all their original form. In the case of the input parameters, this obviously depends on the decorator, so
+
+ .. code-block:: python
+
+ @transform(["a.text", [1, "b.text"]], formatter(), "{INPUTS[0][0]}")
+ def taskfunc():
+ pass
+
+ would give
+
+ ::
+
+ job #1
+ input == "a.text"
+ output == "a"
+
+ job #2
+ input == [1, "b.text"]
+ output == 1
+
+
+ The entire string must consist of ``INPUTS`` or ``EXTRAS`` followed optionally by N levels of square brackets, i.e. they must match ``"(INPUTS|EXTRAS)(\[\d+\])+"``
+
+ No string conversion takes place.
+
+ For ``INPUTS`` or ``EXTRAS`` which have objects with a ``ruffus_params()`` function (see Todo item above),
+ the original object rather than the result of ``ruffus_params()`` is forwarded.
+
+
+
+.. _todo.pre_post_job:
+
+********************************************************************************************************
+Todo: Extra signalling before and after each task and job
+********************************************************************************************************
+
+ .. code-block:: python
+
+ @prejob(custom_func)
+ @postjob(custom_func)
+ def task():
+ pass
+
+ ``@prejob`` / ``@postjob`` would be run in the child processes.
+
+
+.. _todo.new_decorators:
+
+******************************************************************************
+Todo: ``@split`` / ``@subdivide`` returns the actual output created
+******************************************************************************
+
+ * **overrides** (not replaces) wild cards.
+ * Returns a list, each entry with output and extra parameters.
+ * Won't include extraneous files which were not created in the pipeline but which just happened to match the wild card
+ * We should have ``ruffus_output_params``, ``ruffus_extra_params`` wrappers for clarity:
+
+ .. code-block:: python
+
+ @split("a.file", "*.txt")
+ def split_into_txt_files(input_file, output_files):
+ output_files = ["a.txt", "b.txt", "c.txt"]
+ for output_file_name in output_files:
+ with open(output_file_name, "w") as oo:
+ pass
+ return [
+ ruffus_output("a.file"),
+ [ruffus_output(["b.file", "c.file"]), ruffus_extras(13, 14)],
+ ]
+
+
+ * Consider yielding?
+
+==========================================================================================
+Checkpointing
+==========================================================================================
+
+ * If checkpoint file is used, the actual files are saved and checked the next time
+ * If no files are generated, no files are checked the next time...
+ * The output files do not have to match the wildcard though we can output a warning message if that happens...
+ This is obviously dangerous because the behavior will change if the pipeline is rerun without using the checkpoint file
+ * What happens if the task function changes?
+
+***************************************
+Todo: New decorators
+***************************************
+
+==============================================================================
+Todo: ``@originate``
+==============================================================================
+
+ Each (serial) invocation returns a list of output parameters until it returns
+ None. (Empty list = ``continue``, None = ``break``.)
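+
+ A minimal sketch of this proposed calling convention (the driver function and file
+ names below are hypothetical, not part of the Ruffus API):
+
+ .. code-block:: python
+
+     # Hypothetical sketch: call the task function serially and collect
+     # batches of output parameters until it signals "break" with None.
+     def drive_proposed_originate(task_func):
+         all_output_params = []
+         while True:
+             output_params = task_func()
+             if output_params is None:       # None       = "break"
+                 break
+             if not output_params:           # empty list = "continue"
+                 continue
+             all_output_params.append(output_params)
+         return all_output_params
+
+     remaining_batches = [["a.start"], ["b.start", "c.start"]]
+     def make_start_files():
+         # return one batch of output parameters per (serial) invocation
+         return remaining_batches.pop(0) if remaining_batches else None
+
+     # drive_proposed_originate(make_start_files)
+     #     == [["a.start"], ["b.start", "c.start"]]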
+
+
+
+==============================================================================
+Todo: ``@recombine``
+==============================================================================
+
+ Like ``@collate`` but automatically regroups jobs which were a result of a previous ``@subdivide`` / ``@split`` (even after intervening ``@transform`` )
+
+ This is the only way job trickling can work without stalling the pipeline: We would know
+ how many jobs were pending for each ``@recombine`` job and which jobs go together.
+
+****************************************************************************************
+Todo: Named parameters in decorators for clarity
+****************************************************************************************
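+
+ No design has been fixed yet. One possible (purely hypothetical) spelling, using
+ keyword names such as ``input``, ``filter``, ``output`` and ``extras``, might look
+ like this:
+
+ .. code-block:: python
+
+     # Hypothetical syntax only: the keyword names are assumptions for this Todo,
+     # not part of the current release.
+     @transform(input  = previous_task,
+                filter = suffix(".bam"),
+                output = ".sorted.bam",
+                extras = ["--fast"])
+     def sort_bam(input_file, output_file, extra_flag):
+         pass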
+
+.. _todo.bioinformatics_example:
+
+********************************************************************************************************
+Todo: Bioinformatics example to end all examples
+********************************************************************************************************
+
+ Uses
+ * ``@product``
+ * ``@subdivide``
+ * ``@transform``
+ * ``@collate``
+ * ``@merge``
+
+****************************************************************************************
+Todo: Allow the next task to start before all jobs in the previous task have finished
+****************************************************************************************
+
+ Jake (Biesinger) calls this **Job Trickling**!
+
+ * A single long-running job will no longer hold up the entire pipeline
+ * Calculates dependencies dynamically at the job level.
+ * Goal is to have a long running (months) pipeline to which we can keep adding input...
+ * We can choose between prioritising completion of the entire pipeline for some jobs
+ (depth first) or trying to complete as many tasks as possible (breadth first)
+
+==============================================================================
+Converting to per-job rather than per task dependencies
+==============================================================================
+ Some decorators prevent per-job (rather than per-task) dependency calculations, and
+ will cause a pipeline stall until the dependent tasks are completed (the current situation):
+
+ * Some types of jobs unavoidably depend on an entire previous task completing:
+ * ``add_inputs()``, ``inputs()``
+ * ``@merge``
+ * ``@split`` (implicit ``@merge``)
+ * ``@split``, ``@originate`` produce a variable amount of output at runtime and must be completed before the next task can be run.
+ * Should ``yield`` instead of return?
+ * ``@collate`` needs to pattern match all the inputs of a previous task
+ * Replace ``@collate`` with ``@recombine`` which "remembers" and reverses the results of a previous
+ ``@subdivide`` or ``@split``
+ * Jobs need unique job_id tag
+ * Jobs are assigned (nested) grouping id which accompany them down the
+ pipeline after ``@subdivide`` / ``@split`` and are removed after ``@recombine``
+ * Should have a count of jobs so we always know *when* an "input slot" is full
+ * Funny "single file" mode for ``@transform,`` ``@files`` needs to be
+ regularised so it is a syntactic (front end) convenience (oddity!)
+ and not plague the inards of ruffus
+
+
+ Breaking change: to force the entirety of the previous task to complete before the next one, use ``@follows``
+
+==============================================================================
+Implementation
+==============================================================================
+
+ * "Push" model. Completing jobs "check in" their outputs to "input slots" for all the sucessor jobs.
+ * When "input slots" are full for any job, it is put on the dispatch queue to be run.
+ * The priority (depth first or breadth first) can be set here.
+ * ``pipeline_run`` / ``pipeline_printout`` create a task dependency tree structure (from decorator dependencies) (a runtime pipeline object)
+ * Each task in the pipeline object knows which other tasks wait on it.
+ * When output is created by a job, it sends messages (i.e. function calls) to all dependent tasks in the pipeline object with the new output
+ * Sets of output such as from ``@split`` and ``@subdivide`` and ``@originate`` have a
+ terminating condition and/or an associated count (# of outputs)
+ * Tasks in the pipeline object forward incoming inputs to task input slots (for slots common to all jobs in a
+ task: ``@inputs``, ``@add_inputs``) or to slots in new jobs in the pipeline object
+ * When all slots are full in each job, this triggers putting the job parameters onto the job submission queue
+ * The pipeline object should allow Ruffus to be reentrant?
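+
+ A very rough sketch of the "input slot" bookkeeping described in the list above
+ (the class and method names are illustrative only, not an actual implementation):
+
+ .. code-block:: python
+
+     # Illustrative sketch: a job's "input slots" are filled by messages from
+     # completed upstream jobs; once every slot is full, the job is queued.
+     class PendingJob(object):
+         def __init__(self, n_input_slots):
+             self.slots  = [None] * n_input_slots   # one slot per upstream input
+             self.filled = 0
+
+         def check_in(self, slot_index, output_params, dispatch_queue):
+             # called when an upstream job completes and forwards its output
+             if self.slots[slot_index] is None:
+                 self.filled += 1
+             self.slots[slot_index] = output_params
+             if self.filled == len(self.slots):
+                 # all input slots full: ready to run; depth-first or
+                 # breadth-first priority could be applied here
+                 dispatch_queue.append(self)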
+
+
+
+##########################################
+Planned Improvements to Ruffus
+##########################################
+
+.. _todo.run_on_cluster:
+
+
+
+ * ``@split`` needs to be able to specify at run time the number of
+ resulting jobs without using wild cards
+ * legacy support for wild cards and file names.
+
+
+********************************************************************************************************
+Planned: Running python code (task functions) transparently on remote cluster nodes
+********************************************************************************************************
+
+ Wait until next release.
+
+ Will bump Ruffus to v.3.0 if it can run python jobs transparently on a cluster!
+
+ Abstract out ``task.run_pooled_job_without_exceptions()`` as a function which can be supplied to ``pipeline_run``.
+
+ Common "job" interface:
+
+ * marshalled arguments
+ * marshalled function
+ * submission timestamp
+
+ Returns
+ * completion timestamp
+ * returned values
+ * exception
+
+ #) Full version: use libpythongrid?
+ * Christian Widmer <ckwidmer at gmail.com>
+ * Cheng Soon Ong <chengsoon.ong at unimelb.edu.au>
+ * https://code.google.com/p/pythongrid/source/browse/#git%2Fpythongrid
+ * Probably not a good idea to base Ruffus entirely on libpythongrid: we want to minimise dependencies, avoid its sophisticated configuration policies, etc.
+ #) Start with light-weight file-based protocol
+ * specify where the scripts should live
+ * use drmaa to start jobs
+ * have an executable ruffus module which knows how to deserialise (unmarshal) the function / parameters from disk. This would be what drmaa starts up, given the marshalled data as an argument (see the sketch after this list)
+ * time stamp
+ * "heart beat" to check that the job is still running
+ #) Next step: socket-based protocol
+ * use specified master port in ruffus script
+ * start remote processes using drmaa
+ * child receives marshalled data and the address::port in the ruffus script (head node) to initiate hand shake or die
+ * process recycling: run successive jobs on the same remote process for reduced overhead, until it exceeds the maximum number of jobs, or the minimum / maximum time, on the same process
+ * resubmit if the process dies (don't do sophisticated stuff like libpythongrid).
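+
+ A minimal sketch of the light-weight file-based protocol in (2), assuming the
+ standard-library ``pickle`` module for marshalling and that the worker process can
+ import the same modules as the head node (function names here are placeholders):
+
+ .. code-block:: python
+
+     import pickle, time
+
+     def write_job_file(job_file_name, func, args, kwargs):
+         # serialise a job (function + arguments + submission timestamp) to disk
+         with open(job_file_name, "wb") as f:
+             pickle.dump({"func": func, "args": args, "kwargs": kwargs,
+                          "submitted": time.time()}, f)
+
+     def run_job_file(job_file_name, result_file_name):
+         # what the drmaa-launched worker would do: load, run, save the result
+         with open(job_file_name, "rb") as f:
+             job = pickle.load(f)
+         result = {"returned": None, "exception": None}
+         try:
+             result["returned"] = job["func"](*job["args"], **job["kwargs"])
+         except Exception as e:
+             result["exception"] = repr(e)
+         result["completed"] = time.time()
+         with open(result_file_name, "wb") as f:
+             pickle.dump(result, f)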
+
+.. _todo.job_trickling:
+
+
+
+.. _todo.custom_parameters:
+
+************************************
+Planned: Custom parameter generator
+************************************
+
+ Request on mailing list
+
+ I've often wished that I could use an arbitrary function to process the input filepath instead of just a regex.
+
+ .. code-block:: python
+
+ def f(inputs, outputs, extra_param1, extra_param2):
+ # do something to generate parameters
+ return new_output_param, new_extra_param1, new_extra_param2
+
+ now f() can be used inside a Ruffus decorator to generate the outputs from inputs, instead of being forced to use a regex for the job.
+
+ Cheers,
+ Bernie.
+
+ Leverages built-in Ruffus functionality.
+ Don't have to write entire parameter generation from scratch.
+
+ * Gets passed an iterator where you can do a for loop to get input parameters / a flattened list of files
+ * Other parameters are forwarded as is
+ * The duty of the function is to ``yield`` input, output, extra parameters (see the sketch below)
+
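+
+ A rough sketch of what such a generator might look like under this scheme (the
+ calling convention, in particular being handed an iterator over the upstream input
+ parameters, is an assumption rather than an existing Ruffus API):
+
+ .. code-block:: python
+
+     import os
+
+     # Hypothetical custom parameter generator: yields one
+     # (input, output, extra...) set per job, grouping input files by directory.
+     def group_by_directory(input_params_iterator, summary_name = "all.summary"):
+         by_dir = {}
+         for input_file in input_params_iterator:
+             by_dir.setdefault(os.path.dirname(input_file), []).append(input_file)
+         for directory, input_files in by_dir.items():
+             output_file = os.path.join(directory, summary_name)
+             yield input_files, output_file, directory    # input, output, extra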
+
+ Simple to do but how do we prevent this from being a job-trickling barrier?
+
+ Postpone until we have an initial design for job-trickling: Ruffus v.4 ;-(
+
+
+.. _todo.gui:
+
+****************************************************************************
+Planned: Ruffus GUI interface.
+****************************************************************************
+
+ Desktop (PyQt) or web-based solution? I'd love to see an SVG pipeline picture that I could actually interact with.
+
+
+********************************************************************************************************
+Planned: Non-decorator / Function interface to Ruffus
+********************************************************************************************************
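+
+ No design has been fixed yet. A purely hypothetical sketch of what a function-based
+ (non-decorator) interface might look like (the ``Pipeline`` class and its methods
+ below are illustrative assumptions, not the current API):
+
+ .. code-block:: python
+
+     # Hypothetical only: build and run a pipeline without decorators.
+     my_pipeline = Pipeline("my_pipeline")
+     my_pipeline.transform(task_func = compile_task,
+                           input     = source_files,
+                           filter    = suffix(".cpp"),
+                           output    = ".o")
+     my_pipeline.run()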
+
+
+.. _todo.intermediate_files:
+
+********************************************************************************************************
+Planned: Remove intermediate files
+********************************************************************************************************
+
+ Often large intermediate files are produced in the middle of a pipeline which could be
+ removed. However, their absence would cause the pipeline to appear out of date. What is
+ the best way to solve this?
+
+ In gmake, all intermediate files which are not marked ``.PRECIOUS`` are deleted.
+
+ We do not want to manually mark intermediate files for several reasons:
+ * The syntax would be horrible and clunky
+ * The gmake distinction between ``implicit`` and ``explicit`` rules is not one we
+ would like to impose on Ruffus
+ * Gmake uses statically determined (DAG) dependency trees so it is quite natural and
+ easy to prune intermediate paths
+
+ Our preferred solution should impose little to no semantic load on Ruffus, i.e. it should
+ not make it more complex / difficult to use. There are several alternatives we are
+ considering:
+
+ #) Have an **update** mode in which pipeline_run would ignore missing files and only run tasks with existing, out-of-date files.
+ #) Optionally ignore all out-of-date dependencies beyond a specified point in the pipeline
+ #) Add a decorator to flag sections of the pipeline where intermediate files can be removed
+
+
+ Option (1) is rather unnerving because it makes inadvertent errors difficult to detect.
+
+ Option (2) relies on the user of a script remembering the correct chain of dependencies in
+ often complicated pipelines. It would be advisable to keep a flowchart to hand. Again,
+ the chances of error are much greater.
+
+ Option (3) springs from the observation by Andreas Heger that parts of a pipeline with
+ disposable intermediate files can usually be encapsulated as an autonomous section.
+ Within this subpipeline, all is well provided that the outputs of the last task are complete
+ and up-to-date with reference to the inputs of the first task. Intermediate files
+ could be removed with impunity.
+
+ The suggestion is that these autonomous subpipelines could be marked out using the Ruffus
+ decorator syntax::
+
+ #
+ # First task in autonomous subpipeline
+ #
+ @files("who.isit", "its.me")
+ def first_task(*args):
+ pass
+
+ #
+ # Several intermediate tasks
+ #
+ @transform(first_task, suffix(".me"), ".her")
+ def task2_etc(*args):
+ pass
+
+ #
+ # Final task
+ #
+ @sub_pipeline(first_task)
+ @transform(task2_etc, suffix(".her"), ".you")
+ def final_task(*args):
+ pass
+
+ **@sub_pipeline** marks out all tasks between ``first_task`` and ``final_task``, and
+ intermediate files such as ``"its.me"`` and ``"its.her"`` can be deleted. The pipeline will
+ only run if ``"its.you"`` is missing or out-of-date compared with ``"who.isit"``.
+
+ Over the next few Ruffus releases we will see if this is a good design, and whether a
+ better keyword can be found than **@sub_pipeline** (candidates include **@shortcut**
+ and **@intermediate**).
+
+
+.. _todo.retry:
+
+********************************************************************************************************
+Planned: @retry_on_error(NUM_OF_RETRIES)
+********************************************************************************************************
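+
+ The decorator does not exist yet. A plain-python sketch of the intended behaviour
+ (retry a failing job up to ``NUM_OF_RETRIES`` extra times before letting the last
+ exception propagate) might look something like this::
+
+     # Illustrative sketch only of what @retry_on_error could do.
+     def retry_on_error(num_of_retries):
+         def decorate(job_func):
+             def wrapper(*args, **kwargs):
+                 for attempt in range(num_of_retries + 1):
+                     try:
+                         return job_func(*args, **kwargs)
+                     except Exception:
+                         if attempt == num_of_retries:
+                             raise           # give up: re-raise the last exception
+             return wrapper
+         return decorate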
+
+.. _todo.cleanup:
+
+********************************************************************************************************
+Planned: Clean up
+********************************************************************************************************
+
+ The plan is to store the files and directories created via
+ a standard interface.
+
+ The placeholder for this is the function call ``register_cleanup``.
+
+ Jobs can specify the files they created and which need to be
+ deleted by returning a list of file names from the job function.
+
+ So::
+
+ raise Exception = Error
+
+ return False = halt pipeline now
+
+ return string / list of strings = cleanup files/directories later
+
+ return anything else = ignored
+
+
+ The cleanup file/directory store interface can be connected to
+ a text file or a database.
+
+ The cleanup function would look like this::
+
+ pipeline_cleanup(cleanup_log("../cleanup.log"), [instance ="october19th" ])
+ pipeline_cleanup(cleanup_msql_db("user", "password", "hash_record_table"))
+
+ The parameters for where and how to store the list of created files could be
+ similarly passed to pipeline_run as an extra parameter::
+
+ pipeline_run(cleanup_log("../cleanup.log"), [instance ="october19th" ])
+ pipeline_run(cleanup_msql_db("user", "password", "hash_record_table"))
+
+ where `cleanup_log` and `cleanup_msql_db` are classes which have functions for
+
+ #) storing file
+ #) retrieving file
+ #) clearing entries
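+
+ A minimal sketch of a ``cleanup_log``-style store providing these three functions
+ (the class and method names are placeholders for the planned interface, backed here
+ by a plain text file)::
+
+     class cleanup_log(object):
+         def __init__(self, log_file_name, instance = ""):
+             self.log_file_name = log_file_name
+             self.instance      = instance
+
+         def store_file(self, file_name):                  # 1) storing file
+             with open(self.log_file_name, "a") as f:
+                 f.write("%s\t%s\n" % (self.instance, file_name))
+
+         def retrieve_files(self):                         # 2) retrieving files
+             try:
+                 with open(self.log_file_name) as f:
+                     return [line.rstrip("\n").split("\t", 1)[1]
+                             for line in f
+                             if line.startswith(self.instance + "\t")]
+             except IOError:
+                 return []
+
+         def clear_entries(self):                          # 3) clearing entries
+             open(self.log_file_name, "w").close()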
+
+
+ * Files would be deleted in reverse order, and directories after files.
+ * By default, only empty directories would be removed.
+
+ But this could be changed with a ``--forced_remove_dir`` option
+
+ * An ``--remove_empty_parent_directories`` option would be
+ supported by `os.removedirs(path) <http://docs.python.org/library/os.html#os.removedirs>`_.
+
diff --git a/doc/_build/html/_sources/tutorials/manual/advanced_transform.txt b/doc/_build/html/_sources/tutorials/manual/advanced_transform.txt
new file mode 100644
index 0000000..3d211a2
--- /dev/null
+++ b/doc/_build/html/_sources/tutorials/manual/advanced_transform.txt
@@ -0,0 +1,347 @@
+.. include:: ../../global.inc
+.. include:: chapter_numbers.inc
+
+.. _manual.advanced_transform:
+
+######################################################################################################################################################
+|manual.transform_ex.chapter_num|: **add_inputs()** `and` **inputs()**: `Controlling both input and output files with` **@transform**
+######################################################################################################################################################
+
+ .. hlist::
+
+ * :ref:`Manual overview <manual>`
+ * :ref:`@transform <decorators.transform>` syntax in detail
+
+
+ .. index::
+ pair: @transform (Advanced Usage); Manual
+
+
+ The standard :ref:`@transform <manual.transform>` allows you to send a list of data files
+ to the same pipelined function and for the resulting *outputs* parameter to be automatically
+ inferred from file names in the *inputs*.
+
+ There are two situations where you might desire additional flexibility:
+ #. You need to add additional prerequisites or filenames to the *inputs* of every single one
+    of your jobs
+ #. (Less often) the actual *inputs* file names are some variant of the *outputs* of another
+    task.
+
+ Either way, it is occasionally very useful to be able to generate the actual *inputs* as
+ well as *outputs* parameters by regular expression substitution. The following examples will show
+ you both how and why you would want to do this.
+
+====================================================================
+Adding additional *input* prerequisites per job
+====================================================================
+
+
+************************************************************************
+1.) Example: compiling c++ code
+************************************************************************
+
+ Suppose we wished to compile some c++ (``"*.cpp"``) files:
+ .. comment << Pythoncode
+
+ ::
+
+ source_files = "hasty.cpp", "tasty.cpp", "messy.cpp"
+ for source_file in source_files:
+ open(source_file, "w")
+
+ .. comment
+ Pythoncode
+
+
+ The ruffus code would look like this:
+ .. comment << Pythoncode
+
+ ::
+
+ from ruffus import *
+
+ @transform(source_files, suffix(".cpp"), ".o")
+ def compile(input_filename, output_file_name):
+ open(output_file_name, "w")
+
+ .. comment
+ Pythoncode
+
+
+ This results in the following jobs:
+ ::
+
+ >>> pipeline_run([compile], verbose = 2, multiprocess = 3)
+
+ Job = [None -> hasty.cpp] completed
+ Job = [None -> tasty.cpp] completed
+ Job = [None -> messy.cpp] completed
+ Completed Task = prepare_cpp_source
+
+ Job = [hasty.cpp -> hasty.o] completed
+ Job = [messy.cpp -> messy.o] completed
+ Job = [tasty.cpp -> tasty.o] completed
+ Completed Task = compile
+
+************************************************************************
+2.) Example: Adding a header file with **add_inputs(..)**
+************************************************************************
+
+
+ All this is plain vanilla **@transform** syntax. But suppose that we need to add a
+ common header file ``"universal.h"`` to our compilation.
+ The **add_inputs** indicator provides for this with the minimum of fuss:
+
+ .. comment << Pythoncode
+
+ ::
+
+ # create header file
+ open("universal.h", "w")
+
+ # compile C++ files with extra header
+ @transform(prepare_cpp_source, suffix(".cpp"), add_inputs("universal.h"), ".o")
+ def compile(input_filename, output_file_name):
+ open(output_file_name, "w")
+
+ .. comment
+ Pythoncode
+
+ Now the input parameter of each job is a python list, with ``"universal.h"`` added to each ``"*.cpp"`` file:
+
+ ::
+
+ >>> pipeline_run([compile], verbose = 2, multiprocess = 3)
+
+ Job = [ [hasty.cpp, universal.h] -> hasty.o] completed
+ Job = [ [messy.cpp, universal.h] -> messy.o] completed
+ Job = [ [tasty.cpp, universal.h] -> tasty.o] completed
+ Completed Task = compile
+
+
+================================================================================
+Additional *input* prerequisites can be globs, tasks or pattern matches
+================================================================================
+
+ A common requirement is to include the corresponding header file in compilations.
+ It is easy to use **add_inputs** to look up additional files via pattern matches.
+
+************************************************************************
+3.) Example: Adding matching header file
+************************************************************************
+
+ To make this example more fun, we shall also:
+ #) Give each source code file its own ordinal
+ #) Use ``add_inputs`` to add files produced by another task function
+
+ .. comment << Pythoncode
+
+ ::
+
+ # each source file has its own index
+ source_names = [("hasty.cpp", 1),
+ ("tasty.cpp", 2),
+ ("messy.cpp", 3), ]
+ header_names = [sn.replace(".cpp", ".h") for (sn, i) in source_names]
+ header_names.append("universal.h")
+
+ #
+ # create header and source files
+ #
+ for source, source_index in source_names:
+ open(source, "w")
+
+ for header in header_names:
+ open(header, "w")
+
+
+
+ from ruffus import *
+
+ #
+ # look up embedded strings in each source file
+ #
+ @transform(source_names, suffix(".cpp"), ".embedded")
+ def get_embedded_strings(input_filename, output_file_name):
+ open(output_file_name, "w")
+
+
+
+ # compile C++ files with extra header
+ @transform(source_names, suffix(".cpp"),
+ add_inputs( "universal.h",
+ r"\1.h",
+ get_embedded_strings ), ".o")
+ def compile(input_params, output_file_name):
+ open(output_file_name, "w")
+
+
+ pipeline_run([compile], verbose = 2, multiprocess = 3)
+
+ .. comment
+ Pythoncode
+
+ This script gives the following output
+
+ ::
+
+ >>> pipeline_run([compile], verbose = 2, multiprocess = 3)
+
+ Job = [[hasty.cpp, 1] -> hasty.embedded] completed
+ Job = [[messy.cpp, 3] -> messy.embedded] completed
+ Job = [[tasty.cpp, 2] -> tasty.embedded] completed
+ Completed Task = get_embedded_strings
+
+ Job = [[[hasty.cpp, 1], # inputs
+ universal.h, # common header
+ hasty.h, # corresponding header
+ hasty.embedded, messy.embedded, tasty.embedded] # output of get_embedded_strings()
+ -> hasty.o] completed
+ Job = [[[messy.cpp, 3], # inputs
+ universal.h, # common header
+ messy.h, # corresponding header
+ hasty.embedded, messy.embedded, tasty.embedded] # output of get_embedded_strings()
+ -> messy.o] completed
+ Job = [[[tasty.cpp, 2], # inputs
+ universal.h, # common header
+ tasty.h, # corresponding header
+ hasty.embedded, messy.embedded, tasty.embedded] # output of get_embedded_strings()
+ -> tasty.o] completed
+ Completed Task = compile
+
+ We can see that the ``compile(...)`` task now has four sets of *inputs*:
+ 1) The original inputs (e.g. ``[hasty.cpp, 1]``)
+
+ And three additional sets added by **add_inputs(...)**:
+ 2) A header file (``universal.h``) common to all jobs
+ 3) The matching header (e.g. ``hasty.h``)
+ 4) The output from another task ``get_embedded_strings()`` (e.g. ``hasty.embedded, messy.embedded, tasty.embedded``)
+
+ .. note::
+ For input parameters with nested structures (lists or sets), the pattern matching is
+ on the first filename string Ruffus comes across (depth first search).
+
+ So for ``["hasty.c", 0]``, the pattern matches ``"hasty.c"``.
+
+ If in doubt, use :ref:`pipeline_printout <manual.tracing_pipeline_parameters>` to
+ check what parameters Ruffus is using.
+
+
+************************************************************************
+4.) Example: Using **regex(..)** instead of **suffix(..)**
+************************************************************************
+ Suffix pattern matching is much simpler and hence is usually preferable to the more
+ powerful regular expressions. We can rewrite the above example to use **regex** as well
+ to give exactly the same output.
+
+ .. comment << Pythoncode
+
+ ::
+
+ # compile C++ files with extra header
+ @transform(source_names, regex(r"(.+)\.cpp"),
+ add_inputs( "universal.h",
+ r"\1.h",
+ get_embedded_strings ), r"\1.o")
+ def compile(input_params, output_file_name):
+ open(output_file_name, "w")
+
+ .. comment
+ Pythoncode
+
+ .. note::
+ The backreference ``\g<0>`` usefully substitutes the entire substring matched by
+ the regular expression.
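+
+ For example, the following hypothetical variant uses ``\g<0>`` to add a backup
+ copy of each matched source file (e.g. ``hasty.cpp.bak``) as an extra *input*::
+
+     # illustrative only: r"\g<0>.bak" expands to the whole match plus ".bak"
+     @transform(source_names, regex(r"(.+)\.cpp"),
+                add_inputs(r"\g<0>.bak"), r"\1.o")
+     def compile_with_backup(input_params, output_file_name):
+         open(output_file_name, "w")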
+
+
+
+====================================================================
+Replacing all input parameters with **inputs(...)**
+====================================================================
+
+ More rarely, it is necessary to replace all the input parameters wholesale.
+
+************************************************************************
+5.) Example: Running matching python scripts
+************************************************************************
+ In the following example, we are not compiling C++ source files but invoking
+ corresponding python scripts which have the same name.
+
+ Given three c++ files and their corresponding python scripts:
+
+ .. comment << Pythoncode
+
+ ::
+
+ # each source file has its own index
+ source_names = [("hasty.cpp", 1),
+ ("tasty.cpp", 2),
+ ("messy.cpp", 3), ]
+
+ #
+ # create c++ source files and corresponding python files
+ #
+ for source, source_index in source_names:
+ open(source, "w")
+ open(source.replace(".cpp", ".py"), "w")
+
+ .. comment
+ Pythoncode
+
+ The Ruffus code will call each python script corresponding to its c++ counterpart:
+
+ .. comment << Pythoncode
+
+ ::
+
+ from ruffus import *
+
+
+ # run corresponding python files
+ @transform(source_names, suffix(".cpp"), inputs( r"\1.py"), ".results")
+ def run_python_file(input_params, output_file_name):
+ open(output_file_name, "w")
+
+
+ pipeline_run([run_python_file], verbose = 2, multiprocess = 3)
+
+ .. comment
+ Pythoncode
+
+ Resulting in this output:
+ ::
+
+ >>> pipeline_run([run_python_file], verbose = 2, multiprocess = 3)
+ Job = [hasty.py -> hasty.results] completed
+ Job = [messy.py -> messy.results] completed
+ Job = [tasty.py -> tasty.results] completed
+ Completed Task = run_python_file
+
+
+************************************************************************
+6.) Example: Using **regex** instead of **suffix**
+************************************************************************
+
+ Again, the same code can be written (less clearly) using the more powerful
+ **regex** and python regular expressions:
+
+ .. comment << Pythoncode
+
+ ::
+
+ from ruffus import *
+
+
+ # run corresponding python files
+ @transform(source_names, regex(r"(.+)\.cpp"), inputs(r"\1.py"), r"\1.results")
+ def run_python_file(input_params, output_file_name):
+ open(output_file_name, "w")
+
+
+ pipeline_run([run_python_file], verbose = 2, multiprocess = 3)
+
+ .. comment
+ Pythoncode
+
+
+ This is about as sophisticated as **@transform** ever gets!
diff --git a/doc/_build/html/_sources/tutorials/manual/check_if_uptodate.txt b/doc/_build/html/_sources/tutorials/manual/check_if_uptodate.txt
new file mode 100644
index 0000000..bbfe447
--- /dev/null
+++ b/doc/_build/html/_sources/tutorials/manual/check_if_uptodate.txt
@@ -0,0 +1,89 @@
+.. include:: ../../global.inc
+.. include:: chapter_numbers.inc
+
+.. _manual.check_if_uptodate:
+
+#######################################################################################################################
+|manual.check_if_uptodate.chapter_num|: `Writing custom functions to decide which jobs are up to date`
+#######################################################################################################################
+
+ .. hlist::
+
+ * :ref:`Manual overview <manual>`
+ * :ref:`@check_if_uptodate syntax in detail<decorators.check_if_uptodate>`
+
+
+
+ .. index::
+ pair: @check_if_uptodate; Manual
+
+******************************************************************************
+**@check_if_uptodate** : Manual dependency checking
+******************************************************************************
+ Tasks specified with
+ * :ref:`@files <manual.files>`
+ * :ref:`@split <manual.split>`
+ * :ref:`@transform <manual.transform>`
+ * :ref:`@merge <manual.merge>`
+ * :ref:`@collate <manual.collate>`
+
+ have automatic dependency checking based on file modification times.
+
+ Sometimes, you might want to have more control over whether to run jobs, especially
+ if a task does not rely on or produce files (e.g. with :ref:`@parallel <manual.parallel>`).
+
+ You can write your own custom function to decide whether to run a job.
+ This takes as many parameters as your task function, and needs to return a
+ tuple indicating whether an update is required, and why (i.e. ``tuple(bool, str)``).
+
+ This simple example creates the file ``"a.1"`` if it does not exist:
+
+ ::
+
+ from ruffus import *
+ @files(None, "a.1")
+ def create_if_necessary(input_file, output_file):
+ open(output_file, "w")
+
+ pipeline_run([create_if_necessary])
+
+
+
+ could be rewritten more laboriously as:
+
+ ::
+
+
+ from ruffus import *
+ import os
+ def check_file_exists(input_file, output_file):
+ if os.path.exists(output_file):
+ return False, "File already exists"
+ return True, "%s is missing" % output_file
+
+ @parallel([[None, "a.1"]])
+ @check_if_uptodate(check_file_exists)
+ def create_if_necessary(input_file, output_file):
+ open(output_file, "w")
+
+ pipeline_run([create_if_necessary])
+
+
+
+ Both produce the same output:
+ ::
+
+ Task = create_if_necessary
+ Job = [null, "a.1"] completed
+
+
+
+
+.. note::
+
+ The function specified by :ref:`@check_if_uptodate <manual.check_if_uptodate>` can be called
+ more than once for each job.
+
+ See the :ref:`description here <manual.dependencies>` of how **Ruffus** decides which tasks to run.
+
+
diff --git a/doc/_build/html/_sources/tutorials/manual/collate.txt b/doc/_build/html/_sources/tutorials/manual/collate.txt
new file mode 100644
index 0000000..13e98cf
--- /dev/null
+++ b/doc/_build/html/_sources/tutorials/manual/collate.txt
@@ -0,0 +1,95 @@
+.. include:: ../../global.inc
+.. include:: chapter_numbers.inc
+
+.. _manual.collate:
+
+######################################################################################################
+|manual.collate.chapter_num|: **@collate**\ : `group together disparate input into sets of results`
+######################################################################################################
+
+ .. hlist::
+
+ * :ref:`Manual overview <manual>`
+ * :ref:`@collate syntax in detail <decorators.collate>`
+
+ It is often very useful to group together disparate *inputs* into several categories, each of which
+ leads to a separate *output*. In the example shown below, we produce separate summaries of results
+ depending on which species the file belongs to.
+
+ **Ruffus** uses the term ``collate`` in a rough analogy to the way printers group together
+ copies of documents appropriately.
+
+ .. index::
+ pair: @collate; Manual
+
+
+
+
+
+====================================================
+Collating many *inputs* each into a single *output*
+====================================================
+
+ Our example starts with some files which presumably have been created by some
+ earlier stages of our pipeline. We simulate this here with this code:
+
+ ::
+
+ files_names = [ "mammals.tiger.wild.animals",
+                 "mammals.lion.wild.animals",
+                 "mammals.lion.handreared.animals",
+                 "mammals.dog.tame.animals",
+                 "mammals.dog.wild.animals",
+                 "mammals.dog.feral.animals",
+                 "reptiles.crocodile.wild.animals" ]
+ for f in files_names:
+ open(f, "w").write(f)
+
+ However, we are only interested in mammals, and we would like the files of each species to
+ end up in its own directory, i.e. ``tiger``, ``lion`` and ``dog``:
+
+ ::
+
+ import os
+ os.mkdir("tiger")
+ os.mkdir("lion")
+ os.mkdir("dog")
+
+ Now we would like to place each file in a different destination, depending on its
+ species. The following regular expression marks out the species name ``r'mammals.([^.]+)'``.
+ For ``mammals.tiger.wild.animals``, the first matching group (``\1``) == ``"tiger"``
+
+ Then, the following::
+
+ from ruffus import *
+
+ @collate('*.animals', # inputs = all *.animal files
+ regex(r'mammals.([^.]+)'), # regular expression
+ r'\1/animals.in_my_zoo', # single output file per species
+ r'\1' ) # species name
+ def capture_mammals(infiles, outfile, species):
+ # summarise all animals of this species
+ print "Collating %s" % species
+
+ o = open(outfile, "w")
+ for i in infiles:
+     o.write(open(i).read() + "\ncaptured\n")
+
+ pipeline_run([capture_mammals])
+
+ .. ???
+
+ puts each captured mammal in its own directory::
+
+ Task = capture_mammals
+ Job = [(mammals.lion.handreared.animals, mammals.lion.wild.animals) -> lion/animals.in_my_zoo] completed
+ Job = [(mammals.tiger.wild.animals, ) -> tiger/animals.in_my_zoo] completed
+ Job = [(mammals.dog.tame.animals, mammals.dog.wild.animals, mammals.dog.feral.animals) -> dog/animals.in_my_zoo] completed
+
+
+ .. ???
+
+ The crocodile has been discarded because it isn't a mammal and the file name
+ doesn't match the ``mammals`` part of the regular expression.
+
+
diff --git a/doc/_build/html/_sources/tutorials/manual/dependencies.txt b/doc/_build/html/_sources/tutorials/manual/dependencies.txt
new file mode 100644
index 0000000..1221c63
--- /dev/null
+++ b/doc/_build/html/_sources/tutorials/manual/dependencies.txt
@@ -0,0 +1,158 @@
+.. include:: ../../global.inc
+.. include:: chapter_numbers.inc
+
+.. _manual.dependencies:
+
+##################################################################################
+|manual.dependencies.chapter_num|: `Checking dependencies to run tasks in order`
+##################################################################################
+
+ .. hlist::
+
+ * :ref:`Manual overview <manual>`
+
+
+ .. index::
+ pair: dependencies; Manual
+
+
+
+ How does **Ruffus** decide how to run your pipeline?
+
+ * In which order should pipelined functions be called?
+
+ * Which parts of the pipeline are up-to-date and do not need to be rerun?
+
+
+=============================================
+Running all out-of-date tasks and dependents
+=============================================
+
+ .. image:: ../../images/manual_dependencies_flowchart_intro.png
+
+
+ By default, *ruffus* will
+
+ * build a flow chart (dependency tree) of pipelined tasks (functions)
+ * start from the most ancestral tasks with the fewest dependencies (``task1`` and ``task4`` in the flowchart above).
+ * walk up the tree to find the first incomplete / out-of-date tasks (i.e. ``task3`` and ``task5``)
+ * start running from there
+
+ All down-stream (dependent) tasks will be re-run anyway, so we don't have to test
+ whether they are up-to-date or not.
+
+ .. _manual.dependencies.checking_multiple_times:
+
+ .. note::
+
+ This means that **ruffus** *may* ask any task whether its jobs are out of date more than once:
+
+ * once when deciding which parts of the pipeline have to be run
+ * once just before executing the task.
+
+ *Ruffus* tries to be clever / efficient, and does the minimal amount of querying.
+
+
+.. _manual.dependencies.example:
+
+
+=======================================
+A simple example
+=======================================
+
+-------------------------------------
+ Four successive tasks to run:
+-------------------------------------
+ .. note::
+ The full code is available :ref:`here <manual.dependencies.code>`.
+
+
+ Suppose we have four successive tasks to run, whose flowchart we can print out
+ by running:
+
+ ::
+
+ pipeline_printout_graph ("flowchart.png", "png", [task4],
+ draw_vertically = True)
+
+
+ .. image:: ../../images/manual_dependencies_flowchart1.png
+
+ We can see that all four tasks need to be run to reach the target, ``task4``.
+
+----------------------------------------
+ Pipeline tasks are up-to-date:
+----------------------------------------
+
+
+ After the pipeline runs (``python simpler.py -d ""``), all tasks are up to date and the flowchart shows:
+
+ .. image:: ../../images/manual_dependencies_flowchart2.png
+
+-------------------------------------
+ Some tasks out of date:
+-------------------------------------
+
+ If we then made task2 and task4 out of date by modifying their *inputs* files:
+ ::
+
+ open("a.1", "w")
+ open("a.3", "w")
+
+
+ the flowchart would show:
+
+ #. the pipeline only has to rerun from ``task2``.
+ #. ``task1`` is complete / up-to-date
+ #. ``task3`` will have to be re-run because it follows (depends on) ``task2``.
+
+ .. image:: ../../images/manual_dependencies_flowchart3.png
+
+=======================================
+Forced Reruns
+=======================================
+ Even if a pipeline stage appears to be up to date,
+ you can always force the pipeline to rerun from one or more task functions.
+
+ This is particularly useful, for example, if the pipeline data hasn't changed but
+ the analysis or computational code has.
+
+ ::
+
+ pipeline_run([task4], [task1])
+
+
+ will run all tasks from ``task1`` to ``task4``
+
+
+ Both the "target" and the "forced" lists can include as many tasks as you wish. All dependencies
+ are still carried out and out-of-date jobs rerun.
+
+
+=======================================
+Esoteric option: Minimal Reruns
+=======================================
+
+ In the above example, you could point out that ``task3`` is not out of date. And if we were only interested
+ in the immediate dependencies or prerequisites leading up to ``task4``, we might not
+ need ``task2`` to rerun at all, only ``task4``.
+
+ This rather dangerous option is useful if you don't want to keep all the intermediate
+ files/results from upstream tasks. The pipeline will not run any incomplete
+ tasks which precede an up-to-date result.
+
+ This is seldom what you intend, and you should always check that the appropriate stages
+ of the pipeline are executed in the flowchart output.
+
+ In such cases, we can rerun the pipeline with the following option:
+
+ ::
+
+ pipeline_run([task4], gnu_make_maximal_rebuild_mode = False)
+
+ and only ``task4`` will rerun.
+
+
+
+
+
diff --git a/doc/_build/html/_sources/tutorials/manual/dependencies_code.txt b/doc/_build/html/_sources/tutorials/manual/dependencies_code.txt
new file mode 100644
index 0000000..9e99198
--- /dev/null
+++ b/doc/_build/html/_sources/tutorials/manual/dependencies_code.txt
@@ -0,0 +1,97 @@
+.. include:: ../../global.inc
+.. _manual.dependencies.code:
+
+
+########################################################################################
+Code for Chapter 9: Checking dependencies to run tasks in order
+########################################################################################
+ * :ref:`Manual overview <manual>`
+ * :ref:`Back <manual.dependencies.example>`
+
+ This example shows how dependencies work
+
+
+************************************
+Code
+************************************
+ ::
+
+ from ruffus import *
+ import json
+
+ import time
+ def task_helper(infile, outfile):
+ """
+ cat input file content to output file
+ after writing out job parameters
+ """
+ if infile:
+ output_text = "".join(sorted(open(infile).readlines()))
+ else:
+ output_text = "None"
+ output_text += json.dumps(infile) + " -> " + json.dumps(outfile) + "\n"
+ open(outfile, "w").write(output_text)
+
+
+
+ #
+ # task1
+ #
+ @files(None, 'a.1')
+ def task1(infile, outfile):
+ """
+ First task
+ """
+ task_helper(infile, outfile)
+
+
+
+ #
+ # task2
+ #
+ @transform(task1, regex(r'.1'), '.2')
+ def task2(infile, outfile):
+ """
+ Second task
+ """
+ task_helper(infile, outfile)
+
+
+
+ #
+ # task3
+ #
+ @transform(task2, regex(r'.2'), '.3')
+ def task3(infile, outfile):
+ """
+ Third task
+ """
+ task_helper(infile, outfile)
+
+
+
+ #
+ # task4
+ #
+ @transform(task3, regex(r'.3'), '.4')
+ def task4(infile, outfile):
+ """
+ Fourth task
+ """
+ task_helper(infile, outfile)
+
+ pipeline_printout_graph ("flowchart.png", "png", [task4], draw_vertically = True, no_key_legend = True)
+ pipeline_run([task4])
+
+************************************
+Resulting Output
+************************************
+ ::
+
+ >>> pipeline_run([task4], multiprocess = 10, logger = logger_proxy)
+ job = [null, "a.1"]
+ job = ["a.1", "a.2"]
+ job = ["a.2", "a.3"]
+ job = ["a.3", "a.4"]
+
+
diff --git a/doc/_build/html/_sources/tutorials/manual/exceptions.txt b/doc/_build/html/_sources/tutorials/manual/exceptions.txt
new file mode 100644
index 0000000..c0bd3b6
--- /dev/null
+++ b/doc/_build/html/_sources/tutorials/manual/exceptions.txt
@@ -0,0 +1,82 @@
+.. include:: ../../global.inc
+.. include:: chapter_numbers.inc
+
+.. _manual.exceptions:
+
+###################################################################################################
+|manual.exceptions.chapter_num|: `Exceptions thrown inside a pipeline`
+###################################################################################################
+
+ .. hlist::
+
+ * :ref:`Manual overview <manual>`
+
+
+ .. index::
+ pair: Exceptions; Manual
+
+
+ The goal for **Ruffus** is that exceptions should just work *out-of-the-box* without any fuss.
+ This is especially important for exceptions that come from your code which may be raised
+ in a different process. Often multiple parallel operations (jobs or tasks) fail at the
+ same time. **Ruffus** will forward each of these exceptions with the tracebacks so you
+ can jump straight to the offending line.
+
+ This example shows separate exceptions from two jobs running in parallel:
+
+ .. image:: ../../images/manual_exceptions.png
+
+
+.. _manual.exceptions.multiple_errors:
+
+.. index:: signalling, interrupts, break, errors, exceptions, multiple errors
+
+=====================
+Multiple Errors
+=====================
+ For any task where exceptions are thrown, *Ruffus* will continue executing all the jobs
+ currently in progress (up to the maximum number of concurrent jobs
+ (``multiprocess``) set in :ref:`pipeline_run <pipeline_functions.pipeline_run>`).
+ Each of these may raise separate exceptions.
+ This seems a fair tradeoff between being able to gather detailed error information for
+ running jobs, and not wasting too much time for a task that is going to fail anyway.
+
+
+.. index:: signalling, interrupts, break, errors, exceptions
+
+.. _manual.exceptions.interrupting:
+
+=================================
+Interrupting the pipeline
+=================================
+
+ If your task function raises a ``Ruffus.JobSignalledBreak`` Exception, this will immediately
+ halt the pipeline at that point, without waiting for other jobs in the queue to complete:
+
+ ::
+
+ from ruffus import *
+ @parallel([['A', 1], ['B',3]])
+ def parallel_task(name, param1):
+ if name == 'A': return False
+
+ pipeline_run([parallel_task])
+
+
+
+ produces the following (abbreviated):
+
+ ::
+
+ task.RethrownJobError:
+
+ Exceptions running jobs for
+ 'def parallel_task(...):'
+
+ Original exception:
+
+ Exception #1
+ task.JobSignalledBreak: Job = ["A", 1] returned False
+ for Job = ["A", 1]
+
+
diff --git a/doc/_build/html/_sources/tutorials/manual/files.txt b/doc/_build/html/_sources/tutorials/manual/files.txt
new file mode 100644
index 0000000..ebf7746
--- /dev/null
+++ b/doc/_build/html/_sources/tutorials/manual/files.txt
@@ -0,0 +1,218 @@
+.. include:: ../../global.inc
+.. include:: chapter_numbers.inc
+
+.. _manual.files:
+
+#######################################################################################
+|manual.files.chapter_num|: `Passing parameters to the pipeline with` **@files**
+#######################################################################################
+ .. hlist::
+
+ * :ref:`Manual overview <manual>`
+ * :ref:`@files syntax in detail <decorators.files>`
+
+
+ | The python functions which do the actual work of each stage or
+ :term:`task` of a **Ruffus** pipeline are written by you.
+ | The role of **Ruffus** is to make sure these functions are called in the right order,
+ with the right parameters, running in parallel using multiprocessing if desired.
+
+ The easiest way to specify parameters to *Ruffus* :term:`task` functions is to use
+ the :ref:`@files <decorators.files>` decorator.
+
+ .. index::
+ pair: @files; Manual
+
+
+***************************************
+**@files**
+***************************************
+
+ Running this code:
+
+ ::
+
+ from ruffus import *
+
+ @files('a.1', ['a.2', 'b.2'], 'A file')
+ def single_job_io_task(infile, outfiles, text):
+ for o in outfiles: open(o, "w")
+
+ # prepare input file
+ open('a.1', "w")
+
+ pipeline_run()
+
+
+ Is equivalent to calling:
+ ::
+
+ single_job_io_task('a.1', ['a.2', 'b.2'], 'A file')
+
+
+ And produces:
+ ::
+
+ >>> pipeline_run()
+ Job = [a.1 -> [a.2, b.2], A file] completed
+ Completed Task = single_job_io_task
+
+ **Ruffus** will automatically check if your task is up to date. The second time :ref:`pipeline_run() <pipeline_functions.pipeline_run>`
+ is called, nothing will happen. But if you update ``a.1``, the task will rerun:
+
+ ::
+
+ >>> open('a.1', "w")
+ >>> pipeline_run()
+ Job = [a.1 -> [a.2, b.2], A file] completed
+ Completed Task = single_job_io_task
+
+ See :ref:`chapter 2<manual.skip_up_to_date>` for a more in-depth discussion of how **Ruffus**
+ decides which parts of the pipeline are complete and up-to-date.
+
+
+.. index::
+ pair: @files; in parallel
+
+.. _manual.files.parallel:
+
+******************************************************************************
+Running the same code on different parameters in parallel
+******************************************************************************
+
+ Your pipeline may require the same function to be called multiple times on independent parameters.
+ In which case, you can supply all the parameters to **@files**; each set will be sent to a separate job that
+ may run in parallel if necessary. **Ruffus** will check if each separate :term:`job` is up-to-date using
+ the *inputs* and *outputs* (first two) parameters (see :ref:`chapter 2 <manual.io_parameters>`).
+
+
+ For example, if a sequence
+ (e.g. a list or tuple) of 5 sets of parameters is passed to **@files**, that indicates
+ there will also be 5 separate jobs:
+
+ ::
+
+ from ruffus import *
+ parameters = [
+ [ 'job1.file' ], # 1st job
+ [ 'job2.file', 4 ], # 2nd job
+ [ 'job3.file', [3, 2] ], # 3rd job
+ [ 67, [13, 'job4.file'] ], # 4th job
+ [ 'job5.file' ], # 5th job
+ ]
+ @files(parameters)
+ def task_file(*params):
+ ""
+
+ | **Ruffus** creates as many jobs as there are elements in ``parameters``.
+ | In turn, each of these elements consists of a series of parameters which will be
+ passed to each separate job.
+
+ Thus the above code is equivalent to calling:
+
+ ::
+
+ task_file('job1.file')
+ task_file('job2.file', 4)
+ task_file('job3.file', [3, 2])
+ task_file(67, [13, 'job4.file'])
+ task_file('job5.file')
+
+
+ What ``task_file()`` does with these parameters is up to you!
+
+ The only constraint on the parameters is that **Ruffus** will treat any first
+ parameter of each job as the *inputs* and any second as the *output*. Any
+ strings in the *inputs* or *output* parameters (including those nested in sequences)
+ will be treated as file names.
+
+ Thus, to pick the parameters out of one of the above jobs:
+
+ ::
+
+ task_file(67, [13, 'job4.file'])
+
+ | *inputs* == ``67``
+ | *outputs* == ``[13, 'job4.file']``
+ |
+ | The solitary output filename is ``job4.file``
+
+
+.. index::
+ pair: @files; check if up to date
+
+.. _manual.files.is_uptodate:
+.. _manual.files.example:
+
+=======================================
+Checking if jobs are up to date
+=======================================
+
+ | Usually we do not want to run all the stages in a pipeline but only where
+ the input data has changed or is no longer up to date.
+ | One easy way to do this is to check the modification times for files produced
+ at each stage of the pipeline.
+
+ | Let us first create our starting files ``a.1`` and ``b.1``
+ | We can then run the following pipeline function to create
+
+ * ``a.2`` from ``a.1`` and
+ * ``b.2`` from ``b.1``
+
+ ::
+
+ # create starting files
+ open("a.1", "w")
+ open("b.1", "w")
+
+
+ from ruffus import *
+ parameters = [
+ [ 'a.1', 'a.2', 'A file'], # 1st job
+ [ 'b.1', 'b.2', 'B file'], # 2nd job
+ ]
+
+ @files(parameters)
+ def parallel_io_task(infile, outfile, text):
+ # copy infile contents to outfile
+ infile_text = open(infile).read()
+ open(outfile, "w").write(infile_text + "\n" + text)
+
+ pipeline_run()
+
+
+ .. ???
+
+ This produces the following output:
+ ::
+
+ >>> pipeline_run()
+ Job = [a.1 -> a.2, A file] completed
+ Job = [b.1 -> b.2, B file] completed
+ Completed Task = parallel_io_task
+
+
+ | If you called :ref:`pipeline_run() <pipeline_functions.pipeline_run>` again, nothing would happen because the files are up to date:
+ | ``a.2`` is more recent than ``a.1`` and
+ | ``b.2`` is more recent than ``b.1``
+
+ However, if you subsequently modified ``a.1`` again:
+ ::
+
+ open("a.1", "w")
+ pipeline_run(verbose = 1)
+
+ you would see the following::
+
+ >>> pipeline_run([parallel_io_task])
+ Task = parallel_io_task
+ Job = ["a.1" -> "a.2", "A file"] completed
+ Job = ["b.1" -> "b.2", "B file"] unnecessary: already up to date
+ Completed Task = parallel_io_task
+
+ The 2nd job is up to date and will be skipped.
+
+
+
+
+
diff --git a/doc/_build/html/_sources/tutorials/manual/files_re.txt b/doc/_build/html/_sources/tutorials/manual/files_re.txt
new file mode 100644
index 0000000..5d8a396
--- /dev/null
+++ b/doc/_build/html/_sources/tutorials/manual/files_re.txt
@@ -0,0 +1,135 @@
+.. include:: ../../global.inc
+.. include:: chapter_numbers.inc
+
+.. _manual.files_re:
+
+###############################################################################################################
+|manual.files_re.chapter_num|: **@files_re**: Deprecated `syntax using regular expressions`
+###############################################################################################################
+
+ .. hlist::
+
+ * :ref:`Manual overview <manual>`
+ * :ref:`@files_re <decorators.files_re>` syntax in detail
+
+
+ .. index::
+ pair: @files_re; Manual
+
+
+
+
+***************************************
+**@files_re**
+***************************************
+
+ This is older, now deprecated syntax.
+
+ **@files_re** combines the functionality of @transform, @collate and @merge in
+ one overloaded decorator.
+
+ This is the reason why its use is discouraged. **@files_re** syntax is far too overloaded
+ and context-dependent to support its myriad of different functions.
+
+ The following documentation is provided to help maintain historical **ruffus** usage.
+
+=======================================
+Transforming input and output filenames
+=======================================
+
+
+ For example, the following code from |manual.transform.chapter_num| takes files from
+ the previous pipeline task, and makes new output parameters with the ``.sums`` suffix
+ in place of the ``.chunks`` suffix:
+
+ ::
+
+ @transform(step_4_split_numbers_into_chunks, suffix(".chunks"), ".sums")
+ def step_5_calculate_sum_of_squares (input_file_name, output_file_name):
+ #
+ # calculate sums and sums of squares for all values in the input_file_name
+ # writing to output_file_name
+ ""
+
+ This can be written using @files_re equivalently:
+
+ ::
+
+ @files_re(step_4_split_numbers_into_chunks, r".chunks", r".sums")
+ def step_5_calculate_sum_of_squares (input_file_name, output_file_name):
+ ""
+
+.. _manual.files_re.combine:
+.. index::
+ pair: combine; Manual
+
+=====================================================
+Collating many *inputs* into a single *output*
+=====================================================
+
+ Similarly, the following code from :ref:`|manual.collate.chapter_num| <manual.collate>` groups **inputs**
+ belonging to the same species into a single output file per species:
+
+ ::
+
+ @collate('*.animals', # inputs = all *.animal files
+ regex(r'mammals.([^.]+)'), # regular expression
+ r'\1/animals.in_my_zoo', # single output file per species
+ r'\1' ) # species name
+ def capture_mammals(infiles, outfile, species):
+ # summarise all animals of this species
+ ""
+
+ This can be written using @files_re equivalently using the :ref:`combine<decorators.combine>` indicator:
+
+ ::
+
+ @files_re('*.animals', # inputs = all *.animal files
+ r'mammals.([^.]+)', # regular expression
+ combine(r'\1/animals.in_my_zoo'), # single output file per species
+ r'\1' ) # species name
+ def capture_mammals(infiles, outfile, species):
+ # summarise all animals of this species
+ ""
+
+
+
+==============================================================================
+Generating *input* and *output* parameters using regular expressions
+==============================================================================
+
+ The following code generates additional *Input* prerequisite file names which match the original *Input* file names.
+
+ We want each job of our ``analyse()`` function to get corresponding pairs
+ of ``xx.chunks`` and ``xx.red_indian`` files, where
+
+ ``*.chunks`` are generated by the task function ``split_up_problem()`` and
+ ``*.red_indian`` are generated by the task function ``make_red_indians()``:
+
+ ::
+
+ @follows(make_red_indians)
+ @transform(split_up_problem, # starting set of *inputs*
+ regex(r"(.*).chunks"), # regular expression
+ inputs([r"\g<0>", # xx.chunks
+ r"\1.red_indian"]), # important.file
+ r"\1.results" # xx.results
+ )
+ def analyse(input_filenames, output_file_name):
+ "Do analysis here"
+
+
+ The equivalent code using @files_re looks very similar:
+
+ ::
+
+ @follows(make_red_indians)
+ @files_re( split_up_problem, # starting set of *inputs*
+            r"(.*).chunks", # regular expression
+            [r"\g<0>", # xx.chunks
+             r"\1.red_indian"], # xx.red_indian
+            r"\1.results") # xx.results
+ def analyse(input_filenames, output_file_name):
+ "Do analysis here"
+
+
diff --git a/doc/_build/html/_sources/tutorials/manual/follows.txt b/doc/_build/html/_sources/tutorials/manual/follows.txt
new file mode 100644
index 0000000..7486f9b
--- /dev/null
+++ b/doc/_build/html/_sources/tutorials/manual/follows.txt
@@ -0,0 +1,193 @@
+.. include:: ../../global.inc
+.. include:: chapter_numbers.inc
+
+.. _manual.follows:
+
+#################################################################################################
+|manual.follows.chapter_num| : `Arranging tasks into a pipeline with` **@follows**
+#################################################################################################
+
+ * :ref:`Manual overview <manual>`
+ * :ref:`@follows syntax in detail <decorators.follows>`
+
+ .. index::
+ pair: @follows; Manual
+
+
+***************************************
+**@follows**
+***************************************
+
+ The order in which stages or :term:`task`\ s of a pipeline are arranged is set
+ explicitly by the :ref:`@follows(...) <decorators.follows>` python decorator:
+
+ ::
+
+ from ruffus import *
+ import sys
+
+ def first_task():
+ print "First task"
+
+ @follows(first_task)
+ def second_task():
+ print "Second task"
+
+ @follows(second_task)
+ def final_task():
+ print "Final task"
+
+
+ The ``@follows`` decorator indicates that the ``first_task`` function precedes ``second_task`` in
+ the pipeline.
+
+
+.. note::
+
+ We shall see in :ref:`Chapter 2 <manual.tasks_as_input>` that the order of pipeline :term:`task`\ s can also be inferred implicitly
+ for the following decorators
+
+ * :ref:`@split(...) <manual.split>`
+ * :ref:`@transform(...) <manual.transform>`
+ * :ref:`@merge(...) <manual.merge>`
+ * :ref:`@collate(...) <manual.collate>`
+
+.. index::
+ pair: pipeline_run; Manual
+
+=====================
+Running
+=====================
+
+ Now we can run the pipeline by:
+ ::
+
+ pipeline_run([final_task])
+
+
+ Because ``final_task`` depends on ``second_task`` which depends on ``first_task`` , all
+ three functions will be executed in order.
+
+.. index::
+ pair: pipeline_printout_graph; Manual
+ pair: pipeline_printout; Manual
+
+
+=====================
+Displaying
+=====================
+
+ We can see a flowchart of our fledgling pipeline by executing:
+ ::
+
+ pipeline_printout_graph ( "manual_follows1.png",
+ "png",
+ [final_task],
+ no_key_legend=True)
+
+ producing the following flowchart
+
+ .. image:: ../../images/manual_follows1.png
+
+
+ or in text format with:
+ ::
+
+ pipeline_printout(sys.stdout, [final_task])
+
+ which produces the following:
+ ::
+
+ Task = first_task
+ Task = second_task
+ Task = final_task
+
+
+
+.. index::
+ pair: @follows; referring to functions before they are defined
+ pair: @follows; out of order
+.. _manual.follows.out_of_order:
+
+***************************************
+Defining pipeline tasks out of order
+***************************************
+
+ All this assumes that all your pipelined tasks are defined in order.
+ (``first_task`` before ``second_task`` before ``final_task``)
+
+ | This is usually the most sensible way to arrange your code.
+
+ If you wish to refer to tasks which are not yet defined, you can do so by quoting the function name as a string:
+
+ ::
+
+ @follows("second_task")
+ def final_task():
+ print "Final task"
+
+ You can refer to tasks (functions) in other modules, in which case the fully
+ qualified name must be used:
+
+ ::
+
+ @follows("other_module.second_task")
+ def final_task():
+ print "Final task"
+
+.. index::
+ pair: @follows; multiple dependencies
+
+.. _manual.follows.multiple_dependencies:
+
+***************************************
+Multiple dependencies
+***************************************
+
+ Each task can depend on more than one antecedent task.
+
+ This can be indicated either by stacking ``@follows``:
+ ::
+
+ @follows(first_task)
+ @follows("second_task")
+ def final_task():
+ ""
+
+
+ or in a more concise way:
+ ::
+
+ @follows(first_task, "second_task")
+ def final_task():
+ ""
+
+.. _manual.follows.mkdir:
+
+.. index::
+ single: @follows; mkdir (Manual)
+ single: mkdir; @follows (Manual)
+
+
+******************************************************************************
+Making directories automatically with :ref:`mkdir <decorators.mkdir>`
+******************************************************************************
+
+ A common prerequisite for any computational task is making sure that the destination
+ directories exist.
+
+ **Ruffus** provides special syntax to support this, using the special
+ :ref:`mkdir <decorators.mkdir>` dependency. For example:
+
+ ::
+
+ @follows(first_task, mkdir("output/results/here"))
+ def second_task():
+ print "Second task"
+
+ will make sure that ``output/results/here`` exists before ``second_task`` is run.
+
+ In other words, **Ruffus** will create the ``output/results/here`` directory if it does not already exist.
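+
+ ``mkdir`` can also be given several directory names at once, each of which will be
+ created if necessary before the task runs. For example (a small sketch; the directory
+ names are purely illustrative):
+
+ ::
+
+     @follows(first_task, mkdir("output", "output/results", "output/logs"))
+     def second_task():
+         print "Second task"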
+
+
+
diff --git a/doc/_build/html/_sources/tutorials/manual/jobs_limit.txt b/doc/_build/html/_sources/tutorials/manual/jobs_limit.txt
new file mode 100644
index 0000000..50822c6
--- /dev/null
+++ b/doc/_build/html/_sources/tutorials/manual/jobs_limit.txt
@@ -0,0 +1,80 @@
+.. include:: ../../global.inc
+.. include:: chapter_numbers.inc
+
+.. _manual.jobs_limit:
+
+####################################################################################################################
+|manual.jobs_limit.chapter_num|: `Manage concurrency for a specific task with` **@jobs_limit**
+####################################################################################################################
+
+ .. hlist::
+
+ * :ref:`Manual overview <manual>`
+ * :ref:`@jobs_limit <decorators.jobs_limit>` syntax in detail
+
+ .. index::
+ pair: @jobs_limit; Manual
+
+
+=================
+**@jobs_limit**
+=================
+
+ Calling :ref:`pipeline_run(multiprocess = NNN)<pipeline_functions.pipeline_run>` allows
+ multiple jobs (from multiple independent tasks) to be run in parallel. However, there
+ are some operations which consume so many resources that we might want them to run
+ with little or no concurrency.
+
+ For example, we might want to download some files via FTP, but the server limits the
+ number of simultaneous requests from each IP address. Even if the rest of the pipeline is
+ running 100 jobs in parallel, the FTP downloads must be restricted to, say, 2 files at a
+ time. We would like to keep the rest of the pipeline running as is, but let this one
+ operation run either serially or with little concurrency.
+
+
+ Whereas ``multiprocess = NNN`` sets the pipeline-wide concurrency to ``NNN``,
+ ``@jobs_limit(MMM)`` sets concurrency at a much finer level: to ``MMM``, just for jobs
+ of the decorated task.
+
+ The optional name (e.g. ``@jobs_limit(3, "ftp_download_limit")``) allows the same limit to
+ be shared across multiple tasks. To be pedantic: a limit of ``3`` jobs at a time would be applied
+ across all tasks which have a ``@jobs_limit`` named ``"ftp_download_limit"``:
+
+ ::
+
+ from ruffus import *
+
+ # make list of 10 files
+ @split(None, "*stage1")
+ def make_files(input_file, output_files):
+ for i in range(10):
+ if i < 5:
+ open("%d.small_stage1" % i, "w")
+ else:
+ open("%d.big_stage1" % i, "w")
+
+ @jobs_limit(3, "ftp_download_limit")
+ @transform(make_files, suffix(".small_stage1"), ".stage2")
+ def stage1_small(input_file, output_file):
+ open(output_file, "w")
+
+ @jobs_limit(3, "ftp_download_limit")
+ @transform(make_files, suffix(".big_stage1"), ".stage2")
+ def stage1_big(input_file, output_file):
+ open(output_file, "w")
+
+ @jobs_limit(5)
+ @transform([stage1_small, stage1_big], suffix(".stage2"), ".stage3")
+ def stage2(input_file, output_file):
+ open(output_file, "w")
+
+ pipeline_run([stage2], multiprocess = 10)
+
+ will run the 10 jobs of ``stage1_big`` and ``stage1_small`` 3 at a time (highlighted in blue),
+ a limit shared across the two tasks, while ``stage2`` jobs run 5 at a time (in red).
+ These limits override the number set in ``pipeline_run`` (``multiprocess = 10``):
+
+ .. image:: ../../images/jobs_limit2.png
+
+
+
diff --git a/doc/_build/html/_sources/tutorials/manual/logging.txt b/doc/_build/html/_sources/tutorials/manual/logging.txt
new file mode 100644
index 0000000..49a69bd
--- /dev/null
+++ b/doc/_build/html/_sources/tutorials/manual/logging.txt
@@ -0,0 +1,185 @@
+.. include:: ../../global.inc
+.. include:: chapter_numbers.inc
+
+.. _manual.logging:
+
+###################################################################################################
+|manual.logging.chapter_num|: `Logging progress through a pipeline`
+###################################################################################################
+
+ .. hlist::
+
+ * :ref:`Manual overview <manual>`
+
+
+ .. index::
+ pair: Logging; Manual
+
+
+ There are two parts to logging with **Ruffus**:
+
+ * Logging progress through the pipeline
+
+ This produces the sort of output displayed in this manual:
+
+ ::
+
+ >>> pipeline_run([parallel_io_task])
+ Task = parallel_io_task
+ Job = ["a.1" -> "a.2", "A file"] completed
+ Job = ["b.1" -> "b.2", "B file"] unnecessary: already up to date
+ Completed Task = parallel_io_task
+
+
+ * Logging your own messages from within your pipelined functions.
+
+ Because **Ruffus** may run these in separate processes (multiprocessing), some
+ attention has to be paid to how to send and synchronise your log messages
+ across process boundaries.
+
+
+ We shall deal with these in turn.
+
+
+.. _manual.logging.pipeline:
+
+=================================
+Logging task/job completion
+=================================
+ By default, *Ruffus* logs each task and each job as it is completed to
+ ``sys.stderr``.
+
+ :ref:`pipeline_run() <pipeline_functions.pipeline_run>` includes an optional ``logger`` parameter which defaults to
+ ``stderr_logger``. Set this to ``black_hole_logger`` to turn off all tracking messages as
+ the pipeline runs:
+
+ ::
+
+ pipeline_run([pipelined_task], logger = black_hole_logger)
+
+
+**********************************
+Controlling logging verbosity
+**********************************
+ :ref:`pipeline_run() <pipeline_functions.pipeline_run>` currently has five levels of verbosity, set by the optional ``verbose``
+ parameter which defaults to 1:
+
+ ::
+
+ verbose = 0: nothing
+ verbose = 1: logs completed jobs/tasks
+ verbose = 2: logs up to date jobs in incomplete tasks
+ verbose = 3: logs reason for running job
+ verbose = 4: logs messages useful only for debugging ruffus pipeline code
+
+
+ Verbosity levels above 2 are intended for debugging **Ruffus** by its developers, and the
+ details are liable to change from release to release.
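+
+ For example, to see the reason why each out-of-date job will be re-run, one might call
+ (a minimal sketch; ``final_task`` stands in for the last task of your own pipeline):
+
+ ::
+
+     pipeline_run([final_task], verbose = 3)
+
+ whereas ``verbose = 0`` silences the progress messages altogether.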
+
+
+**********************************
+Using your own logging
+**********************************
+ You can specify your own logging by providing a log object to :ref:`pipeline_run() <pipeline_functions.pipeline_run>`.
+ This log object should have ``debug()`` and ``info()`` methods.
+
+ Instead of writing your own, it is usually more convenient to use the python
+ `logging <http://docs.python.org/library/logging.html>`_
+ module which provides logging classes with rich functionality. The following sets up
+ a logger to a rotating set of files:
+
+ ::
+
+ import logging
+ import logging.handlers
+
+ LOG_FILENAME = '/tmp/ruffus.log'
+
+ # Set up a specific logger with our desired output level
+ my_ruffus_logger = logging.getLogger('My_Ruffus_logger')
+ my_ruffus_logger.setLevel(logging.DEBUG)
+
+ # Add the log message handler to the logger
+ handler = logging.handlers.RotatingFileHandler(
+ LOG_FILENAME, maxBytes=2000, backupCount=5)
+
+ my_ruffus_logger.addHandler(handler)
+
+
+ from ruffus import *
+
+ @files(None, "a.1")
+ def create_if_necessary(input_file, output_file):
+ """Description: Create the file if it does not exist"""
+ open(output_file, "w")
+
+ pipeline_run([create_if_necessary], [create_if_necessary], logger=my_ruffus_logger)
+ print open("/tmp/ruffus.log").read()
+
+
+ The contents of ``/tmp/ruffus.log`` are as specified:
+ ::
+
+ Task = create_if_necessary
+ Description: Create the file if it does not exist
+ Job = [null -> "a.1"] completed
+
+.. _manual.logging.per_job:
+
+=======================================
+Your own logging *within* each job
+=======================================
+
+ It is often useful to log the messages from within each of your pipelined functions.
+
+ However, each job runs in a separate process, and it is *not* a good
+ idea to pass the logging object itself between jobs:
+
+ #. logging is not synchronised between processes
+ #. `logging <http://docs.python.org/library/logging.html>`_ objects cannot be
+ `pickle <http://docs.python.org/library/pickle.html>`_\ d and sent across processes
+
+ The best thing to do is to have a centralised log, and to have each job invoke the
+ logging methods (e.g. ``debug``, ``warning``, ``info`` etc.) on that centralised log
+ across process boundaries.
+
+ The **Ruffus** :ref:`proxy_logger <proxy-logger>` module provides an easy way to share
+ `logging <http://docs.python.org/library/logging.html>`_ objects among
+ jobs. This requires just two simple steps:
+
+ .. note::
+ :ref:`The full code <manual.logging_code>` shows how this can be coded.
+
+
+
+****************************************
+ 1. Set up log from config file
+****************************************
+ ::
+
+ from ruffus.proxy_logger import *
+ (logger_proxy,
+ logging_mutex) = make_shared_logger_and_proxy (setup_std_shared_logger,
+ "my_logger",
+ {"file_name" :"/my/lg.log"})
+
+****************************************
+ 2. Give each job a proxy to the logger
+****************************************
+ Now, pass:
+
+ * ``logger_proxy`` (which forwards logging calls across jobs) and
+ * ``logging_mutex`` (which prevents the messages of jobs logging simultaneously
+ from being jumbled up)
+
+ to each job::
+
+ @files(None, 'a.1', logger_proxy, logging_mutex)
+ def task1(ignore_infile, outfile, logger_proxy, logging_mutex):
+ """
+ Log within task
+ """
+ open(outfile, "w").write("Here we go")
+ with logging_mutex:
+ logger_proxy.info("Here we go logging")
+
diff --git a/doc/_build/html/_sources/tutorials/manual/logging_code.txt b/doc/_build/html/_sources/tutorials/manual/logging_code.txt
new file mode 100644
index 0000000..31ad70b
--- /dev/null
+++ b/doc/_build/html/_sources/tutorials/manual/logging_code.txt
@@ -0,0 +1,187 @@
+.. include:: ../../global.inc
+.. _manual.logging_code:
+
+########################################################################################
+Code for Chapter 16: Logging progress through a pipeline
+########################################################################################
+ * :ref:`Manual overview <manual>`
+ * :ref:`Back <manual.logging.per_job>`
+
+ This example shows how to log messages from within each of your pipelined functions.
+
+
+************************************
+Code
+************************************
+ ::
+
+
+ from ruffus import *
+ from ruffus.proxy_logger import *
+ import logging
+
+ import sys, os
+ import multiprocessing
+ # use simplejson in place of json for python < 2.6
+ try:
+ import json
+ except ImportError:
+ import simplejson
+ json = simplejson
+
+
+
+ #88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+ # Shared logging
+
+
+ #88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+ logger_args={}
+ logger_args["file_name"] = "pipeline.log"
+ logger_args["level"] = logging.DEBUG
+ logger_args["rotating"] = True
+ logger_args["maxBytes"]=20000
+ logger_args["backupCount"]=10
+ logger_args["formatter"]="%(asctime)s - %(name)s - %(levelname)6s - %(message)s"
+
+ (logger_proxy,
+ logging_mutex) = make_shared_logger_and_proxy (setup_std_shared_logger,
+ "my_logger", logger_args)
+
+
+
+
+
+ #88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+ # Helper Function which writes to a shared log
+
+
+ #88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+ import time
+ def test_job_io(infiles, outfiles, extra_params):
+ """
+ cat input files content to output files
+ after writing out job parameters
+ """
+ #
+ # dump parameters
+ params = (infiles, outfiles)# + extra_params[0:-3]
+ #
+ logger_proxy, logging_mutex = extra_params
+ with logging_mutex:
+ logger_proxy.debug("job = %s, process name = %s" %
+ (json.dumps(params),
+ multiprocessing.current_process().name))
+ #
+ #
+ sys.stdout.write(' job = %s\n' % json.dumps(params))
+ #
+ if isinstance(infiles, str):
+ infiles = [infiles]
+ elif infiles is None:
+ infiles = []
+ if isinstance(outfiles, str):
+ outfiles = [outfiles]
+ output_text = list()
+ for f in infiles:
+ output_text.append(open(f).read())
+ output_text = "".join(sorted(output_text))
+ output_text += json.dumps(infiles) + " -> " + json.dumps(outfiles) + "\n"
+ for f in outfiles:
+ open(f, "w").write(output_text)
+
+
+
+
+ #88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+ # Tasks
+
+
+ #88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+ #
+ # task1
+ #
+ @files(None, 'a.1', logger_proxy, logging_mutex)
+ def task1(infiles, outfiles, *extra_params):
+ """
+ First task
+ """
+ test_job_io(infiles, outfiles, extra_params)
+
+
+
+ #
+ # task2
+ #
+ @transform(task1, regex('.1'), '.2', logger_proxy, logging_mutex)
+ def task2(infiles, outfiles, *extra_params):
+ """
+ Second task
+ """
+ test_job_io(infiles, outfiles, extra_params)
+
+
+
+ #
+ # task3
+ #
+ @transform(task2, regex('.2'), '.3', logger_proxy, logging_mutex)
+ def task3(infiles, outfiles, *extra_params):
+ """
+ Third task
+ """
+ test_job_io(infiles, outfiles, extra_params)
+
+
+
+ #
+ # task4
+ #
+ @transform(task3, regex('.3'), '.4', logger_proxy, logging_mutex)
+ def task4(infiles, outfiles, *extra_params):
+ """
+ Fourth task
+ """
+ test_job_io(infiles, outfiles, extra_params)
+
+
+ #
+ # Necessary to protect the "entry point" of the program under windows.
+ # see: http://docs.python.org/library/multiprocessing.html#multiprocessing-programming
+ #
+ pipeline_run([task4], multiprocess = 10, logger = logger_proxy)
+
+
+************************************
+Resulting Output
+************************************
+ ::
+
+ >>> pipeline_run([task4], multiprocess = 10, logger = logger_proxy)
+ job = [null, "a.1"]
+ job = ["a.1", "a.2"]
+ job = ["a.2", "a.3"]
+ job = ["a.3", "a.4"]
+
+ ``pipeline.log`` will contain our unimaginative log messages:
+
+ ::
+
+ 2009-11-15 03:04:55,884 - my_logger - DEBUG - job = [null, "a.1"], process name = PoolWorker-2
+ 2009-11-15 03:04:56,941 - my_logger - INFO - Job = [None -> a.1, <LoggingProxy>, <thread.lock>] completed
+ 2009-11-15 03:04:56,942 - my_logger - INFO - Completed Task = task1
+ 2009-11-15 03:04:56,945 - my_logger - DEBUG - job = ["a.1", "a.2"], process name = PoolWorker-4
+ 2009-11-15 03:04:57,962 - my_logger - INFO - Job = [a.1 -> a.2, <LoggingProxy>, <thread.lock>] completed
+ 2009-11-15 03:04:57,962 - my_logger - INFO - Completed Task = task2
+ 2009-11-15 03:04:57,965 - my_logger - DEBUG - job = ["a.2", "a.3"], process name = PoolWorker-3
+ 2009-11-15 03:04:59,009 - my_logger - INFO - Job = [a.2 -> a.3, <LoggingProxy>, <thread.lock>] completed
+ 2009-11-15 03:04:59,010 - my_logger - INFO - Completed Task = task3
+ 2009-11-15 03:04:59,013 - my_logger - DEBUG - job = ["a.3", "a.4"], process name = PoolWorker-5
+ 2009-11-15 03:05:00,024 - my_logger - INFO - Job = [a.3 -> a.4, <LoggingProxy>, <thread.lock>] completed
+ 2009-11-15 03:05:00,025 - my_logger - INFO - Completed Task = task4
+
diff --git a/doc/_build/html/_sources/tutorials/manual/manual_code.txt b/doc/_build/html/_sources/tutorials/manual/manual_code.txt
new file mode 100644
index 0000000..084d016
--- /dev/null
+++ b/doc/_build/html/_sources/tutorials/manual/manual_code.txt
@@ -0,0 +1,12 @@
+.. include:: ../../global.inc
+.. _Manual_code:
+
+
+############################################################
+Code for the manual tutorial
+############################################################
+
+ * :ref:`The **Ruffus** manual<manual.introduction>`
+
+
+
diff --git a/doc/_build/html/_sources/tutorials/manual/manual_contents.txt b/doc/_build/html/_sources/tutorials/manual/manual_contents.txt
new file mode 100644
index 0000000..737919f
--- /dev/null
+++ b/doc/_build/html/_sources/tutorials/manual/manual_contents.txt
@@ -0,0 +1,30 @@
+.. include:: ../../global.inc
+.. _manual:
+
+####################################################################
+**Ruffus** Manual: Table of Contents:
+####################################################################
+
+ :ref:`manual_introduction <manual.introduction>`
+
+ #) :ref:`follows <manual.follows>`
+ #) :ref:`tasks_as_recipes <manual.tasks_as_recipes>`
+ #) :ref:`files <manual.files>`
+ #) :ref:`tasks_and_globs_in_inputs <manual.tasks_and_globs_in_inputs>`
+ #) :ref:`tracing_pipeline_parameters <manual.tracing_pipeline_parameters>`
+ #) :ref:`parallel_processing <manual.multiprocessing>`
+ #) :ref:`split <manual.split>`
+ #) :ref:`transform <manual.transform>`
+ #) :ref:`merge <manual.merge>`
+ #) :ref:`posttask <manual.posttask>`
+ #) :ref:`jobs_limit <manual.jobs_limit>`
+ #) :ref:`dependencies <manual.dependencies>`
+ #) :ref:`onthefly <manual.on_the_fly>`
+ #) :ref:`collate <manual.collate>`
+ #) :ref:`advanced_transform <manual.advanced_transform>`
+ #) :ref:`parallel <manual.parallel>`
+ #) :ref:`check_if_uptodate <manual.check_if_uptodate>`
+ #) :ref:`exceptions <manual.exceptions>`
+ #) :ref:`logging <manual.logging>`
+ #) :ref:`files_re <manual.files_re>`
+
diff --git a/doc/_build/html/_sources/tutorials/manual/manual_introduction.txt b/doc/_build/html/_sources/tutorials/manual/manual_introduction.txt
new file mode 100644
index 0000000..ea58a1a
--- /dev/null
+++ b/doc/_build/html/_sources/tutorials/manual/manual_introduction.txt
@@ -0,0 +1,224 @@
+.. include:: ../../global.inc
+.. _manual.introduction:
+
+.. index::
+ pair: manual; introduction
+
+
+####################################################################
+**Ruffus** Manual
+####################################################################
+
+| The chapters of this manual go through each of the features of **Ruffus** in turn.
+| Some of these (especially those labelled **esoteric** or **deprecated**) may not
+ be of interest to all users of **Ruffus**.
+
+ If you are looking for a quick introduction to **Ruffus**, you may want to look at the
+ :ref:`Simple Tutorial <Simple_Tutorial>` first; much of its content is shared with, or
+ elaborated on by, this manual.
+
+
+***************************************
+Introduction
+***************************************
+
+ The **Ruffus** module is a lightweight way to run computational pipelines.
+
+ Computational pipelines often become quite simple
+ if we break down the process into simple stages.
+
+ .. note::
+
+ Ruffus refers to each stage of your pipeline as a :term:`task`.
+
+ | Let us start with the usual "Hello World".
+ | We have the following two python functions which
+ we would like to turn into an automatic pipeline:
+
+
+ .. image:: ../../images/simple_tutorial_hello_world.png
+
+ .. ::
+
+ ::
+
+ def first_task():
+ print "Hello "
+
+ def second_task():
+ print "world"
+
+
+ The simplest **Ruffus** pipeline would look like this:
+
+ .. image:: ../../images/simple_tutorial_intro_follows.png
+
+ .. ::
+
+ ::
+
+ from ruffus import *
+
+ def first_task():
+ print "Hello "
+
+ @follows(first_task)
+ def second_task():
+ print "world"
+
+ pipeline_run([second_task])
+
+
+ The functions which do the actual work of each stage of the pipeline remain unchanged.
+ The role of **Ruffus** is to make sure these functions are called in the right order,
+ with the right parameters, running in parallel using multiprocessing if desired.
+
+ There are three simple parts to building a **ruffus** pipeline:
+
+ #. importing ruffus
+ #. "Decorating" functions which are part of the pipeline
+ #. Running the pipeline!
+
+.. _manual.introduction.import:
+
+.. index::
+ single: importing ruffus
+
+
+****************************
+Importing ruffus
+****************************
+
+ The most convenient way to use ruffus is to import the various names directly:
+
+ ::
+
+ from ruffus import *
+
+ This will allow **ruffus** terms to be used directly in your code. This is also
+ the style we have adopted for this manual.
+
+ .. csv-table::
+ :header: "Category", "Terms"
+ :stub-columns: 1
+
+ "*Pipeline functions*", "
+ ::
+
+ pipeline_printout
+ pipeline_printout_graph
+ pipeline_run
+ register_cleanup"
+ "*Decorators*", "
+ ::
+
+ @follows
+ @files
+ @split
+ @transform
+ @merge
+ @collate
+ @posttask
+ @jobs_limit
+ @parallel
+ @check_if_uptodate
+ @files_re"
+ "*Loggers*", "
+ ::
+
+ stderr_logger
+ black_hole_logger"
+ "*Parameter disambiguating Indicators*", "
+ ::
+
+ suffix
+ regex
+ inputs
+ touch_file
+ combine
+ mkdir
+ output_from"
+
+If any of these clash with names in your code, you can use qualified names instead:
+ ::
+
+ import ruffus
+
+ ruffus.pipeline_printout("...")
+
+
+.. index::
+ pair: decorators; Manual
+
+.. _manual.introduction.decorators:
+
+****************************
+"Decorating" functions
+****************************
+
+ You need to tag or :term:`decorate <decorator>` your existing functions to tell **Ruffus** that they
+ are part of the pipeline.
+
+ .. note::
+
+ :term:`decorator`\ s are ways to tag or mark out functions.
+
+ They start with an ``@`` prefix and take a number of parameters in parentheses.
+
+ .. image:: ../../images/simple_tutorial_decorator_syntax.png
+
+ The **ruffus** decorator :ref:`@follows <decorators.follows>` makes sure that
+ ``second_task`` follows ``first_task``.
+
+
+ | Multiple :term:`decorator`\ s can be used for each :term:`task` function to add functionality
+ to *Ruffus* pipeline functions.
+ | However, the decorated python functions can still be
+ called normally, outside of *Ruffus*.
+ | *Ruffus* :term:`decorator`\ s can be added to (stacked on top of) any function in any order.
+
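+ For example (a minimal sketch; the task and message names are illustrative only),
+ decorators can be stacked in any order, and the decorated function remains an ordinary
+ python function which can be called directly:
+
+ ::
+
+     import sys
+     from ruffus import *
+
+     def first_task():
+         print "Hello "
+
+     @posttask(lambda: sys.stdout.write("second_task finished\n"))
+     @follows(first_task)
+     def second_task():
+         print "world"
+
+     # outside of Ruffus, the decorated function is still just a python function
+     second_task()
+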
+ * :ref:`More on @follows in |manual.follows.chapter_num| <manual.follows>`
+ * :ref:`@follows syntax in detail <decorators.follows>`
+
+.. index::
+ pair: Running the pipeline; Manual
+ pair: pipeline_run; Manual
+
+.. _manual.introduction.running_pipeline:
+
+****************************
+Running the pipeline
+****************************
+
+ We run the pipeline by specifying the **last** stage (:term:`task` function) of your pipeline.
+ Ruffus will know what other functions this depends on, following the appropriate chain of
+ dependencies automatically, making sure that the entire pipeline is up-to-date.
+
+ In our example above, because ``second_task`` depends on ``first_task``, both functions are executed in order.
+
+ ::
+
+ >>> pipeline_run([second_task], verbose = 1)
+
+ By default, **Ruffus** prints out its progress through your pipeline (controlled by the
+ ``verbose`` parameter), interleaved with our ``Hello`` and ``world``.
+
+ .. image:: ../../images/simple_tutorial_hello_world_output.png
+
+ .. ::
+
+ ::
+
+ >>> pipeline_run([second_task], verbose = 1)
+ Start Task = first_task
+ Hello
+ Job completed
+ Completed Task = first_task
+ Start Task = second_task
+ world
+ Job completed
+ Completed Task = second_task
+
+
+
+
diff --git a/doc/_build/html/_sources/tutorials/manual/merge.txt b/doc/_build/html/_sources/tutorials/manual/merge.txt
new file mode 100644
index 0000000..8dd0227
--- /dev/null
+++ b/doc/_build/html/_sources/tutorials/manual/merge.txt
@@ -0,0 +1,102 @@
+.. include:: ../../global.inc
+.. include:: chapter_numbers.inc
+
+.. _manual.merge:
+
+#########################################################################################
+|manual.merge.chapter_num|: **Merge** `multiple input into a single result`
+#########################################################################################
+
+ .. hlist::
+
+ * :ref:`Manual overview <manual>`
+ * :ref:`@merge <decorators.merge>` syntax in detail
+
+ At the conclusion of our pipeline, or at key selected points, we might need a
+ summary of our progress: data gathered from a multitude of files or disparate *inputs*
+ and summarised in the *output* of a single :term:`job`.
+
+ *Ruffus* uses the :ref:`@merge <decorators.merge>` decorator for this purpose.
+
+ Although **@merge** takes multiple *inputs* and produces a single *output*, **Ruffus**
+ is again agnostic as to the sort of data contained within that *output*. It can be a single
+ (string) file name, or an arbitrarily complicated nested structure with numbers, objects etc.
+ As always, strings contained (even within nested sequences) in *output* will be treated
+ as file names for the purpose of checking whether the :term:`task` is up-to-date.
+
+ .. index::
+ pair: @merge; Manual
+
+
+=================
+**@merge**
+=================
+
+This example is borrowed from :ref:`step 6 <Simple_Tutorial_6th_step>` of the simple tutorial.
+
+ .. note:: :ref:`Accompanying Python Code <Simple_Tutorial_6th_step_code>`
+
+**************************************************************************************
+Combining partial solutions: Calculating variances
+**************************************************************************************
+
+ .. csv-table::
+ :widths: 1,99
+ :class: borderless
+
+ ".. centered::
+ Step 6 from:
+
+ .. image:: ../../images/simple_tutorial_step5_sans_key.png", "
+ We wanted to calculate the sample variance of a large list of random numbers. We
+ have seen previously how we can split up this large problem into small pieces
+ (using :ref:`@split <manual.split>` in |manual.split.chapter_num|), and work out the
+ partial solutions for each sub-problem (calculating sums with :ref:`@transform<manual.transform>`
+ in |manual.transform.chapter_num| ).
+
+ All that remains is to join up the partial solutions from the different ``.sums`` files
+ and turn these into the variance as follows::
+
+ variance = (sum_squared - sum * sum / N)/N
+
+ where ``N`` is the number of values
+
+ See the `wikipedia <http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance>`_ entry for a discussion of
+ why this is a very naive approach!"
+
+
+
+ To do this, all we have to do is go through all the ``*.sums`` files and
+ add up the ``sums`` and ``sum_squared`` for each chunk. We can then apply the above (naive) formula.
+
+ Merging files is straightforward in **Ruffus**:
+ ::
+
+ @merge(step_5_calculate_sum_of_squares, "variance.result")
+ def step_6_calculate_variance (input_file_names, output_file_name):
+ #
+ # add together sums and sums of squares from each input_file_name
+ # calculate variance and write to output_file_name
+ ""
+
+
+ The :ref:`@merge <decorators.merge>` decorator tells *Ruffus* to take all the files from the step 5 task (i.e. ``*.sums``),
+ and produce a single merged file, ``variance.result``.
+
+ Thus, if ``step_5_calculate_sum_of_squares`` created ``1.sums``, ``2.sums`` and so on,
+ this would result in the following function call:
+
+ ::
+
+ step_6_calculate_variance (["1.sums", "2.sums"], "variance.result")
+
+
+ The final result is, of course, in ``variance.result``.
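+
+ A minimal sketch of what the body of ``step_6_calculate_variance`` might look like is
+ given below. It assumes, purely for illustration, that each ``.sums`` file holds three
+ whitespace-separated numbers (the sum, the sum of squares and the count of values in
+ that chunk); the real format is defined in the accompanying tutorial code:
+
+ ::
+
+     @merge(step_5_calculate_sum_of_squares, "variance.result")
+     def step_6_calculate_variance (input_file_names, output_file_name):
+         total, total_squared, N = 0.0, 0.0, 0.0
+         for input_file_name in input_file_names:
+             # assumed per-chunk format: "<sum> <sum_squared> <count>"
+             chunk_sum, chunk_sum_squared, chunk_count = map(float, open(input_file_name).read().split())
+             total         += chunk_sum
+             total_squared += chunk_sum_squared
+             N             += chunk_count
+         variance = (total_squared - total * total / N) / N
+         open(output_file_name, "w").write("%s\n" % variance)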
+
+
+
+
+
diff --git a/doc/_build/html/_sources/tutorials/manual/onthefly.txt b/doc/_build/html/_sources/tutorials/manual/onthefly.txt
new file mode 100644
index 0000000..75f32d7
--- /dev/null
+++ b/doc/_build/html/_sources/tutorials/manual/onthefly.txt
@@ -0,0 +1,138 @@
+.. include:: ../../global.inc
+.. include:: chapter_numbers.inc
+
+.. _manual.on_the_fly:
+
+#######################################################################################################
+|manual.on_the_fly.chapter_num|: `Generating parameters on the fly with` **@files**
+#######################################################################################################
+ .. hlist::
+
+ * :ref:`Manual overview <manual>`
+ * :ref:`@files on-the-fly syntax in detail <decorators.files_on_the_fly>`
+
+ | Sometimes, it is necessary, or perhaps more convenient, to generate parameters on the fly or
+ at runtime.
+ | This powerful ability to generate the exact parameters you need is
+ sometimes worth the slight increase in complexity.
+
+ .. index::
+ pair: @files; Tutorial on-the-fly parameter generation
+
+
+***********************
+**@files**
+***********************
+ To generate parameters on the fly, pass the **@files** decorator a :term:`generator` function which
+ yields one list / tuple of parameters per job. For example::
+
+ from ruffus import *
+ import sys
+ def generate_parameters_on_the_fly():
+ """
+ returns one list of parameters per job
+ """
+ parameters = [
+ ['A.input', 'A.output', (1, 2)], # 1st job
+ ['B.input', 'B.output', (3, 4)], # 2nd job
+ ['C.input', 'C.output', (5, 6)], # 3rd job
+ ]
+ for job_parameters in parameters:
+ yield job_parameters
+
+ @files(generate_parameters_on_the_fly)
+ def pipeline_task(input, output, extra):
+ open(output, "w").write(open(input).read())
+ sys.stderr.write("%d + %d => %d\n" % (extra[0] , extra[1], extra[0] + extra[1]))
+
+ pipeline_run([pipeline_task])
+
+
+ .. ???
+
+ Produces::
+
+ Task = pipeline_task
+ 1 + 2 => 3
+ Job = ["A.input" -> "A.output", (1, 2)] completed
+ 3 + 4 => 7
+ Job = ["B.input" -> "B.output", (3, 4)] completed
+ 5 + 6 => 11
+ Job = ["C.input" -> "C.output", (5, 6)] completed
+
+
+ .. note::
+
+ Be aware that the parameter generating function may be invoked
+ :ref:`more than once<manual.dependencies.checking_multiple_times>`:
+
+ | The first time to check if this part of the pipeline is up-to-date.
+ | The second time when the pipeline task function is run.
+
+ The resulting *inputs*, *outputs* and any additional extra parameters per job are
+ treated normally for the purposes of checking to see if jobs are up-to-date and
+ need to be re-run.
+
+
+**********************************************
+ Permutations and Combinations
+**********************************************
+
+ The :ref:`accompanying example<manual.on_the_fly_code>` provides a more realistic reason why
+ you would want to generate parameters on the fly. It is a fun piece of code, which generates
+ N x M combinations from two sets of files as the *inputs* of a pipeline stage.
+
+ The *inputs* / *outputs* filenames are generated by a pair of nested for-loops producing
+ the N (outer loop) x M (inner loop) combinations, with the appropriate parameters
+ for each job ``yield``\ ed per iteration of the inner loop. The gist of this is:
+
+ ::
+
+ #_________________________________________________________________________________________
+ #
+ # Step 1:
+ #
+ # N x M jobs
+ #_________________________________________________________________________________________
+ def generate_simulation_params ():
+ """
+ Custom function to generate
+ file names for gene/gwas simulation study
+ """
+ for sim_file in get_simulation_files():
+ for (gene, gwas) in get_gene_gwas_file_pairs():
+ result_file = "%s.%s.results" % (gene, sim_file)
+ yield (gene, gwas, sim_file), result_file
+
+
+
+ @files(generate_simulation_params)
+ def gwas_simulation(input_files, output_file):
+ "..."
+
+ If ``get_simulation_files()`` produces:
+ ::
+
+ ['a.sim', 'b.sim', 'c.sim']
+
+ and ``get_gene_gwas_file_pairs()`` produces:
+ ::
+
+ [('1.gene', '1.gwas'), ('2.gene', '2.gwas')]
+
+ then we would end up with ``3`` x ``2`` = ``6`` jobs and the following equivalent function calls:
+
+ ::
+
+ gwas_simulation(('1.gene', '1.gwas', 'a.sim'), "1.gene.a.sim.results")
+ gwas_simulation(('2.gene', '2.gwas', 'a.sim'), "2.gene.a.sim.results")
+ gwas_simulation(('1.gene', '1.gwas', 'b.sim'), "1.gene.b.sim.results")
+ gwas_simulation(('2.gene', '2.gwas', 'b.sim'), "2.gene.b.sim.results")
+ gwas_simulation(('1.gene', '1.gwas', 'c.sim'), "1.gene.c.sim.results")
+ gwas_simulation(('2.gene', '2.gwas', 'c.sim'), "2.gene.c.sim.results")
+
+
+ The :ref:`accompanying code<manual.on_the_fly_code>` looks slightly more complicated because
+ of some extra bookkeeping.
+
+
+
diff --git a/doc/_build/html/_sources/tutorials/manual/onthefly_code.txt b/doc/_build/html/_sources/tutorials/manual/onthefly_code.txt
new file mode 100644
index 0000000..2b10d0c
--- /dev/null
+++ b/doc/_build/html/_sources/tutorials/manual/onthefly_code.txt
@@ -0,0 +1,320 @@
+.. include:: ../../global.inc
+.. _manual.on_the_fly_code:
+
+###################################################################
+Code for Chapter 10: Generating parameters on the fly
+###################################################################
+ * :ref:`Manual overview <manual>`
+ * :ref:`@files on-the-fly syntax in detail <decorators.files_on_the_fly>`
+ * :ref:`Back <manual.on_the_fly>`
+
+
+ | This script takes N pairs of input files (with the suffixes .gene and .gwas)
+ | and runs them against M sets of simulation data (with the suffix .simulation).
+ | A summary per input file pair is then produced.
+
+
+ In pseudo-code:
+
+ STEP_1:
+
+ ::
+
+ for n_file in NNN_pairs_of_input_files:
+ for m_file in MMM_simulation_data:
+
+ [n_file.gene,
+ n_file.gwas,
+ m_file.simulation] -> n_file.m_file.simulation_res
+
+
+ STEP_2:
+
+ ::
+
+ for n_file in NNN_pairs_of_input_files:
+
+ n_file.*.simulation_res -> n_file.mean
+
+
+ | n = CNT_GENE_GWAS_FILES
+ | m = CNT_SIMULATION_FILES
+
+************************************
+Code
+************************************
+ ::
+
+ from ruffus import *
+ import os
+
+ #88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+ # constants
+
+ #88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+ working_dir = "temp_NxM"
+ simulation_data_dir = os.path.join(working_dir, "simulation")
+ gene_data_dir = os.path.join(working_dir, "gene")
+ CNT_GENE_GWAS_FILES = 2
+ CNT_SIMULATION_FILES = 3
+
+
+
+ #88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+ # imports
+
+ #88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+ import os, sys
+ from itertools import izip
+ import glob
+ #88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+ # Functions
+
+
+ #88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+ #_________________________________________________________________________________________
+ #
+ # get gene gwas file pairs
+ #
+ #_________________________________________________________________________________________
+ def get_gene_gwas_file_pairs( ):
+ """
+ Helper function to get all *.gene, *.gwas files from the directory specified
+ in --gene_data_dir
+
+ Returns
+ file pairs with both .gene and .gwas extensions,
+ corresponding roots (no extension) of each file
+ """
+ gene_files = glob.glob(os.path.join(gene_data_dir, "*.gene"))
+ gwas_files = glob.glob(os.path.join(gene_data_dir, "*.gwas"))
+ #
+ common_roots = set(map(lambda x: os.path.splitext(os.path.split(x)[1])[0], gene_files))
+ common_roots &=set(map(lambda x: os.path.splitext(os.path.split(x)[1])[0], gwas_files))
+ common_roots = list(common_roots)
+ #
+ p = os.path; g_dir = gene_data_dir
+ file_pairs = [[p.join(g_dir, x + ".gene"), p.join(g_dir, x + ".gwas")] for x in common_roots]
+ return file_pairs, common_roots
+
+ #_________________________________________________________________________________________
+ #
+ # get simulation files
+ #
+ #_________________________________________________________________________________________
+ def get_simulation_files( ):
+ """
+ Helper function to get all *.simulation files from the directory specified
+ in --simulation_data_dir
+ Returns
+ file with .simulation extensions,
+ corresponding roots (no extension) of each file
+ """
+ simulation_files = glob.glob(os.path.join(simulation_data_dir, "*.simulation"))
+ simulation_roots =map(lambda x: os.path.splitext(os.path.split(x)[1])[0], simulation_files)
+ return simulation_files, simulation_roots
+
+
+
+ #88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+ # Main logic
+
+
+ #88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+
+
+
+
+
+
+ #_________________________________________________________________________________________
+ #
+ # setup_simulation_data
+ #
+ #_________________________________________________________________________________________
+
+ #
+ # mkdir: makes sure output directories exist before task
+ #
+ @follows(mkdir(gene_data_dir, simulation_data_dir))
+ def setup_simulation_data ():
+ """
+ create simulation files
+ """
+ for i in range(CNT_GENE_GWAS_FILES):
+ open(os.path.join(gene_data_dir, "%03d.gene" % i), "w")
+ open(os.path.join(gene_data_dir, "%03d.gwas" % i), "w")
+ #
+ # gene files without corresponding gwas and vice versa
+ open(os.path.join(gene_data_dir, "orphan1.gene"), "w")
+ open(os.path.join(gene_data_dir, "orphan2.gwas"), "w")
+ open(os.path.join(gene_data_dir, "orphan3.gwas"), "w")
+ #
+ for i in range(CNT_SIMULATION_FILES):
+ open(os.path.join(simulation_data_dir, "%03d.simulation" % i), "w")
+
+
+
+
+ #_________________________________________________________________________________________
+ #
+ # cleanup_simulation_data
+ #
+ #_________________________________________________________________________________________
+ def try_rmdir (d):
+ if os.path.exists(d):
+ try:
+ os.rmdir(d)
+ except OSError:
+ sys.stderr.write("Warning:\t%s is not empty and will not be removed.\n" % d)
+
+
+
+ def cleanup_simulation_data ():
+ """
+ cleanup files
+ """
+ sys.stderr.write("Cleanup working directory and simulation files.\n")
+ #
+ # cleanup gene and gwas files
+ #
+ for f in glob.glob(os.path.join(gene_data_dir, "*.gene")):
+ os.unlink(f)
+ for f in glob.glob(os.path.join(gene_data_dir, "*.gwas")):
+ os.unlink(f)
+ try_rmdir(gene_data_dir)
+ #
+ # cleanup simulation
+ #
+ for f in glob.glob(os.path.join(simulation_data_dir, "*.simulation")):
+ os.unlink(f)
+ try_rmdir(simulation_data_dir)
+ #
+ # cleanup working_dir
+ #
+ for f in glob.glob(os.path.join(working_dir, "simulation_results", "*.simulation_res")):
+ os.unlink(f)
+ try_rmdir(os.path.join(working_dir, "simulation_results"))
+ #
+ for f in glob.glob(os.path.join(working_dir, "*.mean")):
+ os.unlink(f)
+ try_rmdir(working_dir)
+
+
+ #_________________________________________________________________________________________
+ #
+ # Step 1:
+ #
+ # for n_file in NNN_pairs_of_input_files:
+ # for m_file in MMM_simulation_data:
+ #
+ # [n_file.gene,
+ # n_file.gwas,
+ # m_file.simulation] -> working_dir/n_file.m_file.simulation_res
+ #
+ #_________________________________________________________________________________________
+ def generate_simulation_params ():
+ """
+ Custom function to generate
+ file names for gene/gwas simulation study
+ """
+ simulation_files, simulation_file_roots = get_simulation_files()
+ gene_gwas_file_pairs, gene_gwas_file_roots = get_gene_gwas_file_pairs()
+ #
+ for sim_file, sim_file_root in izip(simulation_files, simulation_file_roots):
+ for (gene, gwas), gene_file_root in izip(gene_gwas_file_pairs, gene_gwas_file_roots):
+ #
+ result_file = "%s.%s.simulation_res" % (gene_file_root, sim_file_root)
+ result_file_path = os.path.join(working_dir, "simulation_results", result_file)
+ #
+ yield [gene, gwas, sim_file], result_file_path, gene_file_root, sim_file_root, result_file
+
+
+
+ #
+ # mkdir: makes sure output directories exist before task
+ #
+ @follows(mkdir(working_dir, os.path.join(working_dir, "simulation_results")))
+ @files(generate_simulation_params)
+ def gwas_simulation(input_files, result_file_path, gene_file_root, sim_file_root, result_file):
+ """
+ Dummy calculation of gene gwas vs simulation data
+ Normally runs in parallel on a computational cluster
+ """
+ (gene_file,
+ gwas_file,
+ simulation_data_file) = input_files
+ #
+ simulation_res_file = open(result_file_path, "w")
+ simulation_res_file.write("%s + %s -> %s\n" % (gene_file_root, sim_file_root, result_file))
+
+
+ #_________________________________________________________________________________________
+ #
+ # Step 2:
+ #
+ # Statistical summary per gene/gwas file pair
+ #
+ # for n_file in NNN_pairs_of_input_files:
+ # working_dir/simulation_results/n.*.simulation_res
+ # -> working_dir/n.mean
+ #
+ #_________________________________________________________________________________________
+
+
+ @collate(gwas_simulation, regex(r"simulation_results/(\d+).\d+.simulation_res"), r"\1.mean")
+ @posttask(lambda : sys.stdout.write("\nOK\n"))
+ def statistical_summary (result_files, summary_file):
+ """
+ Simulate statistical summary
+ """
+ summary_file = open(summary_file, "w")
+ for f in result_files:
+ summary_file.write(open(f).read())
+
+
+
+ pipeline_run([setup_simulation_data], multiprocess = 5, verbose = 2)
+ pipeline_run([statistical_summary], multiprocess = 5, verbose = 2)
+
+ # uncomment to print out the flowchart
+ #
+ # pipeline_printout(sys.stdout, [statistical_summary], verbose=2)
+ # graph_printout ("flowchart.jpg", "jpg", [statistical_summary])
+ #
+
+ cleanup_simulation_data ()
+
+
+
+
+************************************
+Resulting Output
+************************************
+ ::
+
+ >>> pipeline_run([setup_simulation_data], multiprocess = 5, verbose = 2)
+ Make directories [temp_NxM/gene, temp_NxM/simulation] completed
+ Completed Task = setup_simulation_data_mkdir_1
+ Job completed
+ Completed Task = setup_simulation_data
+
+
+ >>> pipeline_run([statistical_summary], multiprocess = 5, verbose = 2)
+ Make directories [temp_NxM, temp_NxM/simulation_results] completed
+ Completed Task = gwas_simulation_mkdir_1
+ Job = [[temp_NxM/gene/001.gene, temp_NxM/gene/001.gwas, temp_NxM/simulation/000.simulation] -> temp_NxM/simulation_results/001.000.simulation_res, 001, 000, 001.000.simulation_res] completed
+ Job = [[temp_NxM/gene/000.gene, temp_NxM/gene/000.gwas, temp_NxM/simulation/000.simulation] -> temp_NxM/simulation_results/000.000.simulation_res, 000, 000, 000.000.simulation_res] completed
+ Job = [[temp_NxM/gene/001.gene, temp_NxM/gene/001.gwas, temp_NxM/simulation/001.simulation] -> temp_NxM/simulation_results/001.001.simulation_res, 001, 001, 001.001.simulation_res] completed
+ Job = [[temp_NxM/gene/000.gene, temp_NxM/gene/000.gwas, temp_NxM/simulation/001.simulation] -> temp_NxM/simulation_results/000.001.simulation_res, 000, 001, 000.001.simulation_res] completed
+ Job = [[temp_NxM/gene/000.gene, temp_NxM/gene/000.gwas, temp_NxM/simulation/002.simulation] -> temp_NxM/simulation_results/000.002.simulation_res, 000, 002, 000.002.simulation_res] completed
+ Job = [[temp_NxM/gene/001.gene, temp_NxM/gene/001.gwas, temp_NxM/simulation/002.simulation] -> temp_NxM/simulation_results/001.002.simulation_res, 001, 002, 001.002.simulation_res] completed
+ Completed Task = gwas_simulation
+ Job = [[temp_NxM/simulation_results/000.000.simulation_res, temp_NxM/simulation_results/000.001.simulation_res, temp_NxM/simulation_results/000.002.simulation_res] -> temp_NxM/000.mean] completed
+ Job = [[temp_NxM/simulation_results/001.000.simulation_res, temp_NxM/simulation_results/001.001.simulation_res, temp_NxM/simulation_results/001.002.simulation_res] -> temp_NxM/001.mean] completed
diff --git a/doc/_build/html/_sources/tutorials/manual/parallel.txt b/doc/_build/html/_sources/tutorials/manual/parallel.txt
new file mode 100644
index 0000000..2f05355
--- /dev/null
+++ b/doc/_build/html/_sources/tutorials/manual/parallel.txt
@@ -0,0 +1,65 @@
+.. include:: ../../global.inc
+.. include:: chapter_numbers.inc
+
+.. _manual.parallel:
+
+#######################################################################################################################
+|manual.parallel.chapter_num|: `Esoteric: Running jobs in parallel without using files with` **@parallel**
+#######################################################################################################################
+
+ .. hlist::
+
+ * :ref:`Manual overview <manual>`
+ * :ref:`@parallel<decorators.parallel>` syntax in detail
+
+
+ .. index::
+ pair: @parallel; Manual
+
+
+
+
+
+***************************************
+**@parallel**
+***************************************
+
+ **@parallel** supplies parameters for multiple **jobs** exactly like :ref:`@files<manual.files>` except that:
+
+ #. The first two parameters are not treated as *inputs* and *outputs* parameters,
+ and strings are not assumed to be file names
+ #. Thus, no checking of whether each job is up-to-date is made using *inputs* and *outputs* files
+ #. No expansion of |glob|_ patterns or of *output* from previous tasks is carried out.
+
+ This syntax is most useful when a pipeline stage does not involve creating or consuming any files, and
+ you wish to forego the conveniences of :ref:`@files<manual.files>`, :ref:`@transform<manual.transform>` etc.
+
+ The following code performs some arithmetic in parallel:
+
+ ::
+
+ import sys
+ from ruffus import *
+ parameters = [
+ ['A', 1, 2], # 1st job
+ ['B', 3, 4], # 2nd job
+ ['C', 5, 6], # 3rd job
+ ]
+ @parallel(parameters)
+ def parallel_task(name, param1, param2):
+ sys.stderr.write(" Parallel task %s: " % name)
+ sys.stderr.write("%d + %d = %d\n" % (param1, param2, param1 + param2))
+
+ pipeline_run([parallel_task])
+
+ produces the following::
+
+ Task = parallel_task
+ Parallel task A: 1 + 2 = 3
+ Job = ["A", 1, 2] completed
+ Parallel task B: 3 + 4 = 7
+ Job = ["B", 3, 4] completed
+ Parallel task C: 5 + 6 = 11
+ Job = ["C", 5, 6] completed
+
+
diff --git a/doc/_build/html/_sources/tutorials/manual/parallel_processing.txt b/doc/_build/html/_sources/tutorials/manual/parallel_processing.txt
new file mode 100644
index 0000000..08c4143
--- /dev/null
+++ b/doc/_build/html/_sources/tutorials/manual/parallel_processing.txt
@@ -0,0 +1,52 @@
+.. include:: ../../global.inc
+.. include:: chapter_numbers.inc
+
+.. _manual.multiprocessing:
+
+#######################################################################################
+|manual.multiprocessing.chapter_num|: `Running Tasks and Jobs in parallel`
+#######################################################################################
+ .. hlist::
+
+ * :ref:`Manual overview <manual>`
+
+=====================
+Multi Processing
+=====================
+
+ *Ruffus* uses python `multiprocessing <http://docs.python.org/library/multiprocessing.html>`_ to run
+ each job in a separate process.
+
+ This means that jobs do *not* necessarily complete in the order of the defined parameters.
+ Task hierarchies are, of course, inviolate: upstream tasks run before downstream, dependent tasks.
+
+ Tasks that are independent (i.e. do not precede each other) may be run in parallel as well.
+
+ The number of concurrent jobs can be set in :ref:`pipeline_run<pipeline_functions.pipeline_run>`:
+
+ ::
+
+ pipeline_run([parallel_task], multiprocess = 5)
+
+
+ If ``multiprocess`` is set to 1, then jobs will be run in a single process.
+
+
+=====================
+Data sharing
+=====================
+
+ Running jobs in separate processes allows *Ruffus* to make full use of the multiple
+ processors in modern computers. However, some of the
+ `multiprocessing guidelines <http://docs.python.org/library/multiprocessing.html#multiprocessing-programming>`_
+ should be borne in mind when writing *Ruffus* pipelines. In particular:
+
+ * Try not to pass large amounts of data between jobs, or at least be aware that this has to be marshalled
+ across process boundaries.
+
+ * Only data which can be `pickled <http://docs.python.org/library/pickle.html>`_ can be passed as
+ parameters to *Ruffus* task functions. Happily, that applies to almost any Python data type.
+ The use of a rare, unpicklable object will cause python to complain (fail) loudly when *Ruffus* pipelines
+ are run (see the sketch below).
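+
+ A minimal sketch (the file name and the nested extra parameter below are illustrative
+ only): built-in types such as strings, numbers, tuples and dicts are picklable, so they
+ can be passed as extra parameters without any special handling:
+
+ ::
+
+     from ruffus import *
+
+     # a nested, picklable structure passed as an extra parameter
+     settings = {"threshold": 0.05, "labels": ("a", "b")}
+
+     @files(None, "pickling.output", settings)
+     def picklable_parameters_task(input_file, output_file, extra_settings):
+         open(output_file, "w").write(str(extra_settings))
+
+     pipeline_run([picklable_parameters_task], multiprocess = 5)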
+
+
diff --git a/doc/_build/html/_sources/tutorials/manual/posttask.txt b/doc/_build/html/_sources/tutorials/manual/posttask.txt
new file mode 100644
index 0000000..d974889
--- /dev/null
+++ b/doc/_build/html/_sources/tutorials/manual/posttask.txt
@@ -0,0 +1,110 @@
+.. include:: ../../global.inc
+.. include:: chapter_numbers.inc
+
+.. _manual.posttask:
+
+####################################################################################################################
+|manual.posttask.chapter_num|: `Signal the completion of each stage of our pipeline with` **@posttask**
+####################################################################################################################
+
+ .. hlist::
+
+ * :ref:`Manual overview <manual>`
+ * :ref:`@posttask <decorators.posttask>` syntax in detail
+
+ It is often useful to signal the completion of each task by specifying a specific
+ action to be taken or function to be called. This can range from
+ printing out a message, or ``touching`` a sentinel file,
+ to emailing the author. This is particularly useful if the :term:`task` is a recipe applied to an unspecified number
+ of parameters in parallel in different :term:`job`\ s. If the task is never run, or if it
+ fails, then, needless to say, no task completion action will happen.
+
+
+ *Ruffus* uses the :ref:`@posttask <decorators.posttask>` decorator for this purpose.
+
+ .. index::
+ pair: @posttask; Manual
+
+
+=================
+**@posttask**
+=================
+
+ We can signal the completion of each task by specifying
+ one or more function(s) using ``@posttask`` ::
+
+ from ruffus import *
+
+ def task_finished():
+ print "hooray"
+
+ @posttask(task_finished)
+ @files(None, "a.1")
+ def create_if_necessary(input_file, output_file):
+ open(output_file, "w")
+
+ pipeline_run([create_if_necessary])
+
+
+ This is such a short function that we might as well write it in-line:
+
+ ::
+
+ import sys
+
+ @posttask(lambda: sys.stdout.write("hooray\n"))
+ @files(None, "a.1")
+ def create_if_necessary(input_file, output_file):
+ open(output_file, "w")
+
+
+.. note::
+
+ The function(s) provided to ``@posttask`` will be called if the pipeline passes
+ through a task, even if none of its jobs are run because they are up-to-date.
+ This happens when an upstream task is out-of-date, and the execution passes through
+ this point in the pipeline. See the example in :ref:`Chapter 9<manual.dependencies>`
+ of this manual.
+
+
+.. index::
+ single: @posttask; touchfile (Manual)
+ single: touchfile ; @posttask (Manual)
+
+
+.. _manual.posttask.touch_file:
+
+============================================
+:ref:`touch_file<decorators.touch_file>`
+============================================
+
+ The most common way to note the completion of a task is to create some sort of
+ "flag" file. Each stage in a traditional ``make`` pipeline would contain a
+ ``touch completed.flag``.
+
+ This is so common that **Ruffus** provides a special shorthand called
+ :ref:`touch_file<decorators.touch_file>`::
+
+ from ruffus import *
+
+ @posttask(touch_file("task_completed.flag"))
+ @files(None, "a.1")
+ def create_if_necessary(input_file, output_file):
+ open(output_file, "w")
+
+ pipeline_run([create_if_necessary])
+
+=======================================
+Adding several post task actions
+=======================================
+ You can, of course, add more than one action to be taken on completion of the
+ task, either by stacking up as many :ref:`@posttask<decorators.posttask>` decorators
+ as necessary, or by including several functions in the same **@posttask**:
+
+ ::
+
+ @posttask(print_hooray, print_whoppee)
+ @posttask(print_hip_hip, touch_file("sentinel_flag"))
+ @files(None, "a.1")
+ def your_pipeline_function (input_file_names, output_file_name):
+ ""
+
+
diff --git a/doc/_build/html/_sources/tutorials/manual/split.txt b/doc/_build/html/_sources/tutorials/manual/split.txt
new file mode 100644
index 0000000..f7689fd
--- /dev/null
+++ b/doc/_build/html/_sources/tutorials/manual/split.txt
@@ -0,0 +1,177 @@
+.. include:: ../../global.inc
+.. include:: chapter_numbers.inc
+
+.. _manual.split:
+
+###################################################################################
+|manual.split.chapter_num|: `Splitting up large tasks / files with` **@split**
+###################################################################################
+ .. hlist::
+
+ * :ref:`Manual overview <manual>`
+ * :ref:`@split <decorators.split>` syntax in detail
+
+ A common requirement in computational pipelines is to split up a large task into
+ small jobs which can be run on different processors (or sent to a computational
+ cluster). Very often, the number of jobs depends dynamically on the size of the
+ task, and cannot be known for sure beforehand.
+
+ *Ruffus* uses the :ref:`@split <decorators.split>` decorator to indicate that
+ the :term:`task` function will produce an indeterminate number of output files.
+
+
+
+ .. index::
+ pair: @split; Manual
+
+
+=================
+**@split**
+=================
+ This example is borrowed from :ref:`step 5 <Simple_Tutorial_5th_step>` of the simple tutorial.
+
+ .. note :: See :ref:`accompanying Python Code <Simple_Tutorial_5th_step_code>`
+
+**************************************************************************************
+Splitting up a long list of random numbers to calculate their variance
+**************************************************************************************
+
+ .. csv-table::
+ :widths: 1,99
+ :class: borderless
+
+ ".. centered::
+ Step 5 from the tutorial:
+
+ .. image:: ../../images/simple_tutorial_step5_sans_key.png", "
+ Suppose we had a list of 100,000 random numbers in the file ``random_numbers.list``:
+
+ ::
+
+ import random
+ NUMBER_OF_RANDOMS = 100000
+ f = open('random_numbers.list', 'w')
+ for i in range(NUMBER_OF_RANDOMS):
+ f.write('%g\n' % (random.random() * 100.0))
+
+
+ We might want to calculate the sample variance more quickly by splitting them
+ into ``NNN`` parcels of 1000 numbers each and working on them in parallel.
+ In this case we know that ``NNN == 100``, but usually the number of resulting files
+ is only apparent after we have finished processing our starting file."
+
+
+ Our pipeline function needs to take the random numbers file ``random_numbers.list``,
+ read the random numbers from it, and write out a new file every 1000 lines.
+
+ The *Ruffus* decorator :ref:`@split<decorators.split>` is designed specifically for
+ splitting up *inputs* into an indeterminate ``NNN`` number of *outputs*:
+
+ .. image:: ../../images/simple_tutorial_split.png
+
+ .. ::
+
+ ::
+
+ @split("random_numbers.list", "*.chunks")
+ def step_4_split_numbers_into_chunks (input_file_name, output_files):
+ #
+ """code goes here"""
+
+
+ Ruffus will set
+
+ | ``input_file_name`` to ``"random_numbers.list"``
+ | ``output_files`` to all files which match ``*.chunks`` (i.e. ``"1.chunks"``, ``"2.chunks"`` etc.).
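+
+ A minimal sketch of such a splitting function is shown below (it assumes chunks of
+ 1000 lines; the clean-up of left-over ``.chunks`` files mirrors the accompanying
+ tutorial code):
+
+ ::
+
+     import os
+
+     @split("random_numbers.list", "*.chunks")
+     def step_4_split_numbers_into_chunks (input_file_name, output_files):
+         # remove any chunk files left over from a previous, larger run
+         for oo in output_files:
+             os.unlink(oo)
+         CHUNK_SIZE  = 1000
+         output_file = None
+         for ii, line in enumerate(open(input_file_name)):
+             if ii % CHUNK_SIZE == 0:
+                 output_file = open("%d.chunks" % (ii // CHUNK_SIZE + 1), "w")
+             output_file.write(line)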
+
+
+
+.. _manual.split.output_files:
+
+=================
+Output files
+=================
+
+ The *output* (second) parameter of **@split** usually contains a
+ |glob|_ pattern like the ``*.chunks`` above.
+
+ .. note::
+ **Ruffus** is quite relaxed about the contents of the ``output`` parameter.
+ Strings are treated as file names. Strings containing |glob|_ patterns are expanded.
+ Other types are passed verbatim to the decorated task function.
+
+ The files which match the |glob|_ will be passed as the actual parameters to the job
+ function. Thus, the first time you run the example code, ``*.chunks`` will match nothing (an empty
+ list) because no ``.chunks`` files have been created yet, resulting in the following call:
+
+ ::
+
+ step_4_split_numbers_into_chunks ("random_numbers.list", [])
+
+ After that ``*.chunks`` will match the list of current ``.chunks`` files created by
+ the previous pipeline run.
+
+
+
+ The file names in *output* are generally out of date or superfluous: they are useful
+ mainly for cleaning up detritus from previous runs
+ (have a look at :ref:`step_4_split_numbers_into_chunks(...) <Simple_Tutorial_5th_step_code>`).
+
+ .. note ::
+
+ It is important, nevertheless, to specify the list of *output* files correctly.
+ Otherwise, dependent tasks will not know what files you have created, and it will
+ not be possible to chain the *output* of this pipeline task automatically into the
+ *inputs* of the next step.
+
+ You can specify multiple |glob|_ patterns to match *all* the files which are the
+ result of the splitting task function. These can even cover different directories,
+ or groups of file names. This is a more extreme example:
+
+ ::
+
+ @split("input.file", ['a*.bits', 'b*.pieces', 'somewhere_else/c*.stuff'])
+ def split_function (input_filename, output_files):
+ "Code to split up 'input.file'"
+
+
+
+ The actual resulting files of this task function are not constrained by the file names
+ in the *output* parameter of the function. The whole point of **@split** is that the number
+ of resulting output files cannot be known beforehand, after all.
+
+******************
+Example
+******************
+
+
+ Suppose ``random_numbers.list`` can be split into four pieces; this function will create
+ ``1.chunks``, ``2.chunks``, ``3.chunks`` and ``4.chunks``.
+
+ Subsequently, we receive a larger ``random_numbers.list`` which should be split into 10
+ pieces. If the pipeline is called again, the task function receives the following parameters:
+
+ ::
+
+ step_4_split_numbers_into_chunks("random_numbers.list",
+ ["1.chunks", # previously created files
+ "2.chunks", #
+ "3.chunks", #
+ "4.chunks" ]) #
+
+
+ This doesn't stop the function from creating the extra ``5.chunks``, ``6.chunks`` etc.
+
+ .. note::
+
+        Any task **@follow**\ ing and specifying
+        ``step_4_split_numbers_into_chunks(...)`` as its *inputs* parameter is going to receive
+        ``1.chunks``, ``...``, ``10.chunks``, and not merely the first four files.
+
+ In other words, dependent / down-stream tasks which obtain output files automatically
+ from the task decorated by **@split** receive the most current file list.
+ The |glob|_ patterns will be matched again to see exactly what files the task function
+ has created in reality *after* the task completes.
+
+
+
+
diff --git a/doc/_build/html/_sources/tutorials/manual/tasks_and_globs_in_inputs.txt b/doc/_build/html/_sources/tutorials/manual/tasks_and_globs_in_inputs.txt
new file mode 100644
index 0000000..b915ca0
--- /dev/null
+++ b/doc/_build/html/_sources/tutorials/manual/tasks_and_globs_in_inputs.txt
@@ -0,0 +1,265 @@
+.. include:: ../../global.inc
+.. include:: chapter_numbers.inc
+
+.. _manual.tasks_and_globs_in_inputs:
+
+#################################################################################################################
+|manual.tasks_and_globs_in_inputs.chapter_num|: Chaining pipeline `Tasks` together automatically
+#################################################################################################################
+ * :ref:`Manual overview <manual>`
+
+
+ .. index::
+ pair: tasks as recipes; Manual
+
+ In the previous chapter, we explained that **ruffus** determines the data flow through
+ your pipeline by calling your :term:`task` functions (normal python functions written
+ by you) with the right parameters at the right time, making sure that
+
+ #. only out-of-date parts of the pipeline will be re-run
+ #. multiple jobs can be run in parallel (on different processors if possible)
+ #. pipeline stages can be chained together automatically
+
+    This chapter is devoted to the last item: how the output of one stage of the pipeline
+    becomes the input of the next stage.
+
+.. _manual.tasks_as_input:
+
+.. index::
+ pair: tasks; as input parameters (Manual)
+ pair: tasks as input parameters; Manual
+ pair: inputs parameters; tasks
+
+
+
+==========================================================
+Tasks in the *inputs* parameters: Implicit dependencies
+==========================================================
+ **Ruffus** treats the first two parameters of each job in each task as the *inputs* and
+ *outputs* parameters respectively. If the *inputs* parameter contains strings, these
+ will be treated as the names of files required by that job.
+
+ If the *inputs* parameter contains any :term:`task`\ s, **Ruffus** will take the output
+ from these specified tasks as part of the current *inputs* parameter. In addition,
+    such tasks will be listed as prerequisites, much as if you had included them in a
+ separate ``@follows`` decorator.
+
+    For example, suppose we wanted to take the output files from ``task1`` and feed
+    them automatically to ``task2``; we might write the following code:
+
+ ::
+
+        task1_output_files = ("task1.output_a", "task1.output_b", "task1.output_c")
+
+        @follows(task1)
+        @files(task1_output_files, "task2.output")
+ def task2(input, output):
+ pass
+
+
+
+ This can be replaced by the much more concise syntax:
+
+ ::
+
+ @files(task1, "task2.output")
+ def task2(input, output):
+ pass
+
+
+    This means:
+
+    * Take the output from ``task1``, and feed it automatically into ``task2``.
+    * Also make sure that ``task2`` will not run until ``task1`` has completed, i.e. ``task1`` becomes a prerequisite of ``task2``.
+
+
+ In other words, ``task1`` and ``task2`` have been chained together automatically.
+ This is both a great convenience and makes the flow of data through a pipeline much clearer.
+
+
+
+.. index::
+    pair: tasks; referring to by name
+    pair: inputs parameters; referring to tasks by name
+
+.. _manual.output_from:
+
+==========================================================
+Referring to tasks by name in the *inputs* parameters
+==========================================================
+
+ :ref:`Chapter 1 <manual.follows.out_of_order>` explains that task functions can be
+ defined in any order so long as undefined tasks are referred to by their (fully qualified if
+ necessary) function name string.
+
+ You can similarly refer to tasks in the *inputs* parameter by name, as a text string.
+    Normally **Ruffus** assumes that strings are file names. To indicate that
+    you are referring to task function names instead, you need to
+ wrap the relevant parameter or (nested) parts of the parameter with the indicator object
+ :ref:`output_from("task_name") <decorators.output_from>`. Thus,
+
+ ::
+
+ @split(["a.file", ("b.file", output_from("task1", 76, "task2"))], "*.split")
+ def task2(input, output):
+ pass
+
+
+ is equivalent to:
+
+ ::
+
+ @split(["a.file", ("b.file", (task1, 76, task2))], "*.split")
+ def task2(input, output):
+ pass
+
+.. index::
+ pair: inputs parameters; globs
+ pair: globs in input parameters; Manual
+
+.. _manual.globs_as_input:
+
+=======================================
+Globs in the *inputs* parameters
+=======================================
+
+    As a syntactic convenience, **Ruffus** also allows you to specify a
+    |glob|_ pattern (e.g. ``*.txt``) in the
+    *inputs* parameter; it will be expanded automatically to the matching
+    file names. This applies to any strings within *inputs* which contain any of the characters ``*?[]``.
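+
+    For example, a minimal sketch (the file names are illustrative):
+
+    ::
+
+        # the single string "*.txt" is expanded to the list of matching file names,
+        # which is passed as the *inputs* parameter of the one and only job
+        @files("*.txt", "all_text.summary")
+        def summarise_text_files (input_file_names, output_file_name):
+            pass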
+
+
+
+.. index::
+ pair: tasks; combined with globs and files as input parameters (Manual)
+ pair: tasks combined with globs and files as input parameters; Manual
+ pair: globs; combined with tasks and files as input parameters (Manual)
+ pair: globs combined with tasks and files as input parameters; Manual
+
+
+.. _manual.mixing_tasks_globs_files:
+
+=========================================================
+Mixing globs, tasks and files as **inputs**
+=========================================================
+
+ **Ruffus** is very flexible in allowing you to mix
+ |glob|_ patterns, references to tasks and file names
+ in the data structures you pass as the **inputs** parameters.
+
+ Suppose, in the previous example,
+
+ * that ``task1`` produces the files
+ ::
+
+ "task1.output_a"
+ "task1.output_b"
+ "task1.output_c"
+
+ * that the following additional files are also present
+ ::
+
+ "extra.a"
+ "extra.c"
+
+ Then,
+
+ ::
+
+ @files(["1_more.file", "2_more.file", task1, "extra.*"], "task2.output")
+ def task2(input, output):
+ pass
+
+
+ would result in the combination of the specified file name, the expansion of the |glob|_,
+ and the results from the previous task:
+
+ ::
+
+ input == [
+ "1_more.file" , # specified file
+ "2_more.file" , # specified file
+ "task1.output_a", # from previous task
+ "task1.output_b", # from previous task
+ "task1.output_c", # from previous task
+ "extra.a" , # from glob expansion
+ "extra.c" , # from glob expansion
+ ]
+
+
+ In other words, |glob|_ patterns and tasks are expanded "in place" when they are part of
+ python lists, sets, or tuples.
+
+.. _manual.appending_tasks_globs_to_lists_sets_tuples:
+
+===============================================================
+Appending globs or tasks to pre-existing lists, sets or tuples
+===============================================================
+
+    Sometimes we want the *inputs* parameter to be a combination of |glob|_\ s and tasks,
+    plus an existing list of file names.
+
+ To elaborate on the above example, suppose we have a list of files:
+
+ ::
+
+ file_list = [ "1_more.file",
+ "2_more.file"]
+
+ Now we want the input to ``task2`` to be:
+
+ ::
+
+ file_list + task1 + "extra.*"
+
+    The closest we can get to this in python syntax is to turn ``task1`` and the |glob|_
+    into lists first and then add them together:
+
+ ::
+
+ @files(file_list + [task1] + ["extra.*"], "task2.output")
+ def task2(input, output):
+ pass
+
+
+ The same also works with tuples:
+
+ ::
+
+ file_list = ( "1_more.file",
+ "2_more.file")
+
+ @files(file_list + (task1, "extra.*"), "task2.output")
+ def task2(input, output):
+ pass
+
+
+    and sets (using the set union operator ``|``):
+
+ ::
+
+ file_list = set([ "1_more.file",
+ "2_more.file"])
+
+        @files(file_list | set([task1, "extra.*"]), "task2.output")
+ def task2(input, output):
+ pass
+
+.. _manual.understanding_complex_inputs:
+
+===============================================================
+Understanding complex *inputs* and *outputs* parameters
+===============================================================
+
+ In all cases, **Ruffus** tries to do the right thing, and to make the simple or
+ obvious case require the simplest, least onerous syntax.
+
+    If **Ruffus** sometimes does not behave the way you expect, please write to the authors:
+    it may be a bug!
+
+    In all other cases, the best thing to do is to write your **Ruffus** specifications and
+    check the results of :ref:`pipeline_printout <pipeline_functions.pipeline_printout>`
+    to make sure that your wishes are properly
+    reflected in the parameters sent to your pipelined tasks.
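+
+    For example, a quick check of what ``task2`` above would actually receive might look like
+    this sketch (``verbose = 3`` also prints the job parameters):
+
+    ::
+
+        import sys
+        pipeline_printout(sys.stdout, [task2], verbose = 3)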
+
+ In other words, read the :ref:`next chapter <manual.tracing_pipeline_parameters>`!
+
diff --git a/doc/_build/html/_sources/tutorials/manual/tasks_as_recipes.txt b/doc/_build/html/_sources/tutorials/manual/tasks_as_recipes.txt
new file mode 100644
index 0000000..39781b3
--- /dev/null
+++ b/doc/_build/html/_sources/tutorials/manual/tasks_as_recipes.txt
@@ -0,0 +1,192 @@
+.. include:: ../../global.inc
+.. include:: chapter_numbers.inc
+
+.. _manual.tasks_as_recipes:
+
+###################################################################
+|manual.tasks_as_recipes.chapter_num|: `Tasks and Recipes`
+###################################################################
+ * :ref:`Manual overview <manual>`
+
+
+ .. index::
+ pair: tasks as recipes; Manual
+
+
+ | The python functions which do the actual work of each stage or
+ :term:`task` of a **Ruffus** pipeline are written by you.
+ | The role of **Ruffus** is to make sure these functions are called in the right order,
+ with the right parameters, running in parallel using multiprocessing if desired.
+
+ **Ruffus** manages the data flowing through your pipeline by supplying the correct
+ parameters to your pipeline functions. In this way, you will get the following features
+ for free:
+
+ #. only out-of-date parts of the pipeline will be re-run
+ #. multiple jobs can be run in parallel (on different processors if possible)
+ #. pipeline stages can be chained together automatically
+
+ Much of the functionality of **ruffus** involves determining the data flow through
+ your pipeline, by governing how the output of one stage of the pipeline is supplied
+ as parameters to the functions of the next.
+
+ .. index::
+ pair: skip up-to-date; Manual
+
+.. _manual.skip_up_to_date:
+
+=======================================
+Skip jobs which are up to date
+=======================================
+
+    Very often it will be necessary to re-run a computational pipeline because part of the
+ data has changed. **Ruffus** will run only those stages of the pipeline
+ which are absolutely necessary.
+
+ By default, **Ruffus** uses file modification times to see which parts of the pipeline
+ are out of date, and which :term:`task`\s need to be run again. This is so convenient that
+ even if a pipeline is not file-based (if it, for example, uses database tables instead),
+    it may be worthwhile to use dummy "sentinel" files to manage the stages of a pipeline.
+
+ (It is also possible, as we shall
+ see later, to add custom functions to determine which parts of the pipeline are out
+    of date: see :ref:`@parallel <decorators.parallel>` and
+ :ref:`@check_if_uptodate <decorators.check_if_uptodate>`.)
+
+.. index::
+ single: inputs / outputs parameters
+
+.. _manual.io_parameters:
+
+.. index::
+ pair: inputs / outputs parameters; Manual
+
+=================================
+*Inputs* and *Outputs* parameters
+=================================
+ **Ruffus** treats the first two parameters of each job in each task as the *inputs* and
+ *outputs* parameters respectively. If these parameters are strings, or are sequences
+ which contain strings, these will be treated as the names of files required by and
+ produced by that job. The presence and modification times of the *inputs* and *outputs* files
+    will be used to check if it is necessary to rerun the job.
+
+ Apart from this, **Ruffus** imposes no other restrictions on the parameters for jobs, which
+ are passed verbatim to task functions.
+
+ Most of the time, it is sensible to stick with file names (strings) in the *inputs* and
+ *outputs* parameters but **Ruffus** does not try to second-guess what sort of data you
+ will be passing through your pipelines (except that strings represent file names).
+
+ Thus, given the following over-elaborate parameters (parameter passing will be discussed in
+ more detail from :ref:`|manual.files.chapter_num| <manual.files>`):
+
+ ::
+
+        [ [[1, 3], "afile.name", ("bfile.name", 72)],
+          [[56, 3.3], set([custom_object(), "output.file"])],
+          33.3,
+          "oops"]
+
+ This will be passed `"as is"` to your task function:
+
+ ::
+
+        do_something([[1, 3], "afile.name", ("bfile.name", 72)],          # input
+                     [[56, 3.3], set([custom_object(), "output.file"])],  # output
+                     33.3,                                                # extra parameter
+                     "oops")                                              # extra parameter
+
+
+    **Ruffus** will interpret this as:
+
+ ::
+
+ Input_parameter = [[1, 3], "afile.name", ("bfile.name", 72)]
+        Output_parameter = [[56, 3.3], set([custom_object(), "output.file"])]
+ Other_parameter_1 = 33.3
+ Other_parameter_2 = "oops"
+
+ **Ruffus** disregards the *structure* of your data, only identifying the (nested) strings.
+ Thus there are 2 input files:
+
+ ::
+
+ "afile.name"
+ "bfile.name"
+
+ and 1 output file:
+
+ ::
+
+ "output.file"
+
+
+
+
+
+
+.. index::
+ pair: rules; for rerunning jobs
+
+.. _manual.skip_up_to_date.rules:
+
+=======================================
+Checking if files are up to date
+=======================================
+ The following simple rules are used by **Ruffus**.
+
+ #. The pipeline stage will be rerun if:
+
+ * If any of the *inputs* files are new (newer than the *output* files)
+ * If any of the *output* files are missing
+
+ #. In addition, it is possible to run jobs which create files from scratch.
+
+ * If no *inputs* file names are supplied, the job will only run if any *output* file is missing.
+
+ #. Finally, if no *outputs* file names are supplied, the job will always run.
+
+
+ The :ref:`example <manual.files.example>` in the next chapter shows how this works in practice.
+
+
+.. index::
+ pair: Exception; Missing input files
+
+=======================================
+Missing files
+=======================================
+
+ If the *inputs* files for a job are missing, the task function will have no way
+ to produce its *output*. In this case, a ``MissingInputFileError`` exception will be raised
+ automatically. For example,
+
+ ::
+
+ task.MissingInputFileError: No way to run job: Input file ['a.1'] does not exist
+ for Job = ["a.1" -> "a.2", "A file"]
+
+.. index::
+ single: Timestamp resolution
+
+=======================================
+Caveats: Timestamp resolution
+=======================================
+
+    | Note that modification times have precision to the nearest second under some older file systems
+      (ext2/ext3?). This may also be true for networked file systems.
+    | **Ruffus** is very conservative, and assumes that files with *exactly* the same date stamp might have been
+      created in the wrong order, and will treat the job as out-of-date. This would result in some
+      jobs re-running unnecessarily, simply because an underlying coarse-grained file system does not
+      distinguish between successively created files with sufficient accuracy.
+
+    To get around this, **Ruffus** makes sure that each task is punctuated by a 1 second pause
+    (via ``time.sleep()``). If this gets in the way, and you are using a modern file system with
+    nanosecond timestamp resolution, you can turn off the delay by setting
+    ``one_second_per_job`` to ``False`` in :ref:`pipeline_run <pipeline_functions.pipeline_run>`.
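+
+    For example (a sketch; ``final_task`` stands for the last task of your own pipeline):
+
+    ::
+
+        # modern file system with fine-grained timestamps: skip the 1 second pause
+        pipeline_run([final_task], one_second_per_job = False)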
+
+ Later versions of **Ruffus** will allow file modification times to be saved at higher precision
+ in a log file or database to get around this.
+
+
+
diff --git a/doc/_build/html/_sources/tutorials/manual/tracing_pipeline_parameters.txt b/doc/_build/html/_sources/tutorials/manual/tracing_pipeline_parameters.txt
new file mode 100644
index 0000000..4890eda
--- /dev/null
+++ b/doc/_build/html/_sources/tutorials/manual/tracing_pipeline_parameters.txt
@@ -0,0 +1,115 @@
+.. include:: ../../global.inc
+.. include:: chapter_numbers.inc
+
+.. _manual.tracing_pipeline_parameters:
+
+#################################################################################################################
+|manual.tracing_pipeline_parameters.chapter_num|: Tracing pipeline parameters
+#################################################################################################################
+ * :ref:`Manual overview <manual>`
+
+
+ .. index::
+ pair: pipeline_printout; Manual
+ pair: Tracing pipeline parameters; Manual
+ pair: Debugging; Manual
+
+
+ The trickiest part of developing pipelines is understanding how your
+ data flows through the pipeline.
+
+ In **Ruffus**, your data is passed from one task function to another down
+ the pipeline by the chain of linked parameters. Sometimes, it may be difficult to
+ choose the right **Ruffus** syntax at first, or to understand which parameters in
+ what format are being passed to your function.
+
+ Whether you are learning how to use **ruffus**, or trying out a new
+ feature in **ruffus**, or just have a horrendously complicated pipeline
+ to debug (we have colleagues with >100 criss-crossing pipelined stages),
+ your best friend is :ref:`pipeline_printout(...) <pipeline_functions.pipeline_printout>`.
+
+ **pipeline_printout** displays the parameters which would be passed to each task function
+    for each job in your pipeline. In other words, it traces in detail how each function in
+    the pipeline is called.
+
+ It makes good sense to alternate between calls to **pipeline_printout** and **pipeline_run**
+ in the development of **Ruffus** pipelines (perhaps with the use of a command-line option),
+ so that you always know exactly how the pipeline is being invoked.
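+
+    A minimal sketch of such a command-line switch (the ``--just_print`` flag name is purely
+    illustrative):
+
+    ::
+
+        import sys
+
+        if "--just_print" in sys.argv:
+            pipeline_printout(sys.stdout, [second_task], verbose = 3)
+        else:
+            pipeline_run([second_task])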
+
+
+=======================================
+Printing out which jobs will be run
+=======================================
+
+    **pipeline_printout** is called in exactly the same way as **pipeline_run** but,
+    instead of running the pipeline, it just prints the tasks which are and are not up-to-date.
+
+ The ``verbose`` parameter controls how much detail is displayed.
+
+ ::
+
+ verbose = 0 : prints nothing
+ verbose = 1 : logs warnings and tasks which are not up-to-date and which will be run
+ verbose = 2 : logs doc strings for task functions as well
+ verbose = 3 : logs job parameters for jobs which are out-of-date
+ verbose = 4 : logs list of up-to-date tasks but parameters for out-of-date jobs
+ verbose = 5 : logs parameters for all jobs whether up-to-date or not
+ verbose = 10: logs messages useful only for debugging ruffus pipeline code
+
+
+    Let us take the two-step :ref:`pipeline<Simple_Tutorial_3nd_step_code>` from
+ the tutorial. :ref:`Pipeline_printout(...) <pipeline_functions.pipeline_printout>`
+ by default merely lists the two tasks which will be run in the pipeline:
+
+
+ .. image:: ../../images/simple_tutorial_pipeline_printout1.png
+
+ .. ::
+
+ ::
+
+ >>> pipeline_printout(sys.stdout, [second_task])
+
+ ________________________________________
+ Tasks which will be run:
+
+ Task = first_task
+ Task = second_task
+ ________________________________________
+
+
+
+ To see the input and output parameters of out-of-date jobs in the pipeline, we can increase the verbosity from the default (``1``) to ``3``:
+
+ .. image:: ../../images/simple_tutorial_pipeline_printout2.png
+
+ This is very useful for checking that the input and output parameters have been specified
+ correctly.
+
+=============================================
+Determining which jobs are out-of-date or not
+=============================================
+
+ It is often useful to see which tasks are or are not up-to-date. For example, if we
+ were to run the pipeline in full, and then modify one of the intermediate files, the
+ pipeline would be partially out of date.
+
+
+    Let us start by running the pipeline in full, but then modify ``job1.stage1`` so that the second task is no longer up-to-date::
+
+ pipeline_run([second_task])
+
+ # modify job1.stage1
+ open("job1.stage1", "w").close()
+
+
+ At a verbosity of ``5``, even jobs which are up-to-date will be displayed.
+    We can now see that there is only one job in ``second_task(...)`` which needs to be re-run
+    because ``job1.stage1`` has been modified after ``job1.stage2`` (highlighted in blue):
+
+ .. image:: ../../images/simple_tutorial_pipeline_printout3.png
+
+
+
+
+
diff --git a/doc/_build/html/_sources/tutorials/manual/transform.txt b/doc/_build/html/_sources/tutorials/manual/transform.txt
new file mode 100644
index 0000000..6aa8870
--- /dev/null
+++ b/doc/_build/html/_sources/tutorials/manual/transform.txt
@@ -0,0 +1,194 @@
+.. include:: ../../global.inc
+.. include:: chapter_numbers.inc
+
+.. _manual.transform:
+
+#######################################################################################################################
+|manual.transform.chapter_num|: `Applying the same recipe to create many different files with` **@transform**
+#######################################################################################################################
+ .. hlist::
+
+ * :ref:`Manual overview <manual>`
+ * :ref:`@transform <decorators.transform>` syntax in detail
+
+    Sometimes you have a list of data files which you want to send to the
+    same pipelined function, to apply
+    the same operation. The best way to manage this is to produce a corresponding
+    list of results files:
+
+    | Compiling C source files might *@transform* an ``a.c`` file to an ``a.o`` file.
+    | A ``grep`` operation might *@transform* a ``plays.king_lear.txt`` file to a ``plays.king_lear.counts`` file.
+
+ *Ruffus* uses the :ref:`@transform <decorators.transform>` decorator for this purpose.
+
+ When you **@transform** your data from one file type to another, you are not restricted just
+ to changing the file suffix. We shall see how, with the full power of regular
+ expressions behind you, you can sort the resulting
+ data into different directories, add indices and so on.
+
+
+
+ .. index::
+ pair: @transform; Manual
+
+
+=================
+**@transform**
+=================
+**************************************************************************************
+Worked example: calculating sums and sum of squares in parallel
+**************************************************************************************
+ This example is borrowed from :ref:`step 5 <Simple_Tutorial_5th_step>` of the simple tutorial.
+
+ .. note:: See :ref:`example code here <Simple_Tutorial_5th_step_code>`
+
+
+    Given a set of files, each with a set of random numbers, we want to calculate their
+ sums and sum of squares. The easiest way to do this is by providing a recipe for
+    transforming a ``*.chunks`` file containing a list of numbers into a ``*.sums`` file
+ with our sums and sum of squares.
+
+ *Ruffus* magically takes care of applying the same recipe (task function) to all the different
+ data files in parallel.
+
+ .. image:: ../../images/simple_tutorial_transform.png
+
+    The :ref:`@transform <decorators.transform>` decorator tells *Ruffus* to take files from the step 4 task (i.e. ``*.chunks``),
+    and produce files with the ``.sums`` suffix instead.
+
+ Thus if ``step_4_split_numbers_into_chunks`` created
+ ::
+
+ "1.chunks"
+ "2.chunks"
+ "3.chunks"
+
+ This would result in the following function calls:
+
+ ::
+
+        step_5_calculate_sum_of_squares ("1.chunks", "1.sums")
+        step_5_calculate_sum_of_squares ("2.chunks", "2.sums")
+        step_5_calculate_sum_of_squares ("3.chunks", "3.sums")
+
+ # etc...
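+
+    In code, the decorator would look something like this sketch (the function body is omitted):
+
+    ::
+
+        @transform(step_4_split_numbers_into_chunks,   # take the output of the previous task
+                   suffix(".chunks"),                  # match the ".chunks" suffix
+                   ".sums")                            # and replace it with ".sums"
+        def step_5_calculate_sum_of_squares (input_file_name, output_file_name):
+            """code goes here"""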
+
+================================================================================================
+Using :ref:`suffix(...) <decorators.suffix>` to give each output file a new suffix
+================================================================================================
+
+
+    The :ref:`suffix <decorators.suffix>` specification indicates that:
+
+    * only filenames ending with the suffix term (e.g. ``.chunks``) should be considered
+    * the text matching the suffix term should be replaced with the string in the *output* pattern.
+
+
+    This example assumes that the *inputs* and *outputs* each consist of a single string, but
+    **Ruffus** places no such constraints on the data flowing through your pipeline.
+
+ * If there are multiple file names (strings) contained within each *inputs* parameter,
+ then only the first will be used to generate the *output*
+ * Each string that is encountered in each *output* parameter will be used for suffix replacement.
+
+
+************************************************
+An example with more complex data structures
+************************************************
+ This will become much clearer with this example:
+
+
+ ::
+
+ inputs = [
+ ["file1.ext", 10 ], #job 1
+ [37.0, "file2.wrong_extension",
+ "file2_ignored.ext"], #job 2
+ "file3.ext" #job 3
+ ]
+
+        @transform(inputs, suffix(".ext"), [(15, ".ext1"), ".ext2"])
+        def pipelinetask (input_file_name, output_file_name):
+            ""
+
+
+    | Granted, it may seem rather odd that the *inputs* parameter includes numbers as well
+      as file names, but **Ruffus** does not second-guess how you wish to arrange your pipelines.
+ | ``inputs`` contains the parameters for three jobs.
+ | In each case, the first file name string encountered will be used to generate the *output* parameter:
+
+ .. image:: ../../images/manual_transform_complex_outputs.png
+
+ .. note::
+ The first filename in the prospective job #2 does not have the ``.ext`` suffix so this job will be eliminated.
+
+ Thus, the original code:
+
+ ::
+
+ @transform(inputs, suffix(".ext"), [(15, ".ext1"), ".ext2"])
+ def pipelinetask (input_file_name, output_file_name):
+ ""
+
+ is equivalent to calling:
+
+ ::
+
+ pipelinetask(["file1.ext", 10], [(15, 'file1.ext1'), 'file1.ext2']) # job 1
+ pipelinetask("file3.ext", [(15, 'file3.ext1'), 'file3.ext2']) # job 3
+
+    Hopefully, your code will be simpler than this rather pathological case!
+
+
+================================================================================================
+Regular expressions :ref:`regex(...) <decorators.regex>` provide maximum flexibility
+================================================================================================
+
+ Exactly the same function could be written using regular expressions:
+
+ ::
+
+        @transform(inputs, regex(r"\.ext$"), [(15, ".ext1"), ".ext2"])
+ def pipelinetask (input_file_name, output_file_name):
+ ""
+
+
+ | However, regular expressions are not limited to suffix matches.
+    | We can sort our *outputs* into different subdirectories, depending on category.
+ |
+    | Our example starts off with data files for different zoo animals.
+ | We are only interested in mammals, and we would like the files of each species to
+ | end up in its own directory after processing.
+ | Starting with these species files:
+
+ ::
+
+ "mammals.tiger.wild.animals"
+ "mammals.lion.wild.animals"
+ "mammals.lion.handreared.animals"
+ "mammals.dog.tame.animals"
+ "mammals.dog.wild.animals"
+ "reptiles.crocodile.wild.animals"
+
+    Then, the following:
+
+    .. image:: ../../images/manual_transform.png
+
+    will put each captured mammal in its own directory:
+
+    ::
+
+ >>> pipeline_run([capture_mammals])
+ Job = [mammals.dog.tame.animals -> dog/dog.tame.in_my_zoo, dog] completed
+ Job = [mammals.dog.wild.animals -> dog/dog.wild.in_my_zoo, dog] completed
+ Job = [mammals.lion.handreared.animals -> lion/lion.handreared.in_my_zoo, lion] completed
+ Job = [mammals.lion.wild.animals -> lion/lion.wild.in_my_zoo, lion] completed
+ Job = [mammals.tiger.wild.animals -> tiger/tiger.wild.in_my_zoo, tiger] completed
+ Completed Task = capture_mammals
+
+ .. note:: The code can be found :ref:`here <manual.transform_code>`
+
+
+
+
+
+
diff --git a/doc/_build/html/_sources/tutorials/manual/transform_code.txt b/doc/_build/html/_sources/tutorials/manual/transform_code.txt
new file mode 100644
index 0000000..092b899
--- /dev/null
+++ b/doc/_build/html/_sources/tutorials/manual/transform_code.txt
@@ -0,0 +1,68 @@
+.. include:: ../../global.inc
+.. _manual.transform_code:
+
+########################################################################################
+Code for Chapter 6: Applying the same recipe to create many different files
+########################################################################################
+ * :ref:`Manual overview <manual>`
+ * :ref:`@transform syntax in detail <decorators.transform>`
+ * :ref:`Back <manual.transform>`
+
+
+    | Our example starts off with data files for different zoo animals.
+ | We are only interested in mammals, and we would like the files of each species to
+ | end up in its own directory after processing.
+
+
+************************************
+Code
+************************************
+ ::
+
+ #
+ # Start with species files
+ #
+ open("mammals.tiger.wild.animals" , "w")
+ open("mammals.lion.wild.animals" , "w")
+ open("mammals.lion.handreared.animals", "w")
+ open("mammals.dog.tame.animals" , "w")
+ open("mammals.dog.wild.animals" , "w")
+ open("reptiles.crocodile.wild.animals", "w")
+
+ #
+ # create destinations for each species
+ #
+ import os
+ for s in ("tiger", "lion", "dog"):
+ if not os.path.exists(s):
+ os.mkdir(s)
+
+
+ #
+ # Now summarise files in directories organised by species
+ #
+ from ruffus import *
+ @transform('*.animals',
+ regex(r'mammals\.(.+)\.(.+)\.animals'), # save species and wild/tame
+ r'\1/\1.\2.in_my_zoo', # same species go together
+ r'\1') # extra species name
+ def capture_mammals(infile, outfile, species):
+ open(outfile, "w").write(open(infile).read() + "\ncaptured %s\n" % species)
+
+ pipeline_run([capture_mammals])
+
+
+
+************************************
+Resulting Output
+************************************
+ ::
+
+ >>> pipeline_run([capture_mammals])
+ Job = [mammals.dog.tame.animals -> dog/dog.tame.in_my_zoo, dog] completed
+ Job = [mammals.dog.wild.animals -> dog/dog.wild.in_my_zoo, dog] completed
+ Job = [mammals.lion.handreared.animals -> lion/lion.handreared.in_my_zoo, lion] completed
+ Job = [mammals.lion.wild.animals -> lion/lion.wild.in_my_zoo, lion] completed
+ Job = [mammals.tiger.wild.animals -> tiger/tiger.wild.in_my_zoo, tiger] completed
+ Completed Task = capture_mammals
+
diff --git a/doc/_build/html/_sources/tutorials/new_tutorial/active_if.txt b/doc/_build/html/_sources/tutorials/new_tutorial/active_if.txt
new file mode 100644
index 0000000..ce51051
--- /dev/null
+++ b/doc/_build/html/_sources/tutorials/new_tutorial/active_if.txt
@@ -0,0 +1,149 @@
+.. include:: ../../global.inc
+.. include:: manual_chapter_numbers.inc
+
+.. index::
+ pair: @active_if; Tutorial
+
+.. _new_manual.active_if:
+
+##########################################################################################################################################
+|new_manual.active_if.chapter_num|: Turning parts of the pipeline on and off at runtime with :ref:`@active_if <decorators.active_if>`
+##########################################################################################################################################
+
+
+.. seealso::
+
+ * :ref:`Manual Table of Contents <new_manual.table_of_contents>`
+ * :ref:`@active_if syntax in detail <decorators.active_if>`
+
+
+***************************************
+Overview
+***************************************
+
+ It is sometimes useful to be able to switch on and off parts of a pipeline. For example, a pipeline
+    might have two different code paths depending on the type of data it is being asked to analyse.
+
+ One surprisingly easy way to do this is to use a python ``if`` statement around particular task functions:
+
+ .. code-block:: python
+ :emphasize-lines: 3,5
+
+ from ruffus import *
+
+ run_task1 = True
+
+ @originate(['a.foo', 'b.foo'])
+ def create_files(output_file):
+ open(output_file, "w")
+
+
+ if run_task1:
+ # might not run
+ @transform(create_files, suffix(".foo"), ".bar")
+ def foobar(input_file, output_file):
+ open(output_file, "w")
+
+
+ @transform(foobar, suffix(".bar"), ".result")
+ def wrap_up(input_file, output_file):
+ open(output_file, "w")
+
+
+ pipeline_run()
+
+
+    This simple solution has a number of drawbacks:
+
+    #. The on/off decision is a one-off event that happens when the script is loaded. Ideally, we
+       would like more flexibility, and to postpone the decision until ``pipeline_run()`` is invoked.
+    #. When the ``if`` condition is false, the entire task function becomes invisible, and if there are any
+       downstream tasks, as in the above example, *Ruffus* will complain loudly about
+       missing dependencies.
+
+
+******************************************************************************
+:ref:`@active_if <decorators.active_if>` controls the state of tasks
+******************************************************************************
+
+
+    * Switches tasks on and off at run time depending on their parameters
+ * Evaluated each time ``pipeline_run``, ``pipeline_printout`` or ``pipeline_printout_graph`` is called.
+ * Dormant tasks behave as if they are up to date and have no output.
+
+    The design and initial implementation were contributed by Jacob Biesinger.
+
+ The following example shows its flexibility and syntax:
+
+ .. code-block:: python
+ :emphasize-lines: 20
+
+ from ruffus import *
+ run_if_true_1 = True
+ run_if_true_2 = False
+ run_if_true_3 = True
+
+
+ #
+ # task1
+ #
+ @originate(['a.foo', 'b.foo'])
+ def create_files(outfile):
+ """
+ create_files
+ """
+ open(outfile, "w").write(outfile + "\n")
+
+ #
+ # Only runs if all three run_if_true conditions are met
+ #
+ # @active_if determines if task is active
+ @active_if(run_if_true_1, lambda: run_if_true_2)
+ @active_if(run_if_true_3)
+ @transform(create_files, suffix(".foo"), ".bar")
+ def this_task_might_be_inactive(infile, outfile):
+ open(outfile, "w").write("%s -> %s\n" % (infile, outfile))
+
+
+ # @active_if switches off task because run_if_true_2 == False
+ pipeline_run(verbose = 3)
+
+ # @active_if switches on task because all run_if_true conditions are met
+ run_if_true_2 = True
+ pipeline_run(verbose = 3)
+
+
+ The task starts off inactive:
+
+
+ .. code-block:: pycon
+ :emphasize-lines: 1
+
+ >>> # @active_if switches off task "this_task_might_be_inactive" because run_if_true_2 == False
+ >>> pipeline_run(verbose = 3)
+
+ Task enters queue = create_files
+ create_files
+ Job = [None -> a.foo] Missing file [a.foo]
+ Job = [None -> b.foo] Missing file [b.foo]
+ Job = [None -> a.foo] completed
+ Job = [None -> b.foo] completed
+ Completed Task = create_files
+ Inactive Task = this_task_might_be_inactive
+
+ Now turn on the task:
+
+ .. code-block:: pycon
+ :emphasize-lines: 1
+
+ >>> # @active_if switches on task "this_task_might_be_inactive" because all run_if_true conditions are met
+ >>> run_if_true_2 = True
+ >>> pipeline_run(verbose = 3)
+
+ Task enters queue = this_task_might_be_inactive
+
+ Job = [a.foo -> a.bar] Missing file [a.bar]
+ Job = [b.foo -> b.bar] Missing file [b.bar]
+ Job = [a.foo -> a.bar] completed
+ Job = [b.foo -> b.bar] completed
+ Completed Task = this_task_might_be_inactive
+
diff --git a/doc/_build/html/_sources/tutorials/new_tutorial/check_if_uptodate.txt b/doc/_build/html/_sources/tutorials/new_tutorial/check_if_uptodate.txt
new file mode 100644
index 0000000..2323d29
--- /dev/null
+++ b/doc/_build/html/_sources/tutorials/new_tutorial/check_if_uptodate.txt
@@ -0,0 +1,89 @@
+.. include:: ../../global.inc
+.. include:: manual_chapter_numbers.inc
+
+.. index::
+ pair: check_if_uptodate; Tutorial
+
+.. _new_manual.check_if_uptodate:
+
+########################################################################################################################################################################################################################################################################################################
+|new_manual.check_if_uptodate.chapter_num|: Esoteric: Writing custom functions to decide which jobs are up to date with :ref:`@check_if_uptodate<decorators.check_if_uptodate>`
+########################################################################################################################################################################################################################################################################################################
+
+
+.. seealso::
+
+ * :ref:`Manual Table of Contents <new_manual.table_of_contents>`
+ * :ref:`@check_if_uptodate syntax in detail<decorators.check_if_uptodate>`
+
+
+******************************************************************************
+**@check_if_uptodate** : Manual dependency checking
+******************************************************************************
+    Tasks specified with most decorators, such as
+
+    * :ref:`@split <decorators.split>`
+    * :ref:`@transform <decorators.transform>`
+    * :ref:`@merge <decorators.merge>`
+    * :ref:`@collate <decorators.collate>`
+    * :ref:`@subdivide <decorators.subdivide>`
+
+ have automatic dependency checking based on file modification times.
+
+    Sometimes, you might want to have more control over whether to run jobs, especially
+    if a task does not rely on or produce files (e.g. with :ref:`@parallel <decorators.parallel>`).
+
+ You can write your own custom function to decide whether to run a job.
+    This takes as many parameters as your task function, and needs to return a
+    tuple indicating whether an update is required, and why (i.e. ``tuple(bool, str)``).
+
+    This simple example creates the file ``a.1`` if it does not exist:
+
+ ::
+
+ from ruffus import *
+ @originate("a.1")
+ def create_if_necessary(output_file):
+ open(output_file, "w")
+
+        pipeline_run([create_if_necessary])
+
+
+
+ could be rewritten more laboriously as:
+
+ ::
+
+
+ from ruffus import *
+ import os
+ def check_file_exists(input_file, output_file):
+ if os.path.exists(output_file):
+ return False, "File already exists"
+ return True, "%s is missing" % output_file
+
+ @parallel([[None, "a.1"]])
+ @check_if_uptodate(check_file_exists)
+ def create_if_necessary(input_file, output_file):
+ open(output_file, "w")
+
+ pipeline_run([create_if_necessary])
+
+
+
+ Both produce the same output:
+ ::
+
+ Task = create_if_necessary
+ Job = [null, "a.1"] completed
+
+
+
+
+.. note::
+
+ The function specified by :ref:`@check_if_uptodate <decorators.check_if_uptodate>` can be called
+ more than once for each job.
+
+ See the :ref:`description here <new_manual.dependencies>` of how *Ruffus* decides which tasks to run.
+
+
diff --git a/doc/_build/html/_sources/tutorials/new_tutorial/checkpointing.txt b/doc/_build/html/_sources/tutorials/new_tutorial/checkpointing.txt
new file mode 100644
index 0000000..f512806
--- /dev/null
+++ b/doc/_build/html/_sources/tutorials/new_tutorial/checkpointing.txt
@@ -0,0 +1,400 @@
+.. include:: ../../global.inc
+.. include:: manual_chapter_numbers.inc
+
+.. index::
+ pair: Up to date; Tutorial
+ pair: Task completion; Tutorial
+ pair: Exceptions; Tutorial
+ pair: Interrupted Pipeline; Tutorial
+
+.. _new_manual.checkpointing:
+
+######################################################################################################
+|new_manual.checkpointing.chapter_num|: Checkpointing: Interrupted Pipelines and Exceptions
+######################################################################################################
+
+
+.. seealso::
+
+ * :ref:`Manual Table of Contents <new_manual.table_of_contents>`
+
+.. note::
+
+ Remember to look at the example code:
+
+ * :ref:`new_manual.checkpointing.code`
+
+
+
+***************************************
+Overview
+***************************************
+ .. image:: ../../images/theoretical_pipeline_schematic.png
+ :scale: 50
+
+ Computational pipelines transform your data in stages until the final result is produced.
+
+ By default, *Ruffus* uses file modification times for the **input** and **output** to determine
+ whether each stage of a pipeline is up-to-date or not. But what happens when the task
+ function is interrupted, whether from the command line or by error, half way through writing the output?
+
+    In this case, the half-formed, truncated and corrupt **Output** file will look newer than its **Input** and hence appear up-to-date.
+
+
+.. index::
+ pair: Tutorial; interrupting tasks
+
+.. _new_manual.interrupting_tasks:
+
+***************************************
+Interrupting tasks
+***************************************
+ Let us try with an example:
+
+ .. code-block:: python
+ :emphasize-lines: 20
+
+ from ruffus import *
+ import sys, time
+
+ # create initial files
+ @originate(['job1.start'])
+ def create_initial_files(output_file):
+ with open(output_file, "w") as oo: pass
+
+
+ #---------------------------------------------------------------
+ #
+ # long task to interrupt
+ #
+ @transform(create_initial_files, suffix(".start"), ".output")
+ def long_task(input_files, output_file):
+ with open(output_file, "w") as ff:
+ ff.write("Unfinished...")
+ # sleep for 2 seconds here so you can interrupt me
+ sys.stderr.write("Job started. Press ^C to interrupt me now...\n")
+ time.sleep(2)
+ ff.write("\nFinished")
+ sys.stderr.write("Job completed.\n")
+
+
+ # Run
+ pipeline_run([long_task])
+
+
+ When this script runs, it pauses in the middle with this message::
+
+ Job started. Press ^C to interrupt me now...
+
+    If you interrupt the script by pressing Control-C at this point, you will see that ``job1.output`` contains only ``Unfinished...``.
+    However, if you rerun the interrupted pipeline, Ruffus ignores the corrupt, incomplete file:
+
+ .. code-block:: pycon
+
+ >>> pipeline_run([long_task])
+ Job started. Press ^C to interrupt me now...
+ Job completed
+
+ And if you had run ``pipeline_printout``:
+
+ .. code-block:: pycon
+ :emphasize-lines: 8
+
+ >>> pipeline_printout(sys.stdout, [long_task], verbose=3)
+ ________________________________________
+ Tasks which will be run:
+
+ Task = long_task
+ Job = [job1.start
+ -> job1.output]
+ # Job needs update: Previous incomplete run leftover: [job1.output]
+
+
+ We can see that *Ruffus* magically knows that the previous run was incomplete, and that ``job1.output`` is detritus that needs to be discarded.
+
+
+.. _new_manual.logging_completed_jobs:
+
+******************************************
+Checkpointing: only log completed jobs
+******************************************
+
+ All is revealed if you were to look in the working directory. *Ruffus* has created a file called ``.ruffus_history.sqlite``.
+    In this `SQLite <https://sqlite.org/>`_ database, *Ruffus* logs only those files which are the result of a completed job;
+    all other files are suspect.
+ This file checkpoint database is a fail-safe, not a substitute for checking file modification times. If the **Input** or **Output** files are
+ modified, the pipeline will rerun.
+
+ By default, *Ruffus* saves only file timestamps to the SQLite database but you can also add a checksum of the pipeline task function body or parameters.
+ This behaviour can be controlled by setting the ``checksum_level`` parameter
+ in ``pipeline_run()``. For example, if you do not want to save any timestamps or checksums:
+
+ .. code-block:: python
+
+ pipeline_run(checksum_level = 0)
+
+ CHECKSUM_FILE_TIMESTAMPS = 0 # only rerun when the file timestamps are out of date (classic mode)
+ CHECKSUM_HISTORY_TIMESTAMPS = 1 # Default: also rerun when the history shows a job as being out of date
+ CHECKSUM_FUNCTIONS = 2 # also rerun when function body has changed
+ CHECKSUM_FUNCTIONS_AND_PARAMS = 3 # also rerun when function parameters or function body change
+
+
+ .. note::
+
+ Checksums are calculated from the `pickled <http://docs.python.org/2/library/pickle.html>`_ string for the function code and parameters.
+ If pickling fails, Ruffus will degrade gracefully to saving just the timestamp in the SQLite database.
+
+.. _new_manual.history_files_cannot_be_shared:
+
+****************************************************************************
+Do not share the same checkpoint file across multiple pipelines!
+****************************************************************************
+
+    The name of the Ruffus python script is not saved in the checkpoint file alongside timestamps and checksums.
+ That means that you can rename your pipeline source code file without having to rerun the pipeline!
+ The tradeoff is that if multiple pipelines are run from the same directory, and save their histories to the
+    same SQLite database file, and if their file names overlap (all of these are bad ideas anyway!), this is
+ bound to be a source of confusion.
+
+    Luckily, the name and path of the checkpoint file can also be changed for each pipeline.
+
+.. _new_manual.changing_history_file_name:
+
+****************************************************************************
+Setting checkpoint file names
+****************************************************************************
+
+ .. warning::
+
+ Some file systems do not appear to support SQLite at all:
+
+ There are reports that SQLite databases have `file locking problems <http://beets.radbox.org/blog/sqlite-nightmare.html>`_ on Lustre.
+
+ The best solution would be to keep the SQLite database on an alternate compatible file system away from the working directory if possible.
+
+============================================================================================================================================================
+environment variable ``DEFAULT_RUFFUS_HISTORY_FILE``
+============================================================================================================================================================
+
+    The name of the checkpoint file is the value of the environment variable ``DEFAULT_RUFFUS_HISTORY_FILE``:
+
+    .. code-block:: bash
+
+        export DEFAULT_RUFFUS_HISTORY_FILE=/some/where/.ruffus_history.sqlite
+
+ This gives considerable flexibility, and allows a system-wide policy to be set so that all Ruffus checkpoint files are set logically to particular paths.
+
+ .. note::
+
+ It is your responsibility to make sure that the requisite destination directories for the checkpoint files exist beforehand!
+
+
+    Where this environment variable is missing, the checkpoint file defaults to ``.ruffus_history.sqlite`` in your working directory.
+
+
+============================================================================================================================================================
+Setting the checkpoint file name manually
+============================================================================================================================================================
+
+ This checkpoint file name can always be overridden as a parameter to Ruffus functions:
+
+ .. code-block:: python
+
+ pipeline_run(history_file = "XXX")
+ pipeline_printout(history_file = "XXX")
+ pipeline_printout_graph(history_file = "XXX")
+
+
+ There is also built in support in ``Ruffus.cmdline``. So if you use this module, you can simply add to your command line:
+
+ .. code-block:: bash
+
+ # use a custom checkpoint file
+ myscript --checksum_file_name .myscript.ruffus_history.sqlite
+
+ This takes precedence over everything else.
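+
+    If you are not already using ``Ruffus.cmdline``, the boilerplate is roughly the following
+    sketch (the description text is illustrative):
+
+    .. code-block:: python
+
+        from ruffus import *
+
+        parser = cmdline.get_argparse(description = "What does this pipeline do?")
+        options = parser.parse_args()
+
+        #   <pipeline task functions go here>
+
+        #   runs (or prints out) the pipeline, honouring command line options such as
+        #   --checksum_file_name, --just_print and --verbose
+        cmdline.run(options)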
+
+
+
+****************************************************************************
+Useful checkpoint file name policies ``DEFAULT_RUFFUS_HISTORY_FILE``
+****************************************************************************
+
+ If the pipeline script is called ``test/bin/scripts/run.me.py``, then these are the resulting checkpoint files locations:
+
+============================================================================================================================================================
+Example 1: same directory, different name
+============================================================================================================================================================
+ If the environment variable is:
+
+ .. code-block:: bash
+
+ export DEFAULT_RUFFUS_HISTORY_FILE=.{basename}.ruffus_history.sqlite
+
+ Then the job checkpoint database for ``run.me.py`` will be ``.run.me.ruffus_history.sqlite``
+
+ .. code-block:: bash
+
+ /test/bin/scripts/run.me.py
+        /test/bin/scripts/.run.me.ruffus_history.sqlite
+
+============================================================================================================================================================
+Example 2: Different directory, same name
+============================================================================================================================================================
+
+ .. code-block:: bash
+
+ export DEFAULT_RUFFUS_HISTORY_FILE=/common/path/for/job_history/.{basename}.ruffus_history.sqlite
+
+ .. code-block:: bash
+
+ /common/path/for/job_history/.run.me.ruffus_history.sqlite
+
+
+============================================================================================================================================================
+Example 3: Different directory, same name but keep one level of subdirectory to disambiguate
+============================================================================================================================================================
+
+ .. code-block:: bash
+
+ export DEFAULT_RUFFUS_HISTORY_FILE=/common/path/for/job_history/{subdir[0]}/.{basename}.ruffus_history.sqlite
+
+
+ .. code-block:: bash
+
+ /common/path/for/job_history/scripts/.run.me.ruffus_history.sqlite
+
+
+
+============================================================================================================================================================
+Example 4: Nested in common directory
+============================================================================================================================================================
+
+ .. code-block:: bash
+
+ export DEFAULT_RUFFUS_HISTORY_FILE=/common/path/for/job_history/{path}/.{basename}.ruffus_history.sqlite
+
+ .. code-block:: bash
+
+ /common/path/for/job_history/test/bin/scripts/.run.me.ruffus_history.sqlite
+
+
+
+
+.. index::
+ pair: Tutorial; Regenerating the checkpoint file
+
+.. _new_manual.regenerating_history_file:
+
+******************************************************************************
+Regenerating the checkpoint file
+******************************************************************************
+
+ Occasionally you may need to re-generate the checkpoint file.
+
+ This could be necessary:
+
+ * because you are upgrading from a previous version of Ruffus without checkpoint file support
+    * on the rare occasions when the SQLite file becomes corrupted and has to be deleted
+ * if you wish to circumvent the file checking of Ruffus after making some manual changes!
+
+ To do this, it is only necessary to call ``pipeline_run`` appropriately:
+
+ .. code-block:: python
+
+ CHECKSUM_REGENERATE = 2
+        pipeline_run(touch_files_only = CHECKSUM_REGENERATE)
+
+
+ Similarly, if you are using ``Ruffus.cmdline``, you can call:
+
+ .. code-block:: bash
+
+ myscript --recreate_database
+
+
+ Note that this regenerates the checkpoint file to reflect the existing *Input*, *Output* files on disk.
+ In other words, the onus is on you to make sure there are no half-formed, corrupt files. On the other hand,
+    the pipeline does not need to have been previously run successfully for this to work. Essentially, Ruffus
+    pretends to run the pipeline, while logging all the files with consistent file modification times, stopping
+    at the first tasks which appear out of date or incomplete.
+
+
+.. index::
+ pair: rules; for rerunning jobs
+
+.. _new_manual.skip_up_to_date.rules:
+
+******************************************************************************
+Rules for determining if files are up to date
+******************************************************************************
+ The following simple rules are used by *Ruffus*.
+
+ #. The pipeline stage will be rerun if:
+
+ * If any of the **Input** files are new (newer than the **Output** files)
+ * If any of the **Output** files are missing
+
+ #. In addition, it is possible to run jobs which create files from scratch.
+
+ * If no **Input** file names are supplied, the job will only run if any *output* file is missing.
+
+ #. Finally, if no **Output** file names are supplied, the job will always run.
+
+
+
+.. index::
+ pair: Exception; Missing input files
+
+******************************************************************************
+Missing files generate exceptions
+******************************************************************************
+
+ If the *inputs* files for a job are missing, the task function will have no way
+ to produce its *output*. In this case, a ``MissingInputFileError`` exception will be raised
+ automatically. For example,
+
+ ::
+
+ task.MissingInputFileError: No way to run job: Input file ['a.1'] does not exist
+ for Job = ["a.1" -> "a.2", "A file"]
+
+.. index::
+ pair: Manual; Timestamp resolution
+
+******************************************************************************
+Caveats: Coarse Timestamp resolution
+******************************************************************************
+
+ Note that modification times have precision to the nearest second under some older file systems
+    (ext2/ext3?). This may also be true for networked file systems.
+
+ *Ruffus* supplements the file system time resolution by independently recording the timestamp at
+ full OS resolution (usually to at least the millisecond) at job completion, when presumably the **Output**
+ files will have been created.
+
+ However, *Ruffus* only does this if the discrepancy between file time and system time is less than a second
+    (due to poor file system timestamp resolution). If there are large mismatches between the two, for example due
+    to network time slippage or misconfiguration, *Ruffus* reverts to using the file system time and adds a one second
+ delay between jobs (via ``time.sleep()``) to make sure input and output file stamps are different.
+
+ If you know that your filesystem has coarse-grained timestamp resolution, you can always revert to this very conservative behaviour,
+    at the price of some annoying one second pauses, by setting :ref:`pipeline_run(one_second_per_job = True) <pipeline_functions.pipeline_run>`.
+
+
+
+.. index::
+ pair: Manual; flag files
+
+******************************************************************************
+Flag files: Checkpointing for the paranoid
+******************************************************************************
+
+    One other way of checkpointing your pipelines is to create an extra "flag" file as an additional
+    **Output** file name. The flag file is only created or updated when everything else in the
+    job has completed successfully and has been written to disk. A missing or out-of-date flag file
+    would then be a sign for Ruffus that the task never completed properly in the first place.
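+
+    A minimal sketch of this pattern (the file names are illustrative):
+
+    .. code-block:: python
+
+        from ruffus import *
+
+        @transform(["data.input"], suffix(".input"), [".output", ".output.finished.flag"])
+        def analyse(input_file, output_files):
+            output_file, flag_file = output_files
+
+            # write the real result first
+            with open(output_file, "w") as oo:
+                oo.write("results\n")
+
+            # touch the flag only when everything else has succeeded
+            open(flag_file, "w").close()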
+
+    This used to be by far the best way of performing checkpointing in Ruffus, and it is still
+    the most bulletproof way of proceeding. For example, even the loss or corruption
+    of the checkpoint file would not affect things greatly.
+
+    Nevertheless, flag files are largely superfluous in modern *Ruffus*.
diff --git a/doc/_build/html/_sources/tutorials/new_tutorial/checkpointing_code.txt b/doc/_build/html/_sources/tutorials/new_tutorial/checkpointing_code.txt
new file mode 100644
index 0000000..27632e8
--- /dev/null
+++ b/doc/_build/html/_sources/tutorials/new_tutorial/checkpointing_code.txt
@@ -0,0 +1,23 @@
+.. include:: ../../global.inc
+.. include:: manual_chapter_numbers.inc
+
+.. _new_manual.checkpointing.code:
+
+#################################################################################################################
+|new_manual.checkpointing.chapter_num|: Python Code for Checkpointing: Interrupted Pipelines and Exceptions
+#################################################################################################################
+
+.. seealso::
+
+ * :ref:`Manual Table of Contents <new_manual.table_of_contents>`
+ * :ref:`Back to |new_manual.checkpointing.chapter_num|: Interrupted Pipelines and Exceptions <new_manual.checkpointing>`
+
+
+************************************************************************
+Code for :ref:`suffix() <decorators.suffix>` example
+************************************************************************
+ .. code-block:: python
+
+ from ruffus import *
+
+
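+ A minimal sketch along the same lines (the task and file names are illustrative only, not
+ necessarily the chapter's original code) would be:
+
+ .. code-block:: python
+
+     from ruffus import *
+
+     @originate(["a.start", "b.start"])
+     def create_initial_files(output_file):
+         with open(output_file, "w") as oo: pass
+
+     @transform(create_initial_files, suffix(".start"), ".processed")
+     def process(input_file, output_file):
+         with open(output_file, "w") as oo: pass
+
+     # completed jobs are recorded in the checkpoint file, so an
+     # interrupted run restarts only the unfinished jobs
+     pipeline_run()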
diff --git a/doc/_build/html/_sources/tutorials/new_tutorial/combinatorics.txt b/doc/_build/html/_sources/tutorials/new_tutorial/combinatorics.txt
new file mode 100644
index 0000000..3e89ca1
--- /dev/null
+++ b/doc/_build/html/_sources/tutorials/new_tutorial/combinatorics.txt
@@ -0,0 +1,442 @@
+.. include:: ../../global.inc
+.. include:: manual_chapter_numbers.inc
+
+.. index::
+ pair: combinatorics; Tutorial
+
+.. _new_manual.combinatorics:
+
+##################################################################################################################################################################################################################################################
+|new_manual.combinatorics.chapter_num|: :ref:`@combinations<decorators.combinations>`, :ref:`@permutations<decorators.permutations>` and all versus all :ref:`@product<decorators.product>`
+##################################################################################################################################################################################################################################################
+
+.. seealso::
+
+ * :ref:`Manual Table of Contents <new_manual.table_of_contents>`
+ * :ref:`@combinations_with_replacement <decorators.combinations_with_replacement>`
+ * :ref:`@combinations <decorators.combinations>`
+ * :ref:`@permutations <decorators.permutations>`
+ * :ref:`@product <decorators.product>`
+ * :ref:`formatter() <decorators.formatter>`
+
+.. note::
+
+ Remember to look at the example code:
+
+ * :ref:`new_manual.combinatorics.code`
+
+
+**************************************
+Overview
+**************************************
+
+ A surprising number of computational problems involve some sort of all-versus-all calculation.
+ Previously, this would have required all the parameters to be supplied on the fly by a custom function
+ using :ref:`@files<decorators.files_on_the_fly>`.
+
+ From version 2.4, *Ruffus* supports :ref:`@combinations_with_replacement <decorators.combinations_with_replacement>`,
+ :ref:`@combinations <decorators.combinations>`, :ref:`@permutations <decorators.permutations>`,
+ :ref:`@product <decorators.product>`.
+
+ These provide, as far as possible, the functionality of the four combinatoric iterators
+ of the same name in the standard python `itertools <http://docs.python.org/2/library/itertools.html>`__ module.
+
+***************************************************************************
+Generating output with :ref:`formatter()<decorators.formatter>`
+***************************************************************************
+
+ String replacement always takes place via :ref:`formatter()<decorators.formatter>`. Unfortunately,
+ the other *Ruffus* workhorses of :ref:`regex()<decorators.regex>` and :ref:`suffix()<decorators.suffix>`
+ do not have sufficient syntactic flexibility.
+
+ Each combinatorics decorator deals with multiple sets of inputs, whether these are:
+
+ * a self-self comparison (such as :ref:`@combinations_with_replacement <decorators.combinations_with_replacement>`,
+ :ref:`@combinations <decorators.combinations>`, :ref:`@permutations <decorators.permutations>`) or,
+ * a self-other comparison (:ref:`@product <decorators.product>`)
+
+ The replacement strings thus require an extra level of indirection to refer to
+ parsed components.
+
+ #. The first level refers to which *set* of inputs.
+ #. The second level refers to which input file in any particular *set* of inputs.
+
+
+ For example, if the *inputs* are **[A1,A2],[B1,B2],[C1,C2] vs [P1,P2],[Q1,Q2],[R1,R2] vs [X1,X2],[Y1,Y2],[Z1,Z2]**,
+ then ``'{basename[2][0]}'`` is the `basename <http://docs.python.org/2/library/os.path.html#os.path.basename>`__ for
+
+ * the third set of inputs (**X,Y,Z**) and
+ * the first file name string in each **Input** of that set (**X1, Y1, Z1**)
+
+
+
+.. _new_manual.product:
+
+***************************************************************************
+All vs all comparisons with :ref:`@product <decorators.product>`
+***************************************************************************
+
+ :ref:`@product <decorators.product>` generates the Cartesian **product** between sets of input files,
+ i.e. all vs all comparisons.
+
+ The effect is analogous to a nested for loop.
+
+ :ref:`@product <decorators.product>` can be useful, for example, in bioinformatics for finding
+ the corresponding genes (orthologues) for a set of proteins in multiple species.
+
+ .. code-block:: pycon
+ :emphasize-lines: 2
+
+ >>> from itertools import product
+ >>> # product('ABC', 'XYZ') --> AX AY AZ BX BY BZ CX CY CZ
+ >>> [ "".join(a) for a in product('ABC', 'XYZ')]
+ ['AX', 'AY', 'AZ', 'BX', 'BY', 'BZ', 'CX', 'CY', 'CZ']
+
+
+
+ This example calculates the **@product** of the **A,B**, **P,Q** and **X,Y** files:
+
+ .. code-block:: python
+ :emphasize-lines: 4,17,19,22,25,27,28,29,30,32,34,35,36
+
+ from ruffus import *
+ from ruffus.combinatorics import *
+
+ # Three sets of initial files
+ @originate([ 'a.start', 'b.start'])
+ def create_initial_files_ab(output_file):
+ with open(output_file, "w") as oo: pass
+
+ @originate([ 'p.start', 'q.start'])
+ def create_initial_files_pq(output_file):
+ with open(output_file, "w") as oo: pass
+
+ @originate([ ['x.1_start', 'x.2_start'],
+ ['y.1_start', 'y.2_start'] ])
+ def create_initial_files_xy(output_file):
+ with open(output_file, "w") as oo: pass
+
+ # @product
+ @product( create_initial_files_ab, # Input
+ formatter("(.start)$"), # match input file set # 1
+
+ create_initial_files_pq, # Input
+ formatter("(.start)$"), # match input file set # 2
+
+ create_initial_files_xy, # Input
+ formatter("(.start)$"), # match input file set # 3
+
+ "{path[0][0]}/" # Output Replacement string
+ "{basename[0][0]}_vs_" #
+ "{basename[1][0]}_vs_" #
+ "{basename[2][0]}.product", #
+
+ "{path[0][0]}", # Extra parameter: path for 1st set of files, 1st file name
+
+ ["{basename[0][0]}", # Extra parameter: basename for 1st set of files, 1st file name
+ "{basename[1][0]}", # 2nd
+ "{basename[2][0]}", # 3rd
+ ])
+ def product_task(input_file, output_parameter, shared_path, basenames):
+ print "# basenames = ", " ".join(basenames)
+ print "input_parameter = ", input_file
+ print "output_parameter = ", output_parameter, "\n"
+
+
+ #
+ # Run
+ #
+ pipeline_run(verbose=0)
+
+
+ This results in:
+
+ .. code-block:: pycon
+ :emphasize-lines: 2,6,10,14,18,22,26,30
+
+ >>> pipeline_run(verbose=0)
+
+ # basenames = a p x
+ input_parameter = ('a.start', 'p.start', 'x.start')
+ output_parameter = /home/lg/temp/a_vs_p_vs_x.product
+
+ # basenames = a p y
+ input_parameter = ('a.start', 'p.start', 'y.start')
+ output_parameter = /home/lg/temp/a_vs_p_vs_y.product
+
+ # basenames = a q x
+ input_parameter = ('a.start', 'q.start', 'x.start')
+ output_parameter = /home/lg/temp/a_vs_q_vs_x.product
+
+ # basenames = a q y
+ input_parameter = ('a.start', 'q.start', 'y.start')
+ output_parameter = /home/lg/temp/a_vs_q_vs_y.product
+
+ # basenames = b p x
+ input_parameter = ('b.start', 'p.start', 'x.start')
+ output_parameter = /home/lg/temp/b_vs_p_vs_x.product
+
+ # basenames = b p y
+ input_parameter = ('b.start', 'p.start', 'y.start')
+ output_parameter = /home/lg/temp/b_vs_p_vs_y.product
+
+ # basenames = b q x
+ input_parameter = ('b.start', 'q.start', 'x.start')
+ output_parameter = /home/lg/temp/b_vs_q_vs_x.product
+
+ # basenames = b q y
+ input_parameter = ('b.start', 'q.start', 'y.start')
+ output_parameter = /home/lg/temp/b_vs_q_vs_y.product
+
+
+.. _new_manual.permutations:
+
+******************************************************************************************************************************************************
+Permute all k-tuple orderings of inputs without repeats using :ref:`@permutations <decorators.permutations>`
+******************************************************************************************************************************************************
+
+ Generates the **permutations** of all the elements of a set of **Input** (e.g. **A B C D**):
+
+ * r-length tuples of *input* elements
+ * excluding repeated elements (**A A**)
+ * where the order within each tuple is significant (both **A B** and **B A** are generated).
+
+ .. code-block:: pycon
+ :emphasize-lines: 2
+
+ >>> from itertools import permutations
+ >>> # permutations('ABCD', 2) --> AB AC AD BA BC BD CA CB CD DA DB DC
+ >>> [ "".join(a) for a in permutations("ABCD", 2)]
+ ['AB', 'AC', 'AD', 'BA', 'BC', 'BD', 'CA', 'CB', 'CD', 'DA', 'DB', 'DC']
+
+ The following example calculates the **@permutations** of the **A,B,C,D** files:
+
+ .. code-block:: python
+ :emphasize-lines: 13,17,20,25,28-30
+
+ from ruffus import *
+ from ruffus.combinatorics import *
+
+ # initial file pairs
+ @originate([ ['A.1_start', 'A.2_start'],
+ ['B.1_start', 'B.2_start'],
+ ['C.1_start', 'C.2_start'],
+ ['D.1_start', 'D.2_start']])
+ def create_initial_files_ABCD(output_files):
+ for output_file in output_files:
+ with open(output_file, "w") as oo: pass
+
+ # @permutations
+ @permutations(create_initial_files_ABCD, # Input
+ formatter(), # match input files
+
+ # tuple of 2 at a time
+ 2,
+
+ # Output Replacement string
+ "{path[0][0]}/"
+ "{basename[0][1]}_vs_"
+ "{basename[1][1]}.permutations",
+
+ # Extra parameter: path for 1st set of files, 1st file name
+ "{path[0][0]}",
+
+ # Extra parameter
+ ["{basename[0][0]}", # basename for 1st set of files, 1st file name
+ "{basename[1][0]}", # 2nd
+ ])
+ def permutations_task(input_file, output_parameter, shared_path, basenames):
+ print " - ".join(basenames)
+
+
+ #
+ # Run
+ #
+ pipeline_run(verbose=0)
+
+
+ This results in:
+
+ .. code-block:: pycon
+
+ >>> pipeline_run(verbose=0)
+
+ A - B
+ A - C
+ A - D
+ B - A
+ B - C
+ B - D
+ C - A
+ C - B
+ C - D
+ D - A
+ D - B
+ D - C
+
+.. _new_manual.combinations:
+
+********************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
+Select unordered k-tuples within inputs excluding repeated elements using :ref:`@combinations <decorators.combinations>`
+********************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
+
+ Generates the **combinations** of all the elements of a set of **Input** (e.g. **A B C D**):
+
+ * r-length tuples of *input* elements
+ * without repeated elements (**A A**)
+ * where the order within each tuple is irrelevant (either **A B** or **B A** is generated, not both).
+
+ :ref:`@combinations <decorators.combinations>` can be useful, for example, in calculating a transition probability matrix
+ for a set of states. The diagonals are meaningless "self-self" transitions which are excluded.
+
+ .. code-block:: pycon
+ :emphasize-lines: 2
+
+ >>> from itertools import combinations
+ >>> # combinations('ABCD', 3) --> ABC ABD ACD BCD
+ >>> [ "".join(a) for a in combinations("ABCD", 3)]
+ ['ABC', 'ABD', 'ACD', 'BCD']
+
+ This example calculates the **@combinations** of the **A,B,C,D** files:
+
+ .. code-block:: python
+ :emphasize-lines: 13,17,20,25,28-30
+
+ from ruffus import *
+ from ruffus.combinatorics import *
+
+ # initial file pairs
+ @originate([ ['A.1_start', 'A.2_start'],
+ ['B.1_start', 'B.2_start'],
+ ['C.1_start', 'C.2_start'],
+ ['D.1_start', 'D.2_start']])
+ def create_initial_files_ABCD(output_files):
+ for output_file in output_files:
+ with open(output_file, "w") as oo: pass
+
+ # @combinations
+ @combinations(create_initial_files_ABCD, # Input
+ formatter(), # match input files
+
+ # tuple of 3 at a time
+ 3,
+
+ # Output Replacement string
+ "{path[0][0]}/"
+ "{basename[0][1]}_vs_"
+ "{basename[1][1]}_vs_"
+ "{basename[2][1]}.combinations",
+
+ # Extra parameter: path for 1st set of files, 1st file name
+ "{path[0][0]}",
+
+ # Extra parameter
+ ["{basename[0][0]}", # basename for 1st set of files, 1st file name
+ "{basename[1][0]}", # 2nd
+ "{basename[2][0]}", # 3rd
+ ])
+ def combinations_task(input_file, output_parameter, shared_path, basenames):
+ print " - ".join(basenames)
+
+
+ #
+ # Run
+ #
+ pipeline_run(verbose=0)
+
+
+ This results in:
+
+ .. code-block:: pycon
+
+ >>> pipeline_run(verbose=0)
+ A - B - C
+ A - B - D
+ A - C - D
+ B - C - D
+
+.. _new_manual.combinations_with_replacement:
+
+********************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
+Select unordered k-tuples within inputs *including* repeated elements with :ref:`@combinations_with_replacement <decorators.combinations_with_replacement>`
+********************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
+
+ Generates the **combinations_with_replacement** of all the elements of a set of **Input** (e.g. **A B C D**):
+
+ * r-length tuples of *input* elements
+ * including repeated elements (**A A**)
+ * where the order within each tuple is irrelevant (either **A B** or **B A** is generated, not both).
+
+
+ :ref:`@combinations_with_replacement <decorators.combinations_with_replacement>` can be useful,
+ for example, in bioinformatics for finding evolutionary relationships between genetic elements such as proteins
+ and genes. Self-self comparisons can be used as a baseline for scaling similarity scores.
+
+ .. code-block:: pycon
+ :emphasize-lines: 2
+
+ >>> from itertools import combinations_with_replacement
+ >>> # combinations_with_replacement('ABCD', 2) --> AA AB AC AD BB BC BD CC CD DD
+ >>> [ "".join(a) for a in combinations_with_replacement('ABCD', 2)]
+ ['AA', 'AB', 'AC', 'AD', 'BB', 'BC', 'BD', 'CC', 'CD', 'DD']
+
+ This example calculates the **@combinations_with_replacement** of the **A,B,C,D** files:
+
+ .. code-block:: python
+ :emphasize-lines: 13,17,20,25,28-30
+
+ from ruffus import *
+ from ruffus.combinatorics import *
+
+ # initial file pairs
+ @originate([ ['A.1_start', 'A.2_start'],
+ ['B.1_start', 'B.2_start'],
+ ['C.1_start', 'C.2_start'],
+ ['D.1_start', 'D.2_start']])
+ def create_initial_files_ABCD(output_files):
+ for output_file in output_files:
+ with open(output_file, "w") as oo: pass
+
+ # @combinations_with_replacement
+ @combinations_with_replacement(create_initial_files_ABCD, # Input
+ formatter(), # match input files
+
+ # tuple of 2 at a time
+ 2,
+
+ # Output Replacement string
+ "{path[0][0]}/"
+ "{basename[0][1]}_vs_"
+ "{basename[1][1]}.combinations_with_replacement",
+
+ # Extra parameter: path for 1st set of files, 1st file name
+ "{path[0][0]}",
+
+ # Extra parameter
+ ["{basename[0][0]}", # basename for 1st set of files, 1st file name
+ "{basename[1][0]}", # 2rd
+ ])
+ def combinations_with_replacement_task(input_file, output_parameter, shared_path, basenames):
+ print " - ".join(basenames)
+
+
+ #
+ # Run
+ #
+ pipeline_run(verbose=0)
+
+
+ This results in:
+
+ .. code-block:: pycon
+
+ >>> pipeline_run(verbose=0)
+ A - A
+ A - B
+ A - C
+ A - D
+ B - B
+ B - C
+ B - D
+ C - C
+ C - D
+ D - D
+
diff --git a/doc/_build/html/_sources/tutorials/new_tutorial/combinatorics_code.txt b/doc/_build/html/_sources/tutorials/new_tutorial/combinatorics_code.txt
new file mode 100644
index 0000000..3113601
--- /dev/null
+++ b/doc/_build/html/_sources/tutorials/new_tutorial/combinatorics_code.txt
@@ -0,0 +1,308 @@
+.. include:: ../../global.inc
+.. include:: manual_chapter_numbers.inc
+
+.. _new_manual.combinatorics.code:
+
+############################################################################################################################################################################################################
+|new_manual.combinatorics.chapter_num|: Python Code for :ref:`@combinations<decorators.combinations>`, :ref:`@permutations<decorators.permutations>` and all versus all :ref:`@product<decorators.product>`
+############################################################################################################################################################################################################
+
+.. seealso::
+
+ * :ref:`Manual Table of Contents <new_manual.table_of_contents>`
+ * :ref:`@combinations_with_replacement <decorators.combinations_with_replacement>`
+ * :ref:`@combinations <decorators.combinations>`
+ * :ref:`@permutations <decorators.permutations>`
+ * :ref:`@product <decorators.product>`
+ * Back to |new_manual.combinatorics.chapter_num|: :ref:`@combinations, @permutations and all versus all @product <new_manual.combinatorics>`
+
+***************************************************************************
+Example code for :ref:`@product <decorators.product>`
+***************************************************************************
+
+ .. code-block:: python
+ :emphasize-lines: 4,17,19,22,25,27,28,29,30,32,34,35,36
+
+ from ruffus import *
+ from ruffus.combinatorics import *
+
+ # Three sets of initial files
+ @originate([ 'a.start', 'b.start'])
+ def create_initial_files_ab(output_file):
+ with open(output_file, "w") as oo: pass
+
+ @originate([ 'p.start', 'q.start'])
+ def create_initial_files_pq(output_file):
+ with open(output_file, "w") as oo: pass
+
+ @originate([ ['x.1_start', 'x.2_start'],
+ ['y.1_start', 'y.2_start'] ])
+ def create_initial_files_xy(output_file):
+ with open(output_file, "w") as oo: pass
+
+ # @product
+ @product( create_initial_files_ab, # Input
+ formatter("(.start)$"), # match input file set # 1
+
+ create_initial_files_pq, # Input
+ formatter("(.start)$"), # match input file set # 2
+
+ create_initial_files_xy, # Input
+ formatter("(.start)$"), # match input file set # 3
+
+ "{path[0][0]}/" # Output Replacement string
+ "{basename[0][0]}_vs_" #
+ "{basename[1][0]}_vs_" #
+ "{basename[2][0]}.product", #
+
+ "{path[0][0]}", # Extra parameter: path for 1st set of files, 1st file name
+
+ ["{basename[0][0]}", # Extra parameter: basename for 1st set of files, 1st file name
+ "{basename[1][0]}", # 2nd
+ "{basename[2][0]}", # 3rd
+ ])
+ def product_task(input_file, output_parameter, shared_path, basenames):
+ print "# basenames = ", " ".join(basenames)
+ print "input_parameter = ", input_file
+ print "output_parameter = ", output_parameter, "\n"
+
+
+ #
+ # Run
+ #
+ pipeline_run(verbose=0)
+
+
+ This results in:
+
+ .. code-block:: pycon
+ :emphasize-lines: 2,6,10,14,18,22,26,30
+
+ >>> pipeline_run(verbose=0)
+
+ # basenames = a p x
+ input_parameter = ('a.start', 'p.start', 'x.start')
+ output_parameter = /home/lg/temp/a_vs_p_vs_x.product
+
+ # basenames = a p y
+ input_parameter = ('a.start', 'p.start', 'y.start')
+ output_parameter = /home/lg/temp/a_vs_p_vs_y.product
+
+ # basenames = a q x
+ input_parameter = ('a.start', 'q.start', 'x.start')
+ output_parameter = /home/lg/temp/a_vs_q_vs_x.product
+
+ # basenames = a q y
+ input_parameter = ('a.start', 'q.start', 'y.start')
+ output_parameter = /home/lg/temp/a_vs_q_vs_y.product
+
+ # basenames = b p x
+ input_parameter = ('b.start', 'p.start', 'x.start')
+ output_parameter = /home/lg/temp/b_vs_p_vs_x.product
+
+ # basenames = b p y
+ input_parameter = ('b.start', 'p.start', 'y.start')
+ output_parameter = /home/lg/temp/b_vs_p_vs_y.product
+
+ # basenames = b q x
+ input_parameter = ('b.start', 'q.start', 'x.start')
+ output_parameter = /home/lg/temp/b_vs_q_vs_x.product
+
+ # basenames = b q y
+ input_parameter = ('b.start', 'q.start', 'y.start')
+ output_parameter = /home/lg/temp/b_vs_q_vs_y.product
+
+******************************************************************************************************************************************************
+Example code for :ref:`@permutations <decorators.permutations>`
+******************************************************************************************************************************************************
+
+
+ .. code-block:: python
+ :emphasize-lines: 13,17,20,25,28-30
+
+ from ruffus import *
+ from ruffus.combinatorics import *
+
+ # initial file pairs
+ @originate([ ['A.1_start', 'A.2_start'],
+ ['B.1_start', 'B.2_start'],
+ ['C.1_start', 'C.2_start'],
+ ['D.1_start', 'D.2_start']])
+ def create_initial_files_ABCD(output_files):
+ for output_file in output_files:
+ with open(output_file, "w") as oo: pass
+
+ # @permutations
+ @permutations(create_initial_files_ABCD, # Input
+ formatter(), # match input files
+
+ # tuple of 2 at a time
+ 2,
+
+ # Output Replacement string
+ "{path[0][0]}/"
+ "{basename[0][1]}_vs_"
+ "{basename[1][1]}.permutations",
+
+ # Extra parameter: path for 1st set of files, 1st file name
+ "{path[0][0]}",
+
+ # Extra parameter
+ ["{basename[0][0]}", # basename for 1st set of files, 1st file name
+ "{basename[1][0]}", # 2nd
+ ])
+ def permutations_task(input_file, output_parameter, shared_path, basenames):
+ print " - ".join(basenames)
+
+
+ #
+ # Run
+ #
+ pipeline_run(verbose=0)
+
+
+ This results in:
+
+ .. code-block:: pycon
+
+ >>> pipeline_run(verbose=0)
+
+ A - B
+ A - C
+ A - D
+ B - A
+ B - C
+ B - D
+ C - A
+ C - B
+ C - D
+ D - A
+ D - B
+ D - C
+
+
+********************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
+Example code for :ref:`@combinations <decorators.combinations>`
+********************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
+
+ .. code-block:: python
+ :emphasize-lines: 13,17,20,25,28-30
+
+ from ruffus import *
+ from ruffus.combinatorics import *
+
+ # initial file pairs
+ @originate([ ['A.1_start', 'A.2_start'],
+ ['B.1_start', 'B.2_start'],
+ ['C.1_start', 'C.2_start'],
+ ['D.1_start', 'D.2_start']])
+ def create_initial_files_ABCD(output_files):
+ for output_file in output_files:
+ with open(output_file, "w") as oo: pass
+
+ # @combinations
+ @combinations(create_initial_files_ABCD, # Input
+ formatter(), # match input files
+
+ # tuple of 3 at a time
+ 3,
+
+ # Output Replacement string
+ "{path[0][0]}/"
+ "{basename[0][1]}_vs_"
+ "{basename[1][1]}_vs_"
+ "{basename[2][1]}.combinations",
+
+ # Extra parameter: path for 1st set of files, 1st file name
+ "{path[0][0]}",
+
+ # Extra parameter
+ ["{basename[0][0]}", # basename for 1st set of files, 1st file name
+ "{basename[1][0]}", # 2nd
+ "{basename[2][0]}", # 3rd
+ ])
+ def combinations_task(input_file, output_parameter, shared_path, basenames):
+ print " - ".join(basenames)
+
+
+ #
+ # Run
+ #
+ pipeline_run(verbose=0)
+
+
+ This results in:
+
+ .. code-block:: pycon
+
+ >>> pipeline_run(verbose=0)
+ A - B - C
+ A - B - D
+ A - C - D
+ B - C - D
+
+
+********************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
+Example code for :ref:`@combinations_with_replacement <decorators.combinations_with_replacement>`
+********************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
+
+ .. code-block:: python
+ :emphasize-lines: 13,17,20,25,28-30
+
+ from ruffus import *
+ from ruffus.combinatorics import *
+
+ # initial file pairs
+ @originate([ ['A.1_start', 'A.2_start'],
+ ['B.1_start', 'B.2_start'],
+ ['C.1_start', 'C.2_start'],
+ ['D.1_start', 'D.2_start']])
+ def create_initial_files_ABCD(output_files):
+ for output_file in output_files:
+ with open(output_file, "w") as oo: pass
+
+ # @combinations_with_replacement
+ @combinations_with_replacement(create_initial_files_ABCD, # Input
+ formatter(), # match input files
+
+ # tuple of 2 at a time
+ 2,
+
+ # Output Replacement string
+ "{path[0][0]}/"
+ "{basename[0][1]}_vs_"
+ "{basename[1][1]}.combinations_with_replacement",
+
+ # Extra parameter: path for 1st set of files, 1st file name
+ "{path[0][0]}",
+
+ # Extra parameter
+ ["{basename[0][0]}", # basename for 1st set of files, 1st file name
+ "{basename[1][0]}", # 2rd
+ ])
+ def combinations_with_replacement_task(input_file, output_parameter, shared_path, basenames):
+ print " - ".join(basenames)
+
+
+ #
+ # Run
+ #
+ pipeline_run(verbose=0)
+
+
+ This results in:
+
+ .. code-block:: pycon
+
+ >>> pipeline_run(verbose=0)
+ A - A
+ A - B
+ A - C
+ A - D
+ B - B
+ B - C
+ B - D
+ C - C
+ C - D
+ D - D
+
diff --git a/doc/_build/html/_sources/tutorials/new_tutorial/command_line.txt b/doc/_build/html/_sources/tutorials/new_tutorial/command_line.txt
new file mode 100644
index 0000000..e2a6419
--- /dev/null
+++ b/doc/_build/html/_sources/tutorials/new_tutorial/command_line.txt
@@ -0,0 +1,352 @@
+.. include:: ../../global.inc
+.. include:: manual_chapter_numbers.inc
+
+.. index::
+ pair: command line; Tutorial
+
+.. _new_manual.cmdline:
+
+######################################################################################################
+|new_manual.cmdline.chapter_num|: Running *Ruffus* from the command line with ruffus.cmdline
+######################################################################################################
+
+.. seealso::
+
+ * :ref:`Manual table of Contents <new_manual.table_of_contents>`
+
+
+We find that much of our *Ruffus* pipeline code is built on the same template, and this template is generally
+a good place to start developing a new pipeline.
+
+From version 2.4, *Ruffus* includes an optional ``Ruffus.cmdline`` module that provides
+support for a set of common command line arguments. This makes writing *Ruffus* pipelines much more pleasant.
+
+
+.. _new_manual.cmdline.get_argparse:
+
+.. _new_manual.cmdline.run:
+
+.. _new_manual.cmdline.setup_logging:
+
+************************************************************************************************************
+Template for `argparse <http://docs.python.org/2.7/library/argparse.html>`__
+************************************************************************************************************
+ All you need to do is copy these 6 lines
+
+
+ .. code-block:: python
+ :emphasize-lines: 5, 13
+
+ import ruffus.cmdline as cmdline
+
+ parser = cmdline.get_argparse(description='WHAT DOES THIS PIPELINE DO?')
+
+ # <<<---- add your own command line options like --input_file here
+ # parser.add_argument("--input_file")
+
+ options = parser.parse_args()
+
+ # standard python logger which can be synchronised across concurrent Ruffus tasks
+ logger, logger_mutex = cmdline.setup_logging (__name__, options.log_file, options.verbose)
+
+ # <<<---- pipelined functions go here
+
+ cmdline.run (options)
+
+ We recommend using the standard `argparse <http://docs.python.org/2.7/library/argparse.html>`__ module,
+ but the deprecated `optparse <http://docs.python.org/2.7/library/optparse.html>`__ module works as well (see :ref:`below <code_template.optparse>` for the template).
+
+
+******************************************************
+Command Line Arguments
+******************************************************
+
+ ``Ruffus.cmdline`` by default provides these predefined options:
+
+ .. code-block:: bash
+ :emphasize-lines: 5,12,15,22
+
+ -v, --verbose
+ --version
+ -L, --log_file
+
+ # tasks
+ -T, --target_tasks
+ --forced_tasks
+ -j, --jobs
+ --use_threads
+
+
+ # printout
+ -n, --just_print
+
+ # flow chart
+ --flowchart
+ --key_legend_in_graph
+ --draw_graph_horizontally
+ --flowchart_format
+
+
+ # check sum
+ --touch_files_only
+ --checksum_file_name
+ --recreate_database
+
+
+******************************************************
+1) Logging
+******************************************************
+
+ The script provides for logging both to the command line:
+
+ .. code-block:: bash
+
+ myscript -v
+ myscript --verbose
+
+ and an optional log file:
+
+ .. code-block:: bash
+
+ # keep tabs on yourself
+ myscript --log_file /var/log/secret.logbook
+
+ Logging is ignored if neither ``--verbose`` nor ``--log_file`` is specified on the command line.
+
+ ``Ruffus.cmdline`` automatically allows you to write to a shared log file via a proxy from multiple processes.
+ However, you do need to hold the logging mutex (``logger_mutex`` in the template above) for the log files to be synchronised properly across different jobs:
+
+ .. code-block:: python
+
+ with logger_mutex:
+
+     logger.info("Look Ma. No hands")
+
+ Logging is set up so that you can write
+
+
+=================================
+ A) Only to the log file:
+=================================
+
+ .. code-block:: python
+
+ logger.info("A message")
+
+=================================
+ B) Only to the display:
+=================================
+
+ .. code-block:: python
+
+ logger.debug("A message")
+
+
+.. _new_manual.cmdline.MESSAGE:
+
+======================================
+ C) To both simultaneously:
+======================================
+
+ .. code-block:: python
+
+ from ruffus.cmdline import MESSAGE
+
+ logger.log(MESSAGE, "A message")
+
+
+******************************************************
+2) Tracing pipeline progress
+******************************************************
+
+ This is extremely useful for understanding what is happening with your pipeline: which tasks and which
+ jobs are up to date, and so on.
+
+ See :ref:`new_manual.pipeline_printout`
+
+ To trace the pipeline, call the script with the following options:
+
+ .. code-block:: bash
+
+ # well-mannered, reserved
+ myscript --just_print
+ myscript -n
+
+ or
+
+ # extremely loquacious
+ myscript --just_print --verbose 5
+ myscript -n -v5
+
+ Increasing levels of verbosity (from ``--verbose`` to ``--verbose 5``) provide progressively more detailed output.
+
+
+
+******************************************************
+3) Printing a flowchart
+******************************************************
+
+ This is the subject of :ref:`new_manual.pipeline_printout_graph`.
+
+ Flowcharts can be specified using the following option:
+
+ .. code-block:: bash
+
+ myscript --flowchart xxxchart.svg
+
+ The extension of the flowchart file indicates what format the flowchart should take,
+ for example ``svg``, ``jpg``, etc.
+
+ This can be overridden with ``--flowchart_format``.
+
+******************************************************
+4) Running in parallel on multiple processors
+******************************************************
+
+
+ Optionally specify the number of parallel strands of execution and the final *target* task to run.
+ The pipeline will start from any out-of-date tasks which precede the *target* and proceed no further
+ than the *target*.
+
+ .. code-block:: bash
+
+ myscript --jobs 15 --target_tasks "final_task"
+ myscript -j 15
+
+
+
+
+******************************************************************************************************
+5) Setup checkpointing so that *Ruffus* knows which files are out of date
+******************************************************************************************************
+
+ The :ref:`checkpoint file <new_manual.checkpointing>` name defaults to the value set in the
+ environment variable ``DEFAULT_RUFFUS_HISTORY_FILE``.
+
+ If this is not set, it will default to ``.ruffus_history.sqlite`` in the current working directory.
+
+ Either value can be overridden on the command line:
+
+ .. code-block:: bash
+
+ myscript --checksum_file_name mychecksum.sqlite
+
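+ The same file name can also be set from within the script itself, for example (a sketch; the
+ path is illustrative and this assumes the environment variable is read when the pipeline is run):
+
+ .. code-block:: python
+
+     import os
+
+     # must be set before pipeline_run() is called (illustrative path)
+     os.environ["DEFAULT_RUFFUS_HISTORY_FILE"] = "/path/to/mychecksum.sqlite"
+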
+
+============================================================================================================================================================
+Recreating checkpoints
+============================================================================================================================================================
+
+ Create or update the checkpoint file so that all existing files in completed jobs appear up to date.
+
+ This will stop sensibly if the current state is incomplete or inconsistent.
+
+ ::
+
+ myscript --recreate_database
+
+============================================================================================================================================================
+Touch files
+============================================================================================================================================================
+
+ As far as possible, create empty files with the correct timestamp to make the pipeline appear up to date.
+
+ .. code-block:: bash
+
+ myscript --touch_files_only
+
+
+******************************************************************************************************
+6) Skipping specified options
+******************************************************************************************************
+ Note that particular options can be skipped (i.e. not added to the command line) if they conflict with your own options, for example:
+
+ .. code-block:: python
+ :emphasize-lines: 3
+
+ # see below for how to use get_argparse
+ parser = cmdline.get_argparse( description='WHAT DOES THIS PIPELINE DO?',
+ # Exclude the following options: --log_file --key_legend_in_graph
+ ignored_args = ["log_file", "key_legend_in_graph"])
+
+
+******************************************************************************************************
+7) Specifying verbosity and abbreviating long paths
+******************************************************************************************************
+
+ The verbosity can be specified on the command line
+
+ .. code-block:: bash
+
+ myscript --verbose 5
+
+ # verbosity of 5 + 1 = 6
+ myscript --verbose 5 --verbose
+
+ # verbosity reset to 2
+ myscript --verbose 5 --verbose --verbose 2
+
+ If the printed paths are too long and need to be abbreviated, or, alternatively, if you want to see the full absolute paths of your input and output parameters,
+ you can specify an extension to the verbosity. See the manual discussion of :ref:`verbose_abbreviated_path <new_manual.pipeline_printout.verbose_abbreviated_path>` for
+ more details. This is specified as ``--verbose VERBOSITY:VERBOSE_ABBREVIATED_PATH`` (no spaces!).
+
+ For example:
+
+ .. code-block:: bash
+ :emphasize-lines: 4,7
+
+ # verbosity of 4
+ myscript.py --verbose 4
+
+ # display three levels of nested directories
+ myscript.py --verbose 4:3
+
+ # restrict input and output parameters to 60 letters
+ myscript.py --verbose 4:-60
+
+
+******************************************************************************************************
+8) Displaying the version
+******************************************************************************************************
+ Note that the version for your script will default to ``"%(prog)s 1.0"`` unless specified:
+
+ .. code-block:: python
+
+ parser = cmdline.get_argparse( description='WHAT DOES THIS PIPELINE DO?',
+ version = "my_programme.py v. 2.23")
+
+
+
+
+
+
+
+.. _code_template.optparse:
+
+************************************************************************************************************
+Template for `optparse <http://docs.python.org/2.7/library/optparse.html>`__
+************************************************************************************************************
+
+ ``optparse`` has been deprecated since python 2.7.
+
+ .. code-block:: python
+ :emphasize-lines: 8,16
+
+ #
+ # Using optparse (deprecated since python v 2.7)
+ #
+ from ruffus import *
+ import ruffus.cmdline as cmdline
+
+ parser = cmdline.get_optgparse(version="%prog 1.0", usage = "\n\n %prog [options]")
+
+ # <<<---- add your own command line options like --input_file here
+ # parser.add_option("-i", "--input_file", dest="input_file", help="Input file")
+
+ (options, remaining_args) = parser.parse_args()
+
+ # logger which can be passed to ruffus tasks
+ logger, logger_mutex = cmdline.setup_logging ("this_program", options.log_file, options.verbose)
+
+ # <<<---- pipelined functions go here
+
+ cmdline.run (options)
+
diff --git a/doc/_build/html/_sources/tutorials/new_tutorial/decorators_compendium.txt b/doc/_build/html/_sources/tutorials/new_tutorial/decorators_compendium.txt
new file mode 100644
index 0000000..f1b2862
--- /dev/null
+++ b/doc/_build/html/_sources/tutorials/new_tutorial/decorators_compendium.txt
@@ -0,0 +1,154 @@
+.. include:: ../../global.inc
+.. include:: manual_chapter_numbers.inc
+
+.. index::
+ pair: decorators_compendium; Tutorial
+
+.. _new_manual.decorators_compendium:
+
+#####################################################################################################################
+|new_manual.decorators_compendium.chapter_num|: Pipeline topologies and a compendium of *Ruffus* decorators
+#####################################################################################################################
+
+.. seealso::
+
+ * :ref:`Manual Table of Contents <new_manual.table_of_contents>`
+ * :ref:`decorators <decorators>`
+
+
+***************************************
+Overview
+***************************************
+
+ Computational pipelines transform your data in stages until the final result is produced.
+
+ You can visualise your pipeline data flowing like water down a system of pipes.
+ *Ruffus* has many ways of joining up your pipes to create different topologies.
+
+ .. note::
+
+ **The best way to design a pipeline is to:**
+
+ * **Write down the file names of the data as it flows across your pipeline.**
+ * **Draw lines between the file names to show how they should be connected together.**
+
+
+******************************************************************************
+:ref:`@transform <decorators.transform>`
+******************************************************************************
+
+
+ So far, our data files have been flowing through our pipelines independently in lockstep.
+
+ .. image:: ../../images/bestiary_transform.png
+ :scale: 50
+
+ If we drew a graph of the data files moving through the pipeline, all of our flowcharts so far would look something like this.
+
+ The :ref:`@transform <decorators.transform>` decorator connects up your data files in 1 to 1 operations, ensuring that for every **Input**, a corresponding **Output** is
+ generated, ready to go into the next pipeline stage. If we start with three sets of starting data, we end up with three final sets of results.
+
+******************************************************************************
+A bestiary of *Ruffus* decorators
+******************************************************************************
+
+ Very often, we would like to transform our data in more complex ways; this is where the other *Ruffus* decorators come in.
+
+ .. image:: ../../images/bestiary_decorators.png
+ :scale: 50
+
+******************************************************************************
+:ref:`@originate <decorators.originate>`
+******************************************************************************
+
+ * Introduced in |new_manual.transform_in_parallel.chapter_num| :ref:`More on @transform-ing data and @originate <new_manual.transform_in_parallel>`,
+ :ref:`@originate <decorators.originate>` generates **Output** files from scratch without the benefit of any **Input** files.
+
+******************************************************************************
+:ref:`@merge <decorators.merge>`
+******************************************************************************
+ * A **many to one** operator.
+ * The last decorator at the far right of the figure, :ref:`@merge <decorators.merge>` merges multiple **Input** into one **Output**.
+
+******************************************************************************
+:ref:`@split <decorators.split>`
+******************************************************************************
+ * A **one to many** operator.
+ * :ref:`@split <decorators.split>` is the evil twin of :ref:`@merge <decorators.merge>`. It takes a single set of **Input** and splits it into multiple smaller pieces (see the sketch after this list).
+ * The best part of :ref:`@split <decorators.split>` is that we don't necessarily have to decide ahead of time *how many* smaller pieces it should produce. If we encounter a larger file,
+ we might need to split it up into more fragments for greater parallelism.
+ * Since :ref:`@split <decorators.split>` is a **one to many** operator, if you pass it **many** inputs (e.g. via :ref:`@transform <decorators.transform>`), it performs an implicit :ref:`@merge <decorators.merge>` step to make one
+ set of **Input** that you can redistribute into a different number of pieces. If you are looking to split *each* **Input** into further smaller fragments, then you
+ need :ref:`@subdivide <decorators.subdivide>`.
+
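+ A minimal sketch of :ref:`@split <decorators.split>` (the file names and chunk size are illustrative only):
+
+ .. code-block:: python
+
+     from ruffus import *
+     import glob, os
+
+     @originate("all_data.txt")
+     def create_data(output_file):
+         with open(output_file, "w") as oo:
+             oo.write("line\n" * 100)
+
+     # we do not know ahead of time how many chunk.*.txt files will be produced
+     @split(create_data, "chunk.*.txt")
+     def split_data(input_file, output_files):
+         # clean up chunks left over from any previous run
+         for f in glob.glob("chunk.*.txt"):
+             os.unlink(f)
+         with open(input_file) as ii:
+             lines = ii.readlines()
+         # write one chunk per 30 lines
+         for i in range(0, len(lines), 30):
+             with open("chunk.%03d.txt" % (i // 30), "w") as oo:
+                 oo.writelines(lines[i:i + 30])
+
+     pipeline_run()
+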
+******************************************************************************
+:ref:`@subdivide <decorators.subdivide>`
+******************************************************************************
+ * A **many to even more** operator.
+ * It takes each of multiple **Input** and further subdivides them.
+ * Uses :ref:`suffix() <decorators.suffix>`, :ref:`formatter() <decorators.formatter>` or :ref:`regex() <decorators.regex>` to generate **Output** names from its **Input** files but, like :ref:`@split <decorators.split>`, we don't have to decide ahead of time
+ *how many* smaller pieces each **Input** should be further divided into. For example, one large **Input** file might be subdivided into 7 pieces while the next job might
+ split its **Input** into just 4 pieces.
+
+******************************************************************************
+:ref:`@collate <decorators.collate>`
+******************************************************************************
+ * A **many to fewer** operator.
+ * :ref:`@collate <decorators.collate>` is the opposite twin of :ref:`@subdivide <decorators.subdivide>`: it takes multiple **Input** and groups or collates them into bundles of **Output** (see the sketch below).
+ * :ref:`@collate <decorators.collate>` uses :ref:`formatter() <decorators.formatter>` or :ref:`regex() <decorators.regex>` to generate **Output** names.
+ * All **Input** files which map to the same **Output** are grouped together into one job (one task function call) which
+ produces one **Output**.
+
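+ A minimal sketch of :ref:`@collate <decorators.collate>` (the file names and grouping regex are illustrative only):
+
+ .. code-block:: python
+
+     from ruffus import *
+
+     @originate(["run1.part_a.txt", "run1.part_b.txt",
+                 "run2.part_a.txt", "run2.part_b.txt"])
+     def create_parts(output_file):
+         with open(output_file, "w") as oo: pass
+
+     @collate(create_parts,
+              regex(r"(run\d+)\..+\.txt$"),   # group on the run name
+              r"\1.summary")                  # one Output per run
+     def summarise_run(input_files, output_file):
+         # all Input files sharing the same run name arrive together in one job
+         with open(output_file, "w") as oo:
+             oo.write("\n".join(input_files) + "\n")
+
+     pipeline_run()
+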
+******************************************************************************
+Combinatorics
+******************************************************************************
+
+ More rarely, we need to generate a set of **Output** based on a combination, permutation or product of the **Input**.
+
+ For example, in bioinformatics, we might need to look for all instances of a set of genes in the genomes of a number of different species.
+ In other words, we need to find the :ref:`@product <decorators.product>` of XXX genes x YYY species.
+
+ *Ruffus* provides decorators modelled on the "Combinatoric generators" in the Standard Python `itertools <http://docs.python.org/2/library/itertools.html>`_ library.
+
+ To use the combinatoric decorators, you need to import them explicitly from *Ruffus*:
+
+ .. code-block:: python
+
+
+ import ruffus
+ from ruffus import *
+ from ruffus.combinatorics import *
+
+ .. image:: ../../images/bestiary_combinatorics.png
+ :scale: 50
+
+******************************************************************************
+:ref:`@product <decorators.product>`
+******************************************************************************
+ * Given several sets of **Input**, it generates all versus all **Output**. For example, if there are four sets of **Input** files, :ref:`@product <decorators.product>` will generate ``WWW x XXX x YYY x ZZZ`` **Output**.
+ * Uses :ref:`formatter() <decorators.formatter>` to generate unique **Output** names from components parsed from *any* parts of *any* specified files in
+ all the **Input** sets. In the above example, this allows the generation of ``WWW x XXX x YYY x ZZZ`` unique names.
+
+******************************************************************************
+:ref:`@combinations <decorators.combinations>`
+******************************************************************************
+ * Given one set of **Input**, it generates the combinations of r-length tuples among them.
+ * Uses :ref:`formatter() <decorators.formatter>` to generate unique **Output** names from components parsed from *any* parts of *any* specified files in all **Input** sets.
+ * For example, given **Input** called ``A``, ``B`` and ``C``, it will generate: ``A-B``, ``A-C``, ``B-C``
+ * The order of **Input** items is ignored so either ``A-B`` or ``B-A`` will be included, not both
+ * Self-vs-self combinations (``A-A``) are excluded.
+
+************************************************************************************************************************************************************
+:ref:`@combinations_with_replacement <decorators.combinations_with_replacement>`
+************************************************************************************************************************************************************
+ * Given one set of **Input**, it generates the combinations of r-length tuples among them but includes self-vs-self combinations.
+ * Uses :ref:`formatter() <decorators.formatter>` to generate unique **Output** names from components parsed from *any* parts of *any* specified files in all **Input** sets.
+ * For example, given **Input** called ``A``, ``B`` and ``C``, it will generate: ``A-A``, ``A-B``, ``A-C``, ``B-B``, ``B-C``, ``C-C``
+
+******************************************************************************
+:ref:`@permutations <decorators.permutations>`
+******************************************************************************
+ * Given one set of **Input**, it generates the permutations of r-length tuples among them. This excludes self-vs-self combinations but includes all orderings (``A-B`` and ``B-A``).
+ * Uses :ref:`formatter() <decorators.formatter>` to generate unique **Output** names from components parsed from *any* parts of *any* specified files in all **Input** sets.
+ * For example, given **Input** called ``A``, ``B`` and ``C``, it will generate: ``A-B``, ``A-C``, ``B-A``, ``B-C``, ``C-A``, ``C-B``
+
diff --git a/doc/_build/html/_sources/tutorials/new_tutorial/dependencies.txt b/doc/_build/html/_sources/tutorials/new_tutorial/dependencies.txt
new file mode 100644
index 0000000..66600e4
--- /dev/null
+++ b/doc/_build/html/_sources/tutorials/new_tutorial/dependencies.txt
@@ -0,0 +1,110 @@
+.. include:: ../../global.inc
+.. include:: manual_chapter_numbers.inc
+
+.. index::
+ pair: Checking dependencies; Tutorial
+
+.. _new_manual.dependencies:
+
+##################################################################################
+|new_manual.dependencies.chapter_num|: How dependency is checked
+##################################################################################
+
+.. seealso::
+
+ * :ref:`Manual Table of Contents <new_manual.table_of_contents>`
+
+
+**************************************
+Overview
+**************************************
+
+ How does *Ruffus* decide how to run your pipeline?
+
+ * In which order should pipelined functions be called?
+
+ * Which parts of the pipeline are up-to-date and do not need to be rerun?
+
+
+=============================================
+Running all out-of-date tasks and dependents
+=============================================
+
+ .. image:: ../../images/manual_dependencies_flowchart_intro.png
+ :scale: 50
+
+
+ By default, *Ruffus* will
+
+ * build a flow chart (dependency tree) of pipelined tasks (functions)
+ * start from the most ancestral tasks with the fewest dependencies (``task1`` and ``task4`` in the flowchart above).
+ * walk up the tree to find the first incomplete / out-of-date tasks (i.e. ``task3`` and ``task5``)
+ * start running from there.
+
+ All down-stream (dependent) tasks will be re-run anyway, so we don't have to test
+ whether they are up-to-date or not.
+
+ .. _new_manual.dependencies.checking_multiple_times:
+
+ .. note::
+
+ This means that *Ruffus* *may* ask any task whether its jobs are out of date more than once:
+
+ * once when deciding which parts of the pipeline have to be run
+ * once just before executing the task.
+
+ *Ruffus* tries to be clever / efficient, and does the minimal amount of querying.
+
+
+.. _new_manual.dependencies.forced_reruns:
+
+=======================================
+Forced Reruns
+=======================================
+ Even if a pipeline stage appears to be up to date,
+ you can always force the pipeline to rerun from one or more task functions.
+
+ This is particularly useful, for example, if the pipeline data hasn't changed but
+ the analysis or computational code has.
+
+ ::
+
+ pipeline_run(forcedtorun_tasks = [up_to_date_task1])
+
+
+ will run all tasks from ``up_to_date_task1`` to ``final_task``.
+
+
+ Both the "target" and the "forced" lists can include as many tasks as you wish. All dependencies
+ are still carried out and out-of-date jobs rerun.
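+
+ For example, a sketch combining both lists (the task names are illustrative):
+
+ .. code-block:: python
+
+     pipeline_run(target_tasks      = [final_task],
+                  forcedtorun_tasks = [up_to_date_task1])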
+
+.. _new_manual.dependencies.minimal_reruns:
+
+=======================================
+Esoteric option: Minimal Reruns
+=======================================
+
+ In the above example, if we were to delete the results of ``up_to_date_task1``, *Ruffus*
+ would rerun ``up_to_date_task1``, ``up_to_date_task2`` and ``task3``.
+
+ However, you might argue that so long as ``up_to_date_task2`` is up-to-date, and it
+ is the only necessary prerequisite for ``task3``, we should not be concerned about
+ ``up_to_date_task1``.
+
+ This is enabled with:
+
+ .. code-block:: python
+
+ pipeline_run([task6], gnu_make_maximal_rebuild_mode = False)
+
+ This option walks down the dependency tree and proceeds no further when it encounters
+ an up-to-date task (``up_to_date_task2``) whatever the state of what lies beyond it.
+
+ This rather dangerous option is useful if you don't want to keep all the intermediate
+ files/results from upstream tasks. The pipeline will not involve any incomplete
+ tasks which precede an up-to-date result.
+
+ This is seldom what you intend, and you should always check that the appropriate stages
+ of the pipeline are executed in the flowchart output.
+
+
diff --git a/doc/_build/html/_sources/tutorials/new_tutorial/deprecated_files.txt b/doc/_build/html/_sources/tutorials/new_tutorial/deprecated_files.txt
new file mode 100644
index 0000000..740d3b8
--- /dev/null
+++ b/doc/_build/html/_sources/tutorials/new_tutorial/deprecated_files.txt
@@ -0,0 +1,238 @@
+.. include:: ../../global.inc
+.. include:: manual_chapter_numbers.inc
+
+.. index::
+ pair: deprecated @files; Tutorial
+
+.. _new_manual.deprecated_files:
+
+#####################################################################################################################
+|new_manual.deprecated_files.chapter_num|: **@files**: Deprecated syntax
+#####################################################################################################################
+
+.. warning ::
+
+ -
+
+ **This is deprecated syntax**
+
+ **which is no longer supported and**
+
+ **should NOT be used in new code.**
+
+.. seealso::
+
+ * :ref:`Manual Table of Contents <new_manual.table_of_contents>`
+ * :ref:`decorators <decorators>`
+ * :ref:`@files <decorators.files>` syntax in detail
+
+
+***************************************
+Overview
+***************************************
+
+
+ | The python functions which do the actual work of each stage or
+ :term:`task` of a *Ruffus* pipeline are written by you.
+ | The role of *Ruffus* is to make sure these functions are called in the right order,
+ with the right parameters, running in parallel using multiprocessing if desired.
+
+ The easiest way to specify parameters to *Ruffus* :term:`task` functions is to use
+ the :ref:`@files <decorators.files>` decorator.
+
+ .. index::
+ pair: @files; Manual
+
+
+***************************************
+**@files**
+***************************************
+
+ Running this code:
+
+ ::
+
+ from ruffus import *
+
+ @files('a.1', ['a.2', 'b.2'], 'A file')
+ def single_job_io_task(infile, outfiles, text):
+ for o in outfiles: open(o, "w")
+
+ # prepare input file
+ open('a.1', "w")
+
+ pipeline_run()
+
+
+ Is equivalent to calling:
+ ::
+
+ single_job_io_task('a.1', ['a.2', 'b.2'], 'A file')
+
+
+ And produces:
+ ::
+
+ >>> pipeline_run()
+ Job = [a.1 -> [a.2, b.2], A file] completed
+ Completed Task = single_job_io_task
+
+ *Ruffus* will automatically check if your task is up to date. The second time :ref:`pipeline_run() <pipeline_functions.pipeline_run>`
+ is called, nothing will happen. But if you update ``a.1``, the task will rerun:
+
+ ::
+
+ >>> open('a.1', "w")
+ >>> pipeline_run()
+ Job = [a.1 -> [a.2, b.2], A file] completed
+ Completed Task = single_job_io_task
+
+ See :ref:`chapter 2 <new_manual.skip_up_to_date.rules>` for a more in-depth discussion of how *Ruffus*
+ decides which parts of the pipeline are complete and up-to-date.
+
+
+.. index::
+ pair: @files; in parallel
+
+.. _new_manual.files.parallel:
+
+******************************************************************************
+Running the same code on different parameters in parallel
+******************************************************************************
+
+ Your pipeline may require the same function to be called multiple times on independent sets of parameters.
+ In that case, you can supply all the parameter sets to **@files**; each will be sent to a separate job that
+ may run in parallel if necessary. *Ruffus* will check whether each separate :term:`job` is up to date using
+ the *inputs* and *outputs* (first two) parameters (see :ref:`new_manual.only_rerun_out_of_date`).
+
+
+ For example, if a sequence
+ (e.g. a list or tuple) of 5 sets of parameters is passed to **@files**, that indicates
+ there will also be 5 separate jobs:
+
+ ::
+
+ from ruffus import *
+ parameters = [
+ [ 'job1.file' ], # 1st job
+ [ 'job2.file', 4 ], # 2nd job
+ [ 'job3.file', [3, 2] ], # 3rd job
+ [ 67, [13, 'job4.file'] ], # 4th job
+ [ 'job5.file' ], # 5th job
+ ]
+ @files(parameters)
+ def task_file(*params):
+ ""
+
+ | *Ruffus* creates as many jobs as there are elements in ``parameters``.
+ | In turn, each of these elements consists of a series of parameters which will be
+ passed to a separate job.
+
+ Thus the above code is equivalent to calling:
+
+ ::
+
+ task_file('job1.file')
+ task_file('job2.file', 4)
+ task_file('job3.file', [3, 2])
+ task_file(67, [13, 'job4.file'])
+ task_file('job5.file')
+
+
+ What ``task_file()`` does with these parameters is up to you!
+
+ The only constraint on the parameters is that *Ruffus* will treat any first
+ parameter of each job as the *inputs* and any second as the *output*. Any
+ strings in the *inputs* or *output* parameters (including those nested in sequences)
+ will be treated as file names.
+
+ Thus, to pick the parameters out of one of the above jobs:
+
+ ::
+
+ task_file(67, [13, 'job4.file'])
+
+ | *inputs* == ``67``
+ | *outputs* == ``[13, 'job4.file']``
+ |
+ | The solitary output filename is ``job4.file``
+
+
+.. index::
+ pair: @files; check if up to date
+
+.. _new_manual.files.is_uptodate:
+.. _new_manual.files.example:
+
+=======================================
+Checking if jobs are up to date
+=======================================
+
+ | Usually we do not want to run all the stages in a pipeline but only where
+ the input data has changed or is no longer up to date.
+ | One easy way to do this is to check the modification times for files produced
+ at each stage of the pipeline.
+
+ | Let us first create our starting files ``a.1`` and ``b.1``
+ | We can then run the following pipeline function to create
+
+ * ``a.2`` from ``a.1`` and
+ * ``b.2`` from ``b.1``
+
+ ::
+
+ # create starting files
+ open("a.1", "w")
+ open("b.1", "w")
+
+
+ from ruffus import *
+ parameters = [
+ [ 'a.1', 'a.2', 'A file'], # 1st job
+ [ 'b.1', 'b.2', 'B file'], # 2nd job
+ ]
+
+ @files(parameters)
+ def parallel_io_task(infile, outfile, text):
+ # copy infile contents to outfile
+ infile_text = open(infile).read()
+ open(outfile, "w").write(infile_text + "\n" + text)
+
+ pipeline_run()
+
+
+ .. ???
+
+ This produces the following output:
+ ::
+
+ >>> pipeline_run()
+ Job = [a.1 -> a.2, A file] completed
+ Job = [b.1 -> b.2, B file] completed
+ Completed Task = parallel_io_task
+
+
+ | If you called :ref:`pipeline_run() <pipeline_functions.pipeline_run>` again, nothing would happen because the files are up to date:
+ | ``a.2`` is more recent than ``a.1`` and
+ | ``b.2`` is more recent than ``b.1``
+
+ However, if you subsequently modified ``a.1`` again:
+ ::
+
+ open("a.1", "w")
+ pipeline_run(verbose = 1)
+
+ you would see the following::
+
+ >>> pipeline_run([parallel_io_task])
+ Task = parallel_io_task
+ Job = ["a.1" -> "a.2", "A file"] completed
+ Job = ["b.1" -> "b.2", "B file"] unnecessary: already up to date
+ Completed Task = parallel_io_task
+
+ The 2nd job is up to date and will be skipped.
+
+
+
+
+
diff --git a/doc/_build/html/_sources/tutorials/new_tutorial/deprecated_files_re.txt b/doc/_build/html/_sources/tutorials/new_tutorial/deprecated_files_re.txt
new file mode 100644
index 0000000..93782c5
--- /dev/null
+++ b/doc/_build/html/_sources/tutorials/new_tutorial/deprecated_files_re.txt
@@ -0,0 +1,145 @@
+.. include:: ../../global.inc
+.. include:: manual_chapter_numbers.inc
+
+.. index::
+ pair: deprecated @files_re; Tutorial
+
+.. _new_manual.deprecated_files_re:
+
+#####################################################################################################################
+|new_manual.deprecated_files_re.chapter_num|: **@files_re**: Deprecated `syntax using regular expressions`
+#####################################################################################################################
+
+.. warning ::
+
+ **This is deprecated syntax which is no longer supported and should NOT be used in new code.**
+
+
+.. seealso::
+
+ * :ref:`Manual Table of Contents <new_manual.table_of_contents>`
+ * :ref:`decorators <decorators>`
+ * :ref:`@files_re <decorators.files_re>` syntax in detail
+
+
+***************************************
+Overview
+***************************************
+
+
+
+ **@files_re** combines the functionality of @transform, @collate and @merge in
+ one overloaded decorator.
+
+ This is the reason why its use is discouraged: the **@files_re** syntax is far too overloaded
+ and context-dependent to support its many different functions clearly.
+
+ The following documentation is provided to help maintain historical *Ruffus* usage.
+
+=======================================
+Transforming input and output filenames
+=======================================
+
+
+ For example, the following code takes files from
+ the previous pipeline task, and makes new output parameters with the ``.sums`` suffix
+ in place of the ``.chunks`` suffix:
+
+ ::
+
+ @transform(step_4_split_numbers_into_chunks, suffix(".chunks"), ".sums")
+ def step_5_calculate_sum_of_squares (input_file_name, output_file_name):
+ #
+ # calculate sums and sums of squares for all values in the input_file_name
+ # writing to output_file_name
+ ""
+
+ This can be written equivalently using **@files_re**:
+
+ ::
+
+ @files_re(step_4_split_numbers_into_chunks, r".chunks", r".sums")
+ def step_5_calculate_sum_of_squares (input_file_name, output_file_name):
+ ""
+
+.. _new_manual.files_re.combine:
+.. index::
+ pair: combine; Manual
+
+=====================================================
+Collating many *inputs* into a single *output*
+=====================================================
+
+ Similarly, the following code collects **inputs**
+ from the same species in the same directory:
+
+ ::
+
+ @collate('*.animals', # inputs = all *.animal files
+ regex(r'mammals.([^.]+)'), # regular expression
+ r'\1/animals.in_my_zoo', # single output file per species
+ r'\1' ) # species name
+ def capture_mammals(infiles, outfile, species):
+ # summarise all animals of this species
+ ""
+
+ This can be written equivalently with **@files_re** and the :ref:`combine<decorators.combine>` indicator:
+
+ ::
+
+ @files_re('*.animals', # inputs = all *.animal files
+ r'mammals.([^.]+)', # regular expression
+ combine(r'\1/animals.in_my_zoo'), # single output file per species
+ r'\1' ) # species name
+ def capture_mammals(infiles, outfile, species):
+ # summarise all animals of this species
+ ""
+
+
+
+==============================================================================
+ Generating *input* and *output* parameters using regular expressions
+==============================================================================
+
+ The following code generates additional
+ *input* prerequisite file names which match the original *input* files.
+
+ We want each job of our ``analyse()`` function to get a corresponding pair
+ of ``xx.chunks`` and ``xx.red_indian`` files, where
+
+ ``*.chunks`` are generated by the task function ``split_up_problem()`` and
+ ``*.red_indian`` are generated by the task function ``make_red_indians()``:
+
+ ::
+
+ @follows(make_red_indians)
+ @transform(split_up_problem, # starting set of *inputs*
+ regex(r"(.*).chunks"), # regular expression
+ inputs([r"\g<0>", # xx.chunks
+ r"\1.red_indian"]), # important.file
+ r"\1.results" # xx.results
+ )
+ def analyse(input_filenames, output_file_name):
+ "Do analysis here"
+
+
+ The equivalent code using @files_re looks very similar:
+
+ ::
+
+ @follows(make_red_indians)
+ @files_re( split_up_problem, # starting set of *inputs*
+ r"(.*).chunks", # regular expression
+ [r"\g<0>", # xx.chunks
+ r"\1.red_indian"]), # important.file
+ r"\1.results") # xx.results
+ def analyse(input_filenames, output_file_name):
+ "Do analysis here"
+
+
diff --git a/doc/_build/html/_sources/tutorials/new_tutorial/exceptions.txt b/doc/_build/html/_sources/tutorials/new_tutorial/exceptions.txt
new file mode 100644
index 0000000..8a3ed92
--- /dev/null
+++ b/doc/_build/html/_sources/tutorials/new_tutorial/exceptions.txt
@@ -0,0 +1,191 @@
+.. include:: ../../global.inc
+.. include:: manual_chapter_numbers.inc
+
+.. index::
+ pair: exceptions; Tutorial
+
+.. _new_manual.exceptions:
+
+###################################################################################################
+|new_manual.exceptions.chapter_num|: Exceptions thrown inside pipelines
+###################################################################################################
+
+**************************************
+Overview
+**************************************
+
+
+ The goal for *Ruffus* is that exceptions should just work *out-of-the-box* without any fuss.
+ This is especially important for exceptions that come from your code which may be raised
+ in a different process. Often multiple parallel operations (jobs or tasks) fail at the
+ same time. *Ruffus* will forward each of these exceptions with the tracebacks so you
+ can jump straight to the offending line.
+
+ This example shows separate exceptions from two jobs running in parallel:
+
+
+ .. code-block:: python
+
+ from ruffus import *
+
+ @originate(["a.start", "b.start", "c.start", "d.start", "e.start"])
+ def throw_exceptions_here(output_file):
+ raise Exception("OOPS")
+
+ pipeline_run(multiprocess = 2)
+
+ .. code-block:: pycon
+ :emphasize-lines: 5, 21
+
+ >>> pipeline_run(multiprocess = 2)
+
+ ruffus.ruffus_exceptions.RethrownJobError:
+
+ Original exceptions:
+
+ Exception #1
+ 'exceptions.Exception(OOPS)' raised in ...
+ Task = def throw_exceptions_here(...):
+ Job = [None -> b.start]
+
+ Traceback (most recent call last):
+ File "/usr/local/lib/python2.7/dist-packages/ruffus/task.py", line 685, in run_pooled_job_without_exceptions
+ return_value = job_wrapper(param, user_defined_work_func, register_cleanup, touch_files_only)
+ File "/usr/local/lib/python2.7/dist-packages/ruffus/task.py", line 549, in job_wrapper_output_files
+ job_wrapper_io_files(param, user_defined_work_func, register_cleanup, touch_files_only, output_files_only = True)
+ File "/usr/local/lib/python2.7/dist-packages/ruffus/task.py", line 504, in job_wrapper_io_files
+ ret_val = user_defined_work_func(*(param[1:]))
+ File "<stdin>", line 3, in throw_exceptions_here
+ Exception: OOPS
+
+
+ Exception #2
+ 'exceptions.Exception(OOPS)' raised in ...
+ Task = def throw_exceptions_here(...):
+ Job = [None -> a.start]
+
+ Traceback (most recent call last):
+ File "/usr/local/lib/python2.7/dist-packages/ruffus/task.py", line 685, in run_pooled_job_without_exceptions
+ return_value = job_wrapper(param, user_defined_work_func, register_cleanup, touch_files_only)
+ File "/usr/local/lib/python2.7/dist-packages/ruffus/task.py", line 549, in job_wrapper_output_files
+ job_wrapper_io_files(param, user_defined_work_func, register_cleanup, touch_files_only, output_files_only = True)
+ File "/usr/local/lib/python2.7/dist-packages/ruffus/task.py", line 504, in job_wrapper_io_files
+ ret_val = user_defined_work_func(*(param[1:]))
+ File "<stdin>", line 3, in throw_exceptions_here
+ Exception: OOPS
+
+
+ .. image:: ../../images/manual_exceptions.png
+
+
+.. _new_manual.exceptions.multiple_errors:
+
+.. index:: signalling, interrupts, break, errors, exceptions, multiple errors
+
+****************************************************************
+Pipelines running in parallel accumulate Exceptions
+****************************************************************
+
+ As shown above, by default *Ruffus* accumulates ``NN`` exceptions before interrupting the pipeline prematurely, where
+ ``NN`` is the specified parallelism for :ref:`pipeline_run(multiprocess = NN) <pipeline_functions.pipeline_run>`.
+
+ This seems a fair tradeoff between being able to gather detailed error information for
+ running jobs, and not wasting too much time for a task that is going to fail anyway.
+
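+ If you want to handle these accumulated errors yourself (for example, to log them and exit cleanly),
+ the combined error can be caught like any other exception. The following is a minimal sketch (not taken
+ from the example above); it only assumes that ``ruffus.ruffus_exceptions.RethrownJobError`` (the exception
+ type shown in the tracebacks above) behaves like a normal Python exception whose string representation
+ lists the original errors:
+
+ .. code-block:: python
+
+     from ruffus import *
+     import ruffus.ruffus_exceptions
+     import sys
+
+     @originate(["a.start", "b.start", "c.start", "d.start", "e.start"])
+     def throw_exceptions_here(output_file):
+         raise Exception("OOPS")
+
+     try:
+         pipeline_run(multiprocess = 2)
+     except ruffus.ruffus_exceptions.RethrownJobError as err:
+         # print the accumulated per-job errors and stop with a non-zero exit code
+         sys.stderr.write(str(err) + "\n")
+         sys.exit(1)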
+
+****************************************************************
+Terminate pipeline immediately upon Exceptions
+****************************************************************
+
+
+==============================================================================================================================
+Set :ref:`pipeline_run(exceptions_terminate_immediately = True) <pipeline_functions.pipeline_run>`
+==============================================================================================================================
+
+ To have all exceptions interrupt the pipeline immediately, invoke:
+
+ .. code-block:: python
+
+ pipeline_run(exceptions_terminate_immediately = True)
+
+
+ For example, with this change, only a single exception will be thrown before the pipeline is interrupted:
+
+ .. code-block:: python
+
+ from ruffus import *
+
+ @originate(["a.start", "b.start", "c.start", "d.start", "e.start"])
+ def throw_exceptions_here(output_file):
+ raise Exception("OOPS")
+
+ pipeline_run(multiprocess = 2, exceptions_terminate_immediately = True)
+
+ .. code-block:: pycon
+ :emphasize-lines: 5
+
+ >>> pipeline_run(multiprocess = 2, exceptions_terminate_immediately = True)
+
+ ruffus.ruffus_exceptions.RethrownJobError:
+
+ Original exception:
+
+ Exception #1
+ 'exceptions.Exception(OOPS)' raised in ...
+ Task = def throw_exceptions_here(...):
+ Job = [None -> a.start]
+
+ Traceback (most recent call last):
+ [Tedious traceback snipped out!!!....]
+ Exception: OOPS
+
+
+==============================================================================================================================
+raise ``Ruffus.JobSignalledBreak``
+==============================================================================================================================
+
+ The same can be accomplished on a finer scale by throwing the ``Ruffus.JobSignalledBreak`` Exception. Unlike
+ other exceptions, this causes an immediate halt in pipeline execution. If there are other exceptions in play at that
+ point, they will be rethrown in the main process but no new exceptions will be added.
+
+ .. code-block:: python
+
+ from ruffus import *
+ # JobSignalledBreak lives in the ruffus_exceptions module
+ from ruffus.ruffus_exceptions import JobSignalledBreak
+
+ @originate(["a.start", "b.start", "c.start", "d.start", "e.start"])
+ def throw_exceptions_here(output_file):
+ raise JobSignalledBreak("OOPS")
+
+ pipeline_run(multiprocess = 2)
+
+
+****************************************************************
+Display exceptions as they occur
+****************************************************************
+
+ In the following example, the jobs throw exceptions at staggered two-second intervals.
+ With ``log_exceptions = True``, the
+ exceptions are displayed as they occur even though the pipeline continues running.
+
+ ``logger.error(...)`` will be invoked with the string representation of each exception and its associated stack trace.
+
+ The default logger prints to ``sys.stderr``, but, as usual, it can be changed to any logger from the ``logging`` module (or any compatible object) via
+ :ref:`pipeline_run(logger = XXX) <pipeline_functions.pipeline_run>`.
+
+
+ .. code-block:: python
+
+ from ruffus import *
+ # JobSignalledBreak lives in the ruffus_exceptions module
+ from ruffus.ruffus_exceptions import JobSignalledBreak
+ import time, os
+
+ @originate(["1.start", "2.start", "3.start", "4.start", "5.start"])
+ def throw_exceptions_here(output_file):
+ delay = int(os.path.splitext(output_file)[0])
+ time.sleep(delay * 2)
+ raise JobSignalledBreak("OOPS")
+
+ pipeline_run(log_exceptions = True, multiprocess = 5)
+
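+ The same example can be pointed at your own logger. This is a minimal sketch (not part of the example
+ above), assuming a single-process run so that the logger does not need to be shared between processes;
+ for multiprocessing runs the logging object must be shareable across processes (see the
+ ``ruffus.proxy_logger`` module):
+
+ .. code-block:: python
+
+     import logging
+
+     # send Ruffus messages and exception reports through a standard logging.Logger
+     logging.basicConfig(level=logging.DEBUG, format="%(levelname)s %(message)s")
+     my_logger = logging.getLogger("my_pipeline")
+
+     pipeline_run(log_exceptions = True, logger = my_logger)
+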
+
+
+
diff --git a/doc/_build/html/_sources/tutorials/new_tutorial/flowchart_colours.txt b/doc/_build/html/_sources/tutorials/new_tutorial/flowchart_colours.txt
new file mode 100644
index 0000000..e03f171
--- /dev/null
+++ b/doc/_build/html/_sources/tutorials/new_tutorial/flowchart_colours.txt
@@ -0,0 +1,61 @@
+.. include:: ../../global.inc
+.. include:: manual_chapter_numbers.inc
+
+.. index::
+ pair: flowchart colours; Tutorial
+
+.. _new_manual.flowchart_colours:
+
+##########################################################################################################################################################################################################################################
+|new_manual.flowchart_colours.chapter_num|: Flow Chart Colours with :ref:`pipeline_printout_graph(...) <pipeline_functions.pipeline_printout_graph>`
+##########################################################################################################################################################################################################################################
+
+.. seealso::
+
+ * :ref:`Manual Table of Contents <new_manual.table_of_contents>`
+ * :ref:`pipeline_printout_graph(...) <pipeline_functions.pipeline_printout_graph>`
+ * :download:`Download code <../../static_data/example_scripts/play_with_colours.py>`
+ * :ref:`Code <new_manual.flowchart_colours.code>` for experimenting with colours
+
+******************
+Flowchart colours
+******************
+
+The appearance of *Ruffus* flowcharts produced by :ref:`pipeline_printout_graph <pipeline_functions.pipeline_printout_graph>`
+can be extensively customised.
+
+ This is mainly controlled by the :ref:`user_colour_scheme <pipeline_functions.pipeline_printout_graph.user_colour_scheme>` parameter (note the UK spelling of "colour").
+
+Example:
+
+ Use colour scheme index = 1
+ ::
+
+ pipeline_printout_graph ("flowchart.svg", "svg", [final_task],
+ user_colour_scheme = {
+ "colour_scheme_index" :1,
+ "Pipeline" :{"fontcolor" : '"#FF3232"' },
+ "Key" :{"fontcolor" : "Red",
+ "fillcolor" : '"#F6F4F4"' },
+ "Task to run" :{"linecolor" : '"#0044A0"' },
+ "Final target" :{"fillcolor" : '"#EFA03B"',
+ "fontcolor" : "black",
+ "dashed" : 0 }
+ })
+
+
+ There are 8 colour schemes, selected by setting ``"colour_scheme_index"``:
+ ::
+
+ pipeline_printout_graph ("flowchart.svg", "svg", [final_task],
+ user_colour_scheme = {"colour_scheme_index" :6})
+
+
+ These colours were chosen after many fierce arguments between the authors and their friends, and with much
+ inspiration from http://kuler.adobe.com/#create/fromacolor. Please
+ feel free to submit any additional sets of colours for our consideration.
+
+
+(Click here for image in :download:`svg <../../images/flowchart_colour_schemes.svg>`.)
+
+.. image:: ../../images/flowchart_colour_schemes.png
diff --git a/doc/_build/html/_sources/tutorials/new_tutorial/flowchart_colours_code.txt b/doc/_build/html/_sources/tutorials/new_tutorial/flowchart_colours_code.txt
new file mode 100644
index 0000000..13bb236
--- /dev/null
+++ b/doc/_build/html/_sources/tutorials/new_tutorial/flowchart_colours_code.txt
@@ -0,0 +1,288 @@
+.. include:: ../../global.inc
+.. include:: manual_chapter_numbers.inc
+
+.. index::
+ pair: flowchart colours; Tutorial
+
+.. _new_manual.flowchart_colours.code:
+
+##########################################################################################################################################################################################################################################
+|new_manual.flowchart_colours.chapter_num|: Python code for Flow Chart Colours with :ref:`pipeline_printout_graph(...) <pipeline_functions.pipeline_printout_graph>`
+##########################################################################################################################################################################################################################################
+
+.. seealso::
+
+ * :ref:`Manual Table of Contents <new_manual.table_of_contents>`
+ * :ref:`pipeline_printout_graph(...) <pipeline_functions.pipeline_printout_graph>`
+ * :download:`Download code <../../static_data/example_scripts/play_with_colours.py>`
+ * Back to :ref:`Flowchart colours <new_manual.flowchart_colours>`
+
+ This example shows how flowchart colours can be customised.
+
+
+************************************
+Code
+************************************
+ ::
+
+ #!/usr/bin/env python
+ """
+
+ play_with_colours.py
+ [--log_file PATH]
+ [--verbose]
+
+ """
+
+ ################################################################################
+ #
+ # play_with_colours.py
+ #
+ #
+ # Copyright (c) 7/13/2010 Leo Goodstadt
+ #
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
+ # of this software and associated documentation files (the "Software"), to deal
+ # in the Software without restriction, including without limitation the rights
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ # copies of the Software, and to permit persons to whom the Software is
+ # furnished to do so, subject to the following conditions:
+ #
+ # The above copyright notice and this permission notice shall be included in
+ # all copies or substantial portions of the Software.
+ #
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ # THE SOFTWARE.
+ #################################################################################
+
+ import sys, os
+
+
+ #88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+ # options
+
+
+ #88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+
+ from optparse import OptionParser
+ import StringIO
+
+ parser = OptionParser(version="%play_with_colours 1.0",
+ usage = "\n\n play_with_colours "
+ "--flowchart FILE [options] "
+ "[--colour_scheme_index INT ] "
+ "[--key_legend_in_graph]")
+
+ #
+ # pipeline
+ #
+ parser.add_option("--flowchart", dest="flowchart",
+ metavar="FILE",
+ type="string",
+ help="Don't actually run any commands; just print the pipeline "
+ "as a flowchart.")
+ parser.add_option("--colour_scheme_index", dest="colour_scheme_index",
+ metavar="INTEGER",
+ type="int",
+ help="Index of colour scheme for flow chart.")
+ parser.add_option("--key_legend_in_graph", dest="key_legend_in_graph",
+ action="store_true", default=False,
+ help="Print out legend and key for dependency graph.")
+
+ (options, remaining_args) = parser.parse_args()
+ if not options.flowchart:
+ raise Exception("Missing mandatory parameter: --flowchart.\n")
+
+
+
+
+ #88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+ # imports
+
+
+ #88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+ from ruffus import *
+ from ruffus.ruffus_exceptions import JobSignalledBreak
+
+
+ #88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+ # Pipeline
+
+
+ #88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+
+ #
+ # up to date tasks
+ #
+ @check_if_uptodate (lambda : (False, ""))
+ def Up_to_date_task1(infile, outfile):
+ pass
+
+ @check_if_uptodate (lambda : (False, ""))
+ @follows(Up_to_date_task1)
+ def Up_to_date_task2(infile, outfile):
+ pass
+
+ @check_if_uptodate (lambda : (False, ""))
+ @follows(Up_to_date_task2)
+ def Up_to_date_task3(infile, outfile):
+ pass
+
+
+ @check_if_uptodate (lambda : (False, ""))
+ @follows(Up_to_date_task3)
+ def Up_to_date_final_target(infile, outfile):
+ pass
+
+
+ #
+ # Explicitly specified
+ #
+ @check_if_uptodate (lambda : (False, ""))
+ @follows(Up_to_date_task1)
+ def Explicitly_specified_task(infile, outfile):
+ pass
+
+
+
+ #
+ # Tasks to run
+ #
+ @follows(Explicitly_specified_task)
+ def Task_to_run1(infile, outfile):
+ pass
+
+
+ @follows(Task_to_run1)
+ def Task_to_run2(infile, outfile):
+ pass
+
+ @follows(Task_to_run2)
+ def Task_to_run3(infile, outfile):
+ pass
+
+ @check_if_uptodate (lambda : (False, ""))
+ @follows(Task_to_run2)
+ def Up_to_date_task_forced_to_rerun(infile, outfile):
+ pass
+
+
+ #
+ # Final target
+ #
+ @follows(Up_to_date_task_forced_to_rerun, Task_to_run3)
+ def Final_target(infile, outfile):
+ pass
+
+ #
+ # Ignored downstream
+ #
+ @follows(Final_target)
+ def Downstream_task1_ignored(infile, outfile):
+ pass
+
+ @follows(Final_target)
+ def Downstream_task2_ignored(infile, outfile):
+ pass
+
+
+
+
+
+
+
+
+
+
+
+ #88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+ # Main logic
+
+
+ #88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+ from collections import defaultdict
+ custom_flow_chart_colour_scheme = defaultdict(dict)
+
+ #
+ # Base chart on this overall colour scheme index
+ #
+ custom_flow_chart_colour_scheme["colour_scheme_index"] = options.colour_scheme_index
+
+ #
+ # Overriding colours
+ #
+ if options.colour_scheme_index == None:
+ custom_flow_chart_colour_scheme["Vicious cycle"]["linecolor"] = '"#FF3232"'
+ custom_flow_chart_colour_scheme["Pipeline"]["fontcolor"] = '"#FF3232"'
+ custom_flow_chart_colour_scheme["Key"]["fontcolor"] = "black"
+ custom_flow_chart_colour_scheme["Key"]["fillcolor"] = '"#F6F4F4"'
+ custom_flow_chart_colour_scheme["Task to run"]["linecolor"] = '"#0044A0"'
+ custom_flow_chart_colour_scheme["Up-to-date"]["linecolor"] = "gray"
+ custom_flow_chart_colour_scheme["Final target"]["fillcolor"] = '"#EFA03B"'
+ custom_flow_chart_colour_scheme["Final target"]["fontcolor"] = "black"
+ custom_flow_chart_colour_scheme["Final target"]["color"] = "black"
+ custom_flow_chart_colour_scheme["Final target"]["dashed"] = 0
+ custom_flow_chart_colour_scheme["Vicious cycle"]["fillcolor"] = '"#FF3232"'
+ custom_flow_chart_colour_scheme["Vicious cycle"]["fontcolor"] = 'white'
+ custom_flow_chart_colour_scheme["Vicious cycle"]["color"] = "white"
+ custom_flow_chart_colour_scheme["Vicious cycle"]["dashed"] = 0
+ custom_flow_chart_colour_scheme["Up-to-date task"]["fillcolor"] = '"#B8CC6E"'
+ custom_flow_chart_colour_scheme["Up-to-date task"]["fontcolor"] = '"#006000"'
+ custom_flow_chart_colour_scheme["Up-to-date task"]["color"] = '"#006000"'
+ custom_flow_chart_colour_scheme["Up-to-date task"]["dashed"] = 0
+ custom_flow_chart_colour_scheme["Down stream"]["fillcolor"] = "white"
+ custom_flow_chart_colour_scheme["Down stream"]["fontcolor"] = "gray"
+ custom_flow_chart_colour_scheme["Down stream"]["color"] = "gray"
+ custom_flow_chart_colour_scheme["Down stream"]["dashed"] = 0
+ custom_flow_chart_colour_scheme["Explicitly specified task"]["fillcolor"] = "transparent"
+ custom_flow_chart_colour_scheme["Explicitly specified task"]["fontcolor"] = "black"
+ custom_flow_chart_colour_scheme["Explicitly specified task"]["color"] = "black"
+ custom_flow_chart_colour_scheme["Explicitly specified task"]["dashed"] = 0
+ custom_flow_chart_colour_scheme["Task to run"]["fillcolor"] = '"#EBF3FF"'
+ custom_flow_chart_colour_scheme["Task to run"]["fontcolor"] = '"#0044A0"'
+ custom_flow_chart_colour_scheme["Task to run"]["color"] = '"#0044A0"'
+ custom_flow_chart_colour_scheme["Task to run"]["dashed"] = 0
+ custom_flow_chart_colour_scheme["Up-to-date task forced to rerun"]["fillcolor"] = 'transparent'
+ custom_flow_chart_colour_scheme["Up-to-date task forced to rerun"]["fontcolor"] = '"#0044A0"'
+ custom_flow_chart_colour_scheme["Up-to-date task forced to rerun"]["color"] = '"#0044A0"'
+ custom_flow_chart_colour_scheme["Up-to-date task forced to rerun"]["dashed"] = 1
+ custom_flow_chart_colour_scheme["Up-to-date Final target"]["fillcolor"] = '"#EFA03B"'
+ custom_flow_chart_colour_scheme["Up-to-date Final target"]["fontcolor"] = '"#006000"'
+ custom_flow_chart_colour_scheme["Up-to-date Final target"]["color"] = '"#006000"'
+ custom_flow_chart_colour_scheme["Up-to-date Final target"]["dashed"] = 0
+
+ if __name__ == '__main__':
+ pipeline_printout_graph (
+
+ open(options.flowchart, "w"),
+ # use flowchart file name extension to decide flowchart format
+ # e.g. svg, jpg etc.
+ os.path.splitext(options.flowchart)[1][1:],
+
+ # final targets
+ [Final_target, Up_to_date_final_target],
+
+ # Explicitly specified tasks
+ [Explicitly_specified_task],
+
+ # Do we want key legend
+ no_key_legend = not options.key_legend_in_graph,
+
+ # Print all the task types whether used or not
+ minimal_key_legend = False,
+
+ user_colour_scheme = custom_flow_chart_colour_scheme,
+ pipeline_name = "Colour schemes")
+
diff --git a/doc/_build/html/_sources/tutorials/new_tutorial/inputs.txt b/doc/_build/html/_sources/tutorials/new_tutorial/inputs.txt
new file mode 100644
index 0000000..823f187
--- /dev/null
+++ b/doc/_build/html/_sources/tutorials/new_tutorial/inputs.txt
@@ -0,0 +1,239 @@
+.. include:: ../../global.inc
+.. include:: manual_chapter_numbers.inc
+
+.. index::
+ pair: inputs; Tutorial
+ pair: add_inputs; Tutorial
+ pair: string substitution for inputs; Tutorial
+
+.. _new_manual.inputs:
+
+###########################################################################################################################################################################################################################################################################################
+|new_manual.inputs.chapter_num|: Manipulating task inputs via string substitution using :ref:`inputs() <decorators.inputs>` and :ref:`add_inputs() <decorators.add_inputs>`
+###########################################################################################################################################################################################################################################################################################
+
+
+.. seealso::
+
+ * :ref:`Manual Table of Contents <new_manual.table_of_contents>`
+ * :ref:`inputs() <decorators.inputs>` syntax
+ * :ref:`add_inputs() <decorators.add_inputs>` syntax
+
+.. note::
+
+ Remember to look at the example code:
+
+ * :ref:`new_manual.inputs.code`
+
+***********************
+Overview
+***********************
+
+ The previous chapters have described how *Ruffus* allows the **Output** names for each job
+ to be generated from the **Input** names via string substitution. This is how *Ruffus* can
+ automatically chain multiple tasks in a pipeline together seamlessly.
+
+ Sometimes it is useful to be able to modify the **Input** by string substitution
+ as well. There are two situations where this additional flexibility is needed:
+
+ #. You need to add additional prerequisites or file names to the **Input** of every single job
+ #. You need to add additional **Input** file names which are some variant of the existing ones.
+
+ Both will be much more obvious with some examples.
+
+
+*******************************************************************************************************************
+Adding additional *input* prerequisites per job with :ref:`add_inputs() <decorators.add_inputs>`
+*******************************************************************************************************************
+
+
+===================================================================
+1. Example: compiling c++ code
+===================================================================
+
+ Let us first compile some c++ (``"*.cpp"``) files using plain :ref:`@transform <decorators.transform>` syntax:
+
+ .. code-block:: python
+
+ # source files exist before our pipeline
+ source_files = ["hasty.cpp", "tasty.cpp", "messy.cpp"]
+ for source_file in source_files:
+ open(source_file, "w")
+
+ from ruffus import *
+
+ @transform(source_files, suffix(".cpp"), ".o")
+ def compile(input_filename, output_file):
+ open(output_file, "w")
+
+ pipeline_run()
+
+
+======================================================================================================================================
+2. Example: Adding a common header file with :ref:`add_inputs() <decorators.add_inputs>`
+======================================================================================================================================
+
+ .. code-block:: python
+     :emphasize-lines: 12
+
+     # source files exist before our pipeline
+     source_files = ["hasty.cpp", "tasty.cpp", "messy.cpp"]
+     for source_file in source_files:
+         open(source_file, "w")
+
+     # common (universal) header exists before our pipeline
+     open("universal.h", "w")
+
+     from ruffus import *
+
+     @transform(source_files, suffix(".cpp"),
+                # add header to the input of every job
+                add_inputs("universal.h"),
+                ".o")
+     def compile(input_filename, output_file):
+         open(output_file, "w")
+
+     pipeline_run()
+
+ .. code-block:: pycon
+
+     >>> pipeline_run()
+     Job = [[hasty.cpp, universal.h] -> hasty.o] completed
+     Job = [[messy.cpp, universal.h] -> messy.o] completed
+     Job = [[tasty.cpp, universal.h] -> tasty.o] completed
+     Completed Task = compile
+
+
+=====================================================================
+3. Example: Additional *Input* can be tasks
+=====================================================================
+
+ We can also add a task name to :ref:`add_inputs() <decorators.add_inputs>`.
+ This chains the **Output**, i.e. run time results, of any previous task as
+ an additional **Input** to every single job in the task.
+
+ .. code-block:: python
+ :emphasize-lines: 1,7,9
+
+ # make header files
+ @transform(source_files, suffix(".cpp"), ".h")
+ def create_matching_headers(input_file, output_file):
+ open(output_file, "w")
+
+ @transform(source_files, suffix(".cpp"),
+ # add header to the input of every job
+ add_inputs("universal.h",
+ # add result of task create_matching_headers to the input of every job
+ create_matching_headers),
+ ".o")
+ def compile(input_filenames, output_file):
+ open(output_file, "w")
+
+ pipeline_run()
+
+
+ .. code-block:: pycon
+
+     >>> pipeline_run()
+     Job = [[hasty.cpp, universal.h, hasty.h, messy.h, tasty.h] -> hasty.o] completed
+     Job = [[messy.cpp, universal.h, hasty.h, messy.h, tasty.h] -> messy.o] completed
+     Job = [[tasty.cpp, universal.h, hasty.h, messy.h, tasty.h] -> tasty.o] completed
+     Completed Task = compile
+
+
+================================================================================================================================================================================================================================================
+4. Example: Add corresponding files using :ref:`add_inputs() <decorators.add_inputs>` with :ref:`formatter <decorators.formatter>` or :ref:`regex <decorators.regex>`
+================================================================================================================================================================================================================================================
+ The previous example created headers corresponding to our source files and added *all* of them
+ as **Input** to every compilation job. That is generally not what you want. Instead,
+ what is generally needed is a way to
+
+ 1) Look up the exact corresponding header for the *specific* job, rather than adding all
+ possible files to all jobs in a task. When compiling ``hasty.cpp``, we just need
+ to add ``hasty.h`` (and ``universal.h``).
+ 2) Add a pre-existing file name (``hasty.h`` already exists; don't create it via
+ another task).
+
+ This is a surprisingly common requirement: In bioinformatics sometimes DNA or RNA
+ sequence files come singly in `*.fastq <http://en.wikipedia.org/wiki/FASTQ_format>`__
+ and sometimes in `matching pairs <http://en.wikipedia.org/wiki/DNA_sequencing_theory#Pairwise_end-sequencing>`__:
+ ``*1.fastq, *2.fastq`` etc. In the latter case, we often need to make sure that both
+ sequence files are being processed in tandem. One way is to take one file name (``*1.fastq``)
+ and look up the other.
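+
+ A minimal sketch of this pairing is shown below (the read file names and the ``map_read_pair()`` task are
+ made up purely for illustration; the string substitution mechanism itself is explained in the next
+ paragraph and example):
+
+ .. code-block:: python
+
+     from ruffus import *
+
+     # paired read files exist before our pipeline
+     first_reads = ["sampleA_1.fastq", "sampleB_1.fastq"]
+     for file_name in first_reads + ["sampleA_2.fastq", "sampleB_2.fastq"]:
+         open(file_name, "w")
+
+     @transform(first_reads,
+                # capture everything before "_1.fastq" as PREFIX
+                formatter(r"(?P<PREFIX>.+)_1\.fastq$"),
+                # look up the matching second read for this specific job only
+                add_inputs("{PREFIX[0]}_2.fastq"),
+                "{PREFIX[0]}.sam")
+     def map_read_pair(input_files, output_file):
+         open(output_file, "w")
+
+     pipeline_run()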
+
+ :ref:`add_inputs() <decorators.add_inputs>` uses standard *Ruffus* string substitution
+ via :ref:`formatter <decorators.formatter>` and :ref:`regex <decorators.regex>` to look up (generate) **Input** file names.
+ (As a rule, :ref:`suffix <decorators.suffix>` only substitutes **Output** file names.)
+
+ .. code-block:: python
+ :emphasize-lines: 3,5
+
+ @transform( source_files,
+ formatter(".cpp$"),
+ # corresponding header for each source file
+ add_inputs("{basename[0]}.h",
+ # add header to the input of every job
+ "universal.h"),
+ "{basename[0]}.o")
+ def compile(input_filenames, output_file):
+ open(output_file, "w")
+
+ This script gives the following output:
+
+ .. code-block:: pycon
+
+ >>> pipeline_run()
+ Job = [[hasty.cpp, hasty.h, universal.h] -> hasty.o] completed
+ Job = [[messy.cpp, messy.h, universal.h] -> messy.o] completed
+ Job = [[tasty.cpp, tasty.h, universal.h] -> tasty.o] completed
+ Completed Task = compile
+
+
+********************************************************************************
+Replacing all input parameters with :ref:`inputs() <decorators.inputs>`
+********************************************************************************
+
+ The previous examples all *added* to the set of **Input** file names.
+ Sometimes it is necessary to replace all the **Input** parameters altogether.
+
+================================================================================================================================================================================================================================================
+5. Example: Running matching python scripts using :ref:`inputs() <decorators.inputs>`
+================================================================================================================================================================================================================================================
+
+ Here is a contrived example: we wish to find all cython/python files which have been
+ compiled into corresponding c++ source files.
+ Instead of compiling the c++, we shall invoke the corresponding python scripts.
+
+ Given three c++ files and their corresponding python scripts:
+
+ .. code-block:: python
+ :emphasize-lines: 4
+
+ @transform( source_files,
+ formatter(".cpp$"),
+
+ # corresponding python file for each source file
+ inputs("{basename[0]}.py"),
+
+ "{basename[0]}.results")
+ def run_corresponding_python(input_filenames, output_file):
+ open(output_file, "w")
+
+
+ The *Ruffus* code will call each python script corresponding to its c++ counterpart:
+
+ .. code-block:: pycon
+
+ >>> pipeline_run()
+ Job = [hasty.py -> hasty.results] completed
+ Job = [messy.py -> messy.results] completed
+ Job = [tasty.py -> tasty.results] completed
+ Completed Task = run_corresponding_python
+
diff --git a/doc/_build/html/_sources/tutorials/new_tutorial/inputs_code.txt b/doc/_build/html/_sources/tutorials/new_tutorial/inputs_code.txt
new file mode 100644
index 0000000..050ae10
--- /dev/null
+++ b/doc/_build/html/_sources/tutorials/new_tutorial/inputs_code.txt
@@ -0,0 +1,229 @@
+.. include:: ../../global.inc
+.. include:: manual_chapter_numbers.inc
+
+.. _new_manual.inputs.code:
+
+############################################################################################################################################################################################################
+|new_manual.inputs.chapter_num|: Python Code for Manipulating task inputs via string substitution using :ref:`inputs() <decorators.inputs>` and :ref:`add_inputs() <decorators.add_inputs>`
+############################################################################################################################################################################################################
+
+.. seealso::
+
+ * :ref:`Manual Table of Contents <new_manual.table_of_contents>`
+ * :ref:`inputs() <decorators.inputs>` syntax
+ * :ref:`add_inputs() <decorators.add_inputs>` syntax
+ * Back to |new_manual.inputs.chapter_num|: :ref:`Manipulating task inputs via string substitution <new_manual.inputs>`
+
+******************************************************************************************************************************************************
+Example code for adding additional *input* prerequisites per job with :ref:`add_inputs() <decorators.add_inputs>`
+******************************************************************************************************************************************************
+
+.. _new_manual.inputs.example1:
+
+===================================================================
+1. Example: compiling c++ code
+===================================================================
+
+ .. code-block:: python
+
+ # source files exist before our pipeline
+ source_files = ["hasty.cpp", "tasty.cpp", "messy.cpp"]
+ for source_file in source_files:
+ open(source_file, "w")
+
+ from ruffus import *
+
+ @transform(source_files, suffix(".cpp"), ".o")
+ def compile(input_filename, output_file):
+ open(output_file, "w")
+
+ pipeline_run()
+
+
+ Giving:
+
+ .. code-block:: pycon
+
+ >>> pipeline_run()
+ Job = [hasty.cpp -> hasty.o] completed
+ Job = [messy.cpp -> messy.o] completed
+ Job = [tasty.cpp -> tasty.o] completed
+ Completed Task = compile
+
+.. _new_manual.inputs.example2:
+
+======================================================================================================================================
+2. Example: Adding a common header file with :ref:`add_inputs() <decorators.add_inputs>`
+======================================================================================================================================
+
+
+ .. code-block:: python
+ :emphasize-lines: 12
+
+ # source files exist before our pipeline
+ source_files = ["hasty.cpp", "tasty.cpp", "messy.cpp"]
+ for source_file in source_files:
+ open(source_file, "w")
+
+ # common (universal) header exists before our pipeline
+ open("universal.h", "w")
+
+ from ruffus import *
+
+ @transform( source_files, suffix(".cpp"),
+ # add header to the input of every job
+ add_inputs("universal.h"),
+ ".o")
+ def compile(input_filename, output_file):
+ open(output_file, "w")
+
+ pipeline_run()
+
+ Giving:
+
+ .. code-block:: pycon
+
+ >>> pipeline_run()
+ Job = [[hasty.cpp, universal.h] -> hasty.o] completed
+ Job = [[messy.cpp, universal.h] -> messy.o] completed
+ Job = [[tasty.cpp, universal.h] -> tasty.o] completed
+ Completed Task = compile
+
+.. _new_manual.inputs.example3:
+
+=====================================================================
+3. Example: Additional *Input* can be tasks
+=====================================================================
+
+ .. code-block:: python
+ :emphasize-lines: 11,17,19
+
+ # source files exist before our pipeline
+ source_files = ["hasty.cpp", "tasty.cpp", "messy.cpp"]
+ for source_file in source_files:
+ open(source_file, "w")
+
+ # common (universal) header exists before our pipeline
+ open("universal.h", "w")
+
+ from ruffus import *
+
+ # make header files
+ @transform(source_files, suffix(".cpp"), ".h")
+ def create_matching_headers(input_file, output_file):
+ open(output_file, "w")
+
+ @transform(source_files, suffix(".cpp"),
+ # add header to the input of every job
+ add_inputs("universal.h",
+ # add result of task create_matching_headers to the input of every job
+ create_matching_headers),
+ ".o")
+ def compile(input_filename, output_file):
+ open(output_file, "w")
+
+ pipeline_run()
+
+ Giving:
+
+ .. code-block:: pycon
+
+
+ >>> pipeline_run()
+ Job = [hasty.cpp -> hasty.h] completed
+ Job = [messy.cpp -> messy.h] completed
+ Job = [tasty.cpp -> tasty.h] completed
+ Completed Task = create_matching_headers
+ Job = [[hasty.cpp, universal.h, hasty.h, messy.h, tasty.h] -> hasty.o] completed
+ Job = [[messy.cpp, universal.h, hasty.h, messy.h, tasty.h] -> messy.o] completed
+ Job = [[tasty.cpp, universal.h, hasty.h, messy.h, tasty.h] -> tasty.o] completed
+ Completed Task = compile
+
+.. _new_manual.inputs.example4:
+
+================================================================================================================================================================================================================================================
+4. Example: Add corresponding files using :ref:`add_inputs() <decorators.add_inputs>` with :ref:`formatter <decorators.formatter>` or :ref:`regex <decorators.regex>`
+================================================================================================================================================================================================================================================
+
+ .. code-block:: python
+ :emphasize-lines: 11,17,19
+
+ # source files exist before our pipeline
+ source_files = ["hasty.cpp", "tasty.cpp", "messy.cpp"]
+ header_files = ["hasty.h", "tasty.h", "messy.h"]
+ for source_file in source_files + header_files:
+ open(source_file, "w")
+
+ # common (universal) header exists before our pipeline
+ open("universal.h", "w")
+
+ from ruffus import *
+
+ @transform( source_files,
+ formatter(".cpp$"),
+ # corresponding header for each source file
+ add_inputs("{basename[0]}.h",
+ # add header to the input of every job
+ "universal.h"),
+ "{basename[0]}.o")
+ def compile(input_filename, output_file):
+ open(output_file, "w")
+
+ pipeline_run()
+
+ Giving:
+
+ .. code-block:: pycon
+
+ >>> pipeline_run()
+ Job = [[hasty.cpp, hasty.h, universal.h] -> hasty.o] completed
+ Job = [[messy.cpp, messy.h, universal.h] -> messy.o] completed
+ Job = [[tasty.cpp, tasty.h, universal.h] -> tasty.o] completed
+ Completed Task = compile
+
+*********************************************************************************************
+Example code for replacing all input parameters with :ref:`inputs() <decorators.inputs>`
+*********************************************************************************************
+
+.. _new_manual.inputs.example5:
+
+================================================================================================================================================================================================================================================
+5. Example: Running matching python scripts using :ref:`inputs() <decorators.inputs>`
+================================================================================================================================================================================================================================================
+
+ .. code-block:: python
+ :emphasize-lines: 11,17,19
+
+ # source files exist before our pipeline
+ source_files = ["hasty.cpp", "tasty.cpp", "messy.cpp"]
+ python_files = ["hasty.py", "tasty.py", "messy.py"]
+ for source_file in source_files + python_files:
+ open(source_file, "w")
+
+ # common (universal) header exists before our pipeline
+ open("universal.h", "w")
+
+ from ruffus import *
+
+ @transform( source_files,
+ formatter(".cpp$"),
+ # corresponding python file for each source file
+ inputs("{basename[0]}.py"),
+
+ "{basename[0]}.results")
+ def run_corresponding_python(input_filenames, output_file):
+ open(output_file, "w")
+
+
+ pipeline_run()
+
+ Giving:
+
+ .. code-block:: pycon
+
+ >>> pipeline_run()
+ Job = [hasty.py -> hasty.results] completed
+ Job = [messy.py -> messy.results] completed
+ Job = [tasty.py -> tasty.results] completed
+ Completed Task = run_corresponding_python
+
diff --git a/doc/_build/html/_sources/tutorials/new_tutorial/introduction.txt b/doc/_build/html/_sources/tutorials/new_tutorial/introduction.txt
new file mode 100644
index 0000000..d36354a
--- /dev/null
+++ b/doc/_build/html/_sources/tutorials/new_tutorial/introduction.txt
@@ -0,0 +1,399 @@
+.. include:: ../../global.inc
+.. include:: manual_chapter_numbers.inc
+
+.. role:: raw-html(raw)
+ :format: html
+
+:raw-html:`<style> .blue {color:blue} </style>`
+
+:raw-html:`<style> .highlight-red {color:red} </style>`
+
+.. role:: highlight-red
+
+.. role:: blue
+
+
+.. index::
+ pair: overview; Tutorial
+
+.. _new_manual.introduction:
+
+######################################################################################################
+|new_manual.introduction.chapter_num|: An introduction to basic *Ruffus* syntax
+######################################################################################################
+
+.. seealso::
+
+ * :ref:`Manual Table of Contents <new_manual.table_of_contents>`
+
+
+************************************
+Overview
+************************************
+
+ .. image:: ../../images/theoretical_pipeline_schematic.png
+ :scale: 50
+
+ Computational pipelines transform your data in stages until the final result is produced.
+ One easy way to understand pipelines is by imagining your data flowing across a series of
+ pipes until it reaches its final destination. Even quite complicated processes can be
+ broken into simple stages. Of course, it helps to visualise the whole process.
+
+ *Ruffus* is a way of automating the plumbing in your pipeline: You supply the python functions
+ which perform the data transformation, and tell *Ruffus* how these pipeline ``task`` functions
+ are connected up. *Ruffus* will make sure that the right data flows down your pipeline in the
+ right way at the right time.
+
+
+ .. note::
+
+ *Ruffus* refers to each stage of your pipeline as a :term:`task`.
+
+.. _new_manual.introduction.import:
+
+.. index::
+ single: importing ruffus
+
+****************************
+Importing *Ruffus*
+****************************
+
+ The most convenient way to use *Ruffus* is to import the various names directly:
+
+ .. code-block:: python
+
+ from ruffus import *
+
+ This will allow *Ruffus* terms to be used directly in your code. This is also
+ the style we have adopted for this manual.
+
+
+ If any of these clash with names in your code, you can use qualified names instead:
+ ::
+
+ import ruffus
+
+ ruffus.pipeline_printout("...")
+
+ *Ruffus* uses only standard python syntax.
+
+ There is no need to install anything extra or to have your script "preprocessed" to run
+ your pipeline.
+
+****************************************************************************************************************
+*Ruffus* `decorators <https://docs.python.org/2/glossary.html#term-decorator>`__
+****************************************************************************************************************
+
+ To let *Ruffus* know which python functions are part of your pipeline,
+ they need to be tagged or annotated using
+ *Ruffus* `decorators <https://docs.python.org/2/glossary.html#term-decorator>`__.
+
+ `Decorators <https://docs.python.org/2/glossary.html#term-decorator>`__ have been part of the Python language since version 2.4.
+ Common examples from the standard library include `@staticmethod <https://docs.python.org/2/library/functions.html#staticmethod>`__ and
+ `@classmethod <https://docs.python.org/2/library/functions.html#classmethod>`__.
+
+ `Decorators <https://docs.python.org/2/glossary.html#term-decorator>`__ start with an ``@``
+ prefix and take a number of parameters in parentheses, much like a function call.
+
+ `Decorators <https://docs.python.org/2/glossary.html#term-decorator>`__ are placed immediately before a normal python function.
+
+ .. image:: ../../images/tutorial_step1_decorator_syntax.png
+
+
+ Multiple decorators can be stacked as necessary, in any order:
+
+ .. code-block:: python
+
+ @follows(first_task)
+ @follows(another_task)
+ @originate(range(5))
+ def second_task():
+ ""
+
+ *Ruffus* `decorators <https://docs.python.org/2/glossary.html#term-decorator>`__ do not
+ otherwise alter the underlying function: it can still be called normally.
+
+***************************************
+Your first *Ruffus* pipeline
+***************************************
+
+==============================================================================
+1. Write down the file names
+==============================================================================
+
+ *Ruffus* is designed for data moving through a computational pipeline as a series of files.
+
+ It is also possible to use *Ruffus* pipelines without intermediate data files, but for your
+ first efforts, it is probably best not to subvert its canonical design.
+
+ The first thing when designing a new *Ruffus* pipeline is to sketch out the set of file names for
+ the pipeline on paper:
+
+ .. image:: ../../images/tutorial_ruffus_files.jpg
+ :scale: 50
+
+ Here we have a number of DNA sequence files (``*.fasta``)
+ #. mapped to a genome (``*.sam``), and
+ #. compressed (``*.bam``) before being
+ #. summarised statistically (``*.statistics``)
+
+ The first striking thing is that all of the files follow the same **consistent naming scheme**.
+
+ .. note::
+
+ :highlight-red:`The most important part of a Ruffus pipeline is to have a consistent naming scheme for your files.`
+
+ This allows you to build sane pipelines.
+
+
+ In this case, each of the files at the same stage shares the same file extension, e.g. ``.sam``.
+ This is usually the simplest and most sensible choice. (We shall see in later chapters
+ that *Ruffus* supports more complicated naming patterns so long as they are consistent.)
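+
+ For instance, for the pipeline sketched above, the file names at each stage might look like this
+ (a sketch; the ``a``, ``b``, ``c`` prefixes are reused in the example code later in this chapter):
+
+ .. code-block:: python
+
+     # one consistent naming scheme: the prefix identifies the data set,
+     # the extension identifies the stage of the pipeline
+     dna_files        = ["a.fasta", "b.fasta", "c.fasta"]
+     mapped_files     = ["a.sam",   "b.sam",   "c.sam"]
+     compressed_files = ["a.bam",   "b.bam",   "c.bam"]
+     statistics_files = ["a.statistics", "b.statistics", "c.statistics"]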
+
+
+==============================================================================
+2. Write the python functions for each stage
+==============================================================================
+
+ Next, we can sketch out the python functions which do the actual work for the pipeline.
+
+ .. note::
+
+ #. :highlight-red:`These are normal python functions with the important proviso that`
+
+ #. The first parameter contains the **Input** (file names)
+ #. The second parameter contains the **Output** (file names)
+
+ You can otherwise supply as many parameters as is required.
+
+ #. :highlight-red:`Each python function should only take a` *Single* **Input** at a time
+
+ All the parallelism in your pipeline should be handled by *Ruffus*. Make sure
+ each function analyses one thing at a time.
+
+
+ *Ruffus* refers to a pipelined function as a :term:`task`.
+
+ The code for our three task functions looks something like this:
+
+ .. code-block:: python
+ :emphasize-lines: 2,4,5
+
+ #
+ # STAGE 1 fasta->sam
+ #
+ def map_dna_sequence(input_file, # 1st parameter is Input
+ output_file): # 2nd parameter is Output
+ """
+ Sketch of real mapping function
+ We can do the mapping ourselves
+ or call some other programme:
+ os.system("stampy %s %s..." % (input_file, output_file))
+ """
+ ii = open(input_file)
+ oo = open(output_file, "w")
+
+ .. code-block:: python
+ :emphasize-lines: 2
+
+ #
+ # STAGE 2 sam->bam
+ #
+ def compress_sam_file(input_file, # Input parameter
+ output_file): # Output parameter
+ """
+ Sketch of real compression function
+ """
+ ii = open(input_file)
+ oo = open(output_file, "w")
+
+ .. code-block:: python
+ :emphasize-lines: 2
+
+ #
+ # STAGE 3 bam->statistics
+ #
+ def summarise_bam_file(input_file, # Input parameter
+ output_file, # Output parameter
+ extra_stats_parameter): # Any number of extra parameters as required
+ """
+ Sketch of real analysis function
+ """
+ ii = open(input_file)
+ oo = open(output_file, "w")
+
+
+ If we were calling our functions manually, without the benefit of *Ruffus*, we would need
+ the following sequence of calls:
+
+ .. code-block:: python
+
+ # STAGE 1
+ map_dna_sequence("a.fasta", "a.sam")
+ map_dna_sequence("b.fasta", "b.sam")
+ map_dna_sequence("c.fasta", "c.sam")
+
+ # STAGE 2
+ compress_sam_file("a.sam", "a.bam")
+ compress_sam_file("b.sam", "b.bam")
+ compress_sam_file("c.sam", "c.bam")
+
+ # STAGE 3
+ summarise_bam_file("a.bam", "a.statistics")
+ summarise_bam_file("b.bam", "b.statistics")
+ summarise_bam_file("c.bam", "c.statistics")
+
+==============================================================================
+3. Link the python functions into a pipeline
+==============================================================================
+
+ *Ruffus* makes exactly the same function calls on your behalf. However, first, we need to
+ tell *Ruffus* what the arguments should be for each of the function calls.
+
+ * The **Input** is easy: This is either the starting file set (``*.fasta``) or whatever is produced
+ by the previous stage.
+
+ * The **Output** file name is the same as the **Input** but with the appropriate extension.
+
+ These are specified using the *Ruffus* :ref:`@transform <decorators.transform>` decorator as follows:
+
+ .. code-block:: python
+ :emphasize-lines: 6-8,17-19,29-31
+
+ from ruffus import *
+
+ starting_files = ["a.fasta", "b.fasta", "c.fasta"]
+
+ #
+ # STAGE 1 fasta->sam
+ #
+ @transform(starting_files, # Input = starting files
+ suffix(".fasta"), # suffix = .fasta
+ ".sam") # Output suffix = .sam
+ def map_dna_sequence(input_file,
+ output_file):
+ ii = open(input_file)
+ oo = open(output_file, "w")
+
+ #
+ # STAGE 2 sam->bam
+ #
+ @transform(map_dna_sequence, # Input = previous stage
+ suffix(".sam"), # suffix = .sam
+ ".bam") # Output suffix = .bam
+ def compress_sam_file(input_file,
+ output_file):
+ ii = open(input_file)
+ oo = open(output_file, "w")
+
+ #
+ # STAGE 3 bam->statistics
+ #
+ @transform(compress_sam_file, # Input = previous stage
+ suffix(".bam"), # suffix = .bam
+ ".statistics", # Output suffix = .statistics
+ "use_linear_model") # Extra statistics parameter
+ def summarise_bam_file(input_file,
+ output_file,
+ extra_stats_parameter):
+ """
+ Sketch of real analysis function
+ """
+ ii = open(input_file)
+ oo = open(output_file, "w")
+
+
+==============================================================================
+4. @transform syntax
+==============================================================================
+
+ #. | The 1st parameter for :ref:`@transform <decorators.transform>` is the **Input**.
+ | This is either the set of starting data or the name of the previous pipeline function.
+ | *Ruffus* *chains* together the stages of a pipeline by linking the **Output** of the previous stage into the **Input** of the next.
+
+ #. | The 2nd parameter is the current :ref:`suffix <decorators.suffix>`
+ | (i.e. our **Input** file extensions of ``".fasta"`` or ``".sam"`` or ``".bam"``)
+
+ #. | The 3rd parameter is what we want our **Output** file name to be after :ref:`suffix <decorators.suffix>` string substitution (e.g. ``.fasta -> .sam``).
+ | This works because we are using a sane naming scheme for our data files.
+
+ #. Other parameters can be passed to ``@transform`` and they will be forwarded to our python
+ pipeline function.
+
+
+ The functions that do the actual work of each stage of the pipeline remain unchanged.
+ The role of *Ruffus* is to make sure each is called in the right order,
+ with the right parameters, running in parallel (using multiprocessing if desired).
+
+
+.. index::
+ pair: pipeline_run; Tutorial
+
+.. _new_manual.pipeline_run:
+
+==============================================================================
+5. Run the pipeline!
+==============================================================================
+
+ .. note ::
+
+ **Key Ruffus Terminology**:
+
+ A :term:`task` is an annotated python function which represents a recipe or stage of your pipeline.
+
+ A :term:`job` is each time your recipe is applied to a piece of data, i.e. each time *Ruffus* calls your function.
+
+ Each **task** or pipeline recipe can thus have many **jobs** each of which can work in parallel on different data.
+
+ Now we can run the pipeline with the *Ruffus* function :ref:`pipeline_run<pipeline_functions.pipeline_run>`:
+
+ .. code-block:: python
+
+ pipeline_run()
+
+
+
+ This produces three sets of results in parallel, as you might expect:
+
+ .. code-block:: pycon
+
+ >>> pipeline_run()
+ Job = [a.fasta -> a.sam] completed
+ Job = [b.fasta -> b.sam] completed
+ Job = [c.fasta -> c.sam] completed
+ Completed Task = map_dna_sequence
+ Job = [a.sam -> a.bam] completed
+ Job = [b.sam -> b.bam] completed
+ Job = [c.sam -> c.bam] completed
+ Completed Task = compress_sam_file
+ Job = [a.bam -> a.statistics, use_linear_model] completed
+ Job = [b.bam -> b.statistics, use_linear_model] completed
+ Job = [c.bam -> c.statistics, use_linear_model] completed
+ Completed Task = summarise_bam_file
+
+
+
+ To work out which functions to call, :ref:`pipeline_run<pipeline_functions.pipeline_run>`
+ finds the **last** :term:`task` function of your pipeline, then
+ works out all the other functions this depends on, working backwards up the chain of
+ dependencies automatically.
+
+ We can specify the end point of the pipeline explicitly:
+
+ ::
+
+ >>> pipeline_run(target_tasks = [summarise_bam_file])
+
+
+ This allows us to only run part of the pipeline, for example:
+
+ ::
+
+ >>> pipeline_run(target_tasks = [compress_sam_file])
+
+
+.. note::
+
+ The :ref:`example code <new_manual.introduction.code>` can be copied and pasted into a python
+ command shell.
+
diff --git a/doc/_build/html/_sources/tutorials/new_tutorial/introduction_code.txt b/doc/_build/html/_sources/tutorials/new_tutorial/introduction_code.txt
new file mode 100644
index 0000000..7bdd203
--- /dev/null
+++ b/doc/_build/html/_sources/tutorials/new_tutorial/introduction_code.txt
@@ -0,0 +1,94 @@
+.. include:: ../../global.inc
+.. include:: manual_chapter_numbers.inc
+
+.. _new_manual.introduction.code:
+
+##############################################################################################################
+|new_manual.introduction.chapter_num|: Python Code for An introduction to basic Ruffus syntax
+##############################################################################################################
+
+.. seealso::
+
+ * :ref:`Manual Table of Contents <new_manual.table_of_contents>`
+ * :ref:`@transform syntax in detail <decorators.transform>`
+ * Back to |new_manual.introduction.chapter_num|: :ref:`An introduction to basic Ruffus syntax <new_manual.introduction>`
+
+*******************************************
+Your first Ruffus script
+*******************************************
+
+ .. code-block:: python
+
+ #
+ # The starting data files would normally exist beforehand!
+ # We create some empty files for this example
+ #
+ starting_files = ["a.fasta", "b.fasta", "c.fasta"]
+
+ for ff in starting_files:
+ open(ff, "w")
+
+
+ from ruffus import *
+
+ #
+ # STAGE 1 fasta->sam
+ #
+ @transform(starting_files, # Input = starting files
+ suffix(".fasta"), # suffix = .fasta
+ ".sam") # Output suffix = .sam
+ def map_dna_sequence(input_file,
+ output_file):
+ ii = open(input_file)
+ oo = open(output_file, "w")
+
+ #
+ # STAGE 2 sam->bam
+ #
+ @transform(map_dna_sequence, # Input = previous stage
+ suffix(".sam"), # suffix = .sam
+ ".bam") # Output suffix = .bam
+ def compress_sam_file(input_file,
+ output_file):
+ ii = open(input_file)
+ oo = open(output_file, "w")
+
+ #
+ # STAGE 3 bam->statistics
+ #
+ @transform(compress_sam_file, # Input = previous stage
+ suffix(".bam"), # suffix = .bam
+ ".statistics", # Output suffix = .statistics
+ "use_linear_model") # Extra statistics parameter
+ def summarise_bam_file(input_file,
+ output_file,
+ extra_stats_parameter):
+ """
+ Sketch of real analysis function
+ """
+ ii = open(input_file)
+ oo = open(output_file, "w")
+
+ pipeline_run()
+
+
+************************************
+Resulting Output
+************************************
+ ::
+
+ >>> pipeline_run()
+ Job = [a.fasta -> a.sam] completed
+ Job = [b.fasta -> b.sam] completed
+ Job = [c.fasta -> c.sam] completed
+ Completed Task = map_dna_sequence
+ Job = [a.sam -> a.bam] completed
+ Job = [b.sam -> b.bam] completed
+ Job = [c.sam -> c.bam] completed
+ Completed Task = compress_sam_file
+ Job = [a.bam -> a.statistics, use_linear_model] completed
+ Job = [b.bam -> b.statistics, use_linear_model] completed
+ Job = [c.bam -> c.statistics, use_linear_model] completed
+ Completed Task = summarise_bam_file
diff --git a/doc/_build/html/_sources/tutorials/new_tutorial/list_of_ruffus_names.txt b/doc/_build/html/_sources/tutorials/new_tutorial/list_of_ruffus_names.txt
new file mode 100644
index 0000000..aa8b66b
--- /dev/null
+++ b/doc/_build/html/_sources/tutorials/new_tutorial/list_of_ruffus_names.txt
@@ -0,0 +1,77 @@
+.. include:: ../../global.inc
+.. include:: manual_chapter_numbers.inc
+
+.. index::
+ pair: Ruffus names list; Tutorial
+
+.. _new_manual.ruffus_names:
+
+##########################################################################################################################################################################################################################################
+|new_manual.ruffus_names.chapter_num|: Names exported from Ruffus
+##########################################################################################################################################################################################################################################
+
+.. seealso::
+
+ * :ref:`Manual Table of Contents <new_manual.table_of_contents>`
+
+******************
+Ruffus Names
+******************
+
+ This is a list of all the names *Ruffus* makes available:
+
+
+ +---------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------+
+ | Category | Manual |
+ +===============================================================+===============================================================================================================================================================+
+ | **Pipeline functions** | | :ref:`pipeline_printout() <pipeline_functions.pipeline_printout>` (:ref:`Manual <new_manual.pipeline_printout>`) |
+ | | | :ref:`pipeline_printout_graph() <pipeline_functions.pipeline_printout_graph>` (:ref:`Manual <new_manual.pipeline_printout_graph>`) |
+ | | | :ref:`pipeline_run() <pipeline_functions.pipeline_run>` (:ref:`Manual <new_manual.pipeline_run>`) |
+ +---------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------+
+ | **Decorators** | |
+ | | | :ref:`@active_if <decorators.active_if>` (:ref:`Manual <new_manual.active_if>`) |
+ | | | :ref:`@check_if_uptodate <decorators.check_if_uptodate>` (:ref:`Manual <new_manual.check_if_uptodate>`) |
+ | | | :ref:`@collate <decorators.collate>` (:ref:`Manual <new_manual.collate>`) |
+ | | | :ref:`@files <decorators.files>` (:ref:`Manual <new_manual.deprecated_files>`) |
+ | | | :ref:`@follows <decorators.follows>` (:ref:`Manual <new_manual.follows>`) |
+ | | | :ref:`@jobs_limit <decorators.jobs_limit>` (:ref:`Manual <new_manual.jobs_limit>`) |
+ | | | :ref:`@merge <decorators.merge>` (:ref:`Manual <new_manual.merge>`) |
+ | | | :ref:`@mkdir <decorators.mkdir>` (:ref:`Manual <new_manual.mkdir>`) |
+ | | | :ref:`@originate <decorators.originate>` (:ref:`Manual <new_manual.originate>`) |
+ | | | :ref:`@parallel <decorators.parallel>` (:ref:`Manual <new_manual.deprecated_parallel>`) |
+ | | | :ref:`@posttask <decorators.posttask>` (:ref:`Manual <new_manual.posttask>`) |
+ | | | :ref:`@split <decorators.split>` (:ref:`Manual <new_manual.split>`) |
+ | | | :ref:`@subdivide <decorators.subdivide>` (:ref:`Manual <new_manual.subdivide>`) |
+ | | | :ref:`@transform <decorators.transform>` (:ref:`Manual <new_manual.transform>`) |
+ | | | :ref:`@files_re <decorators.files_re>` (:ref:`Manual <new_manual.deprecated_files_re>`) |
+ | | |
+ +---------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------+
+ | **Loggers** | |
+ | | | stderr_logger |
+ | | | black_hole_logger |
+ | | |
+ +---------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------+
+ | **Parameter disambiguating Indicators** | | :ref:`suffix <decorators.suffix>` (:ref:`Manual <new_manual.suffix>`) |
+ | | | :ref:`regex <decorators.regex>` (:ref:`Manual <new_manual.regex>`) |
+ | | | :ref:`formatter <decorators.formatter>` (:ref:`Manual <new_manual.formatter>`) |
+ | | | :ref:`inputs <decorators.inputs>` (:ref:`Manual <new_manual.inputs>`) |
+ | | | :ref:`add_inputs <decorators.add_inputs>` (:ref:`Manual <new_manual.inputs>`) |
+ | | | :ref:`touch_file <decorators.touch_file>` (:ref:`Manual <new_manual.posttask.touch_file>`) |
+ | | | :ref:`combine <decorators.combine>` |
+ | | | :ref:`mkdir <decorators.follows.mkdir>` (:ref:`Manual <new_manual.follows.mkdir>`) |
+ | | | :ref:`output_from <decorators.output_from>` (:ref:`Manual <new_manual.output_from>`) |
+ +---------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------+
+ | **Decorators in ruffus.combinatorics** | |
+ | | | :ref:`@combinations <decorators.combinations>` (:ref:`Manual <new_manual.combinations>`) |
+ | | | :ref:`@combinations_with_replacement <decorators.combinations_with_replacement>` (:ref:`Manual <new_manual.combinations_with_replacement>`) |
+ | | | :ref:`@permutations <decorators.permutations>` (:ref:`Manual <new_manual.permutations>`) |
+ | | | :ref:`@product <decorators.product>` (:ref:`Manual <new_manual.product>`) |
+ +---------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------+
+ | **Names in ruffus.cmdline** | |
+ | | | :ref:`get_argparse <new_manual.cmdline.get_argparse>` |
+ | | | :ref:`setup_logging <new_manual.cmdline.setup_logging>` |
+ | | | :ref:`run <new_manual.cmdline.run>` |
+ | | | :ref:`MESSAGE <new_manual.cmdline.MESSAGE>` |
+ +---------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------+
+
+
diff --git a/doc/_build/html/_sources/tutorials/new_tutorial/logging.txt b/doc/_build/html/_sources/tutorials/new_tutorial/logging.txt
new file mode 100644
index 0000000..4e42e69
--- /dev/null
+++ b/doc/_build/html/_sources/tutorials/new_tutorial/logging.txt
@@ -0,0 +1,221 @@
+.. include:: ../../global.inc
+.. include:: manual_chapter_numbers.inc
+
+.. index::
+ pair: logging; Tutorial
+
+.. _new_manual.logging:
+
+######################################################################################################
+|new_manual.logging.chapter_num|: Logging progress through a pipeline
+######################################################################################################
+
+
+.. seealso::
+
+ * :ref:`Manual Table of Contents <new_manual.table_of_contents>`
+
+.. note::
+
+ Remember to look at the :ref:`example code <new_manual.logging.code>`
+
+*************************
+Overview
+*************************
+
+ There are two parts to logging with **Ruffus**:
+
+ * Logging progress through the pipeline
+
+ This produces the sort of output displayed in this manual:
+
+ ::
+
+ >>> pipeline_run([parallel_io_task])
+ Task = parallel_io_task
+ Job = ["a.1" -> "a.2", "A file"] completed
+ Job = ["b.1" -> "b.2", "B file"] unnecessary: already up to date
+ Completed Task = parallel_io_task
+
+
+ * Logging your own messages from within your pipelined functions.
+
+ Because **Ruffus** may run each task function in a separate process on a separate
+ CPU (multiprocessing), some attention has to be paid to how your log messages are
+ sent and synchronised across process boundaries.
+
+
+ We shall deal with these in turn.
+
+
+.. _new_manual.logging.pipeline:
+
+**********************************
+Logging task/job completion
+**********************************
+ By default, *Ruffus* logs each task and each job as it is completed to
+ `sys.stderr <http://docs.python.org/2/library/sys.html#sys.stderr>`__,
+ i.e. :ref:`pipeline_run(logger = stderr_logger) <pipeline_functions.pipeline_run>`.
+
+ If you want to turn off all tracking messages as the pipeline runs, apart from setting ``verbose = 0``, you
+ can also use the aptly named Ruffus ``black_hole_logger``:
+
+ .. code-block:: python
+
+ pipeline_run(logger = black_hole_logger)
+
+.. index::
+ pair: pipeline_run verbosity; Tutorial
+
+=================================
+Controlling logging verbosity
+=================================
+ :ref:`pipeline_run() <pipeline_functions.pipeline_run>` currently has five levels of verbosity, set by the optional ``verbose``
+ parameter which defaults to 1:
+
+ ::
+
+ verbose = 0: nothing
+ verbose = 1: logs completed jobs/tasks
+ verbose = 2: logs up to date jobs in incomplete tasks
+ verbose = 3: logs the reason each job is run
+ verbose = 4: logs messages useful only for debugging ruffus pipeline code
+
+
+ Values of ``verbose`` greater than ``5`` are intended for debugging **Ruffus** by the developers, and the details
+ are liable to change from release to release.
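+
+ For example, a minimal sketch: to make *Ruffus* log the reason each job is run (level 3 above), pass ``verbose`` to :ref:`pipeline_run() <pipeline_functions.pipeline_run>`:
+
+ .. code-block:: python
+
+     pipeline_run(verbose = 3)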
+
+
+.. index::
+ pair: logging with ruffus.cmdline; Tutorial
+
+********************************************************************************
+Use :ref:`ruffus.cmdline <new_manual.cmdline>`
+********************************************************************************
+
+ As always, it is easiest to use :ref:`ruffus.cmdline <new_manual.cmdline>`.
+
+ Set up your script to
+
+ * write messages to ``STDERR`` with the ``--verbose`` option, and
+ * write messages to a log file with the ``--log_file`` option.
+
+ .. code-block:: python
+    :emphasize-lines: 8
+
+    from ruffus import *
+
+    # parse the standard Ruffus command line options, including --verbose and --log_file
+    parser = cmdline.get_argparse(description = "Example pipeline")
+    options = parser.parse_args()
+
+    # Python logger which can be synchronised across concurrent Ruffus tasks
+    logger, logger_mutex = cmdline.setup_logging (__name__, options.log_file, options.verbose)
+
+    @transform(["job1.input"], suffix(".input"), ".output1")
+    def first_task(input_file, output_file):
+        pass
+
+    pipeline_run(logger = logger)
+
+
+.. index::
+ pair: logging customising; Tutorial
+
+****************************************
+Customising logging
+****************************************
+
+ You can also specify exactly how logging works by providing a `logging <http://docs.python.org/library/logging.html>`_ object
+ to :ref:`pipeline_run() <pipeline_functions.pipeline_run>` .
+ This log object should have ``debug()`` and ``info()`` methods.
+
+ Instead of writing your own, it is usually more convenient to use the python
+ `logging <http://docs.python.org/library/logging.html>`_
+ module which provides logging classes with rich functionality.
+
+ The :ref:`example code<new_manual.logging.code>` sets up a logger that writes to a rotating set of log files.
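+
+ If you prefer to write your own, a minimal sketch of such a log object is shown below. This is an illustrative assumption rather than code from the *Ruffus* sources: it relies only on the ``debug()`` and ``info()`` methods described above being called with a message.
+
+ .. code-block:: python
+
+     import sys
+     from ruffus import *
+
+     class MinimalLogger(object):
+         """Bare-bones log object: anything with info() and debug() methods will do"""
+         def info(self, message, *args, **kwargs):
+             sys.stderr.write("INFO: %s\n" % message)
+         def debug(self, message, *args, **kwargs):
+             pass        # silently swallow debug-level chatter
+
+     # ... pipeline tasks would be defined here ...
+
+     pipeline_run(logger = MinimalLogger())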
+
+
+.. index::
+ pair: logging your own message; Tutorial
+
+.. _new_manual.logging.per_job:
+
+****************************************
+Log your own messages
+****************************************
+
+ You need to take a little care when logging your custom messages *within* your pipeline.
+
+ * If your Ruffus pipeline may run in parallel, make sure that logging is synchronised.
+ * If your Ruffus pipeline may run across separate processes, send your logging object across process boundaries.
+
+
+ `logging <http://docs.python.org/library/logging.html>`_ objects can not be
+ `pickled <http://docs.python.org/library/pickle.html>`_ and shared naively across
+ processes. Instead, we need to create proxies which forward the logging to a single
+ shared log.
+
+ The :ref:`ruffus.proxy_logger <proxy-logger>` module provides an easy way to share
+ `logging <http://docs.python.org/library/logging.html>`_ objects among
+ jobs. This requires just two simple steps:
+
+
+
+
+.. note::
+
+ * This is a good template for sharing `non-picklable objects <http://docs.python.org/2/library/pickle.html#what-can-be-pickled-and-unpickled>`_
+ across processes.
+
+
+.. _new_manual.sharing_proxy_object:
+
+
+============================================================
+ 1. Set up logging
+============================================================
+
+ Things are easiest if you are using ``ruffus.cmdline``:
+
+ .. code-block:: python
+
+ # standard python logger which can be synchronised across concurrent Ruffus tasks
+ logger, logger_mutex = cmdline.setup_logging (__name__, options.log_file, options.verbose)
+
+
+ Otherwise, manually:
+
+ .. code-block:: python
+
+
+ from ruffus.proxy_logger import *
+ (logger,
+ logging_mutex) = make_shared_logger_and_proxy (setup_std_shared_logger,
+ "my_logger",
+ {"file_name" :"/my/lg.log"})
+
+============================================================
+ 2. Share the proxy
+============================================================
+ Now, pass:
+
+ * ``logger`` (which forwards logging calls across jobs) and
+ * ``logging_mutex`` (which prevents different jobs which are logging simultaneously
+ from being jumbled up)
+
+ to each job:
+
+ .. code-block:: python
+    :emphasize-lines: 4,6,8
+
+    @transform( initial_file,
+                suffix(".input"),
+                ".output1",
+                logger, logging_mutex)      # pass log and synchronisation as parameters
+    def first_task(input_file, output_file,
+                   logger, logging_mutex):  # pass log and synchronisation as parameters
+        # synchronise logging
+        with logging_mutex:
+            logger.info("Here we go logging...")
+
diff --git a/doc/_build/html/_sources/tutorials/new_tutorial/logging_code.txt b/doc/_build/html/_sources/tutorials/new_tutorial/logging_code.txt
new file mode 100644
index 0000000..69bd51b
--- /dev/null
+++ b/doc/_build/html/_sources/tutorials/new_tutorial/logging_code.txt
@@ -0,0 +1,55 @@
+.. include:: ../../global.inc
+.. include:: manual_chapter_numbers.inc
+
+.. _new_manual.logging.code:
+
+######################################################################################################
+|new_manual.logging.chapter_num|: Python Code for Logging progress through a pipeline
+######################################################################################################
+
+.. seealso::
+
+ * :ref:`Manual Table of Contents <new_manual.table_of_contents>`
+ * Back to |new_manual.logging.chapter_num|: :ref:`Logging progress through a pipeline <new_manual.logging>`
+
+****************************************
+Rotating set of file logs
+****************************************
+
+ .. code-block:: python
+ :emphasize-lines: 10,14,17,31
+
+ import logging
+ import logging.handlers
+
+ LOG_FILENAME = '/tmp/ruffus.log'
+
+ # Set up a specific logger with our desired output level
+ logger = logging.getLogger('My_Ruffus_logger')
+ logger.setLevel(logging.DEBUG)
+
+ # Rotate a set of 5 log files every 2kb
+ handler = logging.handlers.RotatingFileHandler(
+ LOG_FILENAME, maxBytes=2000, backupCount=5)
+
+ # Add the log message handler to the logger
+ logger.addHandler(handler)
+
+ # Ruffus pipeline
+ from ruffus import *
+
+ # Start with some initial data file of yours...
+ initial_file = "job1.input"
+ open(initial_file, "w")
+
+ @transform( initial_file,
+ suffix(".input"),
+ ".output1"),
+ def first_task(input_file, output_file):
+ "Some detailed description"
+ pass
+
+ # use our custom logging object
+ pipeline_run(logger=logger)
+ print open("/tmp/ruffus.log").read()
+
diff --git a/doc/_build/html/_sources/tutorials/new_tutorial/manual_contents.txt b/doc/_build/html/_sources/tutorials/new_tutorial/manual_contents.txt
new file mode 100644
index 0000000..e17dbf2
--- /dev/null
+++ b/doc/_build/html/_sources/tutorials/new_tutorial/manual_contents.txt
@@ -0,0 +1,64 @@
+.. include:: ../../global.inc
+.. include:: manual_chapter_numbers.inc
+
+.. _new_manual.table_of_contents:
+
+####################################################################
+**Ruffus** Manual: List of Chapters and Example code
+####################################################################
+
+ Download as :download:`pdf <../../static_data/ruffus.pdf>`.
+
+ * |new_manual.introduction.chapter_num|: :ref:`An introduction to basic Ruffus syntax <new_manual.introduction>`
+ * |new_manual.transform.chapter_num|: :ref:`Transforming data in a pipeline with @transform <new_manual.transform>`
+ * |new_manual.transform_in_parallel.chapter_num|: :ref:`More on @transform-ing data <new_manual.transform_in_parallel>`
+ * |new_manual.originate.chapter_num|: :ref:`Creating files with @originate <new_manual.originate>`
+ * |new_manual.pipeline_printout.chapter_num|: :ref:`Understanding how your pipeline works with pipeline_printout() <new_manual.pipeline_printout>`
+ * |new_manual.cmdline.chapter_num|: :ref:`Running Ruffus from the command line with ruffus.cmdline <new_manual.cmdline>`
+ * |new_manual.pipeline_printout_graph.chapter_num|: :ref:`Displaying the pipeline visually with pipeline_printout_graph() <new_manual.pipeline_printout_graph>`
+ * |new_manual.output_file_names.chapter_num|: :ref:`Specifying output file names with formatter() and regex() <new_manual.output_file_names>`
+ * |new_manual.mkdir.chapter_num|: :ref:`Preparing directories for output with @mkdir <new_manual.mkdir>`
+ * |new_manual.checkpointing.chapter_num|: :ref:`Checkpointing: Interrupted Pipelines and Exceptions <new_manual.checkpointing>`
+ * |new_manual.decorators_compendium.chapter_num|: :ref:`Pipeline topologies and a compendium of Ruffus decorators <new_manual.decorators_compendium>`
+ * |new_manual.split.chapter_num|: :ref:`Splitting up large tasks / files with @split <new_manual.split>`
+ * |new_manual.merge.chapter_num|: :ref:`@merge multiple input into a single result <new_manual.merge>`
+ * |new_manual.logging.chapter_num|: :ref:`Logging progress through a pipeline <new_manual.logging>`
+ * |new_manual.multiprocessing.chapter_num|: :ref:`Multiprocessing, drmaa and Computation Clusters <new_manual.multiprocessing>`
+ * |new_manual.subdivide_collate.chapter_num|: :ref:`@subdivide tasks to run efficiently and regroup with @collate <new_manual.subdivide_collate>`
+ * |new_manual.combinatorics.chapter_num|: :ref:`@combinations, @permutations and all versus all @product <new_manual.combinatorics>`
+ * |new_manual.active_if.chapter_num|: :ref:`Turning parts of the pipeline on and off at runtime with @active_if <new_manual.active_if>`
+ * |new_manual.inputs.chapter_num|: :ref:`Manipulating task inputs via string substitution with inputs() and add_inputs() <new_manual.inputs>`
+ * |new_manual.posttask.chapter_num|: :ref:`Signal the completion of each stage of our pipeline with @posttask <new_manual.posttask>`
+ * |new_manual.on_the_fly.chapter_num|: :ref:`Esoteric: Generating parameters on the fly with @files <new_manual.on_the_fly>`
+ * |new_manual.parallel.chapter_num|: :ref:`Esoteric: Running jobs in parallel without files using @parallel <new_manual.deprecated_parallel>`
+ * |new_manual.check_if_uptodate.chapter_num|: :ref:`Esoteric: Writing custom functions to decide which jobs are up to date with @check_if_uptodate <new_manual.check_if_uptodate>`
+ * |new_manual.flowchart_colours.chapter_num| :ref:`Flow Chart Colours with pipeline_printout_graph <new_manual.flowchart_colours>`
+ * |new_manual.dependencies.chapter_num| :ref:`Under the hood: How dependency works <new_manual.dependencies>`
+ * |new_manual.exceptions.chapter_num| :ref:`Exceptions thrown inside pipelines <new_manual.exceptions>`
+ * |new_manual.ruffus_names.chapter_num| :ref:`Names (keywords) exported from Ruffus <new_manual.ruffus_names>`
+ * |new_manual.deprecated_files.chapter_num|: :ref:`Legacy and deprecated syntax @files <new_manual.deprecated_files>`
+ * |new_manual.deprecated_files_re.chapter_num|: :ref:`Legacy and deprecated syntax @files_re <new_manual.deprecated_files_re>`
+
+
+
+**Ruffus** Manual: List of Example Code for Each Chapter:
+
+ * :ref:`new_manual.introduction.code`
+ * :ref:`new_manual.transform.code`
+ * :ref:`new_manual.transform_in_parallel.code`
+ * :ref:`new_manual.originate.code`
+ * :ref:`new_manual.pipeline_printout.code`
+ * :ref:`new_manual.pipeline_printout_graph.code`
+ * :ref:`new_manual.output_file_names.code`
+ * :ref:`new_manual.mkdir.code`
+ * :ref:`new_manual.checkpointing.code`
+ * :ref:`new_manual.split.code`
+ * :ref:`new_manual.merge.code`
+ * :ref:`new_manual.multiprocessing.code`
+ * :ref:`new_manual.logging.code`
+ * :ref:`new_manual.subdivide_collate.code`
+ * :ref:`new_manual.combinatorics.code`
+ * :ref:`new_manual.inputs.code`
+ * :ref:`new_manual.on_the_fly.code`
+
+
diff --git a/doc/_build/html/_sources/tutorials/new_tutorial/merge.txt b/doc/_build/html/_sources/tutorials/new_tutorial/merge.txt
new file mode 100644
index 0000000..4b8466e
--- /dev/null
+++ b/doc/_build/html/_sources/tutorials/new_tutorial/merge.txt
@@ -0,0 +1,140 @@
+.. include:: ../../global.inc
+.. include:: manual_chapter_numbers.inc
+
+.. index::
+ pair: merge; Tutorial
+
+.. _new_manual.merge:
+
+######################################################################################################
+|new_manual.merge.chapter_num|: ``@merge`` multiple input into a single result
+######################################################################################################
+
+
+.. seealso::
+
+ * :ref:`Manual Table of Contents <new_manual.table_of_contents>`
+ * :ref:`@merge <decorators.merge>` syntax
+ * :ref:`Example code for this chapter <new_manual.merge.code>`
+
+
+**************************************************************************************
+Overview of :ref:`@merge <decorators.merge>`
+**************************************************************************************
+
+ The :ref:`previous chapter <new_manual.split>` explained how **Ruffus** allows large
+ jobs to be split into small pieces with :ref:`@split <decorators.split>` and analysed
+ in parallel using for example, our old friend :ref:`@transform <decorators.transform>`.
+
+ Having done this, our next task is to recombine the fragments into a seamless whole.
+
+ This is the role of the :ref:`@merge <decorators.merge>` decorator.
+
+**************************************************************************************
+:ref:`@merge <decorators.merge>` is a many to one operator
+**************************************************************************************
+
+ :ref:`@merge <decorators.merge>` takes multiple *inputs* and produces a single *output*. **Ruffus**
+ is again agnostic as to the sort of data contained within this single *output*. It can be a single
+ (string) file name, an arbitrarily complicated nested structure with numbers, objects etc.,
+ or even a list.
+
+ The main thing is that downstream tasks will interpret this output as a single entity leading to a single
+ job.
+
+ :ref:`@split <decorators.split>` and :ref:`@merge <decorators.merge>` are, in other words, about network topology.
+
+ Because of this, :ref:`@merge <decorators.merge>` is also very useful for summarising the progress
+ of our pipeline. At key selected points, we can gather data from a multitude of disparate *inputs*
+ and :ref:`@merge <decorators.merge>` them into a single set of summaries.
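+
+ For example, a hypothetical downstream task (a sketch, assuming the ``calculate_variance`` task from the example below) sees the merged *output* as one **Input** and therefore runs as a single job:
+
+ .. code-block:: python
+
+     # one merged file in -> one report out, i.e. a single job
+     @transform(calculate_variance, suffix(".result"), ".report")
+     def write_report(input_file, output_file):
+         open(output_file, "w").write(open(input_file).read())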
+
+
+
+**************************************************************************************
+Example: Combining partial solutions: Calculating variances
+**************************************************************************************
+
+ In the :ref:`previous chapter <new_manual.split>`, we had almost completed all the pieces of our flowchart:
+
+ .. image:: ../../images/manual_split_merge_example.jpg
+ :scale: 30
+
+ What remains is to take the partial solutions from the different ``.sums`` files
+ and turn these into the variance as follows:
+
+ ::
+
+ variance = (sum_squared - sum * sum / N)/N
+
+ where ``N`` is the number of values
+
+ See the `wikipedia <http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance>`_ entry for a discussion of
+ why this is a very naive approach.
+
+
+
+ To do this, all we have to do is iterate through all the values in ``*.sums``,
+ add up the ``sums`` and ``sum_squared``, and apply the above (naive) formula.
+
+
+ .. code-block:: python
+ :emphasize-lines: 2
+
+ #
+ # @merge files together
+ #
+ @merge(sum_of_squares, "variance.result")
+ def calculate_variance (input_file_names, output_file_name):
+ """
+ Calculate variance naively
+ """
+ #
+ # initialise variables
+ #
+ all_sum_squared = 0.0
+ all_sum = 0.0
+ all_cnt_values = 0.0
+ #
+ # add up the sum_squared, sum and cnt_values from all the chunks
+ #
+ for input_file_name in input_file_names:
+ sum_squared, sum, cnt_values = map(float, open(input_file_name).readlines())
+ all_sum_squared += sum_squared
+ all_sum += sum
+ all_cnt_values += cnt_values
+ all_mean = all_sum / all_cnt_values
+ variance = (all_sum_squared - all_sum * all_mean)/(all_cnt_values)
+ #
+ # print output
+ #
+ open(output_file_name, "w").write("%s\n" % variance)
+
+
+
+ This results in the following equivalent function call:
+
+ .. code-block:: python
+ :emphasize-lines: 2
+
+
+ calculate_variance (["1.sums", "2.sums", "3.sums",
+ "4.sums", "5.sums", "6.sums",
+ "7.sums", "8.sums", "9.sums, "10.sums"], "variance.result")
+
+ and the following display:
+
+ .. code-block:: pycon
+
+ >>> pipeline_run()
+ Job = [[1.sums, 10.sums, 2.sums, 3.sums, 4.sums, 5.sums, 6.sums, 7.sums, 8.sums, 9.sums] -> variance.result] completed
+ Completed Task = calculate_variance
+
+
+
+ The final result is in ``variance.result``
+
+
+ Have a look at the :ref:`complete example code for this chapter <new_manual.merge.code>`.
+
+
+
diff --git a/doc/_build/html/_sources/tutorials/new_tutorial/merge_code.txt b/doc/_build/html/_sources/tutorials/new_tutorial/merge_code.txt
new file mode 100644
index 0000000..cc5bd8b
--- /dev/null
+++ b/doc/_build/html/_sources/tutorials/new_tutorial/merge_code.txt
@@ -0,0 +1,147 @@
+.. include:: ../../global.inc
+.. include:: manual_chapter_numbers.inc
+
+.. _new_manual.merge.code:
+
+##############################################################################################################
+|new_manual.merge.chapter_num|: Python Code for ``@merge`` multiple input into a single result
+##############################################################################################################
+
+.. seealso::
+
+ * :ref:`Manual Table of Contents <new_manual.table_of_contents>`
+ * :ref:`@merge syntax in detail <decorators.merge>`
+ * Back to |new_manual.merge.chapter_num|: :ref:`@merge multiple input into a single result <new_manual.merge>`
+
+*******************************************
+Splitting large jobs
+*******************************************
+
+ ::
+
+ from ruffus import *
+
+ NUMBER_OF_RANDOMS = 10000
+ CHUNK_SIZE = 1000
+
+
+ import random, os, glob
+
+ #---------------------------------------------------------------
+ #
+ # Create random numbers
+ #
+ @originate("random_numbers.list")
+ def create_random_numbers(output_file_name):
+ f = open(output_file_name, "w")
+ for i in range(NUMBER_OF_RANDOMS):
+ f.write("%g\n" % (random.random() * 100.0))
+
+ #---------------------------------------------------------------
+ #
+ # split initial file
+ #
+ @split(create_random_numbers, "*.chunks")
+ def split_problem (input_file_names, output_files):
+ """
+ splits random numbers file into xxx files of chunk_size each
+ """
+ #
+ # clean up any files from previous runs
+ #
+ for ff in glob.glob("*.chunks"):
+     os.unlink(ff)
+ #
+ #
+ # create new file every chunk_size lines and
+ # copy each line into current file
+ #
+ output_file = None
+ cnt_files = 0
+ for input_file_name in input_file_names:
+ for i, line in enumerate(open(input_file_name)):
+ if i % CHUNK_SIZE == 0:
+ cnt_files += 1
+ output_file = open("%d.chunks" % cnt_files, "w")
+ output_file.write(line)
+
+ #---------------------------------------------------------------
+ #
+ # Calculate sum and sum of squares for each chunk file
+ #
+ @transform(split_problem, suffix(".chunks"), ".sums")
+ def sum_of_squares (input_file_name, output_file_name):
+ output = open(output_file_name, "w")
+ sum_squared, sum = [0.0, 0.0]
+ cnt_values = 0
+ for line in open(input_file_name):
+ cnt_values += 1
+ val = float(line.rstrip())
+ sum_squared += val * val
+ sum += val
+ output.write("%s\n%s\n%d\n" % (repr(sum_squared), repr(sum), cnt_values))
+
+ #---------------------------------------------------------------
+ #
+ # Calculate variance from sums
+ #
+ @merge(sum_of_squares, "variance.result")
+ def calculate_variance (input_file_names, output_file_name):
+ """
+ Calculate variance naively
+ """
+ #
+ # initialise variables
+ #
+ all_sum_squared = 0.0
+ all_sum = 0.0
+ all_cnt_values = 0.0
+ #
+ # add up the sum_squared, sum and cnt_values from all the chunks
+ #
+ for input_file_name in input_file_names:
+ sum_squared, sum, cnt_values = map(float, open(input_file_name).readlines())
+ all_sum_squared += sum_squared
+ all_sum += sum
+ all_cnt_values += cnt_values
+ all_mean = all_sum / all_cnt_values
+ variance = (all_sum_squared - all_sum * all_mean)/(all_cnt_values)
+ #
+ # print output
+ #
+ open(output_file_name, "w").write("%s\n" % variance)
+
+ #---------------------------------------------------------------
+ #
+ # Run
+ #
+ pipeline_run()
+
+
+
+
+************************************
+Resulting Output
+************************************
+ ::
+
+ >>> pipeline_run()
+ Job = [None -> random_numbers.list] completed
+ Completed Task = create_random_numbers
+ Job = [[random_numbers.list] -> *.chunks] completed
+ Completed Task = split_problem
+ Job = [1.chunks -> 1.sums] completed
+ Job = [10.chunks -> 10.sums] completed
+ Job = [2.chunks -> 2.sums] completed
+ Job = [3.chunks -> 3.sums] completed
+ Job = [4.chunks -> 4.sums] completed
+ Job = [5.chunks -> 5.sums] completed
+ Job = [6.chunks -> 6.sums] completed
+ Job = [7.chunks -> 7.sums] completed
+ Job = [8.chunks -> 8.sums] completed
+ Job = [9.chunks -> 9.sums] completed
+ Completed Task = sum_of_squares
+ Job = [[1.sums, 10.sums, 2.sums, 3.sums, 4.sums, 5.sums, 6.sums, 7.sums, 8.sums, 9.sums] -> variance.result] completed
+ Completed Task = calculate_variance
+
diff --git a/doc/_build/html/_sources/tutorials/new_tutorial/mkdir.txt b/doc/_build/html/_sources/tutorials/new_tutorial/mkdir.txt
new file mode 100644
index 0000000..a7632c6
--- /dev/null
+++ b/doc/_build/html/_sources/tutorials/new_tutorial/mkdir.txt
@@ -0,0 +1,152 @@
+.. include:: ../../global.inc
+.. include:: manual_chapter_numbers.inc
+
+.. index::
+ pair: mkdir; Tutorial
+
+.. _new_manual.mkdir:
+
+######################################################################################################################################################################
+|new_manual.mkdir.chapter_num|: Preparing directories for output with :ref:`@mkdir() <decorators.mkdir>`
+######################################################################################################################################################################
+
+.. seealso::
+
+ * :ref:`Manual Table of Contents <new_manual.table_of_contents>`
+ * :ref:`@follows(mkdir()) syntax in detail <decorators.follows>`
+ * :ref:`@mkdir syntax in detail <decorators.mkdir>`
+
+.. note::
+
+ Remember to look at the example code:
+
+ * :ref:`new_manual.mkdir.code`
+
+
+***************************************
+Overview
+***************************************
+
+ In |new_manual.transform_in_parallel.chapter_num|, we saw that we could use :ref:`@follows(mkdir()) <new_manual.follows.mkdir>` to
+ ensure that output directories exist:
+
+ .. code-block:: python
+ :emphasize-lines: 4
+
+ #
+ # create_new_files() @follows mkdir
+ #
+ @follows(mkdir("output/results/here"))
+ @originate(["output/results/here/a.start_file",
+ "output/results/here/b.start_file"])
+ def create_new_files(output_file_pair):
+ pass
+
+
+ This ensures that the decorated task follows (:ref:`@follows <new_manual.follows.mkdir>`) the
+ making of the specified directory (``mkdir()``).
+
+ Sometimes, however, the **Output** is intended not for any single directory but a group
+ of destinations depending on the parsed contents of **Input** paths.
+
+*********************************************************************************************************************
+Creating directories after string substitution in a zoo...
+*********************************************************************************************************************
+
+ You may remember :ref:`this example <new_manual.output_file_names.formatter.zoo>` from |new_manual.output_file_names.chapter_num|:
+
+ We want to feed the denizens of a zoo. The original file names are spread over several directories and we
+ group their food supply by the *clade* of the animal in the following manner:
+
+ .. image:: ../../images/simple_tutorial_zoo_animals_formatter_example.jpg
+ :scale: 50
+
+ .. code-block:: python
+ :emphasize-lines: 13,14
+
+ # Put different animals in different directories depending on their clade
+ @transform(create_initial_files, # Input
+
+ formatter(".+/(?P<clade>\w+).(?P<tame>\w+).animals"), # Only animals: ignore plants!
+
+ "{subpath[0][1]}/{clade[0]}/{tame[0]}.{subdir[0][0]}.food", # Replacement
+
+ "{subpath[0][1]}/{clade[0]}", # new_directory
+ "{subdir[0][0]}", # animal_name
+ "{tame[0]}") # tameness
+ def feed(input_file, output_file, new_directory, animal_name, tameness):
+ print "%40s -> %90s" % (input_file, output_file)
+ # this blows up
+ # open(output_file, "w")
+
+
+ The example code from |new_manual.output_file_names.chapter_num| is, however, incomplete. If we were to actually create the specified
+ files we would realise that we had forgotten to create the destination directories ``reptiles``, ``mammals`` first!
+
+==============================================================================
+using :ref:`formatter() <decorators.formatter>`
+==============================================================================
+
+ We could of course create directories manually.
+ However, apart from being tedious and error prone, we have already gone to some lengths
+ to parse out the directories for :ref:`@transform <decorators.transform>`.
+ Why don't we use the same logic to make the directories?
+
+ Can you see the parallels between the syntax for :ref:`@mkdir <decorators.mkdir>` and :ref:`@transform <decorators.transform>`?
+
+ .. code-block:: python
+
+ # create directories for each clade
+ @mkdir( create_initial_files, # Input
+
+ formatter(".+/(?P<clade>\w+).(?P<tame>\w+).animals"), # Only animals: ignore plants!
+ "{subpath[0][1]}/{clade[0]}) # new_directory
+
+ # Put animals of each clade in the same directory
+ @transform(create_initial_files, # Input
+
+ formatter(".+/(?P<clade>\w+).(?P<tame>\w+).animals"), # Only animals: ignore plants!
+
+ "{subpath[0][1]}/{clade[0]}/{tame[0]}.{subdir[0][0]}.food", # Replacement
+
+ "{subpath[0][1]}/{clade[0]}", # new_directory
+ "{subdir[0][0]}", # animal_name
+ "{tame[0]}") # tameness
+ def feed(input_file, output_file, new_directory, animal_name, tameness):
+ print "%40s -> %90s" % (input_file, output_file)
+ # this works now
+ open(output_file, "w")
+
+ See the :ref:`example code <new_manual.mkdir.code>`
+
+==============================================================================
+using :ref:`regex() <decorators.regex>`
+==============================================================================
+
+ If you are particularly fond of using regular expression to parse file paths,
+ you could also use :ref:`regex() <decorators.regex>`:
+
+
+ .. code-block:: python
+
+ # create directories for each clade
+ @mkdir( create_initial_files, # Input
+
+ regex(r"(.*?)/?(\w+)/(?P<clade>\w+).(?P<tame>\w+).animals"), # Only animals: ignore plants!
+ r"\1/\g<clade>") # new_directory
+
+ # Put animals of each clade in the same directory
+ @transform(create_initial_files, # Input
+
+ formatter(".+/(?P<clade>\w+).(?P<tame>\w+).animals"), # Only animals: ignore plants!
+
+ "{subpath[0][1]}/{clade[0]}/{tame[0]}.{subdir[0][0]}.food", # Replacement
+
+ "{subpath[0][1]}/{clade[0]}", # new_directory
+ "{subdir[0][0]}", # animal_name
+ "{tame[0]}") # tameness
+ def feed(input_file, output_file, new_directory, animal_name, tameness):
+ print "%40s -> %90s" % (input_file, output_file)
+ # this works now
+ open(output_file, "w")
+
diff --git a/doc/_build/html/_sources/tutorials/new_tutorial/mkdir_code.txt b/doc/_build/html/_sources/tutorials/new_tutorial/mkdir_code.txt
new file mode 100644
index 0000000..91f3661
--- /dev/null
+++ b/doc/_build/html/_sources/tutorials/new_tutorial/mkdir_code.txt
@@ -0,0 +1,113 @@
+.. include:: ../../global.inc
+.. include:: manual_chapter_numbers.inc
+
+.. _new_manual.mkdir.code:
+
+############################################################################################################################################################################################################
+|new_manual.mkdir.chapter_num|: Python Code for Preparing directories for output with :ref:`@mkdir() <decorators.mkdir>`
+############################################################################################################################################################################################################
+
+.. seealso::
+
+ * :ref:`Manual Table of Contents <new_manual.table_of_contents>`
+ * :ref:`mkdir() <decorators.mkdir>` syntax
+ * :ref:`formatter() <decorators.formatter>` syntax
+ * :ref:`regex() <decorators.regex>` syntax
+ * Back to |new_manual.mkdir.chapter_num|: :ref:`Preparing directories for output with @mkdir() <new_manual.mkdir>`
+
+****************************************************************************************************************
+Code for :ref:`formatter() <decorators.formatter>` Zoo example
+****************************************************************************************************************
+
+
+ .. code-block:: python
+
+ from ruffus import *
+
+ # Make directories
+ @mkdir(["tiger", "lion", "dog", "crocodile", "rose"])
+ @originate(
+ # List of animals and plants
+ [ "tiger/mammals.wild.animals",
+ "lion/mammals.wild.animals",
+ "lion/mammals.handreared.animals",
+ "dog/mammals.tame.animals",
+ "dog/mammals.wild.animals",
+ "crocodile/reptiles.wild.animals",
+ "rose/flowering.handreared.plants"])
+ def create_initial_files(output_file):
+ with open(output_file, "w") as oo: pass
+
+
+ # create directories for each clade
+ @mkdir( create_initial_files, # Input
+
+ formatter(".+/(?P<clade>\w+).(?P<tame>\w+).animals"), # Only animals: ignore plants!
+
+ "{subpath[0][1]}/{clade[0]}") # new_directory
+ # Put different animals in different directories depending on their clade
+ @transform(create_initial_files, # Input
+
+ formatter(".+/(?P<clade>\w+).(?P<tame>\w+).animals"), # Only animals: ignore plants!
+
+ "{subpath[0][1]}/{clade[0]}/{tame[0]}.{subdir[0][0]}.food", # Replacement
+
+ "{subpath[0][1]}/{clade[0]}", # new_directory
+ "{subdir[0][0]}", # animal_name
+ "{tame[0]}") # tameness
+ def feed(input_file, output_file, new_directory, animal_name, tameness):
+ print "%40s -> %90s" % (input_file, output_file)
+ # this works now
+ open(output_file, "w")
+
+
+ pipeline_run(verbose=0)
+
+
+****************************************************************************************************************
+Code for :ref:`regex() <decorators.regex>` Zoo example
+****************************************************************************************************************
+
+
+ .. code-block:: python
+
+ from ruffus import *
+
+ # Make directories
+ @mkdir(["tiger", "lion", "dog", "crocodile", "rose"])
+ @originate(
+ # List of animals and plants
+ [ "tiger/mammals.wild.animals",
+ "lion/mammals.wild.animals",
+ "lion/mammals.handreared.animals",
+ "dog/mammals.tame.animals",
+ "dog/mammals.wild.animals",
+ "crocodile/reptiles.wild.animals",
+ "rose/flowering.handreared.plants"])
+ def create_initial_files(output_file):
+ with open(output_file, "w") as oo: pass
+
+
+ # create directories for each clade
+ @mkdir( create_initial_files, # Input
+
+ regex(r"(.*?/?)(\w+)/(?P<clade>\w+).(?P<tame>\w+).animals"), # Only animals: ignore plants!
+ r"\g<clade>") # new_directory
+ # Put different animals in different directories depending on their clade
+ @transform(create_initial_files, # Input
+
+ regex(r"(.*?/?)(\w+)/(?P<clade>\w+).(?P<tame>\w+).animals"), # Only animals: ignore plants!
+
+ r"\1\g<clade>/\g<tame>.\2.food", # Replacement
+
+ r"\1\g<clade>", # new_directory
+ r"\2", # animal_name
+ "\g<tame>") # tameness
+ def feed(input_file, output_file, new_directory, animal_name, tameness):
+ print "%40s -> %90s" % (input_file, output_file)
+ # this works now
+ open(output_file, "w")
+
+
+ pipeline_run(verbose=0)
+
diff --git a/doc/_build/html/_sources/tutorials/new_tutorial/multiprocessing.txt b/doc/_build/html/_sources/tutorials/new_tutorial/multiprocessing.txt
new file mode 100644
index 0000000..aaf77ed
--- /dev/null
+++ b/doc/_build/html/_sources/tutorials/new_tutorial/multiprocessing.txt
@@ -0,0 +1,293 @@
+.. include:: ../../global.inc
+.. include:: manual_chapter_numbers.inc
+
+.. index::
+ pair: multiprocessing; Tutorial
+
+.. _new_manual.multiprocessing:
+
+####################################################################################################################################################
+|new_manual.multiprocessing.chapter_num|: Multiprocessing, ``drmaa`` and Computation Clusters
+####################################################################################################################################################
+
+
+.. seealso::
+
+ * :ref:`Manual Table of Contents <new_manual.table_of_contents>`
+ * :ref:`@jobs_limit <decorators.jobs_limit>` syntax
+ * :ref:`pipeline_run() <pipeline_functions.pipeline_run>` syntax
+ * :ref:`drmaa_wrapper.run_job() <drmaa_wrapper.run_job>` syntax
+
+.. note::
+
+ Remember to look at the example code:
+
+ * :ref:`new_manual.multiprocessing.code`
+
+***********************
+Overview
+***********************
+
+.. index::
+ pair: pipeline_run(multiprocess); Tutorial
+
+=====================
+Multi Processing
+=====================
+
+ *Ruffus* uses python `multiprocessing <http://docs.python.org/library/multiprocessing.html>`_ to run
+ each job in a separate process.
+
+ This means that jobs do *not* necessarily complete in the order of the defined parameters.
+ Task hierarchies are, of course, inviolate: upstream tasks run before downstream, dependent tasks.
+
+ Tasks that are independent (i.e. do not precede each other) may be run in parallel as well.
+
+ The number of concurrent jobs can be set in :ref:`pipeline_run<pipeline_functions.pipeline_run>`:
+
+ ::
+
+ pipeline_run([parallel_task], multiprocess = 5)
+
+
+ If ``multiprocess`` is set to 1, jobs will be run in a single process.
+
+
+
+.. index::
+ pair: data sharing across processes; Tutorial
+
+=====================
+Data sharing
+=====================
+
+ Running jobs in separate processes allows *Ruffus* to make full use of the multiple
+ processors in modern computers. However, some `multiprocessing guidelines <http://docs.python.org/library/multiprocessing.html#multiprocessing-programming>`_
+ should be borne in mind when writing *Ruffus* pipelines. In particular:
+
+ * Try not to pass large amounts of data between jobs, or at least be aware that this has to be marshalled
+ across process boundaries.
+
+ * Only data which can be `pickled <http://docs.python.org/library/pickle.html>`_ can be passed as
+ parameters to *Ruffus* task functions. Happily, that applies to almost any native Python data type.
+ The rare, unpicklable object will cause python to complain (fail) loudly when *Ruffus* pipelines
+ are run, as sketched below.
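+
+ A hypothetical sketch (``previous_task`` and the parameter values are illustrative, not part of any real pipeline):
+
+ .. code-block:: python
+
+     # Plain python data (strings, numbers, lists, dicts) pickles happily and can be
+     # forwarded to each job as extra parameters:
+     @transform(previous_task, suffix(".input"), ".output",
+                {"threshold": 0.05, "label": "run1"})    # picklable extra parameter
+     def analyse(input_file, output_file, params):
+         pass
+
+     # By contrast, an extra parameter such as a lambda, an open file handle or a
+     # database connection cannot be pickled and will make the pipeline fail loudly
+     # when jobs are dispatched to separate processes.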
+
+
+
+.. index::
+ pair: @jobs_limit; Tutorial
+
+.. _new_manual.jobs_limit:
+
+
+********************************************************************************************
+Restricting parallelism with :ref:`@jobs_limit <decorators.jobs_limit>`
+********************************************************************************************
+
+ Calling :ref:`pipeline_run(multiprocess = NNN)<pipeline_functions.pipeline_run>` allows
+ multiple jobs (from multiple independent tasks) to be run in parallel. However, there
+ are some operations that consume so many resources that we might want them to run
+ with less or no concurrency.
+
+ For example, we might want to download some files via FTP but the server restricts
+ requests from each IP address. Even if the rest of the pipeline is running 100 jobs in
+ parallel, the FTP downloading must be restricted to 2 files at a time. We would really
+ like to keep the pipeline running as is, but let this one operation run either serially,
+ or with little concurrency.
+
+
+ * :ref:`pipeline_run(multiprocess = NNN)<pipeline_functions.pipeline_run>` sets the pipeline-wide concurrency but
+ * :ref:`@jobs_limit(MMM)<decorators.jobs_limit>` sets concurrency at ``MMM`` only for jobs in the decorated task.
+
+ The optional name (e.g. ``@jobs_limit(3, "ftp_download_limit")``) allows the same limit to
+ be shared across multiple tasks. To be pedantic: a limit of ``3`` jobs at a time would be applied
+ across all tasks which have a ``@jobs_limit`` named ``"ftp_download_limit"``.
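+
+ A hypothetical sketch of the FTP scenario above (the task, its upstream ``list_of_urls`` task and the file suffixes are illustrative):
+
+ .. code-block:: python
+
+     # at most 2 simultaneous downloads, shared across every task using this limit name
+     @jobs_limit(2, "ftp_download_limit")
+     @transform(list_of_urls, suffix(".url"), ".downloaded")
+     def download_via_ftp(input_file, output_file):
+         pass
+
+     # ... the rest of the pipeline still runs up to 100 jobs in parallel
+     pipeline_run(multiprocess = 100)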
+
+ The :ref:`example code<new_manual.multiprocessing.code>` uses up to 10 processes across the
+ pipeline, but runs the ``stage1_big`` and ``stage1_small`` tasks 3 at a time (shared across
+ both tasks). ``stage2`` jobs run 5 at a time.
+
+
+
+.. _new_manual.ruffus.drmaa_wrapper.run_job:
+
+********************************************************************************************
+Using ``drmaa`` to dispatch work to Computational Clusters or Grid engines from Ruffus jobs
+********************************************************************************************
+
+ Ruffus has been widely used to manage work on computational clusters or grid engines. Though Ruffus
+ task functions cannot (yet!) run natively and transparently on remote cluster nodes, it is trivial
+ to dispatch work across the cluster.
+
+ From version 2.4 onwards, Ruffus includes an optional helper module which interacts with
+ `python bindings <https://github.com/drmaa-python/drmaa-python>`__ for the widely used `drmaa <http://en.wikipedia.org/wiki/DRMAA>`__
+ Open Grid Forum API specification. This allows jobs to dispatch work to a computational cluster and wait until it completes.
+
+
+ Here are the necessary steps:
+
+==============================================================================
+1) Use a shared drmaa session:
+==============================================================================
+
+ Before your pipeline runs:
+
+ .. code-block:: python
+
+ #
+ # start shared drmaa session for all jobs / tasks in pipeline
+ #
+ import drmaa
+ drmaa_session = drmaa.Session()
+ drmaa_session.initialize()
+
+
+ Cleanup after your pipeline completes:
+
+ .. code-block:: python
+
+ #
+ # pipeline functions go here
+ #
+ if __name__ == '__main__':
+ drmaa_session.exit()
+
+
+==============================================================================
+2) import ``ruffus.drmaa_wrapper``
+==============================================================================
+
+ * The optional ``ruffus.drmaa_wrapper`` module needs to be imported explicitly:
+
+ .. code-block:: python
+ :emphasize-lines: 1
+
+ # imported ruffus.drmaa_wrapper explicitly
+ from ruffus.drmaa_wrapper import run_job, error_drmaa_job
+
+
+==============================================================================
+3) call :ref:`drmaa_wrapper.run_job()<drmaa_wrapper.run_job>`
+==============================================================================
+
+ :ref:`drmaa_wrapper.run_job() <drmaa_wrapper.run_job>` dispatches the work to a cluster node within a normal Ruffus job and waits for completion
+
+ This is the equivalent of `os.system <http://docs.python.org/2/library/os.html#os.system>`__ or
+ `subprocess.check_call <http://docs.python.org/2/library/subprocess.html#subprocess.check_call>`__, except that the command will run remotely as specified:
+
+ .. code-block:: python
+ :emphasize-lines: 1
+
+ # ruffus.drmaa_wrapper.run_job
+ stdout_res, stderr_res = run_job(cmd_str = "touch " + output_file,
+ job_name = job_name,
+ logger = logger,
+ drmaa_session = drmaa_session,
+ run_locally = options.local_run,
+ job_other_options = job_other_options)
+
+ The complete code is available :ref:`here <using_ruffus.drmaa_wrapper>`
+
+ * :ref:`drmaa_wrapper.run_job() <drmaa_wrapper.run_job>` is a convenience wrapper around the `python drmaa bindings <https://github.com/drmaa-python/drmaa-python>`__
+ `RunJob <http://drmaa-python.readthedocs.org/en/latest/tutorials.html#waiting-for-a-job>`__ function.
+ It takes care of writing drmaa *job templates* for you.
+ * Each call creates a separate drmaa *job template*.
+
+==================================================================================================
+4) Use multithread: :ref:`pipeline_run(multithread = NNN) <pipeline_functions.pipeline_run>`
+==================================================================================================
+
+ .. warning ::
+
+ :ref:`drmaa_wrapper.run_job()<drmaa_wrapper.run_job>`
+
+ **requires** ``pipeline_run`` :ref:`(multithread = NNN)<pipeline_functions.pipeline_run>`
+
+ **and will not work with** ``pipeline_run`` :ref:`(multiprocess = NNN)<pipeline_functions.pipeline_run>`
+
+
+ Using multithreading rather than multiprocessing
+ * allows the drmaa session to be shared
+ * prevents "processing storms" which lock up the queue submission node when hundreds or thousands of grid engine / cluster commands complete at the same time.
+
+ .. code-block:: python
+
+ pipeline_run (..., multithread = NNN, ...)
+
+ or if you are using ruffus.cmdline:
+
+ .. code-block:: python
+
+ cmdline.run (options, multithread = options.jobs)
+
+
+ Normally multithreading reduces the amount of parallelism in python due to the python `Global Interpreter Lock (GIL) <http://en.wikipedia.org/wiki/Global_Interpreter_Lock>`__.
+ However, as the workload runs almost entirely on another computer (i.e. a cluster / grid engine node) with a separate python interpreter, any cost-benefit calculations of this sort are moot.
+
+==================================================================================================
+5) Develop locally
+==================================================================================================
+
+ :ref:`drmaa_wrapper.run_job() <drmaa_wrapper.run_job>` provides two convenience parameters for developing grid engine pipelines:
+
+ * commands can run locally, i.e. on the local machine rather than on cluster nodes:
+
+ .. code-block:: python
+
+ run_job(cmd_str, run_locally = True)
+
+ * Output files can be `touch <http://en.wikipedia.org/wiki/Touch_(Unix)>`__\ed, i.e. given the appearance of the work having being done without actually running the commands
+
+ .. code-block:: python
+
+ run_job(cmd_str, touch_only = True)
+
+
+.. index::
+ pair: pipeline_run touch mode; Tutorial
+ pair: touch mode pipeline_run; Tutorial
+
+.. _new_manual.pipeline_run_touch:
+
+
+********************************************************************************************
+Forcing a pipeline to appear up to date
+********************************************************************************************
+
+ Sometimes, we *know* that a pipeline has run to completion and that everything is up-to-date. However, on the basis
+ of file modification times, *Ruffus* still insists that parts of the pipeline need to be rerun.
+
+ For example, sometimes a trivial accounting modification needs to be made to a data file.
+ Even though you know that this changes nothing in practice, Ruffus will detect the modification and
+ ask to rerun everything from that point forwards.
+
+ One way to convince Ruffus that everything is fine is to manually `touch <http://en.wikipedia.org/wiki/Touch_(Unix)>`__
+ all subsequent data files one by one in sequence so that the file timestamps follow the appropriate progression.
+
+ You can also ask *Ruffus* to do this automatically for you by running the pipeline in `touch <http://en.wikipedia.org/wiki/Touch_(Unix)>`__
+ mode:
+
+ .. code-block:: python
+
+ pipeline_run( touch_files_only = True)
+
+
+ :ref:`pipeline_run <pipeline_functions.pipeline_run>` runs your pipeline script normally, working backwards from any specified final target (or else the
+ last task in the pipeline) to work out where it should begin, i.e. at the first out-of-date data files.
+ From that point onwards, instead of calling your pipeline task functions, each missing or out-of-date file is
+ `touch-ed <http://en.wikipedia.org/wiki/Touch_(Unix)>`__ in turn so that the file modification dates
+ follow on successively.
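+
+ For example, the following minimal sketch (``final_task`` stands for a hypothetical task function in your own pipeline) touches only the missing or out-of-date files needed to bring that one target up to date:
+
+ .. code-block:: python
+
+     pipeline_run([final_task], touch_files_only = True)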
+
+
+ This turns out to be a useful way to check that your pipeline runs correctly, by creating a series of dummy (empty) files.
+ However, *Ruffus* cannot read your mind to know which files to create from :ref:`@split <decorators.split>` or
+ :ref:`@subdivide <decorators.subdivide>` tasks.
+
+
+ Using :ref:`ruffus.cmdline <new_manual.cmdline>` from version 2.4, you can just specify:
+
+ .. code-block:: bash
+
+ your script --touch_files_only [--other_options_of_your_own_etc]
+
diff --git a/doc/_build/html/_sources/tutorials/new_tutorial/multiprocessing_code.txt b/doc/_build/html/_sources/tutorials/new_tutorial/multiprocessing_code.txt
new file mode 100644
index 0000000..6766bd5
--- /dev/null
+++ b/doc/_build/html/_sources/tutorials/new_tutorial/multiprocessing_code.txt
@@ -0,0 +1,183 @@
+.. include:: ../../global.inc
+.. include:: manual_chapter_numbers.inc
+
+.. _new_manual.multiprocessing.code:
+
+################################################################################################################################################################
+|new_manual.multiprocessing.chapter_num|: Python Code for Multiprocessing, ``drmaa`` and Computation Clusters
+################################################################################################################################################################
+
+.. seealso::
+
+ * :ref:`Manual Table of Contents <new_manual.table_of_contents>`
+ * :ref:`@jobs_limit <decorators.jobs_limit>` syntax
+ * :ref:`pipeline_run() <pipeline_functions.pipeline_run>` syntax
+ * :ref:`drmaa_wrapper.run_job() <drmaa_wrapper.run_job>` syntax
+ * Back to |new_manual.multiprocessing.chapter_num|: :ref:`Multiprocessing, drmaa and Computation Clusters <new_manual.multiprocessing>`
+
+************************************************************************************
+:ref:`@jobs_limit <decorators.jobs_limit>`
+************************************************************************************
+
+ * The first two tasks are constrained to a shared parallelism of 3 jobs at a time
+ * The final task is constrained to a parallelism of 5 jobs at a time
+ * The entire pipeline is constrained to a (theoretical) parallelism of 10 jobs at a time
+
+ .. code-block:: python
+ :emphasize-lines: 12,17,22
+
+ from ruffus import *
+ import time
+
+ # make list of 10 files
+ @split(None, "*stage1")
+ def make_files(input_files, output_files):
+ for i in range(10):
+ if i < 5:
+ open("%d.small_stage1" % i, "w")
+ else:
+ open("%d.big_stage1" % i, "w")
+
+ @jobs_limit(3, "ftp_download_limit")
+ @transform(make_files, suffix(".small_stage1"), ".stage2")
+ def stage1_small(input_file, output_file):
+ print "FTP downloading %s ->Start" % input_file
+ time.sleep(2)
+ open(output_file, "w")
+ print "FTP downloading %s ->Finished" % input_file
+
+ @jobs_limit(3, "ftp_download_limit")
+ @transform(make_files, suffix(".big_stage1"), ".stage2")
+ def stage1_big(input_file, output_file):
+ print "FTP downloading %s ->Start" % input_file
+ time.sleep(2)
+ open(output_file, "w")
+ print "FTP downloading %s ->Finished" % input_file
+
+ @jobs_limit(5)
+ @transform([stage1_small, stage1_big], suffix(".stage2"), ".stage3")
+ def stage2(input_file, output_file):
+ print "Processing stage2 %s ->Start" % input_file
+ time.sleep(2)
+ open(output_file, "w")
+ print "Processing stage2 %s ->Finished" % input_file
+
+ pipeline_run(multiprocess = 10, verbose = 0)
+
+
+ Giving:
+
+ .. code-block:: pycon
+ :emphasize-lines: 3,25
+
+ >>> pipeline_run(multiprocess = 10, verbose = 0)
+
+ >>> # 3 jobs at a time, interleaved
+ FTP downloading 5.big_stage1 ->Start
+ FTP downloading 6.big_stage1 ->Start
+ FTP downloading 7.big_stage1 ->Start
+ FTP downloading 5.big_stage1 ->Finished
+ FTP downloading 8.big_stage1 ->Start
+ FTP downloading 6.big_stage1 ->Finished
+ FTP downloading 9.big_stage1 ->Start
+ FTP downloading 7.big_stage1 ->Finished
+ FTP downloading 0.small_stage1 ->Start
+ FTP downloading 8.big_stage1 ->Finished
+ FTP downloading 1.small_stage1 ->Start
+ FTP downloading 9.big_stage1 ->Finished
+ FTP downloading 2.small_stage1 ->Start
+ FTP downloading 0.small_stage1 ->Finished
+ FTP downloading 3.small_stage1 ->Start
+ FTP downloading 1.small_stage1 ->Finished
+ FTP downloading 4.small_stage1 ->Start
+ FTP downloading 2.small_stage1 ->Finished
+ FTP downloading 3.small_stage1 ->Finished
+ FTP downloading 4.small_stage1 ->Finished
+
+ >>> # 5 jobs at a time, interleaved
+ Processing stage2 0.stage2 ->Start
+ Processing stage2 1.stage2 ->Start
+ Processing stage2 2.stage2 ->Start
+ Processing stage2 3.stage2 ->Start
+ Processing stage2 4.stage2 ->Start
+ Processing stage2 0.stage2 ->Finished
+ Processing stage2 5.stage2 ->Start
+ Processing stage2 1.stage2 ->Finished
+ Processing stage2 6.stage2 ->Start
+ Processing stage2 2.stage2 ->Finished
+ Processing stage2 4.stage2 ->Finished
+ Processing stage2 7.stage2 ->Start
+ Processing stage2 8.stage2 ->Start
+ Processing stage2 3.stage2 ->Finished
+ Processing stage2 9.stage2 ->Start
+ Processing stage2 5.stage2 ->Finished
+ Processing stage2 7.stage2 ->Finished
+ Processing stage2 6.stage2 ->Finished
+ Processing stage2 8.stage2 ->Finished
+ Processing stage2 9.stage2 ->Finished
+
+.. _using_ruffus.drmaa_wrapper:
+
+************************************************************************************
+Using ``ruffus.drmaa_wrapper``
+************************************************************************************
+
+ .. code-block:: python
+ :emphasize-lines: 17,31,53
+
+ #!/usr/bin/python
+ job_queue_name = "YOUR_QUEUE_NAME_GOES_HERE"
+ job_other_options = "-P YOUR_PROJECT_NAME_GOES_HERE"
+
+ from ruffus import *
+ from ruffus.drmaa_wrapper import run_job, error_drmaa_job
+
+ parser = cmdline.get_argparse(description='WHAT DOES THIS PIPELINE DO?')
+
+ options = parser.parse_args()
+
+ # logger which can be passed to multiprocessing ruffus tasks
+ logger, logger_mutex = cmdline.setup_logging (__name__, options.log_file, options.verbose)
+
+
+ #
+ # start shared drmaa session for all jobs / tasks in pipeline
+ #
+ import drmaa
+ drmaa_session = drmaa.Session()
+ drmaa_session.initialize()
+
+ @originate(["1.chromosome", "X.chromosome"],
+ logger, logger_mutex)
+ def create_test_files(output_file):
+ try:
+ stdout_res, stderr_res = "",""
+ job_queue_name, job_other_options = get_queue_options()
+
+ #
+ # ruffus.drmaa_wrapper.run_job
+ #
+ stdout_res, stderr_res = run_job(cmd_str = "touch " + output_file,
+ job_name = job_name,
+ logger = logger,
+ drmaa_session = drmaa_session,
+ run_locally = options.local_run,
+ job_queue_name = job_queue_name,
+ job_other_options = job_other_options)
+
+ # relay all the stdout, stderr, drmaa output to diagnose failures
+ except error_drmaa_job as err:
+ raise Exception("\n".join(map(str,
+ "Failed to run:"
+ cmd,
+ err,
+ stdout_res,
+ stderr_res)))
+
+
+ if __name__ == '__main__':
+ cmdline.run (options, multithread = options.jobs)
+ # cleanup drmaa
+ drmaa_session.exit()
+
+
diff --git a/doc/_build/html/_sources/tutorials/new_tutorial/onthefly.txt b/doc/_build/html/_sources/tutorials/new_tutorial/onthefly.txt
new file mode 100644
index 0000000..221b050
--- /dev/null
+++ b/doc/_build/html/_sources/tutorials/new_tutorial/onthefly.txt
@@ -0,0 +1,192 @@
+.. include:: ../../global.inc
+.. include:: manual_chapter_numbers.inc
+
+.. index::
+ pair: on_the_fly; Tutorial
+
+.. _new_manual.on_the_fly:
+
+####################################################################################################################################################
+|new_manual.on_the_fly.chapter_num|: Esoteric: Generating parameters on the fly with :ref:`@files<decorators.files_on_the_fly>`
+####################################################################################################################################################
+
+
+.. seealso::
+
+ * :ref:`Manual Table of Contents <new_manual.table_of_contents>`
+ * :ref:`@files on-the-fly syntax in detail <decorators.files_on_the_fly>`
+
+.. note::
+
+ Remember to look at the example code:
+
+ * :ref:`new_manual.on_the_fly.code`
+
+
+***********************
+Overview
+***********************
+
+ The different *Ruffus* :ref:`decorators <decorators>` connect up different tasks and
+ generate *Output* (file names) from your *Input* in all sorts of different ways.
+
+ However, sometimes none of them does *quite* what you need, and it becomes
+ necessary to generate your own *Input* and *Output* parameters on the fly.
+
+ Although this additional flexibility comes at the cost of a lot of extra, inconvenient
+ code, you can continue to leverage the rest of *Ruffus* functionality, such as
+ checking whether files are up to date or not.
+
+.. index::
+ pair: @files; Tutorial on-the-fly parameter generation
+
+
+*********************************************************************
+:ref:`@files <decorators.files_on_the_fly>` syntax
+*********************************************************************
+ To generate parameters on the fly, use the :ref:`@files <decorators.files_on_the_fly>` decorator
+ with a :term:`generator` function which yields one list / tuple of parameters per job.
+
+ For example:
+
+ .. code-block:: python
+ :emphasize-lines: 3,16
+
+ from ruffus import *
+ import sys
+
+ # generator function
+ def generate_parameters_on_the_fly():
+ """
+ returns one list of parameters per job
+ """
+ parameters = [
+ ['A.input', 'A.output', (1, 2)], # 1st job
+ ['B.input', 'B.output', (3, 4)], # 2nd job
+ ['C.input', 'C.output', (5, 6)], # 3rd job
+ ]
+ for job_parameters in parameters:
+ yield job_parameters
+
+ # tell ruffus that parameters should be generated on the fly
+ @files(generate_parameters_on_the_fly)
+ def pipeline_task(input, output, extra):
+ open(output, "w").write(open(input).read())
+ sys.stderr.write("%d + %d => %d\n" % (extra[0] , extra[1], extra[0] + extra[1]))
+
+ pipeline_run()
+
+
+ Produces:
+
+ .. code-block:: pycon
+
+ Task = pipeline_task
+ 1 + 2 => 3
+ Job = ["A", 1, 2] completed
+ 3 + 4 => 7
+ Job = ["B", 3, 4] completed
+ 5 + 6 => 11
+ Job = ["C", 5, 6] completed
+
+
+ .. note::
+
+ Be aware that the parameter generating function may be invoked
+ :ref:`more than once <new_manual.dependencies.checking_multiple_times>`:
+
+ * the first time, to check whether this part of the pipeline is up-to-date
+ * the second time, when the pipeline task function is actually run
+
+ The resulting custom *inputs*, *outputs* parameters per job are
+ treated normally for the purposes of checking to see if jobs are up-to-date and
+ need to be re-run.
+
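+ One way to watch this dependency checking happen (a sketch, not part of the original example) is to dry-run the pipeline with ``pipeline_printout()``, which invokes the generator and lists which jobs would be run, without calling ``pipeline_task()``:
+
+ .. code-block:: python
+
+     import sys
+
+     # dry run: show which jobs would be run and why
+     pipeline_printout(sys.stdout, [pipeline_task], verbose = 3)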
+
+**********************************************
+ A Cartesian Product, all vs all example
+**********************************************
+
+ The :ref:`accompanying example<new_manual.on_the_fly.code>` provides a more realistic reason why
+ you would want to generate parameters on the fly. It is a fun piece of code, which generates
+ N x M combinations from two sets of files as the *inputs* of a pipeline stage.
+
+ The *inputs* / *outputs* file names are generated by a pair of nested for-loops to produce
+ the N (outer loop) x M (inner loop) combinations, with the appropriate parameters
+ for each job ``yield``\ed per iteration of the inner loop. The gist of this is:
+
+ .. code-block:: python
+ :emphasize-lines: 3
+
+ #_________________________________________________________________________________________
+ #
+ # Generator function
+ #
+ # N x M jobs
+ #_________________________________________________________________________________________
+ def generate_simulation_params ():
+ """
+ Custom function to generate
+ file names for gene/gwas simulation study
+ """
+ for sim_file in get_simulation_files():
+ for (gene, gwas) in get_gene_gwas_file_pairs():
+ result_file = "%s.%s.results" % (gene, sim_file)
+ yield (gene, gwas, sim_file), result_file
+
+
+
+ @files(generate_simulation_params)
+ def gwas_simulation(input_files, output_file):
+ "..."
+
+ If ``get_simulation_files()`` produces:
+ ::
+
+ ['a.sim', 'b.sim', 'c.sim']
+
+ and ``get_gene_gwas_file_pairs()`` produces:
+ ::
+
+ [('1.gene', '1.gwas'), ('2.gene', '2.gwas')]
+
+ then we would end up with ``3`` x ``2`` = ``6`` jobs and the following equivalent function calls:
+
+ ::
+
+ gwas_simulation(('1.gene', '1.gwas', 'a.sim'), "1.gene.a.sim.results")
+ gwas_simulation(('2.gene', '2.gwas', 'a.sim'), "2.gene.a.sim.results")
+ gwas_simulation(('1.gene', '1.gwas', 'b.sim'), "1.gene.b.sim.results")
+ gwas_simulation(('2.gene', '2.gwas', 'b.sim'), "2.gene.b.sim.results")
+ gwas_simulation(('1.gene', '1.gwas', 'c.sim'), "1.gene.c.sim.results")
+ gwas_simulation(('2.gene', '2.gwas', 'c.sim'), "2.gene.c.sim.results")
+
+
+ The :ref:`accompanying code<new_manual.on_the_fly.code>` looks slightly more complicated because
+ of some extra bookkeeping.
+
+
+
+ You can compare this approach with the alternative of using :ref:`@product <decorators.product>`:
+
+ .. code-block:: python
+ :emphasize-lines: 3
+
+ #_________________________________________________________________________________________
+ #
+ # N x M jobs
+ #_________________________________________________________________________________________
+ @product(   os.path.join(simulation_data_dir, "*.simulation"),
+             formatter(),
+
+             os.path.join(gene_data_dir, "*.gene"),
+             formatter(),
+
+             # add gwas as an input: looks like *.gene but with a different extension
+             add_inputs("{path[1][0]}/{basename[1][0]}.gwas"),
+
+             "{basename[0][0]}.{basename[1][0]}.results")   # output file
+ def gwas_simulation(input_files, output_file):
+     "..."
+
+
+
+
diff --git a/doc/_build/html/_sources/tutorials/new_tutorial/onthefly_code.txt b/doc/_build/html/_sources/tutorials/new_tutorial/onthefly_code.txt
new file mode 100644
index 0000000..2ddc060
--- /dev/null
+++ b/doc/_build/html/_sources/tutorials/new_tutorial/onthefly_code.txt
@@ -0,0 +1,328 @@
+.. include:: ../../global.inc
+.. include:: manual_chapter_numbers.inc
+
+.. _new_manual.on_the_fly.code:
+
+############################################################################################################################################################################################################
+|new_manual.on_the_fly.chapter_num|: Esoteric: Python Code for Generating parameters on the fly with :ref:`@files<decorators.files_on_the_fly>`
+############################################################################################################################################################################################################
+
+.. seealso::
+
+ * :ref:`Manual Table of Contents <new_manual.table_of_contents>`
+ * :ref:`@files on-the-fly syntax in detail <decorators.files_on_the_fly>`
+ * Back to |new_manual.on_the_fly.chapter_num|: :ref:`Generating parameters on the fly <new_manual.on_the_fly>`
+
+************************************
+Introduction
+************************************
+
+ | This script takes N pairs of input files (with the suffices .gene and .gwas)
+ | and runs them against M sets of simulation data (with the suffix .simulation).
+ | A summary per input file pair is then produced.
+
+
+ In pseudo-code:
+
+ STEP_1:
+
+ ::
+
+ for n_file in NNN_pairs_of_input_files:
+ for m_file in MMM_simulation_data:
+
+ [n_file.gene,
+ n_file.gwas,
+ m_file.simulation] -> n_file.m_file.simulation_res
+
+
+ STEP_2:
+
+ ::
+
+ for n_file in NNN_pairs_of_input_files:
+
+ n_file.*.simulation_res -> n_file.mean
+
+
+ | n = CNT_GENE_GWAS_FILES
+ | m = CNT_SIMULATION_FILES
+
+************************************
+Code
+************************************
+ ::
+
+ from ruffus import *
+ import os
+
+ #88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+ # constants
+
+ #88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+ working_dir = "temp_NxM"
+ simulation_data_dir = os.path.join(working_dir, "simulation")
+ gene_data_dir = os.path.join(working_dir, "gene")
+ CNT_GENE_GWAS_FILES = 2
+ CNT_SIMULATION_FILES = 3
+
+
+
+ #88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+ # imports
+
+ #88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+ import os, sys
+ from itertools import izip
+ import glob
+ #88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+ # Functions
+
+
+ #88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+ #_________________________________________________________________________________________
+ #
+ # get gene gwas file pairs
+ #
+ #_________________________________________________________________________________________
+ def get_gene_gwas_file_pairs( ):
+ """
+ Helper function to get all *.gene, *.gwas files from the directory specified
+ in gene_data_dir
+
+ Returns
+ file pairs with both .gene and .gwas extensions,
+ corresponding roots (no extension) of each file
+ """
+ gene_files = glob.glob(os.path.join(gene_data_dir, "*.gene"))
+ gwas_files = glob.glob(os.path.join(gene_data_dir, "*.gwas"))
+ #
+ common_roots = set(map(lambda x: os.path.splitext(os.path.split(x)[1])[0], gene_files))
+ common_roots &=set(map(lambda x: os.path.splitext(os.path.split(x)[1])[0], gwas_files))
+ common_roots = list(common_roots)
+ #
+ p = os.path; g_dir = gene_data_dir
+ file_pairs = [[p.join(g_dir, x + ".gene"), p.join(g_dir, x + ".gwas")] for x in common_roots]
+ return file_pairs, common_roots
+
+ #_________________________________________________________________________________________
+ #
+ # get simulation files
+ #
+ #_________________________________________________________________________________________
+ def get_simulation_files( ):
+ """
+ Helper function to get all *.simulation files from the directory specified
+ in simulation_data_dir
+ Returns
+ file with .simulation extensions,
+ corresponding roots (no extension) of each file
+ """
+ simulation_files = glob.glob(os.path.join(simulation_data_dir, "*.simulation"))
+ simulation_roots =map(lambda x: os.path.splitext(os.path.split(x)[1])[0], simulation_files)
+ return simulation_files, simulation_roots
+
+
+
+ #88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+ # Main logic
+
+
+ #88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+
+
+
+
+
+
+ #_________________________________________________________________________________________
+ #
+ # setup_simulation_data
+ #
+ #_________________________________________________________________________________________
+
+ #
+ # mkdir: makes sure output directories exist before task
+ #
+ @follows(mkdir(gene_data_dir, simulation_data_dir))
+ def setup_simulation_data ():
+ """
+ create simulation files
+ """
+ for i in range(CNT_GENE_GWAS_FILES):
+ open(os.path.join(gene_data_dir, "%03d.gene" % i), "w")
+ open(os.path.join(gene_data_dir, "%03d.gwas" % i), "w")
+ #
+ # gene files without corresponding gwas and vice versa
+ open(os.path.join(gene_data_dir, "orphan1.gene"), "w")
+ open(os.path.join(gene_data_dir, "orphan2.gwas"), "w")
+ open(os.path.join(gene_data_dir, "orphan3.gwas"), "w")
+ #
+ for i in range(CNT_SIMULATION_FILES):
+ open(os.path.join(simulation_data_dir, "%03d.simulation" % i), "w")
+
+
+
+
+ #_________________________________________________________________________________________
+ #
+ # cleanup_simulation_data
+ #
+ #_________________________________________________________________________________________
+ def try_rmdir (d):
+ if os.path.exists(d):
+ try:
+ os.rmdir(d)
+ except OSError:
+ sys.stderr.write("Warning:\t%s is not empty and will not be removed.\n" % d)
+
+
+
+ def cleanup_simulation_data ():
+ """
+ cleanup files
+ """
+ sys.stderr.write("Cleanup working directory and simulation files.\n")
+ #
+ # cleanup gene and gwas files
+ #
+ for f in glob.glob(os.path.join(gene_data_dir, "*.gene")):
+ os.unlink(f)
+ for f in glob.glob(os.path.join(gene_data_dir, "*.gwas")):
+ os.unlink(f)
+ try_rmdir(gene_data_dir)
+ #
+ # cleanup simulation
+ #
+ for f in glob.glob(os.path.join(simulation_data_dir, "*.simulation")):
+ os.unlink(f)
+ try_rmdir(simulation_data_dir)
+ #
+ # cleanup working_dir
+ #
+ for f in glob.glob(os.path.join(working_dir, "simulation_results", "*.simulation_res")):
+ os.unlink(f)
+ try_rmdir(os.path.join(working_dir, "simulation_results"))
+ #
+ for f in glob.glob(os.path.join(working_dir, "*.mean")):
+ os.unlink(f)
+ try_rmdir(working_dir)
+
+
+ #_________________________________________________________________________________________
+ #
+ # Step 1:
+ #
+ # for n_file in NNN_pairs_of_input_files:
+ # for m_file in MMM_simulation_data:
+ #
+ # [n_file.gene,
+ # n_file.gwas,
+ # m_file.simulation] -> working_dir/n_file.m_file.simulation_res
+ #
+ #_________________________________________________________________________________________
+ def generate_simulation_params ():
+ """
+ Custom function to generate
+ file names for gene/gwas simulation study
+ """
+ simulation_files, simulation_file_roots = get_simulation_files()
+ gene_gwas_file_pairs, gene_gwas_file_roots = get_gene_gwas_file_pairs()
+ #
+ for sim_file, sim_file_root in izip(simulation_files, simulation_file_roots):
+ for (gene, gwas), gene_file_root in izip(gene_gwas_file_pairs, gene_gwas_file_roots):
+ #
+ result_file = "%s.%s.simulation_res" % (gene_file_root, sim_file_root)
+ result_file_path = os.path.join(working_dir, "simulation_results", result_file)
+ #
+ yield [gene, gwas, sim_file], result_file_path, gene_file_root, sim_file_root, result_file
+
+
+
+ #
+ # mkdir: makes sure output directories exist before task
+ #
+ @follows(mkdir(working_dir, os.path.join(working_dir, "simulation_results")))
+ @files(generate_simulation_params)
+ def gwas_simulation(input_files, result_file_path, gene_file_root, sim_file_root, result_file):
+ """
+ Dummy calculation of gene gwas vs simulation data
+ Normally runs in parallel on a computational cluster
+ """
+ (gene_file,
+ gwas_file,
+ simulation_data_file) = input_files
+ #
+ simulation_res_file = open(result_file_path, "w")
+ simulation_res_file.write("%s + %s -> %s\n" % (gene_file_root, sim_file_root, result_file))
+
+
+ #_________________________________________________________________________________________
+ #
+ # Step 2:
+ #
+ # Statistical summary per gene/gwas file pair
+ #
+ # for n_file in NNN_pairs_of_input_files:
+ # working_dir/simulation_results/n.*.simulation_res
+ # -> working_dir/n.mean
+ #
+ #_________________________________________________________________________________________
+
+
+ @collate(gwas_simulation, regex(r"simulation_results/(\d+).\d+.simulation_res"), r"\1.mean")
+ @posttask(lambda : sys.stdout.write("\nOK\n"))
+ def statistical_summary (result_files, summary_file):
+ """
+ Simulate statistical summary
+ """
+ summary_file = open(summary_file, "w")
+ for f in result_files:
+ summary_file.write(open(f).read())
+
+
+
+ pipeline_run([setup_simulation_data], multiprocess = 5, verbose = 2)
+ pipeline_run([statistical_summary], multiprocess = 5, verbose = 2)
+
+ # uncomment to print out the pipeline / draw a flowchart
+ #
+ # pipeline_printout(sys.stdout, [statistical_summary], verbose=2)
+ # graph_printout ("flowchart.jpg", "jpg", [statistical_summary])
+ #
+
+ cleanup_simulation_data ()
+
+
+
+
+************************************
+Resulting Output
+************************************
+ ::
+
+ >>> pipeline_run([setup_simulation_data], multiprocess = 5, verbose = 2)
+ Make directories [temp_NxM/gene, temp_NxM/simulation] completed
+ Completed Task = setup_simulation_data_mkdir_1
+ Job completed
+ Completed Task = setup_simulation_data
+
+
+ >>> pipeline_run([statistical_summary], multiprocess = 5, verbose = 2)
+ Make directories [temp_NxM, temp_NxM/simulation_results] completed
+ Completed Task = gwas_simulation_mkdir_1
+ Job = [[temp_NxM/gene/001.gene, temp_NxM/gene/001.gwas, temp_NxM/simulation/000.simulation] -> temp_NxM/simulation_results/001.000.simulation_res, 001, 000, 001.000.simulation_res] completed
+ Job = [[temp_NxM/gene/000.gene, temp_NxM/gene/000.gwas, temp_NxM/simulation/000.simulation] -> temp_NxM/simulation_results/000.000.simulation_res, 000, 000, 000.000.simulation_res] completed
+ Job = [[temp_NxM/gene/001.gene, temp_NxM/gene/001.gwas, temp_NxM/simulation/001.simulation] -> temp_NxM/simulation_results/001.001.simulation_res, 001, 001, 001.001.simulation_res] completed
+ Job = [[temp_NxM/gene/000.gene, temp_NxM/gene/000.gwas, temp_NxM/simulation/001.simulation] -> temp_NxM/simulation_results/000.001.simulation_res, 000, 001, 000.001.simulation_res] completed
+ Job = [[temp_NxM/gene/000.gene, temp_NxM/gene/000.gwas, temp_NxM/simulation/002.simulation] -> temp_NxM/simulation_results/000.002.simulation_res, 000, 002, 000.002.simulation_res] completed
+ Job = [[temp_NxM/gene/001.gene, temp_NxM/gene/001.gwas, temp_NxM/simulation/002.simulation] -> temp_NxM/simulation_results/001.002.simulation_res, 001, 002, 001.002.simulation_res] completed
+ Completed Task = gwas_simulation
+ Job = [[temp_NxM/simulation_results/000.000.simulation_res, temp_NxM/simulation_results/000.001.simulation_res, temp_NxM/simulation_results/000.002.simulation_res] -> temp_NxM/000.mean] completed
+ Job = [[temp_NxM/simulation_results/001.000.simulation_res, temp_NxM/simulation_results/001.001.simulation_res, temp_NxM/simulation_results/001.002.simulation_res] -> temp_NxM/001.mean] completed
diff --git a/doc/_build/html/_sources/tutorials/new_tutorial/originate.txt b/doc/_build/html/_sources/tutorials/new_tutorial/originate.txt
new file mode 100644
index 0000000..0c7f0fe
--- /dev/null
+++ b/doc/_build/html/_sources/tutorials/new_tutorial/originate.txt
@@ -0,0 +1,92 @@
+.. include:: ../../global.inc
+.. include:: manual_chapter_numbers.inc
+
+
+.. index::
+ pair: originate; Tutorial
+
+.. _new_manual.originate:
+
+######################################################################################################
+|new_manual.originate.chapter_num|: Creating files with ``@originate``
+######################################################################################################
+
+.. seealso::
+
+ * :ref:`Manual Table of Contents <new_manual.table_of_contents>`
+ * :ref:`@originate syntax in detail <decorators.originate>`
+
+.. note::
+
+ Remember to look at the example code:
+
+ * :ref:`new_manual.originate.code`
+
+********************************************************************************************
+Simplifying our example with :ref:`@originate <decorators.originate>`
+********************************************************************************************
+
+ Our previous pipeline example started off with a set of files which we had to create first.
+
+ This is a common task: pipelines have to start *somewhere*.
+
+ Ideally, though, we would only want to create these starting files if they didn't already exist. In other words, we want a sort of ``@transform`` which makes files from nothing (``None``?).
+
+ This is exactly what :ref:`@originate <decorators.originate>` helps you to do.
+
+ Rewriting our pipeline with :ref:`@originate <decorators.originate>` gives the following three steps:
+
+
+ .. code-block:: python
+ :emphasize-lines: 6
+
+ from ruffus import *
+
+ #---------------------------------------------------------------
+ # create initial files
+ #
+ @originate([ ['job1.a.start', 'job1.b.start'],
+ ['job2.a.start', 'job2.b.start'],
+ ['job3.a.start', 'job3.b.start'] ])
+ def create_initial_file_pairs(output_files):
+ # create both files as necessary
+ for output_file in output_files:
+ with open(output_file, "w") as oo: pass
+
+ #---------------------------------------------------------------
+ # first task
+ @transform(create_initial_file_pairs, suffix(".start"), ".output.1")
+ def first_task(input_files, output_file):
+ with open(output_file, "w"): pass
+
+
+ #---------------------------------------------------------------
+ # second task
+ @transform(first_task, suffix(".output.1"), ".output.2")
+ def second_task(input_files, output_file):
+ with open(output_file, "w"): pass
+
+ #
+ # Run
+ #
+ pipeline_run([second_task])
+
+
+
+ ::
+
+ Job = [None -> [job1.a.start, job1.b.start]] completed
+ Job = [None -> [job2.a.start, job2.b.start]] completed
+ Job = [None -> [job3.a.start, job3.b.start]] completed
+ Completed Task = create_initial_file_pairs
+ Job = [[job1.a.start, job1.b.start] -> job1.a.output.1] completed
+ Job = [[job2.a.start, job2.b.start] -> job2.a.output.1] completed
+ Job = [[job3.a.start, job3.b.start] -> job3.a.output.1] completed
+ Completed Task = first_task
+ Job = [job1.a.output.1 -> job1.a.output.2] completed
+ Job = [job2.a.output.1 -> job2.a.output.2] completed
+ Job = [job3.a.output.1 -> job3.a.output.2] completed
+ Completed Task = second_task
+
+
+
diff --git a/doc/_build/html/_sources/tutorials/new_tutorial/originate_code.txt b/doc/_build/html/_sources/tutorials/new_tutorial/originate_code.txt
new file mode 100644
index 0000000..215e47a
--- /dev/null
+++ b/doc/_build/html/_sources/tutorials/new_tutorial/originate_code.txt
@@ -0,0 +1,71 @@
+.. include:: ../../global.inc
+.. include:: manual_chapter_numbers.inc
+
+.. _new_manual.originate.code:
+
+######################################################################################################
+|new_manual.originate.chapter_num|: Python Code for Creating files with ``@originate``
+######################################################################################################
+
+.. seealso::
+
+ * :ref:`Manual Table of Contents <new_manual.table_of_contents>`
+ * :ref:`@transform syntax in detail <decorators.transform>`
+ * Back to |new_manual.originate.chapter_num|: :ref:`@originate <new_manual.originate>`
+
+**********************************************
+Using ``@originate``
+**********************************************
+ ::
+
+ from ruffus import *
+
+ #---------------------------------------------------------------
+ # create initial files
+ #
+ @originate([ ['job1.a.start', 'job1.b.start'],
+ ['job2.a.start', 'job2.b.start'],
+ ['job3.a.start', 'job3.b.start'] ])
+ def create_initial_file_pairs(output_files):
+ # create both files as necessary
+ for output_file in output_files:
+ with open(output_file, "w") as oo: pass
+
+ #---------------------------------------------------------------
+ # first task
+ @transform(create_initial_file_pairs, suffix(".start"), ".output.1")
+ def first_task(input_files, output_file):
+ with open(output_file, "w"): pass
+
+
+ #---------------------------------------------------------------
+ # second task
+ @transform(first_task, suffix(".output.1"), ".output.2")
+ def second_task(input_files, output_file):
+ with open(output_file, "w"): pass
+
+ #
+ # Run
+ #
+ pipeline_run([second_task])
+
+
+************************************
+Resulting Output
+************************************
+
+ ::
+
+ Job = [None -> [job1.a.start, job1.b.start]] completed
+ Job = [None -> [job2.a.start, job2.b.start]] completed
+ Job = [None -> [job3.a.start, job3.b.start]] completed
+ Completed Task = create_initial_file_pairs
+ Job = [[job1.a.start, job1.b.start] -> job1.a.output.1] completed
+ Job = [[job2.a.start, job2.b.start] -> job2.a.output.1] completed
+ Job = [[job3.a.start, job3.b.start] -> job3.a.output.1] completed
+ Completed Task = first_task
+ Job = [job1.a.output.1 -> job1.a.output.2] completed
+ Job = [job2.a.output.1 -> job2.a.output.2] completed
+ Job = [job3.a.output.1 -> job3.a.output.2] completed
+ Completed Task = second_task
+
diff --git a/doc/_build/html/_sources/tutorials/new_tutorial/output_file_names.txt b/doc/_build/html/_sources/tutorials/new_tutorial/output_file_names.txt
new file mode 100644
index 0000000..5523794
--- /dev/null
+++ b/doc/_build/html/_sources/tutorials/new_tutorial/output_file_names.txt
@@ -0,0 +1,560 @@
+.. include:: ../../global.inc
+.. include:: manual_chapter_numbers.inc
+
+.. index::
+ pair: formatter; Tutorial
+ pair: suffix; Tutorial
+ pair: regex; Tutorial
+ pair: output file names; Tutorial
+
+.. _new_manual.output_file_names:
+
+############################################################################################################################################################################################################
+|new_manual.output_file_names.chapter_num|: Specifying output file names with :ref:`formatter() <decorators.formatter>` and :ref:`regex() <decorators.regex>`
+############################################################################################################################################################################################################
+
+.. seealso::
+
+ * :ref:`Manual Table of Contents <new_manual.table_of_contents>`
+ * :ref:`suffix() <decorators.suffix>` syntax
+ * :ref:`formatter() <decorators.formatter>` syntax
+ * :ref:`regex() <decorators.regex>` syntax
+
+
+.. note::
+
+ Remember to look at the example code:
+
+ * :ref:`new_manual.output_file_names.code`
+
+
+
+***************************************
+Review
+***************************************
+ .. image:: ../../images/theoretical_pipeline_schematic.png
+ :scale: 50
+
+ Computational pipelines transform your data in stages until the final result is produced.
+ The most straightforward way to use Ruffus is to hold the intermediate results after each stage
+ in a series of files with related file names.
+
+ Part of telling Ruffus how these pipeline stages or :term:`task` functions are connected
+ together is to write simple rules for how the file names for each stage follow on from each other.
+ Ruffus helps you to specify these file naming rules.
+
+ .. note::
+
+ **The best way to design a pipeline is to:**
+
+ * **Write down the file names of the data as it flows across your pipeline.**
+ Do these file names follow a *pattern* ?
+ * **Write down the names of functions which transforms the data at each stage of the pipeline.**
+
+
+.. _new_manual.suffix:
+
+**********************************************************************************************************************************************
+A different file name :ref:`suffix() <decorators.suffix>` for each pipeline stage
+**********************************************************************************************************************************************
+
+
+ The easiest and cleanest way to write Ruffus pipelines is to use a different suffix
+ for each stage of your pipeline.
+
+ We used this approach in :ref:`new_manual.introduction` and in :ref:`code <new_manual.transform_in_parallel.code>` from :ref:`new_manual.transform_in_parallel`:
+
+
+ .. code-block:: bash
+ :emphasize-lines: 1
+
+ #Task Name: File suffices
+ _________________________ ______________________
+ create_initial_file_pairs *.start
+ first_task *.output.1
+ second_task *.output.2
+
+
+ There is a long-standing convention of using file suffices to denote file type: for example, a **"compile"** task might convert **source** files of type ``*.c`` to **object** files of type ``*.o``.
+
+ We can think of Ruffus tasks as comprising:
+
+ * recipes in ``@transform(...)`` for transforming file names: changing ``.c`` to ``.o`` (e.g. ``AA.c -> AA.o``, ``BB.c -> BB.o``)
+ * recipes in a task function ``def foo_bar()`` for transforming your data: from **source** ``.c`` to **object** ``.o``
+
+
+ Let us review the Ruffus syntax for doing this:
+
+ .. code-block:: python
+ :emphasize-lines: 1,2,3
+
+ @transform( create_initial_file_pairs, # Input: Name of previous task(s)
+ suffix(".start"), # Matching suffix
+ ".output.1") # Replacement string
+ def first_task(input_files, output_file):
+ with open(output_file, "w"): pass
+
+
+ #. **Input**:
+
+ The first parameter for ``@transform`` can be a mixture of one or more:
+
+ * previous tasks (e.g. ``create_initial_file_pairs``)
+ * file names (all python strings are treated as paths)
+ * glob specifications (e.g. ``*.c``, ``/my/path/*.foo``)
+
+ Each element provides an input for the task. So if the previous task ``create_initial_file_pairs`` has five outputs, the next ``@transform`` task will accept
+ these as five separate inputs leading to five independent jobs.
+
+ #. :ref:`suffix() <decorators.suffix>`:
+
+ The second parameter ``suffix(".start")`` must match the end of the first string in each input.
+ For example, if ``create_initial_file_pairs`` produces the list ``['job1.a.start', 'job1.b.start']``, then ``suffix(".start")`` must match the first string, i.e. ``'job1.a.start'``.
+ If the input is a nested structure, it is iterated through recursively to find the first string.
+
+ .. note::
+
+ Inputs which do not match the suffix are discarded altogether.
+
+ #. **Replacement**:
+
+ The third parameter is the replacement for the suffix.
+ The pair of input strings in the step3 example produces the following output parameter
+
+ ::
+
+ input_parameters = ['job1.a.start', 'job1.b.start']
+ matching_input = 'job1.a.start'
+ output_parameter = 'job1.a.output.1'
+
+
+ When the pipeline is run, this results in the following equivalent call to ``first_task(...)``:
+
+ .. code-block:: python
+
+ first_task(['job1.a.start', 'job1.b.start'], 'job1.a.output.1')
+
+ The replacement parameter can itself be a list or any arbitrary complicated structure:
+
+ .. code-block:: python
+ :emphasize-lines: 1,2,3
+
+ @transform(create_initial_file_pairs, # Input
+ suffix(".a.start"), # Matching suffix
+ [".output.a.1", ".output.b.1", 45]) # Replacement list
+ def first_task(input_files, output_parameters):
+ print "input_parameters = ", input_files
+ print "output_parameters = ", output_parameters
+
+ In which case, all the strings are used as replacements, other values are left untouched, and we obtain the following:
+
+ .. code-block:: bash
+ :emphasize-lines: 1,5,9
+
+ # job #1
+ input = ['job1.a.start', 'job1.b.start']
+ output = ['job1.output.a.1', 'job1.output.b.1', 45]
+
+ # job #2
+ input = ['job2.a.start', 'job2.b.start']
+ output = ['job2.output.a.1', 'job2.output.b.1', 45]
+
+ # job #3
+ input = ['job3.a.start', 'job3.b.start']
+ output = ['job3.output.a.1', 'job3.output.b.1', 45]
+
+
+ Note how the task function is called with the value ``45`` *verbatim* because it is not a string.
+
+
+
+.. _new_manual.formatter:
+
+************************************************************************************************************************************************************
+ :ref:`formatter() <decorators.formatter>` manipulates pathnames and regular expressions
+************************************************************************************************************************************************************
+
+ :ref:`suffix() <decorators.suffix>` replacement is the cleanest and easiest way to generate suitable output file names for each stage in a pipeline.
+ Often, however, we require more complicated manipulations to specify our file names.
+ For example,
+
+ * It is common to have to change directories from a *data* directory to a *working* directory as the first step of a pipeline.
+ * Data management can be simplified by separating the files from each pipeline stage into their own directory.
+ * Information may have to be decoded from data file names, e.g. ``"experiment373.IBM.03March2002.txt"``
+
+
+ Though :ref:`formatter() <decorators.formatter>` is much more powerful, the principle and syntax are the same:
+ we take string elements from the **Input** and perform some replacements to generate the **Output** parameters.
+
+
+ :ref:`formatter() <decorators.formatter>`
+
+ * Allows easy manipulation of path subcomponents in the style of `os.path.split() <http://docs.python.org/2/library/os.path.html#os.path.split>`__, and `os.path.basename <http://docs.python.org/2/library/os.path.html#os.path.basename>`__
+ * Uses familiar python `string.format <http://docs.python.org/2/library/string.html#string-formatting>`__ syntax (See `string.format examples <http://docs.python.org/2/library/string.html#format-examples>`__. )
+ * Supports optional regular expression (`re <http://docs.python.org/2/library/re.html#re.MatchObject.group>`__) matches including named captures.
+ * Can refer to any file path (i.e. python string) in each input and is not limited like :ref:`suffix() <decorators.suffix>` to the first string.
+ * Can even refer to individual letters within a match
+
+
+========================
+Path name components
+========================
+
+ :ref:`formatter() <decorators.formatter>` breaks down each input pathname into path name components which can then be recombined in whichever way by the replacement string.
+
+ Given an example string of :
+
+ .. code-block:: python
+
+ input_string = "/directory/to/a/file.name.ext"
+ formatter()
+
+ the path components are:
+
+ * ``basename``: The `base name <http://docs.python.org/2/library/os.path.html#os.path.basename>`__ *excluding* `extension <http://docs.python.org/2/library/os.path.html#os.path.splitext>`__, ``"file.name"``
+ * ``ext`` : The `extension <http://docs.python.org/2/library/os.path.html#os.path.splitext>`__, ``".ext"``
+ * ``path`` : The `dirname <http://docs.python.org/2/library/os.path.html#os.path.dirname>`__, ``"/directory/to/a"``
+ * ``subdir`` : A list of sub-directories in the ``path`` in reverse order, ``["a", "to", "directory", "/"]``
+ * ``subpath`` : A list of descending sub-paths in reverse order, ``["/directory/to/a", "/directory/to", "/directory", "/"]``
+
+
+ The replacement string refers to these components using python `string.format <http://docs.python.org/2/library/string.html#string-formatting>`__ style curly braces, e.g. ``"{NAME}"``.
+
+ We refer to an element from the Nth input string by index, for example:
+
+ * ``"{ext[0]}"`` is the extension of the first file name string in **Input**.
+ * ``"{basename[1]}"`` is the basename of the second file name in **Input**.
+ * ``"{basename[1][0:3]}"`` are the first three letters from the basename of the second file name in **Input**.
+
+
+ ``subdir`` and ``subpath`` were designed to help you navigate directory hierarchies with the minimum of fuss.
+ For example, you might want to graft a hierarchical path onto another location:
+ ``"{subpath[0][2]}/from/{subdir[0][0]}/{basename[0]}{ext[0]}"`` neatly replaces just one directory (``"to"``) in the path with another (``"from"``):
+
+ .. code-block:: python
+
+     replacement_string = "{subpath[0][2]}/from/{subdir[0][0]}/{basename[0]}{ext[0]}"
+
+     input_string  = "/directory/to/a/file.name.ext"
+     result_string = "/directory/from/a/file.name.ext"
+
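+ To convince yourself how these components recombine, here is a minimal sketch (not part of Ruffus itself) that reproduces the same substitution by hand with python's ``str.format()``, using the component values listed above:
+
+ .. code-block:: python
+
+     # path components of "/directory/to/a/file.name.ext", written out by hand
+     components = {
+         "basename" : ["file.name"],
+         "ext"      : [".ext"],
+         "path"     : ["/directory/to/a"],
+         "subdir"   : [["a", "to", "directory", "/"]],
+         "subpath"  : [["/directory/to/a", "/directory/to", "/directory", "/"]],
+     }
+
+     # the same replacement string as above, applied with str.format()
+     print "{subpath[0][2]}/from/{subdir[0][0]}/{basename[0]}{ext[0]}".format(**components)
+
+     # prints: /directory/from/a/file.name.ext
+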
+
+.. _new_manual.formatter.regex:
+
+================================================
+Filter and parse using regular expressions
+================================================
+
+ `Regular expression <http://docs.python.org/2/library/re.html#re.MatchObject.group>`__ matches can be used with the similar syntax.
+ Our example string can be parsed using the following regular expression:
+
+ .. code-block:: python
+
+ input_string = "/directory/to/a/file.name.ext"
+ formatter(r"/directory/(.+)/(?P<MYFILENAME>)\.ext")
+
+ We capture part of the path using ``(.+)``, and the base name using ``(?P<MYFILENAME>.+)``.
+ These `matching subgroups <http://docs.python.org/2/library/re.html#re.MatchObject.group>`__ can be referred to by index,
+ but for greater clarity the second, named capture can also be referred to by name, i.e. ``{MYFILENAME}``.
+
+
+ The regular expression components for the first string can thus be referred to as follows:
+
+ * ``{0[0]}`` : The entire match captured by index, ``"/directory/to/a/file.name.ext"``
+ * ``{1[0]}`` : The first match captured by index, ``"to/a"``
+ * ``{2[0]}`` : The second match captured by index, ``"file.name"``
+ * ``{MYFILENAME[0]}`` : The match captured by name, ``"file.name"``
+
+
+ If each input consists of a list of paths such as ``['job1.a.start', 'job1.b.start', 'job1.c.start']``, we can match each of them separately
+ by using as many regular expressions as necessary. For example:
+
+ .. code-block:: python
+
+ input_string = ['job1.a.start', 'job1.b.start', 'job1.c.start']
+ # Regular expression matches for 1st, 2nd but not 3rd element
+ formatter(".+a.start", "b.start$")
+
+
+ Or if you only wanted regular expression matches for the second file name (string), pad with ``None``:
+
+ .. code-block:: python
+
+ input_string = ['job1.a.start', 'job1.b.start', 'job1.c.start']
+ # Regular expression matches for 2nd but not 1st or 3rd elements
+ formatter(None, "b.start$")
+
+================================================================================================
+Using :ref:`@transform() <decorators.transform>` with :ref:`formatter() <decorators.formatter>`
+================================================================================================
+
+ We can put these together in the following example:
+
+ .. code-block:: python
+ :emphasize-lines: 21,22
+
+ from ruffus import *
+
+ # create initial files
+ @originate([ ['job1.a.start', 'job1.b.start'],
+ ['job2.a.start', 'job2.b.start'],
+ ['job3.a.start', 'job3.c.start'] ])
+ def create_initial_file_pairs(output_files):
+ # create both files as necessary
+ for output_file in output_files:
+ with open(output_file, "w") as oo: pass
+
+
+ #---------------------------------------------------------------
+ #
+ # formatter
+ #
+
+ # first task
+ @transform(create_initial_file_pairs, # Input
+
+ formatter(".+/job(?P<JOBNUMBER>\d+).a.start", # Extract job number
+ ".+/job[123].b.start"), # Match only "b" files
+
+ ["{path[0]}/jobs{JOBNUMBER[0]}.output.a.1", # Replacement list
+ "{path[1]}/jobs{JOBNUMBER[0]}.output.b.1", 45])
+ def first_task(input_files, output_parameters):
+ print "input_parameters = ", input_files
+ print "output_parameters = ", output_parameters
+
+
+ #
+ # Run
+ #
+ pipeline_run(verbose=0)
+
+
+ This produces:
+
+ .. code-block:: pycon
+
+ input_parameters = ['job1.a.start',
+ 'job1.b.start']
+ output_parameters = ['/home/lg/src/temp/jobs1.output.a.1',
+ '/home/lg/src/temp/jobs1.output.b.1', 45]
+
+ input_parameters = ['job2.a.start',
+ 'job2.b.start']
+ output_parameters = ['/home/lg/src/temp/jobs2.output.a.1',
+ '/home/lg/src/temp/jobs2.output.b.1', 45]
+
+
+
+ Notice that ``job3`` has ``'job3.c.start'`` as the second file.
+ This fails to match the regular expression and is discarded.
+
+ .. note::
+
+ Inputs whose regular expressions fail to match are simply ignored.
+
+ :ref:`formatter() <decorators.formatter>` regular expressions are thus very useful in filtering out all
+ files which do not match your specified criteria.
+
+ If some of your task inputs are a mixture of different file types, a simple ``formatter(".txt$")``, for example, will make
+ your code a lot simpler...
+
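+ For example, here is a minimal sketch (the file names are hypothetical and assumed to already exist on disk) where only the ``.txt`` inputs survive the ``formatter()`` filter and everything else is silently discarded:
+
+ .. code-block:: python
+
+     @transform(["data.txt", "notes.doc", "more_data.txt"],    # mixed input file types
+                formatter(r"\.txt$"),                          # keep only the .txt files
+                "{basename[0]}.processed")
+     def process_text_files(input_file, output_file):
+         # only two jobs: data.txt and more_data.txt; notes.doc is ignored
+         open(output_file, "w").close()
+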
+
+
+================================================================================================
+String substitution for "extra" arguments
+================================================================================================
+
+ The first two arguments for Ruffus task functions are special because they are the **Input** and **Output**
+ parameters which link different stages of a pipeline.
+
+
+ Python strings in these arguments are names of data files whose modification times indicate whether the pipeline is up to date or not.
+
+ Other arguments to task functions are not passed down the pipeline but consumed.
+ Any python strings they contain do not need to be file names. These extra arguments are very useful
+ for passing data to pipelined tasks, such as shared values, loggers, program options etc.
+
+ One helpful feature is that strings in these extra arguments are also subject to :ref:`formatter() <decorators.formatter>` string substitution.
+ This means you can leverage the parsing capabilities of Ruffus to decode information about the pipeline data files,
+ such as the directories you are running in and parts of the file name.
+
+ For example, if we wanted to know which files go with which "job number" in the previous example:
+
+
+ .. code-block:: python
+ :emphasize-lines: 21,22
+
+ from ruffus import *
+
+ # create initial files
+ @originate([ ['job1.a.start', 'job1.b.start'],
+ ['job2.a.start', 'job2.b.start'],
+ ['job3.a.start', 'job3.c.start'] ])
+ def create_initial_file_pairs(output_files):
+ for output_file in output_files:
+ with open(output_file, "w") as oo: pass
+
+
+ #---------------------------------------------------------------
+ #
+ # print job number as an extra argument
+ #
+
+ # first task
+ @transform(create_initial_file_pairs, # Input
+
+ formatter(".+/job(?P<JOBNUMBER>\d+).a.start", # Extract job number
+ ".+/job[123].b.start"), # Match only "b" files
+
+ ["{path[0]}/jobs{JOBNUMBER[0]}.output.a.1", # Replacement list
+ "{path[1]}/jobs{JOBNUMBER[0]}.output.b.1"],
+
+ "{JOBNUMBER[0]}"
+ def first_task(input_files, output_parameters, job_number):
+ print job_number, ":", input_files
+
+
+ pipeline_run(verbose=0)
+
+ ::
+
+ >>> pipeline_run(verbose=0)
+ 1 : ['job1.a.start', 'job1.b.start']
+ 2 : ['job2.a.start', 'job2.b.start']
+
+
+
+.. _new_manual.output_file_names.formatter.zoo:
+
+================================================================================================
+Changing directories using :ref:`formatter() <decorators.formatter>` in a zoo...
+================================================================================================
+
+ Here is a more fun example. We would like to feed the denizens of a zoo. Unfortunately, the file names for
+ these are spread over several directories. Ideally, we would like their food supply to be grouped more
+ sensibly. And, of course, we only want to feed the animals, not the plants.
+
+ I have colour coded the input and output files for this task to show how we would like to rearrange them:
+
+ .. image:: ../../images/simple_tutorial_zoo_animals_formatter_example.jpg
+ :scale: 50
+
+ .. code-block:: python
+ :emphasize-lines: 7,22,26,27,28
+
+ from ruffus import *
+
+ # Make directories
+ @mkdir(["tiger", "lion", "dog", "crocodile", "rose"])
+
+ @originate(
+ # List of animals and plants
+ [ "tiger/mammals.wild.animals",
+ "lion/mammals.wild.animals",
+ "lion/mammals.handreared.animals",
+ "dog/mammals.tame.animals",
+ "dog/mammals.wild.animals",
+ "crocodile/reptiles.wild.animals",
+ "rose/flowering.handreared.plants"])
+ def create_initial_files(output_file):
+ with open(output_file, "w") as oo: pass
+
+
+ # Put different animals in different directories depending on their clade
+ @transform(create_initial_files, # Input
+
+ formatter(".+/(?P<clade>\w+).(?P<tame>\w+).animals"), # Only animals: ignore plants!
+
+ "{subpath[0][1]}/{clade[0]}/{tame[0]}.{subdir[0][0]}.food", # Replacement
+
+ "{subpath[0][1]}/{clade[0]}", # new_directory
+ "{subdir[0][0]}", # animal_name
+ "{tame[0]}") # tameness
+ def feed(input_file, output_file, new_directory, animal_name, tameness):
+ print "Food for the {tameness:11s} {animal_name:9s} = {output_file:90s} will be placed in {new_directory}".format(**locals())
+
+ pipeline_run(verbose=0)
+
+
+ .. comment **
+
+ We can see that the food files for each animal are now grouped by clade in the same directory, which makes a lot more sense...
+
+ Note how we used ``subpath[0][1]`` to go up one level in the file path to build a new file name.
+
+ ::
+
+ >>> pipeline_run(verbose=0)
+ Food for the wild crocodile = ./reptiles/wild.crocodile.food will be placed in ./reptiles
+ Food for the tame dog = ./mammals/tame.dog.food will be placed in ./mammals
+ Food for the wild dog = ./mammals/wild.dog.food will be placed in ./mammals
+ Food for the handreared lion = ./mammals/handreared.lion.food will be placed in ./mammals
+ Food for the wild lion = ./mammals/wild.lion.food will be placed in ./mammals
+ Food for the wild tiger = ./mammals/wild.tiger.food will be placed in ./mammals
+
+
+.. _new_manual.regex:
+
+******************************************************************************
+ :ref:`regex() <decorators.regex>` manipulates file names via regular expressions
+******************************************************************************
+
+
+ If you are a hard core regular expressions fan, you may want to use :ref:`regex() <decorators.regex>` instead of :ref:`suffix() <decorators.suffix>` or :ref:`formatter() <decorators.formatter>`.
+
+ .. note::
+
+ :ref:`regex() <decorators.regex>` uses regular expressions like :ref:`formatter() <decorators.formatter>` but
+
+ * It only matches the first file name in the input. As described above, :ref:`formatter() <decorators.formatter>` can match any one or more of the input filename strings.
+ * It does not understand file paths so you may have to perform your own directory / file name parsing.
+ * String replacement uses syntax borrowed from `re.sub() <http://docs.python.org/2/library/re.html#re.sub>`__, rather than building a result from parsed regular expression (and file path) components
+
+ In general, :ref:`formatter() <decorators.formatter>` is more powerful: it was introduced in version 2.4 and is intended to be a more user-friendly replacement for :ref:`regex() <decorators.regex>`.
+
+ Let us see how the previous zoo example looks with :ref:`regex() <decorators.regex>`:
+
+
+ :ref:`formatter() <decorators.formatter>` code:
+
+ .. code-block:: python
+ :emphasize-lines: 4,6
+
+ # Put different animals in different directories depending on their clade
+ @transform(create_initial_files, # Input
+
+ formatter(".+/(?P<clade>\w+).(?P<tame>\w+).animals"), # Only animals: ignore plants!
+
+ "{subpath[0][1]}/{clade[0]}/{tame[0]}.{subdir[0][0]}.food", # Replacement
+
+ "{subpath[0][1]}/{clade[0]}", # new_directory
+ "{subdir[0][0]}", # animal_name
+ "{tame[0]}") # tameness
+ def feed(input_file, output_file, new_directory, animal_name, tameness):
+ print "Food for the {tameness:11s} {animal_name:9s} = {output_file:90s} will be placed in {new_directory}".format(**locals())
+
+ :ref:`regex() <decorators.regex>` code:
+
+ .. code-block:: python
+ :emphasize-lines: 4,6
+
+ # Put different animals in different directories depending on their clade
+ @transform(create_initial_files, # Input
+
+ regex(r"(.*?/?)(\w+)/(?P<clade>\w+).(?P<tame>\w+).animals"), # Only animals: ignore plants!
+
+ r"\1/\g<clade>/\g<tame>.\2.food", # Replacement
+
+ r"\1/\g<clade>", # new_directory
+ r"\2", # animal_name
+ "\g<tame>") # tameness
+ def feed(input_file, output_file, new_directory, animal_name, tameness):
+ print "Food for the {tameness:11s} {animal_name:9s} = {output_file:90s} will be placed in {new_directory}".format(**locals())
+
+
+ The regular expression to parse the input file path safely was a bit hairy to write, and it is not
+ clear that it handles all edge conditions (e.g. files in the root directory). Apart from that, if the
+ limitations of :ref:`regex() <decorators.regex>` do not preclude its use, then the two approaches
+ are not so different in practice.
+
+
+
+
diff --git a/doc/_build/html/_sources/tutorials/new_tutorial/output_file_names_code.txt b/doc/_build/html/_sources/tutorials/new_tutorial/output_file_names_code.txt
new file mode 100644
index 0000000..7e70f58
--- /dev/null
+++ b/doc/_build/html/_sources/tutorials/new_tutorial/output_file_names_code.txt
@@ -0,0 +1,248 @@
+.. include:: ../../global.inc
+.. include:: manual_chapter_numbers.inc
+
+.. _new_manual.output_file_names.code:
+
+############################################################################################################################################################################################################
+|new_manual.output_file_names.chapter_num|: Python Code for Specifying output file names with :ref:`formatter() <decorators.formatter>` and :ref:`regex() <decorators.regex>`
+############################################################################################################################################################################################################
+
+.. seealso::
+
+ * :ref:`Manual Table of Contents <new_manual.table_of_contents>`
+ * :ref:`suffix() <decorators.suffix>` syntax
+ * :ref:`formatter() <decorators.formatter>` syntax
+ * :ref:`regex() <decorators.regex>` syntax
+ * Back to |new_manual.output_file_names.chapter_num|: :ref:`Specifying output file names <new_manual.output_file_names>`
+
+************************************************************************
+Example Code for :ref:`suffix() <decorators.suffix>`
+************************************************************************
+ .. code-block:: python
+
+ from ruffus import *
+
+ #---------------------------------------------------------------
+ # create initial files
+ #
+ @originate([ ['job1.a.start', 'job1.b.start'],
+ ['job2.a.start', 'job2.b.start'],
+ ['job3.a.start', 'job3.b.start'] ])
+ def create_initial_file_pairs(output_files):
+ # create both files as necessary
+ for output_file in output_files:
+ with open(output_file, "w") as oo: pass
+
+ #---------------------------------------------------------------
+ #
+ # suffix
+ #
+ @transform(create_initial_file_pairs, # name of previous task(s) (or list of files, or a glob)
+ suffix(".start"), # matching suffix of the "input file"
+ [".output.a.1", 45, ".output.b.1"]) # resulting suffix
+ def first_task(input_files, output_parameters):
+ print " input_parameters = ", input_files
+ print " output_parameters = ", output_parameters
+
+
+ #
+ # Run
+ #
+ pipeline_run([first_task])
+
+
+
+************************************************************************
+Example Code for :ref:`formatter() <decorators.formatter>`
+************************************************************************
+
+ .. code-block:: python
+
+ from ruffus import *
+
+ # create initial files
+ @originate([ ['job1.a.start', 'job1.b.start'],
+ ['job2.a.start', 'job2.b.start'],
+ ['job3.a.start', 'job3.c.start'] ])
+ def create_initial_file_pairs(output_files):
+ # create both files as necessary
+ for output_file in output_files:
+ with open(output_file, "w") as oo: pass
+
+
+ #---------------------------------------------------------------
+ #
+ # formatter
+ #
+
+ # first task
+ @transform(create_initial_file_pairs, # Input
+
+ formatter(".+/job(?P<JOBNUMBER>\d+).a.start", # Extract job number
+ ".+/job[123].b.start"), # Match only "b" files
+
+ ["{path[0]}/jobs{JOBNUMBER[0]}.output.a.1", # Replacement list
+ "{path[1]}/jobs{JOBNUMBER[0]}.output.b.1", 45])
+ def first_task(input_files, output_parameters):
+ print "input_parameters = ", input_files
+ print "output_parameters = ", output_parameters
+
+
+ #
+ # Run
+ #
+ pipeline_run(verbose=0)
+
+
+****************************************************************************************************************
+Example Code for :ref:`formatter() <decorators.formatter>` with replacements in *extra* arguments
+****************************************************************************************************************
+
+
+ .. code-block:: python
+
+ from ruffus import *
+
+ # create initial files
+ @originate([ ['job1.a.start', 'job1.b.start'],
+ ['job2.a.start', 'job2.b.start'],
+ ['job3.a.start', 'job3.c.start'] ])
+ def create_initial_file_pairs(output_files):
+ for output_file in output_files:
+ with open(output_file, "w") as oo: pass
+
+
+ #---------------------------------------------------------------
+ #
+ # print job number as an extra argument
+ #
+
+ # first task
+ @transform(create_initial_file_pairs, # Input
+
+ formatter(".+/job(?P<JOBNUMBER>\d+).a.start", # Extract job number
+ ".+/job[123].b.start"), # Match only "b" files
+
+ ["{path[0]}/jobs{JOBNUMBER[0]}.output.a.1", # Replacement list
+ "{path[1]}/jobs{JOBNUMBER[0]}.output.b.1"],
+
+                   "{JOBNUMBER[0]}")
+ def first_task(input_files, output_parameters, job_number):
+ print job_number, ":", input_files
+
+
+ pipeline_run(verbose=0)
+
+
+****************************************************************************************************************
+Example Code for :ref:`formatter() <decorators.formatter>` in Zoos
+****************************************************************************************************************
+
+
+ .. code-block:: python
+
+ from ruffus import *
+
+ # Make directories
+ @mkdir(["tiger", "lion", "dog", "crocodile", "rose"])
+
+ @originate(
+ # List of animals and plants
+ [ "tiger/mammals.wild.animals",
+ "lion/mammals.wild.animals",
+ "lion/mammals.handreared.animals",
+ "dog/mammals.tame.animals",
+ "dog/mammals.wild.animals",
+ "crocodile/reptiles.wild.animals",
+ "rose/flowering.handreared.plants"])
+ def create_initial_files(output_file):
+ with open(output_file, "w") as oo: pass
+
+
+ # Put different animals in different directories depending on their clade
+ @transform(create_initial_files, # Input
+
+ formatter(".+/(?P<clade>\w+).(?P<tame>\w+).animals"), # Only animals: ignore plants!
+
+ "{subpath[0][1]}/{clade[0]}/{tame[0]}.{subdir[0][0]}.food", # Replacement
+
+ "{subpath[0][1]}/{clade[0]}", # new_directory
+ "{subdir[0][0]}", # animal_name
+ "{tame[0]}") # tameness
+ def feed(input_file, output_file, new_directory, animal_name, tameness):
+ print "Food for the {tameness:11s} {animal_name:9s} = {output_file:90s} will be placed in {new_directory}".format(**locals())
+
+
+ pipeline_run(verbose=0)
+
+
+ Results in:
+
+ ::
+
+ >>> pipeline_run(verbose=0)
+ Food for the wild crocodile = ./reptiles/wild.crocodile.food will be placed in ./reptiles
+ Food for the tame dog = ./mammals/tame.dog.food will be placed in ./mammals
+ Food for the wild dog = ./mammals/wild.dog.food will be placed in ./mammals
+ Food for the handreared lion = ./mammals/handreared.lion.food will be placed in ./mammals
+ Food for the wild lion = ./mammals/wild.lion.food will be placed in ./mammals
+ Food for the wild tiger = ./mammals/wild.tiger.food will be placed in ./mammals
+
+
+
+****************************************************************************************************************
+Example Code for :ref:`regex() <decorators.regex>` in zoos
+****************************************************************************************************************
+
+
+ .. code-block:: python
+
+ from ruffus import *
+
+ # Make directories
+ @mkdir(["tiger", "lion", "dog", "crocodile", "rose"])
+
+ @originate(
+ # List of animals and plants
+ [ "tiger/mammals.wild.animals",
+ "lion/mammals.wild.animals",
+ "lion/mammals.handreared.animals",
+ "dog/mammals.tame.animals",
+ "dog/mammals.wild.animals",
+ "crocodile/reptiles.wild.animals",
+ "rose/flowering.handreared.plants"])
+ def create_initial_files(output_file):
+ with open(output_file, "w") as oo: pass
+
+
+
+ # Put different animals in different directories depending on their clade
+ @transform(create_initial_files, # Input
+
+ regex(r"(.*?/?)(\w+)/(?P<clade>\w+).(?P<tame>\w+).animals"), # Only animals: ignore plants!
+
+ r"\1/\g<clade>/\g<tame>.\2.food", # Replacement
+
+ r"\1/\g<clade>", # new_directory
+ r"\2", # animal_name
+ "\g<tame>") # tameness
+ def feed(input_file, output_file, new_directory, animal_name, tameness):
+ print "Food for the {tameness:11s} {animal_name:9s} = {output_file:90s} will be placed in {new_directory}".format(**locals())
+
+
+ pipeline_run(verbose=0)
+
+
+ Results in:
+
+ ::
+
+ >>> pipeline_run(verbose=0)
+ Food for the wild crocodile = reptiles/wild.crocodile.food will be placed in reptiles
+ Food for the tame dog = mammals/tame.dog.food will be placed in mammals
+ Food for the wild dog = mammals/wild.dog.food will be placed in mammals
+ Food for the handreared lion = mammals/handreared.lion.food will be placed in mammals
+ Food for the wild lion = mammals/wild.lion.food will be placed in mammals
+ Food for the wild tiger = mammals/wild.tiger.food will be placed in mammals
+
+
diff --git a/doc/_build/html/_sources/tutorials/new_tutorial/parallel.txt b/doc/_build/html/_sources/tutorials/new_tutorial/parallel.txt
new file mode 100644
index 0000000..90a825f
--- /dev/null
+++ b/doc/_build/html/_sources/tutorials/new_tutorial/parallel.txt
@@ -0,0 +1,63 @@
+.. include:: ../../global.inc
+.. include:: manual_chapter_numbers.inc
+
+.. index::
+ pair: @parallel; Tutorial
+
+.. _new_manual.deprecated_parallel:
+
+####################################################################################################################################################
+|new_manual.parallel.chapter_num|: Esoteric: Running jobs in parallel without files using :ref:`@parallel<decorators.parallel>`
+####################################################################################################################################################
+
+
+.. seealso::
+
+ * :ref:`Manual Table of Contents <new_manual.table_of_contents>`
+ * :ref:`@parallel<decorators.parallel>` syntax in detail
+
+
+
+***************************************
+**@parallel**
+***************************************
+
+ **@parallel** supplies parameters for multiple **jobs** exactly like :ref:`@files<new_manual.deprecated_files>` except that:
+
+    #. The first two parameters are not treated as *inputs* and *outputs* parameters,
+       and strings are not assumed to be file names
+    #. Thus no check of whether each job is up-to-date is made using *inputs* and *outputs* files
+    #. No expansion of |glob|_ patterns or of *output* from previous tasks is carried out.
+
+ This syntax is most useful when a pipeline stage does not involve creating or consuming any files, and
+ you wish to forego the conveniences of :ref:`@files<new_manual.deprecated_files>`, :ref:`@transform<new_manual.transform>` etc.
+
+ The following code performs some arithmetic in parallel:
+
+ ::
+
+ import sys
+ from ruffus import *
+ parameters = [
+ ['A', 1, 2], # 1st job
+ ['B', 3, 4], # 2nd job
+ ['C', 5, 6], # 3rd job
+ ]
+ @parallel(parameters)
+ def parallel_task(name, param1, param2):
+ sys.stderr.write(" Parallel task %s: " % name)
+ sys.stderr.write("%d + %d = %d\n" % (param1, param2, param1 + param2))
+
+ pipeline_run([parallel_task])
+
+ produces the following::
+
+ Task = parallel_task
+ Parallel task A: 1 + 2 = 3
+ Job = ["A", 1, 2] completed
+ Parallel task B: 3 + 4 = 7
+ Job = ["B", 3, 4] completed
+ Parallel task C: 5 + 6 = 11
+ Job = ["C", 5, 6] completed
+
+
diff --git a/doc/_build/html/_sources/tutorials/new_tutorial/pipeline_printout.txt b/doc/_build/html/_sources/tutorials/new_tutorial/pipeline_printout.txt
new file mode 100644
index 0000000..02ad07d
--- /dev/null
+++ b/doc/_build/html/_sources/tutorials/new_tutorial/pipeline_printout.txt
@@ -0,0 +1,215 @@
+.. include:: ../../global.inc
+.. include:: manual_chapter_numbers.inc
+
+.. index::
+ pair: pipeline_printout; Tutorial
+
+.. _new_manual.pipeline_printout:
+
+############################################################################################################################################################################################################
+|new_manual.pipeline_printout.chapter_num|: Understanding how your pipeline works with :ref:`pipeline_printout(...) <pipeline_functions.pipeline_printout>`
+############################################################################################################################################################################################################
+
+
+.. seealso::
+
+ * :ref:`Manual Table of Contents <new_manual.table_of_contents>`
+ * :ref:`pipeline_printout(...) <pipeline_functions.pipeline_printout>` syntax
+ * :ref:`Python Code for this chapter <new_manual.pipeline_printout.code>`
+
+
+.. note::
+
+ * **Whether you are learning or developing ruffus pipelines, your best friend is** :ref:`pipeline_printout(...) <pipeline_functions.pipeline_printout>`
+ **This shows the exact parameters and files as they are passed through the pipeline.**
+
+ * **We also** *strongly* **recommend you use the** ``Ruffus.cmdline`` **convenience module which**
+ **will take care of all the command line arguments for you. See** :ref:`new_manual.cmdline`.
+
+
+
+=======================================
+Printing out which jobs will be run
+=======================================
+
+    :ref:`pipeline_printout(...) <pipeline_functions.pipeline_printout>` takes the same parameters as :ref:`pipeline_run(...) <pipeline_functions.pipeline_run>` but just prints
+ the tasks which are and are not up-to-date.
+
+ The ``verbose`` parameter controls how much detail is displayed.
+
+ Let us take the pipelined code we previously wrote in
+ |new_manual.transform_in_parallel.chapter_num| :ref:`More on @transform-ing data and @originate <new_manual.transform_in_parallel.code>`
+ but call :ref:`pipeline_printout(...) <pipeline_functions.pipeline_printout>` instead of
+ :ref:`pipeline_run(...) <pipeline_functions.pipeline_run>`.
+ This lists the tasks which will be run in the pipeline:
+
+ ::
+
+ >>> import sys
+ >>> pipeline_printout(sys.stdout, [second_task])
+
+ ________________________________________
+ Tasks which will be run:
+
+ Task = create_initial_file_pairs
+ Task = first_task
+ Task = second_task
+ ________________________________________
+
+
+
+ To see the input and output parameters of each job in the pipeline, try increasing the verbosity from the default (``1``) to ``3``
+    (see the :ref:`code <new_manual.pipeline_printout.code>` for this chapter).
+
+ This is very useful for checking that the input and output parameters have been specified correctly.
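+
+    For example (using the ``second_task`` target from the :ref:`code <new_manual.pipeline_printout.code>` for this chapter; substitute the tasks of your own pipeline):
+
+    .. code-block:: python
+
+        import sys
+
+        # Out-of-date task names only (the default verbosity)
+        pipeline_printout(sys.stdout, [second_task], verbose = 1)
+
+        # Also show each out-of-date job with its input and output parameters
+        pipeline_printout(sys.stdout, [second_task], verbose = 3)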
+
+=============================================
+Determining which jobs are out-of-date or not
+=============================================
+
+ It is often useful to see which tasks are or are not up-to-date. For example, if we
+ were to run the pipeline in full, and then modify one of the intermediate files, the
+ pipeline would be partially out of date.
+
+
+    Let us start by running the pipeline in full, and then modify ``job1.a.output.1`` so that the second task appears out-of-date:
+
+ .. code-block:: python
+ :emphasize-lines: 3
+
+ pipeline_run([second_task])
+
+        # "touch" job1.a.output.1
+ open("job1.a.output.1", "w").close()
+
+
+ Run :ref:`pipeline_printout(...) <pipeline_functions.pipeline_printout>` with a verbosity of ``5``.
+
+ This will tell you exactly why ``second_task(...)`` needs to be re-run:
+ because ``job1.a.output.1`` has a file modification time *after* ``job1.a.output.2`` (highlighted):
+
+
+ .. code-block:: pycon
+ :emphasize-lines: 9
+
+ >>> pipeline_printout(sys.stdout, [second_task], verbose = 5)
+
+ ________________________________________
+ Tasks which are up-to-date:
+
+ Task = create_initial_file_pairs
+ Task = first_task
+
+ ________________________________________
+
+ ________________________________________
+ Tasks which will be run:
+
+ Task = second_task
+ Job = [job1.a.output.1
+ -> job1.a.output.2]
+ >>> # File modification times shown for out of date files
+ Job needs update:
+ Input files:
+ * 22 Jul 2014 15:29:19.33: job1.a.output.1
+ Output files:
+ * 22 Jul 2014 15:29:07.53: job1.a.output.2
+
+ Job = [job2.a.output.1
+ -> job2.a.output.2]
+ Job = [job3.a.output.1
+ -> job3.a.output.2]
+
+ ________________________________________
+
+
+ N.B. At a verbosity of 5, even jobs which are up-to-date in ``second_task`` are displayed.
+
+
+
+=============================================
+Verbosity levels
+=============================================
+
+ The verbosity levels for :ref:`pipeline_printout(...) <pipeline_functions.pipeline_printout>` and :ref:`pipeline_run(...) <pipeline_functions.pipeline_run>`
+    can be specified from ``verbose = 0`` (print out nothing) to the extreme verbosity of ``verbose = 6``. Verbosity levels of 10 and above are reserved for the internal
+    debugging of Ruffus.
+
+ * level **0** : *nothing*
+ * level **1** : *Out-of-date Task names*
+ * level **2** : *All Tasks (including any task function docstrings)*
+ * level **3** : *Out-of-date Jobs in Out-of-date Tasks, no explanation*
+ * level **4** : *Out-of-date Jobs in Out-of-date Tasks, with explanations and warnings*
+    * level **5** : *All Jobs in Out-of-date Tasks (up-to-date Tasks are listed by name only)*
+ * level **6** : *All jobs in All Tasks whether out of date or not*
+ * level **10**: *logs messages useful only for debugging ruffus pipeline code*
+
+.. _new_manual.pipeline_printout.verbose_abbreviated_path:
+
+==========================================================================================
+Abbreviating long file paths with ``verbose_abbreviated_path``
+==========================================================================================
+
+ Pipelines often produce interminable lists of deeply nested filenames. It would be nice to be able to abbreviate this
+ to just enough information to follow the progress.
+
+ The ``verbose_abbreviated_path`` parameter specifies that :ref:`pipeline_printout(...) <pipeline_functions.pipeline_printout>` and :ref:`pipeline_run(...) <pipeline_functions.pipeline_run>` only display
+
+    1) the last ``NNN`` levels of each file path, or
+    2) the message truncated to a specified ``MMM`` characters (to fit onto a line, for example). ``MMM`` is specified by setting ``verbose_abbreviated_path = -MMM``, i.e. a negative value.
+
+    Note that the number of characters specified covers just the combined lengths of the input and output parameters,
+    not the entire indented line. You may need to specify a smaller limit than you expect (e.g. ``60`` rather than ``80``).
+
+ .. code-block:: python
+
+ pipeline_printout(verbose_abbreviated_path = NNN)
+ pipeline_run(verbose_abbreviated_path = -MMM)
+
+
+
+    ``verbose_abbreviated_path`` defaults to ``2``.
+
+
+ For example:
+
+ Given ``["aa/bb/cc/dddd.txt", "aaa/bbbb/cccc/eeed/eeee/ffff/gggg.txt"]``
+
+
+ .. code-block:: python
+ :emphasize-lines: 1,4,8,19
+
+ # Original relative paths
+ "[aa/bb/cc/dddd.txt, aaa/bbbb/cccc/eeed/eeee/ffff/gggg.txt]"
+
+ # Full abspath
+ verbose_abbreviated_path = 0
+ "[/test/ruffus/src/aa/bb/cc/dddd.txt, /test/ruffus/src/aaa/bbbb/cccc/eeed/eeee/ffff/gggg.txt]"
+
+        # Specified number of nested directory levels
+ verbose_abbreviated_path = 1
+ "[.../dddd.txt, .../gggg.txt]"
+
+ verbose_abbreviated_path = 2
+ "[.../cc/dddd.txt, .../ffff/gggg.txt]"
+
+ verbose_abbreviated_path = 3
+ "[.../bb/cc/dddd.txt, .../eeee/ffff/gggg.txt]"
+
+
+ # Truncated to MMM characters
+ verbose_abbreviated_path = -60
+ "<???> /bb/cc/dddd.txt, aaa/bbbb/cccc/eeed/eeee/ffff/gggg.txt]"
+
+
+=============================================
+Getting a list of all tasks in a pipeline
+=============================================
+
+    If you just want a list of all tasks (Ruffus decorated function names), then you can
+    simply call :ref:`pipeline_get_task_names(...) <pipeline_functions.pipeline_get_task_names>`.
+
+ This doesn't touch any pipeline code or even check to see if the pipeline is connected up properly.
+
+ However, it is sometimes useful to allow users at the command line to choose from a list of
+ possible tasks as a target.
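+
+    A minimal sketch (assuming ``from ruffus import *`` has been run and your pipeline tasks are defined in the same script):
+
+    .. code-block:: python
+
+        # Print one task name per line, e.g. so that the user can pick a target
+        for task_name in pipeline_get_task_names():
+            print task_name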
diff --git a/doc/_build/html/_sources/tutorials/new_tutorial/pipeline_printout_code.txt b/doc/_build/html/_sources/tutorials/new_tutorial/pipeline_printout_code.txt
new file mode 100644
index 0000000..f8eedf3
--- /dev/null
+++ b/doc/_build/html/_sources/tutorials/new_tutorial/pipeline_printout_code.txt
@@ -0,0 +1,203 @@
+.. include:: ../../global.inc
+.. include:: manual_chapter_numbers.inc
+
+.. _new_manual.pipeline_printout.code:
+
+############################################################################################################################################################################################################
+|new_manual.pipeline_printout.chapter_num|: Python Code for Understanding how your pipeline works with :ref:`pipeline_printout(...) <pipeline_functions.pipeline_printout>`
+############################################################################################################################################################################################################
+
+.. seealso::
+
+ * :ref:`Manual Table of Contents <new_manual.table_of_contents>`
+ * :ref:`pipeline_printout(...) <pipeline_functions.pipeline_printout>` syntax
+ * Back to |new_manual.pipeline_printout.chapter_num|: :ref:`Understanding how your pipeline works <new_manual.pipeline_printout>`
+
+******************************************
+Display the initial state of the pipeline
+******************************************
+ ::
+
+ from ruffus import *
+ import sys
+
+ #---------------------------------------------------------------
+ # create initial files
+ #
+ @originate([ ['job1.a.start', 'job1.b.start'],
+ ['job2.a.start', 'job2.b.start'],
+ ['job3.a.start', 'job3.b.start'] ])
+ def create_initial_file_pairs(output_files):
+ # create both files as necessary
+ for output_file in output_files:
+ with open(output_file, "w") as oo: pass
+
+ #---------------------------------------------------------------
+ # first task
+ @transform(create_initial_file_pairs, suffix(".start"), ".output.1")
+ def first_task(input_files, output_file):
+ with open(output_file, "w"): pass
+
+
+ #---------------------------------------------------------------
+ # second task
+ @transform(first_task, suffix(".output.1"), ".output.2")
+ def second_task(input_files, output_file):
+ with open(output_file, "w"): pass
+
+ pipeline_printout(sys.stdout, [second_task], verbose = 1)
+ pipeline_printout(sys.stdout, [second_task], verbose = 3)
+
+************************************
+Normal Output
+************************************
+ ::
+
+ >>> pipeline_printout(sys.stdout, [second_task], verbose = 1)
+
+ ________________________________________
+ Tasks which will be run:
+
+ Task = create_initial_file_pairs
+ Task = first_task
+ Task = second_task
+
+
+************************************
+High Verbosity Output
+************************************
+
+ ::
+
+ >>> pipeline_printout(sys.stdout, [second_task], verbose = 4)
+
+ ________________________________________
+ Tasks which will be run:
+
+ Task = create_initial_file_pairs
+ Job = [None
+ -> job1.a.start
+ -> job1.b.start]
+ Job needs update: Missing files [job1.a.start, job1.b.start]
+ Job = [None
+ -> job2.a.start
+ -> job2.b.start]
+ Job needs update: Missing files [job2.a.start, job2.b.start]
+ Job = [None
+ -> job3.a.start
+ -> job3.b.start]
+ Job needs update: Missing files [job3.a.start, job3.b.start]
+
+ Task = first_task
+ Job = [[job1.a.start, job1.b.start]
+ -> job1.a.output.1]
+ Job needs update: Missing files [job1.a.start, job1.b.start, job1.a.output.1]
+ Job = [[job2.a.start, job2.b.start]
+ -> job2.a.output.1]
+ Job needs update: Missing files [job2.a.start, job2.b.start, job2.a.output.1]
+ Job = [[job3.a.start, job3.b.start]
+ -> job3.a.output.1]
+ Job needs update: Missing files [job3.a.start, job3.b.start, job3.a.output.1]
+
+ Task = second_task
+ Job = [job1.a.output.1
+ -> job1.a.output.2]
+ Job needs update: Missing files [job1.a.output.1, job1.a.output.2]
+ Job = [job2.a.output.1
+ -> job2.a.output.2]
+ Job needs update: Missing files [job2.a.output.1, job2.a.output.2]
+ Job = [job3.a.output.1
+ -> job3.a.output.2]
+ Job needs update: Missing files [job3.a.output.1, job3.a.output.2]
+
+ ________________________________________
+
+******************************************
+Display the partially up-to-date pipeline
+******************************************
+    Run the pipeline, modify ``job1.a.output.1`` so that the second task is no longer up-to-date,
+    and print out the pipeline stages again::
+
+ >>> pipeline_run([second_task], verbose=3)
+ Task enters queue = create_initial_file_pairs
+ Job = [None -> [job1.a.start, job1.b.start]]
+ Job = [None -> [job2.a.start, job2.b.start]]
+ Job = [None -> [job3.a.start, job3.b.start]]
+ Job = [None -> [job1.a.start, job1.b.start]] completed
+ Job = [None -> [job2.a.start, job2.b.start]] completed
+ Job = [None -> [job3.a.start, job3.b.start]] completed
+ Completed Task = create_initial_file_pairs
+ Task enters queue = first_task
+ Job = [[job1.a.start, job1.b.start] -> job1.a.output.1]
+ Job = [[job2.a.start, job2.b.start] -> job2.a.output.1]
+ Job = [[job3.a.start, job3.b.start] -> job3.a.output.1]
+ Job = [[job1.a.start, job1.b.start] -> job1.a.output.1] completed
+ Job = [[job2.a.start, job2.b.start] -> job2.a.output.1] completed
+ Job = [[job3.a.start, job3.b.start] -> job3.a.output.1] completed
+ Completed Task = first_task
+ Task enters queue = second_task
+ Job = [job1.a.output.1 -> job1.a.output.2]
+ Job = [job2.a.output.1 -> job2.a.output.2]
+ Job = [job3.a.output.1 -> job3.a.output.2]
+ Job = [job1.a.output.1 -> job1.a.output.2] completed
+ Job = [job2.a.output.1 -> job2.a.output.2] completed
+ Job = [job3.a.output.1 -> job3.a.output.2] completed
+ Completed Task = second_task
+
+
+        # modify job1.a.output.1
+ >>> open("job1.a.output.1", "w").close()
+
+ At a verbosity of 6, even jobs which are up-to-date will be displayed::
+
+ >>> pipeline_printout(sys.stdout, [second_task], verbose = 6)
+
+ ________________________________________
+ Tasks which are up-to-date:
+
+ Task = create_initial_file_pairs
+ Job = [None
+ -> job1.a.start
+ -> job1.b.start]
+ Job = [None
+ -> job2.a.start
+ -> job2.b.start]
+ Job = [None
+ -> job3.a.start
+ -> job3.b.start]
+
+ Task = first_task
+ Job = [[job1.a.start, job1.b.start]
+ -> job1.a.output.1]
+ Job = [[job2.a.start, job2.b.start]
+ -> job2.a.output.1]
+ Job = [[job3.a.start, job3.b.start]
+ -> job3.a.output.1]
+
+ ________________________________________
+
+
+
+ ________________________________________
+ Tasks which will be run:
+
+ Task = second_task
+ Job = [job1.a.output.1
+ -> job1.a.output.2]
+ Job needs update:
+ Input files:
+ * 22 Jul 2014 15:29:19.33: job1.a.output.1
+ Output files:
+ * 22 Jul 2014 15:29:07.53: job1.a.output.2
+
+ Job = [job2.a.output.1
+ -> job2.a.output.2]
+ Job = [job3.a.output.1
+ -> job3.a.output.2]
+
+ ________________________________________
+
+
+
+    We can now see that there is only one job in ``second_task`` which needs to be re-run,
+    because ``job1.a.output.1`` has been modified after ``job1.a.output.2``.
diff --git a/doc/_build/html/_sources/tutorials/new_tutorial/pipeline_printout_graph.txt b/doc/_build/html/_sources/tutorials/new_tutorial/pipeline_printout_graph.txt
new file mode 100644
index 0000000..5076efe
--- /dev/null
+++ b/doc/_build/html/_sources/tutorials/new_tutorial/pipeline_printout_graph.txt
@@ -0,0 +1,170 @@
+.. include:: ../../global.inc
+.. include:: manual_chapter_numbers.inc
+
+.. index::
+ pair: pipeline_printout_graph; Tutorial
+
+.. _new_manual.pipeline_printout_graph:
+
+############################################################################################################################################################################################################
+|new_manual.pipeline_printout_graph.chapter_num|: Displaying the pipeline visually with :ref:`pipeline_printout_graph(...) <pipeline_functions.pipeline_printout_graph>`
+############################################################################################################################################################################################################
+
+.. seealso::
+
+ * :ref:`Manual Table of Contents <new_manual.table_of_contents>`
+ * :ref:`pipeline_printout_graph(...) <pipeline_functions.pipeline_printout_graph>` syntax
+ * :ref:`@graphviz(...) <decorators.graphviz>` syntax
+
+.. note::
+
+ Remember to look at the example code:
+
+ * :ref:`new_manual.pipeline_printout_graph.code`
+
+=============================================
+Printing out a flowchart of our pipeline
+=============================================
+
+ It is all very well being able to trace the data flow through the pipeline as text.
+ Sometimes, however, we need a bit of eye-candy!
+
+ We can see a flowchart for our fledgling pipeline by executing:
+
+ ::
+
+ pipeline_printout_graph ( 'flowchart.svg',
+ 'svg',
+ [second_task],
+ no_key_legend = False)
+
+ .. image:: ../../images/simple_tutorial_stage5_flowchart.png
+ :scale: 70
+
+
+ Flowcharts can be printed in a large number of formats including ``jpg``, ``svg``, ``png`` and ``pdf``.
+
+
+ .. note::
+
+ Flowcharts rely on the ``dot`` programme from `Graphviz <http://www.graphviz.org/>`__.
+
+ Please make sure this is installed.
+
+ There are 8 standard colour schemes, but you can further customise all the colours to your satisfaction:
+
+
+ .. image:: ../../images/flowchart_colour_schemes.png
+
+ See :ref:`here <new_manual.flowchart_colours>` for example code.
+
+================================================================
+Command line options made easier with ``ruffus.cmdline``
+================================================================
+
+
+ If you are using ``ruffus.cmdline``, then you can easily ask for a flowchart from the command line:
+
+ .. code-block:: bash
+
+ your_script.py --flowchart pipeline_flow_chart.png
+
+
+ The output format is deduced from the extension but can be specified manually:
+
+ .. code-block:: bash
+
+ # specify format. Otherwise, deduced from the extension
+ your_script.py --flowchart pipeline_flow_chart.png --flowchart_format png
+
+ Print the flow chart horizontally or vertically...
+
+ .. code-block:: bash
+
+ # flowchart proceeds from left to right , rather than from top to bottom
+ your_script.py --flowchart pipeline_flow_chart.png --draw_graph_horizontally
+
+ ...with or without a key legend
+
+ .. code-block:: bash
+
+ # Draw key legend
+ your_script.py --flowchart pipeline_flow_chart.png --key_legend_in_graph
+
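+    These command line options are provided automatically once your script hands over argument handling to ``ruffus.cmdline``.
+    A minimal sketch of the usual boilerplate (see :ref:`new_manual.cmdline`; the description string is just a placeholder):
+
+    .. code-block:: python
+
+        from ruffus import *
+
+        # ruffus.cmdline builds an argparse parser with the standard Ruffus options
+        # (this is what provides --flowchart, --flowchart_format and friends)
+        parser = cmdline.get_argparse(description = "My pipeline")
+        options = parser.parse_args()
+
+        #   <<< your pipelined task functions go here >>>
+
+        # Act on the parsed options: run the pipeline, print a flowchart, etc.
+        cmdline.run(options)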
+
+=============================================
+Horribly complicated pipelines!
+=============================================
+ Flowcharts are especially useful if you have really complicated pipelines, such as
+
+ .. image:: ../../images/simple_tutorial_complex_flowchart.png
+ :scale: 70
+
+
+=============================================
+Circular dependency errors in pipelines!
+=============================================
+    Especially if the pipeline is not set up properly and vicious circular dependencies
+ are present:
+
+ .. image:: ../../images/simple_tutorial_complex_flowchart_error.png
+ :scale: 70
+
+==========================================================================================
+``@graphviz``: Customising the appearance of each task
+==========================================================================================
+
+    The graphic for each task can be further customised as you please by adding
+    `graphviz attributes <http://www.graphviz.org/doc/info/attrs.html>`__ such as the URL, shape or colour
+    directly to that node using the ``@graphviz`` decorator.
+
+ For example, we can customise the graphic for ``myTask()`` to look like:
+
+ .. image:: ../../images/history_html_flowchart.png
+ :scale: 30
+
+ by adding the requisite attributes as follows:
+
+
+
+ .. code-block:: python
+
+
+ @graphviz(URL='"http://cnn.com"', fillcolor = '"#FFCCCC"',
+ color = '"#FF0000"', pencolor='"#FF0000"', fontcolor='"#4B6000"',
+ label_suffix = "???", label_prefix = "What is this?<BR/> ",
+ label = "<What <FONT COLOR=\"red\">is</FONT>this>",
+ shape= "component", height = 1.5, peripheries = 5,
+ style="dashed")
+ def Up_to_date_task2(infile, outfile):
+ pass
+
+ # Can use dictionary if you wish...
+ graphviz_params = {"URL":"http://cnn.com", "fontcolor": '"#FF00FF"'}
+ @graphviz(**graphviz_params)
+ def myTask(input,output):
+ pass
+
+ .. **
+
+
+    You can even use HTML formatting in task names via the ``label`` parameter, including specifying line wraps
+    (as in the above example). However, HTML labels **must** be enclosed in ``<`` and ``>``.
+
+
+ .. code-block:: python
+
+ label = "<Line <BR/> wrapped task_name()>"
+
+ Otherwise, you can also opt to keep the task name and wrap it with a prefix and suffix:
+
+ .. code-block:: python
+
+ label_suffix = "??? ", label_prefix = ": What is this?"
+
+ The ``URL`` attribute allows the generation of clickable svg, and also client / server
+ side image maps usable in web pages.
+    See the `Graphviz documentation <http://www.graphviz.org/content/output-formats#dimap>`__.
+
+
+
diff --git a/doc/_build/html/_sources/tutorials/new_tutorial/pipeline_printout_graph_code.txt b/doc/_build/html/_sources/tutorials/new_tutorial/pipeline_printout_graph_code.txt
new file mode 100644
index 0000000..c836b83
--- /dev/null
+++ b/doc/_build/html/_sources/tutorials/new_tutorial/pipeline_printout_graph_code.txt
@@ -0,0 +1,109 @@
+.. include:: ../../global.inc
+.. include:: manual_chapter_numbers.inc
+
+.. _new_manual.pipeline_printout_graph.code:
+
+############################################################################################################################################################################################################
+|new_manual.pipeline_printout_graph.chapter_num|: Python Code for Displaying the pipeline visually with :ref:`pipeline_printout_graph(...) <pipeline_functions.pipeline_printout_graph>`
+############################################################################################################################################################################################################
+
+.. seealso::
+
+ * :ref:`Manual Table of Contents <new_manual.table_of_contents>`
+ * :ref:`pipeline_printout_graph(...) <pipeline_functions.pipeline_printout_graph>` syntax
+ * Back to |new_manual.pipeline_printout_graph.chapter_num|: :ref:`Displaying the pipeline visually <new_manual.pipeline_printout_graph>`
+
+************************************
+Code
+************************************
+ .. code-block:: python
+ :emphasize-lines: 28, 51
+ :linenos:
+
+ from ruffus import *
+ import sys
+
+ #---------------------------------------------------------------
+ # create initial files
+ #
+ @originate([ ['job1.a.start', 'job1.b.start'],
+ ['job2.a.start', 'job2.b.start'],
+ ['job3.a.start', 'job3.b.start'] ])
+ def create_initial_file_pairs(output_files):
+ # create both files as necessary
+ for output_file in output_files:
+ with open(output_file, "w") as oo: pass
+
+ #---------------------------------------------------------------
+ # first task
+ @transform(create_initial_file_pairs, suffix(".start"), ".output.1")
+ def first_task(input_files, output_file):
+ with open(output_file, "w"): pass
+
+
+ #---------------------------------------------------------------
+ # second task
+ @transform(first_task, suffix(".output.1"), ".output.2")
+ def second_task(input_files, output_file):
+ with open(output_file, "w"): pass
+
+ # Print graph before running pipeline
+
+ #---------------------------------------------------------------
+ #
+ # Show flow chart and tasks before running the pipeline
+ #
+ print "Show flow chart and tasks before running the pipeline"
+ pipeline_printout_graph ( open("simple_tutorial_stage5_before.png", "w"),
+ "png",
+ [second_task],
+ minimal_key_legend=True)
+
+ #---------------------------------------------------------------
+ #
+ # Run
+ #
+ pipeline_run([second_task])
+
+
+        # modify job1.a.output.1
+ open("job1.a.output.1", "w").close()
+
+
+        # Print graph after everything apart from ``job1.a.output.1`` is up to date
+
+ #---------------------------------------------------------------
+ #
+ # Show flow chart and tasks after running the pipeline
+ #
+ print "Show flow chart and tasks after running the pipeline"
+ pipeline_printout_graph ( open("simple_tutorial_stage5_after.png", "w"),
+ "png",
+ [second_task],
+ no_key_legend=True)
+
+
+************************************
+Resulting Flowcharts
+************************************
+ +-------------------------------------------------------------+-----------------------------------------------------------------------+
+ | .. image:: ../../images/simple_tutorial_stage5_before.png | .. image:: ../../images/simple_tutorial_stage5_after.png |
+ | :alt: Before running the pipeline | :alt: After running the pipeline |
+ | :scale: 95 | :scale: 95 |
+ | :align: center | :align: center |
+ | | |
+ | .. centered:: Before | .. centered:: After |
+ | | |
+ +-------------------------------------------------------------+-----------------------------------------------------------------------+
+
+ +-------------------------------------------------------------------------------------------------------------------------------------+
+ | .. image:: ../../images/tutorial_key.png |
+ | :alt: Legend key |
+ | :scale: 50 |
+ | :align: center |
+ | |
+ | .. centered:: Legend |
+ | |
+ +-------------------------------------------------------------------------------------------------------------------------------------+
+
+
diff --git a/doc/_build/html/_sources/tutorials/new_tutorial/posttask.txt b/doc/_build/html/_sources/tutorials/new_tutorial/posttask.txt
new file mode 100644
index 0000000..a468a27
--- /dev/null
+++ b/doc/_build/html/_sources/tutorials/new_tutorial/posttask.txt
@@ -0,0 +1,122 @@
+.. include:: ../../global.inc
+.. include:: manual_chapter_numbers.inc
+
+.. index::
+ pair: posttask; Tutorial
+
+.. _new_manual.posttask:
+
+####################################################################################################################################################
+|new_manual.posttask.chapter_num|: Signal the completion of each stage of our pipeline with :ref:`@posttask <decorators.posttask>`
+####################################################################################################################################################
+
+
+.. seealso::
+
+ * :ref:`Manual Table of Contents <new_manual.table_of_contents>`
+ * :ref:`@posttask <decorators.posttask>` syntax
+
+
+***********************
+Overview
+***********************
+
+
+
+ It is often useful to signal the completion of each task by specifying a specific
+ action to be taken or function to be called. This can range from
+ printing out some message, or `touching <http://en.wikipedia.org/wiki/Touch_(Unix)>`__ some sentinel file,
+    to emailing the author. This is particularly useful if the :term:`task` is a recipe applied to an unspecified number
+    of parameters in parallel in different :term:`job`\ s. If the task is never run, or if it
+    fails, then needless to say no task completion action will happen.
+
+
+ *Ruffus* uses the :ref:`@posttask <decorators.posttask>` decorator for this purpose.
+
+
+=================
+**@posttask**
+=================
+
+ We can signal the completion of each task by specifying
+ one or more function(s) using :ref:`@posttask <decorators.posttask>` ::
+
+ from ruffus import *
+
+ def task_finished():
+ print "hooray"
+
+ @posttask(task_finished)
+ @originate("a.1")
+ def create_if_necessary(output_file):
+ open(output_file, "w")
+
+ pipeline_run([create_if_necessary])
+
+
+ This is such a short function, we might as well write it in-line:
+
+ ::
+
+ @posttask(lambda: sys.stdout.write("hooray\n"))
+ @originate("a.1")
+ def create_if_necessary(output_file):
+ open(output_file, "w")
+
+
+.. note::
+
+ The function(s) provided to :ref:`@posttask <decorators.posttask>` will be called if the pipeline passes
+ through a task, even if none of its jobs are run because they are up-to-date.
+    This happens when an upstream task is out-of-date, and the execution passes through
+ this point in the pipeline. See the example in :ref:`new_manual.dependencies`
+ of this manual.
+
+
+.. index::
+ single: @posttask; touchfile (Manual)
+ single: touchfile ; @posttask (Manual)
+
+
+.. _new_manual.posttask.touch_file:
+
+============================================
+:ref:`touch_file<decorators.touch_file>`
+============================================
+
+ One way to note the completion of a task is to create some sort of
+ "flag" file. Each stage in a traditional ``make`` pipeline would contain a
+ ``touch completed.flag``.
+
+ This is such a useful idiom that *Ruffus* provides the shorthand :ref:`touch_file<decorators.touch_file>`:
+
+ .. code-block:: python
+
+ from ruffus import *
+
+ @posttask(touch_file("task_completed.flag"))
+ @files(None, "a.1")
+ def create_if_necessary(input_file, output_file):
+ open(output_file, "w")
+
+ pipeline_run()
+
+=======================================
+Adding several post task actions
+=======================================
+    You can, of course, add more than one action to be taken on completion of the
+ task, either by stacking up as many :ref:`@posttask<decorators.posttask>` decorators
+ as necessary, or by including several functions in the same :ref:`@posttask <decorators.posttask>`:
+
+ .. code-block:: python
+
+        from ruffus import *
+
+        # define the notification functions used below
+        def print_hooray  (): print "hooray"
+        def print_whoppee (): print "whoppee"
+        def print_hip_hip (): print "hip hip"
+
+        @posttask(print_hooray, print_whoppee)
+ @posttask(print_hip_hip, touch_file("sentinel_flag"))
+ @originate("a.1")
+ def create_if_necessary(output_file):
+ open(output_file, "w")
+
+ pipeline_run()
+
diff --git a/doc/_build/html/_sources/tutorials/new_tutorial/split.txt b/doc/_build/html/_sources/tutorials/new_tutorial/split.txt
new file mode 100644
index 0000000..dc866b2
--- /dev/null
+++ b/doc/_build/html/_sources/tutorials/new_tutorial/split.txt
@@ -0,0 +1,233 @@
+.. include:: ../../global.inc
+.. include:: manual_chapter_numbers.inc
+
+.. index::
+ pair: split; Tutorial
+
+.. _new_manual.split:
+
+######################################################################################################
+|new_manual.split.chapter_num|: Splitting up large tasks / files with **@split**
+######################################################################################################
+
+
+.. seealso::
+
+ * :ref:`Manual Table of Contents <new_manual.table_of_contents>`
+ * :ref:`@split <decorators.split>` syntax
+ * :ref:`Example code for this chapter <new_manual.split.code>`
+
+
+**************************************************************************************
+Overview
+**************************************************************************************
+
+ A common requirement in computational pipelines is to split up a large task into
+    small jobs which can be run on different processors (or sent to a computational
+ cluster). Very often, the number of jobs depends dynamically on the size of the
+ task, and cannot be known beforehand.
+
+ *Ruffus* uses the :ref:`@split <decorators.split>` decorator to indicate that
+ the :term:`task` function will produce an indeterminate number of independent *Outputs* from a single *Input*.
+
+**************************************************************************************
+Example: Calculate variance for a large list of numbers in parallel
+**************************************************************************************
+
+ Suppose we wanted to calculate the `variance <http://en.wikipedia.org/wiki/Variance>`__ for
+ 100,000 numbers, how can we parallelise the calculation so that we can get an answer as
+ speedily as possible?
+
+ We need to
+
+ * break down the problem into manageable chunks
+ * solve these in parallel, possibly on a computational cluster and then
+ * merge the partial solutions back together for a final result.
+
+
+ To complicate things, we usually do not want to hard-code the number of parallel chunks beforehand.
+ The degree of parallelism is often only apparent as we process our data.
+
+ **Ruffus** was designed to solve such problems which are common, for example, in bioinformatics and genomics.
+
+ A flowchart for our variance problem might look like this:
+
+ .. image:: ../../images/manual_split_merge_example.jpg
+ :scale: 30
+
+ (In this toy example, we create our own starting data in ``create_random_numbers()``.)
+
+
+**************************************************************************************
+Output files for :ref:`@split <decorators.split>`
+**************************************************************************************
+
+
+ The *Ruffus* decorator :ref:`@split<decorators.split>` is designed specifically with this run-time flexibility in mind:
+
+
+ .. code-block:: python
+
+ @split(create_random_numbers, "*.chunks")
+ def split_problem (input_file_names, output_files):
+ pass
+
+
+    This will split the incoming ``input_file_names`` into ``NNN`` *outputs*, where ``NNN`` is not predetermined.
+
+ The *output* (second) parameter of :ref:`@split<decorators.split>` often contains a |glob|_ pattern like the ``*.chunks`` above.
+
+ Only **after** the task function has completed, will Ruffus match the **Output** parameter (``*.chunks``)
+ against the files which have been created by ``split_problem()`` (e.g. ``1.chunks``, ``2.chunks``, ``3.chunks``)
+
+**************************************************************************************
+Be careful in specifying **Output** globs
+**************************************************************************************
+
+ Note that it is your responsibility to keep the **Output** specification tight enough so that Ruffus does not
+ pick up extraneous files.
+
+ You can specify multiple |glob|_ patterns to match *all* the files which are the
+ result of the splitting task function. These can even cover different directories,
+ or groups of file names. This is a more extreme example:
+
+ ::
+
+ @split("input.file", ['a*.bits', 'b*.pieces', 'somewhere_else/c*.stuff'])
+ def split_function (input_filename, output_files):
+ "Code to split up 'input.file'"
+
+**************************************************************************************
+Clean up previous pipeline runs
+**************************************************************************************
+
+    Problems arise when the current directory contains results of previous pipeline runs.
+
+ * For example, if the previous analysis involved a large data set, there might be 3 chunks: ``1.chunks``, ``2.chunks``, ``3.chunks``.
+ * In the current analysis, there might be a smaller data set which divides into only 2 chunks, ``1.chunks`` and ``2.chunks``.
+ * Unfortunately, ``3.chunks`` from the previous run is still hanging around and will be included erroneously by the glob ``*.chunks``.
+
+
+ .. warning::
+
+        **Your first duty in** :ref:`@split <decorators.split>` **task functions should be to clean up**
+
+        To help you clean up thoroughly, Ruffus initialises the **Output** parameter to all files which match the specification.
+
+        The first order of business is thus invariably to clean up (delete with ``os.unlink``) all files in **Output**.
+
+ .. code-block:: python
+ :emphasize-lines: 11
+
+ #---------------------------------------------------------------
+ #
+ # split initial file
+ #
+ @split(create_random_numbers, "*.chunks")
+ def split_problem (input_file_names, output_files):
+ """
+ splits random numbers file into xxx files of chunk_size each
+ """
+ #
+ # clean up any files from previous runs
+ #
+ #for ff in glob.glob("*.chunks"):
+            for ff in output_files:
+ os.unlink(ff)
+
+    (The first time you run the example code, ``*.chunks`` will initialise ``output_files`` to an empty list.)
+
+.. _new_manual.split.one_to_many:
+
+**************************************************************************************
+1 to many
+**************************************************************************************
+
+ :ref:`@split <decorators.split>` is a one to many operator because its
+ outputs are a list of *independent* items.
+
+ If :ref:`@split <decorators.split>` generates 5 files, then this will lead to 5 jobs downstream.
+
+ This means we can just connect our old friend :ref:`@transform <decorators.transform>` to our pipeline
+ and the results of :ref:`@split <decorators.split>` will be analysed in parallel. This code should look
+ familiar:
+
+ .. code-block:: python
+
+ #---------------------------------------------------------------
+ #
+ # Calculate sum and sum of squares for each chunk file
+ #
+ @transform(split_problem, suffix(".chunks"), ".sums")
+ def sum_of_squares (input_file_name, output_file_name):
+ pass
+
+
+ Which results in output like this:
+
+ .. code-block:: pycon
+
+ >>> pipeline_run()
+ Job = [[random_numbers.list] -> *.chunks] completed
+ Completed Task = split_problem
+ Job = [1.chunks -> 1.sums] completed
+ Job = [10.chunks -> 10.sums] completed
+ Job = [2.chunks -> 2.sums] completed
+ Job = [3.chunks -> 3.sums] completed
+ Job = [4.chunks -> 4.sums] completed
+ Job = [5.chunks -> 5.sums] completed
+ Job = [6.chunks -> 6.sums] completed
+ Job = [7.chunks -> 7.sums] completed
+ Job = [8.chunks -> 8.sums] completed
+ Job = [9.chunks -> 9.sums] completed
+ Completed Task = sum_of_squares
+
+ Have a look at the :ref:`Example code for this chapter <new_manual.split.code>`
+
+.. _new_manual.split.nothing_to_many:
+
+**************************************************************************************
+Nothing to many
+**************************************************************************************
+
+
+ Normally we would use :ref:`@originate <new_manual.originate>` to create files from
+ scratch, for example at the beginning of the pipeline.
+
+ However, sometimes, it is not possible to determine ahead of time how many files you
+ will be creating from scratch. :ref:`@split<decorators.split>` can also be useful even in such cases:
+
+ .. code-block:: python
+ :emphasize-lines: 6
+
+ from random import randint
+ from ruffus import *
+ import os
+
+ # Create between 2 and 5 files
+ @split(None, "*.start")
+ def create_initial_files(no_input_file, output_files):
+ # cleanup first
+ for oo in output_files:
+ os.unlink(oo)
+ # make new files
+ for ii in range(randint(2,5)):
+ open("%d.start" % ii, "w")
+
+ @transform(create_initial_files, suffix(".start"), ".processed")
+ def process_files(input_file, output_file):
+ open(output_file, "w")
+
+ pipeline_run()
+
+ Giving:
+
+ .. code-block:: pycon
+
+ >>> pipeline_run()
+ Job = [None -> *.start] completed
+ Completed Task = create_initial_files
+ Job = [0.start -> 0.processed] completed
+ Job = [1.start -> 1.processed] completed
+ Completed Task = process_files
+
diff --git a/doc/_build/html/_sources/tutorials/new_tutorial/split_code.txt b/doc/_build/html/_sources/tutorials/new_tutorial/split_code.txt
new file mode 100644
index 0000000..eb4e725
--- /dev/null
+++ b/doc/_build/html/_sources/tutorials/new_tutorial/split_code.txt
@@ -0,0 +1,115 @@
+.. include:: ../../global.inc
+.. include:: manual_chapter_numbers.inc
+
+.. _new_manual.split.code:
+
+##############################################################################################################
+|new_manual.split.chapter_num|: Python Code for Splitting up large tasks / files with **@split**
+##############################################################################################################
+
+.. seealso::
+
+ * :ref:`Manual Table of Contents <new_manual.table_of_contents>`
+ * :ref:`@split syntax in detail <decorators.split>`
+ * Back to |new_manual.split.chapter_num|: :ref:`Splitting up large tasks / files with @split <new_manual.split>`
+
+*******************************************
+Splitting large jobs
+*******************************************
+
+ ::
+
+ from ruffus import *
+
+ NUMBER_OF_RANDOMS = 10000
+ CHUNK_SIZE = 1000
+
+
+ import random, os, glob
+
+ #---------------------------------------------------------------
+ #
+ # Create random numbers
+ #
+ @originate("random_numbers.list")
+ def create_random_numbers(output_file_name):
+ f = open(output_file_name, "w")
+ for i in range(NUMBER_OF_RANDOMS):
+ f.write("%g\n" % (random.random() * 100.0))
+
+ #---------------------------------------------------------------
+ #
+ # split initial file
+ #
+ @split(create_random_numbers, "*.chunks")
+ def split_problem (input_file_names, output_files):
+ """
+ splits random numbers file into xxx files of chunk_size each
+ """
+ #
+ # clean up any files from previous runs
+ #
+ #for ff in glob.glob("*.chunks"):
+            for ff in output_files:
+ os.unlink(ff)
+ #
+ #
+ # create new file every chunk_size lines and
+ # copy each line into current file
+ #
+ output_file = None
+ cnt_files = 0
+ for input_file_name in input_file_names:
+ for i, line in enumerate(open(input_file_name)):
+ if i % CHUNK_SIZE == 0:
+ cnt_files += 1
+ output_file = open("%d.chunks" % cnt_files, "w")
+ output_file.write(line)
+
+ #---------------------------------------------------------------
+ #
+ # Calculate sum and sum of squares for each chunk file
+ #
+ @transform(split_problem, suffix(".chunks"), ".sums")
+ def sum_of_squares (input_file_name, output_file_name):
+ output = open(output_file_name, "w")
+ sum_squared, sum = [0.0, 0.0]
+ cnt_values = 0
+ for line in open(input_file_name):
+ cnt_values += 1
+ val = float(line.rstrip())
+ sum_squared += val * val
+ sum += val
+ output.write("%s\n%s\n%d\n" % (repr(sum_squared), repr(sum), cnt_values))
+
+ #---------------------------------------------------------------
+ #
+ # Run
+ #
+ pipeline_run()
+
+
+
+
+************************************
+Resulting Output
+************************************
+ ::
+
+ >>> pipeline_run()
+ Job = [None -> random_numbers.list] completed
+ Completed Task = create_random_numbers
+ Job = [[random_numbers.list] -> *.chunks] completed
+ Completed Task = split_problem
+ Job = [1.chunks -> 1.sums] completed
+ Job = [10.chunks -> 10.sums] completed
+ Job = [2.chunks -> 2.sums] completed
+ Job = [3.chunks -> 3.sums] completed
+ Job = [4.chunks -> 4.sums] completed
+ Job = [5.chunks -> 5.sums] completed
+ Job = [6.chunks -> 6.sums] completed
+ Job = [7.chunks -> 7.sums] completed
+ Job = [8.chunks -> 8.sums] completed
+ Job = [9.chunks -> 9.sums] completed
+ Completed Task = sum_of_squares
+
diff --git a/doc/_build/html/_sources/tutorials/new_tutorial/subdivide_collate.txt b/doc/_build/html/_sources/tutorials/new_tutorial/subdivide_collate.txt
new file mode 100644
index 0000000..ae913b9
--- /dev/null
+++ b/doc/_build/html/_sources/tutorials/new_tutorial/subdivide_collate.txt
@@ -0,0 +1,234 @@
+.. include:: ../../global.inc
+.. include:: manual_chapter_numbers.inc
+
+.. index::
+ pair: @subdivide; Tutorial
+ pair: @collate; Tutorial
+
+.. _new_manual.subdivide_collate:
+
+############################################################################################################################################################################
+|new_manual.subdivide_collate.chapter_num|: :ref:`@subdivide <decorators.subdivide>` tasks to run efficiently and regroup with :ref:`@collate <decorators.collate>`
+############################################################################################################################################################################
+
+
+.. seealso::
+
+ * :ref:`Manual Table of Contents <new_manual.table_of_contents>`
+ * :ref:`@subdivide <decorators.subdivide>` syntax
+ * :ref:`@collate <decorators.collate>` syntax
+
+
+***********************
+Overview
+***********************
+
+ In |new_manual.split.chapter_num| and |new_manual.merge.chapter_num|, we saw how a large
+ task can be :ref:`@split <new_manual.split>` into small jobs to be analysed efficiently
+    in parallel. Ruffus can then :ref:`@merge <new_manual.merge>` these back together
+ to give a single, unified result.
+
+ This assumes that your pipeline is processing one item at a time. Usually, however, we
+ will have, for example, 10 large pieces of data in play, each of which has to be
+ subdivided into smaller pieces for analysis before being put back together.
+
+    This is the role of :ref:`@subdivide <decorators.subdivide>` and :ref:`@collate <decorators.collate>`.
+
+ Like :ref:`@split <decorators.split>`, the number of output files
+ :ref:`@subdivide <decorators.subdivide>` produces for *each* **Input** is not predetermined.
+
+    On the other hand, these output files should be named in such a way that they can
+    later be grouped back together using :ref:`@collate <decorators.collate>`.
+
+ This will be clearer with some worked examples.
+
+
+.. _new_manual.subdivide:
+
+*********************************************************************
+:ref:`@subdivide <decorators.subdivide>` in parallel
+*********************************************************************
+
+    Let us start from 3 files with varying numbers of lines. We wish to process these two
+    lines at a time, but we do not know ahead of time how long each file is:
+
+ .. code-block:: python
+ :emphasize-lines: 3,5
+
+ from ruffus import *
+ import os, random, sys
+
+        # Create files with a random number of lines
+ @originate(["a.start",
+ "b.start",
+ "c.start"])
+ def create_test_files(output_file):
+ cnt_lines = random.randint(1,3) * 2
+ with open(output_file, "w") as oo:
+ for ii in range(cnt_lines):
+ oo.write("data item = %d\n" % ii)
+ print " %s has %d lines" % (output_file, cnt_lines)
+
+
+ #
+ # subdivide the input files into NNN fragment files of 2 lines each
+ #
+ @subdivide( create_test_files,
+ formatter(),
+ "{path[0]}/{basename[0]}.*.fragment",
+ "{path[0]}/{basename[0]}")
+ def subdivide_files(input_file, output_files, output_file_name_stem):
+ #
+ # cleanup any previous results
+ #
+ for oo in output_files:
+ os.unlink(oo)
+ #
+ # Output files contain two lines each
+ # (new output files every even line)
+ #
+ cnt_output_files = 0
+ for ii, line in enumerate(open(input_file)):
+ if ii % 2 == 0:
+ cnt_output_files += 1
+ output_file_name = "%s.%d.fragment" % (output_file_name_stem, cnt_output_files)
+ output_file = open(output_file_name, "w")
+ print " Subdivide %s -> %s" % (input_file, output_file_name)
+ output_file.write(line)
+
+
+ #
+ # Analyse each fragment independently
+ #
+ @transform(subdivide_files, suffix(".fragment"), ".analysed")
+ def analyse_fragments(input_file, output_file):
+ print " Analysing %s -> %s" % (input_file, output_file)
+ with open(output_file, "w") as oo:
+ for line in open(input_file):
+ oo.write("analysed " + line)
+
+
+ This produces the following output:
+
+ .. code-block:: pycon
+ :emphasize-lines: 8,20,36
+
+ >>> pipeline_run(verbose = 1)
+ a.start has 2 lines
+ Job = [None -> a.start] completed
+ b.start has 6 lines
+ Job = [None -> b.start] completed
+ c.start has 6 lines
+ Job = [None -> c.start] completed
+ Completed Task = create_test_files
+
+ Subdivide a.start -> /home/lg/temp/a.1.fragment
+ Job = [a.start -> a.*.fragment, a] completed
+
+ Subdivide b.start -> /home/lg/temp/b.1.fragment
+ Subdivide b.start -> /home/lg/temp/b.2.fragment
+ Subdivide b.start -> /home/lg/temp/b.3.fragment
+ Job = [b.start -> b.*.fragment, b] completed
+
+ Subdivide c.start -> /home/lg/temp/c.1.fragment
+ Subdivide c.start -> /home/lg/temp/c.2.fragment
+ Subdivide c.start -> /home/lg/temp/c.3.fragment
+ Job = [c.start -> c.*.fragment, c] completed
+
+ Completed Task = subdivide_files
+
+ Analysing /home/lg/temp/a.1.fragment -> /home/lg/temp/a.1.analysed
+ Job = [a.1.fragment -> a.1.analysed] completed
+ Analysing /home/lg/temp/b.1.fragment -> /home/lg/temp/b.1.analysed
+ Job = [b.1.fragment -> b.1.analysed] completed
+
+ [ ...SEE EXAMPLE CODE FOR MORE LINES ...]
+
+ Completed Task = analyse_fragments
+
+
+ ``a.start`` has two lines and results in a single ``.fragment`` file,
+    while ``b.start``, with 6 lines, produces 3 ``b.*.fragment`` files.
+ Whatever their origin, all of the different fragment files are treated equally
+ in ``analyse_fragments()`` and processed (in parallel) in the same way.
+
+
+
+.. _new_manual.collate:
+
+*********************************************************************
+Grouping using :ref:`@collate <decorators.collate>`
+*********************************************************************
+
+ All that is left in our example is to reassemble the analysed fragments back together into
+ 3 sets of results corresponding to the original 3 pieces of starting data.
+
+ This is straightforward by eye: the file names all have the same pattern: ``[abc].*.analysed``:
+
+ ::
+
+ a.1.analysed -> a.final_result
+ b.1.analysed -> b.final_result
+ b.2.analysed -> ..
+ b.3.analysed -> ..
+ c.1.analysed -> c.final_result
+ c.2.analysed -> ..
+
+ :ref:`@collate <decorators.collate>` does something similar:
+
+ #. Specify a string substitution e.g. ``c.??.analysed -> c.final_result`` and
+ #. Ask *ruffus* to group together any **Input** (e.g. ``c.1.analysed``, ``c.2.analysed``)
+ that will result in the same **Output** (e.g. ``c.final_result``)
+
+
+ .. code-block:: python
+ :emphasize-lines: 3,5
+
+ #
+ # ``XXX.??.analysed -> XXX.final_result``
+ # Group results using original names
+ #
+ @collate( analyse_fragments,
+
+ # split file name into [abc].NUMBER.analysed
+ formatter("/(?P<NAME>[abc]+)\.\d+\.analysed$"),
+
+ "{path[0]}/{NAME[0]}.final_result")
+ def recombine_analyses(input_file_names, output_file):
+ with open(output_file, "w") as oo:
+ for input_file in input_file_names:
+ print " Recombine %s -> %s" % (input_file, output_file)
+ for line in open(input_file):
+ oo.write(line)
+
+ This produces the following output:
+
+ .. code-block:: pycon
+ :emphasize-lines: 11
+
+ Recombine /home/lg/temp/a.1.analysed -> /home/lg/temp/a.final_result
+ Job = [[a.1.analysed] -> a.final_result] completed
+ Recombine /home/lg/temp/b.1.analysed -> /home/lg/temp/b.final_result
+ Recombine /home/lg/temp/b.2.analysed -> /home/lg/temp/b.final_result
+ Recombine /home/lg/temp/b.3.analysed -> /home/lg/temp/b.final_result
+ Job = [[b.1.analysed, b.2.analysed, b.3.analysed] -> b.final_result] completed
+ Recombine /home/lg/temp/c.1.analysed -> /home/lg/temp/c.final_result
+ Recombine /home/lg/temp/c.2.analysed -> /home/lg/temp/c.final_result
+ Recombine /home/lg/temp/c.3.analysed -> /home/lg/temp/c.final_result
+ Job = [[c.1.analysed, c.2.analysed, c.3.analysed] -> c.final_result] completed
+ Completed Task = recombine_analyses
+
+
+ .. warning::
+
+       * **Input** file names are grouped together in no guaranteed order.
+
+         For example, the fragment files may not be sent to ``recombine_analyses(input_file_names, ...)``
+         in alphabetical or any other useful order.
+
+ You may want to sort **Input** before concatenation.
+
+ * All **Input** are grouped together if they have both the same **Output** *and* **Extra**
+ parameters. If any string substitution is specified in any of the other **Extra** parameters
+         to :ref:`@collate <decorators.collate>`, they must give the same answers for **Input**
+ in the same group.
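+
+    For example, here is a minimal sketch of the same ``recombine_analyses()`` task as above,
+    with ``sorted()`` added so that the fragments are concatenated in a predictable order:
+
+    .. code-block:: python
+
+        @collate( analyse_fragments,
+                  formatter("/(?P<NAME>[abc]+)\.\d+\.analysed$"),
+                  "{path[0]}/{NAME[0]}.final_result")
+        def recombine_analyses(input_file_names, output_file):
+            with open(output_file, "w") as oo:
+                # sort the grouped input file names before concatenating them
+                for input_file in sorted(input_file_names):
+                    for line in open(input_file):
+                        oo.write(line)
+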
diff --git a/doc/_build/html/_sources/tutorials/new_tutorial/subdivide_collate_code.txt b/doc/_build/html/_sources/tutorials/new_tutorial/subdivide_collate_code.txt
new file mode 100644
index 0000000..3160181
--- /dev/null
+++ b/doc/_build/html/_sources/tutorials/new_tutorial/subdivide_collate_code.txt
@@ -0,0 +1,155 @@
+.. include:: ../../global.inc
+.. include:: manual_chapter_numbers.inc
+
+.. _new_manual.subdivide_collate.code:
+
+#############################################################################################################################################################################################################################################################################################################################
+|new_manual.subdivide_collate.chapter_num|: Python Code for :ref:`@subdivide <decorators.subdivide>` tasks to run efficiently and regroup with :ref:`@collate <decorators.collate>`
+#############################################################################################################################################################################################################################################################################################################################
+
+.. seealso::
+
+ * :ref:`Manual Table of Contents <new_manual.table_of_contents>`
+ * :ref:`@jobs_limit <decorators.jobs_limit>` syntax
+ * :ref:`pipeline_run() <pipeline_functions.pipeline_run>` syntax
+ * :ref:`drmaa_wrapper.run_job() <drmaa_wrapper.run_job>` syntax
+    * Back to |new_manual.subdivide_collate.chapter_num|: :ref:`@subdivide tasks to run efficiently and regroup with @collate <new_manual.subdivide_collate>`
+
+*****************************************************************************************************************************
+:ref:`@subdivide <decorators.subdivide>` and regroup with :ref:`@collate <decorators.collate>` example
+*****************************************************************************************************************************
+
+ .. code-block:: python
+ :emphasize-lines: 17
+
+ from ruffus import *
+ import os, random, sys
+
+        # Create files with a random number of lines
+ @originate(["a.start",
+ "b.start",
+ "c.start"])
+ def create_test_files(output_file):
+ cnt_lines = random.randint(1,3) * 2
+ with open(output_file, "w") as oo:
+ for ii in range(cnt_lines):
+ oo.write("data item = %d\n" % ii)
+ print " %s has %d lines" % (output_file, cnt_lines)
+
+
+ #
+ # subdivide the input files into NNN fragment files of 2 lines each
+ #
+ @subdivide( create_test_files,
+ formatter(),
+ "{path[0]}/{basename[0]}.*.fragment",
+ "{path[0]}/{basename[0]}")
+ def subdivide_files(input_file, output_files, output_file_name_stem):
+ #
+ # cleanup any previous results
+ #
+ for oo in output_files:
+ os.unlink(oo)
+ #
+ # Output files contain two lines each
+ # (new output files every even line)
+ #
+ cnt_output_files = 0
+ for ii, line in enumerate(open(input_file)):
+ if ii % 2 == 0:
+ cnt_output_files += 1
+ output_file_name = "%s.%d.fragment" % (output_file_name_stem, cnt_output_files)
+ output_file = open(output_file_name, "w")
+ print " Subdivide %s -> %s" % (input_file, output_file_name)
+ output_file.write(line)
+
+
+ #
+ # Analyse each fragment independently
+ #
+ @transform(subdivide_files, suffix(".fragment"), ".analysed")
+ def analyse_fragments(input_file, output_file):
+ print " Analysing %s -> %s" % (input_file, output_file)
+ with open(output_file, "w") as oo:
+ for line in open(input_file):
+ oo.write("analysed " + line)
+
+
+ #
+ # Group results using original names
+ #
+ @collate( analyse_fragments,
+
+ # split file name into [abc].NUMBER.analysed
+ formatter("/(?P<NAME>[abc]+)\.\d+\.analysed$"),
+
+ "{path[0]}/{NAME[0]}.final_result")
+ def recombine_analyses(input_file_names, output_file):
+ with open(output_file, "w") as oo:
+ for input_file in input_file_names:
+ print " Recombine %s -> %s" % (input_file, output_file)
+ for line in open(input_file):
+ oo.write(line)
+
+
+
+
+ #pipeline_printout(sys.stdout, verbose = 3)
+
+
+ pipeline_run(verbose = 1)
+
+ Results in
+
+ .. code-block:: pycon
+
+ >>> pipeline_run(verbose = 1)
+
+ a.start has 2 lines
+ Job = [None -> a.start] completed
+ b.start has 6 lines
+ Job = [None -> b.start] completed
+ c.start has 6 lines
+ Job = [None -> c.start] completed
+ Completed Task = create_test_files
+
+ Subdivide a.start -> /home/lg/temp/a.1.fragment
+ Job = [a.start -> a.*.fragment, a] completed
+ Subdivide b.start -> /home/lg/temp/b.1.fragment
+ Subdivide b.start -> /home/lg/temp/b.2.fragment
+ Subdivide b.start -> /home/lg/temp/b.3.fragment
+ Job = [b.start -> b.*.fragment, b] completed
+ Subdivide c.start -> /home/lg/temp/c.1.fragment
+ Subdivide c.start -> /home/lg/temp/c.2.fragment
+ Subdivide c.start -> /home/lg/temp/c.3.fragment
+ Job = [c.start -> c.*.fragment, c] completed
+ Completed Task = subdivide_files
+
+ Analysing /home/lg/temp/a.1.fragment -> /home/lg/temp/a.1.analysed
+ Job = [a.1.fragment -> a.1.analysed] completed
+ Analysing /home/lg/temp/b.1.fragment -> /home/lg/temp/b.1.analysed
+ Job = [b.1.fragment -> b.1.analysed] completed
+ Analysing /home/lg/temp/b.2.fragment -> /home/lg/temp/b.2.analysed
+ Job = [b.2.fragment -> b.2.analysed] completed
+ Analysing /home/lg/temp/b.3.fragment -> /home/lg/temp/b.3.analysed
+ Job = [b.3.fragment -> b.3.analysed] completed
+ Analysing /home/lg/temp/c.1.fragment -> /home/lg/temp/c.1.analysed
+ Job = [c.1.fragment -> c.1.analysed] completed
+ Analysing /home/lg/temp/c.2.fragment -> /home/lg/temp/c.2.analysed
+ Job = [c.2.fragment -> c.2.analysed] completed
+ Analysing /home/lg/temp/c.3.fragment -> /home/lg/temp/c.3.analysed
+ Job = [c.3.fragment -> c.3.analysed] completed
+ Completed Task = analyse_fragments
+
+ Recombine /home/lg/temp/a.1.analysed -> /home/lg/temp/a.final_result
+ Job = [[a.1.analysed] -> a.final_result] completed
+ Recombine /home/lg/temp/b.1.analysed -> /home/lg/temp/b.final_result
+ Recombine /home/lg/temp/b.2.analysed -> /home/lg/temp/b.final_result
+ Recombine /home/lg/temp/b.3.analysed -> /home/lg/temp/b.final_result
+ Job = [[b.1.analysed, b.2.analysed, b.3.analysed] -> b.final_result] completed
+ Recombine /home/lg/temp/c.1.analysed -> /home/lg/temp/c.final_result
+ Recombine /home/lg/temp/c.2.analysed -> /home/lg/temp/c.final_result
+ Recombine /home/lg/temp/c.3.analysed -> /home/lg/temp/c.final_result
+ Job = [[c.1.analysed, c.2.analysed, c.3.analysed] -> c.final_result] completed
+ Completed Task = recombine_analyses
+
diff --git a/doc/_build/html/_sources/tutorials/new_tutorial/transform.txt b/doc/_build/html/_sources/tutorials/new_tutorial/transform.txt
new file mode 100644
index 0000000..9e743c0
--- /dev/null
+++ b/doc/_build/html/_sources/tutorials/new_tutorial/transform.txt
@@ -0,0 +1,194 @@
+.. include:: ../../global.inc
+.. include:: manual_chapter_numbers.inc
+
+.. index::
+ pair: transform; Tutorial
+
+.. _new_manual.transform:
+
+############################################################################################################################################################################################################
+|new_manual.transform.chapter_num|: Transforming data in a pipeline with :ref:`@transform <decorators.transform>`
+############################################################################################################################################################################################################
+
+
+.. seealso::
+
+ * :ref:`Manual Table of Contents <new_manual.table_of_contents>`
+ * :ref:`@transform <decorators.transform>` syntax
+
+.. note::
+
+ Remember to look at the example code:
+
+ * :ref:`new_manual.transform.code`
+
+
+***************************************
+Review
+***************************************
+ .. image:: ../../images/theoretical_pipeline_schematic.png
+ :scale: 50
+
+ Computational pipelines transform your data in stages until the final result is produced.
+ Ruffus automates the plumbing in your pipeline. You supply the python functions which perform the data transformation,
+ and tell Ruffus how these pipeline stages or :term:`task` functions are connected together.
+
+ .. note::
+
+ **The best way to design a pipeline is to:**
+
+ * **write down the file names of the data as it flows across your pipeline**
+        * **write down the names of functions which transform the data at each stage of the pipeline.**
+
+
+
+************************************
+Task functions as recipes
+************************************
+
+ Each :term:`task` function of the pipeline is a recipe or
+ `rule <http://www.gnu.org/software/make/manual/make.html#Rule-Introduction>`_
+ which can be applied repeatedly to our data.
+
+ For example, one can have
+
+ * a ``compile()`` *task* which will compile any number of source code files, or
+ * a ``count_lines()`` *task* which will count the number of lines in any file or
+ * an ``align_dna()`` *task* which will align the DNA of many chromosomes.
+
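+    For instance, here is a minimal sketch of what such a ``count_lines()`` *task* might look like
+    (the ``.txt`` file names are hypothetical):
+
+    .. code-block:: python
+
+        from ruffus import *
+
+        # one job per input file: the same recipe is applied to each
+        @transform(["chapter1.txt", "chapter2.txt"], suffix(".txt"), ".line_count")
+        def count_lines(input_file, output_file):
+            with open(output_file, "w") as oo:
+                oo.write("%d\n" % sum(1 for line in open(input_file)))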
+
+.. index::
+ pair: one to one @transform; Tutorial
+
+******************************************************************************
+:ref:`@transform <decorators.transform>` is a 1 to 1 operation
+******************************************************************************
+
+
+ ``@transform`` is a 1:1 operation because for each input, it generates one output.
+
+ .. image:: ../../images/transform_1_to_1_example.png
+ :scale: 50
+
+
+ This is obvious when you count the number of jobs at each step. In our example pipeline, there are always
+ three jobs moving through in step at each stage (:term:`task`).
+
+    Each **Input** or **Output** is not limited, however, to a single file name. Each job can accept, for example,
+    a pair of files as its **Input**, or generate more than one file, a dictionary, or numbers as its **Output**.
+
+ When each job outputs a pair of files, this does not generate two jobs downstream. It just means that the successive
+ :term:`task` in the pipeline will receive a list or tuple of files as its input parameter.
+
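+    For example, here is a minimal sketch (the task and file names are hypothetical, and the
+    ``.start`` files are assumed to exist already) in which each upstream job writes a pair of
+    files, and each downstream job receives that pair as a single **Input**:
+
+    .. code-block:: python
+
+        from ruffus import *
+
+        @transform(["x.start", "y.start"], suffix(".start"),
+                   [".part1", ".part2"])                 # each job produces a pair of files
+        def make_pairs(input_file, output_files):
+            for output_file in output_files:
+                open(output_file, "w").close()
+
+        # still only one downstream job per upstream job:
+        # input_files receives the whole pair, e.g. ["x.part1", "x.part2"]
+        @transform(make_pairs, suffix(".part1"), ".combined")
+        def combine_pairs(input_files, output_file):
+            open(output_file, "w").close()
+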
+ .. note::
+
+ The different sort of decorators in Ruffus determine the *topology* of your pipeline,
+ i.e. how the jobs from different tasks are linked together seamlessly.
+
+ :ref:`@transform <decorators.transform>` always generates one **Output** for one **Input**.
+
+ In the later parts of the tutorial, we will encounter more decorators which can *split up*, or *join together* or *group* inputs.
+
+ In other words, using other decorators **Input** and **Output** can have **many to one**, **many to many** etc. relationships.
+
+=======================================
+A pair of files as the **Input**
+=======================================
+
+    Let us rewrite our previous example so that the **Input** of the first task consists of
+    `matching pairs <http://en.wikipedia.org/wiki/DNA_sequencing_theory#Pairwise_end-sequencing>`__
+    of DNA sequence files, processed in tandem.
+
+
+ .. code-block:: python
+ :emphasize-lines: 6-8,17-19,29-31
+
+ from ruffus import *
+
+ starting_files = [("a.1.fastq", "a.2.fastq"),
+                          ("b.1.fastq", "b.2.fastq"),
+                          ("c.1.fastq", "c.2.fastq")]
+
+ #
+ # STAGE 1 fasta->sam
+ #
+ @transform(starting_files, # Input = starting files
+ suffix(".1.fastq"), # suffix = .1.fastq
+ ".sam") # Output suffix = .sam
+ def map_dna_sequence(input_files,
+ output_file):
+ # remember there are two input files now
+ ii1 = open(input_files[0])
+ ii2 = open(input_files[1])
+ oo = open(output_file, "w")
+
+
+ The only changes are to the first task:
+
+ .. code-block:: pycon
+
+        >>> pipeline_run()
+        Job = [[a.1.fastq, a.2.fastq] -> a.sam] completed
+        Job = [[b.1.fastq, b.2.fastq] -> b.sam] completed
+        Job = [[c.1.fastq, c.2.fastq] -> c.sam] completed
+ Completed Task = map_dna_sequence
+
+
+ :ref:`suffix <decorators.suffix>` always matches only the first file name in each **Input**.
+
+.. index::
+ pair: input / output parameters; Tutorial
+
+************************************
+**Input** and **Output** parameters
+************************************
+
+ **Ruffus** chains together different tasks by taking the **Output** from one job
+    and plugging it in automatically as the **Input** of the next.
+
+ The first two parameters of each job are the **Input** and **Output** parameters respectively.
+
+ In the above example, we have:
+
+ .. code-block:: pycon
+
+ >>> pipeline_run()
+ Job = [a.bam -> a.statistics, use_linear_model] completed
+ Job = [b.bam -> b.statistics, use_linear_model] completed
+ Job = [c.bam -> c.statistics, use_linear_model] completed
+ Completed Task = summarise_bam_file
+
+
+
+ .. table:: Parameters for ``summarise_bam_file()``
+
+ ================ ==================== =============================================
+ **Inputs** **Outputs** **Extra**
+ ================ ==================== =============================================
+ ``"a.bam"`` ``"a.statistics"`` ``"use_linear_model"``
+ ``"b.bam"`` ``"b.statistics"`` ``"use_linear_model"``
+ ``"c.bam"`` ``"c.statistics"`` ``"use_linear_model"``
+ ================ ==================== =============================================
+
+
+
+    **Extra** parameters are for the consumption of ``summarise_bam_file()`` and will not be passed to the next task.
+
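+    For example, a minimal sketch (the downstream ``write_report()`` task is hypothetical, and
+    ``compress_sam_file()`` / ``summarise_bam_file()`` are taken from the example code) showing that only
+    the ``.statistics`` **Output** file name, and not ``"use_linear_model"``, is forwarded:
+
+    .. code-block:: python
+
+        @transform(compress_sam_file, suffix(".bam"), ".statistics",
+                   "use_linear_model")                  # Extra parameter
+        def summarise_bam_file(input_file, output_file, extra_stats_parameter):
+            open(output_file, "w").close()
+
+        # this task receives only e.g. "a.statistics" as its Input, never "use_linear_model"
+        @transform(summarise_bam_file, suffix(".statistics"), ".report")
+        def write_report(input_file, output_file):
+            open(output_file, "w").close()
+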
+ Ruffus was designed for pipelines which save intermediate data in files. This is not
+ compulsory but saving your data in files at each step provides many advantages:
+
+ #. Ruffus can use file system time stamps to check if your pipeline is up to date
+ #. Your data is persistent across runs
+ #. This is a good way to pass large amounts of data across processes and computational nodes
+
+ Nevertheless, *all* the :term:`task` parameters can include anything which suits your workflow, from lists of files, to numbers,
+ sets or tuples. *Ruffus* imposes few constraints on what *you*
+ would like to send to each stage of your pipeline.
+
+
+    *Ruffus* does, however, assume that if the **Input** and **Output** parameters contain strings, these will be interpreted as file names
+ required by and produced by that job. As we shall see, the modification times of these file names
+ indicate whether that part of the pipeline is up to date or needs to be rerun.
+
+
+
diff --git a/doc/_build/html/_sources/tutorials/new_tutorial/transform_code.txt b/doc/_build/html/_sources/tutorials/new_tutorial/transform_code.txt
new file mode 100644
index 0000000..2f2457d
--- /dev/null
+++ b/doc/_build/html/_sources/tutorials/new_tutorial/transform_code.txt
@@ -0,0 +1,99 @@
+.. include:: ../../global.inc
+.. include:: manual_chapter_numbers.inc
+
+.. _new_manual.transform.code:
+
+##############################################################################################################
+|new_manual.transform.chapter_num|: Python Code for Transforming data in a pipeline with ``@transform``
+##############################################################################################################
+
+.. seealso::
+
+ * :ref:`Manual Table of Contents <new_manual.table_of_contents>`
+ * :ref:`@transform syntax in detail <decorators.transform>`
+ * Back to |new_manual.transform.chapter_num|: :ref:`Transforming data in a pipeline with @transform <new_manual.transform>`
+
+*******************************************
+Your first Ruffus script
+*******************************************
+
+ .. code-block:: python
+
+ #
+ # The starting data files would normally exist beforehand!
+ # We create some empty files for this example
+ #
+ starting_files = [("a.1.fastq", "a.2.fastq"),
+ ("b.1.fastq", "b.2.fastq"),
+ ("c.1.fastq", "c.2.fastq")]
+
+
+ for ff_pair in starting_files:
+ open(ff_pair[0], "w")
+ open(ff_pair[1], "w")
+
+
+ from ruffus import *
+
+ #
+ # STAGE 1 fasta->sam
+ #
+ @transform(starting_files, # Input = starting files
+ suffix(".1.fastq"), # suffix = .1.fastq
+ ".sam") # Output suffix = .sam
+ def map_dna_sequence(input_files,
+ output_file):
+ # remember there are two input files now
+ ii1 = open(input_files[0])
+ ii2 = open(input_files[1])
+ oo = open(output_file, "w")
+
+ #
+ # STAGE 2 sam->bam
+ #
+ @transform(map_dna_sequence, # Input = previous stage
+ suffix(".sam"), # suffix = .sam
+ ".bam") # Output suffix = .bam
+ def compress_sam_file(input_file,
+ output_file):
+ ii = open(input_file)
+ oo = open(output_file, "w")
+
+ #
+ # STAGE 3 bam->statistics
+ #
+ @transform(compress_sam_file, # Input = previous stage
+ suffix(".bam"), # suffix = .bam
+ ".statistics", # Output suffix = .statistics
+ "use_linear_model") # Extra statistics parameter
+ def summarise_bam_file(input_file,
+ output_file,
+ extra_stats_parameter):
+ """
+ Sketch of real analysis function
+ """
+ ii = open(input_file)
+ oo = open(output_file, "w")
+
+ pipeline_run()
+
+
+************************************
+Resulting Output
+************************************
+ ::
+
+ >>> pipeline_run()
+ Job = [[a.1.fastq, a.2.fastq] -> a.sam] completed
+ Job = [[b.1.fastq, b.2.fastq] -> b.sam] completed
+ Job = [[c.1.fastq, c.2.fastq] -> c.sam] completed
+ Completed Task = map_dna_sequence
+ Job = [a.sam -> a.bam] completed
+ Job = [b.sam -> b.bam] completed
+ Job = [c.sam -> c.bam] completed
+ Completed Task = compress_sam_file
+ Job = [a.bam -> a.statistics, use_linear_model] completed
+ Job = [b.bam -> b.statistics, use_linear_model] completed
+ Job = [c.bam -> c.statistics, use_linear_model] completed
+ Completed Task = summarise_bam_file
+
diff --git a/doc/_build/html/_sources/tutorials/new_tutorial/transform_in_parallel.txt b/doc/_build/html/_sources/tutorials/new_tutorial/transform_in_parallel.txt
new file mode 100644
index 0000000..57d0337
--- /dev/null
+++ b/doc/_build/html/_sources/tutorials/new_tutorial/transform_in_parallel.txt
@@ -0,0 +1,394 @@
+.. include:: ../../global.inc
+.. include:: manual_chapter_numbers.inc
+
+.. index::
+ pair: transforming in parallel; Tutorial
+
+.. _new_manual.transform_in_parallel:
+
+######################################################################################################
+|new_manual.transform_in_parallel.chapter_num|: More on ``@transform``-ing data
+######################################################################################################
+
+
+.. seealso::
+
+ * :ref:`Manual Table of Contents <new_manual.table_of_contents>`
+ * :ref:`@transform <decorators.transform>` syntax
+
+.. note::
+
+ Remember to look at the example code:
+
+ * :ref:`new_manual.transform_in_parallel.code`
+
+***************************************
+Review
+***************************************
+ .. image:: ../../images/theoretical_pipeline_schematic.png
+ :scale: 50
+
+ Computational pipelines transform your data in stages until the final result is produced.
+ *Ruffus* automates the plumbing in your pipeline. You supply the python functions which perform the data transformation,
+ and tell *Ruffus* how these pipeline stages or :term:`task` functions are connected together.
+
+ .. note::
+
+ **The best way to design a pipeline is to:**
+
+ * **write down the file names of the data as it flows across your pipeline**
+        * **write down the names of functions which transform the data at each stage of the pipeline.**
+
+
+ :ref:`new_manual.introduction` described the bare bones of a simple *Ruffus* pipeline.
+
+ Using the *Ruffus* :ref:`@transform <decorators.transform>` decorator, we were able to
+    specify the data files moving through our pipeline so that the appropriate task functions
+ could be invoked.
+
+ This may seem like a lot of effort and complication for something so simple: a couple of
+ simple python function calls we could have invoked ourselves.
+    However, by letting *Ruffus* manage your pipeline parameters, you will get the following features
+ for free:
+
+ #. Only out-of-date parts of the pipeline will be re-run
+ #. Multiple jobs can be run in parallel (on different processors if possible)
+ #. Pipeline stages can be chained together automatically. This means you can apply your
+ pipeline just as easily to 1000 files as to 3.
+
+
+
+************************************
+Running pipelines in parallel
+************************************
+ Even though three sets of files have been specified for our initial pipeline, and they can be
+    processed completely independently, by default *Ruffus* runs them one after another.
+
+ To ask *Ruffus* to run them in parallel, all you have to do is to add a ``multiprocess`` parameter to ``pipeline_run``:
+
+ ::
+
+ >>> pipeline_run(multiprocess = 5)
+
+ In this case, we are telling *Ruffus* to run a maximum of 5 jobs at the same time. Since we only have
+ three sets of data, that is as much parallelism as we are going to get...
+
+
+
+.. _new_manual.only_rerun_out_of_date:
+
+**************************************************
+Up-to-date jobs are not re-run unnecessarily
+**************************************************
+
+ A job will be run only if the output file timestamps are out of date.
+ If you ran our example code a second time, nothing would happen because all the work is already complete.
+
+ We can check the details by asking *Ruffus* for more ``verbose`` output
+
+ ::
+
+ >>> pipeline_run(verbose = 4)
+ Task = map_dna_sequence
+ All jobs up to date
+ Task = compress_sam_file
+ All jobs up to date
+ Task = summarise_bam_file
+ All jobs up to date
+
+
+    Nothing happens because:
+
+        * ``a.sam`` was created later than ``a.1.fastq`` and ``a.2.fastq``, and
+ * ``a.bam`` was created later than ``a.sam`` and
+ * ``a.statistics`` was created later than ``a.bam``.
+
+ and so on...
+
+
+    Let us see what happens if we recreate the file ``a.1.fastq`` so that it appears as if one of the original data files is out of date
+ ::
+
+ open("a.1.fastq", "w")
+ pipeline_run(multiprocess = 5)
+
+
+    The up-to-date jobs are cleverly ignored and only the out-of-date files are reprocessed.
+
+ .. code-block:: pycon
+ :emphasize-lines: 3,4,7,8,11,12
+
+ >>> open("a.1.fastq", "w")
+ >>> pipeline_run(verbose=2)
+ Job = [[b.1.fastq, b.2.fastq] -> b.sam] # unnecessary: already up to date
+ Job = [[c.1.fastq, c.2.fastq] -> c.sam] # unnecessary: already up to date
+ Job = [[a.1.fastq, a.2.fastq] -> a.sam] completed
+ Completed Task = map_dna_sequence
+ Job = [b.sam -> b.bam] # unnecessary: already up to date
+ Job = [c.sam -> c.bam] # unnecessary: already up to date
+ Job = [a.sam -> a.bam] completed
+ Completed Task = compress_sam_file
+ Job = [b.bam -> b.statistics, use_linear_model] # unnecessary: already up to date
+ Job = [c.bam -> c.statistics, use_linear_model] # unnecessary: already up to date
+ Job = [a.bam -> a.statistics, use_linear_model] completed
+ Completed Task = summarise_bam_file
+
+
+
+
+.. index::
+ pair: output_from; referring to functions before they are defined
+ pair: output_from; defining tasks out of order
+
+.. _new_manual.output_from:
+
+***************************************
+Defining pipeline tasks out of order
+***************************************
+
+    The examples so far assume that all your pipelined tasks are defined in order
+    (``first_task`` before ``second_task``). This is usually the most sensible way to arrange your code.
+
+ If you wish to refer to tasks which are not yet defined, you can do so by quoting the function name as a string and wrapping
+ it with the :ref:`indicator class <decorators.indicator_objects>` :ref:`output_from(...) <decorators.output_from>` so that *Ruffus*
+    knows this is a :term:`task` name, not a file name:
+
+ .. code-block:: python
+ :emphasize-lines: 5
+
+ #---------------------------------------------------------------
+ #
+ # second task
+ #
+ # task name string wrapped in output_from(...)
+ @transform(output_from("first_task"), suffix(".output.1"), ".output2")
+ def second_task(input_files, output_file):
+ with open(output_file, "w"): pass
+
+
+ #---------------------------------------------------------------
+ #
+ # first task
+ #
+ @transform(first_task_params, suffix(".start"),
+ [".output.1",
+ ".output.extra.1"],
+ "some_extra.string.for_example", 14)
+ def first_task(input_files, output_file_pair,
+ extra_parameter_str, extra_parameter_num):
+ for output_file in output_file_pair:
+ with open(output_file, "w"):
+ pass
+
+
+ #---------------------------------------------------------------
+ #
+ # Run
+ #
+ pipeline_run([second_task])
+
+    You can also refer to tasks (functions) in other modules, in which case the fully
+ qualified name must be used:
+
+ ::
+
+ @transform(output_from("other_module.first_task"), suffix(".output.1"), ".output2")
+ def second_task(input_files, output_file):
+ pass
+
+
+
+.. index::
+ pair: @transform; multiple dependencies
+
+.. _new_manual.transform.multiple_dependencies:
+
+***************************************
+Multiple dependencies
+***************************************
+
+    Each task can depend on more than one antecedent simply by chaining to a list of tasks in :ref:`@transform <decorators.transform>`:
+
+ .. code-block:: python
+ :emphasize-lines: 2
+
+ #
+ # third_task depends on both first_task() and second_task()
+ #
+ @transform([first_task, second_task], suffix(".output.1"), ".output2")
+ def third_task(input_files, output_file):
+ with open(output_file, "w"): pass
+
+
+ ``third_task()`` depends on and follows both ``first_task()`` and ``second_task()``. However, these latter two tasks are independent of each other
+    and can and will run in parallel. This can be shown clearly in our example if we add a little randomness to the run time of each job:
+
+ .. code-block:: python
+
+ time.sleep(random.random())
+
+ The execution of ``first_task()`` and ``second_task()`` jobs will be interleaved and they finish in no particular order:
+
+ .. code-block:: pycon
+
+ >>> pipeline_run([third_task], multiprocess = 6)
+ Job = [[job3.a.start, job3.b.start] -> [job3.a.output.1, job3.a.output.extra.1], some_extra.string.for_example, 14] completed
+ Job = [[job6.a.start, job6.b.start] -> [job6.a.output.1, job6.a.output.extra.1], some_extra.string.for_example, 14] completed
+ Job = [[job1.a.start, job1.b.start] -> [job1.a.output.1, job1.a.output.extra.1], some_extra.string.for_example, 14] completed
+ Job = [[job4.a.start, job4.b.start] -> [job4.a.output.1, job4.a.output.extra.1], some_extra.string.for_example, 14] completed
+ Job = [[job5.a.start, job5.b.start] -> [job5.a.output.1, job5.a.output.extra.1], some_extra.string.for_example, 14] completed
+ Completed Task = second_task
+ Job = [[job2.a.start, job2.b.start] -> [job2.a.output.1, job2.a.output.extra.1], some_extra.string.for_example, 14] completed
+
+
+ .. note::
+
+ See the :ref:`example code <new_manual.transform.multiple_dependencies.code>`
+
+
+.. index::
+ pair: @follow; imposing order with
+
+.. _new_manual.follows:
+
+***************************************
+:ref:`@follows <decorators.follows>`
+***************************************
+
+    If there is some extrinsic reason why one non-dependent task has to precede the other, then this can be specified explicitly using :ref:`@follows <decorators.follows>`:
+
+
+ .. code-block:: python
+ :emphasize-lines: 2
+
+ #
+ # @follows specifies a preceding task
+ #
+ @follows("first_task")
+ @transform(second_task_params, suffix(".start"),
+ [".output.1",
+ ".output.extra.1"],
+ "some_extra.string.for_example", 14)
+ def second_task(input_files, output_file_pair,
+                        extra_parameter_str, extra_parameter_num):
+            pass
+
+
+ :ref:`@follows <decorators.follows>` specifies either a preceding task (e.g. ``first_task``), or if
+ it has not yet been defined, the name (as a string) of a task function (e.g. ``"first_task"``).
+
+ With the addition of :ref:`@follows <decorators.follows>`, all the jobs
+ of ``second_task()`` start *after* those from ``first_task()`` have finished:
+
+ .. code-block:: pycon
+
+
+ >>> pipeline_run([third_task], multiprocess = 6)
+ Job = [[job2.a.start, job2.b.start] -> [job2.a.output.1, job2.a.output.extra.1], some_extra.string.for_example, 14] completed
+ Job = [[job3.a.start, job3.b.start] -> [job3.a.output.1, job3.a.output.extra.1], some_extra.string.for_example, 14] completed
+ Job = [[job1.a.start, job1.b.start] -> [job1.a.output.1, job1.a.output.extra.1], some_extra.string.for_example, 14] completed
+ Completed Task = first_task
+ Job = [[job4.a.start, job4.b.start] -> [job4.a.output.1, job4.a.output.extra.1], some_extra.string.for_example, 14] completed
+ Job = [[job6.a.start, job6.b.start] -> [job6.a.output.1, job6.a.output.extra.1], some_extra.string.for_example, 14] completed
+ Job = [[job5.a.start, job5.b.start] -> [job5.a.output.1, job5.a.output.extra.1], some_extra.string.for_example, 14] completed
+ Completed Task = second_task
+
+
+
+.. index::
+ single: @follows; mkdir (Manual)
+ single: mkdir; @follows (Manual)
+
+.. _new_manual.follows.mkdir:
+
+************************************************************************************************************************************************************
+Making directories automatically with :ref:`@follows <decorators.follows>` and :ref:`mkdir <decorators.mkdir>`
+************************************************************************************************************************************************************
+
+ :ref:`@follows <decorators.follows>` is also useful for making sure one or more destination directories
+ exist before a task is run.
+
+ *Ruffus* provides special syntax to support this, using the special
+ :ref:`mkdir <decorators.mkdir>` indicator class. For example:
+
+ .. code-block:: python
+ :emphasize-lines: 2
+
+ #
+ # @follows specifies both a preceding task and a directory name
+ #
+ @follows("first_task", mkdir("output/results/here"))
+ @transform(second_task_params, suffix(".start"),
+ [".output.1",
+ ".output.extra.1"],
+ "some_extra.string.for_example", 14)
+ def second_task(input_files, output_file_pair,
+                        extra_parameter_str, extra_parameter_num):
+            pass
+
+ Before ``second_task()`` is run, the ``output/results/here`` directory will be created if necessary.
+
+
+.. index::
+ pair: inputs parameters; globs
+ pair: globs in input parameters; Tutorial
+
+.. _new_manual.globs_as_input:
+
+
+******************************************************************************
+Globs in the **Input** parameter
+******************************************************************************
+
+ * As a syntactic convenience, *Ruffus* also allows you to specify a |glob|_ pattern (e.g. ``*.txt``) in the
+ **Input** parameter.
+    * |glob|_ patterns will automatically be expanded to specify all matching file names as the **Input**.
+    * Any strings within **Input** which contain the characters ``*?[]`` will be treated as a |glob|_ pattern.
+
+ The first function in our initial *Ruffus* pipeline example could have been written as:
+
+ .. code-block:: python
+ :emphasize-lines: 4
+
+ #
+ # STAGE 1 fasta->sam
+ #
+ @transform("*.fasta", # Input = glob
+ suffix(".fasta"), # suffix = .fasta
+ ".sam") # Output suffix = .sam
+ def map_dna_sequence(input_file,
+ output_file):
+ ""
+
+
+.. index::
+ pair: Mixing tasks, globs and file names; Tutorial
+
+
+******************************************************************************
+Mixing Tasks and Globs in the **Input** parameter
+******************************************************************************
+
+ |glob|_ patterns, references to tasks and file names strings
+ can be mixed freely in (nested) python lists and tuples in the **Input** parameter.
+
+ For example, a task function can chain to the **Output** from multiple upstream tasks:
+
+ .. code-block:: python
+
+        @transform([task1, task2,          # Input = multiple tasks
+                    "aa*.fasta",           #       + all files matching glob
+                    "zz.fasta"],           #       + file name
+                   suffix(".fasta"),       # suffix = .fasta
+                   ".sam")                 # Output suffix = .sam
+ def map_dna_sequence(input_file,
+ output_file):
+ ""
+
+ In all cases, *Ruffus* tries to do the right thing, and to make the simple or
+ obvious case require the simplest, least onerous syntax.
+
+ If sometimes *Ruffus* does not behave the way you expect, please write to the authors:
+ it may be a bug!
+
+ :ref:`new_manual.pipeline_printout` and
+ :ref:`new_manual.cmdline` will show you how to
+    make sure that your intentions are reflected in *Ruffus* code.
+
diff --git a/doc/_build/html/_sources/tutorials/new_tutorial/transform_in_parallel_code.txt b/doc/_build/html/_sources/tutorials/new_tutorial/transform_in_parallel_code.txt
new file mode 100644
index 0000000..3160e2e
--- /dev/null
+++ b/doc/_build/html/_sources/tutorials/new_tutorial/transform_in_parallel_code.txt
@@ -0,0 +1,366 @@
+.. include:: ../../global.inc
+.. include:: manual_chapter_numbers.inc
+
+.. _new_manual.transform_in_parallel.code:
+
+######################################################################################################
+|new_manual.transform_in_parallel.chapter_num|: Python Code for More on ``@transform``-ing data
+######################################################################################################
+
+.. seealso::
+
+
+ * :ref:`Manual Table of Contents <new_manual.table_of_contents>`
+ * :ref:`@transform syntax in detail <decorators.transform>`
+ * Back to |new_manual.transform_in_parallel.chapter_num|: :ref:`More on @transform-ing data and @originate <new_manual.transform_in_parallel>`
+
+*******************************************
+Producing several items / files per job
+*******************************************
+
+ ::
+
+ from ruffus import *
+
+ #---------------------------------------------------------------
+ # Create pairs of input files
+ #
+ first_task_params = [
+ ['job1.a.start', 'job1.b.start'],
+ ['job2.a.start', 'job2.b.start'],
+ ['job3.a.start', 'job3.b.start'],
+ ]
+
+ for input_file_pairs in first_task_params:
+ for input_file in input_file_pairs:
+ open(input_file, "w")
+
+
+ #---------------------------------------------------------------
+ #
+ # first task
+ #
+ @transform(first_task_params, suffix(".start"),
+ [".output.1",
+ ".output.extra.1"],
+ "some_extra.string.for_example", 14)
+ def first_task(input_files, output_file_pair,
+ extra_parameter_str, extra_parameter_num):
+ for output_file in output_file_pair:
+ with open(output_file, "w"):
+ pass
+
+
+ #---------------------------------------------------------------
+ #
+ # second task
+ #
+ @transform(first_task, suffix(".output.1"), ".output2")
+ def second_task(input_files, output_file):
+ with open(output_file, "w"): pass
+
+
+ #---------------------------------------------------------------
+ #
+ # Run
+ #
+ pipeline_run([second_task])
+
+=============================
+Resulting Output
+=============================
+
+ ::
+
+ >>> pipeline_run([second_task])
+ Job = [[job1.a.start, job1.b.start] -> [job1.a.output.1, job1.a.output.extra.1], some_extra.string.for_example, 14] completed
+ Job = [[job2.a.start, job2.b.start] -> [job2.a.output.1, job2.a.output.extra.1], some_extra.string.for_example, 14] completed
+ Job = [[job3.a.start, job3.b.start] -> [job3.a.output.1, job3.a.output.extra.1], some_extra.string.for_example, 14] completed
+ Completed Task = first_task
+ Job = [[job1.a.output.1, job1.a.output.extra.1] -> job1.a.output2] completed
+ Job = [[job2.a.output.1, job2.a.output.extra.1] -> job2.a.output2] completed
+ Job = [[job3.a.output.1, job3.a.output.extra.1] -> job3.a.output2] completed
+ Completed Task = second_task
+
+
+
+*******************************************
+Defining task functions out of order
+*******************************************
+
+ .. code-block:: python
+ :emphasize-lines: 22
+
+ from ruffus import *
+
+ #---------------------------------------------------------------
+ # Create pairs of input files
+ #
+ first_task_params = [
+ ['job1.a.start', 'job1.b.start'],
+ ['job2.a.start', 'job2.b.start'],
+ ['job3.a.start', 'job3.b.start'],
+ ]
+
+ for input_file_pairs in first_task_params:
+ for input_file in input_file_pairs:
+ open(input_file, "w")
+
+
+
+ #---------------------------------------------------------------
+ #
+ # second task defined first
+ #
+ # task name string wrapped in output_from(...)
+ @transform(output_from("first_task"), suffix(".output.1"), ".output2")
+ def second_task(input_files, output_file):
+ with open(output_file, "w"): pass
+
+
+ #---------------------------------------------------------------
+ #
+ # first task
+ #
+ @transform(first_task_params, suffix(".start"),
+ [".output.1",
+ ".output.extra.1"],
+ "some_extra.string.for_example", 14)
+ def first_task(input_files, output_file_pair,
+ extra_parameter_str, extra_parameter_num):
+ for output_file in output_file_pair:
+ with open(output_file, "w"):
+ pass
+
+
+ #---------------------------------------------------------------
+ #
+ # Run
+ #
+ pipeline_run([second_task])
+
+
+=============================
+Resulting Output
+=============================
+
+ .. code-block:: pycon
+
+ >>> pipeline_run([second_task])
+ Job = [[job1.a.start, job1.b.start] -> [job1.a.output.1, job1.a.output.extra.1], some_extra.string.for_example, 14] completed
+ Job = [[job2.a.start, job2.b.start] -> [job2.a.output.1, job2.a.output.extra.1], some_extra.string.for_example, 14] completed
+ Job = [[job3.a.start, job3.b.start] -> [job3.a.output.1, job3.a.output.extra.1], some_extra.string.for_example, 14] completed
+ Completed Task = first_task
+ Job = [[job1.a.output.1, job1.a.output.extra.1] -> job1.a.output2] completed
+ Job = [[job2.a.output.1, job2.a.output.extra.1] -> job2.a.output2] completed
+ Job = [[job3.a.output.1, job3.a.output.extra.1] -> job3.a.output2] completed
+ Completed Task = second_task
+
+.. _new_manual.transform.multiple_dependencies.code:
+
+*******************************************
+Multiple dependencies
+*******************************************
+
+ .. code-block:: python
+ :emphasize-lines: 58
+
+ from ruffus import *
+ import time
+ import random
+
+ #---------------------------------------------------------------
+ # Create pairs of input files
+ #
+ first_task_params = [
+ ['job1.a.start', 'job1.b.start'],
+ ['job2.a.start', 'job2.b.start'],
+ ['job3.a.start', 'job3.b.start'],
+ ]
+ second_task_params = [
+ ['job4.a.start', 'job4.b.start'],
+ ['job5.a.start', 'job5.b.start'],
+ ['job6.a.start', 'job6.b.start'],
+ ]
+
+ for input_file_pairs in first_task_params + second_task_params:
+ for input_file in input_file_pairs:
+ open(input_file, "w")
+
+
+
+ #---------------------------------------------------------------
+ #
+ # first task
+ #
+ @transform(first_task_params, suffix(".start"),
+ [".output.1",
+ ".output.extra.1"],
+ "some_extra.string.for_example", 14)
+ def first_task(input_files, output_file_pair,
+ extra_parameter_str, extra_parameter_num):
+ for output_file in output_file_pair:
+ with open(output_file, "w"):
+ pass
+ time.sleep(random.random())
+
+
+
+ #---------------------------------------------------------------
+ #
+ # second task
+ #
+ @transform(second_task_params, suffix(".start"),
+ [".output.1",
+ ".output.extra.1"],
+ "some_extra.string.for_example", 14)
+ def second_task(input_files, output_file_pair,
+ extra_parameter_str, extra_parameter_num):
+ for output_file in output_file_pair:
+ with open(output_file, "w"):
+ pass
+ time.sleep(random.random())
+
+
+ #---------------------------------------------------------------
+ #
+ # third task
+ #
+ # depends on both first_task() and second_task()
+ @transform([first_task, second_task], suffix(".output.1"), ".output2")
+ def third_task(input_files, output_file):
+ with open(output_file, "w"): pass
+
+
+ #---------------------------------------------------------------
+ #
+ # Run
+ #
+ pipeline_run([third_task], multiprocess = 6)
+
+=============================
+Resulting Output
+=============================
+
+ .. code-block:: pycon
+
+ >>> pipeline_run([third_task], multiprocess = 6)
+ Job = [[job3.a.start, job3.b.start] -> [job3.a.output.1, job3.a.output.extra.1], some_extra.string.for_example, 14] completed
+ Job = [[job6.a.start, job6.b.start] -> [job6.a.output.1, job6.a.output.extra.1], some_extra.string.for_example, 14] completed
+ Job = [[job1.a.start, job1.b.start] -> [job1.a.output.1, job1.a.output.extra.1], some_extra.string.for_example, 14] completed
+ Job = [[job4.a.start, job4.b.start] -> [job4.a.output.1, job4.a.output.extra.1], some_extra.string.for_example, 14] completed
+ Job = [[job5.a.start, job5.b.start] -> [job5.a.output.1, job5.a.output.extra.1], some_extra.string.for_example, 14] completed
+ Completed Task = second_task
+ Job = [[job2.a.start, job2.b.start] -> [job2.a.output.1, job2.a.output.extra.1], some_extra.string.for_example, 14] completed
+ Completed Task = first_task
+ Job = [[job1.a.output.1, job1.a.output.extra.1] -> job1.a.output2] completed
+ Job = [[job2.a.output.1, job2.a.output.extra.1] -> job2.a.output2] completed
+ Job = [[job3.a.output.1, job3.a.output.extra.1] -> job3.a.output2] completed
+ Job = [[job4.a.output.1, job4.a.output.extra.1] -> job4.a.output2] completed
+ Job = [[job5.a.output.1, job5.a.output.extra.1] -> job5.a.output2] completed
+ Job = [[job6.a.output.1, job6.a.output.extra.1] -> job6.a.output2] completed
+ Completed Task = third_task
+
+
+*******************************************
+Multiple dependencies after @follows
+*******************************************
+
+ .. code-block:: python
+ :emphasize-lines: 31
+
+ from ruffus import *
+ import time
+ import random
+
+ #---------------------------------------------------------------
+ # Create pairs of input files
+ #
+ first_task_params = [
+ ['job1.a.start', 'job1.b.start'],
+ ['job2.a.start', 'job2.b.start'],
+ ['job3.a.start', 'job3.b.start'],
+ ]
+ second_task_params = [
+ ['job4.a.start', 'job4.b.start'],
+ ['job5.a.start', 'job5.b.start'],
+ ['job6.a.start', 'job6.b.start'],
+ ]
+
+ for input_file_pairs in first_task_params + second_task_params:
+ for input_file in input_file_pairs:
+ open(input_file, "w")
+
+
+
+ #---------------------------------------------------------------
+ #
+ # first task
+ #
+ @transform(first_task_params, suffix(".start"),
+ [".output.1",
+ ".output.extra.1"],
+ "some_extra.string.for_example", 14)
+ def first_task(input_files, output_file_pair,
+ extra_parameter_str, extra_parameter_num):
+ for output_file in output_file_pair:
+ with open(output_file, "w"):
+ pass
+ time.sleep(random.random())
+
+
+
+ #---------------------------------------------------------------
+ #
+ # second task
+ #
+ @follows("first_task")
+ @transform(second_task_params, suffix(".start"),
+ [".output.1",
+ ".output.extra.1"],
+ "some_extra.string.for_example", 14)
+ def second_task(input_files, output_file_pair,
+ extra_parameter_str, extra_parameter_num):
+ for output_file in output_file_pair:
+ with open(output_file, "w"):
+ pass
+ time.sleep(random.random())
+
+
+ #---------------------------------------------------------------
+ #
+ # third task
+ #
+ # depends on both first_task() and second_task()
+ @transform([first_task, second_task], suffix(".output.1"), ".output2")
+ def third_task(input_files, output_file):
+ with open(output_file, "w"): pass
+
+
+ #---------------------------------------------------------------
+ #
+ # Run
+ #
+ pipeline_run([third_task], multiprocess = 6)
+
+=======================================================================================
+Resulting Output: ``first_task`` completes before ``second_task``
+=======================================================================================
+
+ .. code-block:: pycon
+
+ >>> pipeline_run([third_task], multiprocess = 6)
+ Job = [[job2.a.start, job2.b.start] -> [job2.a.output.1, job2.a.output.extra.1], some_extra.string.for_example, 14] completed
+ Job = [[job3.a.start, job3.b.start] -> [job3.a.output.1, job3.a.output.extra.1], some_extra.string.for_example, 14] completed
+ Job = [[job1.a.start, job1.b.start] -> [job1.a.output.1, job1.a.output.extra.1], some_extra.string.for_example, 14] completed
+ Completed Task = first_task
+ Job = [[job4.a.start, job4.b.start] -> [job4.a.output.1, job4.a.output.extra.1], some_extra.string.for_example, 14] completed
+ Job = [[job6.a.start, job6.b.start] -> [job6.a.output.1, job6.a.output.extra.1], some_extra.string.for_example, 14] completed
+ Job = [[job5.a.start, job5.b.start] -> [job5.a.output.1, job5.a.output.extra.1], some_extra.string.for_example, 14] completed
+ Completed Task = second_task
+ Job = [[job1.a.output.1, job1.a.output.extra.1] -> job1.a.output2] completed
+ Job = [[job2.a.output.1, job2.a.output.extra.1] -> job2.a.output2] completed
+ Job = [[job3.a.output.1, job3.a.output.extra.1] -> job3.a.output2] completed
+ Job = [[job4.a.output.1, job4.a.output.extra.1] -> job4.a.output2] completed
+ Job = [[job5.a.output.1, job5.a.output.extra.1] -> job5.a.output2] completed
+ Job = [[job6.a.output.1, job6.a.output.extra.1] -> job6.a.output2] completed
diff --git a/doc/_build/html/_sources/tutorials/simple_tutorial/simple_tutorial.txt b/doc/_build/html/_sources/tutorials/simple_tutorial/simple_tutorial.txt
new file mode 100644
index 0000000..113e126
--- /dev/null
+++ b/doc/_build/html/_sources/tutorials/simple_tutorial/simple_tutorial.txt
@@ -0,0 +1,71 @@
+.. include:: ../../global.inc
+.. _Simple_Tutorial:
+
+
+############################################################
+A simple tutorial: 8 steps to *Ruffus*
+############################################################
+
+***************************************
+Table of Contents
+***************************************
+
+============
+Features
+============
+
+**Ruffus** provides automatic support for
+
+ * Managing dependencies
+ * Parallel jobs
+ * Re-starting from arbitrary points, especially after errors
+ * Display of the pipeline as a flowchart
+ * Reporting
+
+
+    | This tutorial has eight steps which cover all the core functionality of *Ruffus*.
+    | Don't worry if steps 1 and 2 seem a bit slow: once you get used to **Ruffus**,
+      steps 4-8 will be a breeze.
+
+ You can click on "previous" and "next" at the top and bottom of each page to
+ navigate through the tutorial.
+
+
+============================
+The first steps (1-4)
+============================
+
+ The first half of the tutorial will show you how to:
+
+ 1. :ref:`Chain tasks (functions) together into a pipeline <Simple_Tutorial_1st_step>`
+ 2. :ref:`Provide parameters to run jobs in parallel <Simple_Tutorial_2nd_step>`
+ 3. :ref:`Tracing through your new pipeline <Simple_Tutorial_3rd_step>`
+ 4. :ref:`Using flowcharts <Simple_Tutorial_4th_step_graphical>`
+
+============================
+A worked example (steps 5-8)
+============================
+
+ The second half of the tutorial is a worked example to calculate
+ the sample variance of 10,000 random numbers. This shows you how to:
+
+ 5. :ref:`Split up a large problem into smaller chunks <Simple_Tutorial_5th_step>`
+ 6. :ref:`Calculate partial solutions in parallel <Simple_Tutorial_6th_step>`
+ 7. :ref:`Re-combine the partial solutions into the final result <Simple_Tutorial_7th_step>`
+ 8. :ref:`Automatically signal the completion of each step of our pipeline <Simple_Tutorial_8th_step>`
+
+
+ This covers the core functionality of *Ruffus*.
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/doc/_build/html/_sources/tutorials/simple_tutorial/simple_tutorial_code.txt b/doc/_build/html/_sources/tutorials/simple_tutorial/simple_tutorial_code.txt
new file mode 100644
index 0000000..b562cd7
--- /dev/null
+++ b/doc/_build/html/_sources/tutorials/simple_tutorial/simple_tutorial_code.txt
@@ -0,0 +1,34 @@
+.. include:: ../../global.inc
+.. _Simple_Tutorial_code:
+
+
+############################################################
+Code for the simple tutorial: 8 steps to *Ruffus*
+############################################################
+
+ * :ref:`A simple tutorial<Simple_Tutorial>`
+
+
+***************************************
+Table of Contents
+***************************************
+
+
+    * :ref:`Chain tasks (functions) together into a pipeline <Simple_Tutorial_1st_step>`
+    * :ref:`Provide parameters to run jobs in parallel <Simple_Tutorial_2nd_step>`
+    * :ref:`Tracing through your new pipeline <Simple_Tutorial_3rd_step>`
+    * :ref:`Using flowcharts <Simple_Tutorial_4th_step_graphical>`
+    * :ref:`Split up a large problem into smaller chunks <Simple_Tutorial_5th_step>`
+    * :ref:`Calculate partial solutions in parallel <Simple_Tutorial_6th_step>`
+    * :ref:`Re-combine the partial solutions into the final result <Simple_Tutorial_7th_step>`
+    * :ref:`Automatically signal the completion of each step of our pipeline <Simple_Tutorial_8th_step>`
+
+
+
+
+
+
+
+
+
+
diff --git a/doc/_build/html/_sources/tutorials/simple_tutorial/step1_follows.txt b/doc/_build/html/_sources/tutorials/simple_tutorial/step1_follows.txt
new file mode 100644
index 0000000..21a4ae1
--- /dev/null
+++ b/doc/_build/html/_sources/tutorials/simple_tutorial/step1_follows.txt
@@ -0,0 +1,258 @@
+.. include:: ../../global.inc
+.. _Simple_Tutorial_1st_step:
+
+ * :ref:`Simple tutorial overview <Simple_Tutorial>`
+
+###################################################################
+Step 1: An introduction to Ruffus pipelines
+###################################################################
+
+************************************
+Overview
+************************************
+
+ .. raw:: html
+
+ <svg version="1.0" id="Layer_1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" x="0" y="0"
+ width="731.3pt"
+ height="83pt"
+ viewBox="0 0 731.3 83">
+ <defs id="defs3287">
+ <marker refX="0" refY="0" orient="auto" id="Arrow2Mend" style="overflow:visible">
+ <path d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z" transform="scale(-0.6,-0.6)" id="path4124" style="fill-rule:evenodd;stroke-width:0.625;stroke-linejoin:round" />
+ </marker>
+ <marker refX="0" refY="0" orient="auto" id="Arrow2Lend" style="overflow:visible">
+ <path d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z" transform="matrix(-1.1,0,0,-1.1,-1.1,0)" id="path4118" style="fill-rule:evenodd;stroke-width:0.625;stroke-linejoin:round" />
+ </marker>
+ <marker refX="0" refY="0" orient="auto" id="Arrow1Lend" style="overflow:visible">
+ <path d="M 0,0 5,-5 -12.5,0 5,5 0,0 z" transform="matrix(-0.8,0,0,-0.8,-10,0)" id="path4100" style="fill-rule:evenodd;stroke:#000000;stroke-width:1pt" />
+ </marker>
+ <marker refX="0" refY="0" orient="auto" id="Arrow2Mend-4" style="overflow:visible">
+ <path d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z" transform="scale(-0.6,-0.6)" id="path4124-8" style="fill-rule:evenodd;stroke-width:0.625;stroke-linejoin:round" />
+ </marker>
+ <marker refX="0" refY="0" orient="auto" id="Arrow2Mend-1" style="overflow:visible">
+ <path d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z" transform="scale(-0.6,-0.6)" id="path4124-1" style="fill-rule:evenodd;stroke-width:0.625;stroke-linejoin:round" />
+ </marker>
+ <marker refX="0" refY="0" orient="auto" id="Arrow2Mend-1-1" style="overflow:visible">
+ <path d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z" transform="scale(-0.6,-0.6)" id="path4124-1-8" style="fill-rule:evenodd;stroke-width:0.625;stroke-linejoin:round" />
+ </marker>
+ <marker refX="0" refY="0" orient="auto" id="Arrow2Mend-1-2" style="overflow:visible">
+ <path d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z" transform="scale(-0.6,-0.6)" id="path4124-1-7" style="fill-rule:evenodd;stroke-width:0.625;stroke-linejoin:round" />
+ </marker>
+ <marker refX="0" refY="0" orient="auto" id="Arrow2Mend-1-23" style="overflow:visible">
+ <path d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z" transform="scale(-0.6,-0.6)" id="path4124-1-3" style="fill-rule:evenodd;stroke-width:0.625;stroke-linejoin:round" />
+ </marker>
+ </defs>
+ <g transform="translate(-14.608261,-32.693481)" id="layer1">
+ <rect width="89.826035" height="65.392792" x="21.063463" y="39.148708" id="rect3309" style="fill:#ffff00;fill-opacity:1;stroke:#ff0000;stroke-width:0.41040453" />
+ <text x="64.540756" y="62.738293" id="text3311" xml:space="preserve" style="font-size:14px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:Monospace;-inkscape-font-specification:Monospace"><tspan x="67.314194" y="62.738293" id="tspan3313" style="font-size:20px;font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;text-align [...]
+ <text x="118.47811" y="104.62877" id="text4956" xml:space="preserve" style="font-size:21.02927971px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#ff0000;fill-opacity:1;stroke:none;font-family:Monospace;-inkscape-font-specification:Monospace"><tspan x="118.47811" y="104.62877" id="tspan4958" style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;fill:#ff0000;fon [...]
+ <text x="345.62097" y="104.98591" id="text4956-1" xml:space="preserve" style="font-size:21.02927971px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#ff0000;fill-opacity:1;stroke:none;font-family:Monospace;-inkscape-font-specification:Monospace"><tspan x="345.62097" y="104.98591" id="tspan4958-7" style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;fill:#ff0000 [...]
+ <text x="575.62097" y="103.03347" id="text4956-2" xml:space="preserve" style="font-size:21.02927971px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#ff0000;fill-opacity:1;stroke:none;font-family:Monospace;-inkscape-font-specification:Monospace"><tspan x="575.62097" y="103.03347" id="tspan4958-3" style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;fill:#ff0000 [...]
+ <path d="m 110.71429,72.362182 87.14285,0" id="path5080" style="fill:#ff0000;stroke:#ff0000;stroke-width:4;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;marker-end:url(#Arrow2Mend-1)" />
+ <rect width="131.88234" height="65.306244" x="206.10674" y="39.191959" id="rect3309-6" style="fill:#ffff00;fill-opacity:1;stroke:#ff0000;stroke-width:0.49695465" />
+ <text x="273.11218" y="65.772057" id="text3311-2-2" xml:space="preserve" style="font-size:14px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:Monospace;-inkscape-font-specification:Monospace"><tspan x="273.11218" y="65.772057" id="tspan3313-4-2" style="font-size:20px;font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;te [...]
+ <path d="m 338.57143,72.362177 87.14285,0" id="path5080-9" style="fill:#ff0000;stroke:#ff0000;stroke-width:4;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;marker-end:url(#Arrow2Mend-1)" />
+ <rect width="131.88234" height="65.306244" x="433.96387" y="39.191959" id="rect3309-6-4" style="fill:#ffff00;fill-opacity:1;stroke:#ff0000;stroke-width:0.49695465" />
+ <text x="500.96933" y="65.772057" id="text3311-2-2-1" xml:space="preserve" style="font-size:14px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:Monospace;-inkscape-font-specification:Monospace"><tspan x="500.96933" y="65.772057" id="tspan3313-4-2-1" style="font-size:20px;font-style:normal;font-variant:normal;font-weight:bold;font-stretch:norma [...]
+ <path d="m 566.42857,72.362178 87.14285,0" id="path5080-9-8" style="fill:#ff0000;stroke:#ff0000;stroke-width:4;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;marker-end:url(#Arrow2Mend-1)" />
+ <flowRoot id="flowRoot5373" xml:space="preserve" style="font-size:14px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:Monospace;-inkscape-font-specification:Monospace"><flowRegion id="flowRegion5375"><rect width="56.42857" height="339.28571" x="214.28572" y="123.07647" id="rect5377" /></flowRegion><flowPara id="flowPara5379"></flowPara></flowR [...]
+ <text x="700.25507" y="65.071579" id="text3311-2-8" xml:space="preserve" style="font-size:14px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:Monospace;-inkscape-font-specification:Monospace"><tspan x="700.25507" y="65.071579" id="tspan3315-5-7" style="font-size:20px;font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;te [...]
+ </g>
+ </svg>
+
+    Computational pipelines transform your data in stages until the final result is produced. One easy way to understand pipelines is to imagine your data flowing across a series of pipes until it reaches its final destination. Even quite complicated processes can be simplified if we break things down into simple stages. Of course, it helps if we can visualise the whole process.
+
+ Ruffus is a way of automating the plumbing in your pipeline: You supply the python functions which perform the data transformation, and tell Ruffus how these pipeline ``task`` functions are connected up. Ruffus will make sure that the right data flows down your pipeline in the right way at the right time.
+
+
+ .. note::
+
+ Ruffus refers to each stage of your pipeline as a :term:`task`.
+
+***************************************
+A gentle introduction to Ruffus syntax
+***************************************
+
+ | Let us start with the usual "Hello World" programme.
+ | We have the following two python functions which
+ we would like to turn into an automatic pipeline:
+
+
+ ::
+
+ def first_task():
+ print "Hello "
+
+ def second_task():
+ print "world"
+
+
+ The simplest **Ruffus** pipeline would look like this:
+
+ .. ::
+
+ from ruffus import *
+
+ def first_task():
+ print "Hello "
+
+ @follows(first_task)
+ def second_task():
+ print "world"
+
+ pipeline_run([second_task])
+
+
+ .. raw:: html
+
+ <svg version="1.0" id="Layer_1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" x="0" y="0"
+ width="411pt"
+ height="166pt"
+ viewBox="0 0 411 166">
+ <rect width="193.46577" height="153.25462" x="113.1341" y="6.25" id="rect3523-1" style="fill:#eeffcc" /><g transform="matrix(0.74399708,0,0,0.74399708,123.4741,7.2693622)" id="g6703" style="font-size:14px;font-family:monospace"><text x="0" y="14" id="text6705" xml:space="preserve"><tspan id="tspan6707" style="font-weight:bold;fill:#008000">from</tspan> <tspan id="tspan6709" style="font-weight:bold;fill:#0e84b5">ruffus</tspan> <tspan id="tspan6711" style="font-weight:bold;fill:#00 [...]
+ <text x="0" y="33" id="text6715" xml:space="preserve" />
+ <text x="0" y="52" id="text6717" xml:space="preserve"><tspan id="tspan6719" style="font-weight:bold;fill:#008000">def</tspan> <tspan id="tspan6721" style="font-weight:bold;fill:#0060b0">first_task</tspan>():</text>
+ <text x="0" y="71" id="text6723" xml:space="preserve"> <tspan id="tspan6725" style="font-weight:bold;fill:#008000">print</tspan> "Hello "</text>
+ <text x="0" y="90" id="text6727" xml:space="preserve" />
+ <text x="0" y="109" id="text6729" xml:space="preserve"><tspan id="tspan6731" style="font-weight:bold;fill:#505050">@follows</tspan>(first_task)</text>
+ <text x="0" y="128" id="text6733" xml:space="preserve"><tspan id="tspan6735" style="font-weight:bold;fill:#008000">def</tspan> <tspan id="tspan6737" style="font-weight:bold;fill:#0060b0">second_task</tspan>():</text>
+ <text x="0" y="147" id="text6739" xml:space="preserve"> <tspan id="tspan6741" style="font-weight:bold;fill:#008000">print</tspan> "world"</text>
+ <text x="0" y="166" id="text6743" xml:space="preserve" />
+ <text x="0" y="185" id="text6745" xml:space="preserve">pipeline_run([second_task])</text>
+ <text x="0" y="204" id="text6747" xml:space="preserve" />
+ </g><g transform="matrix(0,-1.0740862,0.50028548,0,83.609122,151.75772)" id="g3645-7" style="fill:#ff0000;stroke:#ff0000;stroke-opacity:1"><line x1="125.896" y1="53.333" x2="125.896" y2="15.667" id="line3647-4" style="fill:#ff0000;stroke:#ff0000;stroke-opacity:1" /><g id="g3649-0" style="fill:#ff0000;stroke:#ff0000;stroke-opacity:1"><line stroke-miterlimit="10" x1="125.896" y1="49.028" x2="125.896" y2="15.667" id="line3651-9" style="fill:#ff0000;stroke:#ff0000;stroke-miterlimit:1 [...]
+ <path d="m 295.24733,142.14802 c 0,3.84316 -10.60785,6.95911 -23.6936,6.95911 H 138.38975 c -13.08581,0 -23.69366,-3.11595 -23.69366,-6.95911 l 0,0 c 0,-3.84321 10.60785,-6.95927 23.69366,-6.95927 h 133.16146 c 13.08587,0 23.69612,3.11606 23.69612,6.95927 l 0,0 z" id="path3671-9" style="fill:none;stroke:#ff0000;stroke-width:1.07262194;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none" /><path d="m 254.65378,15.578563 c 0,3.87736 -8.2265,7.02106 -18.37473,7.02106 H 133.00 [...]
+ <line style="fill:#ff0000;stroke:#ff0000;stroke-width:0.73304141;stroke-opacity:1" id="line3647-8" y2="83.510956" x2="78.575699" y1="83.510956" x1="97.419533" /><g transform="matrix(0,-1.0740862,0.50028548,0,70.737661,218.73401)" id="g3649-6" style="fill:#008000;stroke:#008000;stroke-opacity:1"><line style="fill:#008000;stroke:#008000;stroke-miterlimit:10;stroke-opacity:1" id="line3651-5" y2="15.667" x2="125.896" y1="49.028" x1="125.896" stroke-miterlimit="10" /><g id="g3653-0" s [...]
+ <text x="270.09064" y="129.21878" transform="scale(1.1082192,0.90234857)" id="text7608-0" xml:space="preserve" style="font-size:43.24214554px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:Monospace;-inkscape-font-specification:Monospace"><tspan x="270.09064" y="129.21878" id="tspan7610-6">}</tspan></text>
+ <text x="330.33087" y="60.88369" id="text7633" xml:space="preserve" style="font-size:15.01670647px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:Monospace;-inkscape-font-specification:Monospace"><tspan x="330.33087" y="60.88369" id="tspan7635" style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;fill:#0000ff;font-fa [...]
+ <text x="6.2617145" y="89.451149" id="text7600" xml:space="preserve" style="font-size:15.01670647px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:Monospace;-inkscape-font-specification:Monospace"><tspan x="6.2617145" y="89.451149" id="tspan7602" style="font-size:12.87146473px;fill:#008000;font-family:arial;-inkscape-font-specification:arial">2. D [...]
+ </svg>
+
+
+ The functions which do the actual work of each stage of the pipeline remain unchanged.
+ The role of **Ruffus** is to make sure these functions are called in the right order,
+ with the right parameters, running in parallel using multiprocessing if desired.
+
+    There are three simple parts to building a **ruffus** pipeline:
+
+ #. importing ruffus
+ #. "Decorating" functions which are part of the pipeline
+ #. Running the pipeline!
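+
+    Putting these three parts together (the same code as in the figure above):
+
+    ::
+
+        from ruffus import *
+
+        def first_task():
+            print "Hello "
+
+        @follows(first_task)
+        def second_task():
+            print "world"
+
+        pipeline_run([second_task])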
+
+.. index::
+ pair: decorators; Tutorial
+
+
+****************************
+"Decorators"
+****************************
+
+    You need to tag or :term:`decorate <decorator>` existing code to tell **Ruffus** that it is part
+    of the pipeline.
+
+ .. note::
+
+ python :term:`decorator`\ s are ways to tag or mark out functions.
+
+        They start with an ``@`` prefix and take a number of parameters in parentheses.
+
+ .. :: .. image:: ../../images/simple_tutorial_decorator_syntax.png
+
+ .. raw:: html
+
+ <svg version="1.0" id="Layer_1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" x="0" y="0"
+ width="249.5pt" height="67.5pt" viewBox="0 0 249.5 67.5">
+ <g transform="scale(1)">
+ <rect x="4.5" y="14.667" fill="#eeffcc" stroke="#016735" stroke-width="0.25" stroke-miterlimit="10" width="157" height="52.833"/>
+ <rect x="3.25" y="14.667" fill="#eeffcc" width="159.5" height="52.833"/>
+ <text transform="matrix(1 0 0 1 14.5 33.6177)"><tspan x="0" y="0" font-family="'Courier'" font-weight="bold" font-size="12">@follows</tspan><tspan x="57.609" y="0" font-family="'Courier'" font-size="12">(first_task)</tspan><tspan x="0" y="14.4" fill="#006838" font-family="'Courier'" font-weight="bold" font-size="12">def</tspan><tspan x="21.604" y="14.4" font-family="'Courier'" font-size="12"> second_task():</tspan><tspan x="0" y="28.8" font-family="'Courier'" font-size= [...]
+ <path fill="none" stroke="#ED1C24" stroke-miterlimit="10" d="M73.25,29.762c0,4.688-3.731,8.488-8.333,8.488H18.083
+ c-4.602,0-8.333-3.8-8.333-8.488l0,0c0-4.688,3.731-8.488,8.333-8.488h46.834C69.519,21.274,73.25,25.075,73.25,29.762L73.25,29.762
+ z"/>
+ <g>
+ <g>
+ <line fill="none" stroke="#FF0000" stroke-miterlimit="10" x1="74.775" y1="20.142" x2="106" y2="7.5"/>
+ <g>
+ <path fill="#ED1C24" d="M71.978,21.274c1.514-0.044,3.484,0.127,4.854,0.6l-1.689-1.881l-0.095-2.526
+ C74.392,18.759,73.097,20.253,71.978,21.274z"/>
+ </g>
+ </g>
+ </g>
+ <text transform="matrix(1 0 0 1 107.75 11.5)" fill="#FF0000" " font-size="12">Decorator</text>
+ <text transform="matrix(1 0 0 1 170.75 50.75)"><tspan x="0" y="0" fill="#0000FF" font-size="12">Normal Python </tspan><tspan x="0" y="14.4" fill="#0000FF" font-size="12">Function</tspan></text>
+ <g>
+ <line fill="#0000FF" x1="166.5" y1="46.5" x2="147" y2="46.5"/>
+ <g>
+ <line fill="none" stroke="#0000FF" stroke-miterlimit="10" x1="166.5" y1="46.5" x2="150.018" y2="46.5"/>
+ <g>
+ <path fill="#0000FF" d="M147,46.5c1.42-0.527,3.182-1.426,4.273-2.378l-0.86,2.378l0.86,2.377
+ C150.182,47.925,148.42,47.026,147,46.5z"/>
+ </g>
+ </g>
+ </g>
+ </g>
+ </svg>
+
+ The **ruffus** decorator :ref:`@follows <decorators.follows>` makes sure that
+ ``second_task`` follows ``first_task``.
+
+
+ | Multiple :term:`decorator`\ s can be used for each :term:`task` function to add functionality
+ to *Ruffus* pipeline functions.
+ | However, the decorated python functions can still be
+ called normally, outside of *Ruffus*.
+ | *Ruffus* :term:`decorator`\ s can be added to (stacked on top of) any function in any order.
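+
+    For example, because the decorated ``second_task()`` above is still an ordinary
+    python function, it can also be called directly, outside of *Ruffus*:
+
+    ::
+
+        # a normal python function call, outside of Ruffus; prints "world"
+        second_task()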
+
+    * :ref:`More on @follows in the Ruffus Manual <manual.follows>`
+ * :ref:`@follows syntax in detail <decorators.follows>`
+
+
+.. index::
+ pair: pipeline_run; Tutorial
+
+****************************
+Running the pipeline
+****************************
+
+ We run the pipeline by specifying the **last** stage (:term:`task` function) of your pipeline.
+ Ruffus will know what other functions this depends on, following the appropriate chain of
+ dependencies automatically, making sure that the entire pipeline is up-to-date.
+
+ Because ``second_task`` depends on ``first_task``, both functions are executed in order.
+
+ ::
+
+ >>> pipeline_run([second_task], verbose = 1)
+
+    By default, **Ruffus** prints out its progress (at the specified ``verbose`` level) as it
+    runs the pipelined code, interleaved with the **Hello** printed by ``first_task`` and the
+    **world** printed by ``second_task``.
+
+
+
+ .. ::
+
+ >>> pipeline_run([second_task], verbose = 1)
+ Start Task = first_task
+ Hello
+ Job completed
+ Completed Task = first_task
+ Start Task = second_task
+ world
+ Job completed
+ Completed Task = second_task
+
+
+ .. raw:: html
+
+ <svg version="1.0" id="Layer_1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" x="0" y="0"
+ width="375pt" height="108pt" viewBox="0 0 375 108">
+ <rect width="359.146" height="95.347786" x="7.8544765" y="6.3284979" id="rect3521" style="fill:none;stroke:#016735;stroke-width:0.18506026;stroke-miterlimit:10" /><rect width="362.35596" height="95.347786" x="6.2499924" y="6.3284979" id="rect3523" style="fill:#eeffcc" />
+ <text x="9.2210703" y="18.304934" id="text3345" xml:space="preserve" style="font-size:10.39404392px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:Monospace;-inkscape-font-specification:Monospace"><tspan x="9.2210703" y="18.304934" id="tspan3347"><tspan id="tspan3365" style="font-weight:bold;fill:#ff0000;-inkscape-font-specification:Monospace [...]
+ <text x="392.0932" y="73.633965" transform="scale(0.78097325,1.2804536)" id="text3373" xml:space="preserve" style="font-size:17.92634964px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:Monospace;-inkscape-font-specification:Monospace"><tspan x="392.0932" y="73.633965" id="tspan3375"> </tspan></text>
+ </svg>
+
+
+
+
diff --git a/doc/_build/html/_sources/tutorials/simple_tutorial/step2.txt b/doc/_build/html/_sources/tutorials/simple_tutorial/step2.txt
new file mode 100644
index 0000000..06c8f3b
--- /dev/null
+++ b/doc/_build/html/_sources/tutorials/simple_tutorial/step2.txt
@@ -0,0 +1,517 @@
+.. include:: ../../global.inc
+.. _Simple_Tutorial_2nd_step:
+
+.. index::
+ pair: @transform; Tutorial
+
+###################################################################
+Step 2: ``@transform``-ing data in a pipeline
+###################################################################
+
+ * :ref:`Simple tutorial overview <Simple_Tutorial>`
+ * :ref:`@transform syntax in detail <decorators.transform>`
+
+.. note::
+
+ Remember to look at the example code:
+
+ * :ref:`Python Code for step 2 <Simple_Tutorial_2nd_step_code>`
+
+***************************************
+Overview
+***************************************
+ .. raw:: html
+
+ <svg version="1.0" id="Layer_1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" x="0" y="0"
+ width="731.3pt"
+ height="83pt"
+ viewBox="0 0 731.3 83">
+ <defs id="defs3287">
+ <marker refX="0" refY="0" orient="auto" id="Arrow2Mend" style="overflow:visible">
+ <path d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z" transform="scale(-0.6,-0.6)" id="path4124" style="fill-rule:evenodd;stroke-width:0.625;stroke-linejoin:round" />
+ </marker>
+ <marker refX="0" refY="0" orient="auto" id="Arrow2Lend" style="overflow:visible">
+ <path d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z" transform="matrix(-1.1,0,0,-1.1,-1.1,0)" id="path4118" style="fill-rule:evenodd;stroke-width:0.625;stroke-linejoin:round" />
+ </marker>
+ <marker refX="0" refY="0" orient="auto" id="Arrow1Lend" style="overflow:visible">
+ <path d="M 0,0 5,-5 -12.5,0 5,5 0,0 z" transform="matrix(-0.8,0,0,-0.8,-10,0)" id="path4100" style="fill-rule:evenodd;stroke:#000000;stroke-width:1pt" />
+ </marker>
+ <marker refX="0" refY="0" orient="auto" id="Arrow2Mend-4" style="overflow:visible">
+ <path d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z" transform="scale(-0.6,-0.6)" id="path4124-8" style="fill-rule:evenodd;stroke-width:0.625;stroke-linejoin:round" />
+ </marker>
+ <marker refX="0" refY="0" orient="auto" id="Arrow2Mend-1" style="overflow:visible">
+ <path d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z" transform="scale(-0.6,-0.6)" id="path4124-1" style="fill-rule:evenodd;stroke-width:0.625;stroke-linejoin:round" />
+ </marker>
+ <marker refX="0" refY="0" orient="auto" id="Arrow2Mend-1-1" style="overflow:visible">
+ <path d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z" transform="scale(-0.6,-0.6)" id="path4124-1-8" style="fill-rule:evenodd;stroke-width:0.625;stroke-linejoin:round" />
+ </marker>
+ <marker refX="0" refY="0" orient="auto" id="Arrow2Mend-1-2" style="overflow:visible">
+ <path d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z" transform="scale(-0.6,-0.6)" id="path4124-1-7" style="fill-rule:evenodd;stroke-width:0.625;stroke-linejoin:round" />
+ </marker>
+ <marker refX="0" refY="0" orient="auto" id="Arrow2Mend-1-23" style="overflow:visible">
+ <path d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z" transform="scale(-0.6,-0.6)" id="path4124-1-3" style="fill-rule:evenodd;stroke-width:0.625;stroke-linejoin:round" />
+ </marker>
+ </defs>
+ <g transform="translate(-14.608261,-32.693481)" id="layer1">
+ <rect width="89.826035" height="65.392792" x="21.063463" y="39.148708" id="rect3309" style="fill:#ffff00;fill-opacity:1;stroke:#ff0000;stroke-width:0.41040453" />
+ <text x="64.540756" y="62.738293" id="text3311" xml:space="preserve" style="font-size:14px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:Monospace;-inkscape-font-specification:Monospace"><tspan x="67.314194" y="62.738293" id="tspan3313" style="font-size:20px;font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;text-align [...]
+ <text x="118.47811" y="104.62877" id="text4956" xml:space="preserve" style="font-size:21.02927971px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#ff0000;fill-opacity:1;stroke:none;font-family:Monospace;-inkscape-font-specification:Monospace"><tspan x="118.47811" y="104.62877" id="tspan4958" style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;fill:#ff0000;fon [...]
+ <text x="345.62097" y="104.98591" id="text4956-1" xml:space="preserve" style="font-size:21.02927971px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#ff0000;fill-opacity:1;stroke:none;font-family:Monospace;-inkscape-font-specification:Monospace"><tspan x="345.62097" y="104.98591" id="tspan4958-7" style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;fill:#ff0000 [...]
+ <text x="575.62097" y="103.03347" id="text4956-2" xml:space="preserve" style="font-size:21.02927971px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#ff0000;fill-opacity:1;stroke:none;font-family:Monospace;-inkscape-font-specification:Monospace"><tspan x="575.62097" y="103.03347" id="tspan4958-3" style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;fill:#ff0000 [...]
+ <path d="m 110.71429,72.362182 87.14285,0" id="path5080" style="fill:#ff0000;stroke:#ff0000;stroke-width:4;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;marker-end:url(#Arrow2Mend-1)" />
+ <rect width="131.88234" height="65.306244" x="206.10674" y="39.191959" id="rect3309-6" style="fill:#ffff00;fill-opacity:1;stroke:#ff0000;stroke-width:0.49695465" />
+ <text x="273.11218" y="65.772057" id="text3311-2-2" xml:space="preserve" style="font-size:14px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:Monospace;-inkscape-font-specification:Monospace"><tspan x="273.11218" y="65.772057" id="tspan3313-4-2" style="font-size:20px;font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;te [...]
+ <path d="m 338.57143,72.362177 87.14285,0" id="path5080-9" style="fill:#ff0000;stroke:#ff0000;stroke-width:4;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;marker-end:url(#Arrow2Mend-1)" />
+ <rect width="131.88234" height="65.306244" x="433.96387" y="39.191959" id="rect3309-6-4" style="fill:#ffff00;fill-opacity:1;stroke:#ff0000;stroke-width:0.49695465" />
+ <text x="500.96933" y="65.772057" id="text3311-2-2-1" xml:space="preserve" style="font-size:14px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:Monospace;-inkscape-font-specification:Monospace"><tspan x="500.96933" y="65.772057" id="tspan3313-4-2-1" style="font-size:20px;font-style:normal;font-variant:normal;font-weight:bold;font-stretch:norma [...]
+ <path d="m 566.42857,72.362178 87.14285,0" id="path5080-9-8" style="fill:#ff0000;stroke:#ff0000;stroke-width:4;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;marker-end:url(#Arrow2Mend-1)" />
+ <flowRoot id="flowRoot5373" xml:space="preserve" style="font-size:14px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:Monospace;-inkscape-font-specification:Monospace"><flowRegion id="flowRegion5375"><rect width="56.42857" height="339.28571" x="214.28572" y="123.07647" id="rect5377" /></flowRegion><flowPara id="flowPara5379"></flowPara></flowR [...]
+ <text x="700.25507" y="65.071579" id="text3311-2-8" xml:space="preserve" style="font-size:14px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:Monospace;-inkscape-font-specification:Monospace"><tspan x="700.25507" y="65.071579" id="tspan3315-5-7" style="font-size:20px;font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;te [...]
+ </g>
+ </svg>
+
+
+ Computational pipelines transform your data in stages until the final result is produced.
+ Ruffus automates the plumbing in your pipeline. You supply the python functions which perform the data transformation,
+ and tell Ruffus how these pipeline stages or :term:`task` functions are connected together.
+
+ .. note::
+
+ **The best way to design a pipeline is to:**
+
+        * **write down the file names of the data as it flows across your pipeline**
+        * **write down the names of the functions which transform the data at each stage of the pipeline.**
+
+
+ By letting **Ruffus** manage your pipeline parameters, you will get the following features
+ for free:
+
+ #. only out-of-date parts of the pipeline will be re-run
+ #. multiple jobs can be run in parallel (on different processors if possible)
+ #. pipeline stages can be chained together automatically
+
+
+
+
+************************************
+@transform
+************************************
+ Let us start with the simplest pipeline with a single *input* data file **transform**\ed
+ into a single *output* file. We will add some arbitrary extra parameters as well.
+
+ The :ref:`@transform <decorators.transform>` decorator tells Ruffus that this
+ task function **transforms** each and every piece of input data into a corresponding output.
+
+ In other words, inputs and outputs have a **1 to 1** relationship.
+
+ .. note::
+
+        In the second part of the tutorial, we will encounter more decorators which can *split up*, *join together* or *group* inputs.
+
+        In other words, inputs and outputs can have **many to one**, **many to many** etc. relationships.
+
+
+
+ Let us provide **input**\s and **output**\s to our new pipeline:
+
+ .. raw:: html
+
+ <svg version="1.0" id="Layer_1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" x="0" y="0"
+ width="385pt"
+ height="210pt"
+ viewBox="0 0 385 210">
+ <defs id="defs3568">
+ <marker refX="0" refY="0" orient="auto" id="Arrow2Mend" style="overflow:visible">
+ <path d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z" transform="scale(-0.6,-0.6)" id="path4497" style="fill-rule:evenodd;stroke-width:0.625;stroke-linejoin:round" />
+ </marker>
+ <marker refX="0" refY="0" orient="auto" id="Arrow1Lend" style="overflow:visible">
+ <path d="M 0,0 5,-5 -12.5,0 5,5 0,0 z" transform="matrix(-0.8,0,0,-0.8,-10,0)" id="path4473" style="fill-rule:evenodd;stroke:#000000;stroke-width:1pt" />
+ </marker>
+ <marker refX="0" refY="0" orient="auto" id="TriangleOutL" style="overflow:visible">
+ <path d="m 5.77,0 -8.65,5 0,-10 8.65,5 z" transform="scale(0.8,0.8)" id="path4612" style="fill-rule:evenodd;stroke:#000000;stroke-width:1pt" />
+ </marker>
+ <marker refX="0" refY="0" orient="auto" id="Arrow2Send" style="overflow:visible">
+ <path d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z" transform="matrix(-0.3,0,0,-0.3,0.69,0)" id="path4503" style="fill-rule:evenodd;stroke-width:0.625;stroke-linejoin:round" />
+ </marker>
+ <marker refX="0" refY="0" orient="auto" id="Arrow1Mend" style="overflow:visible">
+ <path d="M 0,0 5,-5 -12.5,0 5,5 0,0 z" transform="matrix(-0.4,0,0,-0.4,-4,0)" id="path4479" style="fill-rule:evenodd;stroke:#000000;stroke-width:1pt" />
+ </marker>
+ <marker refX="0" refY="0" orient="auto" id="Arrow2Lend" style="overflow:visible">
+ <path d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z" transform="matrix(-1.1,0,0,-1.1,-1.1,0)" id="path4491" style="fill-rule:evenodd;stroke-width:0.625;stroke-linejoin:round" />
+ </marker>
+ </defs>
+ <g transform="matrix(1.0077068,0,0,1,9.8598546,3.6099157)" id="g3519">
+ <rect width="359.146" height="174.006" x="6.244" y="27.667" id="rect3521" style="fill:none;stroke:#016735;stroke-width:0.25;stroke-miterlimit:10" />
+ <rect width="364.86499" height="174.006" x="3.385" y="27.667" id="rect3523" style="fill:#eeffcc" />
+ </g>
+ <path d="m 84.221497,82.954914 c 0,3.583 -3.942,6.488 -8.804,6.488 h -49.481 c -4.862,0 -8.803999,-2.905 -8.803999,-6.488 l 0,0 c 0,-3.582998 3.941999,-6.487998 8.803999,-6.487998 h 49.481 c 4.862,0 8.804,2.905 8.804,6.487998 l 0,0 z" id="path3641" style="opacity:0.2;fill:#ed1c24" />
+ <path d="m 190.77333,82.954914 c 0,3.583 -6.09517,6.488 -13.61411,6.488 h -76.51461 c -7.518948,0 -13.614113,-2.905 -13.614113,-6.488 l 0,0 c 0,-3.582998 6.095165,-6.487998 13.614113,-6.487998 h 76.51322 c 7.51894,0 13.6155,2.905 13.6155,6.487998 l 0,0 z" id="path3671" style="opacity:0.2;fill:#00a14b" />
+ <path d="m 368.12884,82.954914 c 0,3.583 -3.66127,6.488 -8.17701,6.488 h -45.95795 c -4.5166,0 -8.17787,-2.905 -8.17787,-6.488 l 0,0 c 0,-3.582998 3.66127,-6.487998 8.17787,-6.487998 h 45.95795 c 4.51574,0 8.17701,2.905 8.17701,6.487998 l 0,0 z" id="path3687" style="opacity:0.2;fill:#00a14b" />
+ <path d="m 300.38251,97.842914 c 0,3.406006 -2.537,6.167006 -5.667,6.167006 H 90.049497 c -3.129,0 -5.667,-2.761 -5.667,-6.167006 l 0,0 c 0,-3.406 2.537,-6.167 5.667,-6.167 H 294.71651 c 3.129,10e-4 5.666,2.762 5.666,6.167 l 0,0 z" id="path3689" style="opacity:0.2;fill:#00a14b" />
+ <text x="22.156881" y="58.294636" transform="scale(1.0042467,0.99577126)" id="text3295" xml:space="preserve" style="font-size:9.89657974px;font-family:monospace" />
+ <text x="22.156881" y="138.88107" transform="scale(1.0042467,0.99577126)" id="text3317" xml:space="preserve" style="font-size:9.89657974px;font-family:monospace" />
+ <text x="22.156881" y="179.17427" transform="scale(1.0042467,0.99577126)" id="text3327" xml:space="preserve" style="font-size:9.89657974px;font-family:monospace" />
+ <text x="22.156881" y="206.03641" transform="scale(1.0042467,0.99577126)" id="text3331" xml:space="preserve" style="font-size:9.89657974px;font-family:monospace" />
+ <text x="22.156881" y="54.504337" transform="scale(1.0042467,0.99577126)" id="text3285-5" xml:space="preserve" style="font-size:9.89657974px;font-family:monospace" />
+ <g transform="matrix(1,0,0,1.442061,16.381543,-1.5326283)" id="g3645">
+ <line x1="125.896" y1="53.333" x2="125.896" y2="15.667" id="line3647" style="fill:#00ff00" />
+ <g id="g3649">
+ <line stroke-miterlimit="10" x1="125.896" y1="49.028" x2="125.896" y2="15.667" id="line3651" style="fill:none;stroke:#00a651;stroke-miterlimit:10" />
+ <g id="g3653">
+ <polygon points="128.888,48.153 125.897,53.333 122.905,48.153 " id="polygon3655" style="fill:#00a651" />
+ </g>
+ </g>
+ </g>
+ <g transform="matrix(1,0,0,1.3096241,27.941781,-1.0861523)" id="g3659">
+ <line x1="267.23001" y1="70.667" x2="267.23001" y2="15.667" id="line3661" style="fill:#00ff00" />
+ <g id="g3663">
+ <line stroke-miterlimit="10" x1="267.23001" y1="66.361" x2="267.23001" y2="15.667" id="line3665" style="fill:none;stroke:#00a651;stroke-miterlimit:10" />
+ <g id="g3667">
+ <polygon points="270.222,65.486 267.23,70.667 264.238,65.486 " id="polygon3669" style="fill:#00a651" />
+ </g>
+ </g>
+ </g>
+ <g transform="matrix(1,0,0,1.4502473,19.50593,-3.2189853)" id="g3675">
+ <line x1="313.56299" y1="53.333" x2="313.56299" y2="15.667" id="line3677" style="fill:#00ff00" />
+ <g id="g3679">
+ <line stroke-miterlimit="10" x1="313.56299" y1="49.028" x2="313.56299" y2="15.667" id="line3681" style="fill:none;stroke:#00a651;stroke-miterlimit:10" />
+ <g id="g3683">
+ <polygon points="316.556,48.153 313.564,53.333 310.572,48.153 " id="polygon3685" style="fill:#00a651" />
+ </g>
+ </g>
+ </g>
+ <text x="4.0742145" y="13.589844" id="text3629" style="font-size:12px;fill:#ff0000;font-family:ArialMT">Decorator</text>
+ <text x="114.1965" y="15.776917" id="text3643" style="font-size:12px;fill:#00a14b;font-family:ArialMT">Inputs</text>
+ <text x="207.08836" y="15.776917" id="text3657" style="font-size:12px;fill:#00a14b;font-family:ArialMT">Extra parameters</text>
+ <text x="313.42374" y="15.776917" id="text3673" style="font-size:12px;fill:#00a14b;font-family:ArialMT">Outputs</text>
+ <text x="22.156881" y="44.86356" transform="scale(1.0042467,0.99577126)" id="text3285" xml:space="preserve" style="font-size:9.89657974px;font-family:monospace"><tspan id="tspan3287" style="font-weight:bold;fill:#008000">from</tspan> <tspan id="tspan3289" style="font-weight:bold;fill:#0e84b5">ruffus</tspan> <tspan id="tspan3291" style="font-weight:bold;fill:#008000">import</tspan> <tspan id="tspan3293" style="fill:#303030">*</tspan></text>
+ <text x="22.156881" y="64.113289" transform="scale(1.0042467,0.99577126)" id="text3329-1" xml:space="preserve" style="font-size:9.89657974px;font-family:monospace">first_task_params = 'job1.input'</text>
+ <text x="22.156881" y="86.186874" transform="scale(1.0042467,0.99577126)" id="text3297" xml:space="preserve" style="font-size:9.89657974px;font-family:monospace"><tspan id="tspan3299" style="font-weight:bold;fill:#505050">@transform</tspan>(first_task_params, <tspan id="tspan3548" style="font-weight:bold;fill:#ff0000">suffix</tspan>(".input"), ".output1", </text>
+ <text x="22.156881" y="99.61795" transform="scale(1.0042467,0.99577126)" id="text3301" xml:space="preserve" style="font-size:9.89657974px;font-family:monospace"> "some_extra.string.for_example", <tspan id="tspan3303" style="font-weight:bold;fill:#0000d0">14</tspan>)</text>
+ <text x="22.156881" y="113.04904" transform="scale(1.0042467,0.99577126)" id="text3305" xml:space="preserve" style="font-size:9.89657974px;font-family:monospace"><tspan id="tspan3307" style="font-weight:bold;fill:#008000">def</tspan> <tspan id="tspan3309" style="font-weight:bold;fill:#0060b0">first_task</tspan>(input_file, output_file,</text>
+ <text x="22.156881" y="126.48013" transform="scale(1.0042467,0.99577126)" id="text3311" xml:space="preserve" style="font-size:9.89657974px;font-family:monospace"> extra_parameter_str, extra_parameter_num):</text>
+ <text x="22.156881" y="139.91115" transform="scale(1.0042467,0.99577126)" id="text3313" xml:space="preserve" style="font-size:9.89657974px;font-family:monospace"> <tspan id="tspan3315" style="font-weight:bold;fill:#008000">pass</tspan></text>
+ <text x="22.156881" y="166.77328" transform="scale(1.0042467,0.99577126)" id="text3319" xml:space="preserve" style="font-size:9.89657974px;font-family:monospace"><tspan id="tspan3321" style="fill:#808080"># make sure the input file is there</tspan></text>
+ <text x="22.156881" y="180.20436" transform="scale(1.0042467,0.99577126)" id="text3323" xml:space="preserve" style="font-size:9.89657974px;font-family:monospace"><tspan id="tspan3325" style="fill:#007020">open</tspan>('job1.input', "w")</text>
+ <text x="22.156881" y="199.1844" transform="scale(1.0042467,0.99577126)" id="text3329" xml:space="preserve" style="font-size:9.89657974px;font-family:monospace">pipeline_run([first_task])</text>
+ <path d="m 11.798134,17.75418 0,40.879392 11.506636,16.489166" id="path3696" style="fill:none;stroke:#ff0000;stroke-width:1.25536001;stroke-linecap:butt;stroke-linejoin:bevel;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;marker-end:url(#Arrow2Mend)" />
+ </svg>
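+
+    The same code, in plain text rather than as a figure:
+
+    ::
+
+        from ruffus import *
+
+        first_task_params = 'job1.input'
+
+        @transform(first_task_params, suffix(".input"), ".output1",
+                   "some_extra.string.for_example", 14)
+        def first_task(input_file, output_file,
+                       extra_parameter_str, extra_parameter_num):
+            pass
+
+        # make sure the input file is there
+        open('job1.input', "w")
+
+        pipeline_run([first_task])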
+
+
+ The ``@transform`` decorator tells Ruffus to generate the appropriate arguments for our python function:
+
+ * The input file name is as given: ``job1.input``
+ * The output file name is the input file name with its **suffix** of ``.input`` replaced with ``.output1``
+ * There are two extra parameters, a string and a number.
+
+ This is exactly equivalent to the following function call:
+
+ ::
+
+ first_task('job1.input', 'job1.output1', "some_extra.string.for_example", 14)
+
+
+    Even though this (empty) function doesn't do anything just yet, the output from the **Ruffus** ``pipeline_run`` will show that this part of the pipeline completed successfully:
+
+ .. raw:: html
+
+ <svg version="1.0" id="Layer_1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" x="0" y="0" width="374.86499pt" height="92.02504pt" viewBox="0 0 374.86498 92.025041">
+ <rect width="359.146" height="58.154449" x="7.8589935" y="28.798326" id="rect3521" style="fill:none;stroke:#016735;stroke-width:0.14452712;stroke-miterlimit:10" /><rect width="364.86499" height="58.154449" x="4.9999938" y="28.798326" id="rect3523" style="fill:#eeffcc" />
+ <g transform="matrix(1,0,0,0.72872639,-16.352384,4.6212592)" id="g3645">
+ <line x1="125.896" y1="53.333" x2="125.896" y2="15.667" id="line3647" style="fill:#00ff00" />
+ <g id="g3649">
+ <line stroke-miterlimit="10" x1="125.896" y1="49.028" x2="125.896" y2="15.667" id="line3651" style="fill:none;stroke:#00a651;stroke-miterlimit:10" />
+ <g id="g3653">
+ <polygon points="128.888,48.153 125.897,53.333 122.905,48.153 " id="polygon3655" style="fill:#00a651" />
+ </g>
+ </g>
+ </g>
+ <g transform="matrix(1,0,0,0.73146564,-0.38500643,4.5843285)" id="g3659">
+ <line x1="267.23001" y1="70.667" x2="267.23001" y2="15.667" id="line3661" style="fill:#00ff00" />
+ <g id="g3663">
+ <line stroke-miterlimit="10" x1="267.23001" y1="66.361" x2="267.23001" y2="15.667" id="line3665" style="fill:none;stroke:#00a651;stroke-miterlimit:10" />
+ <g id="g3667">
+ <polygon points="267.23,70.667 264.238,65.486 270.222,65.486 " id="polygon3669" style="fill:#00a651" />
+ </g>
+ </g>
+ </g>
+ <path d="m 153.04485,51.262472 c 0,3.583 -4.15868,6.488 -9.28879,6.488 H 91.550787 c -5.130114,0 -9.288794,-2.905 -9.288794,-6.488 l 0,0 c 0,-3.583 4.15868,-6.488 9.288794,-6.488 h 52.204323 c 5.13012,0 9.28974,2.905 9.28974,6.488 l 0,0 z" id="path3671" style="opacity:0.2;fill:#00a14b" />
+ <g transform="matrix(1,0,0,0.72872639,-103.64072,4.6212592)" id="g3675">
+ <line x1="313.56299" y1="53.333" x2="313.56299" y2="15.667" id="line3677" style="fill:#00ff00" />
+ <g id="g3679">
+ <line stroke-miterlimit="10" x1="313.56299" y1="49.028" x2="313.56299" y2="15.667" id="line3681" style="fill:none;stroke:#00a651;stroke-miterlimit:10" />
+ <g id="g3683">
+ <polygon points="316.556,48.153 313.564,53.333 310.572,48.153 " id="polygon3685" style="fill:#00a651" />
+ </g>
+ </g>
+ </g>
+ <path d="m 250.58388,51.262472 c 0,3.583 -4.83746,6.488 -10.80388,6.488 h -60.72201 c -5.96757,0 -10.80503,-2.905 -10.80503,-6.488 l 0,0 c 0,-3.583 4.83746,-6.488 10.80503,-6.488 H 239.78 c 5.96642,0 10.80388,2.905 10.80388,6.488 l 0,0 z" id="path3687" style="opacity:0.2;fill:#00a14b" />
+ <path d="m 295.61399,65.440811 c 0,3.406 -2.537,6.167 -5.667,6.167 H 85.280993 c -3.129,0 -5.667,-2.761 -5.667,-6.167 l 0,0 c 0,-3.406 2.537,-6.167 5.667,-6.167 H 289.94799 c 3.129,10e-4 5.666,2.762 5.666,6.167 l 0,0 z" id="path3689" style="opacity:0.2;fill:#00a14b" />
+ <text x="92.396126" y="13.742188" id="text3643" style="font-size:12px;fill:#00a14b;font-family:ArialMT">Inputs</text>
+ <text x="193.83928" y="13.742188" id="text3673" style="font-size:12px;fill:#00a14b;font-family:ArialMT">Outputs</text>
+ <text x="256.93237" y="13.742188" id="text3657" style="font-size:12px;fill:#00a14b;font-family:ArialMT">Extra parameters</text>
+ <text x="9.3237839" y="40.037392" id="text3040" xml:space="preserve" style="font-size:10.32079887px;font-family:monospace"><tspan id="tspan3042" style="font-weight:bold;fill:#ff0000">>>></tspan> pipeline_run([first_task])</text>
+ <text x="9.3237839" y="54.044189" id="text3046" xml:space="preserve" style="font-size:10.32079887px;font-family:monospace"> Job <tspan id="tspan3048" style="fill:#666666">=</tspan> [job1<tspan id="tspan3050" style="fill:#666666">.</tspan>input <tspan id="tspan3052" style="fill:#666666">-></tspan> job1<tspan id="tspan3056" style="fill:#666666">.</tspan>output1,</text>
+ <text x="58.633194" y="67.193367" id="text3160" xml:space="preserve" style="font-size:10.32079887px;font-family:monospace"> some_extra.string.for_example, 14] completed</text>
+ <text x="9.3237839" y="82.432899" id="text3058" xml:space="preserve" style="font-size:10.32079887px;font-family:monospace">Completed Task <tspan id="tspan3060" style="fill:#666666">=</tspan> first_task</text>
+ </svg>
+
+
+
+************************************
+Task functions as recipes
+************************************
+ This may seem like a lot of effort and complication for something so simple: a normal python function call.
+ However, now that we have annotated a task, we can start using it as part of our computational pipeline:
+
+
+ Each :term:`task` function of the pipeline is a recipe or
+ `rule <http://www.gnu.org/software/make/manual/make.html#Rule-Introduction>`_
+ which can be applied repeatedly to our data.
+
+ For example, one can have
+
+ * a ``compile()`` *task* which will compile any number of source code files, or
+ * a ``count_lines()`` *task* which will count the number of lines in any file or
+ * an ``align_dna()`` *task* which will align the DNA of many chromosomes.
+
+ .. note ::
+
+ **Key Ruffus Terminology**:
+
+ A :term:`task` is an annotated python function which represents a recipe or stage of your pipeline.
+
+ A :term:`job` is each time your recipe is applied to a piece of data, i.e. each time Ruffus calls your function.
+
+ Each **task** or pipeline recipe can thus have many **jobs** each of which can work in parallel on different data.
+
+ In the original example, we have made a single output file by supplying a single input parameter.
+ We shall use much the same syntax to apply the same recipe to *multiple* input files.
+ Instead of providing a single *input*, and a single *output*, we are going to specify
+ the parameters for *three* jobs at once:
+
+ ::
+
+ # previously,
+ # first_task_params = 'job1.input'
+        first_task_params = [
+                                'job1.input',
+                                'job2.input',
+                                'job3.input'
+                            ]
+
+ # make sure the input files are there
+ open('job1.input', "w")
+ open('job2.input', "w")
+ open('job3.input', "w")
+
+ pipeline_run([first_task])
+
+
+ .. :: .. image:: ../../images/simple_tutorial_files3.png
+
+
+
+ Just by changing the inputs from a single file to a list of three files, we now have a pipeline which runs independently on three pieces of data.
+ The results should look familiar:
+
+ ::
+
+ >>> pipeline_run([first_task])
+ Job = [job1.input -> job1.output1,
+ some_extra.string.for_example, 14] completed
+ Job = [job2.input -> job2.output1,
+ some_extra.string.for_example, 14] completed
+ Job = [job3.input -> job3.output1,
+ some_extra.string.for_example, 14] completed
+ Completed Task = first_task
+
+
+************************************
+Multiple steps
+************************************
+
+ Best of all, it is easy to add another step to our initial pipeline.
+
+ We have to
+
+    * add another ``@transform``-decorated function (``second_task()``),
+    * specify ``first_task()`` as the source, and
+    * use a ``suffix`` which matches the output from ``first_task()``
+
+
+ ::
+
+ @transform(first_task, suffix(".output1"), ".output2")
+ def second_task(input_file, output_file):
+ # make output file
+ open(output_file, "w")
+
+ * call ``pipeline_run()`` with the correct final task (``second_task()``)
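+
+    In other words, the last task of the (new) pipeline is now ``second_task()``:
+
+    ::
+
+        pipeline_run([second_task])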
+
+
+ The full source code can be found :ref:`here <Simple_Tutorial_2nd_step_code>`
+
+ With very little effort, we now have three independent pieces of information coursing through our pipeline.
+ Because ``second_task()`` *transforms* the output from ``first_task()``, it magically knows its dependencies and
+ that it too has to work on three jobs.
+
+
+************************************
+Multi-tasking
+************************************
+
+    Though three jobs have been specified, **Ruffus** defaults to running
+    each of them successively. With modern CPUs, it is often a lot faster to run parts
+    of your pipeline in parallel, all at the same time.
+
+    To do this, all you have to do is add a ``multiprocess`` parameter to ``pipeline_run()``:
+
+ ::
+
+ >>> pipeline_run([second_task], multiprocess = 5)
+
+ In this case, ruffus will try to run up to 5 jobs at the same time. Since our second
+ task only has three jobs, these will be started simultaneously.
+
+
+
+**************************************************
+Up-to-date jobs are not re-run unnecessarily
+**************************************************
+
+ A job will be run only if the output file timestamps are out of date.
+ If you ran the same code a second time,
+
+ ::
+
+ >>> pipeline_run([second_task])
+
+
+    Nothing would happen because:
+
+        * ``job1.output2`` is more recent than ``job1.output1`` and
+        * ``job2.output2`` is more recent than ``job2.output1`` and
+        * ``job3.output2`` is more recent than ``job3.output1``.
+
+    Let us see what happens when just 1 out of 3 pieces of data is modified
+    ::
+
+        open("job1.input", "w")
+        pipeline_run([second_task], verbose = 2, multiprocess = 5)
+
+
+    You would see that only the out-of-date jobs (highlighted) have been re-run:
+
+ .. code-block:: pycon
+ :emphasize-lines: 2,6
+
+ >>> pipeline_run([second_task], verbose =2, multiprocess = 5)
+ Job = [job1.input -> job1.output1, some_extra.string.for_example, 14] completed
+ Job = [job3.input -> job3.output1, some_extra.string.for_example, 14] unnecessary: already up to date
+ Job = [job2.input -> job2.output1, some_extra.string.for_example, 14] unnecessary: already up to date
+ Completed Task = first_task
+ Job = [job1.output1 -> job1.output2] completed
+ Job = [job2.output1 -> job2.output2] unnecessary: already up to date
+ Job = [job3.output1 -> job3.output2] unnecessary: already up to date
+ Completed Task = second_task
+
+
+.. index::
+ pair: input / output parameters; Tutorial
+
+***************************************
+Intermediate files
+***************************************
+
+ In the above examples, the *input* and *output* parameters are file names.
+ Ruffus was designed for pipelines which save intermediate data in files. This is not
+ compulsory but saving your data in files at each step provides a few advantages:
+
+ #) Ruffus can use file system time stamps to check if your pipeline is up to date
+ #) Your data is persistent across runs
+ #) This is a good way to pass large amounts of data across processes and computational nodes
+
+    Otherwise, task parameters could be all sorts of data, from lists of files to numbers,
+    sets or tuples. Ruffus imposes few constraints on what *you*
+    would like to send to each stage of your pipeline.
+
+ **Ruffus** does, however, assume that all strings in your *input* and *output*
+ parameters represent file names.
+
+    *input* parameters which contain a |glob|_ pattern (e.g. ``*.txt``) are expanded to the matching file names.
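+
+    For example (a minimal sketch; the ``*.input`` pattern and the task name here are
+    illustrative rather than part of the tutorial code), a glob pattern picks up every
+    matching file on disk as a separate input:
+
+    ::
+
+        @transform("*.input", suffix(".input"), ".output1")
+        def process_file(input_file, output_file):
+            open(output_file, "w")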
+
+
+***************************************
+@transform is a 1 to 1 operation
+***************************************
+
+
+    ``@transform`` is a 1:1 operation because it keeps the number of jobs constant
+    entering and leaving the task. Each job can accept, for example, a pair of files as its input,
+    or generate more than one output file.
+
+    Let us see this in action using the previous example:
+
+        * ``first_task_params`` is changed to 3 *pairs* of file names
+        * ``@transform`` for ``first_task`` is modified to produce *pairs* of file names:
+
+          * ``.output.1``
+          * ``.output.extra.1``
+
+
+ ::
+
+ from ruffus import *
+
+ #---------------------------------------------------------------
+ # Create pairs of input files
+ #
+ first_task_params = [
+ ['job1.a.input', 'job1.b.input'],
+ ['job2.a.input', 'job2.b.input'],
+ ['job3.a.input', 'job3.b.input'],
+ ]
+
+ for input_file_pairs in first_task_params:
+ for input_file in input_file_pairs:
+ open(input_file, "w")
+
+
+ #---------------------------------------------------------------
+ #
+ # first task
+ #
+ @transform(first_task_params, suffix(".input"),
+ [".output.1",
+ ".output.extra.1"],
+ "some_extra.string.for_example", 14)
+ def first_task(input_files, output_file_pairs,
+ extra_parameter_str, extra_parameter_num):
+ # make both pairs of output files
+ for output_file in output_file_pairs:
+ open(output_file, "w")
+
+
+ #---------------------------------------------------------------
+ #
+ # second task
+ #
+ @transform(first_task, suffix(".output.1"), ".output2")
+ def second_task(input_files, output_file):
+ # make output file
+ open(output_file, "w")
+
+ #---------------------------------------------------------------
+ #
+ # Run
+ #
+ pipeline_run([second_task])
+
+ This gives the following results:
+
+ ::
+
+        >>> pipeline_run([second_task])
+
+
+ We see that apart from having a file pair where previously there was a single file,
+ little else has changed. We still have three pieces of data going through the
+ pipeline in three parallel jobs.
diff --git a/doc/_build/html/_sources/tutorials/simple_tutorial/step2_code.txt b/doc/_build/html/_sources/tutorials/simple_tutorial/step2_code.txt
new file mode 100644
index 0000000..6325cc7
--- /dev/null
+++ b/doc/_build/html/_sources/tutorials/simple_tutorial/step2_code.txt
@@ -0,0 +1,72 @@
+.. include:: ../../global.inc
+.. _Simple_Tutorial_2nd_step_code:
+
+###################################################################
+Code for Step 2: Passing parameters to the pipeline
+###################################################################
+* :ref:`Up <Simple_Tutorial>`
+* :ref:`Back <Simple_Tutorial_2nd_step>`
+* :ref:`@transform syntax <decorators.transform>` in detail
+
+************************************
+Code
+************************************
+::
+
+ from ruffus import *
+
+ #---------------------------------------------------------------
+ # Create input files
+ #
+ first_task_params = [
+ 'job1.input',
+ 'job2.input',
+ 'job3.input'
+ ]
+
+ for input_file in first_task_params:
+ open(input_file, "w")
+
+
+ #---------------------------------------------------------------
+ #
+ # first task
+ #
+ @transform(first_task_params, suffix(".input"), ".output1",
+ "some_extra.string.for_example", 14)
+ def first_task(input_file, output_file,
+ extra_parameter_str, extra_parameter_num):
+ # make output file
+ open(output_file, "w")
+
+
+ #---------------------------------------------------------------
+ #
+ # second task
+ #
+ @transform(first_task, suffix(".output1"), ".output2")
+ def second_task(input_file, output_file):
+ # make output file
+ open(output_file, "w")
+
+ #---------------------------------------------------------------
+ #
+ # Run
+ #
+ pipeline_run([second_task])
+
+
+************************************
+Resulting Output
+************************************
+ ::
+
+ >>> pipeline_run([second_task])
+ Job = [job1.input -> job1.output1, some_extra.string.for_example, 14] completed
+ Job = [job2.input -> job2.output1, some_extra.string.for_example, 14] completed
+ Job = [job3.input -> job3.output1, some_extra.string.for_example, 14] completed
+ Completed Task = first_task
+ Job = [job1.output1 -> job1.output2] completed
+ Job = [job2.output1 -> job2.output2] completed
+ Job = [job3.output1 -> job3.output2] completed
+ Completed Task = second_task
diff --git a/doc/_build/html/_sources/tutorials/simple_tutorial/step3_run_pipeline.txt b/doc/_build/html/_sources/tutorials/simple_tutorial/step3_run_pipeline.txt
new file mode 100644
index 0000000..5d31ab2
--- /dev/null
+++ b/doc/_build/html/_sources/tutorials/simple_tutorial/step3_run_pipeline.txt
@@ -0,0 +1,161 @@
+.. include:: ../../global.inc
+.. _Simple_Tutorial_3rd_step:
+
+
+
+###################################################################
+Step 3: Understanding how your pipeline works
+###################################################################
+ * :ref:`Simple tutorial overview <Simple_Tutorial>`
+ * :ref:`pipeline functions <pipeline_functions>` in detail
+
+
+.. note::
+ Remember to look at the example code:
+
+ * :ref:`Python Code for step 3 <Simple_Tutorial_3nd_step_code>`
+
+.. index::
+ pair: pipeline_printout; Tutorial
+
+
+
+The trickiest part of developing pipelines is understanding how your
+data flows through the pipeline.
+
+Parameters and files are passed from one task to another down the chain
+of pipelined functions.
+
+Whether you are learning how to use **ruffus**, trying out a new
+feature in **ruffus**, or just trying to debug a horrendously complicated pipeline
+(we have colleagues with >100 criss-crossing pipelined stages),
+your best friend is :ref:`pipeline_printout(...) <pipeline_functions.pipeline_printout>`.
+
+
+
+=======================================
+Printing out which jobs will be run
+=======================================
+
+    :ref:`pipeline_printout(...) <pipeline_functions.pipeline_printout>` takes the same parameters as ``pipeline_run(...)`` but just prints
+    the tasks which are and are not up-to-date.
+
+ The ``verbose`` parameter controls how much detail is displayed.
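+
+    For example (a minimal sketch, assuming ``second_task`` is the final task of the
+    pipeline we want to inspect):
+
+    ::
+
+        import sys
+
+        # print the tasks (and, at higher verbosity, the individual jobs) which
+        # would be run, without actually running the pipeline
+        pipeline_printout(sys.stdout, [second_task], verbose = 3)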
+
+    Let us take the two-step :ref:`pipelined code <Simple_Tutorial_3nd_step_code>` we have previously written,
+    but call :ref:`pipeline_printout(...) <pipeline_functions.pipeline_printout>` instead of
+    :ref:`pipeline_run(...) <pipeline_functions.pipeline_run>`.
+    This lists the tasks which will be run in the pipeline:
+
+
+ .. image:: ../../images/simple_tutorial_pipeline_printout1.png
+
+ .. ::
+
+ ::
+
+ >>> pipeline_printout(sys.stdout, [second_task])
+
+ ________________________________________
+ Tasks which will be run:
+
+ Task = first_task
+ Task = second_task
+ ________________________________________
+
+
+
+ To see the input and output parameters of each job in the pipeline, we can increase the verbosity from the default (1) to 3:
+
+ .. image:: ../../images/simple_tutorial_pipeline_printout2.png
+
+ .. ::
+
+ >>> pipeline_printout(sys.stdout, [second_task], verbose = 3)
+
+ ________________________________________
+ Tasks which will be run:
+
+ Task = first_task
+ Job = [None
+ ->job1.stage1]
+ Job needs update: Missing file job1.stage1
+ Job = [None
+ ->job2.stage1]
+ Job needs update: Missing file job2.stage1
+
+ Task = second_task
+ Job = [job1.stage1
+ ->job1.stage2, 1st_job]
+ Job needs update: Missing file job1.stage1
+ Job = [job2.stage1
+ ->job2.stage2, 2nd_job]
+ Job needs update: Missing file job2.stage1
+
+ ________________________________________
+
+
+ This is very useful for checking that the input and output parameters have been specified
+ correctly.
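+
+ If you just want a quick preview, a handy pattern (a minimal sketch of our own, not part of the tutorial scripts, and assuming ``second_task`` is the final task defined above) is to hide the choice between previewing and running behind a command line flag::
+
+     import sys
+     from ruffus import pipeline_run, pipeline_printout
+
+     if "--just-print" in sys.argv:
+         # preview which jobs would run, with full parameter detail
+         pipeline_printout(sys.stdout, [second_task], verbose = 3)
+     else:
+         # actually run the pipeline
+         pipeline_run([second_task])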
+
+=============================================
+Determining which jobs are out-of-date or not
+=============================================
+
+ It is often useful to see which tasks are or are not up-to-date. For example, if we
+ were to run the pipeline in full, and then modify one of the intermediate files, the
+ pipeline would be partially out of date.
+
+
+ Let us start by running the pipeline in full, but then modify ``job1.stage1`` so that the second task is no longer up-to-date::
+
+ pipeline_run([second_task])
+
+ # modify job1.stage1
+ open("job1.stage1", "w").close()
+
+
+ At a verbosity of 5, even jobs which are up-to-date will be displayed.
+ We can now see that there is only one job in ``second_task(...)`` which needs to be re-run
+ because ``job1.stage1`` has been modified after ``job1.stage2`` (highlighted in blue):
+
+
+ .. image:: ../../images/simple_tutorial_pipeline_printout3.png
+
+ .. ::
+
+ ::
+
+ >>> pipeline_printout(sys.stdout, [second_task], verbose = 5)
+ ________________________________________
+ Tasks which are up-to-date:
+
+ Task = first_task
+ Job = [None
+ ->job1.stage1]
+ Job up-to-date
+ Job = [None
+ ->job2.stage1]
+ Job up-to-date
+
+
+ ________________________________________
+ Tasks which will be run:
+
+ Task = second_task
+ Job = [job1.stage1
+ ->job1.stage2, 1st_job]
+ Job needs update:
+ Need update file times= [
+ [(1269025787.0, 'job1.stage1')],
+ [(1269025785.0, 'job1.stage2')] ]
+ Job = [job2.stage1
+ ->job2.stage2, 2nd_job]
+ Job up-to-date
+
+ ________________________________________
+
+
+
+
+
diff --git a/doc/_build/html/_sources/tutorials/simple_tutorial/step3_run_pipeline_code.txt b/doc/_build/html/_sources/tutorials/simple_tutorial/step3_run_pipeline_code.txt
new file mode 100644
index 0000000..3a8ec01
--- /dev/null
+++ b/doc/_build/html/_sources/tutorials/simple_tutorial/step3_run_pipeline_code.txt
@@ -0,0 +1,112 @@
+.. include:: ../../global.inc
+.. _Simple_Tutorial_3nd_step_code:
+
+
+###################################################################
+Code for Step 3: Understanding how your pipeline works
+###################################################################
+* :ref:`Simple tutorial overview <Simple_Tutorial>`
+* :ref:`pipeline functions <pipeline_functions>` in detail
+* :ref:`Back to Step 3 <Simple_Tutorial_3rd_step>`
+
+******************************************
+Display the initial state of the pipeline
+******************************************
+ ::
+
+ from ruffus import *
+ import sys
+
+ #---------------------------------------------------------------
+ #
+ # first task
+ #
+ task1_param = [
+ [ None, 'job1.stage1'], # 1st job
+ [ None, 'job2.stage1'], # 2nd job
+ ]
+
+ @files(task1_param)
+ def first_task(no_input_file, output_file):
+ open(output_file, "w")
+ #
+ # pretend we have worked hard
+
+
+ #---------------------------------------------------------------
+ #
+ # second task
+ #
+ task2_param = [
+ [ 'job1.stage1', "job1.stage2", " 1st_job"], # 1st job
+ [ 'job2.stage1', "job2.stage2", " 2nd_job"], # 2nd job
+ ]
+
+ @follows(first_task)
+ @files(task2_param)
+ def second_task(input_file, output_file, extra_parameter):
+ open(output_file, "w")
+ print extra_parameter
+
+ pipeline_printout(sys.stdout, [second_task], verbose = 3)
+
+************************************
+Resulting Output
+************************************
+ ::
+
+ >>> pipeline_printout(sys.stdout, [second_task])
+
+ Task = first_task
+ Job = [None -> job1.stage1]
+ Job = [None -> job2.stage1]
+
+ Task = second_task
+ Job = [job1.stage1 -> job1.stage2, 1st_job]
+ Job = [job2.stage1 -> job2.stage2, 2nd_job]
+
+******************************************
+Display the partially up-to-date pipeline
+******************************************
+ Run the pipeline, modify ``job1.stage1`` so that the second task is no longer up-to-date,
+ and print out the state of the pipeline again::
+
+ pipeline_run([second_task])
+
+ # modify job1.stage1
+ open("job1.stage1", "w").close()
+
+
+ At a verbosity of 5, even jobs which are up-to-date will be displayed::
+
+ >>> pipeline_printout(sys.stdout, [second_task], verbose = 5)
+ ________________________________________
+ Tasks which are up-to-date:
+
+ Task = first_task
+ Job = [None
+ ->job1.stage1]
+ Job up-to-date
+ Job = [None
+ ->job2.stage1]
+ Job up-to-date
+
+
+ ________________________________________
+ Tasks which will be run:
+
+ Task = second_task
+ Job = [job1.stage1
+ ->job1.stage2, 1st_job]
+ Job needs update: Need update file times= [[(1269025787.0, 'job1.stage1')], [(1269025785.0,
+ 'job1.stage2')]]
+ Job = [job2.stage1
+ ->job2.stage2, 2nd_job]
+ Job up-to-date
+
+ ________________________________________
+
+ We can now see that there is only one job in "second_task" which needs to be re-run
+ because 'job1.stage1' has been modified after 'job1.stage2'.
+
+
diff --git a/doc/_build/html/_sources/tutorials/simple_tutorial/step4_run_pipeline_graphically.txt b/doc/_build/html/_sources/tutorials/simple_tutorial/step4_run_pipeline_graphically.txt
new file mode 100644
index 0000000..a5b652e
--- /dev/null
+++ b/doc/_build/html/_sources/tutorials/simple_tutorial/step4_run_pipeline_graphically.txt
@@ -0,0 +1,71 @@
+.. include:: ../../global.inc
+.. _Simple_Tutorial_4th_step_graphical:
+
+
+
+###################################################################
+Step 4: Displaying the pipeline visually
+###################################################################
+ * :ref:`Simple tutorial overview <Simple_Tutorial>`
+ * :ref:`pipeline functions <pipeline_functions>` in detail
+
+.. note::
+ Remember to look at the example code:
+
+ * :ref:`Python Code for step 4 <Simple_Tutorial_4th_step_graphical_code>`
+
+.. index::
+ pair: pipeline_printout; Tutorial
+
+
+
+.. index::
+ pair: pipeline_printout_graph; Tutorial
+
+=============================================
+Printing out a flowchart of our pipeline
+=============================================
+
+ It is all very well being able to trace the data flow through the pipeline.
+ Sometimes, however, we need a bit of eye-candy.
+
+ .. csv-table::
+ :widths: 1,99
+ :class: borderless
+
+ ".. image:: ../../images/simple_tutorial_step4.png", "
+ We can see this flowchart of our fledgling pipeline by executing:
+ ::
+
+ pipeline_printout_graph ( 'flowchart.svg',
+ 'svg',
+ [second_task],
+ no_key_legend = True)
+
+ Flowcharts can be printed in a large number of formats including jpg, svg,
+ png and pdf, provided that the ``dot`` program from
+ `Graphviz <http://www.graphviz.org/>`_ is installed.
+
+ For this simple case, we have omitted the legend key which distinguishes between the
+ different states of the various tasks. (See below for the legend key.)
+ "
+
+
+
+=============================================
+Horribly complicated pipelines!
+=============================================
+ Flowcharts are especially useful if you have really complicated pipelines, such as
+
+ .. image:: ../../images/simple_tutorial_complex_flowchart.png
+
+
+=============================================
+Circular dependency errors in pipelines!
+=============================================
+ They are especially valuable when the pipeline has not been set up properly and vicious
+ circular dependencies are present:
+
+
+ .. image:: ../../images/simple_tutorial_complex_flowchart_error.png
+
diff --git a/doc/_build/html/_sources/tutorials/simple_tutorial/step4_run_pipeline_graphically_code.txt b/doc/_build/html/_sources/tutorials/simple_tutorial/step4_run_pipeline_graphically_code.txt
new file mode 100644
index 0000000..8fb1018
--- /dev/null
+++ b/doc/_build/html/_sources/tutorials/simple_tutorial/step4_run_pipeline_graphically_code.txt
@@ -0,0 +1,104 @@
+.. include:: ../../global.inc
+.. _Simple_Tutorial_4th_step_graphical_code:
+
+
+###################################################################
+Code for Step 4: Displaying the pipeline visually
+###################################################################
+* :ref:`Simple tutorial overview <Simple_Tutorial>`
+* :ref:`pipeline functions <pipeline_functions>` in detail
+* :ref:`Back to Step 4 <Simple_Tutorial_4th_step_graphical>`
+
+************************************
+Code
+************************************
+ ::
+
+ from ruffus import *
+ import time
+
+ #---------------------------------------------------------------
+ #
+ # first task
+ #
+ task1_param = [
+ [ None, 'job1.stage1'], # 1st job
+ [ None, 'job2.stage1'], # 2nd job
+ ]
+
+ @files(task1_param)
+ def first_task(no_input_file, output_file):
+ open(output_file, "w")
+
+
+ #---------------------------------------------------------------
+ #
+ # second task
+ #
+ task2_param = [
+ [ 'job1.stage1', "job1.stage2", " 1st_job"], # 1st job
+ [ 'job2.stage1', "job2.stage2", " 2nd_job"], # 2nd job
+ ]
+
+ @follows(first_task)
+ @files(task2_param)
+ def second_task(input_file, output_file, extra_parameter):
+ open(output_file, "w")
+ print extra_parameter
+
+ #---------------------------------------------------------------
+ #
+ # Show flow chart and tasks before running the pipeline
+ #
+ print "Show flow chart and tasks before running the pipeline"
+ pipeline_printout_graph ( open("flowchart_before.png", "w"),
+ "png",
+ [second_task],
+ no_key_legend=True)
+
+ #---------------------------------------------------------------
+ #
+ # Run
+ #
+ pipeline_run([second_task])
+
+
+ # modify job1.stage1
+ open("job1.stage1", "w").close()
+
+
+ #---------------------------------------------------------------
+ #
+ # Show flow chart and tasks after running the pipeline
+ #
+ print "Show flow chart and tasks after running the pipeline"
+ pipeline_printout_graph ( open("flowchart_after.png", "w"),
+ "png",
+ [second_task],
+ no_key_legend=True)
+
+
+************************************
+Resulting Flowcharts
+************************************
+ +-------------------------------------------------------------+-----------------------------------------------------------------------+
+ | .. image:: ../../images/simple_tutorial_stage4_before.png | .. image:: ../../images/simple_tutorial_stage4_after.png |
+ | :alt: Before running the pipeline | :alt: After running the pipeline |
+ | :scale: 50 | :scale: 50 |
+ | :align: center | :align: center |
+ | | |
+ | .. centered:: Before | .. centered:: After |
+ | | |
+ +-------------------------------------------------------------+-----------------------------------------------------------------------+
+
+ +-------------------------------------------------------------------------------------------------------------------------------------+
+ | .. image:: ../../images/tutorial_key.jpg |
+ | :alt: Legend key |
+ | :scale: 75 |
+ | :align: center |
+ | |
+ | .. centered:: Legend |
+ | |
+ +-------------------------------------------------------------------------------------------------------------------------------------+
+
+
diff --git a/doc/_build/html/_sources/tutorials/simple_tutorial/step5_split.txt b/doc/_build/html/_sources/tutorials/simple_tutorial/step5_split.txt
new file mode 100644
index 0000000..8dec152
--- /dev/null
+++ b/doc/_build/html/_sources/tutorials/simple_tutorial/step5_split.txt
@@ -0,0 +1,112 @@
+.. include:: ../../global.inc
+.. _Simple_Tutorial_5th_step:
+.. _tutorial.split:
+.. index::
+ pair: @split; Tutorial
+
+
+###################################################################
+Step 5: Splitting up large tasks / files
+###################################################################
+ * :ref:`Simple tutorial overview <Simple_Tutorial>`
+ * :ref:`@split in detail <decorators.split>`
+
+ .. note::
+ Remember to look at the example code:
+
+ * :ref:`Python Code for step 5 <Simple_Tutorial_5th_step_code>`
+
+ | The second half of this tutorial is a worked example to calculate
+ the sample variance of 10,000 random numbers.
+ | This is similar to many computational projects: we are tackling a big problem
+ by splitting it up into many tiny problems solved in parallel. We can then
+ merge our piecemeal solutions into our final answer. These
+ `embarrassingly parallel <http://en.wikipedia.org/wiki/Embarrassingly_parallel>`_
+ problems motivated the original design of **Ruffus**.
+
+ **Ruffus** has three dedicated decorators to handle these problems with ease:
+
+ * :ref:`@split<decorators.split>` to break up the big problem
+ * :ref:`@transform<decorators.transform>` to solve the parts in parallel
+ * :ref:`@merge<decorators.merge>` to merge our piecemeal solutions into the final answer.
+
+
+**************************************************************************************
+Splitting up a long list of random numbers to calculate their variance
+**************************************************************************************
+
+ .. csv-table::
+ :widths: 1,99
+ :class: borderless
+
+ ".. centered::
+ Step 5 from:
+
+ .. image:: ../../images/simple_tutorial_step5.png", "
+ Suppose we had a list of 10,000 random numbers in the file ``random_numbers.list``:
+
+ ::
+
+     import random
+     NUMBER_OF_RANDOMS = 10000
+     f = open('random_numbers.list', 'w')
+     for i in range(NUMBER_OF_RANDOMS):
+         f.write('%g\n' % (random.random() * 100.0))
+
+
+ We might want to calculate the sample variance more quickly by splitting them
+ into ``NNN`` parcels of 1000 numbers each and working on them in parallel.
+ In this case we know that ``NNN == 10`` but usually the number of resulting files
+ is only apparent after we have finished processing our starting file."
+
+
+ Our pipeline function needs to take the random numbers file ``random_numbers.list``,
+ read the random numbers from it, and write a new file for every 1000 lines.
+
+ The *Ruffus* decorator :ref:`@split<decorators.split>` is designed specifically for
+ splitting up input into an indeterminate ``NNN`` number of output files:
+
+ .. image:: ../../images/simple_tutorial_split.png
+
+ .. ::
+
+ ::
+
+ @split("random_numbers.list", "*.chunks")
+ def step_5_split_numbers_into_chunks (input_file_name, output_files):
+ #
+ """code goes here"""
+
+
+ Ruffus will set
+
+ | ``input_file_name`` to ``"random_numbers.list"``
+ | ``output_files`` to all files which match ``*.chunks`` (i.e. ``"1.chunks"``, ``"2.chunks"`` etc.).
+
+ The first time you run this function ``*.chunks`` will return an empty list because
+ no ``.chunks`` files have been created, resulting in the following:
+
+ ::
+
+ step_5_split_numbers_into_chunks ("random_numbers.list", [])
+
+ After that ``*.chunks`` will match the list of current ``.chunks`` files created by
+ the previous pipeline run. Some of these files will be out of date or superfluous.
+ These file names are usually only useful for removing detritus from previous runs
+ (have a look at :ref:`step_5_split_numbers_into_chunks(...) <Simple_Tutorial_5th_step_code>`).
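+
+ A minimal sketch of that clean-up (this is what :ref:`step_5_split_numbers_into_chunks(...) <Simple_Tutorial_5th_step_code>` on the code page does before writing new chunks)::
+
+     import glob, os
+
+     # delete any chunk files left over from a previous run
+     for stale_chunks_file in glob.glob("*.chunks"):
+         os.unlink(stale_chunks_file)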
+
+ .. note ::
+
+ The great value of specifying correctly the list of *output* files will become apparent in the next
+ step of this tutorial when we shall see how pipeline tasks can be "chained" together conveniently.
+
+ Remember to specify ``glob`` patterns which match *all* the files you are splitting up. You can
+ cover different directories, or groups of file names, by using a list of ``glob`` patterns,
+ e.g. ::
+
+ @split("input.file", ['a*.bits', 'b*.pieces', 'somewhere_else/c*.stuff'])
+ def split_function (input_filename, output_files):
+ "Code to split up 'input.file'"
+
+
+
+
diff --git a/doc/_build/html/_sources/tutorials/simple_tutorial/step5_split_code.txt b/doc/_build/html/_sources/tutorials/simple_tutorial/step5_split_code.txt
new file mode 100644
index 0000000..9afe18c
--- /dev/null
+++ b/doc/_build/html/_sources/tutorials/simple_tutorial/step5_split_code.txt
@@ -0,0 +1,81 @@
+.. _Simple_Tutorial_5th_step_code:
+.. include:: ../../global.inc
+
+###################################################################
+Code for Step 5: Splitting up large tasks / files
+###################################################################
+ * :ref:`Simple tutorial overview <Simple_Tutorial>`
+ * :ref:`@split in detail <decorators.split>`
+ * :ref:`back to step 5 <Simple_Tutorial_5th_step>`
+
+************************************
+Code
+************************************
+ ::
+
+ NUMBER_OF_RANDOMS = 10000
+ CHUNK_SIZE = 1000
+
+
+ from ruffus import *
+ import time
+
+ import random
+ import glob
+ import os
+
+ #---------------------------------------------------------------
+ #
+ # Create random numbers
+ #
+ @files(None, "random_numbers.list")
+ def create_random_numbers(input_file_name, output_file_name):
+ f = open(output_file_name, "w")
+ for i in range(NUMBER_OF_RANDOMS):
+ f.write("%g\n" % (random.random() * 100.0))
+
+ #---------------------------------------------------------------
+ #
+ # Split initial file
+ #
+ @follows(create_random_numbers)
+ @split("random_numbers.list", "*.chunks")
+ def step_5_split_numbers_into_chunks (input_file_name, output_files):
+ """
+ Splits random numbers file into XXX files of CHUNK_SIZE each
+ """
+ #
+ # clean up files from previous runs
+ #
+ for f in glob.glob("*.chunks"):
+ os.unlink(f)
+ #
+ # create new file every CHUNK_SIZE lines and
+ # copy each line into current file
+ #
+ output_file = None
+ cnt_files = 0
+ for i, line in enumerate(open(input_file_name)):
+ if i % CHUNK_SIZE == 0:
+ cnt_files += 1
+ output_file = open("%d.chunks" % cnt_files, "w")
+ output_file.write(line)
+
+ pipeline_run([step_5_split_numbers_into_chunks], verbose = 2)
+
+************************************
+Resulting Output
+************************************
+ ::
+
+ >>> pipeline_run([step_5_split_numbers_into_chunks], verbose = 2)
+
+ Start Task = create_random_numbers
+
+ Job = [None -> random_numbers.list] Missing file random_numbers.list
+ Job = [None -> random_numbers.list] completed
+ Completed Task = create_random_numbers
+ Start Task = step_5_split_numbers_into_chunks
+ Splits random numbers file into XXX files of CHUNK_SIZE each
+ Job = [random_numbers.list -> *.chunks] Missing output file
+ Job = [random_numbers.list -> *.chunks] completed
+ Completed Task = step_5_split_numbers_into_chunks
+
diff --git a/doc/_build/html/_sources/tutorials/simple_tutorial/step6_transform.txt b/doc/_build/html/_sources/tutorials/simple_tutorial/step6_transform.txt
new file mode 100644
index 0000000..7845c62
--- /dev/null
+++ b/doc/_build/html/_sources/tutorials/simple_tutorial/step6_transform.txt
@@ -0,0 +1,89 @@
+.. include:: ../../global.inc
+.. _Simple_Tutorial_6th_step:
+.. _tutorial.transform:
+
+.. index::
+ pair: @transform; Tutorial
+
+
+
+###################################################################
+Step 6: Running jobs in parallel
+###################################################################
+* :ref:`Simple tutorial overview <Simple_Tutorial>`
+* :ref:`@transform in detail <decorators.transform>`
+
+.. note::
+ Remember to look at the example code:
+
+ * :ref:`Python Code for step 6 <Simple_Tutorial_6th_step_code>`
+
+**************************************************************************************
+Calculating sums and sum of squares in parallel
+**************************************************************************************
+ Now that we have many smaller lists of numbers in separate files, we can calculate their sums and
+ sum of squares in parallel.
+
+ All we need is a function which takes a ``*.chunks`` file, reads the numbers, calculates
+ the answers and writes them back out to a corresponding ``*.sums`` file.
+
+ *Ruffus* magically takes care of applying this task function to all the different
+ data files in parallel.
+
+ .. image:: ../../images/simple_tutorial_transform.png
+
+ .. ::
+ ::
+
+ #---------------------------------------------------------------
+ #
+ # Calculate sum and sum of squares for each chunk file
+ #
+ @transform(step_5_split_numbers_into_chunks, suffix(".chunks"), ".sums")
+ def step_6_calculate_sum_of_squares (input_file_name, output_file_name):
+ #
+ # calculate sums and sums of squares for all values in the input_file_name
+ # writing to output_file_name
+ ""
+
+
+
+ | The first thing to note about this example is that the *input* files are not specified
+ as a |glob|_ (e.g. ``*.chunks``) but as the preceding task.
+ | *Ruffus* will take all
+ the files produced by ``step_5_split_numbers_into_chunks()`` and feed them as the *input*
+ into step 6.
+
+ This handy shortcut also means that **Ruffus** knows that ``step_6_calculate_sum_of_squares``
+ depends on ``step_5_split_numbers_into_chunks`` and an additional ``@follows`` directive
+ is unnecessary.
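+
+ For comparison, here is a sketch of the wiring we would otherwise have to spell out by hand, feeding :ref:`@transform <decorators.transform>` a |glob|_ of the chunk files and declaring the dependency explicitly::
+
+     @follows(step_5_split_numbers_into_chunks)
+     @transform("*.chunks", suffix(".chunks"), ".sums")
+     def step_6_calculate_sum_of_squares (input_file_name, output_file_name):
+         ""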
+
+ The use of :ref:`suffix<decorators.transform.suffix_string>` within the decorator tells
+ *Ruffus* to take all *input* files with the ``.chunks`` suffix and substitute a ``.sums``
+ suffix to generate the corresponding *output* file name.
+
+
+ Thus if ``step_5_split_numbers_into_chunks`` created
+ ::
+
+ "1.chunks"
+ "2.chunks"
+ "3.chunks"
+
+ This would result in the following function calls:
+
+ ::
+
+ step_6_calculate_sum_of_squares ("1.chunk", "1.sums")
+ step_6_calculate_sum_of_squares ("2.chunk", "2.sums")
+ step_6_calculate_sum_of_squares ("3.chunk", "3.sums")
+
+ # etc...
+
+
+
+ .. note::
+
+ It is possible to generate *output* filenames using more powerful regular expressions
+ as well. See the :ref:`@transform <decorators.transform>` syntax documentation for more details.
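+
+ For example (a sketch only; see the linked documentation for the exact behaviour of ``regex()``), the same renaming could be written with a regular expression instead of a suffix::
+
+     @transform(step_5_split_numbers_into_chunks, regex(r"(.+)\.chunks$"), r"\1.sums")
+     def step_6_calculate_sum_of_squares (input_file_name, output_file_name):
+         ""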
+
diff --git a/doc/_build/html/_sources/tutorials/simple_tutorial/step6_transform_code.txt b/doc/_build/html/_sources/tutorials/simple_tutorial/step6_transform_code.txt
new file mode 100644
index 0000000..2521c87
--- /dev/null
+++ b/doc/_build/html/_sources/tutorials/simple_tutorial/step6_transform_code.txt
@@ -0,0 +1,104 @@
+.. include:: ../../global.inc
+.. _Simple_Tutorial_6th_step_code:
+
+###################################################################
+Code for Step 6: Running jobs in parallel
+###################################################################
+* :ref:`Simple tutorial overview <Simple_Tutorial>`
+* :ref:`@transform in detail <decorators.transform>`
+* :ref:`back to step 6 <Simple_Tutorial_6th_step>`
+
+************************************
+Code
+************************************
+ ::
+
+ NUMBER_OF_RANDOMS = 10000
+ CHUNK_SIZE = 1000
+
+
+ from ruffus import *
+ import time
+
+ import random
+ import glob
+ import os
+
+ #---------------------------------------------------------------
+ #
+ # Create random numbers
+ #
+ @files(None, "random_numbers.list")
+ def create_random_numbers(input_file_name, output_file_name):
+ f = open(output_file_name, "w")
+ for i in range(NUMBER_OF_RANDOMS):
+ f.write("%g\n" % (random.random() * 100.0))
+
+ #---------------------------------------------------------------
+ #
+ # Split initial file
+ #
+ @follows(create_random_numbers)
+ @split("random_numbers.list", "*.chunks")
+ def step_5_split_numbers_into_chunks (input_file_name, output_files):
+ """
+ Splits random numbers file into XXX files of CHUNK_SIZE each
+ """
+ #
+ # clean up files from previous runs
+ #
+ for f in glob.glob("*.chunks"):
+ os.unlink(f)
+ #
+ #
+ # create new file every CHUNK_SIZE lines and
+ # copy each line into current file
+ #
+ output_file = None
+ cnt_files = 0
+ for i, line in enumerate(open(input_file_name)):
+ if i % CHUNK_SIZE == 0:
+ cnt_files += 1
+ output_file = open("%d.chunks" % cnt_files, "w")
+ output_file.write(line)
+
+ #---------------------------------------------------------------
+ #
+ # Calculate sum and sum of squares for each chunk file
+ #
+ @transform(step_5_split_numbers_into_chunks, suffix(".chunks"), ".sums")
+ def step_6_calculate_sum_of_squares (input_file_name, output_file_name):
+ output = open(output_file_name, "w")
+ sum_squared, sum = [0.0, 0.0]
+ cnt_values = 0
+ for line in open(input_file_name):
+ cnt_values += 1
+ val = float(line.rstrip())
+ sum_squared += val * val
+ sum += val
+ output.write("%s\n%s\n%d\n" % (repr(sum_squared), repr(sum), cnt_values))
+
+ pipeline_run([step_6_calculate_sum_of_squares], verbose = 1)
+
+
+************************************
+Resulting Output
+************************************
+ ::
+
+ >>> pipeline_run([step_6_calculate_sum_of_squares], verbose = 1)
+ Job = [None -> random_numbers.list] unnecessary: already up to date
+ Completed Task = create_random_numbers
+ Job = [random_numbers.list -> *.chunks] unnecessary: already up to date
+ Completed Task = step_5_split_numbers_into_chunks
+ Job = [6.chunks -> 6.sums] completed
+ Job = [1.chunks -> 1.sums] completed
+ Job = [4.chunks -> 4.sums] completed
+ Job = [7.chunks -> 7.sums] completed
+ Job = [2.chunks -> 2.sums] completed
+ Job = [9.chunks -> 9.sums] completed
+ Job = [10.chunks -> 10.sums] completed
+ Job = [3.chunks -> 3.sums] completed
+ Job = [5.chunks -> 5.sums] completed
+ Job = [8.chunks -> 8.sums] completed
+ Completed Task = step_6_calculate_sum_of_squares
+
diff --git a/doc/_build/html/_sources/tutorials/simple_tutorial/step7_merge.txt b/doc/_build/html/_sources/tutorials/simple_tutorial/step7_merge.txt
new file mode 100644
index 0000000..d2752f5
--- /dev/null
+++ b/doc/_build/html/_sources/tutorials/simple_tutorial/step7_merge.txt
@@ -0,0 +1,80 @@
+.. include:: ../../global.inc
+.. _Simple_Tutorial_7th_step:
+
+.. index::
+ pair: @merge; Tutorial
+
+
+###################################################################
+Step 7: Merging results back together
+###################################################################
+* :ref:`Simple tutorial overview <Simple_Tutorial>`
+* :ref:`@merge in detail <decorators.merge>`
+
+.. note::
+ Remember to look at the example code:
+
+ * :ref:`Python Code for step 7 <Simple_Tutorial_7th_step_code>`
+
+
+Now that we have all the partial solutions in ``*.sums``, we can merge them
+together to generate the final answer: the variance of all 10,000 random
+numbers.
+
+**************************************************************************************
+Calculating variances from the sums and sum of squares of all chunks
+**************************************************************************************
+
+ If we add up all the sums, and sum of squares we calculated previously, we can
+ obtain the variance as follows::
+
+ variance = (sum_squared - sum * sum / N)/N
+
+ where ``N`` is the number of values
+
+ See the `wikipedia <http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance>`_ entry for a discussion of
+ why this is a very naive approach!
+
+ To do this, all we have to do is merge together all the values in ``*.sums``, i.e.
+ add up the ``sums`` and ``sum_squared`` for each chunk. We can then apply the above (naive) formula.
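+
+ In plain Python this boils down to the following sketch (mirroring what ``step_7_calculate_variance(...)`` on the code page does; ``input_file_names`` is the list of ``*.sums`` files that **Ruffus** passes in)::
+
+     all_sum = all_sum_squared = all_N = 0.0
+     for sums_file in input_file_names:
+         # each *.sums file holds sum of squares, sum and count on three lines
+         sum_squared, sum_value, N = map(float, open(sums_file).readlines())
+         all_sum_squared += sum_squared
+         all_sum += sum_value
+         all_N += N
+
+     variance = (all_sum_squared - all_sum * all_sum / all_N) / all_N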
+
+ Merging files is straightforward in **Ruffus**:
+
+ .. image:: ../../images/simple_tutorial_merge1.png
+
+ .. ::
+
+ ::
+
+ @merge(step_6_calculate_sum_of_squares, "variance.result")
+ def step_7_calculate_variance (input_file_names, output_file_name):
+ #
+ # add together sums and sums of squares from each input_file_name
+ # calculate variance and write to output_file_name
+ ""
+
+
+ The :ref:`@merge <decorators.merge>` decorator tells *Ruffus* to take all the files from the step 6 task (i.e. ``*.sums``)
+ and produce a single merged file, ``"variance.result"``.
+
+ Thus if ``step_6_calculate_sum_of_squares`` created
+ | ``1.sums`` and
+ | ``2.sums`` etc.
+
+ This would result in the following function call:
+
+ .. image:: ../../images/simple_tutorial_merge2.png
+
+ .. ::
+
+ ::
+
+ step_7_calculate_variance (["1.sums", "2.sums"], "variance.result")
+
+
+ The final result is, of course, in ``"variance.result"``.
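+
+ For instance (a trivial sketch), once the pipeline has run, the value can be read straight back::
+
+     print open("variance.result").read()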
+
+
+
+
+
diff --git a/doc/_build/html/_sources/tutorials/simple_tutorial/step7_merge_code.txt b/doc/_build/html/_sources/tutorials/simple_tutorial/step7_merge_code.txt
new file mode 100644
index 0000000..279919f
--- /dev/null
+++ b/doc/_build/html/_sources/tutorials/simple_tutorial/step7_merge_code.txt
@@ -0,0 +1,138 @@
+.. include:: ../../global.inc
+.. _Simple_Tutorial_7th_step_code:
+
+###################################################################
+Code for Step 7: Merging results back together
+###################################################################
+* :ref:`Simple tutorial overview <Simple_Tutorial>`
+* :ref:`@merge in detail <decorators.merge>`
+* :ref:`back to step 7 <Simple_Tutorial_7th_step>`
+
+************************************
+Code
+************************************
+ ::
+
+ NUMBER_OF_RANDOMS = 10000
+ CHUNK_SIZE = 1000
+
+
+ from ruffus import *
+ import time
+
+ import random
+ import glob
+ import os
+
+ #---------------------------------------------------------------
+ #
+ # Create random numbers
+ #
+ @files(None, "random_numbers.list")
+ def create_random_numbers(input_file_name, output_file_name):
+ f = open(output_file_name, "w")
+ for i in range(NUMBER_OF_RANDOMS):
+ f.write("%g\n" % (random.random() * 100.0))
+
+ #---------------------------------------------------------------
+ #
+ # Split initial file
+ #
+ @follows(create_random_numbers)
+ @split("random_numbers.list", "*.chunks")
+ def step_5_split_numbers_into_chunks (input_file_name, output_files):
+ """
+ Splits random numbers file into XXX files of CHUNK_SIZE each
+ """
+ #
+ # clean up files from previous runs
+ #
+ for f in glob.glob("*.chunks"):
+ os.unlink(f)
+ #
+ # create new file every CHUNK_SIZE lines and
+ # copy each line into current file
+ #
+ output_file = None
+ cnt_files = 0
+ for i, line in enumerate(open(input_file_name)):
+ if i % CHUNK_SIZE == 0:
+ cnt_files += 1
+ output_file = open("%d.chunks" % cnt_files, "w")
+ output_file.write(line)
+
+ #---------------------------------------------------------------
+ #
+ # Calculate sum and sum of squares for each chunk file
+ #
+ @transform(step_5_split_numbers_into_chunks, suffix(".chunks"), ".sums")
+ def step_6_calculate_sum_of_squares (input_file_name, output_file_name):
+ output = open(output_file_name, "w")
+ sum_squared, sum = [0.0, 0.0]
+ cnt_values = 0
+ for line in open(input_file_name):
+ cnt_values += 1
+ val = float(line.rstrip())
+ sum_squared += val * val
+ sum += val
+ output.write("%s\n%s\n%d\n" % (repr(sum_squared), repr(sum), cnt_values))
+
+ #---------------------------------------------------------------
+ #
+ # Calculate sum and sum of squares for each chunk
+ #
+ @merge(step_6_calculate_sum_of_squares, "variance.result")
+ def step_7_calculate_variance (input_file_names, output_file_name):
+ """
+ Calculate variance naively
+ """
+ output = open(output_file_name, "w")
+ #
+ # initialise variables
+ #
+ all_sum_squared = 0.0
+ all_sum = 0.0
+ all_cnt_values = 0.0
+ #
+ # add up the sum_squared, sum and cnt_values from all the chunks
+ #
+ for input_file_name in input_file_names:
+ sum_squared, sum, cnt_values = map(float, open(input_file_name).readlines())
+ all_sum_squared += sum_squared
+ all_sum += sum
+ all_cnt_values += cnt_values
+ all_mean = all_sum / all_cnt_values
+ variance = (all_sum_squared - all_sum * all_mean)/(all_cnt_values)
+ #
+ # print output
+ #
+ print >>output, variance
+
+ #---------------------------------------------------------------
+ #
+ # Run
+ #
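+ # the second list ([create_random_numbers]) names tasks which are forced to
+ # re-run even if their output appears up to date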
+ pipeline_run([step_7_calculate_variance], [create_random_numbers], verbose = 1)
+
+************************************
+Resulting Output
+************************************
+ ::
+
+ pipeline_run([step_7_calculate_variance], [create_random_numbers], verbose = 1)
+ Job = [None -> random_numbers.list] completed
+ Completed Task = create_random_numbers
+ Job = [random_numbers.list -> *.chunks] completed
+ Completed Task = step_5_split_numbers_into_chunks
+ Job = [6.chunks -> 6.sums] completed
+ Job = [1.chunks -> 1.sums] completed
+ Job = [4.chunks -> 4.sums] completed
+ Job = [7.chunks -> 7.sums] completed
+ Job = [2.chunks -> 2.sums] completed
+ Job = [9.chunks -> 9.sums] completed
+ Job = [10.chunks -> 10.sums] completed
+ Job = [3.chunks -> 3.sums] completed
+ Job = [5.chunks -> 5.sums] completed
+ Job = [8.chunks -> 8.sums] completed
+ Completed Task = step_6_calculate_sum_of_squares
+ Job = [[6.sums, 5.sums, 1.sums, 4.sums, 3.sums, 2.sums, 8.sums, 7.sums, 10.sums, 9.sums] -> variance.result] completed
+ Completed Task = step_7_calculate_variance
diff --git a/doc/_build/html/_sources/tutorials/simple_tutorial/step8_posttask.txt b/doc/_build/html/_sources/tutorials/simple_tutorial/step8_posttask.txt
new file mode 100644
index 0000000..ff98bbf
--- /dev/null
+++ b/doc/_build/html/_sources/tutorials/simple_tutorial/step8_posttask.txt
@@ -0,0 +1,119 @@
+.. include:: ../../global.inc
+.. index::
+ pair: @posttask; Tutorial
+
+.. _Simple_Tutorial_8th_step:
+
+
+###################################################################
+Step 8: Signal the completion of each stage of our pipeline
+###################################################################
+* :ref:`Simple tutorial overview <Simple_Tutorial>`
+* :ref:`@posttask<decorators.posttask>` in detail
+
+.. note::
+ Remember to look at the example code:
+
+ * :ref:`Python Code for step 8 <Simple_Tutorial_8th_step_code>`
+
+Let us finish by celebrating the success of our modest pipeline example.
+
+**************************************************************************************
+Running some code to show that a stage of the pipeline has finished
+**************************************************************************************
+
+ A common requirement is to take some extra action when a particular
+ :term:`task` or stage of a pipeline is complete.
+
+ This can range from printing out some message, or ``touching`` some sentinel file,
+ to emailing the author.
+
+ This is particularly useful if the :term:`task` is a recipe applied to an unspecified number
+ of parameters in parallel in different :term:`job`\ s.
+
+ The "extra action" can be added to a *Ruffus* pipeline using the :ref:`@posttask<decorators.posttask>`
+ decorator.
+
+ Let us print a "hooray" message to show that we have finished calculating variances.
+
+ .. image:: ../../images/simple_tutorial_posttask.png
+
+ .. ::
+
+ ::
+
+ def print_hooray():
+ sys.stdout.write("hooray\n")
+
+ @posttask(print_hooray)
+ @merge(step_6_calculate_sum_of_squares, "variance.result")
+ def step_7_calculate_variance (input_file_names, output_file_name):
+ ""
+
+ This is such a short function that we can even write it in-line:
+
+ ::
+
+ @posttask(lambda: sys.stdout.write("hooray\n"))
+ @merge(step_6_calculate_sum_of_squares, "variance.result")
+ def step_7_calculate_variance (input_file_names, output_file_name):
+ ""
+
+.. index::
+ single: @posttask; touchfile (Tutorial)
+ single: touchfile ; @posttask (Tutorial)
+
+**************************************************************************************
+*Touching* a sentinel file after finishing a pipeline stage
+**************************************************************************************
+ | Very often we would like to mark the completion of a pipeline stage by using the
+ date/time stamp of a "sentinel" file.
+ | This is such a common requirement that *Ruffus* even has special syntax for this
+ in the form of :ref:`touch_file<decorators.touch_file>`.
+
+
+ ::
+
+ @posttask(touch_file("sentinel_flag"))
+ def your_pipeline_function (input_file_names, output_file_name):
+ ""
+
+ The file ``sentinel_flag`` will be created (if it did not exist) or its
+ date/time stamp changed to the current time whenever this stage of the pipeline is
+ completed.
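+
+ In our running example this might look as follows (a sketch only; the sentinel name ``variance.finished`` is an arbitrary choice)::
+
+     @posttask(touch_file("variance.finished"))
+     @merge(step_6_calculate_sum_of_squares, "variance.result")
+     def step_7_calculate_variance (input_file_names, output_file_name):
+         ""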
+
+
+**************************************************************************************
+Adding several post task actions
+**************************************************************************************
+ You can, of course, add more than one action to be taken on completion of the
+ task, either by stacking up :ref:`@posttask<decorators.posttask>` decorators or by including
+ several functions in the same **@posttask**:
+
+ ::
+
+ @posttask(print_hooray, print_whopee)
+ @posttask(touch_file("sentinel_flag"))
+ def your_pipeline_function (input_file_names, output_file_name):
+ ""
+
+**************************************************************************************
+Finding out more about **Ruffus**
+**************************************************************************************
+
+ This wraps up our short tutorial on **Ruffus**.
+
+ Here are a few useful topics you may be interested in:
+
+ * :ref:`How to summarise disparate input by category <manual.collate>`
+ * :ref:`How to log pipeline progress <manual.logging>`
+ * :ref:`How exceptions are handled <manual.exceptions>`
+
+ To find out more about **Ruffus**, you can read the :ref:`manual<manual.introduction>`
+ or just start using **Ruffus**.
+
+ Email the authors at ruffus_lib at llew.org.uk if you have any comments or suggestions.
+
+ Happy pipelining!
+
+
diff --git a/doc/_build/html/_sources/tutorials/simple_tutorial/step8_posttask_code.txt b/doc/_build/html/_sources/tutorials/simple_tutorial/step8_posttask_code.txt
new file mode 100644
index 0000000..fbecb00
--- /dev/null
+++ b/doc/_build/html/_sources/tutorials/simple_tutorial/step8_posttask_code.txt
@@ -0,0 +1,162 @@
+.. include:: ../../global.inc
+.. _Simple_Tutorial_8th_step_code:
+
+#########################################################################
+Code for Step 8: Signal the completion of each stage of our pipeline
+#########################################################################
+* :ref:`Simple tutorial overview <Simple_Tutorial>`
+* :ref:`@posttask in detail <decorators.posttask>`
+* :ref:`back to step 8 <Simple_Tutorial_8th_step>`
+
+************************************
+Code
+************************************
+ ::
+
+ NUMBER_OF_RANDOMS = 10000
+ CHUNK_SIZE = 1000
+ working_dir = "temp_tutorial8/"
+
+
+
+ import time, sys, os
+ from ruffus import *
+
+ import random
+ import glob
+
+
+
+
+ #---------------------------------------------------------------
+ #
+ # Create random numbers
+ #
+ @follows(mkdir(working_dir))
+ @files(None, working_dir + "random_numbers.list")
+ def create_random_numbers(input_file_name, output_file_name):
+ f = open(output_file_name, "w")
+ for i in range(NUMBER_OF_RANDOMS):
+ f.write("%g\n" % (random.random() * 100.0))
+
+ #---------------------------------------------------------------
+ #
+ # Split initial file
+ #
+ @follows(create_random_numbers)
+ @split(working_dir + "random_numbers.list", working_dir + "*.chunks")
+ def step_5_split_numbers_into_chunks (input_file_name, output_files):
+ """
+ Splits random numbers file into XXX files of CHUNK_SIZE each
+ """
+ #
+ # clean up files from previous runs
+ #
+ for f in glob.glob("*.chunks"):
+ os.unlink(f)
+ #
+ # create new file every CHUNK_SIZE lines and
+ # copy each line into current file
+ #
+ output_file = None
+ cnt_files = 0
+ for i, line in enumerate(open(input_file_name)):
+ if i % CHUNK_SIZE == 0:
+ cnt_files += 1
+ output_file = open(working_dir + "%d.chunks" % cnt_files, "w")
+ output_file.write(line)
+
+ #---------------------------------------------------------------
+ #
+ # Calculate sum and sum of squares for each chunk file
+ #
+ @transform(step_5_split_numbers_into_chunks, suffix(".chunks"), ".sums")
+ def step_6_calculate_sum_of_squares (input_file_name, output_file_name):
+ output = open(output_file_name, "w")
+ sum_squared, sum = [0.0, 0.0]
+ cnt_values = 0
+ for line in open(input_file_name):
+ cnt_values += 1
+ val = float(line.rstrip())
+ sum_squared += val * val
+ sum += val
+ output.write("%s\n%s\n%d\n" % (repr(sum_squared), repr(sum), cnt_values))
+
+
+ def print_hooray_again():
+ print "hooray again"
+
+ def print_whoppee_again():
+ print "whoppee again"
+
+
+ #---------------------------------------------------------------
+ #
+ # Calculate sum and sum of squares for each chunk
+ #
+ @posttask(lambda: sys.stdout.write("hooray\n"))
+ @posttask(print_hooray_again, print_whoppee_again, touch_file("done"))
+ @merge(step_6_calculate_sum_of_squares, "variance.result")
+ def step_7_calculate_variance (input_file_names, output_file_name):
+ """
+ Calculate variance naively
+ """
+ output = open(output_file_name, "w")
+ #
+ # initialise variables
+ #
+ all_sum_squared = 0.0
+ all_sum = 0.0
+ all_cnt_values = 0.0
+ #
+ # add up the sum_squared, sum and cnt_values from all the chunks
+ #
+ for input_file_name in input_file_names:
+ sum_squared, sum, cnt_values = map(float, open(input_file_name).readlines())
+ all_sum_squared += sum_squared
+ all_sum += sum
+ all_cnt_values += cnt_values
+ all_mean = all_sum / all_cnt_values
+ variance = (all_sum_squared - all_sum * all_mean)/(all_cnt_values)
+ #
+ # print output
+ #
+ print >>output, variance
+
+ #---------------------------------------------------------------
+ #
+ # Run
+ #
+ pipeline_run([step_7_calculate_variance], verbose = 1)
+
+
+
+************************************
+Resulting Output
+************************************
+ ::
+
+ >>> pipeline_run([step_7_calculate_variance], verbose = 1)
+ Make directories [temp_tutorial8/] completed
+ Completed Task = create_random_numbers_mkdir_1
+ Job = [None -> temp_tutorial8/random_numbers.list] completed
+ Completed Task = create_random_numbers
+ Job = [temp_tutorial8/random_numbers.list -> temp_tutorial8/*.chunks] completed
+ Completed Task = step_5_split_numbers_into_chunks
+ Job = [temp_tutorial8/1.chunks -> temp_tutorial8/1.sums] completed
+ Job = [temp_tutorial8/10.chunks -> temp_tutorial8/10.sums] completed
+ Job = [temp_tutorial8/2.chunks -> temp_tutorial8/2.sums] completed
+ Job = [temp_tutorial8/3.chunks -> temp_tutorial8/3.sums] completed
+ Job = [temp_tutorial8/4.chunks -> temp_tutorial8/4.sums] completed
+ Job = [temp_tutorial8/5.chunks -> temp_tutorial8/5.sums] completed
+ Job = [temp_tutorial8/6.chunks -> temp_tutorial8/6.sums] completed
+ Job = [temp_tutorial8/7.chunks -> temp_tutorial8/7.sums] completed
+ Job = [temp_tutorial8/8.chunks -> temp_tutorial8/8.sums] completed
+ Job = [temp_tutorial8/9.chunks -> temp_tutorial8/9.sums] completed
+ Completed Task = step_6_calculate_sum_of_squares
+ Job = [[temp_tutorial8/1.sums, temp_tutorial8/10.sums, temp_tutorial8/2.sums, temp_tutorial8/3.sums, temp_tutorial8/4.sums, temp_tutorial8/5.sums, temp_tutorial8/6.sums, temp_tutorial8/7.sums, temp_tutorial8/8.sums, temp_tutorial8/9.sums] -> variance.result] completed
+ hooray again
+ whoppee again
+ hooray
+ Completed Task = step_7_calculate_variance
+
diff --git a/doc/_build/html/_sources/why_ruffus.txt b/doc/_build/html/_sources/why_ruffus.txt
new file mode 100644
index 0000000..3e26396
--- /dev/null
+++ b/doc/_build/html/_sources/why_ruffus.txt
@@ -0,0 +1,37 @@
+.. Design:
+
+.. include:: global.inc
+
+.. index::
+ pair: Ruffus; Etymology
+ pair: Ruffus; Name origins
+
+.. _design.why_ruffus:
+
+###############################
+Why *Ruffus*?
+###############################
+
+**Cylindrophis ruffus** is the name of the
+`red-tailed pipe snake <http://en.wikipedia.org/wiki/Cylindrophis_ruffus>`_ (bad python-y pun)
+which can be found in `Hong Kong <http://www.discoverhongkong.com/eng/index.html>`_ where the original author comes from.
+
+
+*Ruffus* is a shy creature, and pretends to be a cobra or a `banded krait <http://en.wikipedia.org/wiki/File:Bandedkrait.jpg>`__ by putting up its red tail and ducking its
+head in its coils when startled.
+
++------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+| .. image:: images/wikimedia_cyl_ruffus.jpg | .. image:: images/wikimedia_bandedkrait.jpg |
+| | :scale: 77 |
++------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+| * Not venomous | * Deadly poisonous |
+| * `Mostly Harmless <http://en.wikipedia.org/wiki/Mostly_Harmless>`_ | * `Seriously unfriendly <http://en.wikipedia.org/wiki/List_of_races_and_species_in_The_Hitchhiker's_Guide_to_the_Galaxy#Ravenous_Bugblatter_Beast_of_Traal>`_ |
++------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+
+
+Be careful not to step on one when running down country park lanes at full speed
+in Hong Kong: this snake is a `rare breed <http://www.hkras.org/eng/info/hkspp.htm>`_!
+
+*Ruffus* does most of its work at night and sleeps during the day: typical of many (but alas not all) python programmers!
+
+The original `red-tail pipe <http://upload.wikimedia.org/wikipedia/commons/a/a1/Cyl_ruffus_061212_2025_tdp.jpg>`__ and `banded krait <http://en.wikipedia.org/wiki/File:AB_054_Banded_Krait.JPG>`__ images are from wikimedia.
diff --git a/doc/_build/html/_static/ajax-loader.gif b/doc/_build/html/_static/ajax-loader.gif
new file mode 100644
index 0000000..61faf8c
Binary files /dev/null and b/doc/_build/html/_static/ajax-loader.gif differ
diff --git a/doc/_build/html/_static/basic.css b/doc/_build/html/_static/basic.css
new file mode 100644
index 0000000..a04c8e1
--- /dev/null
+++ b/doc/_build/html/_static/basic.css
@@ -0,0 +1,540 @@
+/*
+ * basic.css
+ * ~~~~~~~~~
+ *
+ * Sphinx stylesheet -- basic theme.
+ *
+ * :copyright: Copyright 2007-2013 by the Sphinx team, see AUTHORS.
+ * :license: BSD, see LICENSE for details.
+ *
+ */
+
+/* -- main layout ----------------------------------------------------------- */
+
+div.clearer {
+ clear: both;
+}
+
+/* -- relbar ---------------------------------------------------------------- */
+
+div.related {
+ width: 100%;
+ font-size: 90%;
+}
+
+div.related h3 {
+ display: none;
+}
+
+div.related ul {
+ margin: 0;
+ padding: 0 0 0 10px;
+ list-style: none;
+}
+
+div.related li {
+ display: inline;
+}
+
+div.related li.right {
+ float: right;
+ margin-right: 5px;
+}
+
+/* -- sidebar --------------------------------------------------------------- */
+
+div.sphinxsidebarwrapper {
+ padding: 10px 5px 0 10px;
+}
+
+div.sphinxsidebar {
+ float: left;
+ width: 230px;
+ margin-left: -100%;
+ font-size: 90%;
+}
+
+div.sphinxsidebar ul {
+ list-style: none;
+}
+
+div.sphinxsidebar ul ul,
+div.sphinxsidebar ul.want-points {
+ margin-left: 20px;
+ list-style: square;
+}
+
+div.sphinxsidebar ul ul {
+ margin-top: 0;
+ margin-bottom: 0;
+}
+
+div.sphinxsidebar form {
+ margin-top: 10px;
+}
+
+div.sphinxsidebar input {
+ border: 1px solid #98dbcc;
+ font-family: sans-serif;
+ font-size: 1em;
+}
+
+div.sphinxsidebar #searchbox input[type="text"] {
+ width: 170px;
+}
+
+div.sphinxsidebar #searchbox input[type="submit"] {
+ width: 30px;
+}
+
+img {
+ border: 0;
+}
+
+/* -- search page ----------------------------------------------------------- */
+
+ul.search {
+ margin: 10px 0 0 20px;
+ padding: 0;
+}
+
+ul.search li {
+ padding: 5px 0 5px 20px;
+ background-image: url(file.png);
+ background-repeat: no-repeat;
+ background-position: 0 7px;
+}
+
+ul.search li a {
+ font-weight: bold;
+}
+
+ul.search li div.context {
+ color: #888;
+ margin: 2px 0 0 30px;
+ text-align: left;
+}
+
+ul.keywordmatches li.goodmatch a {
+ font-weight: bold;
+}
+
+/* -- index page ------------------------------------------------------------ */
+
+table.contentstable {
+ width: 90%;
+}
+
+table.contentstable p.biglink {
+ line-height: 150%;
+}
+
+a.biglink {
+ font-size: 1.3em;
+}
+
+span.linkdescr {
+ font-style: italic;
+ padding-top: 5px;
+ font-size: 90%;
+}
+
+/* -- general index --------------------------------------------------------- */
+
+table.indextable {
+ width: 100%;
+}
+
+table.indextable td {
+ text-align: left;
+ vertical-align: top;
+}
+
+table.indextable dl, table.indextable dd {
+ margin-top: 0;
+ margin-bottom: 0;
+}
+
+table.indextable tr.pcap {
+ height: 10px;
+}
+
+table.indextable tr.cap {
+ margin-top: 10px;
+ background-color: #f2f2f2;
+}
+
+img.toggler {
+ margin-right: 3px;
+ margin-top: 3px;
+ cursor: pointer;
+}
+
+div.modindex-jumpbox {
+ border-top: 1px solid #ddd;
+ border-bottom: 1px solid #ddd;
+ margin: 1em 0 1em 0;
+ padding: 0.4em;
+}
+
+div.genindex-jumpbox {
+ border-top: 1px solid #ddd;
+ border-bottom: 1px solid #ddd;
+ margin: 1em 0 1em 0;
+ padding: 0.4em;
+}
+
+/* -- general body styles --------------------------------------------------- */
+
+a.headerlink {
+ visibility: hidden;
+}
+
+h1:hover > a.headerlink,
+h2:hover > a.headerlink,
+h3:hover > a.headerlink,
+h4:hover > a.headerlink,
+h5:hover > a.headerlink,
+h6:hover > a.headerlink,
+dt:hover > a.headerlink {
+ visibility: visible;
+}
+
+div.body p.caption {
+ text-align: inherit;
+}
+
+div.body td {
+ text-align: left;
+}
+
+.field-list ul {
+ padding-left: 1em;
+}
+
+.first {
+ margin-top: 0 !important;
+}
+
+p.rubric {
+ margin-top: 30px;
+ font-weight: bold;
+}
+
+img.align-left, .figure.align-left, object.align-left {
+ clear: left;
+ float: left;
+ margin-right: 1em;
+}
+
+img.align-right, .figure.align-right, object.align-right {
+ clear: right;
+ float: right;
+ margin-left: 1em;
+}
+
+img.align-center, .figure.align-center, object.align-center {
+ display: block;
+ margin-left: auto;
+ margin-right: auto;
+}
+
+.align-left {
+ text-align: left;
+}
+
+.align-center {
+ text-align: center;
+}
+
+.align-right {
+ text-align: right;
+}
+
+/* -- sidebars -------------------------------------------------------------- */
+
+div.sidebar {
+ margin: 0 0 0.5em 1em;
+ border: 1px solid #ddb;
+ padding: 7px 7px 0 7px;
+ background-color: #ffe;
+ width: 40%;
+ float: right;
+}
+
+p.sidebar-title {
+ font-weight: bold;
+}
+
+/* -- topics ---------------------------------------------------------------- */
+
+div.topic {
+ border: 1px solid #ccc;
+ padding: 7px 7px 0 7px;
+ margin: 10px 0 10px 0;
+}
+
+p.topic-title {
+ font-size: 1.1em;
+ font-weight: bold;
+ margin-top: 10px;
+}
+
+/* -- admonitions ----------------------------------------------------------- */
+
+div.admonition {
+ margin-top: 10px;
+ margin-bottom: 10px;
+ padding: 7px;
+}
+
+div.admonition dt {
+ font-weight: bold;
+}
+
+div.admonition dl {
+ margin-bottom: 0;
+}
+
+p.admonition-title {
+ margin: 0px 10px 5px 0px;
+ font-weight: bold;
+}
+
+div.body p.centered {
+ text-align: center;
+ margin-top: 25px;
+}
+
+/* -- tables ---------------------------------------------------------------- */
+
+table.docutils {
+ border: 0;
+ border-collapse: collapse;
+}
+
+table.docutils td, table.docutils th {
+ padding: 1px 8px 1px 5px;
+ border-top: 0;
+ border-left: 0;
+ border-right: 0;
+ border-bottom: 1px solid #aaa;
+}
+
+table.field-list td, table.field-list th {
+ border: 0 !important;
+}
+
+table.footnote td, table.footnote th {
+ border: 0 !important;
+}
+
+th {
+ text-align: left;
+ padding-right: 5px;
+}
+
+table.citation {
+ border-left: solid 1px gray;
+ margin-left: 1px;
+}
+
+table.citation td {
+ border-bottom: none;
+}
+
+/* -- other body styles ----------------------------------------------------- */
+
+ol.arabic {
+ list-style: decimal;
+}
+
+ol.loweralpha {
+ list-style: lower-alpha;
+}
+
+ol.upperalpha {
+ list-style: upper-alpha;
+}
+
+ol.lowerroman {
+ list-style: lower-roman;
+}
+
+ol.upperroman {
+ list-style: upper-roman;
+}
+
+dl {
+ margin-bottom: 15px;
+}
+
+dd p {
+ margin-top: 0px;
+}
+
+dd ul, dd table {
+ margin-bottom: 10px;
+}
+
+dd {
+ margin-top: 3px;
+ margin-bottom: 10px;
+ margin-left: 30px;
+}
+
+dt:target, .highlighted {
+ background-color: #fbe54e;
+}
+
+dl.glossary dt {
+ font-weight: bold;
+ font-size: 1.1em;
+}
+
+.field-list ul {
+ margin: 0;
+ padding-left: 1em;
+}
+
+.field-list p {
+ margin: 0;
+}
+
+.refcount {
+ color: #060;
+}
+
+.optional {
+ font-size: 1.3em;
+}
+
+.versionmodified {
+ font-style: italic;
+}
+
+.system-message {
+ background-color: #fda;
+ padding: 5px;
+ border: 3px solid red;
+}
+
+.footnote:target {
+ background-color: #ffa;
+}
+
+.line-block {
+ display: block;
+ margin-top: 1em;
+ margin-bottom: 1em;
+}
+
+.line-block .line-block {
+ margin-top: 0;
+ margin-bottom: 0;
+ margin-left: 1.5em;
+}
+
+.guilabel, .menuselection {
+ font-family: sans-serif;
+}
+
+.accelerator {
+ text-decoration: underline;
+}
+
+.classifier {
+ font-style: oblique;
+}
+
+abbr, acronym {
+ border-bottom: dotted 1px;
+ cursor: help;
+}
+
+/* -- code displays --------------------------------------------------------- */
+
+pre {
+ overflow: auto;
+ overflow-y: hidden; /* fixes display issues on Chrome browsers */
+}
+
+td.linenos pre {
+ padding: 5px 0px;
+ border: 0;
+ background-color: transparent;
+ color: #aaa;
+}
+
+table.highlighttable {
+ margin-left: 0.5em;
+}
+
+table.highlighttable td {
+ padding: 0 0.5em 0 0.5em;
+}
+
+tt.descname {
+ background-color: transparent;
+ font-weight: bold;
+ font-size: 1.2em;
+}
+
+tt.descclassname {
+ background-color: transparent;
+}
+
+tt.xref, a tt {
+ background-color: transparent;
+ font-weight: bold;
+}
+
+h1 tt, h2 tt, h3 tt, h4 tt, h5 tt, h6 tt {
+ background-color: transparent;
+}
+
+.viewcode-link {
+ float: right;
+}
+
+.viewcode-back {
+ float: right;
+ font-family: sans-serif;
+}
+
+div.viewcode-block:target {
+ margin: -1px -10px;
+ padding: 0 10px;
+}
+
+/* -- math display ---------------------------------------------------------- */
+
+img.math {
+ vertical-align: middle;
+}
+
+div.body div.math p {
+ text-align: center;
+}
+
+span.eqno {
+ float: right;
+}
+
+/* -- printout stylesheet --------------------------------------------------- */
+
+@media print {
+ div.document,
+ div.documentwrapper,
+ div.bodywrapper {
+ margin: 0 !important;
+ width: 100%;
+ }
+
+ div.sphinxsidebar,
+ div.related,
+ div.footer,
+ #top-link {
+ display: none;
+ }
+}
\ No newline at end of file
diff --git a/doc/_build/html/_static/comment-bright.png b/doc/_build/html/_static/comment-bright.png
new file mode 100644
index 0000000..551517b
Binary files /dev/null and b/doc/_build/html/_static/comment-bright.png differ
diff --git a/doc/_build/html/_static/comment-close.png b/doc/_build/html/_static/comment-close.png
new file mode 100644
index 0000000..09b54be
Binary files /dev/null and b/doc/_build/html/_static/comment-close.png differ
diff --git a/doc/_build/html/_static/comment.png b/doc/_build/html/_static/comment.png
new file mode 100644
index 0000000..92feb52
Binary files /dev/null and b/doc/_build/html/_static/comment.png differ
diff --git a/doc/_build/html/_static/default.css b/doc/_build/html/_static/default.css
new file mode 100644
index 0000000..e534a07
--- /dev/null
+++ b/doc/_build/html/_static/default.css
@@ -0,0 +1,256 @@
+/*
+ * default.css_t
+ * ~~~~~~~~~~~~~
+ *
+ * Sphinx stylesheet -- default theme.
+ *
+ * :copyright: Copyright 2007-2013 by the Sphinx team, see AUTHORS.
+ * :license: BSD, see LICENSE for details.
+ *
+ */
+
+@import url("basic.css");
+
+/* -- page layout ----------------------------------------------------------- */
+
+body {
+ font-family: sans-serif;
+ font-size: 100%;
+ background-color: #11303d;
+ color: #000;
+ margin: 0;
+ padding: 0;
+}
+
+div.document {
+ background-color: #1c4e63;
+}
+
+div.documentwrapper {
+ float: left;
+ width: 100%;
+}
+
+div.bodywrapper {
+ margin: 0 0 0 230px;
+}
+
+div.body {
+ background-color: #ffffff;
+ color: #000000;
+ padding: 0 20px 30px 20px;
+}
+
+div.footer {
+ color: #ffffff;
+ width: 100%;
+ padding: 9px 0 9px 0;
+ text-align: center;
+ font-size: 75%;
+}
+
+div.footer a {
+ color: #ffffff;
+ text-decoration: underline;
+}
+
+div.related {
+ background-color: #133f52;
+ line-height: 30px;
+ color: #ffffff;
+}
+
+div.related a {
+ color: #ffffff;
+}
+
+div.sphinxsidebar {
+}
+
+div.sphinxsidebar h3 {
+ font-family: 'Trebuchet MS', sans-serif;
+ color: #ffffff;
+ font-size: 1.4em;
+ font-weight: normal;
+ margin: 0;
+ padding: 0;
+}
+
+div.sphinxsidebar h3 a {
+ color: #ffffff;
+}
+
+div.sphinxsidebar h4 {
+ font-family: 'Trebuchet MS', sans-serif;
+ color: #ffffff;
+ font-size: 1.3em;
+ font-weight: normal;
+ margin: 5px 0 0 0;
+ padding: 0;
+}
+
+div.sphinxsidebar p {
+ color: #ffffff;
+}
+
+div.sphinxsidebar p.topless {
+ margin: 5px 10px 10px 10px;
+}
+
+div.sphinxsidebar ul {
+ margin: 10px;
+ padding: 0;
+ color: #ffffff;
+}
+
+div.sphinxsidebar a {
+ color: #98dbcc;
+}
+
+div.sphinxsidebar input {
+ border: 1px solid #98dbcc;
+ font-family: sans-serif;
+ font-size: 1em;
+}
+
+
+
+/* -- hyperlink styles ------------------------------------------------------ */
+
+a {
+ color: #355f7c;
+ text-decoration: none;
+}
+
+a:visited {
+ color: #355f7c;
+ text-decoration: none;
+}
+
+a:hover {
+ text-decoration: underline;
+}
+
+
+
+/* -- body styles ----------------------------------------------------------- */
+
+div.body h1,
+div.body h2,
+div.body h3,
+div.body h4,
+div.body h5,
+div.body h6 {
+ font-family: 'Trebuchet MS', sans-serif;
+ background-color: #f2f2f2;
+ font-weight: normal;
+ color: #20435c;
+ border-bottom: 1px solid #ccc;
+ margin: 20px -20px 10px -20px;
+ padding: 3px 0 3px 10px;
+}
+
+div.body h1 { margin-top: 0; font-size: 200%; }
+div.body h2 { font-size: 160%; }
+div.body h3 { font-size: 140%; }
+div.body h4 { font-size: 120%; }
+div.body h5 { font-size: 110%; }
+div.body h6 { font-size: 100%; }
+
+a.headerlink {
+ color: #c60f0f;
+ font-size: 0.8em;
+ padding: 0 4px 0 4px;
+ text-decoration: none;
+}
+
+a.headerlink:hover {
+ background-color: #c60f0f;
+ color: white;
+}
+
+div.body p, div.body dd, div.body li {
+ text-align: justify;
+ line-height: 130%;
+}
+
+div.admonition p.admonition-title + p {
+ display: inline;
+}
+
+div.admonition p {
+ margin-bottom: 5px;
+}
+
+div.admonition pre {
+ margin-bottom: 5px;
+}
+
+div.admonition ul, div.admonition ol {
+ margin-bottom: 5px;
+}
+
+div.note {
+ background-color: #eee;
+ border: 1px solid #ccc;
+}
+
+div.seealso {
+ background-color: #ffc;
+ border: 1px solid #ff6;
+}
+
+div.topic {
+ background-color: #eee;
+}
+
+div.warning {
+ background-color: #ffe4e4;
+ border: 1px solid #f66;
+}
+
+p.admonition-title {
+ display: inline;
+}
+
+p.admonition-title:after {
+ content: ":";
+}
+
+pre {
+ padding: 5px;
+ background-color: #eeffcc;
+ color: #333333;
+ line-height: 120%;
+ border: 1px solid #ac9;
+ border-left: none;
+ border-right: none;
+}
+
+tt {
+ background-color: #ecf0f3;
+ padding: 0 1px 0 1px;
+ font-size: 0.95em;
+}
+
+th {
+ background-color: #ede;
+}
+
+.warning tt {
+ background: #efc2c2;
+}
+
+.note tt {
+ background: #d6d6d6;
+}
+
+.viewcode-back {
+ font-family: sans-serif;
+}
+
+div.viewcode-block:target {
+ background-color: #f4debf;
+ border-top: 1px solid #ac9;
+ border-bottom: 1px solid #ac9;
+}
\ No newline at end of file
diff --git a/doc/_build/html/_static/doctools.js b/doc/_build/html/_static/doctools.js
new file mode 100644
index 0000000..8614442
--- /dev/null
+++ b/doc/_build/html/_static/doctools.js
@@ -0,0 +1,235 @@
+/*
+ * doctools.js
+ * ~~~~~~~~~~~
+ *
+ * Sphinx JavaScript utilities for all documentation.
+ *
+ * :copyright: Copyright 2007-2013 by the Sphinx team, see AUTHORS.
+ * :license: BSD, see LICENSE for details.
+ *
+ */
+
+/**
+ * select a different prefix for underscore
+ */
+$u = _.noConflict();
+
+/**
+ * make the code below compatible with browsers without
+ * an installed firebug like debugger
+if (!window.console || !console.firebug) {
+ var names = ["log", "debug", "info", "warn", "error", "assert", "dir",
+ "dirxml", "group", "groupEnd", "time", "timeEnd", "count", "trace",
+ "profile", "profileEnd"];
+ window.console = {};
+ for (var i = 0; i < names.length; ++i)
+ window.console[names[i]] = function() {};
+}
+ */
+
+/**
+ * small helper function to urldecode strings
+ */
+jQuery.urldecode = function(x) {
+ return decodeURIComponent(x).replace(/\+/g, ' ');
+};
+
+/**
+ * small helper function to urlencode strings
+ */
+jQuery.urlencode = encodeURIComponent;
+
+/**
+ * This function returns the parsed url parameters of the
+ * current request. Multiple values per key are supported,
+ * it will always return arrays of strings for the value parts.
+ */
+jQuery.getQueryParameters = function(s) {
+ if (typeof s == 'undefined')
+ s = document.location.search;
+ var parts = s.substr(s.indexOf('?') + 1).split('&');
+ var result = {};
+ for (var i = 0; i < parts.length; i++) {
+ var tmp = parts[i].split('=', 2);
+ var key = jQuery.urldecode(tmp[0]);
+ var value = jQuery.urldecode(tmp[1]);
+ if (key in result)
+ result[key].push(value);
+ else
+ result[key] = [value];
+ }
+ return result;
+};
+
+/**
+ * highlight a given string on a jquery object by wrapping it in
+ * span elements with the given class name.
+ */
+jQuery.fn.highlightText = function(text, className) {
+ function highlight(node) {
+ if (node.nodeType == 3) {
+ var val = node.nodeValue;
+ var pos = val.toLowerCase().indexOf(text);
+ if (pos >= 0 && !jQuery(node.parentNode).hasClass(className)) {
+ var span = document.createElement("span");
+ span.className = className;
+ span.appendChild(document.createTextNode(val.substr(pos, text.length)));
+ node.parentNode.insertBefore(span, node.parentNode.insertBefore(
+ document.createTextNode(val.substr(pos + text.length)),
+ node.nextSibling));
+ node.nodeValue = val.substr(0, pos);
+ }
+ }
+ else if (!jQuery(node).is("button, select, textarea")) {
+ jQuery.each(node.childNodes, function() {
+ highlight(this);
+ });
+ }
+ }
+ return this.each(function() {
+ highlight(this);
+ });
+};
+
+/**
+ * Small JavaScript module for the documentation.
+ */
+var Documentation = {
+
+ init : function() {
+ this.fixFirefoxAnchorBug();
+ this.highlightSearchWords();
+ this.initIndexTable();
+ },
+
+ /**
+ * i18n support
+ */
+ TRANSLATIONS : {},
+ PLURAL_EXPR : function(n) { return n == 1 ? 0 : 1; },
+ LOCALE : 'unknown',
+
+ // gettext and ngettext don't access this so that the functions
+ // can safely bound to a different name (_ = Documentation.gettext)
+ gettext : function(string) {
+ var translated = Documentation.TRANSLATIONS[string];
+ if (typeof translated == 'undefined')
+ return string;
+ return (typeof translated == 'string') ? translated : translated[0];
+ },
+
+ ngettext : function(singular, plural, n) {
+ var translated = Documentation.TRANSLATIONS[singular];
+ if (typeof translated == 'undefined')
+ return (n == 1) ? singular : plural;
+ return translated[Documentation.PLURALEXPR(n)];
+ },
+
+ addTranslations : function(catalog) {
+ for (var key in catalog.messages)
+ this.TRANSLATIONS[key] = catalog.messages[key];
+ this.PLURAL_EXPR = new Function('n', 'return +(' + catalog.plural_expr + ')');
+ this.LOCALE = catalog.locale;
+ },
+
+ /**
+ * add context elements like header anchor links
+ */
+ addContextElements : function() {
+ $('div[id] > :header:first').each(function() {
+ $('<a class="headerlink">\u00B6</a>').
+ attr('href', '#' + this.id).
+ attr('title', _('Permalink to this headline')).
+ appendTo(this);
+ });
+ $('dt[id]').each(function() {
+ $('<a class="headerlink">\u00B6</a>').
+ attr('href', '#' + this.id).
+ attr('title', _('Permalink to this definition')).
+ appendTo(this);
+ });
+ },
+
+ /**
+ * workaround a firefox stupidity
+ */
+ fixFirefoxAnchorBug : function() {
+ if (document.location.hash && $.browser.mozilla)
+ window.setTimeout(function() {
+ document.location.href += '';
+ }, 10);
+ },
+
+ /**
+ * highlight the search words provided in the url in the text
+ */
+ highlightSearchWords : function() {
+ var params = $.getQueryParameters();
+ var terms = (params.highlight) ? params.highlight[0].split(/\s+/) : [];
+ if (terms.length) {
+ var body = $('div.body');
+ window.setTimeout(function() {
+ $.each(terms, function() {
+ body.highlightText(this.toLowerCase(), 'highlighted');
+ });
+ }, 10);
+ $('<p class="highlight-link"><a href="javascript:Documentation.' +
+ 'hideSearchWords()">' + _('Hide Search Matches') + '</a></p>')
+ .appendTo($('#searchbox'));
+ }
+ },
+
+ /**
+ * init the domain index toggle buttons
+ */
+ initIndexTable : function() {
+ var togglers = $('img.toggler').click(function() {
+ var src = $(this).attr('src');
+ var idnum = $(this).attr('id').substr(7);
+ $('tr.cg-' + idnum).toggle();
+ if (src.substr(-9) == 'minus.png')
+ $(this).attr('src', src.substr(0, src.length-9) + 'plus.png');
+ else
+ $(this).attr('src', src.substr(0, src.length-8) + 'minus.png');
+ }).css('display', '');
+ if (DOCUMENTATION_OPTIONS.COLLAPSE_INDEX) {
+ togglers.click();
+ }
+ },
+
+ /**
+ * helper function to hide the search marks again
+ */
+ hideSearchWords : function() {
+ $('#searchbox .highlight-link').fadeOut(300);
+ $('span.highlighted').removeClass('highlighted');
+ },
+
+ /**
+ * make the url absolute
+ */
+ makeURL : function(relativeURL) {
+ return DOCUMENTATION_OPTIONS.URL_ROOT + '/' + relativeURL;
+ },
+
+ /**
+ * get the current relative url
+ */
+ getCurrentURL : function() {
+ var path = document.location.pathname;
+ var parts = path.split(/\//);
+ $.each(DOCUMENTATION_OPTIONS.URL_ROOT.split(/\//), function() {
+ if (this == '..')
+ parts.pop();
+ });
+ var url = parts.join('/');
+ return path.substring(url.lastIndexOf('/') + 1, path.length - 1);
+ }
+};
+
+// quick alias for translations
+_ = Documentation.gettext;
+
+$(document).ready(function() {
+ Documentation.init();
+});
diff --git a/doc/_build/html/_static/down-pressed.png b/doc/_build/html/_static/down-pressed.png
new file mode 100644
index 0000000..6f7ad78
Binary files /dev/null and b/doc/_build/html/_static/down-pressed.png differ
diff --git a/doc/_build/html/_static/down.png b/doc/_build/html/_static/down.png
new file mode 100644
index 0000000..3003a88
Binary files /dev/null and b/doc/_build/html/_static/down.png differ
diff --git a/doc/_build/html/_static/example_scripts/complicated_example.py b/doc/_build/html/_static/example_scripts/complicated_example.py
new file mode 100755
index 0000000..3c626a8
--- /dev/null
+++ b/doc/_build/html/_static/example_scripts/complicated_example.py
@@ -0,0 +1,527 @@
+#!/usr/bin/env python
+"""
+
+ complicated_example.py
+
+"""
+
+import os, sys
+exe_path = os.path.split(os.path.abspath(sys.argv[0]))[0]
+sys.path.append(os.path.abspath(os.path.join(exe_path,"..", "..")))
+from ruffus import *
+from time import sleep
+import random
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# options
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+from optparse import OptionParser
+parser = OptionParser(version="%prog 1.0")
+parser.add_option("-t", "--target_tasks", dest="target_tasks",
+ action="append",
+ default = ["summarise_all"],
+ metavar="JOBNAME",
+ type="string",
+ help="Target task(s) of pipeline.")
+parser.add_option("-f", "--forced_tasks", dest="forced_tasks",
+ action="append",
+ default = list(),
+ metavar="JOBNAME",
+ type="string",
+ help="Pipeline task(s) which will be included even if they are up to date.")
+parser.add_option("-j", "--jobs", dest="jobs",
+ default=5,
+ metavar="jobs",
+ type="int",
+ help="Specifies the number of jobs (commands) to run simultaneously.")
+
+parser.add_option("-d", "--data_dir", dest="data_dir",
+ default="%s/data_for_complicated_example" % exe_path,
+ metavar="PATH",
+ type="string",
+ help="Directory with starting data [*.fa].")
+parser.add_option("-w", "--working_dir", dest="working_dir",
+ default="/working_dir",
+ metavar="PATH",
+ type="string",
+ help="Working directory.")
+
+
+parser.add_option("-v", "--verbose", dest = "verbose",
+ action="store_true", default=False,
+ help="Do not echo to shell but only print to log.")
+parser.add_option("-D", "--dependency", dest="dependency_file",
+ metavar="FILE",
+ type="string",
+ help="Print a dependency graph of the pipeline that would be executed "
+ "to FILE, but do not execute it.")
+parser.add_option("-F", "--dependency_graph_format", dest="dependency_graph_format",
+ metavar="FORMAT",
+ type="string",
+ default = 'svg',
+ help="format of dependency graph file. Can be 'ps' (PostScript), "+
+ "'svg' 'svgz' (Scalable Vector Graphics), " +
+ "'png' 'gif' (bitmap graphics) etc ")
+parser.add_option("-n", "--just_print", dest="just_print",
+ action="store_true", default=False,
+ help="Print a description of the jobs that would be executed, "
+ "but do not execute them.")
+
+
+
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# imports
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+import StringIO
+import re
+import operator
+import sys
+from collections import defaultdict
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Functions
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+#_________________________________________________________________________________________
+#
+# Helper function:
+#
+# split_gene_files
+#
+#_________________________________________________________________________________________
+def split_gene_files ( gene_file_name,
+ job_completion_flag_file_name,
+ split_output_dir):
+ """
+ Helper function to simulate splitting gene files into "chunks" suitable for
+ parallel jobs on a computational cluster
+
+ The number of output files is only known at runtime
+ because the number of "chunks" depends on the size
+ of the starting gene sets
+
+ We simulate this using a random number from 20->50
+ """
+
+ #
+ # make output directory
+ #
+ if not os.path.exists(split_output_dir):
+ os.makedirs(split_output_dir)
+
+ # save number of chunks for later tasks
+ number_of_output_files = int(random.uniform(20, 50))
+
+ for index in range(number_of_output_files):
+ open("%s/%d.fa" % (split_output_dir, index), "w")
+ open(job_completion_flag_file_name, "w")
+
+
+#_________________________________________________________________________________________
+#
+# get_unknown_gene_set_names
+# get_species_names
+#
+#
+# functions for getting unknown gene set names and species names
+#
+#_________________________________________________________________________________________
+import glob, re
+def get_chunked_gene_file_names (dir_name):
+ """
+ Get list of gene file names
+ Helper function for getting unknown gene set names, and species names
+ """
+ regex = re.compile(r".+/(.+).genes.fa")
+ gene_set_names = []
+ for file_name in glob.glob("%s/%s/*.genes.fa" % (d_dir, dir_name)):
+ m = regex.search(file_name)
+ gene_set_names.append(m.group(1))
+ return gene_set_names
+def get_unknown_gene_set_names ():
+ return get_chunked_gene_file_names("unknown_genes")
+def get_species_names ():
+ return get_chunked_gene_file_names("all_genes_in_each_species")
+
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Main logic
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+
+
+
+
+# get help string
+f =StringIO.StringIO()
+parser.print_help(f)
+helpstr = f.getvalue()
+(options, remaining_args) = parser.parse_args()
+
+
+d_dir = options.data_dir
+w_dir = options.working_dir
+
+
+
+
+#_________________________________________________________________________________________
+#
+# Step 1:
+#
+# split_unknown_gene_set
+#
+# data_dir/unknown_genes/XXX.genes.fa
+# ->working_dir/XXX/split_gene_sets.completed
+# ->working_dir/XXX/NNN.fa
+#
+#_________________________________________________________________________________________
+@follows(mkdir(w_dir))
+@files_re("%s/unknown_genes/*.genes.fa" % d_dir,
+ r"(.*/)(.*)(\.genes.fa)",
+ r"\1\2\3", # unknown_gene_set file name
+ r"%s/\2/split_gene_sets.completed" % w_dir, # job_completion_flag
+ r"%s/\2" % w_dir) # split_output_dir
+def split_unknown_gene_set( starting_gene_set,
+ job_completion_flag,
+ split_output_dir):
+ """
+ Simulate splitting gene files for unknown gene set into "chunks" suitable for
+ parallel jobs on a computational cluster
+ """
+ split_gene_files ( starting_gene_set,
+ job_completion_flag,
+ split_output_dir)
+ sleep(1)
+
+
+#_________________________________________________________________________________________
+#
+# Step 2:
+#
+# split_per_species_gene_sets
+
+# data_dir/all_genes_in_each_species/YYY.genes.fa
+# ->working_dir/species_YYY/split_gene_sets.completed
+# ->working_dir/species_YYY/MMM.fa
+#
+#_________________________________________________________________________________________
+@follows(mkdir(w_dir))
+@files_re("%s/all_genes_in_each_species/*.genes.fa" % d_dir,
+ r"(.*/)(.*)(\.genes.fa)",
+ r"\1\2\3", # all_genes_in_species
+ r"%s/species_\2/split_gene_sets.completed" % w_dir, # job_completion_flag
+ r"%s/species_\2" % w_dir) # split_output_dir
+def split_per_species_gene_sets(all_genes_in_species,
+ job_completion_flag,
+ split_output_dir):
+ """
+ Simulate splitting gene files for each species into "chunks" suitable for
+ parallel jobs on a computational cluster
+ """
+ split_gene_files ( all_genes_in_species,
+ job_completion_flag,
+ split_output_dir)
+ sleep(1)
+
+
+
+#_________________________________________________________________________________________
+#
+# Step 3:
+#
+# all_vs_all_comparisons
+# working_dir/species_YYY/MMM.fa
+# working_dir/XXX/NNN.fa
+# -> compare/x/y.n.m.comparison_res
+# -> compare/x/y.n.m.complete
+#
+#_________________________________________________________________________________________
+#
+# function for generating custom parameters
+#
+def generate_all_vs_all_params ():
+ """
+ Custom function to generate
+ all vs. all file names for the various "chunks"
+ """
+
+ chunk_index_regex = re.compile(r".+/(.+).fa")
+ def parse_index_from_chunk_filename (chunk_filename):
+ match = chunk_index_regex.search(chunk_filename)
+ return int(match.group(1))
+
+ species_names = get_species_names()
+ gene_set_names = get_unknown_gene_set_names()
+ for x in gene_set_names:
+ for y in species_names:
+ y = "species_" + y
+
+ m_files = glob.glob("%s/%s/*.fa" % (w_dir, x))
+ n_files = glob.glob("%s/%s/*.fa" % (w_dir, y))
+
+ #
+ # for each species chunk vs for each unknown chunk
+ #
+ for m_file in m_files:
+ for n_file in n_files:
+ input_files = [m_file, n_file]
+ output_dir = "%s/compare/%s" % (w_dir, x)
+
+ m = parse_index_from_chunk_filename(m_file)
+ n = parse_index_from_chunk_filename(n_file)
+
+ job_completion_flag = output_dir + "/%s.%d.%d.complete" % (y, m, n)
+ result_file = output_dir + "/%s.%d.%d.comparison_res" % (y, m, n)
+ name = "%s -> %d vs %d\n" % (y, m, n)
+ yield input_files, job_completion_flag, output_dir, result_file, name
+
+
+
+@follows(split_unknown_gene_set, split_per_species_gene_sets)
+@files(generate_all_vs_all_params)
+def all_vs_all_comparisons(file_chunks,
+ job_completion_flag,
+ output_dir,
+ result_file,
+ name):
+ """
+ Simulate comparison of gene chunks against each other
+ Normally runs in parallel on a computational cluster
+ """
+
+ #
+ # make output directory
+ #
+ try:
+ os.makedirs(output_dir)
+ except OSError:
+ pass
+
+ open(job_completion_flag, "w")
+ open(result_file, "w").write(name)
+
+
+#_________________________________________________________________________________________
+#
+# Step 4:
+#
+# Recombine: alignment results to make gene families
+# compare/x/*.comparison_res
+# -> multiple_alignment/x/x.gene_families
+#
+#_________________________________________________________________________________________
+
+#
+# generate_params_for_making_gene_families
+#
+# function for generating custom parameters
+#
+def generate_params_for_making_gene_families ():
+ """
+ Custom function for combining comparison files into gene families
+ """
+ gene_set_names = get_unknown_gene_set_names()
+ for x in gene_set_names:
+ results_files = glob.glob("%s/compare/%s/*.comparison_res" % (w_dir, x))
+ output_dir = "%s/multiple_alignment/%s" % (w_dir, x)
+ family_file = "%s/gene.families" % output_dir
+ yield results_files, family_file, output_dir
+
+
+@follows(all_vs_all_comparisons)
+@files(generate_params_for_making_gene_families)
+def combine_into_gene_familes (results_files, family_file_name, output_dir):
+ """
+ Simulate making gene families by concatenating comparison results :-)
+ """
+ #
+ # make output directory
+ #
+ if not os.path.exists(output_dir):
+ os.makedirs(output_dir)
+
+ family_file = open(family_file_name, "w")
+ for f in results_files:
+ family_file.write(open(f).read())
+ sleep(1)
+
+#_________________________________________________________________________________________
+#
+# Step 5:
+#
+# split_gene_family_for_evolutionary_analysis
+# multiple_alignment/x/x.gene_families
+# -> multiple_alignment/x/NNN.aln
+# -> multiple_alignment/x/split.completed
+#
+#_________________________________________________________________________________________
+@follows(combine_into_gene_familes)
+@files_re("%s/multiple_alignment/*/gene.families" % w_dir,
+ r"(.+)/(gene.families)",
+ r"\1/\2",
+ r"\1/split.completed",
+ r"\1")
+def split_gene_family_for_evolutionary_analysis( family_file,
+ job_completion_flag_file, split_output_dir):
+ """
+ Simulate splitting family of genes into "chunks" suitable for
+ parallel jobs on a computational cluster
+ """
+
+ # save number of chunks for later tasks
+ number_of_output_files = int(random.uniform(20, 50))
+
+ for index in range(number_of_output_files):
+ open("%s/%d.aln" % (split_output_dir, index), "w").write("chunk %d" % index)
+ open(job_completion_flag_file, "w")
+ sleep(1)
+
+
+#_________________________________________________________________________________________
+#
+# Step 6:
+#
+# evolution_analysis
+# multiple_alignment/x/NNN.aln
+# -> multiple_alignment/x/NNN.evo_res
+#
+#_________________________________________________________________________________________
+@follows(split_gene_family_for_evolutionary_analysis)
+@files_re("%s/multiple_alignment/*/*.aln" % w_dir,
+ r"(.+).aln",
+ r"\1.evo_res")
+def evolution_analysis( family_file, result_file_name):
+ """
+ Simulate evolutionary analysis
+ """
+
+ result_file = open(result_file_name, "w")
+ result_file.write(family_file + "\n")
+ sleep(1)
+
+
+#_________________________________________________________________________________________
+#
+# Step 7:
+#
+# combine_evolution_analysis
+# multiple_alignment/x/NNN.evo_res
+# -> evolutionary_analysis/x.results
+#
+#_________________________________________________________________________________________
+
+#
+# generate_params_for_combining_evolutionary_analyses
+#
+# function for generating custom parameters
+#
+def generate_params_for_combining_evolutionary_analyses ():
+ """
+ Custom function for combining evolutionary analyses per unknown gene set
+ """
+ gene_set_names = get_unknown_gene_set_names()
+ for x in gene_set_names:
+ results_files = glob.glob("%s/multiple_alignment/%s/*.evo_res" % (w_dir, x))
+ combined_file = "%s/evolutionary_analysis/%s.results" % (w_dir, x)
+ yield results_files, combined_file
+
+@follows(evolution_analysis, mkdir("%s/evolutionary_analysis" % w_dir))
+@files(generate_params_for_combining_evolutionary_analyses)
+def combine_evolution_analysis (results_files, combined_file_name):
+ """
+ Simulate combining evolutionary analyses
+ """
+ combined_file = open(combined_file_name, "w")
+ for f in results_files:
+ combined_file.write(open(f).read())
+ sleep(1)
+
+
+
+#_________________________________________________________________________________________
+#
+# Step 8:
+#
+# summarise_evolution_analysis
+# evolutionary_analysis/x.results
+# -> evolutionary_analysis/x.summary
+#
+#_________________________________________________________________________________________
+@follows(combine_evolution_analysis)
+@files_re("%s/evolutionary_analysis/*.results" % w_dir,
+ r"(.+).results",
+ r"\1.summary")
+def summarise_evolution_analysis( results_file, summary_file_name):
+ """
+ Simulate summary of evolutionary analysis
+ """
+ summary_file = open(summary_file_name, "w")
+ summary_file.write("summary of " + open(results_file).read())
+ sleep(1)
+
+
+#_________________________________________________________________________________________
+#
+# Step 9:
+#
+# summarise_all
+# evolutionary_analysis/x.summary
+# -> all.total_summary
+#
+#_________________________________________________________________________________________
+summary_file_names = ["%s/evolutionary_analysis/%s.summary" % (w_dir, n)
+ for n in get_unknown_gene_set_names()]
+total_summary_file_name = "%s/all.total_summary" % w_dir
+
+@follows(summarise_evolution_analysis)
+@files(summary_file_names, total_summary_file_name)
+def summarise_all( summary_files, total_summary_file_name):
+ """
+ Simulate summarising all results
+ """
+ total_summary_file = open(total_summary_file_name, "w")
+ total_summary_file.write("Overall Summary:\n")
+ for f in summary_files:
+ total_summary_file.write(open(f).read())
+ sleep(1)
+
+
+
+
+
+
+
+#888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+#
+# print pipeline or run pipeline
+#
+
+if options.just_print:
+ pipeline_printout(sys.stdout, options.target_tasks, options.forced_tasks, long_winded=True)
+
+elif options.dependency_file:
+ graph_printout ( open(options.dependency_file, "w"),
+ options.dependency_graph_format,
+ options.target_tasks,
+ options.forced_tasks)
+else:
+ pipeline_run(options.target_tasks, options.forced_tasks, multiprocess = options.jobs)
+
diff --git a/doc/_build/html/_static/example_scripts/intermediate_example.py b/doc/_build/html/_static/example_scripts/intermediate_example.py
new file mode 100755
index 0000000..8128903
--- /dev/null
+++ b/doc/_build/html/_static/example_scripts/intermediate_example.py
@@ -0,0 +1,313 @@
+#!/usr/bin/env python
+"""
+
+ intermediate_example.py
+
+ This script takes N pairs of input files
+ (with the suffixes .gene and .gwas)
+ and runs them against M sets of simulation data
+ (with the suffix .simulation).
+ A summary per input file pair is then produced
+
+
+ In pseudo-code:
+
+ STEP_1:
+
+ for n_file in NNN_pairs_of_input_files:
+ for m_file in MMM_simulation_data:
+
+ [n_file.gene,
+ n_file.gwas,
+ m_file.simulation] -> n_file.m_file.simulation_res
+
+
+ STEP_2:
+
+ for n_file in NNN_pairs_of_input_files:
+
+ n_file.*.simulation_res -> n_file.mean
+
+
+
+"""
+
+import os, sys
+exe_path = os.path.split(os.path.abspath(sys.argv[0]))[0]
+from ruffus import *
+from time import sleep
+import random
+from itertools import izip
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# options
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+from optparse import OptionParser
+parser = OptionParser(version="%prog 1.0")
+parser.add_option("-t", "--target_tasks", dest="target_tasks",
+ action="append",
+ default = ["statistical_summary"],
+ metavar="JOBNAME",
+ type="string",
+ help="Target task(s) of pipeline.")
+parser.add_option("-f", "--forced_tasks", dest="forced_tasks",
+ action="append",
+ default = list(),
+ metavar="JOBNAME",
+ type="string",
+ help="Pipeline task(s) which will be included even if they are up to date.")
+parser.add_option("-j", "--jobs", dest="jobs",
+ default=5,
+ metavar="jobs",
+ type="int",
+ help="Specifies the number of jobs (commands) to run simultaneously.")
+
+parser.add_option("-g", "--gene_data_dir", dest="gene_data_dir",
+ default="%s/data_for_intermediate_example/genes" % exe_path,
+ metavar="PATH",
+ type="string",
+ help="Directory with gene data [*.genes / *.gwas].")
+parser.add_option("-s", "--simulation_data_dir", dest="simulation_data_dir",
+ default="%s/data_for_intermediate_example/simulation" % exe_path,
+ metavar="PATH",
+ type="string",
+ help="Directory with simulation data [*.simulation].")
+parser.add_option("-w", "--working_dir", dest="working_dir",
+ default="/working_dir",
+ metavar="PATH",
+ type="string",
+ help="Working directory.")
+
+
+parser.add_option("-v", "--verbose", dest = "verbose",
+ action="store_true", default=False,
+ help="Do not echo to shell but only print to log.")
+parser.add_option("-D", "--dependency", dest="dependency_file",
+ metavar="FILE",
+ type="string",
+ help="Print a dependency graph of the pipeline that would be executed "
+ "to FILE, but do not execute it.")
+parser.add_option("-F", "--dependency_graph_format", dest="dependency_graph_format",
+ metavar="FORMAT",
+ type="string",
+ default = 'svg',
+ help="format of dependency graph file. Can be 'ps' (PostScript), "+
+ "'svg' 'svgz' (Scalable Vector Graphics), " +
+ "'png' 'gif' (bitmap graphics) etc ")
+parser.add_option("-n", "--just_print", dest="just_print",
+ action="store_true", default=False,
+ help="Print a description of the jobs that would be executed, "
+ "but do not execute them.")
+
+
+
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# imports
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+import StringIO
+import re
+import operator
+import sys
+from collections import defaultdict
+import glob
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Functions
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+
+#_________________________________________________________________________________________
+#
+# get gene gwas file pairs
+#
+#_________________________________________________________________________________________
+def get_gene_gwas_file_pairs( ):
+ """
+ Helper function to get all *.gene / *.gwas file pairs from the directory specified
+ in --gene_data_dir
+
+ Returns
+ file pairs with both .gene and .gwas extensions,
+ corresponding roots (no extension) of each file
+ """
+
+
+ gene_files = glob.glob(os.path.join(options.gene_data_dir, "*.gene"))
+ gwas_files = glob.glob(os.path.join(options.gene_data_dir, "*.gwas"))
+
+ common_roots = set(map(lambda x: os.path.splitext(os.path.split(x)[1])[0], gene_files))
+ common_roots &=set(map(lambda x: os.path.splitext(os.path.split(x)[1])[0], gwas_files))
+ common_roots = list(common_roots)
+
+ p = os.path; g_dir = options.gene_data_dir
+
+ file_pairs = [[p.join(g_dir, x + ".gene"), p.join(g_dir, x + ".gwas")] for x in common_roots]
+
+ return file_pairs, common_roots
+
+#_________________________________________________________________________________________
+#
+# get simulation files
+#
+#_________________________________________________________________________________________
+def get_simulation_files( ):
+ """
+ Helper function to get all *.simulation files from the directory specified
+ in --simulation_data_dir
+ Returns
+ files with the .simulation extension,
+ corresponding roots (no extension) of each file
+ """
+ simulation_files = glob.glob(os.path.join(options.simulation_data_dir, "*.simulation"))
+ simulation_roots =map(lambda x: os.path.splitext(os.path.split(x)[1])[0], simulation_files)
+ return simulation_files, simulation_roots
+
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Main logic
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+
+
+
+
+# get help string
+f =StringIO.StringIO()
+parser.print_help(f)
+helpstr = f.getvalue()
+(options, remaining_args) = parser.parse_args()
+
+
+working_dir = options.working_dir
+
+
+
+
+#_________________________________________________________________________________________
+#
+# Step 1:
+#
+# for n_file in NNN_pairs_of_input_files:
+# for m_file in MMM_simulation_data:
+#
+# [n_file.gene,
+# n_file.gwas,
+# m_file.simulation] -> working_dir/n_file.m_file.simulation_res
+#
+#_________________________________________________________________________________________
+def generate_simulation_params ():
+ """
+ Custom function to generate
+ file names for gene/gwas simulation study
+ """
+
+ simulation_files, simulation_file_roots = get_simulation_files()
+ gene_gwas_file_pairs, gene_gwas_file_roots = get_gene_gwas_file_pairs()
+
+ for sim_file, sim_file_root in izip(simulation_files, simulation_file_roots):
+ for (gene, gwas), gene_file_root in izip(gene_gwas_file_pairs, gene_gwas_file_roots):
+
+ result_file = "%s.%s.simulation_res" % (gene_file_root, sim_file_root)
+ result_file_path = os.path.join(working_dir, "simulation_results", result_file)
+
+ yield [gene, gwas, sim_file], result_file_path, gene_file_root, sim_file_root, result_file
+
+#
+# mkdir: makes sure output directories exist before task
+#
+@follows(mkdir(options.working_dir, os.path.join(working_dir, "simulation_results")))
+@files(generate_simulation_params)
+def gwas_simulation(input_files, result_file_path, gene_file_root, sim_file_root, result_file):
+ """
+ Dummy calculation of gene gwas vs simulation data
+ Normally runs in parallel on a computational cluster
+ """
+ (gene_file,
+ gwas_file,
+ simulation_data_file) = input_files
+
+ simulation_res_file = open(result_file_path, "w")
+ simulation_res_file.write("%s + %s -> %s\n" % (gene_file_root, sim_file_root, result_file))
+
+
+#_________________________________________________________________________________________
+#
+# Step 2:
+#
+# Statistical summary per gene/gwas file pair
+#
+# for n_file in NNN_pairs_of_input_files:
+# working_dir/simulation_results/n.*.simulation_res
+# -> working_dir/n.mean
+#
+#_________________________________________________________________________________________
+def generate_statistical_summary_params():
+ """
+ Custom function for summarising simulation result files per gene / gwas file pair
+ """
+ gene_gwas_file_pairs, gene_gwas_file_roots = get_gene_gwas_file_pairs()
+
+ for (gene, gwas), gene_file_root in izip(gene_gwas_file_pairs, gene_gwas_file_roots):
+ result_glob_spec = "%s.*.simulation_res" % (gene_file_root)
+ result_files = glob.glob(os.path.join(working_dir, "simulation_results", result_glob_spec))
+ summary_file = os.path.join(working_dir, gene_file_root + ".mean")
+
+ yield result_files, summary_file
+
+
+
+@follows(gwas_simulation)
+@files(generate_statistical_summary_params)
+@posttask(lambda : sys.stdout.write("\nAll finished: hooray!!!\n"))
+def statistical_summary (result_files, summary_file):
+ """
+ Simulate statistical summary
+ """
+
+ summary_file = open(summary_file, "w")
+ for f in result_files:
+ summary_file.write(open(f).read())
+ sleep(1)
+
+
+
+
+
+
+
+
+
+
+#888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+#
+# print pipeline or run pipeline
+#
+
+if options.just_print:
+ pipeline_printout(sys.stdout, options.target_tasks, options.forced_tasks, long_winded=True)
+
+elif options.dependency_file:
+ graph_printout ( open(options.dependency_file, "w"),
+ options.dependency_graph_format,
+ options.target_tasks,
+ options.forced_tasks)
+else:
+ pipeline_run(options.target_tasks, options.forced_tasks, multiprocess = options.jobs)
+
diff --git a/doc/_build/html/_static/example_scripts/play_with_colours.py b/doc/_build/html/_static/example_scripts/play_with_colours.py
new file mode 100644
index 0000000..5054aa4
--- /dev/null
+++ b/doc/_build/html/_static/example_scripts/play_with_colours.py
@@ -0,0 +1,268 @@
+#!/usr/bin/env python
+"""
+
+ play_with_colours.py
+ [--log_file PATH]
+ [--verbose]
+
+"""
+
+################################################################################
+#
+# test
+#
+#
+# Copyright (c) 7/13/2010 Leo Goodstadt
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+#################################################################################
+
+import sys, os
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# options
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+
+from optparse import OptionParser
+import StringIO
+
+parser = OptionParser(version="%play_with_colours 1.0",
+ usage = "\n\n play_with_colours "
+ "--flowchart FILE [options] "
+ "[--colour_scheme_index INT ] "
+ "[--key_legend_in_graph]")
+
+#
+# pipeline
+#
+parser.add_option("--flowchart", dest="flowchart",
+ metavar="FILE",
+ type="string",
+ help="Don't actually run any commands; just print the pipeline "
+ "as a flowchart.")
+parser.add_option("--colour_scheme_index", dest="colour_scheme_index",
+ metavar="INTEGER",
+ type="int",
+ help="Index of colour scheme for flow chart.")
+parser.add_option("--key_legend_in_graph", dest="key_legend_in_graph",
+ action="store_true", default=False,
+ help="Print out legend and key for dependency graph.")
+
+(options, remaining_args) = parser.parse_args()
+if not options.flowchart:
+ raise Exception("Missing mandatory parameter: --flowchart.\n")
+
+
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# imports
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+from ruffus import *
+from ruffus.ruffus_exceptions import JobSignalledBreak
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Pipeline
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+
+#
+# up to date tasks
+#
+@check_if_uptodate (lambda : (False, ""))
+def Up_to_date_task1(infile, outfile):
+ pass
+
+@check_if_uptodate (lambda : (False, ""))
+@follows(Up_to_date_task1)
+def Up_to_date_task2(infile, outfile):
+ pass
+
+@check_if_uptodate (lambda : (False, ""))
+@follows(Up_to_date_task2)
+def Up_to_date_task3(infile, outfile):
+ pass
+
+
+@check_if_uptodate (lambda : (False, ""))
+@follows(Up_to_date_task3)
+def Up_to_date_final_target(infile, outfile):
+ pass
+
+
+#
+# Explicitly specified
+#
+@check_if_uptodate (lambda : (False, ""))
+@follows(Up_to_date_task1)
+def Explicitly_specified_task(infile, outfile):
+ pass
+
+
+
+#
+# Tasks to run
+#
+@follows(Explicitly_specified_task)
+def Task_to_run1(infile, outfile):
+ pass
+
+
+@follows(Task_to_run1)
+def Task_to_run2(infile, outfile):
+ pass
+
+@follows(Task_to_run2)
+def Task_to_run3(infile, outfile):
+ pass
+
+@check_if_uptodate (lambda : (False, ""))
+@follows(Task_to_run2)
+def Up_to_date_task_forced_to_rerun(infile, outfile):
+ pass
+
+
+#
+# Final target
+#
+@follows(Up_to_date_task_forced_to_rerun, Task_to_run3)
+def Final_target(infile, outfile):
+ pass
+
+#
+# Ignored downstream
+#
+@follows(Final_target)
+def Downstream_task1_ignored(infile, outfile):
+ pass
+
+@follows(Final_target)
+def Downstream_task2_ignored(infile, outfile):
+ pass
+
+
+
+
+
+
+
+
+
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Main logic
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+from collections import defaultdict
+custom_flow_chart_colour_scheme = defaultdict(dict)
+
+#
+# Base chart on this overall colour scheme index
+#
+custom_flow_chart_colour_scheme["colour_scheme_index"] = options.colour_scheme_index
+
+#
+# Overriding colours
+#
+if options.colour_scheme_index == None:
+ custom_flow_chart_colour_scheme["Vicious cycle"]["linecolor"] = '"#FF3232"'
+ custom_flow_chart_colour_scheme["Pipeline"]["fontcolor"] = '"#FF3232"'
+ custom_flow_chart_colour_scheme["Key"]["fontcolor"] = "black"
+ custom_flow_chart_colour_scheme["Key"]["fillcolor"] = '"#F6F4F4"'
+ custom_flow_chart_colour_scheme["Task to run"]["linecolor"] = '"#0044A0"'
+ custom_flow_chart_colour_scheme["Up-to-date"]["linecolor"] = "gray"
+ custom_flow_chart_colour_scheme["Final target"]["fillcolor"] = '"#EFA03B"'
+ custom_flow_chart_colour_scheme["Final target"]["fontcolor"] = "black"
+ custom_flow_chart_colour_scheme["Final target"]["color"] = "black"
+ custom_flow_chart_colour_scheme["Final target"]["dashed"] = 0
+ custom_flow_chart_colour_scheme["Vicious cycle"]["fillcolor"] = '"#FF3232"'
+ custom_flow_chart_colour_scheme["Vicious cycle"]["fontcolor"] = 'white'
+ custom_flow_chart_colour_scheme["Vicious cycle"]["color"] = "white"
+ custom_flow_chart_colour_scheme["Vicious cycle"]["dashed"] = 0
+ custom_flow_chart_colour_scheme["Up-to-date task"]["fillcolor"] = '"#B8CC6E"'
+ custom_flow_chart_colour_scheme["Up-to-date task"]["fontcolor"] = '"#006000"'
+ custom_flow_chart_colour_scheme["Up-to-date task"]["color"] = '"#006000"'
+ custom_flow_chart_colour_scheme["Up-to-date task"]["dashed"] = 0
+ custom_flow_chart_colour_scheme["Down stream"]["fillcolor"] = "white"
+ custom_flow_chart_colour_scheme["Down stream"]["fontcolor"] = "gray"
+ custom_flow_chart_colour_scheme["Down stream"]["color"] = "gray"
+ custom_flow_chart_colour_scheme["Down stream"]["dashed"] = 0
+ custom_flow_chart_colour_scheme["Explicitly specified task"]["fillcolor"] = "transparent"
+ custom_flow_chart_colour_scheme["Explicitly specified task"]["fontcolor"] = "black"
+ custom_flow_chart_colour_scheme["Explicitly specified task"]["color"] = "black"
+ custom_flow_chart_colour_scheme["Explicitly specified task"]["dashed"] = 0
+ custom_flow_chart_colour_scheme["Task to run"]["fillcolor"] = '"#EBF3FF"'
+ custom_flow_chart_colour_scheme["Task to run"]["fontcolor"] = '"#0044A0"'
+ custom_flow_chart_colour_scheme["Task to run"]["color"] = '"#0044A0"'
+ custom_flow_chart_colour_scheme["Task to run"]["dashed"] = 0
+ custom_flow_chart_colour_scheme["Up-to-date task forced to rerun"]["fillcolor"] = 'transparent'
+ custom_flow_chart_colour_scheme["Up-to-date task forced to rerun"]["fontcolor"] = '"#0044A0"'
+ custom_flow_chart_colour_scheme["Up-to-date task forced to rerun"]["color"] = '"#0044A0"'
+ custom_flow_chart_colour_scheme["Up-to-date task forced to rerun"]["dashed"] = 1
+ custom_flow_chart_colour_scheme["Up-to-date Final target"]["fillcolor"] = '"#EFA03B"'
+ custom_flow_chart_colour_scheme["Up-to-date Final target"]["fontcolor"] = '"#006000"'
+ custom_flow_chart_colour_scheme["Up-to-date Final target"]["color"] = '"#006000"'
+ custom_flow_chart_colour_scheme["Up-to-date Final target"]["dashed"] = 0
+
+if __name__ == '__main__':
+ pipeline_printout_graph (
+
+ open(options.flowchart, "w"),
+ # use flowchart file name extension to decide flowchart format
+ # e.g. svg, jpg etc.
+ os.path.splitext(options.flowchart)[1][1:],
+
+ # final targets
+ [Final_target, Up_to_date_final_target],
+
+ # Explicitly specified tasks
+ [Explicitly_specified_task],
+
+ # Do we want key legend
+ no_key_legend = not options.key_legend_in_graph,
+
+ # Print all the task types whether used or not
+ minimal_key_legend = False,
+
+ user_colour_scheme = custom_flow_chart_colour_scheme,
+ pipeline_name = "Colour schemes")
+
+
+
+
+
+
+
+
diff --git a/doc/_build/html/_static/example_scripts/ruffus_template.py b/doc/_build/html/_static/example_scripts/ruffus_template.py
new file mode 100644
index 0000000..69a082d
--- /dev/null
+++ b/doc/_build/html/_static/example_scripts/ruffus_template.py
@@ -0,0 +1,270 @@
+#!/usr/bin/env python
+"""
+
+ ruffus_template.py
+ [--log_file PATH]
+ [--verbose]
+ [--target_tasks]
+ [--jobs]
+ [--just_print]
+ [--flowchart]
+ [--key_legend_in_graph]
+ [--forced_tasks]
+
+"""
+import sys, os
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# options
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+
+if __name__ == '__main__':
+ from optparse import OptionParser
+ import StringIO
+
+ parser = OptionParser(version="%prog 1.0", usage = "\n\n %progs [options]")
+
+
+
+ #
+ # general options: verbosity / logging
+ #
+ parser.add_option("-v", "--verbose", dest = "verbose",
+ action="count", default=0,
+ help="Print more verbose messages for each additional verbose level.")
+ parser.add_option("-L", "--log_file", dest="log_file",
+ metavar="FILE",
+ type="string",
+ help="Name and path of log file")
+
+
+
+
+ #
+ # pipeline
+ #
+ parser.add_option("-t", "--target_tasks", dest="target_tasks",
+ action="append",
+ default = list(),
+ metavar="JOBNAME",
+ type="string",
+ help="Target task(s) of pipeline.")
+ parser.add_option("-j", "--jobs", dest="jobs",
+ default=1,
+ metavar="N",
+ type="int",
+ help="Allow N jobs (commands) to run simultaneously.")
+ parser.add_option("-n", "--just_print", dest="just_print",
+ action="store_true", default=False,
+ help="Don't actually run any commands; just print the pipeline.")
+ parser.add_option("--flowchart", dest="flowchart",
+ metavar="FILE",
+ type="string",
+ help="Don't actually run any commands; just print the pipeline "
+ "as a flowchart.")
+
+ #
+ # Less common pipeline options
+ #
+ parser.add_option("--key_legend_in_graph", dest="key_legend_in_graph",
+ action="store_true", default=False,
+ help="Print out legend and key for dependency graph.")
+ parser.add_option("--forced_tasks", dest="forced_tasks",
+ action="append",
+ default = list(),
+ metavar="JOBNAME",
+ type="string",
+ help="Pipeline task(s) which will be included even if they are up to date.")
+
+ # get help string
+ f =StringIO.StringIO()
+ parser.print_help(f)
+ helpstr = f.getvalue()
+ (options, remaining_args) = parser.parse_args()
+
+
+ #vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
+ # #
+ # Change this if necessary #
+ # #
+ #^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+ #
+ # Add names of mandatory options,
+ # strings corresponding to the "dest" parameter
+ # in the options defined above
+ #
+ mandatory_options = [ ]
+
+ #vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
+ # #
+ # Change this if necessary #
+ # #
+ #^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+
+ def check_mandatory_options (options, mandatory_options, helpstr):
+ """
+ Check if specified mandatory options have been defined
+ """
+ missing_options = []
+ for o in mandatory_options:
+ if not getattr(options, o):
+ missing_options.append("--" + o)
+
+ if not len(missing_options):
+ return
+
+ raise Exception("Missing mandatory parameter%s: %s.\n\n%s\n\n" %
+ ("s" if len(missing_options) > 1 else "",
+ ", ".join(missing_options),
+ helpstr))
+ check_mandatory_options (options, mandatory_options, helpstr)
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# imports
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+from ruffus import *
+from ruffus.ruffus_exceptions import JobSignalledBreak
+
+#from json import dumps
+#from collections import defaultdict
+
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Functions
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Logger
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+if __name__ == '__main__':
+ import logging
+ import logging.handlers
+
+ MESSAGE = 15
+ logging.addLevelName(MESSAGE, "MESSAGE")
+
+ def setup_std_logging (logger, log_file, verbose):
+ """
+ set up logging using programme options
+ """
+ class debug_filter(logging.Filter):
+ """
+ Ignore INFO messages
+ """
+ def filter(self, record):
+ return logging.INFO != record.levelno
+
+ class NullHandler(logging.Handler):
+ """
+ for when there is no logging
+ """
+ def emit(self, record):
+ pass
+
+ # We are interested in all messages
+ logger.setLevel(logging.DEBUG)
+ has_handler = False
+
+ # log to file if that is specified
+ if log_file:
+ handler = logging.FileHandler(log_file, delay=False)
+ handler.setFormatter(logging.Formatter("%(asctime)s - %(name)s - %(levelname)6s - %(message)s"))
+ handler.setLevel(MESSAGE)
+ logger.addHandler(handler)
+ has_handler = True
+
+ # log to stderr if verbose
+ if verbose:
+ stderrhandler = logging.StreamHandler(sys.stderr)
+ stderrhandler.setFormatter(logging.Formatter(" %(message)s"))
+ stderrhandler.setLevel(logging.DEBUG)
+ if log_file:
+ stderrhandler.addFilter(debug_filter())
+ logger.addHandler(stderrhandler)
+ has_handler = True
+
+ # no logging
+ if not has_handler:
+ logger.addHandler(NullHandler())
+
+
+ #
+ # set up log
+ #
+ # derive a logger name from this script's file name
+ module_name = os.path.splitext(os.path.split(sys.argv[0])[1])[0]
+ logger = logging.getLogger(module_name)
+ setup_std_logging(logger, options.log_file, options.verbose)
+
+ #
+ # Allow logging across Ruffus pipeline
+ #
+ def get_logger (logger_name, args):
+ return logger
+
+ from ruffus.proxy_logger import *
+ (logger_proxy,
+ logging_mutex) = make_shared_logger_and_proxy (get_logger,
+ module_name,
+ {})
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Pipeline
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Put pipeline code here
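+#
+# Illustrative sketch only: a minimal first task could look like the commented-out
+# example below; "start.input" and the ".output" suffix are placeholder names to be
+# replaced with your own files.
+#
+# @transform(["start.input"], suffix(".input"), ".output")
+# def first_task(input_file, output_file):
+#     # placeholder action: copy the input file to the output file
+#     open(output_file, "w").write(open(input_file).read())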
+
+
+
+
+
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Main logic
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+if __name__ == '__main__':
+ if options.just_print:
+ pipeline_printout(sys.stdout, options.target_tasks, options.forced_tasks,
+ verbose=options.verbose)
+
+ elif options.flowchart:
+ pipeline_printout_graph ( open(options.flowchart, "w"),
+ # use flowchart file name extension to decide flowchart format
+ # e.g. svg, jpg etc.
+ os.path.splitext(options.flowchart)[1][1:],
+ options.target_tasks,
+ options.forced_tasks,
+ no_key_legend = not options.key_legend_in_graph)
+ else:
+ pipeline_run(options.target_tasks, options.forced_tasks,
+ multiprocess = options.jobs,
+ logger = stderr_logger,
+ verbose = options.verbose)
+
diff --git a/doc/_build/html/_static/example_scripts/simpler.py b/doc/_build/html/_static/example_scripts/simpler.py
new file mode 100644
index 0000000..c858130
--- /dev/null
+++ b/doc/_build/html/_static/example_scripts/simpler.py
@@ -0,0 +1,260 @@
+#!/usr/bin/env python2.5
+"""
+
+ test_tasks.py
+
+"""
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# options
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+from optparse import OptionParser
+import sys, os
+import os.path
+import StringIO
+
+# add self to search path for testing
+exe_path = os.path.split(os.path.abspath(sys.argv[0]))[0]
+if __name__ == '__main__':
+ module_name = os.path.split(sys.argv[0])[1]
+ module_name = os.path.splitext(module_name)[0];
+else:
+ module_name = __name__
+
+
+
+
+parser = OptionParser(version="%prog 1.0")
+parser.add_option("-t", "--target_tasks", dest="target_tasks",
+ action="append",
+ default = list(),
+ metavar="JOBNAME",
+ type="string",
+ help="Target task(s) of pipeline.")
+parser.add_option("-f", "--forced_tasks", dest="forced_tasks",
+ action="append",
+ default = list(),
+ metavar="JOBNAME",
+ type="string",
+ help="Pipeline task(s) which will be included even if they are up to date.")
+parser.add_option("-j", "--jobs", dest="jobs",
+ default=5,
+ metavar="jobs",
+ type="int",
+ help="Specifies the number of jobs (commands) to run simultaneously.")
+parser.add_option("-v", "--verbose", dest = "verbose",
+ action="store_true", default=False,
+ help="Do not echo to shell but only print to log.")
+parser.add_option("-d", "--dependency", dest="dependency_file",
+ default="simple.svg",
+ metavar="FILE",
+ type="string",
+ help="Print a dependency graph of the pipeline that would be executed "
+ "to FILE, but do not execute it.")
+parser.add_option("-F", "--dependency_graph_format", dest="dependency_graph_format",
+ metavar="FORMAT",
+ type="string",
+ default = 'svg',
+ help="format of dependency graph file. Can be 'ps' (PostScript), "+
+ "'svg' 'svgz' (Structured Vector Graphics), " +
+ "'png' 'gif' (bitmap graphics) etc ")
+parser.add_option("-n", "--just_print", dest="just_print",
+ action="store_true", default=False,
+ help="Print a description of the jobs that would be executed, "
+ "but do not execute them.")
+parser.add_option("-M", "--minimal_rebuild_mode", dest="minimal_rebuild_mode",
+ action="store_true", default=False,
+ help="Rebuild a minimum of tasks necessary for the target. "
+ "Ignore upstream out of date tasks if intervening tasks are fine.")
+parser.add_option("-K", "--no_key_legend_in_graph", dest="no_key_legend_in_graph",
+ action="store_true", default=False,
+ help="Do not print out legend and key for dependency graph.")
+parser.add_option("-H", "--draw_graph_horizontally", dest="draw_horizontally",
+ action="store_true", default=False,
+ help="Draw horizontal dependency graph.")
+
+parameters = [
+ ]
+
+
+
+
+
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# imports
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+import StringIO
+import re
+import operator
+import sys
+from collections import defaultdict
+
+sys.path.append(os.path.abspath(os.path.join(exe_path,"..", "..")))
+from ruffus import *
+import json
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Functions
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+def create_custom_file_func(params):
+ """
+ creates a function which can be used as a custom job parameter generator for @files
+ """
+ def cust_func ():
+ for job_param in params:
+ yield job_param
+ return cust_func
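+
+# Illustrative usage sketch: a generator built by create_custom_file_func can be
+# passed directly to @files for on-the-fly job parameters (the same pattern used by
+# the generator functions in complicated_example.py); the file names below are made up:
+#
+# @files(create_custom_file_func([[None, 'x.1'],
+#                                 [None, 'y.1']]))
+# def make_start_files(infiles, outfiles, *extra_params):
+#     test_job_io(infiles, outfiles, extra_params)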
+
+
+def is_job_uptodate (infiles, outfiles, *extra_params):
+ """
+ assumes first two parameters are files, checks if they are up to date
+ """
+ return task.needs_update_check_modify_time (infiles, outfiles, *extra_params)
+
+
+
+def test_post_task_function ():
+ print "Hooray"
+
+import time
+def test_job_io(infiles, outfiles, extra_params):
+ """
+ cat input files content to output files
+ after writing out job parameters
+ """
+ # dump parameters
+ params = (infiles, outfiles) + extra_params
+ sys.stdout.write(' job = %s\n' % json.dumps(params))
+
+
+
+ if isinstance(infiles, str):
+ infiles = [infiles]
+ elif infiles == None:
+ infiles = []
+ if isinstance(outfiles, str):
+ outfiles = [outfiles]
+ output_text = list()
+ for f in infiles:
+ output_text.append(open(f).read())
+ output_text = "".join(sorted(output_text))
+ output_text += json.dumps(infiles) + " -> " + json.dumps(outfiles) + "\n"
+ for f in outfiles:
+ open(f, "w").write(output_text)
+ time.sleep(1)
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Main logic
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+
+
+
+
+# get help string
+f =StringIO.StringIO()
+parser.print_help(f)
+helpstr = f.getvalue()
+(options, remaining_args) = parser.parse_args()
+
+
+
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Tasks
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+
+#
+# task1
+#
+@files(None, 'a.1')
+def task1(infiles, outfiles, *extra_params):
+ """
+ First task
+ """
+ test_job_io(infiles, outfiles, extra_params)
+
+
+
+#
+# task2
+#
+@files_re('*.1', '(.*).1', r'\1.1', r'\1.2')
+@follows(task1)
+def task2(infiles, outfiles, *extra_params):
+ """
+ Second task
+ """
+ test_job_io(infiles, outfiles, extra_params)
+
+
+
+#
+# task3
+#
+@files_re('*.1', '(.*).1', r'\1.2', r'\1.3')
+@follows(task2)
+def task3(infiles, outfiles, *extra_params):
+ """
+ Third task
+ """
+ test_job_io(infiles, outfiles, extra_params)
+
+
+
+#
+# task4
+#
+@files_re('*.1', '(.*).1', r'\1.3', r'\1.4')
+@follows(task3)
+def task4(infiles, outfiles, *extra_params):
+ """
+ Fourth task
+ """
+ test_job_io(infiles, outfiles, extra_params)
+
+
+
+
+if options.just_print:
+ pipeline_printout(sys.stdout, options.target_tasks, options.forced_tasks,
+ long_winded=True,
+ gnu_make_maximal_rebuild_mode = not options.minimal_rebuild_mode)
+
+elif options.dependency_file:
+ pipeline_printout_graph ( open(options.dependency_file, "w"),
+ options.dependency_graph_format,
+ options.target_tasks,
+ options.forced_tasks,
+ draw_vertically = not options.draw_horizontally,
+ gnu_make_maximal_rebuild_mode = not options.minimal_rebuild_mode,
+ no_key_legend = options.no_key_legend_in_graph)
+else:
+ pipeline_run(options.target_tasks, options.forced_tasks, multiprocess = options.jobs,
+ gnu_make_maximal_rebuild_mode = not options.minimal_rebuild_mode)
+
diff --git a/doc/_build/html/_static/file.png b/doc/_build/html/_static/file.png
new file mode 100644
index 0000000..d18082e
Binary files /dev/null and b/doc/_build/html/_static/file.png differ
diff --git a/doc/_build/html/_static/jquery.js b/doc/_build/html/_static/jquery.js
new file mode 100644
index 0000000..198b3ff
--- /dev/null
+++ b/doc/_build/html/_static/jquery.js
@@ -0,0 +1,4 @@
+/*! jQuery v1.7.1 jquery.com | jquery.org/license */
+(function(a,b){function cy(a){return f.isWindow(a)?a:a.nodeType===9?a.defaultView||a.parentWindow:!1}function cv(a){if(!ck[a]){var b=c.body,d=f("<"+a+">").appendTo(b),e=d.css("display");d.remove();if(e==="none"||e===""){cl||(cl=c.createElement("iframe"),cl.frameBorder=cl.width=cl.height=0),b.appendChild(cl);if(!cm||!cl.createElement)cm=(cl.contentWindow||cl.contentDocument).document,cm.write((c.compatMode==="CSS1Compat"?"<!doctype html>":"")+"<html><body>"),cm.close();d=cm.createElement( [...]
+f.event={add:function(a,c,d,e,g){var h,i,j,k,l,m,n,o,p,q,r,s;if(!(a.nodeType===3||a.nodeType===8||!c||!d||!(h=f._data(a)))){d.handler&&(p=d,d=p.handler),d.guid||(d.guid=f.guid++),j=h.events,j||(h.events=j={}),i=h.handle,i||(h.handle=i=function(a){return typeof f!="undefined"&&(!a||f.event.triggered!==a.type)?f.event.dispatch.apply(i.elem,arguments):b},i.elem=a),c=f.trim(I(c)).split(" ");for(k=0;k<c.length;k++){l=A.exec(c[k])||[],m=l[1],n=(l[2]||"").split(".").sort(),s=f.event.special[m]| [...]
+{for(var a=0,b;(b=this[a])!=null;a++){b.nodeType===1&&f.cleanData(b.getElementsByTagName("*"));while(b.firstChild)b.removeChild(b.firstChild)}return this},clone:function(a,b){a=a==null?!1:a,b=b==null?a:b;return this.map(function(){return f.clone(this,a,b)})},html:function(a){if(a===b)return this[0]&&this[0].nodeType===1?this[0].innerHTML.replace(W,""):null;if(typeof a=="string"&&!ba.test(a)&&(f.support.leadingWhitespace||!X.test(a))&&!bg[(Z.exec(a)||["",""])[1].toLowerCase()]){a=a.replac [...]
\ No newline at end of file
diff --git a/doc/_build/html/_static/minus.png b/doc/_build/html/_static/minus.png
new file mode 100644
index 0000000..da1c562
Binary files /dev/null and b/doc/_build/html/_static/minus.png differ
diff --git a/doc/_build/html/_static/plus.png b/doc/_build/html/_static/plus.png
new file mode 100644
index 0000000..b3cb374
Binary files /dev/null and b/doc/_build/html/_static/plus.png differ
diff --git a/doc/_build/html/_static/pygments.css b/doc/_build/html/_static/pygments.css
new file mode 100644
index 0000000..d79caa1
--- /dev/null
+++ b/doc/_build/html/_static/pygments.css
@@ -0,0 +1,62 @@
+.highlight .hll { background-color: #ffffcc }
+.highlight { background: #eeffcc; }
+.highlight .c { color: #408090; font-style: italic } /* Comment */
+.highlight .err { border: 1px solid #FF0000 } /* Error */
+.highlight .k { color: #007020; font-weight: bold } /* Keyword */
+.highlight .o { color: #666666 } /* Operator */
+.highlight .cm { color: #408090; font-style: italic } /* Comment.Multiline */
+.highlight .cp { color: #007020 } /* Comment.Preproc */
+.highlight .c1 { color: #408090; font-style: italic } /* Comment.Single */
+.highlight .cs { color: #408090; background-color: #fff0f0 } /* Comment.Special */
+.highlight .gd { color: #A00000 } /* Generic.Deleted */
+.highlight .ge { font-style: italic } /* Generic.Emph */
+.highlight .gr { color: #FF0000 } /* Generic.Error */
+.highlight .gh { color: #000080; font-weight: bold } /* Generic.Heading */
+.highlight .gi { color: #00A000 } /* Generic.Inserted */
+.highlight .go { color: #333333 } /* Generic.Output */
+.highlight .gp { color: #c65d09; font-weight: bold } /* Generic.Prompt */
+.highlight .gs { font-weight: bold } /* Generic.Strong */
+.highlight .gu { color: #800080; font-weight: bold } /* Generic.Subheading */
+.highlight .gt { color: #0044DD } /* Generic.Traceback */
+.highlight .kc { color: #007020; font-weight: bold } /* Keyword.Constant */
+.highlight .kd { color: #007020; font-weight: bold } /* Keyword.Declaration */
+.highlight .kn { color: #007020; font-weight: bold } /* Keyword.Namespace */
+.highlight .kp { color: #007020 } /* Keyword.Pseudo */
+.highlight .kr { color: #007020; font-weight: bold } /* Keyword.Reserved */
+.highlight .kt { color: #902000 } /* Keyword.Type */
+.highlight .m { color: #208050 } /* Literal.Number */
+.highlight .s { color: #4070a0 } /* Literal.String */
+.highlight .na { color: #4070a0 } /* Name.Attribute */
+.highlight .nb { color: #007020 } /* Name.Builtin */
+.highlight .nc { color: #0e84b5; font-weight: bold } /* Name.Class */
+.highlight .no { color: #60add5 } /* Name.Constant */
+.highlight .nd { color: #555555; font-weight: bold } /* Name.Decorator */
+.highlight .ni { color: #d55537; font-weight: bold } /* Name.Entity */
+.highlight .ne { color: #007020 } /* Name.Exception */
+.highlight .nf { color: #06287e } /* Name.Function */
+.highlight .nl { color: #002070; font-weight: bold } /* Name.Label */
+.highlight .nn { color: #0e84b5; font-weight: bold } /* Name.Namespace */
+.highlight .nt { color: #062873; font-weight: bold } /* Name.Tag */
+.highlight .nv { color: #bb60d5 } /* Name.Variable */
+.highlight .ow { color: #007020; font-weight: bold } /* Operator.Word */
+.highlight .w { color: #bbbbbb } /* Text.Whitespace */
+.highlight .mf { color: #208050 } /* Literal.Number.Float */
+.highlight .mh { color: #208050 } /* Literal.Number.Hex */
+.highlight .mi { color: #208050 } /* Literal.Number.Integer */
+.highlight .mo { color: #208050 } /* Literal.Number.Oct */
+.highlight .sb { color: #4070a0 } /* Literal.String.Backtick */
+.highlight .sc { color: #4070a0 } /* Literal.String.Char */
+.highlight .sd { color: #4070a0; font-style: italic } /* Literal.String.Doc */
+.highlight .s2 { color: #4070a0 } /* Literal.String.Double */
+.highlight .se { color: #4070a0; font-weight: bold } /* Literal.String.Escape */
+.highlight .sh { color: #4070a0 } /* Literal.String.Heredoc */
+.highlight .si { color: #70a0d0; font-style: italic } /* Literal.String.Interpol */
+.highlight .sx { color: #c65d09 } /* Literal.String.Other */
+.highlight .sr { color: #235388 } /* Literal.String.Regex */
+.highlight .s1 { color: #4070a0 } /* Literal.String.Single */
+.highlight .ss { color: #517918 } /* Literal.String.Symbol */
+.highlight .bp { color: #007020 } /* Name.Builtin.Pseudo */
+.highlight .vc { color: #bb60d5 } /* Name.Variable.Class */
+.highlight .vg { color: #bb60d5 } /* Name.Variable.Global */
+.highlight .vi { color: #bb60d5 } /* Name.Variable.Instance */
+.highlight .il { color: #208050 } /* Literal.Number.Integer.Long */
\ No newline at end of file
diff --git a/doc/_build/html/_static/ruffus.css b/doc/_build/html/_static/ruffus.css
new file mode 100644
index 0000000..4734d47
--- /dev/null
+++ b/doc/_build/html/_static/ruffus.css
@@ -0,0 +1,327 @@
+/*
+ * Sphinx stylesheet -- default theme
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+
+@import url("basic.css");
+
+/* -- page layout ----------------------------------------------------------- */
+
+body {
+ font-family: sans-serif;
+ font-size: 100%;
+ background-color: #11303d;
+ color: #000;
+ margin: 0;
+ padding: 0;
+}
+
+div.document {
+ background-color: #1c4e63;
+}
+
+div.documentwrapper {
+ float: left;
+ width: 100%;
+}
+
+div.bodywrapper {
+ margin: 0 0 0 230px;
+}
+
+div.body {
+ background-color: #ffffff;
+ color: #000000;
+ padding: 0 20px 30px 20px;
+}
+
+div.footer {
+ color: #ffffff;
+ width: 100%;
+ padding: 9px 0 9px 0;
+ text-align: center;
+ font-size: 75%;
+}
+
+div.footer a {
+ color: #ffffff;
+ text-decoration: underline;
+}
+
+div.related {
+ background-color: #133f52;
+ line-height: 30px;
+ color: #ffffff;
+}
+
+div.related a {
+ font-size: 120%; /* new */
+ color: #c0c0FF; /* new */
+ /*color: #ffffff; /* orig */
+}
+
+div.sphinxsidebar {
+}
+
+div.sphinxsidebar h3 {
+ font-family: 'Trebuchet MS', sans-serif;
+ color: #ffffff;
+ font-size: 1.4em;
+ font-weight: normal;
+ margin: 0;
+ padding: 0;
+}
+
+div.sphinxsidebar h3 a {
+ color: #98dbcc;
+ margin: 0;
+ padding: 0;
+/* margin: 0px 0px 0px 0px;
+ padding: 0px 0px 0px 0px;*/
+}
+
+
+
+div.sphinxsidebar h4 {
+ font-family: 'Trebuchet MS', sans-serif;
+ color: #ffffff;
+ font-size: 1.3em;
+ font-weight: normal;
+ margin: 5px 0 0 0;
+ padding: 0;
+}
+
+div.sphinxsidebar p {
+ color: #ffffff;
+}
+
+div.sphinxsidebar p.topless {
+ margin: 5px 10px 10px 10px;
+}
+
+div.sphinxsidebar ul {
+ font-size: 100%; /* NEW */
+ margin: 10px;
+ padding: 0;
+ color: #ffffff;
+}
+
+div.sphinxsidebar a {
+ color: #98dbcc;
+}
+
+div.sphinxsidebar a em{
+ color: #98dbcc;
+}
+
+div.sphinxsidebar input {
+ border: 1px solid #98dbcc;
+ font-family: sans-serif;
+ font-size: 1em;
+}
+
+/* -- body styles ----------------------------------------------------------- */
+
+a {
+ color: #355f7c;
+ text-decoration: none;
+}
+
+a:hover {
+ text-decoration: underline;
+}
+
+div.body p, div.body dd, div.body li {
+ text-align: justify;
+ line-height: 130%;
+}
+
+div.body h1,
+div.body h2,
+div.body h3,
+div.body h4,
+div.body h5,
+div.body h6 {
+ font-family: 'Trebuchet MS', sans-serif;
+ background-color: #f2f2f2;
+ font-weight: normal;
+ color: #20435c;
+ border-bottom: 1px solid #ccc;
+ margin: 20px -20px 10px -20px;
+ padding: 3px 0 3px 10px;
+}
+
+div.body h1 { margin-top: 0; font-size: 200%; color: #0088FF }
+div.body h2 { font-size: 200%; color: #000000 }
+div.body h3 { font-size: 140%; color: #800080; margin-left: 0px; margin-top: 40px ;font-style:italic;}
+div.body h4 { font-size: 110%; color: #008000; margin-left: 10px; margin-top: 40px;background-color: #F0F0F0;}
+div.body h5 { font-size: 90%; }
+div.body h6 { font-size: 80%; }
+
+div.body h3 cite
+{
+ font-family: monospace;
+ font-weight: normal;
+ color: #0000ff;
+ font-style: normal;
+ font-size: 90%;
+}
+
+div.body h3 em
+{
+ font-weight: normal;
+ color: #000000;
+ font-style: normal;
+ font-size: 80%;
+}
+
+div.body h2 em
+{
+ font-family: monospace;
+ font-weight: normal;
+ color: #800080;
+ font-style: normal;
+ font-size: 100%;
+}
+
+div.body h2 cite
+{
+ font-family: monospace;
+ font-weight: normal;
+ color: #0000ff;
+ font-style: italic;
+ font-size: 80%;
+}
+
+div.body h2 a
+{
+ font-family: monospace;
+ font-weight: normal;
+ color: #0000ff;
+ font-style: italic;
+ font-size: 80%;
+}
+
+div.body h1 em
+{
+ font-family: monospace;
+ font-weight: normal;
+ color: #0088FF;
+ font-size: 100%;
+}
+
+dt em
+{
+ font-family: monospace;
+ color: #0000ff;
+ font-style: italic;
+ font-size: 120%;
+}
+
+
+a.headerlink {
+ color: #c60f0f;
+ font-size: 0.8em;
+ padding: 0 4px 0 4px;
+ text-decoration: none;
+}
+
+a.headerlink:hover {
+ background-color: #c60f0f;
+ color: white;
+}
+
+div.body p, div.body dd, div.body li {
+ text-align: justify;
+ line-height: 130%;
+}
+
+div.admonition p.admonition-title + p {
+ display: inline;
+}
+
+div.note {
+ background-color: #eee;
+ border: 1px solid #ccc;
+}
+
+div.seealso {
+ background-color: #ffc;
+ border: 1px solid #ff6;
+}
+
+div.topic {
+ background-color: #eee;
+}
+
+div.warning {
+ background-color: #ffe4e4;
+ border: 1px solid #f66;
+}
+
+p.admonition-title {
+ display: inline;
+}
+
+p.admonition-title:after {
+ content: ":";
+}
+
+pre {
+ padding: 5px;
+ background-color: #eeffcc;
+ color: #333333;
+ line-height: 120%;
+ border: 1px solid #ac9;
+ border-left: none;
+ border-right: none;
+}
+
+tt {
+ background-color: #ecf0f3;
+ padding: 0 1px 0 1px;
+ font-size: 0.95em;
+}
+
+
+/*
+div.sphinxsidebar h3 {
+ font-family: 'Trebuchet MS', sans-serif;
+ color: #ffffff;
+ font-size: 1.4em;
+ font-weight: normal;
+ margin: 0 0 0 0;
+ padding: 0 0 0 0;
+}
+
+div.sphinxsidebar h3 a{
+ color: #98dbcc;
+ margin: 0px 0px 0px 0px;
+ padding: 0px 0px 0px 0px;
+}
+
+*/
+
+
+
+/*
+
+ Override the Pygments style so that I can annotate the code.
+
+ The key styles are comments within highlighted sections. I have set these to have a
+ white background with big, bold red text, so that they stand out from, and do not appear
+ to be part of, the surrounding code. This way I don't have to screenshot the code and
+ highlight it manually in Photoshop, the embedded bitmapped code doesn't go out of date,
+ the code stays selectable, and it all fits into a sane workflow.
+
+ I use the :nth-of-type(n) pseudo-class (but one which matches everything)
+ so that this is a specialisation of the .hll class style in pygments.css.
+
+ Nasty hack, but this way I don't have to modify pygments.css by hand each time, and I
+ don't have to create my own Pygments style.
+
+*/
+
+.highlight .nd:nth-of-type(n) { background-color: #ffff88; color: red; font-weight: bold; outline:red dotted thin}/**/
+.highlight .hll:nth-of-type(n) { background-color: #eeffcc; font-weight: bold}
+.highlight .hll .c:nth-of-type(n) { background-color: #ffffff; color: red; font-weight: bold; font-style: normal;font-size:120%} /* Comment */
diff --git a/doc/_build/html/_static/ruffus.pdf b/doc/_build/html/_static/ruffus.pdf
new file mode 100644
index 0000000..68836b8
Binary files /dev/null and b/doc/_build/html/_static/ruffus.pdf differ
diff --git a/doc/_build/html/_static/searchtools.js b/doc/_build/html/_static/searchtools.js
new file mode 100644
index 0000000..56676b2
--- /dev/null
+++ b/doc/_build/html/_static/searchtools.js
@@ -0,0 +1,622 @@
+/*
+ * searchtools.js_t
+ * ~~~~~~~~~~~~~~~~
+ *
+ * Sphinx JavaScript utilities for the full-text search.
+ *
+ * :copyright: Copyright 2007-2013 by the Sphinx team, see AUTHORS.
+ * :license: BSD, see LICENSE for details.
+ *
+ */
+
+
+/**
+ * Porter Stemmer
+ */
+var Stemmer = function() {
+
+ var step2list = {
+ ational: 'ate',
+ tional: 'tion',
+ enci: 'ence',
+ anci: 'ance',
+ izer: 'ize',
+ bli: 'ble',
+ alli: 'al',
+ entli: 'ent',
+ eli: 'e',
+ ousli: 'ous',
+ ization: 'ize',
+ ation: 'ate',
+ ator: 'ate',
+ alism: 'al',
+ iveness: 'ive',
+ fulness: 'ful',
+ ousness: 'ous',
+ aliti: 'al',
+ iviti: 'ive',
+ biliti: 'ble',
+ logi: 'log'
+ };
+
+ var step3list = {
+ icate: 'ic',
+ ative: '',
+ alize: 'al',
+ iciti: 'ic',
+ ical: 'ic',
+ ful: '',
+ ness: ''
+ };
+
+ var c = "[^aeiou]"; // consonant
+ var v = "[aeiouy]"; // vowel
+ var C = c + "[^aeiouy]*"; // consonant sequence
+ var V = v + "[aeiou]*"; // vowel sequence
+
+ var mgr0 = "^(" + C + ")?" + V + C; // [C]VC... is m>0
+ var meq1 = "^(" + C + ")?" + V + C + "(" + V + ")?$"; // [C]VC[V] is m=1
+ var mgr1 = "^(" + C + ")?" + V + C + V + C; // [C]VCVC... is m>1
+ var s_v = "^(" + C + ")?" + v; // vowel in stem
+
+ this.stemWord = function (w) {
+ var stem;
+ var suffix;
+ var firstch;
+ var origword = w;
+
+ if (w.length < 3)
+ return w;
+
+ var re;
+ var re2;
+ var re3;
+ var re4;
+
+ firstch = w.substr(0,1);
+ if (firstch == "y")
+ w = firstch.toUpperCase() + w.substr(1);
+
+ // Step 1a
+ re = /^(.+?)(ss|i)es$/;
+ re2 = /^(.+?)([^s])s$/;
+
+ if (re.test(w))
+ w = w.replace(re,"$1$2");
+ else if (re2.test(w))
+ w = w.replace(re2,"$1$2");
+
+ // Step 1b
+ re = /^(.+?)eed$/;
+ re2 = /^(.+?)(ed|ing)$/;
+ if (re.test(w)) {
+ var fp = re.exec(w);
+ re = new RegExp(mgr0);
+ if (re.test(fp[1])) {
+ re = /.$/;
+ w = w.replace(re,"");
+ }
+ }
+ else if (re2.test(w)) {
+ var fp = re2.exec(w);
+ stem = fp[1];
+ re2 = new RegExp(s_v);
+ if (re2.test(stem)) {
+ w = stem;
+ re2 = /(at|bl|iz)$/;
+ re3 = new RegExp("([^aeiouylsz])\\1$");
+ re4 = new RegExp("^" + C + v + "[^aeiouwxy]$");
+ if (re2.test(w))
+ w = w + "e";
+ else if (re3.test(w)) {
+ re = /.$/;
+ w = w.replace(re,"");
+ }
+ else if (re4.test(w))
+ w = w + "e";
+ }
+ }
+
+ // Step 1c
+ re = /^(.+?)y$/;
+ if (re.test(w)) {
+ var fp = re.exec(w);
+ stem = fp[1];
+ re = new RegExp(s_v);
+ if (re.test(stem))
+ w = stem + "i";
+ }
+
+ // Step 2
+ re = /^(.+?)(ational|tional|enci|anci|izer|bli|alli|entli|eli|ousli|ization|ation|ator|alism|iveness|fulness|ousness|aliti|iviti|biliti|logi)$/;
+ if (re.test(w)) {
+ var fp = re.exec(w);
+ stem = fp[1];
+ suffix = fp[2];
+ re = new RegExp(mgr0);
+ if (re.test(stem))
+ w = stem + step2list[suffix];
+ }
+
+ // Step 3
+ re = /^(.+?)(icate|ative|alize|iciti|ical|ful|ness)$/;
+ if (re.test(w)) {
+ var fp = re.exec(w);
+ stem = fp[1];
+ suffix = fp[2];
+ re = new RegExp(mgr0);
+ if (re.test(stem))
+ w = stem + step3list[suffix];
+ }
+
+ // Step 4
+ re = /^(.+?)(al|ance|ence|er|ic|able|ible|ant|ement|ment|ent|ou|ism|ate|iti|ous|ive|ize)$/;
+ re2 = /^(.+?)(s|t)(ion)$/;
+ if (re.test(w)) {
+ var fp = re.exec(w);
+ stem = fp[1];
+ re = new RegExp(mgr1);
+ if (re.test(stem))
+ w = stem;
+ }
+ else if (re2.test(w)) {
+ var fp = re2.exec(w);
+ stem = fp[1] + fp[2];
+ re2 = new RegExp(mgr1);
+ if (re2.test(stem))
+ w = stem;
+ }
+
+ // Step 5
+ re = /^(.+?)e$/;
+ if (re.test(w)) {
+ var fp = re.exec(w);
+ stem = fp[1];
+ re = new RegExp(mgr1);
+ re2 = new RegExp(meq1);
+ re3 = new RegExp("^" + C + v + "[^aeiouwxy]$");
+ if (re.test(stem) || (re2.test(stem) && !(re3.test(stem))))
+ w = stem;
+ }
+ re = /ll$/;
+ re2 = new RegExp(mgr1);
+ if (re.test(w) && re2.test(w)) {
+ re = /.$/;
+ w = w.replace(re,"");
+ }
+
+ // and turn initial Y back to y
+ if (firstch == "y")
+ w = firstch.toLowerCase() + w.substr(1);
+ return w;
+ }
+}
+
+
+
+/**
+ * Simple result scoring code.
+ */
+var Scorer = {
+ // Implement the following function to further tweak the score for each result
+ // The function takes a result array [filename, title, anchor, descr, score]
+ // and returns the new score.
+ /*
+ score: function(result) {
+ return result[4];
+ },
+ */
+
+ // query matches the full name of an object
+ objNameMatch: 11,
+ // or matches in the last dotted part of the object name
+ objPartialMatch: 6,
+ // Additive scores depending on the priority of the object
+ objPrio: {0: 15, // used to be importantResults
+ 1: 5, // used to be objectResults
+ 2: -5}, // used to be unimportantResults
+ // Used when the priority is not in the mapping.
+ objPrioDefault: 0,
+
+ // query found in title
+ title: 15,
+ // query found in terms
+ term: 5
+};
+
+
+/**
+ * Search Module
+ */
+var Search = {
+
+ _index : null,
+ _queued_query : null,
+ _pulse_status : -1,
+
+ init : function() {
+ var params = $.getQueryParameters();
+ if (params.q) {
+ var query = params.q[0];
+ $('input[name="q"]')[0].value = query;
+ this.performSearch(query);
+ }
+ },
+
+ loadIndex : function(url) {
+ $.ajax({type: "GET", url: url, data: null,
+ dataType: "script", cache: true,
+ complete: function(jqxhr, textstatus) {
+ if (textstatus != "success") {
+ document.getElementById("searchindexloader").src = url;
+ }
+ }});
+ },
+
+ setIndex : function(index) {
+ var q;
+ this._index = index;
+ if ((q = this._queued_query) !== null) {
+ this._queued_query = null;
+ Search.query(q);
+ }
+ },
+
+ hasIndex : function() {
+ return this._index !== null;
+ },
+
+ deferQuery : function(query) {
+ this._queued_query = query;
+ },
+
+ stopPulse : function() {
+ this._pulse_status = 0;
+ },
+
+ startPulse : function() {
+ if (this._pulse_status >= 0)
+ return;
+ function pulse() {
+ var i;
+ Search._pulse_status = (Search._pulse_status + 1) % 4;
+ var dotString = '';
+ for (i = 0; i < Search._pulse_status; i++)
+ dotString += '.';
+ Search.dots.text(dotString);
+ if (Search._pulse_status > -1)
+ window.setTimeout(pulse, 500);
+ }
+ pulse();
+ },
+
+ /**
+ * perform a search for something (or wait until index is loaded)
+ */
+ performSearch : function(query) {
+ // create the required interface elements
+ this.out = $('#search-results');
+ this.title = $('<h2>' + _('Searching') + '</h2>').appendTo(this.out);
+ this.dots = $('<span></span>').appendTo(this.title);
+ this.status = $('<p style="display: none"></p>').appendTo(this.out);
+ this.output = $('<ul class="search"/>').appendTo(this.out);
+
+ $('#search-progress').text(_('Preparing search...'));
+ this.startPulse();
+
+ // index already loaded, the browser was quick!
+ if (this.hasIndex())
+ this.query(query);
+ else
+ this.deferQuery(query);
+ },
+
+ /**
+ * execute search (requires search index to be loaded)
+ */
+ query : function(query) {
+ var i;
+ var stopwords = ["a","and","are","as","at","be","but","by","for","if","in","into","is","it","near","no","not","of","on","or","such","that","the","their","then","there","these","they","this","to","was","will","with"];
+
+ // stem the searchterms and add them to the correct list
+ var stemmer = new Stemmer();
+ var searchterms = [];
+ var excluded = [];
+ var hlterms = [];
+ var tmp = query.split(/\s+/);
+ var objectterms = [];
+ for (i = 0; i < tmp.length; i++) {
+ if (tmp[i] !== "") {
+ objectterms.push(tmp[i].toLowerCase());
+ }
+
+ if ($u.indexOf(stopwords, tmp[i]) != -1 || tmp[i].match(/^\d+$/) ||
+ tmp[i] === "") {
+ // skip this "word"
+ continue;
+ }
+ // stem the word
+ var word = stemmer.stemWord(tmp[i]).toLowerCase();
+ var toAppend;
+ // select the correct list
+ if (word[0] == '-') {
+ toAppend = excluded;
+ word = word.substr(1);
+ }
+ else {
+ toAppend = searchterms;
+ hlterms.push(tmp[i].toLowerCase());
+ }
+ // only add if not already in the list
+ if (!$u.contains(toAppend, word))
+ toAppend.push(word);
+ }
+ var highlightstring = '?highlight=' + $.urlencode(hlterms.join(" "));
+
+ // console.debug('SEARCH: searching for:');
+ // console.info('required: ', searchterms);
+ // console.info('excluded: ', excluded);
+
+ // prepare search
+ var terms = this._index.terms;
+ var titleterms = this._index.titleterms;
+
+ // array of [filename, title, anchor, descr, score]
+ var results = [];
+ $('#search-progress').empty();
+
+ // lookup as object
+ for (i = 0; i < objectterms.length; i++) {
+ var others = [].concat(objectterms.slice(0, i),
+ objectterms.slice(i+1, objectterms.length));
+ results = results.concat(this.performObjectSearch(objectterms[i], others));
+ }
+
+ // lookup as search terms in fulltext
+ results = results.concat(this.performTermsSearch(searchterms, excluded, terms, Scorer.term))
+ .concat(this.performTermsSearch(searchterms, excluded, titleterms, Scorer.title));
+
+ // let the scorer override scores with a custom scoring function
+ if (Scorer.score) {
+ for (i = 0; i < results.length; i++)
+ results[i][4] = Scorer.score(results[i]);
+ }
+
+ // now sort the results by score (in opposite order of appearance, since the
+ // display function below uses pop() to retrieve items) and then
+ // alphabetically
+ results.sort(function(a, b) {
+ var left = a[4];
+ var right = b[4];
+ if (left > right) {
+ return 1;
+ } else if (left < right) {
+ return -1;
+ } else {
+ // same score: sort alphabetically
+ left = a[1].toLowerCase();
+ right = b[1].toLowerCase();
+ return (left > right) ? -1 : ((left < right) ? 1 : 0);
+ }
+ });
+
+ // for debugging
+ //Search.lastresults = results.slice(); // a copy
+ //console.info('search results:', Search.lastresults);
+
+ // print the results
+ var resultCount = results.length;
+ function displayNextItem() {
+ // results left, load the summary and display it
+ if (results.length) {
+ var item = results.pop();
+ var listItem = $('<li style="display:none"></li>');
+ if (DOCUMENTATION_OPTIONS.FILE_SUFFIX === '') {
+ // dirhtml builder
+ var dirname = item[0] + '/';
+ if (dirname.match(/\/index\/$/)) {
+ dirname = dirname.substring(0, dirname.length-6);
+ } else if (dirname == 'index/') {
+ dirname = '';
+ }
+ listItem.append($('<a/>').attr('href',
+ DOCUMENTATION_OPTIONS.URL_ROOT + dirname +
+ highlightstring + item[2]).html(item[1]));
+ } else {
+ // normal html builders
+ listItem.append($('<a/>').attr('href',
+ item[0] + DOCUMENTATION_OPTIONS.FILE_SUFFIX +
+ highlightstring + item[2]).html(item[1]));
+ }
+ if (item[3]) {
+ listItem.append($('<span> (' + item[3] + ')</span>'));
+ Search.output.append(listItem);
+ listItem.slideDown(5, function() {
+ displayNextItem();
+ });
+ } else if (DOCUMENTATION_OPTIONS.HAS_SOURCE) {
+ $.ajax({url: DOCUMENTATION_OPTIONS.URL_ROOT + '_sources/' + item[0] + '.txt',
+ dataType: "text",
+ complete: function(jqxhr, textstatus) {
+ var data = jqxhr.responseText;
+ if (data !== '') {
+ listItem.append(Search.makeSearchSummary(data, searchterms, hlterms));
+ }
+ Search.output.append(listItem);
+ listItem.slideDown(5, function() {
+ displayNextItem();
+ });
+ }});
+ } else {
+ // no source available, just display title
+ Search.output.append(listItem);
+ listItem.slideDown(5, function() {
+ displayNextItem();
+ });
+ }
+ }
+ // search finished, update title and status message
+ else {
+ Search.stopPulse();
+ Search.title.text(_('Search Results'));
+ if (!resultCount)
+ Search.status.text(_('Your search did not match any documents. Please make sure that all words are spelled correctly and that you\'ve selected enough categories.'));
+ else
+ Search.status.text(_('Search finished, found %s page(s) matching the search query.').replace('%s', resultCount));
+ Search.status.fadeIn(500);
+ }
+ }
+ displayNextItem();
+ },
+
+ /**
+ * search for object names
+ */
+ performObjectSearch : function(object, otherterms) {
+ var filenames = this._index.filenames;
+ var objects = this._index.objects;
+ var objnames = this._index.objnames;
+ var titles = this._index.titles;
+
+ var i;
+ var results = [];
+
+ for (var prefix in objects) {
+ for (var name in objects[prefix]) {
+ var fullname = (prefix ? prefix + '.' : '') + name;
+ if (fullname.toLowerCase().indexOf(object) > -1) {
+ var score = 0;
+ var parts = fullname.split('.');
+ // check for different match types: exact matches of full name or
+ // "last name" (i.e. last dotted part)
+ if (fullname == object || parts[parts.length - 1] == object) {
+ score += Scorer.objNameMatch;
+ // matches in last name
+ } else if (parts[parts.length - 1].indexOf(object) > -1) {
+ score += Scorer.objPartialMatch;
+ }
+ var match = objects[prefix][name];
+ var objname = objnames[match[1]][2];
+ var title = titles[match[0]];
+ // If more than one term searched for, we require other words to be
+ // found in the name/title/description
+ if (otherterms.length > 0) {
+ var haystack = (prefix + ' ' + name + ' ' +
+ objname + ' ' + title).toLowerCase();
+ var allfound = true;
+ for (i = 0; i < otherterms.length; i++) {
+ if (haystack.indexOf(otherterms[i]) == -1) {
+ allfound = false;
+ break;
+ }
+ }
+ if (!allfound) {
+ continue;
+ }
+ }
+ var descr = objname + _(', in ') + title;
+
+ var anchor = match[3];
+ if (anchor === '')
+ anchor = fullname;
+ else if (anchor == '-')
+ anchor = objnames[match[1]][1] + '-' + fullname;
+ // add custom score for some objects according to scorer
+ if (Scorer.objPrio.hasOwnProperty(match[2])) {
+ score += Scorer.objPrio[match[2]];
+ } else {
+ score += Scorer.objPrioDefault;
+ }
+ results.push([filenames[match[0]], fullname, '#'+anchor, descr, score]);
+ }
+ }
+ }
+
+ return results;
+ },
+
+ /**
+ * search for full-text terms in the index
+ */
+ performTermsSearch : function(searchterms, excluded, terms, score) {
+ var filenames = this._index.filenames;
+ var titles = this._index.titles;
+
+ var i, j, file, files;
+ var fileMap = {};
+ var results = [];
+
+ // perform the search on the required terms
+ for (i = 0; i < searchterms.length; i++) {
+ var word = searchterms[i];
+ // no match but word was a required one
+ if (!(files = terms[word]))
+ break;
+ if (files.length === undefined) {
+ files = [files];
+ }
+ // create the mapping
+ for (j = 0; j < files.length; j++) {
+ file = files[j];
+ if (file in fileMap)
+ fileMap[file].push(word);
+ else
+ fileMap[file] = [word];
+ }
+ }
+
+ // now check if the files don't contain excluded terms
+ for (file in fileMap) {
+ var valid = true;
+
+ // check if all requirements are matched
+ if (fileMap[file].length != searchterms.length)
+ continue;
+
+ // ensure that none of the excluded terms is in the search result
+ for (i = 0; i < excluded.length; i++) {
+ if (terms[excluded[i]] == file ||
+ $u.contains(terms[excluded[i]] || [], file)) {
+ valid = false;
+ break;
+ }
+ }
+
+ // if we have still a valid result we can add it to the result list
+ if (valid) {
+ results.push([filenames[file], titles[file], '', null, score]);
+ }
+ }
+ return results;
+ },
+
+ /**
+ * helper function to return a node containing the
+ * search summary for a given text. keywords is a list
+ * of stemmed words, hlwords is the list of normal, unstemmed
+ * words. the first one is used to find the occurrence, the
+ * latter for highlighting it.
+ */
+ makeSearchSummary : function(text, keywords, hlwords) {
+ var textLower = text.toLowerCase();
+ var start = 0;
+ $.each(keywords, function() {
+ var i = textLower.indexOf(this.toLowerCase());
+ if (i > -1)
+ start = i;
+ });
+ start = Math.max(start - 120, 0);
+ var excerpt = ((start > 0) ? '...' : '') +
+ $.trim(text.substr(start, 240)) +
+ ((start + 240 - text.length) ? '...' : '');
+ var rv = $('<div class="context"></div>').text(excerpt);
+ $.each(hlwords, function() {
+ rv = rv.highlightText(this, 'highlighted');
+ });
+ return rv;
+ }
+};
+
+$(document).ready(function() {
+ Search.init();
+});
\ No newline at end of file
diff --git a/doc/_build/html/_static/sidebar.js b/doc/_build/html/_static/sidebar.js
new file mode 100644
index 0000000..5adf895
--- /dev/null
+++ b/doc/_build/html/_static/sidebar.js
@@ -0,0 +1,159 @@
+/*
+ * sidebar.js
+ * ~~~~~~~~~~
+ *
+ * This script makes the Sphinx sidebar collapsible.
+ *
+ * .sphinxsidebar contains .sphinxsidebarwrapper. This script adds
+ * in .sphinxsidebar, after .sphinxsidebarwrapper, the #sidebarbutton
+ * used to collapse and expand the sidebar.
+ *
+ * When the sidebar is collapsed the .sphinxsidebarwrapper is hidden
+ * and the width of the sidebar and the margin-left of the document
+ * are decreased. When the sidebar is expanded the opposite happens.
+ * This script saves a per-browser/per-session cookie used to
+ * remember the position of the sidebar among the pages.
+ * Once the browser is closed the cookie is deleted and the position
+ * reset to the default (expanded).
+ *
+ * :copyright: Copyright 2007-2013 by the Sphinx team, see AUTHORS.
+ * :license: BSD, see LICENSE for details.
+ *
+ */
+
+$(function() {
+
+
+
+
+
+
+
+
+ // global elements used by the functions.
+ // the 'sidebarbutton' element is defined as global after its
+ // creation, in the add_sidebar_button function
+ var bodywrapper = $('.bodywrapper');
+ var sidebar = $('.sphinxsidebar');
+ var sidebarwrapper = $('.sphinxsidebarwrapper');
+
+ // for some reason, the document has no sidebar; do not run into errors
+ if (!sidebar.length) return;
+
+ // original margin-left of the bodywrapper and width of the sidebar
+ // with the sidebar expanded
+ var bw_margin_expanded = bodywrapper.css('margin-left');
+ var ssb_width_expanded = sidebar.width();
+
+ // margin-left of the bodywrapper and width of the sidebar
+ // with the sidebar collapsed
+ var bw_margin_collapsed = '.8em';
+ var ssb_width_collapsed = '.8em';
+
+ // colors used by the current theme
+ var dark_color = $('.related').css('background-color');
+ var light_color = $('.document').css('background-color');
+
+ function sidebar_is_collapsed() {
+ return sidebarwrapper.is(':not(:visible)');
+ }
+
+ function toggle_sidebar() {
+ if (sidebar_is_collapsed())
+ expand_sidebar();
+ else
+ collapse_sidebar();
+ }
+
+ function collapse_sidebar() {
+ sidebarwrapper.hide();
+ sidebar.css('width', ssb_width_collapsed);
+ bodywrapper.css('margin-left', bw_margin_collapsed);
+ sidebarbutton.css({
+ 'margin-left': '0',
+ 'height': bodywrapper.height()
+ });
+ sidebarbutton.find('span').text('»');
+ sidebarbutton.attr('title', _('Expand sidebar'));
+ document.cookie = 'sidebar=collapsed';
+ }
+
+ function expand_sidebar() {
+ bodywrapper.css('margin-left', bw_margin_expanded);
+ sidebar.css('width', ssb_width_expanded);
+ sidebarwrapper.show();
+ sidebarbutton.css({
+ 'margin-left': ssb_width_expanded-12,
+ 'height': bodywrapper.height()
+ });
+ sidebarbutton.find('span').text('«');
+ sidebarbutton.attr('title', _('Collapse sidebar'));
+ document.cookie = 'sidebar=expanded';
+ }
+
+ function add_sidebar_button() {
+ sidebarwrapper.css({
+ 'float': 'left',
+ 'margin-right': '0',
+ 'width': ssb_width_expanded - 28
+ });
+ // create the button
+ sidebar.append(
+ '<div id="sidebarbutton"><span>«</span></div>'
+ );
+ var sidebarbutton = $('#sidebarbutton');
+ light_color = sidebarbutton.css('background-color');
+ // find the height of the viewport to center the '<<' in the page
+ var viewport_height;
+ if (window.innerHeight)
+ viewport_height = window.innerHeight;
+ else
+ viewport_height = $(window).height();
+ sidebarbutton.find('span').css({
+ 'display': 'block',
+ 'margin-top': (viewport_height - sidebar.position().top - 20) / 2
+ });
+
+ sidebarbutton.click(toggle_sidebar);
+ sidebarbutton.attr('title', _('Collapse sidebar'));
+ sidebarbutton.css({
+ 'color': '#FFFFFF',
+ 'border-left': '1px solid ' + dark_color,
+ 'font-size': '1.2em',
+ 'cursor': 'pointer',
+ 'height': bodywrapper.height(),
+ 'padding-top': '1px',
+ 'margin-left': ssb_width_expanded - 12
+ });
+
+ sidebarbutton.hover(
+ function () {
+ $(this).css('background-color', dark_color);
+ },
+ function () {
+ $(this).css('background-color', light_color);
+ }
+ );
+ }
+
+ function set_position_from_cookie() {
+ if (!document.cookie)
+ return;
+ var items = document.cookie.split(';');
+ for(var k=0; k<items.length; k++) {
+ var key_val = items[k].split('=');
+ var key = key_val[0];
+ if (key == 'sidebar') {
+ var value = key_val[1];
+ if ((value == 'collapsed') && (!sidebar_is_collapsed()))
+ collapse_sidebar();
+ else if ((value == 'expanded') && (sidebar_is_collapsed()))
+ expand_sidebar();
+ }
+ }
+ }
+
+ add_sidebar_button();
+ var sidebarbutton = $('#sidebarbutton');
+ set_position_from_cookie();
+});
\ No newline at end of file
diff --git a/doc/_build/html/_static/underscore.js b/doc/_build/html/_static/underscore.js
new file mode 100644
index 0000000..5b55f32
--- /dev/null
+++ b/doc/_build/html/_static/underscore.js
@@ -0,0 +1,31 @@
+// Underscore.js 1.3.1
+// (c) 2009-2012 Jeremy Ashkenas, DocumentCloud Inc.
+// Underscore is freely distributable under the MIT license.
+// Portions of Underscore are inspired or borrowed from Prototype,
+// Oliver Steele's Functional, and John Resig's Micro-Templating.
+// For all details and documentation:
+// http://documentcloud.github.com/underscore
+(function(){function q(a,c,d){if(a===c)return a!==0||1/a==1/c;if(a==null||c==null)return a===c;if(a._chain)a=a._wrapped;if(c._chain)c=c._wrapped;if(a.isEqual&&b.isFunction(a.isEqual))return a.isEqual(c);if(c.isEqual&&b.isFunction(c.isEqual))return c.isEqual(a);var e=l.call(a);if(e!=l.call(c))return false;switch(e){case "[object String]":return a==String(c);case "[object Number]":return a!=+a?c!=+c:a==0?1/a==1/c:a==+c;case "[object Date]":case "[object Boolean]":return+a==+c;case "[object [...]
+c.source&&a.global==c.global&&a.multiline==c.multiline&&a.ignoreCase==c.ignoreCase}if(typeof a!="object"||typeof c!="object")return false;for(var f=d.length;f--;)if(d[f]==a)return true;d.push(a);var f=0,g=true;if(e=="[object Array]"){if(f=a.length,g=f==c.length)for(;f--;)if(!(g=f in a==f in c&&q(a[f],c[f],d)))break}else{if("constructor"in a!="constructor"in c||a.constructor!=c.constructor)return false;for(var h in a)if(b.has(a,h)&&(f++,!(g=b.has(c,h)&&q(a[h],c[h],d))))break;if(g){for(h i [...]
+h)&&!f--)break;g=!f}}d.pop();return g}var r=this,G=r._,n={},k=Array.prototype,o=Object.prototype,i=k.slice,H=k.unshift,l=o.toString,I=o.hasOwnProperty,w=k.forEach,x=k.map,y=k.reduce,z=k.reduceRight,A=k.filter,B=k.every,C=k.some,p=k.indexOf,D=k.lastIndexOf,o=Array.isArray,J=Object.keys,s=Function.prototype.bind,b=function(a){return new m(a)};if(typeof exports!=="undefined"){if(typeof module!=="undefined"&&module.exports)exports=module.exports=b;exports._=b}else r._=b;b.VERSION="1.3.1";var [...]
+b.forEach=function(a,c,d){if(a!=null)if(w&&a.forEach===w)a.forEach(c,d);else if(a.length===+a.length)for(var e=0,f=a.length;e<f;e++){if(e in a&&c.call(d,a[e],e,a)===n)break}else for(e in a)if(b.has(a,e)&&c.call(d,a[e],e,a)===n)break};b.map=b.collect=function(a,c,b){var e=[];if(a==null)return e;if(x&&a.map===x)return a.map(c,b);j(a,function(a,g,h){e[e.length]=c.call(b,a,g,h)});if(a.length===+a.length)e.length=a.length;return e};b.reduce=b.foldl=b.inject=function(a,c,d,e){var f=arguments.l [...]
+null&&(a=[]);if(y&&a.reduce===y)return e&&(c=b.bind(c,e)),f?a.reduce(c,d):a.reduce(c);j(a,function(a,b,i){f?d=c.call(e,d,a,b,i):(d=a,f=true)});if(!f)throw new TypeError("Reduce of empty array with no initial value");return d};b.reduceRight=b.foldr=function(a,c,d,e){var f=arguments.length>2;a==null&&(a=[]);if(z&&a.reduceRight===z)return e&&(c=b.bind(c,e)),f?a.reduceRight(c,d):a.reduceRight(c);var g=b.toArray(a).reverse();e&&!f&&(c=b.bind(c,e));return f?b.reduce(g,c,d,e):b.reduce(g,c)};b.f [...]
+function(a,c,b){var e;E(a,function(a,g,h){if(c.call(b,a,g,h))return e=a,true});return e};b.filter=b.select=function(a,c,b){var e=[];if(a==null)return e;if(A&&a.filter===A)return a.filter(c,b);j(a,function(a,g,h){c.call(b,a,g,h)&&(e[e.length]=a)});return e};b.reject=function(a,c,b){var e=[];if(a==null)return e;j(a,function(a,g,h){c.call(b,a,g,h)||(e[e.length]=a)});return e};b.every=b.all=function(a,c,b){var e=true;if(a==null)return e;if(B&&a.every===B)return a.every(c,b);j(a,function(a,g, [...]
+e&&c.call(b,a,g,h)))return n});return e};var E=b.some=b.any=function(a,c,d){c||(c=b.identity);var e=false;if(a==null)return e;if(C&&a.some===C)return a.some(c,d);j(a,function(a,b,h){if(e||(e=c.call(d,a,b,h)))return n});return!!e};b.include=b.contains=function(a,c){var b=false;if(a==null)return b;return p&&a.indexOf===p?a.indexOf(c)!=-1:b=E(a,function(a){return a===c})};b.invoke=function(a,c){var d=i.call(arguments,2);return b.map(a,function(a){return(b.isFunction(c)?c||a:a[c]).apply(a,d) [...]
+function(a,c){return b.map(a,function(a){return a[c]})};b.max=function(a,c,d){if(!c&&b.isArray(a))return Math.max.apply(Math,a);if(!c&&b.isEmpty(a))return-Infinity;var e={computed:-Infinity};j(a,function(a,b,h){b=c?c.call(d,a,b,h):a;b>=e.computed&&(e={value:a,computed:b})});return e.value};b.min=function(a,c,d){if(!c&&b.isArray(a))return Math.min.apply(Math,a);if(!c&&b.isEmpty(a))return Infinity;var e={computed:Infinity};j(a,function(a,b,h){b=c?c.call(d,a,b,h):a;b<e.computed&&(e={value:a [...]
+return e.value};b.shuffle=function(a){var b=[],d;j(a,function(a,f){f==0?b[0]=a:(d=Math.floor(Math.random()*(f+1)),b[f]=b[d],b[d]=a)});return b};b.sortBy=function(a,c,d){return b.pluck(b.map(a,function(a,b,g){return{value:a,criteria:c.call(d,a,b,g)}}).sort(function(a,b){var c=a.criteria,d=b.criteria;return c<d?-1:c>d?1:0}),"value")};b.groupBy=function(a,c){var d={},e=b.isFunction(c)?c:function(a){return a[c]};j(a,function(a,b){var c=e(a,b);(d[c]||(d[c]=[])).push(a)});return d};b.sortedInd [...]
+c,d){d||(d=b.identity);for(var e=0,f=a.length;e<f;){var g=e+f>>1;d(a[g])<d(c)?e=g+1:f=g}return e};b.toArray=function(a){return!a?[]:a.toArray?a.toArray():b.isArray(a)?i.call(a):b.isArguments(a)?i.call(a):b.values(a)};b.size=function(a){return b.toArray(a).length};b.first=b.head=function(a,b,d){return b!=null&&!d?i.call(a,0,b):a[0]};b.initial=function(a,b,d){return i.call(a,0,a.length-(b==null||d?1:b))};b.last=function(a,b,d){return b!=null&&!d?i.call(a,Math.max(a.length-b,0)):a[a.length- [...]
+b.tail=function(a,b,d){return i.call(a,b==null||d?1:b)};b.compact=function(a){return b.filter(a,function(a){return!!a})};b.flatten=function(a,c){return b.reduce(a,function(a,e){if(b.isArray(e))return a.concat(c?e:b.flatten(e));a[a.length]=e;return a},[])};b.without=function(a){return b.difference(a,i.call(arguments,1))};b.uniq=b.unique=function(a,c,d){var d=d?b.map(a,d):a,e=[];b.reduce(d,function(d,g,h){if(0==h||(c===true?b.last(d)!=g:!b.include(d,g)))d[d.length]=g,e[e.length]=a[h];retur [...]
+return e};b.union=function(){return b.uniq(b.flatten(arguments,true))};b.intersection=b.intersect=function(a){var c=i.call(arguments,1);return b.filter(b.uniq(a),function(a){return b.every(c,function(c){return b.indexOf(c,a)>=0})})};b.difference=function(a){var c=b.flatten(i.call(arguments,1));return b.filter(a,function(a){return!b.include(c,a)})};b.zip=function(){for(var a=i.call(arguments),c=b.max(b.pluck(a,"length")),d=Array(c),e=0;e<c;e++)d[e]=b.pluck(a,""+e);return d};b.indexOf=func [...]
+d){if(a==null)return-1;var e;if(d)return d=b.sortedIndex(a,c),a[d]===c?d:-1;if(p&&a.indexOf===p)return a.indexOf(c);for(d=0,e=a.length;d<e;d++)if(d in a&&a[d]===c)return d;return-1};b.lastIndexOf=function(a,b){if(a==null)return-1;if(D&&a.lastIndexOf===D)return a.lastIndexOf(b);for(var d=a.length;d--;)if(d in a&&a[d]===b)return d;return-1};b.range=function(a,b,d){arguments.length<=1&&(b=a||0,a=0);for(var d=arguments[2]||1,e=Math.max(Math.ceil((b-a)/d),0),f=0,g=Array(e);f<e;)g[f++]=a,a+=d; [...]
+var F=function(){};b.bind=function(a,c){var d,e;if(a.bind===s&&s)return s.apply(a,i.call(arguments,1));if(!b.isFunction(a))throw new TypeError;e=i.call(arguments,2);return d=function(){if(!(this instanceof d))return a.apply(c,e.concat(i.call(arguments)));F.prototype=a.prototype;var b=new F,g=a.apply(b,e.concat(i.call(arguments)));return Object(g)===g?g:b}};b.bindAll=function(a){var c=i.call(arguments,1);c.length==0&&(c=b.functions(a));j(c,function(c){a[c]=b.bind(a[c],a)});return a};b.mem [...]
+c){var d={};c||(c=b.identity);return function(){var e=c.apply(this,arguments);return b.has(d,e)?d[e]:d[e]=a.apply(this,arguments)}};b.delay=function(a,b){var d=i.call(arguments,2);return setTimeout(function(){return a.apply(a,d)},b)};b.defer=function(a){return b.delay.apply(b,[a,1].concat(i.call(arguments,1)))};b.throttle=function(a,c){var d,e,f,g,h,i=b.debounce(function(){h=g=false},c);return function(){d=this;e=arguments;var b;f||(f=setTimeout(function(){f=null;h&&a.apply(d,e);i()},c)) [...]
+a.apply(d,e);i();g=true}};b.debounce=function(a,b){var d;return function(){var e=this,f=arguments;clearTimeout(d);d=setTimeout(function(){d=null;a.apply(e,f)},b)}};b.once=function(a){var b=false,d;return function(){if(b)return d;b=true;return d=a.apply(this,arguments)}};b.wrap=function(a,b){return function(){var d=[a].concat(i.call(arguments,0));return b.apply(this,d)}};b.compose=function(){var a=arguments;return function(){for(var b=arguments,d=a.length-1;d>=0;d--)b=[a[d].apply(this,b)] [...]
+b.after=function(a,b){return a<=0?b():function(){if(--a<1)return b.apply(this,arguments)}};b.keys=J||function(a){if(a!==Object(a))throw new TypeError("Invalid object");var c=[],d;for(d in a)b.has(a,d)&&(c[c.length]=d);return c};b.values=function(a){return b.map(a,b.identity)};b.functions=b.methods=function(a){var c=[],d;for(d in a)b.isFunction(a[d])&&c.push(d);return c.sort()};b.extend=function(a){j(i.call(arguments,1),function(b){for(var d in b)a[d]=b[d]});return a};b.defaults=function( [...]
+1),function(b){for(var d in b)a[d]==null&&(a[d]=b[d])});return a};b.clone=function(a){return!b.isObject(a)?a:b.isArray(a)?a.slice():b.extend({},a)};b.tap=function(a,b){b(a);return a};b.isEqual=function(a,b){return q(a,b,[])};b.isEmpty=function(a){if(b.isArray(a)||b.isString(a))return a.length===0;for(var c in a)if(b.has(a,c))return false;return true};b.isElement=function(a){return!!(a&&a.nodeType==1)};b.isArray=o||function(a){return l.call(a)=="[object Array]"};b.isObject=function(a){ret [...]
+b.isArguments=function(a){return l.call(a)=="[object Arguments]"};if(!b.isArguments(arguments))b.isArguments=function(a){return!(!a||!b.has(a,"callee"))};b.isFunction=function(a){return l.call(a)=="[object Function]"};b.isString=function(a){return l.call(a)=="[object String]"};b.isNumber=function(a){return l.call(a)=="[object Number]"};b.isNaN=function(a){return a!==a};b.isBoolean=function(a){return a===true||a===false||l.call(a)=="[object Boolean]"};b.isDate=function(a){return l.call(a) [...]
+b.isRegExp=function(a){return l.call(a)=="[object RegExp]"};b.isNull=function(a){return a===null};b.isUndefined=function(a){return a===void 0};b.has=function(a,b){return I.call(a,b)};b.noConflict=function(){r._=G;return this};b.identity=function(a){return a};b.times=function(a,b,d){for(var e=0;e<a;e++)b.call(d,e)};b.escape=function(a){return(""+a).replace(/&/g,"&amp;").replace(/</g,"&lt;").replace(/>/g,"&gt;").replace(/"/g,"&quot;").replace(/'/g,"&#x27;").replace(/\//g,"&#x2F;")};b.mixin [...]
+function(c){K(c,b[c]=a[c])})};var L=0;b.uniqueId=function(a){var b=L++;return a?a+b:b};b.templateSettings={evaluate:/<%([\s\S]+?)%>/g,interpolate:/<%=([\s\S]+?)%>/g,escape:/<%-([\s\S]+?)%>/g};var t=/.^/,u=function(a){return a.replace(/\\\\/g,"\\").replace(/\\'/g,"'")};b.template=function(a,c){var d=b.templateSettings,d="var __p=[],print=function(){__p.push.apply(__p,arguments);};with(obj||{}){__p.push('"+a.replace(/\\/g,"\\\\").replace(/'/g,"\\'").replace(d.escape||t,function(a,b){return [...]
+u(b)+"),'"}).replace(d.interpolate||t,function(a,b){return"',"+u(b)+",'"}).replace(d.evaluate||t,function(a,b){return"');"+u(b).replace(/[\r\n\t]/g," ")+";__p.push('"}).replace(/\r/g,"\\r").replace(/\n/g,"\\n").replace(/\t/g,"\\t")+"');}return __p.join('');",e=new Function("obj","_",d);return c?e(c,b):function(a){return e.call(this,a,b)}};b.chain=function(a){return b(a).chain()};var m=function(a){this._wrapped=a};b.prototype=m.prototype;var v=function(a,c){return c?b(a).chain():a},K=func [...]
+function(){var a=i.call(arguments);H.call(a,this._wrapped);return v(c.apply(b,a),this._chain)}};b.mixin(b);j("pop,push,reverse,shift,sort,splice,unshift".split(","),function(a){var b=k[a];m.prototype[a]=function(){var d=this._wrapped;b.apply(d,arguments);var e=d.length;(a=="shift"||a=="splice")&&e===0&&delete d[0];return v(d,this._chain)}});j(["concat","join","slice"],function(a){var b=k[a];m.prototype[a]=function(){return v(b.apply(this._wrapped,arguments),this._chain)}});m.prototype.ch [...]
+true;return this};m.prototype.value=function(){return this._wrapped}}).call(this);
diff --git a/doc/_build/html/_static/up-pressed.png b/doc/_build/html/_static/up-pressed.png
new file mode 100644
index 0000000..8bd587a
Binary files /dev/null and b/doc/_build/html/_static/up-pressed.png differ
diff --git a/doc/_build/html/_static/up.png b/doc/_build/html/_static/up.png
new file mode 100644
index 0000000..b946256
Binary files /dev/null and b/doc/_build/html/_static/up.png differ
diff --git a/doc/_build/html/_static/websupport.js b/doc/_build/html/_static/websupport.js
new file mode 100644
index 0000000..19fcda5
--- /dev/null
+++ b/doc/_build/html/_static/websupport.js
@@ -0,0 +1,808 @@
+/*
+ * websupport.js
+ * ~~~~~~~~~~~~~
+ *
+ * sphinx.websupport utilities for all documentation.
+ *
+ * :copyright: Copyright 2007-2013 by the Sphinx team, see AUTHORS.
+ * :license: BSD, see LICENSE for details.
+ *
+ */
+
+(function($) {
+ $.fn.autogrow = function() {
+ return this.each(function() {
+ var textarea = this;
+
+ $.fn.autogrow.resize(textarea);
+
+ $(textarea)
+ .focus(function() {
+ textarea.interval = setInterval(function() {
+ $.fn.autogrow.resize(textarea);
+ }, 500);
+ })
+ .blur(function() {
+ clearInterval(textarea.interval);
+ });
+ });
+ };
+
+ $.fn.autogrow.resize = function(textarea) {
+ var lineHeight = parseInt($(textarea).css('line-height'), 10);
+ var lines = textarea.value.split('\n');
+ var columns = textarea.cols;
+ var lineCount = 0;
+ $.each(lines, function() {
+ lineCount += Math.ceil(this.length / columns) || 1;
+ });
+ var height = lineHeight * (lineCount + 1);
+ $(textarea).css('height', height);
+ };
+})(jQuery);
+
+(function($) {
+ var comp, by;
+
+ function init() {
+ initEvents();
+ initComparator();
+ }
+
+ function initEvents() {
+ $('a.comment-close').live("click", function(event) {
+ event.preventDefault();
+ hide($(this).attr('id').substring(2));
+ });
+ $('a.vote').live("click", function(event) {
+ event.preventDefault();
+ handleVote($(this));
+ });
+ $('a.reply').live("click", function(event) {
+ event.preventDefault();
+ openReply($(this).attr('id').substring(2));
+ });
+ $('a.close-reply').live("click", function(event) {
+ event.preventDefault();
+ closeReply($(this).attr('id').substring(2));
+ });
+ $('a.sort-option').live("click", function(event) {
+ event.preventDefault();
+ handleReSort($(this));
+ });
+ $('a.show-proposal').live("click", function(event) {
+ event.preventDefault();
+ showProposal($(this).attr('id').substring(2));
+ });
+ $('a.hide-proposal').live("click", function(event) {
+ event.preventDefault();
+ hideProposal($(this).attr('id').substring(2));
+ });
+ $('a.show-propose-change').live("click", function(event) {
+ event.preventDefault();
+ showProposeChange($(this).attr('id').substring(2));
+ });
+ $('a.hide-propose-change').live("click", function(event) {
+ event.preventDefault();
+ hideProposeChange($(this).attr('id').substring(2));
+ });
+ $('a.accept-comment').live("click", function(event) {
+ event.preventDefault();
+ acceptComment($(this).attr('id').substring(2));
+ });
+ $('a.delete-comment').live("click", function(event) {
+ event.preventDefault();
+ deleteComment($(this).attr('id').substring(2));
+ });
+ $('a.comment-markup').live("click", function(event) {
+ event.preventDefault();
+ toggleCommentMarkupBox($(this).attr('id').substring(2));
+ });
+ }
+
+ /**
+ * Set comp, which is a comparator function used for sorting and
+ * inserting comments into the list.
+ */
+ function setComparator() {
+ // If the first three letters are "asc", sort in ascending order
+ // and remove the prefix.
+ if (by.substring(0,3) == 'asc') {
+ var i = by.substring(3);
+ comp = function(a, b) { return a[i] - b[i]; };
+ } else {
+ // Otherwise sort in descending order.
+ comp = function(a, b) { return b[by] - a[by]; };
+ }
+
+ // Reset link styles and format the selected sort option.
+ $('a.sel').attr('href', '#').removeClass('sel');
+ $('a.by' + by).removeAttr('href').addClass('sel');
+ }
+
+ /**
+ * Create a comp function. If the user has preferences stored in
+ * the sortBy cookie, use those, otherwise use the default.
+ */
+ function initComparator() {
+ by = 'rating'; // Default to sort by rating.
+ // If the sortBy cookie is set, use that instead.
+ if (document.cookie.length > 0) {
+ var start = document.cookie.indexOf('sortBy=');
+ if (start != -1) {
+ start = start + 7;
+ var end = document.cookie.indexOf(";", start);
+ if (end == -1) {
+ end = document.cookie.length;
+ by = unescape(document.cookie.substring(start, end));
+ }
+ }
+ }
+ setComparator();
+ }
+
+ /**
+ * Show a comment div.
+ */
+ function show(id) {
+ $('#ao' + id).hide();
+ $('#ah' + id).show();
+ var context = $.extend({id: id}, opts);
+ var popup = $(renderTemplate(popupTemplate, context)).hide();
+ popup.find('textarea[name="proposal"]').hide();
+ popup.find('a.by' + by).addClass('sel');
+ var form = popup.find('#cf' + id);
+ form.submit(function(event) {
+ event.preventDefault();
+ addComment(form);
+ });
+ $('#s' + id).after(popup);
+ popup.slideDown('fast', function() {
+ getComments(id);
+ });
+ }
+
+ /**
+ * Hide a comment div.
+ */
+ function hide(id) {
+ $('#ah' + id).hide();
+ $('#ao' + id).show();
+ var div = $('#sc' + id);
+ div.slideUp('fast', function() {
+ div.remove();
+ });
+ }
+
+ /**
+ * Perform an ajax request to get comments for a node
+ * and insert the comments into the comments tree.
+ */
+ function getComments(id) {
+ $.ajax({
+ type: 'GET',
+ url: opts.getCommentsURL,
+ data: {node: id},
+ success: function(data, textStatus, request) {
+ var ul = $('#cl' + id);
+ var speed = 100;
+ $('#cf' + id)
+ .find('textarea[name="proposal"]')
+ .data('source', data.source);
+
+ if (data.comments.length === 0) {
+ ul.html('<li>No comments yet.</li>');
+ ul.data('empty', true);
+ } else {
+ // If there are comments, sort them and put them in the list.
+ var comments = sortComments(data.comments);
+ speed = data.comments.length * 100;
+ appendComments(comments, ul);
+ ul.data('empty', false);
+ }
+ $('#cn' + id).slideUp(speed + 200);
+ ul.slideDown(speed);
+ },
+ error: function(request, textStatus, error) {
+ showError('Oops, there was a problem retrieving the comments.');
+ },
+ dataType: 'json'
+ });
+ }
+
+ /**
+ * Add a comment via ajax and insert the comment into the comment tree.
+ */
+ function addComment(form) {
+ var node_id = form.find('input[name="node"]').val();
+ var parent_id = form.find('input[name="parent"]').val();
+ var text = form.find('textarea[name="comment"]').val();
+ var proposal = form.find('textarea[name="proposal"]').val();
+
+ if (text == '') {
+ showError('Please enter a comment.');
+ return;
+ }
+
+ // Disable the form that is being submitted.
+ form.find('textarea,input').attr('disabled', 'disabled');
+
+ // Send the comment to the server.
+ $.ajax({
+ type: "POST",
+ url: opts.addCommentURL,
+ dataType: 'json',
+ data: {
+ node: node_id,
+ parent: parent_id,
+ text: text,
+ proposal: proposal
+ },
+ success: function(data, textStatus, error) {
+ // Reset the form.
+ if (node_id) {
+ hideProposeChange(node_id);
+ }
+ form.find('textarea')
+ .val('')
+ .add(form.find('input'))
+ .removeAttr('disabled');
+ var ul = $('#cl' + (node_id || parent_id));
+ if (ul.data('empty')) {
+ $(ul).empty();
+ ul.data('empty', false);
+ }
+ insertComment(data.comment);
+ var ao = $('#ao' + node_id);
+ ao.find('img').attr({'src': opts.commentBrightImage});
+ if (node_id) {
+ // if this was a "root" comment, remove the commenting box
+ // (the user can get it back by reopening the comment popup)
+ $('#ca' + node_id).slideUp();
+ }
+ },
+ error: function(request, textStatus, error) {
+ form.find('textarea,input').removeAttr('disabled');
+ showError('Oops, there was a problem adding the comment.');
+ }
+ });
+ }
+
+ /**
+ * Recursively append comments to the main comment list and children
+ * lists, creating the comment tree.
+ */
+ function appendComments(comments, ul) {
+ $.each(comments, function() {
+ var div = createCommentDiv(this);
+ ul.append($(document.createElement('li')).html(div));
+ appendComments(this.children, div.find('ul.comment-children'));
+ // To avoid stagnating data, don't store the comment's children in data.
+ this.children = null;
+ div.data('comment', this);
+ });
+ }
+
+ /**
+ * After adding a new comment, it must be inserted in the correct
+ * location in the comment tree.
+ */
+ function insertComment(comment) {
+ var div = createCommentDiv(comment);
+
+ // To avoid stagnating data, don't store the comment's children in data.
+ comment.children = null;
+ div.data('comment', comment);
+
+ var ul = $('#cl' + (comment.node || comment.parent));
+ var siblings = getChildren(ul);
+
+ var li = $(document.createElement('li'));
+ li.hide();
+
+ // Determine where in the parent's children list to insert this comment.
+ for(i=0; i < siblings.length; i++) {
+ if (comp(comment, siblings[i]) <= 0) {
+ $('#cd' + siblings[i].id)
+ .parent()
+ .before(li.html(div));
+ li.slideDown('fast');
+ return;
+ }
+ }
+
+ // If we get here, this comment rates lower than all the others,
+ // or it is the only comment in the list.
+ ul.append(li.html(div));
+ li.slideDown('fast');
+ }
+
+ function acceptComment(id) {
+ $.ajax({
+ type: 'POST',
+ url: opts.acceptCommentURL,
+ data: {id: id},
+ success: function(data, textStatus, request) {
+ $('#cm' + id).fadeOut('fast');
+ $('#cd' + id).removeClass('moderate');
+ },
+ error: function(request, textStatus, error) {
+ showError('Oops, there was a problem accepting the comment.');
+ }
+ });
+ }
+
+ function deleteComment(id) {
+ $.ajax({
+ type: 'POST',
+ url: opts.deleteCommentURL,
+ data: {id: id},
+ success: function(data, textStatus, request) {
+ var div = $('#cd' + id);
+ if (data == 'delete') {
+ // Moderator mode: remove the comment and all children immediately
+ div.slideUp('fast', function() {
+ div.remove();
+ });
+ return;
+ }
+ // User mode: only mark the comment as deleted
+ div
+ .find('span.user-id:first')
+ .text('[deleted]').end()
+ .find('div.comment-text:first')
+ .text('[deleted]').end()
+ .find('#cm' + id + ', #dc' + id + ', #ac' + id + ', #rc' + id +
+ ', #sp' + id + ', #hp' + id + ', #cr' + id + ', #rl' + id)
+ .remove();
+ var comment = div.data('comment');
+ comment.username = '[deleted]';
+ comment.text = '[deleted]';
+ div.data('comment', comment);
+ },
+ error: function(request, textStatus, error) {
+ showError('Oops, there was a problem deleting the comment.');
+ }
+ });
+ }
+
+ function showProposal(id) {
+ $('#sp' + id).hide();
+ $('#hp' + id).show();
+ $('#pr' + id).slideDown('fast');
+ }
+
+ function hideProposal(id) {
+ $('#hp' + id).hide();
+ $('#sp' + id).show();
+ $('#pr' + id).slideUp('fast');
+ }
+
+ function showProposeChange(id) {
+ $('#pc' + id).hide();
+ $('#hc' + id).show();
+ var textarea = $('#pt' + id);
+ textarea.val(textarea.data('source'));
+ $.fn.autogrow.resize(textarea[0]);
+ textarea.slideDown('fast');
+ }
+
+ function hideProposeChange(id) {
+ $('#hc' + id).hide();
+ $('#pc' + id).show();
+ var textarea = $('#pt' + id);
+ textarea.val('').removeAttr('disabled');
+ textarea.slideUp('fast');
+ }
+
+ function toggleCommentMarkupBox(id) {
+ $('#mb' + id).toggle();
+ }
+
+ /** Handle when the user clicks on a sort by link. */
+ function handleReSort(link) {
+ var classes = link.attr('class').split(/\s+/);
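+ // Sort links carry a class such as "byrating", "byage" or "byascage"
+ // (see popupTemplate below); stripping the leading "by" gives the sort
+ // key that is stored in the "sortBy" cookie below.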
+ for (var i=0; i<classes.length; i++) {
+ if (classes[i] != 'sort-option') {
+ by = classes[i].substring(2);
+ }
+ }
+ setComparator();
+ // Save/update the sortBy cookie.
+ var expiration = new Date();
+ expiration.setDate(expiration.getDate() + 365);
+ document.cookie= 'sortBy=' + escape(by) +
+ ';expires=' + expiration.toUTCString();
+ $('ul.comment-ul').each(function(index, ul) {
+ var comments = getChildren($(ul), true);
+ comments = sortComments(comments);
+ appendComments(comments, $(ul).empty());
+ });
+ }
+
+ /**
+ * Function to process a vote when a user clicks an arrow.
+ */
+ function handleVote(link) {
+ if (!opts.voting) {
+ showError("You'll need to login to vote.");
+ return;
+ }
+
+ var id = link.attr('id');
+ if (!id) {
+ // Didn't click on one of the voting arrows.
+ return;
+ }
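+ // Voting arrows use element ids of the form "uv<id>"/"uu<id>" and
+ // "dv<id>"/"du<id>": the first character is the direction (up/down),
+ // the second is "v" for the vote link or "u" for the unvote link,
+ // and the remainder is the comment id.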
+ // If it is an unvote, the new vote value is 0,
+ // Otherwise it's 1 for an upvote, or -1 for a downvote.
+ var value = 0;
+ if (id.charAt(1) != 'u') {
+ value = id.charAt(0) == 'u' ? 1 : -1;
+ }
+ // The data to be sent to the server.
+ var d = {
+ comment_id: id.substring(2),
+ value: value
+ };
+
+ // Swap the vote and unvote links.
+ link.hide();
+ $('#' + id.charAt(0) + (id.charAt(1) == 'u' ? 'v' : 'u') + d.comment_id)
+ .show();
+
+ // The div the comment is displayed in.
+ var div = $('div#cd' + d.comment_id);
+ var data = div.data('comment');
+
+ // If this is not an unvote, and the other vote arrow has
+ // already been pressed, unpress it.
+ if ((d.value !== 0) && (data.vote === d.value * -1)) {
+ $('#' + (d.value == 1 ? 'd' : 'u') + 'u' + d.comment_id).hide();
+ $('#' + (d.value == 1 ? 'd' : 'u') + 'v' + d.comment_id).show();
+ }
+
+ // Update the comment's rating in the local data.
+ data.rating += (data.vote === 0) ? d.value : (d.value - data.vote);
+ data.vote = d.value;
+ div.data('comment', data);
+
+ // Change the rating text.
+ div.find('.rating:first')
+ .text(data.rating + ' point' + (data.rating == 1 ? '' : 's'));
+
+ // Send the vote information to the server.
+ $.ajax({
+ type: "POST",
+ url: opts.processVoteURL,
+ data: d,
+ error: function(request, textStatus, error) {
+ showError('Oops, there was a problem casting that vote.');
+ }
+ });
+ }
+
+ /**
+ * Open a reply form used to reply to an existing comment.
+ */
+ function openReply(id) {
+ // Swap out the reply link for the hide link
+ $('#rl' + id).hide();
+ $('#cr' + id).show();
+
+ // Add the reply li to the children ul.
+ var div = $(renderTemplate(replyTemplate, {id: id})).hide();
+ $('#cl' + id)
+ .prepend(div)
+ // Setup the submit handler for the reply form.
+ .find('#rf' + id)
+ .submit(function(event) {
+ event.preventDefault();
+ addComment($('#rf' + id));
+ closeReply(id);
+ })
+ .find('input[type=button]')
+ .click(function() {
+ closeReply(id);
+ });
+ div.slideDown('fast', function() {
+ $('#rf' + id).find('textarea').focus();
+ });
+ }
+
+ /**
+ * Close the reply form opened with openReply.
+ */
+ function closeReply(id) {
+ // Remove the reply div from the DOM.
+ $('#rd' + id).slideUp('fast', function() {
+ $(this).remove();
+ });
+
+ // Swap out the hide link for the reply link
+ $('#cr' + id).hide();
+ $('#rl' + id).show();
+ }
+
+ /**
+ * Recursively sort a tree of comments using the comp comparator.
+ */
+ function sortComments(comments) {
+ comments.sort(comp);
+ $.each(comments, function() {
+ this.children = sortComments(this.children);
+ });
+ return comments;
+ }
+
+ /**
+ * Get the children comments from a ul. If recursive is true,
+ * recursively include children's children.
+ */
+ function getChildren(ul, recursive) {
+ var children = [];
+ ul.children().children("[id^='cd']")
+ .each(function() {
+ var comment = $(this).data('comment');
+ if (recursive)
+ comment.children = getChildren($(this).find('#cl' + comment.id), true);
+ children.push(comment);
+ });
+ return children;
+ }
+
+ /** Create a div to display a comment in. */
+ function createCommentDiv(comment) {
+ if (!comment.displayed && !opts.moderator) {
+ return $('<div class="moderate">Thank you! Your comment will show up '
+ + 'once it has been approved by a moderator.</div>');
+ }
+ // Prettify the comment rating.
+ comment.pretty_rating = comment.rating + ' point' +
+ (comment.rating == 1 ? '' : 's');
+ // Make a class (for displaying not yet moderated comments differently)
+ comment.css_class = comment.displayed ? '' : ' moderate';
+ // Create a div for this comment.
+ var context = $.extend({}, opts, comment);
+ var div = $(renderTemplate(commentTemplate, context));
+
+ // If the user has voted on this comment, highlight the correct arrow.
+ if (comment.vote) {
+ var direction = (comment.vote == 1) ? 'u' : 'd';
+ div.find('#' + direction + 'v' + comment.id).hide();
+ div.find('#' + direction + 'u' + comment.id).show();
+ }
+
+ if (opts.moderator || comment.text != '[deleted]') {
+ div.find('a.reply').show();
+ if (comment.proposal_diff)
+ div.find('#sp' + comment.id).show();
+ if (opts.moderator && !comment.displayed)
+ div.find('#cm' + comment.id).show();
+ if (opts.moderator || (opts.username == comment.username))
+ div.find('#dc' + comment.id).show();
+ }
+ return div;
+ }
+
+ /**
+ * A simple template renderer. Placeholders such as <%id%> are replaced
+ * by context['id'] with items being escaped. Placeholders such as <#id#>
+ * are not escaped.
+ */
+ function renderTemplate(template, context) {
+ var esc = $(document.createElement('div'));
+
+ function handle(ph, escape) {
+ var cur = context;
+ $.each(ph.split('.'), function() {
+ cur = cur[this];
+ });
+ return escape ? esc.text(cur || "").html() : cur;
+ }
+
+ return template.replace(/<([%#])([\w\.]*)\1>/g, function() {
+ return handle(arguments[2], arguments[1] == '%' ? true : false);
+ });
+ }
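+ // For example (illustration only):
+ //   renderTemplate('<a id="rl<%id%>"><#text#></a>', {id: 3, text: '<b>hi</b>'})
+ // returns '<a id="rl3"><b>hi</b></a>' -- values substituted via <%...%> are
+ // HTML-escaped, while values substituted via <#...#> are inserted verbatim.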
+
+ /** Flash an error message briefly. */
+ function showError(message) {
+ $(document.createElement('div')).attr({'class': 'popup-error'})
+ .append($(document.createElement('div'))
+ .attr({'class': 'error-message'}).text(message))
+ .appendTo('body')
+ .fadeIn("slow")
+ .delay(2000)
+ .fadeOut("slow");
+ }
+
+ /** Add a link the user uses to open the comments popup. */
+ $.fn.comment = function() {
+ return this.each(function() {
+ var id = $(this).attr('id').substring(1);
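+ // COMMENT_METADATA is supplied by the page itself (it is not defined in
+ // this file) and maps each commentable node id to its comment count.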
+ var count = COMMENT_METADATA[id];
+ var title = count + ' comment' + (count == 1 ? '' : 's');
+ var image = count > 0 ? opts.commentBrightImage : opts.commentImage;
+ var addcls = count == 0 ? ' nocomment' : '';
+ $(this)
+ .append(
+ $(document.createElement('a')).attr({
+ href: '#',
+ 'class': 'sphinx-comment-open' + addcls,
+ id: 'ao' + id
+ })
+ .append($(document.createElement('img')).attr({
+ src: image,
+ alt: 'comment',
+ title: title
+ }))
+ .click(function(event) {
+ event.preventDefault();
+ show($(this).attr('id').substring(2));
+ })
+ )
+ .append(
+ $(document.createElement('a')).attr({
+ href: '#',
+ 'class': 'sphinx-comment-close hidden',
+ id: 'ah' + id
+ })
+ .append($(document.createElement('img')).attr({
+ src: opts.closeCommentImage,
+ alt: 'close',
+ title: 'close'
+ }))
+ .click(function(event) {
+ event.preventDefault();
+ hide($(this).attr('id').substring(2));
+ })
+ );
+ });
+ };
+
+ var opts = {
+ processVoteURL: '/_process_vote',
+ addCommentURL: '/_add_comment',
+ getCommentsURL: '/_get_comments',
+ acceptCommentURL: '/_accept_comment',
+ deleteCommentURL: '/_delete_comment',
+ commentImage: '/static/_static/comment.png',
+ closeCommentImage: '/static/_static/comment-close.png',
+ loadingImage: '/static/_static/ajax-loader.gif',
+ commentBrightImage: '/static/_static/comment-bright.png',
+ upArrow: '/static/_static/up.png',
+ downArrow: '/static/_static/down.png',
+ upArrowPressed: '/static/_static/up-pressed.png',
+ downArrowPressed: '/static/_static/down-pressed.png',
+ voting: false,
+ moderator: false
+ };
+
+ if (typeof COMMENT_OPTIONS != "undefined") {
+ opts = jQuery.extend(opts, COMMENT_OPTIONS);
+ }
+
+ var popupTemplate = '\
+ <div class="sphinx-comments" id="sc<%id%>">\
+ <p class="sort-options">\
+ Sort by:\
+ <a href="#" class="sort-option byrating">best rated</a>\
+ <a href="#" class="sort-option byascage">newest</a>\
+ <a href="#" class="sort-option byage">oldest</a>\
+ </p>\
+ <div class="comment-header">Comments</div>\
+ <div class="comment-loading" id="cn<%id%>">\
+ loading comments... <img src="<%loadingImage%>" alt="" /></div>\
+ <ul id="cl<%id%>" class="comment-ul"></ul>\
+ <div id="ca<%id%>">\
+ <p class="add-a-comment">Add a comment\
+ (<a href="#" class="comment-markup" id="ab<%id%>">markup</a>):</p>\
+ <div class="comment-markup-box" id="mb<%id%>">\
+ reStructured text markup: <i>*emph*</i>, <b>**strong**</b>, \
+ <tt>``code``</tt>, \
+ code blocks: <tt>::</tt> and an indented block after blank line</div>\
+ <form method="post" id="cf<%id%>" class="comment-form" action="">\
+ <textarea name="comment" cols="80"></textarea>\
+ <p class="propose-button">\
+ <a href="#" id="pc<%id%>" class="show-propose-change">\
+ Propose a change ▹\
+ </a>\
+ <a href="#" id="hc<%id%>" class="hide-propose-change">\
+ Propose a change ▿\
+ </a>\
+ </p>\
+ <textarea name="proposal" id="pt<%id%>" cols="80"\
+ spellcheck="false"></textarea>\
+ <input type="submit" value="Add comment" />\
+ <input type="hidden" name="node" value="<%id%>" />\
+ <input type="hidden" name="parent" value="" />\
+ </form>\
+ </div>\
+ </div>';
+
+ var commentTemplate = '\
+ <div id="cd<%id%>" class="sphinx-comment<%css_class%>">\
+ <div class="vote">\
+ <div class="arrow">\
+ <a href="#" id="uv<%id%>" class="vote" title="vote up">\
+ <img src="<%upArrow%>" />\
+ </a>\
+ <a href="#" id="uu<%id%>" class="un vote" title="vote up">\
+ <img src="<%upArrowPressed%>" />\
+ </a>\
+ </div>\
+ <div class="arrow">\
+ <a href="#" id="dv<%id%>" class="vote" title="vote down">\
+ <img src="<%downArrow%>" id="da<%id%>" />\
+ </a>\
+ <a href="#" id="du<%id%>" class="un vote" title="vote down">\
+ <img src="<%downArrowPressed%>" />\
+ </a>\
+ </div>\
+ </div>\
+ <div class="comment-content">\
+ <p class="tagline comment">\
+ <span class="user-id"><%username%></span>\
+ <span class="rating"><%pretty_rating%></span>\
+ <span class="delta"><%time.delta%></span>\
+ </p>\
+ <div class="comment-text comment"><#text#></div>\
+ <p class="comment-opts comment">\
+ <a href="#" class="reply hidden" id="rl<%id%>">reply ▹</a>\
+ <a href="#" class="close-reply" id="cr<%id%>">reply ▿</a>\
+ <a href="#" id="sp<%id%>" class="show-proposal">proposal ▹</a>\
+ <a href="#" id="hp<%id%>" class="hide-proposal">proposal ▿</a>\
+ <a href="#" id="dc<%id%>" class="delete-comment hidden">delete</a>\
+ <span id="cm<%id%>" class="moderation hidden">\
+ <a href="#" id="ac<%id%>" class="accept-comment">accept</a>\
+ </span>\
+ </p>\
+ <pre class="proposal" id="pr<%id%>">\
+<#proposal_diff#>\
+ </pre>\
+ <ul class="comment-children" id="cl<%id%>"></ul>\
+ </div>\
+ <div class="clearleft"></div>\
+ </div>\
+ </div>';
+
+ var replyTemplate = '\
+ <li>\
+ <div class="reply-div" id="rd<%id%>">\
+ <form id="rf<%id%>">\
+ <textarea name="comment" cols="80"></textarea>\
+ <input type="submit" value="Add reply" />\
+ <input type="button" value="Cancel" />\
+ <input type="hidden" name="parent" value="<%id%>" />\
+ <input type="hidden" name="node" value="" />\
+ </form>\
+ </div>\
+ </li>';
+
+ $(document).ready(function() {
+ init();
+ });
+})(jQuery);
+
+$(document).ready(function() {
+ // add comment anchors for all paragraphs that are commentable
+ $('.sphinx-has-comment').comment();
+
+ // highlight search words in search results
+ $("div.context").each(function() {
+ var params = $.getQueryParameters();
+ var terms = (params.q) ? params.q[0].split(/\s+/) : [];
+ var result = $(this);
+ $.each(terms, function() {
+ result.highlightText(this.toLowerCase(), 'highlighted');
+ });
+ });
+
+ // directly open comment window if requested
+ var anchor = document.location.hash;
+ if (anchor.substring(0, 9) == '#comment-') {
+ $('#ao' + anchor.substring(9)).click();
+ document.location.hash = '#s' + anchor.substring(9);
+ }
+});
diff --git a/doc/_build/html/cheatsheet.html b/doc/_build/html/cheatsheet.html
new file mode 100644
index 0000000..0d48a19
--- /dev/null
+++ b/doc/_build/html/cheatsheet.html
@@ -0,0 +1,283 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>Cheat Sheet — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: './',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="_static/jquery.js"></script>
+ <script type="text/javascript" src="_static/underscore.js"></script>
+ <script type="text/javascript" src="_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="index.html" />
+ <link rel="next" title="Pipeline functions" href="pipeline_functions.html" />
+ <link rel="prev" title="Appendix 1: Python code for Flow Chart Colours with pipeline_printout_graph(...)" href="tutorials/new_tutorial/flowchart_colours_code.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="pipeline_functions.html" title="Pipeline functions"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="tutorials/new_tutorial/flowchart_colours_code.html" title="Appendix 1: Python code for Flow Chart Colours with pipeline_printout_graph(...)"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="index.html">Home</a> | </li>
+ <li><a href="contents.html">Contents</a> | </li>
+ <li><a href="installation.html">Install</a> | </li>
+ <li><a href="tutorials/new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="tutorials/new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="faq.html">FAQ</a> | </li>
+ <li><a href="#">Cheat sheet</a> | </li>
+ <li><a href="tutorials/new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="gallery.html">Gallery</a> | </li>
+ <li><a href="history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <div class="section" id="cheat-sheet">
+<span id="id1"></span><h1>Cheat Sheet<a class="headerlink" href="#cheat-sheet" title="Permalink to this headline">¶</a></h1>
+<p>The <tt class="docutils literal"><span class="pre">ruffus</span></tt> module is a lightweight way to add support
+for running computational pipelines.</p>
+<div class="line-block">
+<div class="line">Each stage or <strong>task</strong> in a computational pipeline is represented by a python function</div>
+<div class="line">Each python function can be called in parallel to run multiple <strong>jobs</strong>.</div>
+</div>
+<div class="section" id="annotate-functions-with-ruffus-decorators">
+<h2>1. Annotate functions with <strong>Ruffus</strong> decorators<a class="headerlink" href="#annotate-functions-with-ruffus-decorators" title="Permalink to this headline">¶</a></h2>
+<div class="section" id="core">
+<h3>Core<a class="headerlink" href="#core" title="Permalink to this headline">¶</a></h3>
+<table border="1" class="docutils">
+<colgroup>
+<col width="14%" />
+<col width="86%" />
+<col width="0%" />
+</colgroup>
+<thead valign="bottom">
+<tr class="row-odd"><th class="head">Decorator</th>
+<th class="head">Syntax</th>
+<th class="head"> </th>
+</tr>
+</thead>
+<tbody valign="top">
+<tr class="row-even"><td>@originate (<a class="reference internal" href="tutorials/new_tutorial/originate.html#new-manual-originate"><em>Manual</em></a>)</td>
+<td><a class="reference internal" href="decorators/originate.html#decorators-originate"><em>@originate</em></a> ( <tt class="docutils literal"><span class="pre">output_files</span></tt>, [<tt class="docutils literal"><span class="pre">extra_parameters</span></tt>,...] )</td>
+<td> </td>
+</tr>
+<tr class="row-odd"><td>@split (<a class="reference internal" href="tutorials/new_tutorial/split.html#new-manual-split"><em>Manual</em></a>)</td>
+<td><a class="reference internal" href="decorators/split.html#decorators-split"><em>@split</em></a> ( <tt class="docutils literal"><span class="pre">tasks_or_file_names</span></tt>, <tt class="docutils literal"><span class="pre">output_files</span></tt>, [<tt class="docutils literal"><span class="pre">extra_parameters</span></tt>,...] )</td>
+<td> </td>
+</tr>
+<tr class="row-even"><td>@transform (<a class="reference internal" href="tutorials/new_tutorial/transform.html#new-manual-transform"><em>Manual</em></a>)</td>
+<td><div class="first last line-block">
+<div class="line"><a class="reference internal" href="decorators/transform.html#decorators-transform"><em>@transform</em></a> ( <tt class="docutils literal"><span class="pre">tasks_or_file_names</span></tt>, <a class="reference internal" href="decorators/transform_ex.html#decorators-transform-suffix-string"><em>suffix</em></a><em>(</em><tt class="docutils literal"><span class="pre">suffix_string</span></tt><em>)</em>, <tt class="docutils literal"><span class="pre">output_pattern</spa [...]
+<div class="line"><a class="reference internal" href="decorators/transform.html#decorators-transform"><em>@transform</em></a> ( <tt class="docutils literal"><span class="pre">tasks_or_file_names</span></tt>, <a class="reference internal" href="decorators/transform_ex.html#decorators-transform-matching-regex"><em>regex</em></a><em>(</em><tt class="docutils literal"><span class="pre">regex_pattern</span></tt><em>)</em>, <tt class="docutils literal"><span class="pre">output_pattern</spa [...]
+</div>
+</td>
+<td> </td>
+</tr>
+<tr class="row-odd"><td>@merge (<a class="reference internal" href="tutorials/new_tutorial/merge.html#new-manual-merge"><em>Manual</em></a>)</td>
+<td><a class="reference internal" href="decorators/merge.html#decorators-merge"><em>@merge</em></a> (<tt class="docutils literal"><span class="pre">tasks_or_file_names</span></tt>, <tt class="docutils literal"><span class="pre">output</span></tt>, [<tt class="docutils literal"><span class="pre">extra_parameters</span></tt>,...] )</td>
+<td> </td>
+</tr>
+<tr class="row-even"><td>@posttask (<a class="reference internal" href="tutorials/new_tutorial/posttask.html#new-manual-posttask"><em>Manual</em></a>)</td>
+<td><div class="first last line-block">
+<div class="line"><a class="reference internal" href="decorators/posttask.html#decorators-posttask"><em>@posttask</em></a> ( <tt class="docutils literal"><span class="pre">signal_task_completion_function</span></tt> )</div>
+<div class="line"><a class="reference internal" href="decorators/posttask.html#decorators-posttask"><em>@posttask</em></a> (<a class="reference internal" href="decorators/indicator_objects.html#decorators-touch-file"><em>touch_file</em></a>( <tt class="docutils literal"><span class="pre">'task1.completed'</span></tt> ))</div>
+</div>
+</td>
+<td> </td>
+</tr>
+</tbody>
+</table>
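+<p>As a minimal, self-contained sketch of the syntax above (added for
+illustration; the task and file names are invented and not part of the
+table), a two-stage pipeline might look like this:</p>
+<div class="highlight-python"><div class="highlight"><pre>from ruffus import originate, transform, suffix, pipeline_run
+
+@originate(["a.input", "b.input"])          # one job per output file
+def create_files(output_file):
+    open(output_file, "w").close()
+
+@transform(create_files, suffix(".input"), ".output")   # 1 to 1
+def convert(input_file, output_file):
+    open(output_file, "w").write(open(input_file).read())
+
+pipeline_run()
+</pre></div>
+</div>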
+</div>
+<div class="section" id="see-decorators-for-a-complete-list-of-decorators">
+<h3>See <a class="reference internal" href="decorators/decorators.html#decorators"><em>Decorators</em></a> for a complete list of decorators<a class="headerlink" href="#see-decorators-for-a-complete-list-of-decorators" title="Permalink to this headline">¶</a></h3>
+</div>
+</div>
+<div class="section" id="print-dependency-graph-if-necessary">
+<h2>2. Print dependency graph if necessary<a class="headerlink" href="#print-dependency-graph-if-necessary" title="Permalink to this headline">¶</a></h2>
+<ul>
+<li><p class="first">For a graphical flowchart in <tt class="docutils literal"><span class="pre">jpg</span></tt>, <tt class="docutils literal"><span class="pre">svg</span></tt>, <tt class="docutils literal"><span class="pre">dot</span></tt>, <tt class="docutils literal"><span class="pre">png</span></tt>, <tt class="docutils literal"><span class="pre">ps</span></tt>, <tt class="docutils literal"><span class="pre">gif</span></tt> formats:</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="n">pipeline_printout_graph</span> <span class="p">(</span> <span class="s">"flowchart.svg"</span><span class="p">)</span>
+</pre></div>
+</div>
+</li>
+</ul>
+<ul>
+<li><p class="first">For a text printout of all jobs</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="n">pipeline_printout</span><span class="p">()</span>
+</pre></div>
+</div>
+</li>
+</ul>
+</div>
+<div class="section" id="run-the-pipeline">
+<h2>3. Run the pipeline<a class="headerlink" href="#run-the-pipeline" title="Permalink to this headline">¶</a></h2>
+<div class="highlight-python"><div class="highlight"><pre><span class="n">pipeline_run</span><span class="p">(</span><span class="n">multiprocess</span> <span class="o">=</span> <span class="n">N_PARALLEL_JOBS</span><span class="p">)</span>
+</pre></div>
+</div>
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#">Cheat Sheet</a><ul>
+<li><a class="reference internal" href="#annotate-functions-with-ruffus-decorators">1. Annotate functions with <strong>Ruffus</strong> decorators</a><ul>
+<li><a class="reference internal" href="#core">Core</a></li>
+<li><a class="reference internal" href="#see-decorators-for-a-complete-list-of-decorators">See <tt class="docutils literal"><span class="pre">Decorators</span></tt> for a complete list of decorators</a></li>
+</ul>
+</li>
+<li><a class="reference internal" href="#print-dependency-graph-if-necessary">2. Print dependency graph if necessary</a></li>
+<li><a class="reference internal" href="#run-the-pipeline">3. Run the pipeline</a></li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="tutorials/new_tutorial/flowchart_colours_code.html"
+ title="previous chapter"><strong>Appendix 1</strong>: Python code for Flow Chart Colours with <tt class="docutils literal"><span class="pre">pipeline_printout_graph(...)</span></tt></a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="pipeline_functions.html"
+ title="next chapter">Pipeline functions</a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="_sources/cheatsheet.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="decorators/decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="decorators/originate.html">@originate</a> </li>
+ <li><a href="decorators/split.html">@split</a> </li>
+ <li><a href="decorators/transform.html">@transform</a> </li>
+ <li><a href="decorators/merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="decorators/decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="decorators/subdivide.html">@subdivide</a> </li>
+ <li><a href="decorators/transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="decorators/collate.html">@collate</a> </li>
+ <li><a href="decorators/collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="decorators/graphviz.html">@graphviz</a> </li>
+ <li><a href="decorators/mkdir.html">@mkdir</a> </li>
+ <li><a href="decorators/follows.html">@follows / mkdir</a> </li>
+ <li><a href="decorators/posttask.html">@posttask touch_file</a> </li>
+ <li><a href="decorators/active_if.html">@active_if</a> </li>
+ <li><a href="decorators/jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="decorators/decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="decorators/product.html">@product </a> </li>
+ <li><a href="decorators/permutations.html">@permutations </a> </li>
+ <li><a href="decorators/combinations.html">@combinations </a> </li>
+ <li><a href="decorators/combinations_with_replacement.html">@combinations_ _with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="decorators/decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="decorators/files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="decorators/parallel.html">@parallel</a> </li>
+ <li><a href="decorators/check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="decorators/indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="pipeline_functions.html" title="Pipeline functions"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="tutorials/new_tutorial/flowchart_colours_code.html" title="Appendix 1: Python code for Flow Chart Colours with pipeline_printout_graph(...)"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="index.html">Home</a> | </li>
+ <li><a href="contents.html">Contents</a> | </li>
+ <li><a href="installation.html">Install</a> | </li>
+ <li><a href="tutorials/new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="tutorials/new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="faq.html">FAQ</a> | </li>
+ <li><a href="#">Cheat sheet</a> | </li>
+ <li><a href="tutorials/new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="gallery.html">Gallery</a> | </li>
+ <li><a href="history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/contents.html b/doc/_build/html/contents.html
new file mode 100644
index 0000000..abb1d29
--- /dev/null
+++ b/doc/_build/html/contents.html
@@ -0,0 +1,691 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>Ruffus documentation — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: './',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="_static/jquery.js"></script>
+ <script type="text/javascript" src="_static/underscore.js"></script>
+ <script type="text/javascript" src="_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="index.html" />
+ <link rel="next" title="Installation" href="installation.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="installation.html" title="Installation"
+ accesskey="N">next</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="index.html">Home</a> | </li>
+ <li><a href="#">Contents</a> | </li>
+ <li><a href="installation.html">Install</a> | </li>
+ <li><a href="tutorials/new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="tutorials/new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="faq.html">FAQ</a> | </li>
+ <li><a href="cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="tutorials/new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="gallery.html">Gallery</a> | </li>
+ <li><a href="history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <div class="section" id="ruffus-documentation">
+<h1><strong>Ruffus</strong> documentation<a class="headerlink" href="#ruffus-documentation" title="Permalink to this headline">¶</a></h1>
+<div class="section" id="start-here">
+<h2>Start Here:<a class="headerlink" href="#start-here" title="Permalink to this headline">¶</a></h2>
+<div class="toctree-wrapper compound">
+<ul>
+<li class="toctree-l1"><a class="reference internal" href="installation.html">Installation</a><ul>
+<li class="toctree-l2"><a class="reference internal" href="installation.html#the-easy-way">The easy way</a></li>
+<li class="toctree-l2"><a class="reference internal" href="installation.html#the-most-up-to-date-code">The most up-to-date code:</a></li>
+</ul>
+</li>
+<li class="toctree-l1"><a class="reference internal" href="tutorials/new_tutorial/manual_contents.html"><strong>Ruffus</strong> Manual: List of Chapters and Example code</a></li>
+<li class="toctree-l1"><a class="reference internal" href="tutorials/new_tutorial/introduction.html"><strong>Chapter 1</strong>: An introduction to basic <em>Ruffus</em> syntax</a><ul>
+<li class="toctree-l2"><a class="reference internal" href="tutorials/new_tutorial/introduction.html#overview">Overview</a></li>
+<li class="toctree-l2"><a class="reference internal" href="tutorials/new_tutorial/introduction.html#importing-ruffus">Importing <em>Ruffus</em></a></li>
+<li class="toctree-l2"><a class="reference internal" href="tutorials/new_tutorial/introduction.html#ruffus-decorators"><em>Ruffus</em> decorators</a></li>
+<li class="toctree-l2"><a class="reference internal" href="tutorials/new_tutorial/introduction.html#your-first-ruffus-pipeline">Your first <em>Ruffus</em> pipeline</a></li>
+</ul>
+</li>
+<li class="toctree-l1"><a class="reference internal" href="tutorials/new_tutorial/transform.html"><strong>Chapter 2</strong>: Transforming data in a pipeline with <tt class="docutils literal"><span class="pre">@transform</span></tt></a><ul>
+<li class="toctree-l2"><a class="reference internal" href="tutorials/new_tutorial/transform.html#review">Review</a></li>
+<li class="toctree-l2"><a class="reference internal" href="tutorials/new_tutorial/transform.html#task-functions-as-recipes">Task functions as recipes</a></li>
+<li class="toctree-l2"><a class="reference internal" href="tutorials/new_tutorial/transform.html#transform-is-a-1-to-1-operation"><tt class="docutils literal"><span class="pre">@transform</span></tt> is a 1 to 1 operation</a></li>
+<li class="toctree-l2"><a class="reference internal" href="tutorials/new_tutorial/transform.html#input-and-output-parameters"><strong>Input</strong> and <strong>Output</strong> parameters</a></li>
+</ul>
+</li>
+<li class="toctree-l1"><a class="reference internal" href="tutorials/new_tutorial/transform_in_parallel.html"><strong>Chapter 3</strong>: More on <tt class="docutils literal"><span class="pre">@transform</span></tt>-ing data</a><ul>
+<li class="toctree-l2"><a class="reference internal" href="tutorials/new_tutorial/transform_in_parallel.html#review">Review</a></li>
+<li class="toctree-l2"><a class="reference internal" href="tutorials/new_tutorial/transform_in_parallel.html#running-pipelines-in-parallel">Running pipelines in parallel</a></li>
+<li class="toctree-l2"><a class="reference internal" href="tutorials/new_tutorial/transform_in_parallel.html#up-to-date-jobs-are-not-re-run-unnecessarily">Up-to-date jobs are not re-run unnecessarily</a></li>
+<li class="toctree-l2"><a class="reference internal" href="tutorials/new_tutorial/transform_in_parallel.html#defining-pipeline-tasks-out-of-order">Defining pipeline tasks out of order</a></li>
+<li class="toctree-l2"><a class="reference internal" href="tutorials/new_tutorial/transform_in_parallel.html#multiple-dependencies">Multiple dependencies</a></li>
+<li class="toctree-l2"><a class="reference internal" href="tutorials/new_tutorial/transform_in_parallel.html#follows"><tt class="docutils literal"><span class="pre">@follows</span></tt></a></li>
+<li class="toctree-l2"><a class="reference internal" href="tutorials/new_tutorial/transform_in_parallel.html#making-directories-automatically-with-follows-and-mkdir">Making directories automatically with <tt class="docutils literal"><span class="pre">@follows</span></tt> and <tt class="docutils literal"><span class="pre">mkdir</span></tt></a></li>
+<li class="toctree-l2"><a class="reference internal" href="tutorials/new_tutorial/transform_in_parallel.html#globs-in-the-input-parameter">Globs in the <strong>Input</strong> parameter</a></li>
+<li class="toctree-l2"><a class="reference internal" href="tutorials/new_tutorial/transform_in_parallel.html#mixing-tasks-and-globs-in-the-input-parameter">Mixing Tasks and Globs in the <strong>Input</strong> parameter</a></li>
+</ul>
+</li>
+<li class="toctree-l1"><a class="reference internal" href="tutorials/new_tutorial/originate.html"><strong>Chapter 4</strong>: Creating files with <tt class="docutils literal"><span class="pre">@originate</span></tt></a><ul>
+<li class="toctree-l2"><a class="reference internal" href="tutorials/new_tutorial/originate.html#simplifying-our-example-with-originate">Simplifying our example with <tt class="docutils literal"><span class="pre">@originate</span></tt></a></li>
+</ul>
+</li>
+<li class="toctree-l1"><a class="reference internal" href="tutorials/new_tutorial/pipeline_printout.html"><strong>Chapter 5</strong>: Understanding how your pipeline works with <tt class="docutils literal"><span class="pre">pipeline_printout(...)</span></tt></a><ul>
+<li class="toctree-l2"><a class="reference internal" href="tutorials/new_tutorial/pipeline_printout.html#printing-out-which-jobs-will-be-run">Printing out which jobs will be run</a></li>
+<li class="toctree-l2"><a class="reference internal" href="tutorials/new_tutorial/pipeline_printout.html#determining-which-jobs-are-out-of-date-or-not">Determining which jobs are out-of-date or not</a></li>
+<li class="toctree-l2"><a class="reference internal" href="tutorials/new_tutorial/pipeline_printout.html#verbosity-levels">Verbosity levels</a></li>
+<li class="toctree-l2"><a class="reference internal" href="tutorials/new_tutorial/pipeline_printout.html#abbreviating-long-file-paths-with-verbose-abbreviated-path">Abbreviating long file paths with <tt class="docutils literal"><span class="pre">verbose_abbreviated_path</span></tt></a></li>
+<li class="toctree-l2"><a class="reference internal" href="tutorials/new_tutorial/pipeline_printout.html#getting-a-list-of-all-tasks-in-a-pipeline">Getting a list of all tasks in a pipeline</a></li>
+</ul>
+</li>
+<li class="toctree-l1"><a class="reference internal" href="tutorials/new_tutorial/command_line.html"><strong>Chapter 6</strong>: Running <em>Ruffus</em> from the command line with ruffus.cmdline</a><ul>
+<li class="toctree-l2"><a class="reference internal" href="tutorials/new_tutorial/command_line.html#template-for-argparse">Template for argparse</a></li>
+<li class="toctree-l2"><a class="reference internal" href="tutorials/new_tutorial/command_line.html#command-line-arguments">Command Line Arguments</a></li>
+<li class="toctree-l2"><a class="reference internal" href="tutorials/new_tutorial/command_line.html#logging">1) Logging</a></li>
+<li class="toctree-l2"><a class="reference internal" href="tutorials/new_tutorial/command_line.html#tracing-pipeline-progress">2) Tracing pipeline progress</a></li>
+<li class="toctree-l2"><a class="reference internal" href="tutorials/new_tutorial/command_line.html#printing-a-flowchart">3) Printing a flowchart</a></li>
+<li class="toctree-l2"><a class="reference internal" href="tutorials/new_tutorial/command_line.html#running-in-parallel-on-multiple-processors">4) Running in parallel on multiple processors</a></li>
+<li class="toctree-l2"><a class="reference internal" href="tutorials/new_tutorial/command_line.html#setup-checkpointing-so-that-ruffus-knows-which-files-are-out-of-date">5) Setup checkpointing so that <em>Ruffus</em> knows which files are out of date</a></li>
+<li class="toctree-l2"><a class="reference internal" href="tutorials/new_tutorial/command_line.html#skipping-specified-options">6) Skipping specified options</a></li>
+<li class="toctree-l2"><a class="reference internal" href="tutorials/new_tutorial/command_line.html#specifying-verbosity-and-abbreviating-long-paths">7) Specifying verbosity and abbreviating long paths</a></li>
+<li class="toctree-l2"><a class="reference internal" href="tutorials/new_tutorial/command_line.html#displaying-the-version">8) Displaying the version</a></li>
+<li class="toctree-l2"><a class="reference internal" href="tutorials/new_tutorial/command_line.html#template-for-optparse">Template for optparse</a></li>
+</ul>
+</li>
+<li class="toctree-l1"><a class="reference internal" href="tutorials/new_tutorial/pipeline_printout_graph.html"><strong>Chapter 7</strong>: Displaying the pipeline visually with <tt class="docutils literal"><span class="pre">pipeline_printout_graph(...)</span></tt></a><ul>
+<li class="toctree-l2"><a class="reference internal" href="tutorials/new_tutorial/pipeline_printout_graph.html#printing-out-a-flowchart-of-our-pipeline">Printing out a flowchart of our pipeline</a></li>
+<li class="toctree-l2"><a class="reference internal" href="tutorials/new_tutorial/pipeline_printout_graph.html#command-line-options-made-easier-with-ruffus-cmdline">Command line options made easier with <tt class="docutils literal"><span class="pre">ruffus.cmdline</span></tt></a></li>
+<li class="toctree-l2"><a class="reference internal" href="tutorials/new_tutorial/pipeline_printout_graph.html#horribly-complicated-pipelines">Horribly complicated pipelines!</a></li>
+<li class="toctree-l2"><a class="reference internal" href="tutorials/new_tutorial/pipeline_printout_graph.html#circular-dependency-errors-in-pipelines">Circular dependency errors in pipelines!</a></li>
+<li class="toctree-l2"><a class="reference internal" href="tutorials/new_tutorial/pipeline_printout_graph.html#graphviz-customising-the-appearance-of-each-task"><tt class="docutils literal"><span class="pre">@graphviz</span></tt>: Customising the appearance of each task</a></li>
+</ul>
+</li>
+<li class="toctree-l1"><a class="reference internal" href="tutorials/new_tutorial/output_file_names.html"><strong>Chapter 8</strong>: Specifying output file names with <tt class="docutils literal"><span class="pre">formatter()</span></tt> and <tt class="docutils literal"><span class="pre">regex()</span></tt></a><ul>
+<li class="toctree-l2"><a class="reference internal" href="tutorials/new_tutorial/output_file_names.html#review">Review</a></li>
+<li class="toctree-l2"><a class="reference internal" href="tutorials/new_tutorial/output_file_names.html#a-different-file-name-suffix-for-each-pipeline-stage">A different file name <tt class="docutils literal"><span class="pre">suffix()</span></tt> for each pipeline stage</a></li>
+<li class="toctree-l2"><a class="reference internal" href="tutorials/new_tutorial/output_file_names.html#formatter-manipulates-pathnames-and-regular-expression"><tt class="docutils literal"><span class="pre">formatter()</span></tt> manipulates pathnames and regular expression</a></li>
+<li class="toctree-l2"><a class="reference internal" href="tutorials/new_tutorial/output_file_names.html#regex-manipulates-via-regular-expressions"><tt class="docutils literal"><span class="pre">regex()</span></tt> manipulates via regular expressions</a></li>
+</ul>
+</li>
+<li class="toctree-l1"><a class="reference internal" href="tutorials/new_tutorial/mkdir.html"><strong>Chapter 9</strong>: Preparing directories for output with <tt class="docutils literal"><span class="pre">@mkdir()</span></tt></a><ul>
+<li class="toctree-l2"><a class="reference internal" href="tutorials/new_tutorial/mkdir.html#overview">Overview</a></li>
+<li class="toctree-l2"><a class="reference internal" href="tutorials/new_tutorial/mkdir.html#creating-directories-after-string-substitution-in-a-zoo">Creating directories after string substitution in a zoo...</a></li>
+</ul>
+</li>
+<li class="toctree-l1"><a class="reference internal" href="tutorials/new_tutorial/checkpointing.html"><strong>Chapter 10</strong>: Checkpointing: Interrupted Pipelines and Exceptions</a><ul>
+<li class="toctree-l2"><a class="reference internal" href="tutorials/new_tutorial/checkpointing.html#overview">Overview</a></li>
+<li class="toctree-l2"><a class="reference internal" href="tutorials/new_tutorial/checkpointing.html#interrupting-tasks">Interrupting tasks</a></li>
+<li class="toctree-l2"><a class="reference internal" href="tutorials/new_tutorial/checkpointing.html#checkpointing-only-log-completed-jobs">Checkpointing: only log completed jobs</a></li>
+<li class="toctree-l2"><a class="reference internal" href="tutorials/new_tutorial/checkpointing.html#do-not-share-the-same-checkpoint-file-across-for-multiple-pipelines">Do not share the same checkpoint file across for multiple pipelines!</a></li>
+<li class="toctree-l2"><a class="reference internal" href="tutorials/new_tutorial/checkpointing.html#setting-checkpoint-file-names">Setting checkpoint file names</a></li>
+<li class="toctree-l2"><a class="reference internal" href="tutorials/new_tutorial/checkpointing.html#useful-checkpoint-file-name-policies-default-ruffus-history-file">Useful checkpoint file name policies <tt class="docutils literal"><span class="pre">DEFAULT_RUFFUS_HISTORY_FILE</span></tt></a></li>
+<li class="toctree-l2"><a class="reference internal" href="tutorials/new_tutorial/checkpointing.html#regenerating-the-checkpoint-file">Regenerating the checkpoint file</a></li>
+<li class="toctree-l2"><a class="reference internal" href="tutorials/new_tutorial/checkpointing.html#rules-for-determining-if-files-are-up-to-date">Rules for determining if files are up to date</a></li>
+<li class="toctree-l2"><a class="reference internal" href="tutorials/new_tutorial/checkpointing.html#missing-files-generate-exceptions">Missing files generate exceptions</a></li>
+<li class="toctree-l2"><a class="reference internal" href="tutorials/new_tutorial/checkpointing.html#caveats-coarse-timestamp-resolution">Caveats: Coarse Timestamp resolution</a></li>
+<li class="toctree-l2"><a class="reference internal" href="tutorials/new_tutorial/checkpointing.html#flag-files-checkpointing-for-the-paranoid">Flag files: Checkpointing for the paranoid</a></li>
+</ul>
+</li>
+<li class="toctree-l1"><a class="reference internal" href="tutorials/new_tutorial/decorators_compendium.html"><strong>Chapter 11</strong>: Pipeline topologies and a compendium of <em>Ruffus</em> decorators</a><ul>
+<li class="toctree-l2"><a class="reference internal" href="tutorials/new_tutorial/decorators_compendium.html#overview">Overview</a></li>
+<li class="toctree-l2"><a class="reference internal" href="tutorials/new_tutorial/decorators_compendium.html#transform"><tt class="docutils literal"><span class="pre">@transform</span></tt></a></li>
+<li class="toctree-l2"><a class="reference internal" href="tutorials/new_tutorial/decorators_compendium.html#a-bestiary-of-ruffus-decorators">A bestiary of <em>Ruffus</em> decorators</a></li>
+<li class="toctree-l2"><a class="reference internal" href="tutorials/new_tutorial/decorators_compendium.html#originate"><tt class="docutils literal"><span class="pre">@originate</span></tt></a></li>
+<li class="toctree-l2"><a class="reference internal" href="tutorials/new_tutorial/decorators_compendium.html#merge"><tt class="docutils literal"><span class="pre">@merge</span></tt></a></li>
+<li class="toctree-l2"><a class="reference internal" href="tutorials/new_tutorial/decorators_compendium.html#split"><tt class="docutils literal"><span class="pre">@split</span></tt></a></li>
+<li class="toctree-l2"><a class="reference internal" href="tutorials/new_tutorial/decorators_compendium.html#subdivide"><tt class="docutils literal"><span class="pre">@subdivide</span></tt></a></li>
+<li class="toctree-l2"><a class="reference internal" href="tutorials/new_tutorial/decorators_compendium.html#collate"><tt class="docutils literal"><span class="pre">@collate</span></tt></a></li>
+<li class="toctree-l2"><a class="reference internal" href="tutorials/new_tutorial/decorators_compendium.html#combinatorics">Combinatorics</a></li>
+<li class="toctree-l2"><a class="reference internal" href="tutorials/new_tutorial/decorators_compendium.html#product"><tt class="docutils literal"><span class="pre">@product</span></tt></a></li>
+<li class="toctree-l2"><a class="reference internal" href="tutorials/new_tutorial/decorators_compendium.html#combinations"><tt class="docutils literal"><span class="pre">@combinations</span></tt></a></li>
+<li class="toctree-l2"><a class="reference internal" href="tutorials/new_tutorial/decorators_compendium.html#combinations-with-replacement"><tt class="docutils literal"><span class="pre">@combinations_with_replacement</span></tt></a></li>
+<li class="toctree-l2"><a class="reference internal" href="tutorials/new_tutorial/decorators_compendium.html#permutations"><tt class="docutils literal"><span class="pre">@permutations</span></tt></a></li>
+</ul>
+</li>
+<li class="toctree-l1"><a class="reference internal" href="tutorials/new_tutorial/split.html"><strong>Chapter 12</strong>: Splitting up large tasks / files with <strong>@split</strong></a><ul>
+<li class="toctree-l2"><a class="reference internal" href="tutorials/new_tutorial/split.html#overview">Overview</a></li>
+<li class="toctree-l2"><a class="reference internal" href="tutorials/new_tutorial/split.html#example-calculate-variance-for-a-large-list-of-numbers-in-parallel">Example: Calculate variance for a large list of numbers in parallel</a></li>
+<li class="toctree-l2"><a class="reference internal" href="tutorials/new_tutorial/split.html#output-files-for-split">Output files for <tt class="docutils literal"><span class="pre">@split</span></tt></a></li>
+<li class="toctree-l2"><a class="reference internal" href="tutorials/new_tutorial/split.html#be-careful-in-specifying-output-globs">Be careful in specifying <strong>Output</strong> globs</a></li>
+<li class="toctree-l2"><a class="reference internal" href="tutorials/new_tutorial/split.html#clean-up-previous-pipeline-runs">Clean up previous pipeline runs</a></li>
+<li class="toctree-l2"><a class="reference internal" href="tutorials/new_tutorial/split.html#to-many">1 to many</a></li>
+<li class="toctree-l2"><a class="reference internal" href="tutorials/new_tutorial/split.html#nothing-to-many">Nothing to many</a></li>
+</ul>
+</li>
+<li class="toctree-l1"><a class="reference internal" href="tutorials/new_tutorial/merge.html"><strong>Chapter 13</strong>: <tt class="docutils literal"><span class="pre">@merge</span></tt> multiple input into a single result</a><ul>
+<li class="toctree-l2"><a class="reference internal" href="tutorials/new_tutorial/merge.html#overview-of-merge">Overview of <tt class="docutils literal"><span class="pre">@merge</span></tt></a></li>
+<li class="toctree-l2"><a class="reference internal" href="tutorials/new_tutorial/merge.html#merge-is-a-many-to-one-operator"><tt class="docutils literal"><span class="pre">@merge</span></tt> is a many to one operator</a></li>
+<li class="toctree-l2"><a class="reference internal" href="tutorials/new_tutorial/merge.html#example-combining-partial-solutions-calculating-variances">Example: Combining partial solutions: Calculating variances</a></li>
+</ul>
+</li>
+<li class="toctree-l1"><a class="reference internal" href="tutorials/new_tutorial/multiprocessing.html"><strong>Chapter 14</strong>: Multiprocessing, <tt class="docutils literal"><span class="pre">drmaa</span></tt> and Computation Clusters</a><ul>
+<li class="toctree-l2"><a class="reference internal" href="tutorials/new_tutorial/multiprocessing.html#overview">Overview</a></li>
+<li class="toctree-l2"><a class="reference internal" href="tutorials/new_tutorial/multiprocessing.html#restricting-parallelism-with-jobs-limit">Restricting parallelism with <tt class="docutils literal"><span class="pre">@jobs_limit</span></tt></a></li>
+<li class="toctree-l2"><a class="reference internal" href="tutorials/new_tutorial/multiprocessing.html#using-drmaa-to-dispatch-work-to-computational-clusters-or-grid-engines-from-ruffus-jobs">Using <tt class="docutils literal"><span class="pre">drmaa</span></tt> to dispatch work to Computational Clusters or Grid engines from Ruffus jobs</a></li>
+<li class="toctree-l2"><a class="reference internal" href="tutorials/new_tutorial/multiprocessing.html#forcing-a-pipeline-to-appear-up-to-date">Forcing a pipeline to appear up to date</a></li>
+</ul>
+</li>
+<li class="toctree-l1"><a class="reference internal" href="tutorials/new_tutorial/logging.html"><strong>Chapter 15</strong>: Logging progress through a pipeline</a><ul>
+<li class="toctree-l2"><a class="reference internal" href="tutorials/new_tutorial/logging.html#overview">Overview</a></li>
+<li class="toctree-l2"><a class="reference internal" href="tutorials/new_tutorial/logging.html#logging-task-job-completion">Logging task/job completion</a></li>
+<li class="toctree-l2"><a class="reference internal" href="tutorials/new_tutorial/logging.html#use-ruffus-cmdline">Use <tt class="docutils literal"><span class="pre">ruffus.cmdline</span></tt></a></li>
+<li class="toctree-l2"><a class="reference internal" href="tutorials/new_tutorial/logging.html#customising-logging">Customising logging</a></li>
+<li class="toctree-l2"><a class="reference internal" href="tutorials/new_tutorial/logging.html#log-your-own-messages">Log your own messages</a></li>
+</ul>
+</li>
+<li class="toctree-l1"><a class="reference internal" href="tutorials/new_tutorial/subdivide_collate.html"><strong>Chapter 16</strong>: <tt class="docutils literal"><span class="pre">@subdivide</span></tt> tasks to run efficiently and regroup with <tt class="docutils literal"><span class="pre">@collate</span></tt></a><ul>
+<li class="toctree-l2"><a class="reference internal" href="tutorials/new_tutorial/subdivide_collate.html#overview">Overview</a></li>
+<li class="toctree-l2"><a class="reference internal" href="tutorials/new_tutorial/subdivide_collate.html#subdivide-in-parallel"><tt class="docutils literal"><span class="pre">@subdivide</span></tt> in parallel</a></li>
+<li class="toctree-l2"><a class="reference internal" href="tutorials/new_tutorial/subdivide_collate.html#grouping-using-collate">Grouping using <tt class="docutils literal"><span class="pre">@collate</span></tt></a></li>
+</ul>
+</li>
+<li class="toctree-l1"><a class="reference internal" href="tutorials/new_tutorial/combinatorics.html"><strong>Chapter 17</strong>: <tt class="docutils literal"><span class="pre">@combinations</span></tt>, <tt class="docutils literal"><span class="pre">@permutations</span></tt> and all versus all <tt class="docutils literal"><span class="pre">@product</span></tt></a><ul>
+<li class="toctree-l2"><a class="reference internal" href="tutorials/new_tutorial/combinatorics.html#overview">Overview</a></li>
+<li class="toctree-l2"><a class="reference internal" href="tutorials/new_tutorial/combinatorics.html#generating-output-with-formatter">Generating output with <tt class="docutils literal"><span class="pre">formatter()</span></tt></a></li>
+<li class="toctree-l2"><a class="reference internal" href="tutorials/new_tutorial/combinatorics.html#all-vs-all-comparisons-with-product">All vs all comparisons with <tt class="docutils literal"><span class="pre">@product</span></tt></a></li>
+<li class="toctree-l2"><a class="reference internal" href="tutorials/new_tutorial/combinatorics.html#permute-all-k-tuple-orderings-of-inputs-without-repeats-using-permutations">Permute all k-tuple orderings of inputs without repeats using <tt class="docutils literal"><span class="pre">@permutations</span></tt></a></li>
+<li class="toctree-l2"><a class="reference internal" href="tutorials/new_tutorial/combinatorics.html#select-unordered-k-tuples-within-inputs-excluding-repeated-elements-using-combinations">Select unordered k-tuples within inputs excluding repeated elements using <tt class="docutils literal"><span class="pre">@combinations</span></tt></a></li>
+<li class="toctree-l2"><a class="reference internal" href="tutorials/new_tutorial/combinatorics.html#select-unordered-k-tuples-within-inputs-including-repeated-elements-with-combinations-with-replacement">Select unordered k-tuples within inputs <em>including</em> repeated elements with <tt class="docutils literal"><span class="pre">@combinations_with_replacement</span></tt></a></li>
+</ul>
+</li>
+<li class="toctree-l1"><a class="reference internal" href="tutorials/new_tutorial/active_if.html"><strong>Chapter 18</strong>: Turning parts of the pipeline on and off at runtime with <tt class="docutils literal"><span class="pre">@active_if</span></tt></a><ul>
+<li class="toctree-l2"><a class="reference internal" href="tutorials/new_tutorial/active_if.html#overview">Overview</a></li>
+<li class="toctree-l2"><a class="reference internal" href="tutorials/new_tutorial/active_if.html#active-if-controls-the-state-of-tasks"><tt class="docutils literal"><span class="pre">@active_if</span></tt> controls the state of tasks</a></li>
+</ul>
+</li>
+<li class="toctree-l1"><a class="reference internal" href="tutorials/new_tutorial/posttask.html"><strong>Chapter 19</strong>: Signal the completion of each stage of our pipeline with <tt class="docutils literal"><span class="pre">@posttask</span></tt></a><ul>
+<li class="toctree-l2"><a class="reference internal" href="tutorials/new_tutorial/posttask.html#overview">Overview</a></li>
+</ul>
+</li>
+<li class="toctree-l1"><a class="reference internal" href="tutorials/new_tutorial/inputs.html"><strong>Chapter 20</strong>: Manipulating task inputs via string substitution using <tt class="docutils literal"><span class="pre">inputs()</span></tt> and <tt class="docutils literal"><span class="pre">add_inputs()</span></tt></a><ul>
+<li class="toctree-l2"><a class="reference internal" href="tutorials/new_tutorial/inputs.html#overview">Overview</a></li>
+<li class="toctree-l2"><a class="reference internal" href="tutorials/new_tutorial/inputs.html#adding-additional-input-prerequisites-per-job-with-add-inputs">Adding additional <em>input</em> prerequisites per job with <tt class="docutils literal"><span class="pre">add_inputs()</span></tt></a></li>
+<li class="toctree-l2"><a class="reference internal" href="tutorials/new_tutorial/inputs.html#replacing-all-input-parameters-with-inputs">Replacing all input parameters with <tt class="docutils literal"><span class="pre">inputs()</span></tt></a></li>
+</ul>
+</li>
+<li class="toctree-l1"><a class="reference internal" href="tutorials/new_tutorial/onthefly.html"><strong>Chapter 21</strong>: Esoteric: Generating parameters on the fly with <tt class="docutils literal"><span class="pre">@files</span></tt></a><ul>
+<li class="toctree-l2"><a class="reference internal" href="tutorials/new_tutorial/onthefly.html#overview">Overview</a></li>
+<li class="toctree-l2"><a class="reference internal" href="tutorials/new_tutorial/onthefly.html#files-syntax"><tt class="docutils literal"><span class="pre">@files</span></tt> syntax</a></li>
+<li class="toctree-l2"><a class="reference internal" href="tutorials/new_tutorial/onthefly.html#a-cartesian-product-all-vs-all-example">A Cartesian Product, all vs all example</a></li>
+</ul>
+</li>
+<li class="toctree-l1"><a class="reference internal" href="tutorials/new_tutorial/parallel.html"><strong>Chapter 22</strong>: Esoteric: Running jobs in parallel without files using <tt class="docutils literal"><span class="pre">@parallel</span></tt></a><ul>
+<li class="toctree-l2"><a class="reference internal" href="tutorials/new_tutorial/parallel.html#parallel"><strong>@parallel</strong></a></li>
+</ul>
+</li>
+<li class="toctree-l1"><a class="reference internal" href="tutorials/new_tutorial/check_if_uptodate.html"><strong>Chapter 23</strong>: Esoteric: Writing custom functions to decide which jobs are up to date with <tt class="docutils literal"><span class="pre">@check_if_uptodate</span></tt></a><ul>
+<li class="toctree-l2"><a class="reference internal" href="tutorials/new_tutorial/check_if_uptodate.html#check-if-uptodate-manual-dependency-checking"><strong>@check_if_uptodate</strong> : Manual dependency checking</a></li>
+</ul>
+</li>
+<li class="toctree-l1"><a class="reference internal" href="tutorials/new_tutorial/flowchart_colours.html"><strong>Appendix 1</strong>: Flow Chart Colours with <tt class="docutils literal"><span class="pre">pipeline_printout_graph(...)</span></tt></a><ul>
+<li class="toctree-l2"><a class="reference internal" href="tutorials/new_tutorial/flowchart_colours.html#flowchart-colours">Flowchart colours</a></li>
+</ul>
+</li>
+<li class="toctree-l1"><a class="reference internal" href="tutorials/new_tutorial/dependencies.html"><strong>Appendix 2</strong>: How dependency is checked</a><ul>
+<li class="toctree-l2"><a class="reference internal" href="tutorials/new_tutorial/dependencies.html#overview">Overview</a></li>
+</ul>
+</li>
+<li class="toctree-l1"><a class="reference internal" href="tutorials/new_tutorial/exceptions.html"><strong>Appendix 3</strong>: Exceptions thrown inside pipelines</a><ul>
+<li class="toctree-l2"><a class="reference internal" href="tutorials/new_tutorial/exceptions.html#overview">Overview</a></li>
+<li class="toctree-l2"><a class="reference internal" href="tutorials/new_tutorial/exceptions.html#pipelines-running-in-parallel-accumulate-exceptions">Pipelines running in parallel accumulate Exceptions</a></li>
+<li class="toctree-l2"><a class="reference internal" href="tutorials/new_tutorial/exceptions.html#terminate-pipeline-immediately-upon-exceptions">Terminate pipeline immediately upon Exceptions</a></li>
+<li class="toctree-l2"><a class="reference internal" href="tutorials/new_tutorial/exceptions.html#display-exceptions-as-they-occur">Display exceptions as they occur</a></li>
+</ul>
+</li>
+<li class="toctree-l1"><a class="reference internal" href="tutorials/new_tutorial/list_of_ruffus_names.html"><strong>Appendix 4</strong>: Names exported from Ruffus</a><ul>
+<li class="toctree-l2"><a class="reference internal" href="tutorials/new_tutorial/list_of_ruffus_names.html#ruffus-names">Ruffus Names</a></li>
+</ul>
+</li>
+<li class="toctree-l1"><a class="reference internal" href="tutorials/new_tutorial/deprecated_files.html"><strong>Appendix 5</strong>: <strong>@files</strong>: Deprecated syntax</a><ul>
+<li class="toctree-l2"><a class="reference internal" href="tutorials/new_tutorial/deprecated_files.html#overview">Overview</a></li>
+<li class="toctree-l2"><a class="reference internal" href="tutorials/new_tutorial/deprecated_files.html#files"><strong>@files</strong></a></li>
+<li class="toctree-l2"><a class="reference internal" href="tutorials/new_tutorial/deprecated_files.html#running-the-same-code-on-different-parameters-in-parallel">Running the same code on different parameters in parallel</a></li>
+</ul>
+</li>
+<li class="toctree-l1"><a class="reference internal" href="tutorials/new_tutorial/deprecated_files_re.html"><strong>Appendix 6</strong>: <strong>@files_re</strong>: Deprecated <cite>syntax using regular expressions</cite></a><ul>
+<li class="toctree-l2"><a class="reference internal" href="tutorials/new_tutorial/deprecated_files_re.html#overview">Overview</a></li>
+</ul>
+</li>
+</ul>
+</div>
+<p>Example code for:</p>
+<div class="toctree-wrapper compound">
+<ul>
+<li class="toctree-l1"><a class="reference internal" href="tutorials/new_tutorial/introduction_code.html"><strong>Chapter 1</strong>: Python Code for An introduction to basic Ruffus syntax</a></li>
+<li class="toctree-l1"><a class="reference internal" href="tutorials/new_tutorial/transform_code.html"><strong>Chapter 1</strong>: Python Code for Transforming data in a pipeline with <tt class="docutils literal"><span class="pre">@transform</span></tt></a></li>
+<li class="toctree-l1"><a class="reference internal" href="tutorials/new_tutorial/transform_in_parallel_code.html"><strong>Chapter 3</strong>: Python Code for More on <tt class="docutils literal"><span class="pre">@transform</span></tt>-ing data</a></li>
+<li class="toctree-l1"><a class="reference internal" href="tutorials/new_tutorial/originate_code.html"><strong>Chapter 4</strong>: Python Code for Creating files with <tt class="docutils literal"><span class="pre">@originate</span></tt></a></li>
+<li class="toctree-l1"><a class="reference internal" href="tutorials/new_tutorial/pipeline_printout_code.html"><strong>Chapter 5</strong>: Python Code for Understanding how your pipeline works with <tt class="docutils literal"><span class="pre">pipeline_printout(...)</span></tt></a></li>
+<li class="toctree-l1"><a class="reference internal" href="tutorials/new_tutorial/pipeline_printout_graph_code.html"><strong>Chapter 7</strong>: Python Code for Displaying the pipeline visually with <tt class="docutils literal"><span class="pre">pipeline_printout_graph(...)</span></tt></a></li>
+<li class="toctree-l1"><a class="reference internal" href="tutorials/new_tutorial/output_file_names_code.html"><strong>Chapter 8</strong>: Python Code for Specifying output file names with <tt class="docutils literal"><span class="pre">formatter()</span></tt> and <tt class="docutils literal"><span class="pre">regex()</span></tt></a></li>
+<li class="toctree-l1"><a class="reference internal" href="tutorials/new_tutorial/mkdir_code.html"><strong>Chapter 9</strong>: Python Code for Preparing directories for output with <tt class="docutils literal"><span class="pre">@mkdir()</span></tt></a></li>
+<li class="toctree-l1"><a class="reference internal" href="tutorials/new_tutorial/checkpointing_code.html"><strong>Chapter 10</strong>: Python Code for Checkpointing: Interrupted Pipelines and Exceptions</a></li>
+<li class="toctree-l1"><a class="reference internal" href="tutorials/new_tutorial/split_code.html"><strong>Chapter 12</strong>: Python Code for Splitting up large tasks / files with <strong>@split</strong></a></li>
+<li class="toctree-l1"><a class="reference internal" href="tutorials/new_tutorial/merge_code.html"><strong>Chapter 13</strong>: Python Code for <tt class="docutils literal"><span class="pre">@merge</span></tt> multiple input into a single result</a></li>
+<li class="toctree-l1"><a class="reference internal" href="tutorials/new_tutorial/multiprocessing_code.html"><strong>Chapter 14</strong>: Python Code for Multiprocessing, <tt class="docutils literal"><span class="pre">drmaa</span></tt> and Computation Clusters</a></li>
+<li class="toctree-l1"><a class="reference internal" href="tutorials/new_tutorial/logging_code.html"><strong>Chapter 15</strong>: Python Code for Logging progress through a pipeline</a></li>
+<li class="toctree-l1"><a class="reference internal" href="tutorials/new_tutorial/subdivide_collate_code.html"><strong>Chapter 16</strong>: Python Code for <tt class="docutils literal"><span class="pre">@subdivide</span></tt> tasks to run efficiently and regroup with <tt class="docutils literal"><span class="pre">@collate</span></tt></a></li>
+<li class="toctree-l1"><a class="reference internal" href="tutorials/new_tutorial/combinatorics_code.html"><strong>Chapter 17</strong>: Python Code for <tt class="docutils literal"><span class="pre">@combinations</span></tt>, <tt class="docutils literal"><span class="pre">@permutations</span></tt> and all versus all <tt class="docutils literal"><span class="pre">@product</span></tt></a></li>
+<li class="toctree-l1"><a class="reference internal" href="tutorials/new_tutorial/inputs_code.html"><strong>Chapter 20</strong>: Python Code for Manipulating task inputs via string substitution using <tt class="docutils literal"><span class="pre">inputs()</span></tt> and <tt class="docutils literal"><span class="pre">add_inputs()</span></tt></a></li>
+<li class="toctree-l1"><a class="reference internal" href="tutorials/new_tutorial/onthefly_code.html"><strong>Chapter 21</strong>: Esoteric: Python Code for Generating parameters on the fly with <tt class="docutils literal"><span class="pre">@files</span></tt></a></li>
+<li class="toctree-l1"><a class="reference internal" href="tutorials/new_tutorial/flowchart_colours_code.html"><strong>Appendix 1</strong>: Python code for Flow Chart Colours with <tt class="docutils literal"><span class="pre">pipeline_printout_graph(...)</span></tt></a></li>
+</ul>
+</div>
+</div>
+<div class="section" id="overview">
+<h2>Overview:<a class="headerlink" href="#overview" title="Permalink to this headline">¶</a></h2>
+<div class="toctree-wrapper compound">
+<ul>
+<li class="toctree-l1"><a class="reference internal" href="cheatsheet.html">Cheat Sheet</a><ul>
+<li class="toctree-l2"><a class="reference internal" href="cheatsheet.html#annotate-functions-with-ruffus-decorators">1. Annotate functions with <strong>Ruffus</strong> decorators</a></li>
+<li class="toctree-l2"><a class="reference internal" href="cheatsheet.html#print-dependency-graph-if-necessary">2. Print dependency graph if necessary</a></li>
+<li class="toctree-l2"><a class="reference internal" href="cheatsheet.html#run-the-pipeline">3. Run the pipeline</a></li>
+</ul>
+</li>
+<li class="toctree-l1"><a class="reference internal" href="pipeline_functions.html">Pipeline functions</a><ul>
+<li class="toctree-l2"><a class="reference internal" href="pipeline_functions.html#index-0"><em>pipeline_run</em></a></li>
+<li class="toctree-l2"><a class="reference internal" href="pipeline_functions.html#index-1"><em>pipeline_printout</em></a></li>
+<li class="toctree-l2"><a class="reference internal" href="pipeline_functions.html#index-2"><em>pipeline_printout_graph</em></a></li>
+<li class="toctree-l2"><a class="reference internal" href="pipeline_functions.html#index-3"><em>pipeline_get_task_names</em></a></li>
+</ul>
+</li>
+<li class="toctree-l1"><a class="reference internal" href="drmaa_wrapper_functions.html">drmaa functions</a><ul>
+<li class="toctree-l2"><a class="reference internal" href="drmaa_wrapper_functions.html#index-0"><em>run_job</em></a></li>
+</ul>
+</li>
+<li class="toctree-l1"><a class="reference internal" href="installation.html">Installation</a><ul>
+<li class="toctree-l2"><a class="reference internal" href="installation.html#the-easy-way">The easy way</a></li>
+<li class="toctree-l2"><a class="reference internal" href="installation.html#the-most-up-to-date-code">The most up-to-date code:</a></li>
+</ul>
+</li>
+<li class="toctree-l1"><a class="reference internal" href="design.html">Design & Architecture</a><ul>
+<li class="toctree-l2"><a class="reference internal" href="design.html#gnu-make"><cite>GNU Make</cite></a></li>
+<li class="toctree-l2"><a class="reference internal" href="design.html#scons-rake-and-other-make-alternatives"><cite>Scons</cite>, <cite>Rake</cite> and other <cite>Make</cite> alternatives</a></li>
+<li class="toctree-l2"><a class="reference internal" href="design.html#managing-pipelines-stage-by-stage-using-ruffus">Managing pipelines stage-by-stage using <strong>Ruffus</strong></a></li>
+<li class="toctree-l2"><a class="reference internal" href="design.html#alternatives-to-ruffus">Alternatives to <strong>Ruffus</strong></a></li>
+</ul>
+</li>
+<li class="toctree-l1"><a class="reference internal" href="history.html">Major Features added to Ruffus</a><ul>
+<li class="toctree-l2"><a class="reference internal" href="history.html#version-2-5rc">version 2.5RC</a></li>
+<li class="toctree-l2"><a class="reference internal" href="history.html#version-2-4-1">version 2.4.1</a></li>
+<li class="toctree-l2"><a class="reference internal" href="history.html#version-2-4">version 2.4</a></li>
+<li class="toctree-l2"><a class="reference internal" href="history.html#version-2-3">version 2.3</a></li>
+<li class="toctree-l2"><a class="reference internal" href="history.html#version-2-2">version 2.2</a></li>
+<li class="toctree-l2"><a class="reference internal" href="history.html#version-2-1-1">version 2.1.1</a></li>
+<li class="toctree-l2"><a class="reference internal" href="history.html#version-2-1-0">version 2.1.0</a></li>
+<li class="toctree-l2"><a class="reference internal" href="history.html#version-2-0-10">version 2.0.10</a></li>
+<li class="toctree-l2"><a class="reference internal" href="history.html#version-2-0-9">version 2.0.9</a></li>
+<li class="toctree-l2"><a class="reference internal" href="history.html#version-2-0-8">version 2.0.8</a></li>
+<li class="toctree-l2"><a class="reference internal" href="history.html#version-2-0-2">version 2.0.2</a></li>
+<li class="toctree-l2"><a class="reference internal" href="history.html#version-2-0">version 2.0</a></li>
+<li class="toctree-l2"><a class="reference internal" href="history.html#version-1-1-4">version 1.1.4</a></li>
+<li class="toctree-l2"><a class="reference internal" href="history.html#version-1-0-7">version 1.0.7</a></li>
+<li class="toctree-l2"><a class="reference internal" href="history.html#version-1-0">version 1.0</a></li>
+</ul>
+</li>
+<li class="toctree-l1"><a class="reference internal" href="history.html#fixed-bugs">Fixed Bugs</a></li>
+<li class="toctree-l1"><a class="reference internal" href="todo.html">Future Changes to Ruffus</a><ul>
+<li class="toctree-l2"><a class="reference internal" href="todo.html#todo-pipeline-printout-graph-should-print-inactive-tasks">Todo: pipeline_printout_graph should print inactive tasks</a></li>
+<li class="toctree-l2"><a class="reference internal" href="todo.html#todo-mark-input-strings-as-non-file-names-and-add-support-for-dynamically-returned-parameters">Todo: Mark input strings as non-file names, and add support for dynamically returned parameters</a></li>
+<li class="toctree-l2"><a class="reference internal" href="todo.html#todo-allow-extra-parameters-to-be-used-in-output-substitution">Todo: Allow “extra” parameters to be used in output substitution</a></li>
+<li class="toctree-l2"><a class="reference internal" href="todo.html#todo-extra-signalling-before-and-after-each-task-and-job">Todo: Extra signalling before and after each task and job</a></li>
+<li class="toctree-l2"><a class="reference internal" href="todo.html#todo-split-subdivide-returns-the-actual-output-created">Todo: <tt class="docutils literal"><span class="pre">@split</span></tt> / <tt class="docutils literal"><span class="pre">@subdivide</span></tt> returns the actual output created</a></li>
+<li class="toctree-l2"><a class="reference internal" href="todo.html#id1">Todo: New decorators</a></li>
+<li class="toctree-l2"><a class="reference internal" href="todo.html#todo-named-parameters-in-decorators-for-clarity">Todo: Named parameters in decorators for clarity</a></li>
+<li class="toctree-l2"><a class="reference internal" href="todo.html#todo-bioinformatics-example-to-end-all-examples">Todo: Bioinformatics example to end all examples</a></li>
+<li class="toctree-l2"><a class="reference internal" href="todo.html#todo-allow-the-next-task-to-start-before-all-jobs-in-the-previous-task-have-finished">Todo: Allow the next task to start before all jobs in the previous task have finished</a></li>
+</ul>
+</li>
+<li class="toctree-l1"><a class="reference internal" href="todo.html#planned-improvements-to-ruffus">Planned Improvements to Ruffus</a><ul>
+<li class="toctree-l2"><a class="reference internal" href="todo.html#planned-running-python-code-task-functions-transparently-on-remote-cluster-nodes">Planned: Running python code (task functions) transparently on remote cluster nodes</a></li>
+<li class="toctree-l2"><a class="reference internal" href="todo.html#planned-custom-parameter-generator">Planned: Custom parameter generator</a></li>
+<li class="toctree-l2"><a class="reference internal" href="todo.html#planned-ruffus-gui-interface">Planned: Ruffus GUI interface.</a></li>
+<li class="toctree-l2"><a class="reference internal" href="todo.html#planned-non-decorator-function-interface-to-ruffus">Planned: Non-decorator / Function interface to Ruffus</a></li>
+<li class="toctree-l2"><a class="reference internal" href="todo.html#planned-remove-intermediate-files">Planned: Remove intermediate files</a></li>
+<li class="toctree-l2"><a class="reference internal" href="todo.html#planned-retry-on-error-num-of-retries">Planned: @retry_on_error(NUM_OF_RETRIES)</a></li>
+<li class="toctree-l2"><a class="reference internal" href="todo.html#planned-clean-up">Planned: Clean up</a></li>
+</ul>
+</li>
+<li class="toctree-l1"><a class="reference internal" href="implementation_notes.html">Implementation Tips</a><ul>
+<li class="toctree-l2"><a class="reference internal" href="implementation_notes.html#release">Release</a></li>
+<li class="toctree-l2"><a class="reference internal" href="implementation_notes.html#dbdict-py">dbdict.py</a></li>
+<li class="toctree-l2"><a class="reference internal" href="implementation_notes.html#how-to-write-new-decorators">how to write new decorators</a></li>
+</ul>
+</li>
+<li class="toctree-l1"><a class="reference internal" href="implementation_notes.html#implementation-notes">Implementation notes</a><ul>
+<li class="toctree-l2"><a class="reference internal" href="implementation_notes.html#ctrl-c-handling"><tt class="docutils literal"><span class="pre">Ctrl-C</span></tt> handling</a></li>
+<li class="toctree-l2"><a class="reference internal" href="implementation_notes.html#python3-compatability">Python3 compatability</a></li>
+<li class="toctree-l2"><a class="reference internal" href="implementation_notes.html#refactoring-parameter-handling">Refactoring: parameter handling</a></li>
+<li class="toctree-l2"><a class="reference internal" href="implementation_notes.html#formatter"><tt class="docutils literal"><span class="pre">formatter</span></tt></a></li>
+<li class="toctree-l2"><a class="reference internal" href="implementation_notes.html#product">@product()</a></li>
+<li class="toctree-l2"><a class="reference internal" href="implementation_notes.html#permutations-combinations-combinations-with-replacement"><tt class="docutils literal"><span class="pre">@permutations(...),</span></tt> <tt class="docutils literal"><span class="pre">@combinations(...),</span></tt> <tt class="docutils literal"><span class="pre">@combinations_with_replacement(...)</span></tt></a></li>
+<li class="toctree-l2"><a class="reference internal" href="implementation_notes.html#drmaa-alternatives">drmaa alternatives</a></li>
+<li class="toctree-l2"><a class="reference internal" href="implementation_notes.html#task-completion-monitoring">Task completion monitoring</a></li>
+<li class="toctree-l2"><a class="reference internal" href="implementation_notes.html#mkdir"><tt class="docutils literal"><span class="pre">@mkdir(...),</span></tt></a></li>
+</ul>
+</li>
+<li class="toctree-l1"><a class="reference internal" href="faq.html">FAQ</a><ul>
+<li class="toctree-l2"><a class="reference internal" href="faq.html#citations">Citations</a></li>
+<li class="toctree-l2"><a class="reference internal" href="faq.html#good-practices">Good practices</a></li>
+<li class="toctree-l2"><a class="reference internal" href="faq.html#general">General</a></li>
+<li class="toctree-l2"><a class="reference internal" href="faq.html#windows">Windows</a></li>
+<li class="toctree-l2"><a class="reference internal" href="faq.html#sun-grid-engine-pbs-slurm-etc">Sun Grid Engine / PBS / SLURM etc</a></li>
+<li class="toctree-l2"><a class="reference internal" href="faq.html#sharing-python-objects-between-ruffus-processes-running-concurrently">Sharing python objects between Ruffus processes running concurrently</a></li>
+</ul>
+</li>
+<li class="toctree-l1"><a class="reference internal" href="glossary.html">Glossary</a></li>
+<li class="toctree-l1"><a class="reference internal" href="gallery.html">Hall of Fame: User contributed flowcharts</a><ul>
+<li class="toctree-l2"><a class="reference internal" href="gallery.html#rnaseq-pipeline">RNASeq pipeline</a></li>
+<li class="toctree-l2"><a class="reference internal" href="gallery.html#non-coding-evolutionary-constraints">non-coding evolutionary constraints</a></li>
+<li class="toctree-l2"><a class="reference internal" href="gallery.html#snp-annotation">SNP annotation</a></li>
+<li class="toctree-l2"><a class="reference internal" href="gallery.html#chip-seq-analysis">Chip-Seq analysis</a></li>
+</ul>
+</li>
+<li class="toctree-l1"><a class="reference internal" href="why_ruffus.html">Why <em>Ruffus</em>?</a></li>
+</ul>
+</div>
+</div>
+<div class="section" id="examples">
+<h2>Examples<a class="headerlink" href="#examples" title="Permalink to this headline">¶</a></h2>
+<div class="toctree-wrapper compound">
+<ul>
+<li class="toctree-l1"><a class="reference internal" href="examples/bioinformatics/index.html">Construction of a simple pipeline to run BLAST jobs</a><ul>
+<li class="toctree-l2"><a class="reference internal" href="examples/bioinformatics/index.html#overview">Overview</a></li>
+<li class="toctree-l2"><a class="reference internal" href="examples/bioinformatics/index.html#prerequisites">Prerequisites</a></li>
+<li class="toctree-l2"><a class="reference internal" href="examples/bioinformatics/index.html#code">Code</a></li>
+<li class="toctree-l2"><a class="reference internal" href="examples/bioinformatics/index.html#step-1-splitting-up-the-query-sequences">Step 1. Splitting up the query sequences</a></li>
+<li class="toctree-l2"><a class="reference internal" href="examples/bioinformatics/index.html#step-2-run-blast-jobs-in-parallel">Step 2. Run BLAST jobs in parallel</a></li>
+<li class="toctree-l2"><a class="reference internal" href="examples/bioinformatics/index.html#step-3-combining-blast-results">Step 3. Combining BLAST results</a></li>
+<li class="toctree-l2"><a class="reference internal" href="examples/bioinformatics/index.html#step-4-running-the-pipeline">Step 4. Running the pipeline</a></li>
+<li class="toctree-l2"><a class="reference internal" href="examples/bioinformatics/index.html#step-5-testing-dependencies">Step 5. Testing dependencies</a></li>
+<li class="toctree-l2"><a class="reference internal" href="examples/bioinformatics/index.html#what-is-next">What is next?</a></li>
+</ul>
+</li>
+<li class="toctree-l1"><a class="reference internal" href="examples/bioinformatics/part2.html">Part 2: A slightly more practical pipeline to run blasts jobs</a><ul>
+<li class="toctree-l2"><a class="reference internal" href="examples/bioinformatics/part2.html#overview">Overview</a></li>
+<li class="toctree-l2"><a class="reference internal" href="examples/bioinformatics/part2.html#step-1-cleaning-up-any-leftover-junk-from-previous-pipeline-runs">Step 1. Cleaning up any leftover junk from previous pipeline runs</a></li>
+<li class="toctree-l2"><a class="reference internal" href="examples/bioinformatics/part2.html#step-2-adding-a-flag-file-to-mark-successful-completion">Step 2. Adding a “flag” file to mark successful completion</a></li>
+<li class="toctree-l2"><a class="reference internal" href="examples/bioinformatics/part2.html#step-3-allowing-the-script-to-be-invoked-on-the-command-line">Step 3. Allowing the script to be invoked on the command line</a></li>
+<li class="toctree-l2"><a class="reference internal" href="examples/bioinformatics/part2.html#step-4-printing-out-a-flowchart-for-the-pipeline">Step 4. Printing out a flowchart for the pipeline</a></li>
+<li class="toctree-l2"><a class="reference internal" href="examples/bioinformatics/part2.html#step-5-errors">Step 5. Errors</a></li>
+<li class="toctree-l2"><a class="reference internal" href="examples/bioinformatics/part2.html#step-6-will-it-run">Step 6. Will it run?</a></li>
+</ul>
+</li>
+<li class="toctree-l1"><a class="reference internal" href="examples/bioinformatics/part1_code.html">Ruffus code</a></li>
+<li class="toctree-l1"><a class="reference internal" href="examples/bioinformatics/part2_code.html">Ruffus code</a></li>
+<li class="toctree-l1"><a class="reference internal" href="examples/paired_end_data.py.html">Example code for <tt class="docutils literal"><span class="pre">FAQ</span> <span class="pre">Good</span> <span class="pre">practices:</span> <span class="pre">"What</span> <span class="pre">is</span> <span class="pre">the</span> <span class="pre">best</span> <span class="pre">way</span> <span class="pre">of</span> <span class="pre">handling</span> <span class="pre">data</span> <span class="p [...]
+</ul>
+</div>
+</div>
+<div class="section" id="reference">
+<h2>Reference:<a class="headerlink" href="#reference" title="Permalink to this headline">¶</a></h2>
+<div class="section" id="decorators">
+<h3>Decorators<a class="headerlink" href="#decorators" title="Permalink to this headline">¶</a></h3>
+<div class="toctree-wrapper compound">
+<ul>
+<li class="toctree-l1"><a class="reference internal" href="decorators/decorators.html">Ruffus Decorators</a></li>
+<li class="toctree-l1"><a class="reference internal" href="decorators/indicator_objects.html">Indicator Objects</a></li>
+</ul>
+</div>
+<div class="topic">
+<p class="topic-title first">Core</p>
+<div class="toctree-wrapper compound">
+<ul>
+<li class="toctree-l1"><a class="reference internal" href="decorators/originate.html">@originate</a></li>
+<li class="toctree-l1"><a class="reference internal" href="decorators/split.html">@split</a></li>
+<li class="toctree-l1"><a class="reference internal" href="decorators/split.html#split-with-regex-add-inputs-and-inputs">@split with <tt class="docutils literal"><span class="pre">regex(...)</span></tt>, <tt class="docutils literal"><span class="pre">add_inputs</span></tt> and <tt class="docutils literal"><span class="pre">inputs</span></tt></a></li>
+<li class="toctree-l1"><a class="reference internal" href="decorators/transform.html">@transform</a></li>
+<li class="toctree-l1"><a class="reference internal" href="decorators/merge.html">@merge</a></li>
+</ul>
+</div>
+</div>
+<div class="topic">
+<p class="topic-title first">For advanced users</p>
+<div class="toctree-wrapper compound">
+<ul>
+<li class="toctree-l1"><a class="reference internal" href="decorators/subdivide.html">@subdivide</a></li>
+<li class="toctree-l1"><a class="reference internal" href="decorators/transform_ex.html">@transform with <tt class="docutils literal"><span class="pre">add_inputs</span></tt> and <tt class="docutils literal"><span class="pre">inputs</span></tt></a></li>
+<li class="toctree-l1"><a class="reference internal" href="decorators/collate.html">@collate</a></li>
+<li class="toctree-l1"><a class="reference internal" href="decorators/collate_ex.html">@collate with <tt class="docutils literal"><span class="pre">add_inputs</span></tt> and <tt class="docutils literal"><span class="pre">inputs</span></tt></a></li>
+<li class="toctree-l1"><a class="reference internal" href="decorators/graphviz.html">@graphviz</a></li>
+<li class="toctree-l1"><a class="reference internal" href="decorators/mkdir.html">@mkdir</a></li>
+<li class="toctree-l1"><a class="reference internal" href="decorators/jobs_limit.html">@jobs_limit</a></li>
+<li class="toctree-l1"><a class="reference internal" href="decorators/posttask.html">@posttask</a></li>
+<li class="toctree-l1"><a class="reference internal" href="decorators/active_if.html">@active_if</a></li>
+<li class="toctree-l1"><a class="reference internal" href="decorators/follows.html">@follows</a></li>
+</ul>
+</div>
+</div>
+<div class="topic">
+<p class="topic-title first">Combinatorics</p>
+<div class="toctree-wrapper compound">
+<ul>
+<li class="toctree-l1"><a class="reference internal" href="decorators/product.html">@product</a></li>
+<li class="toctree-l1"><a class="reference internal" href="decorators/permutations.html">@permutations</a></li>
+<li class="toctree-l1"><a class="reference internal" href="decorators/combinations.html">@combinations</a></li>
+<li class="toctree-l1"><a class="reference internal" href="decorators/combinations_with_replacement.html">@combinations_with_replacement</a></li>
+</ul>
+</div>
+</div>
+<div class="topic">
+<p class="topic-title first">Esoteric</p>
+<div class="toctree-wrapper compound">
+<ul>
+<li class="toctree-l1"><a class="reference internal" href="decorators/files_ex.html">Generating parameters on the fly for @files</a></li>
+<li class="toctree-l1"><a class="reference internal" href="decorators/check_if_uptodate.html">@check_if_uptodate</a></li>
+<li class="toctree-l1"><a class="reference internal" href="decorators/parallel.html">@parallel</a></li>
+</ul>
+</div>
+</div>
+<div class="topic">
+<p class="topic-title first">Deprecated</p>
+<div class="toctree-wrapper compound">
+<ul>
+<li class="toctree-l1"><a class="reference internal" href="decorators/files.html">@files</a></li>
+<li class="toctree-l1"><a class="reference internal" href="decorators/files_re.html">@files_re</a></li>
+</ul>
+</div>
+</div>
+</div>
+<div class="section" id="modules">
+<h3>Modules:<a class="headerlink" href="#modules" title="Permalink to this headline">¶</a></h3>
+<div class="toctree-wrapper compound">
+<ul>
+<li class="toctree-l1"><a class="reference internal" href="task.html">ruffus.Task</a><ul>
+<li class="toctree-l2"><a class="reference internal" href="task.html#decorators">Decorators</a></li>
+<li class="toctree-l2"><a class="reference internal" href="task.html#pipeline-functions">Pipeline functions</a></li>
+<li class="toctree-l2"><a class="reference internal" href="task.html#id1">Logging</a></li>
+<li class="toctree-l2"><a class="reference internal" href="task.html#implementation">Implementation:</a></li>
+<li class="toctree-l2"><a class="reference internal" href="task.html#exceptions-and-errors">Exceptions and Errors</a></li>
+</ul>
+</li>
+<li class="toctree-l1"><a class="reference internal" href="proxy_logger.html">ruffus.proxy_logger</a><ul>
+<li class="toctree-l2"><a class="reference internal" href="proxy_logger.html#create-proxy-for-logging-for-use-with-multiprocessing">Create proxy for logging for use with multiprocessing</a></li>
+<li class="toctree-l2"><a class="reference internal" href="proxy_logger.html#proxies-for-a-log">Proxies for a log:</a></li>
+<li class="toctree-l2"><a class="reference internal" href="proxy_logger.html#create-a-logging-object">Create a logging object</a></li>
+</ul>
+</li>
+</ul>
+</div>
+</div>
+</div>
+<div class="section" id="indices-and-tables">
+<h2>Indices and tables<a class="headerlink" href="#indices-and-tables" title="Permalink to this headline">¶</a></h2>
+<ul class="simple">
+<li><a class="reference internal" href="genindex.html"><em>Index</em></a></li>
+<li><a class="reference internal" href="py-modindex.html"><em>Module Index</em></a></li>
+<li><a class="reference internal" href="search.html"><em>Search Page</em></a></li>
+</ul>
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="#">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#"><strong>Ruffus</strong> documentation</a><ul>
+<li><a class="reference internal" href="#start-here">Start Here:</a><ul>
+</ul>
+</li>
+<li><a class="reference internal" href="#overview">Overview:</a><ul>
+</ul>
+</li>
+<li><a class="reference internal" href="#examples">Examples</a><ul>
+</ul>
+</li>
+<li><a class="reference internal" href="#reference">Reference:</a><ul>
+<li><a class="reference internal" href="#decorators">Decorators</a><ul>
+</ul>
+</li>
+<li><a class="reference internal" href="#modules">Modules:</a><ul>
+</ul>
+</li>
+</ul>
+</li>
+<li><a class="reference internal" href="#indices-and-tables">Indices and tables</a></li>
+</ul>
+</li>
+</ul>
+
+ <h4>Next topic</h4>
+ <p class="topless"><a href="installation.html"
+ title="next chapter">Installation</a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="_sources/contents.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="decorators/decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="decorators/originate.html">@originate</a> </li>
+ <li><a href="decorators/split.html">@split</a> </li>
+ <li><a href="decorators/transform.html">@transform</a> </li>
+ <li><a href="decorators/merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="decorators/decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="decorators/subdivide.html">@subdivide</a> </li>
+ <li><a href="decorators/transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="decorators/collate.html">@collate</a> </li>
+ <li><a href="decorators/collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="decorators/graphviz.html">@graphviz</a> </li>
+ <li><a href="decorators/mkdir.html">@mkdir</a> </li>
+ <li><a href="decorators/follows.html">@follows / mkdir</a> </li>
+ <li><a href="decorators/posttask.html">@posttask touch_file</a> </li>
+ <li><a href="decorators/active_if.html">@active_if</a> </li>
+ <li><a href="decorators/jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="decorators/decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="decorators/product.html">@product </a> </li>
+ <li><a href="decorators/permutations.html">@permutations </a> </li>
+ <li><a href="decorators/combinations.html">@combinations </a> </li>
+                                        <li><a href="decorators/combinations_with_replacement.html">@combinations_with_replacement </a>    </li>
+ </ul>
+
+
+ <h4><a href="decorators/decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="decorators/files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="decorators/parallel.html">@parallel</a> </li>
+ <li><a href="decorators/check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="decorators/indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="installation.html" title="Installation"
+ >next</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="index.html">Home</a> | </li>
+ <li><a href="#">Contents</a> | </li>
+ <li><a href="installation.html">Install</a> | </li>
+ <li><a href="tutorials/new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="tutorials/new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="faq.html">FAQ</a> | </li>
+ <li><a href="cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="tutorials/new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="gallery.html">Gallery</a> | </li>
+ <li><a href="history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/decorators/active_if.html b/doc/_build/html/decorators/active_if.html
new file mode 100644
index 0000000..b7fa389
--- /dev/null
+++ b/doc/_build/html/decorators/active_if.html
@@ -0,0 +1,295 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>@active_if — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../_static/jquery.js"></script>
+ <script type="text/javascript" src="../_static/underscore.js"></script>
+ <script type="text/javascript" src="../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../index.html" />
+ <link rel="next" title="@follows" href="follows.html" />
+ <link rel="prev" title="@posttask" href="posttask.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="follows.html" title="@follows"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="posttask.html" title="@posttask"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../index.html">Home</a> | </li>
+ <li><a href="../contents.html">Contents</a> | </li>
+ <li><a href="../installation.html">Install</a> | </li>
+ <li><a href="../tutorials/new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../tutorials/new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../faq.html">FAQ</a> | </li>
+ <li><a href="../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../tutorials/new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../gallery.html">Gallery</a> | </li>
+ <li><a href="../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <span class="target" id="decorators-active-if"></span><div class="admonition seealso" id="index-0">
+<p class="first admonition-title">See also</p>
+<ul class="last simple">
+<li><a class="reference internal" href="decorators.html#decorators"><em>Decorators</em></a> for more decorators</li>
+<li>More on @active_if in the <tt class="docutils literal"><span class="pre">Ruffus</span></tt> <a class="reference internal" href="../tutorials/new_tutorial/active_if.html#new-manual-active-if"><em>Manual</em></a></li>
+</ul>
+</div>
+<div class="section" id="active-if">
+<h1>@active_if<a class="headerlink" href="#active-if" title="Permalink to this headline">¶</a></h1>
+<div class="section" id="active-if-on-or-off1-on-or-off2">
+<h2><em>@active_if</em>(on_or_off1, [on_or_off2,...])<a class="headerlink" href="#active-if-on-or-off1-on-or-off2" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p><strong>Purpose:</strong></p>
+<blockquote>
+<div><ul class="simple">
+<li>Switches tasks on and off at run time depending on the supplied conditions</li>
+<li>Evaluated each time <tt class="docutils literal"><span class="pre">pipeline_run</span></tt>, <tt class="docutils literal"><span class="pre">pipeline_printout</span></tt> or <tt class="docutils literal"><span class="pre">pipeline_printout_graph</span></tt> is called.</li>
+<li>The design and initial implementation were contributed by Jacob Biesinger</li>
+<li>Dormant tasks behave as if they are up to date and have no output.</li>
+</ul>
+</div></blockquote>
+<p><strong>Example</strong>:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+<span class="n">run_if_true_1</span> <span class="o">=</span> <span class="bp">True</span>
+<span class="n">run_if_true_2</span> <span class="o">=</span> <span class="bp">False</span>
+<span class="n">run_if_true_3</span> <span class="o">=</span> <span class="bp">True</span>
+
+
+<span class="c">#</span>
+<span class="c"># task1</span>
+<span class="c">#</span>
+<span class="nd">@originate</span><span class="p">([</span><span class="s">'a.foo'</span><span class="p">,</span> <span class="s">'b.foo'</span><span class="p">])</span>
+<span class="k">def</span> <span class="nf">create_files</span><span class="p">(</span><span class="n">outfile</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> create_files</span>
+<span class="sd"> """</span>
+ <span class="nb">open</span><span class="p">(</span><span class="n">outfile</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="n">outfile</span> <span class="o">+</span> <span class="s">"</span><span class="se">\n</span><span class="s">"</span><span class="p">)</span>
+
+<span class="c">#</span>
+<span class="c"># Only runs if all three run_if_true conditions are met</span>
+<span class="c">#</span>
+<span class="hll"><span class="c"># @active_if determines if task is active</span>
+</span><span class="nd">@active_if</span><span class="p">(</span><span class="n">run_if_true_1</span><span class="p">,</span> <span class="k">lambda</span><span class="p">:</span> <span class="n">run_if_true_2</span><span class="p">)</span>
+<span class="nd">@active_if</span><span class="p">(</span><span class="n">run_if_true_3</span><span class="p">)</span>
+<span class="nd">@transform</span><span class="p">(</span><span class="n">create_files</span><span class="p">,</span> <span class="n">suffix</span><span class="p">(</span><span class="s">".foo"</span><span class="p">),</span> <span class="s">".bar"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">this_task_might_be_inactive</span><span class="p">(</span><span class="n">infile</span><span class="p">,</span> <span class="n">outfile</span><span class="p">):</span>
+ <span class="nb">open</span><span class="p">(</span><span class="n">outfile</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="s">"</span><span class="si">%s</span><span class="s"> -> </span><span class="si">%s</span><span class="se">\n</span><span class="s">"</span> <span class="o">%</span> <span class="p">(</span><span class="n">infile</span> [...]
+
+
+<span class="c"># @active_if switches off task because run_if_true_2 == False</span>
+<span class="n">pipeline_run</span><span class="p">(</span><span class="n">verbose</span> <span class="o">=</span> <span class="mi">3</span><span class="p">)</span>
+
+<span class="c"># @active_if switches on task because all run_if_true conditions are met</span>
+<span class="n">run_if_true_2</span> <span class="o">=</span> <span class="bp">True</span>
+<span class="n">pipeline_run</span><span class="p">(</span><span class="n">verbose</span> <span class="o">=</span> <span class="mi">3</span><span class="p">)</span>
+</pre></div>
+</div>
+<p>Produces the following output:</p>
+<blockquote>
+<div><div class="highlight-pycon"><div class="highlight"><pre><span class="hll"><span class="gp">>>> </span><span class="c"># @active_if switches off task "this_task_might_be_inactive" because run_if_true_2 == False</span>
+</span><span class="gp">>>> </span><span class="n">pipeline_run</span><span class="p">(</span><span class="n">verbose</span> <span class="o">=</span> <span class="mi">3</span><span class="p">)</span>
+
+<span class="go">Task enters queue = create_files</span>
+<span class="go">create_files</span>
+<span class="go"> Job = [None -> a.foo] Missing file [a.foo]</span>
+<span class="go"> Job = [None -> b.foo] Missing file [b.foo]</span>
+<span class="go"> Job = [None -> a.foo] completed</span>
+<span class="go"> Job = [None -> b.foo] completed</span>
+<span class="go">Completed Task = create_files</span>
+<span class="go">Inactive Task = this_task_might_be_inactive</span>
+
+<span class="hll"><span class="gp">>>> </span><span class="c"># @active_if switches on task "this_task_might_be_inactive" because all run_if_true conditions are met</span>
+</span><span class="gp">>>> </span><span class="n">run_if_true_2</span> <span class="o">=</span> <span class="bp">True</span>
+<span class="gp">>>> </span><span class="n">pipeline_run</span><span class="p">(</span><span class="n">verbose</span> <span class="o">=</span> <span class="mi">3</span><span class="p">)</span>
+
+<span class="go">Task enters queue = this_task_might_be_inactive</span>
+
+<span class="go"> Job = [a.foo -> a.bar] Missing file [a.bar]</span>
+<span class="go"> Job = [b.foo -> b.bar] Missing file [b.bar]</span>
+<span class="go"> Job = [a.foo -> a.bar] completed</span>
+<span class="go"> Job = [b.foo -> b.bar] completed</span>
+<span class="go">Completed Task = this_task_might_be_inactive</span>
+</pre></div>
+</div>
+</div></blockquote>
+</div></blockquote>
+<p><strong>Parameters:</strong></p>
+</div></blockquote>
+<blockquote id="decorators-active-if-on-or-off">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>on_or_off</em>:</dt>
+<dd><p class="first">A comma separated list of boolean conditions. These can be values, functions or callable objects which return True / False</p>
+<p class="last">Multiple <tt class="docutils literal"><span class="pre">@active_if</span></tt> decorators can be stacked for clarity as in the example</p>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
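+<p>As a further illustration (not taken from the Ruffus manual), the condition can also be driven from the command line. The following is a minimal, hypothetical sketch: the <tt class="docutils literal"><span class="pre">--run_qc</span></tt> flag and the task names are invented for this example; only the use of a callable inside <tt class="docutils literal"><span class="pre">@active_if</span></tt> follows the documented behaviour above.</p>
+<div class="highlight-python"><pre>from ruffus import *
+import argparse
+
+# Hypothetical flag: the optional QC stage only runs when --run_qc is given
+parser = argparse.ArgumentParser()
+parser.add_argument("--run_qc", action="store_true")
+options = parser.parse_args()
+
+@originate(["sample1.txt", "sample2.txt"])
+def make_samples(outfile):
+    open(outfile, "w").write(outfile + "\n")
+
+# The callable is re-evaluated each time pipeline_run() is called
+@active_if(lambda: options.run_qc)
+@transform(make_samples, suffix(".txt"), ".qc")
+def optional_qc(infile, outfile):
+    open(outfile, "w").write("qc of %s\n" % infile)
+
+pipeline_run()</pre>
+</div>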
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="../contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#">@active_if</a><ul>
+<li><a class="reference internal" href="#active-if-on-or-off1-on-or-off2"><em>@active_if</em>(on_or_off1, [on_or_off2,...])</a></li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="posttask.html"
+ title="previous chapter">@posttask</a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="follows.html"
+ title="next chapter">@follows</a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../_sources/decorators/active_if.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="originate.html">@originate</a> </li>
+ <li><a href="split.html">@split</a> </li>
+ <li><a href="transform.html">@transform</a> </li>
+ <li><a href="merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="subdivide.html">@subdivide</a> </li>
+ <li><a href="transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="collate.html">@collate</a> </li>
+ <li><a href="collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="graphviz.html">@graphviz</a> </li>
+ <li><a href="mkdir.html">@mkdir</a> </li>
+ <li><a href="follows.html">@follows / mkdir</a> </li>
+ <li><a href="posttask.html">@posttask touch_file</a> </li>
+ <li><a href="#">@active_if</a> </li>
+ <li><a href="jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="product.html">@product </a> </li>
+ <li><a href="permutations.html">@permutations </a> </li>
+ <li><a href="combinations.html">@combinations </a> </li>
+                                        <li><a href="combinations_with_replacement.html">@combinations_with_replacement </a>    </li>
+ </ul>
+
+
+ <h4><a href="decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="parallel.html">@parallel</a> </li>
+ <li><a href="check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="follows.html" title="@follows"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="posttask.html" title="@posttask"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../index.html">Home</a> | </li>
+ <li><a href="../contents.html">Contents</a> | </li>
+ <li><a href="../installation.html">Install</a> | </li>
+ <li><a href="../tutorials/new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../tutorials/new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../faq.html">FAQ</a> | </li>
+ <li><a href="../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../tutorials/new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../gallery.html">Gallery</a> | </li>
+ <li><a href="../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/decorators/check_if_uptodate.html b/doc/_build/html/decorators/check_if_uptodate.html
new file mode 100644
index 0000000..38e4bcb
--- /dev/null
+++ b/doc/_build/html/decorators/check_if_uptodate.html
@@ -0,0 +1,256 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>@check_if_uptodate — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../_static/jquery.js"></script>
+ <script type="text/javascript" src="../_static/underscore.js"></script>
+ <script type="text/javascript" src="../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../index.html" />
+ <link rel="next" title="@parallel" href="parallel.html" />
+ <link rel="prev" title="Generating parameters on the fly for @files" href="files_ex.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="parallel.html" title="@parallel"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="files_ex.html" title="Generating parameters on the fly for @files"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../index.html">Home</a> | </li>
+ <li><a href="../contents.html">Contents</a> | </li>
+ <li><a href="../installation.html">Install</a> | </li>
+ <li><a href="../tutorials/new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../tutorials/new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../faq.html">FAQ</a> | </li>
+ <li><a href="../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../tutorials/new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../gallery.html">Gallery</a> | </li>
+ <li><a href="../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <span class="target" id="decorators-check-if-uptodate"></span><div class="admonition seealso" id="index-0">
+<p class="first admonition-title">See also</p>
+<ul class="last simple">
+<li><a class="reference internal" href="decorators.html#decorators"><em>Decorators</em></a> for more decorators</li>
+</ul>
+</div>
+<div class="section" id="check-if-uptodate">
+<h1>@check_if_uptodate<a class="headerlink" href="#check-if-uptodate" title="Permalink to this headline">¶</a></h1>
+<div class="section" id="check-if-uptodate-dependency-checking-function">
+<h2><em>@check_if_uptodate</em> (<a class="reference internal" href="#decorators-check-if-uptodate-dependency-checking-function"><cite>dependency_checking_function</cite></a>)<a class="headerlink" href="#check-if-uptodate-dependency-checking-function" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><dl class="docutils">
+<dt><strong>Purpose:</strong></dt>
+<dd><p class="first">Checks to see if a job is up to date, and needs to be run.</p>
+<p class="last">Usually used in conjunction with <a class="reference internal" href="parallel.html#decorators-parallel"><em>@parallel()</em></a></p>
+</dd>
+</dl>
+<p><strong>Example</strong>:</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+<span class="kn">import</span> <span class="nn">os</span>
+<span class="k">def</span> <span class="nf">check_file_exists</span><span class="p">(</span><span class="n">input_file</span><span class="p">,</span> <span class="n">output_file</span><span class="p">):</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">exists</span><span class="p">(</span><span class="n">output_file</span><span class="p">):</span>
+ <span class="k">return</span> <span class="bp">True</span><span class="p">,</span> <span class="s">"Missing file </span><span class="si">%s</span><span class="s">"</span> <span class="o">%</span> <span class="n">output_file</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="k">return</span> <span class="bp">False</span><span class="p">,</span> <span class="s">"File </span><span class="si">%s</span><span class="s"> exists"</span> <span class="o">%</span> <span class="n">output_file</span>
+
+<span class="nd">@parallel</span><span class="p">([[</span><span class="bp">None</span><span class="p">,</span> <span class="s">"a.1"</span><span class="p">]])</span>
+<span class="nd">@check_if_uptodate</span><span class="p">(</span><span class="n">check_file_exists</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">create_if_necessary</span><span class="p">(</span><span class="n">input_file</span><span class="p">,</span> <span class="n">output_file</span><span class="p">):</span>
+ <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+
+<span class="n">pipeline_run</span><span class="p">([</span><span class="n">create_if_necessary</span><span class="p">])</span>
+</pre></div>
+</div>
+<p>Is equivalent to:</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+<span class="nd">@files</span><span class="p">(</span><span class="bp">None</span><span class="p">,</span> <span class="s">"a.1"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">create_if_necessary</span><span class="p">(</span><span class="n">input_file</span><span class="p">,</span> <span class="n">output_file</span><span class="p">):</span>
+ <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+
+<span class="n">pipeline_run</span><span class="p">([</span><span class="n">create_if_necessary</span><span class="p">])</span>
+</pre></div>
+</div>
+<p>Both produce the same output:</p>
+<div class="highlight-python"><pre>Task = create_if_necessary
+ Job = [null, "a.1"] completed</pre>
+</div>
+<p><strong>Parameters:</strong></p>
+</div></blockquote>
+<blockquote id="decorators-check-if-uptodate-dependency-checking-function">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>dependency_checking_function</em>:</dt>
+<dd><p class="first">returns two parameters: if job needs to be run, and a message explaining why</p>
+<p class="last">dependency_checking_func() needs to handle the same number of parameters as the
+task function e.g. <tt class="docutils literal"><span class="pre">input_file</span></tt> and <tt class="docutils literal"><span class="pre">output_file</span></tt> above.</p>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
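+<p>As a further illustration (a hypothetical sketch, not part of the ruffus documentation: <tt class="docutils literal"><span class="pre">check_file_timestamps</span></tt> is an invented name), a dependency checking function could also compare modification times, as long as it accepts the same parameters as the task function and returns the same two values:</p>
+<div class="highlight-python"><pre>import os
+
+# Hypothetical checker: re-run the job if the output is missing,
+# or if it is older than an existing input file.
+def check_file_timestamps(input_file, output_file):
+    if not os.path.exists(output_file):
+        return True, "Missing file %s" % output_file
+    if input_file and os.path.exists(input_file):
+        if os.path.getmtime(output_file) &lt; os.path.getmtime(input_file):
+            return True, "%s is older than %s" % (output_file, input_file)
+    return False, "File %s is up to date" % output_file
+
+# plugged in exactly like check_file_exists above:
+#   @check_if_uptodate(check_file_timestamps)</pre>
+</div>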
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="../contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#">@check_if_uptodate</a><ul>
+<li><a class="reference internal" href="#check-if-uptodate-dependency-checking-function"><em>@check_if_uptodate</em> (<cite>dependency_checking_function</cite>)</a></li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="files_ex.html"
+ title="previous chapter">Generating parameters on the fly for @files</a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="parallel.html"
+ title="next chapter">@parallel</a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../_sources/decorators/check_if_uptodate.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="originate.html">@originate</a> </li>
+ <li><a href="split.html">@split</a> </li>
+ <li><a href="transform.html">@transform</a> </li>
+ <li><a href="merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="subdivide.html">@subdivide</a> </li>
+ <li><a href="transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="collate.html">@collate</a> </li>
+ <li><a href="collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="graphviz.html">@graphviz</a> </li>
+ <li><a href="mkdir.html">@mkdir</a> </li>
+ <li><a href="follows.html">@follows / mkdir</a> </li>
+ <li><a href="posttask.html">@posttask touch_file</a> </li>
+ <li><a href="active_if.html">@active_if</a> </li>
+ <li><a href="jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="product.html">@product </a> </li>
+ <li><a href="permutations.html">@permutations </a> </li>
+ <li><a href="combinations.html">@combinations </a> </li>
+ <li><a href="combinations_with_replacement.html">@combinations_ _with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="parallel.html">@parallel</a> </li>
+ <li><a href="#">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="parallel.html" title="@parallel"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="files_ex.html" title="Generating parameters on the fly for @files"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../index.html">Home</a> | </li>
+ <li><a href="../contents.html">Contents</a> | </li>
+ <li><a href="../installation.html">Install</a> | </li>
+ <li><a href="../tutorials/new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../tutorials/new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../faq.html">FAQ</a> | </li>
+ <li><a href="../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../tutorials/new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../gallery.html">Gallery</a> | </li>
+ <li><a href="../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/decorators/collate.html b/doc/_build/html/decorators/collate.html
new file mode 100644
index 0000000..3b3b5d7
--- /dev/null
+++ b/doc/_build/html/decorators/collate.html
@@ -0,0 +1,362 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>@collate — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../_static/jquery.js"></script>
+ <script type="text/javascript" src="../_static/underscore.js"></script>
+ <script type="text/javascript" src="../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../index.html" />
+ <link rel="next" title="@collate with add_inputs and inputs" href="collate_ex.html" />
+ <link rel="prev" title="@transform with add_inputs and inputs" href="transform_ex.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="collate_ex.html" title="@collate with add_inputs and inputs"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="transform_ex.html" title="@transform with add_inputs and inputs"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../index.html">Home</a> | </li>
+ <li><a href="../contents.html">Contents</a> | </li>
+ <li><a href="../installation.html">Install</a> | </li>
+ <li><a href="../tutorials/new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../tutorials/new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../faq.html">FAQ</a> | </li>
+ <li><a href="../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../tutorials/new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../gallery.html">Gallery</a> | </li>
+ <li><a href="../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <span class="target" id="decorators-collate"></span><div class="admonition seealso" id="index-0">
+<p class="first admonition-title">See also</p>
+<ul class="last simple">
+<li><a class="reference internal" href="decorators.html#decorators"><em>Decorators</em></a> for more decorators</li>
+</ul>
+</div>
+<div class="section" id="collate">
+<h1>@collate<a class="headerlink" href="#collate" title="Permalink to this headline">¶</a></h1>
+<div class="section" id="collate-tasks-or-file-names-regex-matching-regex-formatter-matching-formatter-output-pattern-extra-parameters">
+<h2><em>@collate</em> ( <a class="reference internal" href="#decorators-collate-tasks-or-file-names"><cite>tasks_or_file_names</cite></a>, <a class="reference internal" href="indicator_objects.html#decorators-regex"><em>regex</em></a><em>(</em><a class="reference internal" href="#decorators-collate-matching-regex"><cite>matching_regex</cite></a><em>)</em> | <a class="reference internal" href="indicator_objects.html#decorators-formatter"><em>formatter</em></a><em>(</em><a class="refe [...]
+<blockquote>
+<div><dl class="docutils">
+<dt><strong>Purpose:</strong></dt>
+<dd><p class="first">Groups / collates sets of input files, each into a separate summary.</p>
+<p>Only out of date tasks (comparing input and output files) will be run.</p>
+<p>Output file names and strings in the extra parameters
+are determined from <a class="reference internal" href="#decorators-collate-tasks-or-file-names"><cite>tasks_or_file_names</cite></a>, i.e. from the output
+of upstream tasks, or a list of file names.</p>
+<p>String replacement occurs either through suffix matches via <a class="reference internal" href="indicator_objects.html#decorators-suffix"><em>suffix</em></a> or
+the <a class="reference internal" href="indicator_objects.html#decorators-formatter"><em>formatter</em></a> or <a class="reference internal" href="indicator_objects.html#decorators-regex"><em>regex</em></a> indicators.</p>
+<p><tt class="docutils literal"><span class="pre">@collate</span></tt> groups together all <strong>Input</strong> which result in identical <strong>Output</strong> and <strong>extra</strong>
+parameters.</p>
+<p class="last">It is a <strong>many to fewer</strong> operation.</p>
+</dd>
+<dt><strong>Example</strong>:</dt>
+<dd><p class="first"><tt class="docutils literal"><span class="pre">regex(r".*(\..+)"),</span> <span class="pre">"\1.summary"</span></tt> creates a separate summary file for each suffix:</p>
+<div class="last highlight-python"><div class="highlight"><pre><span class="n">animal_files</span> <span class="o">=</span> <span class="s">"a.fish"</span><span class="p">,</span> <span class="s">"b.fish"</span><span class="p">,</span> <span class="s">"c.mammals"</span><span class="p">,</span> <span class="s">"d.mammals"</span>
+<span class="c"># summarise by file suffix:</span>
+<span class="nd">@collate</span><span class="p">(</span><span class="n">animal_files</span><span class="p">,</span> <span class="n">regex</span><span class="p">(</span><span class="s">r"\.(.+)$"</span><span class="p">),</span> <span class="s">r'\1.summary'</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">summarize</span><span class="p">(</span><span class="n">infiles</span><span class="p">,</span> <span class="n">summary_file</span><span class="p">):</span>
+ <span class="k">pass</span>
+</pre></div>
+</div>
+</dd>
+</dl>
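+<p>For illustration, the example above would group the four files into two jobs, roughly equivalent to the following calls (a sketch following the grouping rules described below, not verbatim ruffus output):</p>
+<div class="highlight-python"><pre>summarize(["a.fish", "b.fish"],       "fish.summary")
+summarize(["c.mammals", "d.mammals"], "mammals.summary")</pre>
+</div>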
+<p><strong>Parameters:</strong></p>
+</div></blockquote>
+<blockquote id="decorators-collate-tasks-or-file-names">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>tasks_or_file_names</em></dt>
+<dd><p class="first">can be a:</p>
+<ol class="last arabic">
+<li><dl class="first docutils">
+<dt>Task / list of tasks (as in the example above).</dt>
+<dd><p class="first last">File names are taken from the output of the specified task(s)</p>
+</dd>
+</dl>
+</li>
+<li><dl class="first docutils">
+<dt>(Nested) list of file name strings.</dt>
+<dd><dl class="first last docutils">
+<dt>File names containing <tt class="docutils literal"><span class="pre">*[]?</span></tt> will be expanded as a <a class="reference external" href="http://docs.python.org/library/glob.html"><cite>glob</cite></a>.</dt>
+<dd><p class="first last">E.g.:<tt class="docutils literal"><span class="pre">"a.*"</span> <span class="pre">=></span> <span class="pre">"a.1",</span> <span class="pre">"a.2"</span></tt></p>
+</dd>
+</dl>
+</dd>
+</dl>
+</li>
+</ol>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
+<blockquote id="decorators-collate-matching-regex">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>matching_regex</em></dt>
+<dd><p class="first last">is a python regular expression string, which must be wrapped in
+a <a class="reference internal" href="indicator_objects.html#decorators-regex"><em>regex</em></a> indicator object
+See python <a class="reference external" href="http://docs.python.org/library/re.html">regular expression (re)</a>
+documentation for details of regular expression syntax</p>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
+<blockquote id="decorators-collate-matching-formatter">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>matching_formatter</em></dt>
+<dd><p class="first last">a <a class="reference internal" href="indicator_objects.html#decorators-formatter"><em>formatter</em></a> indicator object containing optionally
+a python <a class="reference external" href="http://docs.python.org/library/re.html">regular expression (re)</a>.</p>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
+<blockquote id="decorators-collate-output-pattern">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>output_pattern</em></dt>
+<dd><p class="first last">Specifies the resulting output file name(s).</p>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
+<blockquote id="decorators-collate-extra-parameters">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>extra_parameters</em></dt>
+<dd><p class="first last">Any extra parameters are passed verbatim to the task function</p>
+</dd>
+</dl>
+</li>
+</ul>
+<ol class="arabic simple">
+<li><em>outputs</em> and optional extra parameters are passed to the task function after string
+substitution of any string values. Non-string values are passed through unchanged.</li>
+<li>Each collate job consists of the input files that are aggregated by string substitution
+into a single set of output / extra parameter matches</li>
+<li>In the above cases, <tt class="docutils literal"><span class="pre">a.fish</span></tt> and <tt class="docutils literal"><span class="pre">b.fish</span></tt> both produce <tt class="docutils literal"><span class="pre">fish.summary</span></tt> after regular
+expression substitution, and are collated into a single job:
+<tt class="docutils literal"><span class="pre">["a.fish",</span> <span class="pre">"b.fish"</span> <span class="pre">-></span> <span class="pre">"fish.summary"]</span></tt>
+while <tt class="docutils literal"><span class="pre">c.mammals</span></tt> and <tt class="docutils literal"><span class="pre">d.mammals</span></tt> both produce <tt class="docutils literal"><span class="pre">mammals.summary</span></tt> and are collated into a separate job:
+<tt class="docutils literal"><span class="pre">["c.mammals",</span> <span class="pre">"d.mammals"</span> <span class="pre">-></span> <span class="pre">"mammals.summary"]</span></tt></li>
+</ol>
+<p><strong>Example2</strong>:</p>
+<blockquote>
+<div><p>Suppose we had the following files:</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="n">cows</span><span class="o">.</span><span class="n">mammals</span><span class="o">.</span><span class="n">animal</span>
+<span class="n">horses</span><span class="o">.</span><span class="n">mammals</span><span class="o">.</span><span class="n">animal</span>
+<span class="n">sheep</span><span class="o">.</span><span class="n">mammals</span><span class="o">.</span><span class="n">animal</span>
+
+<span class="n">snake</span><span class="o">.</span><span class="n">reptile</span><span class="o">.</span><span class="n">animal</span>
+<span class="n">lizard</span><span class="o">.</span><span class="n">reptile</span><span class="o">.</span><span class="n">animal</span>
+<span class="n">crocodile</span><span class="o">.</span><span class="n">reptile</span><span class="o">.</span><span class="n">animal</span>
+
+<span class="n">pufferfish</span><span class="o">.</span><span class="n">fish</span><span class="o">.</span><span class="n">animal</span>
+</pre></div>
+</div>
+<p>and we wanted to end up with three different resulting outputs:</p>
+<div class="highlight-python"><pre>cow.mammals.animal
+horse.mammals.animal
+sheep.mammals.animal
+ -> mammals.results
+
+snake.reptile.animal
+lizard.reptile.animal
+crocodile.reptile.animal
+ -> reptile.results
+
+pufferfish.fish.animal
+ -> fish.results</pre>
+</div>
+<p>This is the <tt class="docutils literal"><span class="pre">@collate</span></tt> code required:</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="n">animals</span> <span class="o">=</span> <span class="p">[</span> <span class="s">"cows.mammals.animal"</span><span class="p">,</span>
+ <span class="s">"horses.mammals.animal"</span><span class="p">,</span>
+ <span class="s">"sheep.mammals.animal"</span><span class="p">,</span>
+ <span class="s">"snake.reptile.animal"</span><span class="p">,</span>
+ <span class="s">"lizard.reptile.animal"</span><span class="p">,</span>
+ <span class="s">"crocodile.reptile.animal"</span><span class="p">,</span>
+ <span class="s">"pufferfish.fish.animal"</span><span class="p">]</span>
+
+<span class="nd">@collate</span><span class="p">(</span><span class="n">animals</span><span class="p">,</span> <span class="n">regex</span><span class="p">(</span><span class="s">r"(.+)\.(.+)\.animal"</span><span class="p">),</span> <span class="s">r"\2.results"</span><span class="p">)</span>
+<span class="c"># \1 = species [cow, horse]</span>
+<span class="c"># \2 = phylogenetics group [mammals, reptile, fish]</span>
+<span class="k">def</span> <span class="nf">summarize_animals_into_groups</span><span class="p">(</span><span class="n">species_file</span><span class="p">,</span> <span class="n">result_file</span><span class="p">):</span>
+ <span class="s">" ... more code here"</span>
+ <span class="k">pass</span>
+</pre></div>
+</div>
+</div></blockquote>
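+<p>For clarity, the grouping above is roughly equivalent to the following calls (a sketch derived from the file names and regular expression shown, not verbatim ruffus output):</p>
+<div class="highlight-python"><pre>summarize_animals_into_groups(["cows.mammals.animal",
+                               "horses.mammals.animal",
+                               "sheep.mammals.animal"],     "mammals.results")
+summarize_animals_into_groups(["snake.reptile.animal",
+                               "lizard.reptile.animal",
+                               "crocodile.reptile.animal"], "reptile.results")
+summarize_animals_into_groups(["pufferfish.fish.animal"],   "fish.results")</pre>
+</div>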
+</div></blockquote>
+<p>See <a class="reference internal" href="merge.html#decorators-merge"><em>@merge</em></a> for an alternative way to summarise files.</p>
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="../contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#">@collate</a><ul>
+<li><a class="reference internal" href="#collate-tasks-or-file-names-regex-matching-regex-formatter-matching-formatter-output-pattern-extra-parameters"><em>@collate</em> ( <cite>tasks_or_file_names</cite>, <tt class="docutils literal"><span class="pre">regex</span></tt><em>(</em><cite>matching_regex</cite><em>)</em> | <tt class="docutils literal"><span class="pre">formatter</span></tt><em>(</em><cite>matching_formatter</cite><em>)</em>, <cite>output_pattern</cite>, [<cite>extra_para [...]
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="transform_ex.html"
+ title="previous chapter">@transform with <tt class="docutils literal"><span class="pre">add_inputs</span></tt> and <tt class="docutils literal"><span class="pre">inputs</span></tt></a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="collate_ex.html"
+ title="next chapter">@collate with <tt class="docutils literal"><span class="pre">add_inputs</span></tt> and <tt class="docutils literal"><span class="pre">inputs</span></tt></a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../_sources/decorators/collate.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="originate.html">@originate</a> </li>
+ <li><a href="split.html">@split</a> </li>
+ <li><a href="transform.html">@transform</a> </li>
+ <li><a href="merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="subdivide.html">@subdivide</a> </li>
+ <li><a href="transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="#">@collate</a> </li>
+ <li><a href="collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="graphviz.html">@graphviz</a> </li>
+ <li><a href="mkdir.html">@mkdir</a> </li>
+ <li><a href="follows.html">@follows / mkdir</a> </li>
+ <li><a href="posttask.html">@posttask touch_file</a> </li>
+ <li><a href="active_if.html">@active_if</a> </li>
+ <li><a href="jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="product.html">@product </a> </li>
+ <li><a href="permutations.html">@permutations </a> </li>
+ <li><a href="combinations.html">@combinations </a> </li>
+ <li><a href="combinations_with_replacement.html">@combinations_ _with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="parallel.html">@parallel</a> </li>
+ <li><a href="check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="collate_ex.html" title="@collate with add_inputs and inputs"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="transform_ex.html" title="@transform with add_inputs and inputs"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../index.html">Home</a> | </li>
+ <li><a href="../contents.html">Contents</a> | </li>
+ <li><a href="../installation.html">Install</a> | </li>
+ <li><a href="../tutorials/new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../tutorials/new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../faq.html">FAQ</a> | </li>
+ <li><a href="../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../tutorials/new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../gallery.html">Gallery</a> | </li>
+ <li><a href="../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/decorators/collate_ex.html b/doc/_build/html/decorators/collate_ex.html
new file mode 100644
index 0000000..082d734
--- /dev/null
+++ b/doc/_build/html/decorators/collate_ex.html
@@ -0,0 +1,363 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>@collate with add_inputs and inputs — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../_static/jquery.js"></script>
+ <script type="text/javascript" src="../_static/underscore.js"></script>
+ <script type="text/javascript" src="../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../index.html" />
+ <link rel="next" title="@graphviz" href="graphviz.html" />
+ <link rel="prev" title="@collate" href="collate.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="graphviz.html" title="@graphviz"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="collate.html" title="@collate"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../index.html">Home</a> | </li>
+ <li><a href="../contents.html">Contents</a> | </li>
+ <li><a href="../installation.html">Install</a> | </li>
+ <li><a href="../tutorials/new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../tutorials/new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../faq.html">FAQ</a> | </li>
+ <li><a href="../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../tutorials/new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../gallery.html">Gallery</a> | </li>
+ <li><a href="../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <span class="target" id="decorators-collate-ex"></span><div class="admonition seealso" id="index-0">
+<p class="first admonition-title">See also</p>
+<ul class="last simple">
+<li><a class="reference internal" href="decorators.html#decorators"><em>Decorators</em></a> for more decorators</li>
+</ul>
+</div>
+<div class="section" id="collate-with-add-inputs-and-inputs">
+<h1>@collate with <tt class="docutils literal"><span class="pre">add_inputs</span></tt> and <tt class="docutils literal"><span class="pre">inputs</span></tt><a class="headerlink" href="#collate-with-add-inputs-and-inputs" title="Permalink to this headline">¶</a></h1>
+<div class="section" id="collate-tasks-or-file-names-regex-matching-regex-formatter-matching-formatter-inputs-input-pattern-or-glob-add-inputs-input-pattern-or-glob-output-pattern-extra-parameters">
+<h2><em>@collate</em> ( <a class="reference internal" href="#decorators-collate-ex-tasks-or-file-names"><cite>tasks_or_file_names</cite></a>, <a class="reference internal" href="indicator_objects.html#decorators-regex"><em>regex</em></a><em>(</em><a class="reference internal" href="#decorators-collate-ex-matching-regex"><cite>matching_regex</cite></a><em>)</em> | <a class="reference internal" href="indicator_objects.html#decorators-formatter"><em>formatter</em></a><em>(</em><a class [...]
+<blockquote>
+<div><dl class="docutils">
+<dt><strong>Purpose:</strong></dt>
+<dd><p class="first">Groups / collates sets of input files, each into a separate summary.</p>
+<p>This variant of <tt class="docutils literal"><span class="pre">@collate</span></tt> allows additional inputs or dependencies to be added
+dynamically to the task.</p>
+<p>Output file names are determined from <a class="reference internal" href="#decorators-collate-ex-tasks-or-file-names"><cite>tasks_or_file_names</cite></a>, i.e. from the output
+of upstream tasks, or a list of file names.</p>
+<p>This variant of <tt class="docutils literal"><span class="pre">@collate</span></tt> allows input file names to be derived in the same way.</p>
+<p><a class="reference internal" href="indicator_objects.html#decorators-add-inputs"><em>add_inputs</em></a> nests the the original input parameters in a list before adding additional dependencies.</p>
+<p><a class="reference internal" href="indicator_objects.html#decorators-inputs"><em>inputs</em></a> replaces the original input parameters wholescale.</p>
+<p class="last">Only out of date tasks (comparing input and output files) will be run</p>
+</dd>
+</dl>
+<p><strong>Example of</strong> <a class="reference internal" href="indicator_objects.html#decorators-add-inputs"><em>add_inputs</em></a></p>
+<blockquote>
+<div><p><tt class="docutils literal"><span class="pre">regex(r".*(\..+)"),</span> <span class="pre">"\1.summary"</span></tt> creates a separate summary file for each suffix.
+But we also add date of birth data for each species:</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="n">animal_files</span> <span class="o">=</span> <span class="s">"tuna.fish"</span><span class="p">,</span> <span class="s">"shark.fish"</span><span class="p">,</span> <span class="s">"dog.mammals"</span><span class="p">,</span> <span class="s">"cat.mammals"</span>
+<span class="c"># summarise by file suffix:</span>
+<span class="nd">@collate</span><span class="p">(</span><span class="n">animal_files</span><span class="p">,</span> <span class="n">regex</span><span class="p">(</span><span class="s">r".+\.(.+)$"</span><span class="p">),</span> <span class="n">add_inputs</span><span class="p">(</span><span class="s">r"\1.date_of_birth"</span><span class="p">),</span> <span class="s">r'\1.summary'</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">summarize</span><span class="p">(</span><span class="n">infiles</span><span class="p">,</span> <span class="n">summary_file</span><span class="p">):</span>
+ <span class="k">pass</span>
+</pre></div>
+</div>
+<p>This results in the following equivalent function calls:</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="n">summarize</span><span class="p">([</span> <span class="p">[</span><span class="s">"shark.fish"</span><span class="p">,</span> <span class="s">"fish.date_of_birth"</span> <span class="p">],</span>
+ <span class="p">[</span><span class="s">"tuna.fish"</span><span class="p">,</span> <span class="s">"fish.date_of_birth"</span> <span class="p">]</span> <span class="p">],</span> <span class="s">"fish.summary"</span><span class="p">)</span>
+<span class="n">summarize</span><span class="p">([</span> <span class="p">[</span><span class="s">"cat.mammals"</span><span class="p">,</span> <span class="s">"mammals.date_of_birth"</span><span class="p">],</span>
+ <span class="p">[</span><span class="s">"dog.mammals"</span><span class="p">,</span> <span class="s">"mammals.date_of_birth"</span><span class="p">]</span> <span class="p">],</span> <span class="s">"mammals.summary"</span><span class="p">)</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p><strong>Example of</strong> <a class="reference internal" href="indicator_objects.html#decorators-inputs"><em>add_inputs</em></a></p>
+<blockquote>
+<div><p>using <tt class="docutils literal"><span class="pre">inputs(...)</span></tt> will summarise only the dates of birth for each species group:</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="n">animal_files</span> <span class="o">=</span> <span class="s">"tuna.fish"</span><span class="p">,</span> <span class="s">"shark.fish"</span><span class="p">,</span> <span class="s">"dog.mammals"</span><span class="p">,</span> <span class="s">"cat.mammals"</span>
+<span class="c"># summarise by file suffix:</span>
+<span class="nd">@collate</span><span class="p">(</span><span class="n">animal_files</span><span class="p">,</span> <span class="n">regex</span><span class="p">(</span><span class="s">r".+\.(.+)$"</span><span class="p">),</span> <span class="n">inputs</span><span class="p">(</span><span class="s">r"\1.date_of_birth"</span><span class="p">),</span> <span class="s">r'\1.summary'</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">summarize</span><span class="p">(</span><span class="n">infiles</span><span class="p">,</span> <span class="n">summary_file</span><span class="p">):</span>
+ <span class="k">pass</span>
+</pre></div>
+</div>
+<p>This results in the following equivalent function calls:</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="n">summarize</span><span class="p">([</span><span class="s">"fish.date_of_birth"</span> <span class="p">],</span> <span class="s">"fish.summary"</span><span class="p">)</span>
+<span class="n">summarize</span><span class="p">([</span><span class="s">"mammals.date_of_birth"</span><span class="p">],</span> <span class="s">"mammals.summary"</span><span class="p">)</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p><strong>Parameters:</strong></p>
+</div></blockquote>
+<blockquote id="decorators-collate-ex-tasks-or-file-names">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>tasks_or_file_names</em></dt>
+<dd><p class="first">can be a:</p>
+<ol class="last arabic">
+<li><dl class="first docutils">
+<dt>Task / list of tasks (as in the example above).</dt>
+<dd><p class="first last">File names are taken from the output of the specified task(s)</p>
+</dd>
+</dl>
+</li>
+<li><dl class="first docutils">
+<dt>(Nested) list of file name strings.</dt>
+<dd><dl class="first last docutils">
+<dt>File names containing <tt class="docutils literal"><span class="pre">*[]?</span></tt> will be expanded as a <a class="reference external" href="http://docs.python.org/library/glob.html"><cite>glob</cite></a>.</dt>
+<dd><p class="first last">E.g.:<tt class="docutils literal"><span class="pre">"a.*"</span> <span class="pre">=></span> <span class="pre">"a.1",</span> <span class="pre">"a.2"</span></tt></p>
+</dd>
+</dl>
+</dd>
+</dl>
+</li>
+</ol>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
+<blockquote id="decorators-collate-ex-matching-regex">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>matching_regex</em></dt>
+<dd><p class="first last">is a python regular expression string, which must be wrapped in
+a <a class="reference internal" href="indicator_objects.html#decorators-regex"><em>regex</em></a> indicator object
+See python <a class="reference external" href="http://docs.python.org/library/re.html">regular expression (re)</a>
+documentation for details of regular expression syntax</p>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
+<blockquote id="decorators-collate-ex-matching-formatter">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>matching_formatter</em></dt>
+<dd><p class="first last">a <a class="reference internal" href="indicator_objects.html#decorators-formatter"><em>formatter</em></a> indicator object containing optionally
+a python <a class="reference external" href="http://docs.python.org/library/re.html">regular expression (re)</a>.</p>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
+<blockquote id="decorators-collate-ex-input-pattern-or-glob">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>input_pattern</em></dt>
+<dd><p class="first">Specifies the resulting input(s) to each job.
+Must be wrapped in an <a class="reference internal" href="indicator_objects.html#decorators-inputs"><em>inputs</em></a> or an <a class="reference internal" href="indicator_objects.html#decorators-add-inputs"><em>add_inputs</em></a> indicator object.</p>
+<p>Can be a:</p>
+<ol class="last arabic">
+<li><dl class="first docutils">
+<dt>Task / list of tasks (as in the example above).</dt>
+<dd><p class="first last">File names are taken from the output of the specified task(s)</p>
+</dd>
+</dl>
+</li>
+<li><dl class="first docutils">
+<dt>(Nested) list of file name strings.</dt>
+<dd><p class="first last">Strings will be subject to substitution.
+File names containing <tt class="docutils literal"><span class="pre">*[]?</span></tt> will be expanded as a <a class="reference external" href="http://docs.python.org/library/glob.html"><cite>glob</cite></a>.
+E.g.:<tt class="docutils literal"><span class="pre">"a.*"</span> <span class="pre">=></span> <span class="pre">"a.1",</span> <span class="pre">"a.2"</span></tt></p>
+</dd>
+</dl>
+</li>
+</ol>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
+<blockquote id="decorators-collate-ex-output-pattern">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>output_pattern</em></dt>
+<dd><p class="first last">Specifies the resulting output file name(s).</p>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
+<blockquote id="decorators-collate-ex-extra-parameters">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>extra_parameters</em></dt>
+<dd><p class="first last">Any extra parameters are passed verbatim to the task function</p>
+</dd>
+</dl>
+</li>
+</ul>
+<ol class="arabic simple">
+<li><em>outputs</em> and optional extra parameters are passed to the task function after string
+substitution of any string values. Non-string values are passed through unchanged.</li>
+<li>Each collate job consists of the input files that are aggregated by string substitution
+into a single set of output / extra parameter matches</li>
+</ol>
+</div></blockquote>
+<p>See <a class="reference internal" href="collate.html#decorators-collate"><em>@collate</em></a> for more straightforward ways to use collate.</p>
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="../contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#">@collate with <tt class="docutils literal"><span class="pre">add_inputs</span></tt> and <tt class="docutils literal"><span class="pre">inputs</span></tt></a><ul>
+<li><a class="reference internal" href="#collate-tasks-or-file-names-regex-matching-regex-formatter-matching-formatter-inputs-input-pattern-or-glob-add-inputs-input-pattern-or-glob-output-pattern-extra-parameters"><em>@collate</em> ( <cite>tasks_or_file_names</cite>, <tt class="docutils literal"><span class="pre">regex</span></tt><em>(</em><cite>matching_regex</cite><em>)</em> | <tt class="docutils literal"><span class="pre">formatter</span></tt><em>(</em><cite>matching_formatter</c [...]
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="collate.html"
+ title="previous chapter">@collate</a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="graphviz.html"
+ title="next chapter">@graphviz</a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../_sources/decorators/collate_ex.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="originate.html">@originate</a> </li>
+ <li><a href="split.html">@split</a> </li>
+ <li><a href="transform.html">@transform</a> </li>
+ <li><a href="merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="subdivide.html">@subdivide</a> </li>
+ <li><a href="transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="collate.html">@collate</a> </li>
+ <li><a href="#">@collate (add_inputs)</a> </li>
+ <li><a href="graphviz.html">@graphviz</a> </li>
+ <li><a href="mkdir.html">@mkdir</a> </li>
+ <li><a href="follows.html">@follows / mkdir</a> </li>
+ <li><a href="posttask.html">@posttask touch_file</a> </li>
+ <li><a href="active_if.html">@active_if</a> </li>
+ <li><a href="jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="product.html">@product </a> </li>
+ <li><a href="permutations.html">@permutations </a> </li>
+ <li><a href="combinations.html">@combinations </a> </li>
+ <li><a href="combinations_with_replacement.html">@combinations_ _with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="parallel.html">@parallel</a> </li>
+ <li><a href="check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="graphviz.html" title="@graphviz"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="collate.html" title="@collate"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../index.html">Home</a> | </li>
+ <li><a href="../contents.html">Contents</a> | </li>
+ <li><a href="../installation.html">Install</a> | </li>
+ <li><a href="../tutorials/new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../tutorials/new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../faq.html">FAQ</a> | </li>
+ <li><a href="../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../tutorials/new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../gallery.html">Gallery</a> | </li>
+ <li><a href="../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/decorators/combinations.html b/doc/_build/html/decorators/combinations.html
new file mode 100644
index 0000000..fa340eb
--- /dev/null
+++ b/doc/_build/html/decorators/combinations.html
@@ -0,0 +1,351 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>@combinations — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../_static/jquery.js"></script>
+ <script type="text/javascript" src="../_static/underscore.js"></script>
+ <script type="text/javascript" src="../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../index.html" />
+ <link rel="next" title="@combinations_with_replacement" href="combinations_with_replacement.html" />
+ <link rel="prev" title="@permutations" href="permutations.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="combinations_with_replacement.html" title="@combinations_with_replacement"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="permutations.html" title="@permutations"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../index.html">Home</a> | </li>
+ <li><a href="../contents.html">Contents</a> | </li>
+ <li><a href="../installation.html">Install</a> | </li>
+ <li><a href="../tutorials/new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../tutorials/new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../faq.html">FAQ</a> | </li>
+ <li><a href="../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../tutorials/new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../gallery.html">Gallery</a> | </li>
+ <li><a href="../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <span class="target" id="decorators-combinations"></span><div class="admonition seealso" id="index-0">
+<p class="first admonition-title">See also</p>
+<ul class="last simple">
+<li><a class="reference internal" href="decorators.html#decorators"><em>Decorators</em></a> for more decorators</li>
+</ul>
+</div>
+<div class="section" id="combinations">
+<h1>@combinations<a class="headerlink" href="#combinations" title="Permalink to this headline">¶</a></h1>
+<div class="section" id="combinations-tasks-or-file-names-formatter-matching-formatter-output-pattern-extra-parameters">
+<h2><em>@combinations</em> ( <a class="reference internal" href="#decorators-combinations-tasks-or-file-names"><cite>tasks_or_file_names</cite></a>, <a class="reference internal" href="indicator_objects.html#decorators-formatter"><em>formatter</em></a><em>(</em><a class="reference internal" href="#decorators-combinations-matching-formatter"><cite>matching_formatter</cite></a><em>)</em>, <a class="reference internal" href="#decorators-combinations-output-pattern"><cite>output_pattern< [...]
+<blockquote>
+<div><p><strong>Purpose:</strong></p>
+<blockquote>
+<div><p>Generates the <strong>combinations</strong> between all the elements of a set of <strong>Input</strong> (e.g. <strong>A B C D</strong>),
+i.e. r-length tuples of <em>input</em> elements with no repeated elements (no <strong>A A</strong>)
+and where the order within each tuple is irrelevant (either <strong>A B</strong> or <strong>B A</strong>, not both).</p>
+<p>The effect is analogous to the python <a class="reference external" href="http://docs.python.org/2/library/itertools.html#itertools.combinations">itertools</a>
+function of the same name:</p>
+<div class="highlight-pycon"><div class="highlight"><pre><span class="gp">>>> </span><span class="kn">from</span> <span class="nn">itertools</span> <span class="kn">import</span> <span class="n">combinations</span>
+<span class="hll"><span class="gp">>>> </span><span class="c"># combinations('ABCD', 3) --> ABC ABD ACD BCD</span>
+</span><span class="gp">>>> </span><span class="p">[</span> <span class="s">""</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">a</span><span class="p">)</span> <span class="k">for</span> <span class="n">a</span> <span class="ow">in</span> <span class="n">combinations</span><span class="p">(</span><span class="s">"ABCD"</span><span class="p">,</span> <span class="mi">3</span><span class="p">)]</span>
+<span class="go">['ABC', 'ABD', 'ACD', 'BCD']</span>
+</pre></div>
+</div>
+<p>Only out of date tasks (comparing input and output files) will be run.</p>
+<p>Output file names and strings in the extra parameters
+are determined from <a class="reference internal" href="#decorators-combinations-tasks-or-file-names"><cite>tasks_or_file_names</cite></a>, i.e. from the output
+of upstream tasks, or a list of file names, after string replacement via
+<a class="reference internal" href="indicator_objects.html#decorators-formatter"><em>formatter</em></a>.</p>
+<p>The replacement strings require an extra level of indirection to refer to
+parsed components:</p>
+<blockquote>
+<div><ol class="arabic simple">
+<li>The first index refers to which <em>set</em> of inputs within each tuple.</li>
+<li>The second index refers to which file name within that particular <em>set</em> of inputs.</li>
+</ol>
+</div></blockquote>
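+<p>For example (a sketch reading of the index levels, using the <strong>A,B,C,D</strong> input sets from the example below):</p>
+<div class="highlight-python"><pre># one job receives the input sets:
+#   ['A.1_start', 'A.2_start'], ['B.1_start', 'B.2_start'], ['C.1_start', 'C.2_start']
+#
+# "{basename[0][0]}"  ->  "A"      # 1st set of inputs, 1st file name
+# "{basename[0][1]}"  ->  "A"      # 1st set of inputs, 2nd file name
+# "{basename[1][0]}"  ->  "B"      # 2nd set of inputs, 1st file name
+# "{basename[2][1]}"  ->  "C"      # 3rd set of inputs, 2nd file name
+#
+# (both files in the first set happen to share the basename "A")</pre>
+</div>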
+</div></blockquote>
+<p><strong>Example</strong>:</p>
+<blockquote>
+<div><p>Calculates the <strong>@combinations</strong> of <strong>A,B,C,D</strong> files</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+<span class="kn">from</span> <span class="nn">ruffus.combinatorics</span> <span class="kn">import</span> <span class="o">*</span>
+
+<span class="c"># initial file pairs</span>
+<span class="nd">@originate</span><span class="p">([</span> <span class="p">[</span><span class="s">'A.1_start'</span><span class="p">,</span> <span class="s">'A.2_start'</span><span class="p">],</span>
+ <span class="p">[</span><span class="s">'B.1_start'</span><span class="p">,</span> <span class="s">'B.2_start'</span><span class="p">],</span>
+ <span class="p">[</span><span class="s">'C.1_start'</span><span class="p">,</span> <span class="s">'C.2_start'</span><span class="p">],</span>
+ <span class="p">[</span><span class="s">'D.1_start'</span><span class="p">,</span> <span class="s">'D.2_start'</span><span class="p">]])</span>
+<span class="k">def</span> <span class="nf">create_initial_files_ABCD</span><span class="p">(</span><span class="n">output_files</span><span class="p">):</span>
+ <span class="k">for</span> <span class="n">output_file</span> <span class="ow">in</span> <span class="n">output_files</span><span class="p">:</span>
+ <span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span> <span class="k">as</span> <span class="n">oo</span><span class="p">:</span> <span class="k">pass</span>
+
+<span class="hll"><span class="c"># @combinations</span>
+</span><span class="nd">@combinations</span><span class="p">(</span><span class="n">create_initial_files_ABCD</span><span class="p">,</span> <span class="c"># Input</span>
+ <span class="n">formatter</span><span class="p">(),</span> <span class="c"># match input files</span>
+
+<span class="hll"> <span class="c"># tuple of 3 at a time</span>
+</span> <span class="mi">3</span><span class="p">,</span>
+
+<span class="hll"> <span class="c"># Output Replacement string</span>
+</span> <span class="s">"{path[0][0]}/"</span>
+ <span class="s">"{basename[0][1]}_vs_"</span>
+ <span class="s">"{basename[1][1]}_vs_"</span>
+ <span class="s">"{basename[2][1]}.combinations"</span><span class="p">,</span>
+<span class="hll">
+</span> <span class="c"># Extra parameter: path for 1st set of files, 1st file name</span>
+ <span class="s">"{path[0][0]}"</span><span class="p">,</span>
+<span class="hll">
+</span><span class="hll"> <span class="c"># Extra parameter</span>
+</span><span class="hll"> <span class="p">[</span><span class="s">"{basename[0][0]}"</span><span class="p">,</span> <span class="c"># basename for 1st set of files, 1st file name</span>
+</span> <span class="s">"{basename[1][0]}"</span><span class="p">,</span> <span class="c"># 2nd</span>
+ <span class="s">"{basename[2][0]}"</span><span class="p">,</span> <span class="c"># 3rd</span>
+ <span class="p">])</span>
+<span class="k">def</span> <span class="nf">combinations_task</span><span class="p">(</span><span class="n">input_file</span><span class="p">,</span> <span class="n">output_parameter</span><span class="p">,</span> <span class="n">shared_path</span><span class="p">,</span> <span class="n">basenames</span><span class="p">):</span>
+ <span class="k">print</span> <span class="s">" - "</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">basenames</span><span class="p">)</span>
+
+
+<span class="c">#</span>
+<span class="c"># Run</span>
+<span class="c">#</span>
+<span class="n">pipeline_run</span><span class="p">(</span><span class="n">verbose</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
+</pre></div>
+</div>
+<p>This results in:</p>
+<div class="highlight-pycon"><div class="highlight"><pre><span class="gp">>>> </span><span class="n">pipeline_run</span><span class="p">(</span><span class="n">verbose</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
+<span class="go">A - B - C</span>
+<span class="go">A - B - D</span>
+<span class="go">A - C - D</span>
+<span class="go">B - C - D</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p><strong>Parameters:</strong></p>
+</div></blockquote>
+<blockquote id="decorators-combinations-tasks-or-file-names">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>tasks_or_file_names</em></dt>
+<dd><p class="first">can be a:</p>
+<ol class="last arabic">
+<li><dl class="first docutils">
+<dt>Task / list of tasks (as in the example above).</dt>
+<dd><p class="first last">File names are taken from the output of the specified task(s)</p>
+</dd>
+</dl>
+</li>
+<li><dl class="first docutils">
+<dt>(Nested) list of file name strings.</dt>
+<dd><dl class="first last docutils">
+<dt>File names containing <tt class="docutils literal"><span class="pre">*[]?</span></tt> will be expanded as a <a class="reference external" href="http://docs.python.org/library/glob.html"><cite>glob</cite></a>.</dt>
+<dd><p class="first last">E.g.:<tt class="docutils literal"><span class="pre">"a.*"</span> <span class="pre">=></span> <span class="pre">"a.1",</span> <span class="pre">"a.2"</span></tt></p>
+</dd>
+</dl>
+</dd>
+</dl>
+</li>
+</ol>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
+<blockquote id="decorators-combinations-matching-formatter">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>matching_formatter</em></dt>
+<dd><p class="first last">a <a class="reference internal" href="indicator_objects.html#decorators-formatter"><em>formatter</em></a> indicator object containing optionally
+a python <a class="reference external" href="http://docs.python.org/library/re.html">regular expression (re)</a>.</p>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
+<blockquote id="decorators-combinations-output-pattern">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>output_pattern</em></dt>
+<dd><p class="first last">Specifies the resulting output file name(s) after string
+substitution</p>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
+<blockquote id="decorators-combinations-extra-parameters">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>extra_parameters</em></dt>
+<dd><p class="first last">Optional extra parameters are passed to the functions after string
+substitution</p>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
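+<p>As a rough, non-ruffus sketch of how the two-level indices in the replacement strings
+above resolve, the following plain Python reproduces the file-name part of the example
+output (ignoring the <tt class="docutils literal"><span class="pre">{path[0][0]}</span></tt> directory); the <tt class="docutils literal"><span class="pre">basename()</span></tt> helper here is only a
+crude stand-in for <a class="reference internal" href="indicator_objects.html#decorators-formatter"><em>formatter</em></a> parsing:</p>
+<div class="highlight-python"><div class="highlight"><pre># Illustrative sketch only: mimics how "{basename[0][1]}_vs_..." expands
+# for each 3-tuple of input sets generated by @combinations above.
+import os
+from itertools import combinations
+
+input_sets = [['A.1_start', 'A.2_start'],
+              ['B.1_start', 'B.2_start'],
+              ['C.1_start', 'C.2_start'],
+              ['D.1_start', 'D.2_start']]
+
+def basename(file_name):
+    # stand-in for formatter's {basename...}: strip directory and extension
+    return os.path.splitext(os.path.basename(file_name))[0]
+
+for job_input in combinations(input_sets, 3):
+    # first index = which input set in the tuple, second index = which file in that set
+    print(basename(job_input[0][1]) + "_vs_" +
+          basename(job_input[1][1]) + "_vs_" +
+          basename(job_input[2][1]) + ".combinations")
+
+# A_vs_B_vs_C.combinations, A_vs_B_vs_D.combinations,
+# A_vs_C_vs_D.combinations, B_vs_C_vs_D.combinations
+</pre></div></div>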
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="../contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#">@combinations</a><ul>
+<li><a class="reference internal" href="#combinations-tasks-or-file-names-formatter-matching-formatter-output-pattern-extra-parameters"><em>@combinations</em> ( <cite>tasks_or_file_names</cite>, <tt class="docutils literal"><span class="pre">formatter</span></tt><em>(</em><cite>matching_formatter</cite><em>)</em>, <cite>output_pattern</cite>, [<cite>extra_parameters</cite>,...] )</a></li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="permutations.html"
+ title="previous chapter">@permutations</a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="combinations_with_replacement.html"
+ title="next chapter">@combinations_with_replacement</a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../_sources/decorators/combinations.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="originate.html">@originate</a> </li>
+ <li><a href="split.html">@split</a> </li>
+ <li><a href="transform.html">@transform</a> </li>
+ <li><a href="merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="subdivide.html">@subdivide</a> </li>
+ <li><a href="transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="collate.html">@collate</a> </li>
+ <li><a href="collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="graphviz.html">@graphviz</a> </li>
+ <li><a href="mkdir.html">@mkdir</a> </li>
+ <li><a href="follows.html">@follows / mkdir</a> </li>
+ <li><a href="posttask.html">@posttask touch_file</a> </li>
+ <li><a href="active_if.html">@active_if</a> </li>
+ <li><a href="jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="product.html">@product </a> </li>
+ <li><a href="permutations.html">@permutations </a> </li>
+ <li><a href="#">@combinations </a> </li>
+                                        <li><a href="combinations_with_replacement.html">@combinations_with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="parallel.html">@parallel</a> </li>
+ <li><a href="check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="combinations_with_replacement.html" title="@combinations_with_replacement"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="permutations.html" title="@permutations"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../index.html">Home</a> | </li>
+ <li><a href="../contents.html">Contents</a> | </li>
+ <li><a href="../installation.html">Install</a> | </li>
+ <li><a href="../tutorials/new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../tutorials/new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../faq.html">FAQ</a> | </li>
+ <li><a href="../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../tutorials/new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../gallery.html">Gallery</a> | </li>
+ <li><a href="../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/decorators/combinations_with_replacement.html b/doc/_build/html/decorators/combinations_with_replacement.html
new file mode 100644
index 0000000..406b35f
--- /dev/null
+++ b/doc/_build/html/decorators/combinations_with_replacement.html
@@ -0,0 +1,355 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>@combinations_with_replacement — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../_static/jquery.js"></script>
+ <script type="text/javascript" src="../_static/underscore.js"></script>
+ <script type="text/javascript" src="../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../index.html" />
+ <link rel="next" title="Generating parameters on the fly for @files" href="files_ex.html" />
+ <link rel="prev" title="@combinations" href="combinations.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="files_ex.html" title="Generating parameters on the fly for @files"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="combinations.html" title="@combinations"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../index.html">Home</a> | </li>
+ <li><a href="../contents.html">Contents</a> | </li>
+ <li><a href="../installation.html">Install</a> | </li>
+ <li><a href="../tutorials/new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../tutorials/new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../faq.html">FAQ</a> | </li>
+ <li><a href="../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../tutorials/new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../gallery.html">Gallery</a> | </li>
+ <li><a href="../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <span class="target" id="decorators-combinations-with-replacement"></span><div class="admonition seealso" id="index-0">
+<p class="first admonition-title">See also</p>
+<ul class="last simple">
+<li><a class="reference internal" href="decorators.html#decorators"><em>Decorators</em></a> for more decorators</li>
+</ul>
+</div>
+<div class="section" id="combinations-with-replacement">
+<h1>@combinations_with_replacement<a class="headerlink" href="#combinations-with-replacement" title="Permalink to this headline">¶</a></h1>
+<div class="section" id="combinations-with-replacement-tasks-or-file-names-formatter-matching-formatter-output-pattern-extra-parameters">
+<h2><em>@combinations_with_replacement</em> ( <a class="reference internal" href="#decorators-combinations-with-replacement-tasks-or-file-names"><cite>tasks_or_file_names</cite></a>, <a class="reference internal" href="indicator_objects.html#decorators-formatter"><em>formatter</em></a><em>(</em><a class="reference internal" href="#decorators-combinations-with-replacement-matching-formatter"><cite>matching_formatter</cite></a><em>)</em>, <a class="reference internal" href="#decorators [...]
+<blockquote>
+<div><p><strong>Purpose:</strong></p>
+<blockquote>
+<div><p>Generates the <strong>combinations_with_replacement</strong> between all the elements of a set of <strong>Input</strong> (e.g. <strong>A B C D</strong>),
+i.e. r-length tuples of <em>input</em> elements, including tuples with repeated elements (<strong>A A</strong>),
+and where the order of elements within each tuple is irrelevant (either <strong>A B</strong> or <strong>B A</strong> is generated, not both).</p>
+<p>The effect is analogous to the python <a class="reference external" href="http://docs.python.org/2/library/itertools.html#itertools.combinations_with_replacement">itertools</a>
+function of the same name:</p>
+<div class="highlight-pycon"><div class="highlight"><pre><span class="gp">>>> </span><span class="kn">from</span> <span class="nn">itertools</span> <span class="kn">import</span> <span class="n">combinations_with_replacement</span>
+<span class="hll"><span class="gp">>>> </span><span class="c"># combinations_with_replacement('ABCD', 2) --> AA AB AC AD BB BC BD CC CD DD</span>
+</span><span class="gp">>>> </span><span class="p">[</span> <span class="s">""</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">a</span><span class="p">)</span> <span class="k">for</span> <span class="n">a</span> <span class="ow">in</span> <span class="n">combinations_with_replacement</span><span class="p">(</span><span class="s">'ABCD'</span><span class="p">,</span> <span class="mi">2</span><span class="p">)]</span>
+<span class="go">['AA', 'AB', 'AC', 'AD', 'BB', 'BC', 'BD', 'CC', 'CD', 'DD']</span>
+</pre></div>
+</div>
+<p>Only out-of-date tasks (determined by comparing input and output files) will be run.</p>
+<p>Output file names and strings in the extra parameters
+are determined from <a class="reference internal" href="#decorators-combinations-with-replacement-tasks-or-file-names"><cite>tasks_or_file_names</cite></a>, i.e. from the output
+of upstream tasks, or a list of file names, after string replacement via
+<a class="reference internal" href="indicator_objects.html#decorators-formatter"><em>formatter</em></a>.</p>
+<p>The replacement strings require an extra level of indirection to refer to
+parsed components:</p>
+<blockquote>
+<div><ol class="arabic simple">
+<li>The first level refers to which <em>set</em> in each tuple of inputs.</li>
+<li>The second level refers to which input file in any particular <em>set</em> of inputs.</li>
+</ol>
+</div></blockquote>
+</div></blockquote>
+<p><strong>Example</strong>:</p>
+<blockquote>
+<div><p>Calculates the <strong>@combinations_with_replacement</strong> of <strong>A,B,C,D</strong> files</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+<span class="kn">from</span> <span class="nn">ruffus.combinatorics</span> <span class="kn">import</span> <span class="o">*</span>
+
+<span class="c"># initial file pairs</span>
+<span class="nd">@originate</span><span class="p">([</span> <span class="p">[</span><span class="s">'A.1_start'</span><span class="p">,</span> <span class="s">'A.2_start'</span><span class="p">],</span>
+ <span class="p">[</span><span class="s">'B.1_start'</span><span class="p">,</span> <span class="s">'B.2_start'</span><span class="p">],</span>
+ <span class="p">[</span><span class="s">'C.1_start'</span><span class="p">,</span> <span class="s">'C.2_start'</span><span class="p">],</span>
+ <span class="p">[</span><span class="s">'D.1_start'</span><span class="p">,</span> <span class="s">'D.2_start'</span><span class="p">]])</span>
+<span class="k">def</span> <span class="nf">create_initial_files_ABCD</span><span class="p">(</span><span class="n">output_files</span><span class="p">):</span>
+ <span class="k">for</span> <span class="n">output_file</span> <span class="ow">in</span> <span class="n">output_files</span><span class="p">:</span>
+ <span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span> <span class="k">as</span> <span class="n">oo</span><span class="p">:</span> <span class="k">pass</span>
+
+<span class="hll"><span class="c"># @combinations_with_replacement</span>
+</span><span class="nd">@combinations_with_replacement</span><span class="p">(</span><span class="n">create_initial_files_ABCD</span><span class="p">,</span> <span class="c"># Input</span>
+ <span class="n">formatter</span><span class="p">(),</span> <span class="c"># match input files</span>
+
+<span class="hll"> <span class="c"># tuple of 2 at a time</span>
+</span> <span class="mi">2</span><span class="p">,</span>
+
+<span class="hll"> <span class="c"># Output Replacement string</span>
+</span> <span class="s">"{path[0][0]}/"</span>
+ <span class="s">"{basename[0][1]}_vs_"</span>
+ <span class="s">"{basename[1][1]}.combinations_with_replacement"</span><span class="p">,</span>
+
+<span class="hll"> <span class="c"># Extra parameter: path for 1st set of files, 1st file name</span>
+</span> <span class="s">"{path[0][0]}"</span><span class="p">,</span>
+
+<span class="hll"> <span class="c"># Extra parameter</span>
+</span><span class="hll"> <span class="p">[</span><span class="s">"{basename[0][0]}"</span><span class="p">,</span> <span class="c"># basename for 1st set of files, 1st file name</span>
+</span><span class="hll">                                   <span class="s">"{basename[1][0]}"</span><span class="p">,</span> <span class="c"># 2nd</span>
+</span> <span class="p">])</span>
+<span class="k">def</span> <span class="nf">combinations_with_replacement_task</span><span class="p">(</span><span class="n">input_file</span><span class="p">,</span> <span class="n">output_parameter</span><span class="p">,</span> <span class="n">shared_path</span><span class="p">,</span> <span class="n">basenames</span><span class="p">):</span>
+ <span class="k">print</span> <span class="s">" - "</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">basenames</span><span class="p">)</span>
+
+
+<span class="c">#</span>
+<span class="c"># Run</span>
+<span class="c">#</span>
+<span class="n">pipeline_run</span><span class="p">(</span><span class="n">verbose</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
+</pre></div>
+</div>
+<p>This results in:</p>
+<div class="highlight-pycon"><div class="highlight"><pre><span class="gp">>>> </span><span class="n">pipeline_run</span><span class="p">(</span><span class="n">verbose</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
+<span class="go">A - A</span>
+<span class="go">A - B</span>
+<span class="go">A - C</span>
+<span class="go">A - D</span>
+<span class="go">B - B</span>
+<span class="go">B - C</span>
+<span class="go">B - D</span>
+<span class="go">C - C</span>
+<span class="go">C - D</span>
+<span class="go">D - D</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p><strong>Parameters:</strong></p>
+</div></blockquote>
+<blockquote id="decorators-combinations-with-replacement-tasks-or-file-names">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>tasks_or_file_names</em></dt>
+<dd><p class="first">can be a:</p>
+<ol class="last arabic">
+<li><dl class="first docutils">
+<dt>Task / list of tasks (as in the example above).</dt>
+<dd><p class="first last">File names are taken from the output of the specified task(s)</p>
+</dd>
+</dl>
+</li>
+<li><dl class="first docutils">
+<dt>(Nested) list of file name strings.</dt>
+<dd><dl class="first last docutils">
+<dt>File names containing <tt class="docutils literal"><span class="pre">*[]?</span></tt> will be expanded as a <a class="reference external" href="http://docs.python.org/library/glob.html"><cite>glob</cite></a>.</dt>
+<dd><p class="first last">E.g.:<tt class="docutils literal"><span class="pre">"a.*"</span> <span class="pre">=></span> <span class="pre">"a.1",</span> <span class="pre">"a.2"</span></tt></p>
+</dd>
+</dl>
+</dd>
+</dl>
+</li>
+</ol>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
+<blockquote id="decorators-combinations-with-replacement-matching-formatter">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>matching_formatter</em></dt>
+<dd><p class="first last">a <a class="reference internal" href="indicator_objects.html#decorators-formatter"><em>formatter</em></a> indicator object containing optionally
+a python <a class="reference external" href="http://docs.python.org/library/re.html">regular expression (re)</a>.</p>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
+<blockquote id="decorators-combinations-with-replacement-output-pattern">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>output_pattern</em></dt>
+<dd><p class="first last">Specifies the resulting output file name(s) after string
+substitution</p>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
+<blockquote id="decorators-combinations-with-replacement-extra-parameters">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>extra_parameters</em></dt>
+<dd><p class="first last">Optional extra parameters are passed to the functions after string
+substitution</p>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
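+<p>As a quick, non-ruffus illustration of which pairings the example above creates, the
+same tuples can be listed with plain <tt class="docutils literal"><span class="pre">itertools</span></tt>; each letter stands for one
+originated pair of files:</p>
+<div class="highlight-python"><div class="highlight"><pre># Illustrative sketch only: the 10 pairings behind the 10 jobs printed
+# by pipeline_run() in the example above (repeats allowed, order ignored).
+from itertools import combinations_with_replacement
+
+for first, second in combinations_with_replacement("ABCD", 2):
+    print(first + " - " + second)
+
+# A - A, A - B, A - C, A - D, B - B, B - C, B - D, C - C, C - D, D - D
+</pre></div></div>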
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="../contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#">@combinations_with_replacement</a><ul>
+<li><a class="reference internal" href="#combinations-with-replacement-tasks-or-file-names-formatter-matching-formatter-output-pattern-extra-parameters"><em>@combinations_with_replacement</em> ( <cite>tasks_or_file_names</cite>, <tt class="docutils literal"><span class="pre">formatter</span></tt><em>(</em><cite>matching_formatter</cite><em>)</em>, <cite>output_pattern</cite>, [<cite>extra_parameters</cite>,...] )</a></li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="combinations.html"
+ title="previous chapter">@combinations</a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="files_ex.html"
+ title="next chapter">Generating parameters on the fly for @files</a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../_sources/decorators/combinations_with_replacement.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="originate.html">@originate</a> </li>
+ <li><a href="split.html">@split</a> </li>
+ <li><a href="transform.html">@transform</a> </li>
+ <li><a href="merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="subdivide.html">@subdivide</a> </li>
+ <li><a href="transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="collate.html">@collate</a> </li>
+ <li><a href="collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="graphviz.html">@graphviz</a> </li>
+ <li><a href="mkdir.html">@mkdir</a> </li>
+ <li><a href="follows.html">@follows / mkdir</a> </li>
+ <li><a href="posttask.html">@posttask touch_file</a> </li>
+ <li><a href="active_if.html">@active_if</a> </li>
+ <li><a href="jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="product.html">@product </a> </li>
+ <li><a href="permutations.html">@permutations </a> </li>
+ <li><a href="combinations.html">@combinations </a> </li>
+                                        <li><a href="#">@combinations_with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="parallel.html">@parallel</a> </li>
+ <li><a href="check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="files_ex.html" title="Generating parameters on the fly for @files"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="combinations.html" title="@combinations"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../index.html">Home</a> | </li>
+ <li><a href="../contents.html">Contents</a> | </li>
+ <li><a href="../installation.html">Install</a> | </li>
+ <li><a href="../tutorials/new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../tutorials/new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../faq.html">FAQ</a> | </li>
+ <li><a href="../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../tutorials/new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../gallery.html">Gallery</a> | </li>
+ <li><a href="../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/decorators/decorators.html b/doc/_build/html/decorators/decorators.html
new file mode 100644
index 0000000..f58a5d5
--- /dev/null
+++ b/doc/_build/html/decorators/decorators.html
@@ -0,0 +1,750 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>Ruffus Decorators — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../_static/jquery.js"></script>
+ <script type="text/javascript" src="../_static/underscore.js"></script>
+ <script type="text/javascript" src="../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../index.html" />
+ <link rel="next" title="Indicator Objects" href="indicator_objects.html" />
+ <link rel="prev" title="Example code for FAQ Good practices: "What is the best way of handling data in file pairs (or triplets etc.)?"" href="../examples/paired_end_data.py.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="indicator_objects.html" title="Indicator Objects"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="../examples/paired_end_data.py.html" title="Example code for FAQ Good practices: "What is the best way of handling data in file pairs (or triplets etc.)?""
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../index.html">Home</a> | </li>
+ <li><a href="../contents.html">Contents</a> | </li>
+ <li><a href="../installation.html">Install</a> | </li>
+ <li><a href="../tutorials/new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../tutorials/new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../faq.html">FAQ</a> | </li>
+ <li><a href="../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../tutorials/new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../gallery.html">Gallery</a> | </li>
+ <li><a href="../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <div class="section" id="ruffus-decorators">
+<h1>Ruffus Decorators<a class="headerlink" href="#ruffus-decorators" title="Permalink to this headline">¶</a></h1>
+<div class="admonition seealso">
+<p class="first admonition-title">See also</p>
+<p class="last"><a class="reference internal" href="indicator_objects.html#decorators-indicator-objects"><em>Indicator objects</em></a></p>
+</div>
+<div class="section" id="core">
+<span id="decorators"></span><h2><em>Core</em><a class="headerlink" href="#core" title="Permalink to this headline">¶</a></h2>
+<table border="1" class="docutils">
+<colgroup>
+<col width="40%" />
+<col width="60%" />
+<col width="0%" />
+</colgroup>
+<thead valign="bottom">
+<tr class="row-odd"><th class="head">Decorator</th>
+<th class="head">Examples</th>
+<th class="head"> </th>
+</tr>
+</thead>
+<tbody valign="top">
+<tr class="row-even"><td><p class="first"><strong>@originate</strong> (<a class="reference internal" href="originate.html#decorators-originate"><em>Summary</em></a> / <a class="reference internal" href="../tutorials/new_tutorial/originate.html#new-manual-originate"><em>Manual</em></a>)</p>
+<blockquote class="last">
+<div><ul class="simple">
+<li>Creates (originates) a set of starting files without dependencies from scratch (<em>ex nihilo</em>!)</li>
+<li>Only called to create files which do not exist.</li>
+<li>Invoked once (one job is created) per item in the <tt class="docutils literal"><span class="pre">output_files</span></tt> list.</li>
+</ul>
+</div></blockquote>
+</td>
+<td><ul class="first last">
+<li><dl class="first docutils">
+<dt><a class="reference internal" href="originate.html#decorators-originate"><em>@originate</em></a> ( <tt class="docutils literal"><span class="pre">output_files</span></tt>, [<tt class="docutils literal"><span class="pre">extra_parameters</span></tt>,...] )</dt>
+<dd><p class="first last"></p>
+</dd>
+</dl>
+</li>
+</ul>
+</td>
+<td> </td>
+</tr>
+<tr class="row-odd"><td><p class="first"><strong>@split</strong> (<a class="reference internal" href="split.html#decorators-split"><em>Summary</em></a> / <a class="reference internal" href="../tutorials/new_tutorial/split.html#new-manual-split"><em>Manual</em></a>)</p>
+<ul class="last simple">
+<li>Splits a single input into multiple outputs</li>
+<li>Globs in <tt class="docutils literal"><span class="pre">output</span></tt> can specify an indeterminate number of files.</li>
+</ul>
+</td>
+<td><ul class="first last">
+<li><dl class="first docutils">
+<dt><a class="reference internal" href="split.html#decorators-split"><em>@split</em></a> ( <tt class="docutils literal"><span class="pre">tasks_or_file_names</span></tt>, <tt class="docutils literal"><span class="pre">output_files</span></tt>, [<tt class="docutils literal"><span class="pre">extra_parameters</span></tt>,...] )</dt>
+<dd><p class="first last"></p>
+</dd>
+</dl>
+</li>
+</ul>
+</td>
+<td> </td>
+</tr>
+<tr class="row-even"><td><p class="first"><strong>@transform</strong> (<a class="reference internal" href="transform.html#decorators-transform"><em>Summary</em></a> / <a class="reference internal" href="../tutorials/new_tutorial/transform.html#new-manual-transform"><em>Manual</em></a>)</p>
+<ul class="last simple">
+<li>Applies the task function to transform input data to output.</li>
+</ul>
+</td>
+<td><ul class="first last">
+<li><dl class="first docutils">
+<dt><a class="reference internal" href="transform.html#decorators-transform"><em>@transform</em></a> ( <tt class="docutils literal"><span class="pre">tasks_or_file_names</span></tt>, <a class="reference internal" href="transform_ex.html#decorators-transform-suffix-string"><em>suffix</em></a><em>(</em><tt class="docutils literal"><span class="pre">suffix_string</span></tt><em>)</em>, <tt class="docutils literal"><span class="pre">output_pattern</span></tt>, [<tt class="docutils litera [...]
+<dd><p class="first last"></p>
+</dd>
+</dl>
+</li>
+<li><dl class="first docutils">
+<dt><a class="reference internal" href="transform.html#decorators-transform"><em>@transform</em></a> ( <tt class="docutils literal"><span class="pre">tasks_or_file_names</span></tt>, <a class="reference internal" href="transform_ex.html#decorators-transform-matching-regex"><em>regex</em></a><em>(</em><tt class="docutils literal"><span class="pre">regex_pattern</span></tt><em>)</em>, <tt class="docutils literal"><span class="pre">output_pattern</span></tt>, [<tt class="docutils litera [...]
+<dd><p class="first last"></p>
+</dd>
+</dl>
+</li>
+<li><dl class="first docutils">
+<dt><a class="reference internal" href="transform.html#decorators-transform"><em>@transform</em></a> ( <tt class="docutils literal"><span class="pre">tasks_or_file_names</span></tt>, <a class="reference internal" href="transform_ex.html#decorators-transform-matching-formatter"><em>formatter</em></a><em>(</em><tt class="docutils literal"><span class="pre">regex_pattern</span></tt><em>)</em>, <tt class="docutils literal"><span class="pre">output_pattern</span></tt>, [<tt class="docutil [...]
+<dd><p class="first last"></p>
+</dd>
+</dl>
+</li>
+</ul>
+</td>
+<td> </td>
+</tr>
+<tr class="row-odd"><td><p class="first"><strong>@merge</strong> (<a class="reference internal" href="merge.html#decorators-merge"><em>Summary</em></a> / <a class="reference internal" href="../tutorials/new_tutorial/merge.html#new-manual-merge"><em>Manual</em></a>)</p>
+<ul class="last simple">
+<li>Merges multiple input files into a single output.</li>
+</ul>
+</td>
+<td><ul class="first last">
+<li><dl class="first docutils">
+<dt><a class="reference internal" href="merge.html#decorators-merge"><em>@merge</em></a> (<tt class="docutils literal"><span class="pre">tasks_or_file_names</span></tt>, <tt class="docutils literal"><span class="pre">output</span></tt>, [<tt class="docutils literal"><span class="pre">extra_parameters</span></tt>,...] )</dt>
+<dd><p class="first last"></p>
+</dd>
+</dl>
+</li>
+</ul>
+</td>
+<td> </td>
+</tr>
+</tbody>
+</table>
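+<p>For orientation, a minimal sketch (with illustrative file and task names only) chaining
+three of the core decorators above; <tt class="docutils literal"><span class="pre">@split</span></tt> is omitted for brevity:</p>
+<div class="highlight-python"><div class="highlight"><pre>from ruffus import *
+
+# @originate: one job per listed output file, created from scratch
+@originate(["a.start", "b.start"])
+def create_files(output_file):
+    open(output_file, "w").close()
+
+# @transform: one output per input, renamed via suffix substitution
+@transform(create_files, suffix(".start"), ".processed")
+def process(input_file, output_file):
+    open(output_file, "w").close()
+
+# @merge: all upstream outputs funnelled into a single file
+@merge(process, "summary.merged")
+def summarise(input_files, output_file):
+    open(output_file, "w").close()
+
+pipeline_run(verbose=0)
+</pre></div></div>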
+</div>
+<div class="section" id="combinatorics">
+<span id="decorators-combinatorics"></span><h2><em>Combinatorics</em><a class="headerlink" href="#combinatorics" title="Permalink to this headline">¶</a></h2>
+<table border="1" class="docutils">
+<colgroup>
+<col width="40%" />
+<col width="60%" />
+<col width="0%" />
+</colgroup>
+<thead valign="bottom">
+<tr class="row-odd"><th class="head">Decorator</th>
+<th class="head">Examples</th>
+<th class="head"> </th>
+</tr>
+</thead>
+<tbody valign="top">
+<tr class="row-even"><td><p class="first"><strong>@product</strong> (<a class="reference internal" href="product.html#decorators-product"><em>Summary</em></a> / <a class="reference internal" href="../tutorials/new_tutorial/combinatorics.html#new-manual-product"><em>Manual</em></a>)</p>
+<blockquote class="last">
+<div><ul class="simple">
+<li>Generates the <strong>product</strong>, i.e. all vs all comparisons, between sets of input files.</li>
+</ul>
+</div></blockquote>
+</td>
+<td><ul class="first last">
+<li><dl class="first docutils">
+<dt><a class="reference internal" href="product.html#decorators-product"><em>@product</em></a> ( <tt class="docutils literal"><span class="pre">tasks_or_file_names</span></tt>, <a class="reference internal" href="product.html#decorators-product-matching-formatter"><em>formatter</em></a> <em>([</em> <tt class="docutils literal"><span class="pre">regex_pattern</span></tt> <em>])</em> ,*[* <tt class="docutils literal"><span class="pre">tasks_or_file_names</span></tt>, <a class="referenc [...]
+<dd><p class="first last"></p>
+</dd>
+</dl>
+</li>
+</ul>
+</td>
+<td> </td>
+</tr>
+<tr class="row-odd"><td><p class="first"><strong>@permutations</strong> (<a class="reference internal" href="permutations.html#decorators-permutations"><em>Summary</em></a> / <a class="reference internal" href="../tutorials/new_tutorial/combinatorics.html#new-manual-permutations"><em>Manual</em></a>)</p>
+<blockquote class="last">
+<div><ul class="simple">
+<li>Generates the <strong>permutations</strong>, between all the elements of a set of <strong>Input</strong></li>
+<li>Analogous to the python <a class="reference external" href="http://docs.python.org/2/library/itertools.html#itertools.permutations">itertools.permutations</a></li>
+<li>permutations(‘ABCD’, 2) –> AB AC AD BA BC BD CA CB CD DA DB DC</li>
+</ul>
+</div></blockquote>
+</td>
+<td><ul class="first last">
+<li><dl class="first docutils">
+<dt><a class="reference internal" href="permutations.html#decorators-permutations"><em>@permutations</em></a> ( <tt class="docutils literal"><span class="pre">tasks_or_file_names</span></tt>, <a class="reference internal" href="product.html#decorators-product-matching-formatter"><em>formatter</em></a> <em>([</em> <tt class="docutils literal"><span class="pre">regex_pattern</span></tt> <em>])</em>, <tt class="docutils literal"><span class="pre">tuple_size</span></tt>, <tt class="docut [...]
+<dd><p class="first last"></p>
+</dd>
+</dl>
+</li>
+</ul>
+</td>
+<td> </td>
+</tr>
+<tr class="row-even"><td><p class="first"><strong>@combinations</strong> (<a class="reference internal" href="combinations.html#decorators-combinations"><em>Summary</em></a> / <a class="reference internal" href="../tutorials/new_tutorial/combinatorics.html#new-manual-combinations"><em>Manual</em></a>)</p>
+<blockquote class="last">
+<div><ul class="simple">
+<li>Generates the <strong>combinations</strong> between all the elements of a set of <strong>Input</strong>:
+i.e. r-length tuples of <em>input</em> elements with no repeated elements (no <strong>A A</strong>)
+and where the order of elements within each tuple is irrelevant (either <strong>A B</strong> or <strong>B A</strong>, not both).</li>
+<li>Analogous to the python <a class="reference external" href="http://docs.python.org/2/library/itertools.html#itertools.combinations">itertools.combinations</a></li>
+<li>combinations(‘ABCD’, 3) –&gt; ABC ABD ACD BCD</li>
+</ul>
+</div></blockquote>
+</td>
+<td><ul class="first last">
+<li><dl class="first docutils">
+<dt><a class="reference internal" href="combinations.html#decorators-combinations"><em>@combinations</em></a> ( <tt class="docutils literal"><span class="pre">tasks_or_file_names</span></tt>, <a class="reference internal" href="product.html#decorators-product-matching-formatter"><em>formatter</em></a> <em>([</em> <tt class="docutils literal"><span class="pre">regex_pattern</span></tt> <em>])</em>, <tt class="docutils literal"><span class="pre">tuple_size</span></tt>, <tt class="docut [...]
+<dd><p class="first last"></p>
+</dd>
+</dl>
+</li>
+</ul>
+</td>
+<td> </td>
+</tr>
+<tr class="row-odd"><td><p class="first"><strong>@combinations_with_replacement</strong> (<a class="reference internal" href="combinations_with_replacement.html#decorators-combinations-with-replacement"><em>Summary</em></a> / <a class="reference internal" href="../tutorials/new_tutorial/combinatorics.html#new-manual-combinations-with-replacement"><em>Manual</em></a>)</p>
+<blockquote class="last">
+<div><ul class="simple">
+<li>Generates the <strong>combinations_with_replacement</strong> between all the elements of a set of <strong>Input</strong>:
+i.e. r-length tuples of <em>input</em> elements, including tuples with repeated elements (<strong>A A</strong>),
+and where the order of elements within each tuple is irrelevant (either <strong>A B</strong> or <strong>B A</strong>, not both).</li>
+<li>Analogous to the python <a class="reference external" href="http://docs.python.org/2/library/itertools.html#itertools.combinations_with_replacement">itertools.combinations_with_replacement</a></li>
+<li>combinations_with_replacement(‘ABCD’, 2) –&gt; AA AB AC AD BB BC BD CC CD DD</li>
+</ul>
+</div></blockquote>
+</td>
+<td><ul class="first last">
+<li><dl class="first docutils">
+<dt><a class="reference internal" href="combinations_with_replacement.html#decorators-combinations-with-replacement"><em>@combinations_with_replacement</em></a> ( <tt class="docutils literal"><span class="pre">tasks_or_file_names</span></tt>, <a class="reference internal" href="product.html#decorators-product-matching-formatter"><em>formatter</em></a> <em>([</em> <tt class="docutils literal"><span class="pre">regex_pattern</span></tt> <em>])</em>, <tt class="docutils literal"><span class="pre">tuple_size</span></tt>, [...]
+<dd><p class="first last"></p>
+</dd>
+</dl>
+</li>
+</ul>
+</td>
+<td> </td>
+</tr>
+</tbody>
+</table>
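+<p>The four decorators above mirror their <tt class="docutils literal"><span class="pre">itertools</span></tt> namesakes; a quick side-by-side
+in plain Python (with a single set of elements reused where <tt class="docutils literal"><span class="pre">@product</span></tt> would normally
+take several <strong>Input</strong> sets):</p>
+<div class="highlight-python"><div class="highlight"><pre># Illustrative comparison of the tuple sets each decorator is analogous to
+from itertools import product, permutations, combinations, combinations_with_replacement
+
+elements = "ABCD"
+print(["".join(t) for t in product(elements, repeat=2)])                  # 16: AA AB ... DD
+print(["".join(t) for t in permutations(elements, 2)])                    # 12: AB AC ... DC
+print(["".join(t) for t in combinations(elements, 2)])                    #  6: AB AC ... CD
+print(["".join(t) for t in combinations_with_replacement(elements, 2)])   # 10: AA AB ... DD
+</pre></div></div>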
+</div>
+<div class="section" id="advanced">
+<h2><em>Advanced</em><a class="headerlink" href="#advanced" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><table border="1" class="docutils">
+<colgroup>
+<col width="40%" />
+<col width="60%" />
+<col width="0%" />
+</colgroup>
+<thead valign="bottom">
+<tr class="row-odd"><th class="head">Decorator</th>
+<th class="head">Examples</th>
+<th class="head"> </th>
+</tr>
+</thead>
+<tbody valign="top">
+<tr class="row-even"><td><strong>@subdivide</strong> (<a class="reference internal" href="subdivide.html#decorators-subdivide"><em>Summary</em></a> / <a class="reference internal" href="../tutorials/new_tutorial/subdivide_collate.html#new-manual-subdivide"><em>Manual</em></a>)
+- Subdivides a set of <em>Inputs</em> each further into multiple <em>Outputs</em>.
+- The number of files in each <em>Output</em> can be set at runtime by the use of globs.
+- <strong>Many to Even More</strong> operator.
+- The use of <strong>split</strong> is a synonym for subdivide is deprecated.</td>
+<td><ul class="first last">
+<li><dl class="first docutils">
+<dt><a class="reference internal" href="subdivide.html#decorators-subdivide"><em>@subdivide</em></a> ( <tt class="docutils literal"><span class="pre">tasks_or_file_names</span></tt>, <a class="reference internal" href="subdivide.html#decorators-subdivide-matching-regex"><em>regex</em></a><em>(</em><tt class="docutils literal"><span class="pre">regex_pattern</span></tt><em>)</em>, [ <a class="reference internal" href="indicator_objects.html#decorators-inputs"><em>inputs</em></a> | <a [...]
+<dd><p class="first last"></p>
+</dd>
+</dl>
+</li>
+<li><dl class="first docutils">
+<dt><a class="reference internal" href="subdivide.html#decorators-subdivide"><em>@subdivide</em></a> ( <tt class="docutils literal"><span class="pre">tasks_or_file_names</span></tt>, <a class="reference internal" href="subdivide.html#decorators-subdivide-matching-formatter"><em>formatter</em></a><em>(</em>[<tt class="docutils literal"><span class="pre">regex_pattern</span></tt>] <em>)</em>, [ <a class="reference internal" href="indicator_objects.html#decorators-inputs"><em>inputs</em [...]
+<dd><p class="first last"></p>
+</dd>
+</dl>
+</li>
+</ul>
+</td>
+<td> </td>
+</tr>
+<tr class="row-odd"><td><p class="first"><strong>@transform</strong> (<a class="reference internal" href="transform_ex.html#decorators-transform-ex"><em>Summary</em></a> / <a class="reference internal" href="../tutorials/new_tutorial/inputs.html#new-manual-inputs"><em>Manual</em></a>)</p>
+<ul class="last simple">
+<li>Infers input as well as output from regular expression substitutions</li>
+<li>Useful for adding additional file dependencies</li>
+</ul>
+</td>
+<td><ul class="first last">
+<li><dl class="first docutils">
+<dt><a class="reference internal" href="transform_ex.html#decorators-transform-ex"><em>@transform</em></a> ( <tt class="docutils literal"><span class="pre">tasks_or_file_names</span></tt>, <a class="reference internal" href="transform_ex.html#decorators-transform-matching-regex"><em>regex</em></a><em>(</em><tt class="docutils literal"><span class="pre">regex_pattern</span></tt><em>)</em>, [ <a class="reference internal" href="indicator_objects.html#decorators-inputs"><em>inputs</em>< [...]
+<dd><p class="first last"></p>
+</dd>
+</dl>
+</li>
+<li><dl class="first docutils">
+<dt><a class="reference internal" href="transform_ex.html#decorators-transform-ex"><em>@transform</em></a> ( <tt class="docutils literal"><span class="pre">tasks_or_file_names</span></tt>, <a class="reference internal" href="transform_ex.html#decorators-transform-matching-formatter"><em>formatter</em></a><em>(</em><tt class="docutils literal"><span class="pre">regex_pattern</span></tt><em>)</em>, [ <a class="reference internal" href="indicator_objects.html#decorators-inputs"><em>inpu [...]
+<dd><p class="first last"></p>
+</dd>
+</dl>
+</li>
+</ul>
+</td>
+<td> </td>
+</tr>
+<tr class="row-even"><td><p class="first"><strong>@collate</strong> (<a class="reference internal" href="collate.html#decorators-collate"><em>Summary</em></a> / <a class="reference internal" href="../tutorials/new_tutorial/subdivide_collate.html#new-manual-collate"><em>Manual</em></a>)</p>
+<ul class="last simple">
+<li>Groups multiple input files using regular expression matching</li>
+<li>Inputs resulting in the same output after substitution will be collated together.</li>
+</ul>
+</td>
+<td><ul class="first last">
+<li><dl class="first docutils">
+<dt><a class="reference internal" href="collate.html#decorators-collate"><em>@collate</em></a> (<tt class="docutils literal"><span class="pre">tasks_or_file_names</span></tt>, <a class="reference internal" href="collate.html#decorators-collate-matching-regex"><em>regex</em></a><em>(</em><tt class="docutils literal"><span class="pre">regex_pattern</span></tt><em>)</em>, <tt class="docutils literal"><span class="pre">output_pattern</span></tt>, [<tt class="docutils literal"><span class [...]
+<dd><p class="first last"></p>
+</dd>
+</dl>
+</li>
+<li><dl class="first docutils">
+<dt><a class="reference internal" href="collate_ex.html#decorators-collate-ex"><em>@collate</em></a> (<tt class="docutils literal"><span class="pre">tasks_or_file_names</span></tt>, <a class="reference internal" href="collate_ex.html#decorators-collate-ex-matching-regex"><em>regex</em></a><em>(</em><tt class="docutils literal"><span class="pre">regex_pattern</span></tt><em>)</em>, <a class="reference internal" href="indicator_objects.html#decorators-inputs"><em>inputs</em></a> | <a c [...]
+<dd><p class="first last"></p>
+</dd>
+</dl>
+</li>
+<li><dl class="first docutils">
+<dt><a class="reference internal" href="collate.html#decorators-collate"><em>@collate</em></a> (<tt class="docutils literal"><span class="pre">tasks_or_file_names</span></tt>, <a class="reference internal" href="collate.html#decorators-collate-matching-formatter"><em>formatter</em></a><em>(</em><tt class="docutils literal"><span class="pre">formatter_pattern</span></tt><em>)</em>, <tt class="docutils literal"><span class="pre">output_pattern</span></tt>, [<tt class="docutils literal" [...]
+<dd><p class="first last"></p>
+</dd>
+</dl>
+</li>
+<li><dl class="first docutils">
+<dt><a class="reference internal" href="collate_ex.html#decorators-collate-ex"><em>@collate</em></a> (<tt class="docutils literal"><span class="pre">tasks_or_file_names</span></tt>, <a class="reference internal" href="collate_ex.html#decorators-collate-ex-matching-formatter"><em>formatter</em></a><em>(</em><tt class="docutils literal"><span class="pre">formatter_pattern</span></tt><em>)</em>, <a class="reference internal" href="indicator_objects.html#decorators-inputs"><em>inputs</em [...]
+<dd><p class="first last"></p>
+</dd>
+</dl>
+</li>
+</ul>
+</td>
+<td> </td>
+</tr>
+<tr class="row-odd"><td><p class="first"><strong>@follows</strong> (<a class="reference internal" href="follows.html#decorators-follows"><em>Summary</em></a> / <a class="reference internal" href="../tutorials/new_tutorial/transform_in_parallel.html#new-manual-follows"><em>Manual</em></a>)</p>
+<ul class="last simple">
+<li>Indicates task dependency</li>
+<li>optional <a class="reference internal" href="follows.html#decorators-follows-directory-name"><em>mkdir</em></a> prerequisite (<a class="reference internal" href="../tutorials/new_tutorial/transform_in_parallel.html#new-manual-follows-mkdir"><em>see Manual</em></a>)</li>
+</ul>
+</td>
+<td><ul class="first last">
+<li><dl class="first docutils">
+<dt><a class="reference internal" href="follows.html#decorators-follows"><em>@follows</em></a> ( <tt class="docutils literal"><span class="pre">task1</span></tt>, <tt class="docutils literal"><span class="pre">'task2'</span></tt> ))</dt>
+<dd><p class="first last"></p>
+</dd>
+</dl>
+</li>
+<li><dl class="first docutils">
+<dt><a class="reference internal" href="follows.html#decorators-follows"><em>@follows</em></a> ( <tt class="docutils literal"><span class="pre">task1</span></tt>, <a class="reference internal" href="follows.html#decorators-follows-directory-name"><em>mkdir</em></a>( <tt class="docutils literal"><span class="pre">'my/directory/'</span></tt> ))</dt>
+<dd><p class="first last"></p>
+</dd>
+</dl>
+</li>
+</ul>
+</td>
+<td> </td>
+</tr>
+<tr class="row-even"><td><p class="first"><strong>@posttask</strong> (<a class="reference internal" href="posttask.html#decorators-posttask"><em>Summary</em></a> / <a class="reference internal" href="../tutorials/new_tutorial/posttask.html#new-manual-posttask"><em>Manual</em></a>)</p>
+<ul class="last simple">
+<li>Calls function after task completes</li>
+<li>Optional <a class="reference internal" href="posttask.html#decorators-posttask-file-name"><em>touch_file</em></a> indicator (<a class="reference internal" href="../tutorials/new_tutorial/posttask.html#new-manual-posttask-touch-file"><em>Manual</em></a>)</li>
+</ul>
+</td>
+<td><ul class="first last">
+<li><dl class="first docutils">
+<dt><a class="reference internal" href="posttask.html#decorators-posttask"><em>@posttask</em></a> ( <tt class="docutils literal"><span class="pre">signal_task_completion_function</span></tt> )</dt>
+<dd><p class="first last"></p>
+</dd>
+</dl>
+</li>
+<li><dl class="first docutils">
+<dt><a class="reference internal" href="posttask.html#decorators-posttask"><em>@posttask</em></a> (<a class="reference internal" href="indicator_objects.html#decorators-touch-file"><em>touch_file</em></a>( <tt class="docutils literal"><span class="pre">'task1.completed'</span></tt> ))</dt>
+<dd><p class="first last"></p>
+</dd>
+</dl>
+</li>
+</ul>
+</td>
+<td> </td>
+</tr>
+<tr class="row-odd"><td><p class="first"><strong>@active_if</strong> (<a class="reference internal" href="active_if.html#decorators-active-if"><em>Summary</em></a> / <a class="reference internal" href="../tutorials/new_tutorial/active_if.html#new-manual-active-if"><em>Manual</em></a>)</p>
+<blockquote class="last">
+<div><ul class="simple">
+<li>Switches tasks on and off at run time depending on their parameters</li>
+<li>Evaluated each time <a class="reference internal" href="../pipeline_functions.html#pipeline-functions-pipeline-run"><em>pipeline_run(...)</em></a>, <a class="reference internal" href="../pipeline_functions.html#pipeline-functions-pipeline-printout"><em>pipeline_printout(...)</em></a> or <a class="reference internal" href="../pipeline_functions.html#pipeline-functions-pipeline-printout-graph"><em>pipeline_printout_graph(...)</em></a> is called.</li>
+<li>Dormant tasks behave as if they are up to date and have no output.</li>
+</ul>
+</div></blockquote>
+</td>
+<td><ul class="first last">
+<li><dl class="first docutils">
+<dt><a class="reference internal" href="active_if.html#decorators-active-if"><em>@active_if</em></a> ( <tt class="docutils literal"><span class="pre">on_or_off1,</span> <span class="pre">[on_or_off2,</span> <span class="pre">...]</span></tt> )</dt>
+<dd><p class="first last"></p>
+</dd>
+</dl>
+</li>
+</ul>
+</td>
+<td> </td>
+</tr>
+<tr class="row-even"><td><p class="first"><strong>@jobs_limit</strong> (<a class="reference internal" href="jobs_limit.html#decorators-jobs-limit"><em>Summary</em></a> / <a class="reference internal" href="../tutorials/new_tutorial/multiprocessing.html#new-manual-jobs-limit"><em>Manual</em></a>)</p>
+<ul class="last simple">
+<li>Limits the amount of multiprocessing for the specified task</li>
+<li>Ensures that at most N jobs for this task are run in parallel</li>
+<li>Overrides <tt class="docutils literal"><span class="pre">multiprocess</span></tt> parameter in <a class="reference internal" href="../pipeline_functions.html#pipeline-functions-pipeline-run"><em>pipeline_run(...)</em></a></li>
+</ul>
+</td>
+<td><ul class="first last">
+<li><dl class="first docutils">
+<dt><a class="reference internal" href="jobs_limit.html#decorators-jobs-limit"><em>@jobs_limit</em></a> ( <tt class="docutils literal"><span class="pre">NUMBER_OF_JOBS_RUNNING_CONCURRENTLY</span></tt> )</dt>
+<dd><p class="first last"></p>
+</dd>
+</dl>
+</li>
+</ul>
+</td>
+<td> </td>
+</tr>
+<tr class="row-odd"><td><p class="first"><strong>@mkdir</strong> (<a class="reference internal" href="mkdir.html#decorators-mkdir"><em>Summary</em></a> / <a class="reference internal" href="../tutorials/new_tutorial/mkdir.html#new-manual-mkdir"><em>Manual</em></a>)</p>
+<ul class="last simple">
+<li>Generates paths for <a class="reference external" href="http://docs.python.org/2/library/os.html#os.makedirs">os.makedirs</a></li>
+</ul>
+</td>
+<td><ul class="first last">
+<li><dl class="first docutils">
+<dt><a class="reference internal" href="mkdir.html#decorators-mkdir"><em>@mkdir</em></a> ( <tt class="docutils literal"><span class="pre">tasks_or_file_names</span></tt>, <a class="reference internal" href="mkdir.html#decorators-mkdir-suffix-string"><em>suffix</em></a><em>(</em><tt class="docutils literal"><span class="pre">suffix_string</span></tt><em>)</em>, <tt class="docutils literal"><span class="pre">output_pattern</span></tt> )</dt>
+<dd><p class="first last"></p>
+</dd>
+</dl>
+</li>
+<li><dl class="first docutils">
+<dt><a class="reference internal" href="mkdir.html#decorators-mkdir"><em>@mkdir</em></a> ( <tt class="docutils literal"><span class="pre">tasks_or_file_names</span></tt>, <a class="reference internal" href="mkdir.html#decorators-mkdir-matching-regex"><em>regex</em></a><em>(</em><tt class="docutils literal"><span class="pre">regex_pattern</span></tt><em>)</em>, <tt class="docutils literal"><span class="pre">output_pattern</span></tt> )</dt>
+<dd><p class="first last"></p>
+</dd>
+</dl>
+</li>
+<li><dl class="first docutils">
+<dt><a class="reference internal" href="mkdir.html#decorators-mkdir"><em>@mkdir</em></a> ( <tt class="docutils literal"><span class="pre">tasks_or_file_names</span></tt>, <a class="reference internal" href="mkdir.html#decorators-mkdir-matching-formatter"><em>formatter</em></a><em>(</em><tt class="docutils literal"><span class="pre">regex_pattern</span></tt><em>)</em>, <tt class="docutils literal"><span class="pre">output_pattern</span></tt>)</dt>
+<dd><p class="first last"></p>
+</dd>
+</dl>
+</li>
+</ul>
+</td>
+<td> </td>
+</tr>
+<tr class="row-even"><td><p class="first"><strong>@graphviz</strong> (<a class="reference internal" href="graphviz.html#decorators-graphviz"><em>Summary</em></a> / <a class="reference internal" href="../tutorials/new_tutorial/pipeline_printout_graph.html#new-manual-pipeline-printout-graph"><em>Manual</em></a>)</p>
+<ul class="last simple">
+<li>Customise the graphic for each task in printed flowcharts</li>
+</ul>
+</td>
+<td><ul class="first last">
+<li><dl class="first docutils">
+<dt><a class="reference internal" href="graphviz.html#decorators-graphviz"><em>@graphviz</em></a> ( <tt class="docutils literal"><span class="pre">graphviz_parameter</span> <span class="pre">=</span> <span class="pre">XXX</span></tt>, <tt class="docutils literal"><span class="pre">[graphviz_parameter2</span> <span class="pre">=</span> <span class="pre">YYY</span> <span class="pre">...]</span></tt>)</dt>
+<dd><p class="first last"></p>
+</dd>
+</dl>
+</li>
+</ul>
+</td>
+<td> </td>
+</tr>
+</tbody>
+</table>
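+<p>A minimal sketch (illustrative file and task names only) combining several of the
+advanced decorators above on one <tt class="docutils literal"><span class="pre">@transform</span></tt> task:</p>
+<div class="highlight-python"><div class="highlight"><pre>from ruffus import *
+
+run_heavy_step = True                            # e.g. taken from a command-line option
+
+@originate(["a.input", "b.input"])
+def make_inputs(output_file):
+    open(output_file, "w").close()
+
+@active_if(run_heavy_step)                       # switch the task on or off at run time
+@follows(mkdir("results"))                       # make sure the directory exists first
+@jobs_limit(2)                                   # at most 2 jobs of this task in parallel
+@posttask(touch_file("results/step.completed"))  # flag file written when the task finishes
+@transform(make_inputs, suffix(".input"), ".output")
+def heavy_step(input_file, output_file):
+    open(output_file, "w").close()
+
+pipeline_run(verbose=0)
+</pre></div></div>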
+</div></blockquote>
+</div>
+<div class="section" id="esoteric">
+<h2><em>Esoteric!</em><a class="headerlink" href="#esoteric" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><table border="1" class="docutils">
+<colgroup>
+<col width="40%" />
+<col width="60%" />
+<col width="0%" />
+</colgroup>
+<thead valign="bottom">
+<tr class="row-odd"><th class="head">Decorator</th>
+<th class="head">Examples</th>
+<th class="head"> </th>
+</tr>
+</thead>
+<tbody valign="top">
+<tr class="row-even"><td><p class="first"><strong>@files</strong> (<a class="reference internal" href="files.html#decorators-files"><em>Summary</em></a> / <a class="reference internal" href="../tutorials/new_tutorial/deprecated_files.html#new-manual-deprecated-files"><em>Manual</em></a>)</p>
+<ul class="last simple">
+<li>I/O parameters</li>
+<li>skips up-to-date jobs</li>
+<li>Should use <a class="reference internal" href="transform.html#decorators-transform"><em>@transform</em></a> etc instead</li>
+</ul>
+</td>
+<td><ul class="first last">
+<li><dl class="first docutils">
+<dt><a class="reference internal" href="files.html#decorators-files"><em>@files</em></a>( <tt class="docutils literal"><span class="pre">parameter_list</span></tt> )</dt>
+<dd><p class="first last"></p>
+</dd>
+</dl>
+</li>
+<li><dl class="first docutils">
+<dt><a class="reference internal" href="files.html#decorators-files"><em>@files</em></a>( <tt class="docutils literal"><span class="pre">parameter_generating_function</span></tt> )</dt>
+<dd><p class="first last"></p>
+</dd>
+</dl>
+</li>
+<li><dl class="first docutils">
+<dt><a class="reference internal" href="files.html#decorators-files"><em>@files</em></a> ( <tt class="docutils literal"><span class="pre">input_file</span></tt>, <tt class="docutils literal"><span class="pre">output_file</span></tt>, <tt class="docutils literal"><span class="pre">other_params</span></tt>, ... )</dt>
+<dd><p class="first last"></p>
+</dd>
+</dl>
+</li>
+</ul>
+</td>
+<td> </td>
+</tr>
+<tr class="row-odd"><td><p class="first"><strong>@parallel</strong> (<a class="reference internal" href="parallel.html#decorators-parallel"><em>Summary</em></a> / <a class="reference internal" href="../tutorials/new_tutorial/parallel.html#new-manual-deprecated-parallel"><em>Manual</em></a>)</p>
+<ul class="last simple">
+<li>By default, does not check if jobs are up to date</li>
+<li>Best used in conjunction with <a class="reference internal" href="check_if_uptodate.html#decorators-check-if-uptodate"><em>@check_if_uptodate</em></a> (see the sketch after this table)</li>
+</ul>
+</td>
+<td><ul class="first last">
+<li><dl class="first docutils">
+<dt><a class="reference internal" href="parallel.html#decorators-parallel"><em>@parallel</em></a> ( <tt class="docutils literal"><span class="pre">parameter_list</span></tt> ) (<a class="reference internal" href="../tutorials/new_tutorial/parallel.html#new-manual-deprecated-parallel"><em>see Manual</em></a>)</dt>
+<dd><p class="first last"></p>
+</dd>
+</dl>
+</li>
+<li><dl class="first docutils">
+<dt><a class="reference internal" href="parallel.html#decorators-parallel"><em>@parallel</em></a> ( <tt class="docutils literal"><span class="pre">parameter_generating_function</span></tt> ) (<a class="reference internal" href="../tutorials/new_tutorial/onthefly.html#new-manual-on-the-fly"><em>see Manual</em></a>)</dt>
+<dd><p class="first last"></p>
+</dd>
+</dl>
+</li>
+</ul>
+</td>
+<td> </td>
+</tr>
+<tr class="row-even"><td><p class="first"><strong>@check_if_uptodate</strong> (<a class="reference internal" href="check_if_uptodate.html#decorators-check-if-uptodate"><em>Summary</em></a> / <a class="reference internal" href="../tutorials/new_tutorial/check_if_uptodate.html#new-manual-check-if-uptodate"><em>Manual</em></a>)</p>
+<ul class="last simple">
+<li>Custom function to determine if jobs need to be run</li>
+</ul>
+</td>
+<td><ul class="first last">
+<li><dl class="first docutils">
+<dt><a class="reference internal" href="check_if_uptodate.html#decorators-check-if-uptodate"><em>@check_if_uptodate</em></a> ( <tt class="docutils literal"><span class="pre">is_task_up_to_date_function</span></tt> )</dt>
+<dd><p class="first last"></p>
+</dd>
+</dl>
+</li>
+</ul>
+</td>
+<td> </td>
+</tr>
+<tr class="row-odd"><td><div class="first last admonition tip">
+<p class="first admonition-title">Tip</p>
+<dl class="last docutils">
+<dt>The use of this overly complicated function is discouraged.</dt>
+<dd><p class="first"><strong>@files_re</strong> (<a class="reference internal" href="files_re.html#decorators-files-re"><em>Summary</em></a>)</p>
+<ul class="last simple">
+<li>I/O file names via regular
+expressions</li>
+<li>start from lists of file names
+or <a class="reference external" href="http://docs.python.org/library/glob.html"><cite>glob</cite></a> results</li>
+<li>skips up-to-date jobs</li>
+</ul>
+</dd>
+</dl>
+</div>
+</td>
+<td><ul class="first last">
+<li><dl class="first docutils">
+<dt><a class="reference internal" href="files_re.html#decorators-files-re"><em>@files_re</em></a> ( <tt class="docutils literal"><span class="pre">tasks_or_file_names</span></tt>, <tt class="docutils literal"><span class="pre">matching_regex</span></tt>, [<tt class="docutils literal"><span class="pre">input_pattern</span></tt>,] <tt class="docutils literal"><span class="pre">output_pattern</span></tt>, <tt class="docutils literal"><span class="pre">...</span></tt> )</dt>
+<dd><p class="first last"><tt class="docutils literal"><span class="pre">input_pattern</span></tt>/<tt class="docutils literal"><span class="pre">output_pattern</span></tt> are regex patterns
+used to create input/output file names from the starting
+list of either glob_str or file names</p>
+</dd>
+</dl>
+</li>
+</ul>
+</td>
+<td> </td>
+</tr>
+</tbody>
+</table>
+</div></blockquote>
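+<p>The sketch below illustrates the tip above: <tt class="docutils literal"><span class="pre">@parallel</span></tt> supplies the raw job parameters, while <tt class="docutils literal"><span class="pre">@check_if_uptodate</span></tt> delegates the up-to-date decision to a custom function. The file name <tt class="docutils literal"><span class="pre">a.1</span></tt> and the helper <tt class="docutils literal"><span class="pre">check_file_exists</span></tt> are illustrative assumptions, not part of the Ruffus API:</p>
+<div class="highlight-python"><div class="highlight"><pre>from ruffus import *
+import os
+
+def check_file_exists(input_file, output_file):
+    # hypothetical check: the job needs to run only while its output is missing
+    if not os.path.exists(output_file):
+        return True, "Missing file %s" % output_file
+    return False, "File %s exists" % output_file
+
+@parallel([[None, "a.1"]])
+@check_if_uptodate(check_file_exists)
+def create_if_necessary(input_file, output_file):
+    open(output_file, "w")
+
+pipeline_run([create_if_necessary])
+</pre></div>
+</div>
+<p>Re-running the pipeline is then a no-op until <tt class="docutils literal"><span class="pre">a.1</span></tt> is deleted.</p>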
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="../contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#">Ruffus Decorators</a><ul>
+<li><a class="reference internal" href="#core"><em>Core</em></a></li>
+<li><a class="reference internal" href="#combinatorics"><em>Combinatorics</em></a></li>
+<li><a class="reference internal" href="#advanced"><em>Advanced</em></a></li>
+<li><a class="reference internal" href="#esoteric"><em>Esoteric!</em></a></li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="../examples/paired_end_data.py.html"
+ title="previous chapter">Example code for <tt class="docutils literal"><span class="pre">FAQ</span> <span class="pre">Good</span> <span class="pre">practices:</span> <span class="pre">"What</span> <span class="pre">is</span> <span class="pre">the</span> <span class="pre">best</span> <span class="pre">way</span> <span class="pre">of</span> <span class="pre">handling</span> <span class="pre">data</span> <span class="pre">in</span> <span class="pre">file</span> [...]
+ <h4>Next topic</h4>
+ <p class="topless"><a href="indicator_objects.html"
+ title="next chapter">Indicator Objects</a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../_sources/decorators/decorators.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="#core">Core:</a></h4>
+ <ul>
+ <li><a href="originate.html">@originate</a> </li>
+ <li><a href="split.html">@split</a> </li>
+ <li><a href="transform.html">@transform</a> </li>
+ <li><a href="merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="subdivide.html">@subdivide</a> </li>
+ <li><a href="transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="collate.html">@collate</a> </li>
+ <li><a href="collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="graphviz.html">@graphviz</a> </li>
+ <li><a href="mkdir.html">@mkdir</a> </li>
+ <li><a href="follows.html">@follows / mkdir</a> </li>
+ <li><a href="posttask.html">@posttask touch_file</a> </li>
+ <li><a href="active_if.html">@active_if</a> </li>
+ <li><a href="jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="product.html">@product </a> </li>
+ <li><a href="permutations.html">@permutations </a> </li>
+ <li><a href="combinations.html">@combinations </a> </li>
+ <li><a href="combinations_with_replacement.html">@combinations_ _with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="parallel.html">@parallel</a> </li>
+ <li><a href="check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="indicator_objects.html" title="Indicator Objects"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="../examples/paired_end_data.py.html" title="Example code for FAQ Good practices: "What is the best way of handling data in file pairs (or triplets etc.)?""
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../index.html">Home</a> | </li>
+ <li><a href="../contents.html">Contents</a> | </li>
+ <li><a href="../installation.html">Install</a> | </li>
+ <li><a href="../tutorials/new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../tutorials/new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../faq.html">FAQ</a> | </li>
+ <li><a href="../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../tutorials/new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../gallery.html">Gallery</a> | </li>
+ <li><a href="../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/decorators/files.html b/doc/_build/html/decorators/files.html
new file mode 100644
index 0000000..84c77b5
--- /dev/null
+++ b/doc/_build/html/decorators/files.html
@@ -0,0 +1,364 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>@files — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../_static/jquery.js"></script>
+ <script type="text/javascript" src="../_static/underscore.js"></script>
+ <script type="text/javascript" src="../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../index.html" />
+ <link rel="next" title="@files_re" href="files_re.html" />
+ <link rel="prev" title="@parallel" href="parallel.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="files_re.html" title="@files_re"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="parallel.html" title="@parallel"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../index.html">Home</a> | </li>
+ <li><a href="../contents.html">Contents</a> | </li>
+ <li><a href="../installation.html">Install</a> | </li>
+ <li><a href="../tutorials/new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../tutorials/new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../faq.html">FAQ</a> | </li>
+ <li><a href="../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../tutorials/new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../gallery.html">Gallery</a> | </li>
+ <li><a href="../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <span class="target" id="decorators-files"></span><div class="admonition seealso" id="index-0">
+<p class="first admonition-title">See also</p>
+<ul class="last simple">
+<li><a class="reference internal" href="decorators.html#decorators"><em>Decorators</em></a> for more decorators</li>
+</ul>
+</div>
+<div class="section" id="files">
+<h1>@files<a class="headerlink" href="#files" title="Permalink to this headline">¶</a></h1>
+<div class="section" id="files-input1-output1-extra-parameters1">
+<h2><em>@files</em> (<a class="reference internal" href="#decorators-files-input1"><cite>input1</cite></a>, <a class="reference internal" href="#decorators-files-output1"><cite>output1</cite></a>, [<a class="reference internal" href="#decorators-files-extra-parameters1"><cite>extra_parameters1</cite></a>, ...])<a class="headerlink" href="#files-input1-output1-extra-parameters1" title="Permalink to this headline">¶</a></h2>
+<div class="section" id="files-for-single-jobs">
+<h3>@files for single jobs<a class="headerlink" href="#files-for-single-jobs" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><dl class="docutils">
+<dt><strong>Purpose:</strong></dt>
+<dd><p class="first">Provides parameters to run a task.</p>
+<p>The first two parameters in each set represent the input and output which are
+used to see if the job is out of date and needs to be (re-)run.</p>
+<p class="last">By default, out of date checking uses input/output file timestamps.
+(On some file systems, timestamps have a resolution in seconds.)
+See <a class="reference internal" href="check_if_uptodate.html#decorators-check-if-uptodate"><em>@check_if_uptodate()</em></a> for alternatives.</p>
+</dd>
+<dt><strong>Example</strong>:</dt>
+<dd><div class="first last highlight-python"><div class="highlight"><pre><span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+<span class="nd">@files</span><span class="p">(</span><span class="s">'a.1'</span><span class="p">,</span> <span class="s">'a.2'</span><span class="p">,</span> <span class="s">'A file'</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">transform_files</span><span class="p">(</span><span class="n">infile</span><span class="p">,</span> <span class="n">outfile</span><span class="p">,</span> <span class="n">text</span><span class="p">):</span>
+ <span class="k">pass</span>
+<span class="n">pipeline_run</span><span class="p">([</span><span class="n">transform_files</span><span class="p">])</span>
+</pre></div>
+</div>
+</dd>
+<dt>If <tt class="docutils literal"><span class="pre">a.2</span></tt> is missing or was created before <tt class="docutils literal"><span class="pre">a.1</span></tt>, then the following will be called:</dt>
+<dd><div class="first last highlight-python"><div class="highlight"><pre><span class="n">transform_files</span><span class="p">(</span><span class="s">'a.1'</span><span class="p">,</span> <span class="s">'a.2'</span><span class="p">,</span> <span class="s">'A file'</span><span class="p">)</span>
+</pre></div>
+</div>
+</dd>
+</dl>
+<p><strong>Parameters:</strong></p>
+</div></blockquote>
+<blockquote id="decorators-files-input1">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>input</em></dt>
+<dd><p class="first last">Input file names</p>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
+<blockquote id="decorators-files-output1">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>output</em></dt>
+<dd><p class="first last">Output file names</p>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
+<blockquote id="decorators-files-extra-parameters1">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>extra_parameters</em></dt>
+<dd><p class="first last">optional <tt class="docutils literal"><span class="pre">extra_parameters</span></tt> are passed verbatim to each job.</p>
+</dd>
+</dl>
+</li>
+</ul>
+<dl class="docutils">
+<dt><strong>Checking if jobs are up to date:</strong></dt>
+<dd><p class="first">Strings in <tt class="docutils literal"><span class="pre">input</span></tt> and <tt class="docutils literal"><span class="pre">output</span></tt> (including in nested sequences) are interpreted as file names and
+used to check if jobs are up-to-date.</p>
+<p class="last">See <a class="reference internal" href="#decorators-files-check-up-to-date"><em>above</em></a> for more details</p>
+</dd>
+</dl>
+</div></blockquote>
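+<p>A further sketch (the file names <tt class="docutils literal"><span class="pre">a.1</span></tt>, <tt class="docutils literal"><span class="pre">b.1</span></tt> and <tt class="docutils literal"><span class="pre">summary.2</span></tt> are assumptions): because strings in nested sequences are also treated as file names, a job with a list of inputs re-runs whenever the output is missing or older than <em>any</em> of them:</p>
+<div class="highlight-python"><div class="highlight"><pre>from ruffus import *
+
+# a.1 and b.1 are assumed to exist already (see the rules below)
+@files(['a.1', 'b.1'], 'summary.2', 'Both inputs')
+def summarise_files(input_files, output_file, text):
+    # concatenate every input file into the single output file
+    open(output_file, "w").write(
+        "".join(open(input_file).read() for input_file in input_files))
+
+pipeline_run([summarise_files])
+</pre></div>
+</div>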
+</div>
+</div>
+<div class="section" id="files-input-output-extra-parameters">
+<h2><em>@files</em> ( <em>((</em> <a class="reference internal" href="#decorators-files-input"><cite>input</cite></a>, <a class="reference internal" href="#decorators-files-output"><cite>output</cite></a>, [<a class="reference internal" href="#decorators-files-extra-parameters"><cite>extra_parameters</cite></a>,...] <em>), (...), ...)</em> )<a class="headerlink" href="#files-input-output-extra-parameters" title="Permalink to this headline">¶</a></h2>
+<div class="section" id="files-in-parallel">
+<h3>@files in parallel<a class="headerlink" href="#files-in-parallel" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><p><strong>Purpose:</strong></p>
+<blockquote>
+<div><p>Passes each set of parameters to separate jobs which can run in parallel</p>
+<p>The first two parameters in each set represent the input and output which are
+used to see if the job is out of date and needs to be (re-)run.</p>
+<p>By default, out of date checking uses input/output file timestamps.
+(On some file systems, timestamps have a resolution in seconds.)
+See <a class="reference internal" href="check_if_uptodate.html#decorators-check-if-uptodate"><em>@check_if_uptodate()</em></a> for alternatives.</p>
+</div></blockquote>
+<dl class="docutils">
+<dt><strong>Example</strong>:</dt>
+<dd><div class="first last highlight-python"><div class="highlight"><pre><span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+<span class="n">parameters</span> <span class="o">=</span> <span class="p">[</span>
+ <span class="p">[</span> <span class="s">'a.1'</span><span class="p">,</span> <span class="s">'a.2'</span><span class="p">,</span> <span class="s">'A file'</span><span class="p">],</span> <span class="c"># 1st job</span>
+ <span class="p">[</span> <span class="s">'b.1'</span><span class="p">,</span> <span class="s">'b.2'</span><span class="p">,</span> <span class="s">'B file'</span><span class="p">],</span> <span class="c"># 2nd job</span>
+ <span class="p">]</span>
+
+<span class="nd">@files</span><span class="p">(</span><span class="n">parameters</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">parallel_io_task</span><span class="p">(</span><span class="n">infile</span><span class="p">,</span> <span class="n">outfile</span><span class="p">,</span> <span class="n">text</span><span class="p">):</span>
+ <span class="k">pass</span>
+<span class="n">pipeline_run</span><span class="p">([</span><span class="n">parallel_io_task</span><span class="p">])</span>
+</pre></div>
+</div>
+</dd>
+<dt>is the equivalent of calling:</dt>
+<dd><div class="first last highlight-python"><div class="highlight"><pre><span class="n">parallel_io_task</span><span class="p">(</span><span class="s">'a.1'</span><span class="p">,</span> <span class="s">'a.2'</span><span class="p">,</span> <span class="s">'A file'</span><span class="p">)</span>
+<span class="n">parallel_io_task</span><span class="p">(</span><span class="s">'b.1'</span><span class="p">,</span> <span class="s">'b.2'</span><span class="p">,</span> <span class="s">'B file'</span><span class="p">)</span>
+</pre></div>
+</div>
+</dd>
+</dl>
+<p><strong>Parameters:</strong></p>
+</div></blockquote>
+<blockquote id="decorators-files-input">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>input</em></dt>
+<dd><p class="first last">Input file names</p>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
+<blockquote id="decorators-files-output">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>output</em></dt>
+<dd><p class="first last">Output file names</p>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
+<blockquote id="decorators-files-extra-parameters">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>extra_parameters</em></dt>
+<dd><p class="first last">optional <tt class="docutils literal"><span class="pre">extra_parameters</span></tt> are passed verbatim to each job.</p>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
+<blockquote id="decorators-files-check-up-to-date">
+<div><dl class="docutils">
+<dt><strong>Checking if jobs are up to date:</strong></dt>
+<dd><ol class="first last arabic simple">
+<li>Strings in <tt class="docutils literal"><span class="pre">input</span></tt> and <tt class="docutils literal"><span class="pre">output</span></tt> (including in nested sequences) are interpreted as file names and
+used to check if jobs are up-to-date.</li>
+<li>In the absence of input files (e.g. <tt class="docutils literal"><span class="pre">input</span> <span class="pre">==</span> <span class="pre">None</span></tt>), the job will run if any output file is missing (see the sketch after this list).</li>
+<li>In the absence of output files (e.g. <tt class="docutils literal"><span class="pre">output</span> <span class="pre">==</span> <span class="pre">None</span></tt>), the job will always run.</li>
+<li>If any of the output files is missing, the job will run.</li>
+<li>If any of the input files is missing when the job is run, a
+<tt class="docutils literal"><span class="pre">MissingInputFileError</span></tt> exception will be raised.</li>
+</ol>
+</dd>
+</dl>
+</div></blockquote>
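+<p>A minimal sketch of rules 2 and 3 above (all file names are assumptions, and <tt class="docutils literal"><span class="pre">data.txt</span></tt> is assumed to exist already so that rule 5 does not apply): a job with no input runs whenever its output is missing, and a job with no output runs on every call to <tt class="docutils literal"><span class="pre">pipeline_run()</span></tt>:</p>
+<div class="highlight-python"><div class="highlight"><pre>from ruffus import *
+
+parameters = [
+    [None, 'cache.log', 'runs only while cache.log is missing'],   # rule 2
+    ['data.txt', None, 'runs every time'],                         # rule 3
+]
+
+@files(parameters)
+def illustrate_uptodate_rules(input_file, output_file, comment):
+    if output_file:
+        open(output_file, "w").write(comment + "\n")
+
+pipeline_run([illustrate_uptodate_rules])
+</pre></div>
+</div>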
+</div>
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="../contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#">@files</a><ul>
+<li><a class="reference internal" href="#files-input1-output1-extra-parameters1"><em>@files</em> (<cite>input1</cite>, <cite>output1</cite>, [<cite>extra_parameters1</cite>, ...])</a><ul>
+<li><a class="reference internal" href="#files-for-single-jobs">@files for single jobs</a></li>
+</ul>
+</li>
+<li><a class="reference internal" href="#files-input-output-extra-parameters"><em>@files</em> ( <em>((</em> <cite>input</cite>, <cite>output</cite>, [<cite>extra_parameters</cite>,...] <em>), (...), ...)</em> )</a><ul>
+<li><a class="reference internal" href="#files-in-parallel">@files in parallel</a></li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="parallel.html"
+ title="previous chapter">@parallel</a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="files_re.html"
+ title="next chapter">@files_re</a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../_sources/decorators/files.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="originate.html">@originate</a> </li>
+ <li><a href="split.html">@split</a> </li>
+ <li><a href="transform.html">@transform</a> </li>
+ <li><a href="merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="subdivide.html">@subdivide</a> </li>
+ <li><a href="transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="collate.html">@collate</a> </li>
+ <li><a href="collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="graphviz.html">@graphviz</a> </li>
+ <li><a href="mkdir.html">@mkdir</a> </li>
+ <li><a href="follows.html">@follows / mkdir</a> </li>
+ <li><a href="posttask.html">@posttask touch_file</a> </li>
+ <li><a href="active_if.html">@active_if</a> </li>
+ <li><a href="jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="product.html">@product </a> </li>
+ <li><a href="permutations.html">@permutations </a> </li>
+ <li><a href="combinations.html">@combinations </a> </li>
+ <li><a href="combinations_with_replacement.html">@combinations_ _with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="parallel.html">@parallel</a> </li>
+ <li><a href="check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="files_re.html" title="@files_re"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="parallel.html" title="@parallel"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../index.html">Home</a> | </li>
+ <li><a href="../contents.html">Contents</a> | </li>
+ <li><a href="../installation.html">Install</a> | </li>
+ <li><a href="../tutorials/new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../tutorials/new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../faq.html">FAQ</a> | </li>
+ <li><a href="../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../tutorials/new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../gallery.html">Gallery</a> | </li>
+ <li><a href="../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/decorators/files_ex.html b/doc/_build/html/decorators/files_ex.html
new file mode 100644
index 0000000..4449d1a
--- /dev/null
+++ b/doc/_build/html/decorators/files_ex.html
@@ -0,0 +1,264 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>Generating parameters on the fly for @files — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../_static/jquery.js"></script>
+ <script type="text/javascript" src="../_static/underscore.js"></script>
+ <script type="text/javascript" src="../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../index.html" />
+ <link rel="next" title="@check_if_uptodate" href="check_if_uptodate.html" />
+ <link rel="prev" title="@combinations_with_replacement" href="combinations_with_replacement.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="check_if_uptodate.html" title="@check_if_uptodate"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="combinations_with_replacement.html" title="@combinations_with_replacement"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../index.html">Home</a> | </li>
+ <li><a href="../contents.html">Contents</a> | </li>
+ <li><a href="../installation.html">Install</a> | </li>
+ <li><a href="../tutorials/new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../tutorials/new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../faq.html">FAQ</a> | </li>
+ <li><a href="../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../tutorials/new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../gallery.html">Gallery</a> | </li>
+ <li><a href="../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <span class="target" id="decorators-files-on-the-fly"></span><div class="admonition seealso" id="index-0">
+<p class="first admonition-title">See also</p>
+<ul class="last simple">
+<li><a class="reference internal" href="decorators.html#decorators"><em>Decorators</em></a> for more decorators</li>
+</ul>
+</div>
+<div class="section" id="generating-parameters-on-the-fly-for-files">
+<h1>Generating parameters on the fly for @files<a class="headerlink" href="#generating-parameters-on-the-fly-for-files" title="Permalink to this headline">¶</a></h1>
+<div class="section" id="files-custom-function">
+<h2><em>@files</em> (<a class="reference internal" href="#decorators-files-custom-function"><cite>custom_function</cite></a>)<a class="headerlink" href="#files-custom-function" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p><strong>Purpose:</strong></p>
+<blockquote>
+<div><p>Uses a custom function to generate sets of parameters to separate jobs which can run in parallel.</p>
+<p>The first two parameters in each set represent the input and output which are
+used to see if the job is out of date and needs to be (re-)run.</p>
+<p>By default, out of date checking uses input/output file timestamps.
+(On some file systems, timestamps have a resolution in seconds.)
+See <a class="reference internal" href="check_if_uptodate.html#decorators-check-if-uptodate"><em>@check_if_uptodate()</em></a> for alternatives.</p>
+</div></blockquote>
+<dl class="docutils">
+<dt><strong>Example</strong>:</dt>
+<dd><div class="first last highlight-python"><div class="highlight"><pre><span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+<span class="k">def</span> <span class="nf">generate_parameters_on_the_fly</span><span class="p">():</span>
+ <span class="n">parameters</span> <span class="o">=</span> <span class="p">[</span>
+ <span class="p">[</span><span class="s">'input_file1'</span><span class="p">,</span> <span class="s">'output_file1'</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">],</span> <span class="c"># 1st job</span>
+ <span class="p">[</span><span class="s">'input_file2'</span><span class="p">,</span> <span class="s">'output_file2'</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">],</span> <span class="c"># 2nd job</span>
+ <span class="p">[</span><span class="s">'input_file3'</span><span class="p">,</span> <span class="s">'output_file3'</span><span class="p">,</span> <span class="mi">5</span><span class="p">,</span> <span class="mi">6</span><span class="p">],</span> <span class="c"># 3rd job</span>
+ <span class="p">]</span>
+ <span class="k">for</span> <span class="n">job_parameters</span> <span class="ow">in</span> <span class="n">parameters</span><span class="p">:</span>
+ <span class="k">yield</span> <span class="n">job_parameters</span>
+
+<span class="nd">@files</span><span class="p">(</span><span class="n">generate_parameters_on_the_fly</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">parallel_io_task</span><span class="p">(</span><span class="n">input_file</span><span class="p">,</span> <span class="n">output_file</span><span class="p">,</span> <span class="n">param1</span><span class="p">,</span> <span class="n">param2</span><span class="p">):</span>
+ <span class="k">pass</span>
+
+<span class="n">pipeline_run</span><span class="p">([</span><span class="n">parallel_task</span><span class="p">])</span>
+</pre></div>
+</div>
+</dd>
+<dt>is the equivalent of calling:</dt>
+<dd><div class="first last highlight-python"><div class="highlight"><pre><span class="n">parallel_io_task</span><span class="p">(</span><span class="s">'input_file1'</span><span class="p">,</span> <span class="s">'output_file1'</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">)</span>
+<span class="n">parallel_io_task</span><span class="p">(</span><span class="s">'input_file2'</span><span class="p">,</span> <span class="s">'output_file2'</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">)</span>
+<span class="n">parallel_io_task</span><span class="p">(</span><span class="s">'input_file3'</span><span class="p">,</span> <span class="s">'output_file3'</span><span class="p">,</span> <span class="mi">5</span><span class="p">,</span> <span class="mi">6</span><span class="p">)</span>
+</pre></div>
+</div>
+</dd>
+</dl>
+<p><strong>Parameters:</strong></p>
+</div></blockquote>
+<blockquote id="decorators-files-custom-function">
+<div><blockquote>
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>custom_function</em>:</dt>
+<dd><p class="first last">Generator function which yields each time a complete set of parameters for one job</p>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
+<dl class="docutils">
+<dt><strong>Checking if jobs are up to date:</strong></dt>
+<dd><p class="first">Strings in <tt class="docutils literal"><span class="pre">input</span></tt> and <tt class="docutils literal"><span class="pre">output</span></tt> (including in nested sequences) are interpreted as file names and
+used to check if jobs are up-to-date.</p>
+<p class="last">See <a class="reference internal" href="files.html#decorators-files-check-up-to-date"><em>above</em></a> for more details</p>
+</dd>
+</dl>
+</div></blockquote>
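+<p>On-the-fly generators are most useful when the job parameters can only be discovered at run time. The sketch below (the <tt class="docutils literal"><span class="pre">*.raw</span></tt> / <tt class="docutils literal"><span class="pre">.processed</span></tt> naming scheme is an illustrative assumption) pairs each file found by a directory scan with an output file name:</p>
+<div class="highlight-python"><div class="highlight"><pre>from ruffus import *
+import glob
+import os
+
+def find_jobs_at_runtime():
+    # the directory is scanned when the pipeline runs,
+    # yielding one (input, output) parameter set per job
+    for input_file in glob.glob('*.raw'):
+        output_file = os.path.splitext(input_file)[0] + '.processed'
+        yield input_file, output_file
+
+@files(find_jobs_at_runtime)
+def process_raw_file(input_file, output_file):
+    open(output_file, "w").write(open(input_file).read())
+
+pipeline_run([process_raw_file])
+</pre></div>
+</div>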
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="../contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#">Generating parameters on the fly for @files</a><ul>
+<li><a class="reference internal" href="#files-custom-function"><em>@files</em> (<cite>custom_function</cite>)</a></li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="combinations_with_replacement.html"
+ title="previous chapter">@combinations_with_replacement</a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="check_if_uptodate.html"
+ title="next chapter">@check_if_uptodate</a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../_sources/decorators/files_ex.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="originate.html">@originate</a> </li>
+ <li><a href="split.html">@split</a> </li>
+ <li><a href="transform.html">@transform</a> </li>
+ <li><a href="merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="subdivide.html">@subdivide</a> </li>
+ <li><a href="transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="collate.html">@collate</a> </li>
+ <li><a href="collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="graphviz.html">@graphviz</a> </li>
+ <li><a href="mkdir.html">@mkdir</a> </li>
+ <li><a href="follows.html">@follows / mkdir</a> </li>
+ <li><a href="posttask.html">@posttask touch_file</a> </li>
+ <li><a href="active_if.html">@active_if</a> </li>
+ <li><a href="jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="product.html">@product </a> </li>
+ <li><a href="permutations.html">@permutations </a> </li>
+ <li><a href="combinations.html">@combinations </a> </li>
+ <li><a href="combinations_with_replacement.html">@combinations_ _with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="#">@files (on the fly)</a> </li>
+ <li><a href="parallel.html">@parallel</a> </li>
+ <li><a href="check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="check_if_uptodate.html" title="@check_if_uptodate"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="combinations_with_replacement.html" title="@combinations_with_replacement"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../index.html">Home</a> | </li>
+ <li><a href="../contents.html">Contents</a> | </li>
+ <li><a href="../installation.html">Install</a> | </li>
+ <li><a href="../tutorials/new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../tutorials/new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../faq.html">FAQ</a> | </li>
+ <li><a href="../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../tutorials/new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../gallery.html">Gallery</a> | </li>
+ <li><a href="../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/decorators/files_re.html b/doc/_build/html/decorators/files_re.html
new file mode 100644
index 0000000..17ee9a5
--- /dev/null
+++ b/doc/_build/html/decorators/files_re.html
@@ -0,0 +1,344 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>@files_re — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../_static/jquery.js"></script>
+ <script type="text/javascript" src="../_static/underscore.js"></script>
+ <script type="text/javascript" src="../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../index.html" />
+ <link rel="next" title="ruffus.Task" href="../task.html" />
+ <link rel="prev" title="@files" href="files.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="../task.html" title="ruffus.Task"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="files.html" title="@files"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../index.html">Home</a> | </li>
+ <li><a href="../contents.html">Contents</a> | </li>
+ <li><a href="../installation.html">Install</a> | </li>
+ <li><a href="../tutorials/new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../tutorials/new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../faq.html">FAQ</a> | </li>
+ <li><a href="../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../tutorials/new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../gallery.html">Gallery</a> | </li>
+ <li><a href="../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <span class="target" id="decorators-files-re"></span><div class="admonition seealso" id="index-0">
+<p class="first admonition-title">See also</p>
+<ul class="last simple">
+<li><a class="reference internal" href="decorators.html#decorators"><em>Decorators</em></a> for more decorators</li>
+</ul>
+</div>
+<div class="section" id="files-re">
+<h1>@files_re<a class="headerlink" href="#files-re" title="Permalink to this headline">¶</a></h1>
+<div class="section" id="files-re-tasks-or-file-names-matching-regex-input-pattern-output-pattern-extra-parameters">
+<h2><em>@files_re</em> (<a class="reference internal" href="#decorators-files-re-tasks-or-file-names"><cite>tasks_or_file_names</cite></a>, <a class="reference internal" href="#decorators-files-re-matching-regex"><cite>matching_regex</cite></a>, [<a class="reference internal" href="#decorators-files-re-input-pattern"><cite>input_pattern</cite></a>], <a class="reference internal" href="#decorators-files-re-output-pattern"><cite>output_pattern</cite></a>, [<a class="reference internal" [...]
+<div class="section" id="legacy-design-now-deprecated-we-suggest-using-transform-instead">
+<h3>Legacy design now deprecated. We suggest using <a class="reference internal" href="transform.html#decorators-transform"><em>@transform()</em></a> instead<a class="headerlink" href="#legacy-design-now-deprecated-we-suggest-using-transform-instead" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><p><strong>Purpose:</strong></p>
+<blockquote>
+<div><p>All-singing, all-dancing decorator which can do everything that <a class="reference internal" href="merge.html#decorators-merge"><em>@merge()</em></a> and
+<a class="reference internal" href="transform.html#decorators-transform"><em>@transform()</em></a> can do.</p>
+<p>Applies the task function to transform data from input to output files.</p>
+<p>Output file names are determined from <a class="reference internal" href="#decorators-files-re-tasks-or-file-names"><cite>tasks_or_file_names</cite></a>, i.e. from the output
+of specified tasks, or a list of file names, using regular expression pattern substitutions.</p>
+<p>Only out of date tasks (comparing input and output files) will be run.</p>
+</div></blockquote>
+<dl class="docutils">
+<dt><strong>Example</strong>:</dt>
+<dd><div class="first last highlight-python"><div class="highlight"><pre><span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+<span class="c">#</span>
+<span class="c"># convert all files ending in ".1" into files ending in ".2"</span>
+<span class="c">#</span>
+<span class="nd">@files_re</span><span class="p">(</span><span class="s">'*.1'</span><span class="p">,</span> <span class="s">'(.*).1'</span><span class="p">,</span> <span class="s">r'\1.2'</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">transform_func</span><span class="p">(</span><span class="n">infile</span><span class="p">,</span> <span class="n">outfile</span><span class="p">):</span>
+ <span class="nb">open</span><span class="p">(</span><span class="n">outfile</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="nb">open</span><span class="p">(</span><span class="n">infile</span><span class="p">)</span><span class="o">.</span><span class="n">read</span><span class="p">()</span> <span class="o">+</span> <span class="s">"</span><span cla [...]
+
+<span class="n">pipeline_run</span><span class="p">([</span><span class="n">task_re</span><span class="p">])</span>
+</pre></div>
+</div>
+</dd>
+<dt>If the following files are present <tt class="docutils literal"><span class="pre">a.1</span></tt>, <tt class="docutils literal"><span class="pre">b.1</span></tt>, <tt class="docutils literal"><span class="pre">c.1</span></tt>, this will result in the following function calls:</dt>
+<dd><div class="first last highlight-python"><div class="highlight"><pre><span class="n">transform_func</span><span class="p">(</span><span class="s">"a.1"</span><span class="p">,</span> <span class="s">"a.2"</span><span class="p">)</span>
+<span class="n">transform_func</span><span class="p">(</span><span class="s">"b.1"</span><span class="p">,</span> <span class="s">"b.2"</span><span class="p">)</span>
+<span class="n">transform_func</span><span class="p">(</span><span class="s">"c.1"</span><span class="p">,</span> <span class="s">"c.2"</span><span class="p">)</span>
+</pre></div>
+</div>
+</dd>
+</dl>
+<p><strong>Parameters:</strong></p>
+</div></blockquote>
+<blockquote id="decorators-files-re-tasks-or-file-names">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>tasks_or_file_names</em></dt>
+<dd><p class="first">can be a:</p>
+<ol class="last arabic">
+<li><dl class="first docutils">
+<dt>Task / list of tasks (as in the example above).</dt>
+<dd><p class="first last">File names are taken from the output of the specified task(s)</p>
+</dd>
+</dl>
+</li>
+<li><dl class="first docutils">
+<dt>(Nested) list of file name strings.</dt>
+<dd><dl class="first last docutils">
+<dt>File names containing <tt class="docutils literal"><span class="pre">*[]?</span></tt> will be expanded as a <a class="reference external" href="http://docs.python.org/library/glob.html"><cite>glob</cite></a> .</dt>
+<dd><p class="first last">E.g.:<tt class="docutils literal"><span class="pre">"a.*"</span> <span class="pre">=></span> <span class="pre">"a.1",</span> <span class="pre">"a.2"</span></tt></p>
+</dd>
+</dl>
+</dd>
+</dl>
+</li>
+</ol>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
+<blockquote id="decorators-files-re-matching-regex">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>matching_regex</em></dt>
+<dd><p class="first">a python regular expression string.</p>
+<div class="last line-block">
+<div class="line">See python <a class="reference external" href="http://docs.python.org/library/re.html">regular expression (re)</a> documentation for details of regular expression syntax</div>
+<div class="line">Each output file name is created using regular expression substitution with <a class="reference internal" href="#decorators-files-re-output-pattern"><cite>output_pattern</cite></a></div>
+</div>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
+<blockquote id="decorators-files-re-input-pattern">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>input_pattern</em></dt>
+<dd><p class="first last">Optionally specifies the resulting input file name(s).</p>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
+<blockquote id="decorators-files-re-output-pattern">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>output_pattern</em></dt>
+<dd><p class="first last">Specifies the resulting output file name(s).</p>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
+<blockquote id="decorators-files-re-extra-parameters">
+<div><ul>
+<li><dl class="first docutils">
+<dt>[<em>extra_parameters, ...</em>]</dt>
+<dd><p class="first">Any extra parameters are passed to the task function.</p>
+<div class="line-block">
+<div class="line">Regular expression substitution is first applied to (even nested) string parameters.</div>
+<div class="line">Other data types are passed verbatim.</div>
+</div>
+<dl class="last docutils">
+<dt>For example:</dt>
+<dd><div class="first last highlight-python"><div class="highlight"><pre><span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+<span class="c">#</span>
+<span class="c"># convert all files ending in ".1" into files ending in ".2"</span>
+<span class="c">#</span>
+<span class="nd">@files_re</span><span class="p">(</span><span class="s">'*.1'</span><span class="p">,</span> <span class="s">'(.*).1'</span><span class="p">,</span> <span class="s">r'\1.2'</span><span class="p">,</span> <span class="p">[</span><span class="s">r'\1'</span><span class="p">,</span> <span class="mi">55</span><span class="p">],</span> <span class="mi">17</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">transform_func</span><span class="p">(</span><span class="n">infile</span><span class="p">,</span> <span class="n">outfile</span><span class="p">,</span> <span class="n">extras</span><span class="p">,</span> <span class="n">extra3</span><span class="p">):</span>
+ <span class="n">extra1</span><span class="p">,</span> <span class="n">extra2</span> <span class="o">=</span> <span class="n">extras</span>
+ <span class="nb">open</span><span class="p">(</span><span class="n">outfile</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="nb">open</span><span class="p">(</span><span class="n">infile</span><span class="p">)</span><span class="o">.</span><span class="n">read</span><span class="p">()</span> <span class="o">+</span> <span class="s">"</span><span cla [...]
+
+<span class="n">pipeline_run</span><span class="p">([</span><span class="n">transform_func</span><span class="p">])</span>
+</pre></div>
+</div>
+</dd>
+<dt>If the following files are present <tt class="docutils literal"><span class="pre">a.1</span></tt>, <tt class="docutils literal"><span class="pre">b.1</span></tt>, <tt class="docutils literal"><span class="pre">c.1</span></tt>, this will result in the following function calls:</dt>
+<dd><div class="first last highlight-python"><div class="highlight"><pre><span class="n">transform_func</span><span class="p">(</span><span class="s">"a.1"</span><span class="p">,</span> <span class="s">"a.2"</span><span class="p">,</span> <span class="p">[</span><span class="s">"a"</span><span class="p">,</span> <span class="mi">55</span><span class="p">],</span> <span class="mi">17</span><span class="p">)</span>
+<span class="n">transform_func</span><span class="p">(</span><span class="s">"b.1"</span><span class="p">,</span> <span class="s">"b.2"</span><span class="p">,</span> <span class="p">[</span><span class="s">"b"</span><span class="p">,</span> <span class="mi">55</span><span class="p">],</span> <span class="mi">17</span><span class="p">)</span>
+<span class="n">transform_func</span><span class="p">(</span><span class="s">"c.1"</span><span class="p">,</span> <span class="s">"c.2"</span><span class="p">,</span> <span class="p">[</span><span class="s">"c"</span><span class="p">,</span> <span class="mi">55</span><span class="p">],</span> <span class="mi">17</span><span class="p">)</span>
+</pre></div>
+</div>
+</dd>
+</dl>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
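+<p>Because <tt class="docutils literal"><span class="pre">@files_re</span></tt> is deprecated, the first example above is better written with <a class="reference internal" href="transform.html#decorators-transform"><em>@transform()</em></a> and the <tt class="docutils literal"><span class="pre">regex()</span></tt> indicator object. A sketch of the equivalent (same assumed <tt class="docutils literal"><span class="pre">*.1</span></tt> starting files):</p>
+<div class="highlight-python"><div class="highlight"><pre>from ruffus import *
+
+# regex() carries the matching pattern that @files_re took as a separate parameter
+@transform('*.1', regex(r'(.*)\.1'), r'\1.2')
+def transform_func(infile, outfile):
+    open(outfile, "w").write(open(infile).read() + "\n")
+
+pipeline_run([transform_func])
+</pre></div>
+</div>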
+</div>
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="../contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#">@files_re</a><ul>
+<li><a class="reference internal" href="#files-re-tasks-or-file-names-matching-regex-input-pattern-output-pattern-extra-parameters"><em>@files_re</em> (<cite>tasks_or_file_names</cite>, <cite>matching_regex</cite>, [<cite>input_pattern</cite>], <cite>output_pattern</cite>, [<cite>extra_parameters</cite>,...])</a><ul>
+<li><a class="reference internal" href="#legacy-design-now-deprecated-we-suggest-using-transform-instead">Legacy design now deprecated. We suggest using <tt class="docutils literal"><span class="pre">@transform()</span></tt> instead</a></li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="files.html"
+ title="previous chapter">@files</a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="../task.html"
+ title="next chapter">ruffus.Task</a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../_sources/decorators/files_re.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="originate.html">@originate</a> </li>
+ <li><a href="split.html">@split</a> </li>
+ <li><a href="transform.html">@transform</a> </li>
+ <li><a href="merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="subdivide.html">@subdivide</a> </li>
+ <li><a href="transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="collate.html">@collate</a> </li>
+ <li><a href="collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="graphviz.html">@graphviz</a> </li>
+ <li><a href="mkdir.html">@mkdir</a> </li>
+ <li><a href="follows.html">@follows / mkdir</a> </li>
+ <li><a href="posttask.html">@posttask touch_file</a> </li>
+ <li><a href="active_if.html">@active_if</a> </li>
+ <li><a href="jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="product.html">@product </a> </li>
+ <li><a href="permutations.html">@permutations </a> </li>
+ <li><a href="combinations.html">@combinations </a> </li>
+ <li><a href="combinations_with_replacement.html">@combinations_ _with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="parallel.html">@parallel</a> </li>
+ <li><a href="check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="../task.html" title="ruffus.Task"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="files.html" title="@files"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../index.html">Home</a> | </li>
+ <li><a href="../contents.html">Contents</a> | </li>
+ <li><a href="../installation.html">Install</a> | </li>
+ <li><a href="../tutorials/new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../tutorials/new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../faq.html">FAQ</a> | </li>
+ <li><a href="../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../tutorials/new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../gallery.html">Gallery</a> | </li>
+ <li><a href="../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/decorators/follows.html b/doc/_build/html/decorators/follows.html
new file mode 100644
index 0000000..696ebea
--- /dev/null
+++ b/doc/_build/html/decorators/follows.html
@@ -0,0 +1,274 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>@follows — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../_static/jquery.js"></script>
+ <script type="text/javascript" src="../_static/underscore.js"></script>
+ <script type="text/javascript" src="../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../index.html" />
+ <link rel="next" title="@product" href="product.html" />
+ <link rel="prev" title="@active_if" href="active_if.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="product.html" title="@product"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="active_if.html" title="@active_if"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../index.html">Home</a> | </li>
+ <li><a href="../contents.html">Contents</a> | </li>
+ <li><a href="../installation.html">Install</a> | </li>
+ <li><a href="../tutorials/new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../tutorials/new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../faq.html">FAQ</a> | </li>
+ <li><a href="../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../tutorials/new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../gallery.html">Gallery</a> | </li>
+ <li><a href="../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <span class="target" id="decorators-follows"></span><div class="admonition seealso" id="index-0">
+<p class="first admonition-title">See also</p>
+<ul class="simple">
+<li><a class="reference internal" href="decorators.html#decorators"><em>Decorators</em></a> for more decorators</li>
+<li>More on @follows in the <tt class="docutils literal"><span class="pre">Ruffus</span></tt> <a class="reference internal" href="../tutorials/new_tutorial/transform_in_parallel.html#new-manual-follows"><em>Manual</em></a></li>
+</ul>
+<div class="last admonition note">
+<p class="first admonition-title">Note</p>
+<p>Only missing directories are created.</p>
+<p class="last">In other words, the same directory can be specified multiple times safely without, for example, being recreated repeatedly.
+Sometimes, for pipelines with multiple entry points, this is the only way to make sure that certain working or output
+directories are always created or available <em>before</em> the pipeline runs.</p>
+</div>
+</div>
+<div class="section" id="follows">
+<h1>@follows<a class="headerlink" href="#follows" title="Permalink to this headline">¶</a></h1>
+<span class="target" id="decorators-follows-mkdir"></span><div class="section" id="follows-task-task-name-mkdir-directory-name-more-tasks">
+<h2><em>@follows</em>(<a class="reference internal" href="#decorators-follows-task"><cite>task</cite></a> | <a class="reference internal" href="#decorators-follows-task-name"><cite>“task_name”</cite></a> | <a class="reference internal" href="mkdir.html#decorators-mkdir"><em>mkdir</em></a> (<a class="reference internal" href="#decorators-follows-directory-name"><cite>directory_name</cite></a>), [more_tasks, ...])<a class="headerlink" href="#follows-task-task-name-mkdir-dir [...]
+<blockquote>
+<div><p><strong>Purpose:</strong></p>
+<blockquote>
+<div><p>Indicates either</p>
+<blockquote>
+<div><ul class="simple">
+<li>task dependencies</li>
+<li>that the task requires a directory to be created first <em>if necessary</em>. (Existing directories will not be overwritten)</li>
+</ul>
+</div></blockquote>
+</div></blockquote>
+<p><strong>Example</strong>:</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="k">def</span> <span class="nf">task1</span><span class="p">():</span>
+ <span class="k">print</span> <span class="s">"doing task 1"</span>
+
+<span class="nd">@follows</span><span class="p">(</span><span class="n">task1</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">task2</span><span class="p">():</span>
+ <span class="k">print</span> <span class="s">"doing task 2"</span>
+</pre></div>
+</div>
+<p><strong>Parameters:</strong></p>
+</div></blockquote>
+<blockquote id="decorators-follows-task">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>task</em>:</dt>
+<dd><p class="first last">a list of tasks which have to be run <strong>before</strong> this function</p>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
+<blockquote id="decorators-follows-task-name">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>“task_name”</em>:</dt>
+<dd><p class="first">Dependencies can be quoted function names.
+Quoted function names allow dependencies to be added before the function is defined.</p>
+<p class="last">Functions in other modules need to be fully qualified.</p>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
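+<p>A minimal sketch (the task names below are hypothetical): the dependency is quoted
+because <tt class="docutils literal"><span class="pre">task2</span></tt> has not yet been defined at that point;
+a task in another module would be written fully qualified, e.g. <tt class="docutils literal"><span class="pre">"other_module.setup_task"</span></tt>:</p>
+<div class="highlight-python"><div class="highlight"><pre># quoted name: "task2" is only defined further down this module
+@follows("task2")
+def task3():
+    print "doing task 3"
+
+def task2():
+    print "doing task 2"
+</pre></div>
+</div>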
+<blockquote id="decorators-follows-directory-name">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>directory_name</em>:</dt>
+<dd><p class="first">Directories which need to be created (<em>only if they don’t exist</em>) before
+the task is run can be specified via a <tt class="docutils literal"><span class="pre">mkdir</span></tt> indicator object:</p>
+<blockquote class="last">
+<div><div class="highlight-python"><pre>@follows(task_x, mkdir("/output/directory") ...)
+def task():
+ pass</pre>
+</div>
+</div></blockquote>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="../contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#">@follows</a><ul>
+<li><a class="reference internal" href="#follows-task-task-name-mkdir-directory-name-more-tasks"><em>@follows</em>(<cite>task</cite> | <cite>“task_name”</cite> | <tt class="docutils literal"><span class="pre">mkdir</span></tt> (<cite>directory_name</cite>), [more_tasks, ...])</a></li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="active_if.html"
+ title="previous chapter">@active_if</a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="product.html"
+ title="next chapter">@product</a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../_sources/decorators/follows.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="originate.html">@originate</a> </li>
+ <li><a href="split.html">@split</a> </li>
+ <li><a href="transform.html">@transform</a> </li>
+ <li><a href="merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="subdivide.html">@subdivide</a> </li>
+ <li><a href="transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="collate.html">@collate</a> </li>
+ <li><a href="collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="graphviz.html">@graphviz</a> </li>
+ <li><a href="mkdir.html">@mkdir</a> </li>
+ <li><a href="#">@follows / mkdir</a> </li>
+ <li><a href="posttask.html">@posttask touch_file</a> </li>
+ <li><a href="active_if.html">@active_if</a> </li>
+ <li><a href="jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="product.html">@product </a> </li>
+ <li><a href="permutations.html">@permutations </a> </li>
+ <li><a href="combinations.html">@combinations </a> </li>
+ <li><a href="combinations_with_replacement.html">@combinations_ _with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="parallel.html">@parallel</a> </li>
+ <li><a href="check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="product.html" title="@product"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="active_if.html" title="@active_if"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../index.html">Home</a> | </li>
+ <li><a href="../contents.html">Contents</a> | </li>
+ <li><a href="../installation.html">Install</a> | </li>
+ <li><a href="../tutorials/new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../tutorials/new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../faq.html">FAQ</a> | </li>
+ <li><a href="../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../tutorials/new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../gallery.html">Gallery</a> | </li>
+ <li><a href="../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/decorators/graphviz.html b/doc/_build/html/decorators/graphviz.html
new file mode 100644
index 0000000..af11baa
--- /dev/null
+++ b/doc/_build/html/decorators/graphviz.html
@@ -0,0 +1,278 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>@graphviz — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../_static/jquery.js"></script>
+ <script type="text/javascript" src="../_static/underscore.js"></script>
+ <script type="text/javascript" src="../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../index.html" />
+ <link rel="next" title="@mkdir" href="mkdir.html" />
+ <link rel="prev" title="@collate with add_inputs and inputs" href="collate_ex.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="mkdir.html" title="@mkdir"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="collate_ex.html" title="@collate with add_inputs and inputs"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../index.html">Home</a> | </li>
+ <li><a href="../contents.html">Contents</a> | </li>
+ <li><a href="../installation.html">Install</a> | </li>
+ <li><a href="../tutorials/new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../tutorials/new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../faq.html">FAQ</a> | </li>
+ <li><a href="../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../tutorials/new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../gallery.html">Gallery</a> | </li>
+ <li><a href="../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <span class="target" id="decorators-graphviz"></span><div class="admonition seealso" id="index-0">
+<p class="first admonition-title">See also</p>
+<ul class="last simple">
+<li><a class="reference internal" href="decorators.html#decorators"><em>Decorators</em></a> for more decorators</li>
+</ul>
+</div>
+<div class="section" id="graphviz">
+<h1>@graphviz<a class="headerlink" href="#graphviz" title="Permalink to this headline">¶</a></h1>
+<div class="section" id="graphviz-graphviz-parameters">
+<h2><em>@graphviz</em> ( <a class="reference internal" href="#decorators-graphviz-graphviz-parameters"><cite>graphviz_parameters</cite></a>,...] )<a class="headerlink" href="#graphviz-graphviz-parameters" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p><em>Contributed by Sean Davis, with improved syntax via Jake Biesinger</em></p>
+<dl class="docutils">
+<dt><strong>Purpose:</strong></dt>
+<dd><p class="first">Customise the graphic for each task in printed flowcharts by adding
+<a class="reference external" href="http://www.graphviz.org/doc/info/attrs.html">graphviz attributes</a>,
+(URL, shape, colour) to that node.</p>
+<ul class="last">
+<li><p class="first">This allows HTML formatting in the task names (using the <tt class="docutils literal"><span class="pre">label</span></tt> parameter as in the following example).
+HTML labels <strong>must</strong> be enclosed in <tt class="docutils literal"><span class="pre"><</span></tt> and <tt class="docutils literal"><span class="pre">></span></tt>. E.g.</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="n">label</span> <span class="o">=</span> <span class="s">"<Line <BR/> wrapped task_name()>"</span>
+</pre></div>
+</div>
+</li>
+<li><p class="first">You can also opt to keep the task name and wrap it with a prefix and suffix:</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="n">label_suffix</span> <span class="o">=</span> <span class="s">"??? "</span><span class="p">,</span> <span class="n">label_prefix</span> <span class="o">=</span> <span class="s">": What is this?"</span>
+</pre></div>
+</div>
+</li>
+<li><p class="first">The <tt class="docutils literal"><span class="pre">URL</span></tt> attribute allows the generation of clickable svg, and also client / server
+side image maps usable in web pages.
+See <a class="reference external" href="http://www.graphviz.org/content/output-formats#dimap">Graphviz documentation</a></p>
+</li>
+</ul>
+</dd>
+<dt><strong>Example</strong>:</dt>
+<dd><div class="first highlight-python"><div class="highlight"><pre><span class="nd">@graphviz</span><span class="p">(</span><span class="n">URL</span><span class="o">=</span><span class="s">'"http://cnn.com"'</span><span class="p">,</span> <span class="n">fillcolor</span> <span class="o">=</span> <span class="s">'"#FFCCCC"'</span><span class="p">,</span>
+ <span class="n">color</span> <span class="o">=</span> <span class="s">'"#FF0000"'</span><span class="p">,</span> <span class="n">pencolor</span><span class="o">=</span><span class="s">'"#FF0000"'</span><span class="p">,</span> <span class="n">fontcolor</span><span class="o">=</span><span class="s">'"#4B6000"'</span><span class="p">,</span>
+ <span class="n">label_suffix</span> <span class="o">=</span> <span class="s">"???"</span><span class="p">,</span> <span class="n">label_prefix</span> <span class="o">=</span> <span class="s">"What is this?<BR/> "</span><span class="p">,</span>
+ <span class="n">label</span> <span class="o">=</span> <span class="s">"<What <FONT COLOR=</span><span class="se">\"</span><span class="s">red</span><span class="se">\"</span><span class="s">>is</FONT>this>"</span><span class="p">,</span>
+ <span class="n">shape</span><span class="o">=</span> <span class="s">"component"</span><span class="p">,</span> <span class="n">height</span> <span class="o">=</span> <span class="mf">1.5</span><span class="p">,</span> <span class="n">peripheries</span> <span class="o">=</span> <span class="mi">5</span><span class="p">,</span>
+ <span class="n">style</span><span class="o">=</span><span class="s">"dashed"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">Up_to_date_task2</span><span class="p">(</span><span class="n">infile</span><span class="p">,</span> <span class="n">outfile</span><span class="p">):</span>
+ <span class="k">pass</span>
+
+<span class="c"># Can use dictionary if you wish...</span>
+<span class="n">graphviz_params</span> <span class="o">=</span> <span class="p">{</span><span class="s">"URL"</span><span class="p">:</span><span class="s">"http://cnn.com"</span><span class="p">,</span> <span class="s">"fontcolor"</span><span class="p">:</span> <span class="s">'"#FF00FF"'</span><span class="p">}</span>
+<span class="nd">@graphviz</span><span class="p">(</span><span class="o">**</span><span class="n">graphviz_params</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">myTask</span><span class="p">(</span><span class="nb">input</span><span class="p">,</span><span class="n">output</span><span class="p">):</span>
+ <span class="k">pass</span>
+</pre></div>
+</div>
+<a class="last reference internal image-reference" href="../_images/history_html_flowchart1.png"><img alt="../_images/history_html_flowchart1.png" src="../_images/history_html_flowchart1.png" style="width: 336.6px; height: 316.5px;" /></a>
+</dd>
+</dl>
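+<p>The attributes above only take effect when a flowchart is actually printed. A minimal
+sketch (the output file name <tt class="docutils literal"><span class="pre">"flowchart.svg"</span></tt> is just a placeholder), rendering the
+pipeline containing the decorated tasks as a clickable SVG:</p>
+<div class="highlight-python"><div class="highlight"><pre># write the flowchart for the pipeline up to and including myTask
+pipeline_printout_graph(open("flowchart.svg", "w"), "svg", [myTask])
+</pre></div>
+</div>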
+<p><strong>Parameters:</strong></p>
+</div></blockquote>
+<blockquote id="decorators-graphviz-graphviz-parameters">
+<div><ul>
+<li><p class="first">named <em>graphviz_parameters</em></p>
+<blockquote>
+<div><p>Including among others:</p>
+<blockquote>
+<div><ul class="simple">
+<li>URL (e.g. <tt class="docutils literal"><span class="pre">"www.ruffus.org.uk"</span></tt>)</li>
+<li>fillcolor</li>
+<li>color</li>
+<li>pencolor</li>
+<li>fontcolor</li>
+<li>label_suffix (appended to task name)</li>
+<li>label_prefix (precedes task name)</li>
+<li>label (replaces task name)</li>
+<li>shape (e.g. <tt class="docutils literal"><span class="pre">"component",</span> <span class="pre">"box",</span> <span class="pre">"diamond",</span> <span class="pre">"doubleoctagon"</span></tt> etc., see <a class="reference external" href="http://www.graphviz.org/doc/info/shapes.html">graphviz</a> )</li>
+<li>height</li>
+<li>peripheries (Number of borders)</li>
+<li>style (e.g. <tt class="docutils literal"><span class="pre">"solid",</span> <span class="pre">"wedged",</span> <span class="pre">"dashed"</span></tt> etc., see <a class="reference external" href="http://www.graphviz.org/doc/info/attrs.html#k:style">graphviz</a> )</li>
+</ul>
+</div></blockquote>
+<p>Colours may be specified as <tt class="docutils literal"><span class="pre">'"#FFCCCC"',</span> <span class="pre">'red',</span> <span class="pre">'red:blue',</span> <span class="pre">'/bugn9/7'</span></tt>, etc.; see <a class="reference external" href="http://www.graphviz.org/doc/info/attrs.html#k:color">color names</a> and <a class="reference external" href="http://www.graphviz.org/doc/info/colors.html">colour schemes</a>.</p>
+</div></blockquote>
+</li>
+</ul>
+</div></blockquote>
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="../contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#">@graphviz</a><ul>
+<li><a class="reference internal" href="#graphviz-graphviz-parameters"><em>@graphviz</em> ( <cite>graphviz_parameters</cite>,...] )</a></li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="collate_ex.html"
+ title="previous chapter">@collate with <tt class="docutils literal"><span class="pre">add_inputs</span></tt> and <tt class="docutils literal"><span class="pre">inputs</span></tt></a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="mkdir.html"
+ title="next chapter">@mkdir</a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../_sources/decorators/graphviz.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="originate.html">@originate</a> </li>
+ <li><a href="split.html">@split</a> </li>
+ <li><a href="transform.html">@transform</a> </li>
+ <li><a href="merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="subdivide.html">@subdivide</a> </li>
+ <li><a href="transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="collate.html">@collate</a> </li>
+ <li><a href="collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="#">@graphviz</a> </li>
+ <li><a href="mkdir.html">@mkdir</a> </li>
+ <li><a href="follows.html">@follows / mkdir</a> </li>
+ <li><a href="posttask.html">@posttask touch_file</a> </li>
+ <li><a href="active_if.html">@active_if</a> </li>
+ <li><a href="jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="product.html">@product </a> </li>
+ <li><a href="permutations.html">@permutations </a> </li>
+ <li><a href="combinations.html">@combinations </a> </li>
+ <li><a href="combinations_with_replacement.html">@combinations_ _with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="parallel.html">@parallel</a> </li>
+ <li><a href="check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="mkdir.html" title="@mkdir"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="collate_ex.html" title="@collate with add_inputs and inputs"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../index.html">Home</a> | </li>
+ <li><a href="../contents.html">Contents</a> | </li>
+ <li><a href="../installation.html">Install</a> | </li>
+ <li><a href="../tutorials/new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../tutorials/new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../faq.html">FAQ</a> | </li>
+ <li><a href="../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../tutorials/new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../gallery.html">Gallery</a> | </li>
+ <li><a href="../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/decorators/indicator_objects.html b/doc/_build/html/decorators/indicator_objects.html
new file mode 100644
index 0000000..a8722bd
--- /dev/null
+++ b/doc/_build/html/decorators/indicator_objects.html
@@ -0,0 +1,688 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>Indicator Objects — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../_static/jquery.js"></script>
+ <script type="text/javascript" src="../_static/underscore.js"></script>
+ <script type="text/javascript" src="../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../index.html" />
+ <link rel="next" title="@originate" href="originate.html" />
+ <link rel="prev" title="Ruffus Decorators" href="decorators.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="originate.html" title="@originate"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="decorators.html" title="Ruffus Decorators"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../index.html">Home</a> | </li>
+ <li><a href="../contents.html">Contents</a> | </li>
+ <li><a href="../installation.html">Install</a> | </li>
+ <li><a href="../tutorials/new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../tutorials/new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../faq.html">FAQ</a> | </li>
+ <li><a href="../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../tutorials/new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../gallery.html">Gallery</a> | </li>
+ <li><a href="../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <div class="admonition seealso">
+<p class="first admonition-title">See also</p>
+<p class="last"><a class="reference internal" href="decorators.html#decorators"><em>Decorators</em></a></p>
+</div>
+<div class="section" id="indicator-objects">
+<span id="decorators-indicator-objects"></span><span id="index-0"></span><h1>Indicator Objects<a class="headerlink" href="#indicator-objects" title="Permalink to this headline">¶</a></h1>
+<blockquote>
+<div><p>How <em>ruffus</em> disambiguates certain parameters to decorators.</p>
+<p>They are like <a class="reference external" href="http://docs.python.org/tutorial/controlflow.html#keyword-arguments">keyword arguments</a> in python: a little more verbose, but they make the syntax much simpler.</p>
+<p>Indicator objects are also “self-documenting”, so you can see
+exactly what is happening.</p>
+</div></blockquote>
+<div class="section" id="formatter">
+<span id="decorators-formatter"></span><span id="index-1"></span><h2><em>formatter</em><a class="headerlink" href="#formatter" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p><strong>formatter([</strong> <tt class="docutils literal"><span class="pre">regex</span> <span class="pre">|</span> <span class="pre">None</span></tt> <strong>, regex | None...])</strong></p>
+<ul class="simple">
+<li>The optional enclosed parameters are python regular expression strings</li>
+<li>Each regular expression matches a corresponding <em>Input</em> file name string</li>
+<li><em>formatter</em> parses each file name string into path and regular expression components</li>
+<li>Parsing fails altogether if the regular expression is not matched</li>
+</ul>
+<p>Path components include:</p>
+<blockquote>
+<div><ul class="simple">
+<li><tt class="docutils literal"><span class="pre">basename</span></tt>: The <a class="reference external" href="http://docs.python.org/2/library/os.path.html#os.path.basename">base name</a> <em>excluding</em> <a class="reference external" href="http://docs.python.org/2/library/os.path.html#os.path.splitext">extension</a>, <tt class="docutils literal"><span class="pre">"file.name"</span></tt></li>
+<li><tt class="docutils literal"><span class="pre">ext</span></tt> : The <a class="reference external" href="http://docs.python.org/2/library/os.path.html#os.path.splitext">extension</a>, <tt class="docutils literal"><span class="pre">".ext"</span></tt></li>
+<li><tt class="docutils literal"><span class="pre">path</span></tt> : The <a class="reference external" href="http://docs.python.org/2/library/os.path.html#os.path.dirname">dirname</a>, <tt class="docutils literal"><span class="pre">"/directory/to/a"</span></tt></li>
+<li><tt class="docutils literal"><span class="pre">subdir</span></tt> : A list of sub-directories in the <tt class="docutils literal"><span class="pre">path</span></tt> in reverse order, <tt class="docutils literal"><span class="pre">["a",</span> <span class="pre">"to",</span> <span class="pre">"directory",</span> <span class="pre">"/"]</span></tt></li>
+<li><tt class="docutils literal"><span class="pre">subpath</span></tt> : A list of descending sub-paths in reverse order, <tt class="docutils literal"><span class="pre">["/directory/to/a",</span> <span class="pre">"/directory/to",</span> <span class="pre">"/directory",</span> <span class="pre">"/"]</span></tt></li>
+</ul>
+</div></blockquote>
+<p>The replacement string refers to these components using python <a class="reference external" href="http://docs.python.org/2/library/string.html#string-formatting">string.format</a> style curly braces: <tt class="docutils literal"><span class="pre">{NAME}</span></tt>.</p>
+<p>We refer to an element from the Nth input string by index, for example:</p>
+<blockquote>
+<div><ul class="simple">
+<li><tt class="docutils literal"><span class="pre">"{ext[0]}"</span></tt> is the extension of the first input string.</li>
+<li><tt class="docutils literal"><span class="pre">"{basename[1]}"</span></tt> is the basename of the second input string.</li>
+<li><tt class="docutils literal"><span class="pre">"{basename[1][0:3]}"</span></tt> are the first three letters from the basename of the second input string.</li>
+</ul>
+</div></blockquote>
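+<p>An illustrative sketch (the file name and regular expression here are made up) of how the parsed
+components and any named capture groups are combined in a replacement string:</p>
+<div class="highlight-python"><div class="highlight"><pre># input string : "/data/sample1.fastq"
+# formatter(r"(?P<SAMPLE>[^/]+)\.fastq$") would make available:
+#   {path[0]}     -> "/data"
+#   {basename[0]} -> "sample1"
+#   {ext[0]}      -> ".fastq"
+#   {SAMPLE[0]}   -> "sample1"
+# so the replacement "{path[0]}/{SAMPLE[0]}.bam" expands to "/data/sample1.bam"
+</pre></div>
+</div>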
+<dl class="docutils">
+<dt><strong>Used by:</strong></dt>
+<dd><ul class="first last simple">
+<li><a class="reference internal" href="split.html#decorators-split"><em>@split</em></a></li>
+<li><a class="reference internal" href="transform.html#decorators-transform"><em>@transform</em></a></li>
+<li><a class="reference internal" href="merge.html#decorators-merge"><em>@merge</em></a></li>
+<li><a class="reference internal" href="subdivide.html#decorators-subdivide"><em>@subdivide</em></a></li>
+<li><a class="reference internal" href="collate.html#decorators-collate"><em>@collate</em></a></li>
+<li><a class="reference internal" href="product.html#decorators-product"><em>@product</em></a></li>
+<li><a class="reference internal" href="permutations.html#decorators-permutations"><em>@permutations</em></a></li>
+<li><a class="reference internal" href="combinations.html#decorators-combinations"><em>@combinations</em></a></li>
+<li><a class="reference internal" href="combinations_with_replacement.html#decorators-combinations-with-replacement"><em>@combinations_with_replacement</em></a></li>
+</ul>
+</dd>
+</dl>
+<p><strong>@transform example</strong>:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+
+<span class="c"># create initial file pairs</span>
+<span class="nd">@originate</span><span class="p">([</span> <span class="p">[</span><span class="s">'job1.a.start'</span><span class="p">,</span> <span class="s">'job1.b.start'</span><span class="p">],</span>
+ <span class="p">[</span><span class="s">'job2.a.start'</span><span class="p">,</span> <span class="s">'job2.b.start'</span><span class="p">],</span>
+ <span class="p">[</span><span class="s">'job3.a.start'</span><span class="p">,</span> <span class="s">'job3.c.start'</span><span class="p">]</span> <span class="p">])</span>
+<span class="k">def</span> <span class="nf">create_initial_file_pairs</span><span class="p">(</span><span class="n">output_files</span><span class="p">):</span>
+ <span class="k">for</span> <span class="n">output_file</span> <span class="ow">in</span> <span class="n">output_files</span><span class="p">:</span>
+ <span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span> <span class="k">as</span> <span class="n">oo</span><span class="p">:</span> <span class="k">pass</span>
+
+
+<span class="c">#---------------------------------------------------------------</span>
+<span class="c">#</span>
+<span class="hll"><span class="c"># formatter</span>
+</span><span class="c">#</span>
+<span class="nd">@transform</span><span class="p">(</span><span class="n">create_initial_file_pairs</span><span class="p">,</span> <span class="c"># Input</span>
+
+<span class="hll"> <span class="n">formatter</span><span class="p">(</span><span class="s">".+/job(?P<JOBNUMBER>\d+).a.start"</span><span class="p">,</span> <span class="c"># Extract job number</span>
+</span><span class="hll"> <span class="s">".+/job[123].b.start"</span><span class="p">),</span> <span class="c"># Match only "b" files</span>
+</span>
+ <span class="p">[</span><span class="s">"{path[0]}/jobs{JOBNUMBER[0]}.output.a.1"</span><span class="p">,</span> <span class="c"># Replacement list</span>
+ <span class="s">"{path[1]}/jobs{JOBNUMBER[0]}.output.b.1"</span><span class="p">])</span>
+<span class="k">def</span> <span class="nf">first_task</span><span class="p">(</span><span class="n">input_files</span><span class="p">,</span> <span class="n">output_parameters</span><span class="p">):</span>
+ <span class="k">print</span> <span class="s">"input_parameters = "</span><span class="p">,</span> <span class="n">input_files</span>
+ <span class="k">print</span> <span class="s">"output_parameters = "</span><span class="p">,</span> <span class="n">output_parameters</span>
+
+
+<span class="c">#</span>
+<span class="c"># Run</span>
+<span class="c">#</span>
+<span class="n">pipeline_run</span><span class="p">(</span><span class="n">verbose</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
+</pre></div>
+</div>
+<p>This produces:</p>
+<div class="highlight-pycon"><div class="highlight"><pre><span class="go">input_parameters = ['job1.a.start',</span>
+<span class="go"> 'job1.b.start']</span>
+<span class="go">output_parameters = ['/home/lg/src/temp/jobs1.output.a.1',</span>
+<span class="go"> '/home/lg/src/temp/jobs1.output.b.1', 45]</span>
+
+<span class="go">input_parameters = ['job2.a.start',</span>
+<span class="go"> 'job2.b.start']</span>
+<span class="go">output_parameters = ['/home/lg/src/temp/jobs2.output.a.1',</span>
+<span class="go"> '/home/lg/src/temp/jobs2.output.b.1', 45]</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p><strong>@permutations example</strong>:</p>
+<blockquote>
+<div><p>Combinatoric decorators such as <a class="reference internal" href="product.html#decorators-product"><em>@product</em></a> or
+<a class="reference internal" href="permutations.html#decorators-permutations"><em>@product</em></a> behave much
+like nested for loops in enumerating, combining, and permutating the original sets
+of inputs.</p>
+<p>The replacement strings require an extra level of indirection to refer to
+parsed components:</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+<span class="kn">from</span> <span class="nn">ruffus.combinatorics</span> <span class="kn">import</span> <span class="o">*</span>
+
+<span class="c"># create initial files</span>
+<span class="nd">@originate</span><span class="p">([</span> <span class="s">'a.start'</span><span class="p">,</span> <span class="s">'b.start'</span><span class="p">,</span> <span class="s">'c.start'</span><span class="p">])</span>
+<span class="k">def</span> <span class="nf">create_initial_files</span><span class="p">(</span><span class="n">output_file</span><span class="p">):</span>
+ <span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span> <span class="k">as</span> <span class="n">oo</span><span class="p">:</span> <span class="k">pass</span>
+
+
+<span class="c">#---------------------------------------------------------------</span>
+<span class="c">#</span>
+<span class="c"># formatter</span>
+<span class="c">#</span>
+<span class="hll"><span class="nd">@permutations</span><span class="p">(</span><span class="n">create_initial_files</span><span class="p">,</span> <span class="c"># Input</span>
+</span>
+ <span class="n">formatter</span><span class="p">(</span><span class="s">"(.start)$"</span><span class="p">),</span> <span class="c"># match input file in permutations</span>
+ <span class="mi">2</span><span class="p">,</span>
+<span class="hll">
+</span><span class="hll"> <span class="s">"{path[0][0]}/{basename[0][0]}_vs_{basename[1][0]}.product"</span><span class="p">,</span> <span class="c"># Output Replacement string</span>
+</span> <span class="s">"{path[0][0]}"</span><span class="p">,</span> <span class="c"># path for 1st set of files, 1st file name</span>
+ <span class="p">[</span><span class="s">"{basename[0][0]}"</span><span class="p">,</span> <span class="c"># basename for 1st set of files, 1st file name</span>
+ <span class="s">"{basename[1][0]}"</span><span class="p">])</span> <span class="c"># basename for 2nd set of files, 1st file name</span>
+<span class="k">def</span> <span class="nf">product_task</span><span class="p">(</span><span class="n">input_file</span><span class="p">,</span> <span class="n">output_parameter</span><span class="p">,</span> <span class="n">shared_path</span><span class="p">,</span> <span class="n">basenames</span><span class="p">):</span>
+ <span class="k">print</span> <span class="s">"input_parameter = "</span><span class="p">,</span> <span class="n">input_file</span>
+ <span class="k">print</span> <span class="s">"output_parameter = "</span><span class="p">,</span> <span class="n">output_parameter</span>
+ <span class="k">print</span> <span class="s">"shared_path = "</span><span class="p">,</span> <span class="n">shared_path</span>
+ <span class="k">print</span> <span class="s">"basenames = "</span><span class="p">,</span> <span class="n">basenames</span>
+
+
+<span class="c">#</span>
+<span class="c"># Run</span>
+<span class="c">#</span>
+<span class="n">pipeline_run</span><span class="p">(</span><span class="n">verbose</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
+</pre></div>
+</div>
+<p>This produces:</p>
+<div class="highlight-pycon"><div class="highlight"><pre><span class="gp">>>> </span><span class="n">pipeline_run</span><span class="p">(</span><span class="n">verbose</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
+<span class="go">input_parameter = ('a.start', 'b.start')</span>
+<span class="go">output_parameter = /home/lg/src/oss/ruffus/a_vs_b.product</span>
+<span class="go">shared_path = /home/lg/src/oss/ruffus</span>
+<span class="go">basenames = ['a', 'b']</span>
+
+<span class="go">input_parameter = ('a.start', 'c.start')</span>
+<span class="go">output_parameter = /home/lg/src/oss/ruffus/a_vs_c.product</span>
+<span class="go">shared_path = /home/lg/src/oss/ruffus</span>
+<span class="go">basenames = ['a', 'c']</span>
+
+<span class="go">input_parameter = ('b.start', 'a.start')</span>
+<span class="go">output_parameter = /home/lg/src/oss/ruffus/b_vs_a.product</span>
+<span class="go">shared_path = /home/lg/src/oss/ruffus</span>
+<span class="go">basenames = ['b', 'a']</span>
+
+<span class="go">input_parameter = ('b.start', 'c.start')</span>
+<span class="go">output_parameter = /home/lg/src/oss/ruffus/b_vs_c.product</span>
+<span class="go">shared_path = /home/lg/src/oss/ruffus</span>
+<span class="go">basenames = ['b', 'c']</span>
+
+<span class="go">input_parameter = ('c.start', 'a.start')</span>
+<span class="go">output_parameter = /home/lg/src/oss/ruffus/c_vs_a.product</span>
+<span class="go">shared_path = /home/lg/src/oss/ruffus</span>
+<span class="go">basenames = ['c', 'a']</span>
+
+<span class="go">input_parameter = ('c.start', 'b.start')</span>
+<span class="go">output_parameter = /home/lg/src/oss/ruffus/c_vs_b.product</span>
+<span class="go">shared_path = /home/lg/src/oss/ruffus</span>
+<span class="go">basenames = ['c', 'b']</span>
+</pre></div>
+</div>
+</div></blockquote>
+</div></blockquote>
+</div>
+<div class="section" id="suffix">
+<span id="decorators-suffix"></span><span id="index-2"></span><h2><em>suffix</em><a class="headerlink" href="#suffix" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p><strong>suffix(</strong> <tt class="docutils literal"><span class="pre">string</span></tt> <strong>)</strong></p>
+<p>The enclosed parameter is a string which must <em>exactly</em> match the end
+of a file name.</p>
+<dl class="docutils">
+<dt><strong>Used by:</strong></dt>
+<dd><ul class="first last simple">
+<li><a class="reference internal" href="transform.html#decorators-transform"><em>@transform</em></a></li>
+</ul>
+</dd>
+<dt><strong>Example</strong>:</dt>
+<dd><div class="first last highlight-python"><div class="highlight"><pre><span class="c">#</span>
+<span class="c"># Transforms ``*.c`` to ``*.o``::</span>
+<span class="c">#</span>
+<span class="nd">@transform</span><span class="p">(</span><span class="n">previous_task</span><span class="p">,</span> <span class="n">suffix</span><span class="p">(</span><span class="s">".c"</span><span class="p">),</span> <span class="s">".o"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">compile</span><span class="p">(</span><span class="n">infile</span><span class="p">,</span> <span class="n">outfile</span><span class="p">):</span>
+ <span class="k">pass</span>
+</pre></div>
+</div>
+</dd>
+</dl>
+</div></blockquote>
+</div>
+<div class="section" id="regex">
+<span id="decorators-regex"></span><span id="index-3"></span><h2><em>regex</em><a class="headerlink" href="#regex" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p><strong>regex(</strong> <tt class="docutils literal"><span class="pre">regular_expression</span></tt> <strong>)</strong></p>
+<p>The enclosed parameter is a python regular expression string,
+which must be wrapped in a <tt class="docutils literal"><span class="pre">regex</span></tt> indicator object.</p>
+<p>See python <a class="reference external" href="http://docs.python.org/library/re.html">regular expression (re)</a>
+documentation for details of regular expression syntax</p>
+<p><strong>Used by:</strong></p>
+<blockquote>
+<div><ul class="simple">
+<li><a class="reference internal" href="transform.html#decorators-transform"><em>@transform</em></a></li>
+<li><a class="reference internal" href="subdivide.html#decorators-subdivide"><em>@subdivide</em></a></li>
+<li><a class="reference internal" href="collate.html#decorators-collate"><em>@collate</em></a></li>
+<li>The deprecated <a class="reference internal" href="files_re.html#decorators-files-re"><em>@files_re</em></a></li>
+</ul>
+</div></blockquote>
+<dl class="docutils">
+<dt><strong>Example</strong>:</dt>
+<dd><div class="first last highlight-python"><div class="highlight"><pre><span class="nd">@transform</span><span class="p">(</span><span class="n">previous_task</span><span class="p">,</span> <span class="n">regex</span><span class="p">(</span><span class="s">r".c$"</span><span class="p">),</span> <span class="s">".o"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">compile</span><span class="p">(</span><span class="n">infile</span><span class="p">,</span> <span class="n">outfile</span><span class="p">):</span>
+ <span class="k">pass</span>
+</pre></div>
+</div>
+</dd>
+</dl>
+</div></blockquote>
+</div>
+<div class="section" id="add-inputs">
+<span id="decorators-add-inputs"></span><span id="index-4"></span><h2><em>add_inputs</em><a class="headerlink" href="#add-inputs" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p><strong>add_inputs(</strong> <tt class="docutils literal"><span class="pre">input_file_pattern</span></tt> <strong>)</strong></p>
+<p>The enclosed parameter(s) are pattern strings or a nested structure, which is added to the
+inputs of each job.</p>
+<dl class="docutils">
+<dt><strong>Used by:</strong></dt>
+<dd><ul class="first last simple">
+<li><a class="reference internal" href="transform_ex.html#decorators-transform-ex"><em>@transform</em></a></li>
+<li><a class="reference internal" href="transform_ex.html#decorators-transform-ex"><em>@collate</em></a></li>
+<li><a class="reference internal" href="subdivide.html#decorators-subdivide"><em>@subdivide</em></a></li>
+</ul>
+</dd>
+</dl>
+<p><strong>Example @transform with suffix(...)</strong></p>
+<blockquote>
+<div><p>A common task in compiling C code is to include the corresponding header file for the source.
+To compile <tt class="docutils literal"><span class="pre">*.c</span></tt> to <tt class="docutils literal"><span class="pre">*.o</span></tt>, adding <tt class="docutils literal"><span class="pre">*.h</span></tt> and the common header <tt class="docutils literal"><span class="pre">universal.h</span></tt>:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="nd">@transform</span><span class="p">([</span><span class="s">"1.c"</span><span class="p">,</span> <span class="s">"2.c"</span><span class="p">],</span> <span class="n">suffix</span><span class="p">(</span><span class="s">".c"</span><span class="p">),</span> <span class="n">add_inputs</span><span class="p">([</span><span class="s">r"\1.h"</span><span class="p">,</span> <spa [...]
+<span class="k">def</span> <span class="nf">compile</span><span class="p">(</span><span class="n">infile</span><span class="p">,</span> <span class="n">outfile</span><span class="p">):</span>
+ <span class="c"># do something here</span>
+ <span class="k">pass</span>
+</pre></div>
+</div>
+</div></blockquote>
+<div class="line-block">
+<div class="line">The starting files names are <tt class="docutils literal"><span class="pre">1.c</span></tt> and <tt class="docutils literal"><span class="pre">2.c</span></tt>.</div>
+<div class="line"><tt class="docutils literal"><span class="pre">suffix(".c")</span></tt> matches ”.c” so <tt class="docutils literal"><span class="pre">\1</span></tt> stands for the unmatched prefices <tt class="docutils literal"><span class="pre">"1"</span></tt> and <tt class="docutils literal"><span class="pre">"2"</span></tt></div>
+</div>
+<dl class="docutils">
+<dt>This will result in the following functional calls:</dt>
+<dd><div class="first last highlight-python"><div class="highlight"><pre><span class="nb">compile</span><span class="p">([</span><span class="s">"1.c"</span><span class="p">,</span> <span class="s">"1.h"</span><span class="p">,</span> <span class="s">"universal.h"</span><span class="p">],</span> <span class="s">"1.o"</span><span class="p">)</span>
+<span class="nb">compile</span><span class="p">([</span><span class="s">"2.c"</span><span class="p">,</span> <span class="s">"2.h"</span><span class="p">,</span> <span class="s">"universal.h"</span><span class="p">],</span> <span class="s">"2.o"</span><span class="p">)</span>
+</pre></div>
+</div>
+</dd>
+</dl>
+<p>A string like <tt class="docutils literal"><span class="pre">universal.h</span></tt> in <tt class="docutils literal"><span class="pre">add_inputs</span></tt> will be added <em>as is</em>.
+<tt class="docutils literal"><span class="pre">r"\1.h"</span></tt>, however, performs suffix substitution, with the special form <tt class="docutils literal"><span class="pre">r"\1"</span></tt> matching everything up to the suffix.
+Remember to ‘escape’ <tt class="docutils literal"><span class="pre">r"\1"</span></tt>, otherwise Ruffus will complain and throw an <tt class="docutils literal"><span class="pre">Exception</span></tt> to remind you.
+The most convenient way is to use a python “raw” string.</p>
+</div></blockquote>
+<p><strong>Example of add_inputs(...) with regex(...)</strong></p>
+<blockquote>
+<div><dl class="docutils">
+<dt>The suffix match (<tt class="docutils literal"><span class="pre">suffix(...)</span></tt>) is exactly equivalent to the following code using a regular expression (<tt class="docutils literal"><span class="pre">regex(...)</span></tt>):</dt>
+<dd><div class="first last highlight-python"><div class="highlight"><pre><span class="nd">@transform</span><span class="p">([</span><span class="s">"1.c"</span><span class="p">,</span> <span class="s">"2.c"</span><span class="p">],</span> <span class="n">regex</span><span class="p">(</span><span class="s">r"^(.+)\.c$"</span><span class="p">),</span> <span class="n">add_inputs</span><span class="p">([</span><span class="s">r"\1.h"</span><span class= [...]
+<span class="k">def</span> <span class="nf">compile</span><span class="p">(</span><span class="n">infile</span><span class="p">,</span> <span class="n">outfile</span><span class="p">):</span>
+ <span class="c"># do something here</span>
+ <span class="k">pass</span>
+</pre></div>
+</div>
+</dd>
+</dl>
+<p>The <tt class="docutils literal"><span class="pre">suffix(..)</span></tt> code is much simpler but the regular expression allows more complex substitutions.</p>
+</div></blockquote>
+<p><strong>add_inputs(...) preserves original inputs</strong></p>
+<blockquote>
+<div><p><tt class="docutils literal"><span class="pre">add_inputs</span></tt> nests the the original input parameters in a list before adding additional dependencies.</p>
+<dl class="docutils">
+<dt>This can be seen in the following example:</dt>
+<dd><div class="first last highlight-python"><div class="highlight"><pre><span class="nd">@transform</span><span class="p">([</span> <span class="p">[</span><span class="s">"1.c"</span><span class="p">,</span> <span class="s">"A.c"</span><span class="p">,</span> <span class="mi">2</span><span class="p">]</span>
+ <span class="p">[</span><span class="s">"2.c"</span><span class="p">,</span> <span class="s">"B.c"</span><span class="p">,</span> <span class="s">"C.c"</span><span class="p">,</span> <span class="mi">3</span><span class="p">]],</span>
+ <span class="n">suffix</span><span class="p">(</span><span class="s">".c"</span><span class="p">),</span> <span class="n">add_inputs</span><span class="p">([</span><span class="s">r"\1.h"</span><span class="p">,</span> <span class="s">"universal.h"</span><span class="p">]),</span> <span class="s">".o"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">compile</span><span class="p">(</span><span class="n">infile</span><span class="p">,</span> <span class="n">outfile</span><span class="p">):</span>
+ <span class="c"># do something here</span>
+ <span class="k">pass</span>
+</pre></div>
+</div>
+</dd>
+<dt>This will result in the following functional calls:</dt>
+<dd><div class="first last highlight-python"><div class="highlight"><pre><span class="nb">compile</span><span class="p">([[</span><span class="s">"1.c"</span><span class="p">,</span> <span class="s">"A.c"</span><span class="p">,</span> <span class="mi">2</span><span class="p">],</span> <span class="s">"1.h"</span><span class="p">,</span> <span class="s">"universal.h"</span><span class="p">],</span> <span class="s">"1.o"</span><span [...]
+<span class="nb">compile</span><span class="p">([[</span><span class="s">"3.c"</span><span class="p">,</span> <span class="s">"B.c"</span><span class="p">,</span> <span class="s">"C.c"</span><span class="p">,</span> <span class="mi">3</span><span class="p">],</span> <span class="s">"2.h"</span><span class="p">,</span> <span class="s">"universal.h"</span><span class="p">],</span> <span class="s">"2.o"</span><span class="p">)</span>
+</pre></div>
+</div>
+</dd>
+</dl>
+<p>The original parameters are retained unchanged as the first item in a list.</p>
+</div></blockquote>
+</div></blockquote>
+</div>
+<div class="section" id="inputs">
+<span id="decorators-inputs"></span><span id="index-5"></span><h2><em>inputs</em><a class="headerlink" href="#inputs" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p><strong>inputs(</strong> <tt class="docutils literal"><span class="pre">input_file_pattern</span></tt> <strong>)</strong></p>
+<dl class="docutils">
+<dt><strong>Used by:</strong></dt>
+<dd><ul class="first last simple">
+<li><a class="reference internal" href="transform_ex.html#decorators-transform-ex"><em>@transform</em></a></li>
+<li><a class="reference internal" href="transform_ex.html#decorators-transform-ex"><em>@collate</em></a></li>
+<li><a class="reference internal" href="subdivide.html#decorators-subdivide"><em>@subdivide</em></a></li>
+</ul>
+</dd>
+</dl>
+<p>The single enclosed parameter is a pattern string or a nested structure which is
+used to construct the input for each job.</p>
+<p>If more than one argument is supplied to inputs, an exception will be raised.</p>
+<p>Use a tuple or list (as in the following example) to send multiple input arguments to each job.</p>
+<dl class="docutils">
+<dt><strong>Used by:</strong></dt>
+<dd><ul class="first last simple">
+<li>The advanced form of <a class="reference internal" href="transform_ex.html#decorators-transform-ex"><em>@transform</em></a></li>
+</ul>
+</dd>
+</dl>
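+<p>As a hedged illustration of the single-argument rule above (not part of the original page), the following sketch contrasts a call that would raise an exception with the single-list form; it assumes Ruffus is importable as in the other examples on this page:</p>
+<div class="highlight-python"><div class="highlight"><pre>from ruffus import *
+
+# inputs(r"\1.py", "docs.rst")   -- two arguments: raises an exception
+# inputs([r"\1.py", "docs.rst"]) -- a single list argument: correct
+@transform(["1.c", "2.c"], suffix(".c"), inputs([r"\1.py", "docs.rst"]), ".pyc")
+def compile(infile, outfile):
+    pass
+</pre></div>
+</div>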
+<p><strong>inputs(...) replaces original inputs</strong></p>
+<blockquote>
+<div><p><tt class="docutils literal"><span class="pre">inputs(...)</span></tt> allows the original input parameters to be replaced wholesale.</p>
+<dl class="docutils">
+<dt>This can be seen in the following example:</dt>
+<dd><div class="first last highlight-python"><div class="highlight"><pre><span class="nd">@transform</span><span class="p">([</span> <span class="p">[</span><span class="s">"1.c"</span><span class="p">,</span> <span class="s">"A.c"</span><span class="p">,</span> <span class="mi">2</span><span class="p">]</span>
+ <span class="p">[</span><span class="s">"2.c"</span><span class="p">,</span> <span class="s">"B.c"</span><span class="p">,</span> <span class="s">"C.c"</span><span class="p">,</span> <span class="mi">3</span><span class="p">]],</span>
+ <span class="n">suffix</span><span class="p">(</span><span class="s">".c"</span><span class="p">),</span> <span class="n">inputs</span><span class="p">([</span><span class="s">r"\1.py"</span><span class="p">,</span> <span class="s">"docs.rst"</span><span class="p">]),</span> <span class="s">".pyc"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">compile</span><span class="p">(</span><span class="n">infile</span><span class="p">,</span> <span class="n">outfile</span><span class="p">):</span>
+ <span class="c"># do something here</span>
+ <span class="k">pass</span>
+</pre></div>
+</div>
+</dd>
+<dt>This will result in the following function calls:</dt>
+<dd><div class="first last highlight-python"><div class="highlight"><pre><span class="nb">compile</span><span class="p">([</span><span class="s">"1.py"</span><span class="p">,</span> <span class="s">"docs.rst"</span><span class="p">],</span> <span class="s">"1.pyc"</span><span class="p">)</span>
+<span class="nb">compile</span><span class="p">([</span><span class="s">"2.py"</span><span class="p">,</span> <span class="s">"docs.rst"</span><span class="p">],</span> <span class="s">"2.pyc"</span><span class="p">)</span>
+</pre></div>
+</div>
+</dd>
+</dl>
+<p>In this example, the corresponding Python files have been silently substituted
+for the C source files, leaving no trace of the original inputs.</p>
+</div></blockquote>
+</div></blockquote>
+</div>
+<div class="section" id="mkdir">
+<span id="decorators-indicator-objects-mkdir"></span><span id="index-6"></span><h2><em>mkdir</em><a class="headerlink" href="#mkdir" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p><strong>mkdir(</strong> <tt class="docutils literal"><span class="pre">directory_name1</span></tt> <strong>, [</strong> <tt class="docutils literal"><span class="pre">directory_name2</span></tt> <strong>, ...] )</strong></p>
+<p>The enclosed parameter is a directory name or a sequence of directory names.
+These directories will be created as part of the prerequisites of running a task.</p>
+<dl class="docutils">
+<dt><strong>Used by:</strong></dt>
+<dd><ul class="first last simple">
+<li><a class="reference internal" href="follows.html#decorators-follows"><em>@follows</em></a></li>
+</ul>
+</dd>
+<dt><strong>Example:</strong></dt>
+<dd><div class="first last highlight-python"><div class="highlight"><pre><span class="nd">@follows</span><span class="p">(</span><span class="n">mkdir</span><span class="p">(</span><span class="s">"/output/directory"</span><span class="p">))</span>
+<span class="k">def</span> <span class="nf">task</span><span class="p">():</span>
+ <span class="k">pass</span>
+</pre></div>
+</div>
+</dd>
+</dl>
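+<p>Because the description above allows a sequence of directory names, several directories (including nested ones) can be requested at once. A minimal sketch, not taken from the original page:</p>
+<div class="highlight-python"><div class="highlight"><pre>@follows(mkdir("/output/directory", "/output/directory/logs", "/output/scratch"))
+def task():
+    pass
+</pre></div>
+</div>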
+</div></blockquote>
+</div>
+<div class="section" id="touch-file">
+<span id="decorators-touch-file"></span><span id="index-7"></span><h2><em>touch_file</em><a class="headerlink" href="#touch-file" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p><strong>touch_file(</strong> <tt class="docutils literal"><span class="pre">file_name</span></tt> <strong>)</strong></p>
+<p>The enclosed parameter is a file name. This file will be <tt class="docutils literal"><span class="pre">touch</span></tt>-ed after a
+task is executed.</p>
+<p>This will change the date/time stamp of the <tt class="docutils literal"><span class="pre">file_name</span></tt> to the current date/time.
+If the file does not exist, an empty file will be created.</p>
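+<p>The <tt class="docutils literal"><span class="pre">touch</span></tt> semantics described above amount to roughly the following plain-Python sketch (an illustration only, not Ruffus&#8217;s actual implementation):</p>
+<div class="highlight-python"><div class="highlight"><pre>import os
+
+def touch(file_name):
+    # create an empty file if it is missing ...
+    open(file_name, "a").close()
+    # ... then set its modification time to the current date/time
+    os.utime(file_name, None)
+</pre></div>
+</div>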
+<dl class="docutils">
+<dt><strong>Used by:</strong></dt>
+<dd><ul class="first last simple">
+<li><a class="reference internal" href="posttask.html#decorators-posttask"><em>@posttask</em></a></li>
+</ul>
+</dd>
+<dt><strong>Example:</strong></dt>
+<dd><div class="first last highlight-python"><div class="highlight"><pre><span class="nd">@posttask</span><span class="p">(</span><span class="n">touch_file</span><span class="p">(</span><span class="s">"task_completed.flag"</span><span class="p">))</span>
+<span class="nd">@files</span><span class="p">(</span><span class="bp">None</span><span class="p">,</span> <span class="s">"a.1"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">do_task</span><span class="p">(</span><span class="n">input_file</span><span class="p">,</span> <span class="n">output_file</span><span class="p">):</span>
+ <span class="k">pass</span>
+</pre></div>
+</div>
+</dd>
+</dl>
+</div></blockquote>
+</div>
+<div class="section" id="output-from">
+<span id="decorators-output-from"></span><span id="index-8"></span><h2><em>output_from</em><a class="headerlink" href="#output-from" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p><strong>output_from (</strong> <tt class="docutils literal"><span class="pre">file_name_string1</span></tt> <strong>[,</strong> <tt class="docutils literal"><span class="pre">file_name_string2</span></tt> <strong>, ...] )</strong></p>
+<p>Indicates that any enclosed strings are not file names but refer to task functions.</p>
+<dl class="docutils">
+<dt><strong>Used by:</strong></dt>
+<dd><ul class="first last simple">
+<li><a class="reference internal" href="split.html#decorators-split"><em>@split</em></a></li>
+<li><a class="reference internal" href="transform.html#decorators-transform"><em>@transform</em></a></li>
+<li><a class="reference internal" href="merge.html#decorators-merge"><em>@merge</em></a></li>
+<li><a class="reference internal" href="collate.html#decorators-collate"><em>@collate</em></a></li>
+<li><a class="reference internal" href="subdivide.html#decorators-subdivide"><em>@subdivide</em></a></li>
+<li><a class="reference internal" href="product.html#decorators-product"><em>@product</em></a></li>
+<li><a class="reference internal" href="permutations.html#decorators-permutations"><em>@permutations</em></a></li>
+<li><a class="reference internal" href="combinations.html#decorators-combinations"><em>@combinations</em></a></li>
+<li><a class="reference internal" href="combinations_with_replacement.html#decorators-combinations-with-replacement"><em>@combinations_with_replacement</em></a></li>
+<li><a class="reference internal" href="files.html#decorators-files"><em>@files</em></a></li>
+</ul>
+</dd>
+<dt><strong>Example:</strong></dt>
+<dd><div class="first highlight-python"><div class="highlight"><pre><span class="nd">@split</span><span class="p">([</span><span class="s">"a.file"</span><span class="p">,</span> <span class="p">(</span><span class="s">"b.file"</span><span class="p">,</span> <span class="n">output_from</span><span class="p">(</span><span class="s">"task1"</span><span class="p">,</span> <span class="mi">76</span><span class="p">,</span> <span class="s">"task2"</span [...]
+<span class="k">def</span> <span class="nf">task2</span><span class="p">(</span><span class="nb">input</span><span class="p">,</span> <span class="n">output</span><span class="p">):</span>
+ <span class="k">pass</span>
+</pre></div>
+</div>
+<p>is equivalent to:</p>
+<div class="last highlight-python"><div class="highlight"><pre><span class="nd">@split</span><span class="p">([</span><span class="s">"a.file"</span><span class="p">,</span> <span class="p">(</span><span class="s">"b.file"</span><span class="p">,</span> <span class="p">(</span><span class="n">task1</span><span class="p">,</span> <span class="mi">76</span><span class="p">,</span> <span class="n">task2</span><span class="p">))],</span> <span class="s">"*.split" [...]
+<span class="k">def</span> <span class="nf">task2</span><span class="p">(</span><span class="nb">input</span><span class="p">,</span> <span class="n">output</span><span class="p">):</span>
+ <span class="k">pass</span>
+</pre></div>
+</div>
+</dd>
+</dl>
+</div></blockquote>
+</div>
+<div class="section" id="combine">
+<span id="decorators-combine"></span><span id="index-9"></span><h2><em>combine</em><a class="headerlink" href="#combine" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p><strong>combine(</strong> <tt class="docutils literal"><span class="pre">arguments</span></tt> <strong>)</strong></p>
+<div class="admonition warning">
+<p class="first admonition-title">Warning</p>
+<p>This is deprecated syntax.</p>
+<p>Please do not use!</p>
+<p class="last"><a class="reference internal" href="merge.html#decorators-merge"><em>@merge</em></a> and <a class="reference internal" href="collate.html#decorators-collate"><em>@collate</em></a> are more powerful
+and have straightforward syntax.</p>
+</div>
+<p>Indicates that the <em>inputs</em> of <a class="reference internal" href="files_re.html#decorators-files-re"><em>@files_re</em></a> will be collated
+or summarised into <em>outputs</em> by category. See the <a class="reference internal" href="../tutorials/new_tutorial/deprecated_files_re.html#new-manual-files-re-combine"><em>Manual</em></a> or
+<a class="reference internal" href="collate.html#decorators-collate"><em>@collate</em></a> for examples.</p>
+<dl class="docutils">
+<dt><strong>Used by:</strong></dt>
+<dd><ul class="first last simple">
+<li><a class="reference internal" href="../tutorials/new_tutorial/deprecated_files_re.html#new-manual-files-re-combine"><em>@files_re</em></a></li>
+</ul>
+</dd>
+<dt><strong>Example:</strong></dt>
+<dd><div class="first last highlight-python"><div class="highlight"><pre><span class="nd">@files_re</span><span class="p">(</span><span class="s">'*.animals'</span><span class="p">,</span> <span class="c"># inputs = all *.animal files</span>
+ <span class="s">r'mammals.([^.]+)'</span><span class="p">,</span> <span class="c"># regular expression</span>
+ <span class="n">combine</span><span class="p">(</span><span class="s">r'\1/animals.in_my_zoo'</span><span class="p">),</span> <span class="c"># single output file per species</span>
+ <span class="s">r'\1'</span> <span class="p">)</span> <span class="c"># species name</span>
+<span class="k">def</span> <span class="nf">capture_mammals</span><span class="p">(</span><span class="n">infiles</span><span class="p">,</span> <span class="n">outfile</span><span class="p">,</span> <span class="n">species</span><span class="p">):</span>
+ <span class="c"># summarise all animals of this species</span>
+ <span class="s">""</span>
+</pre></div>
+</div>
+</dd>
+</dl>
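+<p>Since the warning above recommends <tt class="docutils literal"><span class="pre">@collate</span></tt> instead, here is a hedged sketch of how the deprecated example might be rewritten with <tt class="docutils literal"><span class="pre">@collate</span></tt>, keeping the same regular expression grouping (this rewrite is an illustration, not taken from the original page):</p>
+<div class="highlight-python"><div class="highlight"><pre>@collate('*.animals',               # inputs = all *.animal files
+         regex(r'mammals.([^.]+)'), # group inputs by species
+         r'\1/animals.in_my_zoo',   # single output file per species
+         r'\1')                     # species name
+def capture_mammals(infiles, outfile, species):
+    # summarise all animals of this species
+    pass
+</pre></div>
+</div>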
+</div></blockquote>
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="../contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#">Indicator Objects</a><ul>
+<li><a class="reference internal" href="#formatter"><em>formatter</em></a></li>
+<li><a class="reference internal" href="#suffix"><em>suffix</em></a></li>
+<li><a class="reference internal" href="#regex"><em>regex</em></a></li>
+<li><a class="reference internal" href="#add-inputs"><em>add_inputs</em></a></li>
+<li><a class="reference internal" href="#inputs"><em>inputs</em></a></li>
+<li><a class="reference internal" href="#mkdir"><em>mkdir</em></a></li>
+<li><a class="reference internal" href="#touch-file"><em>touch_file</em></a></li>
+<li><a class="reference internal" href="#output-from"><em>output_from</em></a></li>
+<li><a class="reference internal" href="#combine"><em>combine</em></a></li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="decorators.html"
+ title="previous chapter">Ruffus Decorators</a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="originate.html"
+ title="next chapter">@originate</a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../_sources/decorators/indicator_objects.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="originate.html">@originate</a> </li>
+ <li><a href="split.html">@split</a> </li>
+ <li><a href="transform.html">@transform</a> </li>
+ <li><a href="merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="subdivide.html">@subdivide</a> </li>
+ <li><a href="transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="collate.html">@collate</a> </li>
+ <li><a href="collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="graphviz.html">@graphviz</a> </li>
+ <li><a href="mkdir.html">@mkdir</a> </li>
+ <li><a href="follows.html">@follows / mkdir</a> </li>
+ <li><a href="posttask.html">@posttask touch_file</a> </li>
+ <li><a href="active_if.html">@active_if</a> </li>
+ <li><a href="jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="product.html">@product </a> </li>
+ <li><a href="permutations.html">@permutations </a> </li>
+ <li><a href="combinations.html">@combinations </a> </li>
+ <li><a href="combinations_with_replacement.html">@combinations_ _with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="parallel.html">@parallel</a> </li>
+ <li><a href="check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="#">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="originate.html" title="@originate"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="decorators.html" title="Ruffus Decorators"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../index.html">Home</a> | </li>
+ <li><a href="../contents.html">Contents</a> | </li>
+ <li><a href="../installation.html">Install</a> | </li>
+ <li><a href="../tutorials/new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../tutorials/new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../faq.html">FAQ</a> | </li>
+ <li><a href="../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../tutorials/new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../gallery.html">Gallery</a> | </li>
+ <li><a href="../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/decorators/jobs_limit.html b/doc/_build/html/decorators/jobs_limit.html
new file mode 100644
index 0000000..41cbede
--- /dev/null
+++ b/doc/_build/html/decorators/jobs_limit.html
@@ -0,0 +1,267 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>@jobs_limit — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../_static/jquery.js"></script>
+ <script type="text/javascript" src="../_static/underscore.js"></script>
+ <script type="text/javascript" src="../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../index.html" />
+ <link rel="next" title="@posttask" href="posttask.html" />
+ <link rel="prev" title="@mkdir" href="mkdir.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="posttask.html" title="@posttask"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="mkdir.html" title="@mkdir"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../index.html">Home</a> | </li>
+ <li><a href="../contents.html">Contents</a> | </li>
+ <li><a href="../installation.html">Install</a> | </li>
+ <li><a href="../tutorials/new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../tutorials/new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../faq.html">FAQ</a> | </li>
+ <li><a href="../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../tutorials/new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../gallery.html">Gallery</a> | </li>
+ <li><a href="../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <span class="target" id="decorators-jobs-limit"></span><div class="admonition seealso" id="index-0">
+<p class="first admonition-title">See also</p>
+<ul class="last simple">
+<li><a class="reference internal" href="decorators.html#decorators"><em>Decorators</em></a> for more decorators</li>
+</ul>
+</div>
+<div class="section" id="jobs-limit">
+<h1>@jobs_limit<a class="headerlink" href="#jobs-limit" title="Permalink to this headline">¶</a></h1>
+<div class="section" id="jobs-limit-maximum-num-of-jobs-name">
+<h2><em>@jobs_limit</em> ( <a class="reference internal" href="#decorators-jobs-limit-maximum-num-of-jobs"><cite>maximum_num_of_jobs</cite></a>, [ <a class="reference internal" href="#decorators-jobs-limit-name"><cite>name</cite></a> ])<a class="headerlink" href="#jobs-limit-maximum-num-of-jobs-name" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><dl class="docutils">
+<dt><strong>Purpose:</strong></dt>
+<dd><div class="first last line-block">
+<div class="line">Manages the resources available for a task.</div>
+<div class="line">Limits the number of concurrent jobs which can be run in parallel for this task</div>
+<div class="line">Overrides the value for <tt class="docutils literal"><span class="pre">multiprocess</span></tt> in <a class="reference internal" href="../pipeline_functions.html#pipeline-functions-pipeline-run"><em>pipeline_run</em></a></div>
+<div class="line">If an optional <tt class="docutils literal"><span class="pre">name</span></tt> is given, the same limit is shared across all tasks with the same @job_limit name.</div>
+</div>
+</dd>
+</dl>
+<p><strong>Parameters:</strong></p>
+</div></blockquote>
+<blockquote id="decorators-jobs-limit-maximum-num-of-jobs">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>maximum_num_of_jobs</em></dt>
+<dd><p class="first last">The maximum number of concurrent jobs for this task. Must be an integer number
+greater than or equal to 1.</p>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
+<blockquote id="decorators-jobs-limit-name">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>name</em></dt>
+<dd><p class="first last">Optional name for the limit. All tasks with the same name share the same limit if they
+are running concurrently.</p>
+</dd>
+</dl>
+</li>
+</ul>
+<dl class="docutils">
+<dt><strong>Example</strong></dt>
+<dd><div class="first highlight-python"><div class="highlight"><pre><span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+
+<span class="c"># make list of 10 files</span>
+<span class="nd">@split</span><span class="p">(</span><span class="bp">None</span><span class="p">,</span> <span class="s">"*.stage1"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">make_files</span><span class="p">(</span><span class="n">input_file</span><span class="p">,</span> <span class="n">output_files</span><span class="p">):</span>
+ <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">10</span><span class="p">):</span>
+ <span class="nb">open</span><span class="p">(</span><span class="s">"</span><span class="si">%d</span><span class="s">.stage1"</span> <span class="o">%</span> <span class="n">i</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+
+<span class="nd">@jobs_limit</span><span class="p">(</span><span class="mi">2</span><span class="p">)</span>
+<span class="nd">@transform</span><span class="p">(</span><span class="n">make_files</span><span class="p">,</span> <span class="n">suffix</span><span class="p">(</span><span class="s">".stage1"</span><span class="p">),</span> <span class="s">".stage2"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">stage1</span><span class="p">(</span><span class="n">input_file</span><span class="p">,</span> <span class="n">output_file</span><span class="p">):</span>
+ <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+
+<span class="nd">@transform</span><span class="p">(</span><span class="n">stage1</span><span class="p">,</span> <span class="n">suffix</span><span class="p">(</span><span class="s">".stage2"</span><span class="p">),</span> <span class="s">".stage3"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">stage2</span><span class="p">(</span><span class="n">input_file</span><span class="p">,</span> <span class="n">output_file</span><span class="p">):</span>
+ <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+
+<span class="n">pipeline_run</span><span class="p">([</span><span class="n">stage2</span><span class="p">],</span> <span class="n">multiprocess</span> <span class="o">=</span> <span class="mi">5</span><span class="p">)</span>
+</pre></div>
+</div>
+<p>will run the 10 jobs of <tt class="docutils literal"><span class="pre">stage1</span></tt> two at a time, while <tt class="docutils literal"><span class="pre">stage2</span></tt> will
+run 5 at a time (from <tt class="docutils literal"><span class="pre">multiprocess</span> <span class="pre">=</span> <span class="pre">5</span></tt>):</p>
+<img alt="../_images/jobs_limit.png" class="last" src="../_images/jobs_limit.png" />
+</dd>
+</dl>
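+<p>The optional <tt class="docutils literal"><span class="pre">name</span></tt> parameter is not used in the example above. A minimal sketch (an illustration, not from the original page; the task names are hypothetical, and <tt class="docutils literal"><span class="pre">make_files</span></tt> is reused from the example) of two tasks sharing one named limit, so that at most two of their jobs run concurrently in total:</p>
+<div class="highlight-python"><div class="highlight"><pre>@jobs_limit(2, "shared_limit")
+@transform(make_files, suffix(".stage1"), ".stageA")
+def stage_a(input_file, output_file):
+    open(output_file, "w")
+
+@jobs_limit(2, "shared_limit")
+@transform(make_files, suffix(".stage1"), ".stageB")
+def stage_b(input_file, output_file):
+    open(output_file, "w")
+</pre></div>
+</div>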
+</div></blockquote>
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="../contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#">@jobs_limit</a><ul>
+<li><a class="reference internal" href="#jobs-limit-maximum-num-of-jobs-name"><em>@jobs_limit</em> ( <cite>maximum_num_of_jobs</cite>, [ <cite>name</cite> ])</a></li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="mkdir.html"
+ title="previous chapter">@mkdir</a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="posttask.html"
+ title="next chapter">@posttask</a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../_sources/decorators/jobs_limit.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="originate.html">@originate</a> </li>
+ <li><a href="split.html">@split</a> </li>
+ <li><a href="transform.html">@transform</a> </li>
+ <li><a href="merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="subdivide.html">@subdivide</a> </li>
+ <li><a href="transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="collate.html">@collate</a> </li>
+ <li><a href="collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="graphviz.html">@graphviz</a> </li>
+ <li><a href="mkdir.html">@mkdir</a> </li>
+ <li><a href="follows.html">@follows / mkdir</a> </li>
+ <li><a href="posttask.html">@posttask touch_file</a> </li>
+ <li><a href="active_if.html">@active_if</a> </li>
+ <li><a href="#">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="product.html">@product </a> </li>
+ <li><a href="permutations.html">@permutations </a> </li>
+ <li><a href="combinations.html">@combinations </a> </li>
+ <li><a href="combinations_with_replacement.html">@combinations_ _with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="parallel.html">@parallel</a> </li>
+ <li><a href="check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="posttask.html" title="@posttask"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="mkdir.html" title="@mkdir"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../index.html">Home</a> | </li>
+ <li><a href="../contents.html">Contents</a> | </li>
+ <li><a href="../installation.html">Install</a> | </li>
+ <li><a href="../tutorials/new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../tutorials/new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../faq.html">FAQ</a> | </li>
+ <li><a href="../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../tutorials/new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../gallery.html">Gallery</a> | </li>
+ <li><a href="../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/decorators/merge.html b/doc/_build/html/decorators/merge.html
new file mode 100644
index 0000000..7ff54fd
--- /dev/null
+++ b/doc/_build/html/decorators/merge.html
@@ -0,0 +1,269 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>@merge — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../_static/jquery.js"></script>
+ <script type="text/javascript" src="../_static/underscore.js"></script>
+ <script type="text/javascript" src="../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../index.html" />
+ <link rel="next" title="@subdivide" href="subdivide.html" />
+ <link rel="prev" title="@transform" href="transform.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="subdivide.html" title="@subdivide"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="transform.html" title="@transform"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../index.html">Home</a> | </li>
+ <li><a href="../contents.html">Contents</a> | </li>
+ <li><a href="../installation.html">Install</a> | </li>
+ <li><a href="../tutorials/new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../tutorials/new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../faq.html">FAQ</a> | </li>
+ <li><a href="../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../tutorials/new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../gallery.html">Gallery</a> | </li>
+ <li><a href="../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <span class="target" id="decorators-merge"></span><div class="admonition seealso" id="index-0">
+<p class="first admonition-title">See also</p>
+<ul class="last simple">
+<li><a class="reference internal" href="decorators.html#decorators"><em>Decorators</em></a> for more decorators</li>
+</ul>
+</div>
+<div class="section" id="merge">
+<h1>@merge<a class="headerlink" href="#merge" title="Permalink to this headline">¶</a></h1>
+<div class="section" id="merge-tasks-or-file-names-output-file-extra-parameters">
+<h2><em>@merge</em> ( <a class="reference internal" href="#decorators-merge-tasks-or-file-names"><cite>tasks_or_file_names</cite></a>, <a class="reference internal" href="#decorators-merge-output-file"><cite>output_file</cite></a>, [<a class="reference internal" href="#decorators-merge-extra-parameters"><cite>extra_parameters</cite></a>,...] )<a class="headerlink" href="#merge-tasks-or-file-names-output-file-extra-parameters" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><dl class="docutils">
+<dt><strong>Purpose:</strong></dt>
+<dd><p class="first">Merges multiple input files into a single output.</p>
+<p class="last">Only out of date tasks (comparing input and output files) will be run</p>
+</dd>
+</dl>
+<p><strong>Example</strong>:</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="nd">@merge</span><span class="p">(</span><span class="n">previous_task</span><span class="p">,</span> <span class="s">'all.summary'</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">summarize</span><span class="p">(</span><span class="n">infiles</span><span class="p">,</span> <span class="n">summary_file</span><span class="p">):</span>
+ <span class="k">pass</span>
+</pre></div>
+</div>
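+<p>For a slightly fuller, hedged illustration of the merge shown above, the following self-contained sketch concatenates the outputs of an upstream task into a single summary file (the file names and task bodies are invented for the example):</p>
+<div class="highlight-python"><div class="highlight"><pre>from ruffus import *
+
+@originate(["a.chunk", "b.chunk"])
+def previous_task(output_file):
+    # create a small input file for the merge step
+    with open(output_file, "w") as oo:
+        oo.write(output_file + "\n")
+
+@merge(previous_task, "all.summary")
+def summarize(infiles, summary_file):
+    # concatenate every input file into one summary
+    with open(summary_file, "w") as out:
+        for infile in infiles:
+            with open(infile) as inp:
+                out.write(inp.read())
+
+pipeline_run([summarize])
+</pre></div>
+</div>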
+<p><strong>Parameters:</strong></p>
+</div></blockquote>
+<blockquote id="decorators-merge-tasks-or-file-names">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>tasks_or_file_names</em></dt>
+<dd><p class="first">can be a:</p>
+<ol class="last arabic">
+<li><dl class="first docutils">
+<dt>Task / list of tasks (as in the example above).</dt>
+<dd><p class="first last">File names are taken from the output of the specified task(s)</p>
+</dd>
+</dl>
+</li>
+<li><dl class="first docutils">
+<dt>(Nested) list of file name strings.</dt>
+<dd><dl class="first last docutils">
+<dt>File names containing <tt class="docutils literal"><span class="pre">*[]?</span></tt> will be expanded as a <a class="reference external" href="http://docs.python.org/library/glob.html"><cite>glob</cite></a>.</dt>
+<dd><p class="first last">E.g.:<tt class="docutils literal"><span class="pre">"a.*"</span> <span class="pre">=></span> <span class="pre">"a.1",</span> <span class="pre">"a.2"</span></tt></p>
+</dd>
+</dl>
+</dd>
+</dl>
+</li>
+</ol>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
+<blockquote id="decorators-merge-output-file">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>output_file</em></dt>
+<dd><p class="first last">Specifies the resulting output file name(s).</p>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
+<blockquote id="decorators-merge-extra-parameters">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>extra_parameters, ...</em></dt>
+<dd><p class="first last">Any optional extra parameters are passed verbatim to the task function</p>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
+<p>See <a class="reference internal" href="collate.html#decorators-collate"><em>here</em></a> for more advanced uses of merging.</p>
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="../contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#">@merge</a><ul>
+<li><a class="reference internal" href="#merge-tasks-or-file-names-output-file-extra-parameters"><em>@merge</em> ( <cite>tasks_or_file_names</cite>, <cite>output_file</cite>, [<cite>extra_parameters</cite>,...] )</a></li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="transform.html"
+ title="previous chapter">@transform</a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="subdivide.html"
+ title="next chapter">@subdivide</a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../_sources/decorators/merge.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="originate.html">@originate</a> </li>
+ <li><a href="split.html">@split</a> </li>
+ <li><a href="transform.html">@transform</a> </li>
+ <li><a href="#">@merge</a> </li>
+ </ul>
+
+ <h4><a href="decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="subdivide.html">@subdivide</a> </li>
+ <li><a href="transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="collate.html">@collate</a> </li>
+ <li><a href="collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="graphviz.html">@graphviz</a> </li>
+ <li><a href="mkdir.html">@mkdir</a> </li>
+ <li><a href="follows.html">@follows / mkdir</a> </li>
+ <li><a href="posttask.html">@posttask touch_file</a> </li>
+ <li><a href="active_if.html">@active_if</a> </li>
+ <li><a href="jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="product.html">@product </a> </li>
+ <li><a href="permutations.html">@permutations </a> </li>
+ <li><a href="combinations.html">@combinations </a> </li>
+ <li><a href="combinations_with_replacement.html">@combinations_ _with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="parallel.html">@parallel</a> </li>
+ <li><a href="check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="subdivide.html" title="@subdivide"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="transform.html" title="@transform"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../index.html">Home</a> | </li>
+ <li><a href="../contents.html">Contents</a> | </li>
+ <li><a href="../installation.html">Install</a> | </li>
+ <li><a href="../tutorials/new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../tutorials/new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../faq.html">FAQ</a> | </li>
+ <li><a href="../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../tutorials/new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../gallery.html">Gallery</a> | </li>
+ <li><a href="../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/decorators/mkdir.html b/doc/_build/html/decorators/mkdir.html
new file mode 100644
index 0000000..593b8cb
--- /dev/null
+++ b/doc/_build/html/decorators/mkdir.html
@@ -0,0 +1,422 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>@mkdir — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../_static/jquery.js"></script>
+ <script type="text/javascript" src="../_static/underscore.js"></script>
+ <script type="text/javascript" src="../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../index.html" />
+ <link rel="next" title="@jobs_limit" href="jobs_limit.html" />
+ <link rel="prev" title="@graphviz" href="graphviz.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="jobs_limit.html" title="@jobs_limit"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="graphviz.html" title="@graphviz"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../index.html">Home</a> | </li>
+ <li><a href="../contents.html">Contents</a> | </li>
+ <li><a href="../installation.html">Install</a> | </li>
+ <li><a href="../tutorials/new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../tutorials/new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../faq.html">FAQ</a> | </li>
+ <li><a href="../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../tutorials/new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../gallery.html">Gallery</a> | </li>
+ <li><a href="../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <span class="target" id="decorators-mkdir"></span><div class="admonition seealso" id="index-0">
+<p class="first admonition-title">See also</p>
+<ul class="last simple">
+<li><a class="reference internal" href="decorators.html#decorators"><em>Decorators</em></a> for more decorators</li>
+<li>More on @mkdir in the <tt class="docutils literal"><span class="pre">Ruffus</span></tt> <a class="reference internal" href="../tutorials/new_tutorial/mkdir.html#new-manual-mkdir"><em>Manual</em></a></li>
+<li><a class="reference internal" href="follows.html#decorators-follows"><em>@follows(mkdir(“dir”))</em></a> specifies the creation of a <em>single</em> directory as a task pre-requisite.</li>
+</ul>
+</div>
+<div class="section" id="mkdir">
+<h1>@mkdir<a class="headerlink" href="#mkdir" title="Permalink to this headline">¶</a></h1>
+<div class="section" id="mkdir-tasks-or-file-names-suffix-suffix-string-regex-matching-regex-formatter-matching-formatter-output-pattern">
+<h2><em>@mkdir</em> ( <a class="reference internal" href="#decorators-mkdir-tasks-or-file-names"><cite>tasks_or_file_names</cite></a>, <a class="reference internal" href="indicator_objects.html#decorators-suffix"><em>suffix</em></a><em>(</em><a class="reference internal" href="#decorators-mkdir-suffix-string"><cite>suffix_string</cite></a><em>)</em>| <a class="reference internal" href="indicator_objects.html#decorators-regex"><em>regex</em></a><em>(</em><a class="reference internal" [...]
+<blockquote>
+<div><p><strong>Purpose:</strong></p>
+<blockquote>
+<div><ul class="simple">
+<li>Prepares directories to receive <em>Output</em> files</li>
+<li>Used when <em>Output</em> path names are generated at runtime from <em>Inputs</em>. <strong>mkdir</strong> can make sure these runtime-specified paths exist.</li>
+<li>Directory names are generated from <strong>Input</strong> using string substitution via <a class="reference internal" href="indicator_objects.html#decorators-formatter"><em>formatter()</em></a>, <a class="reference internal" href="indicator_objects.html#decorators-suffix"><em>suffix()</em></a> or <a class="reference internal" href="indicator_objects.html#decorators-regex"><em>regex()</em></a>.</li>
+<li>Behaves essentially like <tt class="docutils literal"><span class="pre">@transform</span></tt> but with its own (internal) function which does the actual work of making a directory</li>
+<li>Does <em>not</em> invoke the host task function to which it is attached</li>
+<li>Makes specified directories using <a class="reference external" href="http://docs.python.org/2/library/os.html#os.makedirs">os.makedirs</a></li>
+<li>Multiple directories can be created in a list</li>
+</ul>
+<div class="admonition note">
+<p class="first admonition-title">Note</p>
+<p>Only missing directories are created.</p>
+<p>In other words, the same directory can be specified multiple times safely without, for example, being recreated repeatedly.</p>
+<p class="last">Sometimes, for pipelines with multiple entry points, this is the only way to make sure that certain working or output
+directories are always created or available <em>before</em> the pipeline runs.</p>
+</div>
+</div></blockquote>
+<p><strong>Simple Example</strong></p>
+<blockquote>
+<div><p>Creates multiple directories per job to hold the results of <a class="reference internal" href="transform.html#decorators-transform"><em>@transform</em></a></p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+
+<span class="c"># initial files</span>
+<span class="nd">@originate</span><span class="p">([</span> <span class="s">'A.start'</span><span class="p">,</span>
+ <span class="s">'B.start'</span><span class="p">])</span>
+<span class="k">def</span> <span class="nf">create_initial_files</span><span class="p">(</span><span class="n">output_file</span><span class="p">):</span>
+ <span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span> <span class="k">as</span> <span class="n">oo</span><span class="p">:</span> <span class="k">pass</span>
+
+
+<span class="hll"><span class="c"># create files without making directories -> ERROR</span>
+</span><span class="nd">@transform</span><span class="p">(</span> <span class="n">create_initial_files</span><span class="p">,</span>
+ <span class="n">formatter</span><span class="p">(),</span>
+ <span class="p">[</span><span class="s">"{path[0]}/{basename[0]}/processed.txt"</span><span class="p">,</span>
+ <span class="s">"{path[0]}/{basename[0]}.tmp/tmp.processed.txt"</span><span class="p">])</span>
+<span class="k">def</span> <span class="nf">create_files_without_mkdir</span><span class="p">(</span><span class="n">input_file</span><span class="p">,</span> <span class="n">output_files</span><span class="p">):</span>
+ <span class="nb">open</span><span class="p">(</span><span class="n">output_files</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="s">"w"</span><span class="p">)</span>
+ <span class="nb">open</span><span class="p">(</span><span class="n">output_files</span><span class="p">[</span><span class="mi">1</span><span class="p">],</span> <span class="s">"w"</span><span class="p">)</span>
+
+
+<span class="hll"><span class="c"># create files after making corresponding directories</span>
+</span><span class="nd">@mkdir</span><span class="p">(</span> <span class="n">create_initial_files</span><span class="p">,</span>
+ <span class="n">formatter</span><span class="p">(),</span>
+ <span class="p">[</span><span class="s">"{path[0]}/{basename[0]}"</span><span class="p">,</span> <span class="c"># create directory</span>
+ <span class="s">"{path[0]}/{basename[0]}.tmp"</span><span class="p">])</span> <span class="c"># create directory.tmp</span>
+<span class="nd">@transform</span><span class="p">(</span> <span class="n">create_initial_files</span><span class="p">,</span>
+ <span class="n">formatter</span><span class="p">(),</span>
+ <span class="p">[</span><span class="s">"{path[0]}/{basename[0]}/processed.txt"</span><span class="p">,</span>
+ <span class="s">"{path[0]}/{basename[0]}.tmp/tmp.processed.txt"</span><span class="p">])</span>
+<span class="k">def</span> <span class="nf">create_files_with_mkdir</span><span class="p">(</span><span class="n">input_file</span><span class="p">,</span> <span class="n">output_files</span><span class="p">):</span>
+ <span class="nb">open</span><span class="p">(</span><span class="n">output_files</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="s">"w"</span><span class="p">)</span>
+ <span class="nb">open</span><span class="p">(</span><span class="n">output_files</span><span class="p">[</span><span class="mi">1</span><span class="p">],</span> <span class="s">"w"</span><span class="p">)</span>
+
+<span class="n">pipeline_run</span><span class="p">([</span><span class="n">create_files_without_mkdir</span><span class="p">])</span>
+<span class="n">pipeline_run</span><span class="p">([</span><span class="n">create_files_with_mkdir</span><span class="p">])</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p>Running without making the directories first gives errors:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="gp">>>> </span><span class="n">pipeline_run</span><span class="p">([</span><span class="n">create_files_without_mkdir</span><span class="p">])</span>
+<span class="go"> Job = [None -> A.start] completed</span>
+<span class="go"> Job = [None -> B.start] completed</span>
+<span class="go">Completed Task = create_initial_files</span>
+
+<span class="go"> Traceback (most recent call last):</span>
+<span class="go"> File "<stdin>", line 1, in <module></span>
+<span class="go"> File "/usr/local/lib/python2.7/dist-packages/ruffus/task.py", line 3738, in pipeline_run</span>
+<span class="go"> raise job_errors</span>
+<span class="go"> ruffus.ruffus_exceptions.RethrownJobError:</span>
+
+<span class="go"> Original exception:</span>
+
+<span class="hll"><span class="gp">>>> </span><span class="c"># Exception #1</span>
+</span><span class="hll"><span class="gp">>>> </span><span class="c"># 'exceptions.IOError([Errno 2] No such file or directory: 'A/processed.txt')' raised in ...</span>
+</span><span class="hll"><span class="gp">>>> </span><span class="c"># Task = def create_files_without_mkdir(...):</span>
+</span><span class="hll"><span class="gp">>>> </span><span class="c"># Job = [A.start -> [processed.txt, tmp.processed.txt]]</span>
+</span></pre></div>
+</div>
+</div></blockquote>
+<p>Running after making the directories first:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="gp">>>> </span><span class="n">pipeline_run</span><span class="p">([</span><span class="n">create_files_with_mkdir</span><span class="p">])</span>
+<span class="go"> Job = [None -> A.start] completed</span>
+<span class="go"> Job = [None -> B.start] completed</span>
+<span class="go">Completed Task = create_initial_files</span>
+<span class="go"> Make directories [A, A.tmp] completed</span>
+<span class="go"> Make directories [B, B.tmp] completed</span>
+<span class="go">Completed Task = (mkdir 1) before create_files_with_mkdir</span>
+<span class="go"> Job = [A.start -> [processed.txt, tmp.processed.txt]] completed</span>
+<span class="go"> Job = [B.start -> [processed.txt, tmp.processed.txt]] completed</span>
+<span class="go">Completed Task = create_files_with_mkdir</span>
+</pre></div>
+</div>
+</div></blockquote>
+</div></blockquote>
+<p><strong>Escaping regular expression patterns</strong></p>
+<blockquote>
+<div>A string like <tt class="docutils literal"><span class="pre">universal.h</span></tt> in <tt class="docutils literal"><span class="pre">add_inputs</span></tt> will be added <em>as is</em>.
+<tt class="docutils literal"><span class="pre">r"\1.h"</span></tt>, however, performs suffix substitution, with the special form <tt class="docutils literal"><span class="pre">r"\1"</span></tt> matching everything up to the suffix.
+Remember to &#8216;escape&#8217; <tt class="docutils literal"><span class="pre">r"\1"</span></tt>, otherwise Ruffus will complain and throw an Exception to remind you.
+The most convenient way is to use a python “raw” string.</div></blockquote>
+<p><strong>Parameters:</strong></p>
+</div></blockquote>
+<blockquote id="decorators-mkdir-tasks-or-file-names">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>tasks_or_file_names</em></dt>
+<dd><p class="first">can be a:</p>
+<ol class="last arabic">
+<li><dl class="first docutils">
+<dt>Task / list of tasks (as in the example above).</dt>
+<dd><p class="first last">File names are taken from the output of the specified task(s)</p>
+</dd>
+</dl>
+</li>
+<li><dl class="first docutils">
+<dt>(Nested) list of file name strings.</dt>
+<dd><dl class="first last docutils">
+<dt>File names containing <tt class="docutils literal"><span class="pre">*[]?</span></tt> will be expanded as a <a class="reference external" href="http://docs.python.org/library/glob.html"><cite>glob</cite></a>.</dt>
+<dd><p class="first last">E.g.:<tt class="docutils literal"><span class="pre">"a.*"</span> <span class="pre">=></span> <span class="pre">"a.1",</span> <span class="pre">"a.2"</span></tt></p>
+</dd>
+</dl>
+</dd>
+</dl>
+</li>
+</ol>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
+<blockquote id="decorators-mkdir-suffix-string">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>suffix_string</em></dt>
+<dd><p class="first">must be wrapped in a <a class="reference internal" href="indicator_objects.html#decorators-suffix"><em>suffix</em></a> indicator object.
+The end of each input file name which matches <tt class="docutils literal"><span class="pre">suffix_string</span></tt> will be replaced by <tt class="docutils literal"><span class="pre">output_pattern</span></tt>.</p>
+<p>Input file names which do not match suffix_string will be ignored</p>
+<p>The non-suffix part of the match can be referred to using the <tt class="docutils literal"><span class="pre">"\1"</span></tt> pattern. This
+can be useful for putting the output in a different directory, for example:</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="nd">@mkdir</span><span class="p">([</span><span class="s">"1.c"</span><span class="p">,</span> <span class="s">"2.c"</span><span class="p">],</span> <span class="n">suffix</span><span class="p">(</span><span class="s">".c"</span><span class="p">),</span> <span class="s">r"my_path/\1.o"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">compile</span><span class="p">(</span><span class="n">infile</span><span class="p">,</span> <span class="n">outfile</span><span class="p">):</span>
+ <span class="k">pass</span>
+</pre></div>
+</div>
+<p>This results in the following function calls:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="c"># 1.c -> my_path/1.o</span>
+<span class="c"># 2.c -> my_path/2.o</span>
+<span class="nb">compile</span><span class="p">(</span><span class="s">"1.c"</span><span class="p">,</span> <span class="s">"my_path/1.o"</span><span class="p">)</span>
+<span class="nb">compile</span><span class="p">(</span><span class="s">"2.c"</span><span class="p">,</span> <span class="s">"my_path/2.o"</span><span class="p">)</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p>For convenience and visual clarity, the <tt class="docutils literal"><span class="pre">"\1"</span></tt> can be omitted from the output parameter.
+However, the <tt class="docutils literal"><span class="pre">"\1"</span></tt> is mandatory for string substitutions in additional parameters:</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="nd">@mkdir</span><span class="p">([</span><span class="s">"1.c"</span><span class="p">,</span> <span class="s">"2.c"</span><span class="p">],</span> <span class="n">suffix</span><span class="p">(</span><span class="s">".c"</span><span class="p">),</span> <span class="p">[</span><span class="s">r"\1.o"</span><span class="p">,</span> <span class="s">".o"</span><span clas [...]
+<span class="k">def</span> <span class="nf">compile</span><span class="p">(</span><span class="n">infile</span><span class="p">,</span> <span class="n">outfile</span><span class="p">):</span>
+ <span class="k">pass</span>
+</pre></div>
+</div>
+<p>Results in the following function calls:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="nb">compile</span><span class="p">(</span><span class="s">"1.c"</span><span class="p">,</span> <span class="p">[</span><span class="s">"1.o"</span><span class="p">,</span> <span class="s">"1.o"</span><span class="p">],</span> <span class="s">"Compiling 1"</span><span class="p">,</span> <span class="s">"verbatim"</span><span class="p">)</span>
+<span class="nb">compile</span><span class="p">(</span><span class="s">"2.c"</span><span class="p">,</span> <span class="p">[</span><span class="s">"2.o"</span><span class="p">,</span> <span class="s">"2.o"</span><span class="p">],</span> <span class="s">"Compiling 2"</span><span class="p">,</span> <span class="s">"verbatim"</span><span class="p">)</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p class="last">Since r”1” is optional for the output parameter, <tt class="docutils literal"><span class="pre">"\1.o"</span></tt> and <tt class="docutils literal"><span class="pre">".o"</span></tt> are equivalent.
+However, strings in other parameters which do not contain r”1” will be included verbatim, much
+like the string <tt class="docutils literal"><span class="pre">"verbatim"</span></tt> in the above example.</p>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
+<blockquote id="decorators-mkdir-matching-regex">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>matching_regex</em></dt>
+<dd><p class="first last">is a python regular expression string, which must be wrapped in
+a <a class="reference internal" href="indicator_objects.html#decorators-regex"><em>regex</em></a> indicator object
+See python <a class="reference external" href="http://docs.python.org/library/re.html">regular expression (re)</a>
+documentation for details of regular expression syntax
+Each output file name is created using regular expression substitution with <tt class="docutils literal"><span class="pre">output_pattern</span></tt></p>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
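+<p>For example, a sketch analogous to the <tt class="docutils literal"><span class="pre">suffix</span></tt> example above, here using <tt class="docutils literal"><span class="pre">regex</span></tt> substitution (hypothetical file names):</p>
+<div class="highlight-python"><div class="highlight"><pre>@mkdir(["1.c", "2.c"], regex(r"(.+)\.c"), r"my_path/\1.o")
+def compile(infile, outfile):
+    pass
+
+# regular expression substitution gives:
+#   1.c -&gt; my_path/1.o
+#   2.c -&gt; my_path/2.o
+</pre></div>
+</div>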
+<blockquote id="decorators-mkdir-matching-formatter">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>matching_formatter</em></dt>
+<dd><p class="first last">a <a class="reference internal" href="indicator_objects.html#decorators-formatter"><em>formatter</em></a> indicator object containing optionally
+a python <a class="reference external" href="http://docs.python.org/library/re.html">regular expression (re)</a>.</p>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
+<blockquote id="decorators-mkdir-output-pattern">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>output_pattern</em></dt>
+<dd><p class="first last">Specifies the resulting output file name(s).</p>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="../contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#">@mkdir</a><ul>
+<li><a class="reference internal" href="#mkdir-tasks-or-file-names-suffix-suffix-string-regex-matching-regex-formatter-matching-formatter-output-pattern"><em>@mkdir</em> ( <cite>tasks_or_file_names</cite>, <tt class="docutils literal"><span class="pre">suffix</span></tt><em>(</em><cite>suffix_string</cite><em>)</em>| <tt class="docutils literal"><span class="pre">regex</span></tt><em>(</em><cite>matching_regex</cite><em>)</em> | <tt class="docutils literal"><span class="pre">formatt [...]
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="graphviz.html"
+ title="previous chapter">@graphviz</a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="jobs_limit.html"
+ title="next chapter">@jobs_limit</a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../_sources/decorators/mkdir.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="originate.html">@originate</a> </li>
+ <li><a href="split.html">@split</a> </li>
+ <li><a href="transform.html">@transform</a> </li>
+ <li><a href="merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="subdivide.html">@subdivide</a> </li>
+ <li><a href="transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="collate.html">@collate</a> </li>
+ <li><a href="collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="graphviz.html">@graphviz</a> </li>
+ <li><a href="#">@mkdir</a> </li>
+ <li><a href="follows.html">@follows / mkdir</a> </li>
+ <li><a href="posttask.html">@posttask touch_file</a> </li>
+ <li><a href="active_if.html">@active_if</a> </li>
+ <li><a href="jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="product.html">@product </a> </li>
+ <li><a href="permutations.html">@permutations </a> </li>
+ <li><a href="combinations.html">@combinations </a> </li>
+ <li><a href="combinations_with_replacement.html">@combinations_ _with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="parallel.html">@parallel</a> </li>
+ <li><a href="check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="jobs_limit.html" title="@jobs_limit"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="graphviz.html" title="@graphviz"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../index.html">Home</a> | </li>
+ <li><a href="../contents.html">Contents</a> | </li>
+ <li><a href="../installation.html">Install</a> | </li>
+ <li><a href="../tutorials/new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../tutorials/new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../faq.html">FAQ</a> | </li>
+ <li><a href="../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../tutorials/new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../gallery.html">Gallery</a> | </li>
+ <li><a href="../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/decorators/originate.html b/doc/_build/html/decorators/originate.html
new file mode 100644
index 0000000..d6e08fc
--- /dev/null
+++ b/doc/_build/html/decorators/originate.html
@@ -0,0 +1,275 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>@originate — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../_static/jquery.js"></script>
+ <script type="text/javascript" src="../_static/underscore.js"></script>
+ <script type="text/javascript" src="../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../index.html" />
+ <link rel="next" title="@split" href="split.html" />
+ <link rel="prev" title="Indicator Objects" href="indicator_objects.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="split.html" title="@split"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="indicator_objects.html" title="Indicator Objects"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../index.html">Home</a> | </li>
+ <li><a href="../contents.html">Contents</a> | </li>
+ <li><a href="../installation.html">Install</a> | </li>
+ <li><a href="../tutorials/new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../tutorials/new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../faq.html">FAQ</a> | </li>
+ <li><a href="../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../tutorials/new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../gallery.html">Gallery</a> | </li>
+ <li><a href="../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <span class="target" id="decorators-originate"></span><div class="admonition seealso" id="index-0">
+<p class="first admonition-title">See also</p>
+<ul class="last simple">
+<li><a class="reference internal" href="decorators.html#decorators"><em>Decorators</em></a> for more decorators</li>
+</ul>
+</div>
+<div class="section" id="originate">
+<h1>@originate<a class="headerlink" href="#originate" title="Permalink to this headline">¶</a></h1>
+<div class="section" id="originate-output-files-extra-parameters">
+<h2><em>@originate</em> ( <a class="reference internal" href="#decorators-originate-output-files"><cite>output_files</cite></a>, [<a class="reference internal" href="#decorators-originate-extra-parameters"><cite>extra_parameters</cite></a>,...] )<a class="headerlink" href="#originate-output-files-extra-parameters" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><dl class="docutils">
+<dt><strong>Purpose:</strong></dt>
+<dd><ul class="first simple">
+<li>Creates (originates) a set of starting files without dependencies from scratch (<em>ex nihilo</em>!)</li>
+<li>Only called to create files which do not exist.</li>
+<li>Invoked once (a separate job is created) per item in the <tt class="docutils literal"><span class="pre">output_files</span></tt> list.</li>
+</ul>
+<div class="last admonition note">
+<p class="first admonition-title">Note</p>
+<p class="last">The first argument for the task function is the <em>Output</em>. There is by definition no
+<em>Input</em> for <tt class="docutils literal"><span class="pre">@originate</span></tt></p>
+</div>
+</dd>
+</dl>
+<p><strong>Example</strong>:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+<span class="nd">@originate</span><span class="p">([</span><span class="s">"a"</span><span class="p">,</span> <span class="s">"b"</span><span class="p">,</span> <span class="s">"c"</span><span class="p">,</span> <span class="s">"d"</span><span class="p">],</span> <span class="s">"extra"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">test</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="n">extra</span><span class="p">):</span>
+ <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+
+<span class="n">pipeline_run</span><span class="p">()</span>
+</pre></div>
+</div>
+<div class="highlight-pycon"><div class="highlight"><pre><span class="gp">>>> </span><span class="n">pipeline_run</span><span class="p">()</span>
+<span class="go"> Job = [None -> a, extra] completed</span>
+<span class="go"> Job = [None -> b, extra] completed</span>
+<span class="go"> Job = [None -> c, extra] completed</span>
+<span class="go"> Job = [None -> d, extra] completed</span>
+<span class="go">Completed Task = test</span>
+
+<span class="hll"><span class="gp">>>> </span><span class="c"># all files exist: nothing to do</span>
+</span><span class="gp">>>> </span><span class="n">pipeline_run</span><span class="p">()</span>
+
+<span class="hll"><span class="gp">>>> </span><span class="c"># delete 'a' so that it is missing</span>
+</span><span class="gp">>>> </span><span class="kn">import</span> <span class="nn">os</span>
+<span class="gp">>>> </span><span class="n">os</span><span class="o">.</span><span class="n">unlink</span><span class="p">(</span><span class="s">"a"</span><span class="p">)</span>
+
+<span class="gp">>>> </span><span class="n">pipeline_run</span><span class="p">()</span>
+<span class="go"> Job = [None -> a, extra] completed</span>
+<span class="go">Completed Task = test</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p><strong>Parameters:</strong></p>
+</div></blockquote>
+<blockquote id="decorators-originate-output-files">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>output_files</em></dt>
+<dd><ul class="first last simple">
+<li>Can be a single file name or a list of files</li>
+<li>Each item in the list is treated as the <em>Output</em> of a separate job (see the sketch after this list)</li>
+</ul>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
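+<p>For example, a minimal sketch (hypothetical file names), assuming that a nested item supplies the multiple <em>Outputs</em> of a single job:</p>
+<div class="highlight-python"><div class="highlight"><pre>from ruffus import *
+
+@originate(["a.start", "b.start", ["c.1.start", "c.2.start"]])
+def create_initial_files(output_file):
+    # jobs 1 and 2 each receive a single file name; job 3 receives the pair
+    outputs = output_file if isinstance(output_file, list) else [output_file]
+    for f in outputs:
+        open(f, "w").close()
+</pre></div>
+</div>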
+<blockquote id="decorators-originate-extra-parameters">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>extra_parameters</em></dt>
+<dd><p class="first last">Any extra parameters are passed verbatim to the task function</p>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="../contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#">@originate</a><ul>
+<li><a class="reference internal" href="#originate-output-files-extra-parameters"><em>@originate</em> ( <cite>output_files</cite>, [<cite>extra_parameters</cite>,...] )</a></li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="indicator_objects.html"
+ title="previous chapter">Indicator Objects</a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="split.html"
+ title="next chapter">@split</a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../_sources/decorators/originate.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="#">@originate</a> </li>
+ <li><a href="split.html">@split</a> </li>
+ <li><a href="transform.html">@transform</a> </li>
+ <li><a href="merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="subdivide.html">@subdivide</a> </li>
+ <li><a href="transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="collate.html">@collate</a> </li>
+ <li><a href="collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="graphviz.html">@graphviz</a> </li>
+ <li><a href="mkdir.html">@mkdir</a> </li>
+ <li><a href="follows.html">@follows / mkdir</a> </li>
+ <li><a href="posttask.html">@posttask touch_file</a> </li>
+ <li><a href="active_if.html">@active_if</a> </li>
+ <li><a href="jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="product.html">@product </a> </li>
+ <li><a href="permutations.html">@permutations </a> </li>
+ <li><a href="combinations.html">@combinations </a> </li>
+ <li><a href="combinations_with_replacement.html">@combinations_ _with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="parallel.html">@parallel</a> </li>
+ <li><a href="check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="split.html" title="@split"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="indicator_objects.html" title="Indicator Objects"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../index.html">Home</a> | </li>
+ <li><a href="../contents.html">Contents</a> | </li>
+ <li><a href="../installation.html">Install</a> | </li>
+ <li><a href="../tutorials/new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../tutorials/new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../faq.html">FAQ</a> | </li>
+ <li><a href="../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../tutorials/new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../gallery.html">Gallery</a> | </li>
+ <li><a href="../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/decorators/parallel.html b/doc/_build/html/decorators/parallel.html
new file mode 100644
index 0000000..162aef2
--- /dev/null
+++ b/doc/_build/html/decorators/parallel.html
@@ -0,0 +1,271 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>@parallel — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../_static/jquery.js"></script>
+ <script type="text/javascript" src="../_static/underscore.js"></script>
+ <script type="text/javascript" src="../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../index.html" />
+ <link rel="next" title="@files" href="files.html" />
+ <link rel="prev" title="@check_if_uptodate" href="check_if_uptodate.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="files.html" title="@files"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="check_if_uptodate.html" title="@check_if_uptodate"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../index.html">Home</a> | </li>
+ <li><a href="../contents.html">Contents</a> | </li>
+ <li><a href="../installation.html">Install</a> | </li>
+ <li><a href="../tutorials/new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../tutorials/new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../faq.html">FAQ</a> | </li>
+ <li><a href="../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../tutorials/new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../gallery.html">Gallery</a> | </li>
+ <li><a href="../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <span class="target" id="decorators-parallel"></span><div class="admonition seealso" id="index-0">
+<p class="first admonition-title">See also</p>
+<ul class="last simple">
+<li><a class="reference internal" href="decorators.html#decorators"><em>Decorators</em></a> for more decorators</li>
+</ul>
+</div>
+<div class="section" id="parallel">
+<h1>@parallel<a class="headerlink" href="#parallel" title="Permalink to this headline">¶</a></h1>
+<div class="section" id="parallel-job-params-job-params-parameter-generating-function">
+<h2><em>@parallel</em> ( [ [<a class="reference internal" href="#decorators-parallel-job-params"><cite>job_params</cite></a>, ...], [<a class="reference internal" href="#decorators-parallel-job-params"><cite>job_params</cite></a>, ...]...] | <a class="reference internal" href="#decorators-parallel-parameter-generating-function"><cite>parameter_generating_function</cite></a>)<a class="headerlink" href="#parallel-job-params-job-params-parameter-generating-function" title="Permalink to [...]
+<blockquote>
+<div><dl class="docutils">
+<dt><strong>Purpose:</strong></dt>
+<dd><p class="first">To apply the (task) function to a set of parameters in parallel without file dependency checking.</p>
+<p class="last">Most useful allied to <a class="reference internal" href="check_if_uptodate.html#decorators-check-if-uptodate"><em>@check_if_uptodate()</em></a></p>
+</dd>
+</dl>
+<p><strong>Example</strong>:</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+<span class="n">parameters</span> <span class="o">=</span> <span class="p">[</span>
+ <span class="p">[</span><span class="s">'A'</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">],</span> <span class="c"># 1st job</span>
+ <span class="p">[</span><span class="s">'B'</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">],</span> <span class="c"># 2nd job</span>
+ <span class="p">[</span><span class="s">'C'</span><span class="p">,</span> <span class="mi">5</span><span class="p">,</span> <span class="mi">6</span><span class="p">],</span> <span class="c"># 3rd job</span>
+ <span class="p">]</span>
+<span class="nd">@parallel</span><span class="p">(</span><span class="n">parameters</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">parallel_task</span><span class="p">(</span><span class="n">name</span><span class="p">,</span> <span class="n">param1</span><span class="p">,</span> <span class="n">param2</span><span class="p">):</span>
+ <span class="n">sys</span><span class="o">.</span><span class="n">stderr</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="s">" Parallel task </span><span class="si">%s</span><span class="s">: "</span> <span class="o">%</span> <span class="n">name</span><span class="p">)</span>
+ <span class="n">sys</span><span class="o">.</span><span class="n">stderr</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="s">"</span><span class="si">%d</span><span class="s"> + </span><span class="si">%d</span><span class="s"> = </span><span class="si">%d</span><span class="se">\\</span><span class="s">n"</span> <span class="o">%</span> <span class="p">(</span><span class="n">param1</span><span class="p">,</span> <span class="n [...]
+
+<span class="n">pipeline_run</span><span class="p">([</span><span class="n">parallel_task</span><span class="p">])</span>
+</pre></div>
+</div>
+<p><strong>Parameters:</strong></p>
+</div></blockquote>
+<blockquote id="decorators-parallel-job-params">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>job_params</em>:</dt>
+<dd><p class="first">Requires a sequence of parameters, one set for each job.</p>
+<p>Each set of parameters can be one or more items in a sequence which will be passed to
+the decorated task function iteratively (or in parallel).</p>
+<p>For example:</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="n">parameters</span> <span class="o">=</span> <span class="p">[</span>
+ <span class="p">[</span><span class="s">'A'</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">],</span> <span class="c"># 1st job</span>
+ <span class="p">[</span><span class="s">'B'</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">],</span> <span class="c"># 2nd job</span>
+ <span class="p">[</span><span class="s">'C'</span><span class="p">,</span> <span class="mi">5</span><span class="p">,</span> <span class="mi">6</span><span class="p">],</span> <span class="c"># 3rd job</span>
+ <span class="p">]</span>
+<span class="nd">@parallel</span><span class="p">(</span><span class="n">parameters</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">parallel_task</span><span class="p">(</span><span class="n">name</span><span class="p">,</span> <span class="n">param1</span><span class="p">,</span> <span class="n">param2</span><span class="p">):</span>
+ <span class="k">pass</span>
+</pre></div>
+</div>
+<p>Will result in the following function calls:</p>
+<div class="last highlight-python"><div class="highlight"><pre><span class="n">parallel_task</span><span class="p">(</span><span class="s">'A'</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">)</span>
+<span class="n">parallel_task</span><span class="p">(</span><span class="s">'B'</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">)</span>
+<span class="n">parallel_task</span><span class="p">(</span><span class="s">'C'</span><span class="p">,</span> <span class="mi">5</span><span class="p">,</span> <span class="mi">6</span><span class="p">)</span>
+</pre></div>
+</div>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
+<blockquote id="decorators-parallel-parameter-generating-function">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>parameter_generating_function</em></dt>
+<dd><ol class="first last arabic simple">
+<li>A generator yielding sets of parameters (as above), one at a time and on the fly (see the sketch after this list)</li>
+<li>A function returning a sequence of parameter sets, as above</li>
+</ol>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
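+<p>For example, a minimal sketch (hypothetical parameters) passing a generator function instead of a pre-built list:</p>
+<div class="highlight-python"><div class="highlight"><pre>from ruffus import *
+
+def generate_parameters():
+    # yields one parameter list per job, on the fly
+    for name, param1, param2 in [("A", 1, 2), ("B", 3, 4), ("C", 5, 6)]:
+        yield [name, param1, param2]
+
+@parallel(generate_parameters)
+def parallel_task(name, param1, param2):
+    pass
+
+pipeline_run([parallel_task])
+</pre></div>
+</div>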
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="../contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#">@parallel</a><ul>
+<li><a class="reference internal" href="#parallel-job-params-job-params-parameter-generating-function"><em>@parallel</em> ( [ [<cite>job_params</cite>, ...], [<cite>job_params</cite>, ...]...] | <cite>parameter_generating_function</cite>)</a></li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="check_if_uptodate.html"
+ title="previous chapter">@check_if_uptodate</a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="files.html"
+ title="next chapter">@files</a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../_sources/decorators/parallel.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="originate.html">@originate</a> </li>
+ <li><a href="split.html">@split</a> </li>
+ <li><a href="transform.html">@transform</a> </li>
+ <li><a href="merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="subdivide.html">@subdivide</a> </li>
+ <li><a href="transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="collate.html">@collate</a> </li>
+ <li><a href="collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="graphviz.html">@graphviz</a> </li>
+ <li><a href="mkdir.html">@mkdir</a> </li>
+ <li><a href="follows.html">@follows / mkdir</a> </li>
+ <li><a href="posttask.html">@posttask touch_file</a> </li>
+ <li><a href="active_if.html">@active_if</a> </li>
+ <li><a href="jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="product.html">@product </a> </li>
+ <li><a href="permutations.html">@permutations </a> </li>
+ <li><a href="combinations.html">@combinations </a> </li>
+ <li><a href="combinations_with_replacement.html">@combinations_ _with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="#">@parallel</a> </li>
+ <li><a href="check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="files.html" title="@files"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="check_if_uptodate.html" title="@check_if_uptodate"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../index.html">Home</a> | </li>
+ <li><a href="../contents.html">Contents</a> | </li>
+ <li><a href="../installation.html">Install</a> | </li>
+ <li><a href="../tutorials/new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../tutorials/new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../faq.html">FAQ</a> | </li>
+ <li><a href="../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../tutorials/new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../gallery.html">Gallery</a> | </li>
+ <li><a href="../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/decorators/permutations.html b/doc/_build/html/decorators/permutations.html
new file mode 100644
index 0000000..299921f
--- /dev/null
+++ b/doc/_build/html/decorators/permutations.html
@@ -0,0 +1,356 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>@permutations — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../_static/jquery.js"></script>
+ <script type="text/javascript" src="../_static/underscore.js"></script>
+ <script type="text/javascript" src="../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../index.html" />
+ <link rel="next" title="@combinations" href="combinations.html" />
+ <link rel="prev" title="@product" href="product.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="combinations.html" title="@combinations"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="product.html" title="@product"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../index.html">Home</a> | </li>
+ <li><a href="../contents.html">Contents</a> | </li>
+ <li><a href="../installation.html">Install</a> | </li>
+ <li><a href="../tutorials/new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../tutorials/new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../faq.html">FAQ</a> | </li>
+ <li><a href="../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../tutorials/new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../gallery.html">Gallery</a> | </li>
+ <li><a href="../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <span class="target" id="decorators-permutations"></span><div class="admonition seealso" id="index-0">
+<p class="first admonition-title">See also</p>
+<ul class="last simple">
+<li><a class="reference internal" href="decorators.html#decorators"><em>Decorators</em></a> for more decorators</li>
+</ul>
+</div>
+<div class="section" id="permutations">
+<h1>@permutations<a class="headerlink" href="#permutations" title="Permalink to this headline">¶</a></h1>
+<div class="section" id="permutations-tasks-or-file-names-formatter-matching-formatter-output-pattern-extra-parameters">
+<h2><em>@permutations</em> ( <a class="reference internal" href="#decorators-permutations-tasks-or-file-names"><cite>tasks_or_file_names</cite></a>, <a class="reference internal" href="indicator_objects.html#decorators-formatter"><em>formatter</em></a><em>(</em><a class="reference internal" href="#decorators-permutations-matching-formatter"><cite>matching_formatter</cite></a><em>)</em>, <a class="reference internal" href="#decorators-permutations-output-pattern"><cite>output_pattern< [...]
+<blockquote>
+<div><p><strong>Purpose:</strong></p>
+<blockquote>
+<div><p>Generates the <strong>permutations</strong> between all the elements of a set of <strong>Input</strong></p>
+<p>The effect is analogous to the python <a class="reference external" href="http://docs.python.org/2/library/itertools.html#itertools.permutations">itertools</a>
+function of the same name:</p>
+<div class="highlight-pycon"><div class="highlight"><pre><span class="gp">>>> </span><span class="kn">from</span> <span class="nn">itertools</span> <span class="kn">import</span> <span class="n">permutations</span>
+<span class="hll"><span class="gp">>>> </span><span class="c"># permutations('ABCD', 2) --> AB AC AD BA BC BD CA CB CD DA DB DC</span>
+</span><span class="gp">>>> </span><span class="p">[</span> <span class="s">""</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">a</span><span class="p">)</span> <span class="k">for</span> <span class="n">a</span> <span class="ow">in</span> <span class="n">permutations</span><span class="p">(</span><span class="s">"ABCD"</span><span class="p">,</span> <span class="mi">2</span><span class="p">)]</span>
+<span class="go">['AB', 'AC', 'AD', 'BA', 'BC', 'BD', 'CA', 'CB', 'CD', 'DA', 'DB', 'DC']</span>
+</pre></div>
+</div>
+<p>Only out of date tasks (comparing input and output files) will be run.</p>
+<p>Output file names and strings in the extra parameters
+are determined from <a class="reference internal" href="#decorators-permutations-tasks-or-file-names"><cite>tasks_or_file_names</cite></a>, i.e. from the output
+of upstream tasks, or a list of file names, after string replacement via
+<a class="reference internal" href="indicator_objects.html#decorators-formatter"><em>formatter</em></a>.</p>
+<p>The replacement strings require an extra level of indirection to refer to
+parsed components:</p>
+<blockquote>
+<div><ol class="arabic simple">
+<li>The first level refers to which <em>set</em> in each tuple of inputs.</li>
+<li>The second level refers to which input file in any particular <em>set</em> of inputs.</li>
+</ol>
+</div></blockquote>
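+<p>For instance, a hypothetical illustration using the input pairs from the example below (with <tt class="docutils literal"><span class="pre">formatter()</span></tt>&#8217;s default parsing, where <em>basename</em> strips the file extension):</p>
+<div class="highlight-python"><div class="highlight"><pre># one permutation job receives this tuple of input sets:
+input_sets = (("A.1_start", "A.2_start"), ("B.1_start", "B.2_start"))
+
+# first index  = which input set; second index = which file within that set
+# "{basename[0][0]}" -&gt; "A"   (basename of "A.1_start", extension stripped)
+# "{basename[1][0]}" -&gt; "B"   (basename of "B.1_start", extension stripped)
+</pre></div>
+</div>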
+</div></blockquote>
+<p><strong>Example</strong>:</p>
+<blockquote>
+<div><p>Calculates the <strong>@permutations</strong> of <strong>A,B,C,D</strong> files</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+<span class="kn">from</span> <span class="nn">ruffus.combinatorics</span> <span class="kn">import</span> <span class="o">*</span>
+
+<span class="c"># initial file pairs</span>
+<span class="nd">@originate</span><span class="p">([</span> <span class="p">[</span><span class="s">'A.1_start'</span><span class="p">,</span> <span class="s">'A.2_start'</span><span class="p">],</span>
+ <span class="p">[</span><span class="s">'B.1_start'</span><span class="p">,</span> <span class="s">'B.2_start'</span><span class="p">],</span>
+ <span class="p">[</span><span class="s">'C.1_start'</span><span class="p">,</span> <span class="s">'C.2_start'</span><span class="p">],</span>
+ <span class="p">[</span><span class="s">'D.1_start'</span><span class="p">,</span> <span class="s">'D.2_start'</span><span class="p">]])</span>
+<span class="k">def</span> <span class="nf">create_initial_files_ABCD</span><span class="p">(</span><span class="n">output_files</span><span class="p">):</span>
+ <span class="k">for</span> <span class="n">output_file</span> <span class="ow">in</span> <span class="n">output_files</span><span class="p">:</span>
+ <span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span> <span class="k">as</span> <span class="n">oo</span><span class="p">:</span> <span class="k">pass</span>
+
+<span class="hll"><span class="c"># @permutations</span>
+</span><span class="nd">@permutations</span><span class="p">(</span><span class="n">create_initial_files_ABCD</span><span class="p">,</span> <span class="c"># Input</span>
+ <span class="n">formatter</span><span class="p">(),</span> <span class="c"># match input files</span>
+
+<span class="hll"> <span class="c"># tuple of 2 at a time</span>
+</span> <span class="mi">2</span><span class="p">,</span>
+
+<span class="hll"> <span class="c"># Output Replacement string</span>
+</span> <span class="s">"{path[0][0]}/"</span>
+ <span class="s">"{basename[0][1]}_vs_"</span>
+ <span class="s">"{basename[1][1]}.permutations"</span><span class="p">,</span>
+
+<span class="hll"> <span class="c"># Extra parameter: path for 1st set of files, 1st file name</span>
+</span> <span class="s">"{path[0][0]}"</span><span class="p">,</span>
+
+<span class="hll"> <span class="c"># Extra parameter</span>
+</span><span class="hll"> <span class="p">[</span><span class="s">"{basename[0][0]}"</span><span class="p">,</span> <span class="c"># basename for 1st set of files, 1st file name</span>
+</span><span class="hll"> <span class="s">"{basename[1][0]}"</span><span class="p">,</span> <span class="c"># 2nd</span>
+</span> <span class="p">])</span>
+<span class="k">def</span> <span class="nf">permutations_task</span><span class="p">(</span><span class="n">input_file</span><span class="p">,</span> <span class="n">output_parameter</span><span class="p">,</span> <span class="n">shared_path</span><span class="p">,</span> <span class="n">basenames</span><span class="p">):</span>
+ <span class="k">print</span> <span class="s">" - "</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">basenames</span><span class="p">)</span>
+
+
+<span class="c">#</span>
+<span class="c"># Run</span>
+<span class="c">#</span>
+<span class="n">pipeline_run</span><span class="p">(</span><span class="n">verbose</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
+</pre></div>
+</div>
+<p>This results in:</p>
+<div class="highlight-pycon"><div class="highlight"><pre><span class="gp">>>> </span><span class="n">pipeline_run</span><span class="p">(</span><span class="n">verbose</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
+
+<span class="go">A - B</span>
+<span class="go">A - C</span>
+<span class="go">A - D</span>
+<span class="go">B - A</span>
+<span class="go">B - C</span>
+<span class="go">B - D</span>
+<span class="go">C - A</span>
+<span class="go">C - B</span>
+<span class="go">C - D</span>
+<span class="go">D - A</span>
+<span class="go">D - B</span>
+<span class="go">D - C</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p><strong>Parameters:</strong></p>
+</div></blockquote>
+<blockquote id="decorators-permutations-tasks-or-file-names">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>tasks_or_file_names</em></dt>
+<dd><p class="first">can be a:</p>
+<ol class="last arabic">
+<li><dl class="first docutils">
+<dt>Task / list of tasks (as in the example above).</dt>
+<dd><p class="first last">File names are taken from the output of the specified task(s)</p>
+</dd>
+</dl>
+</li>
+<li><dl class="first docutils">
+<dt>(Nested) list of file name strings.</dt>
+<dd><dl class="first last docutils">
+<dt>File names containing <tt class="docutils literal"><span class="pre">*[]?</span></tt> will be expanded as a <a class="reference external" href="http://docs.python.org/library/glob.html"><cite>glob</cite></a>.</dt>
+<dd><p class="first last">E.g.:<tt class="docutils literal"><span class="pre">"a.*"</span> <span class="pre">=></span> <span class="pre">"a.1",</span> <span class="pre">"a.2"</span></tt></p>
+</dd>
+</dl>
+</dd>
+</dl>
+</li>
+</ol>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
+<blockquote id="decorators-permutations-matching-formatter">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>matching_formatter</em></dt>
+<dd><p class="first last">a <a class="reference internal" href="indicator_objects.html#decorators-formatter"><em>formatter</em></a> indicator object containing optionally
+a python <a class="reference external" href="http://docs.python.org/library/re.html">regular expression (re)</a>.</p>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
+<blockquote id="decorators-permutations-output-pattern">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>output_pattern</em></dt>
+<dd><p class="first last">Specifies the resulting output file name(s) after string
+substitution</p>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
+<blockquote id="decorators-permutations-extra-parameters">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>extra_parameters</em></dt>
+<dd><p class="first last">Optional extra parameters are passed to the functions after string
+substitution</p>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="../contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#">@permutations</a><ul>
+<li><a class="reference internal" href="#permutations-tasks-or-file-names-formatter-matching-formatter-output-pattern-extra-parameters"><em>@permutations</em> ( <cite>tasks_or_file_names</cite>, <tt class="docutils literal"><span class="pre">formatter</span></tt><em>(</em><cite>matching_formatter</cite><em>)</em>, <cite>output_pattern</cite>, [<cite>extra_parameters</cite>,...] )</a></li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="product.html"
+ title="previous chapter">@product</a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="combinations.html"
+ title="next chapter">@combinations</a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../_sources/decorators/permutations.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="originate.html">@originate</a> </li>
+ <li><a href="split.html">@split</a> </li>
+ <li><a href="transform.html">@transform</a> </li>
+ <li><a href="merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="subdivide.html">@subdivide</a> </li>
+ <li><a href="transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="collate.html">@collate</a> </li>
+ <li><a href="collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="graphviz.html">@graphviz</a> </li>
+ <li><a href="mkdir.html">@mkdir</a> </li>
+ <li><a href="follows.html">@follows / mkdir</a> </li>
+ <li><a href="posttask.html">@posttask touch_file</a> </li>
+ <li><a href="active_if.html">@active_if</a> </li>
+ <li><a href="jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="product.html">@product </a> </li>
+ <li><a href="#">@permutations </a> </li>
+ <li><a href="combinations.html">@combinations </a> </li>
+ <li><a href="combinations_with_replacement.html">@combinations_ _with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="parallel.html">@parallel</a> </li>
+ <li><a href="check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="combinations.html" title="@combinations"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="product.html" title="@product"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../index.html">Home</a> | </li>
+ <li><a href="../contents.html">Contents</a> | </li>
+ <li><a href="../installation.html">Install</a> | </li>
+ <li><a href="../tutorials/new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../tutorials/new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../faq.html">FAQ</a> | </li>
+ <li><a href="../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../tutorials/new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../gallery.html">Gallery</a> | </li>
+ <li><a href="../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/decorators/posttask.html b/doc/_build/html/decorators/posttask.html
new file mode 100644
index 0000000..c6391d9
--- /dev/null
+++ b/doc/_build/html/decorators/posttask.html
@@ -0,0 +1,262 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>@posttask — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../_static/jquery.js"></script>
+ <script type="text/javascript" src="../_static/underscore.js"></script>
+ <script type="text/javascript" src="../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../index.html" />
+ <link rel="next" title="@active_if" href="active_if.html" />
+ <link rel="prev" title="@jobs_limit" href="jobs_limit.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="active_if.html" title="@active_if"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="jobs_limit.html" title="@jobs_limit"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../index.html">Home</a> | </li>
+ <li><a href="../contents.html">Contents</a> | </li>
+ <li><a href="../installation.html">Install</a> | </li>
+ <li><a href="../tutorials/new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../tutorials/new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../faq.html">FAQ</a> | </li>
+ <li><a href="../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../tutorials/new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../gallery.html">Gallery</a> | </li>
+ <li><a href="../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <span class="target" id="decorators-posttask"></span><div class="admonition seealso" id="index-0">
+<p class="first admonition-title">See also</p>
+<ul class="last simple">
+<li><a class="reference internal" href="decorators.html#decorators"><em>Decorators</em></a> for more decorators</li>
+</ul>
+</div>
+<div class="section" id="posttask">
+<h1>@posttask<a class="headerlink" href="#posttask" title="Permalink to this headline">¶</a></h1>
+<div class="section" id="posttask-function-touch-file-file-name">
+<h2><em>@posttask</em> (<a class="reference internal" href="#decorators-posttask-function"><cite>function</cite></a> | <a class="reference internal" href="indicator_objects.html#decorators-touch-file"><em>touch_file</em></a><em>(</em><a class="reference internal" href="#decorators-posttask-file-name"><cite>file_name</cite></a><em>)</em>)<a class="headerlink" href="#posttask-function-touch-file-file-name" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><dl class="docutils">
+<dt><strong>Purpose:</strong></dt>
+<dd>Calls functions to signal the completion of each task</dd>
+</dl>
+<p><strong>Example</strong>:</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+
+<span class="k">def</span> <span class="nf">task_finished</span><span class="p">():</span>
+ <span class="k">print</span> <span class="s">"hooray"</span>
+
+<span class="nd">@posttask</span><span class="p">(</span><span class="n">task_finished</span><span class="p">)</span>
+<span class="nd">@files</span><span class="p">(</span><span class="bp">None</span><span class="p">,</span> <span class="s">"a.1"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">create_if_necessary</span><span class="p">(</span><span class="n">input_file</span><span class="p">,</span> <span class="n">output_file</span><span class="p">):</span>
+ <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+
+<span class="n">pipeline_run</span><span class="p">([</span><span class="n">create_if_necessary</span><span class="p">])</span>
+</pre></div>
+</div>
+<p><strong>Parameters:</strong></p>
+</div></blockquote>
+<blockquote id="decorators-posttask-function">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>function</em>:</dt>
+<dd><p class="first"><tt class="docutils literal"><span class="pre">function()</span></tt> will be called when the ruffus passes through a task.</p>
+<p class="last">This may happen even if all of the jobs are up-to-date:
+when a upstream task is out-of-date, and the execution passes through
+this point in the pipeline</p>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
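+<blockquote>
+<div><p>For example, the completion callback can be used to record when the task finished.
+The following is a minimal, illustrative sketch only: the <tt class="docutils literal"><span class="pre">log_completion()</span></tt>
+helper and the <tt class="docutils literal"><span class="pre">pipeline.log</span></tt> file name are not part of ruffus.</p>
+<div class="highlight-python"><pre>from ruffus import *
+import time
+
+# hypothetical helper: append a time-stamped line to a run log
+def log_completion():
+    with open("pipeline.log", "a") as oo:
+        oo.write("create_if_necessary finished at %s\n" % time.ctime())
+
+@posttask(log_completion)
+@files(None, "a.1")
+def create_if_necessary(input_file, output_file):
+    open(output_file, "w")
+
+pipeline_run([create_if_necessary])</pre>
+</div>
+</div></blockquote>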
+<blockquote id="decorators-posttask-file-name">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>file_name</em></dt>
+<dd><p class="first">Files to be <tt class="docutils literal"><span class="pre">touch</span></tt>-ed after the task is executed.</p>
+<p>This will change the date/time stamp of the <tt class="docutils literal"><span class="pre">file_name</span></tt> to the current date/time.
+If the file does not exist, an empty file will be created.</p>
+<p>Must be wrapped in a <a class="reference internal" href="indicator_objects.html#decorators-touch-file"><em>touch_file</em></a> indicator object:</p>
+<div class="last highlight-python"><div class="highlight"><pre><span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+
+<span class="nd">@posttask</span><span class="p">(</span><span class="n">touch_file</span><span class="p">(</span><span class="s">"task_completed.flag"</span><span class="p">))</span>
+<span class="nd">@files</span><span class="p">(</span><span class="bp">None</span><span class="p">,</span> <span class="s">"a.1"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">create_if_necessary</span><span class="p">(</span><span class="n">input_file</span><span class="p">,</span> <span class="n">output_file</span><span class="p">):</span>
+ <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+
+<span class="n">pipeline_run</span><span class="p">([</span><span class="n">create_if_necessary</span><span class="p">])</span>
+</pre></div>
+</div>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="../contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#">@posttask</a><ul>
+<li><a class="reference internal" href="#posttask-function-touch-file-file-name"><em>@posttask</em> (<cite>function</cite> | <tt class="docutils literal"><span class="pre">touch_file</span></tt><em>(</em><cite>file_name</cite><em>)</em>)</a></li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="jobs_limit.html"
+ title="previous chapter">@jobs_limit</a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="active_if.html"
+ title="next chapter">@active_if</a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../_sources/decorators/posttask.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="originate.html">@originate</a> </li>
+ <li><a href="split.html">@split</a> </li>
+ <li><a href="transform.html">@transform</a> </li>
+ <li><a href="merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="subdivide.html">@subdivide</a> </li>
+ <li><a href="transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="collate.html">@collate</a> </li>
+ <li><a href="collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="graphviz.html">@graphviz</a> </li>
+ <li><a href="mkdir.html">@mkdir</a> </li>
+ <li><a href="follows.html">@follows / mkdir</a> </li>
+ <li><a href="#">@posttask touch_file</a> </li>
+ <li><a href="active_if.html">@active_if</a> </li>
+ <li><a href="jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="product.html">@product </a> </li>
+ <li><a href="permutations.html">@permutations </a> </li>
+ <li><a href="combinations.html">@combinations </a> </li>
+ <li><a href="combinations_with_replacement.html">@combinations_ _with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="parallel.html">@parallel</a> </li>
+ <li><a href="check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="active_if.html" title="@active_if"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="jobs_limit.html" title="@jobs_limit"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../index.html">Home</a> | </li>
+ <li><a href="../contents.html">Contents</a> | </li>
+ <li><a href="../installation.html">Install</a> | </li>
+ <li><a href="../tutorials/new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../tutorials/new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../faq.html">FAQ</a> | </li>
+ <li><a href="../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../tutorials/new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../gallery.html">Gallery</a> | </li>
+ <li><a href="../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/decorators/product.html b/doc/_build/html/decorators/product.html
new file mode 100644
index 0000000..098d430
--- /dev/null
+++ b/doc/_build/html/decorators/product.html
@@ -0,0 +1,393 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>@product — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../_static/jquery.js"></script>
+ <script type="text/javascript" src="../_static/underscore.js"></script>
+ <script type="text/javascript" src="../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../index.html" />
+ <link rel="next" title="@permutations" href="permutations.html" />
+ <link rel="prev" title="@follows" href="follows.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="permutations.html" title="@permutations"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="follows.html" title="@follows"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../index.html">Home</a> | </li>
+ <li><a href="../contents.html">Contents</a> | </li>
+ <li><a href="../installation.html">Install</a> | </li>
+ <li><a href="../tutorials/new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../tutorials/new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../faq.html">FAQ</a> | </li>
+ <li><a href="../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../tutorials/new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../gallery.html">Gallery</a> | </li>
+ <li><a href="../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <span class="target" id="decorators-product"></span><div class="admonition seealso" id="index-0">
+<p class="first admonition-title">See also</p>
+<ul class="last simple">
+<li><a class="reference internal" href="decorators.html#decorators"><em>Decorators</em></a> for more decorators</li>
+</ul>
+</div>
+<div class="section" id="product">
+<h1>@product<a class="headerlink" href="#product" title="Permalink to this headline">¶</a></h1>
+<div class="section" id="product-tasks-or-file-names-formatter-matching-formatter-tasks-or-file-names-formatter-matching-formatter-output-pattern-extra-parameters">
+<h2><em>@product</em> ( <a class="reference internal" href="#decorators-product-tasks-or-file-names"><cite>tasks_or_file_names</cite></a>, <a class="reference internal" href="indicator_objects.html#decorators-formatter"><em>formatter</em></a><em>(</em><a class="reference internal" href="#decorators-product-matching-formatter"><cite>matching_formatter</cite></a><em>)</em>, [<a class="reference internal" href="#decorators-product-tasks-or-file-names"><cite>tasks_or_file_names</cite></a [...]
+<blockquote>
+<div><p><strong>Purpose:</strong></p>
+<blockquote>
+<div><p>Generates the Cartesian <strong>product</strong>, i.e. all vs all comparisons, between sets of input files.</p>
+<p>The effect is analogous to the python <a class="reference external" href="http://docs.python.org/2/library/itertools.html#itertools.product">itertools</a>
+function of the same name, i.e. a nested for loop.</p>
+<div class="highlight-pycon"><div class="highlight"><pre><span class="gp">>>> </span><span class="kn">from</span> <span class="nn">itertools</span> <span class="kn">import</span> <span class="n">product</span>
+<span class="hll"><span class="gp">>>> </span><span class="c"># product('ABC', 'XYZ') --> AX AY AZ BX BY BZ CX CY CZ</span>
+</span><span class="gp">>>> </span><span class="p">[</span> <span class="s">""</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">a</span><span class="p">)</span> <span class="k">for</span> <span class="n">a</span> <span class="ow">in</span> <span class="n">product</span><span class="p">(</span><span class="s">'ABC'</span><span class="p">,</span> <span class="s">'XYZ'</span><span class="p">)]</span>
+<span class="go">['AX', 'AY', 'AZ', 'BX', 'BY', 'BZ', 'CX', 'CY', 'CZ']</span>
+</pre></div>
+</div>
+<p>Only out of date tasks (comparing input and output files) will be run.</p>
+<p>Output file names and strings in the extra parameters
+are determined from <a class="reference internal" href="#decorators-product-tasks-or-file-names"><cite>tasks_or_file_names</cite></a>, i.e. from the output
+of upstream tasks, or a list of file names, after string replacement via
+<a class="reference internal" href="indicator_objects.html#decorators-formatter"><em>formatter</em></a>.</p>
+<p>The replacement strings require an extra level of indirection to refer to
+parsed components:</p>
+<blockquote>
+<div><ol class="arabic simple">
+<li>The first level refers to which <em>set</em> of inputs (e.g. <strong>A,B</strong> or <strong>P,Q</strong> or <strong>X,Y</strong>
+in the following example.)</li>
+<li>The second level refers to which input file in any particular <em>set</em> of inputs.</li>
+</ol>
+</div></blockquote>
+<dl class="docutils">
+<dt>For example, <tt class="docutils literal"><span class="pre">'{basename[2][0]}'</span></tt> is the <a class="reference external" href="http://docs.python.org/2/library/os.path.html#os.path.basename">basename</a> for</dt>
+<dd><ul class="first last simple">
+<li>the third set of inputs (<strong>X,Y</strong>) and</li>
+<li>the first file name string in each <strong>Input</strong> of that set (<tt class="docutils literal"><span class="pre">"x.1_start"</span></tt> and <tt class="docutils literal"><span class="pre">"y.1_start"</span></tt>)</li>
+</ul>
+</dd>
+</dl>
+</div></blockquote>
+<p><strong>Example</strong>:</p>
+<blockquote>
+<div><p>Calculates the <strong>@product</strong> of <strong>A,B</strong> and <strong>P,Q</strong> and <strong>X, Y</strong> files</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+<span class="kn">from</span> <span class="nn">ruffus.combinatorics</span> <span class="kn">import</span> <span class="o">*</span>
+
+<span class="hll"><span class="c"># Three sets of initial files</span>
+</span><span class="nd">@originate</span><span class="p">([</span> <span class="s">'a.start'</span><span class="p">,</span> <span class="s">'b.start'</span><span class="p">])</span>
+<span class="k">def</span> <span class="nf">create_initial_files_ab</span><span class="p">(</span><span class="n">output_file</span><span class="p">):</span>
+ <span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span> <span class="k">as</span> <span class="n">oo</span><span class="p">:</span> <span class="k">pass</span>
+
+<span class="nd">@originate</span><span class="p">([</span> <span class="s">'p.start'</span><span class="p">,</span> <span class="s">'q.start'</span><span class="p">])</span>
+<span class="k">def</span> <span class="nf">create_initial_files_pq</span><span class="p">(</span><span class="n">output_file</span><span class="p">):</span>
+ <span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span> <span class="k">as</span> <span class="n">oo</span><span class="p">:</span> <span class="k">pass</span>
+
+<span class="nd">@originate</span><span class="p">([</span> <span class="p">[</span><span class="s">'x.1_start'</span><span class="p">,</span> <span class="s">'x.2_start'</span><span class="p">],</span>
+ <span class="p">[</span><span class="s">'y.1_start'</span><span class="p">,</span> <span class="s">'y.2_start'</span><span class="p">]</span> <span class="p">])</span>
+<span class="k">def</span> <span class="nf">create_initial_files_xy</span><span class="p">(</span><span class="n">output_file</span><span class="p">):</span>
+ <span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span> <span class="k">as</span> <span class="n">oo</span><span class="p">:</span> <span class="k">pass</span>
+<span class="hll">
+</span><span class="c"># @product</span>
+<span class="hll"><span class="nd">@product</span><span class="p">(</span> <span class="n">create_initial_files_ab</span><span class="p">,</span> <span class="c"># Input</span>
+</span> <span class="n">formatter</span><span class="p">(</span><span class="s">"(.start)$"</span><span class="p">),</span> <span class="c"># match input file set # 1</span>
+
+<span class="hll"> <span class="n">create_initial_files_pq</span><span class="p">,</span> <span class="c"># Input</span>
+</span> <span class="n">formatter</span><span class="p">(</span><span class="s">"(.start)$"</span><span class="p">),</span> <span class="c"># match input file set # 2</span>
+
+<span class="hll"> <span class="n">create_initial_files_xy</span><span class="p">,</span> <span class="c"># Input</span>
+</span> <span class="n">formatter</span><span class="p">(</span><span class="s">"(.start)$"</span><span class="p">),</span> <span class="c"># match input file set # 3</span>
+<span class="hll">
+</span><span class="hll"> <span class="s">"{path[0][0]}/"</span> <span class="c"># Output Replacement string</span>
+</span><span class="hll"> <span class="s">"{basename[0][0]}_vs_"</span> <span class="c">#</span>
+</span><span class="hll"> <span class="s">"{basename[1][0]}_vs_"</span> <span class="c">#</span>
+</span> <span class="s">"{basename[2][0]}.product"</span><span class="p">,</span> <span class="c">#</span>
+<span class="hll">
+</span> <span class="s">"{path[0][0]}"</span><span class="p">,</span> <span class="c"># Extra parameter: path for 1st set of files, 1st file name</span>
+<span class="hll">
+</span><span class="hll"> <span class="p">[</span><span class="s">"{basename[0][0]}"</span><span class="p">,</span> <span class="c"># Extra parameter: basename for 1st set of files, 1st file name</span>
+</span><span class="hll"> <span class="s">"{basename[1][0]}"</span><span class="p">,</span> <span class="c"># 2nd</span>
+</span> <span class="s">"{basename[2][0]}"</span><span class="p">,</span> <span class="c"># 3rd</span>
+ <span class="p">])</span>
+<span class="k">def</span> <span class="nf">product_task</span><span class="p">(</span><span class="n">input_file</span><span class="p">,</span> <span class="n">output_parameter</span><span class="p">,</span> <span class="n">shared_path</span><span class="p">,</span> <span class="n">basenames</span><span class="p">):</span>
+ <span class="k">print</span> <span class="s">"# basenames = "</span><span class="p">,</span> <span class="s">" "</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">basenames</span><span class="p">)</span>
+ <span class="k">print</span> <span class="s">"input_parameter = "</span><span class="p">,</span> <span class="n">input_file</span>
+ <span class="k">print</span> <span class="s">"output_parameter = "</span><span class="p">,</span> <span class="n">output_parameter</span><span class="p">,</span> <span class="s">"</span><span class="se">\n</span><span class="s">"</span>
+
+
+<span class="c">#</span>
+<span class="c"># Run</span>
+<span class="c">#</span>
+<span class="n">pipeline_run</span><span class="p">(</span><span class="n">verbose</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
+</pre></div>
+</div>
+<p>This results in:</p>
+<div class="highlight-pycon"><div class="highlight"><pre><span class="gp">>>> </span><span class="n">pipeline_run</span><span class="p">(</span><span class="n">verbose</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
+<span class="hll">
+</span><span class="go"># basenames = a p x</span>
+<span class="go">input_parameter = ('a.start', 'p.start', 'x.start')</span>
+<span class="go">output_parameter = /home/lg/temp/a_vs_p_vs_x.product</span>
+<span class="hll">
+</span><span class="go"># basenames = a p y</span>
+<span class="go">input_parameter = ('a.start', 'p.start', 'y.start')</span>
+<span class="go">output_parameter = /home/lg/temp/a_vs_p_vs_y.product</span>
+<span class="hll">
+</span><span class="go"># basenames = a q x</span>
+<span class="go">input_parameter = ('a.start', 'q.start', 'x.start')</span>
+<span class="go">output_parameter = /home/lg/temp/a_vs_q_vs_x.product</span>
+<span class="hll">
+</span><span class="go"># basenames = a q y</span>
+<span class="go">input_parameter = ('a.start', 'q.start', 'y.start')</span>
+<span class="go">output_parameter = /home/lg/temp/a_vs_q_vs_y.product</span>
+<span class="hll">
+</span><span class="go"># basenames = b p x</span>
+<span class="go">input_parameter = ('b.start', 'p.start', 'x.start')</span>
+<span class="go">output_parameter = /home/lg/temp/b_vs_p_vs_x.product</span>
+<span class="hll">
+</span><span class="go"># basenames = b p y</span>
+<span class="go">input_parameter = ('b.start', 'p.start', 'y.start')</span>
+<span class="go">output_parameter = /home/lg/temp/b_vs_p_vs_y.product</span>
+<span class="hll">
+</span><span class="go"># basenames = b q x</span>
+<span class="go">input_parameter = ('b.start', 'q.start', 'x.start')</span>
+<span class="go">output_parameter = /home/lg/temp/b_vs_q_vs_x.product</span>
+<span class="hll">
+</span><span class="go"># basenames = b q y</span>
+<span class="go">input_parameter = ('b.start', 'q.start', 'y.start')</span>
+<span class="go">output_parameter = /home/lg/temp/b_vs_q_vs_y.product</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p><strong>Parameters:</strong></p>
+</div></blockquote>
+<blockquote id="decorators-product-tasks-or-file-names">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>tasks_or_file_names</em></dt>
+<dd><p class="first">can be a:</p>
+<ol class="last arabic">
+<li><dl class="first docutils">
+<dt>Task / list of tasks (as in the example above).</dt>
+<dd><p class="first last">File names are taken from the output of the specified task(s)</p>
+</dd>
+</dl>
+</li>
+<li><dl class="first docutils">
+<dt>(Nested) list of file name strings.</dt>
+<dd><dl class="first last docutils">
+<dt>File names containing <tt class="docutils literal"><span class="pre">*[]?</span></tt> will be expanded as a <a class="reference external" href="http://docs.python.org/library/glob.html"><cite>glob</cite></a>.</dt>
+<dd><p class="first last">E.g.:<tt class="docutils literal"><span class="pre">"a.*"</span> <span class="pre">=></span> <span class="pre">"a.1",</span> <span class="pre">"a.2"</span></tt></p>
+</dd>
+</dl>
+</dd>
+</dl>
+</li>
+</ol>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
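+<blockquote>
+<div><p><em>Inputs</em> can therefore mix upstream tasks, explicit file names and glob patterns.
+A minimal, purely illustrative sketch (the <tt class="docutils literal"><span class="pre">.start</span></tt> and
+<tt class="docutils literal"><span class="pre">.other</span></tt> files are assumed to exist already):</p>
+<div class="highlight-python"><pre># hedged sketch: an explicit file list and a glob as the two Input sets
+@product(["a.start", "b.start"], formatter(".start$"),
+         "*.other",              formatter(".other$"),
+         "{basename[0][0]}_vs_{basename[1][0]}.product")
+def compare_task(input_files, output_file):
+    pass</pre>
+</div>
+</div></blockquote>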
+<blockquote id="decorators-product-matching-formatter">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>matching_formatter</em></dt>
+<dd><p class="first last">a <a class="reference internal" href="indicator_objects.html#decorators-formatter"><em>formatter</em></a> indicator object containing optionally
+a python <a class="reference external" href="http://docs.python.org/library/re.html">regular expression (re)</a>.</p>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
+<blockquote id="decorators-product-output-pattern">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>output_pattern</em></dt>
+<dd><p class="first last">Specifies the resulting output file name(s) after string
+substitution</p>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
+<blockquote id="decorators-product-extra-parameters">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>extra_parameters</em></dt>
+<dd><p class="first last">Optional extra parameters are passed to the functions after string
+substitution</p>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="../contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#">@product</a><ul>
+<li><a class="reference internal" href="#product-tasks-or-file-names-formatter-matching-formatter-tasks-or-file-names-formatter-matching-formatter-output-pattern-extra-parameters"><em>@product</em> ( <cite>tasks_or_file_names</cite>, <tt class="docutils literal"><span class="pre">formatter</span></tt><em>(</em><cite>matching_formatter</cite><em>)</em>, [<cite>tasks_or_file_names</cite>, <tt class="docutils literal"><span class="pre">formatter</span></tt><em>(</em><cite>matching_forma [...]
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="follows.html"
+ title="previous chapter">@follows</a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="permutations.html"
+ title="next chapter">@permutations</a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../_sources/decorators/product.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="originate.html">@originate</a> </li>
+ <li><a href="split.html">@split</a> </li>
+ <li><a href="transform.html">@transform</a> </li>
+ <li><a href="merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="subdivide.html">@subdivide</a> </li>
+ <li><a href="transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="collate.html">@collate</a> </li>
+ <li><a href="collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="graphviz.html">@graphviz</a> </li>
+ <li><a href="mkdir.html">@mkdir</a> </li>
+ <li><a href="follows.html">@follows / mkdir</a> </li>
+ <li><a href="posttask.html">@posttask touch_file</a> </li>
+ <li><a href="active_if.html">@active_if</a> </li>
+ <li><a href="jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="#">@product </a> </li>
+ <li><a href="permutations.html">@permutations </a> </li>
+ <li><a href="combinations.html">@combinations </a> </li>
+ <li><a href="combinations_with_replacement.html">@combinations_ _with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="parallel.html">@parallel</a> </li>
+ <li><a href="check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="permutations.html" title="@permutations"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="follows.html" title="@follows"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../index.html">Home</a> | </li>
+ <li><a href="../contents.html">Contents</a> | </li>
+ <li><a href="../installation.html">Install</a> | </li>
+ <li><a href="../tutorials/new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../tutorials/new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../faq.html">FAQ</a> | </li>
+ <li><a href="../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../tutorials/new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../gallery.html">Gallery</a> | </li>
+ <li><a href="../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/decorators/split.html b/doc/_build/html/decorators/split.html
new file mode 100644
index 0000000..f2928ea
--- /dev/null
+++ b/doc/_build/html/decorators/split.html
@@ -0,0 +1,295 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>@split — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../_static/jquery.js"></script>
+ <script type="text/javascript" src="../_static/underscore.js"></script>
+ <script type="text/javascript" src="../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../index.html" />
+ <link rel="next" title="@transform" href="transform.html" />
+ <link rel="prev" title="@originate" href="originate.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="transform.html" title="@transform"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="originate.html" title="@originate"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../index.html">Home</a> | </li>
+ <li><a href="../contents.html">Contents</a> | </li>
+ <li><a href="../installation.html">Install</a> | </li>
+ <li><a href="../tutorials/new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../tutorials/new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../faq.html">FAQ</a> | </li>
+ <li><a href="../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../tutorials/new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../gallery.html">Gallery</a> | </li>
+ <li><a href="../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <span class="target" id="decorators-split"></span><div class="admonition seealso" id="index-0">
+<p class="first admonition-title">See also</p>
+<ul class="last simple">
+<li><a class="reference internal" href="decorators.html#decorators"><em>Decorators</em></a> for more decorators</li>
+</ul>
+</div>
+<div class="section" id="split">
+<h1>@split<a class="headerlink" href="#split" title="Permalink to this headline">¶</a></h1>
+<div class="section" id="split-tasks-or-file-names-output-files-extra-parameters">
+<h2><em>@split</em> ( <a class="reference internal" href="#decorators-split-tasks-or-file-names"><cite>tasks_or_file_names</cite></a>, <a class="reference internal" href="#decorators-split-output-files"><cite>output_files</cite></a>, [<a class="reference internal" href="#decorators-split-extra-parameters"><cite>extra_parameters</cite></a>,...] )<a class="headerlink" href="#split-tasks-or-file-names-output-files-extra-parameters" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><dl class="docutils">
+<dt><strong>Purpose:</strong></dt>
+<dd><div class="first last line-block">
+<div class="line">Splits a single set of input files into multiple output file names, where the number of
+output files may not be known beforehand.</div>
+<div class="line">Only out of date tasks (comparing input and output files) will be run</div>
+</div>
+</dd>
+</dl>
+<p><strong>Example</strong>:</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="nd">@split</span><span class="p">(</span><span class="s">"big_file"</span><span class="p">,</span> <span class="s">'*.little_files'</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">split_big_to_small</span><span class="p">(</span><span class="n">input_file</span><span class="p">,</span> <span class="n">output_files</span><span class="p">):</span>
+ <span class="k">print</span> <span class="s">"input_file = </span><span class="si">%s</span><span class="s">"</span> <span class="o">%</span> <span class="n">input_file</span>
+    <span class="k">print</span> <span class="s">"output_file = </span><span class="si">%s</span><span class="s">"</span> <span class="o">%</span> <span class="n">output_files</span>
+</pre></div>
+</div>
+<blockquote>
+<div><p>will produce:</p>
+<div class="highlight-python"><pre>input_file = big_file
+output_file = *.little_files</pre>
+</div>
+</div></blockquote>
+<p><strong>Parameters:</strong></p>
+</div></blockquote>
+<blockquote id="decorators-split-tasks-or-file-names">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>tasks_or_file_names</em></dt>
+<dd><p class="first">can be a:</p>
+<ol class="last arabic">
+<li><p class="first">(Nested) list of file name strings (as in the example above).</p>
+<blockquote>
+<div><div class="line-block">
+<div class="line">File names containing <tt class="docutils literal"><span class="pre">*[]?</span></tt> will be expanded as a <a class="reference external" href="http://docs.python.org/library/glob.html"><cite>glob</cite></a>.</div>
+<div class="line">E.g.:<tt class="docutils literal"><span class="pre">"a.*"</span> <span class="pre">=></span> <span class="pre">"a.1",</span> <span class="pre">"a.2"</span></tt></div>
+</div>
+</div></blockquote>
+</li>
+<li><p class="first">Task / list of tasks.</p>
+<blockquote>
+<div><p>File names are taken from the output of the specified task(s)</p>
+</div></blockquote>
+</li>
+</ol>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
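+<blockquote>
+<div><p>When a task is given instead of file names, <em>@split</em> takes its <em>Input</em> from that task's output.
+A hedged sketch (the task and suffix names below are illustrative only):</p>
+<div class="highlight-python"><pre># hypothetical sketch: Input taken from the output of an upstream task
+@split(split_big_to_small, "*.little_pieces")
+def split_small_to_pieces(input_files, output_files):
+    pass</pre>
+</div>
+</div></blockquote>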
+<blockquote id="decorators-split-output-files">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>output_files</em></dt>
+<dd><p class="first">Specifies the resulting output file name(s).</p>
+<div class="line-block">
+<div class="line">These are used <strong>only</strong> to check if the task is up to date.</div>
+<div class="line">Normally you would use either a <a class="reference external" href="http://docs.python.org/library/glob.html"><cite>glob</cite></a> (e.g. <tt class="docutils literal"><span class="pre">*.little_files</span></tt> as above) or a “sentinel file”
+to indicate that the task has completed successfully.</div>
+<div class="line">You can of course do both:</div>
+</div>
+<blockquote class="last">
+<div><div class="highlight-python"><div class="highlight"><pre><span class="nd">@split</span><span class="p">(</span><span class="s">"big_file"</span><span class="p">,</span> <span class="p">[</span><span class="s">"sentinel.file"</span><span class="p">,</span> <span class="s">"*.little_files"</span><span class="p">])</span>
+<span class="k">def</span> <span class="nf">split_big_to_small</span><span class="p">(</span><span class="n">input_file</span><span class="p">,</span> <span class="n">output_files</span><span class="p">):</span>
+ <span class="k">pass</span>
+</pre></div>
+</div>
+</div></blockquote>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
+<blockquote id="decorators-split-extra-parameters">
+<div><ul>
+<li><dl class="first docutils">
+<dt>[<em>extra_parameters, ...</em>]</dt>
+<dd><p class="first last">Any extra parameters are passed verbatim to the task function</p>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
+</div>
+</div>
+<div class="section" id="split-with-regex-add-inputs-and-inputs">
+<h1>@split with <tt class="docutils literal"><span class="pre">regex(...)</span></tt>, <tt class="docutils literal"><span class="pre">add_inputs</span></tt> and <tt class="docutils literal"><span class="pre">inputs</span></tt><a class="headerlink" href="#split-with-regex-add-inputs-and-inputs" title="Permalink to this headline">¶</a></h1>
+<blockquote>
+<div>This deprecated syntax is a synonym for <a class="reference internal" href="subdivide.html#decorators-subdivide"><em>@subdivide</em></a>.</div></blockquote>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="../contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#">@split</a><ul>
+<li><a class="reference internal" href="#split-tasks-or-file-names-output-files-extra-parameters"><em>@split</em> ( <cite>tasks_or_file_names</cite>, <cite>output_files</cite>, [<cite>extra_parameters</cite>,...] )</a></li>
+</ul>
+</li>
+<li><a class="reference internal" href="#split-with-regex-add-inputs-and-inputs">@split with <tt class="docutils literal"><span class="pre">regex(...)</span></tt>, <tt class="docutils literal"><span class="pre">add_inputs</span></tt> and <tt class="docutils literal"><span class="pre">inputs</span></tt></a></li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="originate.html"
+ title="previous chapter">@originate</a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="transform.html"
+ title="next chapter">@transform</a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../_sources/decorators/split.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="originate.html">@originate</a> </li>
+ <li><a href="#">@split</a> </li>
+ <li><a href="transform.html">@transform</a> </li>
+ <li><a href="merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="subdivide.html">@subdivide</a> </li>
+ <li><a href="transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="collate.html">@collate</a> </li>
+ <li><a href="collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="graphviz.html">@graphviz</a> </li>
+ <li><a href="mkdir.html">@mkdir</a> </li>
+ <li><a href="follows.html">@follows / mkdir</a> </li>
+ <li><a href="posttask.html">@posttask touch_file</a> </li>
+ <li><a href="active_if.html">@active_if</a> </li>
+ <li><a href="jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="product.html">@product </a> </li>
+ <li><a href="permutations.html">@permutations </a> </li>
+ <li><a href="combinations.html">@combinations </a> </li>
+ <li><a href="combinations_with_replacement.html">@combinations_ _with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="parallel.html">@parallel</a> </li>
+ <li><a href="check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="transform.html" title="@transform"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="originate.html" title="@originate"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../index.html">Home</a> | </li>
+ <li><a href="../contents.html">Contents</a> | </li>
+ <li><a href="../installation.html">Install</a> | </li>
+ <li><a href="../tutorials/new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../tutorials/new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../faq.html">FAQ</a> | </li>
+ <li><a href="../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../tutorials/new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../gallery.html">Gallery</a> | </li>
+ <li><a href="../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/decorators/subdivide.html b/doc/_build/html/decorators/subdivide.html
new file mode 100644
index 0000000..e630991
--- /dev/null
+++ b/doc/_build/html/decorators/subdivide.html
@@ -0,0 +1,402 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>@subdivide — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../_static/jquery.js"></script>
+ <script type="text/javascript" src="../_static/underscore.js"></script>
+ <script type="text/javascript" src="../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../index.html" />
+ <link rel="next" title="@transform with add_inputs and inputs" href="transform_ex.html" />
+ <link rel="prev" title="@merge" href="merge.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="transform_ex.html" title="@transform with add_inputs and inputs"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="merge.html" title="@merge"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../index.html">Home</a> | </li>
+ <li><a href="../contents.html">Contents</a> | </li>
+ <li><a href="../installation.html">Install</a> | </li>
+ <li><a href="../tutorials/new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../tutorials/new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../faq.html">FAQ</a> | </li>
+ <li><a href="../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../tutorials/new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../gallery.html">Gallery</a> | </li>
+ <li><a href="../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <span class="target" id="decorators-subdivide"></span><div class="admonition seealso" id="index-0">
+<p class="first admonition-title">See also</p>
+<ul class="last simple">
+<li><a class="reference internal" href="decorators.html#decorators"><em>Decorators</em></a> for more decorators</li>
+</ul>
+</div>
+<div class="section" id="subdivide">
+<h1>@subdivide<a class="headerlink" href="#subdivide" title="Permalink to this headline">¶</a></h1>
+<div class="section" id="subdivide-tasks-or-file-names-regex-matching-regex-formatter-matching-formatter-inputs-input-pattern-or-glob-add-inputs-input-pattern-or-glob-output-pattern-extra-parameters">
+<h2><em>@subdivide</em> ( <a class="reference internal" href="#decorators-subdivide-tasks-or-file-names"><cite>tasks_or_file_names</cite></a>, <a class="reference internal" href="indicator_objects.html#decorators-regex"><em>regex</em></a><em>(</em><a class="reference internal" href="#decorators-subdivide-matching-regex"><cite>matching_regex</cite></a><em>)</em> | <a class="reference internal" href="indicator_objects.html#decorators-formatter"><em>formatter</em></a><em>(</em><a class [...]
+<blockquote>
+<div><p><strong>Purpose:</strong></p>
+<blockquote>
+<div><ul>
+<li><p class="first">Subdivides a set of <em>Inputs</em> each further into multiple <em>Outputs</em>.</p>
+</li>
+<li><p class="first"><strong>Many to Even More</strong> operator</p>
+</li>
+<li><p class="first">The number of files in each <em>Output</em> can be set at runtime by the use of globs</p>
+</li>
+<li><p class="first">Output file names are specified using the <a class="reference internal" href="indicator_objects.html#decorators-formatter"><em>formatter</em></a> or <a class="reference internal" href="indicator_objects.html#decorators-regex"><em>regex</em></a> indicators from <a class="reference internal" href="#decorators-subdivide-tasks-or-file-names"><cite>tasks_or_file_names</cite></a>, i.e. from the output
+of specified tasks, or a list of file names, or a <a class="reference external" href="http://docs.python.org/library/glob.html"><cite>glob</cite></a> matching pattern.</p>
+</li>
+<li><dl class="first docutils">
+<dt>Additional inputs or dependencies can be added dynamically to the task:</dt>
+<dd><p class="first"><a class="reference internal" href="indicator_objects.html#decorators-add-inputs"><em>add_inputs</em></a> nests the the original input parameters in a list before adding additional dependencies.</p>
+<p class="last"><a class="reference internal" href="indicator_objects.html#decorators-inputs"><em>inputs</em></a> replaces the original input parameters wholescale.</p>
+</dd>
+</dl>
+</li>
+<li><p class="first">Only out of date tasks (comparing input and output files) will be run.</p>
+</li>
+</ul>
+<div class="admonition note">
+<p class="first admonition-title">Note</p>
+<p class="last">The use of <strong>split</strong> is a synonym for subdivide is deprecated.</p>
+</div>
+</div></blockquote>
+<p><strong>Example</strong>:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+<span class="kn">from</span> <span class="nn">random</span> <span class="kn">import</span> <span class="n">randint</span>
+<span class="kn">from</span> <span class="nn">random</span> <span class="kn">import</span> <span class="n">os</span>
+
+<span class="nd">@originate</span><span class="p">([</span><span class="s">'0.start'</span><span class="p">,</span> <span class="s">'1.start'</span><span class="p">,</span> <span class="s">'2.start'</span><span class="p">])</span>
+<span class="k">def</span> <span class="nf">create_files</span><span class="p">(</span><span class="n">output_file</span><span class="p">):</span>
+ <span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">):</span>
+ <span class="k">pass</span>
+
+
+<span class="c">#</span>
+<span class="hll"><span class="c"># Subdivide each of 3 start files further into [NNN1, NNN2, NNN3] number of files</span>
+</span><span class="hll"><span class="c"># where NNN1, NNN2, NNN3 are determined at run time</span>
+</span><span class="c">#</span>
+<span class="nd">@subdivide</span><span class="p">(</span><span class="n">create_files</span><span class="p">,</span> <span class="n">formatter</span><span class="p">(),</span>
+ <span class="s">"{path[0]}/{basename[0]}.*.step1"</span><span class="p">,</span> <span class="c"># Output parameter: Glob matches any number of output file names</span>
+ <span class="s">"{path[0]}/{basename[0]}"</span><span class="p">)</span> <span class="c"># Extra parameter: Append to this for output file names</span>
+<span class="k">def</span> <span class="nf">subdivide_files</span><span class="p">(</span><span class="n">input_file</span><span class="p">,</span> <span class="n">output_files</span><span class="p">,</span> <span class="n">output_file_name_root</span><span class="p">):</span>
+ <span class="c">#</span>
+<span class="hll"> <span class="c"># IMPORTANT: cleanup rubbish from previous run first</span>
+</span> <span class="c">#</span>
+ <span class="k">for</span> <span class="n">oo</span> <span class="ow">in</span> <span class="n">output_files</span><span class="p">:</span>
+ <span class="n">os</span><span class="o">.</span><span class="n">unlink</span><span class="p">(</span><span class="n">oo</span><span class="p">)</span>
+ <span class="c"># The number of output files is decided at run time</span>
+ <span class="n">number_of_output_files</span> <span class="o">=</span> <span class="n">randint</span><span class="p">(</span><span class="mi">2</span><span class="p">,</span><span class="mi">4</span><span class="p">)</span>
+ <span class="k">for</span> <span class="n">ii</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">number_of_output_files</span><span class="p">):</span>
+ <span class="n">output_file_name</span> <span class="o">=</span> <span class="s">"{output_file_name_root}.{ii}.step1"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="o">**</span><span class="nb">locals</span><span class="p">())</span>
+ <span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">output_file_name</span><span class="p">,</span> <span class="s">"w"</span><span class="p">):</span>
+ <span class="k">pass</span>
+
+
+<span class="c">#</span>
+<span class="c"># Each output of subdivide_files results in a separate job for downstream tasks</span>
+<span class="c">#</span>
+<span class="nd">@transform</span><span class="p">(</span><span class="n">subdivide_files</span><span class="p">,</span> <span class="n">suffix</span><span class="p">(</span><span class="s">".step1"</span><span class="p">),</span> <span class="s">".step2"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">analyse_files</span><span class="p">(</span><span class="n">input_file</span><span class="p">,</span> <span class="n">output_file_name</span><span class="p">):</span>
+ <span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">output_file_name</span><span class="p">,</span> <span class="s">"w"</span><span class="p">):</span>
+ <span class="k">pass</span>
+
+<span class="n">pipeline_run</span><span class="p">()</span>
+</pre></div>
+</div>
+<div class="highlight-pycon"><div class="highlight"><pre><span class="gp">>>> </span><span class="n">pipeline_run</span><span class="p">()</span>
+<span class="go"> Job = [None -> 0.start] completed</span>
+<span class="go"> Job = [None -> 1.start] completed</span>
+<span class="go"> Job = [None -> 2.start] completed</span>
+<span class="go">Completed Task = create_files</span>
+<span class="go"> Job = [0.start -> 0.*.step1, 0] completed</span>
+<span class="go"> Job = [1.start -> 1.*.step1, 1] completed</span>
+<span class="go"> Job = [2.start -> 2.*.step1, 2] completed</span>
+<span class="go">Completed Task = subdivide_files</span>
+<span class="go"> Job = [0.0.step1 -> 0.0.step2] completed</span>
+<span class="go"> Job = [0.1.step1 -> 0.1.step2] completed</span>
+<span class="go"> Job = [0.2.step1 -> 0.2.step2] completed</span>
+<span class="go"> Job = [1.0.step1 -> 1.0.step2] completed</span>
+<span class="go"> Job = [1.1.step1 -> 1.1.step2] completed</span>
+<span class="go"> Job = [1.2.step1 -> 1.2.step2] completed</span>
+<span class="go"> Job = [1.3.step1 -> 1.3.step2] completed</span>
+<span class="go"> Job = [2.0.step1 -> 2.0.step2] completed</span>
+<span class="go"> Job = [2.1.step1 -> 2.1.step2] completed</span>
+<span class="go"> Job = [2.2.step1 -> 2.2.step2] completed</span>
+<span class="go"> Job = [2.3.step1 -> 2.3.step2] completed</span>
+<span class="go">Completed Task = analyse_files</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p><strong>Parameters:</strong></p>
+</div></blockquote>
+<blockquote id="decorators-subdivide-tasks-or-file-names">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>tasks_or_file_names</em></dt>
+<dd><p class="first">can be a:</p>
+<ol class="last arabic">
+<li><dl class="first docutils">
+<dt>Task / list of tasks (as in the example above).</dt>
+<dd><p class="first last">File names are taken from the output of the specified task(s)</p>
+</dd>
+</dl>
+</li>
+<li><dl class="first docutils">
+<dt>(Nested) list of file name strings.</dt>
+<dd><dl class="first last docutils">
+<dt>File names containing <tt class="docutils literal"><span class="pre">*[]?</span></tt> will be expanded as a <a class="reference external" href="http://docs.python.org/library/glob.html"><cite>glob</cite></a>.</dt>
+<dd><p class="first last">E.g.:<tt class="docutils literal"><span class="pre">"a.*"</span> <span class="pre">=></span> <span class="pre">"a.1",</span> <span class="pre">"a.2"</span></tt></p>
+</dd>
+</dl>
+</dd>
+</dl>
+</li>
+</ol>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
+<blockquote id="decorators-subdivide-matching-regex">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>matching_regex</em></dt>
+<dd><p class="first last">is a python regular expression string, which must be wrapped in
+a <a class="reference internal" href="indicator_objects.html#decorators-regex"><em>regex</em></a> indicator object
+See python <a class="reference external" href="http://docs.python.org/library/re.html">regular expression (re)</a>
+documentation for details of regular expression syntax</p>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
+<blockquote id="decorators-subdivide-matching-formatter">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>matching_formatter</em></dt>
+<dd><p class="first last">a <a class="reference internal" href="indicator_objects.html#decorators-formatter"><em>formatter</em></a> indicator object containing optionally
+a python <a class="reference external" href="http://docs.python.org/library/re.html">regular expression (re)</a>.</p>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
+<blockquote id="decorators-subdivide-output-pattern">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>output_pattern</em></dt>
+<dd><p class="first last">Specifies the resulting output file name(s). Can include glob patterns.
+Strings are subject to <a class="reference internal" href="indicator_objects.html#decorators-regex"><em>regex</em></a> or <a class="reference internal" href="indicator_objects.html#decorators-formatter"><em>formatter</em></a>
+substitution.</p>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
+<blockquote id="decorators-subdivide-input-pattern-or-glob">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>input_pattern</em></dt>
+<dd><p class="first">Specifies the resulting input(s) to each job.
+Must be wrapped in an <a class="reference internal" href="indicator_objects.html#decorators-inputs"><em>inputs</em></a> or an <a class="reference internal" href="indicator_objects.html#decorators-add-inputs"><em>add_inputs</em></a> indicator object (see the sketch after this parameter list).</p>
+<p>Can be a:</p>
+<ol class="arabic">
+<li><dl class="first docutils">
+<dt>Task / list of tasks (as in the example above).</dt>
+<dd><p class="first last">File names are taken from the output of the specified task(s)</p>
+</dd>
+</dl>
+</li>
+<li><p class="first">(Nested) list of file name strings.</p>
+</li>
+</ol>
+<p class="last">Strings are subject to <a class="reference internal" href="indicator_objects.html#decorators-regex"><em>regex</em></a> or <a class="reference internal" href="indicator_objects.html#decorators-formatter"><em>formatter</em></a> substitution.</p>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
+<blockquote id="decorators-subdivide-extra-parameters">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>extra_parameters</em></dt>
+<dd><p class="first last">Any extra parameters are consumed by the task function and not forwarded further down the pipeline.
+Strings are subject to <a class="reference internal" href="indicator_objects.html#decorators-regex"><em>regex</em></a> or <a class="reference internal" href="indicator_objects.html#decorators-formatter"><em>formatter</em></a>
+substitution.</p>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
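+<p>For illustration only (not part of the original page), a minimal sketch of how these parameters fit together,
+using <tt class="docutils literal"><span class="pre">formatter</span></tt>; the file names and the task name
+<tt class="docutils literal"><span class="pre">split_into_pieces</span></tt> are assumptions:</p>
+<div class="highlight-python"><div class="highlight"><pre>import os
+from ruffus import *
+
+# Hypothetical sketch: subdivide each &quot;.start&quot; file into an indeterminate
+# number of &quot;.piece&quot; files. The glob in the output pattern tells Ruffus that
+# the exact output file names are only known at run time.
+@subdivide([&quot;a.start&quot;, &quot;b.start&quot;],
+           formatter(),                          # matching_formatter
+           &quot;{path[0]}/{basename[0]}.*.piece&quot;,    # output_pattern (contains a glob)
+           &quot;{path[0]}/{basename[0]}&quot;)            # extra_parameter: substituted output prefix
+def split_into_pieces(input_file, output_files, output_prefix):
+    # clean up pieces left over from any previous run
+    for old_file in output_files:
+        os.unlink(old_file)
+    for i in range(3):
+        open(&quot;%s.%d.piece&quot; % (output_prefix, i), &quot;w&quot;).close()
+</pre></div>
+</div>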
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="../contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#">@subdivide</a><ul>
+<li><a class="reference internal" href="#subdivide-tasks-or-file-names-regex-matching-regex-formatter-matching-formatter-inputs-input-pattern-or-glob-add-inputs-input-pattern-or-glob-output-pattern-extra-parameters"><em>@subdivide</em> ( <cite>tasks_or_file_names</cite>, <tt class="docutils literal"><span class="pre">regex</span></tt><em>(</em><cite>matching_regex</cite><em>)</em> | <tt class="docutils literal"><span class="pre">formatter</span></tt><em>(</em><cite>matching_formatte [...]
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="merge.html"
+ title="previous chapter">@merge</a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="transform_ex.html"
+ title="next chapter">@transform with <tt class="docutils literal"><span class="pre">add_inputs</span></tt> and <tt class="docutils literal"><span class="pre">inputs</span></tt></a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../_sources/decorators/subdivide.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="originate.html">@originate</a> </li>
+ <li><a href="split.html">@split</a> </li>
+ <li><a href="transform.html">@transform</a> </li>
+ <li><a href="merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="#">@subdivide</a> </li>
+ <li><a href="transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="collate.html">@collate</a> </li>
+ <li><a href="collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="graphviz.html">@graphviz</a> </li>
+ <li><a href="mkdir.html">@mkdir</a> </li>
+ <li><a href="follows.html">@follows / mkdir</a> </li>
+ <li><a href="posttask.html">@posttask touch_file</a> </li>
+ <li><a href="active_if.html">@active_if</a> </li>
+ <li><a href="jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="product.html">@product </a> </li>
+ <li><a href="permutations.html">@permutations </a> </li>
+ <li><a href="combinations.html">@combinations </a> </li>
+ <li><a href="combinations_with_replacement.html">@combinations_ _with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="parallel.html">@parallel</a> </li>
+ <li><a href="check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="transform_ex.html" title="@transform with add_inputs and inputs"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="merge.html" title="@merge"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../index.html">Home</a> | </li>
+ <li><a href="../contents.html">Contents</a> | </li>
+ <li><a href="../installation.html">Install</a> | </li>
+ <li><a href="../tutorials/new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../tutorials/new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../faq.html">FAQ</a> | </li>
+ <li><a href="../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../tutorials/new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../gallery.html">Gallery</a> | </li>
+ <li><a href="../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/decorators/transform.html b/doc/_build/html/decorators/transform.html
new file mode 100644
index 0000000..dfb2d40
--- /dev/null
+++ b/doc/_build/html/decorators/transform.html
@@ -0,0 +1,381 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>@transform — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../_static/jquery.js"></script>
+ <script type="text/javascript" src="../_static/underscore.js"></script>
+ <script type="text/javascript" src="../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../index.html" />
+ <link rel="next" title="@merge" href="merge.html" />
+ <link rel="prev" title="@split" href="split.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="merge.html" title="@merge"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="split.html" title="@split"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../index.html">Home</a> | </li>
+ <li><a href="../contents.html">Contents</a> | </li>
+ <li><a href="../installation.html">Install</a> | </li>
+ <li><a href="../tutorials/new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../tutorials/new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../faq.html">FAQ</a> | </li>
+ <li><a href="../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../tutorials/new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../gallery.html">Gallery</a> | </li>
+ <li><a href="../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <span class="target" id="decorators-transform"></span><div class="admonition seealso" id="index-0">
+<p class="first admonition-title">See also</p>
+<ul class="last simple">
+<li><a class="reference internal" href="decorators.html#decorators"><em>Decorators</em></a> for more decorators</li>
+</ul>
+</div>
+<div class="section" id="transform">
+<h1>@transform<a class="headerlink" href="#transform" title="Permalink to this headline">¶</a></h1>
+<div class="section" id="transform-tasks-or-file-names-suffix-suffix-string-regex-matching-regex-formatter-matching-formatter-output-pattern-extra-parameters">
+<h2><em>@transform</em> ( <a class="reference internal" href="#decorators-transform-tasks-or-file-names"><cite>tasks_or_file_names</cite></a>, <a class="reference internal" href="indicator_objects.html#decorators-suffix"><em>suffix</em></a><em>(</em><a class="reference internal" href="#decorators-transform-suffix-string"><cite>suffix_string</cite></a><em>)</em>| <a class="reference internal" href="indicator_objects.html#decorators-regex"><em>regex</em></a><em>(</em><a class="referenc [...]
+<blockquote>
+<div><dl class="docutils">
+<dt><strong>Purpose:</strong></dt>
+<dd><p class="first">Applies the task function to transform data from input to output files.</p>
+<p>Output file names are specified from <a class="reference internal" href="#decorators-transform-tasks-or-file-names"><cite>tasks_or_file_names</cite></a>, i.e. from the output
+of specified tasks, or a list of file names, or a <a class="reference external" href="http://docs.python.org/library/glob.html"><cite>glob</cite></a> matching pattern.</p>
+<p>String replacement occurs either through suffix matches via <a class="reference internal" href="indicator_objects.html#decorators-suffix"><em>suffix</em></a> or
+the <a class="reference internal" href="indicator_objects.html#decorators-formatter"><em>formatter</em></a> or <a class="reference internal" href="indicator_objects.html#decorators-regex"><em>regex</em></a> indicators.</p>
+<p class="last">Only out of date tasks (comparing input and output files) will be run</p>
+</dd>
+</dl>
+<p><strong>Simple Example</strong></p>
+<blockquote>
+<div><p>Transforms <tt class="docutils literal"><span class="pre">*.c</span></tt> to <tt class="docutils literal"><span class="pre">*.o</span></tt>:</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="nd">@transform</span><span class="p">([</span><span class="s">"1.c"</span><span class="p">,</span> <span class="s">"2.c"</span><span class="p">],</span> <span class="n">suffix</span><span class="p">(</span><span class="s">".c"</span><span class="p">),</span> <span class="s">".o"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">compile</span><span class="p">(</span><span class="n">infile</span><span class="p">,</span> <span class="n">outfile</span><span class="p">):</span>
+ <span class="k">pass</span>
+</pre></div>
+</div>
+<p>Same example with a regular expression:</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="nd">@transform</span><span class="p">([</span><span class="s">"1.c"</span><span class="p">,</span> <span class="s">"2.c"</span><span class="p">],</span> <span class="n">regex</span><span class="p">(</span><span class="s">r".c$"</span><span class="p">),</span> <span class="s">".o"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">compile</span><span class="p">(</span><span class="n">infile</span><span class="p">,</span> <span class="n">outfile</span><span class="p">):</span>
+ <span class="k">pass</span>
+</pre></div>
+</div>
+<p>Both result in the following function calls:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="c"># 1.c -> 1.o</span>
+<span class="c"># 2.c -> 2.o</span>
+<span class="nb">compile</span><span class="p">(</span><span class="s">"1.c"</span><span class="p">,</span> <span class="s">"1.o"</span><span class="p">)</span>
+<span class="nb">compile</span><span class="p">(</span><span class="s">"2.c"</span><span class="p">,</span> <span class="s">"2.o"</span><span class="p">)</span>
+</pre></div>
+</div>
+</div></blockquote>
+</div></blockquote>
+<p><strong>Escaping regular expression patterns</strong></p>
+<blockquote>
+<div>A string like <tt class="docutils literal"><span class="pre">universal.h</span></tt> in <tt class="docutils literal"><span class="pre">add_inputs</span></tt> will be added <em>as is</em>.
+<tt class="docutils literal"><span class="pre">r"\1.h"</span></tt>, however, performs suffix substitution, with the special form <tt class="docutils literal"><span class="pre">r"\1"</span></tt> matching everything up to the suffix.
+Remember to ‘escape’ the backslash in <tt class="docutils literal"><span class="pre">r"\1"</span></tt>, otherwise Ruffus will complain and throw an Exception to remind you.
+The most convenient way is to use a python “raw” string, as in the sketch below.</div></blockquote>
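+<p>A minimal sketch of the point above (for illustration only; the file names are assumptions):</p>
+<div class="highlight-python"><div class="highlight"><pre>from ruffus import *
+
+# r&quot;\1.h&quot; is a raw string, so the backslash-1 suffix substitution survives:
+#   1.c -&gt; compile([&quot;1.c&quot;, &quot;1.h&quot;, &quot;universal.h&quot;], &quot;1.o&quot;)
+# Writing &quot;\1.h&quot; without the r prefix would embed the control character &quot;\x01&quot;
+# instead, and Ruffus would complain as described in the note above.
+@transform([&quot;1.c&quot;, &quot;2.c&quot;],
+           suffix(&quot;.c&quot;),
+           add_inputs([r&quot;\1.h&quot;, &quot;universal.h&quot;]),
+           &quot;.o&quot;)
+def compile(infiles, outfile):
+    pass
+</pre></div>
+</div>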
+<p><strong>Parameters:</strong></p>
+</div></blockquote>
+<blockquote id="decorators-transform-tasks-or-file-names">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>tasks_or_file_names</em></dt>
+<dd><p class="first">can be a:</p>
+<ol class="last arabic">
+<li><dl class="first docutils">
+<dt>Task / list of tasks (as in the example above).</dt>
+<dd><p class="first last">File names are taken from the output of the specified task(s)</p>
+</dd>
+</dl>
+</li>
+<li><dl class="first docutils">
+<dt>(Nested) list of file name strings.</dt>
+<dd><dl class="first last docutils">
+<dt>File names containing <tt class="docutils literal"><span class="pre">*[]?</span></tt> will be expanded as a <a class="reference external" href="http://docs.python.org/library/glob.html"><cite>glob</cite></a>.</dt>
+<dd><p class="first last">E.g.:<tt class="docutils literal"><span class="pre">"a.*"</span> <span class="pre">=></span> <span class="pre">"a.1",</span> <span class="pre">"a.2"</span></tt></p>
+</dd>
+</dl>
+</dd>
+</dl>
+</li>
+</ol>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
+<blockquote id="decorators-transform-suffix-string">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>suffix_string</em></dt>
+<dd><p class="first">must be wrapped in a <a class="reference internal" href="indicator_objects.html#decorators-suffix"><em>suffix</em></a> indicator object.
+The end of each input file name which matches <tt class="docutils literal"><span class="pre">suffix_string</span></tt> will be replaced by <tt class="docutils literal"><span class="pre">output_pattern</span></tt>.</p>
+<p>Input file names which do not match suffix_string will be ignored</p>
+<p>The non-suffix part of the match can be referred to using the <tt class="docutils literal"><span class="pre">"\1"</span></tt> pattern. This
+can be useful for putting the output in a different directory, for example:</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="nd">@transform</span><span class="p">([</span><span class="s">"1.c"</span><span class="p">,</span> <span class="s">"2.c"</span><span class="p">],</span> <span class="n">suffix</span><span class="p">(</span><span class="s">".c"</span><span class="p">),</span> <span class="s">r"my_path/\1.o"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">compile</span><span class="p">(</span><span class="n">infile</span><span class="p">,</span> <span class="n">outfile</span><span class="p">):</span>
+ <span class="k">pass</span>
+</pre></div>
+</div>
+<p>This results in the following function calls:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="c"># 1.c -> my_path/1.o</span>
+<span class="c"># 2.c -> my_path/2.o</span>
+<span class="nb">compile</span><span class="p">(</span><span class="s">"1.c"</span><span class="p">,</span> <span class="s">"my_path/1.o"</span><span class="p">)</span>
+<span class="nb">compile</span><span class="p">(</span><span class="s">"2.c"</span><span class="p">,</span> <span class="s">"my_path/2.o"</span><span class="p">)</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p>For convenience and visual clarity, the <tt class="docutils literal"><span class="pre">"\1"</span></tt> can be omitted from the output parameter.
+However, the <tt class="docutils literal"><span class="pre">"\1"</span></tt> is mandatory for string substitutions in additional parameters:</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="nd">@transform</span><span class="p">([</span><span class="s">"1.c"</span><span class="p">,</span> <span class="s">"2.c"</span><span class="p">],</span> <span class="n">suffix</span><span class="p">(</span><span class="s">".c"</span><span class="p">),</span> <span class="p">[</span><span class="s">r"\1.o"</span><span class="p">,</span> <span class="s">".o"</span><span [...]
+<span class="k">def</span> <span class="nf">compile</span><span class="p">(</span><span class="n">infile</span><span class="p">,</span> <span class="n">outfile</span><span class="p">):</span>
+ <span class="k">pass</span>
+</pre></div>
+</div>
+<p>Results in the following function calls:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="nb">compile</span><span class="p">(</span><span class="s">"1.c"</span><span class="p">,</span> <span class="p">[</span><span class="s">"1.o"</span><span class="p">,</span> <span class="s">"1.o"</span><span class="p">],</span> <span class="s">"Compiling 1"</span><span class="p">,</span> <span class="s">"verbatim"</span><span class="p">)</span>
+<span class="nb">compile</span><span class="p">(</span><span class="s">"2.c"</span><span class="p">,</span> <span class="p">[</span><span class="s">"2.o"</span><span class="p">,</span> <span class="s">"2.o"</span><span class="p">],</span> <span class="s">"Compiling 2"</span><span class="p">,</span> <span class="s">"verbatim"</span><span class="p">)</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p class="last">Since r”1” is optional for the output parameter, <tt class="docutils literal"><span class="pre">"\1.o"</span></tt> and <tt class="docutils literal"><span class="pre">".o"</span></tt> are equivalent.
+However, strings in other parameters which do not contain r”1” will be included verbatim, much
+like the string <tt class="docutils literal"><span class="pre">"verbatim"</span></tt> in the above example.</p>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
+<blockquote id="decorators-transform-matching-regex">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>matching_regex</em></dt>
+<dd><p class="first last">is a python regular expression string, which must be wrapped in
+a <a class="reference internal" href="indicator_objects.html#decorators-regex"><em>regex</em></a> indicator object
+See python <a class="reference external" href="http://docs.python.org/library/re.html">regular expression (re)</a>
+documentation for details of regular expression syntax
+Each output file name is created using regular expression substitution with <tt class="docutils literal"><span class="pre">output_pattern</span></tt></p>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
+<blockquote id="decorators-transform-matching-formatter">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>matching_formatter</em></dt>
+<dd><p class="first last">a <a class="reference internal" href="indicator_objects.html#decorators-formatter"><em>formatter</em></a> indicator object containing optionally
+a python <a class="reference external" href="http://docs.python.org/library/re.html">regular expression (re)</a>.</p>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
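+<p>For illustration only (not part of the original page), a minimal sketch of <tt class="docutils literal"><span class="pre">formatter</span></tt> with <em>@transform</em>;
+the <tt class="docutils literal"><span class="pre">compiled/</span></tt> directory is an assumption:</p>
+<div class="highlight-python"><div class="highlight"><pre>from ruffus import *
+
+# formatter() parses each input path into named components such as
+# {path[0]}, {basename[0]} and {ext[0]}, which can be used in output_pattern:
+#   1.c -&gt; compiled/1.o
+#   2.c -&gt; compiled/2.o
+@transform([&quot;1.c&quot;, &quot;2.c&quot;],
+           formatter(r&quot;\.c$&quot;),          # the optional regex restricts matching to *.c files
+           &quot;compiled/{basename[0]}.o&quot;)
+def compile(infile, outfile):
+    pass
+</pre></div>
+</div>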
+<blockquote id="decorators-transform-output-pattern">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>output_pattern</em></dt>
+<dd><p class="first last">Specifies the resulting output file name(s).</p>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
+<blockquote id="decorators-transform-extra-parameters">
+<div><ul>
+<li><dl class="first docutils">
+<dt>[<em>extra_parameters, ...</em>]</dt>
+<dd><p class="first">Any extra parameters are passed to the task function.</p>
+<p>If <tt class="docutils literal"><span class="pre">regex(matching_regex)</span></tt> or <tt class="docutils literal"><span class="pre">formatter(...)`</span></tt> is used, then substitution
+is first applied to (even nested) string parameters. Other data types are passed
+verbatim.</p>
+<p>For example:</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="nd">@transform</span><span class="p">([</span><span class="s">"a.c"</span><span class="p">,</span> <span class="s">"b.c"</span><span class="p">],</span> <span class="n">regex</span><span class="p">(</span><span class="s">r"(.*).c"</span><span class="p">),</span> <span class="s">r"\1.o"</span><span class="p">,</span> <span class="s">r"\1"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">compile</span><span class="p">(</span><span class="n">infile</span><span class="p">,</span> <span class="n">outfile</span><span class="p">):</span>
+ <span class="k">pass</span>
+</pre></div>
+</div>
+<p>will result in the following function calls:</p>
+<div class="last highlight-python"><div class="highlight"><pre><span class="nb">compile</span><span class="p">(</span><span class="s">"a.c"</span><span class="p">,</span> <span class="s">"a.o"</span><span class="p">,</span> <span class="s">"a"</span><span class="p">)</span>
+<span class="nb">compile</span><span class="p">(</span><span class="s">"b.c"</span><span class="p">,</span> <span class="s">"b.o"</span><span class="p">,</span> <span class="s">"b"</span><span class="p">)</span>
+</pre></div>
+</div>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
+<p>See <a class="reference internal" href="transform_ex.html#decorators-transform-ex"><em>here</em></a> for more advanced uses of transform.</p>
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="../contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#">@transform</a><ul>
+<li><a class="reference internal" href="#transform-tasks-or-file-names-suffix-suffix-string-regex-matching-regex-formatter-matching-formatter-output-pattern-extra-parameters"><em>@transform</em> ( <cite>tasks_or_file_names</cite>, <tt class="docutils literal"><span class="pre">suffix</span></tt><em>(</em><cite>suffix_string</cite><em>)</em>| <tt class="docutils literal"><span class="pre">regex</span></tt><em>(</em><cite>matching_regex</cite><em>)</em> | <tt class="docutils literal"> [...]
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="split.html"
+ title="previous chapter">@split</a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="merge.html"
+ title="next chapter">@merge</a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../_sources/decorators/transform.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="originate.html">@originate</a> </li>
+ <li><a href="split.html">@split</a> </li>
+ <li><a href="#">@transform</a> </li>
+ <li><a href="merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="subdivide.html">@subdivide</a> </li>
+ <li><a href="transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="collate.html">@collate</a> </li>
+ <li><a href="collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="graphviz.html">@graphviz</a> </li>
+ <li><a href="mkdir.html">@mkdir</a> </li>
+ <li><a href="follows.html">@follows / mkdir</a> </li>
+ <li><a href="posttask.html">@posttask touch_file</a> </li>
+ <li><a href="active_if.html">@active_if</a> </li>
+ <li><a href="jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="product.html">@product </a> </li>
+ <li><a href="permutations.html">@permutations </a> </li>
+ <li><a href="combinations.html">@combinations </a> </li>
+ <li><a href="combinations_with_replacement.html">@combinations_ _with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="parallel.html">@parallel</a> </li>
+ <li><a href="check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="merge.html" title="@merge"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="split.html" title="@split"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../index.html">Home</a> | </li>
+ <li><a href="../contents.html">Contents</a> | </li>
+ <li><a href="../installation.html">Install</a> | </li>
+ <li><a href="../tutorials/new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../tutorials/new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../faq.html">FAQ</a> | </li>
+ <li><a href="../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../tutorials/new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../gallery.html">Gallery</a> | </li>
+ <li><a href="../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/decorators/transform_ex.html b/doc/_build/html/decorators/transform_ex.html
new file mode 100644
index 0000000..bc749eb
--- /dev/null
+++ b/doc/_build/html/decorators/transform_ex.html
@@ -0,0 +1,406 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>@transform with add_inputs and inputs — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../_static/jquery.js"></script>
+ <script type="text/javascript" src="../_static/underscore.js"></script>
+ <script type="text/javascript" src="../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../index.html" />
+ <link rel="next" title="@collate" href="collate.html" />
+ <link rel="prev" title="@subdivide" href="subdivide.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="collate.html" title="@collate"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="subdivide.html" title="@subdivide"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../index.html">Home</a> | </li>
+ <li><a href="../contents.html">Contents</a> | </li>
+ <li><a href="../installation.html">Install</a> | </li>
+ <li><a href="../tutorials/new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../tutorials/new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../faq.html">FAQ</a> | </li>
+ <li><a href="../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../tutorials/new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../gallery.html">Gallery</a> | </li>
+ <li><a href="../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <span class="target" id="decorators-transform-ex"></span><div class="admonition seealso" id="index-0">
+<p class="first admonition-title">See also</p>
+<ul class="last simple">
+<li><a class="reference internal" href="decorators.html#decorators"><em>Decorators</em></a> for more decorators</li>
+</ul>
+</div>
+<div class="section" id="transform-with-add-inputs-and-inputs">
+<h1>@transform with <tt class="docutils literal"><span class="pre">add_inputs</span></tt> and <tt class="docutils literal"><span class="pre">inputs</span></tt><a class="headerlink" href="#transform-with-add-inputs-and-inputs" title="Permalink to this headline">¶</a></h1>
+<div class="section" id="transform-tasks-or-file-names-suffix-suffix-string-regex-matching-regex-formatter-matching-formatter-inputs-add-inputs-input-pattern-or-glob-output-pattern-extra-parameters">
+<h2><em>@transform</em> ( <a class="reference internal" href="#decorators-transform-tasks-or-file-names"><cite>tasks_or_file_names</cite></a>, <a class="reference internal" href="indicator_objects.html#decorators-suffix"><em>suffix</em></a><em>(</em><a class="reference internal" href="#decorators-transform-suffix-string"><cite>suffix_string</cite></a><em>)</em>| <a class="reference internal" href="indicator_objects.html#decorators-regex"><em>regex</em></a><em>(</em><a class="referenc [...]
+<blockquote>
+<div><dl class="docutils">
+<dt><strong>Purpose:</strong></dt>
+<dd><p class="first">This variant of <tt class="docutils literal"><span class="pre">@transform</span></tt> allows additional inputs or dependencies to be added
+dynamically to the task.</p>
+<p>Output file names and strings in the extra parameters
+are determined from <a class="reference internal" href="#decorators-transform-tasks-or-file-names"><cite>tasks_or_file_names</cite></a>, i.e. from the output
+of upstream tasks, or a list of file names.</p>
+<p>This variant of <tt class="docutils literal"><span class="pre">@transform</span></tt> allows input file names to be derived in the same way.</p>
+<p>String replacement occurs either through suffix matches via <a class="reference internal" href="indicator_objects.html#decorators-suffix"><em>suffix</em></a> or
+the <a class="reference internal" href="indicator_objects.html#decorators-formatter"><em>formatter</em></a> or <a class="reference internal" href="indicator_objects.html#decorators-regex"><em>regex</em></a> indicators.</p>
+<p><tt class="docutils literal"><span class="pre">@collate</span></tt> groups together all <strong>Input</strong> which result in identical <strong>Output</strong> and <strong>extra</strong>
+parameters.</p>
+<p>It is a <strong>many to fewer</strong> operation.</p>
+<p><a class="reference internal" href="indicator_objects.html#decorators-add-inputs"><em>add_inputs</em></a> nests the the original input parameters in a list before adding additional dependencies.</p>
+<p><a class="reference internal" href="indicator_objects.html#decorators-inputs"><em>inputs</em></a> replaces the original input parameters wholescale.</p>
+<p class="last">Only out of date tasks (comparing input and output files) will be run</p>
+</dd>
+</dl>
+<p><strong>Example of</strong> <a class="reference internal" href="indicator_objects.html#decorators-add-inputs"><em>add_inputs</em></a></p>
+<blockquote>
+<div><p>A common task in compiling C code is to include the corresponding header file for the source.</p>
+<dl class="docutils">
+<dt>To compile <tt class="docutils literal"><span class="pre">*.c</span></tt> to <tt class="docutils literal"><span class="pre">*.o</span></tt>, adding <tt class="docutils literal"><span class="pre">*.h</span></tt> and the common header <tt class="docutils literal"><span class="pre">universal.h</span></tt>:</dt>
+<dd><div class="first last highlight-python"><div class="highlight"><pre><span class="nd">@transform</span><span class="p">([</span><span class="s">"1.c"</span><span class="p">,</span> <span class="s">"2.c"</span><span class="p">],</span> <span class="n">suffix</span><span class="p">(</span><span class="s">".c"</span><span class="p">),</span> <span class="n">add_inputs</span><span class="p">([</span><span class="s">r"\1.h"</span><span class="p">,</ [...]
+<span class="k">def</span> <span class="nf">compile</span><span class="p">(</span><span class="n">infile</span><span class="p">,</span> <span class="n">outfile</span><span class="p">):</span>
+ <span class="k">pass</span>
+</pre></div>
+</div>
+</dd>
+<dt>This will result in the following function calls:</dt>
+<dd><div class="first last highlight-python"><div class="highlight"><pre><span class="nb">compile</span><span class="p">([</span><span class="s">"1.c"</span><span class="p">,</span> <span class="s">"1.h"</span><span class="p">,</span> <span class="s">"universal.h"</span><span class="p">],</span> <span class="s">"1.o"</span><span class="p">)</span>
+<span class="nb">compile</span><span class="p">([</span><span class="s">"2.c"</span><span class="p">,</span> <span class="s">"2.h"</span><span class="p">,</span> <span class="s">"universal.h"</span><span class="p">],</span> <span class="s">"2.o"</span><span class="p">)</span>
+</pre></div>
+</div>
+</dd>
+</dl>
+</div></blockquote>
+<p><strong>Example of</strong> <a class="reference internal" href="indicator_objects.html#decorators-inputs"><em>inputs</em></a></p>
+<blockquote>
+<div><p><tt class="docutils literal"><span class="pre">inputs(...)</span></tt> allows the original input parameters to be replaced wholescale.</p>
+<dl class="docutils">
+<dt>This can be seen in the following example:</dt>
+<dd><div class="first last highlight-python"><div class="highlight"><pre><span class="nd">@transform</span><span class="p">([</span> <span class="p">[</span><span class="s">"1.c"</span><span class="p">,</span> <span class="s">"A.c"</span><span class="p">,</span> <span class="mi">2</span><span class="p">]</span>
+ <span class="p">[</span><span class="s">"2.c"</span><span class="p">,</span> <span class="s">"B.c"</span><span class="p">,</span> <span class="s">"C.c"</span><span class="p">,</span> <span class="mi">3</span><span class="p">]],</span>
+ <span class="n">suffix</span><span class="p">(</span><span class="s">".c"</span><span class="p">),</span> <span class="n">inputs</span><span class="p">([</span><span class="s">r"\1.py"</span><span class="p">,</span> <span class="s">"docs.rst"</span><span class="p">]),</span> <span class="s">".pyc"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">compile</span><span class="p">(</span><span class="n">infile</span><span class="p">,</span> <span class="n">outfile</span><span class="p">):</span>
+ <span class="k">pass</span>
+</pre></div>
+</div>
+</dd>
+<dt>This will result in the following function calls:</dt>
+<dd><div class="first last highlight-python"><div class="highlight"><pre><span class="nb">compile</span><span class="p">([</span><span class="s">"1.py"</span><span class="p">,</span> <span class="s">"docs.rst"</span><span class="p">],</span> <span class="s">"1.pyc"</span><span class="p">)</span>
+<span class="nb">compile</span><span class="p">([</span><span class="s">"2.py"</span><span class="p">,</span> <span class="s">"docs.rst"</span><span class="p">],</span> <span class="s">"2.pyc"</span><span class="p">)</span>
+</pre></div>
+</div>
+</dd>
+</dl>
+</div></blockquote>
+<p><strong>Parameters:</strong></p>
+</div></blockquote>
+<blockquote id="decorators-transform-tasks-or-file-names">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>tasks_or_file_names</em></dt>
+<dd><p class="first">can be a:</p>
+<ol class="last arabic">
+<li><dl class="first docutils">
+<dt>Task / list of tasks (as in the example above).</dt>
+<dd><p class="first last">File names are taken from the output of the specified task(s)</p>
+</dd>
+</dl>
+</li>
+<li><dl class="first docutils">
+<dt>(Nested) list of file name strings.</dt>
+<dd><dl class="first last docutils">
+<dt>File names containing <tt class="docutils literal"><span class="pre">*[]?</span></tt> will be expanded as a <a class="reference external" href="http://docs.python.org/library/glob.html"><cite>glob</cite></a>.</dt>
+<dd><p class="first last">E.g.:<tt class="docutils literal"><span class="pre">"a.*"</span> <span class="pre">=></span> <span class="pre">"a.1",</span> <span class="pre">"a.2"</span></tt></p>
+</dd>
+</dl>
+</dd>
+</dl>
+</li>
+</ol>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
+<blockquote id="decorators-transform-suffix-string">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>suffix_string</em></dt>
+<dd><p class="first">must be wrapped in a <a class="reference internal" href="indicator_objects.html#decorators-suffix"><em>suffix</em></a> indicator object.
+The end of each file name which matches suffix_string will be replaced by <cite>output_pattern</cite>.
+Thus:</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="nd">@transform</span><span class="p">([</span><span class="s">"a.c"</span><span class="p">,</span> <span class="s">"b.c"</span><span class="p">],</span> <span class="n">suffix</span><span class="p">(</span><span class="s">".c"</span><span class="p">),</span> <span class="s">".o"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">compile</span><span class="p">(</span><span class="n">infile</span><span class="p">,</span> <span class="n">outfile</span><span class="p">):</span>
+ <span class="k">pass</span>
+</pre></div>
+</div>
+<p>will result in the following function calls:</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="nb">compile</span><span class="p">(</span><span class="s">"a.c"</span><span class="p">,</span> <span class="s">"a.o"</span><span class="p">)</span>
+<span class="nb">compile</span><span class="p">(</span><span class="s">"b.c"</span><span class="p">,</span> <span class="s">"b.o"</span><span class="p">)</span>
+</pre></div>
+</div>
+<p class="last">File names which do not match suffix_string will be ignored</p>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
+<blockquote id="decorators-transform-matching-regex">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>matching_regex</em></dt>
+<dd><p class="first last">is a python regular expression string, which must be wrapped in
+a <a class="reference internal" href="indicator_objects.html#decorators-regex"><em>regex</em></a> indicator object
+See python <a class="reference external" href="http://docs.python.org/library/re.html">regular expression (re)</a>
+documentation for details of regular expression syntax
+Each output file name is created using regular expression substitution with <tt class="docutils literal"><span class="pre">output_pattern</span></tt></p>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
+<blockquote id="decorators-transform-matching-formatter">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>matching_formatter</em></dt>
+<dd><p class="first last">a <a class="reference internal" href="indicator_objects.html#decorators-formatter"><em>formatter</em></a> indicator object containing optionally
+a python <a class="reference external" href="http://docs.python.org/library/re.html">regular expression (re)</a>.</p>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
+<blockquote id="decorators-transform-input-pattern-or-glob">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>input_pattern</em></dt>
+<dd><p class="first">Specifies the resulting input(s) to each job.
+Must be wrapped in an <a class="reference internal" href="indicator_objects.html#decorators-inputs"><em>inputs</em></a> or an <a class="reference internal" href="indicator_objects.html#decorators-add-inputs"><em>add_inputs</em></a> indicator object (see the sketch below).</p>
+<p>Can be a:</p>
+<ol class="last arabic">
+<li><dl class="first docutils">
+<dt>Task / list of tasks (as in the example above).</dt>
+<dd><p class="first last">File names are taken from the output of the specified task(s)</p>
+</dd>
+</dl>
+</li>
+<li><dl class="first docutils">
+<dt>(Nested) list of file name strings.</dt>
+<dd><p class="first last">Strings will be subject to substitution.
+File names containing <tt class="docutils literal"><span class="pre">*[]?</span></tt> will be expanded as a <a class="reference external" href="http://docs.python.org/library/glob.html"><cite>glob</cite></a>.
+E.g.:<tt class="docutils literal"><span class="pre">"a.*"</span> <span class="pre">=></span> <span class="pre">"a.1",</span> <span class="pre">"a.2"</span></tt></p>
+</dd>
+</dl>
+</li>
+</ol>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
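+<p>For illustration only (not part of the original page), a sketch of <em>input_pattern</em> referring to an upstream task
+rather than to file names; the task <tt class="docutils literal"><span class="pre">make_common_header</span></tt> is hypothetical:</p>
+<div class="highlight-python"><div class="highlight"><pre>from ruffus import *
+
+@originate([&quot;common.h&quot;])
+def make_common_header(output_file):
+    open(output_file, &quot;w&quot;).close()
+
+# The output of make_common_header is appended to each job's input list:
+#   compile([&quot;1.c&quot;, &quot;common.h&quot;], &quot;1.o&quot;)
+#   compile([&quot;2.c&quot;, &quot;common.h&quot;], &quot;2.o&quot;)
+@transform([&quot;1.c&quot;, &quot;2.c&quot;],
+           suffix(&quot;.c&quot;),
+           add_inputs(make_common_header),
+           &quot;.o&quot;)
+def compile(infiles, outfile):
+    pass
+</pre></div>
+</div>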
+<blockquote id="decorators-transform-output-pattern">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>output_pattern</em></dt>
+<dd><p class="first last">Specifies the resulting output file name(s).</p>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
+<blockquote id="decorators-transform-extra-parameters">
+<div><ul>
+<li><dl class="first docutils">
+<dt>[<em>extra_parameters, ...</em>]</dt>
+<dd><p class="first">Any extra parameters are passed to the task function.</p>
+<p>If the <tt class="docutils literal"><span class="pre">regex(...)</span></tt> or <tt class="docutils literal"><span class="pre">formatter(...)</span></tt> parameter is used, then substitution
+is first applied to (even nested) string parameters. Other data types are passed
+verbatim.</p>
+<p>For example:</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="nd">@transform</span><span class="p">([</span><span class="s">"a.c"</span><span class="p">,</span> <span class="s">"b.c"</span><span class="p">],</span> <span class="n">regex</span><span class="p">(</span><span class="s">r"(.*).c"</span><span class="p">),</span> <span class="n">inputs</span><span class="p">(</span><span class="s">r"\1.c"</span><span class="p">,</span> <span clas [...]
+<span class="k">def</span> <span class="nf">compile</span><span class="p">(</span><span class="n">infiles</span><span class="p">,</span> <span class="n">outfile</span><span class="p">,</span> <span class="n">file_name_root</span><span class="p">):</span>
+ <span class="c"># do something here</span>
+ <span class="k">pass</span>
+</pre></div>
+</div>
+<p>will result in the following function calls:</p>
+<div class="last highlight-python"><div class="highlight"><pre><span class="nb">compile</span><span class="p">([</span><span class="s">"1.c"</span><span class="p">,</span> <span class="s">"1.h"</span><span class="p">,</span> <span class="s">"universal.h"</span><span class="p">],</span> <span class="s">"1.o"</span><span class="p">,</span> <span class="s">"1"</span><span class="p">)</span>
+<span class="nb">compile</span><span class="p">([</span><span class="s">"2.c"</span><span class="p">,</span> <span class="s">"2.h"</span><span class="p">,</span> <span class="s">"universal.h"</span><span class="p">],</span> <span class="s">"2.o"</span><span class="p">,</span> <span class="s">"2"</span><span class="p">)</span>
+</pre></div>
+</div>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
+<p>See <a class="reference internal" href="transform.html#decorators-transform"><em>here</em></a> for more straightforward ways to use transform.</p>
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="../contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#">@transform with <tt class="docutils literal"><span class="pre">add_inputs</span></tt> and <tt class="docutils literal"><span class="pre">inputs</span></tt></a><ul>
+<li><a class="reference internal" href="#transform-tasks-or-file-names-suffix-suffix-string-regex-matching-regex-formatter-matching-formatter-inputs-add-inputs-input-pattern-or-glob-output-pattern-extra-parameters"><em>@transform</em> ( <cite>tasks_or_file_names</cite>, <tt class="docutils literal"><span class="pre">suffix</span></tt><em>(</em><cite>suffix_string</cite><em>)</em>| <tt class="docutils literal"><span class="pre">regex</span></tt><em>(</em><cite>matching_regex</cite><em [...]
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="subdivide.html"
+ title="previous chapter">@subdivide</a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="collate.html"
+ title="next chapter">@collate</a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../_sources/decorators/transform_ex.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="originate.html">@originate</a> </li>
+ <li><a href="split.html">@split</a> </li>
+ <li><a href="transform.html">@transform</a> </li>
+ <li><a href="merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="subdivide.html">@subdivide</a> </li>
+ <li><a href="#">@transform (add_inputs) </a> </li>
+ <li><a href="collate.html">@collate</a> </li>
+ <li><a href="collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="graphviz.html">@graphviz</a> </li>
+ <li><a href="mkdir.html">@mkdir</a> </li>
+ <li><a href="follows.html">@follows / mkdir</a> </li>
+ <li><a href="posttask.html">@posttask touch_file</a> </li>
+ <li><a href="active_if.html">@active_if</a> </li>
+ <li><a href="jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="product.html">@product </a> </li>
+ <li><a href="permutations.html">@permutations </a> </li>
+ <li><a href="combinations.html">@combinations </a> </li>
+ <li><a href="combinations_with_replacement.html">@combinations_ _with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="parallel.html">@parallel</a> </li>
+ <li><a href="check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="collate.html" title="@collate"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="subdivide.html" title="@subdivide"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../index.html">Home</a> | </li>
+ <li><a href="../contents.html">Contents</a> | </li>
+ <li><a href="../installation.html">Install</a> | </li>
+ <li><a href="../tutorials/new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../tutorials/new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../faq.html">FAQ</a> | </li>
+ <li><a href="../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../tutorials/new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../gallery.html">Gallery</a> | </li>
+ <li><a href="../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/design.html b/doc/_build/html/design.html
new file mode 100644
index 0000000..3e47d17
--- /dev/null
+++ b/doc/_build/html/design.html
@@ -0,0 +1,503 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>Design & Architecture — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: './',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="_static/jquery.js"></script>
+ <script type="text/javascript" src="_static/underscore.js"></script>
+ <script type="text/javascript" src="_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="index.html" />
+ <link rel="next" title="Major Features added to Ruffus" href="history.html" />
+ <link rel="prev" title="Installation" href="installation.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="history.html" title="Major Features added to Ruffus"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="installation.html" title="Installation"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="index.html">Home</a> | </li>
+ <li><a href="contents.html">Contents</a> | </li>
+ <li><a href="installation.html">Install</a> | </li>
+ <li><a href="tutorials/new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="tutorials/new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="faq.html">FAQ</a> | </li>
+ <li><a href="cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="tutorials/new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="gallery.html">Gallery</a> | </li>
+ <li><a href="history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <div class="section" id="design-architecture">
+<span id="index-0"></span><h1>Design & Architecture<a class="headerlink" href="#design-architecture" title="Permalink to this headline">¶</a></h1>
+<blockquote>
+<div><p>The <em>ruffus</em> module has the following design goals:</p>
+<blockquote>
+<div><ul class="simple">
+<li>Simplicity</li>
+<li>Intuitive</li>
+<li>Lightweight</li>
+<li>Unintrusive</li>
+<li>Flexible/Powerful</li>
+</ul>
+</div></blockquote>
+<p>Computational pipelines, especially in science, are best thought of in terms of data
+flowing through successive, dependent stages (<strong>ruffus</strong> calls these <a class="reference internal" href="glossary.html#term-task"><em class="xref std std-term">task</em></a>s).
+Traditionally, files have been used to
+link pipelined stages together. This means that computational pipelines can be managed
+using traditional software construction (<cite>build</cite>) systems.</p>
+</div></blockquote>
+<div class="section" id="gnu-make">
+<h2><cite>GNU Make</cite><a class="headerlink" href="#gnu-make" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>The grand-daddy of these is UNIX <a class="reference external" href="http://en.wikipedia.org/wiki/Make_(software)">make</a>.
+<a class="reference external" href="http://www.gnu.org/software/make/">GNU make</a> is ubiquitous in the linux world for
+installing and compiling software.
+It has been widely used to build computational pipelines because it supports:</p>
+<ul class="simple">
+<li>Stopping and restarting computational processes</li>
+<li>Running multiple, even thousands of jobs in parallel</li>
+</ul>
+</div></blockquote>
+<div class="section" id="deficiencies-of-make-gmake">
+<span id="design-make-syntax-ugly"></span><h3>Deficiencies of <cite>make</cite> / <cite>gmake</cite><a class="headerlink" href="#deficiencies-of-make-gmake" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div>However, make and <a class="reference external" href="http://www.gnu.org/software/make/">GNU make</a> use a specialised (domain-specific)
+language, which has been much criticised for its poor support for modern
+programming language features, such as variable scope, pattern matching and debugging.
+Make scripts require large amounts of often obscure shell scripting
+and makefiles can quickly become unmaintainable.</div></blockquote>
+</div>
+</div>
+<div class="section" id="scons-rake-and-other-make-alternatives">
+<span id="design-scons-and-rake"></span><h2><cite>Scons</cite>, <cite>Rake</cite> and other <cite>Make</cite> alternatives<a class="headerlink" href="#scons-rake-and-other-make-alternatives" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>Many attempts have been made to produce a more modern version of make, with less of its
+historical baggage. These include the Java-based <a class="reference external" href="http://ant.apache.org/">Apache ant</a>, which is specified in XML.</p>
+<p>More interesting are a new breed of build systems whose scripts are written in modern programming
+languages, rather than a specially-invented “build” specification syntax.
+These include the Python <a class="reference external" href="http://www.scons.org/">scons</a>, Ruby <a class="reference external" href="http://rake.rubyforge.org/">rake</a> and
+its python port <a class="reference external" href="http://packages.python.org/Smithy/">Smithy</a>.</p>
+<p>The great advantage is that computational pipelines do not need to be artificially parcelled out
+between (the often second-class) workflow management code, and the logic which does the real computation
+in the pipeline. It also means that workflow management can use all the standard language and library
+features, for example, to read in directories, match file names using regular expressions and so on.</p>
+<p><strong>Ruffus</strong> is much like scons in that the modern, dynamic programming language Python is used seamlessly
+throughout its pipeline scripts.</p>
+</div></blockquote>
+<div class="section" id="implicit-dependencies-disadvantages-of-make-scons-rake">
+<span id="design-implicit-dependencies"></span><h3>Implicit dependencies: disadvantages of <cite>make</cite> / <cite>scons</cite> / <cite>rake</cite><a class="headerlink" href="#implicit-dependencies-disadvantages-of-make-scons-rake" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><p>Although Python <a class="reference external" href="http://www.scons.org/">scons</a> and Ruby <a class="reference external" href="http://rake.rubyforge.org/">rake</a>
+are in many ways more powerful and easier to use for building software, they are still an
+imperfect fit to the world of computational pipelines.</p>
+<p>This is a result of the way dependencies are specified, an essential part of their design inherited
+from <a class="reference external" href="http://www.gnu.org/software/make/">GNU make</a>.</p>
+<p>The order of operations in all of these tools is specified in a <em>declarative</em> rather than
+<em>imperative</em> manner. This means that the sequence of steps that a build should take is
+not spelled out explicitly and directly. Instead, recipes are provided for turning input files
+of one type into another.</p>
+<p>So, for example, knowing that <tt class="docutils literal"><span class="pre">a->b</span></tt>, <tt class="docutils literal"><span class="pre">b->c</span></tt>, <tt class="docutils literal"><span class="pre">c->d</span></tt>, the build
+system can infer how to get from <tt class="docutils literal"><span class="pre">a</span></tt> to <tt class="docutils literal"><span class="pre">d</span></tt> by performing the necessary operations in the correct order.</p>
+<dl class="docutils">
+<dt>This is immensely powerful for three reasons:</dt>
+<dd><ol class="first last arabic">
+<li><p class="first">The plumbing, such as dependency checking, passing output
+from one stage to another, are handled automatically by the build system. (This is the whole point!)</p>
+</li>
+<li><p class="first">The same <em>recipe</em> can be re-used at different points in the build.</p>
+</li>
+<li><div class="first line-block">
+<div class="line">Intermediate files do not need to be retained.</div>
+<div class="line">Given the automatic inference that <tt class="docutils literal"><span class="pre">a->b->c->d</span></tt>,
+we don’t need to keep <tt class="docutils literal"><span class="pre">b</span></tt> and <tt class="docutils literal"><span class="pre">c</span></tt> files around once <tt class="docutils literal"><span class="pre">d</span></tt> has been produced.</div>
+<div class="line"><br /></div>
+</div>
+</li>
+</ol>
+</dd>
+</dl>
+<p>The disadvantage is that because stages are specified only indirectly, in terms of
+file name matches, the flow through a complex build or a pipeline can be difficult to trace, and nigh
+impossible to debug when there are problems.</p>
+</div></blockquote>
+</div>
+<div class="section" id="explicit-dependencies-in-ruffus">
+<span id="design-explicit-dependencies-in-ruffus"></span><h3>Explicit dependencies in <cite>Ruffus</cite><a class="headerlink" href="#explicit-dependencies-in-ruffus" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><p><strong>Ruffus</strong> takes a different approach. The order of operations is specified explicitly rather than inferred
+indirectly from the input and output types. So, for example, we would explicitly specify three successive and
+linked operations <tt class="docutils literal"><span class="pre">a->b</span></tt>, <tt class="docutils literal"><span class="pre">b->c</span></tt>, <tt class="docutils literal"><span class="pre">c->d</span></tt>. The build system knows that the operations always proceed in
+this order.</p>
+<p>Looking at a <strong>Ruffus</strong> script, it is always immediately clear what succession of computational steps
+will be taken.</p>
+<p><strong>Ruffus</strong> values clarity over syntactic cleverness.</p>
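+<p>As a purely illustrative sketch (the file names, task names and task bodies below are invented for this example), such an explicit <tt class="docutils literal"><span class="pre">a-&gt;b-&gt;c-&gt;d</span></tt> chain might be written as three chained <strong>Ruffus</strong> tasks:</p>
+<blockquote>
+<div><div class="highlight-python"><pre>from ruffus import transform, suffix, pipeline_run
+
+# Each stage is an ordinary python function; the chain a-&gt;b-&gt;c-&gt;d is
+# spelled out explicitly by naming the previous task in each @transform.
+@transform("a.start", suffix(".start"), ".b")
+def a_to_b(input_file, output_file):
+    open(output_file, "w").write(open(input_file).read())
+
+@transform(a_to_b, suffix(".b"), ".c")
+def b_to_c(input_file, output_file):
+    open(output_file, "w").write(open(input_file).read())
+
+@transform(b_to_c, suffix(".c"), ".d")
+def c_to_d(input_file, output_file):
+    open(output_file, "w").write(open(input_file).read())
+
+pipeline_run([c_to_d])</pre>
+</div>
+</div></blockquote>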
+</div></blockquote>
+</div>
+<div class="section" id="static-dependencies-what-make-scons-rake-can-t-do-easily">
+<span id="design-static-dependencies"></span><h3>Static dependencies: What <cite>make</cite> / <cite>scons</cite> / <cite>rake</cite> can’t do (easily)<a class="headerlink" href="#static-dependencies-what-make-scons-rake-can-t-do-easily" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><p><a class="reference external" href="http://www.gnu.org/software/make/">GNU make</a>, <a class="reference external" href="http://www.scons.org/">scons</a> and <a class="reference external" href="http://rake.rubyforge.org/">rake</a>
+work by inferring a static dependency (directed acyclic) graph between all the files which
+are used by a computational pipeline. These tools locate the target that they are supposed
+to build and work backward through the dependency graph from that target,
+rebuilding anything that is out of date. This is perfect for building software,
+where the list of data files can be computed <strong>statically</strong> at the beginning of the build.</p>
+<p>This is not an ideal match for scientific computational pipelines because:</p>
+<blockquote>
+<div><ul>
+<li><div class="first line-block">
+<div class="line">Though the <em>stages</em> of a pipeline (i.e. <cite>compile</cite> or <cite>DNA alignment</cite>) are
+invariably well-specified in advance, the number of
+operations (<em>job</em>s) involved at each stage may not be.</div>
+<div class="line"><br /></div>
+</div>
+</li>
+<li><div class="first line-block">
+<div class="line">A common approach is to break up large data sets into manageable chunks which
+can be operated on in parallel in computational clusters or farms
+(See <a class="reference external" href="http://en.wikipedia.org/wiki/Embarrassingly_parallel">embarassingly parallel problems</a>).</div>
+<div class="line">This means that the number of parallel operations or jobs varies with the data (the number of manageable chunks),
+and dependency trees cannot be calculated statically beforehand.</div>
+<div class="line"><br /></div>
+</div>
+</li>
+</ul>
+</div></blockquote>
+<p>Computational pipelines require <strong>dynamic</strong> dependencies which are not calculated up-front, but
+at each stage of the pipeline.</p>
+<p>This is a <em>known</em> issue with traditional build systems, each of which has partial strategies to work around
+this problem:</p>
+<blockquote>
+<div><ul class="simple">
+<li>gmake always builds the dependencies when first invoked, so dynamic dependencies require (complex!) recursive calls to gmake</li>
+<li><a class="reference external" href="http://objectmix.com/ruby/759716-rake-dependencies-unknown-prior-running-tasks-2.html">Rake dependencies unknown prior to running tasks</a>.</li>
+<li><a class="reference external" href="http://www.scons.org/wiki/DynamicSourceGenerator">Scons: Using a Source Generator to Add Targets Dynamically</a></li>
+</ul>
+</div></blockquote>
+<p><strong>Ruffus</strong> explicitly and straightforwardly handles tasks which produce an indeterminate (i.e. runtime-dependent)
+number of outputs, using its <strong>@split</strong>, <strong>@transform</strong> and <strong>@merge</strong> function annotations.</p>
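+<p>As an illustrative sketch (with invented file names and an arbitrary chunk size), <strong>@split</strong> lets the task itself decide at run time how many output files to create, and a downstream <strong>@merge</strong> picks up however many were produced:</p>
+<blockquote>
+<div><div class="highlight-python"><pre>from ruffus import split, merge, pipeline_run
+
+# The number of chunk files depends on the data, so the output is given
+# as a glob pattern ("chunk.*.txt") rather than a fixed list.
+@split("all_data.txt", "chunk.*.txt")
+def split_data(input_file, output_files):
+    lines = open(input_file).readlines()
+    for index, start in enumerate(range(0, len(lines), 100)):
+        open("chunk.%d.txt" % index, "w").writelines(lines[start:start + 100])
+
+# @merge receives whichever chunk files split_data actually created.
+@merge(split_data, "summary.txt")
+def merge_chunks(input_files, output_file):
+    summary = open(output_file, "w")
+    for name in input_files:
+        summary.write(open(name).read())
+    summary.close()
+
+pipeline_run([merge_chunks])</pre>
+</div>
+</div></blockquote>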
+</div></blockquote>
+</div>
+</div>
+<div class="section" id="managing-pipelines-stage-by-stage-using-ruffus">
+<h2>Managing pipelines stage-by-stage using <strong>Ruffus</strong><a class="headerlink" href="#managing-pipelines-stage-by-stage-using-ruffus" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p><strong>Ruffus</strong> manages pipeline stages directly.</p>
+<blockquote>
+<div><ol class="arabic">
+<li><div class="first line-block">
+<div class="line">The computational operations for each stage of the pipeline are written by you, in
+separate python functions.</div>
+<div class="line">(These correspond to <a class="reference external" href="http://www.gnu.org/software/make/manual/make.html#Pattern-Rules">gmake pattern rules</a>)</div>
+<div class="line"><br /></div>
+</div>
+</li>
+<li><div class="first line-block">
+<div class="line">The dependencies between pipeline stages (python functions) are specified up-front.</div>
+<div class="line">These can be displayed as a flow chart.</div>
+</div>
+<img alt="_images/front_page_flowchart.png" src="_images/front_page_flowchart.png" />
+</li>
+<li><p class="first"><strong>Ruffus</strong> makes sure pipeline stage functions are called in the right order,
+with the right parameters, running in parallel using multiprocessing if necessary.</p>
+</li>
+<li><p class="first">Data file timestamps can be used to automatically determine if all or any parts
+of the pipeline are out-of-date and need to be rerun.</p>
+</li>
+<li><p class="first">Separate pipeline stages, and operations within each pipeline stage,
+can be run in parallel provided they are not inter-dependent.</p>
+</li>
+</ol>
+</div></blockquote>
+<p>Another way of looking at this is that <strong>ruffus</strong> reconstructs data file dependencies dynamically,
+on the fly, when it gets to each stage of the pipeline, giving much more flexibility.</p>
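+<p>A minimal sketch of how the flow chart and parallel execution mentioned above look in a script (<tt class="docutils literal"><span class="pre">last_task</span></tt> stands for a hypothetical final pipeline stage defined elsewhere):</p>
+<blockquote>
+<div><div class="highlight-python"><pre>from ruffus import pipeline_printout_graph, pipeline_run
+
+# Draw the flow chart of the up-front stage dependencies ...
+pipeline_printout_graph("flowchart.png", "png", [last_task])
+
+# ... then run the pipeline, with up to four jobs in parallel.
+pipeline_run([last_task], multiprocess=4)</pre>
+</div>
+</div></blockquote>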
+</div></blockquote>
+<div class="section" id="disadvantages-of-the-ruffus-design">
+<h3>Disadvantages of the Ruffus design<a class="headerlink" href="#disadvantages-of-the-ruffus-design" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><p>Are there any disadvantages to this trade-off for additional clarity?</p>
+<blockquote>
+<div><ol class="arabic simple">
+<li>Each pipeline stage needs to take the right input and output. For example, if we specified the
+steps in the wrong order: <tt class="docutils literal"><span class="pre">a->b</span></tt>, <tt class="docutils literal"><span class="pre">c->d</span></tt>, <tt class="docutils literal"><span class="pre">b->c</span></tt>, then no useful output would be produced.</li>
+<li>We cannot re-use the same recipes in different parts of the pipeline.</li>
+<li>Intermediate files need to be retained.</li>
+</ol>
+</div></blockquote>
+<p>In our experience, it is always obvious when pipeline operations are in the wrong order, precisely because the
+order of computation is the very essence of the design of each pipeline. Ruffus produces extra diagnostics when
+no output is created in a pipeline stage (this usually happens with incorrectly specified regular expressions).</p>
+<p>Re-use of recipes is as simple as an extra call to common function code.</p>
+<p>Finally, some users have proposed future enhancements to <strong>Ruffus</strong> to handle unnecessary temporary / intermediate files.</p>
+</div></blockquote>
+</div>
+</div>
+<div class="section" id="alternatives-to-ruffus">
+<span id="index-1"></span><h2>Alternatives to <strong>Ruffus</strong><a class="headerlink" href="#alternatives-to-ruffus" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>A comparison of more make-like tools is available from <a class="reference external" href="http://biowiki.org/MakeComparison">Ian Holmes’ group</a>.</p>
+<p>Build systems include:</p>
+<blockquote>
+<div><ul class="simple">
+<li><a class="reference external" href="http://www.gnu.org/software/make/">GNU make</a></li>
+<li><a class="reference external" href="http://www.scons.org/">scons</a></li>
+<li><a class="reference external" href="http://ant.apache.org/">ant</a></li>
+<li><a class="reference external" href="http://rake.rubyforge.org/">rake</a></li>
+</ul>
+</div></blockquote>
+<p>There are also complete workload management systems such as Condor.
+Various bioinformatics pipelines are also available, including the one used by the
+leading genome annotation website Ensembl, as well as Pegasys, GPIPE, Taverna, Wildfire, MOWserv,
+Triana, Cyrille2 etc. These are all either hardwired to specific databases and tasks,
+or have steep learning curves for both the scientist/developer and the IT system
+administrator.</p>
+<p><strong>Ruffus</strong> is designed to be lightweight and unintrusive enough to use for writing pipelines
+with just 10 lines of code.</p>
+</div></blockquote>
+<div class="admonition seealso">
+<p class="first admonition-title">See also</p>
+<p><strong>Bioinformatics workload management systems</strong></p>
+<blockquote class="last">
+<div><dl class="docutils">
+<dt>Condor:</dt>
+<dd><a class="reference external" href="http://www.cs.wisc.edu/condor/description.html">http://www.cs.wisc.edu/condor/description.html</a></dd>
+<dt>Ensembl Analysis pipeline:</dt>
+<dd><a class="reference external" href="http://www.ncbi.nlm.nih.gov/pubmed/15123589">http://www.ncbi.nlm.nih.gov/pubmed/15123589</a></dd>
+<dt>Pegasys:</dt>
+<dd><a class="reference external" href="http://www.ncbi.nlm.nih.gov/pubmed/15096276">http://www.ncbi.nlm.nih.gov/pubmed/15096276</a></dd>
+<dt>GPIPE:</dt>
+<dd><a class="reference external" href="http://www.biomedcentral.com/pubmed/15096276">http://www.biomedcentral.com/pubmed/15096276</a></dd>
+<dt>Taverna:</dt>
+<dd><a class="reference external" href="http://www.ncbi.nlm.nih.gov/pubmed/15201187">http://www.ncbi.nlm.nih.gov/pubmed/15201187</a></dd>
+<dt>Wildfire:</dt>
+<dd><a class="reference external" href="http://www.biomedcentral.com/pubmed/15788106">http://www.biomedcentral.com/pubmed/15788106</a></dd>
+<dt>MOWserv:</dt>
+<dd><a class="reference external" href="http://www.biomedcentral.com/pubmed/16257987">http://www.biomedcentral.com/pubmed/16257987</a></dd>
+<dt>Triana:</dt>
+<dd><a class="reference external" href="http://dx.doi.org/10.1007/s10723-005-9007-3">http://dx.doi.org/10.1007/s10723-005-9007-3</a></dd>
+<dt>Cyrille2:</dt>
+<dd><a class="reference external" href="http://www.biomedcentral.com/1471-2105/9/96">http://www.biomedcentral.com/1471-2105/9/96</a></dd>
+</dl>
+</div></blockquote>
+</div>
+<div class="section" id="acknowledgements">
+<span id="index-2"></span><h3>Acknowledgements<a class="headerlink" href="#acknowledgements" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><ul>
+<li><p class="first">Bruce Eckel’s insightful article on
+<a class="reference external" href="http://www.artima.com/weblogs/viewpost.jsp?thread=241209">A Decorator Based Build System</a>
+was the obvious inspiration for the use of decorators in <em>Ruffus</em>.</p>
+<dl class="docutils">
+<dt>The rest of <em>Ruffus</em> uses a different approach. In particular:</dt>
+<dd><ol class="first last arabic">
+<li><p class="first"><em>Ruffus</em> uses task-based not file-based dependencies</p>
+</li>
+<li><p class="first"><em>Ruffus</em> tries to have minimal impact on the functions it decorates.</p>
+<p>Bruce Eckel’s design wraps functions in “rule” objects.</p>
+<p><em>Ruffus</em> tasks are added as attributes of the functions, which can still be
+called normally. This is how <em>Ruffus</em> decorators can be layered in any order
+onto the same task.</p>
+</li>
+</ol>
+</dd>
+</dl>
+</li>
+<li><p class="first">Languages like c++ and Java would probably use a “mixin” approach.
+Python’s easy support for reflection and function references,
+as well as the necessity of marshalling over process boundaries, dictated the
+internal architecture of <em>Ruffus</em>.</p>
+</li>
+<li><p class="first">The <a class="reference external" href="http://www.boost.org">Boost Graph library</a> for text book implementations of directed
+graph traversals.</p>
+</li>
+<li><p class="first"><a class="reference external" href="http://www.graphviz.org/">Graphviz</a>. Just works. Wonderful.</p>
+</li>
+<li><p class="first">Andreas Heger, Christoffer Nellåker and Grant Belgard for driving Ruffus towards
+ever simpler syntax.</p>
+</li>
+</ul>
+</div></blockquote>
+</div>
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#">Design & Architecture</a><ul>
+<li><a class="reference internal" href="#gnu-make"><cite>GNU Make</cite></a><ul>
+<li><a class="reference internal" href="#deficiencies-of-make-gmake">Deficiencies of <cite>make</cite> / <cite>gmake</cite></a></li>
+</ul>
+</li>
+<li><a class="reference internal" href="#scons-rake-and-other-make-alternatives"><cite>Scons</cite>, <cite>Rake</cite> and other <cite>Make</cite> alternatives</a><ul>
+<li><a class="reference internal" href="#implicit-dependencies-disadvantages-of-make-scons-rake">Implicit dependencies: disadvantages of <cite>make</cite> / <cite>scons</cite> / <cite>rake</cite></a></li>
+<li><a class="reference internal" href="#explicit-dependencies-in-ruffus">Explicit dependencies in <cite>Ruffus</cite></a></li>
+<li><a class="reference internal" href="#static-dependencies-what-make-scons-rake-can-t-do-easily">Static dependencies: What <cite>make</cite> / <cite>scons</cite> / <cite>rake</cite> can’t do (easily)</a></li>
+</ul>
+</li>
+<li><a class="reference internal" href="#managing-pipelines-stage-by-stage-using-ruffus">Managing pipelines stage-by-stage using <strong>Ruffus</strong></a><ul>
+<li><a class="reference internal" href="#disadvantages-of-the-ruffus-design">Disadvantages of the Ruffus design</a></li>
+</ul>
+</li>
+<li><a class="reference internal" href="#alternatives-to-ruffus">Alternatives to <strong>Ruffus</strong></a><ul>
+<li><a class="reference internal" href="#acknowledgements">Acknowledgements</a></li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="installation.html"
+ title="previous chapter">Installation</a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="history.html"
+ title="next chapter">Major Features added to Ruffus</a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="_sources/design.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="decorators/decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="decorators/originate.html">@originate</a> </li>
+ <li><a href="decorators/split.html">@split</a> </li>
+ <li><a href="decorators/transform.html">@transform</a> </li>
+ <li><a href="decorators/merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="decorators/decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="decorators/subdivide.html">@subdivide</a> </li>
+ <li><a href="decorators/transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="decorators/collate.html">@collate</a> </li>
+ <li><a href="decorators/collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="decorators/graphviz.html">@graphviz</a> </li>
+ <li><a href="decorators/mkdir.html">@mkdir</a> </li>
+ <li><a href="decorators/follows.html">@follows / mkdir</a> </li>
+ <li><a href="decorators/posttask.html">@posttask touch_file</a> </li>
+ <li><a href="decorators/active_if.html">@active_if</a> </li>
+ <li><a href="decorators/jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="decorators/decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="decorators/product.html">@product </a> </li>
+ <li><a href="decorators/permutations.html">@permutations </a> </li>
+ <li><a href="decorators/combinations.html">@combinations </a> </li>
+ <li><a href="decorators/combinations_with_replacement.html">@combinations_ _with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="decorators/decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="decorators/files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="decorators/parallel.html">@parallel</a> </li>
+ <li><a href="decorators/check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="decorators/indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="history.html" title="Major Features added to Ruffus"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="installation.html" title="Installation"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="index.html">Home</a> | </li>
+ <li><a href="contents.html">Contents</a> | </li>
+ <li><a href="installation.html">Install</a> | </li>
+ <li><a href="tutorials/new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="tutorials/new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="faq.html">FAQ</a> | </li>
+ <li><a href="cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="tutorials/new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="gallery.html">Gallery</a> | </li>
+ <li><a href="history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/drmaa_wrapper_functions.html b/doc/_build/html/drmaa_wrapper_functions.html
new file mode 100644
index 0000000..e370aa7
--- /dev/null
+++ b/doc/_build/html/drmaa_wrapper_functions.html
@@ -0,0 +1,407 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>drmaa functions — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: './',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="_static/jquery.js"></script>
+ <script type="text/javascript" src="_static/underscore.js"></script>
+ <script type="text/javascript" src="_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="index.html" />
+ <link rel="next" title="Installation" href="installation.html" />
+ <link rel="prev" title="Pipeline functions" href="pipeline_functions.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="installation.html" title="Installation"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="pipeline_functions.html" title="Pipeline functions"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="index.html">Home</a> | </li>
+ <li><a href="contents.html">Contents</a> | </li>
+ <li><a href="installation.html">Install</a> | </li>
+ <li><a href="tutorials/new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="tutorials/new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="faq.html">FAQ</a> | </li>
+ <li><a href="cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="tutorials/new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="gallery.html">Gallery</a> | </li>
+ <li><a href="history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <span class="target" id="drmaa-functions"></span><div class="section" id="id1">
+<h1>drmaa functions<a class="headerlink" href="#id1" title="Permalink to this headline">¶</a></h1>
+<blockquote>
+<div><p><tt class="docutils literal"><span class="pre">drmaa_wrapper</span></tt> is not exported automatically by ruffus and must be specified explicitly:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="hll"><span class="c"># imported ruffus.drmaa_wrapper explicitly</span>
+</span><span class="kn">from</span> <span class="nn">ruffus.drmaa_wrapper</span> <span class="kn">import</span> <span class="n">run_job</span><span class="p">,</span> <span class="n">error_drmaa_job</span>
+</pre></div>
+</div>
+</div></blockquote>
+</div></blockquote>
+<span class="target" id="drmaa-wrapper-run-job"></span><div class="section" id="index-0">
+<span id="id2"></span><h2><em>run_job</em><a class="headerlink" href="#index-0" title="Permalink to this headline">¶</a></h2>
+<p><strong>run_job</strong> (<a class="reference internal" href="#drmaa-wrapper-run-job-cmd-str"><cite>cmd_str</cite></a>, <a class="reference internal" href="#drmaa-wrapper-run-job-job-name"><cite>job_name</cite></a> = None, <a class="reference internal" href="#drmaa-wrapper-run-job-job-other-options"><cite>job_other_options</cite></a> = None, <a class="reference internal" href="#drmaa-wrapper-run-job-job-script-directory"><cite>job_script_directory</cite></a> = None, <a class="referenc [...]
+<blockquote>
+<div><p><strong>Purpose:</strong></p>
+<blockquote>
+<div><p><tt class="docutils literal"><span class="pre">ruffus.drmaa_wrapper.run_job</span></tt> dispatches a command with arguments to a cluster or Grid Engine node and waits for the command to complete.</p>
+<p>It is the semantic equivalent of calling <a class="reference external" href="http://docs.python.org/2/library/os.html#os.system">os.system</a> or
+<a class="reference external" href="http://docs.python.org/2/library/subprocess.html#subprocess.check_call">subprocess.check_output</a>.</p>
+</div></blockquote>
+<p><strong>Example</strong>:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="kn">from</span> <span class="nn">ruffus.drmaa_wrapper</span> <span class="kn">import</span> <span class="n">run_job</span><span class="p">,</span> <span class="n">error_drmaa_job</span>
+<span class="kn">import</span> <span class="nn">drmaa</span>
+<span class="n">my_drmaa_session</span> <span class="o">=</span> <span class="n">drmaa</span><span class="o">.</span><span class="n">Session</span><span class="p">()</span>
+<span class="n">my_drmaa_session</span><span class="o">.</span><span class="n">initialize</span><span class="p">()</span>
+
+<span class="n">run_job</span><span class="p">(</span><span class="s">"ls"</span><span class="p">,</span>
+ <span class="n">job_name</span> <span class="o">=</span> <span class="s">"test"</span><span class="p">,</span>
+ <span class="n">job_other_options</span><span class="o">=</span><span class="s">"-P mott-flint.prja -q short.qa"</span><span class="p">,</span>
+ <span class="n">job_script_directory</span> <span class="o">=</span> <span class="s">"test_dir"</span><span class="p">,</span>
+ <span class="n">job_environment</span><span class="o">=</span><span class="p">{</span> <span class="s">'BASH_ENV'</span> <span class="p">:</span> <span class="s">'~/.bashrc'</span> <span class="p">},</span>
+ <span class="n">retain_job_scripts</span> <span class="o">=</span> <span class="bp">True</span><span class="p">,</span> <span class="n">drmaa_session</span><span class="o">=</span><span class="n">my_drmaa_session</span><span class="p">)</span>
+<span class="n">run_job</span><span class="p">(</span><span class="s">"ls"</span><span class="p">,</span>
+ <span class="n">job_name</span> <span class="o">=</span> <span class="s">"test"</span><span class="p">,</span>
+ <span class="n">job_other_options</span><span class="o">=</span><span class="s">"-P mott-flint.prja -q short.qa"</span><span class="p">,</span>
+ <span class="n">job_script_directory</span> <span class="o">=</span> <span class="s">"test_dir"</span><span class="p">,</span>
+ <span class="n">job_environment</span><span class="o">=</span><span class="p">{</span> <span class="s">'BASH_ENV'</span> <span class="p">:</span> <span class="s">'~/.bashrc'</span> <span class="p">},</span>
+ <span class="n">retain_job_scripts</span> <span class="o">=</span> <span class="bp">True</span><span class="p">,</span>
+ <span class="n">drmaa_session</span><span class="o">=</span><span class="n">my_drmaa_session</span><span class="p">,</span>
+ <span class="n">working_directory</span> <span class="o">=</span> <span class="s">"/gpfs1/well/mott-flint/lg/src/oss/ruffus/doc"</span><span class="p">)</span>
+
+<span class="c">#</span>
+<span class="c"># catch exceptions</span>
+<span class="c">#</span>
+<span class="k">try</span><span class="p">:</span>
+ <span class="n">stdout_res</span><span class="p">,</span> <span class="n">stderr_res</span> <span class="o">=</span> <span class="n">run_job</span><span class="p">(</span><span class="n">cmd</span><span class="p">,</span>
+ <span class="n">job_name</span> <span class="o">=</span> <span class="n">job_name</span><span class="p">,</span>
+ <span class="n">logger</span> <span class="o">=</span> <span class="n">logger</span><span class="p">,</span>
+ <span class="n">drmaa_session</span> <span class="o">=</span> <span class="n">drmaa_session</span><span class="p">,</span>
+ <span class="n">run_locally</span> <span class="o">=</span> <span class="n">options</span><span class="o">.</span><span class="n">local_run</span><span class="p">,</span>
+ <span class="n">job_other_options</span> <span class="o">=</span> <span class="n">get_queue_name</span><span class="p">())</span>
+
+<span class="c"># relay all the stdout, stderr, drmaa output to diagnose failures</span>
+<span class="k">except</span> <span class="n">error_drmaa_job</span> <span class="k">as</span> <span class="n">err</span><span class="p">:</span>
+ <span class="k">raise</span> <span class="ne">Exception</span><span class="p">(</span><span class="s">"</span><span class="se">\n</span><span class="s">"</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="nb">map</span><span class="p">(</span><span class="nb">str</span><span class="p">,</span>
+ <span class="p">[</span><span class="s">"Failed to run:"</span><span class="p">,</span>
+ <span class="n">cmd</span><span class="p">,</span>
+ <span class="n">err</span><span class="p">,</span>
+ <span class="n">stdout_res</span><span class="p">,</span>
+ <span class="n">stderr_res</span><span class="p">])))</span>
+
+<span class="n">my_drmaa_session</span><span class="o">.</span><span class="n">exit</span><span class="p">()</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p><strong>Parameters:</strong></p>
+</div></blockquote>
+<blockquote id="drmaa-wrapper-run-job-cmd-str">
+<div><ul>
+<li><p class="first"><em>cmd_str</em></p>
+<blockquote>
+<div><p>The command which will be run remotely including all parameters</p>
+</div></blockquote>
+</li>
+</ul>
+</div></blockquote>
+<blockquote id="drmaa-wrapper-run-job-job-name">
+<div><ul>
+<li><p class="first"><em>job_name</em></p>
+<blockquote>
+<div><p>A descriptive name for the command. This will be displayed by <a class="reference external" href="http://gridscheduler.sourceforge.net/htmlman/htmlman1/qstat.html">SGE qstat</a>, for example.
+Defaults to “ruffus_job”</p>
+</div></blockquote>
+</li>
+</ul>
+</div></blockquote>
+<blockquote id="drmaa-wrapper-run-job-job-other-options">
+<div><ul>
+<li><p class="first"><em>job_other_options</em></p>
+<blockquote>
+<div><p>Other drmaa parameters can be passed verbatim as a string.</p>
+<p>Examples for SGE include project name (<tt class="docutils literal"><span class="pre">-P</span> <span class="pre">project_name</span></tt>), parallel environment (<tt class="docutils literal"><span class="pre">-pe</span> <span class="pre">parallel_environ</span></tt>), account (<tt class="docutils literal"><span class="pre">-A</span> <span class="pre">account_string</span></tt>), resource (<tt class="docutils literal"><span class="pre">-l</span> <span class="pre">resource=expression</ [...]
+queue name (<tt class="docutils literal"><span class="pre">-q</span> <span class="pre">a_queue_name</span></tt>), queue priority (<tt class="docutils literal"><span class="pre">-p</span> <span class="pre">15</span></tt>).</p>
+<p>These are parameters which you normally need to include when submitting jobs interactively, for example via
+<a class="reference external" href="http://gridscheduler.sourceforge.net/htmlman/htmlman1/qsub.html">SGE qsub</a>
+or <a class="reference external" href="http://apps.man.poznan.pl/trac/slurm-drmaa/wiki/WikiStart#Nativespecification">SLURM</a> (<a class="reference external" href="https://computing.llnl.gov/linux/slurm/srun.html">srun</a>)</p>
+</div></blockquote>
+</li>
+</ul>
+</div></blockquote>
+<blockquote id="drmaa-wrapper-run-job-job-script-directory">
+<div><ul>
+<li><p class="first"><em>job_script_directory</em></p>
+<blockquote>
+<div><p>The directory where drmaa temporary script files will be found. Defaults to the current working directory.</p>
+</div></blockquote>
+</li>
+</ul>
+</div></blockquote>
+<blockquote id="drmaa-wrapper-run-job-job-environment">
+<div><ul>
+<li><p class="first"><em>job_environment</em></p>
+<blockquote>
+<div><p>A dictionary of key / values with environment variables. E.g. <tt class="docutils literal"><span class="pre">"{'BASH_ENV':</span> <span class="pre">'~/.bashrc'}"</span></tt></p>
+</div></blockquote>
+</li>
+</ul>
+</div></blockquote>
+<blockquote id="drmaa-wrapper-run-job-working-directory">
+<div><ul>
+<li><p class="first"><em>working_directory</em></p>
+<blockquote>
+<div><ul class="simple">
+<li>Sets the working directory.</li>
+<li>Should be a fully qualified path.</li>
+<li>Defaults to the current working directory.</li>
+</ul>
+</div></blockquote>
+</li>
+</ul>
+</div></blockquote>
+<blockquote id="drmaa-wrapper-run-job-retain-job-scripts">
+<div><ul>
+<li><p class="first"><em>retain_job_scripts</em></p>
+<blockquote>
+<div><p>Do not delete temporary script files containing drmaa commands. Useful for
+debugging, for running commands on the command line directly, and as a record of the commands.</p>
+</div></blockquote>
+</li>
+</ul>
+</div></blockquote>
+<blockquote id="drmaa-wrapper-run-job-logger">
+<div><ul>
+<li><p class="first"><em>logger</em></p>
+<blockquote>
+<div><p>For logging messages indicating the progress of the pipeline in terms of tasks and jobs. Takes objects with the standard python
+<a class="reference external" href="https://docs.python.org/2/library/logging.html">logging</a> module interface.</p>
+</div></blockquote>
+</li>
+</ul>
+</div></blockquote>
+<blockquote id="drmaa-wrapper-run-job-drmaa-session">
+<div><ul>
+<li><p class="first"><em>drmaa_session</em></p>
+<blockquote>
+<div><p>A shared drmaa session created and managed separately.</p>
+<p>In the main part of your <strong>Ruffus</strong> pipeline script somewhere there should be code looking like this:</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="c">#</span>
+<span class="c"># start shared drmaa session for all jobs / tasks in pipeline</span>
+<span class="c">#</span>
+<span class="kn">import</span> <span class="nn">drmaa</span>
+<span class="n">drmaa_session</span> <span class="o">=</span> <span class="n">drmaa</span><span class="o">.</span><span class="n">Session</span><span class="p">()</span>
+<span class="n">drmaa_session</span><span class="o">.</span><span class="n">initialize</span><span class="p">()</span>
+
+
+<span class="c">#</span>
+<span class="c"># pipeline functions</span>
+<span class="c">#</span>
+
+<span class="k">if</span> <span class="n">__name__</span> <span class="o">==</span> <span class="s">'__main__'</span><span class="p">:</span>
+ <span class="n">cmdline</span><span class="o">.</span><span class="n">run</span> <span class="p">(</span><span class="n">options</span><span class="p">,</span> <span class="n">multithread</span> <span class="o">=</span> <span class="n">options</span><span class="o">.</span><span class="n">jobs</span><span class="p">)</span>
+ <span class="n">drmaa_session</span><span class="o">.</span><span class="n">exit</span><span class="p">()</span>
+</pre></div>
+</div>
+</div></blockquote>
+</li>
+</ul>
+</div></blockquote>
+<blockquote id="drmaa-wrapper-run-job-run-locally">
+<div><ul>
+<li><p class="first"><em>run_locally</em></p>
+<blockquote>
+<div><p>Runs commands locally using the standard python <a class="reference external" href="https://docs.python.org/2/library/subprocess.html">subprocess</a> module
+rather than dispatching remotely. This allows scripts to be debugged easily.</p>
+</div></blockquote>
+</li>
+</ul>
+</div></blockquote>
+<blockquote id="drmaa-wrapper-run-job-touch-only">
+<div><ul>
+<li><p class="first"><em>touch_only</em></p>
+<blockquote>
+<div><p>Create or update <a class="reference internal" href="#drmaa-wrapper-run-job-output-files"><em>Output files</em></a>
+only to simulate the running of the pipeline.
+Does not dispatch commands remotely or locally. This is most useful to force a
+pipeline to acknowledge that a particular part is now up-to-date.</p>
+<p>See also: <a class="reference internal" href="pipeline_functions.html#pipeline-functions-pipeline-run-touch-files-only"><em>pipeline_run(touch_files_only=True)</em></a></p>
+</div></blockquote>
+</li>
+</ul>
+</div></blockquote>
+<blockquote id="drmaa-wrapper-run-job-output-files">
+<div><ul>
+<li><p class="first"><em>output_files</em></p>
+<blockquote>
+<div><p>Output files which will be created or updated if <a class="reference internal" href="#drmaa-wrapper-run-job-touch-only"><em>touch_only</em></a> <tt class="docutils literal"><span class="pre">=True</span></tt></p>
+</div></blockquote>
+</li>
+</ul>
+</div></blockquote>
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#">drmaa functions</a><ul>
+<li><a class="reference internal" href="#index-0"><em>run_job</em></a></li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="pipeline_functions.html"
+ title="previous chapter">Pipeline functions</a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="installation.html"
+ title="next chapter">Installation</a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="_sources/drmaa_wrapper_functions.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="decorators/decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="decorators/originate.html">@originate</a> </li>
+ <li><a href="decorators/split.html">@split</a> </li>
+ <li><a href="decorators/transform.html">@transform</a> </li>
+ <li><a href="decorators/merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="decorators/decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="decorators/subdivide.html">@subdivide</a> </li>
+ <li><a href="decorators/transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="decorators/collate.html">@collate</a> </li>
+ <li><a href="decorators/collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="decorators/graphviz.html">@graphviz</a> </li>
+ <li><a href="decorators/mkdir.html">@mkdir</a> </li>
+ <li><a href="decorators/follows.html">@follows / mkdir</a> </li>
+ <li><a href="decorators/posttask.html">@posttask touch_file</a> </li>
+ <li><a href="decorators/active_if.html">@active_if</a> </li>
+ <li><a href="decorators/jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="decorators/decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="decorators/product.html">@product </a> </li>
+ <li><a href="decorators/permutations.html">@permutations </a> </li>
+ <li><a href="decorators/combinations.html">@combinations </a> </li>
+ <li><a href="decorators/combinations_with_replacement.html">@combinations_ _with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="decorators/decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="decorators/files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="decorators/parallel.html">@parallel</a> </li>
+ <li><a href="decorators/check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="decorators/indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="##drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="installation.html" title="Installation"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="pipeline_functions.html" title="Pipeline functions"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="index.html">Home</a> | </li>
+ <li><a href="contents.html">Contents</a> | </li>
+ <li><a href="installation.html">Install</a> | </li>
+ <li><a href="tutorials/new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="tutorials/new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="faq.html">FAQ</a> | </li>
+ <li><a href="cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="tutorials/new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="gallery.html">Gallery</a> | </li>
+ <li><a href="history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/examples/bioinformatics/index.html b/doc/_build/html/examples/bioinformatics/index.html
new file mode 100644
index 0000000..1442ddc
--- /dev/null
+++ b/doc/_build/html/examples/bioinformatics/index.html
@@ -0,0 +1,483 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>Construction of a simple pipeline to run BLAST jobs — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../../index.html" />
+ <link rel="next" title="Part 2: A slightly more practical pipeline to run blasts jobs" href="part2.html" />
+ <link rel="prev" title="Why Ruffus?" href="../../why_ruffus.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="part2.html" title="Part 2: A slightly more practical pipeline to run blasts jobs"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="../../why_ruffus.html" title="Why Ruffus?"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="../../tutorials/new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../../tutorials/new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../../tutorials/new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <div class="section" id="construction-of-a-simple-pipeline-to-run-blast-jobs">
+<span id="examples-bioinformatics-part1"></span><h1>Construction of a simple pipeline to run BLAST jobs<a class="headerlink" href="#construction-of-a-simple-pipeline-to-run-blast-jobs" title="Permalink to this headline">¶</a></h1>
+<div class="section" id="overview">
+<h2>Overview<a class="headerlink" href="#overview" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>This is a simple example to illustrate the convenience <strong>Ruffus</strong>
+brings to simple tasks in bioinformatics.</p>
+<blockquote>
+<div><ol class="arabic simple">
+<li><strong>Split</strong> a problem into multiple fragments that can be</li>
+<li><strong>Run in parallel</strong> giving partial solutions that can be</li>
+<li><strong>Recombined</strong> into the complete solution.</li>
+</ol>
+</div></blockquote>
+<p>The example code runs a <a class="reference external" href="http://blast.ncbi.nlm.nih.gov/">ncbi</a>
+<a class="reference external" href="http://en.wikipedia.org/wiki/BLAST">blast</a> search for four sequences
+against the human <a class="reference external" href="http://en.wikipedia.org/wiki/RefSeq">refseq</a> protein sequence database.</p>
+<blockquote>
+<div><ol class="arabic simple">
+<li><strong>Split</strong> each of the four sequences into a separate file.</li>
+<li><strong>Run in parallel</strong> Blastall on each sequence file</li>
+<li><strong>Recombine</strong> the BLAST results by simple concatenation.</li>
+</ol>
+</div></blockquote>
+<p>In real life,</p>
+<blockquote>
+<div><ul class="simple">
+<li><a class="reference external" href="http://blast.ncbi.nlm.nih.gov/">BLAST</a> already provides support for multiprocessing</li>
+<li>Sequence files would be split into much larger chunks, each with many sequences</li>
+<li>The jobs would be submitted to large computational farms (in our case, using the Sun Grid Engine).</li>
+<li>The High Scoring Pairs (HSPs) would be parsed / filtered / stored in your own formats.</li>
+</ul>
+</div></blockquote>
+<div class="admonition note">
+<p class="first admonition-title">Note</p>
+<p>This bioinformatics example is intended to showcase <em>some</em> of the features of Ruffus.</p>
+<blockquote class="last">
+<div><ol class="arabic simple">
+<li>See the <a class="reference internal" href="../../tutorials/new_tutorial/introduction.html#new-manual-introduction"><em>manual</em></a> to learn about the various features in Ruffus.</li>
+</ol>
+</div></blockquote>
+</div>
+</div></blockquote>
+</div>
+<div class="section" id="prerequisites">
+<h2>Prerequisites<a class="headerlink" href="#prerequisites" title="Permalink to this headline">¶</a></h2>
+<div class="section" id="ruffus">
+<h3>1. Ruffus<a class="headerlink" href="#ruffus" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><p>To install Ruffus on most systems with python installed:</p>
+<blockquote>
+<div><div class="highlight-python"><pre>easy_install -U ruffus</pre>
+</div>
+</div></blockquote>
+<p>Otherwise, <a class="reference external" href="http://code.google.com/p/ruffus/downloads/list">download</a> Ruffus and run:</p>
+<blockquote>
+<div><div class="highlight-python"><pre>tar -xvzf ruffus-xxx.tar.gz
+cd ruffus-xxx
+./setup.py install</pre>
+</div>
+</div></blockquote>
+<p>where xxx is the latest Ruffus version.</p>
+</div></blockquote>
+</div>
+<div class="section" id="blast">
+<h3>2. BLAST<a class="headerlink" href="#blast" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div>This example assumes that the <a class="reference external" href="http://blast.ncbi.nlm.nih.gov/">BLAST</a> <tt class="docutils literal"><span class="pre">blastall</span></tt> and <tt class="docutils literal"><span class="pre">formatdb</span></tt> executables are
+installed and on the search path. Otherwise download from <a class="reference external" href="http://blast.ncbi.nlm.nih.gov/Blast.cgi?CMD=Web&PAGE_TYPE=BlastDocs&DOC_TYPE=Download">here</a>.</div></blockquote>
+</div>
+<div class="section" id="human-refseq-sequence-database">
+<h3>3. human refseq sequence database<a class="headerlink" href="#human-refseq-sequence-database" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><p>We also need to download the human refseq sequence file and format the ncbi database:</p>
+<blockquote>
+<div><div class="highlight-python"><pre>wget ftp://ftp.ncbi.nih.gov/refseq/H_sapiens/mRNA_Prot/human.protein.faa.gz
+gunzip human.protein.faa.gz
+
+formatdb -i human.protein.faa</pre>
+</div>
+</div></blockquote>
+</div></blockquote>
+</div>
+<div class="section" id="test-sequences">
+<h3>4. test sequences<a class="headerlink" href="#test-sequences" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div>Query sequences in FASTA format can be found in <a class="reference external" href="../../_static/examples/bioinformatics/original.fa">original.fa</a></div></blockquote>
+</div>
+</div>
+<div class="section" id="code">
+<h2>Code<a class="headerlink" href="#code" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div>The code for this example can be found <a class="reference internal" href="part1_code.html#examples-bioinformatics-part1-code"><em>here</em></a> and
+pasted into the python command shell.</div></blockquote>
+</div>
+<div class="section" id="step-1-splitting-up-the-query-sequences">
+<h2>Step 1. Splitting up the query sequences<a class="headerlink" href="#step-1-splitting-up-the-query-sequences" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>We want each of our sequences in the query file <a class="reference external" href="../../_static/examples/bioinformatics/original.fa">original.fa</a> to be placed
+in a separate file named <tt class="docutils literal"><span class="pre">XXX.segment</span></tt>, where <tt class="docutils literal"><span class="pre">XXX</span></tt> runs from 1 to the number of sequences.</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="n">current_file_index</span> <span class="o">=</span> <span class="mi">0</span>
+<span class="k">for</span> <span class="n">line</span> <span class="ow">in</span> <span class="nb">open</span><span class="p">(</span><span class="s">"original.fa"</span><span class="p">):</span>
+ <span class="c"># start a new file for each accession line</span>
+ <span class="k">if</span> <span class="n">line</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="o">==</span> <span class="s">'>'</span><span class="p">:</span>
+ <span class="n">current_file_index</span> <span class="o">+=</span> <span class="mi">1</span>
+ <span class="n">current_file</span> <span class="o">=</span> <span class="nb">open</span><span class="p">(</span><span class="s">"</span><span class="si">%d</span><span class="s">.segment"</span> <span class="o">%</span> <span class="n">current_file_index</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+ <span class="n">current_file</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="n">line</span><span class="p">)</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p>To use this in a pipeline, we only need to wrap this in a function, “decorated” with the Ruffus
+keyword <a class="reference internal" href="../../tutorials/new_tutorial/split.html#new-manual-split"><em>@split</em></a>:</p>
+<blockquote>
+<div><img alt="../../_images/examples_bioinformatics_split.jpg" src="../../_images/examples_bioinformatics_split.jpg" />
+</div></blockquote>
+<div class="line-block">
+<div class="line">This indicates that we are splitting up the input file <a class="reference external" href="../../_static/examples/bioinformatics/original.fa">original.fa</a> into however many
+<tt class="docutils literal"><span class="pre">*.segment</span></tt> files as it takes.</div>
+<div class="line">The pipelined function itself takes two arguments, for the input and output.</div>
+</div>
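+<p>Putting the two together, the decorated task looks like the minimal sketch below (the complete version appears in the example code linked in the <em>Code</em> section above):</p>
+<blockquote>
+<div><div class="highlight-python"><pre>from ruffus import *
+
+@split("original.fa", "*.segment")
+def splitFasta(seqFile, segments):
+    """Split sequence file into one *.segment file per sequence"""
+    current_file_index = 0
+    for line in open(seqFile):
+        # start a new file for each accession line
+        if line[0] == '&gt;':
+            current_file_index += 1
+            current_file = open("%d.segment" % current_file_index, "w")
+        current_file.write(line)</pre>
+</div>
+</div></blockquote>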
+<p>We shall see later that this simple <a class="reference internal" href="../../tutorials/new_tutorial/split.html#new-manual-split"><em>@split</em></a> decorator already gives all the benefits of:</p>
+<blockquote>
+<div><ul class="simple">
+<li>Dependency checking</li>
+<li>Flowchart printing</li>
+</ul>
+</div></blockquote>
+</div></blockquote>
+</div>
+<div class="section" id="step-2-run-blast-jobs-in-parallel">
+<h2>Step 2. Run BLAST jobs in parallel<a class="headerlink" href="#step-2-run-blast-jobs-in-parallel" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>Assuming that blast is already installed, sequence matches can be found with this python
+code:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="n">os</span><span class="o">.</span><span class="n">system</span><span class="p">(</span><span class="s">"blastall -p blastp -d human.protein.faa -i 1.segment > 1.blastResult"</span><span class="p">)</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p>To pipeline this, we simply need to wrap it in a function decorated with the <strong>Ruffus</strong>
+keyword <a class="reference internal" href="../../tutorials/new_tutorial/transform.html#new-manual-transform"><em>@transform</em></a>.</p>
+<blockquote>
+<div><img alt="../../_images/examples_bioinformatics_transform.jpg" src="../../_images/examples_bioinformatics_transform.jpg" />
+</div></blockquote>
+<p>This indicates that we are taking all the output files from the previous <tt class="docutils literal"><span class="pre">splitFasta</span></tt>
+operation (<tt class="docutils literal"><span class="pre">*.segment</span></tt>) and <a class="reference internal" href="../../tutorials/new_tutorial/transform.html#new-manual-transform"><em>@transform</em></a>-ing each to a new file with the <tt class="docutils literal"><span class="pre">.blastResult</span></tt>
+suffix. Each of these transformation operations can run in parallel if specified.</p>
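+<p>A minimal sketch of the decorated task (the complete version is in the example code linked in the <em>Code</em> section above):</p>
+<blockquote>
+<div><div class="highlight-python"><pre>import os
+
+@transform(splitFasta, suffix(".segment"), ".blastResult")
+def runBlast(seqFile, blastResultFile):
+    """Run blast on a single *.segment file"""
+    os.system("blastall -p blastp -d human.protein.faa -i %s &gt; %s" %
+              (seqFile, blastResultFile))</pre>
+</div>
+</div></blockquote>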
+</div></blockquote>
+</div>
+<div class="section" id="step-3-combining-blast-results">
+<h2>Step 3. Combining BLAST results<a class="headerlink" href="#step-3-combining-blast-results" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><dl class="docutils">
+<dt>The following python code will concatenate the results:</dt>
+<dd><div class="first last highlight-python"><div class="highlight"><pre><span class="kn">from</span> <span class="nn">glob</span> <span class="kn">import</span> <span class="n">glob</span>
+
+<span class="n">output_file</span> <span class="o">=</span> <span class="nb">open</span><span class="p">(</span><span class="s">"final.blast_results"</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+<span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="n">glob</span><span class="p">(</span><span class="s">"*.blastResult"</span><span class="p">):</span>
+ <span class="n">output_file</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="nb">open</span><span class="p">(</span><span class="n">i</span><span class="p">)</span><span class="o">.</span><span class="n">read</span><span class="p">())</span>
+</pre></div>
+</div>
+</dd>
+</dl>
+<p>To pipeline this, we again need only to decorate the function with the <strong>Ruffus</strong> keyword <a class="reference internal" href="../../tutorials/new_tutorial/merge.html#new-manual-merge"><em>@merge</em></a>.</p>
+<blockquote>
+<div><img alt="../../_images/examples_bioinformatics_merge.jpg" src="../../_images/examples_bioinformatics_merge.jpg" />
+</div></blockquote>
+<p>This indicates that we are taking all the output files from the previous <tt class="docutils literal"><span class="pre">runBlast</span></tt>
+operation (<tt class="docutils literal"><span class="pre">*.blastResults</span></tt>) and <a class="reference internal" href="../../tutorials/new_tutorial/merge.html#new-manual-merge"><em>@merge</em></a>-ing them to the new file <tt class="docutils literal"><span class="pre">final.blast_results</span></tt>.</p>
+</div></blockquote>
+</div>
+<div class="section" id="step-4-running-the-pipeline">
+<h2>Step 4. Running the pipeline<a class="headerlink" href="#step-4-running-the-pipeline" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>We can run the completed pipeline using a maximum of 4 parallel processes by calling
+<a class="reference internal" href="../../pipeline_functions.html#pipeline-functions-pipeline-run"><em>pipeline_run</em></a> :</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="n">pipeline_run</span><span class="p">([</span><span class="n">combineBlastResults</span><span class="p">],</span> <span class="n">verbose</span> <span class="o">=</span> <span class="mi">2</span><span class="p">,</span> <span class="n">multiprocess</span> <span class="o">=</span> <span class="mi">4</span><span class="p">)</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p>Though we have only asked Ruffus to run <tt class="docutils literal"><span class="pre">combineBlastResults</span></tt>, it traces all the dependencies
+of this task and runs all the necessary parts of the pipeline.</p>
+<div class="admonition note">
+<p class="first admonition-title">Note</p>
+<p class="last">The full code for this example can be found <a class="reference internal" href="part1_code.html#examples-bioinformatics-part1-code"><em>here</em></a>
+suitable for pasting into the python command shell.</p>
+</div>
+<p>The <tt class="docutils literal"><span class="pre">verbose</span></tt> parameter causes the following output to be printed to stderr as the pipeline
+runs:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="gp">>>> </span><span class="n">pipeline_run</span><span class="p">([</span><span class="n">combineBlastResults</span><span class="p">],</span> <span class="n">verbose</span> <span class="o">=</span> <span class="mi">2</span><span class="p">,</span> <span class="n">multiprocess</span> <span class="o">=</span> <span class="mi">4</span><span class="p">)</span>
+<span class="go"> Job = [original.fa -> *.segment] completed</span>
+<span class="go">Completed Task = splitFasta</span>
+<span class="go"> Job = [1.segment -> 1.blastResult] completed</span>
+<span class="go"> Job = [3.segment -> 3.blastResult] completed</span>
+<span class="go"> Job = [2.segment -> 2.blastResult] completed</span>
+<span class="go"> Job = [4.segment -> 4.blastResult] completed</span>
+<span class="go">Completed Task = runBlast</span>
+<span class="go"> Job = [[1.blastResult, 2.blastResult, 3.blastResult, 4.blastResult] -> final.blast_results] completed</span>
+<span class="go">Completed Task = combineBlastResults</span>
+</pre></div>
+</div>
+</div></blockquote>
+</div></blockquote>
+</div>
+<div class="section" id="step-5-testing-dependencies">
+<h2>Step 5. Testing dependencies<a class="headerlink" href="#step-5-testing-dependencies" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>If we invoked <a class="reference internal" href="../../pipeline_functions.html#pipeline-functions-pipeline-run"><em>pipeline_run</em></a> again, nothing
+further would happen because the
+pipeline is now up-to-date. But what if the pipeline had not run to completion?</p>
+<p>We can simulate the failure of one of the <tt class="docutils literal"><span class="pre">blastall</span></tt> jobs by deleting its results:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="n">os</span><span class="o">.</span><span class="n">unlink</span><span class="p">(</span><span class="s">"4.blastResult"</span><span class="p">)</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p>Let us use the <a class="reference internal" href="../../pipeline_functions.html#pipeline-functions-pipeline-printout"><em>pipeline_printout</em></a>
+function to print out the dependencies of the pipeline at a high <tt class="docutils literal"><span class="pre">verbose</span></tt> level, which
+will show both complete and incomplete jobs:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="gp">>>> </span><span class="kn">import</span> <span class="nn">sys</span>
+<span class="gp">>>> </span><span class="n">pipeline_printout</span><span class="p">(</span><span class="n">sys</span><span class="o">.</span><span class="n">stdout</span><span class="p">,</span> <span class="p">[</span><span class="n">combineBlastResults</span><span class="p">],</span> <span class="n">verbose</span> <span class="o">=</span> <span class="mi">4</span><span class="p">)</span>
+
+<span class="go">________________________________________</span>
+<span class="go">Tasks which are up-to-date:</span>
+
+<span class="go">Task = splitFasta</span>
+<span class="go"> "Split sequence file into as many fragments as appropriate depending on the size of</span>
+<span class="go"> original_fasta"</span>
+
+
+<span class="go">________________________________________</span>
+<span class="go">Tasks which will be run:</span>
+
+<span class="go">Task = runBlast</span>
+<span class="go"> "Run blast"</span>
+<span class="go"> Job = [4.segment</span>
+<span class="go"> ->4.blastResult]</span>
+<span class="go"> Job needs update: Missing file 4.blastResult</span>
+
+<span class="go">Task = combineBlastResults</span>
+<span class="go"> "Combine blast results"</span>
+<span class="go"> Job = [[1.blastResult, 2.blastResult, 3.blastResult, 4.blastResult]</span>
+<span class="go"> ->final.blast_results]</span>
+<span class="go"> Job needs update: Missing file 4.blastResult</span>
+
+<span class="go">________________________________________</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p>Only the parts of the pipeline which involve the missing BLAST result will be rerun.
+We can confirm this by invoking the pipeline again:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="gp">>>> </span><span class="n">pipeline_run</span><span class="p">([</span><span class="n">combineBlastResults</span><span class="p">],</span> <span class="n">verbose</span> <span class="o">=</span> <span class="mi">2</span><span class="p">,</span> <span class="n">multiprocess</span> <span class="o">=</span> <span class="mi">4</span><span class="p">)</span>
+
+<span class="go"> Job = [1.segment -> 1.blastResult] unnecessary: already up to date</span>
+<span class="go"> Job = [2.segment -> 2.blastResult] unnecessary: already up to date</span>
+<span class="go"> Job = [3.segment -> 3.blastResult] unnecessary: already up to date</span>
+<span class="go"> Job = [4.segment -> 4.blastResult] completed</span>
+<span class="go">Completed Task = runBlast</span>
+<span class="go"> Job = [[1.blastResult, 2.blastResult, 3.blastResult, 4.blastResult] -> final.blast_results] completed</span>
+<span class="go">Completed Task = combineBlastResults</span>
+</pre></div>
+</div>
+</div></blockquote>
+</div></blockquote>
+</div>
+<div class="section" id="what-is-next">
+<h2>What is next?<a class="headerlink" href="#what-is-next" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div>In the <a class="reference internal" href="part2.html#examples-bioinformatics-part2"><em>next (short) part</em></a>,
+we shall add some standard (boilerplate) code to
+turn this BLAST pipeline into a (slightly more) useful python program.</div></blockquote>
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="../../contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#">Construction of a simple pipeline to run BLAST jobs</a><ul>
+<li><a class="reference internal" href="#overview">Overview</a></li>
+<li><a class="reference internal" href="#prerequisites">Prerequisites</a><ul>
+<li><a class="reference internal" href="#ruffus">1. Ruffus</a></li>
+<li><a class="reference internal" href="#blast">2. BLAST</a></li>
+<li><a class="reference internal" href="#human-refseq-sequence-database">3. human refseq sequence database</a></li>
+<li><a class="reference internal" href="#test-sequences">4. test sequences</a></li>
+</ul>
+</li>
+<li><a class="reference internal" href="#code">Code</a></li>
+<li><a class="reference internal" href="#step-1-splitting-up-the-query-sequences">Step 1. Splitting up the query sequences</a></li>
+<li><a class="reference internal" href="#step-2-run-blast-jobs-in-parallel">Step 2. Run BLAST jobs in parallel</a></li>
+<li><a class="reference internal" href="#step-3-combining-blast-results">Step 3. Combining BLAST results</a></li>
+<li><a class="reference internal" href="#step-4-running-the-pipeline">Step 4. Running the pipeline</a></li>
+<li><a class="reference internal" href="#step-5-testing-dependencies">Step 5. Testing dependencies</a></li>
+<li><a class="reference internal" href="#what-is-next">What is next?</a></li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="../../why_ruffus.html"
+ title="previous chapter">Why <em>Ruffus</em>?</a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="part2.html"
+                        title="next chapter">Part 2: A slightly more practical pipeline to run blast jobs</a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../../_sources/examples/bioinformatics/index.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="../../decorators/decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="../../decorators/originate.html">@originate</a> </li>
+ <li><a href="../../decorators/split.html">@split</a> </li>
+ <li><a href="../../decorators/transform.html">@transform</a> </li>
+ <li><a href="../../decorators/merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="../../decorators/subdivide.html">@subdivide</a> </li>
+ <li><a href="../../decorators/transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="../../decorators/collate.html">@collate</a> </li>
+ <li><a href="../../decorators/collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="../../decorators/graphviz.html">@graphviz</a> </li>
+ <li><a href="../../decorators/mkdir.html">@mkdir</a> </li>
+ <li><a href="../../decorators/follows.html">@follows / mkdir</a> </li>
+ <li><a href="../../decorators/posttask.html">@posttask touch_file</a> </li>
+ <li><a href="../../decorators/active_if.html">@active_if</a> </li>
+ <li><a href="../../decorators/jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="../../decorators/product.html">@product </a> </li>
+ <li><a href="../../decorators/permutations.html">@permutations </a> </li>
+ <li><a href="../../decorators/combinations.html">@combinations </a> </li>
+ <li><a href="../../decorators/combinations_with_replacement.html">@combinations_ _with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="../../decorators/decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="../../decorators/files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="../../decorators/parallel.html">@parallel</a> </li>
+ <li><a href="../../decorators/check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="../../decorators/indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+          <a href="part2.html" title="Part 2: A slightly more practical pipeline to run blast jobs"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="../../why_ruffus.html" title="Why Ruffus?"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="../../tutorials/new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../../tutorials/new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../../tutorials/new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/examples/bioinformatics/part1_code.html b/doc/_build/html/examples/bioinformatics/part1_code.html
new file mode 100644
index 0000000..983deeb
--- /dev/null
+++ b/doc/_build/html/examples/bioinformatics/part1_code.html
@@ -0,0 +1,249 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>Ruffus code — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../../index.html" />
+ <link rel="next" title="Ruffus code" href="part2_code.html" />
+    <link rel="prev" title="Part 2: A slightly more practical pipeline to run blast jobs" href="part2.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="part2_code.html" title="Ruffus code"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+          <a href="part2.html" title="Part 2: A slightly more practical pipeline to run blast jobs"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="../../tutorials/new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../../tutorials/new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../../tutorials/new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <div class="section" id="ruffus-code">
+<span id="examples-bioinformatics-part1-code"></span><h1>Ruffus code<a class="headerlink" href="#ruffus-code" title="Permalink to this headline">¶</a></h1>
+<div class="highlight-python"><div class="highlight"><pre><span class="kn">import</span> <span class="nn">os</span><span class="o">,</span> <span class="nn">sys</span>
+
+<span class="n">exe_path</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">abspath</span><span class="p">(</span><span class="n">sys</span><span class="o">.</span><span class="n">argv</span><span class="p">[</span><span class="mi">0</span><sp [...]
+<span class="n">sys</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">insert</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">abspath</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span c [...]
+
+<span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+
+
+<span class="n">original_fasta</span> <span class="o">=</span> <span class="s">"original.fa"</span>
+<span class="n">database_file</span> <span class="o">=</span> <span class="s">"human.protein.faa"</span>
+
+<span class="nd">@split</span><span class="p">(</span><span class="n">original_fasta</span><span class="p">,</span> <span class="s">"*.segment"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">splitFasta</span> <span class="p">(</span><span class="n">seqFile</span><span class="p">,</span> <span class="n">segments</span><span class="p">):</span>
+ <span class="sd">"""Split sequence file into</span>
+<span class="sd"> as many fragments as appropriate</span>
+<span class="sd"> depending on the size of original_fasta"""</span>
+ <span class="n">current_file_index</span> <span class="o">=</span> <span class="mi">0</span>
+ <span class="k">for</span> <span class="n">line</span> <span class="ow">in</span> <span class="nb">open</span><span class="p">(</span><span class="n">original_fasta</span><span class="p">):</span>
+ <span class="c">#</span>
+ <span class="c"># start a new file for each accession line</span>
+ <span class="c">#</span>
+ <span class="k">if</span> <span class="n">line</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="o">==</span> <span class="s">'>'</span><span class="p">:</span>
+ <span class="n">current_file_index</span> <span class="o">+=</span> <span class="mi">1</span>
+ <span class="n">current_file</span> <span class="o">=</span> <span class="nb">open</span><span class="p">(</span><span class="s">"</span><span class="si">%d</span><span class="s">.segment"</span> <span class="o">%</span> <span class="n">current_file_index</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+ <span class="n">current_file</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="n">line</span><span class="p">)</span>
+
+
+
+<span class="nd">@transform</span><span class="p">(</span><span class="n">splitFasta</span><span class="p">,</span> <span class="n">suffix</span><span class="p">(</span><span class="s">".segment"</span><span class="p">),</span> <span class="s">".blastResult"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">runBlast</span><span class="p">(</span><span class="n">seqFile</span><span class="p">,</span> <span class="n">blastResultFile</span><span class="p">):</span>
+ <span class="sd">"""Run blast"""</span>
+ <span class="n">os</span><span class="o">.</span><span class="n">system</span><span class="p">(</span><span class="s">"blastall -p blastp -d </span><span class="si">%s</span><span class="s"> -i </span><span class="si">%s</span><span class="s"> > </span><span class="si">%s</span><span class="s">"</span> <span class="o">%</span>
+ <span class="p">(</span><span class="n">database_file</span><span class="p">,</span> <span class="n">seqFile</span><span class="p">,</span> <span class="n">blastResultFile</span><span class="p">))</span>
+
+
+<span class="nd">@merge</span><span class="p">(</span><span class="n">runBlast</span><span class="p">,</span> <span class="s">"final.blast_results"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">combineBlastResults</span> <span class="p">(</span><span class="n">blastResultFiles</span><span class="p">,</span> <span class="n">combinedBlastResultFile</span><span class="p">):</span>
+ <span class="sd">"""Combine blast results"""</span>
+ <span class="n">output_file</span> <span class="o">=</span> <span class="nb">open</span><span class="p">(</span><span class="n">combinedBlastResultFile</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+ <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="n">blastResultFiles</span><span class="p">:</span>
+ <span class="n">output_file</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="nb">open</span><span class="p">(</span><span class="n">i</span><span class="p">)</span><span class="o">.</span><span class="n">read</span><span class="p">())</span>
+
+
+<span class="n">pipeline_run</span><span class="p">([</span><span class="n">combineBlastResults</span><span class="p">],</span> <span class="n">verbose</span> <span class="o">=</span> <span class="mi">2</span><span class="p">,</span> <span class="n">multiprocess</span> <span class="o">=</span> <span class="mi">4</span><span class="p">)</span>
+
+
+<span class="c">#</span>
+<span class="c"># Simulate interuption of the pipeline by</span>
+<span class="c"># deleting the output of one of the BLAST jobs</span>
+<span class="c">#</span>
+<span class="n">os</span><span class="o">.</span><span class="n">unlink</span><span class="p">(</span><span class="s">"4.blastResult"</span><span class="p">)</span>
+
+<span class="n">pipeline_printout</span><span class="p">(</span><span class="n">sys</span><span class="o">.</span><span class="n">stdout</span><span class="p">,</span> <span class="p">[</span><span class="n">combineBlastResults</span><span class="p">],</span> <span class="n">verbose</span> <span class="o">=</span> <span class="mi">4</span><span class="p">)</span>
+
+
+<span class="c">#</span>
+<span class="c"># Re-running the pipeline</span>
+<span class="c">#</span>
+<span class="n">pipeline_run</span><span class="p">([</span><span class="n">combineBlastResults</span><span class="p">],</span> <span class="n">verbose</span> <span class="o">=</span> <span class="mi">2</span><span class="p">,</span> <span class="n">multiprocess</span> <span class="o">=</span> <span class="mi">4</span><span class="p">)</span>
+</pre></div>
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="part2.html"
+                  title="previous chapter">Part 2: A slightly more practical pipeline to run blast jobs</a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="part2_code.html"
+ title="next chapter">Ruffus code</a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../../_sources/examples/bioinformatics/part1_code.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="../../decorators/decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="../../decorators/originate.html">@originate</a> </li>
+ <li><a href="../../decorators/split.html">@split</a> </li>
+ <li><a href="../../decorators/transform.html">@transform</a> </li>
+ <li><a href="../../decorators/merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="../../decorators/subdivide.html">@subdivide</a> </li>
+ <li><a href="../../decorators/transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="../../decorators/collate.html">@collate</a> </li>
+ <li><a href="../../decorators/collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="../../decorators/graphviz.html">@graphviz</a> </li>
+ <li><a href="../../decorators/mkdir.html">@mkdir</a> </li>
+ <li><a href="../../decorators/follows.html">@follows / mkdir</a> </li>
+ <li><a href="../../decorators/posttask.html">@posttask touch_file</a> </li>
+ <li><a href="../../decorators/active_if.html">@active_if</a> </li>
+ <li><a href="../../decorators/jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="../../decorators/product.html">@product </a> </li>
+ <li><a href="../../decorators/permutations.html">@permutations </a> </li>
+ <li><a href="../../decorators/combinations.html">@combinations </a> </li>
+ <li><a href="../../decorators/combinations_with_replacement.html">@combinations_ _with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="../../decorators/decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="../../decorators/files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="../../decorators/parallel.html">@parallel</a> </li>
+ <li><a href="../../decorators/check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="../../decorators/indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="part2_code.html" title="Ruffus code"
+ >next</a> |</li>
+ <li class="right" >
+          <a href="part2.html" title="Part 2: A slightly more practical pipeline to run blast jobs"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="../../tutorials/new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../../tutorials/new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../../tutorials/new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/examples/bioinformatics/part2.html b/doc/_build/html/examples/bioinformatics/part2.html
new file mode 100644
index 0000000..2596bde
--- /dev/null
+++ b/doc/_build/html/examples/bioinformatics/part2.html
@@ -0,0 +1,347 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+    <title>Part 2: A slightly more practical pipeline to run blast jobs &mdash; ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../../index.html" />
+ <link rel="next" title="Ruffus code" href="part1_code.html" />
+ <link rel="prev" title="Construction of a simple pipeline to run BLAST jobs" href="index.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="part1_code.html" title="Ruffus code"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="index.html" title="Construction of a simple pipeline to run BLAST jobs"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="../../tutorials/new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../../tutorials/new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../../tutorials/new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <div class="section" id="part-2-a-slightly-more-practical-pipeline-to-run-blasts-jobs">
+<span id="examples-bioinformatics-part2"></span><h1>Part 2: A slightly more practical pipeline to run blasts jobs<a class="headerlink" href="#part-2-a-slightly-more-practical-pipeline-to-run-blasts-jobs" title="Permalink to this headline">¶</a></h1>
+<div class="section" id="overview">
+<h2>Overview<a class="headerlink" href="#overview" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p><a class="reference internal" href="index.html#examples-bioinformatics-part1"><em>Previously</em></a>, we had built
+a simple pipeline to split up a FASTA file of query sequences so
+that these can be matched against a sequence database in parallel.</p>
+<p>We shall wrap this code so that</p>
+<blockquote>
+<div><ul class="simple">
+<li>It is more robust to interruptions</li>
+<li>We can specify the file names on the command line</li>
+</ul>
+</div></blockquote>
+</div></blockquote>
+</div>
+<div class="section" id="step-1-cleaning-up-any-leftover-junk-from-previous-pipeline-runs">
+<h2>Step 1. Cleaning up any leftover junk from previous pipeline runs<a class="headerlink" href="#step-1-cleaning-up-any-leftover-junk-from-previous-pipeline-runs" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><div class="line-block">
+<div class="line">We split up each of our sequences in the query file <a class="reference external" href="../../_static/examples/bioinformatics/original.fa">original.fa</a>
+into a separate file named <tt class="docutils literal"><span class="pre">XXX.segment</span></tt>, where <tt class="docutils literal"><span class="pre">XXX</span></tt> runs from 1 to the number of sequences in
+the FASTA file.</div>
+</div>
+<div class="line-block">
+<div class="line">However, if we start with 6 sequences (giving <tt class="docutils literal"><span class="pre">1.segment</span></tt> ... <tt class="docutils literal"><span class="pre">6.segment</span></tt>), and we
+then edited <a class="reference external" href="../../_static/examples/bioinformatics/original.fa">original.fa</a>
+so that only 5 were left, the file <tt class="docutils literal"><span class="pre">6.segment</span></tt> would still be left
+hanging around as an unwanted, extraneous and confusing orphan.</div>
+</div>
+<p>As a general rule, it is a good idea to clean up the results of a previous run in
+a <em class="xref std std-ref">@split</em> operation:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="nd">@split</span><span class="p">(</span><span class="s">"original.fa"</span><span class="p">,</span> <span class="s">"*.segment"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">splitFasta</span> <span class="p">(</span><span class="n">seqFile</span><span class="p">,</span> <span class="n">segments</span><span class="p">):</span>
+
+ <span class="c">#</span>
+ <span class="c"># Clean up any segment files from previous runs before creating new one</span>
+ <span class="c">#</span>
+ <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="n">glob</span><span class="o">.</span><span class="n">glob</span><span class="p">(</span><span class="s">"*.segment"</span><span class="p">):</span>
+ <span class="n">os</span><span class="o">.</span><span class="n">unlink</span><span class="p">(</span><span class="n">i</span><span class="p">)</span>
+
+ <span class="c"># code as before...</span>
+</pre></div>
+</div>
+</div></blockquote>
+</div></blockquote>
+</div>
+<div class="section" id="step-2-adding-a-flag-file-to-mark-successful-completion">
+<span id="examples-bioinformatics-part2-step2"></span><h2>Step 2. Adding a “flag” file to mark successful completion<a class="headerlink" href="#step-2-adding-a-flag-file-to-mark-successful-completion" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>When pipelined tasks are interrupted halfway through an operation, the output may
+only contain part of the results in an incomplete or inconsistent state.
+There are three general options to deal with this:</p>
+<blockquote>
+<div><ol class="arabic simple">
+<li>Catch any interrupting conditions and delete the incomplete output</li>
+<li>Tag successfully completed output with a special marker at the end of the file</li>
+<li>Create an empty &#8220;flag&#8221; file whose only purpose is to signal success</li>
+</ol>
+</div></blockquote>
+<p>Option (3) is the most reliable approach and involves the least work in Ruffus.
+We add flag files with the suffix <tt class="docutils literal"><span class="pre">.blastSuccess</span></tt> for our parallel BLAST jobs:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="nd">@transform</span><span class="p">(</span><span class="n">splitFasta</span><span class="p">,</span> <span class="n">suffix</span><span class="p">(</span><span class="s">".segment"</span><span class="p">),</span> <span class="p">[</span><span class="s">".blastResult"</span><span class="p">,</span> <span class="s">".blastSuccess"</span><span class="p">])</span>
+<span class="k">def</span> <span class="nf">runBlast</span><span class="p">(</span><span class="n">seqFile</span><span class="p">,</span> <span class="n">output_files</span><span class="p">):</span>
+
+ <span class="n">blastResultFile</span><span class="p">,</span> <span class="n">flag_file</span> <span class="o">=</span> <span class="n">output_files</span>
+
+ <span class="c">#</span>
+ <span class="c"># Existing code unchanged</span>
+ <span class="c">#</span>
+ <span class="n">os</span><span class="o">.</span><span class="n">system</span><span class="p">(</span><span class="s">"blastall -p blastp -d human.protein.faa "</span><span class="o">+</span>
+ <span class="s">"-i </span><span class="si">%s</span><span class="s"> > </span><span class="si">%s</span><span class="s">"</span> <span class="o">%</span> <span class="p">(</span><span class="n">seqFile</span><span class="p">,</span> <span class="n">blastResultFile</span><span class="p">))</span>
+
+ <span class="c">#</span>
+ <span class="c"># "touch" flag file to indicate success</span>
+ <span class="c">#</span>
+ <span class="nb">open</span><span class="p">(</span><span class="n">flag_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+</pre></div>
+</div>
+</div></blockquote>
+</div></blockquote>
+</div>
+<div class="section" id="step-3-allowing-the-script-to-be-invoked-on-the-command-line">
+<h2>Step 3. Allowing the script to be invoked on the command line<a class="headerlink" href="#step-3-allowing-the-script-to-be-invoked-on-the-command-line" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>We allow the query sequence file, as well as the sequence database and the end results,
+to be specified at runtime using the standard python <a class="reference external" href="http://docs.python.org/library/optparse.html">optparse</a> module
+(a minimal sketch of this option handling is shown after the list of options below).
+We find this approach to run-time arguments generally useful for many Ruffus scripts.
+The full code can be <a class="reference internal" href="part2_code.html#examples-bioinformatics-part2-code"><em>viewed here</em></a> and
+<a class="reference external" href="../../_static/examples/bioinformatics/run_parallel_blast.py">downloaded from run_parallel_blast.py</a>.</p>
+<p>The different options can be inspected by running the script with the <tt class="docutils literal"><span class="pre">--help</span></tt> or <tt class="docutils literal"><span class="pre">-h</span></tt>
+argument.</p>
+<p>The following options are useful for developing Ruffus scripts:</p>
+<blockquote>
+<div><div class="highlight-python"><pre>--verbose | -v : Print more detailed messages for each additional verbose level.
+ E.g. run_parallel_blast --verbose --verbose --verbose ... (or -vvv)
+
+--jobs | -j : Specifies the number of jobs (operations) to run in parallel.
+
+--flowchart FILE : Print flowchart of the pipeline to FILE. Flowchart format
+ depends on extension. Alternatives include (".dot", ".jpg",
+ "*.svg", "*.png" etc). Formats other than ".dot" require
+ the dot program to be installed (http://www.graphviz.org/).
+
+--just_print | -n Only print a trace (description) of the pipeline.
+ The level of detail is set by --verbose.</pre>
+</div>
+</div></blockquote>
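+<p>The option handling itself is plain <tt class="docutils literal"><span class="pre">optparse</span></tt> code. The sketch below uses option names and defaults based on the help text above; the exact names, defaults and extra options in <tt class="docutils literal"><span class="pre">run_parallel_blast.py</span></tt> may differ, so treat it as an illustration rather than the script itself:</p>
+<blockquote>
+<div><div class="highlight-python"><pre>from optparse import OptionParser
+
+parser = OptionParser(usage="%prog [options]")
+parser.add_option("--input_file", dest="input_file", default="original.fa",
+                  help="Query sequences in FASTA format")
+parser.add_option("--database_file", dest="database_file", default="human.protein.faa",
+                  help="BLAST database to search against")
+parser.add_option("--result_file", dest="result_file", default="final.blast_results",
+                  help="Combined BLAST results")
+parser.add_option("-v", "--verbose", dest="verbose", action="count", default=0,
+                  help="Print more detailed messages for each additional verbose level")
+parser.add_option("-j", "--jobs", dest="jobs", type="int", default=1,
+                  help="Number of jobs (operations) to run in parallel")
+parser.add_option("--flowchart", dest="flowchart", metavar="FILE",
+                  help="Print flowchart of the pipeline to FILE")
+parser.add_option("-n", "--just_print", dest="just_print", action="store_true",
+                  default=False, help="Only print a trace (description) of the pipeline")
+
+(options, remaining_args) = parser.parse_args()</pre>
+</div>
+</div></blockquote>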
+</div></blockquote>
+</div>
+<div class="section" id="step-4-printing-out-a-flowchart-for-the-pipeline">
+<h2>Step 4. Printing out a flowchart for the pipeline<a class="headerlink" href="#step-4-printing-out-a-flowchart-for-the-pipeline" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>The <tt class="docutils literal"><span class="pre">--flowchart</span></tt> argument results in a call to <tt class="docutils literal"><span class="pre">pipeline_printout_graph(...)</span></tt>
+This prints out a flowchart of the pipeline. Valid formats include ”.dot”, ”.jpg”, ”.svg”, ”.png”
+but all except for the first require the <tt class="docutils literal"><span class="pre">dot</span></tt> program to be installed
+(<a class="reference external" href="http://www.graphviz.org/">http://www.graphviz.org/</a>).</p>
+<p>The state of the pipeline is reflected in the flowchart:</p>
+<img alt="../../_images/examples_bioinformatics_pipeline.jpg" src="../../_images/examples_bioinformatics_pipeline.jpg" />
+</div></blockquote>
+</div>
+<div class="section" id="step-5-errors">
+<h2>Step 5. Errors<a class="headerlink" href="#step-5-errors" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>Because Ruffus scripts are just normal python functions, you can debug them using
+your usual tools, or jump to the offending line(s) even when the pipeline is running in
+parallel.</p>
+<p>For example, this is what the error messages would look like if we had mis-spelt the command as <tt class="docutils literal"><span class="pre">blastal</span></tt>.
+In <a class="reference internal" href="part2_code.html#examples-bioinformatics-part2-code"><em>run_parallel_blast.py</em></a>,
+python exceptions are raised if the <tt class="docutils literal"><span class="pre">blastall</span></tt> command fails.</p>
+<p>Each of the exceptions from the parallel operations is printed out with the
+offending line (line 204) and the problem (<tt class="docutils literal"><span class="pre">blastal</span></tt> not found)
+highlighted in red.</p>
+<blockquote>
+<div><img alt="../../_images/examples_bioinformatics_error.png" src="../../_images/examples_bioinformatics_error.png" />
+</div></blockquote>
+</div></blockquote>
+</div>
+<div class="section" id="step-6-will-it-run">
+<h2>Step 6. Will it run?<a class="headerlink" href="#step-6-will-it-run" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div>The full code can be <a class="reference internal" href="part2_code.html#examples-bioinformatics-part2-code"><em>viewed here</em></a> and
+<a class="reference external" href="../../_static/examples/bioinformatics/run_parallel_blast.py">downloaded from run_parallel_blast.py</a>.</div></blockquote>
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="../../contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#">Part 2: A slightly more practical pipeline to run blasts jobs</a><ul>
+<li><a class="reference internal" href="#overview">Overview</a></li>
+<li><a class="reference internal" href="#step-1-cleaning-up-any-leftover-junk-from-previous-pipeline-runs">Step 1. Cleaning up any leftover junk from previous pipeline runs</a></li>
+<li><a class="reference internal" href="#step-2-adding-a-flag-file-to-mark-successful-completion">Step 2. Adding a “flag” file to mark successful completion</a></li>
+<li><a class="reference internal" href="#step-3-allowing-the-script-to-be-invoked-on-the-command-line">Step 3. Allowing the script to be invoked on the command line</a></li>
+<li><a class="reference internal" href="#step-4-printing-out-a-flowchart-for-the-pipeline">Step 4. Printing out a flowchart for the pipeline</a></li>
+<li><a class="reference internal" href="#step-5-errors">Step 5. Errors</a></li>
+<li><a class="reference internal" href="#step-6-will-it-run">Step 6. Will it run?</a></li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="index.html"
+ title="previous chapter">Construction of a simple pipeline to run BLAST jobs</a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="part1_code.html"
+ title="next chapter">Ruffus code</a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../../_sources/examples/bioinformatics/part2.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="../../decorators/decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="../../decorators/originate.html">@originate</a> </li>
+ <li><a href="../../decorators/split.html">@split</a> </li>
+ <li><a href="../../decorators/transform.html">@transform</a> </li>
+ <li><a href="../../decorators/merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="../../decorators/subdivide.html">@subdivide</a> </li>
+ <li><a href="../../decorators/transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="../../decorators/collate.html">@collate</a> </li>
+ <li><a href="../../decorators/collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="../../decorators/graphviz.html">@graphviz</a> </li>
+ <li><a href="../../decorators/mkdir.html">@mkdir</a> </li>
+ <li><a href="../../decorators/follows.html">@follows / mkdir</a> </li>
+ <li><a href="../../decorators/posttask.html">@posttask touch_file</a> </li>
+ <li><a href="../../decorators/active_if.html">@active_if</a> </li>
+ <li><a href="../../decorators/jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="../../decorators/product.html">@product </a> </li>
+ <li><a href="../../decorators/permutations.html">@permutations </a> </li>
+ <li><a href="../../decorators/combinations.html">@combinations </a> </li>
+ <li><a href="../../decorators/combinations_with_replacement.html">@combinations_ _with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="../../decorators/decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="../../decorators/files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="../../decorators/parallel.html">@parallel</a> </li>
+ <li><a href="../../decorators/check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="../../decorators/indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="part1_code.html" title="Ruffus code"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="index.html" title="Construction of a simple pipeline to run BLAST jobs"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="../../tutorials/new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../../tutorials/new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../../tutorials/new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/examples/bioinformatics/part2_code.html b/doc/_build/html/examples/bioinformatics/part2_code.html
new file mode 100644
index 0000000..ed971e0
--- /dev/null
+++ b/doc/_build/html/examples/bioinformatics/part2_code.html
@@ -0,0 +1,445 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>Ruffus code — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../../index.html" />
+ <link rel="next" title="Example code for FAQ Good practices: "What is the best way of handling data in file pairs (or triplets etc.)?"" href="../paired_end_data.py.html" />
+ <link rel="prev" title="Ruffus code" href="part1_code.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="../paired_end_data.py.html" title="Example code for FAQ Good practices: "What is the best way of handling data in file pairs (or triplets etc.)?""
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="part1_code.html" title="Ruffus code"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="../../tutorials/new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../../tutorials/new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../../tutorials/new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <div class="section" id="ruffus-code">
+<span id="examples-bioinformatics-part2-code"></span><h1>Ruffus code<a class="headerlink" href="#ruffus-code" title="Permalink to this headline">¶</a></h1>
+<div class="highlight-python"><div class="highlight"><pre><span class="c">#!/usr/bin/env python</span>
+<span class="sd">"""</span>
+
+<span class="sd"> run_parallel_blast.py</span>
+<span class="sd"> [--log_file PATH]</span>
+<span class="sd"> [--quiet]</span>
+
+<span class="sd">"""</span>
+
+<span class="c">################################################################################</span>
+<span class="c">#</span>
+<span class="c"># run_parallel_blast</span>
+<span class="c">#</span>
+<span class="c">#</span>
+<span class="c"># Copyright (c) 4/21/2010 Leo Goodstadt</span>
+<span class="c">#</span>
+<span class="c"># Permission is hereby granted, free of charge, to any person obtaining a copy</span>
+<span class="c"># of this software and associated documentation files (the "Software"), to deal</span>
+<span class="c"># in the Software without restriction, including without limitation the rights</span>
+<span class="c"># to use, copy, modify, merge, publish, distribute, sublicense, and/or sell</span>
+<span class="c"># copies of the Software, and to permit persons to whom the Software is</span>
+<span class="c"># furnished to do so, subject to the following conditions:</span>
+<span class="c">#</span>
+<span class="c"># The above copyright notice and this permission notice shall be included in</span>
+<span class="c"># all copies or substantial portions of the Software.</span>
+<span class="c">#</span>
+<span class="c"># THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR</span>
+<span class="c"># IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,</span>
+<span class="c"># FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE</span>
+<span class="c"># AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER</span>
+<span class="c"># LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,</span>
+<span class="c"># OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN</span>
+<span class="c"># THE SOFTWARE.</span>
+<span class="c">#################################################################################</span>
+<span class="kn">import</span> <span class="nn">os</span><span class="o">,</span> <span class="nn">sys</span>
+<span class="n">exe_path</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">abspath</span><span class="p">(</span><span class="n">sys</span><span class="o">.</span><span class="n">argv</span><span class="p">[</span><span class="mi">0</span><sp [...]
+<span class="n">sys</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">insert</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span><span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">abspath</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span cl [...]
+
+
+<span class="c">#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888</span>
+
+<span class="c"># options</span>
+
+
+<span class="c">#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888</span>
+
+<span class="kn">from</span> <span class="nn">optparse</span> <span class="kn">import</span> <span class="n">OptionParser</span>
+<span class="kn">import</span> <span class="nn">sys</span><span class="o">,</span> <span class="nn">os</span>
+
+<span class="n">exe_path</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">abspath</span><span class="p">(</span><span class="n">sys</span><span class="o">.</span><span class="n">argv</span><span class="p">[</span><span class="mi">0</span><sp [...]
+
+
+<span class="n">parser</span> <span class="o">=</span> <span class="n">OptionParser</span><span class="p">(</span><span class="n">version</span><span class="o">=</span><span class="s">"%prog 1.0"</span><span class="p">,</span> <span class="n">usage</span> <span class="o">=</span> <span class="s">"</span><span class="se">\n\n</span><span class="s"> %prog --input_file QUERY_FASTA --database_file FASTA_DATABASE [more_options]"</span><span class="p">)</span>
+<span class="n">parser</span><span class="o">.</span><span class="n">add_option</span><span class="p">(</span><span class="s">"-i"</span><span class="p">,</span> <span class="s">"--input_file"</span><span class="p">,</span> <span class="n">dest</span><span class="o">=</span><span class="s">"input_file"</span><span class="p">,</span>
+ <span class="n">metavar</span><span class="o">=</span><span class="s">"FILE"</span><span class="p">,</span>
+ <span class="nb">type</span><span class="o">=</span><span class="s">"string"</span><span class="p">,</span>
+ <span class="n">help</span><span class="o">=</span><span class="s">"Name and path of query sequence file in FASTA format. "</span><span class="p">)</span>
+<span class="n">parser</span><span class="o">.</span><span class="n">add_option</span><span class="p">(</span><span class="s">"-d"</span><span class="p">,</span> <span class="s">"--database_file"</span><span class="p">,</span> <span class="n">dest</span><span class="o">=</span><span class="s">"database_file"</span><span class="p">,</span>
+ <span class="n">metavar</span><span class="o">=</span><span class="s">"FILE"</span><span class="p">,</span>
+ <span class="nb">type</span><span class="o">=</span><span class="s">"string"</span><span class="p">,</span>
+ <span class="n">help</span><span class="o">=</span><span class="s">"Name and path of FASTA database to search. "</span><span class="p">)</span>
+<span class="n">parser</span><span class="o">.</span><span class="n">add_option</span><span class="p">(</span><span class="s">"--result_file"</span><span class="p">,</span> <span class="n">dest</span><span class="o">=</span><span class="s">"result_file"</span><span class="p">,</span>
+ <span class="n">metavar</span><span class="o">=</span><span class="s">"FILE"</span><span class="p">,</span>
+ <span class="nb">type</span><span class="o">=</span><span class="s">"string"</span><span class="p">,</span>
+ <span class="n">default</span><span class="o">=</span><span class="s">"final.blast_results"</span><span class="p">,</span>
+ <span class="n">help</span><span class="o">=</span><span class="s">"Name and path of where the files should end up. "</span><span class="p">)</span>
+<span class="n">parser</span><span class="o">.</span><span class="n">add_option</span><span class="p">(</span><span class="s">"-t"</span><span class="p">,</span> <span class="s">"--temp_directory"</span><span class="p">,</span> <span class="n">dest</span><span class="o">=</span><span class="s">"temp_directory"</span><span class="p">,</span>
+ <span class="n">metavar</span><span class="o">=</span><span class="s">"PATH"</span><span class="p">,</span>
+ <span class="nb">type</span><span class="o">=</span><span class="s">"string"</span><span class="p">,</span>
+ <span class="n">default</span><span class="o">=</span><span class="s">"tmp"</span><span class="p">,</span>
+ <span class="n">help</span><span class="o">=</span><span class="s">"Name and path of temporary directory where calculations "</span>
+ <span class="s">"should take place. "</span><span class="p">)</span>
+
+<span class="c">#</span>
+<span class="c"># general options: verbosity / logging</span>
+<span class="c">#</span>
+<span class="n">parser</span><span class="o">.</span><span class="n">add_option</span><span class="p">(</span><span class="s">"-v"</span><span class="p">,</span> <span class="s">"--verbose"</span><span class="p">,</span> <span class="n">dest</span> <span class="o">=</span> <span class="s">"verbose"</span><span class="p">,</span>
+ <span class="n">action</span><span class="o">=</span><span class="s">"count"</span><span class="p">,</span> <span class="n">default</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span>
+ <span class="n">help</span><span class="o">=</span><span class="s">"Print more detailed messages for each additional verbose level."</span>
+ <span class="s">" E.g. run_parallel_blast --verbose --verbose --verbose ... (or -vvv)"</span><span class="p">)</span>
+
+<span class="c">#</span>
+<span class="c"># pipeline</span>
+<span class="c">#</span>
+<span class="n">parser</span><span class="o">.</span><span class="n">add_option</span><span class="p">(</span><span class="s">"-j"</span><span class="p">,</span> <span class="s">"--jobs"</span><span class="p">,</span> <span class="n">dest</span><span class="o">=</span><span class="s">"jobs"</span><span class="p">,</span>
+ <span class="n">default</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span>
+ <span class="n">metavar</span><span class="o">=</span><span class="s">"jobs"</span><span class="p">,</span>
+ <span class="nb">type</span><span class="o">=</span><span class="s">"int"</span><span class="p">,</span>
+ <span class="n">help</span><span class="o">=</span><span class="s">"Specifies the number of jobs (operations) to run in parallel."</span><span class="p">)</span>
+<span class="n">parser</span><span class="o">.</span><span class="n">add_option</span><span class="p">(</span><span class="s">"--flowchart"</span><span class="p">,</span> <span class="n">dest</span><span class="o">=</span><span class="s">"flowchart"</span><span class="p">,</span>
+ <span class="n">metavar</span><span class="o">=</span><span class="s">"FILE"</span><span class="p">,</span>
+ <span class="nb">type</span><span class="o">=</span><span class="s">"string"</span><span class="p">,</span>
+ <span class="n">help</span><span class="o">=</span><span class="s">"Print flowchart of the pipeline to FILE. Flowchart format "</span>
+ <span class="s">"depends on extension. Alternatives include ('.dot', '.jpg', "</span>
+ <span class="s">"'*.svg', '*.png' etc). Formats other than '.dot' require "</span>
+ <span class="s">"the dot program to be installed (http://www.graphviz.org/)."</span><span class="p">)</span>
+<span class="n">parser</span><span class="o">.</span><span class="n">add_option</span><span class="p">(</span><span class="s">"-n"</span><span class="p">,</span> <span class="s">"--just_print"</span><span class="p">,</span> <span class="n">dest</span><span class="o">=</span><span class="s">"just_print"</span><span class="p">,</span>
+ <span class="n">action</span><span class="o">=</span><span class="s">"store_true"</span><span class="p">,</span> <span class="n">default</span><span class="o">=</span><span class="bp">False</span><span class="p">,</span>
+ <span class="n">help</span><span class="o">=</span><span class="s">"Only print a trace (description) of the pipeline. "</span>
+ <span class="s">" The level of detail is set by --verbose."</span><span class="p">)</span>
+
+<span class="p">(</span><span class="n">options</span><span class="p">,</span> <span class="n">remaining_args</span><span class="p">)</span> <span class="o">=</span> <span class="n">parser</span><span class="o">.</span><span class="n">parse_args</span><span class="p">()</span>
+
+
+<span class="k">if</span> <span class="ow">not</span> <span class="n">options</span><span class="o">.</span><span class="n">flowchart</span><span class="p">:</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="n">options</span><span class="o">.</span><span class="n">database_file</span><span class="p">:</span>
+ <span class="n">parser</span><span class="o">.</span><span class="n">error</span><span class="p">(</span><span class="s">"</span><span class="se">\n\n\t</span><span class="s">Missing parameter --database_file FILE</span><span class="se">\n\n</span><span class="s">"</span><span class="p">)</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="n">options</span><span class="o">.</span><span class="n">input_file</span><span class="p">:</span>
+ <span class="n">parser</span><span class="o">.</span><span class="n">error</span><span class="p">(</span><span class="s">"</span><span class="se">\n\n\t</span><span class="s">Missing parameter --input_file FILE</span><span class="se">\n\n</span><span class="s">"</span><span class="p">)</span>
+
+<span class="c">#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888</span>
+
+<span class="c"># imports</span>
+
+
+<span class="c">#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888</span>
+
+<span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+<span class="kn">import</span> <span class="nn">subprocess</span>
+
+
+
+<span class="c">#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888</span>
+
+<span class="c"># Functions</span>
+
+
+<span class="c">#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888</span>
+<span class="k">def</span> <span class="nf">run_cmd</span><span class="p">(</span><span class="n">cmd_str</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Throw exception if run command fails</span>
+<span class="sd"> """</span>
+ <span class="n">process</span> <span class="o">=</span> <span class="n">subprocess</span><span class="o">.</span><span class="n">Popen</span><span class="p">(</span><span class="n">cmd_str</span><span class="p">,</span> <span class="n">stdout</span> <span class="o">=</span> <span class="n">subprocess</span><span class="o">.</span><span class="n">PIPE</span><span class="p">,</span>
+ <span class="n">stderr</span> <span class="o">=</span> <span class="n">subprocess</span><span class="o">.</span><span class="n">PIPE</span><span class="p">,</span> <span class="n">shell</span> <span class="o">=</span> <span class="bp">True</span><span class="p">)</span>
+ <span class="n">stdout_str</span><span class="p">,</span> <span class="n">stderr_str</span> <span class="o">=</span> <span class="n">process</span><span class="o">.</span><span class="n">communicate</span><span class="p">()</span>
+ <span class="k">if</span> <span class="n">process</span><span class="o">.</span><span class="n">returncode</span> <span class="o">!=</span> <span class="mi">0</span><span class="p">:</span>
+ <span class="k">raise</span> <span class="ne">Exception</span><span class="p">(</span><span class="s">"Failed to run '</span><span class="si">%s</span><span class="s">'</span><span class="se">\n</span><span class="si">%s%s</span><span class="s">Non-zero exit status </span><span class="si">%s</span><span class="s">"</span> <span class="o">%</span>
+ <span class="p">(</span><span class="n">cmd_str</span><span class="p">,</span> <span class="n">stdout_str</span><span class="p">,</span> <span class="n">stderr_str</span><span class="p">,</span> <span class="n">process</span><span class="o">.</span><span class="n">returncode</span><span class="p">))</span>
+
+
+<span class="c">#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888</span>
+
+<span class="c"># Logger</span>
+
+
+<span class="c">#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888</span>
+
+<span class="kn">import</span> <span class="nn">logging</span>
+<span class="n">logger</span> <span class="o">=</span> <span class="n">logging</span><span class="o">.</span><span class="n">getLogger</span><span class="p">(</span><span class="s">"run_parallel_blast"</span><span class="p">)</span>
+<span class="c">#</span>
+<span class="c"># We are interesting in all messages</span>
+<span class="c">#</span>
+<span class="k">if</span> <span class="n">options</span><span class="o">.</span><span class="n">verbose</span><span class="p">:</span>
+ <span class="n">logger</span><span class="o">.</span><span class="n">setLevel</span><span class="p">(</span><span class="n">logging</span><span class="o">.</span><span class="n">DEBUG</span><span class="p">)</span>
+ <span class="n">stderrhandler</span> <span class="o">=</span> <span class="n">logging</span><span class="o">.</span><span class="n">StreamHandler</span><span class="p">(</span><span class="n">sys</span><span class="o">.</span><span class="n">stderr</span><span class="p">)</span>
+ <span class="n">stderrhandler</span><span class="o">.</span><span class="n">setFormatter</span><span class="p">(</span><span class="n">logging</span><span class="o">.</span><span class="n">Formatter</span><span class="p">(</span><span class="s">" </span><span class="si">%(message)s</span><span class="s">"</span><span class="p">))</span>
+ <span class="n">stderrhandler</span><span class="o">.</span><span class="n">setLevel</span><span class="p">(</span><span class="n">logging</span><span class="o">.</span><span class="n">DEBUG</span><span class="p">)</span>
+ <span class="n">logger</span><span class="o">.</span><span class="n">addHandler</span><span class="p">(</span><span class="n">stderrhandler</span><span class="p">)</span>
+
+
+
+<span class="c">#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888</span>
+
+<span class="c"># Pipeline tasks</span>
+
+
+<span class="c">#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888</span>
+<span class="n">original_fasta</span> <span class="o">=</span> <span class="n">options</span><span class="o">.</span><span class="n">input_file</span>
+<span class="n">database_file</span> <span class="o">=</span> <span class="n">options</span><span class="o">.</span><span class="n">database_file</span>
+<span class="n">temp_directory</span> <span class="o">=</span> <span class="n">options</span><span class="o">.</span><span class="n">temp_directory</span>
+<span class="n">result_file</span> <span class="o">=</span> <span class="n">options</span><span class="o">.</span><span class="n">result_file</span>
+
+<span class="nd">@follows</span><span class="p">(</span><span class="n">mkdir</span><span class="p">(</span><span class="n">temp_directory</span><span class="p">))</span>
+
+<span class="nd">@split</span><span class="p">(</span><span class="n">original_fasta</span><span class="p">,</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">temp_directory</span><span class="p">,</span> <span class="s">"*.segment"</span><span class="p">))</span>
+<span class="k">def</span> <span class="nf">splitFasta</span> <span class="p">(</span><span class="n">seqFile</span><span class="p">,</span> <span class="n">segments</span><span class="p">):</span>
+ <span class="sd">"""Split sequence file into</span>
+<span class="sd"> as many fragments as appropriate</span>
+<span class="sd"> depending on the size of original_fasta"""</span>
+ <span class="c">#</span>
+ <span class="c"># Clean up any segment files from previous runs before creating new one</span>
+ <span class="c">#</span>
+ <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="n">segments</span><span class="p">:</span>
+ <span class="n">os</span><span class="o">.</span><span class="n">unlink</span><span class="p">(</span><span class="n">i</span><span class="p">)</span>
+ <span class="c">#</span>
+ <span class="n">current_file_index</span> <span class="o">=</span> <span class="mi">0</span>
+ <span class="k">for</span> <span class="n">line</span> <span class="ow">in</span> <span class="nb">open</span><span class="p">(</span><span class="n">original_fasta</span><span class="p">):</span>
+ <span class="c">#</span>
+ <span class="c"># start a new file for each accession line</span>
+ <span class="c">#</span>
+ <span class="k">if</span> <span class="n">line</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="o">==</span> <span class="s">'>'</span><span class="p">:</span>
+ <span class="n">current_file_index</span> <span class="o">+=</span> <span class="mi">1</span>
+ <span class="n">file_name</span> <span class="o">=</span> <span class="s">"</span><span class="si">%d</span><span class="s">.segment"</span> <span class="o">%</span> <span class="n">current_file_index</span>
+ <span class="n">file_path</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">temp_directory</span><span class="p">,</span> <span class="n">file_name</span><span class="p">)</span>
+ <span class="n">current_file</span> <span class="o">=</span> <span class="nb">open</span><span class="p">(</span><span class="n">file_path</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+ <span class="n">current_file</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="n">line</span><span class="p">)</span>
+
+
+<span class="nd">@transform</span><span class="p">(</span><span class="n">splitFasta</span><span class="p">,</span> <span class="n">suffix</span><span class="p">(</span><span class="s">".segment"</span><span class="p">),</span> <span class="p">[</span><span class="s">".blastResult"</span><span class="p">,</span> <span class="s">".blastSuccess"</span><span class="p">])</span>
+<span class="k">def</span> <span class="nf">runBlast</span><span class="p">(</span><span class="n">seqFile</span><span class="p">,</span> <span class="n">output_files</span><span class="p">):</span>
+ <span class="c">#</span>
+ <span class="n">blastResultFile</span><span class="p">,</span> <span class="n">flag_file</span> <span class="o">=</span> <span class="n">output_files</span>
+ <span class="c">#</span>
+ <span class="n">run_cmd</span><span class="p">(</span><span class="s">"blastall -p blastp -d human.protein.faa -i </span><span class="si">%s</span><span class="s"> > </span><span class="si">%s</span><span class="s">"</span> <span class="o">%</span> <span class="p">(</span><span class="n">seqFile</span><span class="p">,</span> <span class="n">blastResultFile</span><span class="p">))</span>
+ <span class="c">#</span>
+ <span class="c"># "touch" flag file to indicate success</span>
+ <span class="c">#</span>
+ <span class="nb">open</span><span class="p">(</span><span class="n">flag_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+
+
+<span class="nd">@merge</span><span class="p">(</span><span class="n">runBlast</span><span class="p">,</span> <span class="n">result_file</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">combineBlastResults</span> <span class="p">(</span><span class="n">blastResult_and_flag_Files</span><span class="p">,</span> <span class="n">combinedBlastResultFile</span><span class="p">):</span>
+ <span class="sd">"""Combine blast results"""</span>
+ <span class="c">#</span>
+ <span class="n">output_file</span> <span class="o">=</span> <span class="nb">open</span><span class="p">(</span><span class="n">combinedBlastResultFile</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+ <span class="k">for</span> <span class="n">blastResult_file</span><span class="p">,</span> <span class="n">flag_file</span> <span class="ow">in</span> <span class="n">blastResult_and_flag_Files</span><span class="p">:</span>
+ <span class="n">output_file</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="nb">open</span><span class="p">(</span><span class="n">blastResult_file</span><span class="p">)</span><span class="o">.</span><span class="n">read</span><span class="p">())</span>
+
+
+
+
+
+
+
+
+
+<span class="c">#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888</span>
+
+<span class="c"># Print list of tasks</span>
+
+<span class="c">#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888</span>
+<span class="k">if</span> <span class="n">options</span><span class="o">.</span><span class="n">just_print</span><span class="p">:</span>
+ <span class="n">pipeline_printout</span><span class="p">(</span><span class="n">sys</span><span class="o">.</span><span class="n">stdout</span><span class="p">,</span> <span class="p">[</span><span class="n">combineBlastResults</span><span class="p">],</span> <span class="n">verbose</span><span class="o">=</span><span class="n">options</span><span class="o">.</span><span class="n">verbose</span><span class="p">)</span>
+
+
+<span class="c">#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888</span>
+
+<span class="c"># Print flowchart</span>
+
+<span class="c">#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888</span>
+<span class="k">elif</span> <span class="n">options</span><span class="o">.</span><span class="n">flowchart</span><span class="p">:</span>
+ <span class="c"># use file extension for output format</span>
+ <span class="n">output_format</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">splitext</span><span class="p">(</span><span class="n">options</span><span class="o">.</span><span class="n">flowchart</span><span class="p">)[</span><span class="mi">1</span><span class="p">][</span><span class="mi">1</span><span class="p">:]</span>
+ <span class="n">pipeline_printout_graph</span> <span class="p">(</span><span class="nb">open</span><span class="p">(</span><span class="n">options</span><span class="o">.</span><span class="n">flowchart</span><span class="p">,</span> <span class="s">"w"</span><span class="p">),</span>
+ <span class="n">output_format</span><span class="p">,</span>
+ <span class="p">[</span><span class="n">combineBlastResults</span><span class="p">],</span>
+ <span class="n">no_key_legend</span> <span class="o">=</span> <span class="bp">True</span><span class="p">)</span>
+<span class="c">#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888</span>
+
+<span class="c"># Run Pipeline</span>
+
+<span class="c">#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888</span>
+<span class="k">else</span><span class="p">:</span>
+ <span class="n">pipeline_run</span><span class="p">([</span><span class="n">combineBlastResults</span><span class="p">],</span> <span class="n">multiprocess</span> <span class="o">=</span> <span class="n">options</span><span class="o">.</span><span class="n">jobs</span><span class="p">,</span>
+ <span class="n">logger</span> <span class="o">=</span> <span class="n">logger</span><span class="p">,</span> <span class="n">verbose</span><span class="o">=</span><span class="n">options</span><span class="o">.</span><span class="n">verbose</span><span class="p">)</span>
+</pre></div>
+</div>
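+<p>As an aside, the <tt class="docutils literal">run_cmd</tt> helper above could equally be
+written with <tt class="docutils literal">subprocess.check_output</tt> (available from Python 2.7
+onwards), which already raises an exception when the command returns a non-zero exit status.
+This is only a minimal sketch, not part of the original example, and the command string in the
+comment is illustrative:</p>
+<div class="highlight-python"><div class="highlight"><pre>import subprocess
+
+def run_cmd(cmd_str):
+    """
+    Raise subprocess.CalledProcessError if the command fails;
+    stderr is folded into the captured output for easier debugging
+    """
+    return subprocess.check_output(cmd_str, stderr=subprocess.STDOUT, shell=True)
+
+# e.g. run_cmd("blastall -p blastp -d human.protein.faa -i 1.segment &gt; 1.blastResult")
+</pre></div></div>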
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="part1_code.html"
+ title="previous chapter">Ruffus code</a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="../paired_end_data.py.html"
+ title="next chapter">Example code for <tt class="docutils literal"><span class="pre">FAQ</span> <span class="pre">Good</span> <span class="pre">practices:</span> <span class="pre">"What</span> <span class="pre">is</span> <span class="pre">the</span> <span class="pre">best</span> <span class="pre">way</span> <span class="pre">of</span> <span class="pre">handling</span> <span class="pre">data</span> <span class="pre">in</span> <span class="pre">file</span> <spa [...]
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../../_sources/examples/bioinformatics/part2_code.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="../../decorators/decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="../../decorators/originate.html">@originate</a> </li>
+ <li><a href="../../decorators/split.html">@split</a> </li>
+ <li><a href="../../decorators/transform.html">@transform</a> </li>
+ <li><a href="../../decorators/merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="../../decorators/subdivide.html">@subdivide</a> </li>
+ <li><a href="../../decorators/transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="../../decorators/collate.html">@collate</a> </li>
+ <li><a href="../../decorators/collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="../../decorators/graphviz.html">@graphviz</a> </li>
+ <li><a href="../../decorators/mkdir.html">@mkdir</a> </li>
+ <li><a href="../../decorators/follows.html">@follows / mkdir</a> </li>
+ <li><a href="../../decorators/posttask.html">@posttask touch_file</a> </li>
+ <li><a href="../../decorators/active_if.html">@active_if</a> </li>
+ <li><a href="../../decorators/jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="../../decorators/product.html">@product </a> </li>
+ <li><a href="../../decorators/permutations.html">@permutations </a> </li>
+ <li><a href="../../decorators/combinations.html">@combinations </a> </li>
+ <li><a href="../../decorators/combinations_with_replacement.html">@combinations_ _with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="../../decorators/decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="../../decorators/files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="../../decorators/parallel.html">@parallel</a> </li>
+ <li><a href="../../decorators/check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="../../decorators/indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+          <a href="../paired_end_data.py.html" title="Example code for FAQ Good practices: &quot;What is the best way of handling data in file pairs (or triplets etc.)?&quot;"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="part1_code.html" title="Ruffus code"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="../../tutorials/new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../../tutorials/new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../../tutorials/new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/examples/paired_end_data.py.html b/doc/_build/html/examples/paired_end_data.py.html
new file mode 100644
index 0000000..b042688
--- /dev/null
+++ b/doc/_build/html/examples/paired_end_data.py.html
@@ -0,0 +1,305 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>Example code for FAQ Good practices: "What is the best way of handling data in file pairs (or triplets etc.)?" — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../_static/jquery.js"></script>
+ <script type="text/javascript" src="../_static/underscore.js"></script>
+ <script type="text/javascript" src="../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../index.html" />
+ <link rel="next" title="Ruffus Decorators" href="../decorators/decorators.html" />
+ <link rel="prev" title="Ruffus code" href="bioinformatics/part2_code.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="../decorators/decorators.html" title="Ruffus Decorators"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="bioinformatics/part2_code.html" title="Ruffus code"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../index.html">Home</a> | </li>
+ <li><a href="../contents.html">Contents</a> | </li>
+ <li><a href="../installation.html">Install</a> | </li>
+ <li><a href="../tutorials/new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../tutorials/new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../faq.html">FAQ</a> | </li>
+ <li><a href="../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../tutorials/new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../gallery.html">Gallery</a> | </li>
+ <li><a href="../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <div class="section" id="example-code-for-faq-good-practices-what-is-the-best-way-of-handling-data-in-file-pairs-or-triplets-etc">
+<span id="faq-paired-files-code"></span><h1>Example code for <a class="reference internal" href="../faq.html#faq-paired-files"><em>FAQ Good practices: “What is the best way of handling data in file pairs (or triplets etc.)?”</em></a><a class="headerlink" href="#example-code-for-faq-good-practices-what-is-the-best-way-of-handling-data-in-file-pairs-or-triplets-etc" title="Permalink to this headline">¶</a></h1>
+<blockquote>
+<div><div class="admonition seealso">
+<p class="first admonition-title">See also</p>
+<ul class="last simple">
+<li><a class="reference internal" href="../tutorials/new_tutorial/subdivide_collate.html#new-manual-collate"><em>@collate</em></a></li>
+</ul>
+</div>
+<div class="highlight-python"><div class="highlight"><pre><span class="c">#!/usr/bin/env python</span>
+<span class="kn">import</span> <span class="nn">sys</span><span class="o">,</span> <span class="nn">os</span>
+
+<span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+<span class="kn">import</span> <span class="nn">ruffus.cmdline</span> <span class="kn">as</span> <span class="nn">cmdline</span>
+<span class="kn">from</span> <span class="nn">subprocess</span> <span class="kn">import</span> <span class="n">check_call</span>
+
+<span class="n">parser</span> <span class="o">=</span> <span class="n">cmdline</span><span class="o">.</span><span class="n">get_argparse</span><span class="p">(</span><span class="n">description</span><span class="o">=</span><span class="s">"Parimala's pipeline?"</span><span class="p">)</span>
+
+<span class="hll"><span class="c"># .</span>
+</span><span class="hll"><span class="c"># Very flexible handling of input files .</span>
+</span><span class="hll"><span class="c"># .</span>
+</span><span class="hll"><span class="c"># input files can be specified flexibly as: .</span>
+</span><span class="hll"><span class="c"># --input a.fastq b.fastq .</span>
+</span><span class="hll"><span class="c"># --input a.fastq --input b.fastq .</span>
+</span><span class="hll"><span class="c"># --input *.fastq --input other/*.fastq .</span>
+</span><span class="hll"><span class="c"># --input "*.fastq" .</span>
+</span><span class="hll"><span class="c"># .</span>
+</span><span class="hll"><span class="c"># The last form is expanded in the script and avoids limitations on command .</span>
+</span><span class="hll"><span class="c"># line lengths .</span>
+</span><span class="hll"><span class="c"># .</span>
+</span><span class="n">parser</span><span class="o">.</span><span class="n">add_argument</span><span class="p">(</span><span class="s">'-i'</span><span class="p">,</span> <span class="s">'--input'</span><span class="p">,</span> <span class="n">nargs</span><span class="o">=</span><span class="s">'+'</span><span class="p">,</span> <span class="n">metavar</span><span class="o">=</span><span class="s">"FILE"</span><span class="p">,</span> <span class="n">act [...]
+
+<span class="n">options</span> <span class="o">=</span> <span class="n">parser</span><span class="o">.</span><span class="n">parse_args</span><span class="p">()</span>
+
+<span class="c"># standard python logger which can be synchronised across concurrent Ruffus tasks</span>
+<span class="n">logger</span><span class="p">,</span> <span class="n">logger_mutex</span> <span class="o">=</span> <span class="n">cmdline</span><span class="o">.</span><span class="n">setup_logging</span> <span class="p">(</span><span class="s">"PARIMALA"</span><span class="p">,</span> <span class="n">options</span><span class="o">.</span><span class="n">log_file</span><span class="p">,</span> <span class="n">options</span><span class="o">.</span><span class="n">verbose</span> [...]
+
+<span class="hll"><span class="c"># .</span>
+</span><span class="hll"><span class="c"># Useful code to turn input files into a flat list .</span>
+</span><span class="hll"><span class="c"># .</span>
+</span><span class="kn">from</span> <span class="nn">glob</span> <span class="kn">import</span> <span class="n">glob</span>
+<span class="n">original_data_files</span> <span class="o">=</span> <span class="p">[</span><span class="n">fn</span> <span class="k">for</span> <span class="n">grouped</span> <span class="ow">in</span> <span class="n">options</span><span class="o">.</span><span class="n">input</span> <span class="k">for</span> <span class="n">glob_spec</span> <span class="ow">in</span> <span class="n">grouped</span> <span class="k">for</span> <span class="n">fn</span> <span class="ow">in</span> <span cl [...]
+<span class="k">if</span> <span class="ow">not</span> <span class="n">original_data_files</span><span class="p">:</span>
+ <span class="n">original_data_files</span> <span class="o">=</span> <span class="p">[[</span><span class="s">"C1W1_R1.fastq.gz"</span><span class="p">,</span> <span class="s">"C1W1_R2.fastq.gz"</span><span class="p">]]</span>
+ <span class="c">#raise Exception ("No matching files specified with --input.")</span>
+
+<span class="c"># <<<---- pipelined functions go here</span>
+
+<span class="hll"><span class="c">#_________________________________________________________________________________</span>
+</span><span class="hll"><span class="c"># .</span>
+</span><span class="hll"><span class="c"># Group together file pairs .</span>
+</span><span class="hll"><span class="c">#_________________________________________________________________________________</span>
+</span><span class="nd">@collate</span><span class="p">(</span><span class="n">original_data_files</span><span class="p">,</span>
+ <span class="c"># match file name up to the "R1.fastq.gz"</span>
+ <span class="n">formatter</span><span class="p">(</span><span class="s">"([^/]+)R[12].fastq.gz$"</span><span class="p">),</span>
+ <span class="c"># Create output parameter supplied to next task</span>
+ <span class="p">[</span><span class="s">"{path[0]}/{1[0]}paired.R1.fastq.gz"</span><span class="p">,</span> <span class="c"># paired file 1</span>
+ <span class="s">"{path[0]}/{1[0]}paired.R2.fastq.gz"</span><span class="p">],</span> <span class="c"># paired file 2</span>
+ <span class="c"># Extra parameters for our own convenience and use</span>
+ <span class="p">[</span><span class="s">"{path[0]}/{1[0]}unpaired.R1.fastq.gz"</span><span class="p">,</span> <span class="c"># unpaired file 1</span>
+ <span class="s">"{path[0]}/{1[0]}unpaired.R2.fastq.gz"</span><span class="p">],</span> <span class="c"># unpaired file 2</span>
+ <span class="n">logger</span><span class="p">,</span> <span class="n">logger_mutex</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">trim_fastq</span><span class="p">(</span><span class="n">input_files</span><span class="p">,</span> <span class="n">output_paired_files</span><span class="p">,</span> <span class="n">discarded_unpaired_files</span><span class="p">,</span> <span class="n">logger</span><span class="p">,</span> <span class="n">logger_mutex</span><span class="p">):</span>
+ <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">input_files</span><span class="p">)</span> <span class="o">!=</span> <span class="mi">2</span><span class="p">:</span>
+ <span class="k">raise</span> <span class="ne">Exception</span><span class="p">(</span><span class="s">"One of read pairs </span><span class="si">%s</span><span class="s"> missing"</span> <span class="o">%</span> <span class="p">(</span><span class="n">input_files</span><span class="p">,))</span>
+ <span class="n">cmd</span> <span class="o">=</span> <span class="p">(</span><span class="s">"java -jar ~/SPRING-SUMMER_2014/Softwares/Trimmomatic/Trimmomatic-0.32/trimmomatic-0.32.jar "</span>
+ <span class="s">" PE -phred33 "</span>
+ <span class="s">" {input_files[0]} {input_files[1]} "</span>
+ <span class="s">" {output_paired_files[0]} {output_paired_files[1]} "</span>
+ <span class="s">" {discarded_unpaired_files[0]} {discarded_unpaired_files[1]} "</span>
+ <span class="s">" LEADING:30 TRAILING:30 SLIDINGWINDOW:4:15 MINLEN:50 "</span>
+ <span class="p">)</span>
+
+ <span class="n">check_call</span><span class="p">(</span><span class="n">cmd</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="o">**</span><span class="nb">locals</span><span class="p">()))</span>
+
+ <span class="k">with</span> <span class="n">logger_mutex</span><span class="p">:</span>
+ <span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s">"Hooray trim_fastq worked"</span><span class="p">)</span>
+
+<span class="hll"><span class="c">#_________________________________________________________________________________</span>
+</span><span class="hll"><span class="c"># .</span>
+</span><span class="hll"><span class="c"># Each file pair now makes its way down the rest of the pipeline as .</span>
+</span><span class="hll"><span class="c"># a couple .</span>
+</span><span class="hll"><span class="c">#_________________________________________________________________________________</span>
+</span><span class="nd">@transform</span><span class="p">(</span><span class="n">trim_fastq</span><span class="p">,</span>
+ <span class="c"># regular expression match on first of pe files</span>
+ <span class="n">formatter</span><span class="p">(</span><span class="s">"([^/]+)paired.R1.fastq.gz$"</span><span class="p">),</span>
+ <span class="c"># Output parameter supplied to next task</span>
+             <span class="s">"{path[0]}/{1[0]}.sam"</span><span class="p">,</span>
+
+ <span class="c"># Extra parameters for our own convenience and use</span>
+ <span class="s">"{path[0]}/{1[0]}.pe_soap_pe"</span><span class="p">,</span> <span class="c"># soap intermediate file</span>
+ <span class="s">"{path[0]}/{1[0]}.pe_soap_se"</span><span class="p">,</span> <span class="c"># soap intermediate file</span>
+ <span class="n">logger</span><span class="p">,</span> <span class="n">logger_mutex</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">align_seq</span><span class="p">(</span><span class="n">input_files</span><span class="p">,</span> <span class="n">output_file</span><span class="p">,</span> <span class="n">soap_pe_output_file</span><span class="p">,</span> <span class="n">soap_se_output_file</span><span class="p">,</span> <span class="n">logger</span><span class="p">,</span> <span class="n">logger_mutex</span><span class="p">):</span>
+ <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">input_files</span><span class="p">)</span> <span class="o">!=</span> <span class="mi">2</span><span class="p">:</span>
+ <span class="k">raise</span> <span class="ne">Exception</span><span class="p">(</span><span class="s">"One of read pairs </span><span class="si">%s</span><span class="s"> missing"</span> <span class="o">%</span> <span class="p">(</span><span class="n">input_files</span><span class="p">,))</span>
+ <span class="n">cmd</span> <span class="o">=</span> <span class="p">(</span><span class="s">"~/SPRING-SUMMER_2014/Softwares/soap2.21release/soap "</span>
+ <span class="s">" -a {input_files[0]} "</span>
+ <span class="s">" -b {input_files[1]} "</span>
+ <span class="s">" -D Y55_genome.fa.index* "</span>
+ <span class="s">" -o {soap_pe_output_file} -2 {soap_se_output_file} -m 400 -x 600"</span><span class="p">)</span>
+
+ <span class="n">check_call</span><span class="p">(</span><span class="n">cmd</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="o">**</span><span class="nb">locals</span><span class="p">()))</span>
+
+
+ <span class="c">#Soap_to_sam</span>
+ <span class="n">cmd</span> <span class="o">=</span> <span class="s">" perl ~/SPRING-SUMMER_2014/Softwares/soap2sam.pl -p {soap_pe_output_file} > {output_file}"</span>
+
+ <span class="n">check_call</span><span class="p">(</span><span class="n">cmd</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="o">**</span><span class="nb">locals</span><span class="p">()))</span>
+
+
+ <span class="k">with</span> <span class="n">logger_mutex</span><span class="p">:</span>
+ <span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s">"Hooray align_seq worked"</span><span class="p">)</span>
+
+
+<span class="n">cmdline</span><span class="o">.</span><span class="n">run</span> <span class="p">(</span><span class="n">options</span><span class="p">)</span>
+</pre></div>
+</div>
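+<p>For illustration only (this is not part of the example above), the nested list comprehension
+that flattens <tt class="docutils literal">options.input</tt> can be read as follows. The
+<tt class="docutils literal">grouped_inputs</tt> value is made up and stands in for what argparse
+produces from repeated <tt class="docutils literal">--input</tt> flags:</p>
+<div class="highlight-python"><div class="highlight"><pre>from glob import glob
+
+# each --input flag contributes one sub-list, so the raw option value is a list of lists
+grouped_inputs = [["*.fastq"], ["a.fastq", "b.fastq"]]
+
+# expand any glob patterns and flatten into a single list of file names
+# (glob() only returns names that actually exist on disk)
+original_data_files = [fn for grouped in grouped_inputs
+                          for glob_spec in grouped
+                          for fn in glob(glob_spec)]
+</pre></div></div>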
+</div></blockquote>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="bioinformatics/part2_code.html"
+ title="previous chapter">Ruffus code</a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="../decorators/decorators.html"
+ title="next chapter">Ruffus Decorators</a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../_sources/examples/paired_end_data.py.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="../decorators/decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="../decorators/originate.html">@originate</a> </li>
+ <li><a href="../decorators/split.html">@split</a> </li>
+ <li><a href="../decorators/transform.html">@transform</a> </li>
+ <li><a href="../decorators/merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="../decorators/decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="../decorators/subdivide.html">@subdivide</a> </li>
+ <li><a href="../decorators/transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="../decorators/collate.html">@collate</a> </li>
+ <li><a href="../decorators/collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="../decorators/graphviz.html">@graphviz</a> </li>
+ <li><a href="../decorators/mkdir.html">@mkdir</a> </li>
+ <li><a href="../decorators/follows.html">@follows / mkdir</a> </li>
+ <li><a href="../decorators/posttask.html">@posttask touch_file</a> </li>
+ <li><a href="../decorators/active_if.html">@active_if</a> </li>
+ <li><a href="../decorators/jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="../decorators/decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="../decorators/product.html">@product </a> </li>
+ <li><a href="../decorators/permutations.html">@permutations </a> </li>
+ <li><a href="../decorators/combinations.html">@combinations </a> </li>
+ <li><a href="../decorators/combinations_with_replacement.html">@combinations_ _with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="../decorators/decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="../decorators/files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="../decorators/parallel.html">@parallel</a> </li>
+ <li><a href="../decorators/check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="../decorators/indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="../decorators/decorators.html" title="Ruffus Decorators"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="bioinformatics/part2_code.html" title="Ruffus code"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../index.html">Home</a> | </li>
+ <li><a href="../contents.html">Contents</a> | </li>
+ <li><a href="../installation.html">Install</a> | </li>
+ <li><a href="../tutorials/new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../tutorials/new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../faq.html">FAQ</a> | </li>
+ <li><a href="../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../tutorials/new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../gallery.html">Gallery</a> | </li>
+ <li><a href="../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/faq.html b/doc/_build/html/faq.html
new file mode 100644
index 0000000..3f93593
--- /dev/null
+++ b/doc/_build/html/faq.html
@@ -0,0 +1,1115 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>FAQ — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: './',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="_static/jquery.js"></script>
+ <script type="text/javascript" src="_static/underscore.js"></script>
+ <script type="text/javascript" src="_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="index.html" />
+ <link rel="next" title="Glossary" href="glossary.html" />
+ <link rel="prev" title="Implementation Tips" href="implementation_notes.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="glossary.html" title="Glossary"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="implementation_notes.html" title="Implementation Tips"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="index.html">Home</a> | </li>
+ <li><a href="contents.html">Contents</a> | </li>
+ <li><a href="installation.html">Install</a> | </li>
+ <li><a href="tutorials/new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="tutorials/new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="#">FAQ</a> | </li>
+ <li><a href="cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="tutorials/new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="gallery.html">Gallery</a> | </li>
+ <li><a href="history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <div class="section" id="faq">
+<h1>FAQ<a class="headerlink" href="#faq" title="Permalink to this headline">¶</a></h1>
+<div class="section" id="citations">
+<h2>Citations<a class="headerlink" href="#citations" title="Permalink to this headline">¶</a></h2>
+<div class="section" id="q-how-should-ruffus-be-cited-in-academic-publications">
+<h3>Q. How should <em>Ruffus</em> be cited in academic publications?<a class="headerlink" href="#q-how-should-ruffus-be-cited-in-academic-publications" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><p>The official publication describing the original version of <em>Ruffus</em> is:</p>
+<blockquote>
+<div><a class="reference external" href="http://bioinformatics.oxfordjournals.org/content/early/2010/09/16/bioinformatics.btq524">Leo Goodstadt (2010)</a> : <strong>Ruffus: a lightweight Python library for computational pipelines.</strong> <em>Bioinformatics</em> 26(21): 2778-2779</div></blockquote>
+</div></blockquote>
+</div>
+</div>
+<div class="section" id="good-practices">
+<h2>Good practices<a class="headerlink" href="#good-practices" title="Permalink to this headline">¶</a></h2>
+<div class="section" id="q-what-is-the-best-way-of-keeping-my-data-and-workings-separate">
+<h3>Q. What is the best way of keeping my data and workings separate?<a class="headerlink" href="#q-what-is-the-best-way-of-keeping-my-data-and-workings-separate" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><p>It is good practice to run your pipeline in a temporary, “working” directory away from your original data.</p>
+<p>The first step of your pipeline might be to make softlinks to your original data in your working directory.
+Here is some example (relatively paranoid) code to do just this:</p>
+<div class="highlight-python"><div class="highlight"><pre> <span class="k">def</span> <span class="nf">re_symlink</span> <span class="p">(</span><span class="n">input_file</span><span class="p">,</span> <span class="n">soft_link_name</span><span class="p">,</span> <span class="n">logger</span><span class="p">,</span> <span class="n">logging_mutex</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="hll"><span class="sd"> Helper function: relinks soft symbolic link if necessary</span>
+</span><span class="sd"> """</span>
+<span class="hll"> <span class="c"># Guard agains soft linking to oneself: Disastrous consequences of deleting the original files!!</span>
+</span> <span class="k">if</span> <span class="n">input_file</span> <span class="o">==</span> <span class="n">soft_link_name</span><span class="p">:</span>
+ <span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s">"Warning: No symbolic link made. You are using the original data directory as the working directory."</span><span class="p">)</span>
+ <span class="k">return</span>
+ <span class="c"># Soft link already exists: delete for relink?</span>
+ <span class="k">if</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">lexists</span><span class="p">(</span><span class="n">soft_link_name</span><span class="p">):</span>
+ <span class="c"># do not delete or overwrite real (non-soft link) file</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">islink</span><span class="p">(</span><span class="n">soft_link_name</span><span class="p">):</span>
+ <span class="k">raise</span> <span class="ne">Exception</span><span class="p">(</span><span class="s">"</span><span class="si">%s</span><span class="s"> exists and is not a link"</span> <span class="o">%</span> <span class="n">soft_link_name</span><span class="p">)</span>
+ <span class="k">try</span><span class="p">:</span>
+ <span class="n">os</span><span class="o">.</span><span class="n">unlink</span><span class="p">(</span><span class="n">soft_link_name</span><span class="p">)</span>
+ <span class="k">except</span><span class="p">:</span>
+ <span class="k">with</span> <span class="n">logging_mutex</span><span class="p">:</span>
+ <span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s">"Can't unlink </span><span class="si">%s</span><span class="s">"</span> <span class="o">%</span> <span class="p">(</span><span class="n">soft_link_name</span><span class="p">))</span>
+ <span class="k">with</span> <span class="n">logging_mutex</span><span class="p">:</span>
+ <span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s">"os.symlink(</span><span class="si">%s</span><span class="s">, </span><span class="si">%s</span><span class="s">)"</span> <span class="o">%</span> <span class="p">(</span><span class="n">input_file</span><span class="p">,</span> <span class="n">soft_link_name</span><span class="p">))</span>
+ <span class="c">#</span>
+ <span class="c"># symbolic link relative to original directory so that the entire path</span>
+        <span class="c"># can be moved around without breaking everything</span>
+ <span class="c">#</span>
+ <span class="n">os</span><span class="o">.</span><span class="n">symlink</span><span class="p">(</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">relpath</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">abspath</span><span class="p">(</span><span class="n">input_file</span><span class="p">),</span>
+ <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">abspath</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">dirname</span><span class="p">(</span><span class="n">soft_link_name</span><span class="p">))),</span> <span class="n">soft_link_name</span><span class="p">)</span>
+
+ <span class="c">#</span>
+ <span class="c"># First task should soft link data to working directory</span>
+ <span class="c">#</span>
+ <span class="nd">@jobs_limit</span><span class="p">(</span><span class="mi">1</span><span class="p">)</span>
+ <span class="nd">@mkdir</span><span class="p">(</span><span class="n">options</span><span class="o">.</span><span class="n">working_dir</span><span class="p">)</span>
+ <span class="nd">@transform</span><span class="p">(</span> <span class="n">input_files</span><span class="p">,</span>
+ <span class="n">formatter</span><span class="p">(),</span>
+ <span class="c"># move to working directory</span>
+ <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">options</span><span class="o">.</span><span class="n">working_dir</span><span class="p">,</span> <span class="s">"{basename[0]}{ext[0]}"</span><span class="p">),</span>
+ <span class="n">logger</span><span class="p">,</span> <span class="n">logging_mutex</span>
+ <span class="p">)</span>
+ <span class="k">def</span> <span class="nf">soft_link_inputs_to_working_directory</span> <span class="p">(</span><span class="n">input_file</span><span class="p">,</span> <span class="n">soft_link_name</span><span class="p">,</span> <span class="n">logger</span><span class="p">,</span> <span class="n">logging_mutex</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Make soft link in working directory</span>
+<span class="sd"> """</span>
+ <span class="k">with</span> <span class="n">logging_mutex</span><span class="p">:</span>
+ <span class="n">logger</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s">"Linking files </span><span class="si">%(input_file)s</span><span class="s"> -> </span><span class="si">%(soft_link_name)s</span><span class="se">\n</span><span class="s">"</span> <span class="o">%</span> <span class="nb">locals</span><span class="p">())</span>
+ <span class="n">re_symlink</span><span class="p">(</span><span class="n">input_file</span><span class="p">,</span> <span class="n">soft_link_name</span><span class="p">,</span> <span class="n">logger</span><span class="p">,</span> <span class="n">logging_mutex</span><span class="p">)</span>
+</pre></div>
+</div>
+</div></blockquote>
+</div>
+<div class="section" id="q-what-is-the-best-way-of-handling-data-in-file-pairs-or-triplets-etc">
+<span id="faq-paired-files"></span><h3>Q. What is the best way of handling data in file pairs (or triplets etc.)<a class="headerlink" href="#q-what-is-the-best-way-of-handling-data-in-file-pairs-or-triplets-etc" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><p>In Bioinformatics, DNA data often consists of only the nucleotide sequence at the two ends of larger fragments.
+The <a class="reference external" href="http://www.illumina.com/technology/next-generation-sequencing/paired-end-sequencing_assay.ilmn">paired_end</a> or
+<a class="reference external" href="http://en.wikipedia.org/wiki/Shotgun_sequencing#Whole_genome_shotgun_sequencing">mate pair</a> data frequently
+consists of file pairs with conveniently related names such as “*.R1.fastq” and “*.R2.fastq”.</p>
+<p>At some point in the data pipeline, these file pairs or triplets must find each other and be analysed in the same job.</p>
+<p>Provided these file pairs or triplets are named consistently, the easiest way to regroup them is to use the
+Ruffus <a class="reference internal" href="tutorials/new_tutorial/subdivide_collate.html#new-manual-collate"><em>@collate</em></a> decorator. For example:</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="nd">@collate</span><span class="p">(</span><span class="n">original_data_files</span><span class="p">,</span>
+
+ <span class="c"># match file name up to the "R1.fastq.gz"</span>
+ <span class="n">formatter</span><span class="p">(</span><span class="s">"([^/]+)R[12].fastq.gz$"</span><span class="p">),</span>
+
+ <span class="c"># Create output parameter supplied to next task</span>
+ <span class="s">"{path[0]}/{1[0]}.sam"</span><span class="p">,</span>
+ <span class="n">logger</span><span class="p">,</span> <span class="n">logger_mutex</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">handle_paired_end</span><span class="p">(</span><span class="n">input_files</span><span class="p">,</span> <span class="n">output_paired_files</span><span class="p">,</span> <span class="n">logger</span><span class="p">,</span> <span class="n">logger_mutex</span><span class="p">):</span>
+ <span class="c"># check that we really have a pair of two files not an orphaned singleton</span>
+ <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">input_files</span><span class="p">)</span> <span class="o">!=</span> <span class="mi">2</span><span class="p">:</span>
+ <span class="k">raise</span> <span class="ne">Exception</span><span class="p">(</span><span class="s">"One of read pairs </span><span class="si">%s</span><span class="s"> missing"</span> <span class="o">%</span> <span class="p">(</span><span class="n">input_files</span><span class="p">,))</span>
+
+ <span class="c"># do stuff here</span>
+</pre></div>
+</div>
+<p>This (incomplete, untested) <a class="reference internal" href="examples/paired_end_data.py.html#faq-paired-files-code"><em>example code</em></a> shows what this would look like <em>in vivo</em>.</p>
+</div></blockquote>
+</div>
+</div>
+<div class="section" id="general">
+<h2>General<a class="headerlink" href="#general" title="Permalink to this headline">¶</a></h2>
+<div class="section" id="q-ruffus-won-t-create-dependency-graphs">
+<h3>Q. <em>Ruffus</em> won’t create dependency graphs<a class="headerlink" href="#q-ruffus-won-t-create-dependency-graphs" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><p>A. You need to have installed <tt class="docutils literal"><span class="pre">dot</span></tt> from <a class="reference external" href="http://www.graphviz.org/">Graphviz</a> to produce
+pretty flowcharts like this:</p>
+<blockquote>
+<div><img alt="_images/pretty_flowchart.png" src="_images/pretty_flowchart.png" />
+</div></blockquote>
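+<p>Flowcharts themselves are generated with <tt class="docutils literal"><span class="pre">pipeline_printout_graph(...)</span></tt>, which invokes <tt class="docutils literal"><span class="pre">dot</span></tt> behind
+the scenes for image formats. A minimal sketch, assuming your pipeline ends in a task called <tt class="docutils literal"><span class="pre">final_task</span></tt>
+(a placeholder, not part of <em>Ruffus</em>):</p>
+<div class="highlight-python"><pre># minimal sketch: "final_task" stands in for the last task of your own pipeline
+from ruffus import *
+
+pipeline_printout_graph(open("flowchart.svg", "w"),   # output stream
+                        "svg",                        # any image format supported by dot
+                        [final_task])                 # target task(s) to trace back from</pre>
+</div>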
+</div></blockquote>
+</div>
+<div class="section" id="q-ruffus-seems-to-be-hanging-in-the-same-place">
+<h3>Q. <em>Ruffus</em> seems to be hanging in the same place<a class="headerlink" href="#q-ruffus-seems-to-be-hanging-in-the-same-place" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><p>A. If <em>ruffus</em> is interrupted, for example, by a Ctrl-C,
+you will often find the following lines of code highlighted:</p>
+<div class="highlight-python"><pre>File "build/bdist.linux-x86_64/egg/ruffus/task.py", line 1904, in pipeline_run
+File "build/bdist.linux-x86_64/egg/ruffus/task.py", line 1380, in run_all_jobs_in_task
+File "/xxxx/python2.6/multiprocessing/pool.py", line 507, in next
+ self._cond.wait(timeout)
+File "/xxxxx/python2.6/threading.py", line 237, in wait
+ waiter.acquire()</pre>
+</div>
+<p>This is <em>not</em> where <em>ruffus</em> is hanging; it is merely the boundary between the main programme process
+and the sub-processes which run <em>ruffus</em> jobs in parallel.</p>
+<p>This is naturally where broken threads of execution wash up.</p>
+</div></blockquote>
+</div>
+<div class="section" id="q-regular-expression-substitutions-don-t-work">
+<h3>Q. Regular expression substitutions don’t work<a class="headerlink" href="#q-regular-expression-substitutions-don-t-work" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><p>A. If you are using the special regular expression forms <tt class="docutils literal"><span class="pre">"\1"</span></tt>, <tt class="docutils literal"><span class="pre">"\2"</span></tt> etc.
+to refer to matching groups, remember to ‘escape’ the substitution pattern string.
+The best option is to use <a class="reference external" href="http://docs.python.org/library/re.html">‘raw’ python strings</a>.
+For example:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="s">r"\1_substitutes\2correctly\3four\4times"</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p>Ruffus will throw an exception if it sees an unescaped <tt class="docutils literal"><span class="pre">"\1"</span></tt> or <tt class="docutils literal"><span class="pre">"\2"</span></tt> in a file name.</p>
+</div></blockquote>
+</div>
+<div class="section" id="q-how-to-force-a-pipeline-to-appear-up-to-date">
+<h3>Q. How to force a pipeline to appear up to date?<a class="headerlink" href="#q-how-to-force-a-pipeline-to-appear-up-to-date" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><p><em>I have made a trivial modification to one of my data files and now Ruffus wants to rerun my month long pipeline. How can I convince Ruffus that everything is fine and to leave things as they are?</em></p>
+<p>The standard way to do this is to touch all the files downstream...
+That way the modification times of your analysis files would postdate your existing files.
+You can do this manually but Ruffus also provides direct support:</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="n">pipeline_run</span> <span class="p">(</span><span class="n">touch_files_only</span> <span class="o">=</span> <span class="bp">True</span><span class="p">)</span>
+</pre></div>
+</div>
+<p>pipeline_run will traverse your pipeline normally, stepping over up-to-date tasks and starting
+with jobs which look out of date. However, none of your pipeline task functions
+will actually be called; instead, each out-of-date file is <a class="reference external" href="https://en.wikipedia.org/wiki/Touch_(Unix)">touch</a>-ed in
+turn so that the file modification dates follow on successively.</p>
+<p>See the documentation for <a class="reference internal" href="pipeline_functions.html#pipeline-functions-pipeline-run"><em>pipeline_run()</em></a></p>
+<p>It is even simpler if you are using the new Ruffus.cmdline support from version 2.4. You can just type</p>
+<blockquote>
+<div><div class="highlight-bash"><div class="highlight"><pre>your script --touch_files_only <span class="o">[</span>--other_options_of_your_own_etc<span class="o">]</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p>See <a class="reference internal" href="tutorials/new_tutorial/command_line.html#new-manual-cmdline"><em>command line</em></a> documentation.</p>
+</div></blockquote>
+</div>
+<div class="section" id="q-how-can-i-use-my-own-decorators-with-ruffus">
+<h3>Q. How can I use my own decorators with Ruffus?<a class="headerlink" href="#q-how-can-i-use-my-own-decorators-with-ruffus" title="Permalink to this headline">¶</a></h3>
+<p>(Thanks to Radhouane Aniba for contributing to this answer.)</p>
+<ol class="upperalpha simple">
+<li>With care! If the following two points are observed:</li>
+</ol>
+<div class="section" id="use-wraps-from-functools-or-michele-simionato-s-decorator-module">
+<h4>1. Use <a class="reference external" href="https://docs.python.org/2/library/functools.html#functools.wraps">@wraps</a> from <tt class="docutils literal"><span class="pre">functools</span></tt> or Michele Simionato’s <a class="reference external" href="https://pypi.python.org/pypi/decorator">decorator</a> module<a class="headerlink" href="#use-wraps-from-functools-or-michele-simionato-s-decorator-module" title="Permalink to this headline">¶</a></h4>
+<blockquote>
+<div><p>These will automatically forward attributes from the task function correctly:</p>
+<ul class="simple">
+<li><tt class="docutils literal"><span class="pre">__name__</span></tt> and <tt class="docutils literal"><span class="pre">__module__</span></tt> is used to identify functions uniquely in a Ruffus pipeline, and</li>
+<li><tt class="docutils literal"><span class="pre">pipeline_task</span></tt> is used to hold per task data</li>
+</ul>
+</div></blockquote>
+</div>
+<div class="section" id="always-call-ruffus-decorators-first-before-your-own-decorators">
+<h4>2. Always call Ruffus decorators first before your own decorators.<a class="headerlink" href="#always-call-ruffus-decorators-first-before-your-own-decorators" title="Permalink to this headline">¶</a></h4>
+<blockquote>
+<div><p>Otherwise, your decorator will be ignored.</p>
+<p>So this works:</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="nd">@follows</span><span class="p">(</span><span class="n">prev_task</span><span class="p">)</span>
+<span class="nd">@custom_decorator</span><span class="p">(</span><span class="n">something</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">test</span><span class="p">():</span>
+ <span class="k">pass</span>
+</pre></div>
+</div>
+<p>This is a bit futile:</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="c"># ignore @custom_decorator</span>
+<span class="nd">@custom_decorator</span><span class="p">(</span><span class="n">something</span><span class="p">)</span>
+<span class="nd">@follows</span><span class="p">(</span><span class="n">prev_task</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">test</span><span class="p">():</span>
+ <span class="k">pass</span>
+</pre></div>
+</div>
+<p>This order dependency is an unfortunate quirk of how python decorators work. The last (rather futile)
+piece of code is equivalent to:</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="n">test</span> <span class="o">=</span> <span class="n">custom_decorator</span><span class="p">(</span><span class="n">something</span><span class="p">)(</span><span class="n">ruffus</span><span class="o">.</span><span class="n">follows</span><span class="p">(</span><span class="n">prev_task</span><span class="p">)(</span><span class="n">test</span><span class="p">))</span>
+</pre></div>
+</div>
+<p>Unfortunately, Ruffus has no idea that someone else (<tt class="docutils literal"><span class="pre">custom_decorator</span></tt>) is also modifying the <tt class="docutils literal"><span class="pre">test()</span></tt> function
+after it (<tt class="docutils literal"><span class="pre">ruffus.follows</span></tt>) has had its go.</p>
+</div></blockquote>
+</div>
+<div class="section" id="example-decorator">
+<h4>Example decorator:<a class="headerlink" href="#example-decorator" title="Permalink to this headline">¶</a></h4>
+<blockquote>
+<div><p>Let us look at a decorator to time jobs:</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="kn">import</span> <span class="nn">sys</span><span class="o">,</span> <span class="nn">time</span>
+<span class="k">def</span> <span class="nf">time_func_call</span><span class="p">(</span><span class="n">func</span><span class="p">,</span> <span class="n">stream</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
+ <span class="sd">"""prints elapsed time to standard out, or any other file-like object with a .write() method.</span>
+<span class="sd"> """</span>
+ <span class="n">start</span> <span class="o">=</span> <span class="n">time</span><span class="o">.</span><span class="n">time</span><span class="p">()</span>
+ <span class="c"># Run the decorated function.</span>
+ <span class="n">ret</span> <span class="o">=</span> <span class="n">func</span><span class="p">(</span><span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span>
+ <span class="c"># Stop the timer.</span>
+ <span class="n">end</span> <span class="o">=</span> <span class="n">time</span><span class="o">.</span><span class="n">time</span><span class="p">()</span>
+ <span class="n">elapsed</span> <span class="o">=</span> <span class="n">end</span> <span class="o">-</span> <span class="n">start</span>
+ <span class="n">stream</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="s">"{} took {} seconds</span><span class="se">\n</span><span class="s">"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">func</span><span class="o">.</span><span class="n">__name__</span><span class="p">,</span> <span class="n">elapsed</span><span class="p">))</span>
+ <span class="k">return</span> <span class="n">ret</span>
+
+
+<span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+<span class="kn">import</span> <span class="nn">sys</span>
+<span class="kn">import</span> <span class="nn">time</span>
+
+<span class="nd">@time_job</span><span class="p">(</span><span class="n">sys</span><span class="o">.</span><span class="n">stderr</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">first_task</span><span class="p">():</span>
+ <span class="k">print</span> <span class="s">"First task"</span>
+
+
+<span class="nd">@follows</span><span class="p">(</span><span class="n">first_task</span><span class="p">)</span>
+<span class="nd">@time_job</span><span class="p">(</span><span class="n">sys</span><span class="o">.</span><span class="n">stderr</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">second_task</span><span class="p">():</span>
+ <span class="k">print</span> <span class="s">"Second task"</span>
+
+
+<span class="nd">@follows</span><span class="p">(</span><span class="n">second_task</span><span class="p">)</span>
+<span class="nd">@time_job</span><span class="p">(</span><span class="n">sys</span><span class="o">.</span><span class="n">stderr</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">final_task</span><span class="p">():</span>
+ <span class="k">print</span> <span class="s">"Final task"</span>
+
+<span class="n">pipeline_run</span><span class="p">()</span>
+</pre></div>
+</div>
+<p>What would <tt class="docutils literal"><span class="pre">@time_job</span></tt> look like?</p>
+</div></blockquote>
+</div>
+<div class="section" id="using-functools-wraps">
+<h4>1. Using functools <a class="reference external" href="https://docs.python.org/2/library/functools.html#functools.wraps">@wraps</a><a class="headerlink" href="#using-functools-wraps" title="Permalink to this headline">¶</a></h4>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="kn">import</span> <span class="nn">functools</span>
+<span class="k">def</span> <span class="nf">time_job</span><span class="p">(</span><span class="n">stream</span><span class="o">=</span><span class="n">sys</span><span class="o">.</span><span class="n">stdout</span><span class="p">):</span>
+ <span class="k">def</span> <span class="nf">actual_time_job</span><span class="p">(</span><span class="n">func</span><span class="p">):</span>
+ <span class="nd">@functools.wraps</span><span class="p">(</span><span class="n">func</span><span class="p">)</span>
+ <span class="k">def</span> <span class="nf">wrapper</span><span class="p">(</span><span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
+ <span class="k">return</span> <span class="n">time_func_call</span><span class="p">(</span><span class="n">func</span><span class="p">,</span> <span class="n">stream</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span>
+ <span class="k">return</span> <span class="n">wrapper</span>
+ <span class="k">return</span> <span class="n">actual_time_job</span>
+</pre></div>
+</div>
+</div></blockquote>
+</div>
+<div class="section" id="using-michele-simionato-s-decorator-module">
+<h4>2. Using Michele Simionato’s <a class="reference external" href="https://pypi.python.org/pypi/decorator">decorator</a> module<a class="headerlink" href="#using-michele-simionato-s-decorator-module" title="Permalink to this headline">¶</a></h4>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="kn">import</span> <span class="nn">decorator</span>
+<span class="k">def</span> <span class="nf">time_job</span><span class="p">(</span><span class="n">stream</span><span class="o">=</span><span class="n">sys</span><span class="o">.</span><span class="n">stdout</span><span class="p">):</span>
+ <span class="k">def</span> <span class="nf">time_job</span><span class="p">(</span><span class="n">func</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
+ <span class="k">return</span> <span class="n">time_func_call</span><span class="p">(</span><span class="n">func</span><span class="p">,</span> <span class="n">stream</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span>
+ <span class="k">return</span> <span class="n">decorator</span><span class="o">.</span><span class="n">decorator</span><span class="p">(</span><span class="n">time_job</span><span class="p">)</span>
+</pre></div>
+</div>
+</div></blockquote>
+</div>
+<div class="section" id="by-hand-using-a-callable-object">
+<h4>3. By hand, using a <a class="reference external" href="https://docs.python.org/2/reference/datamodel.html#emulating-callable-objects">callable object</a><a class="headerlink" href="#by-hand-using-a-callable-object" title="Permalink to this headline">¶</a></h4>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="k">class</span> <span class="nc">time_job</span><span class="p">(</span><span class="nb">object</span><span class="p">):</span>
+ <span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">stream</span><span class="o">=</span><span class="n">sys</span><span class="o">.</span><span class="n">stdout</span><span class="p">):</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">stream</span> <span class="o">=</span> <span class="n">stream</span>
+ <span class="k">def</span> <span class="nf">__call__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">func</span><span class="p">):</span>
+ <span class="k">def</span> <span class="nf">inner</span><span class="p">(</span><span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
+ <span class="k">return</span> <span class="n">time_func_call</span><span class="p">(</span><span class="n">func</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">stream</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span>
+ <span class="c"># remember to forward __name__</span>
+ <span class="n">inner</span><span class="o">.</span><span class="n">__name__</span> <span class="o">=</span> <span class="n">func</span><span class="o">.</span><span class="n">__name__</span>
+ <span class="n">inner</span><span class="o">.</span><span class="n">__module__</span> <span class="o">=</span> <span class="n">func</span><span class="o">.</span><span class="n">__module__</span>
+ <span class="n">inner</span><span class="o">.</span><span class="n">__doc__</span> <span class="o">=</span> <span class="n">func</span><span class="o">.</span><span class="n">__doc__</span>
+ <span class="k">if</span> <span class="nb">hasattr</span><span class="p">(</span><span class="n">func</span><span class="p">,</span> <span class="s">"pipeline_task"</span><span class="p">):</span>
+ <span class="n">inner</span><span class="o">.</span><span class="n">pipeline_task</span> <span class="o">=</span> <span class="n">func</span><span class="o">.</span><span class="n">pipeline_task</span>
+ <span class="k">return</span> <span class="n">inner</span>
+</pre></div>
+</div>
+</div></blockquote>
+</div>
+</div>
+<div class="section" id="q-can-a-task-function-in-a-ruffus-pipeline-be-called-normally-outside-of-ruffus">
+<h3>Q. Can a task function in a <em>Ruffus</em> pipeline be called normally outside of Ruffus?<a class="headerlink" href="#q-can-a-task-function-in-a-ruffus-pipeline-be-called-normally-outside-of-ruffus" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><p>A. Yes. Most python decorators wrap themselves around a function. However, <em>Ruffus</em> leaves the
+original function untouched and unwrapped. Instead, <em>Ruffus</em> adds a <tt class="docutils literal"><span class="pre">pipeline_task</span></tt> attribute
+to the task function to signal that this is a pipelined function.</p>
+<p>This means the original task function can be called just like any other python function.</p>
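+<p>For example, a minimal sketch (the task function <tt class="docutils literal"><span class="pre">count_lines()</span></tt> below is purely illustrative and not part of <em>Ruffus</em>):</p>
+<div class="highlight-python"><pre># minimal sketch: "count_lines" is a made-up task function
+from ruffus import *
+
+@transform("data.txt", suffix(".txt"), ".count")
+def count_lines(input_file, output_file):
+    count = sum(1 for line in open(input_file))
+    open(output_file, "w").write("%d\n" % count)
+
+# the decorated function is still the original, unwrapped function:
+# it can be called directly, outside of pipeline_run()
+count_lines("data.txt", "data.count")</pre>
+</div>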
+</div></blockquote>
+</div>
+<div class="section" id="q-my-ruffus-tasks-create-two-files-at-a-time-why-is-the-second-one-ignored-in-successive-stages-of-my-pipeline">
+<h3>Q. My <em>Ruffus</em> tasks create two files at a time. Why is the second one ignored in successive stages of my pipeline?<a class="headerlink" href="#q-my-ruffus-tasks-create-two-files-at-a-time-why-is-the-second-one-ignored-in-successive-stages-of-my-pipeline" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><p><em>This is my code:</em></p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+<span class="kn">import</span> <span class="nn">sys</span>
+<span class="nd">@transform</span><span class="p">(</span><span class="s">"start.input"</span><span class="p">,</span> <span class="n">regex</span><span class="p">(</span><span class="s">".+"</span><span class="p">),</span> <span class="p">(</span><span class="s">"first_output.txt"</span><span class="p">,</span> <span class="s">"second_output.txt"</span><span class="p">))</span>
+<span class="k">def</span> <span class="nf">task1</span><span class="p">(</span><span class="n">i</span><span class="p">,</span><span class="n">o</span><span class="p">):</span>
+ <span class="k">pass</span>
+
+<span class="nd">@transform</span><span class="p">(</span><span class="n">task1</span><span class="p">,</span> <span class="n">suffix</span><span class="p">(</span><span class="s">".txt"</span><span class="p">),</span> <span class="s">".result"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">task2</span><span class="p">(</span><span class="n">i</span><span class="p">,</span> <span class="n">o</span><span class="p">):</span>
+ <span class="k">pass</span>
+
+<span class="n">pipeline_printout</span><span class="p">(</span><span class="n">sys</span><span class="o">.</span><span class="n">stdout</span><span class="p">,</span> <span class="p">[</span><span class="n">task2</span><span class="p">],</span> <span class="n">verbose</span><span class="o">=</span><span class="mi">3</span><span class="p">)</span>
+</pre></div>
+</div>
+<div class="highlight-python"><pre>________________________________________
+Tasks which will be run:
+
+Task = task1
+ Job = [start.input
+ ->[first_output.txt, second_output.txt]]
+
+Task = task2
+ Job = [[first_output.txt, second_output.txt]
+ ->first_output.result]
+
+________________________________________</pre>
+</div>
+</div></blockquote>
+<p>A. This code produces a single output consisting of a tuple of 2 files. In fact, you want two
+outputs, each consisting of 1 file.</p>
+<p>You want a single job (single input) to produce multiple outputs (multiple jobs
+in downstream tasks). This is a one-to-many operation which calls for
+<a class="reference internal" href="decorators/split.html#decorators-split"><em>@split</em></a>:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+<span class="kn">import</span> <span class="nn">sys</span>
+<span class="nd">@split</span><span class="p">(</span><span class="s">"start.input"</span><span class="p">,</span> <span class="p">(</span><span class="s">"first_output.txt"</span><span class="p">,</span> <span class="s">"second_output.txt"</span><span class="p">))</span>
+<span class="k">def</span> <span class="nf">task1</span><span class="p">(</span><span class="n">i</span><span class="p">,</span><span class="n">o</span><span class="p">):</span>
+ <span class="k">pass</span>
+
+<span class="nd">@transform</span><span class="p">(</span><span class="n">task1</span><span class="p">,</span> <span class="n">suffix</span><span class="p">(</span><span class="s">".txt"</span><span class="p">),</span> <span class="s">".result"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">task2</span><span class="p">(</span><span class="n">i</span><span class="p">,</span> <span class="n">o</span><span class="p">):</span>
+ <span class="k">pass</span>
+
+<span class="n">pipeline_printout</span><span class="p">(</span><span class="n">sys</span><span class="o">.</span><span class="n">stdout</span><span class="p">,</span> <span class="p">[</span><span class="n">task2</span><span class="p">],</span> <span class="n">verbose</span><span class="o">=</span><span class="mi">3</span><span class="p">)</span>
+</pre></div>
+</div>
+<div class="highlight-python"><pre>________________________________________
+Tasks which will be run:
+
+Task = task1
+ Job = [start.input
+ ->[first_output.txt, second_output.txt]]
+
+Task = task2
+ Job = [first_output.txt
+ ->first_output.result]
+ Job = [second_output.txt
+ ->second_output.result]
+
+________________________________________</pre>
+</div>
+</div></blockquote>
+</div></blockquote>
+</div>
+<div class="section" id="q-how-can-a-ruffus-task-produce-output-which-goes-off-in-different-directions">
+<h3>Q. How can a <em>Ruffus</em> task produce output which goes off in different directions?<a class="headerlink" href="#q-how-can-a-ruffus-task-produce-output-which-goes-off-in-different-directions" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><p>A. As above, anytime there is a situation which requires a one-to-many operation, you should reach
+for <a class="reference internal" href="decorators/subdivide.html#decorators-subdivide"><em>@subdivide</em></a>. The advanced form takes a regular expression, making
+it easier to produce multiple derivatives of the input file. The following example subdivides
+<em>2</em> jobs each into <em>3</em>, so that the subsequent task will run <em>2</em> x <em>3</em> = <em>6</em> jobs.</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+<span class="kn">import</span> <span class="nn">sys</span>
+<span class="nd">@subdivide</span><span class="p">([</span><span class="s">"1.input_file"</span><span class="p">,</span>
+ <span class="s">"2.input_file"</span><span class="p">],</span>
+ <span class="n">regex</span><span class="p">(</span><span class="s">r"(.+).input_file"</span><span class="p">),</span> <span class="c"># match file prefix</span>
+ <span class="p">[</span><span class="s">r"\1.file_type1"</span><span class="p">,</span>
+ <span class="s">r"\1.file_type2"</span><span class="p">,</span>
+ <span class="s">r"\1.file_type3"</span><span class="p">])</span>
+<span class="k">def</span> <span class="nf">split_task</span><span class="p">(</span><span class="nb">input</span><span class="p">,</span> <span class="n">output</span><span class="p">):</span>
+ <span class="k">pass</span>
+
+
+<span class="nd">@transform</span><span class="p">(</span><span class="n">split_task</span><span class="p">,</span> <span class="n">regex</span><span class="p">(</span><span class="s">"(.+)"</span><span class="p">),</span> <span class="s">r"\1.test"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">test_split_output</span><span class="p">(</span><span class="n">i</span><span class="p">,</span> <span class="n">o</span><span class="p">):</span>
+ <span class="k">pass</span>
+
+<span class="n">pipeline_printout</span><span class="p">(</span><span class="n">sys</span><span class="o">.</span><span class="n">stdout</span><span class="p">,</span> <span class="p">[</span><span class="n">test_split_output</span><span class="p">],</span> <span class="n">verbose</span> <span class="o">=</span> <span class="mi">3</span><span class="p">)</span>
+</pre></div>
+</div>
+<p>Each of the original 2 files has been split into three so that test_split_output will run
+6 jobs simultaneously.</p>
+<blockquote>
+<div><div class="highlight-python"><pre>________________________________________
+Tasks which will be run:
+
+Task = split_task
+ Job = [1.input_file ->[1.file_type1, 1.file_type2, 1.file_type3]]
+ Job = [2.input_file ->[2.file_type1, 2.file_type2, 2.file_type3]]
+
+Task = test_split_output
+ Job = [1.file_type1 ->1.file_type1.test]
+ Job = [1.file_type2 ->1.file_type2.test]
+ Job = [1.file_type3 ->1.file_type3.test]
+ Job = [2.file_type1 ->2.file_type1.test]
+ Job = [2.file_type2 ->2.file_type2.test]
+ Job = [2.file_type3 ->2.file_type3.test]
+________________________________________</pre>
+</div>
+</div></blockquote>
+</div></blockquote>
+</div></blockquote>
+</div>
+<div class="section" id="q-can-i-call-extra-code-before-each-job">
+<h3>Q. Can I call extra code before each job?<a class="headerlink" href="#q-can-i-call-extra-code-before-each-job" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><p>A. This is easily accomplished by hijacking the process
+for checking if jobs are up to date or not (<a class="reference internal" href="decorators/check_if_uptodate.html#decorators-check-if-uptodate"><em>@check_if_uptodate</em></a>):</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+<span class="kn">import</span> <span class="nn">sys</span>
+
+<span class="k">def</span> <span class="nf">run_this_before_each_job</span> <span class="p">(</span><span class="o">*</span><span class="n">args</span><span class="p">):</span>
+ <span class="k">print</span> <span class="s">"Calling function before each job using these args"</span><span class="p">,</span> <span class="n">args</span>
+ <span class="c"># Remember to delegate to the default *Ruffus* code for checking if</span>
+ <span class="c"># jobs need to run.</span>
+ <span class="k">return</span> <span class="n">needs_update_check_modify_time</span><span class="p">(</span><span class="o">*</span><span class="n">args</span><span class="p">)</span>
+
+<span class="nd">@check_if_uptodate</span><span class="p">(</span><span class="n">run_this_before_each_job</span><span class="p">)</span>
+<span class="nd">@files</span><span class="p">([[</span><span class="bp">None</span><span class="p">,</span> <span class="s">"a.1"</span><span class="p">],</span> <span class="p">[</span><span class="bp">None</span><span class="p">,</span> <span class="s">"b.1"</span><span class="p">]])</span>
+<span class="k">def</span> <span class="nf">task_func</span><span class="p">(</span><span class="nb">input</span><span class="p">,</span> <span class="n">output</span><span class="p">):</span>
+ <span class="k">pass</span>
+
+<span class="n">pipeline_printout</span><span class="p">(</span><span class="n">sys</span><span class="o">.</span><span class="n">stdout</span><span class="p">,</span> <span class="p">[</span><span class="n">task_func</span><span class="p">])</span>
+</pre></div>
+</div>
+<p>This results in:</p>
+<div class="highlight-python"><pre>________________________________________
+>>> pipeline_run([task_func])
+Calling function before each job using these args (None, 'a.1')
+Calling function before each job using these args (None, 'a.1')
+Calling function before each job using these args (None, 'b.1')
+ Job = [None -> a.1] completed
+ Job = [None -> b.1] completed
+Completed Task = task_func</pre>
+</div>
+<div class="admonition note">
+<p class="first admonition-title">Note</p>
+<p class="last">Because <tt class="docutils literal"><span class="pre">run_this_before_each_job(...)</span></tt> is called whenever <em>Ruffus</em> checks to see if
+a job is up to date or not, the function may be called twice for some jobs
+(e.g. <tt class="docutils literal"><span class="pre">(None,</span> <span class="pre">'a.1')</span></tt> above).</p>
+</div>
+</div></blockquote>
+</div></blockquote>
+</div>
+<div class="section" id="q-does-ruffus-allow-checkpointing-to-distinguish-interrupted-and-completed-results">
+<h3>Q. Does <em>Ruffus</em> allow checkpointing: to distinguish interrupted and completed results?<a class="headerlink" href="#q-does-ruffus-allow-checkpointing-to-distinguish-interrupted-and-completed-results" title="Permalink to this headline">¶</a></h3>
+<div class="section" id="a-use-the-builtin-sqlite-checkpointing">
+<h4>A. Use the builtin sqlite checkpointing<a class="headerlink" href="#a-use-the-builtin-sqlite-checkpointing" title="Permalink to this headline">¶</a></h4>
+<blockquote>
+<div><p>By default, <tt class="docutils literal"><span class="pre">pipeline_run(...)</span></tt> will save the timestamps for output files from successfully run jobs to an sqlite database file (<tt class="docutils literal"><span class="pre">.ruffus_history.sqlite</span></tt>) in the current directory.</p>
+<ul class="simple">
+<li>If you are using <tt class="docutils literal"><span class="pre">Ruffus.cmdline</span></tt>, you can change the checksum / timestamp database file name on the command line using <tt class="docutils literal"><span class="pre">--checksum_file_name</span> <span class="pre">NNNN</span></tt></li>
+</ul>
+<p>The level of timestamping / checksumming can be set via the <tt class="docutils literal"><span class="pre">checksum_level</span></tt> parameter:</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="n">pipeline_run</span><span class="p">(</span><span class="o">...</span><span class="p">,</span> <span class="n">checksum_level</span> <span class="o">=</span> <span class="n">N</span><span class="p">,</span> <span class="o">...</span><span class="p">)</span>
+</pre></div>
+</div>
+<p>where the default is 1:</p>
+<div class="highlight-python"><pre>level 0 : Use only file timestamps
+level 1 : above, plus timestamp of successful job completion
+level 2 : above, plus a checksum of the pipeline function body
+level 3 : above, plus a checksum of the pipeline function default arguments and the additional arguments passed in by task decorators</pre>
+</div>
+</div></blockquote>
+</div>
+<div class="section" id="a-use-a-flag-file">
+<h4>A. Use a flag file<a class="headerlink" href="#a-use-a-flag-file" title="Permalink to this headline">¶</a></h4>
+<blockquote>
+<div><p>When gmake is interrupted, it will delete the target file it is updating so that the target is
+remade from scratch when make is next run. Ruffus, by design, does not do this because, more often than
+not, the partial / incomplete file may be useful, if only to reveal, for example, what might have caused an interrupting error
+or exception. It also seems a bit too clever and underhand to go behind the programmer’s back to delete files...</p>
+<p>A common <em>Ruffus</em> convention is to create an empty checkpoint or “flag” file whose sole purpose
+is to record a modification time and the successful completion of a job.</p>
+<p>This would be a task with a completion flag:</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="c">#</span>
+<span class="c"># Assuming a pipelined task function named "stage1"</span>
+<span class="c">#</span>
+<span class="nd">@transform</span><span class="p">(</span><span class="n">stage1</span><span class="p">,</span> <span class="n">suffix</span><span class="p">(</span><span class="s">".stage1"</span><span class="p">),</span> <span class="p">[</span><span class="s">".stage2"</span><span class="p">,</span> <span class="s">".stage2_finished"</span><span class="p">]</span> <span class="p">)</span>
+<span class="k">def</span> <span class="nf">stage2</span> <span class="p">(</span><span class="n">input_files</span><span class="p">,</span> <span class="n">output_files</span><span class="p">):</span>
+ <span class="n">task_output_file</span><span class="p">,</span> <span class="n">flag_file</span> <span class="o">=</span> <span class="n">output_files</span>
+ <span class="n">cmd</span> <span class="o">=</span> <span class="p">(</span><span class="s">"do_something2 </span><span class="si">%(input_file)s</span><span class="s"> >| </span><span class="si">%(task_output_file)s</span><span class="s"> "</span><span class="p">)</span>
+ <span class="n">cmd</span> <span class="o">=</span> <span class="n">cmd</span> <span class="o">%</span> <span class="p">{</span>
+ <span class="s">"input_file"</span><span class="p">:</span> <span class="n">input_files</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span>
+ <span class="s">"task_output_file"</span><span class="p">:</span> <span class="n">task_output_file</span>
+ <span class="p">}</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="n">os</span><span class="o">.</span><span class="n">system</span><span class="p">(</span> <span class="n">cmd</span> <span class="p">):</span>
+ <span class="c">#88888888888888888888888888888888888888888888888888888888888888888888888888888</span>
+ <span class="c">#</span>
+ <span class="c"># It worked: Create completion flag_file</span>
+ <span class="c">#</span>
+ <span class="nb">open</span><span class="p">(</span><span class="n">flag_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+ <span class="c">#</span>
+ <span class="c">#88888888888888888888888888888888888888888888888888888888888888888888888888888</span>
+</pre></div>
+</div>
+<p>The flag files <tt class="docutils literal"><span class="pre">xxx.stage2_finished</span></tt> indicate that each job is finished. If a flag file is missing,
+the corresponding <tt class="docutils literal"><span class="pre">xxx.stage2</span></tt> is only a partial, interrupted result.</p>
+<p>The only thing to be aware of is that the flag file will appear in the list of inputs of the
+downstream task, which should accordingly look like this:</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="nd">@transform</span><span class="p">(</span><span class="n">stage2</span><span class="p">,</span> <span class="n">suffix</span><span class="p">(</span><span class="s">".stage2"</span><span class="p">),</span> <span class="p">[</span><span class="s">".stage3"</span><span class="p">,</span> <span class="s">".stage3_finished"</span><span class="p">]</span> <span class="p">)</span>
+<span class="k">def</span> <span class="nf">stage3</span> <span class="p">(</span><span class="n">input_files</span><span class="p">,</span> <span class="n">output_files</span><span class="p">):</span>
+
+ <span class="c">#888888888888888888888888888888888888888888888888888888888888888888888888888888888</span>
+ <span class="c">#</span>
+ <span class="c"># Note that the first parameter is a LIST of input files, the last of which</span>
+ <span class="c"># is the flag file from the previous task which we can ignore</span>
+ <span class="c">#</span>
+ <span class="n">input_file</span><span class="p">,</span> <span class="n">previous_flag_file</span> <span class="o">=</span> <span class="n">input_files</span>
+ <span class="c">#</span>
+ <span class="c">#888888888888888888888888888888888888888888888888888888888888888888888888888888888</span>
+ <span class="n">task_output_file</span><span class="p">,</span> <span class="n">flag_file</span> <span class="o">=</span> <span class="n">output_files</span>
+ <span class="n">cmd</span> <span class="o">=</span> <span class="p">(</span><span class="s">"do_something3 </span><span class="si">%(input_file)s</span><span class="s"> >| </span><span class="si">%(task_output_file)s</span><span class="s"> "</span><span class="p">)</span>
+ <span class="n">cmd</span> <span class="o">=</span> <span class="n">cmd</span> <span class="o">%</span> <span class="p">{</span>
+ <span class="s">"input_file"</span><span class="p">:</span> <span class="n">input_file</span><span class="p">,</span>
+ <span class="s">"task_output_file"</span><span class="p">:</span> <span class="n">task_output_file</span>
+ <span class="p">}</span>
+ <span class="c"># completion flag file for this task</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="n">os</span><span class="o">.</span><span class="n">system</span><span class="p">(</span> <span class="n">cmd</span> <span class="p">):</span>
+ <span class="nb">open</span><span class="p">(</span><span class="n">flag_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+</pre></div>
+</div>
+<p>The <a class="reference internal" href="examples/bioinformatics/part2.html#examples-bioinformatics-part2-step2"><em>Bioinformatics example</em></a> contains <a class="reference internal" href="examples/bioinformatics/part2_code.html#examples-bioinformatics-part2-code"><em>code</em></a> for checkpointing.</p>
+</div></blockquote>
+</div>
+<div class="section" id="a-use-a-temp-file">
+<h4>A. Use a temp file<a class="headerlink" href="#a-use-a-temp-file" title="Permalink to this headline">¶</a></h4>
+<blockquote>
+<div><p>Thanks to Martin Goodson for suggesting this and providing an example. In his words:</p>
+<p>“I normally use a decorator to create a temporary file which is only renamed after the task has completed without any problems. This seems a more elegant solution to the problem:”</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="k">def</span> <span class="nf">usetemp</span><span class="p">(</span><span class="n">task_func</span><span class="p">):</span>
+ <span class="sd">""" Decorate a function to write to a tmp file and then rename it. So half finished tasks cannot create up to date targets.</span>
+<span class="sd"> """</span>
+ <span class="nd">@wraps</span><span class="p">(</span><span class="n">task_func</span><span class="p">)</span>
+ <span class="k">def</span> <span class="nf">wrapper_function</span><span class="p">(</span><span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
+ <span class="n">args</span><span class="o">=</span><span class="nb">list</span><span class="p">(</span><span class="n">args</span><span class="p">)</span>
+ <span class="n">outnames</span><span class="o">=</span><span class="n">args</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">outnames</span><span class="p">,</span> <span class="nb">basestring</span><span class="p">)</span> <span class="ow">and</span> <span class="nb">hasattr</span><span class="p">(</span><span class="n">outnames</span><span class="p">,</span> <span class="s">'__getitem__'</span><span class="p">):</span>
+ <span class="n">tmpnames</span><span class="o">=</span><span class="p">[</span><span class="nb">str</span><span class="p">(</span><span class="n">x</span><span class="p">)</span><span class="o">+</span><span class="s">".tmp"</span> <span class="k">for</span> <span class="n">x</span> <span class="ow">in</span> <span class="n">outnames</span><span class="p">]</span>
+ <span class="n">args</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span><span class="o">=</span><span class="n">tmpnames</span>
+ <span class="n">task_func</span><span class="p">(</span><span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span>
+ <span class="k">try</span><span class="p">:</span>
+ <span class="k">for</span> <span class="n">tmp</span><span class="p">,</span> <span class="n">name</span> <span class="ow">in</span> <span class="nb">zip</span><span class="p">(</span><span class="n">tmpnames</span><span class="p">,</span> <span class="n">outnames</span><span class="p">):</span>
+ <span class="k">if</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">exists</span><span class="p">(</span><span class="n">tmp</span><span class="p">):</span>
+ <span class="n">os</span><span class="o">.</span><span class="n">rename</span><span class="p">(</span><span class="n">tmp</span><span class="p">,</span> <span class="nb">str</span><span class="p">(</span><span class="n">name</span><span class="p">))</span>
+ <span class="k">except</span> <span class="ne">BaseException</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
+ <span class="k">for</span> <span class="n">name</span> <span class="ow">in</span> <span class="n">outnames</span><span class="p">:</span>
+ <span class="k">if</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">exists</span><span class="p">(</span><span class="n">name</span><span class="p">):</span>
+ <span class="n">os</span><span class="o">.</span><span class="n">remove</span><span class="p">(</span><span class="n">name</span><span class="p">)</span>
+ <span class="k">raise</span> <span class="p">(</span><span class="n">e</span><span class="p">)</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="n">tmp</span><span class="o">=</span><span class="nb">str</span><span class="p">(</span><span class="n">outnames</span><span class="p">)</span><span class="o">+</span><span class="s">'.tmp'</span>
+ <span class="n">args</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span><span class="o">=</span><span class="n">tmp</span>
+ <span class="n">task_func</span><span class="p">(</span><span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span>
+ <span class="n">os</span><span class="o">.</span><span class="n">rename</span><span class="p">(</span><span class="n">tmp</span><span class="p">,</span> <span class="nb">str</span><span class="p">(</span><span class="n">outnames</span><span class="p">))</span>
+<span class="k">return</span> <span class="n">wrapper_function</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p>Use like this:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="nd">@files</span><span class="p">(</span><span class="bp">None</span><span class="p">,</span> <span class="s">'client1.price'</span><span class="p">)</span>
+<span class="nd">@usetemp</span>
+<span class="k">def</span> <span class="nf">getusers</span><span class="p">(</span><span class="n">inputfile</span><span class="p">,</span> <span class="n">outputname</span><span class="p">):</span>
+ <span class="c">#**************************************************</span>
+ <span class="c"># code goes here</span>
+ <span class="c"># outputname now refers to temporary file</span>
+ <span class="k">pass</span>
+</pre></div>
+</div>
+</div></blockquote>
+</div></blockquote>
+</div>
+</div>
+</div>
+<div class="section" id="windows">
+<h2>Windows<a class="headerlink" href="#windows" title="Permalink to this headline">¶</a></h2>
+<div class="section" id="q-windows-seems-to-spawn-ruffus-processes-recursively">
+<h3>Q. Windows seems to spawn <em>ruffus</em> processes recursively<a class="headerlink" href="#q-windows-seems-to-spawn-ruffus-processes-recursively" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><p>A. It is necessary to protect the “entry point” of the program under Windows.
+Otherwise, a new process will be started each time the main module is imported
+by a new Python interpreter as an unintended side effect, causing a cascade
+of new processes.</p>
+<p>See: <a class="reference external" href="http://docs.python.org/library/multiprocessing.html#multiprocessing-programming">http://docs.python.org/library/multiprocessing.html#multiprocessing-programming</a></p>
+<p>This code works:</p>
+<div class="highlight-python"><pre>if __name__ == '__main__':
+ try:
+ pipeline_run([parallel_task], multiprocess = 5)
+except Exception, e:
+ print e.args</pre>
+</div>
+</div></blockquote>
+</div>
+</div>
+<div class="section" id="sun-grid-engine-pbs-slurm-etc">
+<h2>Sun Grid Engine / PBS / SLURM etc<a class="headerlink" href="#sun-grid-engine-pbs-slurm-etc" title="Permalink to this headline">¶</a></h2>
+<div class="section" id="q-can-ruffus-be-used-to-manage-a-cluster-or-grid-based-pipeline">
+<h3>Q. Can Ruffus be used to manage a cluster or grid based pipeline?<a class="headerlink" href="#q-can-ruffus-be-used-to-manage-a-cluster-or-grid-based-pipeline" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><ol class="upperalpha simple">
+<li>Some minimal modifications have to be made to your <em>Ruffus</em> script to allow it to submit jobs to a cluster (see the sketch below)</li>
+</ol>
+<p>See <a class="reference internal" href="tutorials/new_tutorial/multiprocessing.html#new-manual-ruffus-drmaa-wrapper-run-job"><em>ruffus.drmaa_wrapper</em></a></p>
+<p>Thanks to Andreas Heger and others at CGAT and Bernie Pope for contributing ideas and code.</p>
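+<p>As a rough sketch only (the command, the <tt class="docutils literal"><span class="pre">previous_task</span></tt> / <tt class="docutils literal"><span class="pre">my_program</span></tt> names and the session handling are illustrative assumptions, not a complete recipe), a task might hand its work to the cluster via <tt class="docutils literal"><span class="pre">drmaa_wrapper.run_job</span></tt> like this:</p>
+<div class="highlight-python"><pre>import drmaa
+from ruffus import transform, suffix, pipeline_run
+from ruffus.drmaa_wrapper import run_job
+
+# one drmaa session shared by the whole pipeline
+drmaa_session = drmaa.Session()
+drmaa_session.initialize()
+
+@transform(previous_task, suffix(".foo"), ".bar")
+def cluster_task(input_file, output_file):
+    # build the command line which will run on a cluster node
+    cmd = "my_program %s &gt; %s" % (input_file, output_file)
+    run_job(cmd_str       = cmd,
+            job_name      = "cluster_task",
+            drmaa_session = drmaa_session,
+            run_locally   = False)
+
+# use multithread, not multiprocess, when submitting via drmaa
+pipeline_run([cluster_task], multithread = 5)
+</pre>
+</div>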
+</div></blockquote>
+</div>
+<div class="section" id="q-when-i-submit-lots-of-jobs-via-sun-grid-engine-sge-the-head-node-occassionally-freezes-and-dies">
+<h3>Q. When I submit lots of jobs via Sun Grid Engine (SGE), the head node occasionally freezes and dies<a class="headerlink" href="#q-when-i-submit-lots-of-jobs-via-sun-grid-engine-sge-the-head-node-occassionally-freezes-and-dies" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><ol class="upperalpha simple">
+<li>You need to use multithreading rather than multiprocessing, as shown below. See <a class="reference internal" href="tutorials/new_tutorial/multiprocessing.html#new-manual-ruffus-drmaa-wrapper-run-job"><em>ruffus.drmaa_wrapper</em></a></li>
+</ol>
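+<p>A minimal sketch (assuming your final task is called <tt class="docutils literal"><span class="pre">final_task</span></tt>): dispatch jobs from worker threads of a single process rather than from forked local processes:</p>
+<div class="highlight-python"><pre># submit up to 20 concurrent jobs from threads of one process
+pipeline_run([final_task], multithread = 20)
+
+# rather than forking 20 local processes:
+#   pipeline_run([final_task], multiprocess = 20)
+</pre>
+</div>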
+</div></blockquote>
+</div>
+<div class="section" id="q-keeping-large-intermediate-files">
+<h3>Q. Keeping Large intermediate files<a class="headerlink" href="#q-keeping-large-intermediate-files" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><p>Sometimes pipelines create a large number of intermediate files which might not be needed later.</p>
+<p>Unfortunately, the current design of <em>Ruffus</em> requires these files to hang around, otherwise the pipeline
+will not know that it ran successfully.</p>
+<p>We have some tentative plans to get around this but in the meantime, Bernie Pope suggests
+truncating intermediate files in place, preserving timestamps:</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="c"># truncate a file to zero bytes, and preserve its original modification time</span>
+<span class="k">def</span> <span class="nf">zeroFile</span><span class="p">(</span><span class="nb">file</span><span class="p">):</span>
+ <span class="k">if</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">exists</span><span class="p">(</span><span class="nb">file</span><span class="p">):</span>
+ <span class="c"># save the current time of the file</span>
+ <span class="n">timeInfo</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">stat</span><span class="p">(</span><span class="nb">file</span><span class="p">)</span>
+ <span class="k">try</span><span class="p">:</span>
+ <span class="n">f</span> <span class="o">=</span> <span class="nb">open</span><span class="p">(</span><span class="nb">file</span><span class="p">,</span><span class="s">'w'</span><span class="p">)</span>
+ <span class="k">except</span> <span class="ne">IOError</span><span class="p">:</span>
+ <span class="k">pass</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="n">f</span><span class="o">.</span><span class="n">truncate</span><span class="p">(</span><span class="mi">0</span><span class="p">)</span>
+ <span class="n">f</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
+ <span class="c"># change the time of the file back to what it was</span>
+ <span class="n">os</span><span class="o">.</span><span class="n">utime</span><span class="p">(</span><span class="nb">file</span><span class="p">,(</span><span class="n">timeInfo</span><span class="o">.</span><span class="n">st_atime</span><span class="p">,</span> <span class="n">timeInfo</span><span class="o">.</span><span class="n">st_mtime</span><span class="p">))</span>
+</pre></div>
+</div>
+</div></blockquote>
+</div>
+</div>
+<div class="section" id="sharing-python-objects-between-ruffus-processes-running-concurrently">
+<h2>Sharing python objects between Ruffus processes running concurrently<a class="headerlink" href="#sharing-python-objects-between-ruffus-processes-running-concurrently" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>The design of Ruffus envisages that much of the data flow in pipelines occurs in files but it is also possible to pass python objects in memory.</p>
+<p>Ruffus uses the <a class="reference external" href="http://docs.python.org/2/library/multiprocessing.html">multiprocessing</a> module and much of the following is a summary of what is covered
+in depth in the Python Standard Library <a class="reference external" href="http://docs.python.org/2/library/multiprocessing.html#sharing-state-between-processes">Documentation</a>.</p>
+<p>Running Ruffus using <tt class="docutils literal"><span class="pre">pipeline_run(...,</span> <span class="pre">multiprocess</span> <span class="pre">=</span> <span class="pre">NNN)</span></tt> where <tt class="docutils literal"><span class="pre">NNN</span></tt> > 1 runs each job concurrently on up to <tt class="docutils literal"><span class="pre">NNN</span></tt> separate local processes.
+Each task function runs independently in a different python interpreter, possibly on a different CPU.
+However, this does mean we have to pay some attention to how data is sent across process boundaries (unlike the situation with <tt class="docutils literal"><span class="pre">pipeline_run(...,</span> <span class="pre">multithread</span> <span class="pre">=</span> <span class="pre">NNN)</span></tt> ).</p>
+<p>The python code and data which comprises your multitasking Ruffus job is sent to a separate process in three ways:</p>
+<ol class="arabic simple">
+<li>The python function code and data objects are <a class="reference external" href="http://docs.python.org/2/library/pickle.html">pickled</a>, i.e. converted into a byte stream, by the master process and sent to the remote process,
+where they are converted back into normal python objects (unpickled).</li>
+<li>The parameters for your jobs, i.e. what Ruffus calls your task functions with, are separately <a class="reference external" href="http://docs.python.org/2/library/pickle.html">pickled</a> and sent to the remote process via
+<a class="reference external" href="http://docs.python.org/2/library/multiprocessing.html#multiprocessing.Queue">multiprocessing.Queue</a></li>
+<li>You can share and synchronise other data yourselves. The canonical example is the logger provided by <tt class="docutils literal"><span class="pre">Ruffus.cmdline.setup_logging</span></tt></li>
+</ol>
+<div class="admonition note">
+<p class="first admonition-title">Note</p>
+<p>Check that your function code and data can be <a class="reference external" href="http://docs.python.org/2/library/pickle.html#what-can-be-pickled-and-unpickled">pickled</a>.</p>
+<p class="last">Only functions, built-in functions and classes defined at the top level of a module are picklable.</p>
+</div>
+<p>The following answers are a short “how-to” for sharing and synchronising data yourselves.</p>
+</div></blockquote>
+<div class="section" id="can-ordinary-python-objects-be-shared-between-processes">
+<h3>Can ordinary python objects be shared between processes?<a class="headerlink" href="#can-ordinary-python-objects-be-shared-between-processes" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><ol class="upperalpha">
+<li><p class="first">Objects which can be <a class="reference external" href="http://docs.python.org/2/library/pickle.html">pickled</a> can be shared as is. These include</p>
+<blockquote>
+<div><ul class="simple">
+<li>numbers</li>
+<li>strings</li>
+<li>tuples, lists, sets, and dictionaries containing only objects which can be <a class="reference external" href="http://docs.python.org/2/library/pickle.html">pickled</a>.</li>
+</ul>
+</div></blockquote>
+</li>
+<li><p class="first">If these do not change during your pipeline, you can just use them without any further effort in your task.</p>
+</li>
+<li><p class="first">If you need to use the value at the point when the task function is <em>called</em>, then you need to pass the python object as parameters to your task.
+For example:</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="hll"> <span class="c"># changing_list changes...</span>
+</span> <span class="nd">@transform</span><span class="p">(</span><span class="n">previous_task</span><span class="p">,</span> <span class="n">suffix</span><span class="p">(</span><span class="s">".foo"</span><span class="p">),</span> <span class="s">".bar"</span><span class="p">,</span> <span class="n">changing_list</span><span class="p">)</span>
+ <span class="k">def</span> <span class="nf">next_task</span><span class="p">(</span><span class="n">input_file</span><span class="p">,</span> <span class="n">output_file</span><span class="p">,</span> <span class="n">changing_list</span><span class="p">):</span>
+ <span class="k">pass</span>
+</pre></div>
+</div>
+</li>
+<li><p class="first">If you need to use the value when the task function is <em>run</em> then see <a class="reference internal" href="#how-about-synchronising-python-objects-in-real-time"><em>the following answer.</em></a>.</p>
+</li>
+</ol>
+</div></blockquote>
+</div>
+<div class="section" id="why-am-i-getting-picklingerror">
+<h3>Why am I getting <tt class="docutils literal"><span class="pre">PicklingError</span></tt>?<a class="headerlink" href="#why-am-i-getting-picklingerror" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><p>What is happening? Didn’t <a class="reference external" href="https://en.wikipedia.org/wiki/Battle_of_the_Herrings">Joan of Arc</a> solve this once and for all?</p>
+<ol class="upperalpha">
+<li><p class="first">Some of the data or code in your function cannot be <a class="reference external" href="http://docs.python.org/2/library/pickle.html">pickled</a> and is being asked to be sent by python <tt class="docutils literal"><span class="pre">mulitprocessing</span></tt> across process boundaries.</p>
+<blockquote>
+<div><p>When you run your pipeline using multiprocess:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="n">pipeline_run</span><span class="p">([],</span> <span class="n">verbose</span> <span class="o">=</span> <span class="mi">5</span><span class="p">,</span> <span class="n">multiprocess</span> <span class="o">=</span> <span class="mi">5</span><span class="p">,</span> <span class="n">logger</span> <span class="o">=</span> <span class="n">ruffusLoggerProxy</span><span class="p">)</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p>You will get the following errors:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="ne">Exception</span> <span class="ow">in</span> <span class="n">thread</span> <span class="n">Thread</span><span class="o">-</span><span class="mi">2</span><span class="p">:</span>
+<span class="n">Traceback</span> <span class="p">(</span><span class="n">most</span> <span class="n">recent</span> <span class="n">call</span> <span class="n">last</span><span class="p">):</span>
+ <span class="n">File</span> <span class="s">"/path/to/python/python2.7/threading.py"</span><span class="p">,</span> <span class="n">line</span> <span class="mi">808</span><span class="p">,</span> <span class="ow">in</span> <span class="n">__bootstrap_inner</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">run</span><span class="p">()</span>
+ <span class="n">File</span> <span class="s">"/path/to/python/python2.7/threading.py"</span><span class="p">,</span> <span class="n">line</span> <span class="mi">761</span><span class="p">,</span> <span class="ow">in</span> <span class="n">run</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">__target</span><span class="p">(</span><span class="o">*</span><span class="bp">self</span><span class="o">.</span><span class="n">__args</span><span class="p">,</span> <span class="o">*</span> <span class="o">*</span><span class="bp">self</span><span class="o">.</span><span class="n">__kwargs</span><span class="p">)</span>
+ <span class="n">File</span> <span class="s">"/path/to/python/python2.7/multiprocessing/pool.py"</span><span class="p">,</span> <span class="n">line</span> <span class="mi">342</span><span class="p">,</span> <span class="ow">in</span> <span class="n">_handle_tasks</span>
+ <span class="n">put</span><span class="p">(</span><span class="n">task</span><span class="p">)</span>
+<span class="n">PicklingError</span><span class="p">:</span> <span class="n">Can</span><span class="s">'t pickle <type '</span><span class="n">function</span><span class="s">'>: attribute lookup __builtin__.function failed</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p>which go away when you set <tt class="docutils literal"><span class="pre">pipeline_run([],</span> <span class="pre">multiprocess</span> <span class="pre">=</span> <span class="pre">1,</span> <span class="pre">...)</span></tt></p>
+</div></blockquote>
+</li>
+</ol>
+<p>Unfortunately, pickling errors are particularly ill-served by standard python error messages. The only really good advice is to take the offending
+code, try to <a class="reference external" href="http://docs.python.org/2/library/pickle.html">pickle</a> it yourself, and narrow down the errors. Check your objects against the list
+in the <a class="reference external" href="http://docs.python.org/2/library/pickle.html#what-can-be-pickled-and-unpickled">pickle</a> module.
+Watch out especially for nested functions: these will have to be moved to module (file) scope.
+Other objects may have to be passed by proxy (see below).</p>
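+<p>A quick way to narrow things down by hand with the standard <tt class="docutils literal"><span class="pre">pickle</span></tt> module (the objects named below are placeholders for whatever your task actually uses):</p>
+<div class="highlight-python"><pre>import pickle
+
+# try each suspect in turn: task parameters, module level data, the task function itself
+for suspect in [some_parameter, some_module_level_object, my_task_function]:
+    try:
+        pickle.dumps(suspect)
+    except Exception as e:
+        print("cannot pickle %r: %s" % (suspect, e))
+</pre>
+</div>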
+</div></blockquote>
+</div>
+<div class="section" id="how-about-synchronising-python-objects-in-real-time">
+<span id="id1"></span><h3>How about synchronising python objects in real time?<a class="headerlink" href="#how-about-synchronising-python-objects-in-real-time" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><ol class="upperalpha simple">
+<li>You can use managers and proxy objects from the <a class="reference external" href="http://docs.python.org/library/multiprocessing.html">multiprocessing</a> module.</li>
+</ol>
+<p>The underlying python object would be owned and managed by a (hidden) server process. Other processes can access the shared objects transparently by using proxies. This is how the logger provided by
+<tt class="docutils literal"><span class="pre">Ruffus.cmdline.setup_logging</span></tt> works:</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="c"># optional logger which can be passed to ruffus tasks</span>
+<span class="n">logger</span><span class="p">,</span> <span class="n">logger_mutex</span> <span class="o">=</span> <span class="n">cmdline</span><span class="o">.</span><span class="n">setup_logging</span> <span class="p">(</span><span class="n">__name__</span><span class="p">,</span> <span class="n">options</span><span class="o">.</span><span class="n">log_file</span><span class="p">,</span> <span class="n">options</span><span class="o">.</span><span class="n">verbose</span><span class= [...]
+</pre></div>
+</div>
+<p><tt class="docutils literal"><span class="pre">logger</span></tt> is a proxy for the underlying python <a class="reference external" href="http://docs.python.org/2/library/logging.html">logger</a> object, and it can be shared freely between processes.</p>
+<p>The best course is to pass <tt class="docutils literal"><span class="pre">logger</span></tt> as a parameter to a <em>Ruffus</em> task.</p>
+<p>The only caveat is that we should make sure multiple jobs are not writing to the log at the same time. To synchronise logging, we use a proxy to a non-reentrant <a class="reference external" href="http://docs.python.org/2/library/multiprocessing.html#multiprocessing.Lock">multiprocessing.Lock</a>.</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="n">logger</span><span class="p">,</span> <span class="n">logger_mutex</span> <span class="o">=</span> <span class="n">cmdline</span><span class="o">.</span><span class="n">setup_logging</span> <span class="p">(</span><span class="n">__name__</span><span class="p">,</span> <span class="n">options</span><span class="o">.</span><span class="n">log_file</span><span class="p">,</span> <span class="n">options</span><span cl [...]
+
+
+<span class="nd">@transform</span><span class="p">(</span><span class="n">previous_task</span><span class="p">,</span> <span class="n">suffix</span><span class="p">(</span><span class="s">".foo"</span><span class="p">),</span> <span class="s">".bar"</span><span class="p">,</span> <span class="n">logger</span><span class="p">,</span> <span class="n">logger_mutex</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">next_task</span><span class="p">(</span><span class="n">input_file</span><span class="p">,</span> <span class="n">output_file</span><span class="p">,</span> <span class="n">logger</span><span class="p">,</span> <span class="n">logger_mutex</span><span class="p">):</span>
+ <span class="k">with</span> <span class="n">logger_mutex</span><span class="p">:</span>
+ <span class="n">logger</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s">"We are in the middle of next_task: </span><span class="si">%s</span><span class="s"> -> </span><span class="si">%s</span><span class="s">"</span> <span class="o">%</span> <span class="p">(</span><span class="n">input_file</span><span class="p">,</span> <span class="n">output_file</span><span class="p">))</span>
+</pre></div>
+</div>
+</div></blockquote>
+</div>
+<div class="section" id="can-i-share-and-synchronise-my-own-python-classes-via-proxies">
+<h3>Can I share and synchronise my own python classes via proxies?<a class="headerlink" href="#can-i-share-and-synchronise-my-own-python-classes-via-proxies" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><ol class="upperalpha">
+<li><p class="first"><a class="reference external" href="http://docs.python.org/2/library/multiprocessing.html#multiprocessing.managers.SyncManager">multiprocessing.managers.SyncManager</a> provides out of the box support for lists, arrays and dicts etc.</p>
+<blockquote>
+<div><p>Most of the time, we can use a “vanilla” manager provided by <a class="reference external" href="http://docs.python.org/2/library/multiprocessing.html#multiprocessing.sharedctypes.multiprocessing.Manager">multiprocessing.Manager()</a>:</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="kn">import</span> <span class="nn">multiprocessing</span>
+<span class="n">manager</span> <span class="o">=</span> <span class="n">multiprocessing</span><span class="o">.</span><span class="n">Manager</span><span class="p">()</span>
+
+<span class="n">list_proxy</span> <span class="o">=</span> <span class="n">manager</span><span class="o">.</span><span class="n">list</span><span class="p">()</span>
+<span class="n">dict_proxy</span> <span class="o">=</span> <span class="n">manager</span><span class="o">.</span><span class="n">dict</span><span class="p">()</span>
+<span class="n">lock_proxy</span> <span class="o">=</span> <span class="n">manager</span><span class="o">.</span><span class="n">Lock</span><span class="p">()</span>
+<span class="n">namespace_proxy</span> <span class="o">=</span> <span class="n">manager</span><span class="o">.</span><span class="n">Namespace</span><span class="p">()</span>
+<span class="n">queue_proxy</span> <span class="o">=</span> <span class="n">manager</span><span class="o">.</span><span class="n">Queue</span><span class="p">([</span><span class="n">maxsize</span><span class="p">])</span>
+<span class="n">rentrant_lock_proxy</span> <span class="o">=</span> <span class="n">manager</span><span class="o">.</span><span class="n">RLock</span><span class="p">()</span>
+<span class="n">semaphore_proxy</span> <span class="o">=</span> <span class="n">manager</span><span class="o">.</span><span class="n">Semaphore</span><span class="p">([</span><span class="n">value</span><span class="p">])</span>
+<span class="n">char_array_proxy</span> <span class="o">=</span> <span class="n">manager</span><span class="o">.</span><span class="n">Array</span><span class="p">(</span><span class="s">'c'</span><span class="p">)</span>
+<span class="n">integer_proxy</span> <span class="o">=</span> <span class="n">manager</span><span class="o">.</span><span class="n">Value</span><span class="p">(</span><span class="s">'i'</span><span class="p">,</span> <span class="mi">6</span><span class="p">)</span>
+
+<span class="nd">@transform</span><span class="p">(</span><span class="n">previous_task</span><span class="p">,</span> <span class="n">suffix</span><span class="p">(</span><span class="s">".foo"</span><span class="p">),</span> <span class="s">".bar"</span><span class="p">,</span> <span class="n">lock_proxy</span><span class="p">,</span> <span class="n">dict_proxy</span><span class="p">,</span> <span class="n">list_proxy</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">next_task</span><span class="p">(</span><span class="n">input_file</span><span class="p">,</span> <span class="n">output_file</span><span class="p">,</span> <span class="n">lock_proxy</span><span class="p">,</span> <span class="n">dict_proxy</span><span class="p">,</span> <span class="n">list_proxy</span><span class="p">):</span>
+ <span class="k">with</span> <span class="n">lock_proxy</span><span class="p">:</span>
+ <span class="n">list_proxy</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="mi">3</span><span class="p">)</span>
+ <span class="n">dict_proxy</span><span class="p">[</span><span class="s">'a'</span><span class="p">]</span> <span class="o">=</span> <span class="mi">5</span>
+</pre></div>
+</div>
+</div></blockquote>
+</li>
+</ol>
+<p>However, you can also create proxy custom classes for your own objects.</p>
+<p>In this case you may need to derive from <a class="reference external" href="http://docs.python.org/2/library/multiprocessing.html#multiprocessing.managers.SyncManager">multiprocessing.managers.SyncManager</a>
+and register proxy functions. See <tt class="docutils literal"><span class="pre">Ruffus.proxy_logger</span></tt> for an example of how to do this.</p>
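+<p>A minimal sketch of this approach (the <tt class="docutils literal"><span class="pre">Counter</span></tt> class and all names below are invented for illustration):</p>
+<div class="highlight-python"><pre>from multiprocessing.managers import SyncManager
+
+# an ordinary python class we would like to share between processes
+class Counter(object):
+    def __init__(self):
+        self.value = 0
+    def increment(self):
+        self.value += 1
+    def get(self):
+        return self.value
+
+class MyManager(SyncManager):
+    pass
+
+# expose Counter through the manager: method calls on the proxy are
+# forwarded to the single instance living in the manager's server process
+MyManager.register("Counter", Counter)
+
+manager = MyManager()
+manager.start()
+counter_proxy = manager.Counter()
+
+@transform(previous_task, suffix(".foo"), ".bar", counter_proxy)
+def next_task(input_file, output_file, counter_proxy):
+    counter_proxy.increment()
+</pre>
+</div>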
+</div></blockquote>
+</div>
+<div class="section" id="how-do-i-send-python-objects-back-and-forth-without-tangling-myself-in-horrible-synchronisation-code">
+<h3>How do I send python objects back and forth without tangling myself in horrible synchronisation code?<a class="headerlink" href="#how-do-i-send-python-objects-back-and-forth-without-tangling-myself-in-horrible-synchronisation-code" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><ol class="upperalpha simple">
+<li>Sharing python objects by passing messages is a much more modern and safer way to coordinate multitasking than using synchronization primitives like locks.</li>
+</ol>
+<p>The python <a class="reference external" href="http://docs.python.org/2/library/multiprocessing.html#pipes-and-queues">multiprocessing</a> module provides support for passing python objects as messages between processes.
+You can either use <a class="reference external" href="http://docs.python.org/2/library/multiprocessing.html#multiprocessing.Pipe">pipes</a>
+or <a class="reference external" href="http://docs.python.org/2/library/multiprocessing.html#multiprocessing.Queue">queues</a>.
+The idea is that one process pushes an object onto a <a class="reference external" href="http://docs.python.org/2/library/multiprocessing.html#multiprocessing.Pipe">pipe</a> or <a class="reference external" href="http://docs.python.org/2/library/multiprocessing.html#multiprocessing.Queue">queue</a>
+and another process pops it off at the other end. <a class="reference external" href="http://docs.python.org/2/library/multiprocessing.html#multiprocessing.Pipe">Pipes</a> have
+only two ends, so <a class="reference external" href="http://docs.python.org/2/library/multiprocessing.html#multiprocessing.Queue">queues</a> are usually a better fit for sending data to multiple Ruffus jobs.</p>
+<p>Proxies for <a class="reference external" href="http://docs.python.org/2/library/multiprocessing.html#multiprocessing.managers.SyncManager.Queue">queues</a> can be passed between processes as in the previous section.</p>
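+<p>For example (names are illustrative), a manager <tt class="docutils literal"><span class="pre">Queue</span></tt> proxy can be handed to each job as a parameter, and the results drained in the master process afterwards:</p>
+<div class="highlight-python"><pre>import multiprocessing
+
+manager = multiprocessing.Manager()
+queue_proxy = manager.Queue()
+
+@transform(previous_task, suffix(".foo"), ".bar", queue_proxy)
+def next_task(input_file, output_file, queue_proxy):
+    # push a message: no explicit locking is needed for a queue
+    queue_proxy.put((input_file, "some result"))
+
+# after pipeline_run(...) has finished:
+#   while not queue_proxy.empty():
+#       print(queue_proxy.get())
+</pre>
+</div>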
+</div></blockquote>
+</div>
+<div class="section" id="how-do-i-share-large-amounts-of-data-efficiently-across-processes">
+<h3>How do I share large amounts of data efficiently across processes?<a class="headerlink" href="#how-do-i-share-large-amounts-of-data-efficiently-across-processes" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><ol class="upperalpha simple">
+<li>If it is really impractical to use data files on disk, you can put the data in shared memory.</li>
+</ol>
+<p>It is possible to create shared objects using shared memory which can be inherited by child processes or passed as Ruffus parameters.
+This is probably most efficiently done via the <a class="reference external" href="http://docs.python.org/2/library/multiprocessing.html#multiprocessing.Array">array</a>
+interface. Again, it is easy to create locks and proxies for synchronised access:</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="kn">from</span> <span class="nn">multiprocessing</span> <span class="kn">import</span> <span class="n">Process</span><span class="p">,</span> <span class="n">Lock</span>
+<span class="kn">from</span> <span class="nn">multiprocessing.sharedctypes</span> <span class="kn">import</span> <span class="n">Value</span><span class="p">,</span> <span class="n">Array</span>
+<span class="kn">from</span> <span class="nn">ctypes</span> <span class="kn">import</span> <span class="n">Structure</span><span class="p">,</span> <span class="n">c_double</span>
+
+<span class="n">manager</span> <span class="o">=</span> <span class="n">multiprocessing</span><span class="o">.</span><span class="n">Manager</span><span class="p">()</span>
+
+<span class="n">lock_proxy</span> <span class="o">=</span> <span class="n">manager</span><span class="o">.</span><span class="n">Lock</span><span class="p">()</span>
+<span class="n">int_array_proxy</span> <span class="o">=</span> <span class="n">manager</span><span class="o">.</span><span class="n">Array</span><span class="p">(</span><span class="s">'i'</span><span class="p">,</span> <span class="p">[</span><span class="mi">123</span><span class="p">]</span> <span class="o">*</span> <span class="mi">100</span><span class="p">)</span>
+
+<span class="nd">@transform</span><span class="p">(</span><span class="n">previous_task</span><span class="p">,</span> <span class="n">suffix</span><span class="p">(</span><span class="s">".foo"</span><span class="p">),</span> <span class="s">".bar"</span><span class="p">,</span> <span class="n">lock_proxy</span><span class="p">,</span> <span class="n">int_array_proxy</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">next_task</span><span class="p">(</span><span class="n">input_file</span><span class="p">,</span> <span class="n">output_file</span><span class="p">,</span> <span class="n">lock_proxy</span><span class="p">,</span> <span class="n">int_array_proxy</span><span class="p">):</span>
+ <span class="k">with</span> <span class="n">lock_proxy</span><span class="p">:</span>
+ <span class="n">int_array_proxy</span><span class="p">[</span><span class="mi">23</span><span class="p">]</span> <span class="o">=</span> <span class="mi">71</span>
+</pre></div>
+</div>
+</div></blockquote>
+</div>
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#">FAQ</a><ul>
+<li><a class="reference internal" href="#citations">Citations</a><ul>
+<li><a class="reference internal" href="#q-how-should-ruffus-be-cited-in-academic-publications">Q. How should <em>Ruffus</em> be cited in academic publications?</a></li>
+</ul>
+</li>
+<li><a class="reference internal" href="#good-practices">Good practices</a><ul>
+<li><a class="reference internal" href="#q-what-is-the-best-way-of-keeping-my-data-and-workings-separate">Q. What is the best way of keeping my data and workings separate?</a></li>
+<li><a class="reference internal" href="#q-what-is-the-best-way-of-handling-data-in-file-pairs-or-triplets-etc">Q. What is the best way of handling data in file pairs (or triplets etc.)</a></li>
+</ul>
+</li>
+<li><a class="reference internal" href="#general">General</a><ul>
+<li><a class="reference internal" href="#q-ruffus-won-t-create-dependency-graphs">Q. <em>Ruffus</em> won’t create dependency graphs</a></li>
+<li><a class="reference internal" href="#q-ruffus-seems-to-be-hanging-in-the-same-place">Q. <em>Ruffus</em> seems to be hanging in the same place</a></li>
+<li><a class="reference internal" href="#q-regular-expression-substitutions-don-t-work">Q. Regular expression substitutions don’t work</a></li>
+<li><a class="reference internal" href="#q-how-to-force-a-pipeline-to-appear-up-to-date">Q. How to force a pipeline to appear up to date?</a></li>
+<li><a class="reference internal" href="#q-how-can-i-use-my-own-decorators-with-ruffus">Q. How can I use my own decorators with Ruffus?</a><ul>
+<li><a class="reference internal" href="#use-wraps-from-functools-or-michele-simionato-s-decorator-module">1. Use @wraps from <tt class="docutils literal"><span class="pre">functools</span></tt> or Michele Simionato’s decorator module</a></li>
+<li><a class="reference internal" href="#always-call-ruffus-decorators-first-before-your-own-decorators">2. Always call Ruffus decorators first before your own decorators.</a></li>
+<li><a class="reference internal" href="#example-decorator">Example decorator:</a></li>
+<li><a class="reference internal" href="#using-functools-wraps">1. Using functools @wraps</a></li>
+<li><a class="reference internal" href="#using-michele-simionato-s-decorator-module">2. Using Michele Simionato’s decorator module</a></li>
+<li><a class="reference internal" href="#by-hand-using-a-callable-object">2. By hand, using a callable object</a></li>
+</ul>
+</li>
+<li><a class="reference internal" href="#q-can-a-task-function-in-a-ruffus-pipeline-be-called-normally-outside-of-ruffus">Q. Can a task function in a <em>Ruffus</em> pipeline be called normally outside of Ruffus?</a></li>
+<li><a class="reference internal" href="#q-my-ruffus-tasks-create-two-files-at-a-time-why-is-the-second-one-ignored-in-successive-stages-of-my-pipeline">Q. My <em>Ruffus</em> tasks create two files at a time. Why is the second one ignored in successive stages of my pipeline?</a></li>
+<li><a class="reference internal" href="#q-how-can-a-ruffus-task-produce-output-which-goes-off-in-different-directions">Q. How can a <em>Ruffus</em> task produce output which goes off in different directions?</a></li>
+<li><a class="reference internal" href="#q-can-i-call-extra-code-before-each-job">Q. Can I call extra code before each job?</a></li>
+<li><a class="reference internal" href="#q-does-ruffus-allow-checkpointing-to-distinguish-interrupted-and-completed-results">Q. Does <em>Ruffus</em> allow checkpointing: to distinguish interrupted and completed results?</a><ul>
+<li><a class="reference internal" href="#a-use-the-builtin-sqlite-checkpointing">A. Use the builtin sqlite checkpointing</a></li>
+<li><a class="reference internal" href="#a-use-a-flag-file">A. Use a flag file</a></li>
+<li><a class="reference internal" href="#a-use-a-temp-file">A. Use a temp file</a></li>
+</ul>
+</li>
+</ul>
+</li>
+<li><a class="reference internal" href="#windows">Windows</a><ul>
+<li><a class="reference internal" href="#q-windows-seems-to-spawn-ruffus-processes-recursively">Q. Windows seems to spawn <em>ruffus</em> processes recursively</a></li>
+</ul>
+</li>
+<li><a class="reference internal" href="#sun-grid-engine-pbs-slurm-etc">Sun Grid Engine / PBS / SLURM etc</a><ul>
+<li><a class="reference internal" href="#q-can-ruffus-be-used-to-manage-a-cluster-or-grid-based-pipeline">Q. Can Ruffus be used to manage a cluster or grid based pipeline?</a></li>
+<li><a class="reference internal" href="#q-when-i-submit-lots-of-jobs-via-sun-grid-engine-sge-the-head-node-occassionally-freezes-and-dies">Q. When I submit lots of jobs via Sun Grid Engine (SGE), the head node occassionally freezes and dies</a></li>
+<li><a class="reference internal" href="#q-keeping-large-intermediate-files">Q. Keeping Large intermediate files</a></li>
+</ul>
+</li>
+<li><a class="reference internal" href="#sharing-python-objects-between-ruffus-processes-running-concurrently">Sharing python objects between Ruffus processes running concurrently</a><ul>
+<li><a class="reference internal" href="#can-ordinary-python-objects-be-shared-between-processes">Can ordinary python objects be shared between processes?</a></li>
+<li><a class="reference internal" href="#why-am-i-getting-picklingerror">Why am I getting <tt class="docutils literal"><span class="pre">PicklingError</span></tt>?</a></li>
+<li><a class="reference internal" href="#how-about-synchronising-python-objects-in-real-time">How about synchronising python objects in real time?</a></li>
+<li><a class="reference internal" href="#can-i-share-and-synchronise-my-own-python-classes-via-proxies">Can I share and synchronise my own python classes via proxies?</a></li>
+<li><a class="reference internal" href="#how-do-i-send-python-objects-back-and-forth-without-tangling-myself-in-horrible-synchronisation-code">How do I send python objects back and forth without tangling myself in horrible synchronisation code?</a></li>
+<li><a class="reference internal" href="#how-do-i-share-large-amounts-of-data-efficiently-across-processes">How do I share large amounts of data efficiently across processes?</a></li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="implementation_notes.html"
+ title="previous chapter">Implementation Tips</a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="glossary.html"
+ title="next chapter">Glossary</a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="_sources/faq.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="decorators/decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="decorators/originate.html">@originate</a> </li>
+ <li><a href="decorators/split.html">@split</a> </li>
+ <li><a href="decorators/transform.html">@transform</a> </li>
+ <li><a href="decorators/merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="decorators/decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="decorators/subdivide.html">@subdivide</a> </li>
+ <li><a href="decorators/transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="decorators/collate.html">@collate</a> </li>
+ <li><a href="decorators/collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="decorators/graphviz.html">@graphviz</a> </li>
+ <li><a href="decorators/mkdir.html">@mkdir</a> </li>
+ <li><a href="decorators/follows.html">@follows / mkdir</a> </li>
+ <li><a href="decorators/posttask.html">@posttask touch_file</a> </li>
+ <li><a href="decorators/active_if.html">@active_if</a> </li>
+ <li><a href="decorators/jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="decorators/decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="decorators/product.html">@product </a> </li>
+ <li><a href="decorators/permutations.html">@permutations </a> </li>
+ <li><a href="decorators/combinations.html">@combinations </a> </li>
+ <li><a href="decorators/combinations_with_replacement.html">@combinations_ _with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="decorators/decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="decorators/files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="decorators/parallel.html">@parallel</a> </li>
+ <li><a href="decorators/check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="decorators/indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="glossary.html" title="Glossary"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="implementation_notes.html" title="Implementation Tips"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="index.html">Home</a> | </li>
+ <li><a href="contents.html">Contents</a> | </li>
+ <li><a href="installation.html">Install</a> | </li>
+ <li><a href="tutorials/new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="tutorials/new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="#">FAQ</a> | </li>
+ <li><a href="cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="tutorials/new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="gallery.html">Gallery</a> | </li>
+ <li><a href="history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/gallery.html b/doc/_build/html/gallery.html
new file mode 100644
index 0000000..4135db5
--- /dev/null
+++ b/doc/_build/html/gallery.html
@@ -0,0 +1,236 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>Hall of Fame: User contributed flowcharts — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: './',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="_static/jquery.js"></script>
+ <script type="text/javascript" src="_static/underscore.js"></script>
+ <script type="text/javascript" src="_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="index.html" />
+ <link rel="next" title="Why Ruffus?" href="why_ruffus.html" />
+ <link rel="prev" title="Glossary" href="glossary.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="why_ruffus.html" title="Why Ruffus?"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="glossary.html" title="Glossary"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="index.html">Home</a> | </li>
+ <li><a href="contents.html">Contents</a> | </li>
+ <li><a href="installation.html">Install</a> | </li>
+ <li><a href="tutorials/new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="tutorials/new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="faq.html">FAQ</a> | </li>
+ <li><a href="cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="tutorials/new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="#">Gallery</a> | </li>
+ <li><a href="history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <img alt="_images/logo.jpg" src="_images/logo.jpg" />
+<div class="section" id="hall-of-fame-user-contributed-flowcharts">
+<h1>Hall of Fame: User contributed flowcharts<a class="headerlink" href="#hall-of-fame-user-contributed-flowcharts" title="Permalink to this headline">¶</a></h1>
+<p>Please contribute your own workflows in your favourite colours, with an (optional) short description,
+to email: ruffus_lib at llew.org.uk</p>
+<div class="section" id="rnaseq-pipeline">
+<h2>RNASeq pipeline<a class="headerlink" href="#rnaseq-pipeline" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p><a class="reference external" href="http://en.wikipedia.org/wiki/RNA-Seq">http://en.wikipedia.org/wiki/RNA-Seq</a></p>
+<p>Mapping transcripts onto genomes using high-throughput sequencing technologies (<a class="reference download internal" href="_downloads/gallery_rna_seq.svg"><tt class="xref download docutils literal"><span class="pre">svg</span></tt></a>).</p>
+<a class="reference external image-reference" href="_downloads/gallery_rna_seq.svg"><img alt="_images/gallery_rna_seq.png" src="_images/gallery_rna_seq.png" /></a>
+</div></blockquote>
+</div>
+<div class="section" id="non-coding-evolutionary-constraints">
+<h2>non-coding evolutionary constraints<a class="headerlink" href="#non-coding-evolutionary-constraints" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p><a class="reference external" href="http://en.wikipedia.org/wiki/Noncoding_DNA">http://en.wikipedia.org/wiki/Noncoding_DNA</a></p>
+<p>Non-protein coding evolutionary constraints in different species (<a class="reference download internal" href="_downloads/gallery_dless.svg"><tt class="xref download docutils literal"><span class="pre">svg</span></tt></a>).</p>
+<a class="reference external image-reference" href="_downloads/gallery_dless.svg"><img alt="_images/gallery_dless.png" src="_images/gallery_dless.png" /></a>
+</div></blockquote>
+</div>
+<div class="section" id="snp-annotation">
+<h2>SNP annotation<a class="headerlink" href="#snp-annotation" title="Permalink to this headline">¶</a></h2>
+<p>Predicting impact of different Single Nucleotide Polymorphisms</p>
+<p><a class="reference external" href="http://en.wikipedia.org/wiki/Single-nucleotide_polymorphism">http://en.wikipedia.org/wiki/Single-nucleotide_polymorphism</a></p>
+<p>Population variation across genomes (<a class="reference download internal" href="_downloads/gallery_snp_annotation.svg"><tt class="xref download docutils literal"><span class="pre">svg</span></tt></a>).</p>
+<a class="reference external image-reference" href="_downloads/gallery_snp_annotation.svg"><img alt="_images/gallery_snp_annotation.png" src="_images/gallery_snp_annotation.png" /></a>
+<p>Using “pseudo” targets to run only part of the pipeline (<a class="reference download internal" href="_downloads/gallery_snp_annotation_consequences.svg"><tt class="xref download docutils literal"><span class="pre">svg</span></tt></a>).</p>
+<a class="reference external image-reference" href="_downloads/gallery_snp_annotation_consequences.svg"><img alt="_images/gallery_snp_annotation_consequences.png" src="_images/gallery_snp_annotation_consequences.png" /></a>
+</div>
+<div class="section" id="chip-seq-analysis">
+<h2>Chip-Seq analysis<a class="headerlink" href="#chip-seq-analysis" title="Permalink to this headline">¶</a></h2>
+<p>Analysing DNA binding sites with Chip-Seq
+<a class="reference external" href="http://en.wikipedia.org/wiki/Chip-Sequencing">http://en.wikipedia.org/wiki/Chip-Sequencing</a></p>
+<blockquote>
+<div><p>(<a class="reference download internal" href="_downloads/gallery_big_pipeline.svg"><tt class="xref download docutils literal"><span class="pre">svg</span></tt></a>)</p>
+<a class="reference external image-reference" href="_downloads/gallery_big_pipeline.svg"><img alt="_images/gallery_big_pipeline.png" src="_images/gallery_big_pipeline.png" /></a>
+</div></blockquote>
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#">Hall of Fame: User contributed flowcharts</a><ul>
+<li><a class="reference internal" href="#rnaseq-pipeline">RNASeq pipeline</a></li>
+<li><a class="reference internal" href="#non-coding-evolutionary-constraints">non-coding evolutionary constraints</a></li>
+<li><a class="reference internal" href="#snp-annotation">SNP annotation</a></li>
+<li><a class="reference internal" href="#chip-seq-analysis">Chip-Seq analysis</a></li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="glossary.html"
+ title="previous chapter">Glossary</a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="why_ruffus.html"
+ title="next chapter">Why <em>Ruffus</em>?</a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="_sources/gallery.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="decorators/decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="decorators/originate.html">@originate</a> </li>
+ <li><a href="decorators/split.html">@split</a> </li>
+ <li><a href="decorators/transform.html">@transform</a> </li>
+ <li><a href="decorators/merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="decorators/decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="decorators/subdivide.html">@subdivide</a> </li>
+ <li><a href="decorators/transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="decorators/collate.html">@collate</a> </li>
+ <li><a href="decorators/collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="decorators/graphviz.html">@graphviz</a> </li>
+ <li><a href="decorators/mkdir.html">@mkdir</a> </li>
+ <li><a href="decorators/follows.html">@follows / mkdir</a> </li>
+ <li><a href="decorators/posttask.html">@posttask touch_file</a> </li>
+ <li><a href="decorators/active_if.html">@active_if</a> </li>
+ <li><a href="decorators/jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="decorators/decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="decorators/product.html">@product </a> </li>
+ <li><a href="decorators/permutations.html">@permutations </a> </li>
+ <li><a href="decorators/combinations.html">@combinations </a> </li>
+ <li><a href="decorators/combinations_with_replacement.html">@combinations_ _with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="decorators/decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="decorators/files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="decorators/parallel.html">@parallel</a> </li>
+ <li><a href="decorators/check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="decorators/indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="why_ruffus.html" title="Why Ruffus?"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="glossary.html" title="Glossary"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="index.html">Home</a> | </li>
+ <li><a href="contents.html">Contents</a> | </li>
+ <li><a href="installation.html">Install</a> | </li>
+ <li><a href="tutorials/new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="tutorials/new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="faq.html">FAQ</a> | </li>
+ <li><a href="cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="tutorials/new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="#">Gallery</a> | </li>
+ <li><a href="history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/genindex.html b/doc/_build/html/genindex.html
new file mode 100644
index 0000000..a400bec
--- /dev/null
+++ b/doc/_build/html/genindex.html
@@ -0,0 +1,2200 @@
+
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>Index — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: './',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="_static/jquery.js"></script>
+ <script type="text/javascript" src="_static/underscore.js"></script>
+ <script type="text/javascript" src="_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="index.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="#" title="General Index"
+ accesskey="I">index</a></li>
+ Ruffus v. 2.5
+ <li><a href="index.html">Home</a> | </li>
+ <li><a href="contents.html">Contents</a> | </li>
+ <li><a href="installation.html">Install</a> | </li>
+ <li><a href="tutorials/new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="tutorials/new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="faq.html">FAQ</a> | </li>
+ <li><a href="cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="tutorials/new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="gallery.html">Gallery</a> | </li>
+ <li><a href="history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+
+<h1 id="index">Index</h1>
+
+<div class="genindex-jumpbox">
+ <a href="#Symbols"><strong>Symbols</strong></a>
+ | <a href="#A"><strong>A</strong></a>
+ | <a href="#B"><strong>B</strong></a>
+ | <a href="#C"><strong>C</strong></a>
+ | <a href="#D"><strong>D</strong></a>
+ | <a href="#E"><strong>E</strong></a>
+ | <a href="#F"><strong>F</strong></a>
+ | <a href="#G"><strong>G</strong></a>
+ | <a href="#I"><strong>I</strong></a>
+ | <a href="#J"><strong>J</strong></a>
+ | <a href="#L"><strong>L</strong></a>
+ | <a href="#M"><strong>M</strong></a>
+ | <a href="#N"><strong>N</strong></a>
+ | <a href="#O"><strong>O</strong></a>
+ | <a href="#P"><strong>P</strong></a>
+ | <a href="#R"><strong>R</strong></a>
+ | <a href="#S"><strong>S</strong></a>
+ | <a href="#T"><strong>T</strong></a>
+ | <a href="#U"><strong>U</strong></a>
+
+</div>
+<h2 id="Symbols">Symbols</h2>
+<table style="width: 100%" class="indextable genindextable"><tr>
+ <td style="width: 33%" valign="top"><dl>
+
+ <dt>
+ @active_if
+ </dt>
+
+ <dd><dl>
+
+ <dt><a href="decorators/active_if.html#index-0">Syntax</a>
+ </dt>
+
+
+ <dt><a href="tutorials/new_tutorial/active_if.html#index-0">Tutorial</a>
+ </dt>
+
+ </dl></dd>
+
+ <dt>
+ @check_if_uptodate
+ </dt>
+
+ <dd><dl>
+
+ <dt><a href="decorators/check_if_uptodate.html#index-0">Syntax</a>
+ </dt>
+
+ </dl></dd>
+
+ <dt>
+ @collate
+ </dt>
+
+ <dd><dl>
+
+ <dt><a href="decorators/collate.html#index-0">Syntax</a>
+ </dt>
+
+
+ <dt><a href="tutorials/new_tutorial/subdivide_collate.html#index-0">Tutorial</a>
+ </dt>
+
+ </dl></dd>
+
+ <dt>
+ @collate (Advanced Usage)
+ </dt>
+
+ <dd><dl>
+
+ <dt><a href="decorators/collate_ex.html#index-0">Syntax</a>
+ </dt>
+
+ </dl></dd>
+
+ <dt>
+ @collate, add_inputs(...)
+ </dt>
+
+ <dd><dl>
+
+ <dt><a href="decorators/collate_ex.html#index-0">Syntax</a>
+ </dt>
+
+ </dl></dd>
+
+ <dt>
+ @collate, inputs(...)
+ </dt>
+
+ <dd><dl>
+
+ <dt><a href="decorators/collate_ex.html#index-0">Syntax</a>
+ </dt>
+
+ </dl></dd>
+
+ <dt>
+ @combinations
+ </dt>
+
+ <dd><dl>
+
+ <dt><a href="decorators/combinations.html#index-0">Syntax</a>
+ </dt>
+
+ </dl></dd>
+
+ <dt>
+ @combinations_with_replacement
+ </dt>
+
+ <dd><dl>
+
+ <dt><a href="decorators/combinations_with_replacement.html#index-0">Syntax</a>
+ </dt>
+
+ </dl></dd>
+
+ <dt>
+ @files
+ </dt>
+
+ <dd><dl>
+
+ <dt><a href="tutorials/new_tutorial/deprecated_files.html#index-1">Manual</a>
+ </dt>
+
+
+ <dt><a href="decorators/files.html#index-0">Syntax</a>
+ </dt>
+
+
+ <dt><a href="tutorials/new_tutorial/onthefly.html#index-1">Tutorial on-the-fly parameter generation</a>
+ </dt>
+
+
+ <dt><a href="tutorials/new_tutorial/deprecated_files.html#index-3">check if up to date</a>
+ </dt>
+
+
+ <dt><a href="tutorials/new_tutorial/deprecated_files.html#index-2">in parallel</a>
+ </dt>
+
+ </dl></dd>
+
+ <dt>
+ @files (on-the-fly parameter generation)
+ </dt>
+
+ <dd><dl>
+
+ <dt><a href="decorators/files_ex.html#index-0">Syntax</a>
+ </dt>
+
+ </dl></dd>
+
+ <dt>
+ @files_re
+ </dt>
+
+ <dd><dl>
+
+ <dt><a href="decorators/files_re.html#index-0">Syntax</a>
+ </dt>
+
+
+ <dt><a href="decorators/indicator_objects.html#index-9">combine (Deprecated Syntax)</a>
+ </dt>
+
+ </dl></dd>
+
+ <dt>
+ @follow
+ </dt>
+
+ <dd><dl>
+
+ <dt><a href="tutorials/new_tutorial/transform_in_parallel.html#index-3">imposing order with</a>
+ </dt>
+
+ </dl></dd>
+
+ <dt>
+ @follows
+ </dt>
+
+ <dd><dl>
+
+ <dt><a href="decorators/follows.html#index-0">Syntax</a>
+ </dt>
+
+
+ <dt><a href="tutorials/new_tutorial/transform_in_parallel.html#index-4">mkdir (Manual)</a>
+ </dt>
+
+
+ <dt><a href="decorators/indicator_objects.html#index-6">mkdir (Syntax)</a>
+ </dt>
+
+ </dl></dd>
+
+ <dt>
+ @graphviz
+ </dt>
+
+ <dd><dl>
+
+ <dt><a href="decorators/graphviz.html#index-0">Syntax</a>
+ </dt>
+
+ </dl></dd>
+ </dl></td>
+ <td style="width: 33%" valign="top"><dl>
+
+ <dt>
+ @jobs_limit
+ </dt>
+
+ <dd><dl>
+
+ <dt><a href="decorators/jobs_limit.html#index-0">Syntax</a>
+ </dt>
+
+
+ <dt><a href="tutorials/new_tutorial/multiprocessing.html#index-3">Tutorial</a>
+ </dt>
+
+ </dl></dd>
+
+ <dt>
+ @merge
+ </dt>
+
+ <dd><dl>
+
+ <dt><a href="decorators/merge.html#index-0">Syntax</a>
+ </dt>
+
+ </dl></dd>
+
+ <dt>
+ @mkdir
+ </dt>
+
+ <dd><dl>
+
+ <dt><a href="decorators/mkdir.html#index-0">Syntax</a>
+ </dt>
+
+ </dl></dd>
+
+ <dt>
+ @originate
+ </dt>
+
+ <dd><dl>
+
+ <dt><a href="decorators/originate.html#index-0">Syntax</a>
+ </dt>
+
+ </dl></dd>
+
+ <dt>
+ @parallel
+ </dt>
+
+ <dd><dl>
+
+ <dt><a href="decorators/parallel.html#index-0">Syntax</a>
+ </dt>
+
+
+ <dt><a href="tutorials/new_tutorial/parallel.html#index-0">Tutorial</a>
+ </dt>
+
+ </dl></dd>
+
+ <dt>
+ @permutations
+ </dt>
+
+ <dd><dl>
+
+ <dt><a href="decorators/permutations.html#index-0">Syntax</a>
+ </dt>
+
+ </dl></dd>
+
+ <dt>
+ @posttask
+ </dt>
+
+ <dd><dl>
+
+ <dt><a href="decorators/posttask.html#index-0">Syntax</a>
+ </dt>
+
+
+ <dt><a href="decorators/indicator_objects.html#index-7">touch_file (Syntax)</a>
+ </dt>
+
+
+ <dt><a href="tutorials/new_tutorial/posttask.html#index-1">touchfile (Manual)</a>
+ </dt>
+
+ </dl></dd>
+
+ <dt>
+ @product
+ </dt>
+
+ <dd><dl>
+
+ <dt><a href="decorators/product.html#index-0">Syntax</a>
+ </dt>
+
+ </dl></dd>
+
+ <dt>
+ @split
+ </dt>
+
+ <dd><dl>
+
+ <dt><a href="decorators/split.html#index-0">Syntax</a>
+ </dt>
+
+ </dl></dd>
+
+ <dt>
+ @subdivide
+ </dt>
+
+ <dd><dl>
+
+ <dt><a href="decorators/subdivide.html#index-0">Syntax</a>
+ </dt>
+
+
+ <dt><a href="tutorials/new_tutorial/subdivide_collate.html#index-0">Tutorial</a>
+ </dt>
+
+ </dl></dd>
+
+ <dt>
+ @transform
+ </dt>
+
+ <dd><dl>
+
+ <dt><a href="decorators/transform.html#index-0">Syntax</a>
+ </dt>
+
+
+ <dt><a href="tutorials/new_tutorial/transform_in_parallel.html#index-2">multiple dependencies</a>
+ </dt>
+
+ </dl></dd>
+
+ <dt>
+ @transform, add_inputs(...)
+ </dt>
+
+ <dd><dl>
+
+ <dt><a href="decorators/transform_ex.html#index-0">Syntax</a>
+ </dt>
+
+ </dl></dd>
+
+ <dt>
+ @transform, inputs(...)
+ </dt>
+
+ <dd><dl>
+
+ <dt><a href="decorators/transform_ex.html#index-0">Syntax</a>
+ </dt>
+
+ </dl></dd>
+ </dl></td>
+</tr></table>
+
+<h2 id="A">A</h2>
+<table style="width: 100%" class="indextable genindextable"><tr>
+ <td style="width: 33%" valign="top"><dl>
+
+ <dt><a href="design.html#index-2">Acknowledgements</a>
+ </dt>
+
+
+ <dt>
+ add_inputs
+ </dt>
+
+ <dd><dl>
+
+ <dt><a href="decorators/indicator_objects.html#index-4">Indicator Object (Adding additional input parameters)</a>
+ </dt>
+
+
+ <dt><a href="tutorials/new_tutorial/inputs.html#index-0">Tutorial</a>
+ </dt>
+
+ </dl></dd>
+ </dl></td>
+ <td style="width: 33%" valign="top"><dl>
+
+ <dt><a href="task.html#ruffus.task.args_param_factory">args_param_factory() (in module ruffus.task)</a>
+ </dt>
+
+ </dl></td>
+</tr></table>
+
+<h2 id="B">B</h2>
+<table style="width: 100%" class="indextable genindextable"><tr>
+ <td style="width: 33%" valign="top"><dl>
+
+ <dt><a href="tutorials/new_tutorial/exceptions.html#index-1">break</a>
+ </dt>
+
+ </dl></td>
+</tr></table>
+
+<h2 id="C">C</h2>
+<table style="width: 100%" class="indextable genindextable"><tr>
+ <td style="width: 33%" valign="top"><dl>
+
+ <dt>
+ check if up to date
+ </dt>
+
+ <dd><dl>
+
+ <dt><a href="tutorials/new_tutorial/deprecated_files.html#index-3">@files</a>
+ </dt>
+
+ </dl></dd>
+
+ <dt>
+ check_if_uptodate
+ </dt>
+
+ <dd><dl>
+
+ <dt><a href="tutorials/new_tutorial/check_if_uptodate.html#index-0">Tutorial</a>
+ </dt>
+
+ </dl></dd>
+
+ <dt>
+ Checking dependencies
+ </dt>
+
+ <dd><dl>
+
+ <dt><a href="tutorials/new_tutorial/dependencies.html#index-0">Tutorial</a>
+ </dt>
+
+ </dl></dd>
+
+ <dt><a href="task.html#ruffus.task.collate_param_factory">collate_param_factory() (in module ruffus.task)</a>
+ </dt>
+
+ </dl></td>
+ <td style="width: 33%" valign="top"><dl>
+
+ <dt>
+ combinatorics
+ </dt>
+
+ <dd><dl>
+
+ <dt><a href="tutorials/new_tutorial/combinatorics.html#index-0">Tutorial</a>
+ </dt>
+
+ </dl></dd>
+
+ <dt>
+ combine
+ </dt>
+
+ <dd><dl>
+
+ <dt><a href="decorators/indicator_objects.html#index-9">@follows (Deprecated Syntax)</a>
+ </dt>
+
+
+ <dt><a href="tutorials/new_tutorial/deprecated_files_re.html#index-1">Manual</a>
+ </dt>
+
+ </dl></dd>
+
+ <dt>
+ command line
+ </dt>
+
+ <dd><dl>
+
+ <dt><a href="tutorials/new_tutorial/command_line.html#index-0">Tutorial</a>
+ </dt>
+
+ </dl></dd>
+
+ <dt>
+ Comparison of Ruffus with alternatives
+ </dt>
+
+ <dd><dl>
+
+ <dt><a href="design.html#index-1">Design</a>
+ </dt>
+
+ </dl></dd>
+ </dl></td>
+</tr></table>
+
+<h2 id="D">D</h2>
+<table style="width: 100%" class="indextable genindextable"><tr>
+ <td style="width: 33%" valign="top"><dl>
+
+ <dt>
+ data sharing across processes
+ </dt>
+
+ <dd><dl>
+
+ <dt><a href="tutorials/new_tutorial/multiprocessing.html#index-2">Tutorial</a>
+ </dt>
+
+ </dl></dd>
+
+ <dt><a href="glossary.html#term-decorator"><strong>decorator</strong></a>
+ </dt>
+
+
+ <dt>
+ decorators_compendium
+ </dt>
+
+ <dd><dl>
+
+ <dt><a href="tutorials/new_tutorial/decorators_compendium.html#index-0">Tutorial</a>
+ </dt>
+
+ </dl></dd>
+
+ <dt>
+ defining tasks out of order
+ </dt>
+
+ <dd><dl>
+
+ <dt><a href="tutorials/new_tutorial/transform_in_parallel.html#index-1">output_from</a>
+ </dt>
+
+ </dl></dd>
+ </dl></td>
+ <td style="width: 33%" valign="top"><dl>
+
+ <dt>
+ deprecated @files
+ </dt>
+
+ <dd><dl>
+
+ <dt><a href="tutorials/new_tutorial/deprecated_files.html#index-0">Tutorial</a>
+ </dt>
+
+ </dl></dd>
+
+ <dt>
+ deprecated @files_re
+ </dt>
+
+ <dd><dl>
+
+ <dt><a href="tutorials/new_tutorial/deprecated_files_re.html#index-0">Tutorial</a>
+ </dt>
+
+ </dl></dd>
+
+ <dt>
+ Design
+ </dt>
+
+ <dd><dl>
+
+ <dt><a href="design.html#index-1">Comparison of Ruffus with alternatives</a>
+ </dt>
+
+
+ <dt><a href="design.html#index-0">Ruffus</a>
+ </dt>
+
+ </dl></dd>
+
+ <dt>
+ drmaa
+ </dt>
+
+ <dd><dl>
+
+ <dt><a href="drmaa_wrapper_functions.html#index-0">run_job</a>
+ </dt>
+
+ </dl></dd>
+ </dl></td>
+</tr></table>
+
+<h2 id="E">E</h2>
+<table style="width: 100%" class="indextable genindextable"><tr>
+ <td style="width: 33%" valign="top"><dl>
+
+ <dt><a href="tutorials/new_tutorial/exceptions.html#index-1">errors</a>
+ </dt>
+
+
+ <dt>
+ Etymology
+ </dt>
+
+ <dd><dl>
+
+ <dt><a href="why_ruffus.html#index-0">Ruffus</a>
+ </dt>
+
+ </dl></dd>
+
+ <dt>
+ Exception
+ </dt>
+
+ <dd><dl>
+
+ <dt><a href="tutorials/new_tutorial/checkpointing.html#index-4">Missing input files</a>
+ </dt>
+
+ </dl></dd>
+ </dl></td>
+ <td style="width: 33%" valign="top"><dl>
+
+ <dt>
+ Exceptions
+ </dt>
+
+ <dd><dl>
+
+ <dt><a href="tutorials/new_tutorial/checkpointing.html#index-0">Tutorial</a>
+ </dt>
+
+ </dl></dd>
+
+ <dt><a href="tutorials/new_tutorial/exceptions.html#index-1">exceptions</a>
+ </dt>
+
+ <dd><dl>
+
+ <dt><a href="tutorials/new_tutorial/exceptions.html#index-0">Tutorial</a>
+ </dt>
+
+ </dl></dd>
+ </dl></td>
+</tr></table>
+
+<h2 id="F">F</h2>
+<table style="width: 100%" class="indextable genindextable"><tr>
+ <td style="width: 33%" valign="top"><dl>
+
+ <dt><a href="task.html#ruffus.task.files_param_factory">files_param_factory() (in module ruffus.task)</a>
+ </dt>
+
+
+ <dt>
+ flag files
+ </dt>
+
+ <dd><dl>
+
+ <dt><a href="tutorials/new_tutorial/checkpointing.html#index-6">Manual</a>
+ </dt>
+
+ </dl></dd>
+
+ <dt>
+ flowchart colours
+ </dt>
+
+ <dd><dl>
+
+ <dt><a href="tutorials/new_tutorial/flowchart_colours.html#index-0">Tutorial</a>, <a href="tutorials/new_tutorial/flowchart_colours_code.html#index-0">[1]</a>
+ </dt>
+
+ </dl></dd>
+ </dl></td>
+ <td style="width: 33%" valign="top"><dl>
+
+ <dt>
+ for rerunning jobs
+ </dt>
+
+ <dd><dl>
+
+ <dt><a href="tutorials/new_tutorial/checkpointing.html#index-3">rules</a>
+ </dt>
+
+ </dl></dd>
+
+ <dt>
+ formatter
+ </dt>
+
+ <dd><dl>
+
+ <dt><a href="decorators/indicator_objects.html#index-1">Indicator Object (Disambiguating parameters)</a>
+ </dt>
+
+
+ <dt><a href="tutorials/new_tutorial/output_file_names.html#index-0">Tutorial</a>
+ </dt>
+
+ </dl></dd>
+ </dl></td>
+</tr></table>
+
+<h2 id="G">G</h2>
+<table style="width: 100%" class="indextable genindextable"><tr>
+ <td style="width: 33%" valign="top"><dl>
+
+ <dt><a href="glossary.html#term-generator"><strong>generator</strong></a>
+ </dt>
+
+
+ <dt>
+ globs
+ </dt>
+
+ <dd><dl>
+
+ <dt><a href="tutorials/new_tutorial/transform_in_parallel.html#index-5">inputs parameters</a>
+ </dt>
+
+ </dl></dd>
+ </dl></td>
+ <td style="width: 33%" valign="top"><dl>
+
+ <dt>
+ globs in input parameters
+ </dt>
+
+ <dd><dl>
+
+ <dt><a href="tutorials/new_tutorial/transform_in_parallel.html#index-5">Tutorial</a>
+ </dt>
+
+ </dl></dd>
+ </dl></td>
+</tr></table>
+
+<h2 id="I">I</h2>
+<table style="width: 100%" class="indextable genindextable"><tr>
+ <td style="width: 33%" valign="top"><dl>
+
+ <dt><a href="tutorials/new_tutorial/introduction.html#index-1">importing ruffus</a>
+ </dt>
+
+
+ <dt>
+ imposing order with
+ </dt>
+
+ <dd><dl>
+
+ <dt><a href="tutorials/new_tutorial/transform_in_parallel.html#index-3">@follow</a>
+ </dt>
+
+ </dl></dd>
+
+ <dt>
+ in parallel
+ </dt>
+
+ <dd><dl>
+
+ <dt><a href="tutorials/new_tutorial/deprecated_files.html#index-2">@files</a>
+ </dt>
+
+ </dl></dd>
+
+ <dt>
+ Indicator Object (Adding additional input parameters)
+ </dt>
+
+ <dd><dl>
+
+ <dt><a href="decorators/indicator_objects.html#index-4">add_inputs</a>
+ </dt>
+
+ </dl></dd>
+
+ <dt><a href="decorators/indicator_objects.html#index-0">Indicator Object (Disambiguating parameters)</a>
+ </dt>
+
+ <dd><dl>
+
+ <dt><a href="decorators/indicator_objects.html#index-9">combine</a>
+ </dt>
+
+
+ <dt><a href="decorators/indicator_objects.html#index-1">formatter</a>
+ </dt>
+
+
+ <dt><a href="decorators/indicator_objects.html#index-6">mkdir</a>
+ </dt>
+
+
+ <dt><a href="decorators/indicator_objects.html#index-8">output_from</a>
+ </dt>
+
+
+ <dt><a href="decorators/indicator_objects.html#index-3">regex</a>
+ </dt>
+
+
+ <dt><a href="decorators/indicator_objects.html#index-2">suffix</a>
+ </dt>
+
+
+ <dt><a href="decorators/indicator_objects.html#index-7">touch_file</a>
+ </dt>
+
+ </dl></dd>
+
+ <dt>
+ Indicator Object (Replacing input parameters)
+ </dt>
+
+ <dd><dl>
+
+ <dt><a href="decorators/indicator_objects.html#index-5">inputs</a>
+ </dt>
+
+ </dl></dd>
+ </dl></td>
+ <td style="width: 33%" valign="top"><dl>
+
+ <dt>
+ input / output parameters
+ </dt>
+
+ <dd><dl>
+
+ <dt><a href="tutorials/new_tutorial/transform.html#index-2">Tutorial</a>
+ </dt>
+
+ </dl></dd>
+
+ <dt>
+ inputs
+ </dt>
+
+ <dd><dl>
+
+ <dt><a href="decorators/indicator_objects.html#index-5">Indicator Object (Replacing input parameters)</a>
+ </dt>
+
+
+ <dt><a href="tutorials/new_tutorial/inputs.html#index-0">Tutorial</a>
+ </dt>
+
+ </dl></dd>
+
+ <dt>
+ inputs parameters
+ </dt>
+
+ <dd><dl>
+
+ <dt><a href="tutorials/new_tutorial/transform_in_parallel.html#index-5">globs</a>
+ </dt>
+
+ </dl></dd>
+
+ <dt>
+ Interrupted Pipeline
+ </dt>
+
+ <dd><dl>
+
+ <dt><a href="tutorials/new_tutorial/checkpointing.html#index-0">Tutorial</a>
+ </dt>
+
+ </dl></dd>
+
+ <dt>
+ interrupting tasks
+ </dt>
+
+ <dd><dl>
+
+ <dt><a href="tutorials/new_tutorial/checkpointing.html#index-1">Tutorial</a>
+ </dt>
+
+ </dl></dd>
+
+ <dt><a href="tutorials/new_tutorial/exceptions.html#index-1">interrupts</a>
+ </dt>
+
+ </dl></td>
+</tr></table>
+
+<h2 id="J">J</h2>
+<table style="width: 100%" class="indextable genindextable"><tr>
+ <td style="width: 33%" valign="top"><dl>
+
+ <dt><a href="glossary.html#term-job"><strong>job</strong></a>
+ </dt>
+
+
+ <dt><a href="task.html#ruffus.task.job_wrapper_generic">job_wrapper_generic() (in module ruffus.task)</a>
+ </dt>
+
+ </dl></td>
+ <td style="width: 33%" valign="top"><dl>
+
+ <dt><a href="task.html#ruffus.task.job_wrapper_io_files">job_wrapper_io_files() (in module ruffus.task)</a>
+ </dt>
+
+
+ <dt><a href="task.html#ruffus.task.job_wrapper_mkdir">job_wrapper_mkdir() (in module ruffus.task)</a>
+ </dt>
+
+ </dl></td>
+</tr></table>
+
+<h2 id="L">L</h2>
+<table style="width: 100%" class="indextable genindextable"><tr>
+ <td style="width: 33%" valign="top"><dl>
+
+ <dt>
+ logging
+ </dt>
+
+ <dd><dl>
+
+ <dt><a href="tutorials/new_tutorial/logging.html#index-0">Tutorial</a>
+ </dt>
+
+ </dl></dd>
+
+ <dt>
+ logging customising
+ </dt>
+
+ <dd><dl>
+
+ <dt><a href="tutorials/new_tutorial/logging.html#index-3">Tutorial</a>
+ </dt>
+
+ </dl></dd>
+ </dl></td>
+ <td style="width: 33%" valign="top"><dl>
+
+ <dt>
+ logging with ruffus.cmdline
+ </dt>
+
+ <dd><dl>
+
+ <dt><a href="tutorials/new_tutorial/logging.html#index-2">Tutorial</a>
+ </dt>
+
+ </dl></dd>
+
+ <dt>
+ logging your own message
+ </dt>
+
+ <dd><dl>
+
+ <dt><a href="tutorials/new_tutorial/logging.html#index-4">Tutorial</a>
+ </dt>
+
+ </dl></dd>
+ </dl></td>
+</tr></table>
+
+<h2 id="M">M</h2>
+<table style="width: 100%" class="indextable genindextable"><tr>
+ <td style="width: 33%" valign="top"><dl>
+
+ <dt><a href="proxy_logger.html#ruffus.proxy_logger.make_shared_logger_and_proxy">make_shared_logger_and_proxy() (in module ruffus.proxy_logger)</a>
+ </dt>
+
+
+ <dt>
+ Manual
+ </dt>
+
+ <dd><dl>
+
+ <dt><a href="tutorials/new_tutorial/deprecated_files.html#index-1">@files</a>
+ </dt>
+
+
+ <dt><a href="tutorials/new_tutorial/checkpointing.html#index-5">Timestamp resolution</a>
+ </dt>
+
+
+ <dt><a href="tutorials/new_tutorial/deprecated_files_re.html#index-1">combine</a>
+ </dt>
+
+
+ <dt><a href="tutorials/new_tutorial/checkpointing.html#index-6">flag files</a>
+ </dt>
+
+ </dl></dd>
+
+ <dt>
+ merge
+ </dt>
+
+ <dd><dl>
+
+ <dt><a href="tutorials/new_tutorial/merge.html#index-0">Tutorial</a>
+ </dt>
+
+ </dl></dd>
+
+ <dt><a href="task.html#ruffus.task.merge_param_factory">merge_param_factory() (in module ruffus.task)</a>
+ </dt>
+
+
+ <dt>
+ Missing input files
+ </dt>
+
+ <dd><dl>
+
+ <dt><a href="tutorials/new_tutorial/checkpointing.html#index-4">Exception</a>
+ </dt>
+
+ </dl></dd>
+ </dl></td>
+ <td style="width: 33%" valign="top"><dl>
+
+ <dt>
+ Mixing tasks, globs and file names
+ </dt>
+
+ <dd><dl>
+
+ <dt><a href="tutorials/new_tutorial/transform_in_parallel.html#index-6">Tutorial</a>
+ </dt>
+
+ </dl></dd>
+
+ <dt>
+ mkdir
+ </dt>
+
+ <dd><dl>
+
+ <dt><a href="tutorials/new_tutorial/transform_in_parallel.html#index-4">@follows (Manual)</a>
+ </dt>
+
+
+ <dt><a href="decorators/indicator_objects.html#index-6">@follows (Syntax)</a>
+ </dt>
+
+
+ <dt><a href="tutorials/new_tutorial/mkdir.html#index-0">Tutorial</a>
+ </dt>
+
+ </dl></dd>
+
+ <dt>
+ multiple dependencies
+ </dt>
+
+ <dd><dl>
+
+ <dt><a href="tutorials/new_tutorial/transform_in_parallel.html#index-2">@transform</a>
+ </dt>
+
+ </dl></dd>
+
+ <dt><a href="tutorials/new_tutorial/exceptions.html#index-1">multiple errors</a>
+ </dt>
+
+
+ <dt>
+ multiprocessing
+ </dt>
+
+ <dd><dl>
+
+ <dt><a href="tutorials/new_tutorial/multiprocessing.html#index-0">Tutorial</a>
+ </dt>
+
+ </dl></dd>
+ </dl></td>
+</tr></table>
+
+<h2 id="N">N</h2>
+<table style="width: 100%" class="indextable genindextable"><tr>
+ <td style="width: 33%" valign="top"><dl>
+
+ <dt>
+ Name origins
+ </dt>
+
+ <dd><dl>
+
+ <dt><a href="why_ruffus.html#index-0">Ruffus</a>
+ </dt>
+
+ </dl></dd>
+
+ <dt><a href="task.html#ruffus.task.needs_update_check_directory_missing">needs_update_check_directory_missing() (in module ruffus.task)</a>
+ </dt>
+
+ </dl></td>
+ <td style="width: 33%" valign="top"><dl>
+
+ <dt><a href="task.html#ruffus.task.needs_update_check_modify_time">needs_update_check_modify_time() (in module ruffus.task)</a>
+ </dt>
+
+ </dl></td>
+</tr></table>
+
+<h2 id="O">O</h2>
+<table style="width: 100%" class="indextable genindextable"><tr>
+ <td style="width: 33%" valign="top"><dl>
+
+ <dt>
+ on_the_fly
+ </dt>
+
+ <dd><dl>
+
+ <dt><a href="tutorials/new_tutorial/onthefly.html#index-0">Tutorial</a>
+ </dt>
+
+ </dl></dd>
+
+ <dt>
+ one to one @transform
+ </dt>
+
+ <dd><dl>
+
+ <dt><a href="tutorials/new_tutorial/transform.html#index-1">Tutorial</a>
+ </dt>
+
+ </dl></dd>
+
+ <dt>
+ originate
+ </dt>
+
+ <dd><dl>
+
+ <dt><a href="tutorials/new_tutorial/originate.html#index-0">Tutorial</a>
+ </dt>
+
+ </dl></dd>
+ </dl></td>
+ <td style="width: 33%" valign="top"><dl>
+
+ <dt>
+ output file names
+ </dt>
+
+ <dd><dl>
+
+ <dt><a href="tutorials/new_tutorial/output_file_names.html#index-0">Tutorial</a>
+ </dt>
+
+ </dl></dd>
+
+ <dt>
+ output_from
+ </dt>
+
+ <dd><dl>
+
+ <dt><a href="decorators/indicator_objects.html#index-8">Indicator Object (Disambiguating parameters)</a>
+ </dt>
+
+
+ <dt><a href="tutorials/new_tutorial/transform_in_parallel.html#index-1">defining tasks out of order</a>
+ </dt>
+
+
+ <dt><a href="tutorials/new_tutorial/transform_in_parallel.html#index-1">referring to functions before they are defined</a>
+ </dt>
+
+ </dl></dd>
+
+ <dt>
+ overview
+ </dt>
+
+ <dd><dl>
+
+ <dt><a href="tutorials/new_tutorial/introduction.html#index-0">Tutorial</a>
+ </dt>
+
+ </dl></dd>
+ </dl></td>
+</tr></table>
+
+<h2 id="P">P</h2>
+<table style="width: 100%" class="indextable genindextable"><tr>
+ <td style="width: 33%" valign="top"><dl>
+
+ <dt>
+ pipeline functions
+ </dt>
+
+ <dd><dl>
+
+ <dt><a href="pipeline_functions.html#index-3">pipeline_get_task_names</a>
+ </dt>
+
+
+ <dt><a href="pipeline_functions.html#index-2">pipeline_printout_graph</a>
+ </dt>
+
+
+ <dt><a href="pipeline_functions.html#index-0">pipeline_run</a>, <a href="pipeline_functions.html#index-1">[1]</a>
+ </dt>
+
+ </dl></dd>
+
+ <dt>
+ pipeline_get_task_names
+ </dt>
+
+ <dd><dl>
+
+ <dt><a href="pipeline_functions.html#index-3">print list of task names without running the pipeline</a>
+ </dt>
+
+ </dl></dd>
+
+ <dt>
+ pipeline_printout
+ </dt>
+
+ <dd><dl>
+
+ <dt><a href="pipeline_functions.html#index-1">Printout simulated run of the pipeline</a>
+ </dt>
+
+
+ <dt><a href="tutorials/new_tutorial/pipeline_printout.html#index-0">Tutorial</a>
+ </dt>
+
+ </dl></dd>
+
+ <dt><a href="task.html#ruffus.task.pipeline_printout">pipeline_printout() (in module ruffus.task)</a>
+ </dt>
+
+
+ <dt>
+ pipeline_printout_graph
+ </dt>
+
+ <dd><dl>
+
+ <dt><a href="tutorials/new_tutorial/pipeline_printout_graph.html#index-0">Tutorial</a>
+ </dt>
+
+
+ <dt><a href="pipeline_functions.html#index-2">print flowchart representation of pipeline functions</a>
+ </dt>
+
+ </dl></dd>
+
+ <dt><a href="task.html#ruffus.task.pipeline_printout_graph">pipeline_printout_graph() (in module ruffus.task)</a>
+ </dt>
+
+
+ <dt>
+ pipeline_run
+ </dt>
+
+ <dd><dl>
+
+ <dt><a href="pipeline_functions.html#index-0">Run pipeline</a>
+ </dt>
+
+
+ <dt><a href="tutorials/new_tutorial/introduction.html#index-2">Tutorial</a>
+ </dt>
+
+ </dl></dd>
+
+ <dt>
+ pipeline_run touch mode
+ </dt>
+
+ <dd><dl>
+
+ <dt><a href="tutorials/new_tutorial/multiprocessing.html#index-4">Tutorial</a>
+ </dt>
+
+ </dl></dd>
+ </dl></td>
+ <td style="width: 33%" valign="top"><dl>
+
+ <dt>
+ pipeline_run verbosity
+ </dt>
+
+ <dd><dl>
+
+ <dt><a href="tutorials/new_tutorial/logging.html#index-1">Tutorial</a>
+ </dt>
+
+ </dl></dd>
+
+ <dt><a href="task.html#ruffus.task.pipeline_run">pipeline_run() (in module ruffus.task)</a>
+ </dt>
+
+
+ <dt>
+ pipeline_run(multiprocess)
+ </dt>
+
+ <dd><dl>
+
+ <dt><a href="tutorials/new_tutorial/multiprocessing.html#index-1">Tutorial</a>
+ </dt>
+
+ </dl></dd>
+
+ <dt>
+ posttask
+ </dt>
+
+ <dd><dl>
+
+ <dt><a href="tutorials/new_tutorial/posttask.html#index-0">Tutorial</a>
+ </dt>
+
+ </dl></dd>
+
+ <dt>
+ print flowchart representation of pipeline functions
+ </dt>
+
+ <dd><dl>
+
+ <dt><a href="pipeline_functions.html#index-2">pipeline_printout_graph</a>
+ </dt>
+
+ </dl></dd>
+
+ <dt>
+ print list of task names without running the pipeline
+ </dt>
+
+ <dd><dl>
+
+ <dt><a href="pipeline_functions.html#index-3">pipeline_get_task_names</a>
+ </dt>
+
+ </dl></dd>
+
+ <dt>
+ Printout simulated run of the pipeline
+ </dt>
+
+ <dd><dl>
+
+ <dt><a href="pipeline_functions.html#index-1">pipeline_printout</a>
+ </dt>
+
+ </dl></dd>
+ </dl></td>
+</tr></table>
+
+<h2 id="R">R</h2>
+<table style="width: 100%" class="indextable genindextable"><tr>
+ <td style="width: 33%" valign="top"><dl>
+
+ <dt>
+ referring to functions before they are defined
+ </dt>
+
+ <dd><dl>
+
+ <dt><a href="tutorials/new_tutorial/transform_in_parallel.html#index-1">output_from</a>
+ </dt>
+
+ </dl></dd>
+
+ <dt>
+ Regenerating the checkpoint file
+ </dt>
+
+ <dd><dl>
+
+ <dt><a href="tutorials/new_tutorial/checkpointing.html#index-2">Tutorial</a>
+ </dt>
+
+ </dl></dd>
+
+ <dt>
+ regex
+ </dt>
+
+ <dd><dl>
+
+ <dt><a href="decorators/indicator_objects.html#index-3">Indicator Object (Disambiguating parameters)</a>
+ </dt>
+
+
+ <dt><a href="tutorials/new_tutorial/output_file_names.html#index-0">Tutorial</a>
+ </dt>
+
+ </dl></dd>
+
+ <dt>
+ Ruffus
+ </dt>
+
+ <dd><dl>
+
+ <dt><a href="design.html#index-0">Design</a>
+ </dt>
+
+
+ <dt><a href="why_ruffus.html#index-0">Etymology</a>
+ </dt>
+
+
+ <dt><a href="why_ruffus.html#index-0">Name origins</a>
+ </dt>
+
+ </dl></dd>
+
+ <dt>
+ Ruffus names list
+ </dt>
+
+ <dd><dl>
+
+ <dt><a href="tutorials/new_tutorial/list_of_ruffus_names.html#index-0">Tutorial</a>
+ </dt>
+
+ </dl></dd>
+ </dl></td>
+ <td style="width: 33%" valign="top"><dl>
+
+ <dt><a href="proxy_logger.html#module-ruffus.proxy_logger">ruffus.proxy_logger (module)</a>
+ </dt>
+
+
+ <dt>
+ rules
+ </dt>
+
+ <dd><dl>
+
+ <dt><a href="tutorials/new_tutorial/checkpointing.html#index-3">for rerunning jobs</a>
+ </dt>
+
+ </dl></dd>
+
+ <dt>
+ Run drmaa
+ </dt>
+
+ <dd><dl>
+
+ <dt><a href="drmaa_wrapper_functions.html#index-0">run_job</a>
+ </dt>
+
+ </dl></dd>
+
+ <dt>
+ Run pipeline
+ </dt>
+
+ <dd><dl>
+
+ <dt><a href="pipeline_functions.html#index-0">pipeline_run</a>
+ </dt>
+
+ </dl></dd>
+
+ <dt>
+ run_job
+ </dt>
+
+ <dd><dl>
+
+ <dt><a href="drmaa_wrapper_functions.html#index-0">Run drmaa</a>
+ </dt>
+
+ </dl></dd>
+ </dl></td>
+</tr></table>
+
+<h2 id="S">S</h2>
+<table style="width: 100%" class="indextable genindextable"><tr>
+ <td style="width: 33%" valign="top"><dl>
+
+ <dt><a href="proxy_logger.html#ruffus.proxy_logger.setup_std_shared_logger">setup_std_shared_logger() (in module ruffus.proxy_logger)</a>
+ </dt>
+
+
+ <dt><a href="tutorials/new_tutorial/exceptions.html#index-1">signalling</a>
+ </dt>
+
+
+ <dt>
+ split
+ </dt>
+
+ <dd><dl>
+
+ <dt><a href="tutorials/new_tutorial/split.html#index-0">Tutorial</a>
+ </dt>
+
+ </dl></dd>
+
+ <dt><a href="task.html#ruffus.task.split_param_factory">split_param_factory() (in module ruffus.task)</a>
+ </dt>
+
+ </dl></td>
+ <td style="width: 33%" valign="top"><dl>
+
+ <dt>
+      string substitution for inputs
+ </dt>
+
+ <dd><dl>
+
+ <dt><a href="tutorials/new_tutorial/inputs.html#index-0">Tutorial</a>
+ </dt>
+
+ </dl></dd>
+
+ <dt>
+ suffix
+ </dt>
+
+ <dd><dl>
+
+ <dt><a href="decorators/indicator_objects.html#index-2">Indicator Object (Disambiguating parameters)</a>
+ </dt>
+
+
+ <dt><a href="tutorials/new_tutorial/output_file_names.html#index-0">Tutorial</a>
+ </dt>
+
+ </dl></dd>
+
+ <dt>
+ Syntax
+ </dt>
+
+ <dd><dl>
+
+ <dt><a href="decorators/active_if.html#index-0">@active_if</a>
+ </dt>
+
+
+ <dt><a href="decorators/check_if_uptodate.html#index-0">@check_if_uptodate</a>
+ </dt>
+
+
+ <dt><a href="decorators/collate.html#index-0">@collate</a>
+ </dt>
+
+
+ <dt><a href="decorators/collate_ex.html#index-0">@collate (Advanced Usage)</a>
+ </dt>
+
+
+ <dt><a href="decorators/collate_ex.html#index-0">@collate, add_inputs(...)</a>
+ </dt>
+
+
+ <dt><a href="decorators/collate_ex.html#index-0">@collate, inputs(...)</a>
+ </dt>
+
+
+ <dt><a href="decorators/combinations.html#index-0">@combinations</a>
+ </dt>
+
+
+ <dt><a href="decorators/combinations_with_replacement.html#index-0">@combinations_with_replacement</a>
+ </dt>
+
+
+ <dt><a href="decorators/files.html#index-0">@files</a>
+ </dt>
+
+
+ <dt><a href="decorators/files_ex.html#index-0">@files (on-the-fly parameter generation)</a>
+ </dt>
+
+
+ <dt><a href="decorators/files_re.html#index-0">@files_re</a>
+ </dt>
+
+
+ <dt><a href="decorators/follows.html#index-0">@follows</a>
+ </dt>
+
+
+ <dt><a href="decorators/graphviz.html#index-0">@graphviz</a>
+ </dt>
+
+
+ <dt><a href="decorators/jobs_limit.html#index-0">@jobs_limit</a>
+ </dt>
+
+
+ <dt><a href="decorators/merge.html#index-0">@merge</a>
+ </dt>
+
+
+ <dt><a href="decorators/mkdir.html#index-0">@mkdir</a>
+ </dt>
+
+
+ <dt><a href="decorators/originate.html#index-0">@originate</a>
+ </dt>
+
+
+ <dt><a href="decorators/parallel.html#index-0">@parallel</a>
+ </dt>
+
+
+ <dt><a href="decorators/permutations.html#index-0">@permutations</a>
+ </dt>
+
+
+ <dt><a href="decorators/posttask.html#index-0">@posttask</a>
+ </dt>
+
+
+ <dt><a href="decorators/product.html#index-0">@product</a>
+ </dt>
+
+
+ <dt><a href="decorators/split.html#index-0">@split</a>
+ </dt>
+
+
+ <dt><a href="decorators/subdivide.html#index-0">@subdivide</a>
+ </dt>
+
+
+ <dt><a href="decorators/transform.html#index-0">@transform</a>
+ </dt>
+
+
+ <dt><a href="decorators/transform_ex.html#index-0">@transform, add_inputs(...)</a>
+ </dt>
+
+
+ <dt><a href="decorators/transform_ex.html#index-0">@transform, inputs(...)</a>
+ </dt>
+
+ </dl></dd>
+ </dl></td>
+</tr></table>
+
+<h2 id="T">T</h2>
+<table style="width: 100%" class="indextable genindextable"><tr>
+ <td style="width: 33%" valign="top"><dl>
+
+ <dt><a href="task.html#ruffus.task.t_black_hole_logger">t_black_hole_logger (class in ruffus.task)</a>
+ </dt>
+
+
+ <dt><a href="task.html#ruffus.task.t_stderr_logger">t_stderr_logger (class in ruffus.task)</a>
+ </dt>
+
+
+ <dt><a href="glossary.html#term-task"><strong>task</strong></a>
+ </dt>
+
+
+ <dt>
+ Task completion
+ </dt>
+
+ <dd><dl>
+
+ <dt><a href="tutorials/new_tutorial/checkpointing.html#index-0">Tutorial</a>
+ </dt>
+
+ </dl></dd>
+
+ <dt>
+ Timestamp resolution
+ </dt>
+
+ <dd><dl>
+
+ <dt><a href="tutorials/new_tutorial/checkpointing.html#index-5">Manual</a>
+ </dt>
+
+ </dl></dd>
+
+ <dt>
+ touch mode pipeline_run
+ </dt>
+
+ <dd><dl>
+
+ <dt><a href="tutorials/new_tutorial/multiprocessing.html#index-4">Tutorial</a>
+ </dt>
+
+ </dl></dd>
+
+ <dt>
+ touch_file
+ </dt>
+
+ <dd><dl>
+
+ <dt><a href="decorators/indicator_objects.html#index-7">@posttask (Syntax)</a>
+ </dt>
+
+ </dl></dd>
+ </dl></td>
+ <td style="width: 33%" valign="top"><dl>
+
+ <dt>
+ touchfile
+ </dt>
+
+ <dd><dl>
+
+ <dt><a href="tutorials/new_tutorial/posttask.html#index-1">@posttask (Manual)</a>
+ </dt>
+
+ </dl></dd>
+
+ <dt>
+ transform
+ </dt>
+
+ <dd><dl>
+
+ <dt><a href="tutorials/new_tutorial/transform.html#index-0">Tutorial</a>
+ </dt>
+
+ </dl></dd>
+
+ <dt><a href="task.html#ruffus.task.transform_param_factory">transform_param_factory() (in module ruffus.task)</a>
+ </dt>
+
+
+ <dt>
+ transforming in parallel
+ </dt>
+
+ <dd><dl>
+
+ <dt><a href="tutorials/new_tutorial/transform_in_parallel.html#index-0">Tutorial</a>
+ </dt>
+
+ </dl></dd>
+
+ <dt>
+ Tutorial
+ </dt>
+
+ <dd><dl>
+
+ <dt><a href="tutorials/new_tutorial/active_if.html#index-0">@active_if</a>
+ </dt>
+
+
+ <dt><a href="tutorials/new_tutorial/subdivide_collate.html#index-0">@collate</a>
+ </dt>
+
+
+ <dt><a href="tutorials/new_tutorial/multiprocessing.html#index-3">@jobs_limit</a>
+ </dt>
+
+
+ <dt><a href="tutorials/new_tutorial/parallel.html#index-0">@parallel</a>
+ </dt>
+
+
+ <dt><a href="tutorials/new_tutorial/subdivide_collate.html#index-0">@subdivide</a>
+ </dt>
+
+
+ <dt><a href="tutorials/new_tutorial/dependencies.html#index-0">Checking dependencies</a>
+ </dt>
+
+
+ <dt><a href="tutorials/new_tutorial/checkpointing.html#index-0">Exceptions</a>
+ </dt>
+
+
+ <dt><a href="tutorials/new_tutorial/checkpointing.html#index-0">Interrupted Pipeline</a>
+ </dt>
+
+
+ <dt><a href="tutorials/new_tutorial/transform_in_parallel.html#index-6">Mixing tasks, globs and file names</a>
+ </dt>
+
+
+ <dt><a href="tutorials/new_tutorial/checkpointing.html#index-2">Regenerating the checkpoint file</a>
+ </dt>
+
+
+ <dt><a href="tutorials/new_tutorial/list_of_ruffus_names.html#index-0">Ruffus names list</a>
+ </dt>
+
+
+ <dt><a href="tutorials/new_tutorial/checkpointing.html#index-0">Task completion</a>
+ </dt>
+
+
+ <dt><a href="tutorials/new_tutorial/checkpointing.html#index-0">Up to date</a>
+ </dt>
+
+
+ <dt><a href="tutorials/new_tutorial/inputs.html#index-0">add_inputs</a>
+ </dt>
+
+
+ <dt><a href="tutorials/new_tutorial/check_if_uptodate.html#index-0">check_if_uptodate</a>
+ </dt>
+
+
+ <dt><a href="tutorials/new_tutorial/combinatorics.html#index-0">combinatorics</a>
+ </dt>
+
+
+ <dt><a href="tutorials/new_tutorial/command_line.html#index-0">command line</a>
+ </dt>
+
+
+ <dt><a href="tutorials/new_tutorial/multiprocessing.html#index-2">data sharing across processes</a>
+ </dt>
+
+
+ <dt><a href="tutorials/new_tutorial/decorators_compendium.html#index-0">decorators_compendium</a>
+ </dt>
+
+
+ <dt><a href="tutorials/new_tutorial/deprecated_files.html#index-0">deprecated @files</a>
+ </dt>
+
+
+ <dt><a href="tutorials/new_tutorial/deprecated_files_re.html#index-0">deprecated @files_re</a>
+ </dt>
+
+
+ <dt><a href="tutorials/new_tutorial/exceptions.html#index-0">exceptions</a>
+ </dt>
+
+
+ <dt><a href="tutorials/new_tutorial/flowchart_colours.html#index-0">flowchart colours</a>, <a href="tutorials/new_tutorial/flowchart_colours_code.html#index-0">[1]</a>
+ </dt>
+
+
+ <dt><a href="tutorials/new_tutorial/output_file_names.html#index-0">formatter</a>
+ </dt>
+
+
+ <dt><a href="tutorials/new_tutorial/transform_in_parallel.html#index-5">globs in input parameters</a>
+ </dt>
+
+
+ <dt><a href="tutorials/new_tutorial/transform.html#index-2">input / output parameters</a>
+ </dt>
+
+
+ <dt><a href="tutorials/new_tutorial/inputs.html#index-0">inputs</a>
+ </dt>
+
+
+ <dt><a href="tutorials/new_tutorial/checkpointing.html#index-1">interrupting tasks</a>
+ </dt>
+
+
+ <dt><a href="tutorials/new_tutorial/logging.html#index-0">logging</a>
+ </dt>
+
+
+ <dt><a href="tutorials/new_tutorial/logging.html#index-3">logging customising</a>
+ </dt>
+
+
+ <dt><a href="tutorials/new_tutorial/logging.html#index-2">logging with ruffus.cmdline</a>
+ </dt>
+
+
+ <dt><a href="tutorials/new_tutorial/logging.html#index-4">logging your own message</a>
+ </dt>
+
+
+ <dt><a href="tutorials/new_tutorial/merge.html#index-0">merge</a>
+ </dt>
+
+
+ <dt><a href="tutorials/new_tutorial/mkdir.html#index-0">mkdir</a>
+ </dt>
+
+
+ <dt><a href="tutorials/new_tutorial/multiprocessing.html#index-0">multiprocessing</a>
+ </dt>
+
+
+ <dt><a href="tutorials/new_tutorial/onthefly.html#index-0">on_the_fly</a>
+ </dt>
+
+
+ <dt><a href="tutorials/new_tutorial/transform.html#index-1">one to one @transform</a>
+ </dt>
+
+
+ <dt><a href="tutorials/new_tutorial/originate.html#index-0">originate</a>
+ </dt>
+
+
+ <dt><a href="tutorials/new_tutorial/output_file_names.html#index-0">output file names</a>
+ </dt>
+
+
+ <dt><a href="tutorials/new_tutorial/introduction.html#index-0">overview</a>
+ </dt>
+
+
+ <dt><a href="tutorials/new_tutorial/pipeline_printout.html#index-0">pipeline_printout</a>
+ </dt>
+
+
+ <dt><a href="tutorials/new_tutorial/pipeline_printout_graph.html#index-0">pipeline_printout_graph</a>
+ </dt>
+
+
+ <dt><a href="tutorials/new_tutorial/introduction.html#index-2">pipeline_run</a>
+ </dt>
+
+
+ <dt><a href="tutorials/new_tutorial/multiprocessing.html#index-4">pipeline_run touch mode</a>
+ </dt>
+
+
+ <dt><a href="tutorials/new_tutorial/logging.html#index-1">pipeline_run verbosity</a>
+ </dt>
+
+
+ <dt><a href="tutorials/new_tutorial/multiprocessing.html#index-1">pipeline_run(multiprocess)</a>
+ </dt>
+
+
+ <dt><a href="tutorials/new_tutorial/posttask.html#index-0">posttask</a>
+ </dt>
+
+
+ <dt><a href="tutorials/new_tutorial/output_file_names.html#index-0">regex</a>
+ </dt>
+
+
+ <dt><a href="tutorials/new_tutorial/split.html#index-0">split</a>
+ </dt>
+
+
+        <dt><a href="tutorials/new_tutorial/inputs.html#index-0">string substitution for inputs</a>
+ </dt>
+
+
+ <dt><a href="tutorials/new_tutorial/output_file_names.html#index-0">suffix</a>
+ </dt>
+
+
+ <dt><a href="tutorials/new_tutorial/multiprocessing.html#index-4">touch mode pipeline_run</a>
+ </dt>
+
+
+ <dt><a href="tutorials/new_tutorial/transform.html#index-0">transform</a>
+ </dt>
+
+
+ <dt><a href="tutorials/new_tutorial/transform_in_parallel.html#index-0">transforming in parallel</a>
+ </dt>
+
+ </dl></dd>
+
+ <dt>
+ Tutorial on-the-fly parameter generation
+ </dt>
+
+ <dd><dl>
+
+ <dt><a href="tutorials/new_tutorial/onthefly.html#index-1">@files</a>
+ </dt>
+
+ </dl></dd>
+ </dl></td>
+</tr></table>
+
+<h2 id="U">U</h2>
+<table style="width: 100%" class="indextable genindextable"><tr>
+ <td style="width: 33%" valign="top"><dl>
+
+ <dt>
+ Up to date
+ </dt>
+
+ <dd><dl>
+
+ <dt><a href="tutorials/new_tutorial/checkpointing.html#index-0">Tutorial</a>
+ </dt>
+
+ </dl></dd>
+ </dl></td>
+</tr></table>
+
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+
+
+
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="decorators/decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="decorators/originate.html">@originate</a> </li>
+ <li><a href="decorators/split.html">@split</a> </li>
+ <li><a href="decorators/transform.html">@transform</a> </li>
+ <li><a href="decorators/merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="decorators/decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="decorators/subdivide.html">@subdivide</a> </li>
+ <li><a href="decorators/transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="decorators/collate.html">@collate</a> </li>
+ <li><a href="decorators/collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="decorators/graphviz.html">@graphviz</a> </li>
+ <li><a href="decorators/mkdir.html">@mkdir</a> </li>
+ <li><a href="decorators/follows.html">@follows / mkdir</a> </li>
+ <li><a href="decorators/posttask.html">@posttask touch_file</a> </li>
+ <li><a href="decorators/active_if.html">@active_if</a> </li>
+ <li><a href="decorators/jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="decorators/decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="decorators/product.html">@product </a> </li>
+ <li><a href="decorators/permutations.html">@permutations </a> </li>
+ <li><a href="decorators/combinations.html">@combinations </a> </li>
+ <li><a href="decorators/combinations_with_replacement.html">@combinations_ _with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="decorators/decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="decorators/files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="decorators/parallel.html">@parallel</a> </li>
+ <li><a href="decorators/check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="decorators/indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="#" title="General Index"
+ >index</a></li>
+ Ruffus v. 2.5
+ <li><a href="index.html">Home</a> | </li>
+ <li><a href="contents.html">Contents</a> | </li>
+ <li><a href="installation.html">Install</a> | </li>
+ <li><a href="tutorials/new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="tutorials/new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="faq.html">FAQ</a> | </li>
+ <li><a href="cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="tutorials/new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="gallery.html">Gallery</a> | </li>
+ <li><a href="history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/glossary.html b/doc/_build/html/glossary.html
new file mode 100644
index 0000000..3b991ec
--- /dev/null
+++ b/doc/_build/html/glossary.html
@@ -0,0 +1,247 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>Glossary — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: './',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="_static/jquery.js"></script>
+ <script type="text/javascript" src="_static/underscore.js"></script>
+ <script type="text/javascript" src="_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="index.html" />
+ <link rel="next" title="Hall of Fame: User contributed flowcharts" href="gallery.html" />
+ <link rel="prev" title="FAQ" href="faq.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="gallery.html" title="Hall of Fame: User contributed flowcharts"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="faq.html" title="FAQ"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="index.html">Home</a> | </li>
+ <li><a href="contents.html">Contents</a> | </li>
+ <li><a href="installation.html">Install</a> | </li>
+ <li><a href="tutorials/new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="tutorials/new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="faq.html">FAQ</a> | </li>
+ <li><a href="cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="tutorials/new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="gallery.html">Gallery</a> | </li>
+ <li><a href="history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <div class="section" id="glossary">
+<h1>Glossary<a class="headerlink" href="#glossary" title="Permalink to this headline">¶</a></h1>
+<dl class="glossary docutils">
+<dt id="term-task">task</dt>
+<dd><p class="first">A stage in a computational pipeline.</p>
+<p>Each <strong>task</strong> in <em>ruffus</em> is represented by a python function.</p>
+<p>For example, a task might be to find the products of pairs of numbers:</p>
+<div class="last highlight-python"><pre>4 x 5 = 20
+5 x 6 = 30
+2 x 7 = 14</pre>
+</div>
+</dd>
+<dt id="term-job">job</dt>
+<dd><p class="first">Any number of operations which can be run in parallel and which together make up
+the work in a stage of a computational pipeline.</p>
+<p>Each <strong>job</strong> in <em>ruffus</em> is a separate call to the <strong>task</strong> function.</p>
+<p>For example, if the task is to find products of numbers, each of these calculations is a separate job.</p>
+<blockquote>
+<div><p>Job1:</p>
+<div class="highlight-python"><pre>4 x 5 = 20</pre>
+</div>
+<p>Job2:</p>
+<div class="highlight-python"><pre>5 x 6 = 30</pre>
+</div>
+<p>Job3:</p>
+<div class="highlight-python"><pre>2 x 7 = 14</pre>
+</div>
+</div></blockquote>
+<p class="last">Jobs need not complete in order.</p>
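+<p>As an illustrative sketch (the input file names below are hypothetical), a single
+<tt class="docutils literal"><span class="pre">@transform</span></tt> task fans out into one job per input file:</p>
+<div class="highlight-python"><pre># a minimal sketch, not taken from the manual: three made-up input
+# files give three jobs, which may run side by side
+from ruffus import transform, suffix, pipeline_run
+
+@transform(["ab.pair", "bc.pair", "cd.pair"],   # three inputs ...
+           suffix(".pair"), ".product")         # ... so three jobs
+def multiply(input_file, output_file):
+    # each call to multiply() is one job
+    with open(input_file) as i:
+        x, y = (int(n) for n in i.read().split())
+    with open(output_file, "w") as o:
+        o.write("%d\n" % (x * y))
+
+pipeline_run(multiprocess = 3)    # run up to 3 jobs at the same time</pre>
+</div>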
+</dd>
+<dt id="term-decorator">decorator</dt>
+<dd><p class="first">Ruffus decorators allow functions to be incorporated into a computational
+pipeline, with automatic generation of parameters, dependency checking etc.,
+without modifying any code within the function.
+Quoting from the <a class="reference external" href="http://wiki.python.org/moin/PythonDecorators">python wiki</a>:</p>
+<blockquote class="last">
+<div><p>A Python decorator is a specific change to the Python syntax that
+allows us to more conveniently alter functions and methods.</p>
+<p>Decorators dynamically alter the functionality of a function, method, or
+class without having to directly use subclasses or change the source code
+of the function being decorated.</p>
+</div></blockquote>
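+<p>A minimal sketch (the task and flag file names are made up) of how this looks in practice:
+the function bodies are left untouched, and the decorators merely register the two
+functions as pipeline stages and record the dependency between them:</p>
+<div class="highlight-python"><pre># illustrative only: "start.flag" is a hypothetical flag file
+from ruffus import originate, follows, pipeline_run
+
+@originate(["start.flag"])        # a task that creates its own output
+def first_task(output_file):
+    open(output_file, "w").close()
+
+@follows(first_task)              # runs only after first_task has completed
+def second_task():
+    pass
+
+pipeline_run()</pre>
+</div>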
+</dd>
+<dt id="term-generator">generator</dt>
+<dd><p class="first">python generators were introduced in python 2.2
+(see <a class="reference external" href="http://www.ibm.com/developerworks/library/l-pycon.html">Charming Python: Iterators and simple generators</a>).
+They allow iterable data to be generated on the fly.</p>
+<p>Ruffus asks for generators when you want to generate <strong>job</strong> parameters dynamically.</p>
+<p>Each set of job parameters is returned by the <tt class="docutils literal"><span class="pre">yield</span></tt> keyword for
+greater clarity. For example:</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="k">def</span> <span class="nf">generate_job_parameters</span><span class="p">():</span>
+
+ <span class="k">for</span> <span class="n">file_index</span><span class="p">,</span> <span class="n">file_name</span> <span class="ow">in</span> <span class="n">iterate</span><span class="p">(</span><span class="n">all_file_names</span><span class="p">):</span>
+
+ <span class="c"># parameter for each job</span>
+ <span class="k">yield</span> <span class="n">file_index</span><span class="p">,</span> <span class="n">file_name</span>
+</pre></div>
+</div>
+<p class="last">Each job takes the parameters <tt class="docutils literal"><span class="pre">file_index</span></tt> and <tt class="docutils literal"><span class="pre">file_name</span></tt>.</p>
+</dd>
+</dl>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="faq.html"
+ title="previous chapter">FAQ</a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="gallery.html"
+ title="next chapter">Hall of Fame: User contributed flowcharts</a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="_sources/glossary.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="decorators/decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="decorators/originate.html">@originate</a> </li>
+ <li><a href="decorators/split.html">@split</a> </li>
+ <li><a href="decorators/transform.html">@transform</a> </li>
+ <li><a href="decorators/merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="decorators/decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="decorators/subdivide.html">@subdivide</a> </li>
+ <li><a href="decorators/transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="decorators/collate.html">@collate</a> </li>
+ <li><a href="decorators/collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="decorators/graphviz.html">@graphviz</a> </li>
+ <li><a href="decorators/mkdir.html">@mkdir</a> </li>
+ <li><a href="decorators/follows.html">@follows / mkdir</a> </li>
+ <li><a href="decorators/posttask.html">@posttask touch_file</a> </li>
+ <li><a href="decorators/active_if.html">@active_if</a> </li>
+ <li><a href="decorators/jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="decorators/decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="decorators/product.html">@product </a> </li>
+ <li><a href="decorators/permutations.html">@permutations </a> </li>
+ <li><a href="decorators/combinations.html">@combinations </a> </li>
+ <li><a href="decorators/combinations_with_replacement.html">@combinations_ _with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="decorators/decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="decorators/files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="decorators/parallel.html">@parallel</a> </li>
+ <li><a href="decorators/check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="decorators/indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="gallery.html" title="Hall of Fame: User contributed flowcharts"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="faq.html" title="FAQ"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="index.html">Home</a> | </li>
+ <li><a href="contents.html">Contents</a> | </li>
+ <li><a href="installation.html">Install</a> | </li>
+ <li><a href="tutorials/new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="tutorials/new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="faq.html">FAQ</a> | </li>
+ <li><a href="cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="tutorials/new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="gallery.html">Gallery</a> | </li>
+ <li><a href="history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/googlef47546b8526ae0ed.html b/doc/_build/html/googlef47546b8526ae0ed.html
new file mode 100644
index 0000000..e749a5b
--- /dev/null
+++ b/doc/_build/html/googlef47546b8526ae0ed.html
@@ -0,0 +1 @@
+google-site-verification: googlef47546b8526ae0ed.html
\ No newline at end of file
diff --git a/doc/_build/html/history.html b/doc/_build/html/history.html
new file mode 100644
index 0000000..2f178e0
--- /dev/null
+++ b/doc/_build/html/history.html
@@ -0,0 +1,1008 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>Major Features added to Ruffus — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: './',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="_static/jquery.js"></script>
+ <script type="text/javascript" src="_static/underscore.js"></script>
+ <script type="text/javascript" src="_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="index.html" />
+ <link rel="next" title="Future Changes to Ruffus" href="todo.html" />
+ <link rel="prev" title="Design & Architecture" href="design.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="todo.html" title="Future Changes to Ruffus"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="design.html" title="Design & Architecture"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="index.html">Home</a> | </li>
+ <li><a href="contents.html">Contents</a> | </li>
+ <li><a href="installation.html">Install</a> | </li>
+ <li><a href="tutorials/new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="tutorials/new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="faq.html">FAQ</a> | </li>
+ <li><a href="cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="tutorials/new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="gallery.html">Gallery</a> | </li>
+ <li><a href="#">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <div class="section" id="major-features-added-to-ruffus">
+<h1>Major Features added to Ruffus<a class="headerlink" href="#major-features-added-to-ruffus" title="Permalink to this headline">¶</a></h1>
+<div class="admonition note">
+<p class="first admonition-title">Note</p>
+<p class="last">See <a class="reference internal" href="todo.html#todo"><em>To do list</em></a> for future enhancements to Ruffus</p>
+</div>
+<div class="section" id="version-2-5rc">
+<h2>version 2.5RC<a class="headerlink" href="#version-2-5rc" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>31st July 2014: Release Candidate</p>
+<p>5th August 2014: Release</p>
+</div></blockquote>
+<div class="section" id="python3-compatability-but-at-least-python-2-6-is-now-required">
+<h3>1) Python3 compatibility (but at least python 2.6 is now required)<a class="headerlink" href="#python3-compatability-but-at-least-python-2-6-is-now-required" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><p>Ruffus v2.5 is now python3 compatible. This has required surprisingly many changes to the codebase. Please report any bugs to me.</p>
+<div class="admonition note">
+<p class="first admonition-title">Note</p>
+<p><strong>Ruffus now requires at least python 2.6</strong></p>
+<p class="last">It proved to be impossible to support python 2.5 and python 3.x at the same time.</p>
+</div>
+</div></blockquote>
+</div>
+<div class="section" id="ctrl-c-interrupts">
+<h3>2) Ctrl-C interrupts<a class="headerlink" href="#ctrl-c-interrupts" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><p>Ruffus now mostly(!) terminates gracefully when interrupted by Ctrl-C.</p>
+<p>Please send me bug reports for when this doesn’t work with a minimally reproducible case.</p>
+<p>This means that if an <tt class="docutils literal"><span class="pre">Exception</span></tt> is thrown during your pipeline and you do not want to wait for the remaining jobs to complete, you can press Ctrl-C at any point.
+Note that you may still need to clean up spawned processes, for example, using <tt class="docutils literal"><span class="pre">qdel</span></tt> if you are using <tt class="docutils literal"><span class="pre">Ruffus.drmaa_wrapper</span></tt>.</p>
+</div></blockquote>
+</div>
+<div class="section" id="customising-flowcharts-in-pipeline-printout-graph-with-graphviz">
+<h3>3) Customising flowcharts in pipeline_printout_graph() with <tt class="docutils literal"><span class="pre">@graphviz</span></tt><a class="headerlink" href="#customising-flowcharts-in-pipeline-printout-graph-with-graphviz" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><p><em>Contributed by Sean Davis, with improved syntax via Jake Biesinger</em></p>
+<p>The graphic for each task can have its own attributes (URL, shape, colour, etc.) by adding
+<a class="reference external" href="http://www.graphviz.org/doc/info/attrs.html">graphviz attributes</a>
+using the <tt class="docutils literal"><span class="pre">@graphviz</span></tt> decorator.</p>
+<ul>
+<li><p class="first">This allows HTML formatting in the task names (using the <tt class="docutils literal"><span class="pre">label</span></tt> parameter as in the following example).
+HTML labels <strong>must</strong> be enclosed in <tt class="docutils literal"><span class="pre"><</span></tt> and <tt class="docutils literal"><span class="pre">></span></tt>. E.g.</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="n">label</span> <span class="o">=</span> <span class="s">"<Line <BR/> wrapped task_name()>"</span>
+</pre></div>
+</div>
+</li>
+<li><p class="first">You can also opt to keep the task name and wrap it with a prefix and suffix:</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="n">label_suffix</span> <span class="o">=</span> <span class="s">"??? "</span><span class="p">,</span> <span class="n">label_prefix</span> <span class="o">=</span> <span class="s">": What is this?"</span>
+</pre></div>
+</div>
+</li>
+<li><p class="first">The <tt class="docutils literal"><span class="pre">URL</span></tt> attribute allows the generation of clickable svg, and also client / server
+side image maps usable in web pages.
+See <a class="reference external" href="http://www.graphviz.org/content/output-formats#dimap">Graphviz documentation</a></p>
+</li>
+</ul>
+<p>Example:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="nd">@graphviz</span><span class="p">(</span><span class="n">URL</span><span class="o">=</span><span class="s">'"http://cnn.com"'</span><span class="p">,</span> <span class="n">fillcolor</span> <span class="o">=</span> <span class="s">'"#FFCCCC"'</span><span class="p">,</span>
+ <span class="n">color</span> <span class="o">=</span> <span class="s">'"#FF0000"'</span><span class="p">,</span> <span class="n">pencolor</span><span class="o">=</span><span class="s">'"#FF0000"'</span><span class="p">,</span> <span class="n">fontcolor</span><span class="o">=</span><span class="s">'"#4B6000"'</span><span class="p">,</span>
+ <span class="n">label_suffix</span> <span class="o">=</span> <span class="s">"???"</span><span class="p">,</span> <span class="n">label_prefix</span> <span class="o">=</span> <span class="s">"What is this?<BR/> "</span><span class="p">,</span>
+ <span class="n">label</span> <span class="o">=</span> <span class="s">"<What <FONT COLOR=</span><span class="se">\"</span><span class="s">red</span><span class="se">\"</span><span class="s">>is</FONT>this>"</span><span class="p">,</span>
+ <span class="n">shape</span><span class="o">=</span> <span class="s">"component"</span><span class="p">,</span> <span class="n">height</span> <span class="o">=</span> <span class="mf">1.5</span><span class="p">,</span> <span class="n">peripheries</span> <span class="o">=</span> <span class="mi">5</span><span class="p">,</span>
+ <span class="n">style</span><span class="o">=</span><span class="s">"dashed"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">Up_to_date_task2</span><span class="p">(</span><span class="n">infile</span><span class="p">,</span> <span class="n">outfile</span><span class="p">):</span>
+ <span class="k">pass</span>
+
+<span class="c"># Can use dictionary if you wish...</span>
+<span class="n">graphviz_params</span> <span class="o">=</span> <span class="p">{</span><span class="s">"URL"</span><span class="p">:</span><span class="s">"http://cnn.com"</span><span class="p">,</span> <span class="s">"fontcolor"</span><span class="p">:</span> <span class="s">'"#FF00FF"'</span><span class="p">}</span>
+<span class="nd">@graphviz</span><span class="p">(</span><span class="o">**</span><span class="n">graphviz_params</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">myTask</span><span class="p">(</span><span class="nb">input</span><span class="p">,</span><span class="n">output</span><span class="p">):</span>
+ <span class="k">pass</span>
+</pre></div>
+</div>
+<a class="reference internal image-reference" href="_images/history_html_flowchart.png"><img alt="_images/history_html_flowchart.png" src="_images/history_html_flowchart.png" style="width: 336.6px; height: 316.5px;" /></a>
+</div></blockquote>
+</div></blockquote>
+</div>
+<div class="section" id="consistent-verbosity-levels">
+<h3>4) Consistent verbosity levels<a class="headerlink" href="#consistent-verbosity-levels" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><p>The verbosity levels are now more fine-grained and consistent between pipeline_printout and pipeline_run.
+Note that at verbosity > 2, <tt class="docutils literal"><span class="pre">pipeline_run</span></tt> outputs lists of up-to-date tasks before running the pipeline.
+Many users who defaulted to a verbosity of 3 may want to move up to <tt class="docutils literal"><span class="pre">verbose</span> <span class="pre">=</span> <span class="pre">4</span></tt> (see the example below).</p>
+<blockquote>
+<div><ul class="simple">
+<li><strong>level 0</strong> : <em>Nothing</em></li>
+<li><strong>level 1</strong> : <em>Out-of-date Task names</em></li>
+<li><strong>level 2</strong> : <em>All Tasks (including any task function docstrings)</em></li>
+<li><strong>level 3</strong> : <em>Out-of-date Jobs in Out-of-date Tasks, no explanation</em></li>
+<li><strong>level 4</strong> : <em>Out-of-date Jobs in Out-of-date Tasks, with explanations and warnings</em></li>
+<li><strong>level 5</strong> : <em>All Jobs in Out-of-date Tasks (up-to-date Tasks are listed by name only)</em></li>
+<li><strong>level 6</strong> : <em>All jobs in All Tasks whether out of date or not</em></li>
+<li><strong>level 10</strong>: <em>Logs messages useful only for debugging ruffus pipeline code</em></li>
+</ul>
+</div></blockquote>
+<ul class="simple">
+<li>Defaults to <strong>level 4</strong> for pipeline_printout: <em>Out-of-date Jobs, with explanations and warnings</em></li>
+<li>Defaults to <strong>level 1</strong> for pipeline_run: <em>Out-of-date Task names</em></li>
+</ul>
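+<p>For example, a minimal sketch (<tt class="docutils literal"><span class="pre">final_task</span></tt> is a hypothetical, previously defined task function):</p>
+<div class="highlight-python"><pre>import sys
+from ruffus import pipeline_printout, pipeline_run
+
+# preview out-of-date jobs with explanations and warnings (level 4)
+pipeline_printout(sys.stdout, [final_task], verbose = 4)
+
+# run the pipeline, printing only out-of-date task names (level 1)
+pipeline_run([final_task], verbose = 1)</pre>
+</div>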
+</div></blockquote>
+</div>
+<div class="section" id="allow-abbreviated-paths-from-pipeline-run-or-pipeline-printout">
+<h3>5) Allow abbreviated paths from <tt class="docutils literal"><span class="pre">pipeline_run</span></tt> or <tt class="docutils literal"><span class="pre">pipeline_printout</span></tt><a class="headerlink" href="#allow-abbreviated-paths-from-pipeline-run-or-pipeline-printout" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><div class="admonition note">
+<p class="first admonition-title">Note</p>
+<p class="last">Please contact me with suggestions if you find the abbreviations useful but “aesthetically challenged”!</p>
+</div>
+<p>Some pipelines produce interminable lists of long filenames. It would be nice to be able to abbreviate this
+to just enough information to follow the progress.</p>
+<dl class="docutils">
+<dt>Ruffus now allows either</dt>
+<dd><ol class="first arabic">
+<li><p class="first">Only the nth top level sub-directories to be included</p>
+</li>
+<li><p class="first">The message to be truncated to a specified number of characters (to fit on a line, for example)</p>
+<p>Note that the specified number of characters applies separately to the input and output parameters,
+not to the entire message. You may need to specify a smaller limit than you expect (e.g. <tt class="docutils literal"><span class="pre">60</span></tt> rather than <tt class="docutils literal"><span class="pre">80</span></tt>).</p>
+</li>
+</ol>
+<div class="last highlight-python"><div class="highlight"><pre><span class="n">pipeline_printout</span><span class="p">(</span><span class="n">verbose_abbreviated_path</span> <span class="o">=</span> <span class="n">NNN</span><span class="p">)</span>
+<span class="n">pipeline_run</span><span class="p">(</span><span class="n">verbose_abbreviated_path</span> <span class="o">=</span> <span class="o">-</span><span class="n">MMM</span><span class="p">)</span>
+</pre></div>
+</div>
+</dd>
+</dl>
+<p>The <tt class="docutils literal"><span class="pre">verbose_abbreviated_path</span></tt> parameter restricts the length of input / output file paths to either</p>
+<blockquote>
+<div><ul class="simple">
+<li>NNN levels of nested paths</li>
+<li>A total of MMM characters, MMM is specified by setting <tt class="docutils literal"><span class="pre">verbose_abbreviated_path</span></tt> to -MMM (i.e. negative values)</li>
+</ul>
+<p><tt class="docutils literal"><span class="pre">verbose_abbreviated_path</span></tt> defaults to <tt class="docutils literal"><span class="pre">2</span></tt></p>
+</div></blockquote>
+<p>For example:</p>
+<blockquote>
+<div><p>Given <tt class="docutils literal"><span class="pre">["aa/bb/cc/dddd.txt",</span> <span class="pre">"aaa/bbbb/cccc/eeed/eeee/ffff/gggg.txt"]</span></tt></p>
+<div class="highlight-python"><div class="highlight"><pre><span class="hll"> <span class="c"># Original relative paths</span>
+</span> <span class="s">"[aa/bb/cc/dddd.txt, aaa/bbbb/cccc/eeed/eeee/ffff/gggg.txt]"</span>
+
+<span class="hll"> <span class="c"># Full abspath</span>
+</span> <span class="n">verbose_abbreviated_path</span> <span class="o">=</span> <span class="mi">0</span>
+ <span class="s">"[/test/ruffus/src/aa/bb/cc/dddd.txt, /test/ruffus/src/aaa/bbbb/cccc/eeed/eeee/ffff/gggg.txt]"</span>
+
+<span class="hll"> <span class="c"># Specifed level of nested directories</span>
+</span> <span class="n">verbose_abbreviated_path</span> <span class="o">=</span> <span class="mi">1</span>
+ <span class="s">"[.../dddd.txt, .../gggg.txt]"</span>
+
+ <span class="n">verbose_abbreviated_path</span> <span class="o">=</span> <span class="mi">2</span>
+ <span class="s">"[.../cc/dddd.txt, .../ffff/gggg.txt]"</span>
+
+ <span class="n">verbose_abbreviated_path</span> <span class="o">=</span> <span class="mi">3</span>
+ <span class="s">"[.../bb/cc/dddd.txt, .../eeee/ffff/gggg.txt]"</span>
+
+
+<span class="hll"> <span class="c"># Truncated to MMM characters</span>
+</span> <span class="n">verbose_abbreviated_path</span> <span class="o">=</span> <span class="o">-</span><span class="mi">60</span>
+ <span class="s">"<???> /bb/cc/dddd.txt, aaa/bbbb/cccc/eeed/eeee/ffff/gggg.txt]"</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p>If you are using <tt class="docutils literal"><span class="pre">ruffus.cmdline</span></tt>, the abbreviated path lengths can be specified on
+the command line as an extension to the verbosity:</p>
+<blockquote>
+<div><div class="highlight-bash"><div class="highlight"><pre> <span class="c"># verbosity of 4</span>
+ yourscript.py --verbose 4
+
+<span class="hll"> <span class="c"># display three levels of nested directories</span>
+</span> yourscript.py --verbose 4:3
+
+<span class="hll"> <span class="c"># restrict input and output parameters to 60 letters</span>
+</span> yourscript.py --verbose 4:-60
+</pre></div>
+</div>
+<p>The number after the colon is the abbreviated path length.</p>
+</div></blockquote>
+</div></blockquote>
+</div>
+<div class="section" id="other-changes">
+<h3>Other changes<a class="headerlink" href="#other-changes" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><ul class="simple">
+<li>BUG FIX: Output specified via wild cards was not saved in the checksum files!!!</li>
+<li>BUG FIX: @mkdir bug under Windows. Thanks to Sean Turley. (Aargh! Different exceptions are thrown in Windows vs. Linux for the same condition!)</li>
+<li>Added <a class="reference internal" href="pipeline_functions.html#pipeline-functions-pipeline-get-task-names"><em>pipeline_get_task_names(...)</em></a>, which returns all task names as a list of strings (see the one-line sketch below). Thanks to Clare Sloggett.</li>
+</ul>
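+<p>A one-line sketch (the task names shown are hypothetical):</p>
+<div class="highlight-python"><pre>from ruffus import pipeline_get_task_names
+
+# names depend on the tasks defined so far
+print(pipeline_get_task_names())   # ['create_initial_files', 'summarise']</pre>
+</div>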
+</div></blockquote>
+</div>
+</div>
+<div class="section" id="version-2-4-1">
+<h2>version 2.4.1<a class="headerlink" href="#version-2-4-1" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>26th April 2014</p>
+<ul class="simple">
+<li>Breaking changes to drmaa API suggested by Bernie Pope to ensure portability across different drmaa implementations (SGE, SLURM etc.)</li>
+</ul>
+</div></blockquote>
+</div>
+<div class="section" id="version-2-4">
+<h2>version 2.4<a class="headerlink" href="#version-2-4" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div>4th April 2014</div></blockquote>
+<div class="section" id="additions-to-ruffus-namespace">
+<h3>Additions to <tt class="docutils literal"><span class="pre">ruffus</span></tt> namespace<a class="headerlink" href="#additions-to-ruffus-namespace" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><ul class="simple">
+<li><a class="reference internal" href="tutorials/new_tutorial/output_file_names.html#new-manual-formatter"><em>formatter()</em></a> (<a class="reference internal" href="decorators/indicator_objects.html#decorators-formatter"><em>syntax</em></a>)</li>
+<li><a class="reference internal" href="tutorials/new_tutorial/originate.html#new-manual-originate"><em>originate()</em></a> (<a class="reference internal" href="decorators/originate.html#decorators-originate"><em>syntax</em></a>)</li>
+<li><a class="reference internal" href="tutorials/new_tutorial/subdivide_collate.html#new-manual-subdivide"><em>subdivide()</em></a> (<a class="reference internal" href="decorators/subdivide.html#decorators-subdivide"><em>syntax</em></a>)</li>
+</ul>
+</div></blockquote>
+</div>
+<div class="section" id="installation-use-pip">
+<h3>Installation: use pip<a class="headerlink" href="#installation-use-pip" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><div class="highlight-python"><pre>sudo pip install ruffus --upgrade</pre>
+</div>
+</div></blockquote>
+</div>
+<div class="section" id="command-line-support">
+<h3>1) Command Line support<a class="headerlink" href="#command-line-support" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div>The optional <tt class="docutils literal"><span class="pre">Ruffus.cmdline</span></tt> module provides support for a set of common command
+line arguments which make writing <em>Ruffus</em> pipelines much more pleasant.
+See <a class="reference internal" href="tutorials/new_tutorial/command_line.html#new-manual-cmdline"><em>manual</em></a></div></blockquote>
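+<p>A minimal sketch of the usual pattern (the pipeline tasks themselves are omitted):</p>
+<div class="highlight-python"><pre>from ruffus import cmdline
+
+parser = cmdline.get_argparse(description = "My pipeline")
+options = parser.parse_args()
+
+#  ... pipeline tasks defined with the usual decorators ...
+
+# pipeline_run / pipeline_printout are called according to the parsed options
+cmdline.run(options)</pre>
+</div>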
+</div>
+<div class="section" id="check-pointing">
+<h3>2) Check pointing<a class="headerlink" href="#check-pointing" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><ul>
+<li><p class="first">Contributed by <strong>Jake Biesinger</strong></p>
+</li>
+<li><p class="first">See <a class="reference internal" href="tutorials/new_tutorial/checkpointing.html#new-manual-checkpointing"><em>Manual</em></a></p>
+</li>
+<li><p class="first">Uses a fault resistant sqlite database file to log i/o files, and additional checksums</p>
+</li>
+<li><p class="first">defaults to checking file timestamps stored in the current directory (<tt class="docutils literal"><span class="pre">ruffus_utilility.RUFFUS_HISTORY_FILE</span> <span class="pre">=</span> <span class="pre">'.ruffus_history.sqlite'</span></tt>)</p>
+</li>
+<li><p class="first"><a class="reference internal" href="pipeline_functions.html#pipeline-functions-pipeline-run"><em>pipeline_run(..., checksum_level = N, ...)</em></a></p>
+<blockquote>
+<div><ul class="simple">
+<li>level 0 = CHECKSUM_FILE_TIMESTAMPS : Classic mode. Use only file timestamps (no checksum file will be created)</li>
+<li>level 1 = CHECKSUM_HISTORY_TIMESTAMPS : Also store timestamps in a database after successful job completion</li>
+<li>level 2 = CHECKSUM_FUNCTIONS : As above, plus a checksum of the pipeline function body</li>
+<li>level 3 = CHECKSUM_FUNCTIONS_AND_PARAMS : As above, plus a checksum of the pipeline function default arguments and the additional arguments passed in by task decorators</li>
+<li>defaults to level 1</li>
+</ul>
+</div></blockquote>
+</li>
+<li><p class="first">Can speed up trivial tasks: Previously Ruffus always added an extra 1 second pause between tasks
+to guard against file systems (Ext3, FAT, some NFS) with low timestamp granularity.</p>
+</li>
+</ul>
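+<p>For example, a minimal sketch (<tt class="docutils literal"><span class="pre">final_task</span></tt> is a hypothetical, previously defined task function):</p>
+<div class="highlight-python"><pre>from ruffus import pipeline_run
+
+# default: checksum_level = 1, timestamps logged to .ruffus_history.sqlite
+pipeline_run([final_task])
+
+# classic mode: rely on file timestamps only; no checksum file is created
+pipeline_run([final_task], checksum_level = 0)</pre>
+</div>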
+</div></blockquote>
+</div>
+<div class="section" id="subdivide-syntax">
+<h3>3) <a class="reference internal" href="tutorials/new_tutorial/subdivide_collate.html#new-manual-subdivide"><em>subdivide()</em></a> (<a class="reference internal" href="decorators/subdivide.html#decorators-subdivide"><em>syntax</em></a>)<a class="headerlink" href="#subdivide-syntax" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><ul class="simple">
+<li>Takes a list of input jobs (like <a class="reference internal" href="decorators/transform.html#decorators-transform"><em>@transform</em></a>) but further splits each one into multiple jobs, i.e. it is a <strong>many->even more</strong> relationship (see the sketch below)</li>
+<li>synonym for the deprecated <tt class="docutils literal"><span class="pre">@split(...,</span> <span class="pre">regex(),</span> <span class="pre">...)</span></tt></li>
+</ul>
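+<p>A minimal sketch (the input file names are hypothetical; each job globs for however many <tt class="docutils literal"><span class="pre">.section</span></tt> files it decides to write):</p>
+<div class="highlight-python"><pre>from ruffus import subdivide, formatter
+
+@subdivide(["a.start", "b.start"],
+           formatter(),
+           # glob pattern matching whatever output files each job creates
+           "{path[0]}/{basename[0]}.*.section",
+           # extra parameter: root used to build the output file names
+           "{path[0]}/{basename[0]}")
+def subdivide_files(input_file, output_files, output_root):
+    # the number of sections is only known at run time
+    for i in range(3):
+        open("%s.%d.section" % (output_root, i), "w").close()</pre>
+</div>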
+</div></blockquote>
+</div>
+<div class="section" id="mkdir-syntax-with-formatter-suffix-and-regex">
+<h3>4) <a class="reference internal" href="tutorials/new_tutorial/mkdir.html#new-manual-mkdir"><em>mkdir()</em></a> (<a class="reference internal" href="decorators/mkdir.html#decorators-mkdir"><em>syntax</em></a>) with <a class="reference internal" href="tutorials/new_tutorial/output_file_names.html#new-manual-formatter"><em>formatter()</em></a>, <a class="reference internal" href="decorators/indicator_objects.html#decorators-suffix"><em>suffix()</em></a> and <a class="reference interna [...]
+<blockquote>
+<div><ul class="simple">
+<li>allows directories to be created depending on runtime parameters or the output of previous tasks</li>
+<li>behaves just like <a class="reference internal" href="decorators/transform.html#decorators-transform"><em>@transform</em></a> but with its own (internal) function which does the actual work of making a directory</li>
+<li>Previous behaviour is retained: <tt class="docutils literal"><span class="pre">mkdir</span></tt> continues to work seamlessly inside <a class="reference internal" href="decorators/follows.html#decorators-follows"><em>@follows</em></a>; the new standalone form is sketched below</li>
+</ul>
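+<p>A minimal sketch (the input file and directory names are hypothetical):</p>
+<div class="highlight-python"><pre>from ruffus import mkdir, transform, formatter
+
+# one results directory per input file, named after that file
+@mkdir(["a.start", "b.start"], formatter(), "{path[0]}/{basename[0]}.dir")
+@transform(["a.start", "b.start"], formatter(),
+           "{path[0]}/{basename[0]}.dir/{basename[0]}.out")
+def process(input_file, output_file):
+    open(output_file, "w").close()</pre>
+</div>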
+</div></blockquote>
+</div>
+<div class="section" id="originate-syntax">
+<h3>5) <a class="reference internal" href="tutorials/new_tutorial/originate.html#new-manual-originate"><em>originate()</em></a> (<a class="reference internal" href="decorators/originate.html#decorators-originate"><em>syntax</em></a>)<a class="headerlink" href="#originate-syntax" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><ul class="simple">
+<li>Generates output files without dependencies from scratch (<em>ex nihilo</em>!)</li>
+<li>For the first step in a pipeline</li>
+<li>The task function obviously only takes output (and not input) parameters. (There <em>are</em> no inputs! See the sketch below.)</li>
+<li>synonym for <a class="reference internal" href="decorators/split.html#decorators-split"><em>@split(None,...)</em></a></li>
+<li>See <a class="reference internal" href="decorators/originate.html#decorators-originate"><em>Summary</em></a> / <a class="reference internal" href="tutorials/new_tutorial/originate.html#new-manual-originate"><em>Manual</em></a></li>
+</ul>
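+<p>A minimal sketch (the output file names are hypothetical):</p>
+<div class="highlight-python"><pre>from ruffus import originate
+
+@originate(["a.start", "b.start", "c.start"])
+def create_initial_files(output_file):
+    # one job per output file; there are no inputs
+    with open(output_file, "w") as oo:
+        oo.write("initial data\n")</pre>
+</div>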
+</div></blockquote>
+</div>
+<div class="section" id="new-flexible-formatter-syntax-alternative-to-regex-suffix">
+<h3>6) New flexible <a class="reference internal" href="tutorials/new_tutorial/output_file_names.html#new-manual-formatter"><em>formatter()</em></a> (<a class="reference internal" href="decorators/indicator_objects.html#decorators-formatter"><em>syntax</em></a>) alternative to <a class="reference internal" href="decorators/indicator_objects.html#decorators-regex"><em>regex()</em></a> & <a class="reference internal" href="decorators/indicator_objects.html#decorators-suffix"><em>suffi [...]
+<blockquote>
+<div><ul class="simple">
+<li>Easy manipulation of path subcomponents in the style of <a class="reference external" href="http://docs.python.org/2/library/os.path.html#os.path.split">os.path.split()</a></li>
+<li>Regular expressions are no longer necessary for path manipulation</li>
+<li>Familiar python syntax</li>
+<li>Optional regular expression matches</li>
+<li>Can refer to any file in the list of N input files (not only the first file, as for <tt class="docutils literal"><span class="pre">regex(...)</span></tt>); see the sketch below</li>
+<li>Can even refer to individual letters within a match</li>
+</ul>
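+<p>A minimal sketch (the input file name is hypothetical):</p>
+<div class="highlight-python"><pre>from ruffus import transform, formatter
+
+# optional regular expression filter; path components of each input are
+# then available as {path[0]}, {basename[0]}, {ext[0]} etc.
+@transform(["sample1.txt"], formatter(r"\.txt$"),
+           "{path[0]}/{basename[0]}.summary")
+def summarise(input_file, output_file):
+    open(output_file, "w").close()</pre>
+</div>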
+</div></blockquote>
+</div>
+<div class="section" id="combinatorics-all-vs-all-decorators">
+<h3>7) Combinatorics (all vs. all decorators)<a class="headerlink" href="#combinatorics-all-vs-all-decorators" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><ul class="simple">
+<li><a class="reference internal" href="tutorials/new_tutorial/combinatorics.html#new-manual-product"><em>@product</em></a> (See <a class="reference external" href="http://docs.python.org/2/library/itertools.html#itertools.product">itertools.product</a>)</li>
+<li><a class="reference internal" href="tutorials/new_tutorial/combinatorics.html#new-manual-permutations"><em>@permutations</em></a> (See <a class="reference external" href="http://docs.python.org/2/library/itertools.html#itertools.permutations">itertools.permutations</a>)</li>
+<li><a class="reference internal" href="tutorials/new_tutorial/combinatorics.html#new-manual-combinations"><em>@combinations</em></a> (See <a class="reference external" href="http://docs.python.org/2/library/itertools.html#itertools.combinations">itertools.combinations</a>)</li>
+<li><a class="reference internal" href="tutorials/new_tutorial/combinatorics.html#new-manual-combinations-with-replacement"><em>@combinations_with_replacement</em></a> (See <a class="reference external" href="http://docs.python.org/2/library/itertools.html#itertools.combinations_with_replacement">itertools.combinations_with_replacement</a>)</li>
+<li>in the optional <a class="reference internal" href="tutorials/new_tutorial/combinatorics.html#new-manual-combinatorics"><em>combinatorics</em></a> module (see the sketch below)</li>
+<li>Only <a class="reference internal" href="tutorials/new_tutorial/output_file_names.html#new-manual-formatter"><em>formatter()</em></a> provides the necessary flexibility to construct the output. (<a class="reference internal" href="decorators/indicator_objects.html#decorators-suffix"><em>suffix()</em></a> and <a class="reference internal" href="decorators/indicator_objects.html#decorators-regex"><em>regex()</em></a> are not supported.)</li>
+<li>See <a class="reference internal" href="decorators/decorators.html#decorators-combinatorics"><em>Summary</em></a> / <a class="reference internal" href="tutorials/new_tutorial/combinatorics.html#new-manual-combinatorics"><em>Manual</em></a></li>
+</ul>
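+<p>A minimal sketch of <tt class="docutils literal"><span class="pre">@product</span></tt> (file names are hypothetical; the two-level indices in the output string refer to [input set][file within the job]):</p>
+<div class="highlight-python"><pre>from ruffus import formatter
+from ruffus.combinatorics import product
+
+@product(["a.start", "b.start"], formatter(),
+         ["x.begin", "y.begin"], formatter(),
+         "{path[0][0]}/{basename[0][0]}.{basename[1][0]}.product")
+def all_vs_all(input_files, output_file):
+    open(output_file, "w").close()</pre>
+</div>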
+</div></blockquote>
+</div>
+<div class="section" id="drmaa-support-and-multithreading">
+<h3>8) drmaa support and multithreading:<a class="headerlink" href="#drmaa-support-and-multithreading" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><ul class="simple">
+<li><a class="reference internal" href="tutorials/new_tutorial/multiprocessing.html#new-manual-ruffus-drmaa-wrapper-run-job"><em>ruffus.drmaa_wrapper.run_job()</em></a> (<a class="reference internal" href="drmaa_wrapper_functions.html#drmaa-wrapper-run-job"><em>syntax</em></a>)</li>
+<li>Optional helper module allows jobs to dispatch work to a computational cluster and wait until it completes.</li>
+<li>Requires running the pipeline with <tt class="docutils literal"><span class="pre">multithread</span></tt> rather than <tt class="docutils literal"><span class="pre">multiprocess</span></tt> (see the sketch below)</li>
+</ul>
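+<p>A minimal sketch (the input file and command are hypothetical; error handling is omitted):</p>
+<div class="highlight-python"><pre>import drmaa
+from ruffus import transform, suffix, pipeline_run
+from ruffus.drmaa_wrapper import run_job
+
+my_drmaa_session = drmaa.Session()
+my_drmaa_session.initialize()
+
+@transform(["data.txt"], suffix(".txt"), ".sorted")
+def sort_on_cluster(input_file, output_file):
+    stdout_res, stderr_res = run_job(
+        cmd_str       = "sort %s > %s" % (input_file, output_file),
+        job_name      = "sort_job",
+        drmaa_session = my_drmaa_session)
+
+# a shared drmaa session needs multithread rather than multiprocess
+pipeline_run([sort_on_cluster], multithread = 4)</pre>
+</div>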
+</div></blockquote>
+</div>
+<div class="section" id="pipeline-run-and-exceptions">
+<h3>9) <tt class="docutils literal"><span class="pre">pipeline_run(...)</span></tt> and exceptions<a class="headerlink" href="#pipeline-run-and-exceptions" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><p>See <a class="reference internal" href="tutorials/new_tutorial/exceptions.html#new-manual-exceptions"><em>Manual</em></a></p>
+<ul class="simple">
+<li>Optionally terminate pipeline after first exception</li>
+<li>Display exceptions without delay</li>
+</ul>
+</div></blockquote>
+</div>
+<div class="section" id="miscellaneous">
+<h3>10) Miscellaneous<a class="headerlink" href="#miscellaneous" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><dl class="docutils">
+<dt>Better error messages for <tt class="docutils literal"><span class="pre">formatter()</span></tt>, <tt class="docutils literal"><span class="pre">suffix()</span></tt> and <tt class="docutils literal"><span class="pre">regex()</span></tt> for <tt class="docutils literal"><span class="pre">pipeline_printout(...,</span> <span class="pre">verbose</span> <span class="pre">>=</span> <span class="pre">3,</span> <span class="pre">...)</span></tt></dt>
+<dd><ul class="first last simple">
+<li>Error messages now show the mismatching regular expression and the offending file name</li>
+<li>Wrong capture group names or out-of-range indices will raise an informative Exception</li>
+</ul>
+</dd>
+</dl>
+</div></blockquote>
+</div>
+</div>
+<div class="section" id="version-2-3">
+<h2>version 2.3<a class="headerlink" href="#version-2-3" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>1st September, 2013</p>
+<ul>
+<li><dl class="first docutils">
+<dt><tt class="docutils literal"><span class="pre">@active_if</span></tt> turns off tasks at runtime</dt>
+<dd><p class="first">The Design and initial implementation were contributed by Jacob Biesinger</p>
+<p>Takes one or more parameters which can be either booleans or functions or callable objects which return True / False:</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="n">run_if_true_1</span> <span class="o">=</span> <span class="bp">True</span>
+<span class="n">run_if_true_2</span> <span class="o">=</span> <span class="bp">False</span>
+
+<span class="nd">@active_if</span><span class="p">(</span><span class="n">run_if_true</span><span class="p">,</span> <span class="k">lambda</span><span class="p">:</span> <span class="n">run_if_true_2</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">this_task_might_be_inactive</span><span class="p">():</span>
+ <span class="k">pass</span>
+</pre></div>
+</div>
+<p>The expressions inside @active_if are evaluated each time
+<tt class="docutils literal"><span class="pre">pipeline_run</span></tt>, <tt class="docutils literal"><span class="pre">pipeline_printout</span></tt> or <tt class="docutils literal"><span class="pre">pipeline_printout_graph</span></tt> is called.</p>
+<p class="last">Dormant tasks behave as if they are up to date and have no output.</p>
+</dd>
+</dl>
+</li>
+<li><dl class="first docutils">
+<dt>Command line parsing</dt>
+<dd><ul class="first last simple">
+<li>Supports both argparse (python 2.7) and optparse (python 2.6):</li>
+<li><tt class="docutils literal"><span class="pre">Ruffus.cmdline</span></tt> module is optional.</li>
+<li>See <a class="reference internal" href="tutorials/new_tutorial/command_line.html#new-manual-cmdline"><em>manual</em></a></li>
+</ul>
+</dd>
+</dl>
+</li>
+<li><dl class="first docutils">
+<dt>Optionally terminate pipeline after first exception</dt>
+<dd><p class="first">To have all exceptions interrupt immediately:</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="n">pipeline_run</span><span class="p">(</span><span class="o">...</span><span class="p">,</span> <span class="n">exceptions_terminate_immediately</span> <span class="o">=</span> <span class="bp">True</span><span class="p">)</span>
+</pre></div>
+</div>
+<p>By default ruffus accumulates <tt class="docutils literal"><span class="pre">NN</span></tt> errors before interrupting the pipeline prematurely. <tt class="docutils literal"><span class="pre">NN</span></tt> is the specified parallelism for <tt class="docutils literal"><span class="pre">pipeline_run(...,</span> <span class="pre">multiprocess</span> <span class="pre">=</span> <span class="pre">NN)</span></tt>.</p>
+<p class="last">Otherwise, a pipeline will only be interrupted immediately if exceptions of type <tt class="docutils literal"><span class="pre">ruffus.JobSignalledBreak</span></tt> are thrown.</p>
+</dd>
+</dl>
+</li>
+<li><p class="first">Display exceptions without delay</p>
+<blockquote>
+<div><p>By default, Ruffus re-throws exceptions in ensemble after pipeline termination.</p>
+<p>To see exceptions as they occur:</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="n">pipeline_run</span><span class="p">(</span><span class="o">...</span><span class="p">,</span> <span class="n">log_exceptions</span> <span class="o">=</span> <span class="bp">True</span><span class="p">)</span>
+</pre></div>
+</div>
+<p><tt class="docutils literal"><span class="pre">logger.error(...)</span></tt> will be invoked with the string representation of the each exception, and associated stack trace.</p>
+<p>The default logger prints to sys.stderr, but this can be changed to any class from the logging module or a compatible object via <tt class="docutils literal"><span class="pre">pipeline_run(...,</span> <span class="pre">logger</span> <span class="pre">=</span> <span class="pre">???)</span></tt></p>
+</div></blockquote>
+</li>
+<li><p class="first">Improved <tt class="docutils literal"><span class="pre">pipeline_printout()</span></tt></p>
+<blockquote>
+<div><ul>
+<li><p class="first"><cite>@split</cite> operations now show the 1->many output in pipeline_printout</p>
+<blockquote>
+<div><p>This makes it clearer that <tt class="docutils literal"><span class="pre">@split</span></tt> is creating multiple output parameters (rather than a single output parameter consisting of a list):</p>
+<div class="highlight-python"><pre>Task = split_animals
+ Job = [None
+ -> cows
+ -> horses
+ -> pigs
+ , any_extra_parameters]</pre>
+</div>
+</div></blockquote>
+</li>
+<li><p class="first">File date and time are displayed in human readable form and out of date files are flagged with asterisks.</p>
+</li>
+</ul>
+</div></blockquote>
+</li>
+</ul>
+</div></blockquote>
+</div>
+<div class="section" id="version-2-2">
+<h2>version 2.2<a class="headerlink" href="#version-2-2" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>22nd July, 2010</p>
+<ul>
+<li><p class="first">Simplifying <strong>@transform</strong> syntax with <strong>suffix(...)</strong></p>
+<blockquote>
+<div><p>Regular expressions within ruffus are very powerful, and can allow files to be moved
+from one directory to another and renamed at will.</p>
+<p>However, using consistent file extensions and
+<tt class="docutils literal"><span class="pre">@transform(...,</span> <span class="pre">suffix(...))</span></tt> makes the code much simpler and easier to read.</p>
+<p>Previously, <tt class="docutils literal"><span class="pre">suffix(...)</span></tt> did not cooperate well with <tt class="docutils literal"><span class="pre">inputs(...)</span></tt>.
+For example, finding the corresponding header file (“.h”) for the matching input
+required a complicated <tt class="docutils literal"><span class="pre">regex(...)</span></tt> regular expression and <tt class="docutils literal"><span class="pre">inputs(...)</span></tt>. This simple case,
+e.g. matching “something.c” with “something.h”, is now much easier in Ruffus.</p>
+<dl class="docutils">
+<dt>For example:</dt>
+<dd><div class="first highlight-python"><div class="highlight"><pre><span class="n">source_files</span> <span class="o">=</span> <span class="p">[</span><span class="s">"something.c"</span><span class="p">,</span> <span class="s">"more_code.c"</span><span class="p">]</span>
+<span class="nd">@transform</span><span class="p">(</span><span class="n">source_files</span><span class="p">,</span> <span class="n">suffix</span><span class="p">(</span><span class="s">".c"</span><span class="p">),</span> <span class="n">add_inputs</span><span class="p">(</span><span class="s">r"\1.h"</span><span class="p">,</span> <span class="s">"common.h"</span><span class="p">),</span> <span class="s">".o"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">compile</span><span class="p">(</span><span class="n">input_files</span><span class="p">,</span> <span class="n">output_file</span><span class="p">):</span>
+ <span class="p">(</span> <span class="n">source_file</span><span class="p">,</span>
+ <span class="n">header_file</span><span class="p">,</span>
+ <span class="n">common_header</span><span class="p">)</span> <span class="o">=</span> <span class="n">input_files</span>
+ <span class="c"># call compiler to make object file</span>
+</pre></div>
+</div>
+<p>This is equivalent to calling:</p>
+<blockquote class="last">
+<div><div class="highlight-python"><div class="highlight"><pre><span class="nb">compile</span><span class="p">([</span><span class="s">"something.c"</span><span class="p">,</span> <span class="s">"something.h"</span><span class="p">,</span> <span class="s">"common.h"</span><span class="p">],</span> <span class="s">"something.o"</span><span class="p">)</span>
+<span class="nb">compile</span><span class="p">([</span><span class="s">"more_code.c"</span><span class="p">,</span> <span class="s">"more_code.h"</span><span class="p">,</span> <span class="s">"common.h"</span><span class="p">],</span> <span class="s">"more_code.o"</span><span class="p">)</span>
+</pre></div>
+</div>
+</div></blockquote>
+</dd>
+</dl>
+<p>The <tt class="docutils literal"><span class="pre">\1</span></tt> matches everything <em>but</em> the suffix and will be applied to both <tt class="docutils literal"><span class="pre">glob</span></tt>s and file names.</p>
+</div></blockquote>
+</li>
+</ul>
+<p>For simplicity and compatibility with previous versions, there is always an implied r“\1” before
+the output parameters, i.e. output parameter strings are <em>always</em> substituted.</p>
+<ul>
+<li><p class="first">Tasks and glob in <strong>inputs(...)</strong> and <strong>add_inputs(...)</strong></p>
+<blockquote>
+<div><p><tt class="docutils literal"><span class="pre">glob</span></tt>s and tasks can be added as the prerequisites / input files using
+<tt class="docutils literal"><span class="pre">inputs(...)</span></tt> and <tt class="docutils literal"><span class="pre">add_inputs(...)</span></tt>. <tt class="docutils literal"><span class="pre">glob</span></tt> expansions will take place when the task
+is run.</p>
+</div></blockquote>
+</li>
+<li><p class="first">Advanced form of <strong>@split</strong> with <strong>regex</strong>:</p>
+<blockquote>
+<div><p>The standard <tt class="docutils literal"><span class="pre">@split</span></tt> divided one set of inputs into multiple outputs (the number of which
+can be determined at runtime).</p>
+<p>This is a <tt class="docutils literal"><span class="pre">one->many</span></tt> operation.</p>
+<p>An advanced form of <tt class="docutils literal"><span class="pre">@split</span></tt> has been added which can split each of several files further.</p>
+<p>In other words, this is a <tt class="docutils literal"><span class="pre">many->"many</span> <span class="pre">more"</span></tt> operation.</p>
+<dl class="docutils">
+<dt>For example, given three starting files:</dt>
+<dd><div class="first last highlight-python"><div class="highlight"><pre><span class="n">original_files</span> <span class="o">=</span> <span class="p">[</span><span class="s">"original_0.file"</span><span class="p">,</span>
+ <span class="s">"original_1.file"</span><span class="p">,</span>
+ <span class="s">"original_2.file"</span><span class="p">]</span>
+</pre></div>
+</div>
+</dd>
+<dt>We can split each into its own set of sub-sections:</dt>
+<dd><div class="first last highlight-python"><div class="highlight"><pre><span class="nd">@split</span><span class="p">(</span><span class="n">original_files</span><span class="p">,</span>
+ <span class="n">regex</span><span class="p">(</span><span class="s">r"starting_(\d+).fa"</span><span class="p">),</span> <span class="c"># match starting files</span>
+ <span class="s">r"files.split.\1.*.fa"</span> <span class="c"># glob pattern</span>
+ <span class="s">r"\1"</span><span class="p">)</span> <span class="c"># index of original file</span>
+<span class="k">def</span> <span class="nf">split_files</span><span class="p">(</span><span class="n">input_file</span><span class="p">,</span> <span class="n">output_files</span><span class="p">,</span> <span class="n">original_index</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Code to split each input_file</span>
+<span class="sd"> "original_0.file" -> "files.split.0.*.fa"</span>
+<span class="sd"> "original_1.file" -> "files.split.1.*.fa"</span>
+<span class="sd"> "original_2.file" -> "files.split.2.*.fa"</span>
+<span class="sd"> """</span>
+</pre></div>
+</div>
+</dd>
+</dl>
+<p>This is, conceptually, the reverse of the @collate(...) decorator.</p>
+</div></blockquote>
+</li>
+<li><p class="first">Ruffus will complain about unescaped regular expression special characters:</p>
+<blockquote>
+<div><p>Ruffus uses “\1” and “\2” in regular expression substitutions. Even seasoned python
+users may not remember that these have to be ‘escaped’ in strings. The best option is
+to use ‘raw’ python strings e.g.</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="s">r"\1_substitutes\2correctly\3four\4times"</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p>Ruffus will throw an exception if it sees an unescaped “\1” or “\2” in a file name,
+which should catch most of these bugs.</p>
+</div></blockquote>
+</li>
+<li><p class="first">Prettier output from <em>pipeline_printout_graph</em></p>
+<blockquote>
+<div><p>Changed to nicer colours, symbols etc. for a more professional look.
+@split and @merge tasks now look different from @transform.
+Colours, size and resolution are now fully customisable:</p>
+<div class="highlight-python"><pre>pipeline_printout_graph( #...
+ user_colour_scheme = {
+ "colour_scheme_index":1,
+ "Task to run" : {"fillcolor":"blue"},
+ pipeline_name : "My flowchart",
+ size : (11,8),
+ dpi : 120)})</pre>
+</div>
+<p>An SVG bug in Firefox has been worked around so that font sizes are displayed correctly.</p>
+</div></blockquote>
+</li>
+</ul>
+</div></blockquote>
+</div>
+<div class="section" id="version-2-1-1">
+<h2>version 2.1.1<a class="headerlink" href="#version-2-1-1" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><ul>
+<li><dl class="first docutils">
+<dt><strong>@transform(.., add_inputs(...))</strong></dt>
+<dd><p class="first"><tt class="docutils literal"><span class="pre">add_inputs(...)</span></tt> allows the addition of extra input dependencies / parameters for each job.</p>
+<dl class="docutils">
+<dt>Unlike <tt class="docutils literal"><span class="pre">inputs(...)</span></tt>, the original input parameter is retained:</dt>
+<dd><div class="first last highlight-python"><pre>from ruffus import *
+ at transform(["a.input", "b.input"], suffix(".input"), add_inputs("just.1.more","just.2.more"), ".output")
+def task(i, o):
+""</pre>
+</div>
+</dd>
+<dt>Produces:</dt>
+<dd><div class="first last highlight-python"><pre>Job = [[a.input, just.1.more, just.2.more] ->a.output]
+Job = [[b.input, just.1.more, just.2.more] ->b.output]</pre>
+</div>
+</dd>
+</dl>
+<p class="last">Like <tt class="docutils literal"><span class="pre">inputs</span></tt>, <tt class="docutils literal"><span class="pre">add_inputs</span></tt> accepts strings, tasks and <tt class="docutils literal"><span class="pre">glob</span></tt> s
+This minor syntactic change promises add much clarity to Ruffus code.
+<tt class="docutils literal"><span class="pre">add_inputs()</span></tt> is available for <tt class="docutils literal"><span class="pre">@transform</span></tt>, <tt class="docutils literal"><span class="pre">@collate</span></tt> and <tt class="docutils literal"><span class="pre">@split</span></tt></p>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
+</div>
+<div class="section" id="version-2-1-0">
+<h2>version 2.1.0<a class="headerlink" href="#version-2-1-0" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><ul>
+<li><p class="first"><strong>@jobs_limit</strong>
+Some tasks are resource intensive and too many jobs should not be run at the
+same time. Examples include disk intensive operations such as unzipping, or
+downloading from FTP sites.</p>
+<p>Adding:</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="nd">@jobs_limit</span><span class="p">(</span><span class="mi">4</span><span class="p">)</span>
+<span class="nd">@transform</span><span class="p">(</span><span class="n">new_data_list</span><span class="p">,</span> <span class="n">suffix</span><span class="p">(</span><span class="s">".big_data.gz"</span><span class="p">),</span> <span class="s">".big_data"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">unzip</span><span class="p">(</span><span class="n">i</span><span class="p">,</span> <span class="n">o</span><span class="p">):</span>
+ <span class="s">"unzip code goes here"</span>
+</pre></div>
+</div>
+<p>would limit the unzip operation to 4 jobs at a time, even if the rest of the
+pipeline runs highly in parallel.</p>
+<p>(Thanks to Rob Young for suggesting this.)</p>
+</li>
+</ul>
+</div></blockquote>
+</div>
+<div class="section" id="version-2-0-10">
+<h2>version 2.0.10<a class="headerlink" href="#version-2-0-10" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><ul>
+<li><p class="first"><strong>touch_files_only</strong> option for <strong>pipeline_run</strong></p>
+<p>When the pipeline runs, task functions will not be run. Instead, the output files for
+each job (in each task) will be <tt class="docutils literal"><span class="pre">touch</span></tt>-ed if necessary.
+This can be useful for simulating a pipeline run so that all files look as
+if they are up-to-date.</p>
+<p>Caveats:</p>
+<blockquote>
+<div><ul class="simple">
+<li>This may not work correctly where output files are only determined at runtime, e.g. with <strong>@split</strong></li>
+<li>Only the output from pipelined jobs which are currently out-of-date will be <tt class="docutils literal"><span class="pre">touch</span></tt>-ed.
+In other words, the pipeline runs <em>as normal</em>; the only difference is that the
+output files are <tt class="docutils literal"><span class="pre">touch</span></tt>-ed instead of being created by the python task functions
+which would otherwise have been called.</li>
+</ul>
+</div></blockquote>
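+<p>For example, a minimal sketch (<tt class="docutils literal"><span class="pre">final_task</span></tt> is a hypothetical, previously defined task function):</p>
+<div class="highlight-python"><pre>from ruffus import pipeline_run
+
+# bring out-of-date output files up to date without running the task bodies
+pipeline_run([final_task], touch_files_only = True)</pre>
+</div>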
+</li>
+<li><p class="first">Parameter substitution for <strong>inputs(...)</strong></p>
+<p>The <strong>inputs(...)</strong> parameter in <strong>@transform</strong>, <strong>@collate</strong> can now take tasks and <tt class="docutils literal"><span class="pre">glob</span></tt> s,
+and these will be expanded appropriately (after regular expression replacement).</p>
+<p>For example:</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="nd">@transform</span><span class="p">(</span><span class="s">"dir/a.input"</span><span class="p">,</span> <span class="n">regex</span><span class="p">(</span><span class="s">r"(.*)\/(.+).input"</span><span class="p">),</span>
+ <span class="n">inputs</span><span class="p">((</span><span class="s">r"\1/\2.other"</span><span class="p">,</span> <span class="s">r"\1/*.more"</span><span class="p">)),</span> <span class="s">r"elsewhere/\2.output"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">task1</span><span class="p">(</span><span class="n">i</span><span class="p">,</span> <span class="n">o</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Some pipeline task</span>
+<span class="sd"> """</span>
+</pre></div>
+</div>
+<p>Is equivalent to calling:</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="n">task1</span><span class="p">((</span><span class="s">"dir/a.other"</span><span class="p">,</span> <span class="s">"dir/1.more"</span><span class="p">,</span> <span class="s">"dir/2.more"</span><span class="p">),</span> <span class="s">"elsewhere/a.output"</span><span class="p">)</span>
+</pre></div>
+</div>
+<p></p>
+<blockquote>
+<div><p>Here:</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="s">r"\1/*.more"</span>
+</pre></div>
+</div>
+<p>is first converted to:</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="s">r"dir/*.more"</span>
+</pre></div>
+</div>
+<p>which matches:</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="s">"dir/1.more"</span>
+<span class="s">"dir/2.more"</span>
+</pre></div>
+</div>
+</div></blockquote>
+</li>
+</ul>
+</div></blockquote>
+</div>
+<div class="section" id="version-2-0-9">
+<h2>version 2.0.9<a class="headerlink" href="#version-2-0-9" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><ul>
+<li><p class="first">Better display of logging output</p>
+</li>
+<li><p class="first">Advanced form of <strong>@split</strong>
+This is an experimental feature.</p>
+<p>Hitherto, <strong>@split</strong> only took one set of inputs (tasks/files/<tt class="docutils literal"><span class="pre">glob</span></tt>s) and split these
+into an indeterminate number of outputs.</p>
+<blockquote>
+<div><p>This is a one->many operation.</p>
+</div></blockquote>
+<p>Sometimes it is desirable to take multiple input files, and split each of them further.</p>
+<blockquote>
+<div><p>This is a many->many (more) operation.</p>
+</div></blockquote>
+<p>It is possible to hack something together using <strong>@transform</strong> but downstream tasks would not be
+aware that each job in <strong>@transform</strong> produces multiple outputs (rather than one input,
+one output per job).</p>
+<p>The syntax looks like:</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="nd">@split</span><span class="p">(</span><span class="n">get_files</span><span class="p">,</span> <span class="n">regex</span><span class="p">(</span><span class="s">r"(.+).original"</span><span class="p">),</span> <span class="s">r"\1.*.split"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">split_files</span><span class="p">(</span><span class="n">i</span><span class="p">,</span> <span class="n">o</span><span class="p">):</span>
+ <span class="k">pass</span>
+</pre></div>
+</div>
+<p>If <tt class="docutils literal"><span class="pre">get_files()</span></tt> returned <tt class="docutils literal"><span class="pre">A.original</span></tt>, <tt class="docutils literal"><span class="pre">B.original</span></tt> and <tt class="docutils literal"><span class="pre">C.original</span></tt>,
+<tt class="docutils literal"><span class="pre">split_files()</span></tt> might lead to the following operations:</p>
+<div class="highlight-python"><pre>A.original
+ -> A.1.original
+ -> A.2.original
+ -> A.3.original
+B.original
+ -> B.1.original
+ -> B.2.original
+C.original
+ -> C.1.original
+ -> C.2.original
+ -> C.3.original
+ -> C.4.original
+ -> C.5.original</pre>
+</div>
+<p>Note that each input (<tt class="docutils literal"><span class="pre">A/B/C.original</span></tt>) can produce a number of outputs, the exact
+number of which does not have to be pre-determined.
+This is similar to <strong>@split</strong>.</p>
+<p>Tasks following <tt class="docutils literal"><span class="pre">split_files</span></tt> will have ten inputs corresponding to each of the
+outputs from <tt class="docutils literal"><span class="pre">split_files</span></tt>.</p>
+<p>If <strong>@transform</strong> was used instead of <strong>@split</strong>, then tasks following <tt class="docutils literal"><span class="pre">split_files</span></tt>
+would only have 3 inputs.</p>
+</li>
+</ul>
+</div></blockquote>
+</div>
+<div class="section" id="version-2-0-8">
+<h2>version 2.0.8<a class="headerlink" href="#version-2-0-8" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><ul class="simple">
+<li>File names can be in unicode</li>
+<li>File systems with 1 second timestamp granularity no longer cause problems.</li>
+</ul>
+</div></blockquote>
+</div>
+<div class="section" id="version-2-0-2">
+<h2>version 2.0.2<a class="headerlink" href="#version-2-0-2" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><ul class="simple">
+<li>Much prettier / more useful output from <a class="reference internal" href="pipeline_functions.html#pipeline-functions-pipeline-printout"><em>pipeline_printout</em></a></li>
+<li>New tutorial / manual</li>
+</ul>
+</div></blockquote>
+</div>
+<div class="section" id="version-2-0">
+<h2>version 2.0<a class="headerlink" href="#version-2-0" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><ul>
+<li><p class="first">Revamped documentation:</p>
+<blockquote>
+<div><ul class="simple">
+<li>Rewritten tutorial</li>
+<li>Comprehensive manual</li>
+<li>New syntax help</li>
+</ul>
+</div></blockquote>
+</li>
+<li><p class="first">Major redesign. New decorators include</p>
+<blockquote>
+<div><ul class="simple">
+<li><a class="reference internal" href="tutorials/new_tutorial/split.html#new-manual-split"><em>@split</em></a></li>
+<li><a class="reference internal" href="tutorials/new_tutorial/transform.html#new-manual-transform"><em>@transform</em></a></li>
+<li><a class="reference internal" href="tutorials/new_tutorial/merge.html#new-manual-merge"><em>@merge</em></a></li>
+<li><a class="reference internal" href="tutorials/new_tutorial/subdivide_collate.html#new-manual-collate"><em>@collate</em></a></li>
+</ul>
+</div></blockquote>
+</li>
+<li><p class="first">Major redesign. Decorator <em>inputs</em> can mix</p>
+<blockquote>
+<div><ul class="simple">
+<li>Output from previous tasks</li>
+<li><a class="reference external" href="http://docs.python.org/library/glob.html"><cite>glob</cite></a> patterns e.g. <tt class="docutils literal"><span class="pre">*.txt</span></tt></li>
+<li>File names</li>
+<li>Any other data type</li>
+</ul>
+</div></blockquote>
+</li>
+</ul>
+</div></blockquote>
+</div>
+<div class="section" id="version-1-1-4">
+<h2>version 1.1.4<a class="headerlink" href="#version-1-1-4" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div>Tasks can get their input by automatically chaining to the output from one or more parent tasks using <a class="reference internal" href="decorators/files_re.html#decorators-files-re"><em>@files_re</em></a></div></blockquote>
+</div>
+<div class="section" id="version-1-0-7">
+<h2>version 1.0.7<a class="headerlink" href="#version-1-0-7" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div>Added <cite>proxy_logger</cite> module for accessing a shared log across multiple jobs in different processes.</div></blockquote>
+</div>
+<div class="section" id="version-1-0">
+<h2>version 1.0<a class="headerlink" href="#version-1-0" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div>Initial Release in Oxford</div></blockquote>
+</div>
+</div>
+<div class="section" id="fixed-bugs">
+<h1>Fixed Bugs<a class="headerlink" href="#fixed-bugs" title="Permalink to this headline">¶</a></h1>
+<blockquote>
+<div>Full list at <a class="reference external" href="http://code.google.com/p/ruffus/wiki/LatestChanges">“Latest Changes wiki entry”</a></div></blockquote>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#">Major Features added to Ruffus</a><ul>
+<li><a class="reference internal" href="#version-2-5rc">version 2.5RC</a><ul>
+<li><a class="reference internal" href="#python3-compatability-but-at-least-python-2-6-is-now-required">1) Python3 compatability (but at least python 2.6 is now required)</a></li>
+<li><a class="reference internal" href="#ctrl-c-interrupts">2) Ctrl-C interrupts</a></li>
+<li><a class="reference internal" href="#customising-flowcharts-in-pipeline-printout-graph-with-graphviz">3) Customising flowcharts in pipeline_printout_graph() with <tt class="docutils literal"><span class="pre">@graphviz</span></tt></a></li>
+<li><a class="reference internal" href="#consistent-verbosity-levels">4. Consistent verbosity levels</a></li>
+<li><a class="reference internal" href="#allow-abbreviated-paths-from-pipeline-run-or-pipeline-printout">5. Allow abbreviated paths from <tt class="docutils literal"><span class="pre">pipeline_run</span></tt> or <tt class="docutils literal"><span class="pre">pipeline_printout</span></tt></a></li>
+<li><a class="reference internal" href="#other-changes">Other changes</a></li>
+</ul>
+</li>
+<li><a class="reference internal" href="#version-2-4-1">version 2.4.1</a></li>
+<li><a class="reference internal" href="#version-2-4">version 2.4</a><ul>
+<li><a class="reference internal" href="#additions-to-ruffus-namespace">Additions to <tt class="docutils literal"><span class="pre">ruffus</span></tt> namespace</a></li>
+<li><a class="reference internal" href="#installation-use-pip">Installation: use pip</a></li>
+<li><a class="reference internal" href="#command-line-support">1) Command Line support</a></li>
+<li><a class="reference internal" href="#check-pointing">2) Check pointing</a></li>
+<li><a class="reference internal" href="#subdivide-syntax">3) <tt class="docutils literal"><span class="pre">subdivide()</span></tt> (<tt class="docutils literal"><span class="pre">syntax</span></tt>)</a></li>
+<li><a class="reference internal" href="#mkdir-syntax-with-formatter-suffix-and-regex">4) <tt class="docutils literal"><span class="pre">mkdir()</span></tt> (<tt class="docutils literal"><span class="pre">syntax</span></tt>) with <tt class="docutils literal"><span class="pre">formatter()</span></tt>, <tt class="docutils literal"><span class="pre">suffix()</span></tt> and <tt class="docutils literal"><span class="pre">regex()</span></tt></a></li>
+<li><a class="reference internal" href="#originate-syntax">5) <tt class="docutils literal"><span class="pre">originate()</span></tt> (<tt class="docutils literal"><span class="pre">syntax</span></tt>)</a></li>
+<li><a class="reference internal" href="#new-flexible-formatter-syntax-alternative-to-regex-suffix">6) New flexible <tt class="docutils literal"><span class="pre">formatter()</span></tt> (<tt class="docutils literal"><span class="pre">syntax</span></tt>) alternative to <tt class="docutils literal"><span class="pre">regex()</span></tt> & <tt class="docutils literal"><span class="pre">suffix()</span></tt></a></li>
+<li><a class="reference internal" href="#combinatorics-all-vs-all-decorators">7) Combinatorics (all vs. all decorators)</a></li>
+<li><a class="reference internal" href="#drmaa-support-and-multithreading">8) drmaa support and multithreading:</a></li>
+<li><a class="reference internal" href="#pipeline-run-and-exceptions">9) <tt class="docutils literal"><span class="pre">pipeline_run(...)</span></tt> and exceptions</a></li>
+<li><a class="reference internal" href="#miscellaneous">10) Miscellaneous</a></li>
+</ul>
+</li>
+<li><a class="reference internal" href="#version-2-3">version 2.3</a></li>
+<li><a class="reference internal" href="#version-2-2">version 2.2</a></li>
+<li><a class="reference internal" href="#version-2-1-1">version 2.1.1</a></li>
+<li><a class="reference internal" href="#version-2-1-0">version 2.1.0</a></li>
+<li><a class="reference internal" href="#version-2-0-10">version 2.0.10</a></li>
+<li><a class="reference internal" href="#version-2-0-9">version 2.0.9</a></li>
+<li><a class="reference internal" href="#version-2-0-8">version 2.0.8</a></li>
+<li><a class="reference internal" href="#version-2-0-2">version 2.0.2</a></li>
+<li><a class="reference internal" href="#version-2-0">version 2.0</a></li>
+<li><a class="reference internal" href="#version-1-1-4">version 1.1.4</a></li>
+<li><a class="reference internal" href="#version-1-0-7">version 1.0.7</a></li>
+<li><a class="reference internal" href="#version-1-0">version 1.0</a></li>
+</ul>
+</li>
+<li><a class="reference internal" href="#fixed-bugs">Fixed Bugs</a></li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="design.html"
+ title="previous chapter">Design & Architecture</a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="todo.html"
+ title="next chapter">Future Changes to Ruffus</a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="_sources/history.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="decorators/decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="decorators/originate.html">@originate</a> </li>
+ <li><a href="decorators/split.html">@split</a> </li>
+ <li><a href="decorators/transform.html">@transform</a> </li>
+ <li><a href="decorators/merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="decorators/decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="decorators/subdivide.html">@subdivide</a> </li>
+ <li><a href="decorators/transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="decorators/collate.html">@collate</a> </li>
+ <li><a href="decorators/collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="decorators/graphviz.html">@graphviz</a> </li>
+ <li><a href="decorators/mkdir.html">@mkdir</a> </li>
+ <li><a href="decorators/follows.html">@follows / mkdir</a> </li>
+ <li><a href="decorators/posttask.html">@posttask touch_file</a> </li>
+ <li><a href="decorators/active_if.html">@active_if</a> </li>
+ <li><a href="decorators/jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="decorators/decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="decorators/product.html">@product </a> </li>
+ <li><a href="decorators/permutations.html">@permutations </a> </li>
+ <li><a href="decorators/combinations.html">@combinations </a> </li>
+ <li><a href="decorators/combinations_with_replacement.html">@combinations_ _with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="decorators/decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="decorators/files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="decorators/parallel.html">@parallel</a> </li>
+ <li><a href="decorators/check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="decorators/indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="todo.html" title="Future Changes to Ruffus"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="design.html" title="Design & Architecture"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="index.html">Home</a> | </li>
+ <li><a href="contents.html">Contents</a> | </li>
+ <li><a href="installation.html">Install</a> | </li>
+ <li><a href="tutorials/new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="tutorials/new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="faq.html">FAQ</a> | </li>
+ <li><a href="cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="tutorials/new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="gallery.html">Gallery</a> | </li>
+ <li><a href="#">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/implementation_notes.html b/doc/_build/html/implementation_notes.html
new file mode 100644
index 0000000..90bca4d
--- /dev/null
+++ b/doc/_build/html/implementation_notes.html
@@ -0,0 +1,623 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>Implementation Tips — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: './',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="_static/jquery.js"></script>
+ <script type="text/javascript" src="_static/underscore.js"></script>
+ <script type="text/javascript" src="_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="index.html" />
+ <link rel="next" title="FAQ" href="faq.html" />
+ <link rel="prev" title="Future Changes to Ruffus" href="todo.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="faq.html" title="FAQ"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="todo.html" title="Future Changes to Ruffus"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="index.html">Home</a> | </li>
+ <li><a href="contents.html">Contents</a> | </li>
+ <li><a href="installation.html">Install</a> | </li>
+ <li><a href="tutorials/new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="tutorials/new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="faq.html">FAQ</a> | </li>
+ <li><a href="cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="tutorials/new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="gallery.html">Gallery</a> | </li>
+ <li><a href="history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <div class="section" id="implementation-tips">
+<h1>Implementation Tips<a class="headerlink" href="#implementation-tips" title="Permalink to this headline">¶</a></h1>
+<div class="section" id="release">
+<h2>Release<a class="headerlink" href="#release" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><ul>
+<li><p class="first">Change <tt class="docutils literal"><span class="pre">ruffus_version.py</span></tt></p>
+</li>
+<li><p class="first">Rebuild pdf and copy it to <tt class="docutils literal"><span class="pre">doc/static_data</span></tt></p>
+<blockquote>
+<div><div class="highlight-python"><pre>cd doc
+make latexpdf
+cp _build/latex/ruffus.pdf static_data</pre>
+</div>
+</div></blockquote>
+</li>
+<li><p class="first">Rebuild documentation:</p>
+<div class="highlight-python"><pre>make htmlsync</pre>
+</div>
+</li>
+<li><p class="first">tag git with, for example:</p>
+<div class="highlight-python"><pre>git tag -a v2.5RC -m "Version 2.5 Release Candidate"</pre>
+</div>
+</li>
+<li><p class="first">Upload to pypi:</p>
+<div class="highlight-python"><pre>python setup.py sdist --format=gztar upload</pre>
+</div>
+</li>
+</ul>
+</div></blockquote>
+</div>
+<div class="section" id="dbdict-py">
+<h2>dbdict.py<a class="headerlink" href="#dbdict-py" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>This is an sqlite-backed dictionary originally written by Jacob Sondergaard and
+contributed by Jake Biesinger, who added automatic pickling of python objects.</p>
+<p>The pickling code was refactored out by Leo Goodstadt into separate functions as
+part of the preparation to make Ruffus python3 ready.</p>
+<p>Python originally saved (pickled) objects as 7-bit ASCII strings. Later formats
+(protocol = -1 is the latest) use 8-bit strings and are rather more efficient.</p>
+<p>These then need to be saved as BLOBs to sqlite3 rather than normal strings. We
+can signal this by wrapping the pickled string in an object providing a “buffer interface”.
+This is <tt class="docutils literal"><span class="pre">buffer</span></tt> in python2.6/2.7 and <tt class="docutils literal"><span class="pre">memoryview</span></tt> in python3.</p>
+<p><a class="reference external" href="http://bugs.python.org/issue7723">http://bugs.python.org/issue7723</a> suggests there is no portable python2/3 way to write
+blobs to Sqlite without these two incompatible wrappers.
+This would require conditional compilation:</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="k">if</span> <span class="n">sys</span><span class="o">.</span><span class="n">hexversion</span> <span class="o">>=</span> <span class="mh">0x03000000</span><span class="p">:</span>
+ <span class="n">value</span> <span class="o">=</span> <span class="n">memoryview</span><span class="p">(</span><span class="n">pickle</span><span class="o">.</span><span class="n">dumps</span><span class="p">(</span><span class="n">value</span><span class="p">,</span> <span class="n">protocol</span> <span class="o">=</span> <span class="o">-</span><span class="mi">1</span><span class="p">))</span>
+<span class="k">else</span><span class="p">:</span>
+ <span class="n">value</span> <span class="o">=</span> <span class="nb">buffer</span><span class="p">(</span><span class="n">pickle</span><span class="o">.</span><span class="n">dumps</span><span class="p">(</span><span class="n">value</span><span class="p">,</span> <span class="n">protocol</span> <span class="o">=</span> <span class="o">-</span><span class="mi">1</span><span class="p">))</span>
+</pre></div>
+</div>
+<p>Despite the discussion on the bug report, sqlite3.Binary seems to work.
+We shall see if this is portable to python3.</p>
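+<p>A minimal sketch of the <tt class="docutils literal"><span class="pre">sqlite3.Binary</span></tt> approach (the table layout copies the Ruffus history file; everything else here is illustrative, not the actual dbdict code):</p>
+<div class="highlight-python"><pre>import pickle
+import sqlite3
+
+def to_blob(value):
+    # sqlite3.Binary marks the pickled bytes as a BLOB in both python2 and python3
+    return sqlite3.Binary(pickle.dumps(value, protocol=-1))
+
+conn = sqlite3.connect(":memory:")
+conn.execute("CREATE TABLE data (key PRIMARY KEY, value)")
+conn.execute("INSERT INTO data VALUES (?, ?)", ("some.output.file", to_blob({"a": 1})))
+stored = conn.execute("SELECT value FROM data").fetchone()[0]
+print(pickle.loads(bytes(stored)))</pre>
+</div>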
+</div></blockquote>
+</div>
+<div class="section" id="how-to-write-new-decorators">
+<h2>how to write new decorators<a class="headerlink" href="#how-to-write-new-decorators" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>New placeholder class. E.g. for <tt class="docutils literal"><span class="pre">@new_deco</span></tt></p>
+<div class="highlight-python"><div class="highlight"><pre><span class="k">class</span> <span class="nc">new_deco</span><span class="p">(</span><span class="n">task_decorator</span><span class="p">):</span>
+ <span class="k">pass</span>
+</pre></div>
+</div>
+<p>Add to list of action names and ids:</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="n">action_names</span> <span class="o">=</span> <span class="p">[</span><span class="s">"unspecified"</span><span class="p">,</span>
+ <span class="o">...</span>
+ <span class="s">"task_new_deco"</span><span class="p">,</span>
+
+<span class="n">action_task_new_deco</span> <span class="o">=</span> <span class="mi">15</span>
+</pre></div>
+</div>
+<p>Add function:</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="k">def</span> <span class="nf">task_transform</span> <span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">orig_args</span><span class="p">):</span>
+</pre></div>
+</div>
+<p>Add documentation to:</p>
+<blockquote>
+<div><ul class="simple">
+<li>decorators/NEW_DECORATOR.rst</li>
+<li>decorators/decorators.rst</li>
+<li>_templates/layout.html</li>
+<li>manual</li>
+</ul>
+</div></blockquote>
+</div></blockquote>
+</div>
+</div>
+<div class="section" id="implementation-notes">
+<h1>Implementation notes<a class="headerlink" href="#implementation-notes" title="Permalink to this headline">¶</a></h1>
+<p>N.B. Remember to cite Jake Biesinger and see if he is interested in being a co-author if we ever resubmit the drastically changed version...
+He contributed checkpointing, travis, tox etc.</p>
+<div class="section" id="ctrl-c-handling">
+<span id="todo-misfeatures"></span><h2><tt class="docutils literal"><span class="pre">Ctrl-C</span></tt> handling<a class="headerlink" href="#ctrl-c-handling" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>Pressing <tt class="docutils literal"><span class="pre">Ctrl-C</span></tt> left dangling processes in Ruffus 2.4 because <tt class="docutils literal"><span class="pre">KeyboardInterrupt</span></tt> does not play nicely with python <tt class="docutils literal"><span class="pre">multiprocessing.Pool</span></tt>.
+See <a class="reference external" href="http://stackoverflow.com/questions/1408356/keyboard-interrupts-with-pythons-multiprocessing-pool/1408476#1408476">http://stackoverflow.com/questions/1408356/keyboard-interrupts-with-pythons-multiprocessing-pool/1408476#1408476</a></p>
+<p><a class="reference external" href="http://bryceboe.com/2012/02/14/python-multiprocessing-pool-and-keyboardinterrupt-revisited/">http://bryceboe.com/2012/02/14/python-multiprocessing-pool-and-keyboardinterrupt-revisited/</a> provides a reimplementation of Pool which
+however only works when you have a fixed number of jobs which should then run in parallel to completion. Ruffus is considerably more
+complicated because we have a variable number of jobs completing and being submitted into the job queue at any one time. Think
+of tasks stalling while they wait for dependent tasks to complete, and then all the jobs of the task being released onto the queue at once.</p>
+<p>The solution is:</p>
+<blockquote>
+<div><ol class="arabic simple">
+<li>Use a <tt class="docutils literal"><span class="pre">timeout</span></tt> parameter when using <tt class="docutils literal"><span class="pre">IMapIterator.next(timeout=None)</span></tt> to iterate through <tt class="docutils literal"><span class="pre">pool.imap_unordered</span></tt>, because only timed <tt class="docutils literal"><span class="pre">condition</span></tt>s can be interrupted by signals...!!</li>
+<li>This involves rewriting the <tt class="docutils literal"><span class="pre">for</span></tt> loop manually as a <tt class="docutils literal"><span class="pre">while</span></tt> loop (see the sketch after this list)</li>
+<li>We use a timeout of <tt class="docutils literal"><span class="pre">99999999</span></tt>, i.e. 3 years, which should be enough for any job to complete...</li>
+<li>Googling after the fact, it looks like the galaxy guys (cool dudes or what) have written similar <a class="reference external" href="https://galaxy-dist.readthedocs.org/en/latest/_modules/galaxy/objectstore/s3_multipart_upload.html">code</a></li>
+<li><tt class="docutils literal"><span class="pre">next()</span></tt> for normal iterators do not take <tt class="docutils literal"><span class="pre">timeout</span></tt> as an extra parameter so we have to wrap next in a conditional :-(. The galaxy guys do a <a class="reference external" href="http://en.wikipedia.org/wiki/Shim_(computing)">shim</a> around <tt class="docutils literal"><span class="pre">next()</span></tt> but that is as much obsfucation as a simple if...</li>
+<li>After jobs are interrupted by a signal, we rethrow with our own exception because we want something that inherits from <tt class="docutils literal"><span class="pre">Exception</span></tt> unlike <tt class="docutils literal"><span class="pre">KeyboardInterrupt</span></tt></li>
+<li>When a signal happens, we need to immediately stop <tt class="docutils literal"><span class="pre">feed_job_params_to_process_pool()</span></tt> from sending more parameters into the job queue (<tt class="docutils literal"><span class="pre">parameter_q</span></tt>)
+We use a proxy to a <tt class="docutils literal"><span class="pre">multiprocessing.Event</span></tt> (via <tt class="docutils literal"><span class="pre">syncmanager.Event()</span></tt>). When <tt class="docutils literal"><span class="pre">death_event</span></tt> is set, all further processing stops...</li>
+<li>We also signal that all jobs should finish by putting <tt class="docutils literal"><span class="pre">all_tasks_complete()</span></tt> into <tt class="docutils literal"><span class="pre">parameter_q</span></tt> but only <tt class="docutils literal"><span class="pre">death_event</span></tt> prevents jobs already in the queue from going through</li>
+<li>After signalling, some of the child processes appear to be dead by the time we start cleaning up. <tt class="docutils literal"><span class="pre">pool.terminate()</span></tt> sometimes tries and fails to
+re-connect to the <tt class="docutils literal"><span class="pre">death_event</span></tt> proxy via sockets and throws an exception. We should really figure out a better solution, but in the meantime
+wrapping it in a <tt class="docutils literal"><span class="pre">try</span> <span class="pre">/</span> <span class="pre">except</span></tt> allows a clean exit.</li>
+<li>If a vanilla exception is raised without multiprocessing running, we still need to first save the exception in <tt class="docutils literal"><span class="pre">job_errors</span></tt> (even if it is just one) before
+cleaning up, because the cleaning up process may lead to further (ignored) exceptions which would overwrite the current exception when we need to rethrow it</li>
+</ol>
+</div></blockquote>
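+<p>A minimal sketch of the interruptible <tt class="docutils literal"><span class="pre">while</span></tt> loop described above (<tt class="docutils literal"><span class="pre">run_job</span></tt> and the clean-up are illustrative stand-ins; the real code in <tt class="docutils literal"><span class="pre">task.py</span></tt> also drains <tt class="docutils literal"><span class="pre">parameter_q</span></tt> and sets <tt class="docutils literal"><span class="pre">death_event</span></tt>):</p>
+<div class="highlight-python"><pre>import multiprocessing
+
+def run_job(params):
+    # stand-in for the real job runner
+    return params
+
+if __name__ == "__main__":
+    pool = multiprocessing.Pool(2)
+    results = pool.imap_unordered(run_job, range(10))
+    while True:
+        try:
+            # a timed next() keeps the wait interruptible by signals such as Ctrl-C
+            job_result = results.next(timeout=99999999)
+        except StopIteration:
+            break
+        except KeyboardInterrupt:
+            # mirror the clean-up above: pool.terminate() can itself fail, so guard it
+            try:
+                pool.terminate()
+            except Exception:
+                pass
+            raise
+    pool.close()
+    pool.join()</pre>
+</div>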
+<p>Exceptions thrown in the middle of a multiprocessing / multithreading job appear to be handled gracefully.</p>
+<p>For drmaa jobs, <tt class="docutils literal"><span class="pre">qdel</span></tt> may still be necessary.</p>
+</div></blockquote>
+</div>
+<div class="section" id="python3-compatability">
+<h2>Python3 compatibility<a class="headerlink" href="#python3-compatability" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>Required extensive changes especially in unit test code.</p>
+<p>Changes:</p>
+<ol class="arabic">
+<li><p class="first"><tt class="docutils literal"><span class="pre">sort</span></tt> in python3 does not order mixed types, i.e. <tt class="docutils literal"><span class="pre">int()</span></tt>, <tt class="docutils literal"><span class="pre">list()</span></tt> and <tt class="docutils literal"><span class="pre">str()</span></tt> are incommensurate</p>
+<ul>
+<li><p class="first">In <tt class="docutils literal"><span class="pre">task.get_output_files</span> <span class="pre">(...)</span></tt>, sort after conversion to string</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="nb">sorted</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">output_filenames</span><span class="p">,</span> <span class="n">key</span> <span class="o">=</span> <span class="k">lambda</span> <span class="n">x</span><span class="p">:</span> <span class="nb">str</span><span class="p">(</span><span class="n">x</span><span class="p">))</span>
+</pre></div>
+</div>
+</li>
+<li><p class="first">In <tt class="docutils literal"><span class="pre">file_name_parameters.py</span></tt>: <tt class="docutils literal"><span class="pre">collate_param_factory</span> <span class="pre">(...)</span></tt>, <tt class="docutils literal"><span class="pre">sort</span></tt> after conversion to string, then <tt class="docutils literal"><span class="pre">groupby</span></tt> without string conversion. This is
+because we can’t guarantee that two different objects do not have the same string representation. But <tt class="docutils literal"><span class="pre">groupby</span></tt> requires that similar things are adjacent...</p>
+<p>In other words, <tt class="docutils literal"><span class="pre">groupby</span></tt> is a refinement of <tt class="docutils literal"><span class="pre">sorted</span></tt></p>
+<div class="highlight-python"><div class="highlight"><pre><span class="k">for</span> <span class="n">output_extra_params</span><span class="p">,</span> <span class="n">grouped_params</span> <span class="ow">in</span> <span class="n">groupby</span><span class="p">(</span><span class="nb">sorted</span><span class="p">(</span><span class="n">io_params_iter</span><span class="p">,</span> <span class="n">key</span> <span class="o">=</span> <span class="n">get_output_extras_str</span><span cla [...]
+ <span class="k">pass</span>
+</pre></div>
+</div>
+</li>
+</ul>
+</li>
+<li><p class="first"><tt class="docutils literal"><span class="pre">print()</span></tt> is a function</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="kn">from</span> <span class="nn">__future__</span> <span class="kn">import</span> <span class="n">print_function</span>
+</pre></div>
+</div>
+</li>
+<li><p class="first"><tt class="docutils literal"><span class="pre">items()</span></tt> only returns a list in python2. Rewrite <tt class="docutils literal"><span class="pre">dict.iteritems()</span></tt> whenever this might cause a performance bottleneck</p>
+</li>
+<li><p class="first"><tt class="docutils literal"><span class="pre">zip</span></tt> and <tt class="docutils literal"><span class="pre">map</span></tt> return iterators. Conditionally import in python2</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="kn">import</span> <span class="nn">sys</span>
+<span class="k">if</span> <span class="n">sys</span><span class="o">.</span><span class="n">hexversion</span> <span class="o"><</span> <span class="mh">0x03000000</span><span class="p">:</span>
+ <span class="kn">from</span> <span class="nn">future_builtins</span> <span class="kn">import</span> <span class="nb">zip</span><span class="p">,</span> <span class="nb">map</span>
+</pre></div>
+</div>
+</li>
+<li><p class="first"><tt class="docutils literal"><span class="pre">cPickle->pickle</span></tt> <tt class="docutils literal"><span class="pre">CStringIO->io</span></tt> need to be conditionally imported</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="k">try</span><span class="p">:</span>
+ <span class="kn">import</span> <span class="nn">StringIO</span> <span class="kn">as</span> <span class="nn">io</span>
+<span class="k">except</span><span class="p">:</span>
+ <span class="kn">import</span> <span class="nn">io</span> <span class="kn">as</span> <span class="nn">io</span>
+</pre></div>
+</div>
+</li>
+<li><p class="first"><tt class="docutils literal"><span class="pre">map</span></tt> code can be changed to list comprehensions. Use <tt class="docutils literal"><span class="pre">2to3</span></tt> to do heavy lifting</p>
+</li>
+<li><p class="first">All normal strings are unicode in python3. Have to use <tt class="docutils literal"><span class="pre">bytes</span></tt> to support 8-bit char arrays.
+Normally, this means that <tt class="docutils literal"><span class="pre">str</span></tt> “just works”. However, to provide special handling of
+both 8-bit and unicode strings in python2, we often need to check for <tt class="docutils literal"><span class="pre">isinstance(xxx,</span> <span class="pre">basestring)</span></tt>.</p>
+<p>We need to conditionally define:</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="k">if</span> <span class="n">sys</span><span class="o">.</span><span class="n">hexversion</span> <span class="o">>=</span> <span class="mh">0x03000000</span><span class="p">:</span>
+ <span class="c"># everything is unicode in python3</span>
+ <span class="n">path_str_type</span> <span class="o">=</span> <span class="nb">str</span>
+<span class="k">else</span><span class="p">:</span>
+ <span class="n">path_str_type</span> <span class="o">=</span> <span class="nb">basestring</span>
+
+<span class="c"># further down...</span>
+<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">compiled_regex</span><span class="p">,</span> <span class="n">path_str_type</span><span class="p">):</span>
+ <span class="k">pass</span>
+</pre></div>
+</div>
+</li>
+</ol>
+</div></blockquote>
+</div>
+<div class="section" id="refactoring-parameter-handling">
+<h2>Refactoring: parameter handling<a class="headerlink" href="#refactoring-parameter-handling" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><dl class="docutils">
+<dt>Though the code is still split in a not very sensible way between <tt class="docutils literal"><span class="pre">ruffus_utility.py</span></tt>, <tt class="docutils literal"><span class="pre">file_name_parameters.py</span></tt> and <tt class="docutils literal"><span class="pre">task.py</span></tt>,</dt>
+<dd>some rationalisation has taken place, and comments added so further refactoring can be made more easily.</dd>
+</dl>
+<p>Common code for:</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="n">file_name_parameters</span><span class="o">.</span><span class="n">split_ex_param_factory</span><span class="p">()</span>
+<span class="n">file_name_parameters</span><span class="o">.</span><span class="n">transform_param_factory</span><span class="p">()</span>
+<span class="n">file_name_parameters</span><span class="o">.</span><span class="n">collate_param_factory</span><span class="p">()</span>
+</pre></div>
+</div>
+<p>has been moved to <tt class="docutils literal"><span class="pre">file_name_parameters.py.yield_io_params_per_job()</span></tt></p>
+<p>unit tests added to <tt class="docutils literal"><span class="pre">test_file_name_parameters.py</span></tt> and <tt class="docutils literal"><span class="pre">test_ruffus_utility.py</span></tt></p>
+</div></blockquote>
+</div>
+<div class="section" id="formatter">
+<h2><tt class="docutils literal"><span class="pre">formatter</span></tt><a class="headerlink" href="#formatter" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p><tt class="docutils literal"><span class="pre">get_all_paths_components(paths,</span> <span class="pre">regex_str)</span></tt> in <tt class="docutils literal"><span class="pre">ruffus_utility.py</span></tt></p>
+<p>Input file names are first squished into a flat list of files.
+<tt class="docutils literal"><span class="pre">get_all_paths_components()</span></tt> returns both the regular expression matches and the breakdown of the path.</p>
+<p>In case of name clashes, the categories with higher priority override the others:</p>
+<blockquote>
+<div><ol class="arabic">
+<li><p class="first">Captures by name</p>
+</li>
+<li><p class="first">Captures by index</p>
+</li>
+<li><dl class="first docutils">
+<dt>Path components:</dt>
+<dd><p class="first last">‘ext’ = extension with dot
+‘basename’ = file name without extension
+‘path’ = path before basename, not ending with slash
+‘subdir’ = list of directories starting with the most nested and ending with the root (if normalised)
+‘subpath’ = list of ‘path’ with successive directories removed starting with the most nested and ending with the root (if normalised)</p>
+</dd>
+</dl>
+</li>
+</ol>
+<p>E.g. <tt class="docutils literal"><span class="pre">name</span> <span class="pre">=</span> <span class="pre">'/a/b/c/sample1.bam'</span></tt>, <tt class="docutils literal"><span class="pre">formatter=r"(.*)(?P&lt;id&gt;\d+)\.(.+)"</span></tt> returns:</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="mi">0</span><span class="p">:</span> <span class="s">'/a/b/c/sample1.bam'</span><span class="p">,</span> <span class="o">//</span> <span class="n">Entire</span> <span class="n">match</span> <span class="n">captured</span> <span class="n">by</span> <span class="n">index</span>
+<span class="mi">1</span><span class="p">:</span> <span class="s">'/a/b/c/sample'</span><span class="p">,</span> <span class="o">//</span> <span class="n">captured</span> <span class="n">by</span> <span class="n">index</span>
+<span class="mi">2</span><span class="p">:</span> <span class="s">'bam'</span><span class="p">,</span> <span class="o">//</span> <span class="n">captured</span> <span class="n">by</span> <span class="n">index</span>
+<span class="s">'id'</span><span class="p">:</span> <span class="s">'1'</span> <span class="o">//</span> <span class="n">captured</span> <span class="n">by</span> <span class="n">name</span>
+<span class="s">'ext'</span><span class="p">:</span> <span class="s">'.bam'</span><span class="p">,</span>
+<span class="s">'subdir'</span><span class="p">:</span> <span class="p">[</span><span class="s">'c'</span><span class="p">,</span> <span class="s">'b'</span><span class="p">,</span> <span class="s">'a'</span><span class="p">,</span> <span class="s">'/'</span><span class="p">],</span>
+<span class="s">'subpath'</span><span class="p">:</span> <span class="p">[</span><span class="s">'/a/b/c'</span><span class="p">,</span> <span class="s">'/a/b'</span><span class="p">,</span> <span class="s">'/a'</span><span class="p">,</span> <span class="s">'/'</span><span class="p">],</span>
+<span class="s">'path'</span><span class="p">:</span> <span class="s">'/a/b/c'</span><span class="p">,</span>
+<span class="s">'basename'</span><span class="p">:</span> <span class="s">'sample1'</span><span class="p">,</span>
+</pre></div>
+</div>
+</div></blockquote>
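+<p>For orientation, these components are what a <tt class="docutils literal"><span class="pre">formatter()</span></tt> substitution string refers to. A hypothetical <tt class="docutils literal"><span class="pre">@transform</span></tt> using the example above might look like this (task and output names are made up):</p>
+<div class="highlight-python"><pre>from ruffus import *
+
+@transform(["/a/b/c/sample1.bam"],
+           formatter(r"(.*)(?P&lt;id&gt;\d+)\.(.+)"),
+           # 'path', 'basename' and the named capture 'id' come from the components above
+           "{path[0]}/{basename[0]}.filtered.bam",
+           "{id[0]}")
+def filter_sample(input_file, output_file, sample_id):
+    pass</pre>
+</div>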
+<p>The code is in <tt class="docutils literal"><span class="pre">ruffus_utility.py</span></tt>:</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="n">results</span> <span class="o">=</span> <span class="n">get_all_paths_components</span><span class="p">(</span><span class="n">paths</span><span class="p">,</span> <span class="n">regex_str</span><span class="p">)</span>
+<span class="n">string</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">results</span><span class="p">[</span><span class="mi">2</span><span class="p">])</span>
+</pre></div>
+</div>
+<p>All the magic is hidden inside black boxes <tt class="docutils literal"><span class="pre">filename_transform</span></tt> classes:</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="k">class</span> <span class="nc">t_suffix_filename_transform</span><span class="p">(</span><span class="n">t_filename_transform</span><span class="p">):</span>
+<span class="k">class</span> <span class="nc">t_regex_filename_transform</span><span class="p">(</span><span class="n">t_filename_transform</span><span class="p">):</span>
+<span class="k">class</span> <span class="nc">t_format_filename_transform</span><span class="p">(</span><span class="n">t_filename_transform</span><span class="p">):</span>
+</pre></div>
+</div>
+</div></blockquote>
+<div class="section" id="formatter-regex-and-suffix">
+<h3><tt class="docutils literal"><span class="pre">formatter()</span></tt>: <tt class="docutils literal"><span class="pre">regex()</span></tt> and <tt class="docutils literal"><span class="pre">suffix()</span></tt><a class="headerlink" href="#formatter-regex-and-suffix" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><p>The previous behaviour of regex(), where mismatches fail even if no substitution is made, is retained by the use of <tt class="docutils literal"><span class="pre">re.subn()</span></tt>.
+This is a corner case, but I didn’t want user code to break:</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="c"># filter on ".txt"</span>
+<span class="n">input_filenames</span> <span class="o">=</span> <span class="p">[</span><span class="s">"a.wrong"</span><span class="p">,</span> <span class="s">"b.txt"</span><span class="p">]</span>
+<span class="n">regex</span><span class="p">(</span><span class="s">"(.txt)$"</span><span class="p">)</span>
+
+<span class="c"># fails, no substitution possible</span>
+<span class="s">r"\1"</span>
+
+<span class="c"># fails anyway even through regular expression matches not referenced...</span>
+<span class="s">r"output.filename"</span>
+</pre></div>
+</div>
+</div></blockquote>
+</div>
+</div>
+<div class="section" id="product">
+<h2>@product()<a class="headerlink" href="#product" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><ul class="simple">
+<li>Use combinatoric generators from itertools and keep that naming scheme</li>
+<li>Put all new generators in a <tt class="docutils literal"><span class="pre">combinatorics</span></tt> submodule namespace to avoid breaking user code. (They can be imported if necessary.)</li>
+<li>test code in test/test_combinatorics.py</li>
+<li>The <tt class="docutils literal"><span class="pre">itertools.product(repeat)</span></tt> parameter doesn’t make sense for Ruffus and will not be used</li>
+<li>Flexible number of pairs of <tt class="docutils literal"><span class="pre">task</span></tt> / <tt class="docutils literal"><span class="pre">glob</span></tt> / file names + <tt class="docutils literal"><span class="pre">formatter()</span></tt></li>
+<li>Only <tt class="docutils literal"><span class="pre">formatter([OPTIONAL_REGEX])</span></tt> provides the necessary flexibility to construct the output, so we won’t bother with suffix and regex</li>
+<li>Similar to <tt class="docutils literal"><span class="pre">@transform</span></tt> but with an extra level of nested-ness (see the sketch at the end of this section)</li>
+</ul>
+<dl class="docutils">
+<dt>Retain the same code for <tt class="docutils literal"><span class="pre">@product</span></tt> and <tt class="docutils literal"><span class="pre">@transform</span></tt> by adding an additional level of indirection:</dt>
+<dd><ul class="first last">
+<li><p class="first">generator wrap around <tt class="docutils literal"><span class="pre">get_strings_in_nested_sequence</span></tt> to convert nested input parameters either to a single flat list of file names or to nested lists of file names</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="n">file_name_parameters</span><span class="o">.</span><span class="n">input_param_to_file_name_list</span> <span class="p">(</span><span class="n">input_params</span><span class="p">)</span>
+<span class="n">file_name_parameters</span><span class="o">.</span><span class="n">list_input_param_to_file_name_list</span> <span class="p">(</span><span class="n">input_params</span><span class="p">)</span>
+</pre></div>
+</div>
+</li>
+<li><p class="first"><tt class="docutils literal"><span class="pre">t_file_names_transform</span></tt> class which stores a list of regular expressions, one for each <tt class="docutils literal"><span class="pre">formatter()</span></tt> object corresponding to a single set of input parameters</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="n">t_formatter_file_names_transform</span>
+<span class="n">t_nested_formatter_file_names_transform</span>
+</pre></div>
+</div>
+</li>
+<li><p class="first">string substitution functions which will apply a list of <tt class="docutils literal"><span class="pre">formatter</span></tt> changes</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="n">ruffus</span><span class="o">.</span><span class="n">utility</span><span class="o">.</span><span class="n">t_formatter_replace</span><span class="p">()</span>
+<span class="n">ruffus</span><span class="o">.</span><span class="n">utility</span><span class="o">.</span><span class="n">t_nested_formatter_replace</span><span class="p">()</span>
+</pre></div>
+</div>
+</li>
+<li><p class="first"><tt class="docutils literal"><span class="pre">ruffus_uilility.swap_doubly_nested_order()</span></tt> makes the syntax / implementation very orthogonal</p>
+</li>
+</ul>
+</dd>
+</dl>
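+<p>For reference, a typical <tt class="docutils literal"><span class="pre">@product</span></tt> call pairs each input specification with its own <tt class="docutils literal"><span class="pre">formatter()</span></tt>, and the output pattern uses doubly nested indices (first the input set, then the file within the job). A minimal sketch with made-up file names:</p>
+<div class="highlight-python"><pre>from ruffus import *
+from ruffus.combinatorics import *
+
+@product(["a.fasta", "b.fasta"],     formatter(),
+         ["x.primers", "y.primers"], formatter(),
+         # first index selects the input set, the second the file within it
+         "{basename[0][0]}.{basename[1][0]}.products")
+def make_products(input_files, output_file):
+    pass</pre>
+</div>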
+</div></blockquote>
+</div>
+<div class="section" id="permutations-combinations-combinations-with-replacement">
+<h2><tt class="docutils literal"><span class="pre">@permutations(...),</span></tt> <tt class="docutils literal"><span class="pre">@combinations(...),</span></tt> <tt class="docutils literal"><span class="pre">@combinations_with_replacement(...)</span></tt><a class="headerlink" href="#permutations-combinations-combinations-with-replacement" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>Similar to <tt class="docutils literal"><span class="pre">@product</span></tt>, but the extra level of nested-ness is self versus self (see the sketch below)</p>
+<dl class="docutils">
+<dt>Retain the same code as for <tt class="docutils literal"><span class="pre">@product</span></tt>:</dt>
+<dd><ul class="first last simple">
+<li>forward to a single <tt class="docutils literal"><span class="pre">file_name_parameters.combinatorics_param_factory()</span></tt></li>
+<li>use <tt class="docutils literal"><span class="pre">combinatorics_type</span></tt> to dispatch to <tt class="docutils literal"><span class="pre">combinatorics.permutations</span></tt>, <tt class="docutils literal"><span class="pre">combinatorics.combinations</span></tt> and <tt class="docutils literal"><span class="pre">combinatorics.combinations_with_replacement</span></tt></li>
+<li>use <tt class="docutils literal"><span class="pre">list_input_param_to_file_name_list</span></tt> from <tt class="docutils literal"><span class="pre">file_name_parameters.product_param_factory()</span></tt></li>
+</ul>
+</dd>
+</dl>
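+<p>A minimal usage sketch (file names are made up; the tuple size argument is what distinguishes these decorators from <tt class="docutils literal"><span class="pre">@product</span></tt>):</p>
+<div class="highlight-python"><pre>from ruffus import *
+from ruffus.combinatorics import *
+
+@permutations(["a.sample", "b.sample", "c.sample"],
+              formatter(),
+              2,    # ordered pairs of distinct inputs
+              "{basename[0][0]}_vs_{basename[1][0]}.comparison")
+def compare_samples(input_files, output_file):
+    pass</pre>
+</div>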
+</div></blockquote>
+</div>
+<div class="section" id="drmaa-alternatives">
+<h2>drmaa alternatives<a class="headerlink" href="#drmaa-alternatives" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>Alternative, non-drmaa polling code at</p>
+<p><a class="reference external" href="https://github.com/bjpop/rubra/blob/master/rubra/cluster_job.py">https://github.com/bjpop/rubra/blob/master/rubra/cluster_job.py</a></p>
+</div></blockquote>
+</div>
+<div class="section" id="task-completion-monitoring">
+<h2>Task completion monitoring<a class="headerlink" href="#task-completion-monitoring" title="Permalink to this headline">¶</a></h2>
+<div class="section" id="how-easy-is-it-to-abstract-out-the-database">
+<h3>How easy is it to abstract out the database?<a class="headerlink" href="#how-easy-is-it-to-abstract-out-the-database" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><ul>
+<li><dl class="first docutils">
+<dt>The database is Jacob Sondergaard’s <tt class="docutils literal"><span class="pre">dbdict</span></tt> which is a nosql / key-value store wrapper around sqlite</dt>
+<dd><div class="first last highlight-python"><div class="highlight"><pre><span class="n">job_history</span> <span class="o">=</span> <span class="n">dbdict</span><span class="o">.</span><span class="n">open</span><span class="p">(</span><span class="n">RUFFUS_HISTORY_FILE</span><span class="p">,</span> <span class="n">picklevalues</span><span class="o">=</span><span class="bp">True</span><span class="p">)</span>
+</pre></div>
+</div>
+</dd>
+</dl>
+</li>
+<li><p class="first">The key is the output file name, so it is important not to confuse Ruffus by having different tasks generate the same output file!</p>
+</li>
+<li><p class="first">Is it possible to abstract this so that <strong>jobs</strong> get timestamped as well?</p>
+</li>
+<li><p class="first">If we should ever want to abstract out <tt class="docutils literal"><span class="pre">dbdict</span></tt>, we need to have a similar key-value store class,
+and make sure that a single instance of <tt class="docutils literal"><span class="pre">dbdict</span></tt> is used through <tt class="docutils literal"><span class="pre">pipeline_run</span></tt> which is passed up
+and down the function call chain. <tt class="docutils literal"><span class="pre">dbdict</span></tt> would then be drop-in replaceable by our custom (e.g. flat-file-based) dbdict alternative.</p>
+</li>
+</ul>
+<p>To peek into the database:</p>
+<blockquote>
+<div><div class="highlight-bash"><div class="highlight"><pre><span class="nv">$ </span>sqlite3 .ruffus_history.sqlite
+sqlite> .tables
+data
+sqlite> .schema data
+CREATE TABLE data <span class="o">(</span>key PRIMARY KEY,value<span class="o">)</span>;
+sqlite> <span class="k">select </span>key from data order by key;
+</pre></div>
+</div>
+</div></blockquote>
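+<p>The same peek works from python with just the standard library (a sketch; the file name is the default Ruffus history file and each key is an output file name recorded by <tt class="docutils literal"><span class="pre">pipeline_run</span></tt>):</p>
+<div class="highlight-python"><pre>import sqlite3
+
+conn = sqlite3.connect(".ruffus_history.sqlite")
+for (key,) in conn.execute("select key from data order by key"):
+    print(key)
+conn.close()</pre>
+</div>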
+</div></blockquote>
+</div>
+<div class="section" id="can-we-query-the-database-get-job-history-stats">
+<h3>Can we query the database, get Job history / stats?<a class="headerlink" href="#can-we-query-the-database-get-job-history-stats" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div>Yes, if we write a function to read and dump the entire database, but this is only useful with timestamps and task names. See below.</div></blockquote>
+</div>
+<div class="section" id="what-are-the-run-time-performance-implications">
+<h3>What are the run time performance implications?<a class="headerlink" href="#what-are-the-run-time-performance-implications" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div>Should be fast: a single db connection is created and used inside <tt class="docutils literal"><span class="pre">pipeline_run</span></tt>, <tt class="docutils literal"><span class="pre">pipeline_printout</span></tt>, <tt class="docutils literal"><span class="pre">pipeline_printout_graph</span></tt></div></blockquote>
+</div>
+<div class="section" id="avoid-pauses-between-tasks">
+<h3>Avoid pauses between tasks<a class="headerlink" href="#avoid-pauses-between-tasks" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><p>Saving timestamps in the job history database allows Ruffus to avoid adding an extra 1 second pause between tasks to guard against file systems with low timestamp granularity.</p>
+<blockquote>
+<div><ul class="simple">
+<li>If the local file time looks to be in sync with the underlying file system, saved system time is used instead of file timestamps</li>
+</ul>
+</div></blockquote>
+</div></blockquote>
+</div>
+</div>
+<div class="section" id="mkdir">
+<h2><tt class="docutils literal"><span class="pre">@mkdir(...),</span></tt><a class="headerlink" href="#mkdir" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><ul class="simple">
+<li><tt class="docutils literal"><span class="pre">mkdir</span></tt> continues to work seamlessly inside <tt class="docutils literal"><span class="pre">@follows</span></tt>) but also as its own decorator <tt class="docutils literal"><span class="pre">@mkdir</span></tt> due to the original happy orthogonal design</li>
+<li>fixed bug in checking so that Ruffus does’t blow up if non strings are in the output (number...)</li>
+<li>note: adding the decorator to a previously undecorated function might have unintended consequences. The undecorated function turns into a zombie.</li>
+<li>fixed ugly bug in <tt class="docutils literal"><span class="pre">pipeline_printout</span></tt> for printing single line output</li>
+<li>fixed description and printout indent</li>
+</ul>
+</div></blockquote>
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#">Implementation Tips</a><ul>
+<li><a class="reference internal" href="#release">Release</a></li>
+<li><a class="reference internal" href="#dbdict-py">dbdict.py</a></li>
+<li><a class="reference internal" href="#how-to-write-new-decorators">how to write new decorators</a></li>
+</ul>
+</li>
+<li><a class="reference internal" href="#implementation-notes">Implementation notes</a><ul>
+<li><a class="reference internal" href="#ctrl-c-handling"><tt class="docutils literal"><span class="pre">Ctrl-C</span></tt> handling</a></li>
+<li><a class="reference internal" href="#python3-compatability">Python3 compatability</a></li>
+<li><a class="reference internal" href="#refactoring-parameter-handling">Refactoring: parameter handling</a></li>
+<li><a class="reference internal" href="#formatter"><tt class="docutils literal"><span class="pre">formatter</span></tt></a><ul>
+<li><a class="reference internal" href="#formatter-regex-and-suffix"><tt class="docutils literal"><span class="pre">formatter()</span></tt>: <tt class="docutils literal"><span class="pre">regex()</span></tt> and <tt class="docutils literal"><span class="pre">suffix()</span></tt></a></li>
+</ul>
+</li>
+<li><a class="reference internal" href="#product">@product()</a></li>
+<li><a class="reference internal" href="#permutations-combinations-combinations-with-replacement"><tt class="docutils literal"><span class="pre">@permutations(...),</span></tt> <tt class="docutils literal"><span class="pre">@combinations(...),</span></tt> <tt class="docutils literal"><span class="pre">@combinations_with_replacement(...)</span></tt></a></li>
+<li><a class="reference internal" href="#drmaa-alternatives">drmaa alternatives</a></li>
+<li><a class="reference internal" href="#task-completion-monitoring">Task completion monitoring</a><ul>
+<li><a class="reference internal" href="#how-easy-is-it-to-abstract-out-the-database">How easy is it to abstract out the database?</a></li>
+<li><a class="reference internal" href="#can-we-query-the-database-get-job-history-stats">Can we query the database, get Job history / stats?</a></li>
+<li><a class="reference internal" href="#what-are-the-run-time-performance-implications">What are the run time performance implications?</a></li>
+<li><a class="reference internal" href="#avoid-pauses-between-tasks">Avoid pauses between tasks</a></li>
+</ul>
+</li>
+<li><a class="reference internal" href="#mkdir"><tt class="docutils literal"><span class="pre">@mkdir(...),</span></tt></a></li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="todo.html"
+ title="previous chapter">Future Changes to Ruffus</a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="faq.html"
+ title="next chapter">FAQ</a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="_sources/implementation_notes.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="decorators/decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="decorators/originate.html">@originate</a> </li>
+ <li><a href="decorators/split.html">@split</a> </li>
+ <li><a href="decorators/transform.html">@transform</a> </li>
+ <li><a href="decorators/merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="decorators/decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="decorators/subdivide.html">@subdivide</a> </li>
+ <li><a href="decorators/transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="decorators/collate.html">@collate</a> </li>
+ <li><a href="decorators/collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="decorators/graphviz.html">@graphviz</a> </li>
+ <li><a href="decorators/mkdir.html">@mkdir</a> </li>
+ <li><a href="decorators/follows.html">@follows / mkdir</a> </li>
+ <li><a href="decorators/posttask.html">@posttask touch_file</a> </li>
+ <li><a href="decorators/active_if.html">@active_if</a> </li>
+ <li><a href="decorators/jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="decorators/decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="decorators/product.html">@product </a> </li>
+ <li><a href="decorators/permutations.html">@permutations </a> </li>
+ <li><a href="decorators/combinations.html">@combinations </a> </li>
+ <li><a href="decorators/combinations_with_replacement.html">@combinations_ _with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="decorators/decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="decorators/files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="decorators/parallel.html">@parallel</a> </li>
+ <li><a href="decorators/check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="decorators/indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="faq.html" title="FAQ"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="todo.html" title="Future Changes to Ruffus"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="index.html">Home</a> | </li>
+ <li><a href="contents.html">Contents</a> | </li>
+ <li><a href="installation.html">Install</a> | </li>
+ <li><a href="tutorials/new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="tutorials/new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="faq.html">FAQ</a> | </li>
+ <li><a href="cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="tutorials/new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="gallery.html">Gallery</a> | </li>
+ <li><a href="history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/index.html b/doc/_build/html/index.html
new file mode 100644
index 0000000..45249e7
--- /dev/null
+++ b/doc/_build/html/index.html
@@ -0,0 +1,313 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>Ruffus — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: './',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="_static/jquery.js"></script>
+ <script type="text/javascript" src="_static/underscore.js"></script>
+ <script type="text/javascript" src="_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="#" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ Ruffus v. 2.5
+ <li><a href="#">Home</a> | </li>
+ <li><a href="contents.html">Contents</a> | </li>
+ <li><a href="installation.html">Install</a> | </li>
+ <li><a href="tutorials/new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="tutorials/new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="faq.html">FAQ</a> | </li>
+ <li><a href="cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="tutorials/new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="gallery.html">Gallery</a> | </li>
+ <li><a href="history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+
+<p>
+<a href="why_ruffus.html#why-ruffus"> <img src="_images/logo.jpg" alt="logo"></a>
+</p>
+
+
+Ruffus is a Computation Pipeline library for python. It is open-sourced,
+powerful and user-friendly, and widely used in science and bioinformatics.
+
+<h1>Welcome</h1>
+
+<table class="Introduction" align="center" style="margin-left: 30px">
+<tr>
+ <td width="50%">
+ <p><I>Ruffus</I> is designed to allow scientific and other analyses to
+ be automated with the minimum of fuss and the least effort.</p>
+
+ These are <I>Ruffus</I>'s strengths:
+ <ul>
+ <li><b>Lightweight</b>: Suitable for the simplest of tasks</li>
+
+ <li><b>Scalable</b>: Handles even fiendishly complicated pipelines
+ which would cause <i>make</i> or <i>scons</i> to go cross-eyed and recursive.</li>
+
+ <li><b>Standard python</b>: No "clever magic", no pre-processing. </li>
+ <li><b>Unintrusive</b>: Unambitious, lightweight syntax which tries to do this
+ one small thing well. </li>
+ </ul>
+
+ <p>Please join me (email: ruffus_lib at llew.org.uk) in setting the direction of
+ this project if you are interested.
+ </p>
+
+ </td>
+
+ <td width="50%">
+
+ <img src="_images/front_page_flowchart.png" alt="flowchart">
+
+ </td>
+
+</tr>
+</table>
+
+
+
+
+
+
+<h1> Documentation</h1>
+
+<table class="contentstable" align="center" style="margin-left: 30px">
+<tr>
+ <td width="50%">
+
+ <p class="biglink"><a class="biglink"
+ href="installation.html">Download</a><br/>
+ <span class="linkdescr">to install <I>Ruffus</I></span></p>
+
+ <p class="biglink"><a class="biglink"
+ href="tutorials/new_tutorial/introduction.html">Simple Tutorial</a><br/>
+ <span class="linkdescr">Start here for a quick introduction to <i>Ruffus</i></span></p>
+
+ <p class="biglink"><a class="biglink"
+ href="tutorials/new_tutorial/manual_contents.html">Manual</a>
+ <a class="biglink" href="_downloads/ruffus.pdf">(pdf)</a><br/>
+ <span class="linkdescr">for an-depth demonstration of all
+ <I>Ruffus</I> features</span></p>
+
+ </td>
+
+ <td width="50%">
+
+
+ <p class="biglink"><a class="biglink"
+ href="contents.html">Table of contents</a><br/>
+ <span class="linkdescr">for an complete listing of all the documentation</span></p>
+
+ <p class="biglink"><a class="biglink"
+ href="faq.html">Frequently Answered Questions</a><br/>
+ <span class="linkdescr">for any common problems, clever solutions from the community</span></p>
+
+ <p class="biglink"><a class="biglink"
+ href="design.html">Design</a><br/>
+ <span class="linkdescr">to understand the design of <I>Ruffus</I></span></p>
+
+ <p class="biglink"><a class="biglink"
+ href="cheatsheet.html">Cheat Sheet</a><br/>
+ <span class="linkdescr">for Ruffus syntax</span></p>
+ </td>
+
+</tr>
+</table>
+
+
+
+
+<h2>Get <I>Ruffus</I></h2>
+
+<p>
+ <i>Ruffus</i> is available as an <a
+ href="http://peak.telecommunity.com/DevCenter/EasyInstall">easy-install</a>able package on the <a href="http://pypi.python.org/pypi/ruffus">Python Package
+ Index</a>.
+ </p>
+Just run:
+
+<div style="margin-left: 30px" class="highlight-python"><pre>sudo pip install ruffus --upgrade</pre> or
+</div>
+
+
+<div style="margin-left: 30px" class="highlight-python"><pre>easy_install -U ruffus</pre>
+</div>
+
+<br>
+<p>
+The very latest (in development) code can be obtained via <a href="https://code.google.com/p/ruffus/source/checkout"> git </a>:
+ <div style="color: #ff0000" >
+ <pre>git clone https://bunbun68@code.google.com/p/ruffus/</pre>
+</div>
+</p>
+
+<h1>Feedback and Getting Involved:</h1>
+<ul>
+ <li>
+ The <I>Ruffus</I> project is hosted with <b><a
+ href="http://code.google.com/p/ruffus">Google
+ Code here.</a></b><br><br>
+ </li>
+
+ <li>
+ <img src="https://groups.google.com/forum/my-groups-color.png"
+ height=30 alt="Google Groups"> <b>Subscribe to the <a href="https://groups.google.com/forum/#!forum/ruffus_discuss"><i>ruffus_discuss</i> mailing list </a></b>
+ <br>
+ <form action="http://groups.google.com/group/ruffus_discuss/boxsubscribe">
+ Email: <input type=text name=email>
+ <input type=submit name="sub" value="Subscribe">
+ </form><br>
+ </li>
+
+
+ <li>
+ <a href="http://groups.google.com/group/ruffus_discuss">Check out the mailing list without subscribing here.</a>
+ <br>
+ </li>
+
+ <li>
+ <a href="http://code.google.com/p/ruffus/issues/list">Bugs or feature requests can be posted here.</a>
+ <br>
+ </li>
+
+
+</ul>
+
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="decorators/decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="decorators/originate.html">@originate</a> </li>
+ <li><a href="decorators/split.html">@split</a> </li>
+ <li><a href="decorators/transform.html">@transform</a> </li>
+ <li><a href="decorators/merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="decorators/decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="decorators/subdivide.html">@subdivide</a> </li>
+ <li><a href="decorators/transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="decorators/collate.html">@collate</a> </li>
+ <li><a href="decorators/collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="decorators/graphviz.html">@graphviz</a> </li>
+ <li><a href="decorators/mkdir.html">@mkdir</a> </li>
+ <li><a href="decorators/follows.html">@follows / mkdir</a> </li>
+ <li><a href="decorators/posttask.html">@posttask touch_file</a> </li>
+ <li><a href="decorators/active_if.html">@active_if</a> </li>
+ <li><a href="decorators/jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="decorators/decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="decorators/product.html">@product </a> </li>
+ <li><a href="decorators/permutations.html">@permutations </a> </li>
+ <li><a href="decorators/combinations.html">@combinations </a> </li>
+ <li><a href="decorators/combinations_with_replacement.html">@combinations_ _with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="decorators/decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="decorators/files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="decorators/parallel.html">@parallel</a> </li>
+ <li><a href="decorators/check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="decorators/indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="genindex.html" title="General Index"
+ >index</a></li>
+ Ruffus v. 2.5
+ <li><a href="#">Home</a> | </li>
+ <li><a href="contents.html">Contents</a> | </li>
+ <li><a href="installation.html">Install</a> | </li>
+ <li><a href="tutorials/new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="tutorials/new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="faq.html">FAQ</a> | </li>
+ <li><a href="cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="tutorials/new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="gallery.html">Gallery</a> | </li>
+ <li><a href="history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/installation.html b/doc/_build/html/installation.html
new file mode 100644
index 0000000..5415eaa
--- /dev/null
+++ b/doc/_build/html/installation.html
@@ -0,0 +1,265 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>Installation — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: './',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="_static/jquery.js"></script>
+ <script type="text/javascript" src="_static/underscore.js"></script>
+ <script type="text/javascript" src="_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="index.html" />
+ <link rel="next" title="Design & Architecture" href="design.html" />
+ <link rel="prev" title="drmaa functions" href="drmaa_wrapper_functions.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="design.html" title="Design & Architecture"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="drmaa_wrapper_functions.html" title="drmaa functions"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="index.html">Home</a> | </li>
+ <li><a href="contents.html">Contents</a> | </li>
+ <li><a href="#">Install</a> | </li>
+ <li><a href="tutorials/new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="tutorials/new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="faq.html">FAQ</a> | </li>
+ <li><a href="cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="tutorials/new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="gallery.html">Gallery</a> | </li>
+ <li><a href="history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <div class="section" id="installation">
+<span id="id1"></span><h1>Installation<a class="headerlink" href="#installation" title="Permalink to this headline">¶</a></h1>
+<p><tt class="xref py py-mod docutils literal"><span class="pre">Ruffus</span></tt> is a lightweight python module for building computational pipelines.</p>
+<div class="section" id="the-easy-way">
+<h2>The easy way<a class="headerlink" href="#the-easy-way" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p><em>Ruffus</em> is available as an
+<a class="reference external" href="http://peak.telecommunity.com/DevCenter/EasyInstall">easy-install</a>-able package
+on the <a class="reference external" href="http://pypi.python.org/pypi/ruffus">Python Package Index</a>.</p>
+<div class="highlight-python"><pre>sudo pip install ruffus --upgrade</pre>
+</div>
+<p>The following may also work for older installations:</p>
+<ol class="arabic">
+<li><p class="first">Install setuptools:</p>
+<div class="highlight-python"><pre>wget peak.telecommunity.com/dist/ez_setup.py
+sudo python ez_setup.py</pre>
+</div>
+</li>
+<li><p class="first">Install <em>Ruffus</em> automatically:</p>
+<div class="highlight-python"><pre>easy_install -U ruffus</pre>
+</div>
+</li>
+</ol>
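+<p>To confirm that the module can be imported afterwards, a quick sanity check
+(a minimal sketch; it assumes only that one of the installs above succeeded):</p>
+<div class="highlight-python"><pre># should print nothing and exit without error
+python -c "import ruffus"</pre>
+</div>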
+</div></blockquote>
+</div>
+<div class="section" id="the-most-up-to-date-code">
+<h2>The most up-to-date code:<a class="headerlink" href="#the-most-up-to-date-code" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><ul>
+<li><p class="first"><a class="reference external" href="https://pypi.python.org/pypi/ruffus">Download the latest sources</a> or</p>
+</li>
+<li><p class="first">Check out the latest code from Google using git:</p>
+<div class="highlight-python"><pre>git clone https://bunbun68@code.google.com/p/ruffus/ .</pre>
+</div>
+</li>
+<li><p class="first">Bleeding edge Ruffus development takes place on github:</p>
+<div class="highlight-python"><pre>git clone git at github.com:bunbun/ruffus.git .</pre>
+</div>
+</li>
+<li><p class="first">To install after downloading, change to the , type:</p>
+<div class="highlight-python"><pre>python ./setup.py install</pre>
+</div>
+</li>
+</ul>
+</div></blockquote>
+<div class="section" id="graphical-flowcharts">
+<h3>Graphical flowcharts<a class="headerlink" href="#graphical-flowcharts" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><p><strong>Ruffus</strong> relies on the <tt class="docutils literal"><span class="pre">dot</span></tt> programme from <a class="reference external" href="http://www.graphviz.org/">Graphviz</a>
+(“Graph visualisation”) to make pretty flowchart representations of your pipelines in multiple
+graphical formats (e.g. <tt class="docutils literal"><span class="pre">png</span></tt>, <tt class="docutils literal"><span class="pre">jpg</span></tt>). The crossplatform Graphviz package can be
+<a class="reference external" href="http://www.graphviz.org/Download.php">downloaded here</a> for Windows,
+Linux, Macs and Solaris. Some Linux
+distributions may include prebuilt packages.</p>
+<dl class="docutils">
+<dt>For Fedora, try</dt>
+<dd><div class="first last highlight-python"><pre>yum list 'graphviz*'</pre>
+</div>
+</dd>
+<dt>For Ubuntu / Debian, try</dt>
+<dd><div class="first last highlight-python"><pre>sudo apt-get install graphviz</pre>
+</div>
+</dd>
+</dl>
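+<p>To confirm that <tt class="docutils literal"><span class="pre">dot</span></tt> is on the path after installing Graphviz
+(a quick sanity check, not part of <em>Ruffus</em> itself):</p>
+<div class="highlight-python"><pre># prints the installed Graphviz version
+dot -V</pre>
+</div>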
+</div></blockquote>
+</div>
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#">Installation</a><ul>
+<li><a class="reference internal" href="#the-easy-way">The easy way</a></li>
+<li><a class="reference internal" href="#the-most-up-to-date-code">The most up-to-date code:</a><ul>
+<li><a class="reference internal" href="#graphical-flowcharts">Graphical flowcharts</a></li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="drmaa_wrapper_functions.html"
+ title="previous chapter">drmaa functions</a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="design.html"
+ title="next chapter">Design & Architecture</a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="_sources/installation.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="decorators/decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="decorators/originate.html">@originate</a> </li>
+ <li><a href="decorators/split.html">@split</a> </li>
+ <li><a href="decorators/transform.html">@transform</a> </li>
+ <li><a href="decorators/merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="decorators/decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="decorators/subdivide.html">@subdivide</a> </li>
+ <li><a href="decorators/transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="decorators/collate.html">@collate</a> </li>
+ <li><a href="decorators/collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="decorators/graphviz.html">@graphviz</a> </li>
+ <li><a href="decorators/mkdir.html">@mkdir</a> </li>
+ <li><a href="decorators/follows.html">@follows / mkdir</a> </li>
+ <li><a href="decorators/posttask.html">@posttask touch_file</a> </li>
+ <li><a href="decorators/active_if.html">@active_if</a> </li>
+ <li><a href="decorators/jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="decorators/decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="decorators/product.html">@product </a> </li>
+ <li><a href="decorators/permutations.html">@permutations </a> </li>
+ <li><a href="decorators/combinations.html">@combinations </a> </li>
+ <li><a href="decorators/combinations_with_replacement.html">@combinations_ _with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="decorators/decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="decorators/files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="decorators/parallel.html">@parallel</a> </li>
+ <li><a href="decorators/check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="decorators/indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="design.html" title="Design & Architecture"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="drmaa_wrapper_functions.html" title="drmaa functions"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="index.html">Home</a> | </li>
+ <li><a href="contents.html">Contents</a> | </li>
+ <li><a href="#">Install</a> | </li>
+ <li><a href="tutorials/new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="tutorials/new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="faq.html">FAQ</a> | </li>
+ <li><a href="cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="tutorials/new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="gallery.html">Gallery</a> | </li>
+ <li><a href="history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/objects.inv b/doc/_build/html/objects.inv
new file mode 100644
index 0000000..6ca5a43
Binary files /dev/null and b/doc/_build/html/objects.inv differ
diff --git a/doc/_build/html/pipeline_functions.html b/doc/_build/html/pipeline_functions.html
new file mode 100644
index 0000000..a1bd380
--- /dev/null
+++ b/doc/_build/html/pipeline_functions.html
@@ -0,0 +1,991 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>Pipeline functions — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: './',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="_static/jquery.js"></script>
+ <script type="text/javascript" src="_static/underscore.js"></script>
+ <script type="text/javascript" src="_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="index.html" />
+ <link rel="next" title="drmaa functions" href="drmaa_wrapper_functions.html" />
+ <link rel="prev" title="Cheat Sheet" href="cheatsheet.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="drmaa_wrapper_functions.html" title="drmaa functions"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="cheatsheet.html" title="Cheat Sheet"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="index.html">Home</a> | </li>
+ <li><a href="contents.html">Contents</a> | </li>
+ <li><a href="installation.html">Install</a> | </li>
+ <li><a href="tutorials/new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="tutorials/new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="faq.html">FAQ</a> | </li>
+ <li><a href="cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="tutorials/new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="gallery.html">Gallery</a> | </li>
+ <li><a href="history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <p id="pipeline-functions">See <a class="reference internal" href="decorators/decorators.html#decorators"><em>Decorators</em></a> for more decorators</p>
+<div class="section" id="id1">
+<h1>Pipeline functions<a class="headerlink" href="#id1" title="Permalink to this headline">¶</a></h1>
+<blockquote>
+<div><p>There are only four functions for <strong>Ruffus</strong> pipelines:</p>
+<blockquote>
+<div><ul class="simple">
+<li><a class="reference internal" href="#pipeline-functions-pipeline-run"><cite>pipeline_run</cite></a> executes a pipeline</li>
+<li><a class="reference internal" href="#pipeline-functions-pipeline-printout"><cite>pipeline_printout</cite></a> prints a list of tasks and jobs which will be run in a pipeline</li>
+<li><a class="reference internal" href="#pipeline-functions-pipeline-printout-graph"><cite>pipeline_printout_graph</cite></a> prints a schematic flowchart of pipeline tasks in various graphical formats</li>
+<li><a class="reference internal" href="#pipeline-functions-pipeline-get-task-names"><cite>pipeline_get_task_names</cite></a> returns a list of all task names in the pipeline</li>
+</ul>
+</div></blockquote>
+</div></blockquote>
+<span class="target" id="pipeline-functions-pipeline-run"></span><div class="section" id="index-0">
+<span id="id2"></span><h2><em>pipeline_run</em><a class="headerlink" href="#index-0" title="Permalink to this headline">¶</a></h2>
+<p><strong>pipeline_run</strong> ( <a class="reference internal" href="#pipeline-functions-pipeline-run-target-tasks"><cite>target_tasks</cite></a> = [], <a class="reference internal" href="#pipeline-functions-pipeline-run-forcedtorun-tasks"><cite>forcedtorun_tasks</cite></a> = [], <a class="reference internal" href="#pipeline-functions-pipeline-run-multiprocess"><cite>multiprocess</cite></a> = 1, <a class="reference internal" href="#pipeline-functions-pipeline-run-logger"><cite>logger< [...]
+<blockquote>
+<div><p><strong>Purpose:</strong></p>
+<blockquote>
+<div>Runs all specified pipelined functions if they or any antecedent tasks are
+incomplete or out-of-date.</div></blockquote>
+<p><strong>Example</strong>:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="c">#</span>
+<span class="c"># Run task2 whatever its state, and also task1 and antecedents if they are incomplete</span>
+<span class="c"># Do not log pipeline progress messages to stderr</span>
+<span class="c">#</span>
+<span class="n">pipeline_run</span><span class="p">([</span><span class="n">task1</span><span class="p">,</span> <span class="n">task2</span><span class="p">],</span> <span class="n">forcedtorun_tasks</span> <span class="o">=</span> <span class="p">[</span><span class="n">task2</span><span class="p">],</span> <span class="n">logger</span> <span class="o">=</span> <span class="n">blackhole_logger</span><span class="p">)</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p><strong>Parameters:</strong></p>
+</div></blockquote>
+<blockquote id="pipeline-functions-pipeline-run-target-tasks">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>target_tasks</em></dt>
+<dd><p class="first last">Pipeline functions and any necessary antecedents (specified implicitly or with <a class="reference internal" href="decorators/follows.html#decorators-follows"><em>@follows</em></a>)
+which should be invoked with the appropriate parameters if they are incomplete or out-of-date.</p>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
+<blockquote id="pipeline-functions-pipeline-run-forcedtorun-tasks">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>forcedtorun_tasks</em></dt>
+<dd><p class="first last">Optional. These pipeline functions will be invoked regardless of their state.
+Any antecedent tasks will also be executed if they are out-of-date or incomplete.</p>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
+<blockquote id="pipeline-functions-pipeline-run-multiprocess">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>multiprocess</em></dt>
+<dd><p class="first last">Optional. The number of processes which should be dedicated to running in parallel independent
+tasks and jobs within each task. If <tt class="docutils literal"><span class="pre">multiprocess</span></tt> is set to 1, the pipeline will
+execute in the main process.</p>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
+<blockquote id="pipeline-functions-pipeline-run-multithread">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>multithread</em></dt>
+<dd><p class="first last">Optional. The number of threads which should be dedicated to running in parallel independent
+tasks and jobs within each task. Should be used only with drmaa. Otherwise the CPython <a class="reference external" href="https://wiki.python.org/moin/GlobalInterpreterLock">global interpreter lock (GIL)</a>
+will slow down your pipeline.</p>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
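+<p>For example, a minimal sketch of using <em>multithread</em> (here <tt class="docutils literal"><span class="pre">run_on_cluster</span></tt>
+is a placeholder task whose jobs are submitted via <tt class="docutils literal"><span class="pre">drmaa_wrapper.run_job</span></tt>):</p>
+<div class="highlight-python"><pre># jobs spend their time waiting on the cluster, so threads are cheap
+# and avoid the GIL penalty that CPU-bound python work would incur
+pipeline_run([run_on_cluster], multithread = 10)</pre>
+</div>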
+<blockquote id="pipeline-functions-pipeline-run-logger">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>logger</em></dt>
+<dd><p class="first last">For logging messages indicating the progress of the pipeline in terms of tasks and jobs.
+Defaults to outputting to sys.stderr.
+Setting <tt class="docutils literal"><span class="pre">logger=blackhole_logger</span></tt> will prevent any logging output.</p>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
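+<p>A minimal sketch of supplying a custom <em>logger</em> (assuming a standard
+<tt class="docutils literal"><span class="pre">logging.Logger</span></tt> object is acceptable because it provides the usual
+<tt class="docutils literal"><span class="pre">info()</span></tt> / <tt class="docutils literal"><span class="pre">debug()</span></tt> methods; <tt class="docutils literal"><span class="pre">final_task</span></tt> is a placeholder):</p>
+<div class="highlight-python"><pre>import logging
+
+my_logger = logging.getLogger("my_pipeline")
+my_logger.setLevel(logging.DEBUG)
+my_logger.addHandler(logging.FileHandler("pipeline.log"))
+
+# progress messages go to pipeline.log instead of sys.stderr
+pipeline_run([final_task], logger = my_logger)</pre>
+</div>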
+<blockquote id="pipeline-functions-pipeline-run-gnu-make">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>gnu_make_maximal_rebuild_mode</em></dt>
+<dd><div class="first admonition warning">
+<p class="first admonition-title">Warning</p>
+<p class="last">This is a dangerous option. Use rarely and with caution</p>
+</div>
+<p>Optional parameter governing how <strong>Ruffus</strong> determines which part of the pipeline is
+out of date and needs to be re-run. If set to <tt class="docutils literal"><span class="pre">False</span></tt>, <strong>ruffus</strong> will work back
+from the <tt class="docutils literal"><span class="pre">target_tasks</span></tt> and only execute the pipeline after the first up-to-date
+tasks that it encounters. For example, if there are five tasks:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="c">#</span>
+<span class="c"># task1 -> task2 -> task3 -> task4 -> task5</span>
+<span class="c">#</span>
+<span class="n">target_tasks</span> <span class="o">=</span> <span class="p">[</span><span class="n">task5</span><span class="p">]</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p>If <tt class="docutils literal"><span class="pre">task3()</span></tt> is up-to-date, then only <tt class="docutils literal"><span class="pre">task4()</span></tt> and <tt class="docutils literal"><span class="pre">task5()</span></tt> will be run.
+This will be the case even if <tt class="docutils literal"><span class="pre">task2()</span></tt> and <tt class="docutils literal"><span class="pre">task1()</span></tt> are incomplete.</p>
+<p class="last">This allows you to remove all intermediate results produced by <tt class="docutils literal"><span class="pre">task1</span> <span class="pre">-></span> <span class="pre">task3</span></tt>.</p>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
+<blockquote id="pipeline-functions-pipeline-run-verbose">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>verbose</em></dt>
+<dd><p class="first">Optional parameter indicating the verbosity of the messages sent to <tt class="docutils literal"><span class="pre">logger</span></tt>:
+(Defaults to level 1 if unspecified)</p>
+<ul class="simple">
+<li>level <strong>0</strong> : <em>nothing</em></li>
+<li>level <strong>1</strong> : <em>Out-of-date Task names</em></li>
+<li>level <strong>2</strong> : <em>All Tasks (including any task function docstrings)</em></li>
+<li>level <strong>3</strong> : <em>Out-of-date Jobs in Out-of-date Tasks, no explanation</em></li>
+<li>level <strong>4</strong> : <em>Out-of-date Jobs in Out-of-date Tasks, with explanations and warnings</em></li>
+<li>level <strong>5</strong> : <em>All Jobs in Out-of-date Tasks, (include only list of up-to-date tasks)</em></li>
+<li>level <strong>6</strong> : <em>All jobs in All Tasks whether out of date or not</em></li>
+<li>level <strong>10</strong>: <em>logs messages useful only for debugging ruffus pipeline code</em></li>
+</ul>
+<p class="last"><tt class="docutils literal"><span class="pre">verbose</span> <span class="pre">>=</span> <span class="pre">10</span></tt> are intended for debugging <strong>Ruffus</strong> by the developers and the details
+are liable to change from release to release</p>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
+<blockquote id="pipeline-functions-pipeline-run-runtime-data">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>runtime_data</em></dt>
+<dd><p class="first last">Experimental feature for passing data to tasks at run time</p>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
+<blockquote id="pipeline-functions-pipeline-run-one-second-per-job">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>one_second_per_job</em></dt>
+<dd><p class="first last">To work around poor file timepstamp resolution for some file systems.
+Defaults to True if checksum_level is 0 forcing Tasks to take a minimum of 1 second to complete.
+If your file system has coarse grained time stamps, you can turn on this delay
+by setting <em>one_second_per_job</em> to <tt class="docutils literal"><span class="pre">True</span></tt></p>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
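+<p>For example (a sketch; <tt class="docutils literal"><span class="pre">final_task</span></tt> is a placeholder), on a file system
+with one-second timestamp granularity:</p>
+<div class="highlight-python"><pre># pause so that output timestamps are guaranteed to differ between tasks
+pipeline_run([final_task], one_second_per_job = True)</pre>
+</div>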
+<blockquote id="pipeline-functions-pipeline-run-touch-files-only">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>touch_files_only</em></dt>
+<dd><p class="first">Create or update output files only to simulate the running of the pipeline.
+Does not invoke real task functions to run jobs. This is most useful to force a
+pipeline to acknowledge that a particular part is now up-to-date.</p>
+<p class="last">This will not work properly if the identities of some files are not known before hand,
+and depend on run time. In other words, not recommended if <tt class="docutils literal"><span class="pre">@split</span></tt> or custom parameter generators are being used.</p>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
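+<p>A minimal sketch (<tt class="docutils literal"><span class="pre">final_task</span></tt> is a placeholder): mark everything up to
+<tt class="docutils literal"><span class="pre">final_task</span></tt> as complete without re-running any job:</p>
+<div class="highlight-python"><pre># only touches / creates the expected output files
+pipeline_run([final_task], touch_files_only = True)</pre>
+</div>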
+<blockquote id="pipeline-functions-pipeline-run-exceptions-terminate-immediately">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>exceptions_terminate_immediately</em></dt>
+<dd><p class="first last">Exceptions cause immediate termination of the pipeline.</p>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
+<blockquote id="pipeline-functions-pipeline-run-log-exceptions">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>log_exceptions</em></dt>
+<dd><p class="first last">Print exceptions to the logger as soon as they occur.</p>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
+<blockquote id="pipeline-functions-pipeline-run-history-file">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>history_file</em></dt>
+<dd><p class="first last">The database file which stores checksums and file timestamps for input/output files.
+Defaults to <tt class="docutils literal"><span class="pre">.ruffus_history.sqlite</span></tt> if unspecified</p>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
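+<p>For example, to keep the checksum database alongside a project rather than in the
+current working directory (a sketch; the path is arbitrary):</p>
+<div class="highlight-python"><pre>pipeline_run([final_task], history_file = "my_project/.ruffus_history.sqlite")</pre>
+</div>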
+<blockquote id="pipeline-functions-pipeline-run-checksum-level">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>checksum_level</em></dt>
+<dd><p class="first">Several options for checking up-to-dateness are available: Default is level 1.</p>
+<blockquote class="last">
+<div><ul class="simple">
+<li>level 0 : Use only file timestamps</li>
+<li>level 1 : above, plus timestamp of successful job completion</li>
+<li>level 2 : above, plus a checksum of the pipeline function body</li>
+<li>level 3 : above, plus a checksum of the pipeline function default arguments and the additional arguments passed in by task decorators</li>
+</ul>
+</div></blockquote>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
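+<p>For example, to fall back to timestamp-only checking (a sketch;
+<tt class="docutils literal"><span class="pre">final_task</span></tt> is a placeholder):</p>
+<div class="highlight-python"><pre># level 0: compare file modification times only
+pipeline_run([final_task], checksum_level = 0)</pre>
+</div>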
+<blockquote id="pipeline-functions-pipeline-run-verbose-abbreviated-path">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>verbose_abbreviated_path</em></dt>
+<dd><p class="first">Whether input and output paths are abbreviated. Defaults to 2 if unspecified</p>
+<blockquote class="last">
+<div><ul class="simple">
+<li>level 0: The full (expanded, abspath) input or output path</li>
+<li>level > 1: The number of subdirectories to include. Abbreviated paths are prefixed with <tt class="docutils literal"><span class="pre">[,,,]/</span></tt></li>
+<li>level < 0: Input / Output parameters are truncated to <tt class="docutils literal"><span class="pre">MMM</span></tt> letters where <tt class="docutils literal"><span class="pre">verbose_abbreviated_path</span> <span class="pre">==-MMM</span></tt>. Subdirectories are first removed to see if this allows the paths to fit in the specified limit. Otherwise abbreviated paths are prefixed by <tt class="docutils literal"><span class="pre"><???></span></tt></li>
+</ul>
+</div></blockquote>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
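+<p>For example, to list the out-of-date jobs while shortening each path to its last two
+directory levels (a sketch):</p>
+<div class="highlight-python"><pre>pipeline_run([final_task], verbose = 3, verbose_abbreviated_path = 2)</pre>
+</div>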
+<span class="target" id="pipeline-functions-pipeline-printout"></span></div>
+<div class="section" id="index-1">
+<span id="id3"></span><h2><em>pipeline_printout</em><a class="headerlink" href="#index-1" title="Permalink to this headline">¶</a></h2>
+<p><strong>pipeline_printout</strong> (<a class="reference internal" href="#pipeline-functions-pipeline-printout-output-stream"><cite>output_stream</cite></a> = sys.stdout, <a class="reference internal" href="#pipeline-functions-pipeline-printout-target-tasks"><cite>target_tasks</cite></a> = [], <a class="reference internal" href="#pipeline-functions-pipeline-printout-forcedtorun-tasks"><cite>forcedtorun_tasks</cite></a> = [], <a class="reference internal" href="#pipeline-functions-pipel [...]
+<blockquote>
+<div><p><strong>Purpose:</strong></p>
+<blockquote>
+<div>Prints out all the pipelined functions which will be invoked given specified <tt class="docutils literal"><span class="pre">target_tasks</span></tt>
+without actually running the pipeline. Because this is a simulation, some of the job
+parameters may be incorrect. For example, the result of a <a class="reference internal" href="tutorials/new_tutorial/split.html#new-manual-split"><em>@split</em></a>
+operation is not predetermined and will only be known after the pipelined function
+splits up the original data. The parameters of all downstream pipelined functions will
+change depending on this initial operation.</div></blockquote>
+<dl class="docutils">
+<dt><strong>Example</strong>:</dt>
+<dd><div class="first last highlight-python"><div class="highlight"><pre><span class="c">#</span>
+<span class="c"># Simulate running task2 whatever its state, and also task1 and antecedents</span>
+<span class="c"># if they are incomplete</span>
+<span class="c"># Print out results to STDOUT</span>
+<span class="c">#</span>
+<span class="n">pipeline_printout</span><span class="p">(</span><span class="n">sys</span><span class="o">.</span><span class="n">stdout</span><span class="p">,</span> <span class="p">[</span><span class="n">task1</span><span class="p">,</span> <span class="n">task2</span><span class="p">],</span> <span class="n">forcedtorun_tasks</span> <span class="o">=</span> <span class="p">[</span><span class="n">task2</span><span class="p">],</span> <span class="n">verbose</span> <span class="o">=< [...]
+</pre></div>
+</div>
+</dd>
+</dl>
+<p><strong>Parameters:</strong></p>
+</div></blockquote>
+<blockquote id="pipeline-functions-pipeline-printout-output-stream">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>output_stream</em></dt>
+<dd><p class="first last">Where to printout the results of simulating the running of the pipeline.</p>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
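+<p>For example, since <em>output_stream</em> defaults to <tt class="docutils literal"><span class="pre">sys.stdout</span></tt>, any open
+file-like object should work (a sketch; <tt class="docutils literal"><span class="pre">final_task</span></tt> is a placeholder):</p>
+<div class="highlight-python"><pre># save the simulation report instead of printing it
+with open("pipeline_plan.txt", "w") as report_file:
+    pipeline_printout(report_file, [final_task], verbose = 3)</pre>
+</div>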
+<blockquote id="pipeline-functions-pipeline-printout-target-tasks">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>target_tasks</em></dt>
+<dd><p class="first last">As in <a class="reference internal" href="#pipeline-functions-pipeline-run"><em>pipeline_run</em></a>: Pipeline functions and any necessary antecedents (specified implicitly or with <a class="reference internal" href="decorators/follows.html#decorators-follows"><em>@follows</em></a>)
+which should be invoked with the appropriate parameters if they are incomplete or out-of-date.</p>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
+<blockquote id="pipeline-functions-pipeline-printout-forcedtorun-tasks">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>forcedtorun_tasks</em></dt>
+<dd><p class="first last">As in <a class="reference internal" href="#pipeline-functions-pipeline-run"><em>pipeline_run</em></a>:These pipeline functions will be invoked regardless of their state.
+Any antecedents tasks will also be executed if they are out-of-date or incomplete.</p>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
+<blockquote id="pipeline-functions-pipeline-printout-verbose">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>verbose</em></dt>
+<dd><p class="first">Optional parameter indicating the verbosity of the messages sent to <tt class="docutils literal"><span class="pre">logger</span></tt>:
+(Defaults to level 4 if unspecified)</p>
+<ul class="simple">
+<li>level <strong>0</strong> : <em>nothing</em></li>
+<li>level <strong>1</strong> : <em>Out-of-date Task names</em></li>
+<li>level <strong>2</strong> : <em>All Tasks (including any task function docstrings)</em></li>
+<li>level <strong>3</strong> : <em>Out-of-date Jobs in Out-of-date Tasks, no explanation</em></li>
+<li>level <strong>4</strong> : <em>Out-of-date Jobs in Out-of-date Tasks, with explanations and warnings</em></li>
+<li>level <strong>5</strong> : <em>All Jobs in Out-of-date Tasks, (include only list of up-to-date tasks)</em></li>
+<li>level <strong>6</strong> : <em>All jobs in All Tasks whether out of date or not</em></li>
+<li>level <strong>10</strong>: <em>logs messages useful only for debugging ruffus pipeline code</em></li>
+</ul>
+<p class="last"><tt class="docutils literal"><span class="pre">verbose</span> <span class="pre">>=</span> <span class="pre">10</span></tt> are intended for debugging <strong>Ruffus</strong> by the developers and the details
+are liable to change from release to release</p>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
+<blockquote id="pipeline-functions-pipeline-printout-indent">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>indent</em></dt>
+<dd><p class="first last">Optional parameter governing the indentation when printing out the component job
+parameters of each task function.</p>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
+<blockquote id="pipeline-functions-pipeline-printout-gnu-make">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>gnu_make_maximal_rebuild_mode</em></dt>
+<dd><div class="first admonition warning">
+<p class="first admonition-title">Warning</p>
+<p class="last">This is a dangerous option. Use rarely and with caution</p>
+</div>
+<p class="last">See explanation in <a class="reference internal" href="#pipeline-functions-pipeline-run-gnu-make"><em>pipeline_run</em></a>.</p>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
+<blockquote id="pipeline-functions-pipeline-printout-wrap-width">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>wrap_width</em></dt>
+<dd><p class="first last">Optional parameter governing the length of each line before it starts wrapping
+around.</p>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
+<blockquote id="pipeline-functions-pipeline-printout-runtime-data">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>runtime_data</em></dt>
+<dd><p class="first last">Experimental feature for passing data to tasks at run time</p>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
+<blockquote id="pipeline-functions-pipeline-printout-history-file">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>history_file</em></dt>
+<dd><p class="first last">The database file which stores checksums and file timestamps for input/output files.
+Defaults to <tt class="docutils literal"><span class="pre">.ruffus_history.sqlite</span></tt> if unspecified</p>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
+<blockquote id="pipeline-functions-pipeline-printout-checksum-level">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>checksum_level</em></dt>
+<dd><p class="first">Several options for checking up-to-dateness are available: Default is level 1.</p>
+<blockquote class="last">
+<div><ul class="simple">
+<li>level 0 : Use only file timestamps</li>
+<li>level 1 : above, plus timestamp of successful job completion</li>
+<li>level 2 : above, plus a checksum of the pipeline function body</li>
+<li>level 3 : above, plus a checksum of the pipeline function default arguments and the additional arguments passed in by task decorators</li>
+</ul>
+</div></blockquote>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
+<blockquote id="pipeline-functions-pipeline-printout-verbose-abbreviated-path">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>verbose_abbreviated_path</em></dt>
+<dd><p class="first">Whether input and output paths are abbreviated. Defaults to 2 if unspecified</p>
+<blockquote class="last">
+<div><ul class="simple">
+<li>level 0: The full (expanded, abspath) input or output path</li>
+<li>level > 1: The number of subdirectories to include. Abbreviated paths are prefixed with <tt class="docutils literal"><span class="pre">[,,,]/</span></tt></li>
+<li>level < 0: Input / Output parameters are truncated to <tt class="docutils literal"><span class="pre">MMM</span></tt> letters where <tt class="docutils literal"><span class="pre">verbose_abbreviated_path</span> <span class="pre">==-MMM</span></tt>. Subdirectories are first removed to see if this allows the paths to fit in the specified limit. Otherwise abbreviated paths are prefixed by <tt class="docutils literal"><span class="pre"><???></span></tt></li>
+</ul>
+</div></blockquote>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
+<span class="target" id="pipeline-functions-pipeline-printout-graph"></span></div>
+<div class="section" id="index-2">
+<span id="id4"></span><h2><em>pipeline_printout_graph</em><a class="headerlink" href="#index-2" title="Permalink to this headline">¶</a></h2>
+<p><strong>pipeline_printout_graph</strong> (<a class="reference internal" href="#pipeline-functions-pipeline-printout-graph-stream"><cite>stream</cite></a>, <a class="reference internal" href="#pipeline-functions-pipeline-printout-graph-output-format"><cite>output_format</cite></a> = None, <a class="reference internal" href="#pipeline-functions-pipeline-printout-graph-target-tasks"><cite>target_tasks</cite></a> = [], <a class="reference internal" href="#pipeline-functions-pipeline-print [...]
+<blockquote>
+<div><p><strong>Purpose:</strong></p>
+<blockquote>
+<div><p>Prints out flowchart of all the pipelined functions which will be invoked given specified <tt class="docutils literal"><span class="pre">target_tasks</span></tt>
+without actually running the pipeline.</p>
+<p>See <a class="reference internal" href="tutorials/new_tutorial/flowchart_colours.html#new-manual-flowchart-colours"><em>Flowchart colours</em></a></p>
+</div></blockquote>
+<dl class="docutils">
+<dt><strong>Example</strong>:</dt>
+<dd><div class="first last highlight-python"><div class="highlight"><pre><span class="n">pipeline_printout_graph</span><span class="p">(</span><span class="s">"flowchart.jpg"</span><span class="p">,</span> <span class="s">"jpg"</span><span class="p">,</span> <span class="p">[</span><span class="n">task1</span><span class="p">,</span> <span class="n">task16</span><span class="p">],</span>
+ <span class="n">forcedtorun_tasks</span> <span class="o">=</span> <span class="p">[</span><span class="n">task2</span><span class="p">],</span>
+ <span class="n">no_key_legend</span> <span class="o">=</span> <span class="bp">True</span><span class="p">)</span>
+</pre></div>
+</div>
+</dd>
+</dl>
+<p><strong>Customising appearance:</strong></p>
+<blockquote>
+<div>The <a class="reference internal" href="#pipeline-functions-pipeline-printout-graph-user-colour-scheme"><em>user_colour_scheme</em></a> parameter can be used to change
+flowchart colours. This allows the default <a class="reference internal" href="tutorials/new_tutorial/flowchart_colours.html#new-manual-flowchart-colours"><em>Colour Schemes</em></a>
+to be set. An example of customising flowchart appearance is available <a class="reference internal" href="tutorials/new_tutorial/flowchart_colours_code.html#new-manual-flowchart-colours-code"><em>(see code)</em></a> .</div></blockquote>
+<p><strong>Parameters:</strong></p>
+</div></blockquote>
+<blockquote id="pipeline-functions-pipeline-printout-graph-stream">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>stream</em></dt>
+<dd><p class="first last">The file or file-like object to which the flowchart should be printed.
+If a string is provided, it is assumed that this is the name of the output file
+which will be opened automatically.</p>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
+<blockquote id="pipeline-functions-pipeline-printout-graph-output-format">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>output_format</em></dt>
+<dd><p class="first">If missing, defaults to the extension of the <em>stream</em> file name (i.e. <tt class="docutils literal"><span class="pre">jpg</span></tt> for <tt class="docutils literal"><span class="pre">a.jpg</span></tt>)</p>
+<div class="last line-block">
+<div class="line">If the programme <tt class="docutils literal"><span class="pre">dot</span></tt> can be found on the executio path, this
+can be any number of <a class="reference external" href="http://www.graphviz.org/doc/info/output.html">formats</a>
+supported by <a class="reference external" href="http://www.graphviz.org/">Graphviz</a>, including, for example,
+<tt class="docutils literal"><span class="pre">jpg</span></tt>, <tt class="docutils literal"><span class="pre">png</span></tt>, <tt class="docutils literal"><span class="pre">pdf</span></tt>, <tt class="docutils literal"><span class="pre">svg</span></tt> etc.</div>
+<div class="line">Otherwise, <strong>ruffus</strong> will only output without error in the <a class="reference external" href="http://en.wikipedia.org/wiki/DOT_language">dot</a> format, which
+is a plain-text graph description language.</div>
+</div>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
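+<p>For example, to produce a plain-text <tt class="docutils literal"><span class="pre">dot</span></tt> file that can be generated even when
+Graphviz is not installed (a sketch; <tt class="docutils literal"><span class="pre">final_task</span></tt> is a placeholder):</p>
+<div class="highlight-python"><pre># render later with: dot -Tpng flowchart.dot -o flowchart.png
+pipeline_printout_graph("flowchart.dot", "dot", [final_task])</pre>
+</div>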
+<blockquote id="pipeline-functions-pipeline-printout-graph-target-tasks">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>target_tasks</em></dt>
+<dd><p class="first last">As in <a class="reference internal" href="#pipeline-functions-pipeline-run"><em>pipeline_run</em></a>: Pipeline functions and any necessary antecedents (specified implicitly or with <a class="reference internal" href="decorators/follows.html#decorators-follows"><em>@follows</em></a>)
+which should be invoked with the appropriate parameters if they are incomplete or out-of-date.</p>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
+<blockquote id="pipeline-functions-pipeline-printout-graph-forcedtorun-tasks">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>forcedtorun_tasks</em></dt>
+<dd><p class="first last">As in <a class="reference internal" href="#pipeline-functions-pipeline-run"><em>pipeline_run</em></a>:These pipeline functions will be invoked regardless of their state.
+Any antecedents tasks will also be executed if they are out-of-date or incomplete.</p>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
+<blockquote id="pipeline-functions-pipeline-printout-graph-draw-vertically">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>draw_vertically</em></dt>
+<dd><p class="first last">Draw flowchart in vertical orientation</p>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
+<blockquote id="pipeline-functions-pipeline-printout-graph-ignore-upstream-of-target">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>ignore_upstream_of_target</em></dt>
+<dd><p class="first last">Start drawing flowchart from specified target tasks. Do not draw tasks which are
+downstream (subsequent) to the targets.</p>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
+<blockquote id="pipeline-functions-pipeline-printout-graph-skip-uptodate-tasks">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>skip_uptodate_tasks</em></dt>
+<dd><p class="first last">Do not draw up-to-date / completed tasks in the flowchart unless they
+lie on the execution path of the pipeline.</p>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
+<blockquote id="pipeline-functions-pipeline-printout-graph-gnu-make">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>gnu_make_maximal_rebuild_mode</em></dt>
+<dd><div class="first admonition warning">
+<p class="first admonition-title">Warning</p>
+<p class="last">This is a dangerous option. Use rarely and with caution</p>
+</div>
+<p class="last">See explanation in <a class="reference internal" href="#pipeline-functions-pipeline-run-gnu-make"><em>pipeline_run</em></a>.</p>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
+<blockquote id="pipeline-functions-pipeline-printout-graph-test-all-task-for-update">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>test_all_task_for_update</em></dt>
+<dd><div class="first last line-block">
+<div class="line">Indicates whether intermediate tasks are out of date or not. Normally <strong>Ruffus</strong> will
+stop checking dependent tasks for completion or whether they are out-of-date once it has
+discovered the maximal extent of the pipeline which has to be run.</div>
+<div class="line">For displaying the flow of the pipeline, this is hardly very informative.</div>
+</div>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
+<blockquote id="pipeline-functions-pipeline-printout-graph-no-key-legend">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>no_key_legend</em></dt>
+<dd><p class="first last">Do not include key legend explaining the colour scheme of the flowchart.</p>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
+<blockquote id="pipeline-functions-pipeline-printout-graph-minimal-key-legend">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>minimal_key_legend</em></dt>
+<dd><p class="first last">Do not include unused task types in key legend.</p>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
+<blockquote id="pipeline-functions-pipeline-printout-graph-user-colour-scheme">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>user_colour_scheme</em></dt>
+<dd><p class="first">Dictionary specifying colour scheme for flowchart</p>
+<p>See complete <a class="reference internal" href="tutorials/new_tutorial/flowchart_colours.html#new-manual-flowchart-colours"><em>list of Colour Schemes</em></a>.</p>
+<div class="line-block">
+<div class="line">Colours can be names e.g. <tt class="docutils literal"><span class="pre">"black"</span></tt> or quoted hex e.g. <tt class="docutils literal"><span class="pre">'"#F6F4F4"'</span></tt> (note extra quotes)</div>
+<div class="line">Default values will be used unless specified</div>
+</div>
+<table border="1" class="docutils">
+<colgroup>
+<col width="33%" />
+<col width="33%" />
+<col width="33%" />
+</colgroup>
+<thead valign="bottom">
+<tr class="row-odd"><th class="head">key</th>
+<th class="head">Subkey</th>
+<th class="head"> </th>
+</tr>
+</thead>
+<tbody valign="top">
+<tr class="row-even"><td><ul class="first last simple">
+<li><tt class="docutils literal"><span class="pre">'colour_scheme_index'</span></tt></li>
+</ul>
+</td>
+<td><div class="first last line-block">
+<div class="line">index of default colour scheme,</div>
+<div class="line">0-7, defaults to 0 unless specified</div>
+</div>
+</td>
+<td> </td>
+</tr>
+<tr class="row-odd"><td><ul class="first last simple">
+<li><tt class="docutils literal"><span class="pre">'Final</span> <span class="pre">target'</span></tt></li>
+<li><tt class="docutils literal"><span class="pre">'Explicitly</span> <span class="pre">specified</span> <span class="pre">task'</span></tt></li>
+<li><tt class="docutils literal"><span class="pre">'Task</span> <span class="pre">to</span> <span class="pre">run'</span></tt></li>
+<li><tt class="docutils literal"><span class="pre">'Down</span> <span class="pre">stream'</span></tt></li>
+<li><tt class="docutils literal"><span class="pre">'Up-to-date</span> <span class="pre">Final</span> <span class="pre">target'</span></tt></li>
+<li><tt class="docutils literal"><span class="pre">'Up-to-date</span> <span class="pre">task</span> <span class="pre">forced</span> <span class="pre">to</span> <span class="pre">rerun'</span></tt></li>
+<li><tt class="docutils literal"><span class="pre">'Up-to-date</span> <span class="pre">task'</span></tt></li>
+<li><tt class="docutils literal"><span class="pre">'Vicious</span> <span class="pre">cycle'</span></tt></li>
+</ul>
+</td>
+<td><ul class="first last simple">
+<li><tt class="docutils literal"><span class="pre">'fillcolor'</span></tt></li>
+<li><tt class="docutils literal"><span class="pre">'fontcolor'</span></tt></li>
+<li><tt class="docutils literal"><span class="pre">'color'</span></tt></li>
+<li><tt class="docutils literal"><span class="pre">'dashed'</span></tt> = <tt class="docutils literal"><span class="pre">0/1</span></tt></li>
+</ul>
+</td>
+<td>Colours / attributes for each task type</td>
+</tr>
+<tr class="row-even"><td><ul class="first last simple">
+<li><tt class="docutils literal"><span class="pre">'Vicious</span> <span class="pre">cycle'</span></tt></li>
+<li><tt class="docutils literal"><span class="pre">'Task</span> <span class="pre">to</span> <span class="pre">run'</span></tt></li>
+<li><tt class="docutils literal"><span class="pre">'Up-to-date'</span></tt></li>
+</ul>
+</td>
+<td><ul class="first last simple">
+<li><tt class="docutils literal"><span class="pre">'linecolor'</span></tt></li>
+</ul>
+</td>
+<td>Colours for arrows between tasks</td>
+</tr>
+<tr class="row-odd"><td><ul class="first last simple">
+<li><tt class="docutils literal"><span class="pre">'Pipeline'</span></tt></li>
+</ul>
+</td>
+<td><ul class="first last simple">
+<li><tt class="docutils literal"><span class="pre">'fontcolor'</span></tt></li>
+</ul>
+</td>
+<td>Flowchart title colour</td>
+</tr>
+<tr class="row-even"><td><ul class="first last simple">
+<li><tt class="docutils literal"><span class="pre">'Key'</span></tt></li>
+</ul>
+</td>
+<td><ul class="first last simple">
+<li><tt class="docutils literal"><span class="pre">'fontcolor'</span></tt></li>
+<li><tt class="docutils literal"><span class="pre">'fillcolor'</span></tt></li>
+</ul>
+</td>
+<td>Legend colours</td>
+</tr>
+</tbody>
+</table>
+<p>Example:</p>
+<blockquote class="last">
+<div><p>Use colour scheme index = 1</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="n">pipeline_printout_graph</span> <span class="p">(</span><span class="s">"flowchart.svg"</span><span class="p">,</span> <span class="s">"svg"</span><span class="p">,</span> <span class="p">[</span><span class="n">final_task</span><span class="p">],</span>
+ <span class="n">user_colour_scheme</span> <span class="o">=</span> <span class="p">{</span>
+ <span class="s">"colour_scheme_index"</span> <span class="p">:</span><span class="mi">1</span><span class="p">,</span>
+ <span class="s">"Pipeline"</span> <span class="p">:{</span><span class="s">"fontcolor"</span> <span class="p">:</span> <span class="s">'"#FF3232"'</span> <span class="p">},</span>
+ <span class="s">"Key"</span> <span class="p">:{</span><span class="s">"fontcolor"</span> <span class="p">:</span> <span class="s">"Red"</span><span class="p">,</span>
+ <span class="s">"fillcolor"</span> <span class="p">:</span> <span class="s">'"#F6F4F4"'</span> <span class="p">},</span>
+ <span class="s">"Task to run"</span> <span class="p">:{</span><span class="s">"linecolor"</span> <span class="p">:</span> <span class="s">'"#0044A0"'</span> <span class="p">},</span>
+ <span class="s">"Final target"</span> <span class="p">:{</span><span class="s">"fillcolor"</span> <span class="p">:</span> <span class="s">'"#EFA03B"'</span><span class="p">,</span>
+ <span class="s">"fontcolor"</span> <span class="p">:</span> <span class="s">"black"</span><span class="p">,</span>
+ <span class="s">"dashed"</span> <span class="p">:</span> <span class="mi">0</span> <span class="p">}</span>
+ <span class="p">})</span>
+</pre></div>
+</div>
+</div></blockquote>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
+<blockquote id="pipeline-functions-pipeline-printout-graph-pipeline-name">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>pipeline_name</em></dt>
+<dd><p class="first last">Specify title for flowchart</p>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
+<blockquote id="pipeline-functions-pipeline-printout-graph-size">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>size</em></dt>
+<dd><p class="first last">Size in inches for flowchart</p>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
+<blockquote id="pipeline-functions-pipeline-printout-graph-dpi">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>dpi</em></dt>
+<dd><p class="first last">Resolution in dots per inch. Ignored for svg output</p>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
+<blockquote id="pipeline-functions-pipeline-printout-graph-runtime-data">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>runtime_data</em></dt>
+<dd><p class="first last">Experimental feature for passing data to tasks at run time</p>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
+<blockquote id="pipeline-functions-pipeline-printout-graph-history-file">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>history_file</em></dt>
+<dd><p class="first last">The database file which stores checksums and file timestamps for input/output files.
+Defaults to <tt class="docutils literal"><span class="pre">.ruffus_history.sqlite</span></tt> if unspecified</p>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
+<blockquote id="pipeline-functions-pipeline-printout-graph-checksum-level">
+<div><ul>
+<li><dl class="first docutils">
+<dt><em>checksum_level</em></dt>
+<dd><p class="first">Several options for checking up-to-dateness are available: Default is level 1.</p>
+<blockquote class="last">
+<div><ul class="simple">
+<li>level 0 : Use only file timestamps</li>
+<li>level 1 : above, plus timestamp of successful job completion</li>
+<li>level 2 : above, plus a checksum of the pipeline function body</li>
+<li>level 3 : above, plus a checksum of the pipeline function default arguments and the additional arguments passed in by task decorators</li>
+</ul>
+</div></blockquote>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
+<span class="target" id="pipeline-functions-pipeline-get-task-names"></span></div>
+<div class="section" id="index-3">
+<span id="id5"></span><h2><em>pipeline_get_task_names</em><a class="headerlink" href="#index-3" title="Permalink to this headline">¶</a></h2>
+<p><strong>pipeline_get_task_names</strong> ()</p>
+<blockquote>
+<div><p><strong>Purpose:</strong></p>
+<blockquote>
+<div>Returns a list of all task names in the pipeline without running the pipeline or checking to see if the tasks are connected correctly</div></blockquote>
+<p><strong>Example</strong>:</p>
+<blockquote>
+<div><p>Given:</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+
+<span class="nd">@originate</span><span class="p">([])</span>
+<span class="k">def</span> <span class="nf">create_data</span><span class="p">(</span><span class="n">output_files</span><span class="p">):</span>
+ <span class="k">pass</span>
+
+<span class="nd">@transform</span><span class="p">(</span><span class="n">create_data</span><span class="p">,</span> <span class="n">suffix</span><span class="p">(</span><span class="s">".txt"</span><span class="p">),</span> <span class="s">".task1"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">task1</span><span class="p">(</span><span class="n">input_files</span><span class="p">,</span> <span class="n">output_files</span><span class="p">):</span>
+ <span class="k">pass</span>
+
+<span class="nd">@transform</span><span class="p">(</span><span class="n">task1</span><span class="p">,</span> <span class="n">suffix</span><span class="p">(</span><span class="s">".task1"</span><span class="p">),</span> <span class="s">".task2"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">task2</span><span class="p">(</span><span class="n">input_files</span><span class="p">,</span> <span class="n">output_files</span><span class="p">):</span>
+ <span class="k">pass</span>
+</pre></div>
+</div>
+<p>Produces a list of three task names:</p>
+<div class="highlight-pycon"><div class="highlight"><pre><span class="gp">>>> </span><span class="n">pipeline_get_task_names</span> <span class="p">()</span>
+<span class="go">['create_data', 'task1', 'task2']</span>
+</pre></div>
+</div>
+</div></blockquote>
+</div></blockquote>
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#">Pipeline functions</a><ul>
+<li><a class="reference internal" href="#index-0"><em>pipeline_run</em></a></li>
+<li><a class="reference internal" href="#index-1"><em>pipeline_printout</em></a></li>
+<li><a class="reference internal" href="#index-2"><em>pipeline_printout_graph</em></a></li>
+<li><a class="reference internal" href="#index-3"><em>pipeline_get_task_names</em></a></li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="cheatsheet.html"
+ title="previous chapter">Cheat Sheet</a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="drmaa_wrapper_functions.html"
+ title="next chapter">drmaa functions</a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="_sources/pipeline_functions.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="decorators/decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="decorators/originate.html">@originate</a> </li>
+ <li><a href="decorators/split.html">@split</a> </li>
+ <li><a href="decorators/transform.html">@transform</a> </li>
+ <li><a href="decorators/merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="decorators/decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="decorators/subdivide.html">@subdivide</a> </li>
+ <li><a href="decorators/transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="decorators/collate.html">@collate</a> </li>
+ <li><a href="decorators/collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="decorators/graphviz.html">@graphviz</a> </li>
+ <li><a href="decorators/mkdir.html">@mkdir</a> </li>
+ <li><a href="decorators/follows.html">@follows / mkdir</a> </li>
+ <li><a href="decorators/posttask.html">@posttask touch_file</a> </li>
+ <li><a href="decorators/active_if.html">@active_if</a> </li>
+ <li><a href="decorators/jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="decorators/decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="decorators/product.html">@product </a> </li>
+ <li><a href="decorators/permutations.html">@permutations </a> </li>
+ <li><a href="decorators/combinations.html">@combinations </a> </li>
+ <li><a href="decorators/combinations_with_replacement.html">@combinations_ _with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="decorators/decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="decorators/files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="decorators/parallel.html">@parallel</a> </li>
+ <li><a href="decorators/check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="decorators/indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="##pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="##pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="##pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="##pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="drmaa_wrapper_functions.html" title="drmaa functions"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="cheatsheet.html" title="Cheat Sheet"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="index.html">Home</a> | </li>
+ <li><a href="contents.html">Contents</a> | </li>
+ <li><a href="installation.html">Install</a> | </li>
+ <li><a href="tutorials/new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="tutorials/new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="faq.html">FAQ</a> | </li>
+ <li><a href="cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="tutorials/new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="gallery.html">Gallery</a> | </li>
+ <li><a href="history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/proxy_logger.html b/doc/_build/html/proxy_logger.html
new file mode 100644
index 0000000..218d0cd
--- /dev/null
+++ b/doc/_build/html/proxy_logger.html
@@ -0,0 +1,407 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>ruffus.proxy_logger — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: './',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="_static/jquery.js"></script>
+ <script type="text/javascript" src="_static/underscore.js"></script>
+ <script type="text/javascript" src="_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="index.html" />
+ <link rel="prev" title="ruffus.Task" href="task.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="task.html" title="ruffus.Task"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="index.html">Home</a> | </li>
+ <li><a href="contents.html">Contents</a> | </li>
+ <li><a href="installation.html">Install</a> | </li>
+ <li><a href="tutorials/new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="tutorials/new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="faq.html">FAQ</a> | </li>
+ <li><a href="cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="tutorials/new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="gallery.html">Gallery</a> | </li>
+ <li><a href="history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <div class="section" id="ruffus-proxy-logger">
+<h1>ruffus.proxy_logger<a class="headerlink" href="#ruffus-proxy-logger" title="Permalink to this headline">¶</a></h1>
+<span class="target" id="module-ruffus.proxy_logger"><span id="proxy-logger"></span></span><div class="section" id="create-proxy-for-logging-for-use-with-multiprocessing">
+<h2>Create proxy for logging for use with multiprocessing<a class="headerlink" href="#create-proxy-for-logging-for-use-with-multiprocessing" title="Permalink to this headline">¶</a></h2>
+<p>These can be safely sent (marshalled) across process boundaries</p>
+<div class="section" id="example-1">
+<h3>Example 1<a class="headerlink" href="#example-1" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><p>Set up logger from config file:</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="kn">from</span> <span class="nn">proxy_logger</span> <span class="kn">import</span> <span class="o">*</span>
+<span class="n">args</span><span class="o">=</span><span class="p">{}</span>
+<span class="n">args</span><span class="p">[</span><span class="s">"config_file"</span><span class="p">]</span> <span class="o">=</span> <span class="s">"/my/config/file"</span>
+
+<span class="p">(</span><span class="n">logger_proxy</span><span class="p">,</span>
+ <span class="n">logging_mutex</span><span class="p">)</span> <span class="o">=</span> <span class="n">make_shared_logger_and_proxy</span> <span class="p">(</span><span class="n">setup_std_shared_logger</span><span class="p">,</span>
+ <span class="s">"my_logger"</span><span class="p">,</span> <span class="n">args</span><span class="p">)</span>
+</pre></div>
+</div>
+</div></blockquote>
+</div>
+<div class="section" id="example-2">
+<h3>Example 2<a class="headerlink" href="#example-2" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><p>Log to file <tt class="docutils literal"><span class="pre">"/my/lg.log"</span></tt> in the specified format (Time / Log name / Event type / Message).</p>
+<p>Delay file creation until first log.</p>
+<p>Only log <tt class="docutils literal"><span class="pre">Debug</span></tt> messages</p>
+<blockquote>
+<div><p>Other alternatives for the logging threshold (<tt class="docutils literal"><span class="pre">args["level"]</span></tt>) include</p>
+<blockquote>
+<div><ul class="simple">
+<li><tt class="docutils literal"><span class="pre">logging.DEBUG</span></tt></li>
+<li><tt class="docutils literal"><span class="pre">logging.INFO</span></tt></li>
+<li><tt class="docutils literal"><span class="pre">logging.WARNING</span></tt></li>
+<li><tt class="docutils literal"><span class="pre">logging.ERROR</span></tt></li>
+<li><tt class="docutils literal"><span class="pre">logging.CRITICAL</span></tt></li>
+</ul>
+</div></blockquote>
+</div></blockquote>
+<div class="highlight-python"><div class="highlight"><pre><span class="kn">from</span> <span class="nn">proxy_logger</span> <span class="kn">import</span> <span class="o">*</span>
+<span class="n">args</span><span class="o">=</span><span class="p">{}</span>
+<span class="n">args</span><span class="p">[</span><span class="s">"file_name"</span><span class="p">]</span> <span class="o">=</span> <span class="s">"/my/lg.log"</span>
+<span class="n">args</span><span class="p">[</span><span class="s">"formatter"</span><span class="p">]</span> <span class="o">=</span> <span class="s">"</span><span class="si">%(asctime)s</span><span class="s"> - </span><span class="si">%(name)s</span><span class="s"> - </span><span class="si">%(levelname)6s</span><span class="s"> - </span><span class="si">%(message)s</span><span class="s">"</span>
+<span class="n">args</span><span class="p">[</span><span class="s">"delay"</span><span class="p">]</span> <span class="o">=</span> <span class="bp">True</span>
+<span class="n">args</span><span class="p">[</span><span class="s">"level"</span><span class="p">]</span> <span class="o">=</span> <span class="n">logging</span><span class="o">.</span><span class="n">DEBUG</span>
+
+<span class="p">(</span><span class="n">logger_proxy</span><span class="p">,</span>
+ <span class="n">logging_mutex</span><span class="p">)</span> <span class="o">=</span> <span class="n">make_shared_logger_and_proxy</span> <span class="p">(</span><span class="n">setup_std_shared_logger</span><span class="p">,</span>
+ <span class="s">"my_logger"</span><span class="p">,</span> <span class="n">args</span><span class="p">)</span>
+</pre></div>
+</div>
+</div></blockquote>
+</div>
+<div class="section" id="example-3">
+<h3>Example 3<a class="headerlink" href="#example-3" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><p>Rotate log files every 20 KB (20,000 bytes), with up to 10 backups.</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="kn">from</span> <span class="nn">proxy_logger</span> <span class="kn">import</span> <span class="o">*</span>
+<span class="n">args</span><span class="o">=</span><span class="p">{}</span>
+<span class="n">args</span><span class="p">[</span><span class="s">"file_name"</span><span class="p">]</span> <span class="o">=</span> <span class="s">"/my/lg.log"</span>
+<span class="n">args</span><span class="p">[</span><span class="s">"rotating"</span><span class="p">]</span> <span class="o">=</span> <span class="bp">True</span>
+<span class="n">args</span><span class="p">[</span><span class="s">"maxBytes"</span><span class="p">]</span><span class="o">=</span><span class="mi">20000</span>
+<span class="n">args</span><span class="p">[</span><span class="s">"backupCount"</span><span class="p">]</span><span class="o">=</span><span class="mi">10</span>
+<span class="p">(</span><span class="n">logger_proxy</span><span class="p">,</span>
+ <span class="n">logging_mutex</span><span class="p">)</span> <span class="o">=</span> <span class="n">make_shared_logger_and_proxy</span> <span class="p">(</span><span class="n">setup_std_shared_logger</span><span class="p">,</span>
+ <span class="s">"my_logger"</span><span class="p">,</span> <span class="n">args</span><span class="p">)</span>
+</pre></div>
+</div>
+</div></blockquote>
+</div>
+<div class="section" id="to-use">
+<h3>To use:<a class="headerlink" href="#to-use" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="p">(</span><span class="n">logger_proxy</span><span class="p">,</span>
+ <span class="n">logging_mutex</span><span class="p">)</span> <span class="o">=</span> <span class="n">make_shared_logger_and_proxy</span> <span class="p">(</span><span class="n">setup_std_shared_logger</span><span class="p">,</span>
+ <span class="s">"my_logger"</span><span class="p">,</span> <span class="n">args</span><span class="p">)</span>
+
+<span class="k">with</span> <span class="n">logging_mutex</span><span class="p">:</span>
+ <span class="n">my_log</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s">'This is a debug message'</span><span class="p">)</span>
+ <span class="n">my_log</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s">'This is an info message'</span><span class="p">)</span>
+ <span class="n">my_log</span><span class="o">.</span><span class="n">warning</span><span class="p">(</span><span class="s">'This is a warning message'</span><span class="p">)</span>
+ <span class="n">my_log</span><span class="o">.</span><span class="n">error</span><span class="p">(</span><span class="s">'This is an error message'</span><span class="p">)</span>
+ <span class="n">my_log</span><span class="o">.</span><span class="n">critical</span><span class="p">(</span><span class="s">'This is a critical error message'</span><span class="p">)</span>
+ <span class="n">my_log</span><span class="o">.</span><span class="n">log</span><span class="p">(</span><span class="n">logging</span><span class="o">.</span><span class="n">DEBUG</span><span class="p">,</span> <span class="s">'This is a debug message'</span><span class="p">)</span>
+</pre></div>
+</div>
+<p>Note that the logging function <tt class="docutils literal"><span class="pre">exception()</span></tt> is not included because Python
+stack trace information is not well-marshalled
+(<a class="reference external" href="http://docs.python.org/library/pickle.html">pickle</a>d) across processes; a workaround is sketched below.</p>
+</div></blockquote>
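+<p>A minimal sketch of a workaround, assuming the <tt class="docutils literal"><span class="pre">logger_proxy</span></tt> and
+<tt class="docutils literal"><span class="pre">logging_mutex</span></tt> objects from the examples above
+(<tt class="docutils literal"><span class="pre">run_one_job()</span></tt> is a hypothetical job body):
+format the traceback into a plain string inside the worker process and pass that to <tt class="docutils literal"><span class="pre">error()</span></tt>:</p>
+<div class="highlight-python"><div class="highlight"><pre>import traceback
+
+try:
+    run_one_job()                       # hypothetical job body
+except Exception:
+    # plain strings pickle safely, unlike traceback objects
+    with logging_mutex:
+        logger_proxy.error("Job failed:\n" + traceback.format_exc())
+</pre></div>
+</div>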
+</div>
+</div>
+<div class="section" id="proxies-for-a-log">
+<h2>Proxies for a log:<a class="headerlink" href="#proxies-for-a-log" title="Permalink to this headline">¶</a></h2>
+<dl class="function">
+<dt id="ruffus.proxy_logger.make_shared_logger_and_proxy">
+<tt class="descclassname">ruffus.proxy_logger.</tt><tt class="descname">make_shared_logger_and_proxy</tt><big>(</big><em>logger_factory</em>, <em>logger_name</em>, <em>args</em><big>)</big><a class="reference internal" href="_modules/ruffus/proxy_logger.html#make_shared_logger_and_proxy"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#ruffus.proxy_logger.make_shared_logger_and_proxy" title="Permalink to this definition">¶</a></dt>
+<dd><p>Make a <a class="reference external" href="http://docs.python.org/library/logging.html">logging</a> object
+called “<tt class="docutils literal"><span class="pre">logger_name</span></tt>” by calling <tt class="docutils literal"><span class="pre">logger_factory</span></tt>(<tt class="docutils literal"><span class="pre">args</span></tt>)</p>
+<p>This function returns a proxy to the shared logger, which can be copied to jobs
+in other processes, as well as a mutex which can be used to prevent simultaneous
+logging.</p>
+<table class="docutils field-list" frame="void" rules="none">
+<col class="field-name" />
+<col class="field-body" />
+<tbody valign="top">
+<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
+<li><strong>logger_factory</strong> – <p>a function which creates and returns an object with the
+<a class="reference external" href="http://docs.python.org/library/logging.html">logging</a> interface.
+<tt class="docutils literal"><span class="pre">setup_std_shared_logger()</span></tt> is one example of a logger factory.</p>
+</li>
+<li><strong>logger_name</strong> – name of log</li>
+<li><strong>args</strong> – parameters passed (as a single argument) to <tt class="docutils literal"><span class="pre">logger_factory</span></tt></li>
+</ul>
+</td>
+</tr>
+<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">a proxy to the shared logger which can be copied to jobs in other processes</p>
+</td>
+</tr>
+<tr class="field-odd field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">a mutex which can be used to prevent simultaneous logging from happening</p>
+</td>
+</tr>
+</tbody>
+</table>
+</dd></dl>
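+<p>A minimal sketch of a custom <em>logger_factory</em>. The factory name
+<tt class="docutils literal"><span class="pre">setup_stderr_logger</span></tt> and its body are illustrative assumptions;
+only the calling convention (a factory taking <tt class="docutils literal"><span class="pre">logger_name</span></tt> and
+<tt class="docutils literal"><span class="pre">args</span></tt>, passed to <tt class="docutils literal"><span class="pre">make_shared_logger_and_proxy()</span></tt>) comes from this page:</p>
+<div class="highlight-python"><div class="highlight"><pre>import logging
+import sys
+from ruffus.proxy_logger import *
+
+def setup_stderr_logger(logger_name, args):
+    # any callable returning an object with the logging interface will do
+    new_logger = logging.getLogger(logger_name)
+    handler = logging.StreamHandler(sys.stderr)
+    handler.setFormatter(logging.Formatter(args.get("formatter", "%(message)s")))
+    new_logger.addHandler(handler)
+    new_logger.setLevel(args.get("level", logging.INFO))
+    return new_logger
+
+(logger_proxy,
+ logging_mutex) = make_shared_logger_and_proxy(setup_stderr_logger,
+                                               "my_logger", {"level": logging.DEBUG})
+</pre></div>
+</div>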
+
+</div>
+<div class="section" id="create-a-logging-object">
+<h2>Create a logging object<a class="headerlink" href="#create-a-logging-object" title="Permalink to this headline">¶</a></h2>
+<dl class="function">
+<dt id="ruffus.proxy_logger.setup_std_shared_logger">
+<tt class="descclassname">ruffus.proxy_logger.</tt><tt class="descname">setup_std_shared_logger</tt><big>(</big><em>logger_name</em>, <em>args</em><big>)</big><a class="reference internal" href="_modules/ruffus/proxy_logger.html#setup_std_shared_logger"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#ruffus.proxy_logger.setup_std_shared_logger" title="Permalink to this definition">¶</a></dt>
+<dd><p>This function is a simple wrapper around the Python
+<a class="reference external" href="http://docs.python.org/library/logging.html">logging</a> module.</p>
+<p>This <em>logger_factory</em> example creates logging objects which can
+then be managed by proxy via <tt class="docutils literal"><span class="pre">ruffus.proxy_logger.make_shared_logger_and_proxy()</span></tt></p>
+<p>This can be:</p>
+<blockquote>
+<div><ul class="simple">
+<li>a <a class="reference external" href="http://docs.python.org/library/logging.html#filehandler">disk log file</a></li>
+<li>an automatically backed-up <a class="reference external" href="http://docs.python.org/library/logging.html#rotatingfilehandler">(rotating) log</a>.</li>
+<li>any log specified in a <a class="reference external" href="http://docs.python.org/library/logging.html#configuration-file-format">configuration file</a></li>
+</ul>
+</div></blockquote>
+<p>These are specified in the <tt class="docutils literal"><span class="pre">args</span></tt> dictionary forwarded by <tt class="docutils literal"><span class="pre">make_shared_logger_and_proxy()</span></tt></p>
+<table class="docutils field-list" frame="void" rules="none">
+<col class="field-name" />
+<col class="field-body" />
+<tbody valign="top">
+<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
+<li><strong>logger_name</strong> – name of log</li>
+<li><strong>args</strong> – <p>a dictionary of parameters forwarded from <tt class="docutils literal"><span class="pre">make_shared_logger_and_proxy()</span></tt></p>
+<p>Valid entries include:</p>
+<blockquote>
+<div><dl class="describe">
+<dt>
+<tt class="descname">"level"</tt></dt>
+<dd><p>Sets the <a class="reference external" href="http://docs.python.org/library/logging.html#logging.Handler.setLevel">threshold</a> for the logger.</p>
+</dd></dl>
+
+<dl class="describe">
+<dt>
+<tt class="descname">"config_file"</tt></dt>
+<dd><p>The logging object is configured from this <a class="reference external" href="http://docs.python.org/library/logging.html#configuration-file-format">configuration file</a>.</p>
+</dd></dl>
+
+<dl class="describe">
+<dt>
+<tt class="descname">"file_name"</tt></dt>
+<dd><p>Sets disk log file name.</p>
+</dd></dl>
+
+<dl class="describe">
+<dt>
+<tt class="descname">"rotating"</tt></dt>
+<dd><p>Chooses a <a class="reference external" href="http://docs.python.org/library/logging.html#rotatingfilehandler">(rotating) log</a>.</p>
+</dd></dl>
+
+<dl class="describe">
+<dt>
+<tt class="descname">"maxBytes"</tt></dt>
+<dd><p>Allows the file to roll over at a predetermined size.</p>
+</dd></dl>
+
+<dl class="describe">
+<dt>
+<tt class="descname">"backupCount"</tt></dt>
+<dd><p>If backupCount is non-zero, the system will save old log files by appending the extensions <tt class="docutils literal"><span class="pre">.1</span></tt>, <tt class="docutils literal"><span class="pre">.2</span></tt>, <tt class="docutils literal"><span class="pre">.3</span></tt> etc., to the filename.</p>
+</dd></dl>
+
+<dl class="describe">
+<dt>
+<tt class="descname">"delay"</tt></dt>
+<dd><p>Defer file creation until the log is written to.</p>
+</dd></dl>
+
+<dl class="describe">
+<dt>
+<tt class="descname">"formatter"</tt></dt>
+<dd><p><a class="reference external" href="http://docs.python.org/library/logging.html#formatter-objects">Converts</a> the message to a logged entry string.
+For example,</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="s">"</span><span class="si">%(asctime)s</span><span class="s"> - </span><span class="si">%(name)s</span><span class="s"> - </span><span class="si">%(levelname)6s</span><span class="s"> - </span><span class="si">%(message)s</span><span class="s">"</span>
+</pre></div>
+</div>
+</dd></dl>
+
+</div></blockquote>
+</li>
+</ul>
+</td>
+</tr>
+</tbody>
+</table>
+</dd></dl>
+
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#">ruffus.proxy_logger</a><ul>
+<li><a class="reference internal" href="#create-proxy-for-logging-for-use-with-multiprocessing">Create proxy for logging for use with multiprocessing</a><ul>
+<li><a class="reference internal" href="#example-1">Example 1</a></li>
+<li><a class="reference internal" href="#example-2">Example 2</a></li>
+<li><a class="reference internal" href="#example-3">Example 3</a></li>
+<li><a class="reference internal" href="#to-use">To use:</a></li>
+</ul>
+</li>
+<li><a class="reference internal" href="#proxies-for-a-log">Proxies for a log:</a></li>
+<li><a class="reference internal" href="#create-a-logging-object">Create a logging object</a></li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="task.html"
+ title="previous chapter">ruffus.Task</a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="_sources/proxy_logger.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="decorators/decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="decorators/originate.html">@originate</a> </li>
+ <li><a href="decorators/split.html">@split</a> </li>
+ <li><a href="decorators/transform.html">@transform</a> </li>
+ <li><a href="decorators/merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="decorators/decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="decorators/subdivide.html">@subdivide</a> </li>
+ <li><a href="decorators/transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="decorators/collate.html">@collate</a> </li>
+ <li><a href="decorators/collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="decorators/graphviz.html">@graphviz</a> </li>
+ <li><a href="decorators/mkdir.html">@mkdir</a> </li>
+ <li><a href="decorators/follows.html">@follows / mkdir</a> </li>
+ <li><a href="decorators/posttask.html">@posttask touch_file</a> </li>
+ <li><a href="decorators/active_if.html">@active_if</a> </li>
+ <li><a href="decorators/jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="decorators/decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="decorators/product.html">@product </a> </li>
+ <li><a href="decorators/permutations.html">@permutations </a> </li>
+ <li><a href="decorators/combinations.html">@combinations </a> </li>
+ <li><a href="decorators/combinations_with_replacement.html">@combinations_ _with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="decorators/decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="decorators/files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="decorators/parallel.html">@parallel</a> </li>
+ <li><a href="decorators/check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="decorators/indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="task.html" title="ruffus.Task"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="index.html">Home</a> | </li>
+ <li><a href="contents.html">Contents</a> | </li>
+ <li><a href="installation.html">Install</a> | </li>
+ <li><a href="tutorials/new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="tutorials/new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="faq.html">FAQ</a> | </li>
+ <li><a href="cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="tutorials/new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="gallery.html">Gallery</a> | </li>
+ <li><a href="history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/recipes.html b/doc/_build/html/recipes.html
new file mode 100644
index 0000000..9ab136e
--- /dev/null
+++ b/doc/_build/html/recipes.html
@@ -0,0 +1,199 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>Recipes — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: './',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="_static/jquery.js"></script>
+ <script type="text/javascript" src="_static/underscore.js"></script>
+ <script type="text/javascript" src="_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="index.html" />
+ <link rel="next" title="A simple tutorial: 8 steps to Ruffus" href="tutorials/simple_tutorial/simple_tutorial.html" />
+ <link rel="prev" title="Chapter 20: @files_re: Deprecated syntax using regular expressions" href="tutorials/manual/files_re.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="tutorials/simple_tutorial/simple_tutorial.html" title="A simple tutorial: 8 steps to Ruffus"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="tutorials/manual/files_re.html" title="Chapter 20: @files_re: Deprecated syntax using regular expressions"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="index.html">Home</a> | </li>
+ <li><a href="contents.html">Contents</a> | </li>
+ <li><a href="installation.html">Install</a> | </li>
+ <li><a href="tutorials/new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="tutorials/new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="faq.html">FAQ</a> | </li>
+ <li><a href="cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="tutorials/new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="gallery.html">Gallery</a> | </li>
+ <li><a href="history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <div class="section" id="recipes">
+<h1>Recipes<a class="headerlink" href="#recipes" title="Permalink to this headline">¶</a></h1>
+<div class="section" id="general">
+<h2>General<a class="headerlink" href="#general" title="Permalink to this headline">¶</a></h2>
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#">Recipes</a><ul>
+<li><a class="reference internal" href="#general">General</a></li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="tutorials/manual/files_re.html"
+ title="previous chapter"><strong>Chapter 20</strong>: <strong>@files_re</strong>: Deprecated <cite>syntax using regular expressions</cite></a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="tutorials/simple_tutorial/simple_tutorial.html"
+ title="next chapter">A simple tutorial: 8 steps to <em>Ruffus</em></a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="_sources/recipes.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="decorators/decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="decorators/originate.html">@originate</a> </li>
+ <li><a href="decorators/split.html">@split</a> </li>
+ <li><a href="decorators/transform.html">@transform</a> </li>
+ <li><a href="decorators/merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="decorators/decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="decorators/subdivide.html">@subdivide</a> </li>
+ <li><a href="decorators/transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="decorators/collate.html">@collate</a> </li>
+ <li><a href="decorators/collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="decorators/graphviz.html">@graphviz</a> </li>
+ <li><a href="decorators/mkdir.html">@mkdir</a> </li>
+ <li><a href="decorators/follows.html">@follows / mkdir</a> </li>
+ <li><a href="decorators/posttask.html">@posttask touch_file</a> </li>
+ <li><a href="decorators/active_if.html">@active_if</a> </li>
+ <li><a href="decorators/jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="decorators/decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="decorators/product.html">@product </a> </li>
+ <li><a href="decorators/permutations.html">@permutations </a> </li>
+ <li><a href="decorators/combinations.html">@combinations </a> </li>
+ <li><a href="decorators/combinations_with_replacement.html">@combinations_ _with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="decorators/decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="decorators/files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="decorators/parallel.html">@parallel</a> </li>
+ <li><a href="decorators/check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="decorators/indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="tutorials/simple_tutorial/simple_tutorial.html" title="A simple tutorial: 8 steps to Ruffus"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="tutorials/manual/files_re.html" title="Chapter 20: @files_re: Deprecated syntax using regular expressions"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="index.html">Home</a> | </li>
+ <li><a href="contents.html">Contents</a> | </li>
+ <li><a href="installation.html">Install</a> | </li>
+ <li><a href="tutorials/new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="tutorials/new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="faq.html">FAQ</a> | </li>
+ <li><a href="cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="tutorials/new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="gallery.html">Gallery</a> | </li>
+ <li><a href="history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/refactoring_ruffus_notes.html b/doc/_build/html/refactoring_ruffus_notes.html
new file mode 100644
index 0000000..2b4fcfd
--- /dev/null
+++ b/doc/_build/html/refactoring_ruffus_notes.html
@@ -0,0 +1,187 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title><no title> — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: './',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="_static/jquery.js"></script>
+ <script type="text/javascript" src="_static/underscore.js"></script>
+ <script type="text/javascript" src="_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="index.html" />
+ <link rel="next" title="Implementation Tips" href="implementation_notes.html" />
+ <link rel="prev" title="Future Changes to Ruffus" href="todo.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="implementation_notes.html" title="Implementation Tips"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="todo.html" title="Future Changes to Ruffus"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="index.html">Home</a> | </li>
+ <li><a href="contents.html">Contents</a> | </li>
+ <li><a href="installation.html">Install</a> | </li>
+ <li><a href="tutorials/new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="tutorials/new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="faq.html">FAQ</a> | </li>
+ <li><a href="cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="tutorials/new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="gallery.html">Gallery</a> | </li>
+ <li><a href="history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <blockquote>
+<div>Remember to cite Jake Biesinger and see if he is interested in being a co-author if we ever resubmit the drastically changed version...</div></blockquote>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="todo.html"
+ title="previous chapter">Future Changes to Ruffus</a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="implementation_notes.html"
+ title="next chapter">Implementation Tips</a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="_sources/refactoring_ruffus_notes.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="decorators/decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="decorators/originate.html">@originate</a> </li>
+ <li><a href="decorators/split.html">@split</a> </li>
+ <li><a href="decorators/transform.html">@transform</a> </li>
+ <li><a href="decorators/merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="decorators/decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="decorators/subdivide.html">@subdivide</a> </li>
+ <li><a href="decorators/transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="decorators/collate.html">@collate</a> </li>
+ <li><a href="decorators/collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="decorators/graphviz.html">@graphviz</a> </li>
+ <li><a href="decorators/mkdir.html">@mkdir</a> </li>
+ <li><a href="decorators/follows.html">@follows / mkdir</a> </li>
+ <li><a href="decorators/posttask.html">@posttask touch_file</a> </li>
+ <li><a href="decorators/active_if.html">@active_if</a> </li>
+ <li><a href="decorators/jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="decorators/decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="decorators/product.html">@product </a> </li>
+ <li><a href="decorators/permutations.html">@permutations </a> </li>
+ <li><a href="decorators/combinations.html">@combinations </a> </li>
+ <li><a href="decorators/combinations_with_replacement.html">@combinations_ _with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="decorators/decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="decorators/files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="decorators/parallel.html">@parallel</a> </li>
+ <li><a href="decorators/check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="decorators/indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="implementation_notes.html" title="Implementation Tips"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="todo.html" title="Future Changes to Ruffus"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="index.html">Home</a> | </li>
+ <li><a href="contents.html">Contents</a> | </li>
+ <li><a href="installation.html">Install</a> | </li>
+ <li><a href="tutorials/new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="tutorials/new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="faq.html">FAQ</a> | </li>
+ <li><a href="cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="tutorials/new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="gallery.html">Gallery</a> | </li>
+ <li><a href="history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/search.html b/doc/_build/html/search.html
new file mode 100644
index 0000000..7b0d2b7
--- /dev/null
+++ b/doc/_build/html/search.html
@@ -0,0 +1,177 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>Search — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: './',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="_static/jquery.js"></script>
+ <script type="text/javascript" src="_static/underscore.js"></script>
+ <script type="text/javascript" src="_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <script type="text/javascript" src="_static/searchtools.js"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="index.html" />
+ <script type="text/javascript">
+ jQuery(function() { Search.loadIndex("searchindex.js"); });
+ </script>
+
+ <script type="text/javascript" id="searchindexloader"></script>
+
+
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ Ruffus v. 2.5
+ <li><a href="index.html">Home</a> | </li>
+ <li><a href="contents.html">Contents</a> | </li>
+ <li><a href="installation.html">Install</a> | </li>
+ <li><a href="tutorials/new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="tutorials/new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="faq.html">FAQ</a> | </li>
+ <li><a href="cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="tutorials/new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="gallery.html">Gallery</a> | </li>
+ <li><a href="history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <h1 id="search-documentation">Search</h1>
+ <div id="fallback" class="admonition warning">
+ <script type="text/javascript">$('#fallback').hide();</script>
+ <p>
+ Please activate JavaScript to enable the search
+ functionality.
+ </p>
+ </div>
+ <p>
+ From here you can search these documents. Enter your search
+ words into the box below and click "search". Note that the search
+ function will automatically search for all of the words. Pages
+ containing fewer words won't appear in the result list.
+ </p>
+ <form action="" method="get">
+ <input type="text" name="q" value="" />
+ <input type="submit" value="search" />
+ <span id="search-progress" style="padding-left: 10px"></span>
+ </form>
+
+ <div id="search-results">
+
+ </div>
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="decorators/decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="decorators/originate.html">@originate</a> </li>
+ <li><a href="decorators/split.html">@split</a> </li>
+ <li><a href="decorators/transform.html">@transform</a> </li>
+ <li><a href="decorators/merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="decorators/decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="decorators/subdivide.html">@subdivide</a> </li>
+ <li><a href="decorators/transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="decorators/collate.html">@collate</a> </li>
+ <li><a href="decorators/collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="decorators/graphviz.html">@graphviz</a> </li>
+ <li><a href="decorators/mkdir.html">@mkdir</a> </li>
+ <li><a href="decorators/follows.html">@follows / mkdir</a> </li>
+ <li><a href="decorators/posttask.html">@posttask touch_file</a> </li>
+ <li><a href="decorators/active_if.html">@active_if</a> </li>
+ <li><a href="decorators/jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="decorators/decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="decorators/product.html">@product </a> </li>
+ <li><a href="decorators/permutations.html">@permutations </a> </li>
+ <li><a href="decorators/combinations.html">@combinations </a> </li>
+ <li><a href="decorators/combinations_with_replacement.html">@combinations_ _with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="decorators/decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="decorators/files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="decorators/parallel.html">@parallel</a> </li>
+ <li><a href="decorators/check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="decorators/indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="genindex.html" title="General Index"
+ >index</a></li>
+ Ruffus v. 2.5
+ <li><a href="index.html">Home</a> | </li>
+ <li><a href="contents.html">Contents</a> | </li>
+ <li><a href="installation.html">Install</a> | </li>
+ <li><a href="tutorials/new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="tutorials/new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="faq.html">FAQ</a> | </li>
+ <li><a href="cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="tutorials/new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="gallery.html">Gallery</a> | </li>
+ <li><a href="history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/searchindex.js b/doc/_build/html/searchindex.js
new file mode 100644
index 0000000..b170e26
--- /dev/null
+++ b/doc/_build/html/searchindex.js
@@ -0,0 +1 @@
+Search.setIndex({envversion:42,terms:{text3295:[],orthogon:84,prefic:3,reprocess:69,task_2:[],four:[],prefix:[90,15,1,85,56,27,70],sleep:[65,4,71,29,6,69],dirnam:[3,15,22],orig_arg:[84,90],inard:28,play_with_colour:91,split_ex_param_factori:84,whose:[15,25,2,22],deserialis:28,text3297:[],tspan7639:[],messi:[43,32],gene_data_dir:[61,53],under:[29,57,15,70],preprocess:1,transform_param_factori:[84,90],varianc:[],worth:[],merchant:[91,75],action_nam:84,everi:[42,48,80,0,32,43,46,51,86],down [...]
\ No newline at end of file
diff --git a/doc/_build/html/task.html b/doc/_build/html/task.html
new file mode 100644
index 0000000..fc2c39a
--- /dev/null
+++ b/doc/_build/html/task.html
@@ -0,0 +1,516 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>ruffus.Task — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: './',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="_static/jquery.js"></script>
+ <script type="text/javascript" src="_static/underscore.js"></script>
+ <script type="text/javascript" src="_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="index.html" />
+ <link rel="next" title="ruffus.proxy_logger" href="proxy_logger.html" />
+ <link rel="prev" title="@files_re" href="decorators/files_re.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="proxy_logger.html" title="ruffus.proxy_logger"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="decorators/files_re.html" title="@files_re"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="index.html">Home</a> | </li>
+ <li><a href="contents.html">Contents</a> | </li>
+ <li><a href="installation.html">Install</a> | </li>
+ <li><a href="tutorials/new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="tutorials/new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="faq.html">FAQ</a> | </li>
+ <li><a href="cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="tutorials/new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="gallery.html">Gallery</a> | </li>
+ <li><a href="history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <div class="section" id="ruffus-task">
+<h1>ruffus.Task<a class="headerlink" href="#ruffus-task" title="Permalink to this headline">¶</a></h1>
+<div class="section" id="decorators">
+<h2>Decorators<a class="headerlink" href="#decorators" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>Basic Task decorators are:</p>
+<blockquote>
+<div><p><a class="reference internal" href="decorators/follows.html#decorators-follows"><em>@follows()</em></a></p>
+<p>and</p>
+<p><a class="reference internal" href="decorators/files.html#decorators-files"><em>@files()</em></a></p>
+</div></blockquote>
+<p>Task decorators include:</p>
+<blockquote>
+<div><p><a class="reference internal" href="decorators/files.html#decorators-files"><em>@split()</em></a></p>
+<p><a class="reference internal" href="decorators/files.html#decorators-files"><em>@transform()</em></a></p>
+<p><a class="reference internal" href="decorators/files.html#decorators-files"><em>@merge()</em></a></p>
+<p><a class="reference internal" href="decorators/posttask.html#decorators-posttask"><em>@posttask()</em></a></p>
+</div></blockquote>
+<p>More advanced users may require:</p>
+<blockquote>
+<div><p><a class="reference internal" href="decorators/transform_ex.html#decorators-transform-ex"><em>@transform()</em></a></p>
+<p><a class="reference internal" href="decorators/collate.html#decorators-collate"><em>@collate()</em></a></p>
+<p><a class="reference internal" href="decorators/parallel.html#decorators-parallel"><em>@parallel()</em></a></p>
+<p><a class="reference internal" href="decorators/check_if_uptodate.html#decorators-check-if-uptodate"><em>@check_if_uptodate()</em></a></p>
+<p><a class="reference internal" href="decorators/files_re.html#decorators-files-re"><em>@files_re()</em></a></p>
+</div></blockquote>
+</div></blockquote>
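+<p>A minimal sketch of a two-task pipeline built from the basic decorators above
+(the file names and task bodies are illustrative assumptions):</p>
+<div class="highlight-python"><div class="highlight"><pre>from ruffus import *
+
+@files("start.txt", "stage1.txt")
+def first_task(input_file, output_file):
+    # copy the input to the output to mark the stage as complete
+    open(output_file, "w").write(open(input_file).read())
+
+@follows(first_task)
+@files("stage1.txt", "stage2.txt")
+def second_task(input_file, output_file):
+    open(output_file, "w").write(open(input_file).read())
+
+pipeline_run([second_task])
+</pre></div>
+</div>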
+</div>
+<div class="section" id="pipeline-functions">
+<h2>Pipeline functions<a class="headerlink" href="#pipeline-functions" title="Permalink to this headline">¶</a></h2>
+<div class="section" id="pipeline-run">
+<h3>pipeline_run<a class="headerlink" href="#pipeline-run" title="Permalink to this headline">¶</a></h3>
+<dl class="function">
+<dt id="ruffus.task.pipeline_run">
+<tt class="descclassname">ruffus.task.</tt><tt class="descname">pipeline_run</tt><big>(</big><em>target_tasks</em>, <em>forcedtorun_tasks=</em><span class="optional">[</span><span class="optional">]</span>, <em>multiprocess=1</em>, <em>logger=stderr_logger</em>, <em>gnu_make_maximal_rebuild_mode=True</em><big>)</big><a class="reference internal" href="_modules/ruffus/task.html#pipeline_run"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#ruffus.task.pipeline_r [...]
+<dd><p>Run pipelines.</p>
+<table class="docutils field-list" frame="void" rules="none">
+<col class="field-name" />
+<col class="field-body" />
+<tbody valign="top">
+<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
+<li><strong>target_tasks</strong> – target task functions which will be run if they are out-of-date</li>
+<li><strong>forcedtorun_tasks</strong> – task functions which will be run whether or not they are out-of-date</li>
+<li><strong>multiprocess</strong> – The number of concurrent jobs running on different processes.</li>
+<li><strong>multithread</strong> – The number of concurrent jobs running as different threads. If > 1, ruffus will use multithreading <em>instead of</em> multiprocessing (and ignore the multiprocess parameter). Using multithreading is particularly useful to manage high performance clusters which otherwise are prone to “processor storms” when large numbers of cores finish jobs at the same time. (Thanks Andreas Heger)</li>
+<li><strong>logger</strong> (<a class="reference external" href="http://docs.python.org/library/logging.html">logging</a> objects) – Where progress will be logged. Defaults to stderr output.</li>
+<li><strong>verbose</strong> – level 0 : nothing
+level 1 : Out-of-date Task names
+level 2 : All Tasks (including any task function docstrings)
+level 3 : Out-of-date Jobs in Out-of-date Tasks, no explanation
+level 4 : Out-of-date Jobs in Out-of-date Tasks, with explanations and warnings
+level 5 : All Jobs in Out-of-date Tasks, (include only list of up-to-date tasks)
+level 6 : All jobs in All Tasks whether out of date or not
+level 10: logs messages useful only for debugging ruffus pipeline code</li>
+<li><strong>touch_files_only</strong> – Create or update input/output files only to simulate running the pipeline. Do not run jobs. If set to CHECKSUM_REGENERATE, will regenerate the checksum history file to reflect the existing i/o files on disk.</li>
+<li><strong>exceptions_terminate_immediately</strong> – Exceptions cause immediate termination
+rather than waiting for N jobs to finish where N = multiprocess</li>
+<li><strong>log_exceptions</strong> – Print exceptions to the logger as soon as they occur.</li>
+<li><strong>checksum_level</strong> – Several options for checking up-to-dateness are available: Default is level 1.
+level 0 : Use only file timestamps
+level 1 : above, plus timestamp of successful job completion
+level 2 : above, plus a checksum of the pipeline function body
+level 3 : above, plus a checksum of the pipeline function default arguments and the additional arguments passed in by task decorators</li>
+<li><strong>one_second_per_job</strong> – To work around poor file timestamp resolution for some file systems. Defaults to True if checksum_level is 0, forcing Tasks to take a minimum of 1 second to complete.</li>
+<li><strong>runtime_data</strong> – Experimental feature for passing data to tasks at run time</li>
+<li><strong>gnu_make_maximal_rebuild_mode</strong> – Defaults to re-running <em>all</em> out-of-date tasks. Runs the minimal
+set needed to build the targets if set to <tt class="docutils literal"><span class="pre">False</span></tt>. Use with caution.</li>
+<li><strong>history_file</strong> – The database file which stores checksums and file timestamps for input/output files.</li>
+<li><strong>verbose_abbreviated_path</strong> – whether input and output paths are abbreviated.
+level 0: The full (expanded, abspath) input or output path
+level > 1: The number of subdirectories to include. Abbreviated paths are prefixed with <tt class="docutils literal"><span class="pre">[,,,]/</span></tt>
+level < 0: Input / Output parameters are truncated to <tt class="docutils literal"><span class="pre">MMM</span></tt> letters where <tt class="docutils literal"><span class="pre">verbose_abbreviated_path</span> <span class="pre">==-MMM</span></tt>. Subdirectories are first removed to see if this allows the paths to fit in the specified limit. Otherwise abbreviated paths are prefixed by <tt class="docutils literal"><span class="pre"><???></span></tt></li>
+</ul>
+</td>
+</tr>
+</tbody>
+</table>
+</dd></dl>
+
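+<p>A minimal, illustrative sketch of calling <tt class="docutils literal"><span class="pre">pipeline_run()</span></tt>
+(the task and file names below are invented for this example and are not part of Ruffus):</p>
+<div class="highlight-python"><pre>from ruffus import transform, suffix, pipeline_run
+
+# create a dummy input file so that the example task has something to work on
+open("example.input", "w").close()
+
+@transform(["example.input"], suffix(".input"), ".output")
+def make_output(input_file, output_file):
+    # each job simply touches its output file
+    open(output_file, "w").close()
+
+# run up to two jobs concurrently, listing out-of-date jobs with explanations
+pipeline_run([make_output], multiprocess=2, verbose=4)</pre>
+</div>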
+</div>
+<div class="section" id="pipeline-printout">
+<h3>pipeline_printout<a class="headerlink" href="#pipeline-printout" title="Permalink to this headline">¶</a></h3>
+<dl class="function">
+<dt id="ruffus.task.pipeline_printout">
+<tt class="descclassname">ruffus.task.</tt><tt class="descname">pipeline_printout</tt><big>(</big><em>output_stream=None</em>, <em>target_tasks=</em><span class="optional">[</span><span class="optional">]</span>, <em>forcedtorun_tasks=</em><span class="optional">[</span><span class="optional">]</span>, <em>verbose=None</em>, <em>indent=4</em>, <em>gnu_make_maximal_rebuild_mode=True</em>, <em>wrap_width=100</em>, <em>runtime_data=None</em>, <em>checksum_level=None</em>, <em>history_file=N [...]
+<dd><p>Prints out the parts of the pipeline which will be run</p>
+<p>Because the parameters of some jobs depend on the results of previous tasks, this function
+produces only the current snapshot of task jobs. In particular, tasks which generate a
+variable number of inputs for following tasks will not produce the full range of jobs.</p>
+<dl class="docutils">
+<dt>Verbose levels:</dt>
+<dd>verbose = 0 : Nothing
+verbose = 1 : Out-of-date Task names
+verbose = 2 : All Tasks (including any task function docstrings)
+verbose = 3 : Out-of-date Jobs in Out-of-date Tasks, no explanation
+verbose = 4 : Out-of-date Jobs in Out-of-date Tasks, with explanations and warnings
+verbose = 5 : All Jobs in Out-of-date Tasks, (include only list of up-to-date tasks)
+verbose = 6 : All jobs in All Tasks whether out of date or not</dd>
+</dl>
+<table class="docutils field-list" frame="void" rules="none">
+<col class="field-name" />
+<col class="field-body" />
+<tbody valign="top">
+<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
+<li><strong>output_stream</strong> (file-like object with <tt class="docutils literal"><span class="pre">write()</span></tt> function) – where to print to</li>
+<li><strong>target_tasks</strong> – target task functions which will be run if they are out-of-date</li>
+<li><strong>forcedtorun_tasks</strong> – task functions which will be run whether or not they are out-of-date</li>
+<li><strong>verbose</strong> – level 0 : nothing
+level 1 : Out-of-date Task names
+level 2 : All Tasks (including any task function docstrings)
+level 3 : Out-of-date Jobs in Out-of-date Tasks, no explanation
+level 4 : Out-of-date Jobs in Out-of-date Tasks, with explanations and warnings
+level 5 : All Jobs in Out-of-date Tasks, (include only list of up-to-date tasks)
+level 6 : All jobs in All Tasks whether out of date or not
+level 10: logs messages useful only for debugging ruffus pipeline code</li>
+<li><strong>indent</strong> – How much indentation for pretty format.</li>
+<li><strong>gnu_make_maximal_rebuild_mode</strong> – Defaults to re-running <em>all</em> out-of-date tasks. Runs the minimal
+set needed to build the targets if set to <tt class="docutils literal"><span class="pre">False</span></tt>. Use with caution.</li>
+<li><strong>wrap_width</strong> – The maximum length of each line</li>
+<li><strong>runtime_data</strong> – Experimental feature for passing data to tasks at run time</li>
+<li><strong>checksum_level</strong> – Several options for checking up-to-dateness are available: Default is level 1.
+level 0 : Use only file timestamps
+level 1 : As above, plus timestamp of successful job completion
+level 2 : As above, plus a checksum of the pipeline function body
+level 3 : As above, plus a checksum of the pipeline function default arguments and the additional arguments passed in by task decorators</li>
+<li><strong>history_file</strong> – The database file which stores checksums and file timestamps for input/output files.</li>
+<li><strong>verbose_abbreviated_path</strong> – whether input and output paths are abbreviated.
+level 0: The full (expanded, abspath) input or output path
+level > 1: The number of subdirectories to include. Abbreviated paths are prefixed with <tt class="docutils literal"><span class="pre">[,,,]/</span></tt>
+level < 0: Input / Output parameters are truncated to <tt class="docutils literal"><span class="pre">MMM</span></tt> letters where <tt class="docutils literal"><span class="pre">verbose_abbreviated_path</span> <span class="pre">==-MMM</span></tt>. Subdirectories are first removed to see if this allows the paths to fit in the specified limit. Otherwise abbreviated paths are prefixed by <tt class="docutils literal"><span class="pre"><???></span></tt></li>
+</ul>
+</td>
+</tr>
+</tbody>
+</table>
+</dd></dl>
+
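+<p>An illustrative sketch, assuming a task function such as the <tt class="docutils literal"><span class="pre">make_output()</span></tt>
+example above: print what would be run, without running anything.</p>
+<div class="highlight-python"><pre>import sys
+from ruffus import pipeline_printout
+
+# show out-of-date jobs (verbose level 3) on stdout; no jobs are actually run
+pipeline_printout(sys.stdout, [make_output], verbose=3)</pre>
+</div>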
+</div>
+<div class="section" id="pipeline-printout-graph">
+<h3>pipeline_printout_graph<a class="headerlink" href="#pipeline-printout-graph" title="Permalink to this headline">¶</a></h3>
+<dl class="function">
+<dt id="ruffus.task.pipeline_printout_graph">
+<tt class="descclassname">ruffus.task.</tt><tt class="descname">pipeline_printout_graph</tt><big>(</big><em>stream</em>, <em>output_format=None</em>, <em>target_tasks=</em><span class="optional">[</span><span class="optional">]</span>, <em>forcedtorun_tasks=</em><span class="optional">[</span><span class="optional">]</span>, <em>draw_vertically=True</em>, <em>ignore_upstream_of_target=False</em>, <em>skip_uptodate_tasks=False</em>, <em>gnu_make_maximal_rebuild_mode=True</em>, <em>test_al [...]
+<dd><p>Print out pipeline dependencies in various formats</p>
+<table class="docutils field-list" frame="void" rules="none">
+<col class="field-name" />
+<col class="field-body" />
+<tbody valign="top">
+<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
+<li><strong>stream</strong> (file-like object with <tt class="docutils literal"><span class="pre">write()</span></tt> function) – where to print to</li>
+<li><strong>output_format</strong> – [“dot”, “jpg”, “svg”, “ps”, “png”]. All but the first depend on the <a class="reference external" href="http://www.graphviz.org">dot</a> program.</li>
+<li><strong>target_tasks</strong> – target task functions which will be run if they are out-of-date.</li>
+<li><strong>forcedtorun_tasks</strong> – task functions which will be run whether or not they are out-of-date.</li>
+<li><strong>draw_vertically</strong> – Top to bottom instead of left to right.</li>
+<li><strong>ignore_upstream_of_target</strong> – Don’t draw upstream tasks of targets.</li>
+<li><strong>skip_uptodate_tasks</strong> – Don’t draw up-to-date tasks if possible.</li>
+<li><strong>gnu_make_maximal_rebuild_mode</strong> – Defaults to re-running <em>all</em> out-of-date tasks. Runs the minimal
+set needed to build the targets if set to <tt class="docutils literal"><span class="pre">False</span></tt>. Use with caution.</li>
+<li><strong>test_all_task_for_update</strong> – Ask all task functions if they are up-to-date.</li>
+<li><strong>no_key_legend</strong> – Don’t draw key/legend for graph.</li>
+<li><strong>minimal_key_legend</strong> – Only add entries to the legend for task types which appear</li>
+<li><strong>user_colour_scheme</strong> – Dictionary specifying colour scheme for flowchart</li>
+<li><strong>pipeline_name</strong> – Pipeline Title</li>
+<li><strong>size</strong> – tuple of x and y dimensions</li>
+<li><strong>dpi</strong> – print resolution</li>
+<li><strong>runtime_data</strong> – Experimental feature for passing data to tasks at run time</li>
+<li><strong>history_file</strong> – The database file which stores checksums and file timestamps for input/output files.</li>
+<li><strong>checksum_level</strong> – Several options for checking up-to-dateness are available: Default is level 1.
+level 0 : Use only file timestamps
+level 1 : above, plus timestamp of successful job completion
+level 2 : above, plus a checksum of the pipeline function body
+level 3 : above, plus a checksum of the pipeline function default arguments and the additional arguments passed in by task decorators</li>
+</ul>
+</td>
+</tr>
+</tbody>
+</table>
+</dd></dl>
+
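+<p>An illustrative sketch, again assuming the <tt class="docutils literal"><span class="pre">make_output()</span></tt> example above:
+write the dependency graph in graphviz <tt class="docutils literal"><span class="pre">dot</span></tt> format, which does not need any external program.</p>
+<div class="highlight-python"><pre>from ruffus import pipeline_printout_graph
+
+# write the flowchart for the pipeline leading up to make_output()
+with open("flowchart.dot", "w") as stream:
+    pipeline_printout_graph(stream, "dot", [make_output], no_key_legend=True)</pre>
+</div>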
+</div>
+</div>
+<div class="section" id="id1">
+<h2>Logging<a class="headerlink" href="#id1" title="Permalink to this headline">¶</a></h2>
+<dl class="class">
+<dt id="ruffus.task.t_black_hole_logger">
+<em class="property">class </em><tt class="descclassname">ruffus.task.</tt><tt class="descname">t_black_hole_logger</tt><a class="reference internal" href="_modules/ruffus/task.html#t_black_hole_logger"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#ruffus.task.t_black_hole_logger" title="Permalink to this definition">¶</a></dt>
+<dd><p>Does nothing!</p>
+</dd></dl>
+
+<dl class="class">
+<dt id="ruffus.task.t_stderr_logger">
+<em class="property">class </em><tt class="descclassname">ruffus.task.</tt><tt class="descname">t_stderr_logger</tt><a class="reference internal" href="_modules/ruffus/task.html#t_stderr_logger"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#ruffus.task.t_stderr_logger" title="Permalink to this definition">¶</a></dt>
+<dd><p>Everything to stderr</p>
+</dd></dl>
+
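+<p>Because <tt class="docutils literal"><span class="pre">logger</span></tt> accepts standard <tt class="docutils literal"><span class="pre">logging</span></tt> objects,
+progress can be redirected to an ordinary Python logger (or silenced with an instance of
+<tt class="docutils literal"><span class="pre">t_black_hole_logger</span></tt>). An illustrative sketch, assuming the <tt class="docutils literal"><span class="pre">make_output()</span></tt> example above:</p>
+<div class="highlight-python"><pre>import logging
+from ruffus import pipeline_run
+
+my_logger = logging.getLogger("my_pipeline")
+my_logger.setLevel(logging.DEBUG)
+my_logger.addHandler(logging.FileHandler("pipeline.log"))
+
+# send Ruffus progress messages to pipeline.log instead of stderr
+pipeline_run([make_output], logger=my_logger)</pre>
+</div>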
+</div>
+<div class="section" id="implementation">
+<h2>Implementation:<a class="headerlink" href="#implementation" title="Permalink to this headline">¶</a></h2>
+<div class="section" id="parameter-factories">
+<h3>Parameter factories:<a class="headerlink" href="#parameter-factories" title="Permalink to this headline">¶</a></h3>
+<dl class="function">
+<dt id="ruffus.task.merge_param_factory">
+<tt class="descclassname">ruffus.task.</tt><tt class="descname">merge_param_factory</tt><big>(</big><em>input_files_task_globs</em>, <em>output_param</em>, <em>*extra_params</em><big>)</big><a class="headerlink" href="#ruffus.task.merge_param_factory" title="Permalink to this definition">¶</a></dt>
+<dd><p>Factory for task_merge</p>
+</dd></dl>
+
+<dl class="function">
+<dt id="ruffus.task.collate_param_factory">
+<tt class="descclassname">ruffus.task.</tt><tt class="descname">collate_param_factory</tt><big>(</big><em>input_files_task_globs</em>, <em>flatten_input</em>, <em>file_names_transform</em>, <em>extra_input_files_task_globs</em>, <em>replace_inputs</em>, <em>output_pattern</em>, <em>*extra_specs</em><big>)</big><a class="headerlink" href="#ruffus.task.collate_param_factory" title="Permalink to this definition">¶</a></dt>
+<dd><p>Factory for task_collate</p>
+<p>Looks exactly like @transform except that all [input] which lead to the same [output / extra] are combined together</p>
+</dd></dl>
+
+<dl class="function">
+<dt id="ruffus.task.transform_param_factory">
+<tt class="descclassname">ruffus.task.</tt><tt class="descname">transform_param_factory</tt><big>(</big><em>input_files_task_globs</em>, <em>flatten_input</em>, <em>file_names_transform</em>, <em>extra_input_files_task_globs</em>, <em>replace_inputs</em>, <em>output_pattern</em>, <em>*extra_specs</em><big>)</big><a class="headerlink" href="#ruffus.task.transform_param_factory" title="Permalink to this definition">¶</a></dt>
+<dd><p>Factory for task_transform</p>
+</dd></dl>
+
+<dl class="function">
+<dt id="ruffus.task.files_param_factory">
+<tt class="descclassname">ruffus.task.</tt><tt class="descname">files_param_factory</tt><big>(</big><em>input_files_task_globs</em>, <em>flatten_input</em>, <em>do_not_expand_single_job_tasks</em>, <em>output_extras</em><big>)</big><a class="headerlink" href="#ruffus.task.files_param_factory" title="Permalink to this definition">¶</a></dt>
+<dd><dl class="docutils">
+<dt>Factory for functions which</dt>
+<dd>yield tuples of inputs, outputs / extras</dd>
+</dl>
+<p>Note:</p>
+<div class="highlight-python"><pre>1. Each job requires input/output file names
+2. Input/output file names can be a string, an arbitrarily nested sequence
+3. Non-string types are ignored
+4. Either the input or the output file name must contain at least one string</pre>
+</div>
+</dd></dl>
+
+<dl class="function">
+<dt id="ruffus.task.args_param_factory">
+<tt class="descclassname">ruffus.task.</tt><tt class="descname">args_param_factory</tt><big>(</big><em>orig_args</em><big>)</big><a class="headerlink" href="#ruffus.task.args_param_factory" title="Permalink to this definition">¶</a></dt>
+<dd><dl class="docutils">
+<dt>Factory for functions which</dt>
+<dd>yield tuples of inputs, outputs / extras</dd>
+</dl>
+<p>Note:</p>
+<div class="highlight-python"><pre>1. Each job requires input/output file names
+2. Input/output file names can be a string, an arbitrarily nested sequence
+3. Non-string types are ignored
+4. Either the input or the output file name must contain at least one string</pre>
+</div>
+</dd></dl>
+
+<dl class="function">
+<dt id="ruffus.task.split_param_factory">
+<tt class="descclassname">ruffus.task.</tt><tt class="descname">split_param_factory</tt><big>(</big><em>input_files_task_globs</em>, <em>output_files_task_globs</em>, <em>*extra_params</em><big>)</big><a class="headerlink" href="#ruffus.task.split_param_factory" title="Permalink to this definition">¶</a></dt>
+<dd><p>Factory for task_split</p>
+</dd></dl>
+
+</div>
+<div class="section" id="wrappers-around-jobs">
+<h3>Wrappers around jobs:<a class="headerlink" href="#wrappers-around-jobs" title="Permalink to this headline">¶</a></h3>
+<dl class="function">
+<dt id="ruffus.task.job_wrapper_generic">
+<tt class="descclassname">ruffus.task.</tt><tt class="descname">job_wrapper_generic</tt><big>(</big><em>param</em>, <em>user_defined_work_func</em>, <em>register_cleanup</em>, <em>touch_files_only</em><big>)</big><a class="reference internal" href="_modules/ruffus/task.html#job_wrapper_generic"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#ruffus.task.job_wrapper_generic" title="Permalink to this definition">¶</a></dt>
+<dd><p>run func</p>
+</dd></dl>
+
+<dl class="function">
+<dt id="ruffus.task.job_wrapper_io_files">
+<tt class="descclassname">ruffus.task.</tt><tt class="descname">job_wrapper_io_files</tt><big>(</big><em>param</em>, <em>user_defined_work_func</em>, <em>register_cleanup</em>, <em>touch_files_only</em>, <em>output_files_only=False</em><big>)</big><a class="reference internal" href="_modules/ruffus/task.html#job_wrapper_io_files"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#ruffus.task.job_wrapper_io_files" title="Permalink to this definition">¶</a></dt>
+<dd><p>run func on any i/o if not up to date</p>
+</dd></dl>
+
+<dl class="function">
+<dt id="ruffus.task.job_wrapper_mkdir">
+<tt class="descclassname">ruffus.task.</tt><tt class="descname">job_wrapper_mkdir</tt><big>(</big><em>param</em>, <em>user_defined_work_func</em>, <em>register_cleanup</em>, <em>touch_files_only</em><big>)</big><a class="reference internal" href="_modules/ruffus/task.html#job_wrapper_mkdir"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#ruffus.task.job_wrapper_mkdir" title="Permalink to this definition">¶</a></dt>
+<dd><p>make directories if they do not exist</p>
+</dd></dl>
+
+</div>
+<div class="section" id="checking-if-job-is-update">
+<h3>Checking if a job is up to date:<a class="headerlink" href="#checking-if-job-is-update" title="Permalink to this headline">¶</a></h3>
+<dl class="function">
+<dt id="ruffus.task.needs_update_check_modify_time">
+<tt class="descclassname">ruffus.task.</tt><tt class="descname">needs_update_check_modify_time</tt><big>(</big><em>*params</em>, <em>**kwargs</em><big>)</big><a class="headerlink" href="#ruffus.task.needs_update_check_modify_time" title="Permalink to this definition">¶</a></dt>
+<dd><p>Given input and output files, see if all exist and whether output files are later than input files.
+Each can be:</p>
+<blockquote>
+<div><ol class="arabic simple">
+<li>string: assumed to be a filename “file1”</li>
+<li>any other type</li>
+<li>arbitrary nested sequence of (1) and (2)</li>
+</ol>
+</div></blockquote>
+</dd></dl>
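+<p>Very roughly, the timestamp comparison can be pictured as the sketch below. This is an
+illustration only, not the actual Ruffus implementation, which also handles nested sequences,
+non-string parameters and missing input files.</p>
+<div class="highlight-python"><pre>import os
+
+def outputs_out_of_date(input_files, output_files):
+    # any missing output means the job must be (re)run
+    if not all(os.path.exists(o) for o in output_files):
+        return True
+    # otherwise, out of date if the newest input postdates the oldest output
+    newest_input = max(os.path.getmtime(i) for i in input_files)
+    oldest_output = min(os.path.getmtime(o) for o in output_files)
+    return newest_input > oldest_output</pre>
+</div>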
+
+<dl class="function">
+<dt id="ruffus.task.needs_update_check_directory_missing">
+<tt class="descclassname">ruffus.task.</tt><tt class="descname">needs_update_check_directory_missing</tt><big>(</big><em>*params</em>, <em>**kwargs</em><big>)</big><a class="headerlink" href="#ruffus.task.needs_update_check_directory_missing" title="Permalink to this definition">¶</a></dt>
+<dd><dl class="docutils">
+<dt>Called per directory:</dt>
+<dd>Does it exist?
+Is it an ordinary file rather than a directory? (throws an exception)</dd>
+</dl>
+</dd></dl>
+
+</div>
+</div>
+<div class="section" id="exceptions-and-errors">
+<h2>Exceptions and Errors<a class="headerlink" href="#exceptions-and-errors" title="Permalink to this headline">¶</a></h2>
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#">ruffus.Task</a><ul>
+<li><a class="reference internal" href="#decorators">Decorators</a></li>
+<li><a class="reference internal" href="#pipeline-functions">Pipeline functions</a><ul>
+<li><a class="reference internal" href="#pipeline-run">pipeline_run</a></li>
+<li><a class="reference internal" href="#pipeline-printout">pipeline_printout</a></li>
+<li><a class="reference internal" href="#pipeline-printout-graph">pipeline_printout_graph</a></li>
+</ul>
+</li>
+<li><a class="reference internal" href="#id1">Logging</a></li>
+<li><a class="reference internal" href="#implementation">Implementation:</a><ul>
+<li><a class="reference internal" href="#parameter-factories">Parameter factories:</a></li>
+<li><a class="reference internal" href="#wrappers-around-jobs">Wrappers around jobs:</a></li>
+<li><a class="reference internal" href="#checking-if-job-is-update">Checking if a job is up to date:</a></li>
+</ul>
+</li>
+<li><a class="reference internal" href="#exceptions-and-errors">Exceptions and Errors</a></li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="decorators/files_re.html"
+ title="previous chapter">@files_re</a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="proxy_logger.html"
+ title="next chapter">ruffus.proxy_logger</a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="_sources/task.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="decorators/decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="decorators/originate.html">@originate</a> </li>
+ <li><a href="decorators/split.html">@split</a> </li>
+ <li><a href="decorators/transform.html">@transform</a> </li>
+ <li><a href="decorators/merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="decorators/decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="decorators/subdivide.html">@subdivide</a> </li>
+ <li><a href="decorators/transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="decorators/collate.html">@collate</a> </li>
+ <li><a href="decorators/collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="decorators/graphviz.html">@graphviz</a> </li>
+ <li><a href="decorators/mkdir.html">@mkdir</a> </li>
+ <li><a href="decorators/follows.html">@follows / mkdir</a> </li>
+ <li><a href="decorators/posttask.html">@posttask touch_file</a> </li>
+ <li><a href="decorators/active_if.html">@active_if</a> </li>
+ <li><a href="decorators/jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="decorators/decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="decorators/product.html">@product </a> </li>
+ <li><a href="decorators/permutations.html">@permutations </a> </li>
+ <li><a href="decorators/combinations.html">@combinations </a> </li>
+ <li><a href="decorators/combinations_with_replacement.html">@combinations_ _with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="decorators/decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="decorators/files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="decorators/parallel.html">@parallel</a> </li>
+ <li><a href="decorators/check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="decorators/indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="proxy_logger.html" title="ruffus.proxy_logger"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="decorators/files_re.html" title="@files_re"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="index.html">Home</a> | </li>
+ <li><a href="contents.html">Contents</a> | </li>
+ <li><a href="installation.html">Install</a> | </li>
+ <li><a href="tutorials/new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="tutorials/new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="faq.html">FAQ</a> | </li>
+ <li><a href="cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="tutorials/new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="gallery.html">Gallery</a> | </li>
+ <li><a href="history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/todo.html b/doc/_build/html/todo.html
new file mode 100644
index 0000000..baaeb57
--- /dev/null
+++ b/doc/_build/html/todo.html
@@ -0,0 +1,674 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>Future Changes to Ruffus — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: './',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="_static/jquery.js"></script>
+ <script type="text/javascript" src="_static/underscore.js"></script>
+ <script type="text/javascript" src="_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="index.html" />
+ <link rel="next" title="Implementation Tips" href="implementation_notes.html" />
+ <link rel="prev" title="Major Features added to Ruffus" href="history.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="implementation_notes.html" title="Implementation Tips"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="history.html" title="Major Features added to Ruffus"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="index.html">Home</a> | </li>
+ <li><a href="contents.html">Contents</a> | </li>
+ <li><a href="installation.html">Install</a> | </li>
+ <li><a href="tutorials/new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="tutorials/new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="faq.html">FAQ</a> | </li>
+ <li><a href="cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="tutorials/new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="gallery.html">Gallery</a> | </li>
+ <li><a href="history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <div class="section" id="future-changes-to-ruffus">
+<span id="todo"></span><h1>Future Changes to Ruffus<a class="headerlink" href="#future-changes-to-ruffus" title="Permalink to this headline">¶</a></h1>
+<blockquote>
+<div><p>I would appreciate feedback and help on all these issues and where next to take <em>ruffus</em>.</p>
+<p><strong>Future Changes</strong> are features where we more or less know where we are going and how to get there.</p>
+<p><strong>Planned Improvements</strong> describes features we would like in Ruffus but where the implementation
+or syntax has not yet been (fully) worked out.</p>
+<p>If you have suggestions or contributions, please either write to me ( ruffus_lib at llew.org.uk) or
+send a pull request via the <a class="reference external" href="https://github.com/bunbun/ruffus">git site</a>.</p>
+</div></blockquote>
+<div class="section" id="todo-pipeline-printout-graph-should-print-inactive-tasks">
+<span id="todo-inactive-tasks-in-pipeline-printout-graph"></span><h2>Todo: pipeline_printout_graph should print inactive tasks<a class="headerlink" href="#todo-pipeline-printout-graph-should-print-inactive-tasks" title="Permalink to this headline">¶</a></h2>
+</div>
+<div class="section" id="todo-mark-input-strings-as-non-file-names-and-add-support-for-dynamically-returned-parameters">
+<span id="todo-dynamic-strings"></span><h2>Todo: Mark input strings as non-file names, and add support for dynamically returned parameters<a class="headerlink" href="#todo-mark-input-strings-as-non-file-names-and-add-support-for-dynamically-returned-parameters" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><ol class="arabic simple">
+<li>Use indicator object.</li>
+<li>What is a good name? <tt class="docutils literal"><span class="pre">"output_from()"</span></tt>, <tt class="docutils literal"><span class="pre">"NOT_FILE_NAME"</span></tt> :-)</li>
+<li>They will still participate in suffix, formatter and regex replacement</li>
+</ol>
+<p>Bernie Pope suggests that we should generalise this:</p>
+<p>If any object in the input parameters is a (non-list/tuple) class instance, check (getattr) whether it has a <tt class="docutils literal"><span class="pre">ruffus_params()</span></tt> function.
+If it does, call it to obtain a list which is substituted in place.
+If there are strings nested within, these will also take part in Ruffus string substitution.
+Objects with <tt class="docutils literal"><span class="pre">ruffus_params()</span></tt> always “decay” to the results of the function call.</p>
+<p><tt class="docutils literal"><span class="pre">output_from</span></tt> would be a simple wrapper which returns the internal string via <tt class="docutils literal"><span class="pre">ruffus_params()</span></tt></p>
+<div class="highlight-python"><div class="highlight"><pre><span class="k">class</span> <span class="nc">output_from</span> <span class="p">(</span><span class="nb">object</span><span class="p">):</span>
+ <span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="nb">str</span><span class="p">):</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">str</span> <span class="o">=</span> <span class="nb">str</span>
+ <span class="k">def</span> <span class="nf">ruffus_params</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="p">[</span><span class="bp">self</span><span class="o">.</span><span class="n">str</span><span class="p">]</span>
+</pre></div>
+</div>
+<p>Returning a list should be like wildcards and should not introduce an unnecessary level of indirection for output parameters, i.e. suffix(”.txt”) or formatter() / “{basename[0]}” should work.</p>
+<p>Check!</p>
+</div></blockquote>
+</div>
+<div class="section" id="todo-allow-extra-parameters-to-be-used-in-output-substitution">
+<span id="todo-extra-parameters"></span><h2>Todo: Allow “extra” parameters to be used in output substitution<a class="headerlink" href="#todo-allow-extra-parameters-to-be-used-in-output-substitution" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>Formatter substitution can refer to the original elements in the input and extra parameters (without converting them to strings either). This refers to the original (nested) data structure.</p>
+<p>This will allow normal python datatypes to be handed down and slipstreamed into a pipeline more easily.</p>
+<p>The syntax would use Ruffus (> version 2.4) formatter:</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="nd">@transform</span><span class="p">(</span> <span class="o">...</span><span class="p">,</span> <span class="n">formatter</span><span class="p">(),</span> <span class="p">[</span>
+<span class="hll"> <span class="s">"{EXTRAS[0][1][3]}"</span><span class="p">,</span> <span class="c"># EXTRAS</span>
+</span><span class="hll"> <span class="s">"[INPUTS[1][2]]"</span><span class="p">],</span><span class="o">...</span><span class="p">)</span> <span class="c"># INPUTS</span>
+</span><span class="k">def</span> <span class="nf">taskfunc</span><span class="p">():</span>
+ <span class="k">pass</span>
+</pre></div>
+</div>
+<p><tt class="docutils literal"><span class="pre">EXTRAS</span></tt> and <tt class="docutils literal"><span class="pre">INPUTS</span></tt> indicate that we are referring to the input and extra parameters.</p>
+<p>These are the full (nested) parameters in all their original form. In the case of the input parameters, this obviously depends on the decorator, so</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="nd">@transform</span><span class="p">([</span><span class="s">"a.text"</span><span class="p">,</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="s">"b.text"</span><span class="p">]],</span> <span class="n">formatter</span><span class="p">(),</span> <span class="s">"{INPUTS[0][0]}"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">taskfunc</span><span class="p">():</span>
+ <span class="k">pass</span>
+</pre></div>
+</div>
+<p>would give</p>
+<div class="highlight-python"><pre>job #1
+ input == "a.text"
+ output == "a"
+
+job #2
+ input == [1, "b.text"]
+ output == 1</pre>
+</div>
+<p>The entire string must consist of <tt class="docutils literal"><span class="pre">INPUTS</span></tt> or <tt class="docutils literal"><span class="pre">EXTRAS</span></tt> followed by optionally N levels of square brackets. i.e. They must match <tt class="docutils literal"><span class="pre">"(INPUTS|EXTRAS)(\[\d+\])+"</span></tt></p>
+<p>No string conversion takes place.</p>
+<p>For <tt class="docutils literal"><span class="pre">INPUTS</span></tt> or <tt class="docutils literal"><span class="pre">EXTRAS</span></tt> which have objects with a <tt class="docutils literal"><span class="pre">ruffus_params()</span></tt> function (see Todo item above),
+the original object rather than the result of <tt class="docutils literal"><span class="pre">ruffus_params()</span></tt> is forwarded.</p>
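+<p>As a small illustration of the reference pattern quoted above (an assumption: whole-string
+anchors are added here because the entire string must match; this is not code from Ruffus itself):</p>
+<div class="highlight-python"><pre>import re
+
+reference_pattern = re.compile(r"^(INPUTS|EXTRAS)(\[\d+\])+$")
+
+print(bool(reference_pattern.match("EXTRAS[0][1][3]")))   # True
+print(bool(reference_pattern.match("INPUTS[0][0]")))      # True
+print(bool(reference_pattern.match("EXTRAS")))            # False: at least one index is required</pre>
+</div>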
+</div></blockquote>
+</div>
+<div class="section" id="todo-extra-signalling-before-and-after-each-task-and-job">
+<span id="todo-pre-post-job"></span><h2>Todo: Extra signalling before and after each task and job<a class="headerlink" href="#todo-extra-signalling-before-and-after-each-task-and-job" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="nd">@prejob</span><span class="p">(</span><span class="n">custom_func</span><span class="p">)</span>
+<span class="nd">@postjob</span><span class="p">(</span><span class="n">custom_func</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">task</span><span class="p">():</span>
+ <span class="k">pass</span>
+</pre></div>
+</div>
+<p><tt class="docutils literal"><span class="pre">@prejob</span></tt> / <tt class="docutils literal"><span class="pre">@postjob</span></tt> would be run in the child processes.</p>
+</div></blockquote>
+</div>
+<div class="section" id="todo-split-subdivide-returns-the-actual-output-created">
+<span id="todo-new-decorators"></span><h2>Todo: <tt class="docutils literal"><span class="pre">@split</span></tt> / <tt class="docutils literal"><span class="pre">@subdivide</span></tt> returns the actual output created<a class="headerlink" href="#todo-split-subdivide-returns-the-actual-output-created" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><ul>
+<li><p class="first"><strong>overrides</strong> (not replaces) wild cards.</p>
+</li>
+<li><p class="first">Returns a list, each with output and extra parameters.</p>
+</li>
+<li><p class="first">Won’t include extraneous files which were not created in the pipeline but which just happened to match the wild card</p>
+</li>
+<li><p class="first">We should have <tt class="docutils literal"><span class="pre">ruffus_output_params</span></tt>, <tt class="docutils literal"><span class="pre">ruffus_extra_params</span></tt> wrappers for clarity:</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="nd">@split</span><span class="p">(</span><span class="s">"a.file"</span><span class="p">,</span> <span class="s">"*.txt"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">split_into_txt_files</span><span class="p">(</span><span class="n">input_file</span><span class="p">,</span> <span class="n">output_files</span><span class="p">):</span>
+ <span class="n">output_files</span> <span class="o">=</span> <span class="p">[</span><span class="s">"a.txt"</span><span class="p">,</span> <span class="s">"b.txt"</span><span class="p">,</span> <span class="s">"c.txt"</span><span class="p">]</span>
+ <span class="k">for</span> <span class="n">output_file_name</span> <span class="ow">in</span> <span class="n">output_files</span><span class="p">:</span>
+ <span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">output_file_name</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span> <span class="k">as</span> <span class="n">oo</span><span class="p">:</span>
+ <span class="k">pass</span>
+ <span class="k">return</span> <span class="p">[</span>
+ <span class="n">ruffus_output</span><span class="p">(</span><span class="s">"a.file"</span><span class="p">),</span>
+ <span class="p">[</span><span class="n">ruffus_output</span><span class="p">([</span><span class="s">"b.file"</span><span class="p">,</span> <span class="s">"c.file"</span><span class="p">]),</span> <span class="n">ruffus_extras</span><span class="p">(</span><span class="mi">13</span><span class="p">,</span> <span class="mi">14</span><span class="p">)],</span>
+ <span class="p">]</span>
+</pre></div>
+</div>
+</li>
+<li><p class="first">Consider yielding?</p>
+</li>
+</ul>
+</div></blockquote>
+<div class="section" id="checkpointing">
+<h3>Checkpointing<a class="headerlink" href="#checkpointing" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><ul class="simple">
+<li>If checkpoint file is used, the actual files are saved and checked the next time</li>
+<li>If no files are generated, no files are checked the next time...</li>
+<li>The output files do not have to match the wildcard, though we can output a warning message if that happens...
+This is obviously dangerous because the behavior will change if the pipeline is rerun without using the checkpoint file.</li>
+<li>What happens if the task function changes?</li>
+</ul>
+</div></blockquote>
+</div>
+</div>
+<div class="section" id="id1">
+<h2>Todo: New decorators<a class="headerlink" href="#id1" title="Permalink to this headline">¶</a></h2>
+<div class="section" id="todo-originate">
+<h3>Todo: <tt class="docutils literal"><span class="pre">@originate</span></tt><a class="headerlink" href="#todo-originate" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div>Each (serial) invocation returns lists of output parameters until it returns
+None. (Empty list = <tt class="docutils literal"><span class="pre">continue</span></tt>, None = <tt class="docutils literal"><span class="pre">break</span></tt>).</div></blockquote>
+</div>
+<div class="section" id="todo-recombine">
+<h3>Todo: <tt class="docutils literal"><span class="pre">@recombine</span></tt><a class="headerlink" href="#todo-recombine" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><p>Like <tt class="docutils literal"><span class="pre">@collate</span></tt> but automatically regroups jobs which were a result of a previous <tt class="docutils literal"><span class="pre">@subdivide</span></tt> / <tt class="docutils literal"><span class="pre">@split</span></tt> (even after intervening <tt class="docutils literal"><span class="pre">@transform</span></tt> )</p>
+<p>This is the only way job trickling can work without stalling the pipeline: We would know
+how many jobs were pending for each <tt class="docutils literal"><span class="pre">@recombine</span></tt> job and which jobs go together.</p>
+</div></blockquote>
+</div>
+</div>
+<div class="section" id="todo-named-parameters-in-decorators-for-clarity">
+<h2>Todo: Named parameters in decorators for clarity<a class="headerlink" href="#todo-named-parameters-in-decorators-for-clarity" title="Permalink to this headline">¶</a></h2>
+</div>
+<div class="section" id="todo-bioinformatics-example-to-end-all-examples">
+<span id="todo-bioinformatics-example"></span><h2>Todo: Bioinformatics example to end all examples<a class="headerlink" href="#todo-bioinformatics-example-to-end-all-examples" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><dl class="docutils">
+<dt>Uses</dt>
+<dd><ul class="first last simple">
+<li><tt class="docutils literal"><span class="pre">@product</span></tt></li>
+<li><tt class="docutils literal"><span class="pre">@subdivide</span></tt></li>
+<li><tt class="docutils literal"><span class="pre">@transform</span></tt></li>
+<li><tt class="docutils literal"><span class="pre">@collate</span></tt></li>
+<li><tt class="docutils literal"><span class="pre">@merge</span></tt></li>
+</ul>
+</dd>
+</dl>
+</div></blockquote>
+</div>
+<div class="section" id="todo-allow-the-next-task-to-start-before-all-jobs-in-the-previous-task-have-finished">
+<h2>Todo: Allow the next task to start before all jobs in the previous task have finished<a class="headerlink" href="#todo-allow-the-next-task-to-start-before-all-jobs-in-the-previous-task-have-finished" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>Jake (Biesinger) calls this <strong>Job Trickling</strong>!</p>
+<ul class="simple">
+<li>A single long-running job will no longer hold up the entire pipeline</li>
+<li>Calculates dependencies dynamically at the job level.</li>
+<li>Goal is to have a long running (months) pipeline to which we can keep adding input...</li>
+<li>We can choose between prioritising completion of the entire pipeline for some jobs
+(depth first) or trying to complete as many tasks as possible (breadth first)</li>
+</ul>
+</div></blockquote>
+<div class="section" id="converting-to-per-job-rather-than-per-task-dependencies">
+<h3>Converting to per-job rather than per-task dependencies<a class="headerlink" href="#converting-to-per-job-rather-than-per-task-dependencies" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><p>Some decorators prevent per-job (rather than per-task) dependency calculations, and
+will cause the pipeline to stall until the dependent tasks are completed (the current situation):</p>
+<blockquote>
+<div><ul>
+<li><dl class="first docutils">
+<dt>Some types of jobs unavoidably depend on an entire previous task completing:</dt>
+<dd><ul class="first last simple">
+<li><tt class="docutils literal"><span class="pre">add_inputs()</span></tt>, <tt class="docutils literal"><span class="pre">inputs()</span></tt></li>
+<li><tt class="docutils literal"><span class="pre">@merge</span></tt></li>
+<li><tt class="docutils literal"><span class="pre">@split</span></tt> (implicit <tt class="docutils literal"><span class="pre">@merge</span></tt>)</li>
+</ul>
+</dd>
+</dl>
+</li>
+<li><dl class="first docutils">
+<dt><tt class="docutils literal"><span class="pre">@split</span></tt>, <tt class="docutils literal"><span class="pre">@originate</span></tt> produce a variable amount of output at runtime and must be completed before the next task can be run.</dt>
+<dd><ul class="first last simple">
+<li>Should <tt class="docutils literal"><span class="pre">yield</span></tt> instead of return?</li>
+</ul>
+</dd>
+</dl>
+</li>
+<li><dl class="first docutils">
+<dt><tt class="docutils literal"><span class="pre">@collate</span></tt> needs to pattern match all the inputs of a previous task</dt>
+<dd><ul class="first last simple">
+<li>Replace <tt class="docutils literal"><span class="pre">@collate</span></tt> with <tt class="docutils literal"><span class="pre">@recombine</span></tt> which “remembers” and reverses the results of a previous
+<tt class="docutils literal"><span class="pre">@subdivide</span></tt> or <tt class="docutils literal"><span class="pre">@split</span></tt></li>
+<li>Jobs need unique job_id tag</li>
+<li>Jobs are assigned (nested) grouping id which accompany them down the
+pipeline after <tt class="docutils literal"><span class="pre">@subdivide</span></tt> / <tt class="docutils literal"><span class="pre">@split</span></tt> and are removed after <tt class="docutils literal"><span class="pre">@recombine</span></tt></li>
+<li>Should have a count of jobs so we always know <em>when</em> an “input slot” is full</li>
+</ul>
+</dd>
+</dl>
+</li>
+<li><p class="first">The funny “single file” mode for <tt class="docutils literal"><span class="pre">@transform,</span></tt> <tt class="docutils literal"><span class="pre">@files</span></tt> needs to be
+regularised so that it is a syntactic (front end) convenience (oddity!)
+and does not plague the innards of ruffus</p>
+</li>
+</ul>
+</div></blockquote>
+<p>Breaking change: to force the entirety of the previous task to complete before the next one, use <tt class="docutils literal"><span class="pre">@follows</span></tt></p>
+</div></blockquote>
+</div>
+<div class="section" id="implementation">
+<h3>Implementation<a class="headerlink" href="#implementation" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><ul class="simple">
+<li>“Push” model. Completing jobs “check in” their outputs to “input slots” for all the successor jobs.</li>
+<li>When “input slots” are full for any job, it is put on the dispatch queue to be run.</li>
+<li>The priority (depth first or breadth first) can be set here.</li>
+<li><tt class="docutils literal"><span class="pre">pipeline_run</span></tt> / <tt class="docutils literal"><span class="pre">pipeline_printout</span></tt> create a task dependency tree structure (from decorator dependencies) (a runtime pipeline object)</li>
+<li>Each task in the pipeline object knows which other tasks wait on it.</li>
+<li>When output is created by a job, it sends messages to (i.e. function calls) all dependent tasks in the pipeline object with the new output</li>
+<li>Sets of output such as from <tt class="docutils literal"><span class="pre">@split</span></tt> and <tt class="docutils literal"><span class="pre">@subdivide</span></tt> and <tt class="docutils literal"><span class="pre">@originate</span></tt> have a
+terminating condition and/or an associated count (# of outputs)</li>
+<li>Tasks in the pipeline object forward incoming inputs to task input slots (for slots common to all jobs in a
+task: <tt class="docutils literal"><span class="pre">@inputs</span></tt>, <tt class="docutils literal"><span class="pre">@add_inputs</span></tt>) or to slots in new jobs in the pipeline object</li>
+<li>When all slots are full in each job, this triggers putting the job parameters onto the job submission queue</li>
+<li>The pipeline object should allow Ruffus to be reentrant?</li>
+</ul>
+</div></blockquote>
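+<p>A minimal sketch of the “input slot” idea described in the list above (an assumption for
+illustration only; none of these names exist in Ruffus): a job becomes dispatchable once every
+one of its expected inputs has been checked in by a completed upstream job.</p>
+<div class="highlight-python"><pre>class JobInputSlots(object):
+    def __init__(self, expected_inputs):
+        # names of the upstream outputs this job is waiting for
+        self.expected = set(expected_inputs)
+        self.received = {}
+
+    def check_in(self, name, value):
+        # called by the pipeline object when an upstream job produces output
+        self.received[name] = value
+        return self.is_full()
+
+    def is_full(self):
+        # full slots mean the job parameters can go onto the dispatch queue
+        return self.expected.issubset(self.received)</pre>
+</div>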
+</div>
+</div>
+</div>
+<div class="section" id="planned-improvements-to-ruffus">
+<h1>Planned Improvements to Ruffus<a class="headerlink" href="#planned-improvements-to-ruffus" title="Permalink to this headline">¶</a></h1>
+<blockquote id="todo-run-on-cluster">
+<div><ul class="simple">
+<li><tt class="docutils literal"><span class="pre">@split</span></tt> needs to be able to specify at run time the number of
+resulting jobs without using wild cards</li>
+<li>legacy support for wild cards and file names.</li>
+</ul>
+</div></blockquote>
+<div class="section" id="planned-running-python-code-task-functions-transparently-on-remote-cluster-nodes">
+<h2>Planned: Running python code (task functions) transparently on remote cluster nodes<a class="headerlink" href="#planned-running-python-code-task-functions-transparently-on-remote-cluster-nodes" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>Wait until next release.</p>
+<p>Will bump Ruffus to v.3.0 if it can run Python jobs transparently on a cluster!</p>
+<p>abstract out <tt class="docutils literal"><span class="pre">task.run_pooled_job_without_exceptions()</span></tt> as a function which can be supplied to <tt class="docutils literal"><span class="pre">pipeline_run</span></tt></p>
+<p>Common “job” interface:</p>
+<blockquote>
+<div><ul class="simple">
+<li>marshalled arguments</li>
+<li>marshalled function</li>
+<li>submission timestamp</li>
+</ul>
+</div></blockquote>
+<dl class="docutils">
+<dt>Returns</dt>
+<dd><ul class="first last simple">
+<li>completion timestamp</li>
+<li>returned values</li>
+<li>exception</li>
+</ul>
+</dd>
+</dl>
+<ol class="arabic simple">
+<li>Full version: use libpythongrid?
+* Christian Widmer <<a class="reference external" href="mailto:ckwidmer%40gmail.com">ckwidmer<span>@</span>gmail<span>.</span>com</a>>
+* Cheng Soon Ong <<a class="reference external" href="mailto:chengsoon.ong%40unimelb.edu.au">chengsoon<span>.</span>ong<span>@</span>unimelb<span>.</span>edu<span>.</span>au</a>>
+* <a class="reference external" href="https://code.google.com/p/pythongrid/source/browse/#git%2Fpythongrid">https://code.google.com/p/pythongrid/source/browse/#git%2Fpythongrid</a>
+* Probably not good to base Ruffus entirely on libpythongrid to minimise dependencies, the use of sophisticated configuration policies etc.</li>
+<li>Start with light-weight file-based protocol
+* specify where the scripts should live
+* use drmaa to start jobs
+* have an executable ruffus module which knows how to load and deserialise (unmarshal) the function / parameters from disk. This would be what drmaa starts up, given the marshalled data as an argument
+* time stamp
+* “heart beat” to check that the job is still running</li>
+<li>Next step: socket-based protocol
+* use specified master port in ruffus script
+* start remote processes using drmaa
+* child receives marshalled data and the address::port in the ruffus script (head node) to initiate hand shake or die
+* process recycling: run successive jobs on the same remote process for reduced overhead, until exceeds max number of jobs on the same process, min/max time on the same process
+* resubmit if die (Don’t do sophisticated stuff like libpythongrid).</li>
+</ol>
+</div></blockquote>
+</div>
+<div class="section" id="planned-custom-parameter-generator">
+<span id="todo-custom-parameters"></span><span id="todo-job-trickling"></span><h2>Planned: Custom parameter generator<a class="headerlink" href="#planned-custom-parameter-generator" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>Request on mailing list</p>
+<blockquote>
+<div><blockquote>
+<div>I’ve often wished that I could use an arbitrary function to process the input filepath instead of just a regex.</div></blockquote>
+<div class="highlight-python"><div class="highlight"><pre><span class="k">def</span> <span class="nf">f</span><span class="p">(</span><span class="n">inputs</span><span class="p">,</span> <span class="n">outputs</span><span class="p">,</span> <span class="n">extra_param1</span><span class="p">,</span> <span class="n">extra_param2</span><span class="p">):</span>
+ <span class="c"># do something to generate parameters</span>
+ <span class="k">return</span> <span class="n">new_output_param</span><span class="p">,</span> <span class="n">new_extra_param1</span><span class="p">,</span> <span class="n">new_extra_param2</span>
+</pre></div>
+</div>
+<p>now f() can be used inside a Ruffus decorator to generate the outputs from inputs, instead of being forced to use a regex for the job.</p>
+<p>Cheers,
+Bernie.</p>
+</div></blockquote>
+<p>Leverages built-in Ruffus functionality.
+Don’t have to write entire parameter generation from scratch.</p>
+<ul class="simple">
+<li>Gets passed an iterator where you can do a for loop to get input parameters / a flattened list of files</li>
+<li>Other parameters are forwarded as is</li>
+<li>The duty of the function is to <tt class="docutils literal"><span class="pre">yield</span></tt> input, output, extra parameters</li>
+</ul>
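+<p>A hypothetical sketch of what such a custom generator might look like under this planned
+protocol (this syntax does not exist in Ruffus yet; all names are invented):</p>
+<div class="highlight-python"><pre>def my_param_generator(upstream_files):
+    # upstream_files: the flattened list of input files handed over by Ruffus
+    for input_file in upstream_files:
+        # yield (input, output, extra...) parameters for one job
+        yield input_file, input_file + ".summary", "extra_value"</pre>
+</div>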
+<p>Simple to do but how do we prevent this from being a job-trickling barrier?</p>
+<p>Postpone until we have an initial design for job-trickling: Ruffus v.4 ;-(</p>
+</div></blockquote>
+</div>
+<div class="section" id="planned-ruffus-gui-interface">
+<span id="todo-gui"></span><h2>Planned: Ruffus GUI interface.<a class="headerlink" href="#planned-ruffus-gui-interface" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div>Desktop (PyQt or web-based solution?) I’d love to see an SVG pipeline picture that I could actually interact with.</div></blockquote>
+</div>
+<div class="section" id="planned-non-decorator-function-interface-to-ruffus">
+<h2>Planned: Non-decorator / Function interface to Ruffus<a class="headerlink" href="#planned-non-decorator-function-interface-to-ruffus" title="Permalink to this headline">¶</a></h2>
+</div>
+<div class="section" id="planned-remove-intermediate-files">
+<span id="todo-intermediate-files"></span><h2>Planned: Remove intermediate files<a class="headerlink" href="#planned-remove-intermediate-files" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>Often large intermediate files are produced in the middle of a pipeline which could be
+removed. However, their absence would cause the pipeline to appear out of date. What is
+the best way to solve this?</p>
+<p>In gmake, all intermediate files which are not marked <tt class="docutils literal"><span class="pre">.PRECIOUS</span></tt> are deleted.</p>
+<dl class="docutils">
+<dt>We do not want to manually mark intermediate files for several reasons:</dt>
+<dd><ul class="first last simple">
+<li>The syntax would be horrible and clunky</li>
+<li>The gmake distinction between <tt class="docutils literal"><span class="pre">implicit</span></tt> and <tt class="docutils literal"><span class="pre">explicit</span></tt> rules is not one we
+would like to impose on Ruffus</li>
+<li>Gmake uses statically determined (DAG) dependency trees so it is quite natural and
+easy to prune intermediate paths</li>
+</ul>
+</dd>
+</dl>
+<p>Our preferred solution should impose little to no semantic load on Ruffus, i.e. it should
+not make it more complex / difficult to use. There are several alternatives we are
+considering:</p>
+<blockquote>
+<div><ol class="arabic simple">
+<li>Have an <strong>update</strong> mode in which pipeline_run would ignore missing files and only run tasks with existing, out-of-date files.</li>
+<li>Optionally ignore all out-of-date dependencies beyond a specified point in the pipeline</li>
+<li>Add a decorator to flag sections of the pipeline where intermediate files can be removed</li>
+</ol>
+</div></blockquote>
+<p>Option (1) is rather unnerving because it makes inadvertent errors difficult to detect.</p>
+<p>Option (2) involves relying on the user of a script to remember the correct chain of dependencies in
+often complicated pipelines. It would be advisable to keep a flowchart to hand. Again,
+the chances of error are much greater.</p>
+<p>Option (3) springs from the observation by Andreas Heger that parts of a pipeline with
+disposable intermediate files can usually be encapsulated as an autonomous section.
+Within this subpipeline, all is well provided that the outputs of the last task are complete
+and up-to-date with reference to the inputs of the first task. Intermediate files
+could be removed with impunity.</p>
+<p>The suggestion is that these autonomous subpipelines could be marked out using the Ruffus
+decorator syntax:</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="c">#</span>
+<span class="c"># First task in autonomous subpipeline</span>
+<span class="c">#</span>
+<span class="nd">@files</span><span class="p">(</span><span class="s">"who.isit"</span><span class="p">,</span> <span class="s">"its.me"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">first_task</span><span class="p">(</span><span class="o">*</span><span class="n">args</span><span class="p">):</span>
+ <span class="k">pass</span>
+
+<span class="c">#</span>
+<span class="c"># Several intermediate tasks</span>
+<span class="c">#</span>
+<span class="nd">@transform</span><span class="p">(</span><span class="n">subpipeline_task1</span><span class="p">,</span> <span class="n">suffix</span><span class="p">(</span><span class="s">".me"</span><span class="p">),</span> <span class="s">".her"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">task2_etc</span><span class="p">(</span><span class="o">*</span><span class="n">args</span><span class="p">):</span>
+ <span class="k">pass</span>
+
+<span class="c">#</span>
+<span class="c"># Final task</span>
+<span class="c">#</span>
+<span class="nd">@sub_pipeline</span><span class="p">(</span><span class="n">subpipeline_task1</span><span class="p">)</span>
+<span class="nd">@transform</span><span class="p">(</span><span class="n">subpipeline_task1</span><span class="p">,</span> <span class="n">suffix</span><span class="p">(</span><span class="s">".her"</span><span class="p">),</span> <span class="s">".you"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">final_task</span><span class="p">(</span><span class="o">*</span><span class="n">args</span><span class="p">):</span>
+ <span class="k">pass</span>
+</pre></div>
+</div>
+<p><strong>@sub_pipeline</strong> marks out all tasks between <tt class="docutils literal"><span class="pre">first_task</span></tt> and <tt class="docutils literal"><span class="pre">final_task</span></tt>, and
+intermediate files such as <tt class="docutils literal"><span class="pre">"its.me"</span></tt> and <tt class="docutils literal"><span class="pre">"its.her"</span></tt> can be deleted. The pipeline will
+only run if <tt class="docutils literal"><span class="pre">"its.you"</span></tt> is missing or out-of-date compared with <tt class="docutils literal"><span class="pre">"who.isit"</span></tt>.</p>
+<p>Over the next few Ruffus releases we will see if this is a good design, and whether
+a better keyword can be found than <strong>@sub_pipeline</strong> (candidates include <strong>@shortcut</strong>
+and <strong>@intermediate</strong>).</p>
+</div></blockquote>
+</div>
+<div class="section" id="planned-retry-on-error-num-of-retries">
+<span id="todo-retry"></span><h2>Planned: @retry_on_error(NUM_OF_RETRIES)<a class="headerlink" href="#planned-retry-on-error-num-of-retries" title="Permalink to this headline">¶</a></h2>
+</div>
+<div class="section" id="planned-clean-up">
+<span id="todo-cleanup"></span><h2>Planned: Clean up<a class="headerlink" href="#planned-clean-up" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>The plan is to store the files and directories created by the pipeline via
+a standard interface.</p>
+<p>The placeholder for this is a function call, <tt class="docutils literal"><span class="pre">register_cleanup</span></tt>.</p>
+<p>Jobs can specify the files they created and which need to be
+deleted by returning a list of file names from the job function.</p>
+<p>So:</p>
+<div class="highlight-python"><pre>raise Exception = Error
+
+return False = halt pipeline now
+
+return string / list of strings = cleanup files/directories later
+
+return anything else = ignored</pre>
+</div>
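+<p>As a sketch of this planned convention (hypothetical: Ruffus 2.5 does not yet act on
+job return values), a job might return the names of the scratch files it wants cleaned
+up later:</p>
+<div class="highlight-python"><pre># Hypothetical sketch of the planned return-value convention:
+# returning a list of file names would mark them for later cleanup.
+@transform(["data.raw"], suffix(".raw"), ".final")
+def summarise(input_file, output_file):
+    intermediate = input_file + ".tmp"
+    open(intermediate, "w")            # large scratch file
+    open(output_file, "w")             # the result we want to keep
+    return [intermediate]              # planned: clean this up later</pre>
+</div>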
+<p>The cleanup file/directory store interface can be connected to
+a text file or a database.</p>
+<p>The cleanup function would look like this:</p>
+<div class="highlight-python"><pre>pipeline_cleanup(cleanup_log("../cleanup.log"), [instance ="october19th" ])
+pipeline_cleanup(cleanup_msql_db("user", "password", "hash_record_table"))</pre>
+</div>
+<p>The parameters for where and how to store the list of created files could be
+similarly passed to pipeline_run as an extra parameter:</p>
+<div class="highlight-python"><pre>pipeline_run(cleanup_log("../cleanup.log"), [instance ="october19th" ])
+pipeline_run(cleanup_msql_db("user", "password", "hash_record_table"))</pre>
+</div>
+<p>where <cite>cleanup_log</cite> and <cite>cleanup_msql_db</cite> are classes which have functions for the following (see the sketch after this list):</p>
+<blockquote>
+<div><ol class="arabic simple">
+<li>storing file</li>
+<li>retrieving file</li>
+<li>clearing entries</li>
+</ol>
+</div></blockquote>
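+<p>A minimal sketch of what such a store class might look like. This is hypothetical:
+the interface is only planned, and the method names used here are invented.</p>
+<div class="highlight-python"><pre># Hypothetical sketch of a cleanup store backed by a plain text file.
+# Any class providing these three operations could back pipeline_cleanup().
+class cleanup_log(object):
+    def __init__(self, log_file_name):
+        self.log_file_name = log_file_name
+
+    def store_file(self, file_name):
+        with open(self.log_file_name, "a") as log_file:
+            log_file.write(file_name + "\n")
+
+    def retrieve_files(self):
+        with open(self.log_file_name) as log_file:
+            return [line.rstrip("\n") for line in log_file]
+
+    def clear_entries(self):
+        open(self.log_file_name, "w").close()</pre>
+</div>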
+<ul>
+<li><p class="first">Files would be deleted in reverse order, and directories after files.</p>
+</li>
+<li><p class="first">By default, only empty directories would be removed.</p>
+<p>But this could be changed with a <tt class="docutils literal"><span class="pre">--forced_remove_dir</span></tt> option</p>
+</li>
+<li><p class="first">An <tt class="docutils literal"><span class="pre">--remove_empty_parent_directories</span></tt> option would be
+supported by <a class="reference external" href="http://docs.python.org/library/os.html#os.removedirs">os.removedirs(path)</a>.</p>
+</li>
+</ul>
+</div></blockquote>
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#">Future Changes to Ruffus</a><ul>
+<li><a class="reference internal" href="#todo-pipeline-printout-graph-should-print-inactive-tasks">Todo: pipeline_printout_graph should print inactive tasks</a></li>
+<li><a class="reference internal" href="#todo-mark-input-strings-as-non-file-names-and-add-support-for-dynamically-returned-parameters">Todo: Mark input strings as non-file names, and add support for dynamically returned parameters</a></li>
+<li><a class="reference internal" href="#todo-allow-extra-parameters-to-be-used-in-output-substitution">Todo: Allow “extra” parameters to be used in output substitution</a></li>
+<li><a class="reference internal" href="#todo-extra-signalling-before-and-after-each-task-and-job">Todo: Extra signalling before and after each task and job</a></li>
+<li><a class="reference internal" href="#todo-split-subdivide-returns-the-actual-output-created">Todo: <tt class="docutils literal"><span class="pre">@split</span></tt> / <tt class="docutils literal"><span class="pre">@subdivide</span></tt> returns the actual output created</a><ul>
+<li><a class="reference internal" href="#checkpointing">Checkpointing</a></li>
+</ul>
+</li>
+<li><a class="reference internal" href="#id1">Todo: New decorators</a><ul>
+<li><a class="reference internal" href="#todo-originate">Todo: <tt class="docutils literal"><span class="pre">@originate</span></tt></a></li>
+<li><a class="reference internal" href="#todo-recombine">Todo: <tt class="docutils literal"><span class="pre">@recombine</span></tt></a></li>
+</ul>
+</li>
+<li><a class="reference internal" href="#todo-named-parameters-in-decorators-for-clarity">Todo: Named parameters in decorators for clarity</a></li>
+<li><a class="reference internal" href="#todo-bioinformatics-example-to-end-all-examples">Todo: Bioinformatics example to end all examples</a></li>
+<li><a class="reference internal" href="#todo-allow-the-next-task-to-start-before-all-jobs-in-the-previous-task-have-finished">Todo: Allow the next task to start before all jobs in the previous task have finished</a><ul>
+<li><a class="reference internal" href="#converting-to-per-job-rather-than-per-task-dependencies">Converting to per-job rather than per task dependencies</a></li>
+<li><a class="reference internal" href="#implementation">Implementation</a></li>
+</ul>
+</li>
+</ul>
+</li>
+<li><a class="reference internal" href="#planned-improvements-to-ruffus">Planned Improvements to Ruffus</a><ul>
+<li><a class="reference internal" href="#planned-running-python-code-task-functions-transparently-on-remote-cluster-nodes">Planned: Running python code (task functions) transparently on remote cluster nodes</a></li>
+<li><a class="reference internal" href="#planned-custom-parameter-generator">Planned: Custom parameter generator</a></li>
+<li><a class="reference internal" href="#planned-ruffus-gui-interface">Planned: Ruffus GUI interface.</a></li>
+<li><a class="reference internal" href="#planned-non-decorator-function-interface-to-ruffus">Planned: Non-decorator / Function interface to Ruffus</a></li>
+<li><a class="reference internal" href="#planned-remove-intermediate-files">Planned: Remove intermediate files</a></li>
+<li><a class="reference internal" href="#planned-retry-on-error-num-of-retries">Planned: @retry_on_error(NUM_OF_RETRIES)</a></li>
+<li><a class="reference internal" href="#planned-clean-up">Planned: Clean up</a></li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="history.html"
+ title="previous chapter">Major Features added to Ruffus</a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="implementation_notes.html"
+ title="next chapter">Implementation Tips</a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="_sources/todo.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="decorators/decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="decorators/originate.html">@originate</a> </li>
+ <li><a href="decorators/split.html">@split</a> </li>
+ <li><a href="decorators/transform.html">@transform</a> </li>
+ <li><a href="decorators/merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="decorators/decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="decorators/subdivide.html">@subdivide</a> </li>
+ <li><a href="decorators/transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="decorators/collate.html">@collate</a> </li>
+ <li><a href="decorators/collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="decorators/graphviz.html">@graphviz</a> </li>
+ <li><a href="decorators/mkdir.html">@mkdir</a> </li>
+ <li><a href="decorators/follows.html">@follows / mkdir</a> </li>
+ <li><a href="decorators/posttask.html">@posttask touch_file</a> </li>
+ <li><a href="decorators/active_if.html">@active_if</a> </li>
+ <li><a href="decorators/jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="decorators/decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="decorators/product.html">@product </a> </li>
+ <li><a href="decorators/permutations.html">@permutations </a> </li>
+ <li><a href="decorators/combinations.html">@combinations </a> </li>
+ <li><a href="decorators/combinations_with_replacement.html">@combinations_ _with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="decorators/decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="decorators/files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="decorators/parallel.html">@parallel</a> </li>
+ <li><a href="decorators/check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="decorators/indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="#">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="implementation_notes.html" title="Implementation Tips"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="history.html" title="Major Features added to Ruffus"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="index.html">Home</a> | </li>
+ <li><a href="contents.html">Contents</a> | </li>
+ <li><a href="installation.html">Install</a> | </li>
+ <li><a href="tutorials/new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="tutorials/new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="faq.html">FAQ</a> | </li>
+ <li><a href="cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="tutorials/new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="gallery.html">Gallery</a> | </li>
+ <li><a href="history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/tutorials/manual/advanced_transform.html b/doc/_build/html/tutorials/manual/advanced_transform.html
new file mode 100644
index 0000000..46ecd87
--- /dev/null
+++ b/doc/_build/html/tutorials/manual/advanced_transform.html
@@ -0,0 +1,517 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>Chapter 15: add_inputs() and inputs(): Controlling both input and output files with @transform — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../../index.html" />
+ <link rel="next" title="Chapter 16: Esoteric: Running jobs in parallel without using files with @parallel" href="parallel.html" />
+ <link rel="prev" title="Chapter 14: @collate: group together disparate input into sets of results" href="collate.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="parallel.html" title="Chapter 16: Esoteric: Running jobs in parallel without using files with @parallel"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="collate.html" title="Chapter 14: @collate: group together disparate input into sets of results"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="../new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <div class="section" id="manual-transform-ex-chapter-num-add-inputs-and-inputs-controlling-both-input-and-output-files-with-transform">
+<span id="manual-advanced-transform"></span><h1><strong>Chapter 15</strong>: <strong>add_inputs()</strong> <cite>and</cite> <strong>inputs()</strong>: <cite>Controlling both input and output files with</cite> <strong>@transform</strong><a class="headerlink" href="#manual-transform-ex-chapter-num-add-inputs-and-inputs-controlling-both-input-and-output-files-with-transform" title="Permalink to this headline">¶</a></h1>
+<blockquote>
+<div><table class="hlist"><tr><td><ul class="simple">
+<li><a class="reference internal" href="manual_contents.html#manual"><em>Manual overview</em></a></li>
+</ul>
+</td><td><ul class="simple">
+<li><a class="reference internal" href="../../decorators/transform.html#decorators-transform"><em>@transform</em></a> syntax in detail</li>
+</ul>
+</td></tr></table>
+<p id="index-0">The standard <a class="reference internal" href="transform.html#manual-transform"><em>@transform</em></a> allows you to send a list of data files
+to the same pipelined function and for the resulting <em>outputs</em> parameter to be automatically
+inferred from file names in the <em>inputs</em>.</p>
+<dl class="docutils">
+<dt>There are two situations where you might desire additional flexibility:</dt>
+<dd><ol class="first last arabic simple">
+<li>You need to add additional prerequisites or filenames to the <em>inputs</em> of every single one
+of your jobs</li>
+<li>(Less often,) the actual <em>inputs</em> file names are some variant of the <em>outputs</em> of another
+task.</li>
+</ol>
+</dd>
+</dl>
+<p>Either way, it is occasionally very useful to be able to generate the actual <em>inputs</em> as
+well as <em>outputs</em> parameters by regular expression substitution. The following examples will show
+you both how and why you would want to do this.</p>
+</div></blockquote>
+<div class="section" id="adding-additional-input-prerequisites-per-job">
+<h2>Adding additional <em>input</em> prerequisites per job<a class="headerlink" href="#adding-additional-input-prerequisites-per-job" title="Permalink to this headline">¶</a></h2>
+<div class="section" id="example-compiling-c-code">
+<h3>1.) Example: compiling c++ code<a class="headerlink" href="#example-compiling-c-code" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><dl class="docutils">
+<dt>Suppose we wished to compile some c++ (<tt class="docutils literal"><span class="pre">"*.cpp"</span></tt>) files:</dt>
+<dd><div class="first last highlight-python"><div class="highlight"><pre><span class="n">source_files</span> <span class="o">=</span> <span class="s">"hasty.cpp"</span><span class="p">,</span> <span class="s">"tasty.cpp"</span><span class="p">,</span> <span class="s">"messy.cpp"</span>
+<span class="k">for</span> <span class="n">source_file</span> <span class="ow">in</span> <span class="n">source_files</span><span class="p">:</span>
+ <span class="nb">open</span><span class="p">(</span><span class="n">source_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+</pre></div>
+</div>
+</dd>
+</dl>
+<dl class="docutils">
+<dt>The ruffus code would look like this:</dt>
+<dd><div class="first last highlight-python"><div class="highlight"><pre><span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+
+<span class="nd">@transform</span><span class="p">(</span><span class="n">source_files</span><span class="p">,</span> <span class="n">suffix</span><span class="p">(</span><span class="s">".cpp"</span><span class="p">),</span> <span class="s">".o"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">compile</span><span class="p">(</span><span class="n">input_filename</span><span class="p">,</span> <span class="n">output_file_name</span><span class="p">):</span>
+ <span class="nb">open</span><span class="p">(</span><span class="n">output_file_name</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+</pre></div>
+</div>
+</dd>
+</dl>
+<dl class="docutils">
+<dt>This results in the following jobs:</dt>
+<dd><div class="first last highlight-python"><div class="highlight"><pre><span class="gp">>>> </span><span class="n">pipeline_run</span><span class="p">([</span><span class="nb">compile</span><span class="p">],</span> <span class="n">verbose</span> <span class="o">=</span> <span class="mi">2</span><span class="p">,</span> <span class="n">multiprocess</span> <span class="o">=</span> <span class="mi">3</span><span class="p">)</span>
+
+<span class="go"> Job = [None -> hasty.cpp] completed</span>
+<span class="go"> Job = [None -> tasty.cpp] completed</span>
+<span class="go"> Job = [None -> messy.cpp] completed</span>
+<span class="go">Completed Task = prepare_cpp_source</span>
+
+<span class="go"> Job = [hasty.cpp -> hasty.o] completed</span>
+<span class="go"> Job = [messy.cpp -> messy.o] completed</span>
+<span class="go"> Job = [tasty.cpp -> tasty.o] completed</span>
+<span class="go">Completed Task = compile</span>
+</pre></div>
+</div>
+</dd>
+</dl>
+</div></blockquote>
+</div>
+<div class="section" id="example-adding-a-header-file-with-add-inputs">
+<h3>2.) Example: Adding a header file with <strong>add_inputs(..)</strong><a class="headerlink" href="#example-adding-a-header-file-with-add-inputs" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><p>All this is plain vanilla <strong>@transform</strong> syntax. But suppose that we need to add a
+common header file <tt class="docutils literal"><span class="pre">"universal.h"</span></tt> to our compilation.
+The <strong>add_inputs</strong> indicator provides for this with the minimum of fuss:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="c"># create header file</span>
+<span class="nb">open</span><span class="p">(</span><span class="s">"universal.h"</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+
+<span class="c"># compile C++ files with extra header</span>
+<span class="nd">@transform</span><span class="p">(</span><span class="n">prepare_cpp_source</span><span class="p">,</span> <span class="n">suffix</span><span class="p">(</span><span class="s">".cpp"</span><span class="p">),</span> <span class="n">add_inputs</span><span class="p">(</span><span class="s">"universal.h"</span><span class="p">),</span> <span class="s">".o"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">compile</span><span class="p">(</span><span class="n">input_filename</span><span class="p">,</span> <span class="n">output_file_name</span><span class="p">):</span>
+ <span class="nb">open</span><span class="p">(</span><span class="n">output_file_name</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p>Now the input to each job is a python list, with <tt class="docutils literal"><span class="pre">"universal.h"</span></tt> added to each <tt class="docutils literal"><span class="pre">"*.cpp"</span></tt> file:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="gp">>>> </span><span class="n">pipeline_run</span><span class="p">([</span><span class="nb">compile</span><span class="p">],</span> <span class="n">verbose</span> <span class="o">=</span> <span class="mi">2</span><span class="p">,</span> <span class="n">multiprocess</span> <span class="o">=</span> <span class="mi">3</span><span class="p">)</span>
+
+<span class="go"> Job = [ [hasty.cpp, universal.h] -> hasty.o] completed</span>
+<span class="go"> Job = [ [messy.cpp, universal.h] -> messy.o] completed</span>
+<span class="go"> Job = [ [tasty.cpp, universal.h] -> tasty.o] completed</span>
+<span class="go">Completed Task = compile</span>
+</pre></div>
+</div>
+</div></blockquote>
+</div></blockquote>
+</div>
+</div>
+<div class="section" id="additional-input-prerequisites-can-be-globs-tasks-or-pattern-matches">
+<h2>Additional <em>input</em> prerequisites can be globs, tasks or pattern matches<a class="headerlink" href="#additional-input-prerequisites-can-be-globs-tasks-or-pattern-matches" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div>A common requirement is to include the corresponding header file in compilations.
+It is easy to use <strong>add_inputs</strong> to look up additional files via pattern matches.</div></blockquote>
+<div class="section" id="example-adding-matching-header-file">
+<h3>3.) Example: Adding matching header file<a class="headerlink" href="#example-adding-matching-header-file" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><dl class="docutils">
+<dt>To make this example more fun, we shall also:</dt>
+<dd><ol class="first last arabic simple">
+<li>Give each source code file its own ordinal</li>
+<li>Use <tt class="docutils literal"><span class="pre">add_inputs</span></tt> to add files produced by another task function</li>
+</ol>
+</dd>
+</dl>
+<div class="highlight-python"><div class="highlight"><pre><span class="c"># each source file has its own index</span>
+<span class="n">source_names</span> <span class="o">=</span> <span class="p">[(</span><span class="s">"hasty.cpp"</span><span class="p">,</span> <span class="mi">1</span><span class="p">),</span>
+ <span class="p">(</span><span class="s">"tasty.cpp"</span><span class="p">,</span> <span class="mi">2</span><span class="p">),</span>
+ <span class="p">(</span><span class="s">"messy.cpp"</span><span class="p">,</span> <span class="mi">3</span><span class="p">),</span> <span class="p">]</span>
+<span class="n">header_names</span> <span class="o">=</span> <span class="p">[</span><span class="n">sn</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s">".cpp"</span><span class="p">,</span> <span class="s">".h"</span><span class="p">)</span> <span class="k">for</span> <span class="p">(</span><span class="n">sn</span><span class="p">,</span> <span class="n">i</span><span class="p">)</span> <span class="ow">in</span> < [...]
+<span class="n">header_names</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="s">"universal.h"</span><span class="p">)</span>
+
+<span class="c">#</span>
+<span class="c"># create header and source files</span>
+<span class="c">#</span>
+<span class="k">for</span> <span class="n">source</span><span class="p">,</span> <span class="n">source_index</span> <span class="ow">in</span> <span class="n">source_names</span><span class="p">:</span>
+ <span class="nb">open</span><span class="p">(</span><span class="n">source</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+
+<span class="k">for</span> <span class="n">header</span> <span class="ow">in</span> <span class="n">header_names</span><span class="p">:</span>
+ <span class="nb">open</span><span class="p">(</span><span class="n">header</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+
+
+
+<span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+
+<span class="c">#</span>
+<span class="c"># lookup embedded strings in each source files</span>
+<span class="c">#</span>
+<span class="nd">@transform</span><span class="p">(</span><span class="n">source_names</span><span class="p">,</span> <span class="n">suffix</span><span class="p">(</span><span class="s">".cpp"</span><span class="p">),</span> <span class="s">".embedded"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">get_embedded_strings</span><span class="p">(</span><span class="n">input_filename</span><span class="p">,</span> <span class="n">output_file_name</span><span class="p">):</span>
+ <span class="nb">open</span><span class="p">(</span><span class="n">output_file_name</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+
+
+
+<span class="c"># compile C++ files with extra header</span>
+<span class="nd">@transform</span><span class="p">(</span><span class="n">source_names</span><span class="p">,</span> <span class="n">suffix</span><span class="p">(</span><span class="s">".cpp"</span><span class="p">),</span>
+ <span class="n">add_inputs</span><span class="p">(</span> <span class="s">"universal.h"</span><span class="p">,</span>
+ <span class="s">r"\1.h"</span><span class="p">,</span>
+ <span class="n">get_embedded_strings</span> <span class="p">),</span> <span class="s">".o"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">compile</span><span class="p">(</span><span class="n">input_params</span><span class="p">,</span> <span class="n">output_file_name</span><span class="p">):</span>
+ <span class="nb">open</span><span class="p">(</span><span class="n">output_file_name</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+
+
+<span class="n">pipeline_run</span><span class="p">([</span><span class="nb">compile</span><span class="p">],</span> <span class="n">verbose</span> <span class="o">=</span> <span class="mi">2</span><span class="p">,</span> <span class="n">multiprocess</span> <span class="o">=</span> <span class="mi">3</span><span class="p">)</span>
+</pre></div>
+</div>
+<p>This script gives the following output</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="gp">>>> </span><span class="n">pipeline_run</span><span class="p">([</span><span class="nb">compile</span><span class="p">],</span> <span class="n">verbose</span> <span class="o">=</span> <span class="mi">2</span><span class="p">,</span> <span class="n">multiprocess</span> <span class="o">=</span> <span class="mi">3</span><span class="p">)</span>
+
+<span class="go"> Job = [[hasty.cpp, 1] -> hasty.embedded] completed</span>
+<span class="go"> Job = [[messy.cpp, 3] -> messy.embedded] completed</span>
+<span class="go"> Job = [[tasty.cpp, 2] -> tasty.embedded] completed</span>
+<span class="go">Completed Task = get_embedded_strings</span>
+
+<span class="go"> Job = [[[hasty.cpp, 1], # inputs</span>
+<span class="go"> universal.h, # common header</span>
+<span class="go"> hasty.h, # corresponding header</span>
+<span class="go"> hasty.embedded, messy.embedded, tasty.embedded] # output of get_embedded_strings()</span>
+<span class="go"> -> hasty.o] completed</span>
+<span class="go"> Job = [[[messy.cpp, 3], # inputs</span>
+<span class="go"> universal.h, # common header</span>
+<span class="go"> messy.h, # corresponding header</span>
+<span class="go"> hasty.embedded, messy.embedded, tasty.embedded] # output of get_embedded_strings()</span>
+<span class="go"> -> messy.o] completed</span>
+<span class="go"> Job = [[[tasty.cpp, 2], # inputs</span>
+<span class="go"> universal.h, # common header</span>
+<span class="go"> tasty.h, # corresponding header</span>
+<span class="go"> hasty.embedded, messy.embedded, tasty.embedded] # output of get_embedded_strings()</span>
+<span class="go"> -> tasty.o] completed</span>
+<span class="go">Completed Task = compile</span>
+</pre></div>
+</div>
+</div></blockquote>
+<dl class="docutils">
+<dt>We can see that the <tt class="docutils literal"><span class="pre">compile(...)</span></tt> task now has four sets of <em>inputs</em>:</dt>
+<dd><ol class="first last arabic simple">
+<li>The original inputs (e.g. <tt class="docutils literal"><span class="pre">[hasty.cpp,</span> <span class="pre">1]</span></tt>)</li>
+</ol>
+</dd>
+<dt>And three additional added by <strong>add_inputs(...)</strong></dt>
+<dd><ol class="first last arabic simple" start="2">
+<li>A header file (<tt class="docutils literal"><span class="pre">universal.h</span></tt>) common to all jobs</li>
+<li>The matching header (e.g. <tt class="docutils literal"><span class="pre">hasty.h</span></tt>)</li>
+<li>The output from another task <tt class="docutils literal"><span class="pre">get_embedded_strings()</span></tt> (e.g. <tt class="docutils literal"><span class="pre">hasty.embedded,</span> <span class="pre">messy.embedded,</span> <span class="pre">tasty.embedded</span></tt>)</li>
+</ol>
+</dd>
+</dl>
+<div class="admonition note">
+<p class="first admonition-title">Note</p>
+<p>For input parameters with nested structures (lists or sets), the pattern matching is
+on the first filename string Ruffus comes across (DFS).</p>
+<p>So for <tt class="docutils literal"><span class="pre">["hasty.c",</span> <span class="pre">0]</span></tt>, the pattern matches <tt class="docutils literal"><span class="pre">"hasty.c"</span></tt>.</p>
+<p class="last">If in doubt, use <a class="reference internal" href="tracing_pipeline_parameters.html#manual-tracing-pipeline-parameters"><em>pipeline_printout</em></a> to
+check what parameters Ruffus is using.</p>
+</div>
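+<p>For example (assuming the pipeline above has been defined), the parameters can be
+inspected without running anything:</p>
+<div class="highlight-python"><pre>import sys
+
+# print the parameters Ruffus would use for each job, without running the pipeline
+pipeline_printout(sys.stdout, [compile], verbose = 3)</pre>
+</div>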
+</div></blockquote>
+</div>
+<div class="section" id="example-using-regex-instead-of-suffix">
+<h3>4.) Example: Using <strong>regex(..)</strong> instead of <strong>suffix(..)</strong><a class="headerlink" href="#example-using-regex-instead-of-suffix" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><p>Suffix pattern matching is much simpler and hence usually preferable to the more
+powerful regular expressions. We can rewrite the above example to use <strong>regex</strong> instead,
+giving exactly the same output.</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="c"># compile C++ files with extra header</span>
+<span class="nd">@transform</span><span class="p">(</span><span class="n">source_names</span><span class="p">,</span> <span class="n">regex</span><span class="p">(</span><span class="s">r"(.+)\.cpp"</span><span class="p">),</span>
+ <span class="n">add_inputs</span><span class="p">(</span> <span class="s">"universal.h"</span><span class="p">,</span>
+ <span class="s">r"\1.h"</span><span class="p">,</span>
+ <span class="n">get_embedded_strings</span> <span class="p">),</span> <span class="s">r"\1.o"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">compile</span><span class="p">(</span><span class="n">input_params</span><span class="p">,</span> <span class="n">output_file_name</span><span class="p">):</span>
+ <span class="nb">open</span><span class="p">(</span><span class="n">output_file_name</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+</pre></div>
+</div>
+<div class="admonition note">
+<p class="first admonition-title">Note</p>
+<p class="last">The backreference <tt class="docutils literal"><span class="pre">\g<0></span></tt> usefully substitutes the entire substring matched by
+the regular expression.</p>
+</div>
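+<p>For instance (a hypothetical variant of the example above, not taken from the
+original manual), <tt class="docutils literal"><span class="pre">\g&lt;0&gt;</span></tt> could name each output after the entire matched
+input file name:</p>
+<div class="highlight-python"><pre># Hypothetical variant: "hasty.cpp" -> "hasty.cpp.log" etc.
+@transform(source_names, regex(r".+\.cpp"), r"\g&lt;0&gt;.log")
+def log_compile_flags(input_params, output_file_name):
+    open(output_file_name, "w")</pre>
+</div>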
+</div></blockquote>
+</div>
+</div>
+<div class="section" id="replacing-all-input-parameters-with-inputs">
+<h2>Replacing all input parameters with <strong>inputs(...)</strong><a class="headerlink" href="#replacing-all-input-parameters-with-inputs" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div>More rarely, it is necessary to replace all the input parameters wholescale.</div></blockquote>
+<div class="section" id="example-running-matching-python-scripts">
+<h3>5.) Example: Running matching python scripts<a class="headerlink" href="#example-running-matching-python-scripts" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><p>In the following example, we are not compiling C++ source files but invoking
+corresponding python scripts which have the same name.</p>
+<p>Given three c++ files and their corresponding python scripts:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="c"># each source file has its own index</span>
+<span class="n">source_names</span> <span class="o">=</span> <span class="p">[(</span><span class="s">"hasty.cpp"</span><span class="p">,</span> <span class="mi">1</span><span class="p">),</span>
+ <span class="p">(</span><span class="s">"tasty.cpp"</span><span class="p">,</span> <span class="mi">2</span><span class="p">),</span>
+ <span class="p">(</span><span class="s">"messy.cpp"</span><span class="p">,</span> <span class="mi">3</span><span class="p">),</span> <span class="p">]</span>
+
+<span class="c">#</span>
+<span class="c"># create c++ source files and corresponding python files</span>
+<span class="c">#</span>
+<span class="k">for</span> <span class="n">source</span><span class="p">,</span> <span class="n">source_index</span> <span class="ow">in</span> <span class="n">source_names</span><span class="p">:</span>
+ <span class="nb">open</span><span class="p">(</span><span class="n">source</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+ <span class="nb">open</span><span class="p">(</span><span class="n">source</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s">".cpp"</span><span class="p">,</span> <span class="s">".py"</span><span class="p">),</span> <span class="s">"w"</span><span class="p">)</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p>The Ruffus code will call each python script corresponding to their c++ counterpart:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+
+
+<span class="c"># run corresponding python files</span>
+<span class="nd">@transform</span><span class="p">(</span><span class="n">source_names</span><span class="p">,</span> <span class="n">suffix</span><span class="p">(</span><span class="s">".cpp"</span><span class="p">),</span> <span class="n">inputs</span><span class="p">(</span> <span class="s">r"\1.py"</span><span class="p">),</span> <span class="s">".results"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">run_python_file</span><span class="p">(</span><span class="n">input_params</span><span class="p">,</span> <span class="n">output_file_name</span><span class="p">):</span>
+ <span class="nb">open</span><span class="p">(</span><span class="n">output_file_name</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+
+
+<span class="n">pipeline_run</span><span class="p">([</span><span class="n">run_python_file</span><span class="p">],</span> <span class="n">verbose</span> <span class="o">=</span> <span class="mi">2</span><span class="p">,</span> <span class="n">multiprocess</span> <span class="o">=</span> <span class="mi">3</span><span class="p">)</span>
+</pre></div>
+</div>
+</div></blockquote>
+<dl class="docutils">
+<dt>Resulting in this output:</dt>
+<dd><div class="first last highlight-python"><div class="highlight"><pre><span class="gp">>>> </span><span class="n">pipeline_run</span><span class="p">([</span><span class="n">run_python_file</span><span class="p">],</span> <span class="n">verbose</span> <span class="o">=</span> <span class="mi">2</span><span class="p">,</span> <span class="n">multiprocess</span> <span class="o">=</span> <span class="mi">3</span><span class="p">)</span>
+<span class="go"> Job = [hasty.py -> hasty.results] completed</span>
+<span class="go"> Job = [messy.py -> messy.results] completed</span>
+<span class="go"> Job = [tasty.py -> tasty.results] completed</span>
+<span class="go">Completed Task = run_python_file</span>
+</pre></div>
+</div>
+</dd>
+</dl>
+</div></blockquote>
+</div>
+<div class="section" id="id1">
+<h3>6.) Example: Using <strong>regex</strong> instead of <strong>suffix</strong><a class="headerlink" href="#id1" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><p>Again, the same code can be written (less clearly) using the more powerful
+<strong>regex</strong> and python regular expressions:</p>
+<blockquote>
+<div><div class="highlight-python"><pre>from ruffus import *
+
+
+# run corresponding python files
+@transform(source_names, regex(r"(.+)\.cpp"), inputs( r"\1.py"), r"\1.results")
+def run_python_file(input_params, output_file_name):
+    open(output_file_name, "w")
+
+
+pipeline_run([run_python_file], verbose = 2, multiprocess = 3)</pre>
+</div>
+</div></blockquote>
+<p>This is about as sophisticated as <strong>@transform</strong> ever gets!</p>
+</div></blockquote>
+</div>
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="../../contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#"><strong>Chapter 15</strong>: <strong>add_inputs()</strong> <cite>and</cite> <strong>inputs()</strong>: <cite>Controlling both input and output files with</cite> <strong>@transform</strong></a><ul>
+<li><a class="reference internal" href="#adding-additional-input-prerequisites-per-job">Adding additional <em>input</em> prerequisites per job</a><ul>
+<li><a class="reference internal" href="#example-compiling-c-code">1.) Example: compiling c++ code</a></li>
+<li><a class="reference internal" href="#example-adding-a-header-file-with-add-inputs">2.) Example: Adding a header file with <strong>add_inputs(..)</strong></a></li>
+</ul>
+</li>
+<li><a class="reference internal" href="#additional-input-prerequisites-can-be-globs-tasks-or-pattern-matches">Additional <em>input</em> prerequisites can be globs, tasks or pattern matches</a><ul>
+<li><a class="reference internal" href="#example-adding-matching-header-file">3.) Example: Adding matching header file</a></li>
+<li><a class="reference internal" href="#example-using-regex-instead-of-suffix">4.) Example: Using <strong>regex(..)</strong> instead of <strong>suffix(..)</strong></a></li>
+</ul>
+</li>
+<li><a class="reference internal" href="#replacing-all-input-parameters-with-inputs">Replacing all input parameters with <strong>inputs(...)</strong></a><ul>
+<li><a class="reference internal" href="#example-running-matching-python-scripts">4.) Example: Running matching python scripts</a></li>
+<li><a class="reference internal" href="#id1">5.) Example: Using <strong>regex</strong> instead of <strong>suffix</strong></a></li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="collate.html"
+ title="previous chapter"><strong>Chapter 14</strong>: <strong>@collate</strong>: <cite>group together disparate input into sets of results</cite></a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="parallel.html"
+ title="next chapter"><strong>Chapter 16</strong>: <cite>Esoteric: Running jobs in parallel without using files with</cite> <strong>@parallel</strong></a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../../_sources/tutorials/manual/advanced_transform.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="../../decorators/decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="../../decorators/originate.html">@originate</a> </li>
+ <li><a href="../../decorators/split.html">@split</a> </li>
+ <li><a href="../../decorators/transform.html">@transform</a> </li>
+ <li><a href="../../decorators/merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="../../decorators/subdivide.html">@subdivide</a> </li>
+ <li><a href="../../decorators/transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="../../decorators/collate.html">@collate</a> </li>
+ <li><a href="../../decorators/collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="../../decorators/graphviz.html">@graphviz</a> </li>
+ <li><a href="../../decorators/mkdir.html">@mkdir</a> </li>
+ <li><a href="../../decorators/follows.html">@follows / mkdir</a> </li>
+ <li><a href="../../decorators/posttask.html">@posttask touch_file</a> </li>
+ <li><a href="../../decorators/active_if.html">@active_if</a> </li>
+ <li><a href="../../decorators/jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="../../decorators/product.html">@product </a> </li>
+ <li><a href="../../decorators/permutations.html">@permutations </a> </li>
+ <li><a href="../../decorators/combinations.html">@combinations </a> </li>
+ <li><a href="../../decorators/combinations_with_replacement.html">@combinations_ _with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="../../decorators/decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="../../decorators/files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="../../decorators/parallel.html">@parallel</a> </li>
+ <li><a href="../../decorators/check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="../../decorators/indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="parallel.html" title="Chapter 16: Esoteric: Running jobs in parallel without using files with @parallel"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="collate.html" title="Chapter 14: @collate: group together disparate input into sets of results"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="../new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/tutorials/manual/check_if_uptodate.html b/doc/_build/html/tutorials/manual/check_if_uptodate.html
new file mode 100644
index 0000000..9043536
--- /dev/null
+++ b/doc/_build/html/tutorials/manual/check_if_uptodate.html
@@ -0,0 +1,269 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>Chapter 17: Writing custom functions to decide which jobs are up to date — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../../index.html" />
+ <link rel="next" title="Chapter 18: Exceptions thrown inside a pipeline" href="exceptions.html" />
+ <link rel="prev" title="Chapter 16: Esoteric: Running jobs in parallel without using files with @parallel" href="parallel.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="exceptions.html" title="Chapter 18: Exceptions thrown inside a pipeline"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="parallel.html" title="Chapter 16: Esoteric: Running jobs in parallel without using files with @parallel"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="../new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <div class="section" id="manual-check-if-uptodate-chapter-num-writing-custom-functions-to-decide-which-jobs-are-up-to-date">
+<span id="manual-check-if-uptodate"></span><h1><strong>Chapter 17</strong>: <cite>Writing custom functions to decide which jobs are up to date</cite><a class="headerlink" href="#manual-check-if-uptodate-chapter-num-writing-custom-functions-to-decide-which-jobs-are-up-to-date" title="Permalink to this headline">¶</a></h1>
+<blockquote>
+<div><table class="hlist"><tr><td><ul class="simple">
+<li><a class="reference internal" href="manual_contents.html#manual"><em>Manual overview</em></a></li>
+</ul>
+</td><td><ul class="simple">
+<li><a class="reference internal" href="../../decorators/check_if_uptodate.html#decorators-check-if-uptodate"><em>@check_if_uptodate syntax in detail</em></a></li>
+</ul>
+</td></tr></table>
+</div></blockquote>
+<div class="section" id="check-if-uptodate-manual-dependency-checking">
+<span id="index-0"></span><h2><strong>@check_if_uptodate</strong> : Manual dependency checking<a class="headerlink" href="#check-if-uptodate-manual-dependency-checking" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><dl class="docutils">
+<dt>tasks specified with</dt>
+<dd><ul class="first last simple">
+<li><a class="reference internal" href="files.html#manual-files"><em>@files</em></a></li>
+<li><a class="reference internal" href="split.html#manual-split"><em>@split</em></a></li>
+<li><a class="reference internal" href="transform.html#manual-transform"><em>@transform</em></a></li>
+<li><a class="reference internal" href="merge.html#manual-merge"><em>@merge</em></a></li>
+<li><a class="reference internal" href="collate.html#manual-collate"><em>@collate</em></a></li>
+</ul>
+</dd>
+</dl>
+<p>have automatic dependency checking based on file modification times.</p>
+<p>Sometimes, you might want to have more control over whether to run jobs, especially
+if a task does not rely on or produce files (i.e. with <a class="reference internal" href="parallel.html#manual-parallel"><em>@parallel</em></a>).</p>
+<p>You can write your own custom function to decide whether to run a job.
+This takes as many parameters as your task function, and needs to return a
+tuple indicating whether an update is required, and why (i.e. <tt class="docutils literal"><span class="pre">tuple(bool,</span> <span class="pre">str)</span></tt>).</p>
+<p>This simple example which creates the file <tt class="docutils literal"><span class="pre">"a.1"</span></tt> if it does not exist:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+<span class="nd">@files</span><span class="p">(</span><span class="bp">None</span><span class="p">,</span> <span class="s">"a.1"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">create_if_necessary</span><span class="p">(</span><span class="n">input_file</span><span class="p">,</span> <span class="n">output_file</span><span class="p">):</span>
+ <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+
+<span class="n">pipeline_run</span><span class="p">([</span><span class="n">create_if_necessary</span><span class="p">])</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p>could be rewritten more laboriously as:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+<span class="kn">import</span> <span class="nn">os</span>
+<span class="k">def</span> <span class="nf">check_file_exists</span><span class="p">(</span><span class="n">input_file</span><span class="p">,</span> <span class="n">output_file</span><span class="p">):</span>
+ <span class="k">if</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">exists</span><span class="p">(</span><span class="n">output_file</span><span class="p">):</span>
+ <span class="k">return</span> <span class="bp">False</span><span class="p">,</span> <span class="s">"File already exists"</span>
+ <span class="k">return</span> <span class="bp">True</span><span class="p">,</span> <span class="s">"</span><span class="si">%s</span><span class="s"> is missing"</span> <span class="o">%</span> <span class="n">output_file</span>
+
+<span class="nd">@parallel</span><span class="p">([[</span><span class="bp">None</span><span class="p">,</span> <span class="s">"a.1"</span><span class="p">]])</span>
+<span class="nd">@check_if_uptodate</span><span class="p">(</span><span class="n">check_file_exists</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">create_if_necessary</span><span class="p">(</span><span class="n">input_file</span><span class="p">,</span> <span class="n">output_file</span><span class="p">):</span>
+ <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+
+<span class="n">pipeline_run</span><span class="p">([</span><span class="n">create_if_necessary</span><span class="p">])</span>
+</pre></div>
+</div>
+</div></blockquote>
+<dl class="docutils">
+<dt>Both produce the same output:</dt>
+<dd><div class="first last highlight-python"><pre>Task = create_if_necessary
+ Job = [null, "a.1"] completed</pre>
+</div>
+</dd>
+</dl>
+</div></blockquote>
+<div class="admonition note">
+<p class="first admonition-title">Note</p>
+<p>The function specified by <a class="reference internal" href="#manual-check-if-uptodate"><em>@check_if_uptodate</em></a> can be called
+more than once for each job.</p>
+<p class="last">See the <a class="reference internal" href="dependencies.html#manual-dependencies"><em>description here</em></a> of how <strong>Ruffus</strong> decides which tasks to run.</p>
+</div>
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="../../contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#"><strong>Chapter 17</strong>: <cite>Writing custom functions to decide which jobs are up to date</cite></a><ul>
+<li><a class="reference internal" href="#check-if-uptodate-manual-dependency-checking"><strong>@check_if_uptodate</strong> : Manual dependency checking</a></li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="parallel.html"
+ title="previous chapter"><strong>Chapter 16</strong>: <cite>Esoteric: Running jobs in parallel without using files with</cite> <strong>@parallel</strong></a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="exceptions.html"
+ title="next chapter"><strong>Chapter 18</strong>: <cite>Exceptions thrown inside a pipeline</cite></a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../../_sources/tutorials/manual/check_if_uptodate.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="../../decorators/decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="../../decorators/originate.html">@originate</a> </li>
+ <li><a href="../../decorators/split.html">@split</a> </li>
+ <li><a href="../../decorators/transform.html">@transform</a> </li>
+ <li><a href="../../decorators/merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="../../decorators/subdivide.html">@subdivide</a> </li>
+ <li><a href="../../decorators/transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="../../decorators/collate.html">@collate</a> </li>
+ <li><a href="../../decorators/collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="../../decorators/graphviz.html">@graphviz</a> </li>
+ <li><a href="../../decorators/mkdir.html">@mkdir</a> </li>
+ <li><a href="../../decorators/follows.html">@follows / mkdir</a> </li>
+ <li><a href="../../decorators/posttask.html">@posttask touch_file</a> </li>
+ <li><a href="../../decorators/active_if.html">@active_if</a> </li>
+ <li><a href="../../decorators/jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="../../decorators/product.html">@product </a> </li>
+ <li><a href="../../decorators/permutations.html">@permutations </a> </li>
+ <li><a href="../../decorators/combinations.html">@combinations </a> </li>
+ <li><a href="../../decorators/combinations_with_replacement.html">@combinations_ _with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="../../decorators/decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="../../decorators/files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="../../decorators/parallel.html">@parallel</a> </li>
+ <li><a href="../../decorators/check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="../../decorators/indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="exceptions.html" title="Chapter 18: Exceptions thrown inside a pipeline"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="parallel.html" title="Chapter 16: Esoteric: Running jobs in parallel without using files with @parallel"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="../new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/tutorials/manual/collate.html b/doc/_build/html/tutorials/manual/collate.html
new file mode 100644
index 0000000..0fd1add
--- /dev/null
+++ b/doc/_build/html/tutorials/manual/collate.html
@@ -0,0 +1,269 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>Chapter 14: @collate: group together disparate input into sets of results — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../../index.html" />
+ <link rel="next" title="Chapter 15: add_inputs() and inputs(): Controlling both input and output files with @transform" href="advanced_transform.html" />
+ <link rel="prev" title="Chapter 13: Generating parameters on the fly with @files" href="onthefly.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="advanced_transform.html" title="Chapter 15: add_inputs() and inputs(): Controlling both input and output files with @transform"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="onthefly.html" title="Chapter 13: Generating parameters on the fly with @files"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="../new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <div class="section" id="manual-collate-chapter-num-collate-group-together-disparate-input-into-sets-of-results">
+<span id="manual-collate"></span><h1><strong>Chapter 14</strong>: <strong>@collate</strong>: <cite>group together disparate input into sets of results</cite><a class="headerlink" href="#manual-collate-chapter-num-collate-group-together-disparate-input-into-sets-of-results" title="Permalink to this headline">¶</a></h1>
+<blockquote>
+<div><table class="hlist"><tr><td><ul class="simple">
+<li><a class="reference internal" href="manual_contents.html#manual"><em>Manual overview</em></a></li>
+</ul>
+</td><td><ul class="simple">
+<li><a class="reference internal" href="../../decorators/collate.html#decorators-collate"><em>@collate syntax in detail</em></a></li>
+</ul>
+</td></tr></table>
+<p>It is often very useful to group together disparate <em>inputs</em> into several categories, each of which
+leads to a separate <em>output</em>. In the example shown below, we produce separate summaries of results
+depending on which species the file belongs to.</p>
+<p><strong>Ruffus</strong> uses the term <tt class="docutils literal"><span class="pre">collate</span></tt> in a rough analogy to the way printers group together
+copies of documents appropriately.</p>
+</div></blockquote>
+<div class="section" id="collating-many-inputs-each-into-a-single-output">
+<span id="index-0"></span><h2>Collating many <em>inputs</em> each into a single <em>output</em><a class="headerlink" href="#collating-many-inputs-each-into-a-single-output" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>Our example starts with some files which presumably have been created by some
+earlier stages of our pipeline. We simulate this here with this code:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="n">files_names</span> <span class="o">=</span> <span class="p">[</span> <span class="s">"mammals.tiger.wild.animals"</span>
+ <span class="s">"mammals.lion.wild.animals"</span>
+ <span class="s">"mammals.lion.handreared.animals"</span>
+ <span class="s">"mammals.dog.tame.animals"</span>
+ <span class="s">"mammals.dog.wild.animals"</span>
+ <span class="s">"mammals.dog.feral.animals"</span>
+ <span class="s">"reptiles.crocodile.wild.animals"</span> <span class="p">]</span>
+<span class="k">for</span> <span class="n">f</span> <span class="ow">in</span> <span class="n">files_names</span><span class="p">:</span>
+ <span class="nb">open</span><span class="p">(</span><span class="n">f</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="n">f</span><span class="p">)</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p>However, we are only interested in mammals, and we would like the files of each species to
+end up in its own directory, i.e. <tt class="docutils literal"><span class="pre">tiger</span></tt>, <tt class="docutils literal"><span class="pre">lion</span></tt> and <tt class="docutils literal"><span class="pre">dog</span></tt>:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="kn">import</span> <span class="nn">os</span>
+<span class="n">os</span><span class="o">.</span><span class="n">mkdir</span><span class="p">(</span><span class="s">"tiger"</span><span class="p">)</span>
+<span class="n">os</span><span class="o">.</span><span class="n">mkdir</span><span class="p">(</span><span class="s">"lion"</span><span class="p">)</span>
+<span class="n">os</span><span class="o">.</span><span class="n">mkdir</span><span class="p">(</span><span class="s">"dog"</span><span class="p">)</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p>Now we would like to place each file in a different destination, depending on its
+species. The following regular expression, <tt class="docutils literal"><span class="pre">r'mammals.([^.]+)'</span></tt>, captures the species name.
+For <tt class="docutils literal"><span class="pre">mammals.tiger.wild.animals</span></tt>, the first matching group (<tt class="docutils literal"><span class="pre">\1</span></tt>) == <tt class="docutils literal"><span class="pre">"tiger"</span></tt></p>
+<p>Then, the following:</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+
+<span class="nd">@collate</span><span class="p">(</span><span class="s">'*.animals'</span><span class="p">,</span> <span class="c"># inputs = all *.animal files</span>
+ <span class="n">regex</span><span class="p">(</span><span class="s">r'mammals.([^.]+)'</span><span class="p">),</span> <span class="c"># regular expression</span>
+ <span class="s">r'\1/animals.in_my_zoo'</span><span class="p">,</span> <span class="c"># single output file per species</span>
+ <span class="s">r'\1'</span> <span class="p">)</span> <span class="c"># species name</span>
+<span class="k">def</span> <span class="nf">capture_mammals</span><span class="p">(</span><span class="n">infiles</span><span class="p">,</span> <span class="n">outfile</span><span class="p">,</span> <span class="n">species</span><span class="p">):</span>
+ <span class="c"># summarise all animals of this species</span>
+ <span class="k">print</span> <span class="s">"Collating </span><span class="si">%s</span><span class="s">"</span> <span class="o">%</span> <span class="n">species</span>
+
+ <span class="n">o</span> <span class="o">=</span> <span class="nb">open</span><span class="p">(</span><span class="n">outfile</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+ <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="n">infiles</span><span class="p">:</span>
+ <span class="n">o</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="nb">open</span><span class="p">(</span><span class="n">infile</span><span class="p">)</span><span class="o">.</span><span class="n">read</span><span class="p">()</span> <span class="o">+</span> <span class="s">"</span><span class="se">\n</span><span class="s">captured</span><span class="se">\n</span><span class="s">"</span><span class="p">)</span>
+
+<span class="n">pipeline_run</span><span class="p">([</span><span class="n">capture_mammals</span><span class="p">])</span>
+</pre></div>
+</div>
+<p>puts each captured mammal in its own directory:</p>
+<div class="highlight-python"><pre>Task = capture_mammals
+ Job = [(mammals.lion.handreared.animals, mammals.lion.wild.animals) -> lion/animals.in_my_zoo] completed
+ Job = [(mammals.tiger.wild.animals, ) -> tiger/animals.in_my_zoo] completed
+ Job = [(mammals.dog.tame.animals, mammals.dog.wild.animals, mammals.dog.feral.animals) -> dog/animals.in_my_zoo] completed</pre>
+</div>
+<p>The crocodile has been discarded because it isn’t a mammal and the file name
+doesn’t match the <tt class="docutils literal"><span class="pre">mammals</span></tt> part of the regular expression.</p>
+</div></blockquote>
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="../../contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#"><strong>Chapter 14</strong>: <strong>@collate</strong>: <cite>group together disparate input into sets of results</cite></a><ul>
+<li><a class="reference internal" href="#collating-many-inputs-each-into-a-single-output">Collating many <em>inputs</em> each into a single <em>output</em></a></li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="onthefly.html"
+ title="previous chapter"><strong>Chapter 13</strong>: <cite>Generating parameters on the fly with</cite> <strong>@files</strong></a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="advanced_transform.html"
+ title="next chapter"><strong>Chapter 15</strong>: <strong>add_inputs()</strong> <cite>and</cite> <strong>inputs()</strong>: <cite>Controlling both input and output files with</cite> <strong>@transform</strong></a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../../_sources/tutorials/manual/collate.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="../../decorators/decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="../../decorators/originate.html">@originate</a> </li>
+ <li><a href="../../decorators/split.html">@split</a> </li>
+ <li><a href="../../decorators/transform.html">@transform</a> </li>
+ <li><a href="../../decorators/merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="../../decorators/subdivide.html">@subdivide</a> </li>
+ <li><a href="../../decorators/transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="../../decorators/collate.html">@collate</a> </li>
+ <li><a href="../../decorators/collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="../../decorators/graphviz.html">@graphviz</a> </li>
+ <li><a href="../../decorators/mkdir.html">@mkdir</a> </li>
+ <li><a href="../../decorators/follows.html">@follows / mkdir</a> </li>
+ <li><a href="../../decorators/posttask.html">@posttask touch_file</a> </li>
+ <li><a href="../../decorators/active_if.html">@active_if</a> </li>
+ <li><a href="../../decorators/jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="../../decorators/product.html">@product </a> </li>
+ <li><a href="../../decorators/permutations.html">@permutations </a> </li>
+ <li><a href="../../decorators/combinations.html">@combinations </a> </li>
+ <li><a href="../../decorators/combinations_with_replacement.html">@combinations_ _with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="../../decorators/decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="../../decorators/files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="../../decorators/parallel.html">@parallel</a> </li>
+ <li><a href="../../decorators/check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="../../decorators/indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="advanced_transform.html" title="Chapter 15: add_inputs() and inputs(): Controlling both input and output files with @transform"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="onthefly.html" title="Chapter 13: Generating parameters on the fly with @files"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="../new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/tutorials/manual/dependencies.html b/doc/_build/html/tutorials/manual/dependencies.html
new file mode 100644
index 0000000..5b7b8c2
--- /dev/null
+++ b/doc/_build/html/tutorials/manual/dependencies.html
@@ -0,0 +1,335 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>Chapter 12: Checking dependencies to run tasks in order — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../../index.html" />
+ <link rel="next" title="Chapter 13: Generating parameters on the fly with @files" href="onthefly.html" />
+ <link rel="prev" title="Chapter 11: Manage concurrency for a specific task with @jobs_limit" href="jobs_limit.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="onthefly.html" title="Chapter 13: Generating parameters on the fly with @files"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="jobs_limit.html" title="Chapter 11: Manage concurrency for a specific task with @jobs_limit"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="../new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <div class="section" id="manual-dependencies-chapter-num-checking-dependencies-to-run-tasks-in-order">
+<span id="manual-dependencies"></span><h1><strong>Chapter 12</strong>: <cite>Checking dependencies to run tasks in order</cite><a class="headerlink" href="#manual-dependencies-chapter-num-checking-dependencies-to-run-tasks-in-order" title="Permalink to this headline">¶</a></h1>
+<blockquote>
+<div><table class="hlist"><tr><td><ul class="simple">
+<li><a class="reference internal" href="manual_contents.html#manual"><em>Manual overview</em></a></li>
+</ul>
+</td><td><ul class="simple">
+</ul>
+</td></tr></table>
+<p id="index-0">How does <strong>Ruffus</strong> decide how to run your pipeline?</p>
+<blockquote>
+<div><ul class="simple">
+<li>In which order should pipelined functions be called?</li>
+<li>Which parts of the pipeline are up-to-date and do not need to be rerun?</li>
+</ul>
+</div></blockquote>
+</div></blockquote>
+<div class="section" id="running-all-out-of-date-tasks-and-dependents">
+<h2>Running all out-of-date tasks and dependents<a class="headerlink" href="#running-all-out-of-date-tasks-and-dependents" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><img alt="../../_images/manual_dependencies_flowchart_intro.png" src="../../_images/manual_dependencies_flowchart_intro.png" />
+<p>By default, <em>ruffus</em> will</p>
+<blockquote>
+<div><ul class="simple">
+<li>build a flow chart (dependency tree) of pipelined tasks (functions)</li>
+<li>start from the most ancestral tasks with the fewest dependencies (<tt class="docutils literal"><span class="pre">task1</span></tt> and <tt class="docutils literal"><span class="pre">task4</span></tt> in the flowchart above).</li>
+<li>walk up the tree to find the first incomplete / out-of-date tasks (i.e. <tt class="docutils literal"><span class="pre">task3</span></tt> and <tt class="docutils literal"><span class="pre">task5</span></tt>).</li>
+<li>start running from there</li>
+</ul>
+</div></blockquote>
+<dl class="docutils">
+<dt>All down-stream (dependent) tasks will be re-run anyway, so we don’t have to test</dt>
+<dd>whether they are up-to-date or not.</dd>
+</dl>
+<div class="admonition note" id="manual-dependencies-checking-multiple-times">
+<p class="first admonition-title">Note</p>
+<p>This means that <strong>ruffus</strong> <em>may</em> ask any task if their jobs are out of date more than once:</p>
+<blockquote class="last">
+<div><ul class="simple">
+<li>once when deciding which parts of the pipeline have to be run</li>
+<li>once just before executing the task.</li>
+</ul>
+</div></blockquote>
+</div>
+<p><em>Ruffus</em> tries to be clever / efficient, and does the minimal amount of querying.</p>
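+<p>The following is a minimal sketch (the task and function names are illustrative, not from
+the manual) that makes these extra queries visible by printing from a custom
+<tt class="docutils literal"><span class="pre">@check_if_uptodate</span></tt> function:</p>
+<div class="highlight-python"><pre>from ruffus import *
+
+# hypothetical sketch: each time Ruffus asks whether this job is up to date,
+# the custom checking function prints a line, making the repeated queries visible
+def report_query(input_file, output_file):
+    print "queried: %s -> %s" % (input_file, output_file)
+    return True, "always out of date in this sketch"
+
+@parallel([[None, "demo.out"]])
+@check_if_uptodate(report_query)
+def demo_task(input_file, output_file):
+    open(output_file, "w")
+
+pipeline_run([demo_task])
+</pre>
+</div>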
+</div></blockquote>
+</div>
+<div class="section" id="a-simple-example">
+<span id="manual-dependencies-example"></span><h2>A simple example<a class="headerlink" href="#a-simple-example" title="Permalink to this headline">¶</a></h2>
+<div class="section" id="four-successive-tasks-to-run">
+<h3>Four successive tasks to run:<a class="headerlink" href="#four-successive-tasks-to-run" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><div class="admonition note">
+<p class="first admonition-title">Note</p>
+<p class="last">The full code is available <a class="reference internal" href="dependencies_code.html#manual-dependencies-code"><em>here</em></a>.</p>
+</div>
+<p>Suppose we have four successive tasks to run, whose flowchart we can print out
+by running:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="n">pipeline_printout_graph</span> <span class="p">(</span><span class="s">"flowchart.png"</span><span class="p">,</span> <span class="s">"png"</span><span class="p">,</span> <span class="p">[</span><span class="n">task4</span><span class="p">],</span>
+ <span class="n">draw_vertically</span> <span class="o">=</span> <span class="bp">True</span><span class="p">)</span>
+</pre></div>
+</div>
+</div></blockquote>
+<img alt="../../_images/manual_dependencies_flowchart1.png" src="../../_images/manual_dependencies_flowchart1.png" />
+<p>We can see that all four tasks need to run to reach the target task4.</p>
+</div></blockquote>
+</div>
+<div class="section" id="pipeline-tasks-are-up-to-date">
+<h3>Pipeline tasks are up-to-date:<a class="headerlink" href="#pipeline-tasks-are-up-to-date" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><p>After the pipeline runs (<tt class="docutils literal"><span class="pre">python</span> <span class="pre">simpler.py</span> <span class="pre">-d</span> <span class="pre">""</span></tt>), all tasks are up to date and the flowchart shows:</p>
+<img alt="../../_images/manual_dependencies_flowchart2.png" src="../../_images/manual_dependencies_flowchart2.png" />
+</div></blockquote>
+</div>
+<div class="section" id="some-tasks-out-of-date">
+<h3>Some tasks out of date:<a class="headerlink" href="#some-tasks-out-of-date" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><dl class="docutils">
+<dt>If we then made task2 and task4 out of date by modifying their <em>inputs</em> files:</dt>
+<dd><div class="first last highlight-python"><div class="highlight"><pre><span class="nb">open</span><span class="p">(</span><span class="s">"a.1"</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+<span class="nb">open</span><span class="p">(</span><span class="s">"a.3"</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+</pre></div>
+</div>
+</dd>
+</dl>
+<p>the flowchart would show:</p>
+<ol class="arabic simple">
+<li>the pipeline only has to rerun from <tt class="docutils literal"><span class="pre">task2</span></tt>.</li>
+<li><tt class="docutils literal"><span class="pre">task1</span></tt> is complete / up-to-date</li>
+<li><tt class="docutils literal"><span class="pre">task3</span></tt> will have to be re-run because it follows (depends on) <tt class="docutils literal"><span class="pre">task2</span></tt>.</li>
+</ol>
+<img alt="../../_images/manual_dependencies_flowchart3.png" src="../../_images/manual_dependencies_flowchart3.png" />
+</div></blockquote>
+</div>
+</div>
+<div class="section" id="forced-reruns">
+<h2>Forced Reruns<a class="headerlink" href="#forced-reruns" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>Even if a pipeline stage appears to be up to date,
+you can always force the pipeline to re-run from one or more task functions.</p>
+<p>This is particularly useful, for example, if the pipeline data hasn’t changed but
+the analysis or computational code has.</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="n">pipeline_run</span><span class="p">([</span><span class="n">task4</span><span class="p">],</span> <span class="p">[</span><span class="n">task1</span><span class="p">])</span>
+</pre></div>
+</div>
+<p>will run all tasks from <tt class="docutils literal"><span class="pre">task1</span></tt> to <tt class="docutils literal"><span class="pre">task4</span></tt></p>
+</div></blockquote>
+<p>Both the “target” and the “forced” lists can include as many tasks as you wish. All dependencies
+are still carried out and out-of-date jobs rerun.</p>
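+<p>For example (a sketch only: <tt class="docutils literal"><span class="pre">task5</span></tt> and
+<tt class="docutils literal"><span class="pre">task6</span></tt> stand in for hypothetical additional tasks in the same pipeline):</p>
+<blockquote>
+<div><div class="highlight-python"><pre># run everything needed for task4 and task6, forcing task1 and task5
+# to rerun even if they appear up to date
+pipeline_run([task4, task6], [task1, task5])
+</pre>
+</div>
+</div></blockquote>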
+</div></blockquote>
+</div>
+<div class="section" id="esoteric-option-minimal-reruns">
+<h2>Esoteric option: Minimal Reruns<a class="headerlink" href="#esoteric-option-minimal-reruns" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>In the above example, you could point out that <tt class="docutils literal"><span class="pre">task3</span></tt> is not out of date. And if we were only interested
+in the immediate dependencies or prerequisites leading up to <tt class="docutils literal"><span class="pre">task4</span></tt>, we might not
+need task2 to rerun at all, only <tt class="docutils literal"><span class="pre">task4</span></tt>.</p>
+<p>This rather dangerous option is useful if you don’t want to keep all the intermediate
+files/results from upstream tasks. The pipeline will not re-run any incomplete
+tasks which precede an up-to-date result.</p>
+<p>This is seldom what you intend, and you should always check that the appropriate stages
+of the pipeline are executed in the flowchart output.</p>
+<p>In such cases, we can rerun the pipeline with the following option:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="n">pipeline_run</span><span class="p">([</span><span class="n">task4</span><span class="p">],</span> <span class="n">gnu_make_maximal_rebuild_mode</span> <span class="o">=</span> <span class="bp">False</span><span class="p">)</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p>and only <tt class="docutils literal"><span class="pre">task4</span></tt> will rerun.</p>
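+<p>To check which stages would actually be executed before committing to this option, the
+flowchart can be drawn first. This sketch assumes that
+<tt class="docutils literal"><span class="pre">pipeline_printout_graph()</span></tt> accepts the same
+<tt class="docutils literal"><span class="pre">gnu_make_maximal_rebuild_mode</span></tt> flag:</p>
+<blockquote>
+<div><div class="highlight-python"><pre># draw what would run in minimal-rebuild mode, without executing anything
+pipeline_printout_graph("minimal_flowchart.png", "png", [task4],
+                        gnu_make_maximal_rebuild_mode = False)
+</pre>
+</div>
+</div></blockquote>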
+</div></blockquote>
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="../../contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#"><strong>Chapter 12</strong>: <cite>Checking dependencies to run tasks in order</cite></a><ul>
+<li><a class="reference internal" href="#running-all-out-of-date-tasks-and-dependents">Running all out-of-date tasks and dependents</a></li>
+<li><a class="reference internal" href="#a-simple-example">A simple example</a><ul>
+<li><a class="reference internal" href="#four-successive-tasks-to-run">Four successive tasks to run:</a></li>
+<li><a class="reference internal" href="#pipeline-tasks-are-up-to-date">Pipeline tasks are up-to-date:</a></li>
+<li><a class="reference internal" href="#some-tasks-out-of-date">Some tasks out of date:</a></li>
+</ul>
+</li>
+<li><a class="reference internal" href="#forced-reruns">Forced Reruns</a></li>
+<li><a class="reference internal" href="#esoteric-option-minimal-reruns">Esoteric option: Minimal Reruns</a></li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="jobs_limit.html"
+ title="previous chapter"><strong>Chapter 11</strong>: <cite>Manage concurrency for a specific task with</cite> <strong>@jobs_limit</strong></a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="onthefly.html"
+ title="next chapter"><strong>Chapter 13</strong>: <cite>Generating parameters on the fly with</cite> <strong>@files</strong></a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../../_sources/tutorials/manual/dependencies.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="../../decorators/decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="../../decorators/originate.html">@originate</a> </li>
+ <li><a href="../../decorators/split.html">@split</a> </li>
+ <li><a href="../../decorators/transform.html">@transform</a> </li>
+ <li><a href="../../decorators/merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="../../decorators/subdivide.html">@subdivide</a> </li>
+ <li><a href="../../decorators/transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="../../decorators/collate.html">@collate</a> </li>
+ <li><a href="../../decorators/collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="../../decorators/graphviz.html">@graphviz</a> </li>
+ <li><a href="../../decorators/mkdir.html">@mkdir</a> </li>
+ <li><a href="../../decorators/follows.html">@follows / mkdir</a> </li>
+ <li><a href="../../decorators/posttask.html">@posttask touch_file</a> </li>
+ <li><a href="../../decorators/active_if.html">@active_if</a> </li>
+ <li><a href="../../decorators/jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="../../decorators/product.html">@product </a> </li>
+ <li><a href="../../decorators/permutations.html">@permutations </a> </li>
+ <li><a href="../../decorators/combinations.html">@combinations </a> </li>
+ <li><a href="../../decorators/combinations_with_replacement.html">@combinations_ _with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="../../decorators/decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="../../decorators/files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="../../decorators/parallel.html">@parallel</a> </li>
+ <li><a href="../../decorators/check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="../../decorators/indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="onthefly.html" title="Chapter 13: Generating parameters on the fly with @files"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="jobs_limit.html" title="Chapter 11: Manage concurrency for a specific task with @jobs_limit"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="../new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/tutorials/manual/dependencies_code.html b/doc/_build/html/tutorials/manual/dependencies_code.html
new file mode 100644
index 0000000..e6224f9
--- /dev/null
+++ b/doc/_build/html/tutorials/manual/dependencies_code.html
@@ -0,0 +1,289 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>Code for Chapter 9: Checking dependencies to run tasks in order — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../../index.html" />
+ <link rel="next" title="Code for Chapter 16: Logging progress through a pipeline" href="logging_code.html" />
+ <link rel="prev" title="Step 8: Signal the completion of each stage of our pipeline" href="../simple_tutorial/step8_posttask.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="logging_code.html" title="Code for Chapter 16: Logging progress through a pipeline"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="../simple_tutorial/step8_posttask.html" title="Step 8: Signal the completion of each stage of our pipeline"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="../new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <div class="section" id="code-for-chapter-9-checking-dependencies-to-run-tasks-in-order">
+<span id="manual-dependencies-code"></span><h1>Code for Chapter 9: Checking dependencies to run tasks in order<a class="headerlink" href="#code-for-chapter-9-checking-dependencies-to-run-tasks-in-order" title="Permalink to this headline">¶</a></h1>
+<blockquote>
+<div><ul class="simple">
+<li><a class="reference internal" href="manual_contents.html#manual"><em>Manual overview</em></a></li>
+<li><a class="reference internal" href="dependencies.html#manual-dependencies-example"><em>Back</em></a></li>
+</ul>
+<p>This example shows how dependencies work.</p>
+</div></blockquote>
+<div class="section" id="code">
+<h2>Code<a class="headerlink" href="#code" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+<span class="kn">import</span> <span class="nn">json</span>
+
+<span class="kn">import</span> <span class="nn">time</span>
+<span class="k">def</span> <span class="nf">task_helper</span><span class="p">(</span><span class="n">infile</span><span class="p">,</span> <span class="n">outfile</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> cat input file content to output file</span>
+<span class="sd"> after writing out job parameters</span>
+<span class="sd"> """</span>
+ <span class="k">if</span> <span class="n">infile</span><span class="p">:</span>
+ <span class="n">output_text</span> <span class="o">=</span> <span class="s">""</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="nb">sorted</span><span class="p">(</span><span class="nb">open</span><span class="p">(</span><span class="n">infile</span><span class="p">)</span><span class="o">.</span><span class="n">readlines</span><span class="p">()))</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="n">output_text</span> <span class="o">=</span> <span class="s">"None"</span>
+ <span class="n">output_text</span> <span class="o">+=</span> <span class="n">json</span><span class="o">.</span><span class="n">dumps</span><span class="p">(</span><span class="n">infile</span><span class="p">)</span> <span class="o">+</span> <span class="s">" -> "</span> <span class="o">+</span> <span class="n">json</span><span class="o">.</span><span class="n">dumps</span><span class="p">(</span><span class="n">outfile</span><span class="p">)</span> <span class="o">+</ [...]
+ <span class="nb">open</span><span class="p">(</span><span class="n">outfile</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="n">output_text</span><span class="p">)</span>
+
+
+
+<span class="c">#</span>
+<span class="c"># task1</span>
+<span class="c">#</span>
+<span class="nd">@files</span><span class="p">(</span><span class="bp">None</span><span class="p">,</span> <span class="s">'a.1'</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">task1</span><span class="p">(</span><span class="n">infile</span><span class="p">,</span> <span class="n">outfile</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> First task</span>
+<span class="sd"> """</span>
+ <span class="n">task_helper</span><span class="p">(</span><span class="n">infile</span><span class="p">,</span> <span class="n">outfile</span><span class="p">)</span>
+
+
+
+<span class="c">#</span>
+<span class="c"># task2</span>
+<span class="c">#</span>
+<span class="nd">@transform</span><span class="p">(</span><span class="n">task1</span><span class="p">,</span> <span class="n">regex</span><span class="p">(</span><span class="s">r'.1'</span><span class="p">),</span> <span class="s">'.2'</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">task2</span><span class="p">(</span><span class="n">infile</span><span class="p">,</span> <span class="n">outfile</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Second task</span>
+<span class="sd"> """</span>
+ <span class="n">task_helper</span><span class="p">(</span><span class="n">infile</span><span class="p">,</span> <span class="n">outfile</span><span class="p">)</span>
+
+
+
+<span class="c">#</span>
+<span class="c"># task3</span>
+<span class="c">#</span>
+<span class="nd">@transform</span><span class="p">(</span><span class="n">task2</span><span class="p">,</span> <span class="n">regex</span><span class="p">(</span><span class="s">r'.2'</span><span class="p">),</span> <span class="s">'.3'</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">task3</span><span class="p">(</span><span class="n">infile</span><span class="p">,</span> <span class="n">outfile</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Third task</span>
+<span class="sd"> """</span>
+ <span class="n">task_helper</span><span class="p">(</span><span class="n">infile</span><span class="p">,</span> <span class="n">outfile</span><span class="p">)</span>
+
+
+
+<span class="c">#</span>
+<span class="c"># task4</span>
+<span class="c">#</span>
+<span class="nd">@transform</span><span class="p">(</span><span class="n">task3</span><span class="p">,</span> <span class="n">regex</span><span class="p">(</span><span class="s">r'.3'</span><span class="p">),</span> <span class="s">'.4'</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">task4</span><span class="p">(</span><span class="n">infile</span><span class="p">,</span> <span class="n">outfile</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Fourth task</span>
+<span class="sd"> """</span>
+ <span class="n">task_helper</span><span class="p">(</span><span class="n">infile</span><span class="p">,</span> <span class="n">outfile</span><span class="p">)</span>
+
+<span class="n">pipeline_printout_graph</span> <span class="p">(</span><span class="s">"flowchart.png"</span><span class="p">,</span> <span class="s">"png"</span><span class="p">,</span> <span class="p">[</span><span class="n">task4</span><span class="p">],</span> <span class="n">draw_vertically</span> <span class="o">=</span> <span class="bp">True</span><span class="p">,</span> <span class="n">no_key_legend</span> <span class="o">=</span> <span class="bp">True</span> [...]
+<span class="n">pipeline_run</span><span class="p">([</span><span class="n">task4</span><span class="p">])</span>
+</pre></div>
+</div>
+</div></blockquote>
+</div>
+<div class="section" id="resulting-output">
+<h2>Resulting Output<a class="headerlink" href="#resulting-output" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="gp">>>> </span><span class="n">pipeline_run</span><span class="p">([</span><span class="n">task4</span><span class="p">],</span> <span class="n">multiprocess</span> <span class="o">=</span> <span class="mi">10</span><span class="p">,</span> <span class="n">logger</span> <span class="o">=</span> <span class="n">logger_proxy</span><span class="p">)</span>
+<span class="go"> job = [null, "a.1"]</span>
+<span class="go"> job = ["a.1", "a.2"]</span>
+<span class="go"> job = ["a.2", "a.3"]</span>
+<span class="go"> job = ["a.3", "a.4"]</span>
+</pre></div>
+</div>
+</div></blockquote>
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="../../contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#">Code for Chapter 9: Checking dependencies to run tasks in order</a><ul>
+<li><a class="reference internal" href="#code">Code</a></li>
+<li><a class="reference internal" href="#resulting-output">Resulting Output</a></li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="../simple_tutorial/step8_posttask.html"
+ title="previous chapter">Step 8: Signal the completion of each stage of our pipeline</a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="logging_code.html"
+ title="next chapter">Code for Chapter 16: Logging progress through a pipeline</a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../../_sources/tutorials/manual/dependencies_code.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="../../decorators/decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="../../decorators/originate.html">@originate</a> </li>
+ <li><a href="../../decorators/split.html">@split</a> </li>
+ <li><a href="../../decorators/transform.html">@transform</a> </li>
+ <li><a href="../../decorators/merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="../../decorators/subdivide.html">@subdivide</a> </li>
+ <li><a href="../../decorators/transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="../../decorators/collate.html">@collate</a> </li>
+ <li><a href="../../decorators/collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="../../decorators/graphviz.html">@graphviz</a> </li>
+ <li><a href="../../decorators/mkdir.html">@mkdir</a> </li>
+ <li><a href="../../decorators/follows.html">@follows / mkdir</a> </li>
+ <li><a href="../../decorators/posttask.html">@posttask touch_file</a> </li>
+ <li><a href="../../decorators/active_if.html">@active_if</a> </li>
+ <li><a href="../../decorators/jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="../../decorators/product.html">@product </a> </li>
+ <li><a href="../../decorators/permutations.html">@permutations </a> </li>
+ <li><a href="../../decorators/combinations.html">@combinations </a> </li>
+ <li><a href="../../decorators/combinations_with_replacement.html">@combinations_ _with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="../../decorators/decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="../../decorators/files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="../../decorators/parallel.html">@parallel</a> </li>
+ <li><a href="../../decorators/check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="../../decorators/indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="logging_code.html" title="Code for Chapter 16: Logging progress through a pipeline"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="../simple_tutorial/step8_posttask.html" title="Step 8: Signal the completion of each stage of our pipeline"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="../new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/tutorials/manual/exceptions.html b/doc/_build/html/tutorials/manual/exceptions.html
new file mode 100644
index 0000000..02afe3f
--- /dev/null
+++ b/doc/_build/html/tutorials/manual/exceptions.html
@@ -0,0 +1,255 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>Chapter 18: Exceptions thrown inside a pipeline — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../../index.html" />
+ <link rel="next" title="Chapter 19: Logging progress through a pipeline" href="logging.html" />
+ <link rel="prev" title="Chapter 17: Writing custom functions to decide which jobs are up to date" href="check_if_uptodate.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="logging.html" title="Chapter 19: Logging progress through a pipeline"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="check_if_uptodate.html" title="Chapter 17: Writing custom functions to decide which jobs are up to date"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="../new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <div class="section" id="manual-exceptions-chapter-num-exceptions-thrown-inside-a-pipeline">
+<span id="manual-exceptions"></span><h1><strong>Chapter 18</strong>: <cite>Exceptions thrown inside a pipeline</cite><a class="headerlink" href="#manual-exceptions-chapter-num-exceptions-thrown-inside-a-pipeline" title="Permalink to this headline">¶</a></h1>
+<blockquote>
+<div><table class="hlist"><tr><td><ul class="simple">
+<li><a class="reference internal" href="manual_contents.html#manual"><em>Manual overview</em></a></li>
+</ul>
+</td><td><ul class="simple">
+</ul>
+</td></tr></table>
+<p id="index-0">The goal for <strong>Ruffus</strong> is that exceptions should just work <em>out-of-the-box</em> without any fuss.
+This is especially important for exceptions that come from your code which may be raised
+in a different process. Often multiple parallel operations (jobs or tasks) fail at the
+same time. <strong>Ruffus</strong> will forward each of these exceptions with the tracebacks so you
+can jump straight to the offending line.</p>
+<p>This example shows separate exceptions from two jobs running in parallel:</p>
+<blockquote>
+<div><img alt="../../_images/manual_exceptions.png" src="../../_images/manual_exceptions.png" />
+</div></blockquote>
+</div></blockquote>
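+<p>Below is a minimal sketch of the kind of pipeline that produces such a report (it is not part of the
+original manual, and the task name and error messages are made up for illustration): two
+<tt class="docutils literal"><span class="pre">@parallel</span></tt> jobs each raise their own exception, and
+<tt class="docutils literal"><span class="pre">pipeline_run()</span></tt> re-throws them together with both tracebacks:</p>
+<blockquote>
+<div><div class="highlight-python"><pre>from ruffus import *
+
+@parallel([['A', 1], ['B', 3]])
+def failing_task(name, param1):
+    # each job fails independently, possibly in a separate process
+    raise Exception("Error in job %s" % name)
+
+# both exceptions are collected and re-thrown together,
+# each with the traceback of the offending line
+pipeline_run([failing_task], multiprocess = 2)</pre>
+</div>
+</div></blockquote>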
+<span class="target" id="manual-exceptions-multiple-errors"></span><div class="section" id="multiple-errors">
+<span id="index-1"></span><h2>Multiple Errors<a class="headerlink" href="#multiple-errors" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div>For any task where exceptions are thrown, <em>Ruffus</em> will continue executing all the jobs
+currently in progress (up to the maximum number of concurrent jobs
+(<tt class="docutils literal"><span class="pre">multiprocess</span></tt>) set in <a class="reference internal" href="../../pipeline_functions.html#pipeline-functions-pipeline-run"><em>pipeline_run</em></a>).
+Each of these may raise separate exceptions.
+This seems a fair tradeoff between being able to gather detailed error information for
+running jobs, and not wasting too much time for a task that is going to fail anyway.</div></blockquote>
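+<p>The combined error can be caught like any other exception. The following sketch (the task and variable
+names are illustrative, not from the original manual) shows that however many jobs fail, a single
+<tt class="docutils literal"><span class="pre">try/except</span></tt> around
+<tt class="docutils literal"><span class="pre">pipeline_run()</span></tt> is enough to report all of them:</p>
+<blockquote>
+<div><div class="highlight-python"><pre>from ruffus import *
+
+@parallel([['A', 1], ['B', 3], ['C', 5]])
+def fragile_task(name, param1):
+    raise ValueError("job %s failed" % name)
+
+try:
+    # up to two jobs run (and may fail) at the same time
+    pipeline_run([fragile_task], multiprocess = 2)
+except Exception as combined_error:
+    # the single re-thrown error carries every collected exception
+    print(combined_error)</pre>
+</div>
+</div></blockquote>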
+</div>
+<div class="section" id="interrupting-the-pipeline">
+<span id="manual-exceptions-interrupting"></span><span id="index-2"></span><h2>Interrupting the pipeline<a class="headerlink" href="#interrupting-the-pipeline" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>If your task function raises a <tt class="docutils literal"><span class="pre">Ruffus.JobSignalledBreak</span></tt> exception
+(or, as in the example below, a <tt class="docutils literal"><span class="pre">@parallel</span></tt> job returns <tt class="docutils literal"><span class="pre">False</span></tt>),
+the pipeline halts immediately at that point, without waiting for other jobs in the queue to complete:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+<span class="nd">@parallel</span><span class="p">([[</span><span class="s">'A'</span><span class="p">,</span> <span class="mi">1</span><span class="p">],</span> <span class="p">[</span><span class="s">'B'</span><span class="p">,</span><span class="mi">3</span><span class="p">]])</span>
+<span class="k">def</span> <span class="nf">parallel_task</span><span class="p">(</span><span class="n">name</span><span class="p">,</span> <span class="n">param1</span><span class="p">):</span>
+ <span class="k">if</span> <span class="n">name</span> <span class="o">==</span> <span class="s">'A'</span><span class="p">:</span> <span class="k">return</span> <span class="bp">False</span>
+
+<span class="n">pipeline_run</span><span class="p">([</span><span class="n">parallel_task</span><span class="p">])</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p>produces the following (abbreviated):</p>
+<blockquote>
+<div><div class="highlight-python"><pre>task.RethrownJobError:
+
+ Exceptions running jobs for
+ 'def parallel_task(...):'
+
+ Original exception:
+
+ Exception #1
+ task.JobSignalledBreak: Job = ["A", 1] returned False
+ for Job = ["A", 1]</pre>
+</div>
+</div></blockquote>
+</div></blockquote>
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="../../contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#"><strong>Chapter 18</strong>: <cite>Exceptions thrown inside a pipeline</cite></a><ul>
+<li><a class="reference internal" href="#multiple-errors">Multiple Errors</a></li>
+<li><a class="reference internal" href="#interrupting-the-pipeline">Interrupting the pipeline</a></li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="check_if_uptodate.html"
+ title="previous chapter"><strong>Chapter 17</strong>: <cite>Writing custom functions to decide which jobs are up to date</cite></a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="logging.html"
+ title="next chapter"><strong>Chapter 19</strong>: <cite>Logging progress through a pipeline</cite></a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../../_sources/tutorials/manual/exceptions.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="../../decorators/decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="../../decorators/originate.html">@originate</a> </li>
+ <li><a href="../../decorators/split.html">@split</a> </li>
+ <li><a href="../../decorators/transform.html">@transform</a> </li>
+ <li><a href="../../decorators/merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="../../decorators/subdivide.html">@subdivide</a> </li>
+ <li><a href="../../decorators/transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="../../decorators/collate.html">@collate</a> </li>
+ <li><a href="../../decorators/collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="../../decorators/graphviz.html">@graphviz</a> </li>
+ <li><a href="../../decorators/mkdir.html">@mkdir</a> </li>
+ <li><a href="../../decorators/follows.html">@follows / mkdir</a> </li>
+ <li><a href="../../decorators/posttask.html">@posttask touch_file</a> </li>
+ <li><a href="../../decorators/active_if.html">@active_if</a> </li>
+ <li><a href="../../decorators/jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="../../decorators/product.html">@product </a> </li>
+ <li><a href="../../decorators/permutations.html">@permutations </a> </li>
+ <li><a href="../../decorators/combinations.html">@combinations </a> </li>
+            <li><a href="../../decorators/combinations_with_replacement.html">@combinations_with_replacement </a>    </li>
+ </ul>
+
+
+ <h4><a href="../../decorators/decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="../../decorators/files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="../../decorators/parallel.html">@parallel</a> </li>
+ <li><a href="../../decorators/check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="../../decorators/indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="logging.html" title="Chapter 19: Logging progress through a pipeline"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="check_if_uptodate.html" title="Chapter 17: Writing custom functions to decide which jobs are up to date"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="../new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/tutorials/manual/files.html b/doc/_build/html/tutorials/manual/files.html
new file mode 100644
index 0000000..a0c8136
--- /dev/null
+++ b/doc/_build/html/tutorials/manual/files.html
@@ -0,0 +1,397 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>Chapter 3: Passing parameters to the pipeline with @files — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../../index.html" />
+ <link rel="next" title="Chapter 4: Chaining pipeline Tasks together automatically" href="tasks_and_globs_in_inputs.html" />
+ <link rel="prev" title="Chapter 2: Tasks and Recipes" href="tasks_as_recipes.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="tasks_and_globs_in_inputs.html" title="Chapter 4: Chaining pipeline Tasks together automatically"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="tasks_as_recipes.html" title="Chapter 2: Tasks and Recipes"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="../new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <div class="section" id="manual-files-chapter-num-passing-parameters-to-the-pipeline-with-files">
+<span id="manual-files"></span><h1><strong>Chapter 3</strong>: <cite>Passing parameters to the pipeline with</cite> <strong>@files</strong><a class="headerlink" href="#manual-files-chapter-num-passing-parameters-to-the-pipeline-with-files" title="Permalink to this headline">¶</a></h1>
+<blockquote>
+<div><table class="hlist"><tr><td><ul class="simple">
+<li><a class="reference internal" href="manual_contents.html#manual"><em>Manual overview</em></a></li>
+</ul>
+</td><td><ul class="simple">
+<li><a class="reference internal" href="../../decorators/files.html#decorators-files"><em>@files syntax in detail</em></a></li>
+</ul>
+</td></tr></table>
+<div class="line-block">
+<div class="line">The python functions which do the actual work of each stage or
+<a class="reference internal" href="../../glossary.html#term-task"><em class="xref std std-term">task</em></a> of a <strong>Ruffus</strong> pipeline are written by you.</div>
+<div class="line">The role of <strong>Ruffus</strong> is to make sure these functions are called in the right order,
+with the right parameters, running in parallel using multiprocessing if desired.</div>
+</div>
+<p>The easiest way to specify parameters to <em>Ruffus</em> <a class="reference internal" href="../../glossary.html#term-task"><em class="xref std std-term">task</em></a> functions is to use
+the <a class="reference internal" href="../../decorators/files.html#decorators-files"><em>@files</em></a> decorator.</p>
+</div></blockquote>
+<div class="section" id="files">
+<span id="index-0"></span><h2><strong>@files</strong><a class="headerlink" href="#files" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>Running this code:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+
+<span class="nd">@files</span><span class="p">(</span><span class="s">'a.1'</span><span class="p">,</span> <span class="p">[</span><span class="s">'a.2'</span><span class="p">,</span> <span class="s">'b.2'</span><span class="p">],</span> <span class="s">'A file'</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">single_job_io_task</span><span class="p">(</span><span class="n">infile</span><span class="p">,</span> <span class="n">outfiles</span><span class="p">,</span> <span class="n">text</span><span class="p">):</span>
+ <span class="k">for</span> <span class="n">o</span> <span class="ow">in</span> <span class="n">outfiles</span><span class="p">:</span> <span class="nb">open</span><span class="p">(</span><span class="n">o</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+
+<span class="c"># prepare input file</span>
+<span class="nb">open</span><span class="p">(</span><span class="s">'a.1'</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+
+<span class="n">pipeline_run</span><span class="p">()</span>
+</pre></div>
+</div>
+<dl class="docutils">
+<dt>Is equivalent to calling:</dt>
+<dd><div class="first last highlight-python"><div class="highlight"><pre><span class="n">single_job_io_task</span><span class="p">(</span><span class="s">'a.1'</span><span class="p">,</span> <span class="p">[</span><span class="s">'a.2'</span><span class="p">,</span> <span class="s">'b.2'</span><span class="p">],</span> <span class="s">'A file'</span><span class="p">)</span>
+</pre></div>
+</div>
+</dd>
+<dt>And produces:</dt>
+<dd><div class="first last highlight-python"><div class="highlight"><pre><span class="gp">>>> </span><span class="n">pipeline_run</span><span class="p">()</span>
+<span class="go"> Job = [a.1 -> [a.2, b.2], A file] completed</span>
+<span class="go">Completed Task = single_job_io_task</span>
+</pre></div>
+</div>
+</dd>
+</dl>
+</div></blockquote>
+<p><strong>Ruffus</strong> will automatically check if your task is up to date. The second time <a class="reference internal" href="../../pipeline_functions.html#pipeline-functions-pipeline-run"><em>pipeline_run()</em></a>
+is called, nothing will happen. But if you update <tt class="docutils literal"><span class="pre">a.1</span></tt>, the task will rerun:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="gp">>>> </span><span class="nb">open</span><span class="p">(</span><span class="s">'a.1'</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+<span class="gp">>>> </span><span class="n">pipeline_run</span><span class="p">()</span>
+<span class="go"> Job = [a.1 -> [a.2, b.2], A file] completed</span>
+<span class="go">Completed Task = single_job_io_task</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p>See <a class="reference internal" href="tasks_as_recipes.html#manual-skip-up-to-date"><em>chapter 2</em></a> for a more in-depth discussion of how <strong>Ruffus</strong>
+decides which parts of the pipeline are complete and up-to-date.</p>
+</div></blockquote>
+</div>
+<div class="section" id="running-the-same-code-on-different-parameters-in-parallel">
+<span id="manual-files-parallel"></span><span id="index-1"></span><h2>Running the same code on different parameters in parallel<a class="headerlink" href="#running-the-same-code-on-different-parameters-in-parallel" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>Your pipeline may require the same function to be called multiple times on independent parameters.
+In this case, you can supply all the parameters to @files; each set will be sent to a separate job, and the jobs
+may run in parallel if necessary. <strong>Ruffus</strong> will check whether each separate <a class="reference internal" href="../../glossary.html#term-job"><em class="xref std std-term">job</em></a> is up to date using
+the <em>inputs</em> and <em>outputs</em> (first two) parameters (see <a class="reference internal" href="tasks_as_recipes.html#manual-io-parameters"><em>chapter 2</em></a>).</p>
+<p>For example, if a sequence
+(e.g. a list or tuple) of 5 sets of parameters is passed to <strong>@files</strong>,
+there will be 5 separate jobs:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+<span class="n">parameters</span> <span class="o">=</span> <span class="p">[</span>
+ <span class="p">[</span> <span class="s">'job1.file'</span> <span class="p">],</span> <span class="c"># 1st job</span>
+    <span class="p">[</span> <span class="s">'job2.file'</span><span class="p">,</span> <span class="mi">4</span> <span class="p">],</span> <span class="c"># 2nd job</span>
+    <span class="p">[</span> <span class="s">'job3.file'</span><span class="p">,</span> <span class="p">[</span><span class="mi">3</span><span class="p">,</span> <span class="mi">2</span><span class="p">]</span> <span class="p">],</span> <span class="c"># 3rd job</span>
+    <span class="p">[</span> <span class="mi">67</span><span class="p">,</span> <span class="p">[</span><span class="mi">13</span><span class="p">,</span> <span class="s">'job4.file'</span><span class="p">]</span> <span class="p">],</span> <span class="c"># 4th job</span>
+    <span class="p">[</span> <span class="s">'job5.file'</span> <span class="p">],</span> <span class="c"># 5th job</span>
+ <span class="p">]</span>
+<span class="nd">@files</span><span class="p">(</span><span class="n">parameters</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">task_file</span><span class="p">(</span><span class="o">*</span><span class="n">params</span><span class="p">):</span>
+ <span class="s">""</span>
+</pre></div>
+</div>
+</div></blockquote>
+<div class="line-block">
+<div class="line"><strong>Ruffus</strong> creates as many jobs as there are elements in <tt class="docutils literal"><span class="pre">parameters</span></tt>.</div>
+<div class="line">In turn, each of these elements consist of series of parameters which will be
+passed to each separate job.</div>
+</div>
+<p>Thus the above code is equivalent to calling:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="n">task_file</span><span class="p">(</span><span class="s">'job1.file'</span><span class="p">)</span>
+<span class="n">task_file</span><span class="p">(</span><span class="s">'job2.file'</span><span class="p">,</span> <span class="mi">4</span><span class="p">)</span>
+<span class="n">task_file</span><span class="p">(</span><span class="s">'job3.file'</span><span class="p">,</span> <span class="p">[</span><span class="mi">3</span><span class="p">,</span> <span class="mi">2</span><span class="p">])</span>
+<span class="n">task_file</span><span class="p">(</span><span class="mi">67</span><span class="p">,</span> <span class="p">[</span><span class="mi">13</span><span class="p">,</span> <span class="s">'job4.file'</span><span class="p">])</span>
+<span class="n">task_file</span><span class="p">(</span><span class="s">'job5.file'</span><span class="p">)</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p>What <tt class="docutils literal"><span class="pre">task_file()</span></tt> does with these parameters is up to you!</p>
+<p>The only constraint on the parameters is that <strong>Ruffus</strong> will treat the first
+parameter of each job as the <em>inputs</em> and the second as the <em>output</em>. Any
+strings in the <em>inputs</em> or <em>output</em> parameters (including those nested in sequences)
+will be treated as file names.</p>
+<p>Thus, to pick the parameters out of one of the above jobs:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="n">task_file</span><span class="p">(</span><span class="mi">67</span><span class="p">,</span> <span class="p">[</span><span class="mi">13</span><span class="p">,</span> <span class="s">'job4.file'</span><span class="p">])</span>
+</pre></div>
+</div>
+<div class="line-block">
+<div class="line"><em>inputs</em> == <tt class="docutils literal"><span class="pre">67</span></tt></div>
+<div class="line"><em>outputs</em> == <tt class="docutils literal"><span class="pre">[13,</span> <span class="pre">'job4.file']</span></tt></div>
+<div class="line"><br /></div>
+<div class="line-block">
+<div class="line">The solitary output filename is <tt class="docutils literal"><span class="pre">job4.file</span></tt></div>
+</div>
+</div>
+</div></blockquote>
+</div></blockquote>
+<div class="section" id="checking-if-jobs-are-up-to-date">
+<span id="manual-files-example"></span><span id="manual-files-is-uptodate"></span><span id="index-2"></span><h3>Checking if jobs are up to date<a class="headerlink" href="#checking-if-jobs-are-up-to-date" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><div class="line-block">
+<div class="line">Usually we do not want to run all the stages in a pipeline but only where
+the input data has changed or is no longer up to date.</div>
+<div class="line">One easy way to do this is to check the modification times for files produced
+at each stage of the pipeline.</div>
+</div>
+<div class="line-block">
+<div class="line">Let us first create our starting files <tt class="docutils literal"><span class="pre">a.1</span></tt> and <tt class="docutils literal"><span class="pre">b.1</span></tt></div>
+<div class="line">We can then run the following pipeline function to create</div>
+</div>
+<blockquote>
+<div><ul class="simple">
+<li><tt class="docutils literal"><span class="pre">a.2</span></tt> from <tt class="docutils literal"><span class="pre">a.1</span></tt> and</li>
+<li><tt class="docutils literal"><span class="pre">b.2</span></tt> from <tt class="docutils literal"><span class="pre">b.1</span></tt></li>
+</ul>
+<div class="highlight-python"><div class="highlight"><pre><span class="c"># create starting files</span>
+<span class="nb">open</span><span class="p">(</span><span class="s">"a.1"</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+<span class="nb">open</span><span class="p">(</span><span class="s">"b.1"</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+
+
+<span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+<span class="n">parameters</span> <span class="o">=</span> <span class="p">[</span>
+ <span class="p">[</span> <span class="s">'a.1'</span><span class="p">,</span> <span class="s">'a.2'</span><span class="p">,</span> <span class="s">'A file'</span><span class="p">],</span> <span class="c"># 1st job</span>
+ <span class="p">[</span> <span class="s">'b.1'</span><span class="p">,</span> <span class="s">'b.2'</span><span class="p">,</span> <span class="s">'B file'</span><span class="p">],</span> <span class="c"># 2nd job</span>
+ <span class="p">]</span>
+
+<span class="nd">@files</span><span class="p">(</span><span class="n">parameters</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">parallel_io_task</span><span class="p">(</span><span class="n">infile</span><span class="p">,</span> <span class="n">outfile</span><span class="p">,</span> <span class="n">text</span><span class="p">):</span>
+ <span class="c"># copy infile contents to outfile</span>
+ <span class="n">infile_text</span> <span class="o">=</span> <span class="nb">open</span><span class="p">(</span><span class="n">infile</span><span class="p">)</span><span class="o">.</span><span class="n">read</span><span class="p">()</span>
+    <span class="n">f</span> <span class="o">=</span> <span class="nb">open</span><span class="p">(</span><span class="n">outfile</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="n">infile_text</span> <span class="o">+</span> <span class="s">"</span><span class="se">\n</span><span class="s">"</span> <span class="o">+</span> <span class="n">text</span><span class="p">)</span>
+
+<span class="n">pipeline_run</span><span class="p">()</span>
+</pre></div>
+</div>
+</div></blockquote>
+<dl class="docutils">
+<dt>This produces the following output:</dt>
+<dd><div class="first last highlight-python"><div class="highlight"><pre><span class="gp">>>> </span><span class="n">pipeline_run</span><span class="p">()</span>
+<span class="go"> Job = [a.1 -> a.2, A file] completed</span>
+<span class="go"> Job = [b.1 -> b.2, B file] completed</span>
+<span class="go">Completed Task = parallel_io_task</span>
+</pre></div>
+</div>
+</dd>
+</dl>
+<div class="line-block">
+<div class="line">If you called <a class="reference internal" href="../../pipeline_functions.html#pipeline-functions-pipeline-run"><em>pipeline_run()</em></a> again, nothing would happen because the files are up to date:</div>
+<div class="line"><tt class="docutils literal"><span class="pre">a.2</span></tt> is more recent than <tt class="docutils literal"><span class="pre">a.1</span></tt> and</div>
+<div class="line"><tt class="docutils literal"><span class="pre">b.2</span></tt> is more recent than <tt class="docutils literal"><span class="pre">b.1</span></tt></div>
+</div>
+<dl class="docutils">
+<dt>However, if you subsequently modified <tt class="docutils literal"><span class="pre">a.1</span></tt> again:</dt>
+<dd><div class="first last highlight-python"><div class="highlight"><pre><span class="nb">open</span><span class="p">(</span><span class="s">"a.1"</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+<span class="n">pipeline_run</span><span class="p">(</span><span class="n">verbose</span> <span class="o">=</span> <span class="mi">1</span><span class="p">)</span>
+</pre></div>
+</div>
+</dd>
+</dl>
+<p>you would see the following:</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="gp">>>> </span><span class="n">pipeline_run</span><span class="p">([</span><span class="n">parallel_io_task</span><span class="p">])</span>
+<span class="go">Task = parallel_io_task</span>
+<span class="go"> Job = ["a.1" -> "a.2", "A file"] completed</span>
+<span class="go"> Job = ["b.1" -> "b.2", "B file"] unnecessary: already up to date</span>
+<span class="go">Completed Task = parallel_io_task</span>
+</pre></div>
+</div>
+<p>The 2nd job is up to date and will be skipped.</p>
+</div></blockquote>
+</div>
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="../../contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#"><strong>Chapter 3</strong>: <cite>Passing parameters to the pipeline with</cite> <strong>@files</strong></a><ul>
+<li><a class="reference internal" href="#files"><strong>@files</strong></a></li>
+<li><a class="reference internal" href="#running-the-same-code-on-different-parameters-in-parallel">Running the same code on different parameters in parallel</a><ul>
+<li><a class="reference internal" href="#checking-if-jobs-are-up-to-date">Checking if jobs are up to date</a></li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="tasks_as_recipes.html"
+ title="previous chapter"><strong>Chapter 2</strong>: <cite>Tasks and Recipes</cite></a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="tasks_and_globs_in_inputs.html"
+ title="next chapter"><strong>Chapter 4</strong>: Chaining pipeline <cite>Tasks</cite> together automatically</a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../../_sources/tutorials/manual/files.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="../../decorators/decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="../../decorators/originate.html">@originate</a> </li>
+ <li><a href="../../decorators/split.html">@split</a> </li>
+ <li><a href="../../decorators/transform.html">@transform</a> </li>
+ <li><a href="../../decorators/merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="../../decorators/subdivide.html">@subdivide</a> </li>
+ <li><a href="../../decorators/transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="../../decorators/collate.html">@collate</a> </li>
+ <li><a href="../../decorators/collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="../../decorators/graphviz.html">@graphviz</a> </li>
+ <li><a href="../../decorators/mkdir.html">@mkdir</a> </li>
+ <li><a href="../../decorators/follows.html">@follows / mkdir</a> </li>
+ <li><a href="../../decorators/posttask.html">@posttask touch_file</a> </li>
+ <li><a href="../../decorators/active_if.html">@active_if</a> </li>
+ <li><a href="../../decorators/jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="../../decorators/product.html">@product </a> </li>
+ <li><a href="../../decorators/permutations.html">@permutations </a> </li>
+ <li><a href="../../decorators/combinations.html">@combinations </a> </li>
+            <li><a href="../../decorators/combinations_with_replacement.html">@combinations_with_replacement </a>    </li>
+ </ul>
+
+
+ <h4><a href="../../decorators/decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="../../decorators/files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="../../decorators/parallel.html">@parallel</a> </li>
+ <li><a href="../../decorators/check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="../../decorators/indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="tasks_and_globs_in_inputs.html" title="Chapter 4: Chaining pipeline Tasks together automatically"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="tasks_as_recipes.html" title="Chapter 2: Tasks and Recipes"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="../new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/tutorials/manual/files_re.html b/doc/_build/html/tutorials/manual/files_re.html
new file mode 100644
index 0000000..3e776fe
--- /dev/null
+++ b/doc/_build/html/tutorials/manual/files_re.html
@@ -0,0 +1,311 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>Chapter 20: @files_re: Deprecated syntax using regular expressions — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../../index.html" />
+ <link rel="next" title="Recipes" href="../../recipes.html" />
+ <link rel="prev" title="Chapter 19: Logging progress through a pipeline" href="logging.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="../../recipes.html" title="Recipes"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="logging.html" title="Chapter 19: Logging progress through a pipeline"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="../new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <div class="section" id="manual-files-re-chapter-num-files-re-deprecated-syntax-using-regular-expressions">
+<span id="manual-files-re"></span><h1><strong>Chapter 20</strong>: <strong>@files_re</strong>: Deprecated <cite>syntax using regular expressions</cite><a class="headerlink" href="#manual-files-re-chapter-num-files-re-deprecated-syntax-using-regular-expressions" title="Permalink to this headline">¶</a></h1>
+<blockquote>
+<div><table class="hlist"><tr><td><ul class="simple">
+<li><a class="reference internal" href="manual_contents.html#manual"><em>Manual overview</em></a></li>
+</ul>
+</td><td><ul class="simple">
+<li><a class="reference internal" href="../../decorators/files_re.html#decorators-files-re"><em>@files_re</em></a> syntax in detail</li>
+</ul>
+</td></tr></table>
+</div></blockquote>
+<div class="section" id="files-re">
+<span id="index-0"></span><h2><strong>@files_re</strong><a class="headerlink" href="#files-re" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>This is older, now deprecated syntax.</p>
+<p><strong>@files_re</strong> combines the functionality of @transform, @collate and @merge in
+one overloaded decorator.</p>
+<p>Its use is discouraged precisely because the <strong>@files_re</strong> syntax is far too overloaded
+and context-dependent to support its many different functions clearly.</p>
+<p>The following documentation is provided to help maintain historical <strong>ruffus</strong> usage.</p>
+</div></blockquote>
+<div class="section" id="transforming-input-and-output-filenames">
+<h3>Transforming input and output filenames<a class="headerlink" href="#transforming-input-and-output-filenames" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><p>For example, the following code from <strong>Chapter 8</strong> takes files from
+the previous pipeline task, and makes new output parameters with the <tt class="docutils literal"><span class="pre">.sums</span></tt> suffix
+in place of the <tt class="docutils literal"><span class="pre">.chunks</span></tt> suffix:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="nd">@transform</span><span class="p">(</span><span class="n">step_4_split_numbers_into_chunks</span><span class="p">,</span> <span class="n">suffix</span><span class="p">(</span><span class="s">".chunks"</span><span class="p">),</span> <span class="s">".sums"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">step_5_calculate_sum_of_squares</span> <span class="p">(</span><span class="n">input_file_name</span><span class="p">,</span> <span class="n">output_file_name</span><span class="p">):</span>
+ <span class="c">#</span>
+ <span class="c"># calculate sums and sums of squares for all values in the input_file_name</span>
+ <span class="c"># writing to output_file_name</span>
+ <span class="s">""</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p>This can be written equivalently using @files_re:</p>
+<blockquote>
+<div><div class="highlight-python"><pre>@files_re(step_4_split_numbers_into_chunks, r".chunks", r".sums")
+def step_5_calculate_sum_of_squares (input_file_name, output_file_name):
+""</pre>
+</div>
+</div></blockquote>
+</div></blockquote>
+<span class="target" id="manual-files-re-combine"></span></div>
+<div class="section" id="collating-many-inputs-into-a-single-output">
+<span id="index-1"></span><h3>Collating many <em>inputs</em> into a single <em>output</em><a class="headerlink" href="#collating-many-inputs-into-a-single-output" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><p>Similarly, the following code from <a class="reference internal" href="collate.html#manual-collate"><em>the chapter on @collate</em></a> collects <strong>inputs</strong>
+from the same species in the same directory:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="nd">@collate</span><span class="p">(</span><span class="s">'*.animals'</span><span class="p">,</span> <span class="c"># inputs = all *.animal files</span>
+ <span class="n">regex</span><span class="p">(</span><span class="s">r'mammals.([^.]+)'</span><span class="p">),</span> <span class="c"># regular expression</span>
+ <span class="s">r'\1/animals.in_my_zoo'</span><span class="p">,</span> <span class="c"># single output file per species</span>
+ <span class="s">r'\1'</span> <span class="p">)</span> <span class="c"># species name</span>
+<span class="k">def</span> <span class="nf">capture_mammals</span><span class="p">(</span><span class="n">infiles</span><span class="p">,</span> <span class="n">outfile</span><span class="p">,</span> <span class="n">species</span><span class="p">):</span>
+ <span class="c"># summarise all animals of this species</span>
+ <span class="s">""</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p>This can be written equivalently using @files_re with the <a class="reference internal" href="../../decorators/indicator_objects.html#decorators-combine"><em>combine</em></a> indicator:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="nd">@files_re</span><span class="p">(</span><span class="s">'*.animals'</span><span class="p">,</span> <span class="c"># inputs = all *.animal files</span>
+ <span class="s">r'mammals.([^.]+)'</span><span class="p">,</span> <span class="c"># regular expression</span>
+ <span class="n">combine</span><span class="p">(</span><span class="s">r'\1/animals.in_my_zoo'</span><span class="p">),</span> <span class="c"># single output file per species</span>
+ <span class="s">r'\1'</span> <span class="p">)</span> <span class="c"># species name</span>
+<span class="k">def</span> <span class="nf">capture_mammals</span><span class="p">(</span><span class="n">infiles</span><span class="p">,</span> <span class="n">outfile</span><span class="p">,</span> <span class="n">species</span><span class="p">):</span>
+ <span class="c"># summarise all animals of this species</span>
+ <span class="s">""</span>
+</pre></div>
+</div>
+</div></blockquote>
+</div></blockquote>
+</div>
+<div class="section" id="generating-input-and-output-parameter-using-regular-expresssions">
+<h3>Generating <em>input</em> and <em>output</em> parameters using regular expressions<a class="headerlink" href="#generating-input-and-output-parameter-using-regular-expresssions" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><p>The following code generates additional <em>Input</em> prerequisite file names which match the original <em>Input</em> file names.</p>
+<p>We want each job of our <tt class="docutils literal"><span class="pre">analyse()</span></tt> function to get corresponding pairs
+of <tt class="docutils literal"><span class="pre">xx.chunks</span></tt> and <tt class="docutils literal"><span class="pre">xx.red_indian</span></tt> files when</p>
+<blockquote>
+<div><p><tt class="docutils literal"><span class="pre">*.chunks</span></tt> are generated by the task function <tt class="docutils literal"><span class="pre">split_up_problem()</span></tt> and
+<tt class="docutils literal"><span class="pre">*.red_indian</span></tt> are generated by the task function <tt class="docutils literal"><span class="pre">make_red_indians()</span></tt>:</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="nd">@follows</span><span class="p">(</span><span class="n">make_red_indians</span><span class="p">)</span>
+<span class="nd">@transform</span><span class="p">(</span><span class="n">split_up_problem</span><span class="p">,</span> <span class="c"># starting set of *inputs*</span>
+ <span class="n">regex</span><span class="p">(</span><span class="s">r"(.*).chunks"</span><span class="p">),</span> <span class="c"># regular expression</span>
+ <span class="n">inputs</span><span class="p">([</span><span class="s">r"\g<0>"</span><span class="p">,</span> <span class="c"># xx.chunks</span>
+ <span class="s">r"\1.red_indian"</span><span class="p">]),</span> <span class="c"># important.file</span>
+ <span class="s">r"\1.results"</span> <span class="c"># xx.results</span>
+ <span class="p">)</span>
+<span class="k">def</span> <span class="nf">analyse</span><span class="p">(</span><span class="n">input_filenames</span><span class="p">,</span> <span class="n">output_file_name</span><span class="p">):</span>
+ <span class="s">"Do analysis here"</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p>The equivalent code using @files_re looks very similar:</p>
+<blockquote>
+<div><div class="highlight-python"><pre>@follows(make_red_indians)
+@files_re( split_up_problem,   # starting set of *inputs*
+           r"(.*).chunks",     # regular expression
+           [r"\g<0>",          # xx.chunks
+            r"\1.red_indian"], # important.file
+           r"\1.results")      # xx.results
+def analyse(input_filenames, output_file_name):
+ "Do analysis here"</pre>
+</div>
+</div></blockquote>
+</div></blockquote>
+</div>
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="../../contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#"><strong>Chapter 20</strong>: <strong>@files_re</strong>: Deprecated <cite>syntax using regular expressions</cite></a><ul>
+<li><a class="reference internal" href="#files-re"><strong>@files_re</strong></a><ul>
+<li><a class="reference internal" href="#transforming-input-and-output-filenames">Transforming input and output filenames</a></li>
+<li><a class="reference internal" href="#collating-many-inputs-into-a-single-output">Collating many <em>inputs</em> into a single <em>output</em></a></li>
+<li><a class="reference internal" href="#generating-input-and-output-parameter-using-regular-expresssions">Generating <em>input</em> and <em>output</em> parameter using regular expresssions</a></li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="logging.html"
+ title="previous chapter"><strong>Chapter 19</strong>: <cite>Logging progress through a pipeline</cite></a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="../../recipes.html"
+ title="next chapter">Recipes</a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../../_sources/tutorials/manual/files_re.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="../../decorators/decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="../../decorators/originate.html">@originate</a> </li>
+ <li><a href="../../decorators/split.html">@split</a> </li>
+ <li><a href="../../decorators/transform.html">@transform</a> </li>
+ <li><a href="../../decorators/merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="../../decorators/subdivide.html">@subdivide</a> </li>
+ <li><a href="../../decorators/transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="../../decorators/collate.html">@collate</a> </li>
+ <li><a href="../../decorators/collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="../../decorators/graphviz.html">@graphviz</a> </li>
+ <li><a href="../../decorators/mkdir.html">@mkdir</a> </li>
+ <li><a href="../../decorators/follows.html">@follows / mkdir</a> </li>
+ <li><a href="../../decorators/posttask.html">@posttask touch_file</a> </li>
+ <li><a href="../../decorators/active_if.html">@active_if</a> </li>
+ <li><a href="../../decorators/jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="../../decorators/product.html">@product </a> </li>
+ <li><a href="../../decorators/permutations.html">@permutations </a> </li>
+ <li><a href="../../decorators/combinations.html">@combinations </a> </li>
+            <li><a href="../../decorators/combinations_with_replacement.html">@combinations_with_replacement </a>    </li>
+ </ul>
+
+
+ <h4><a href="../../decorators/decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="../../decorators/files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="../../decorators/parallel.html">@parallel</a> </li>
+ <li><a href="../../decorators/check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="../../decorators/indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="../../recipes.html" title="Recipes"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="logging.html" title="Chapter 19: Logging progress through a pipeline"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="../new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/tutorials/manual/follows.html b/doc/_build/html/tutorials/manual/follows.html
new file mode 100644
index 0000000..d74b725
--- /dev/null
+++ b/doc/_build/html/tutorials/manual/follows.html
@@ -0,0 +1,363 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>Chapter 1 : Arranging tasks into a pipeline with @follows — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../../index.html" />
+ <link rel="next" title="Chapter 2: Tasks and Recipes" href="tasks_as_recipes.html" />
+ <link rel="prev" title="Ruffus Manual" href="manual_introduction.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="tasks_as_recipes.html" title="Chapter 2: Tasks and Recipes"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="manual_introduction.html" title="Ruffus Manual"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="../new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <div class="section" id="manual-follows-chapter-num-arranging-tasks-into-a-pipeline-with-follows">
+<span id="manual-follows"></span><h1><strong>Chapter 1</strong> : <cite>Arranging tasks into a pipeline with</cite> <strong>@follows</strong><a class="headerlink" href="#manual-follows-chapter-num-arranging-tasks-into-a-pipeline-with-follows" title="Permalink to this headline">¶</a></h1>
+<blockquote>
+<div><ul class="simple">
+<li><a class="reference internal" href="manual_contents.html#manual"><em>Manual overview</em></a></li>
+<li><a class="reference internal" href="../../decorators/follows.html#decorators-follows"><em>@follows syntax in detail</em></a></li>
+</ul>
+</div></blockquote>
+<div class="section" id="follows">
+<span id="index-0"></span><h2><strong>@follows</strong><a class="headerlink" href="#follows" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>The order in which stages or <a class="reference internal" href="../../glossary.html#term-task"><em class="xref std std-term">task</em></a>s of a pipeline are arranged is set
+explicitly by the <a class="reference internal" href="../../decorators/follows.html#decorators-follows"><em>@follows(...)</em></a> Python decorator:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+<span class="kn">import</span> <span class="nn">sys</span>
+
+<span class="k">def</span> <span class="nf">first_task</span><span class="p">():</span>
+ <span class="k">print</span> <span class="s">"First task"</span>
+
+<span class="nd">@follows</span><span class="p">(</span><span class="n">first_task</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">second_task</span><span class="p">():</span>
+ <span class="k">print</span> <span class="s">"Second task"</span>
+
+<span class="nd">@follows</span><span class="p">(</span><span class="n">second_task</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">final_task</span><span class="p">():</span>
+ <span class="k">print</span> <span class="s">"Final task"</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p>The <tt class="docutils literal"><span class="pre">@follows</span></tt> decorator indicates that the <tt class="docutils literal"><span class="pre">first_task</span></tt> function precedes <tt class="docutils literal"><span class="pre">second_task</span></tt> in
+the pipeline.</p>
+</div></blockquote>
+<div class="admonition note">
+<p class="first admonition-title">Note</p>
+<p>We shall see in <a class="reference internal" href="tasks_and_globs_in_inputs.html#manual-tasks-as-input"><em>Chapter 2</em></a> that the order of pipeline <a class="reference internal" href="../../glossary.html#term-task"><em class="xref std std-term">task</em></a>s can also be inferred implicitly
+for the following decorators:</p>
+<blockquote class="last">
+<div><ul class="simple">
+<li><a class="reference internal" href="split.html#manual-split"><em>@split(...)</em></a></li>
+<li><a class="reference internal" href="transform.html#manual-transform"><em>@transform(...)</em></a></li>
+<li><a class="reference internal" href="merge.html#manual-merge"><em>@merge(...)</em></a></li>
+<li><a class="reference internal" href="collate.html#manual-collate"><em>@collate(...)</em></a></li>
+</ul>
+</div></blockquote>
+</div>
+<div class="section" id="running">
+<span id="index-1"></span><h3>Running<a class="headerlink" href="#running" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><dl class="docutils">
+<dt>Now we can run the pipeline by:</dt>
+<dd><div class="first last highlight-python"><div class="highlight"><pre><span class="n">pipeline_run</span><span class="p">([</span><span class="n">final_task</span><span class="p">])</span>
+</pre></div>
+</div>
+</dd>
+</dl>
+<p>Because <tt class="docutils literal"><span class="pre">final_task</span></tt> depends on <tt class="docutils literal"><span class="pre">second_task</span></tt>, which depends on <tt class="docutils literal"><span class="pre">first_task</span></tt>, all
+three functions will be executed in order.</p>
+</div></blockquote>
+</div>
+<div class="section" id="displaying">
+<span id="index-2"></span><h3>Displaying<a class="headerlink" href="#displaying" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><dl class="docutils">
+<dt>We can see a flowchart of our fledgling pipeline by executing:</dt>
+<dd><div class="first last highlight-python"><div class="highlight"><pre><span class="n">pipeline_printout_graph</span> <span class="p">(</span> <span class="s">"manual_follows1.png"</span><span class="p">,</span>
+ <span class="s">"png"</span><span class="p">,</span>
+ <span class="p">[</span><span class="n">final_task</span><span class="p">],</span>
+ <span class="n">no_key_legend</span><span class="o">=</span><span class="bp">True</span><span class="p">)</span>
+</pre></div>
+</div>
+</dd>
+</dl>
+<p>producing the following flowchart</p>
+<blockquote>
+<div><img alt="../../_images/manual_follows1.png" src="../../_images/manual_follows1.png" />
+</div></blockquote>
+<dl class="docutils">
+<dt>or in text format with:</dt>
+<dd><div class="first last highlight-python"><div class="highlight"><pre><span class="n">pipeline_printout</span><span class="p">(</span><span class="n">sys</span><span class="o">.</span><span class="n">stdout</span><span class="p">,</span> <span class="p">[</span><span class="n">final_task</span><span class="p">])</span>
+</pre></div>
+</div>
+</dd>
+<dt>which produces the following:</dt>
+<dd><div class="first last highlight-python"><div class="highlight"><pre><span class="n">Task</span> <span class="o">=</span> <span class="n">first_task</span>
+<span class="n">Task</span> <span class="o">=</span> <span class="n">second_task</span>
+<span class="n">Task</span> <span class="o">=</span> <span class="n">final_task</span>
+</pre></div>
+</div>
+</dd>
+</dl>
+</div></blockquote>
+</div>
+</div>
+<div class="section" id="defining-pipeline-tasks-out-of-order">
+<span id="manual-follows-out-of-order"></span><span id="index-3"></span><h2>Defining pipeline tasks out of order<a class="headerlink" href="#defining-pipeline-tasks-out-of-order" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>All this assumes that all your pipelined tasks are defined in order
+(<tt class="docutils literal"><span class="pre">first_task</span></tt> before <tt class="docutils literal"><span class="pre">second_task</span></tt> before <tt class="docutils literal"><span class="pre">final_task</span></tt>).</p>
+<div class="line-block">
+<div class="line">This is usually the most sensible way to arrange your code.</div>
+</div>
+<p>If you wish to refer to tasks which are not yet defined, you can do so by quoting the function name as a string:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="nd">@follows</span><span class="p">(</span><span class="s">"second_task"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">final_task</span><span class="p">():</span>
+ <span class="k">print</span> <span class="s">"Final task"</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p>You can refer to tasks (functions) in other modules, in which case the fully
+qualified name must be used:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="nd">@follows</span><span class="p">(</span><span class="s">"other_module.second_task"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">final_task</span><span class="p">():</span>
+ <span class="k">print</span> <span class="s">"Final task"</span>
+</pre></div>
+</div>
+</div></blockquote>
+</div></blockquote>
+</div>
+<div class="section" id="multiple-dependencies">
+<span id="manual-follows-multiple-dependencies"></span><span id="index-4"></span><h2>Multiple dependencies<a class="headerlink" href="#multiple-dependencies" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>Each task can depend on more than one antecedent task.</p>
+<dl class="docutils">
+<dt>This can be indicated either by stacking <tt class="docutils literal"><span class="pre">@follows</span></tt>:</dt>
+<dd><div class="first last highlight-python"><div class="highlight"><pre><span class="nd">@follows</span><span class="p">(</span><span class="n">first_task</span><span class="p">)</span>
+<span class="nd">@follows</span><span class="p">(</span><span class="s">"second_task"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">final_task</span><span class="p">():</span>
+ <span class="s">""</span>
+</pre></div>
+</div>
+</dd>
+<dt>or in a more concise way:</dt>
+<dd><div class="first last highlight-python"><div class="highlight"><pre><span class="nd">@follows</span><span class="p">(</span><span class="n">first_task</span><span class="p">,</span> <span class="s">"second_task"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">final_task</span><span class="p">():</span>
+ <span class="s">""</span>
+</pre></div>
+</div>
+</dd>
+</dl>
+</div></blockquote>
+<span class="target" id="manual-follows-mkdir"></span></div>
+<div class="section" id="making-directories-automatically-with-mkdir">
+<span id="index-5"></span><h2>Making directories automatically with <a class="reference internal" href="../../decorators/mkdir.html#decorators-mkdir"><em>mkdir</em></a><a class="headerlink" href="#making-directories-automatically-with-mkdir" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>A common prerequisite for any computational task is making sure that the destination
+directories exist.</p>
+<p><strong>Ruffus</strong> provides special syntax to support this, using the
+<a class="reference internal" href="../../decorators/mkdir.html#decorators-mkdir"><em>mkdir</em></a> dependency. For example:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="nd">@follows</span><span class="p">(</span><span class="n">first_task</span><span class="p">,</span> <span class="n">mkdir</span><span class="p">(</span><span class="s">"output/results/here"</span><span class="p">))</span>
+<span class="k">def</span> <span class="nf">second_task</span><span class="p">():</span>
+ <span class="k">print</span> <span class="s">"Second task"</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p>will make sure that <tt class="docutils literal"><span class="pre">output/results/here</span></tt> exists before <cite>second_task</cite> is run.</p>
+<p>In other words, it will make the <tt class="docutils literal"><span class="pre">output/results/here</span></tt> directory if it does not exist.</p>
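+<p>As an illustration only (a sketch of the behaviour, not the actual <strong>Ruffus</strong>
+implementation), the effect is roughly equivalent to running the following plain Python
+before <tt class="docutils literal"><span class="pre">second_task</span></tt> starts:</p>
+<blockquote>
+<div><div class="highlight-python"><pre>import os
+
+# create the directory (and any missing parent directories) only if it is absent
+if not os.path.isdir("output/results/here"):
+    os.makedirs("output/results/here")</pre>
+</div>
+</div></blockquote>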
+</div></blockquote>
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="../../contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#"><strong>Chapter 1</strong> : <cite>Arranging tasks into a pipeline with</cite> <strong>@follows</strong></a><ul>
+<li><a class="reference internal" href="#follows"><strong>@follows</strong></a><ul>
+<li><a class="reference internal" href="#running">Running</a></li>
+<li><a class="reference internal" href="#displaying">Displaying</a></li>
+</ul>
+</li>
+<li><a class="reference internal" href="#defining-pipeline-tasks-out-of-order">Defining pipeline tasks out of order</a></li>
+<li><a class="reference internal" href="#multiple-dependencies">Multiple dependencies</a></li>
+<li><a class="reference internal" href="#making-directories-automatically-with-mkdir">Making directories automatically with <tt class="docutils literal"><span class="pre">mkdir</span></tt></a></li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="manual_introduction.html"
+ title="previous chapter"><strong>Ruffus</strong> Manual</a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="tasks_as_recipes.html"
+ title="next chapter"><strong>Chapter 2</strong>: <cite>Tasks and Recipes</cite></a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../../_sources/tutorials/manual/follows.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="../../decorators/decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="../../decorators/originate.html">@originate</a> </li>
+ <li><a href="../../decorators/split.html">@split</a> </li>
+ <li><a href="../../decorators/transform.html">@transform</a> </li>
+ <li><a href="../../decorators/merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="../../decorators/subdivide.html">@subdivide</a> </li>
+ <li><a href="../../decorators/transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="../../decorators/collate.html">@collate</a> </li>
+ <li><a href="../../decorators/collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="../../decorators/graphviz.html">@graphviz</a> </li>
+ <li><a href="../../decorators/mkdir.html">@mkdir</a> </li>
+ <li><a href="../../decorators/follows.html">@follows / mkdir</a> </li>
+ <li><a href="../../decorators/posttask.html">@posttask touch_file</a> </li>
+ <li><a href="../../decorators/active_if.html">@active_if</a> </li>
+ <li><a href="../../decorators/jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="../../decorators/product.html">@product </a> </li>
+ <li><a href="../../decorators/permutations.html">@permutations </a> </li>
+ <li><a href="../../decorators/combinations.html">@combinations </a> </li>
+ <li><a href="../../decorators/combinations_with_replacement.html">@combinations_with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="../../decorators/decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="../../decorators/files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="../../decorators/parallel.html">@parallel</a> </li>
+ <li><a href="../../decorators/check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="../../decorators/indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="tasks_as_recipes.html" title="Chapter 2: Tasks and Recipes"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="manual_introduction.html" title="Ruffus Manual"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="../new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/tutorials/manual/jobs_limit.html b/doc/_build/html/tutorials/manual/jobs_limit.html
new file mode 100644
index 0000000..c0721e1
--- /dev/null
+++ b/doc/_build/html/tutorials/manual/jobs_limit.html
@@ -0,0 +1,262 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>Chapter 11: Manage concurrency for a specific task with @jobs_limit — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../../index.html" />
+ <link rel="next" title="Chapter 12: Checking dependencies to run tasks in order" href="dependencies.html" />
+ <link rel="prev" title="Chapter 10: Signal the completion of each stage of our pipeline with @posttask" href="posttask.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="dependencies.html" title="Chapter 12: Checking dependencies to run tasks in order"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="posttask.html" title="Chapter 10: Signal the completion of each stage of our pipeline with @posttask"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="../new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <div class="section" id="manual-jobs-limit-chapter-num-manage-concurrency-for-a-specific-task-with-jobs-limit">
+<span id="manual-jobs-limit"></span><h1><strong>Chapter 11</strong>: <cite>Manage concurrency for a specific task with</cite> <strong>@jobs_limit</strong><a class="headerlink" href="#manual-jobs-limit-chapter-num-manage-concurrency-for-a-specific-task-with-jobs-limit" title="Permalink to this headline">¶</a></h1>
+<blockquote>
+<div><table class="hlist"><tr><td><ul class="simple">
+<li><a class="reference internal" href="manual_contents.html#manual"><em>Manual overview</em></a></li>
+</ul>
+</td><td><ul class="simple">
+<li><a class="reference internal" href="../../decorators/jobs_limit.html#decorators-jobs-limit"><em>@jobs_limit</em></a> syntax in detail</li>
+</ul>
+</td></tr></table>
+</div></blockquote>
+<div class="section" id="jobs-limit">
+<span id="index-0"></span><h2><strong>@jobs_limit</strong><a class="headerlink" href="#jobs-limit" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>Calling <a class="reference internal" href="../../pipeline_functions.html#pipeline-functions-pipeline-run"><em>pipeline_run(multiprocess = NNN)</em></a> allows
+multiple jobs (from multiple independent tasks) to be run in parallel. However, there
+are some operations which consume so many resources that we might want them to run
+with less or no concurrency.</p>
+<p>For example, we might want to download some files via FTP, but the server limits the
+number of simultaneous requests from each IP address. Even if the rest of the pipeline is
+running 100 jobs in parallel, the FTP downloads must be restricted to 2 files at a time.
+We would really like to keep the pipeline running as is, but let this one operation run
+either serially or with little concurrency.</p>
+<p>Whereas <tt class="docutils literal"><span class="pre">multiprocess</span> <span class="pre">=</span> <span class="pre">NNN</span></tt> sets the pipeline-wide concurrency to <tt class="docutils literal"><span class="pre">NNN</span></tt>,
+<tt class="docutils literal"><span class="pre">@jobs_limit(MMM)</span></tt> sets concurrency at a much finer level: at most <tt class="docutils literal"><span class="pre">MMM</span></tt> jobs run at
+a time for the decorated task.</p>
+<p>The optional name (e.g. <tt class="docutils literal"><span class="pre">@jobs_limit(3,</span> <span class="pre">"ftp_download_limit")</span></tt>) allows the same limit to
+be shared across multiple tasks. To be pedantic: a limit of <tt class="docutils literal"><span class="pre">3</span></tt> jobs at a time would be applied
+across all tasks which have a <tt class="docutils literal"><span class="pre">@jobs_limit</span></tt> named <tt class="docutils literal"><span class="pre">"ftp_download_limit"</span></tt>:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+
+<span class="c"># make list of 10 files</span>
+<span class="nd">@split</span><span class="p">(</span><span class="bp">None</span><span class="p">,</span> <span class="s">"*stage1"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">make_files</span><span class="p">(</span><span class="n">input_file</span><span class="p">,</span> <span class="n">output_files</span><span class="p">):</span>
+ <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">10</span><span class="p">):</span>
+ <span class="k">if</span> <span class="n">i</span> <span class="o"><</span> <span class="mi">5</span><span class="p">:</span>
+ <span class="nb">open</span><span class="p">(</span><span class="s">"</span><span class="si">%d</span><span class="s">.small_stage1"</span> <span class="o">%</span> <span class="n">i</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="nb">open</span><span class="p">(</span><span class="s">"</span><span class="si">%d</span><span class="s">.big_stage1"</span> <span class="o">%</span> <span class="n">i</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+
+<span class="nd">@jobs_limit</span><span class="p">(</span><span class="mi">3</span><span class="p">,</span> <span class="s">"ftp_download_limit"</span><span class="p">)</span>
+<span class="nd">@transform</span><span class="p">(</span><span class="n">make_files</span><span class="p">,</span> <span class="n">suffix</span><span class="p">(</span><span class="s">".small_stage1"</span><span class="p">),</span> <span class="s">".stage2"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">stage1_small</span><span class="p">(</span><span class="n">input_file</span><span class="p">,</span> <span class="n">output_file</span><span class="p">):</span>
+ <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+
+<span class="nd">@jobs_limit</span><span class="p">(</span><span class="mi">3</span><span class="p">,</span> <span class="s">"ftp_download_limit"</span><span class="p">)</span>
+<span class="nd">@transform</span><span class="p">(</span><span class="n">make_files</span><span class="p">,</span> <span class="n">suffix</span><span class="p">(</span><span class="s">".big_stage1"</span><span class="p">),</span> <span class="s">".stage2"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">stage1_big</span><span class="p">(</span><span class="n">input_file</span><span class="p">,</span> <span class="n">output_file</span><span class="p">):</span>
+ <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+
+<span class="nd">@jobs_limit</span><span class="p">(</span><span class="mi">5</span><span class="p">)</span>
+<span class="nd">@transform</span><span class="p">([</span><span class="n">stage1_small</span><span class="p">,</span> <span class="n">stage1_big</span><span class="p">],</span> <span class="n">suffix</span><span class="p">(</span><span class="s">".stage2"</span><span class="p">),</span> <span class="s">".stage3"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">stage2</span><span class="p">(</span><span class="n">input_file</span><span class="p">,</span> <span class="n">output_file</span><span class="p">):</span>
+ <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+
+<span class="n">pipeline_run</span><span class="p">([</span><span class="n">stage2</span><span class="p">],</span> <span class="n">multiprocess</span> <span class="o">=</span> <span class="mi">10</span><span class="p">)</span>
+</pre></div>
+</div>
+<p>will run the 10 jobs of <tt class="docutils literal"><span class="pre">stage1_big</span></tt> and <tt class="docutils literal"><span class="pre">stage1_small</span></tt> 3 at a time (highlighted in blue),
+a limit shared across the two tasks. <tt class="docutils literal"><span class="pre">stage2</span></tt> jobs run 5 at a time (in red).
+These limits override the numbers set in <tt class="docutils literal"><span class="pre">pipeline_run</span></tt> (<tt class="docutils literal"><span class="pre">multiprocess</span> <span class="pre">=</span> <span class="pre">10</span></tt>):</p>
+<blockquote>
+<div><img alt="../../_images/jobs_limit2.png" src="../../_images/jobs_limit2.png" />
+</div></blockquote>
+</div></blockquote>
+</div></blockquote>
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="../../contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#"><strong>Chapter 11</strong>: <cite>Manage concurrency for a specific task with</cite> <strong>@jobs_limit</strong></a><ul>
+<li><a class="reference internal" href="#jobs-limit"><strong>@jobs_limit</strong></a></li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="posttask.html"
+ title="previous chapter"><strong>Chapter 10</strong>: <cite>Signal the completion of each stage of our pipeline with</cite> <strong>@posttask</strong></a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="dependencies.html"
+ title="next chapter"><strong>Chapter 12</strong>: <cite>Checking dependencies to run tasks in order</cite></a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../../_sources/tutorials/manual/jobs_limit.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="../../decorators/decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="../../decorators/originate.html">@originate</a> </li>
+ <li><a href="../../decorators/split.html">@split</a> </li>
+ <li><a href="../../decorators/transform.html">@transform</a> </li>
+ <li><a href="../../decorators/merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="../../decorators/subdivide.html">@subdivide</a> </li>
+ <li><a href="../../decorators/transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="../../decorators/collate.html">@collate</a> </li>
+ <li><a href="../../decorators/collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="../../decorators/graphviz.html">@graphviz</a> </li>
+ <li><a href="../../decorators/mkdir.html">@mkdir</a> </li>
+ <li><a href="../../decorators/follows.html">@follows / mkdir</a> </li>
+ <li><a href="../../decorators/posttask.html">@posttask touch_file</a> </li>
+ <li><a href="../../decorators/active_if.html">@active_if</a> </li>
+ <li><a href="../../decorators/jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="../../decorators/product.html">@product </a> </li>
+ <li><a href="../../decorators/permutations.html">@permutations </a> </li>
+ <li><a href="../../decorators/combinations.html">@combinations </a> </li>
+ <li><a href="../../decorators/combinations_with_replacement.html">@combinations_with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="../../decorators/decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="../../decorators/files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="../../decorators/parallel.html">@parallel</a> </li>
+ <li><a href="../../decorators/check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="../../decorators/indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="dependencies.html" title="Chapter 12: Checking dependencies to run tasks in order"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="posttask.html" title="Chapter 10: Signal the completion of each stage of our pipeline with @posttask"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="../new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/tutorials/manual/logging.html b/doc/_build/html/tutorials/manual/logging.html
new file mode 100644
index 0000000..39f236a
--- /dev/null
+++ b/doc/_build/html/tutorials/manual/logging.html
@@ -0,0 +1,375 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>Chapter 19: Logging progress through a pipeline — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../../index.html" />
+ <link rel="next" title="Chapter 20: @files_re: Deprecated syntax using regular expressions" href="files_re.html" />
+ <link rel="prev" title="Chapter 18: Exceptions thrown inside a pipeline" href="exceptions.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="files_re.html" title="Chapter 20: @files_re: Deprecated syntax using regular expressions"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="exceptions.html" title="Chapter 18: Exceptions thrown inside a pipeline"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="../new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <div class="section" id="manual-logging-chapter-num-logging-progress-through-a-pipeline">
+<span id="manual-logging"></span><h1><strong>Chapter 19</strong>: <cite>Logging progress through a pipeline</cite><a class="headerlink" href="#manual-logging-chapter-num-logging-progress-through-a-pipeline" title="Permalink to this headline">¶</a></h1>
+<blockquote>
+<div><table class="hlist"><tr><td><ul class="simple">
+<li><a class="reference internal" href="manual_contents.html#manual"><em>Manual overview</em></a></li>
+</ul>
+</td><td><ul class="simple">
+</ul>
+</td></tr></table>
+<p id="index-0">There are two parts to logging with <strong>Ruffus</strong>:</p>
+<ul>
+<li><p class="first">Logging progress through the pipeline</p>
+<blockquote>
+<div><p>This produces the sort of output displayed in this manual:</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="gp">>>> </span><span class="n">pipeline_run</span><span class="p">([</span><span class="n">parallel_io_task</span><span class="p">])</span>
+<span class="go">Task = parallel_io_task</span>
+<span class="go"> Job = ["a.1" -> "a.2", "A file"] completed</span>
+<span class="go"> Job = ["b.1" -> "b.2", "B file"] unnecessary: already up to date</span>
+<span class="go">Completed Task = parallel_io_task</span>
+</pre></div>
+</div>
+</div></blockquote>
+</li>
+<li><p class="first">Logging your own messages from within your pipelined functions.</p>
+<blockquote>
+<div><p>Because <strong>Ruffus</strong> may run these in separate processes (multiprocessing), some
+attention has to be paid to how to send and synchronise your log messages
+across process boundaries.</p>
+</div></blockquote>
+</li>
+</ul>
+<p>We shall deal with these in turn.</p>
+</div></blockquote>
+<div class="section" id="logging-task-job-completion">
+<span id="manual-logging-pipeline"></span><h2>Logging task/job completion<a class="headerlink" href="#logging-task-job-completion" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>By default, <em>Ruffus</em> logs each task and each job as it is completed to
+<tt class="docutils literal"><span class="pre">sys.stderr</span></tt>.</p>
+<p><a class="reference internal" href="../../pipeline_functions.html#pipeline-functions-pipeline-run"><em>pipeline_run()</em></a> includes an optional <tt class="docutils literal"><span class="pre">logger</span></tt> parameter which defaults to
+<tt class="docutils literal"><span class="pre">stderr_logger</span></tt>. Set this to <tt class="docutils literal"><span class="pre">black_hole_logger</span></tt> to turn off all tracking messages as
+the pipeline runs:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="n">pipeline_run</span><span class="p">([</span><span class="n">pipelined_task</span><span class="p">],</span> <span class="n">logger</span> <span class="o">=</span> <span class="n">black_hole_logger</span><span class="p">)</span>
+</pre></div>
+</div>
+</div></blockquote>
+</div></blockquote>
+<div class="section" id="controlling-logging-verbosity">
+<h3>Controlling logging verbosity<a class="headerlink" href="#controlling-logging-verbosity" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><p><a class="reference internal" href="../../pipeline_functions.html#pipeline-functions-pipeline-run"><em>pipeline_run()</em></a> currently has five levels of verbosity, set by the optional <tt class="docutils literal"><span class="pre">verbose</span></tt>
+parameter which defaults to 1:</p>
+<blockquote>
+<div><div class="highlight-python"><pre>verbose = 0: nothing
+verbose = 1: logs completed jobs/tasks
+verbose = 2: logs up to date jobs in incomplete tasks
+verbose = 3: logs reason for running job
+verbose = 4: logs messages useful only for debugging ruffus pipeline code</pre>
+</div>
+<p>Levels of <tt class="docutils literal"><span class="pre">verbose</span></tt> above 2 are intended for debugging <strong>Ruffus</strong> by the developers, and the details
+are liable to change from release to release.</p>
+</div></blockquote>
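+<p>For example (a minimal sketch; <tt class="docutils literal"><span class="pre">pipelined_task</span></tt> stands for any task in your pipeline):</p>
+<blockquote>
+<div><div class="highlight-python"><pre># silence all progress messages
+pipeline_run([pipelined_task], verbose = 0)
+
+# additionally log up to date jobs in incomplete tasks
+pipeline_run([pipelined_task], verbose = 2)</pre>
+</div>
+</div></blockquote>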
+</div></blockquote>
+</div>
+<div class="section" id="using-your-own-logging">
+<h3>Using your own logging<a class="headerlink" href="#using-your-own-logging" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><p>You can specify your own logging by providing a log object to <a class="reference internal" href="../../pipeline_functions.html#pipeline-functions-pipeline-run"><em>pipeline_run()</em></a>.
+This log object should have <tt class="docutils literal"><span class="pre">debug()</span></tt> and <tt class="docutils literal"><span class="pre">info()</span></tt> methods.</p>
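+<p>A minimal sketch of such a duck-typed log object (the class below is purely illustrative
+and not part of <strong>Ruffus</strong>; <tt class="docutils literal"><span class="pre">pipelined_task</span></tt> stands for any task in your pipeline):</p>
+<blockquote>
+<div><div class="highlight-python"><pre>import sys
+
+class MinimalLogger(object):
+    """Bare-bones log object: writes every message to stderr"""
+    def debug(self, message):
+        sys.stderr.write("  DEBUG: %s\n" % message)
+    def info(self, message):
+        sys.stderr.write("  INFO:  %s\n" % message)
+
+pipeline_run([pipelined_task], logger = MinimalLogger())</pre>
+</div>
+</div></blockquote>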
+<p>Instead of writing your own, it is usually more convenient to use the Python
+<a class="reference external" href="http://docs.python.org/library/logging.html">logging</a>
+module, which provides logging classes with rich functionality. The following sets up
+a logger that writes to a rotating set of files:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="kn">import</span> <span class="nn">logging</span>
+<span class="kn">import</span> <span class="nn">logging.handlers</span>
+
+<span class="n">LOG_FILENAME</span> <span class="o">=</span> <span class="s">'/tmp/ruffus.log'</span>
+
+<span class="c"># Set up a specific logger with our desired output level</span>
+<span class="n">my_ruffus_logger</span> <span class="o">=</span> <span class="n">logging</span><span class="o">.</span><span class="n">getLogger</span><span class="p">(</span><span class="s">'My_Ruffus_logger'</span><span class="p">)</span>
+<span class="n">my_ruffus_logger</span><span class="o">.</span><span class="n">setLevel</span><span class="p">(</span><span class="n">logging</span><span class="o">.</span><span class="n">DEBUG</span><span class="p">)</span>
+
+<span class="c"># Add the log message handler to the logger</span>
+<span class="n">handler</span> <span class="o">=</span> <span class="n">logging</span><span class="o">.</span><span class="n">handlers</span><span class="o">.</span><span class="n">RotatingFileHandler</span><span class="p">(</span>
+ <span class="n">LOG_FILENAME</span><span class="p">,</span> <span class="n">maxBytes</span><span class="o">=</span><span class="mi">2000</span><span class="p">,</span> <span class="n">backupCount</span><span class="o">=</span><span class="mi">5</span><span class="p">)</span>
+
+<span class="n">my_ruffus_logger</span><span class="o">.</span><span class="n">addHandler</span><span class="p">(</span><span class="n">handler</span><span class="p">)</span>
+
+
+<span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+
+<span class="nd">@files</span><span class="p">(</span><span class="bp">None</span><span class="p">,</span> <span class="s">"a.1"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">create_if_necessary</span><span class="p">(</span><span class="n">input_file</span><span class="p">,</span> <span class="n">output_file</span><span class="p">):</span>
+ <span class="sd">"""Description: Create the file if it does not exist"""</span>
+ <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+
+<span class="n">pipeline_run</span><span class="p">([</span><span class="n">create_if_necessary</span><span class="p">],</span> <span class="p">[</span><span class="n">create_if_necessary</span><span class="p">],</span> <span class="n">logger</span><span class="o">=</span><span class="n">my_ruffus_logger</span><span class="p">)</span>
+<span class="k">print</span> <span class="nb">open</span><span class="p">(</span><span class="s">"/tmp/ruffus.log"</span><span class="p">)</span><span class="o">.</span><span class="n">read</span><span class="p">()</span>
+</pre></div>
+</div>
+</div></blockquote>
+<dl class="docutils">
+<dt>The contents of <tt class="docutils literal"><span class="pre">/tmp/ruffus.log</span></tt> would then be:</dt>
+<dd><div class="first last highlight-python"><pre>Task = create_if_necessary
+ Description: Create the file if it does not exist
+ Job = [null -> "a.1"] completed</pre>
+</div>
+</dd>
+</dl>
+</div></blockquote>
+</div>
+</div>
+<div class="section" id="your-own-logging-within-each-job">
+<span id="manual-logging-per-job"></span><h2>Your own logging <em>within</em> each job<a class="headerlink" href="#your-own-logging-within-each-job" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>It is often useful to log the messages from within each of your pipelined functions.</p>
+<p>However, each job runs in a separate process, and it is <em>not</em> a good
+idea to pass the logging object itself between jobs:</p>
+<ol class="arabic simple">
+<li>logging is not synchronised between processes</li>
+<li><a class="reference external" href="http://docs.python.org/library/logging.html">logging</a> objects cannot be
+<a class="reference external" href="http://docs.python.org/library/pickle.html">pickle</a>d and sent across processes</li>
+</ol>
+<p>The best approach is to have a centralised log, and to have each job invoke the
+logging methods (e.g. <cite>debug</cite>, <cite>warning</cite>, <cite>info</cite> etc.) on that centralised log across
+the process boundaries.</p>
+<p>The <strong>Ruffus</strong> <a class="reference internal" href="../../proxy_logger.html#proxy-logger"><em>proxy_logger</em></a> module provides an easy way to share
+<a class="reference external" href="http://docs.python.org/library/logging.html">logging</a> objects among
+jobs. This requires just two simple steps:</p>
+<div class="admonition note">
+<p class="first admonition-title">Note</p>
+<p class="last"><a class="reference internal" href="logging_code.html#manual-logging-code"><em>The full code</em></a> shows how this can be coded.</p>
+</div>
+</div></blockquote>
+<div class="section" id="set-up-log-from-config-file">
+<h3>1. Set up log from config file<a class="headerlink" href="#set-up-log-from-config-file" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="kn">from</span> <span class="nn">ruffus.proxy_logger</span> <span class="kn">import</span> <span class="o">*</span>
+<span class="p">(</span><span class="n">logger_proxy</span><span class="p">,</span>
+ <span class="n">logging_mutex</span><span class="p">)</span> <span class="o">=</span> <span class="n">make_shared_logger_and_proxy</span> <span class="p">(</span><span class="n">setup_std_shared_logger</span><span class="p">,</span>
+ <span class="s">"my_logger"</span><span class="p">,</span>
+ <span class="p">{</span><span class="s">"file_name"</span> <span class="p">:</span><span class="s">"/my/lg.log"</span><span class="p">})</span>
+</pre></div>
+</div>
+</div></blockquote>
+</div>
+<div class="section" id="give-each-job-proxy-to-logger">
+<h3>2. Give each job proxy to logger<a class="headerlink" href="#give-each-job-proxy-to-logger" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><p>Now, pass:</p>
+<blockquote>
+<div><ul class="simple">
+<li><tt class="docutils literal"><span class="pre">logger_proxy</span></tt> (which forwards logging calls across jobs) and</li>
+<li><tt class="docutils literal"><span class="pre">logging_mutex</span></tt> (which prevents messages from jobs logging simultaneously
+from being jumbled together)</li>
+</ul>
+</div></blockquote>
+<p>to each job:</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="nd">@files</span><span class="p">(</span><span class="bp">None</span><span class="p">,</span> <span class="s">'a.1'</span><span class="p">,</span> <span class="n">logger_proxy</span><span class="p">,</span> <span class="n">logging_mutex</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">task1</span><span class="p">(</span><span class="n">ignore_infile</span><span class="p">,</span> <span class="n">outfile</span><span class="p">,</span> <span class="n">logger_proxy</span><span class="p">,</span> <span class="n">logging_mutex</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Log within task</span>
+<span class="sd"> """</span>
+ <span class="nb">open</span><span class="p">(</span><span class="n">outfile</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="s">"Here we go"</span><span class="p">)</span>
+ <span class="k">with</span> <span class="n">logging_mutex</span><span class="p">:</span>
+ <span class="n">logger_proxy</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s">"Here we go logging"</span><span class="p">)</span>
+</pre></div>
+</div>
+</div></blockquote>
+</div>
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="../../contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#"><strong>Chapter 19</strong>: <cite>Logging progress through a pipeline</cite></a><ul>
+<li><a class="reference internal" href="#logging-task-job-completion">Logging task/job completion</a><ul>
+<li><a class="reference internal" href="#controlling-logging-verbosity">Controlling logging verbosity</a></li>
+<li><a class="reference internal" href="#using-your-own-logging">Using your own logging</a></li>
+</ul>
+</li>
+<li><a class="reference internal" href="#your-own-logging-within-each-job">Your own logging <em>within</em> each job</a><ul>
+<li><a class="reference internal" href="#set-up-log-from-config-file">1. Set up log from config file</a></li>
+<li><a class="reference internal" href="#give-each-job-proxy-to-logger">2. Give each job proxy to logger</a></li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="exceptions.html"
+ title="previous chapter"><strong>Chapter 18</strong>: <cite>Exceptions thrown inside a pipeline</cite></a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="files_re.html"
+ title="next chapter"><strong>Chapter 20</strong>: <strong>@files_re</strong>: Deprecated <cite>syntax using regular expressions</cite></a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../../_sources/tutorials/manual/logging.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="../../decorators/decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="../../decorators/originate.html">@originate</a> </li>
+ <li><a href="../../decorators/split.html">@split</a> </li>
+ <li><a href="../../decorators/transform.html">@transform</a> </li>
+ <li><a href="../../decorators/merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="../../decorators/subdivide.html">@subdivide</a> </li>
+ <li><a href="../../decorators/transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="../../decorators/collate.html">@collate</a> </li>
+ <li><a href="../../decorators/collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="../../decorators/graphviz.html">@graphviz</a> </li>
+ <li><a href="../../decorators/mkdir.html">@mkdir</a> </li>
+ <li><a href="../../decorators/follows.html">@follows / mkdir</a> </li>
+ <li><a href="../../decorators/posttask.html">@posttask touch_file</a> </li>
+ <li><a href="../../decorators/active_if.html">@active_if</a> </li>
+ <li><a href="../../decorators/jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="../../decorators/product.html">@product </a> </li>
+ <li><a href="../../decorators/permutations.html">@permutations </a> </li>
+ <li><a href="../../decorators/combinations.html">@combinations </a> </li>
+ <li><a href="../../decorators/combinations_with_replacement.html">@combinations_with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="../../decorators/decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="../../decorators/files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="../../decorators/parallel.html">@parallel</a> </li>
+ <li><a href="../../decorators/check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="../../decorators/indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="files_re.html" title="Chapter 20: @files_re: Deprecated syntax using regular expressions"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="exceptions.html" title="Chapter 18: Exceptions thrown inside a pipeline"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="../new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/tutorials/manual/logging_code.html b/doc/_build/html/tutorials/manual/logging_code.html
new file mode 100644
index 0000000..5ab6a30
--- /dev/null
+++ b/doc/_build/html/tutorials/manual/logging_code.html
@@ -0,0 +1,376 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>Code for Chapter 16: Logging progress through a pipeline — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../../index.html" />
+ <link rel="next" title="Code for Chapter 10: Generating parameters on the fly" href="onthefly_code.html" />
+ <link rel="prev" title="Code for Chapter 9: Checking dependencies to run tasks in order" href="dependencies_code.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="onthefly_code.html" title="Code for Chapter 10: Generating parameters on the fly"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="dependencies_code.html" title="Code for Chapter 9: Checking dependencies to run tasks in order"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="../new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <div class="section" id="code-for-chapter-16-logging-progress-through-a-pipeline">
+<span id="manual-logging-code"></span><h1>Code for Chapter 16: Logging progress through a pipeline<a class="headerlink" href="#code-for-chapter-16-logging-progress-through-a-pipeline" title="Permalink to this headline">¶</a></h1>
+<blockquote>
+<div><ul class="simple">
+<li><a class="reference internal" href="manual_contents.html#manual"><em>Manual overview</em></a></li>
+<li><a class="reference internal" href="logging.html#manual-logging-per-job"><em>Back</em></a></li>
+</ul>
+<p>This example shows how to log messages from within each of your pipelined functions.</p>
+</div></blockquote>
+<div class="section" id="code">
+<h2>Code<a class="headerlink" href="#code" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+<span class="kn">from</span> <span class="nn">ruffus.proxy_logger</span> <span class="kn">import</span> <span class="o">*</span>
+<span class="kn">import</span> <span class="nn">logging</span>
+
+<span class="kn">import</span> <span class="nn">sys</span><span class="o">,</span><span class="nn">os</span>
+<span class="c"># use simplejson in place of json for python < 2.6</span>
+<span class="k">try</span><span class="p">:</span>
+ <span class="kn">import</span> <span class="nn">json</span>
+<span class="k">except</span> <span class="ne">ImportError</span><span class="p">:</span>
+ <span class="kn">import</span> <span class="nn">simplejson</span>
+ <span class="n">json</span> <span class="o">=</span> <span class="n">simplejson</span>
+
+
+
+<span class="c">#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888</span>
+
+<span class="c"># Shared logging</span>
+
+
+<span class="c">#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888</span>
+
+<span class="n">logger_args</span><span class="o">=</span><span class="p">{}</span>
+<span class="n">logger_args</span><span class="p">[</span><span class="s">"file_name"</span><span class="p">]</span> <span class="o">=</span> <span class="s">"pipeline.log"</span>
+<span class="n">logger_args</span><span class="p">[</span><span class="s">"level"</span><span class="p">]</span> <span class="o">=</span> <span class="n">logging</span><span class="o">.</span><span class="n">DEBUG</span>
+<span class="n">logger_args</span><span class="p">[</span><span class="s">"rotating"</span><span class="p">]</span> <span class="o">=</span> <span class="bp">True</span>
+<span class="n">logger_args</span><span class="p">[</span><span class="s">"maxBytes"</span><span class="p">]</span><span class="o">=</span><span class="mi">20000</span>
+<span class="n">logger_args</span><span class="p">[</span><span class="s">"backupCount"</span><span class="p">]</span><span class="o">=</span><span class="mi">10</span>
+<span class="n">logger_args</span><span class="p">[</span><span class="s">"formatter"</span><span class="p">]</span><span class="o">=</span><span class="s">"</span><span class="si">%(asctime)s</span><span class="s"> - </span><span class="si">%(name)s</span><span class="s"> - </span><span class="si">%(levelname)6s</span><span class="s"> - </span><span class="si">%(message)s</span><span class="s">"</span>
+
+<span class="p">(</span><span class="n">logger_proxy</span><span class="p">,</span>
+ <span class="n">logging_mutex</span><span class="p">)</span> <span class="o">=</span> <span class="n">make_shared_logger_and_proxy</span> <span class="p">(</span><span class="n">setup_std_shared_logger</span><span class="p">,</span>
+ <span class="s">"my_logger"</span><span class="p">,</span> <span class="n">logger_args</span><span class="p">)</span>
+
+
+
+
+
+<span class="c">#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888</span>
+
+<span class="c"># Helper Function which writes to a shared log</span>
+
+
+<span class="c">#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888</span>
+
+<span class="kn">import</span> <span class="nn">time</span>
+<span class="k">def</span> <span class="nf">test_job_io</span><span class="p">(</span><span class="n">infiles</span><span class="p">,</span> <span class="n">outfiles</span><span class="p">,</span> <span class="n">extra_params</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> cat input files content to output files</span>
+<span class="sd"> after writing out job parameters</span>
+<span class="sd"> """</span>
+ <span class="c">#</span>
+ <span class="c"># dump parameters</span>
+ <span class="n">params</span> <span class="o">=</span> <span class="p">(</span><span class="n">infiles</span><span class="p">,</span> <span class="n">outfiles</span><span class="p">)</span><span class="c"># + extra_params[0:-3]</span>
+ <span class="c">#</span>
+ <span class="n">logger_proxy</span><span class="p">,</span> <span class="n">logging_mutex</span> <span class="o">=</span> <span class="n">extra_params</span>
+ <span class="k">with</span> <span class="n">logging_mutex</span><span class="p">:</span>
+ <span class="n">logger_proxy</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s">"job = </span><span class="si">%s</span><span class="s">, process name = </span><span class="si">%s</span><span class="s">"</span> <span class="o">%</span>
+ <span class="p">(</span><span class="n">json</span><span class="o">.</span><span class="n">dumps</span><span class="p">(</span><span class="n">params</span><span class="p">),</span>
+ <span class="n">multiprocessing</span><span class="o">.</span><span class="n">current_process</span><span class="p">()</span><span class="o">.</span><span class="n">name</span><span class="p">))</span>
+ <span class="c">#</span>
+ <span class="c">#</span>
+ <span class="n">sys</span><span class="o">.</span><span class="n">stdout</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="s">' job = </span><span class="si">%s</span><span class="se">\n</span><span class="s">'</span> <span class="o">%</span> <span class="n">json</span><span class="o">.</span><span class="n">dumps</span><span class="p">(</span><span class="n">params</span><span class="p">))</span>
+ <span class="c">#</span>
+ <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">infiles</span><span class="p">,</span> <span class="nb">str</span><span class="p">):</span>
+ <span class="n">infiles</span> <span class="o">=</span> <span class="p">[</span><span class="n">infiles</span><span class="p">]</span>
+ <span class="k">elif</span> <span class="n">infiles</span> <span class="o">==</span> <span class="bp">None</span><span class="p">:</span>
+ <span class="n">infiles</span> <span class="o">=</span> <span class="p">[]</span>
+ <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">outfiles</span><span class="p">,</span> <span class="nb">str</span><span class="p">):</span>
+ <span class="n">outfiles</span> <span class="o">=</span> <span class="p">[</span><span class="n">outfiles</span><span class="p">]</span>
+ <span class="n">output_text</span> <span class="o">=</span> <span class="nb">list</span><span class="p">()</span>
+ <span class="k">for</span> <span class="n">f</span> <span class="ow">in</span> <span class="n">infiles</span><span class="p">:</span>
+ <span class="n">output_text</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="nb">open</span><span class="p">(</span><span class="n">f</span><span class="p">)</span><span class="o">.</span><span class="n">read</span><span class="p">())</span>
+ <span class="n">output_text</span> <span class="o">=</span> <span class="s">""</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="nb">sorted</span><span class="p">(</span><span class="n">output_text</span><span class="p">))</span>
+ <span class="n">output_text</span> <span class="o">+=</span> <span class="n">json</span><span class="o">.</span><span class="n">dumps</span><span class="p">(</span><span class="n">infiles</span><span class="p">)</span> <span class="o">+</span> <span class="s">" -> "</span> <span class="o">+</span> <span class="n">json</span><span class="o">.</span><span class="n">dumps</span><span class="p">(</span><span class="n">outfiles</span><span class="p">)</span> <span class="o">+ [...]
+ <span class="k">for</span> <span class="n">f</span> <span class="ow">in</span> <span class="n">outfiles</span><span class="p">:</span>
+ <span class="nb">open</span><span class="p">(</span><span class="n">f</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="n">output_text</span><span class="p">)</span>
+
+
+
+
+<span class="c">#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888</span>
+
+<span class="c"># Tasks</span>
+
+
+<span class="c">#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888</span>
+
+<span class="c">#</span>
+<span class="c"># task1</span>
+<span class="c">#</span>
+<span class="nd">@files</span><span class="p">(</span><span class="bp">None</span><span class="p">,</span> <span class="s">'a.1'</span><span class="p">,</span> <span class="n">logger_proxy</span><span class="p">,</span> <span class="n">logging_mutex</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">task1</span><span class="p">(</span><span class="n">infiles</span><span class="p">,</span> <span class="n">outfiles</span><span class="p">,</span> <span class="o">*</span><span class="n">extra_params</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> First task</span>
+<span class="sd"> """</span>
+ <span class="n">test_job_io</span><span class="p">(</span><span class="n">infiles</span><span class="p">,</span> <span class="n">outfiles</span><span class="p">,</span> <span class="n">extra_params</span><span class="p">)</span>
+
+
+
+<span class="c">#</span>
+<span class="c"># task2</span>
+<span class="c">#</span>
+<span class="nd">@transform</span><span class="p">(</span><span class="n">task1</span><span class="p">,</span> <span class="n">regex</span><span class="p">(</span><span class="s">'.1'</span><span class="p">),</span> <span class="s">'.2'</span><span class="p">,</span> <span class="n">logger_proxy</span><span class="p">,</span> <span class="n">logging_mutex</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">task2</span><span class="p">(</span><span class="n">infiles</span><span class="p">,</span> <span class="n">outfiles</span><span class="p">,</span> <span class="o">*</span><span class="n">extra_params</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Second task</span>
+<span class="sd"> """</span>
+ <span class="n">test_job_io</span><span class="p">(</span><span class="n">infiles</span><span class="p">,</span> <span class="n">outfiles</span><span class="p">,</span> <span class="n">extra_params</span><span class="p">)</span>
+
+
+
+<span class="c">#</span>
+<span class="c"># task3</span>
+<span class="c">#</span>
+<span class="nd">@transform</span><span class="p">(</span><span class="n">task2</span><span class="p">,</span> <span class="n">regex</span><span class="p">(</span><span class="s">'.2'</span><span class="p">),</span> <span class="s">'.3'</span><span class="p">,</span> <span class="n">logger_proxy</span><span class="p">,</span> <span class="n">logging_mutex</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">task3</span><span class="p">(</span><span class="n">infiles</span><span class="p">,</span> <span class="n">outfiles</span><span class="p">,</span> <span class="o">*</span><span class="n">extra_params</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Third task</span>
+<span class="sd"> """</span>
+ <span class="n">test_job_io</span><span class="p">(</span><span class="n">infiles</span><span class="p">,</span> <span class="n">outfiles</span><span class="p">,</span> <span class="n">extra_params</span><span class="p">)</span>
+
+
+
+<span class="c">#</span>
+<span class="c"># task4</span>
+<span class="c">#</span>
+<span class="nd">@transform</span><span class="p">(</span><span class="n">task3</span><span class="p">,</span> <span class="n">regex</span><span class="p">(</span><span class="s">'.3'</span><span class="p">),</span> <span class="s">'.4'</span><span class="p">,</span> <span class="n">logger_proxy</span><span class="p">,</span> <span class="n">logging_mutex</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">task4</span><span class="p">(</span><span class="n">infiles</span><span class="p">,</span> <span class="n">outfiles</span><span class="p">,</span> <span class="o">*</span><span class="n">extra_params</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Fourth task</span>
+<span class="sd"> """</span>
+ <span class="n">test_job_io</span><span class="p">(</span><span class="n">infiles</span><span class="p">,</span> <span class="n">outfiles</span><span class="p">,</span> <span class="n">extra_params</span><span class="p">)</span>
+
+
+<span class="c">#</span>
+<span class="c"># Necessary to protect the "entry point" of the program under windows.</span>
+<span class="c"># see: http://docs.python.org/library/multiprocessing.html#multiprocessing-programming</span>
+<span class="c">#</span>
+<span class="n">pipeline_run</span><span class="p">([</span><span class="n">task4</span><span class="p">],</span> <span class="n">multiprocess</span> <span class="o">=</span> <span class="mi">10</span><span class="p">,</span> <span class="n">logger</span> <span class="o">=</span> <span class="n">logger_proxy</span><span class="p">)</span>
+</pre></div>
+</div>
+</div></blockquote>
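+<p>Distilled from the listing above, the essential pattern is that each job
+acquires the shared mutex before writing through the proxy, so that log
+messages from concurrently running processes do not interleave
+(a minimal sketch using only the names defined in the example):</p>
+<div class="highlight-python"><pre># inside any pipelined function that was passed (logger_proxy, logging_mutex)
+with logging_mutex:
+    logger_proxy.debug("message written safely from a worker process")</pre>
+</div>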
+</div>
+<div class="section" id="resulting-output">
+<h2>Resulting Output<a class="headerlink" href="#resulting-output" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="gp">>>> </span><span class="n">pipeline_run</span><span class="p">([</span><span class="n">task4</span><span class="p">],</span> <span class="n">multiprocess</span> <span class="o">=</span> <span class="mi">10</span><span class="p">,</span> <span class="n">logger</span> <span class="o">=</span> <span class="n">logger_proxy</span><span class="p">)</span>
+<span class="go"> job = [null, "a.1"]</span>
+<span class="go"> job = ["a.1", "a.2"]</span>
+<span class="go"> job = ["a.2", "a.3"]</span>
+<span class="go"> job = ["a.3", "a.4"]</span>
+</pre></div>
+</div>
+<p>The log file <tt class="docutils literal"><span class="pre">pipeline.log</span></tt> will contain our unimaginative log messages:</p>
+<div class="highlight-python"><pre>2009-11-15 03:04:55,884 - my_logger - DEBUG - job = [null, "a.1"], process name = PoolWorker-2
+2009-11-15 03:04:56,941 - my_logger - INFO - Job = [None -> a.1, <LoggingProxy>, <thread.lock>] completed
+2009-11-15 03:04:56,942 - my_logger - INFO - Completed Task = task1
+2009-11-15 03:04:56,945 - my_logger - DEBUG - job = ["a.1", "a.2"], process name = PoolWorker-4
+2009-11-15 03:04:57,962 - my_logger - INFO - Job = [a.1 -> a.2, <LoggingProxy>, <thread.lock>] completed
+2009-11-15 03:04:57,962 - my_logger - INFO - Completed Task = task2
+2009-11-15 03:04:57,965 - my_logger - DEBUG - job = ["a.2", "a.3"], process name = PoolWorker-3
+2009-11-15 03:04:59,009 - my_logger - INFO - Job = [a.2 -> a.3, <LoggingProxy>, <thread.lock>] completed
+2009-11-15 03:04:59,010 - my_logger - INFO - Completed Task = task3
+2009-11-15 03:04:59,013 - my_logger - DEBUG - job = ["a.3", "a.4"], process name = PoolWorker-5
+2009-11-15 03:05:00,024 - my_logger - INFO - Job = [a.3 -> a.4, <LoggingProxy>, <thread.lock>] completed
+2009-11-15 03:05:00,025 - my_logger - INFO - Completed Task = task4</pre>
+</div>
+</div></blockquote>
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="../../contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#">Code for Chapter 16: Logging progress through a pipeline</a><ul>
+<li><a class="reference internal" href="#code">Code</a></li>
+<li><a class="reference internal" href="#resulting-output">Resulting Output</a></li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="dependencies_code.html"
+ title="previous chapter">Code for Chapter 9: Checking dependencies to run tasks in order</a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="onthefly_code.html"
+ title="next chapter">Code for Chapter 10: Generating parameters on the fly</a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../../_sources/tutorials/manual/logging_code.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="../../decorators/decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="../../decorators/originate.html">@originate</a> </li>
+ <li><a href="../../decorators/split.html">@split</a> </li>
+ <li><a href="../../decorators/transform.html">@transform</a> </li>
+ <li><a href="../../decorators/merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="../../decorators/subdivide.html">@subdivide</a> </li>
+ <li><a href="../../decorators/transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="../../decorators/collate.html">@collate</a> </li>
+ <li><a href="../../decorators/collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="../../decorators/graphviz.html">@graphviz</a> </li>
+ <li><a href="../../decorators/mkdir.html">@mkdir</a> </li>
+ <li><a href="../../decorators/follows.html">@follows / mkdir</a> </li>
+ <li><a href="../../decorators/posttask.html">@posttask touch_file</a> </li>
+ <li><a href="../../decorators/active_if.html">@active_if</a> </li>
+ <li><a href="../../decorators/jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="../../decorators/product.html">@product </a> </li>
+ <li><a href="../../decorators/permutations.html">@permutations </a> </li>
+ <li><a href="../../decorators/combinations.html">@combinations </a> </li>
+ <li><a href="../../decorators/combinations_with_replacement.html">@combinations_ _with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="../../decorators/decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="../../decorators/files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="../../decorators/parallel.html">@parallel</a> </li>
+ <li><a href="../../decorators/check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="../../decorators/indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="onthefly_code.html" title="Code for Chapter 10: Generating parameters on the fly"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="dependencies_code.html" title="Code for Chapter 9: Checking dependencies to run tasks in order"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="../new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/tutorials/manual/manual_code.html b/doc/_build/html/tutorials/manual/manual_code.html
new file mode 100644
index 0000000..aee5a44
--- /dev/null
+++ b/doc/_build/html/tutorials/manual/manual_code.html
@@ -0,0 +1,193 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>Code for the manual tutorial — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../../index.html" />
+ <link rel="next" title="Code for the simple tutorial: 8 steps to Ruffus" href="../simple_tutorial/simple_tutorial_code.html" />
+ <link rel="prev" title="Ruffus code" href="../../examples/bioinformatics/part2_code.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="../simple_tutorial/simple_tutorial_code.html" title="Code for the simple tutorial: 8 steps to Ruffus"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="../../examples/bioinformatics/part2_code.html" title="Ruffus code"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="../new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <div class="section" id="code-for-the-manual-tutorial">
+<span id="manual-code"></span><h1>Code for the manual tutorial<a class="headerlink" href="#code-for-the-manual-tutorial" title="Permalink to this headline">¶</a></h1>
+<blockquote>
+<div><ul class="simple">
+<li><a class="reference internal" href="manual_introduction.html#manual-introduction"><em>The **Ruffus** manual</em></a></li>
+</ul>
+</div></blockquote>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="../../examples/bioinformatics/part2_code.html"
+ title="previous chapter">Ruffus code</a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="../simple_tutorial/simple_tutorial_code.html"
+ title="next chapter">Code for the simple tutorial: 8 steps to <em>Ruffus</em></a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../../_sources/tutorials/manual/manual_code.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="../../decorators/decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="../../decorators/originate.html">@originate</a> </li>
+ <li><a href="../../decorators/split.html">@split</a> </li>
+ <li><a href="../../decorators/transform.html">@transform</a> </li>
+ <li><a href="../../decorators/merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="../../decorators/subdivide.html">@subdivide</a> </li>
+ <li><a href="../../decorators/transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="../../decorators/collate.html">@collate</a> </li>
+ <li><a href="../../decorators/collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="../../decorators/graphviz.html">@graphviz</a> </li>
+ <li><a href="../../decorators/mkdir.html">@mkdir</a> </li>
+ <li><a href="../../decorators/follows.html">@follows / mkdir</a> </li>
+ <li><a href="../../decorators/posttask.html">@posttask touch_file</a> </li>
+ <li><a href="../../decorators/active_if.html">@active_if</a> </li>
+ <li><a href="../../decorators/jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="../../decorators/product.html">@product </a> </li>
+ <li><a href="../../decorators/permutations.html">@permutations </a> </li>
+ <li><a href="../../decorators/combinations.html">@combinations </a> </li>
+ <li><a href="../../decorators/combinations_with_replacement.html">@combinations_ _with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="../../decorators/decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="../../decorators/files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="../../decorators/parallel.html">@parallel</a> </li>
+ <li><a href="../../decorators/check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="../../decorators/indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="../simple_tutorial/simple_tutorial_code.html" title="Code for the simple tutorial: 8 steps to Ruffus"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="../../examples/bioinformatics/part2_code.html" title="Ruffus code"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="../new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/tutorials/manual/manual_contents.html b/doc/_build/html/tutorials/manual/manual_contents.html
new file mode 100644
index 0000000..ee18bc5
--- /dev/null
+++ b/doc/_build/html/tutorials/manual/manual_contents.html
@@ -0,0 +1,213 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>Ruffus Manual: Table of Contents: — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../../index.html" />
+ <link rel="next" title="Ruffus Manual" href="manual_introduction.html" />
+ <link rel="prev" title="ruffus.proxy_logger" href="../../proxy_logger.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="manual_introduction.html" title="Ruffus Manual"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="../../proxy_logger.html" title="ruffus.proxy_logger"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="../new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <div class="section" id="ruffus-manual-table-of-contents">
+<span id="manual"></span><h1><strong>Ruffus</strong> Manual: Table of Contents:<a class="headerlink" href="#ruffus-manual-table-of-contents" title="Permalink to this headline">¶</a></h1>
+<blockquote>
+<div><p><a class="reference internal" href="manual_introduction.html#manual-introduction"><em>manual_introduction</em></a></p>
+<ol class="arabic simple">
+<li><a class="reference internal" href="follows.html#manual-follows"><em>follows</em></a></li>
+<li><a class="reference internal" href="tasks_as_recipes.html#manual-tasks-as-recipes"><em>tasks_as_recipes</em></a></li>
+<li><a class="reference internal" href="files.html#manual-files"><em>files</em></a></li>
+<li><a class="reference internal" href="tasks_and_globs_in_inputs.html#manual-tasks-and-globs-in-inputs"><em>tasks_and_globs_in_inputs</em></a></li>
+<li><a class="reference internal" href="tracing_pipeline_parameters.html#manual-tracing-pipeline-parameters"><em>tracing_pipeline_parameters</em></a></li>
+<li><a class="reference internal" href="parallel_processing.html#manual-multiprocessing"><em>parallel_processing</em></a></li>
+<li><a class="reference internal" href="split.html#manual-split"><em>split</em></a></li>
+<li><a class="reference internal" href="transform.html#manual-transform"><em>transform</em></a></li>
+<li><a class="reference internal" href="merge.html#manual-merge"><em>merge</em></a></li>
+<li><a class="reference internal" href="posttask.html#manual-posttask"><em>posttask</em></a></li>
+<li><a class="reference internal" href="jobs_limit.html#manual-jobs-limit"><em>jobs_limit</em></a></li>
+<li><a class="reference internal" href="dependencies.html#manual-dependencies"><em>dependencies</em></a></li>
+<li><a class="reference internal" href="onthefly.html#manual-on-the-fly"><em>onthefly</em></a></li>
+<li><a class="reference internal" href="collate.html#manual-collate"><em>collate</em></a></li>
+<li><a class="reference internal" href="advanced_transform.html#manual-advanced-transform"><em>advanced_transform</em></a></li>
+<li><a class="reference internal" href="parallel.html#manual-parallel"><em>parallel</em></a></li>
+<li><a class="reference internal" href="check_if_uptodate.html#manual-check-if-uptodate"><em>check_if_uptodate</em></a></li>
+<li><a class="reference internal" href="exceptions.html#manual-exceptions"><em>exceptions</em></a></li>
+<li><a class="reference internal" href="logging.html#manual-logging"><em>logging</em></a></li>
+<li><a class="reference internal" href="files_re.html#manual-files-re"><em>files_re</em></a></li>
+</ol>
+</div></blockquote>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="../../proxy_logger.html"
+ title="previous chapter">ruffus.proxy_logger</a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="manual_introduction.html"
+ title="next chapter"><strong>Ruffus</strong> Manual</a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../../_sources/tutorials/manual/manual_contents.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="../../decorators/decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="../../decorators/originate.html">@originate</a> </li>
+ <li><a href="../../decorators/split.html">@split</a> </li>
+ <li><a href="../../decorators/transform.html">@transform</a> </li>
+ <li><a href="../../decorators/merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="../../decorators/subdivide.html">@subdivide</a> </li>
+ <li><a href="../../decorators/transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="../../decorators/collate.html">@collate</a> </li>
+ <li><a href="../../decorators/collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="../../decorators/graphviz.html">@graphviz</a> </li>
+ <li><a href="../../decorators/mkdir.html">@mkdir</a> </li>
+ <li><a href="../../decorators/follows.html">@follows / mkdir</a> </li>
+ <li><a href="../../decorators/posttask.html">@posttask touch_file</a> </li>
+ <li><a href="../../decorators/active_if.html">@active_if</a> </li>
+ <li><a href="../../decorators/jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="../../decorators/product.html">@product </a> </li>
+ <li><a href="../../decorators/permutations.html">@permutations </a> </li>
+ <li><a href="../../decorators/combinations.html">@combinations </a> </li>
+ <li><a href="../../decorators/combinations_with_replacement.html">@combinations_ _with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="../../decorators/decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="../../decorators/files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="../../decorators/parallel.html">@parallel</a> </li>
+ <li><a href="../../decorators/check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="../../decorators/indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="manual_introduction.html" title="Ruffus Manual"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="../../proxy_logger.html" title="ruffus.proxy_logger"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="../new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/tutorials/manual/manual_introduction.html b/doc/_build/html/tutorials/manual/manual_introduction.html
new file mode 100644
index 0000000..1b415be
--- /dev/null
+++ b/doc/_build/html/tutorials/manual/manual_introduction.html
@@ -0,0 +1,365 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>Ruffus Manual — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../../index.html" />
+ <link rel="next" title="Chapter 1 : Arranging tasks into a pipeline with @follows" href="follows.html" />
+ <link rel="prev" title="Ruffus Manual: Table of Contents:" href="manual_contents.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="follows.html" title="Chapter 1 : Arranging tasks into a pipeline with @follows"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="manual_contents.html" title="Ruffus Manual: Table of Contents:"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="../new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <span class="target" id="manual-introduction"></span><div class="section" id="ruffus-manual">
+<span id="index-0"></span><h1><strong>Ruffus</strong> Manual<a class="headerlink" href="#ruffus-manual" title="Permalink to this headline">¶</a></h1>
+<div class="line-block">
+<div class="line">The chapters of this manual go through each of the features of <strong>Ruffus</strong> in turn.</div>
+<div class="line">Some of these (especially those labelled <strong>esoteric</strong> or <strong>deprecated</strong>) may not
+be of interest to all users of <strong>Ruffus</strong>.</div>
+</div>
+<p>If you are looking for a quick introduction to <strong>Ruffus</strong>, you may want to look at the
+<a class="reference internal" href="../simple_tutorial/simple_tutorial.html#simple-tutorial"><em>Simple Tutorial</em></a> first, some of whose content is shared with,
+or elaborated on by, this manual.</p>
+<div class="section" id="introduction">
+<h2>Introduction<a class="headerlink" href="#introduction" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>The <strong>Ruffus</strong> module is a lightweight way to run computational pipelines.</p>
+<p>Computational pipelines often become quite simple
+if we break the process down into discrete stages.</p>
+<div class="admonition note">
+<p class="first admonition-title">Note</p>
+<p class="last">Ruffus refers to each stage of your pipeline as a <a class="reference internal" href="../../glossary.html#term-task"><em class="xref std std-term">task</em></a>.</p>
+</div>
+<div class="line-block">
+<div class="line">Let us start with the usual “Hello World”.</div>
+<div class="line">We have the following two python functions which
+we would like to turn into an automatic pipeline:</div>
+</div>
+<blockquote>
+<div><img alt="../../_images/simple_tutorial_hello_world.png" src="../../_images/simple_tutorial_hello_world.png" />
+</div></blockquote>
+<p>The simplest <strong>Ruffus</strong> pipeline would look like this (a minimal code sketch follows at the end of this section):</p>
+<blockquote>
+<div><img alt="../../_images/simple_tutorial_intro_follows.png" src="../../_images/simple_tutorial_intro_follows.png" />
+</div></blockquote>
+<p>The functions which do the actual work of each stage of the pipeline remain unchanged.
+The role of <strong>Ruffus</strong> is to make sure these functions are called in the right order,
+with the right parameters, running in parallel using multiprocessing if desired.</p>
+<p>There are three simple parts to building a <strong>ruffus</strong> pipeline</p>
+<blockquote>
+<div><ol class="arabic simple">
+<li>importing ruffus</li>
+<li>“Decorating” functions which are part of the pipeline</li>
+<li>Running the pipeline!</li>
+</ol>
+</div></blockquote>
+</div></blockquote>
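+<p>To make the three steps above concrete, the &#8220;Hello World&#8221; pipeline sketched
+in the pictures might be written as follows (the function bodies are
+illustrative; only the decorator and the final call matter):</p>
+<div class="highlight-python"><pre>from ruffus import *
+
+def first_task():
+    print("Hello")
+
+# run second_task only after first_task has completed
+@follows(first_task)
+def second_task():
+    print("world")
+
+pipeline_run([second_task])</pre>
+</div>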
+<span class="target" id="manual-introduction-import"></span></div>
+<div class="section" id="importing-ruffus">
+<span id="index-1"></span><h2>Importing ruffus<a class="headerlink" href="#importing-ruffus" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>The most convenient way to use ruffus is to import the various names directly:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p>This will allow <strong>ruffus</strong> terms to be used directly in your code. This is also
+the style we have adopted for this manual.</p>
+<table border="1" class="docutils">
+<colgroup>
+<col width="50%" />
+<col width="50%" />
+</colgroup>
+<thead valign="bottom">
+<tr class="row-odd"><th class="head stub">Category</th>
+<th class="head">Terms</th>
+</tr>
+</thead>
+<tbody valign="top">
+<tr class="row-even"><th class="stub"><em>Pipeline functions</em></th>
+<td><div class="first last highlight-python"><div class="highlight"><pre><span class="n">pipeline_printout</span>
+<span class="n">pipeline_printout_graph</span>
+<span class="n">pipeline_run</span>
+<span class="n">register_cleanup</span>
+</pre></div>
+</div>
+</td>
+</tr>
+<tr class="row-odd"><th class="stub"><em>Decorators</em></th>
+<td><div class="first last highlight-python"><pre>@follows
+@files
+@split
+@transform
+@merge
+@collate
+@posttask
+@jobs_limit
+@parallel
+@check_if_uptodate
+@files_re</pre>
+</div>
+</td>
+</tr>
+<tr class="row-even"><th class="stub"><em>Loggers</em></th>
+<td><div class="first last highlight-python"><div class="highlight"><pre><span class="n">stderr_logger</span>
+<span class="n">black_hole_logger</span>
+</pre></div>
+</div>
+</td>
+</tr>
+<tr class="row-odd"><th class="stub"><em>Parameter disambiguating Indicators</em></th>
+<td><div class="first last highlight-python"><div class="highlight"><pre><span class="n">suffix</span>
+<span class="n">regex</span>
+<span class="n">inputs</span>
+<span class="n">touch_file</span>
+<span class="n">combine</span>
+<span class="n">mkdir</span>
+<span class="n">output_from</span>
+</pre></div>
+</div>
+</td>
+</tr>
+</tbody>
+</table>
+</div></blockquote>
+<dl class="docutils">
+<dt>If any of these clash with names in your code, you can use qualified names instead:</dt>
+<dd><div class="first last highlight-python"><div class="highlight"><pre><span class="kn">import</span> <span class="nn">ruffus</span>
+
+<span class="n">ruffus</span><span class="o">.</span><span class="n">pipeline_printout</span><span class="p">(</span><span class="s">"..."</span><span class="p">)</span>
+</pre></div>
+</div>
+</dd>
+</dl>
+</div>
+<div class="section" id="decorating-functions">
+<span id="manual-introduction-decorators"></span><span id="index-2"></span><h2>“Decorating” functions<a class="headerlink" href="#decorating-functions" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>You need to tag or <a class="reference internal" href="../../glossary.html#term-decorator"><em class="xref std std-term">decorate</em></a> existing functions to tell <strong>Ruffus</strong> that they are part
+of the pipeline.</p>
+<div class="admonition note">
+<p class="first admonition-title">Note</p>
+<p><a class="reference internal" href="../../glossary.html#term-decorator"><em class="xref std std-term">decorator</em></a>s are ways to tag or mark out functions.</p>
+<p>They start with an <tt class="docutils literal"><span class="pre">@</span></tt> prefix and take a number of parameters in parentheses.</p>
+<img alt="../../_images/simple_tutorial_decorator_syntax.png" class="last" src="../../_images/simple_tutorial_decorator_syntax.png" />
+</div>
+<p>The <strong>ruffus</strong> decorator <a class="reference internal" href="../../decorators/follows.html#decorators-follows"><em>@follows</em></a> makes sure that
+<tt class="docutils literal"><span class="pre">second_task</span></tt> follows <tt class="docutils literal"><span class="pre">first_task</span></tt>.</p>
+<div class="line-block">
+<div class="line">Multiple <a class="reference internal" href="../../glossary.html#term-decorator"><em class="xref std std-term">decorator</em></a>s can be used for each <a class="reference internal" href="../../glossary.html#term-task"><em class="xref std std-term">task</em></a> function to add functionality
+to <em>Ruffus</em> pipeline functions.</div>
+<div class="line">However, the decorated Python functions can still be
+called normally, outside of <em>Ruffus</em> (a short sketch follows at the end of this section).</div>
+<div class="line"><em>Ruffus</em> <a class="reference internal" href="../../glossary.html#term-decorator"><em class="xref std std-term">decorator</em></a>s can be added to (stacked on top of) any function in any order.</div>
+</div>
+<ul class="simple">
+<li><a class="reference internal" href="follows.html#manual-follows"><em>More on @follows in Chapter 1</em></a></li>
+<li><a class="reference internal" href="../../decorators/follows.html#decorators-follows"><em>@follows syntax in detail</em></a></li>
+</ul>
+</div></blockquote>
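+<p>Because decoration does not change the underlying function, the sketch from
+the introduction can also be exercised outside of <em>Ruffus</em>, for example
+when testing a single stage by hand:</p>
+<div class="highlight-python"><pre># plain function calls; no pipeline machinery is involved
+first_task()
+second_task()</pre>
+</div>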
+</div>
+<div class="section" id="running-the-pipeline">
+<span id="manual-introduction-running-pipeline"></span><span id="index-3"></span><h2>Running the pipeline<a class="headerlink" href="#running-the-pipeline" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>We run the pipeline by specifying the <strong>last</strong> stage (<a class="reference internal" href="../../glossary.html#term-task"><em class="xref std std-term">task</em></a> function) of the pipeline.
+Ruffus will know what other functions this depends on, following the appropriate chain of
+dependencies automatically, making sure that the entire pipeline is up-to-date.</p>
+<p>In our example above, because <tt class="docutils literal"><span class="pre">second_task</span></tt> depends on <tt class="docutils literal"><span class="pre">first_task</span></tt>, both functions are executed in order.</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="gp">>>> </span><span class="n">pipeline_run</span><span class="p">([</span><span class="n">second_task</span><span class="p">],</span> <span class="n">verbose</span> <span class="o">=</span> <span class="mi">1</span><span class="p">)</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p><strong>Ruffus</strong> by default prints out the <tt class="docutils literal"><span class="pre">verbose</span></tt> progress through your pipeline,
+interleaved with our <tt class="docutils literal"><span class="pre">Hello</span></tt> and <tt class="docutils literal"><span class="pre">World</span></tt>.</p>
+<blockquote>
+<div><img alt="../../_images/simple_tutorial_hello_world_output.png" src="../../_images/simple_tutorial_hello_world_output.png" />
+</div></blockquote>
+</div></blockquote>
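+<p>The planned jobs can also be previewed without executing anything by using
+<tt class="docutils literal"><span class="pre">pipeline_printout</span></tt>
+(a dry run; the verbosity level below is only an example):</p>
+<div class="highlight-python"><pre>import sys
+
+# show which tasks are out of date and would be run, without running them
+pipeline_printout(sys.stdout, [second_task], verbose = 3)</pre>
+</div>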
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="../../contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#"><strong>Ruffus</strong> Manual</a><ul>
+<li><a class="reference internal" href="#introduction">Introduction</a></li>
+<li><a class="reference internal" href="#importing-ruffus">Importing ruffus</a></li>
+<li><a class="reference internal" href="#decorating-functions">“Decorating” functions</a></li>
+<li><a class="reference internal" href="#running-the-pipeline">Running the pipeline</a></li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="manual_contents.html"
+ title="previous chapter"><strong>Ruffus</strong> Manual: Table of Contents:</a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="follows.html"
+ title="next chapter"><strong>Chapter 1</strong> : <cite>Arranging tasks into a pipeline with</cite> <strong>@follows</strong></a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../../_sources/tutorials/manual/manual_introduction.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="../../decorators/decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="../../decorators/originate.html">@originate</a> </li>
+ <li><a href="../../decorators/split.html">@split</a> </li>
+ <li><a href="../../decorators/transform.html">@transform</a> </li>
+ <li><a href="../../decorators/merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="../../decorators/subdivide.html">@subdivide</a> </li>
+ <li><a href="../../decorators/transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="../../decorators/collate.html">@collate</a> </li>
+ <li><a href="../../decorators/collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="../../decorators/graphviz.html">@graphviz</a> </li>
+ <li><a href="../../decorators/mkdir.html">@mkdir</a> </li>
+ <li><a href="../../decorators/follows.html">@follows / mkdir</a> </li>
+ <li><a href="../../decorators/posttask.html">@posttask touch_file</a> </li>
+ <li><a href="../../decorators/active_if.html">@active_if</a> </li>
+ <li><a href="../../decorators/jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="../../decorators/product.html">@product </a> </li>
+ <li><a href="../../decorators/permutations.html">@permutations </a> </li>
+ <li><a href="../../decorators/combinations.html">@combinations </a> </li>
+                                        <li><a href="../../decorators/combinations_with_replacement.html">@combinations_with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="../../decorators/decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="../../decorators/files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="../../decorators/parallel.html">@parallel</a> </li>
+ <li><a href="../../decorators/check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="../../decorators/indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="follows.html" title="Chapter 1 : Arranging tasks into a pipeline with @follows"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="manual_contents.html" title="Ruffus Manual: Table of Contents:"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="../new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/tutorials/manual/merge.html b/doc/_build/html/tutorials/manual/merge.html
new file mode 100644
index 0000000..17cc046
--- /dev/null
+++ b/doc/_build/html/tutorials/manual/merge.html
@@ -0,0 +1,291 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>Chapter 9: Merge multiple input into a single result — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../../index.html" />
+ <link rel="next" title="Chapter 10: Signal the completion of each stage of our pipeline with @posttask" href="posttask.html" />
+ <link rel="prev" title="Chapter 8: Applying the same recipe to create many different files with @transform" href="transform.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="posttask.html" title="Chapter 10: Signal the completion of each stage of our pipeline with @posttask"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="transform.html" title="Chapter 8: Applying the same recipe to create many different files with @transform"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="../new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <div class="section" id="manual-merge-chapter-num-merge-multiple-input-into-a-single-result">
+<span id="manual-merge"></span><h1><strong>Chapter 9</strong>: <strong>Merge</strong> <cite>multiple input into a single result</cite><a class="headerlink" href="#manual-merge-chapter-num-merge-multiple-input-into-a-single-result" title="Permalink to this headline">¶</a></h1>
+<blockquote>
+<div><table class="hlist"><tr><td><ul class="simple">
+<li><a class="reference internal" href="manual_contents.html#manual"><em>Manual overview</em></a></li>
+</ul>
+</td><td><ul class="simple">
+<li><a class="reference internal" href="../../decorators/merge.html#decorators-merge"><em>@merge</em></a> syntax in detail</li>
+</ul>
+</td></tr></table>
+<p>At the conclusion of our pipeline, or at key selected points along the way, we might need a
+summary of our progress: gathering data from a multitude of files or disparate <em>inputs</em>
+and summarising it in the <em>output</em> of a single <a class="reference internal" href="../../glossary.html#term-job"><em class="xref std std-term">job</em></a>.</p>
+<p><em>Ruffus</em> uses the <a class="reference internal" href="../../decorators/merge.html#decorators-merge"><em>@merge</em></a> decorator for this purpose.</p>
+<p>Although <strong>@merge</strong> takes multiple <em>inputs</em> and produces a single <em>output</em>, <strong>Ruffus</strong>
+is again agnostic as to the sort of data contained within that <em>output</em>. It can be a single
+(string) file name, or an arbitrarily complicated nested structure containing numbers, objects etc.
+As always, strings contained (even within nested sequences) in <em>output</em> will be treated
+as file names for the purpose of checking whether the <a class="reference internal" href="../../glossary.html#term-task"><em class="xref std std-term">task</em></a> is up-to-date.</p>
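+<p>For example (a sketch only; <tt class="docutils literal"><span class="pre">previous_task</span></tt> and the file names here are hypothetical),
+the single <em>output</em> parameter may itself be a list, and every string inside it is checked as an
+output file name when deciding whether the task needs to rerun:</p>
+<div class="highlight-python"><pre>@merge(previous_task, ["all.summary", "all.log"])
+def summarise(input_file_names, output_params):
+    # output_params is the list ["all.summary", "all.log"]:
+    # both strings are treated as output file names for up-to-date checks
+    summary_file_name, log_file_name = output_params
+    open(summary_file_name, "w").write("%d input files\n" % len(input_file_names))
+    open(log_file_name, "w").write("done\n")</pre>
+</div>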
+</div></blockquote>
+<div class="section" id="merge">
+<span id="index-0"></span><h2><strong>@merge</strong><a class="headerlink" href="#merge" title="Permalink to this headline">¶</a></h2>
+<p>This example is borrowed from <a class="reference internal" href="../simple_tutorial/step6_transform.html#simple-tutorial-6th-step"><em>step 6</em></a> of the simple tutorial.</p>
+<blockquote>
+<div><div class="admonition note">
+<p class="first admonition-title">Note</p>
+<p class="last"><a class="reference internal" href="../simple_tutorial/step6_transform_code.html#simple-tutorial-6th-step-code"><em>Accompanying Python Code</em></a></p>
+</div>
+</div></blockquote>
+<div class="section" id="combining-partial-solutions-calculating-variances">
+<h3>Combining partial solutions: Calculating variances<a class="headerlink" href="#combining-partial-solutions-calculating-variances" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><table border="1" class="borderless docutils">
+<colgroup>
+<col width="1%" />
+<col width="99%" />
+</colgroup>
+<tbody valign="top">
+<tr class="row-odd"><td><p class="first centered">
+<strong>Step 6 from:</strong></p><img alt="../../_images/simple_tutorial_step5_sans_key.png" class="last" src="../../_images/simple_tutorial_step5_sans_key.png" />
+</td>
+<td><blockquote class="first last">
+<div><p>We wanted to calculate the sample variance of a large list of random numbers. We
+have seen previously how we can split up this large problem into small pieces
+(using <a class="reference internal" href="split.html#manual-split"><em>@split</em></a> in <strong>Chapter 7</strong>), and work out the
+partial solutions for each sub-problem (calculating sums with <a class="reference internal" href="transform.html#manual-transform"><em>@transform</em></a>
+in <strong>Chapter 8</strong> ).</p>
+<p>All that remains is to join up the partial solutions from the different <tt class="docutils literal"><span class="pre">.sums</span></tt> files
+and turn these into the variance as follows:</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="n">variance</span> <span class="o">=</span> <span class="p">(</span><span class="n">sum_squared</span> <span class="o">-</span> <span class="nb">sum</span> <span class="o">*</span> <span class="nb">sum</span> <span class="o">/</span> <span class="n">N</span><span class="p">)</span><span class="o">/</span><span class="n">N</span>
+</pre></div>
+</div>
+<p>where <tt class="docutils literal"><span class="pre">N</span></tt> is the number of values</p>
+<p>See the <a class="reference external" href="http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance">wikipedia</a> entry for a discussion of
+why this is a very naive approach!</p>
+</div></blockquote>
+</td>
+</tr>
+</tbody>
+</table>
+<p>To do this, all we have to do is go through each of the <tt class="docutils literal"><span class="pre">*.sums</span></tt> files, i.e.
+add up the <tt class="docutils literal"><span class="pre">sums</span></tt> and <tt class="docutils literal"><span class="pre">sum_squared</span></tt> values from each chunk. We can then apply the above (naive) formula.</p>
+<dl class="docutils">
+<dt>Merging files is straightforward in <strong>Ruffus</strong>:</dt>
+<dd><div class="first last highlight-python"><div class="highlight"><pre><span class="nd">@merge</span><span class="p">(</span><span class="n">step_5_calculate_sum_of_squares</span><span class="p">,</span> <span class="s">"variance.result"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">step_6_calculate_variance</span> <span class="p">(</span><span class="n">input_file_names</span><span class="p">,</span> <span class="n">output_file_name</span><span class="p">):</span>
+ <span class="c">#</span>
+ <span class="c"># add together sums and sums of squares from each input_file_name</span>
+ <span class="c"># calculate variance and write to output_file_name</span>
+ <span class="s">""</span>
+</pre></div>
+</div>
+</dd>
+</dl>
+<p>The <a class="reference internal" href="../../decorators/merge.html#decorators-merge"><em>@merge</em></a> decorator tells <em>Ruffus</em> to take all the files from the step 5 task (i.e. <tt class="docutils literal"><span class="pre">*.sums</span></tt>),
+and produce a single merged output file, <tt class="docutils literal"><span class="pre">variance.result</span></tt>.</p>
+<dl class="docutils">
+<dt>Thus if <tt class="docutils literal"><span class="pre">step_5_calculate_sum_of_squares</span></tt> created</dt>
+<dd><div class="first last line-block">
+<div class="line"><tt class="docutils literal"><span class="pre">1.sums</span></tt> and</div>
+<div class="line"><tt class="docutils literal"><span class="pre">2.sums</span></tt> etc.</div>
+</div>
+</dd>
+</dl>
+<p>This would result in the following function call:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="n">step_6_calculate_variance</span> <span class="p">([</span><span class="s">"1.sums"</span><span class="p">,</span> <span class="s">"2.sums"</span><span class="p">],</span> <span class="s">"variance.result"</span><span class="p">)</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p>The final result is, of course, in <tt class="docutils literal"><span class="pre">variance.result</span></tt>.</p>
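+<p>One possible body for <tt class="docutils literal"><span class="pre">step_6_calculate_variance</span></tt> (a sketch which assumes that each
+<tt class="docutils literal"><span class="pre">.sums</span></tt> file stores its chunk's <tt class="docutils literal"><span class="pre">sum_squared</span></tt>, <tt class="docutils literal"><span class="pre">sum</span></tt> and count on
+separate lines; the accompanying code may differ in detail):</p>
+<div class="highlight-python"><pre>@merge(step_5_calculate_sum_of_squares, "variance.result")
+def step_6_calculate_variance(input_file_names, output_file_name):
+    all_sum_squared = all_sum = all_cnt = 0.0
+    for input_file_name in input_file_names:
+        # assumed format: sum_squared, sum and count, one number per line
+        sum_squared, sum_, cnt = map(float, open(input_file_name).readlines())
+        all_sum_squared += sum_squared
+        all_sum         += sum_
+        all_cnt         += cnt
+    variance = (all_sum_squared - all_sum * all_sum / all_cnt) / all_cnt
+    open(output_file_name, "w").write("%s\n" % variance)</pre>
+</div>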
+</div></blockquote>
+</div>
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="../../contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#"><strong>Chapter 9</strong>: <strong>Merge</strong> <cite>multiple input into a single result</cite></a><ul>
+<li><a class="reference internal" href="#merge"><strong>@merge</strong></a><ul>
+<li><a class="reference internal" href="#combining-partial-solutions-calculating-variances">Combining partial solutions: Calculating variances</a></li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="transform.html"
+ title="previous chapter"><strong>Chapter 8</strong>: <cite>Applying the same recipe to create many different files with</cite> <strong>@transform</strong></a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="posttask.html"
+ title="next chapter"><strong>Chapter 10</strong>: <cite>Signal the completion of each stage of our pipeline with</cite> <strong>@posttask</strong></a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../../_sources/tutorials/manual/merge.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="../../decorators/decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="../../decorators/originate.html">@originate</a> </li>
+ <li><a href="../../decorators/split.html">@split</a> </li>
+ <li><a href="../../decorators/transform.html">@transform</a> </li>
+ <li><a href="../../decorators/merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="../../decorators/subdivide.html">@subdivide</a> </li>
+ <li><a href="../../decorators/transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="../../decorators/collate.html">@collate</a> </li>
+ <li><a href="../../decorators/collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="../../decorators/graphviz.html">@graphviz</a> </li>
+ <li><a href="../../decorators/mkdir.html">@mkdir</a> </li>
+ <li><a href="../../decorators/follows.html">@follows / mkdir</a> </li>
+ <li><a href="../../decorators/posttask.html">@posttask touch_file</a> </li>
+ <li><a href="../../decorators/active_if.html">@active_if</a> </li>
+ <li><a href="../../decorators/jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="../../decorators/product.html">@product </a> </li>
+ <li><a href="../../decorators/permutations.html">@permutations </a> </li>
+ <li><a href="../../decorators/combinations.html">@combinations </a> </li>
+                                        <li><a href="../../decorators/combinations_with_replacement.html">@combinations_with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="../../decorators/decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="../../decorators/files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="../../decorators/parallel.html">@parallel</a> </li>
+ <li><a href="../../decorators/check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="../../decorators/indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="posttask.html" title="Chapter 10: Signal the completion of each stage of our pipeline with @posttask"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="transform.html" title="Chapter 8: Applying the same recipe to create many different files with @transform"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="../new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/tutorials/manual/onthefly.html b/doc/_build/html/tutorials/manual/onthefly.html
new file mode 100644
index 0000000..bafc1a6
--- /dev/null
+++ b/doc/_build/html/tutorials/manual/onthefly.html
@@ -0,0 +1,324 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>Chapter 13: Generating parameters on the fly with @files — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../../index.html" />
+ <link rel="next" title="Chapter 14: @collate: group together disparate input into sets of results" href="collate.html" />
+ <link rel="prev" title="Chapter 12: Checking dependencies to run tasks in order" href="dependencies.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="collate.html" title="Chapter 14: @collate: group together disparate input into sets of results"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="dependencies.html" title="Chapter 12: Checking dependencies to run tasks in order"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="../new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <div class="section" id="manual-on-the-fly-chapter-num-generating-parameters-on-the-fly-with-files">
+<span id="manual-on-the-fly"></span><h1><strong>Chapter 13</strong>: <cite>Generating parameters on the fly with</cite> <strong>@files</strong><a class="headerlink" href="#manual-on-the-fly-chapter-num-generating-parameters-on-the-fly-with-files" title="Permalink to this headline">¶</a></h1>
+<blockquote>
+<div><table class="hlist"><tr><td><ul class="simple">
+<li><a class="reference internal" href="manual_contents.html#manual"><em>Manual overview</em></a></li>
+</ul>
+</td><td><ul class="simple">
+<li><a class="reference internal" href="../../decorators/files_ex.html#decorators-files-on-the-fly"><em>@files on-the-fly syntax in detail</em></a></li>
+</ul>
+</td></tr></table>
+<div class="line-block">
+<div class="line">Sometimes, it is necessary, or perhaps more convenient, to generate parameters on the fly or
+at runtime.</div>
+<div class="line">This powerful ability to generate the exact parameters you need is
+sometimes worth the slight increase in complexity.</div>
+</div>
+</div></blockquote>
+<div class="section" id="files">
+<span id="index-0"></span><h2><strong>@files</strong><a class="headerlink" href="#files" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>To generate parameters on the fly, pass the <strong>@files</strong> decorator a <a class="reference internal" href="../../glossary.html#term-generator"><em class="xref std std-term">generator</em></a> function which
+yields one list or tuple of parameters per job. For example:</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+<span class="k">def</span> <span class="nf">generate_parameters_on_the_fly</span><span class="p">():</span>
+ <span class="sd">"""</span>
+<span class="sd"> returns one list of parameters per job</span>
+<span class="sd"> """</span>
+ <span class="n">parameters</span> <span class="o">=</span> <span class="p">[</span>
+ <span class="p">[</span><span class="s">'A.input'</span><span class="p">,</span> <span class="s">'A.output'</span><span class="p">,</span> <span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">)],</span> <span class="c"># 1st job</span>
+ <span class="p">[</span><span class="s">'B.input'</span><span class="p">,</span> <span class="s">'B.output'</span><span class="p">,</span> <span class="p">(</span><span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">)],</span> <span class="c"># 2nd job</span>
+ <span class="p">[</span><span class="s">'C.input'</span><span class="p">,</span> <span class="s">'C.output'</span><span class="p">,</span> <span class="p">(</span><span class="mi">5</span><span class="p">,</span> <span class="mi">6</span><span class="p">)],</span> <span class="c"># 3rd job</span>
+ <span class="p">]</span>
+ <span class="k">for</span> <span class="n">job_parameters</span> <span class="ow">in</span> <span class="n">parameters</span><span class="p">:</span>
+ <span class="k">yield</span> <span class="n">job_parameters</span>
+
+<span class="nd">@files</span><span class="p">(</span><span class="n">generate_parameters_on_the_fly</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">pipeline_task</span><span class="p">(</span><span class="nb">input</span><span class="p">,</span> <span class="n">output</span><span class="p">,</span> <span class="n">extra</span><span class="p">):</span>
+ <span class="nb">open</span><span class="p">(</span><span class="n">output</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="nb">open</span><span class="p">(</span><span class="nb">input</span><span class="p">)</span><span class="o">.</span><span class="n">read</span><span class="p">())</span>
+ <span class="n">sys</span><span class="o">.</span><span class="n">stderr</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="s">"</span><span class="si">%d</span><span class="s"> + </span><span class="si">%d</span><span class="s"> => </span><span class="si">%d</span><span class="se">\n</span><span class="s">"</span> <span class="o">%</span> <span class="p">(</span><span class="n">extra</span><span class="p">[</span><span class=" [...]
+
+<span class="n">pipeline_run</span><span class="p">([</span><span class="n">pipeline_task</span><span class="p">])</span>
+</pre></div>
+</div>
+<p>Produces:</p>
+<div class="highlight-python"><pre>Task = parallel_task
+ 1 + 2 = 3
+ Job = ["A", 1, 2] completed
+ 3 + 4 = 7
+ Job = ["B", 3, 4] completed
+ 5 + 6 = 11
+ Job = ["C", 5, 6] completed</pre>
+</div>
+<div class="admonition note">
+<p class="first admonition-title">Note</p>
+<p>Be aware that the parameter generating function may be invoked
+<a class="reference internal" href="dependencies.html#manual-dependencies-checking-multiple-times"><em>more than once</em></a>:</p>
+<blockquote class="last">
+<div><div class="line-block">
+<div class="line">The first time to check if this part of the pipeline is up-to-date.</div>
+<div class="line">The second time when the pipeline task function is run.</div>
+</div>
+</div></blockquote>
+</div>
+<p>The resulting <em>inputs</em>, <em>outputs</em> and any additional extra parameters per job are
+treated normally for the purposes of checking to see if jobs are up-to-date and
+need to be re-run.</p>
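+<p>A convenient way to see which jobs the generator produces, and which of them would actually be
+(re)run, is to preview the pipeline with <tt class="docutils literal"><span class="pre">pipeline_printout()</span></tt>
+(a sketch, reusing the <tt class="docutils literal"><span class="pre">pipeline_task</span></tt> example above):</p>
+<div class="highlight-python"><pre>import sys
+
+# lists each generated job and its up-to-date status without running anything
+pipeline_printout(sys.stdout, [pipeline_task], verbose = 3)</pre>
+</div>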
+</div></blockquote>
+</div>
+<div class="section" id="permutations-and-combinations">
+<h2>Permutations and Combinations<a class="headerlink" href="#permutations-and-combinations" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>The <a class="reference internal" href="onthefly_code.html#manual-on-the-fly-code"><em>accompanying example</em></a> provides a more realistic reason why
+you would want to generate parameters on the fly. It is a fun piece of code, which generates
+N x M combinations from two sets of files as the <em>inputs</em> of a pipeline stage.</p>
+<p>The <em>input</em> / <em>output</em> filenames are generated by a pair of nested for-loops producing
+the N (outer loop) x M (inner loop) combinations, with the appropriate parameters
+for each job <tt class="docutils literal"><span class="pre">yield</span></tt>ed on each iteration of the inner loop. The gist of this is:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="c">#_________________________________________________________________________________________</span>
+<span class="c">#</span>
+<span class="c"># Step 1:</span>
+<span class="c">#</span>
+<span class="c"># N x M jobs</span>
+<span class="c">#_________________________________________________________________________________________</span>
+<span class="k">def</span> <span class="nf">generate_simulation_params</span> <span class="p">():</span>
+ <span class="sd">"""</span>
+<span class="sd"> Custom function to generate</span>
+<span class="sd"> file names for gene/gwas simulation study</span>
+<span class="sd"> """</span>
+ <span class="k">for</span> <span class="n">sim_file</span> <span class="ow">in</span> <span class="n">get_simulation_files</span><span class="p">():</span>
+ <span class="k">for</span> <span class="p">(</span><span class="n">gene</span><span class="p">,</span> <span class="n">gwas</span><span class="p">)</span> <span class="ow">in</span> <span class="n">get_gene_gwas_file_pairs</span><span class="p">():</span>
+ <span class="n">result_file</span> <span class="o">=</span> <span class="s">"</span><span class="si">%s</span><span class="s">.</span><span class="si">%s</span><span class="s">.results"</span> <span class="o">%</span> <span class="p">(</span><span class="n">gene</span><span class="p">,</span> <span class="n">sim_file</span><span class="p">)</span>
+ <span class="k">yield</span> <span class="p">(</span><span class="n">gene</span><span class="p">,</span> <span class="n">gwas</span><span class="p">,</span> <span class="n">sim_file</span><span class="p">),</span> <span class="n">result_file</span>
+
+
+
+<span class="nd">@files</span><span class="p">(</span><span class="n">generate_simulation_params</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">gwas_simulation</span><span class="p">(</span><span class="n">input_files</span><span class="p">,</span> <span class="n">output_file</span><span class="p">):</span>
+ <span class="s">"..."</span>
+</pre></div>
+</div>
+<dl class="docutils">
+<dt>If <tt class="docutils literal"><span class="pre">get_gene_gwas_file_pairs()</span></tt> produces:</dt>
+<dd><div class="first last highlight-python"><div class="highlight"><pre><span class="p">[</span><span class="s">'a.sim'</span><span class="p">,</span> <span class="s">'b.sim'</span><span class="p">,</span> <span class="s">'c.sim'</span><span class="p">]</span>
+</pre></div>
+</div>
+</dd>
+<dt>and <tt class="docutils literal"><span class="pre">get_gene_gwas_file_pairs()</span></tt> produces:</dt>
+<dd><div class="first last highlight-python"><div class="highlight"><pre><span class="p">[(</span><span class="s">'1.gene'</span><span class="p">,</span> <span class="s">'1.gwas'</span><span class="p">),</span> <span class="p">(</span><span class="s">'2.gene'</span><span class="p">,</span> <span class="s">'2.gwas'</span><span class="p">)]</span>
+</pre></div>
+</div>
+</dd>
+</dl>
+<p>then we would end up with <tt class="docutils literal"><span class="pre">3</span></tt> x <tt class="docutils literal"><span class="pre">2</span></tt> = <tt class="docutils literal"><span class="pre">6</span></tt> jobs and the following equivalent function calls:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="n">gwas_simulation</span><span class="p">((</span><span class="s">'1.gene'</span><span class="p">,</span> <span class="s">'1.gwas'</span><span class="p">,</span> <span class="s">'a.sim'</span><span class="p">),</span> <span class="s">"1.gene.a.sim.results"</span><span class="p">)</span>
+<span class="n">gwas_simulation</span><span class="p">((</span><span class="s">'2.gene'</span><span class="p">,</span> <span class="s">'2.gwas'</span><span class="p">,</span> <span class="s">'a.sim'</span><span class="p">),</span> <span class="s">"2.gene.a.sim.results"</span><span class="p">)</span>
+<span class="n">gwas_simulation</span><span class="p">((</span><span class="s">'1.gene'</span><span class="p">,</span> <span class="s">'1.gwas'</span><span class="p">,</span> <span class="s">'b.sim'</span><span class="p">),</span> <span class="s">"1.gene.b.sim.results"</span><span class="p">)</span>
+<span class="n">gwas_simulation</span><span class="p">((</span><span class="s">'2.gene'</span><span class="p">,</span> <span class="s">'2.gwas'</span><span class="p">,</span> <span class="s">'b.sim'</span><span class="p">),</span> <span class="s">"2.gene.b.sim.results"</span><span class="p">)</span>
+<span class="n">gwas_simulation</span><span class="p">((</span><span class="s">'1.gene'</span><span class="p">,</span> <span class="s">'1.gwas'</span><span class="p">,</span> <span class="s">'c.sim'</span><span class="p">),</span> <span class="s">"1.gene.c.sim.results"</span><span class="p">)</span>
+<span class="n">gwas_simulation</span><span class="p">((</span><span class="s">'2.gene'</span><span class="p">,</span> <span class="s">'2.gwas'</span><span class="p">,</span> <span class="s">'c.sim'</span><span class="p">),</span> <span class="s">"2.gene.c.sim.results"</span><span class="p">)</span>
+</pre></div>
+</div>
+</div></blockquote>
+</div></blockquote>
+<p>The <a class="reference internal" href="onthefly_code.html#manual-on-the-fly-code"><em>accompanying code</em></a> looks slightly more complicated because
+of some extra bookkeeping.</p>
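+<p>To experiment with the gist above on its own, the two helper functions can be replaced by
+hypothetical stand-ins that simply return the example lists (the accompanying code builds these
+lists from files on disk instead):</p>
+<div class="highlight-python"><pre># hypothetical stand-ins, only so that the gist above is self-contained
+def get_simulation_files():
+    return ['a.sim', 'b.sim', 'c.sim']
+
+def get_gene_gwas_file_pairs():
+    return [('1.gene', '1.gwas'), ('2.gene', '2.gwas')]</pre>
+</div>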
+</div></blockquote>
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="../../contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#"><strong>Chapter 13</strong>: <cite>Generating parameters on the fly with</cite> <strong>@files</strong></a><ul>
+<li><a class="reference internal" href="#files"><strong>@files</strong></a></li>
+<li><a class="reference internal" href="#permutations-and-combinations">Permutations and Combinations</a></li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="dependencies.html"
+ title="previous chapter"><strong>Chapter 12</strong>: <cite>Checking dependencies to run tasks in order</cite></a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="collate.html"
+ title="next chapter"><strong>Chapter 14</strong>: <strong>@collate</strong>: <cite>group together disparate input into sets of results</cite></a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../../_sources/tutorials/manual/onthefly.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="../../decorators/decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="../../decorators/originate.html">@originate</a> </li>
+ <li><a href="../../decorators/split.html">@split</a> </li>
+ <li><a href="../../decorators/transform.html">@transform</a> </li>
+ <li><a href="../../decorators/merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="../../decorators/subdivide.html">@subdivide</a> </li>
+ <li><a href="../../decorators/transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="../../decorators/collate.html">@collate</a> </li>
+ <li><a href="../../decorators/collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="../../decorators/graphviz.html">@graphviz</a> </li>
+ <li><a href="../../decorators/mkdir.html">@mkdir</a> </li>
+ <li><a href="../../decorators/follows.html">@follows / mkdir</a> </li>
+ <li><a href="../../decorators/posttask.html">@posttask touch_file</a> </li>
+ <li><a href="../../decorators/active_if.html">@active_if</a> </li>
+ <li><a href="../../decorators/jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="../../decorators/product.html">@product </a> </li>
+ <li><a href="../../decorators/permutations.html">@permutations </a> </li>
+ <li><a href="../../decorators/combinations.html">@combinations </a> </li>
+                                        <li><a href="../../decorators/combinations_with_replacement.html">@combinations_with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="../../decorators/decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="../../decorators/files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="../../decorators/parallel.html">@parallel</a> </li>
+ <li><a href="../../decorators/check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="../../decorators/indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="collate.html" title="Chapter 14: @collate: group together disparate input into sets of results"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="dependencies.html" title="Chapter 12: Checking dependencies to run tasks in order"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="../new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/tutorials/manual/onthefly_code.html b/doc/_build/html/tutorials/manual/onthefly_code.html
new file mode 100644
index 0000000..8987d1b
--- /dev/null
+++ b/doc/_build/html/tutorials/manual/onthefly_code.html
@@ -0,0 +1,507 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+    <title>Code for Chapter 13: Generating parameters on the fly — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../../index.html" />
+ <link rel="next" title="Code for Chapter 6: Applying the same recipe to create many different files" href="transform_code.html" />
+ <link rel="prev" title="Code for Chapter 16: Logging progress through a pipeline" href="logging_code.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="transform_code.html" title="Code for Chapter 6: Applying the same recipe to create many different files"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="logging_code.html" title="Code for Chapter 16: Logging progress through a pipeline"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="../new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <div class="section" id="code-for-chapter-10-generating-parameters-on-the-fly">
+<span id="manual-on-the-fly-code"></span><h1>Code for Chapter 10: Generating parameters on the fly<a class="headerlink" href="#code-for-chapter-10-generating-parameters-on-the-fly" title="Permalink to this headline">¶</a></h1>
+<blockquote>
+<div><ul class="simple">
+<li><a class="reference internal" href="manual_contents.html#manual"><em>Manual overview</em></a></li>
+<li><a class="reference internal" href="../../decorators/files_ex.html#decorators-files-on-the-fly"><em>@files on-the-fly syntax in detail</em></a></li>
+<li><a class="reference internal" href="onthefly.html#manual-on-the-fly"><em>Back</em></a></li>
+</ul>
+<div class="line-block">
+<div class="line">This script takes N pairs of input file pairs (with the suffices .gene and .gwas)</div>
+<div class="line">and runs them against M sets of simulation data (with the suffix .simulation)</div>
+<div class="line">A summary per input file pair is then produced</div>
+</div>
+<p>In pseudo-code:</p>
+<blockquote>
+<div><p>STEP_1:</p>
+<div class="highlight-python"><pre>for n_file in NNN_pairs_of_input_files:
+ for m_file in MMM_simulation_data:
+
+ [n_file.gene,
+ n_file.gwas,
+ m_file.simulation] -> n_file.m_file.simulation_res</pre>
+</div>
+<p>STEP_2:</p>
+<div class="highlight-python"><pre>for n_file in NNN_pairs_of_input_files:
+
+ n_file.*.simulation_res -> n_file.mean</pre>
+</div>
+<div class="line-block">
+<div class="line">n = CNT_GENE_GWAS_FILES</div>
+<div class="line">m = CNT_SIMULATION_FILES</div>
+</div>
+</div></blockquote>
+</div></blockquote>
+<div class="section" id="code">
+<h2>Code<a class="headerlink" href="#code" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+<span class="kn">import</span> <span class="nn">os</span>
+
+<span class="c">#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888</span>
+
+<span class="c"># constants</span>
+
+<span class="c">#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888</span>
+<span class="n">working_dir</span> <span class="o">=</span> <span class="s">"temp_NxM"</span>
+<span class="n">simulation_data_dir</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">working_dir</span><span class="p">,</span> <span class="s">"simulation"</span><span class="p">)</span>
+<span class="n">gene_data_dir</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">working_dir</span><span class="p">,</span> <span class="s">"gene"</span><span class="p">)</span>
+<span class="n">CNT_GENE_GWAS_FILES</span> <span class="o">=</span> <span class="mi">2</span>
+<span class="n">CNT_SIMULATION_FILES</span> <span class="o">=</span> <span class="mi">3</span>
+
+
+
+<span class="c">#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888</span>
+
+<span class="c"># imports</span>
+
+<span class="c">#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888</span>
+<span class="kn">import</span> <span class="nn">os</span><span class="o">,</span> <span class="nn">sys</span>
+<span class="kn">from</span> <span class="nn">itertools</span> <span class="kn">import</span> <span class="n">izip</span>
+<span class="kn">import</span> <span class="nn">glob</span>
+<span class="c">#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888</span>
+
+<span class="c"># Functions</span>
+
+
+<span class="c">#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888</span>
+
+<span class="c">#_________________________________________________________________________________________</span>
+<span class="c">#</span>
+<span class="c"># get gene gwas file pairs</span>
+<span class="c">#</span>
+<span class="c">#_________________________________________________________________________________________</span>
+<span class="k">def</span> <span class="nf">get_gene_gwas_file_pairs</span><span class="p">(</span> <span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Helper function to get all *.gene, *.gwas from the direction specified</span>
+<span class="sd"> in --gene_data_dir</span>
+
+<span class="sd"> Returns</span>
+<span class="sd"> file pairs with both .gene and .gwas extensions,</span>
+<span class="sd"> corresponding roots (no extension) of each file</span>
+<span class="sd"> """</span>
+ <span class="n">gene_files</span> <span class="o">=</span> <span class="n">glob</span><span class="o">.</span><span class="n">glob</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">gene_data_dir</span><span class="p">,</span> <span class="s">"*.gene"</span><span class="p">))</span>
+ <span class="n">gwas_files</span> <span class="o">=</span> <span class="n">glob</span><span class="o">.</span><span class="n">glob</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">gene_data_dir</span><span class="p">,</span> <span class="s">"*.gwas"</span><span class="p">))</span>
+ <span class="c">#</span>
+ <span class="n">common_roots</span> <span class="o">=</span> <span class="nb">set</span><span class="p">(</span><span class="nb">map</span><span class="p">(</span><span class="k">lambda</span> <span class="n">x</span><span class="p">:</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">splitext</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o" [...]
+ <span class="n">common_roots</span> <span class="o">&=</span><span class="nb">set</span><span class="p">(</span><span class="nb">map</span><span class="p">(</span><span class="k">lambda</span> <span class="n">x</span><span class="p">:</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">splitext</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">path</span><span class [...]
+ <span class="n">common_roots</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="n">common_roots</span><span class="p">)</span>
+ <span class="c">#</span>
+ <span class="n">p</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="p">;</span> <span class="n">g_dir</span> <span class="o">=</span> <span class="n">gene_data_dir</span>
+ <span class="n">file_pairs</span> <span class="o">=</span> <span class="p">[[</span><span class="n">p</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">g_dir</span><span class="p">,</span> <span class="n">x</span> <span class="o">+</span> <span class="s">".gene"</span><span class="p">),</span> <span class="n">p</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">g_dir</span><span cla [...]
+ <span class="k">return</span> <span class="n">file_pairs</span><span class="p">,</span> <span class="n">common_roots</span>
+
+<span class="c">#_________________________________________________________________________________________</span>
+<span class="c">#</span>
+<span class="c"># get simulation files</span>
+<span class="c">#</span>
+<span class="c">#_________________________________________________________________________________________</span>
+<span class="k">def</span> <span class="nf">get_simulation_files</span><span class="p">(</span> <span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Helper function to get all *.simulation from the direction specified</span>
+<span class="sd"> in --simulation_data_dir</span>
+<span class="sd"> Returns</span>
+<span class="sd"> file with .simulation extensions,</span>
+<span class="sd"> corresponding roots (no extension) of each file</span>
+<span class="sd"> """</span>
+ <span class="n">simulation_files</span> <span class="o">=</span> <span class="n">glob</span><span class="o">.</span><span class="n">glob</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">simulation_data_dir</span><span class="p">,</span> <span class="s">"*.simulation"</span><span class="p">))</span>
+ <span class="n">simulation_roots</span> <span class="o">=</span><span class="nb">map</span><span class="p">(</span><span class="k">lambda</span> <span class="n">x</span><span class="p">:</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">splitext</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">split</span><span class [...]
+ <span class="k">return</span> <span class="n">simulation_files</span><span class="p">,</span> <span class="n">simulation_roots</span>
+
+
+
+<span class="c">#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888</span>
+
+<span class="c"># Main logic</span>
+
+
+<span class="c">#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888</span>
+
+
+
+
+
+
+
+<span class="c">#_________________________________________________________________________________________</span>
+<span class="c">#</span>
+<span class="c"># setup_simulation_data</span>
+<span class="c">#</span>
+<span class="c">#_________________________________________________________________________________________</span>
+
+<span class="c">#</span>
+<span class="c"># mkdir: makes sure output directories exist before task</span>
+<span class="c">#</span>
+<span class="nd">@follows</span><span class="p">(</span><span class="n">mkdir</span><span class="p">(</span><span class="n">gene_data_dir</span><span class="p">,</span> <span class="n">simulation_data_dir</span><span class="p">))</span>
+<span class="k">def</span> <span class="nf">setup_simulation_data</span> <span class="p">():</span>
+ <span class="sd">"""</span>
+<span class="sd"> create simulation files</span>
+<span class="sd"> """</span>
+ <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">CNT_GENE_GWAS_FILES</span><span class="p">):</span>
+ <span class="nb">open</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">gene_data_dir</span><span class="p">,</span> <span class="s">"</span><span class="si">%03d</span><span class="s">.gene"</span> <span class="o">%</span> <span class="n">i</span><span class="p">),</span> <span class="s">"w"</span><span class="p">)</span>
+ <span class="nb">open</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">gene_data_dir</span><span class="p">,</span> <span class="s">"</span><span class="si">%03d</span><span class="s">.gwas"</span> <span class="o">%</span> <span class="n">i</span><span class="p">),</span> <span class="s">"w"</span><span class="p">)</span>
+ <span class="c">#</span>
+ <span class="c"># gene files without corresponding gwas and vice versa</span>
+ <span class="nb">open</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">gene_data_dir</span><span class="p">,</span> <span class="s">"orphan1.gene"</span><span class="p">),</span> <span class="s">"w"</span><span class="p">)</span>
+ <span class="nb">open</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">gene_data_dir</span><span class="p">,</span> <span class="s">"orphan2.gwas"</span><span class="p">),</span> <span class="s">"w"</span><span class="p">)</span>
+ <span class="nb">open</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">gene_data_dir</span><span class="p">,</span> <span class="s">"orphan3.gwas"</span><span class="p">),</span> <span class="s">"w"</span><span class="p">)</span>
+ <span class="c">#</span>
+ <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">CNT_SIMULATION_FILES</span><span class="p">):</span>
+ <span class="nb">open</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">simulation_data_dir</span><span class="p">,</span> <span class="s">"</span><span class="si">%03d</span><span class="s">.simulation"</span> <span class="o">%</span> <span class="n">i</span><span class="p">),</span> <span class="s">"w"</span><span [...]
+
+
+
+
+<span class="c">#_________________________________________________________________________________________</span>
+<span class="c">#</span>
+<span class="c"># cleanup_simulation_data</span>
+<span class="c">#</span>
+<span class="c">#_________________________________________________________________________________________</span>
+<span class="k">def</span> <span class="nf">try_rmdir</span> <span class="p">(</span><span class="n">d</span><span class="p">):</span>
+ <span class="k">if</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">exists</span><span class="p">(</span><span class="n">d</span><span class="p">):</span>
+ <span class="k">try</span><span class="p">:</span>
+ <span class="n">os</span><span class="o">.</span><span class="n">rmdir</span><span class="p">(</span><span class="n">d</span><span class="p">)</span>
+ <span class="k">except</span> <span class="ne">OSError</span><span class="p">:</span>
+ <span class="n">sys</span><span class="o">.</span><span class="n">stderr</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="s">"Warning:</span><span class="se">\t</span><span class="si">%s</span><span class="s"> is not empty and will not be removed.</span><span class="se">\n</span><span class="s">"</span> <span class="o">%</span> <span class="n">d</span><span class="p">)</span>
+
+
+
+<span class="k">def</span> <span class="nf">cleanup_simulation_data</span> <span class="p">():</span>
+ <span class="sd">"""</span>
+<span class="sd"> cleanup files</span>
+<span class="sd"> """</span>
+ <span class="n">sys</span><span class="o">.</span><span class="n">stderr</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="s">"Cleanup working directory and simulation files.</span><span class="se">\n</span><span class="s">"</span><span class="p">)</span>
+ <span class="c">#</span>
+ <span class="c"># cleanup gene and gwas files</span>
+ <span class="c">#</span>
+ <span class="k">for</span> <span class="n">f</span> <span class="ow">in</span> <span class="n">glob</span><span class="o">.</span><span class="n">glob</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">gene_data_dir</span><span class="p">,</span> <span class="s">"*.gene"</span><span class="p">)):</span>
+ <span class="n">os</span><span class="o">.</span><span class="n">unlink</span><span class="p">(</span><span class="n">f</span><span class="p">)</span>
+ <span class="k">for</span> <span class="n">f</span> <span class="ow">in</span> <span class="n">glob</span><span class="o">.</span><span class="n">glob</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">gene_data_dir</span><span class="p">,</span> <span class="s">"*.gwas"</span><span class="p">)):</span>
+ <span class="n">os</span><span class="o">.</span><span class="n">unlink</span><span class="p">(</span><span class="n">f</span><span class="p">)</span>
+ <span class="n">try_rmdir</span><span class="p">(</span><span class="n">gene_data_dir</span><span class="p">)</span>
+ <span class="c">#</span>
+ <span class="c"># cleanup simulation</span>
+ <span class="c">#</span>
+ <span class="k">for</span> <span class="n">f</span> <span class="ow">in</span> <span class="n">glob</span><span class="o">.</span><span class="n">glob</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">simulation_data_dir</span><span class="p">,</span> <span class="s">"*.simulation"</span><span class="p">)):</span>
+ <span class="n">os</span><span class="o">.</span><span class="n">unlink</span><span class="p">(</span><span class="n">f</span><span class="p">)</span>
+ <span class="n">try_rmdir</span><span class="p">(</span><span class="n">simulation_data_dir</span><span class="p">)</span>
+ <span class="c">#</span>
+ <span class="c"># cleanup working_dir</span>
+ <span class="c">#</span>
+ <span class="k">for</span> <span class="n">f</span> <span class="ow">in</span> <span class="n">glob</span><span class="o">.</span><span class="n">glob</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">working_dir</span><span class="p">,</span> <span class="s">"simulation_results"</span><span class="p">,</span> <span class="s">&qu [...]
+ <span class="n">os</span><span class="o">.</span><span class="n">unlink</span><span class="p">(</span><span class="n">f</span><span class="p">)</span>
+ <span class="n">try_rmdir</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">working_dir</span><span class="p">,</span> <span class="s">"simulation_results"</span><span class="p">))</span>
+ <span class="c">#</span>
+ <span class="k">for</span> <span class="n">f</span> <span class="ow">in</span> <span class="n">glob</span><span class="o">.</span><span class="n">glob</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">working_dir</span><span class="p">,</span> <span class="s">"*.mean"</span><span class="p">)):</span>
+ <span class="n">os</span><span class="o">.</span><span class="n">unlink</span><span class="p">(</span><span class="n">f</span><span class="p">)</span>
+ <span class="n">try_rmdir</span><span class="p">(</span><span class="n">working_dir</span><span class="p">)</span>
+
+
+<span class="c">#_________________________________________________________________________________________</span>
+<span class="c">#</span>
+<span class="c"># Step 1:</span>
+<span class="c">#</span>
+<span class="c"># for n_file in NNN_pairs_of_input_files:</span>
+<span class="c"># for m_file in MMM_simulation_data:</span>
+<span class="c">#</span>
+<span class="c"># [n_file.gene,</span>
+<span class="c"># n_file.gwas,</span>
+<span class="c"># m_file.simulation] -> working_dir/n_file.m_file.simulation_res</span>
+<span class="c">#</span>
+<span class="c">#_________________________________________________________________________________________</span>
+<span class="k">def</span> <span class="nf">generate_simulation_params</span> <span class="p">():</span>
+ <span class="sd">"""</span>
+<span class="sd"> Custom function to generate</span>
+<span class="sd"> file names for gene/gwas simulation study</span>
+<span class="sd"> """</span>
+ <span class="n">simulation_files</span><span class="p">,</span> <span class="n">simulation_file_roots</span> <span class="o">=</span> <span class="n">get_simulation_files</span><span class="p">()</span>
+ <span class="n">gene_gwas_file_pairs</span><span class="p">,</span> <span class="n">gene_gwas_file_roots</span> <span class="o">=</span> <span class="n">get_gene_gwas_file_pairs</span><span class="p">()</span>
+ <span class="c">#</span>
+ <span class="k">for</span> <span class="n">sim_file</span><span class="p">,</span> <span class="n">sim_file_root</span> <span class="ow">in</span> <span class="n">izip</span><span class="p">(</span><span class="n">simulation_files</span><span class="p">,</span> <span class="n">simulation_file_roots</span><span class="p">):</span>
+ <span class="k">for</span> <span class="p">(</span><span class="n">gene</span><span class="p">,</span> <span class="n">gwas</span><span class="p">),</span> <span class="n">gene_file_root</span> <span class="ow">in</span> <span class="n">izip</span><span class="p">(</span><span class="n">gene_gwas_file_pairs</span><span class="p">,</span> <span class="n">gene_gwas_file_roots</span><span class="p">):</span>
+ <span class="c">#</span>
+ <span class="n">result_file</span> <span class="o">=</span> <span class="s">"</span><span class="si">%s</span><span class="s">.</span><span class="si">%s</span><span class="s">.simulation_res"</span> <span class="o">%</span> <span class="p">(</span><span class="n">gene_file_root</span><span class="p">,</span> <span class="n">sim_file_root</span><span class="p">)</span>
+ <span class="n">result_file_path</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">working_dir</span><span class="p">,</span> <span class="s">"simulation_results"</span><span class="p">,</span> <span class="n">result_file</span><span class="p">)</span>
+ <span class="c">#</span>
+ <span class="k">yield</span> <span class="p">[</span><span class="n">gene</span><span class="p">,</span> <span class="n">gwas</span><span class="p">,</span> <span class="n">sim_file</span><span class="p">],</span> <span class="n">result_file_path</span><span class="p">,</span> <span class="n">gene_file_root</span><span class="p">,</span> <span class="n">sim_file_root</span><span class="p">,</span> <span class="n">result_file</span>
+
+
+
+<span class="c">#</span>
+<span class="c"># mkdir: makes sure output directories exist before task</span>
+<span class="c">#</span>
+<span class="nd">@follows</span><span class="p">(</span><span class="n">mkdir</span><span class="p">(</span><span class="n">working_dir</span><span class="p">,</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">working_dir</span><span class="p">,</span> <span class="s">"simulation_results"</span><span class="p">)))</span>
+<span class="nd">@files</span><span class="p">(</span><span class="n">generate_simulation_params</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">gwas_simulation</span><span class="p">(</span><span class="n">input_files</span><span class="p">,</span> <span class="n">result_file_path</span><span class="p">,</span> <span class="n">gene_file_root</span><span class="p">,</span> <span class="n">sim_file_root</span><span class="p">,</span> <span class="n">result_file</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Dummy calculation of gene gwas vs simulation data</span>
+<span class="sd"> Normally runs in parallel on a computational cluster</span>
+<span class="sd"> """</span>
+ <span class="p">(</span><span class="n">gene_file</span><span class="p">,</span>
+ <span class="n">gwas_file</span><span class="p">,</span>
+ <span class="n">simulation_data_file</span><span class="p">)</span> <span class="o">=</span> <span class="n">input_files</span>
+ <span class="c">#</span>
+ <span class="n">simulation_res_file</span> <span class="o">=</span> <span class="nb">open</span><span class="p">(</span><span class="n">result_file_path</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+ <span class="n">simulation_res_file</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="s">"</span><span class="si">%s</span><span class="s"> + </span><span class="si">%s</span><span class="s"> -> </span><span class="si">%s</span><span class="se">\n</span><span class="s">"</span> <span class="o">%</span> <span class="p">(</span><span class="n">gene_file_root</span><span class="p">,</span> <span class="n">sim_file_root</span><spa [...]
+
+
+<span class="c">#_________________________________________________________________________________________</span>
+<span class="c">#</span>
+<span class="c"># Step 2:</span>
+<span class="c">#</span>
+<span class="c"># Statistical summary per gene/gwas file pair</span>
+<span class="c">#</span>
+<span class="c"># for n_file in NNN_pairs_of_input_files:</span>
+<span class="c"># working_dir/simulation_results/n.*.simulation_res</span>
+<span class="c"># -> working_dir/n.mean</span>
+<span class="c">#</span>
+<span class="c">#_________________________________________________________________________________________</span>
+
+
+<span class="nd">@collate</span><span class="p">(</span><span class="n">gwas_simulation</span><span class="p">,</span> <span class="n">regex</span><span class="p">(</span><span class="s">r"simulation_results/(\d+).\d+.simulation_res"</span><span class="p">),</span> <span class="s">r"\1.mean"</span><span class="p">)</span>
+<span class="nd">@posttask</span><span class="p">(</span><span class="k">lambda</span> <span class="p">:</span> <span class="n">sys</span><span class="o">.</span><span class="n">stdout</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="s">"</span><span class="se">\n</span><span class="s">OK</span><span class="se">\n</span><span class="s">"</span><span class="p">))</span>
+<span class="k">def</span> <span class="nf">statistical_summary</span> <span class="p">(</span><span class="n">result_files</span><span class="p">,</span> <span class="n">summary_file</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Simulate statistical summary</span>
+<span class="sd"> """</span>
+ <span class="n">summary_file</span> <span class="o">=</span> <span class="nb">open</span><span class="p">(</span><span class="n">summary_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+ <span class="k">for</span> <span class="n">f</span> <span class="ow">in</span> <span class="n">result_files</span><span class="p">:</span>
+ <span class="n">summary_file</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="nb">open</span><span class="p">(</span><span class="n">f</span><span class="p">)</span><span class="o">.</span><span class="n">read</span><span class="p">())</span>
+
+
+
+<span class="n">pipeline_run</span><span class="p">([</span><span class="n">setup_simulation_data</span><span class="p">],</span> <span class="n">multiprocess</span> <span class="o">=</span> <span class="mi">5</span><span class="p">,</span> <span class="n">verbose</span> <span class="o">=</span> <span class="mi">2</span><span class="p">)</span>
+<span class="n">pipeline_run</span><span class="p">([</span><span class="n">statistical_summary</span><span class="p">],</span> <span class="n">multiprocess</span> <span class="o">=</span> <span class="mi">5</span><span class="p">,</span> <span class="n">verbose</span> <span class="o">=</span> <span class="mi">2</span><span class="p">)</span>
+
+<span class="c"># uncomment to printout flowchar</span>
+<span class="c">#</span>
+<span class="c"># pipeline_printout(sys.stdout, [statistical_summary], verbose=2)</span>
+<span class="c"># graph_printout ("flowchart.jpg", "jpg", [statistical_summary])</span>
+<span class="c">#</span>
+
+<span class="n">cleanup_simulation_data</span> <span class="p">()</span>
+</pre></div>
+</div>
+</div></blockquote>
+</div>
+<div class="section" id="resulting-output">
+<h2>Resulting Output<a class="headerlink" href="#resulting-output" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="gp">>>> </span><span class="n">pipeline_run</span><span class="p">([</span><span class="n">setup_simulation_data</span><span class="p">],</span> <span class="n">multiprocess</span> <span class="o">=</span> <span class="mi">5</span><span class="p">,</span> <span class="n">verbose</span> <span class="o">=</span> <span class="mi">2</span><span class="p">)</span>
+<span class="go"> Make directories [temp_NxM/gene, temp_NxM/simulation] completed</span>
+<span class="go">Completed Task = setup_simulation_data_mkdir_1</span>
+<span class="go"> Job completed</span>
+<span class="go">Completed Task = setup_simulation_data</span>
+
+
+<span class="gp">>>> </span><span class="n">pipeline_run</span><span class="p">([</span><span class="n">statistical_summary</span><span class="p">],</span> <span class="n">multiprocess</span> <span class="o">=</span> <span class="mi">5</span><span class="p">,</span> <span class="n">verbose</span> <span class="o">=</span> <span class="mi">2</span><span class="p">)</span>
+<span class="go"> Make directories [temp_NxM, temp_NxM/simulation_results] completed</span>
+<span class="go">Completed Task = gwas_simulation_mkdir_1</span>
+<span class="go"> Job = [[temp_NxM/gene/001.gene, temp_NxM/gene/001.gwas, temp_NxM/simulation/000.simulation] -> temp_NxM/simulation_results/001.000.simulation_res, 001, 000, 001.000.simulation_res] completed</span>
+<span class="go"> Job = [[temp_NxM/gene/000.gene, temp_NxM/gene/000.gwas, temp_NxM/simulation/000.simulation] -> temp_NxM/simulation_results/000.000.simulation_res, 000, 000, 000.000.simulation_res] completed</span>
+<span class="go"> Job = [[temp_NxM/gene/001.gene, temp_NxM/gene/001.gwas, temp_NxM/simulation/001.simulation] -> temp_NxM/simulation_results/001.001.simulation_res, 001, 001, 001.001.simulation_res] completed</span>
+<span class="go"> Job = [[temp_NxM/gene/000.gene, temp_NxM/gene/000.gwas, temp_NxM/simulation/001.simulation] -> temp_NxM/simulation_results/000.001.simulation_res, 000, 001, 000.001.simulation_res] completed</span>
+<span class="go"> Job = [[temp_NxM/gene/000.gene, temp_NxM/gene/000.gwas, temp_NxM/simulation/002.simulation] -> temp_NxM/simulation_results/000.002.simulation_res, 000, 002, 000.002.simulation_res] completed</span>
+<span class="go"> Job = [[temp_NxM/gene/001.gene, temp_NxM/gene/001.gwas, temp_NxM/simulation/002.simulation] -> temp_NxM/simulation_results/001.002.simulation_res, 001, 002, 001.002.simulation_res] completed</span>
+<span class="go">Completed Task = gwas_simulation</span>
+<span class="go"> Job = [[temp_NxM/simulation_results/000.000.simulation_res, temp_NxM/simulation_results/000.001.simulation_res, temp_NxM/simulation_results/000.002.simulation_res] -> temp_NxM/000.mean] completed</span>
+<span class="go"> Job = [[temp_NxM/simulation_results/001.000.simulation_res, temp_NxM/simulation_results/001.001.simulation_res, temp_NxM/simulation_results/001.002.simulation_res] -> temp_NxM/001.mean] completed</span>
+</pre></div>
+</div>
+</div></blockquote>
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="../../contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#">Code for Chapter 10: Generating parameters on the fly</a><ul>
+<li><a class="reference internal" href="#code">Code</a></li>
+<li><a class="reference internal" href="#resulting-output">Resulting Output</a></li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="logging_code.html"
+ title="previous chapter">Code for Chapter 16: Logging progress through a pipeline</a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="transform_code.html"
+ title="next chapter">Code for Chapter 6: Applying the same recipe to create many different files</a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../../_sources/tutorials/manual/onthefly_code.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="../../decorators/decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="../../decorators/originate.html">@originate</a> </li>
+ <li><a href="../../decorators/split.html">@split</a> </li>
+ <li><a href="../../decorators/transform.html">@transform</a> </li>
+ <li><a href="../../decorators/merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="../../decorators/subdivide.html">@subdivide</a> </li>
+ <li><a href="../../decorators/transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="../../decorators/collate.html">@collate</a> </li>
+ <li><a href="../../decorators/collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="../../decorators/graphviz.html">@graphviz</a> </li>
+ <li><a href="../../decorators/mkdir.html">@mkdir</a> </li>
+ <li><a href="../../decorators/follows.html">@follows / mkdir</a> </li>
+ <li><a href="../../decorators/posttask.html">@posttask touch_file</a> </li>
+ <li><a href="../../decorators/active_if.html">@active_if</a> </li>
+ <li><a href="../../decorators/jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="../../decorators/product.html">@product </a> </li>
+ <li><a href="../../decorators/permutations.html">@permutations </a> </li>
+ <li><a href="../../decorators/combinations.html">@combinations </a> </li>
+ <li><a href="../../decorators/combinations_with_replacement.html">@combinations_ _with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="../../decorators/decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="../../decorators/files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="../../decorators/parallel.html">@parallel</a> </li>
+ <li><a href="../../decorators/check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="../../decorators/indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="transform_code.html" title="Code for Chapter 6: Applying the same recipe to create many different files"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="logging_code.html" title="Code for Chapter 16: Logging progress through a pipeline"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="../new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/tutorials/manual/parallel.html b/doc/_build/html/tutorials/manual/parallel.html
new file mode 100644
index 0000000..64bca9c
--- /dev/null
+++ b/doc/_build/html/tutorials/manual/parallel.html
@@ -0,0 +1,248 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>Chapter 16: Esoteric: Running jobs in parallel without using files with @parallel — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../../index.html" />
+ <link rel="next" title="Chapter 17: Writing custom functions to decide which jobs are up to date" href="check_if_uptodate.html" />
+ <link rel="prev" title="Chapter 15: add_inputs() and inputs(): Controlling both input and output files with @transform" href="advanced_transform.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="check_if_uptodate.html" title="Chapter 17: Writing custom functions to decide which jobs are up to date"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="advanced_transform.html" title="Chapter 15: add_inputs() and inputs(): Controlling both input and output files with @transform"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="../new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <div class="section" id="manual-parallel-chapter-num-esoteric-running-jobs-in-parallel-without-using-files-with-parallel">
+<span id="manual-parallel"></span><h1><strong>Chapter 16</strong>: <cite>Esoteric: Running jobs in parallel without using files with</cite> <strong>@parallel</strong><a class="headerlink" href="#manual-parallel-chapter-num-esoteric-running-jobs-in-parallel-without-using-files-with-parallel" title="Permalink to this headline">¶</a></h1>
+<blockquote>
+<div><table class="hlist"><tr><td><ul class="simple">
+<li><a class="reference internal" href="manual_contents.html#manual"><em>Manual overview</em></a></li>
+</ul>
+</td><td><ul class="simple">
+<li><a class="reference internal" href="../../decorators/parallel.html#decorators-parallel"><em>@parallel</em></a> syntax in detail</li>
+</ul>
+</td></tr></table>
+</div></blockquote>
+<div class="section" id="parallel">
+<span id="index-0"></span><h2><strong>@parallel</strong><a class="headerlink" href="#parallel" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p><strong>@parallel</strong> supplies parameters for multiple <strong>jobs</strong> exactly like <a class="reference internal" href="files.html#manual-files"><em>@files</em></a> except that:</p>
+<blockquote>
+<div><ol class="arabic simple">
+<li>The first two parameters are not treated as <em>inputs</em> and <em>outputs</em> parameters,
+and strings are not assumed to be file names</li>
+<li>Thus no up-to-date checking of each job is carried out using <em>inputs</em> and <em>outputs</em> files</li>
+<li>No expansion of <a class="reference external" href="http://docs.python.org/library/glob.html"><cite>glob</cite></a> patterns or <em>output</em> from previous tasks is carried out.</li>
+</ol>
+</div></blockquote>
+<p>This syntax is most useful when a pipeline stage does not involve creating or consuming any files, and
+you wish to forego the conveniences of <a class="reference internal" href="files.html#manual-files"><em>@files</em></a>, <a class="reference internal" href="transform.html#manual-transform"><em>@transform</em></a> etc.</p>
+<p>The following code performs some arithmetic in parallel:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="kn">import</span> <span class="nn">sys</span>
+<span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+<span class="n">parameters</span> <span class="o">=</span> <span class="p">[</span>
+ <span class="p">[</span><span class="s">'A'</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">],</span> <span class="c"># 1st job</span>
+ <span class="p">[</span><span class="s">'B'</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">],</span> <span class="c"># 2nd job</span>
+ <span class="p">[</span><span class="s">'C'</span><span class="p">,</span> <span class="mi">5</span><span class="p">,</span> <span class="mi">6</span><span class="p">],</span> <span class="c"># 3rd job</span>
+ <span class="p">]</span>
+<span class="nd">@parallel</span><span class="p">(</span><span class="n">parameters</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">parallel_task</span><span class="p">(</span><span class="n">name</span><span class="p">,</span> <span class="n">param1</span><span class="p">,</span> <span class="n">param2</span><span class="p">):</span>
+ <span class="n">sys</span><span class="o">.</span><span class="n">stderr</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="s">" Parallel task </span><span class="si">%s</span><span class="s">: "</span> <span class="o">%</span> <span class="n">name</span><span class="p">)</span>
+ <span class="n">sys</span><span class="o">.</span><span class="n">stderr</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="s">"</span><span class="si">%d</span><span class="s"> + </span><span class="si">%d</span><span class="s"> = </span><span class="si">%d</span><span class="se">\n</span><span class="s">"</span> <span class="o">%</span> <span class="p">(</span><span class="n">param1</span><span class="p">,</span> <span class="n" [...]
+
+<span class="n">pipeline_run</span><span class="p">([</span><span class="n">parallel_task</span><span class="p">])</span>
+</pre></div>
+</div>
+<p>produces the following:</p>
+<div class="highlight-python"><pre>Task = parallel_task
+ Parallel task A: 1 + 2 = 3
+ Job = ["A", 1, 2] completed
+ Parallel task B: 3 + 4 = 7
+ Job = ["B", 3, 4] completed
+ Parallel task C: 5 + 6 = 11
+ Job = ["C", 5, 6] completed</pre>
+</div>
+</div></blockquote>
+</div></blockquote>
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="../../contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#"><strong>Chapter 16</strong>: <cite>Esoteric: Running jobs in parallel without using files with</cite> <strong>@parallel</strong></a><ul>
+<li><a class="reference internal" href="#parallel"><strong>@parallel</strong></a></li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="advanced_transform.html"
+ title="previous chapter"><strong>Chapter 15</strong>: <strong>add_inputs()</strong> <cite>and</cite> <strong>inputs()</strong>: <cite>Controlling both input and output files with</cite> <strong>@transform</strong></a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="check_if_uptodate.html"
+ title="next chapter"><strong>Chapter 17</strong>: <cite>Writing custom functions to decide which jobs are up to date</cite></a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../../_sources/tutorials/manual/parallel.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="../../decorators/decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="../../decorators/originate.html">@originate</a> </li>
+ <li><a href="../../decorators/split.html">@split</a> </li>
+ <li><a href="../../decorators/transform.html">@transform</a> </li>
+ <li><a href="../../decorators/merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="../../decorators/subdivide.html">@subdivide</a> </li>
+ <li><a href="../../decorators/transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="../../decorators/collate.html">@collate</a> </li>
+ <li><a href="../../decorators/collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="../../decorators/graphviz.html">@graphviz</a> </li>
+ <li><a href="../../decorators/mkdir.html">@mkdir</a> </li>
+ <li><a href="../../decorators/follows.html">@follows / mkdir</a> </li>
+ <li><a href="../../decorators/posttask.html">@posttask touch_file</a> </li>
+ <li><a href="../../decorators/active_if.html">@active_if</a> </li>
+ <li><a href="../../decorators/jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="../../decorators/product.html">@product </a> </li>
+ <li><a href="../../decorators/permutations.html">@permutations </a> </li>
+ <li><a href="../../decorators/combinations.html">@combinations </a> </li>
+ <li><a href="../../decorators/combinations_with_replacement.html">@combinations_ _with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="../../decorators/decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="../../decorators/files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="../../decorators/parallel.html">@parallel</a> </li>
+ <li><a href="../../decorators/check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="../../decorators/indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="check_if_uptodate.html" title="Chapter 17: Writing custom functions to decide which jobs are up to date"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="advanced_transform.html" title="Chapter 15: add_inputs() and inputs(): Controlling both input and output files with @transform"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="../new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/tutorials/manual/parallel_processing.html b/doc/_build/html/tutorials/manual/parallel_processing.html
new file mode 100644
index 0000000..3b936e1
--- /dev/null
+++ b/doc/_build/html/tutorials/manual/parallel_processing.html
@@ -0,0 +1,239 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>Chapter 6: Running Tasks and Jobs in parallel — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../../index.html" />
+ <link rel="next" title="Chapter 7: Splitting up large tasks / files with @split" href="split.html" />
+ <link rel="prev" title="Chapter 5: Tracing pipeline parameters" href="tracing_pipeline_parameters.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="split.html" title="Chapter 7: Splitting up large tasks / files with @split"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="tracing_pipeline_parameters.html" title="Chapter 5: Tracing pipeline parameters"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="../new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <div class="section" id="manual-multiprocessing-chapter-num-running-tasks-and-jobs-in-parallel">
+<span id="manual-multiprocessing"></span><h1><strong>Chapter 6</strong>: <cite>Running Tasks and Jobs in parallel</cite><a class="headerlink" href="#manual-multiprocessing-chapter-num-running-tasks-and-jobs-in-parallel" title="Permalink to this headline">¶</a></h1>
+<blockquote>
+<div><table class="hlist"><tr><td><ul class="simple">
+<li><a class="reference internal" href="manual_contents.html#manual"><em>Manual overview</em></a></li>
+</ul>
+</td><td><ul class="simple">
+</ul>
+</td></tr></table>
+</div></blockquote>
+<div class="section" id="multi-processing">
+<h2>Multi Processing<a class="headerlink" href="#multi-processing" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p><em>Ruffus</em> uses python <a class="reference external" href="http://docs.python.org/library/multiprocessing.html">multiprocessing</a> to run
+each job in a separate process.</p>
+<p>This means that jobs do <em>not</em> necessarily complete in the order of the defined parameters.
+Task hierarchies are, of course, inviolate: upstream tasks run before downstream, dependent tasks.</p>
+<p>Tasks that are independent (i.e. do not precede each other) may be run in parallel as well.</p>
+<p>The number of concurrent jobs can be set in <a class="reference internal" href="../../pipeline_functions.html#pipeline-functions-pipeline-run"><em>pipeline_run</em></a>:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="n">pipeline_run</span><span class="p">([</span><span class="n">parallel_task</span><span class="p">],</span> <span class="n">multiprocess</span> <span class="o">=</span> <span class="mi">5</span><span class="p">)</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p>If <tt class="docutils literal"><span class="pre">multiprocess</span></tt> is set to 1, then jobs will be run in a single process.</p>
+</div></blockquote>
+</div>
+<div class="section" id="data-sharing">
+<h2>Data sharing<a class="headerlink" href="#data-sharing" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>Running jobs in separate processes allows <em>Ruffus</em> to make full use of the multiple
+processors in modern computers. However, some of the
+<a class="reference external" href="http://docs.python.org/library/multiprocessing.html#multiprocessing-programming">multiprocessing guidelines</a>
+should be borne in mind when writing <em>Ruffus</em> pipelines. In particular:</p>
+<ul class="simple">
+<li>Try not to pass large amounts of data between jobs, or at least be aware that this has to be marshalled
+across process boundaries.</li>
+<li>Only data which can be <a class="reference external" href="http://docs.python.org/library/pickle.html">pickled</a> can be passed as
+parameters to <em>Ruffus</em> task functions. Happily, that applies to almost any Python data type.
+The rare unpicklable object will cause Python to complain (fail) loudly when <em>Ruffus</em> pipelines
+are run (see the sketch below).</li>
+</ul>
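+<p>As an illustrative sketch (not part of the original manual; the task and file names below are
+invented for this example), the following passes only picklable values, plain strings and a
+dictionary, as job parameters. Putting an open file handle or a lambda in the parameter list
+would fail when the parameters are marshalled across the process boundary:</p>
+<div class="highlight-python"><pre>from ruffus import *
+
+# plain strings and dictionaries pickle cleanly, so they are safe job parameters
+parameters = [
+    ["job1.txt", {"threshold": 0.5}],   # 1st job
+    ["job2.txt", {"threshold": 0.9}],   # 2nd job
+    ]
+
+@parallel(parameters)
+def picklable_parameters_task(output_name, options):
+    # runs in a separate process: the parameters above were pickled to get here
+    open(output_name, "w").write("threshold = %s\n" % options["threshold"])
+
+pipeline_run([picklable_parameters_task], multiprocess = 2)</pre>
+</div>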
+</div></blockquote>
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="../../contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#"><strong>Chapter 6</strong>: <cite>Running Tasks and Jobs in parallel</cite></a><ul>
+<li><a class="reference internal" href="#multi-processing">Multi Processing</a></li>
+<li><a class="reference internal" href="#data-sharing">Data sharing</a></li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="tracing_pipeline_parameters.html"
+ title="previous chapter"><strong>Chapter 5</strong>: Tracing pipeline parameters</a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="split.html"
+ title="next chapter"><strong>Chapter 7</strong>: <cite>Splitting up large tasks / files with</cite> <strong>@split</strong></a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../../_sources/tutorials/manual/parallel_processing.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="../../decorators/decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="../../decorators/originate.html">@originate</a> </li>
+ <li><a href="../../decorators/split.html">@split</a> </li>
+ <li><a href="../../decorators/transform.html">@transform</a> </li>
+ <li><a href="../../decorators/merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="../../decorators/subdivide.html">@subdivide</a> </li>
+ <li><a href="../../decorators/transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="../../decorators/collate.html">@collate</a> </li>
+ <li><a href="../../decorators/collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="../../decorators/graphviz.html">@graphviz</a> </li>
+ <li><a href="../../decorators/mkdir.html">@mkdir</a> </li>
+ <li><a href="../../decorators/follows.html">@follows / mkdir</a> </li>
+ <li><a href="../../decorators/posttask.html">@posttask touch_file</a> </li>
+ <li><a href="../../decorators/active_if.html">@active_if</a> </li>
+ <li><a href="../../decorators/jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="../../decorators/product.html">@product </a> </li>
+ <li><a href="../../decorators/permutations.html">@permutations </a> </li>
+ <li><a href="../../decorators/combinations.html">@combinations </a> </li>
+ <li><a href="../../decorators/combinations_with_replacement.html">@combinations_ _with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="../../decorators/decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="../../decorators/files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="../../decorators/parallel.html">@parallel</a> </li>
+ <li><a href="../../decorators/check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="../../decorators/indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="split.html" title="Chapter 7: Splitting up large tasks / files with @split"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="tracing_pipeline_parameters.html" title="Chapter 5: Tracing pipeline parameters"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="../new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/tutorials/manual/posttask.html b/doc/_build/html/tutorials/manual/posttask.html
new file mode 100644
index 0000000..5ad76df
--- /dev/null
+++ b/doc/_build/html/tutorials/manual/posttask.html
@@ -0,0 +1,288 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>Chapter 10: Signal the completion of each stage of our pipeline with @posttask — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../../index.html" />
+ <link rel="next" title="Chapter 11: Manage concurrency for a specific task with @jobs_limit" href="jobs_limit.html" />
+ <link rel="prev" title="Chapter 9: Merge multiple input into a single result" href="merge.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="jobs_limit.html" title="Chapter 11: Manage concurrency for a specific task with @jobs_limit"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="merge.html" title="Chapter 9: Merge multiple input into a single result"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="../new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <div class="section" id="manual-posttask-chapter-num-signal-the-completion-of-each-stage-of-our-pipeline-with-posttask">
+<span id="manual-posttask"></span><h1><strong>Chapter 10</strong>: <cite>Signal the completion of each stage of our pipeline with</cite> <strong>@posttask</strong><a class="headerlink" href="#manual-posttask-chapter-num-signal-the-completion-of-each-stage-of-our-pipeline-with-posttask" title="Permalink to this headline">¶</a></h1>
+<blockquote>
+<div><table class="hlist"><tr><td><ul class="simple">
+<li><a class="reference internal" href="manual_contents.html#manual"><em>Manual overview</em></a></li>
+</ul>
+</td><td><ul class="simple">
+<li><a class="reference internal" href="../../decorators/posttask.html#decorators-posttask"><em>@posttask</em></a> syntax in detail</li>
+</ul>
+</td></tr></table>
+<p>It is often useful to signal the completion of each task by specifying a specific
+action to be taken or function to be called. This can range from
+printing out a message or <tt class="docutils literal"><span class="pre">touching</span></tt> a sentinel file,
+to emailing the author. This is particularly useful if the <a class="reference internal" href="../../glossary.html#term-task"><em class="xref std std-term">task</em></a> is a recipe applied to an unspecified number
+of parameters in parallel in different <a class="reference internal" href="../../glossary.html#term-job"><em class="xref std std-term">job</em></a>s. If the task is never run, or if it
+fails, then, needless to say, no task completion action will happen.</p>
+<p><em>Ruffus</em> uses the <a class="reference internal" href="../../decorators/posttask.html#decorators-posttask"><em>@posttask</em></a> decorator for this purpose.</p>
+</div></blockquote>
+<div class="section" id="posttask">
+<span id="index-0"></span><h2><strong>@posttask</strong><a class="headerlink" href="#posttask" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>We can signal the completion of each task by specifying
+one or more function(s) using <tt class="docutils literal"><span class="pre">@posttask</span></tt></p>
+<div class="highlight-python"><div class="highlight"><pre><span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+
+<span class="k">def</span> <span class="nf">task_finished</span><span class="p">():</span>
+ <span class="k">print</span> <span class="s">"hooray"</span>
+
+<span class="nd">@posttask</span><span class="p">(</span><span class="n">task_finished</span><span class="p">)</span>
+<span class="nd">@files</span><span class="p">(</span><span class="bp">None</span><span class="p">,</span> <span class="s">"a.1"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">create_if_necessary</span><span class="p">(</span><span class="n">input_file</span><span class="p">,</span> <span class="n">output_file</span><span class="p">):</span>
+ <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+
+<span class="n">pipeline_run</span><span class="p">([</span><span class="n">create_if_necessary</span><span class="p">])</span>
+</pre></div>
+</div>
+<p>This is such a short function that we might as well write it in-line:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="nd">@posttask</span><span class="p">(</span><span class="k">lambda</span><span class="p">:</span> <span class="n">sys</span><span class="o">.</span><span class="n">stdout</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="s">"hooray</span><span class="se">\n</span><span class="s">"</span><span class="p">))</span>
+<span class="nd">@files</span><span class="p">(</span><span class="bp">None</span><span class="p">,</span> <span class="s">"a.1"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">create_if_necessary</span><span class="p">(</span><span class="n">input_file</span><span class="p">,</span> <span class="n">output_file</span><span class="p">):</span>
+ <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+</pre></div>
+</div>
+</div></blockquote>
+</div></blockquote>
+<div class="admonition note">
+<p class="first admonition-title">Note</p>
+<p class="last">The function(s) provided to <tt class="docutils literal"><span class="pre">@posttask</span></tt> will be called if the pipeline passes
+through a task, even if none of its jobs are run because they are up-to-date.
+This happens when an upstream task is out of date and the execution passes through
+this point in the pipeline. See the example in <a class="reference internal" href="dependencies.html#manual-dependencies"><em>Chapter 9</em></a>
+of this manual.</p>
+</div>
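+<p>Here is a hedged, self-contained sketch of the behaviour described in the note above (the task and file names are illustrative assumptions, not code from this manual): even if the output of <tt class="docutils literal"><span class="pre">second_task</span></tt> is up to date, its <tt class="docutils literal"><span class="pre">@posttask</span></tt> function still runs whenever the pipeline passes through it:</p>
+<div class="highlight-python"><div class="highlight"><pre>import sys
+from ruffus import *
+
+@posttask(lambda: sys.stdout.write("first_task completed\n"))
+@files(None, "first.output")
+def first_task(input_file, output_file):
+    open(output_file, "w").close()
+
+@posttask(lambda: sys.stdout.write("second_task completed\n"))
+@files(first_task, "second.output")
+def second_task(input_file, output_file):
+    open(output_file, "w").close()
+
+pipeline_run([second_task])
+</pre></div>
+</div>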
+</div>
+<div class="section" id="touch-file">
+<span id="manual-posttask-touch-file"></span><span id="index-1"></span><h2><a class="reference internal" href="../../decorators/indicator_objects.html#decorators-touch-file"><em>touch_file</em></a><a class="headerlink" href="#touch-file" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>The most common way to note the completion of a task is to create some sort of
+“flag” file. Each stage in a traditional <tt class="docutils literal"><span class="pre">make</span></tt> pipeline would contain a
+<tt class="docutils literal"><span class="pre">touch</span> <span class="pre">completed.flag</span></tt>.</p>
+<p>This is so common that <strong>Ruffus</strong> provides a special shorthand called
+<a class="reference internal" href="../../decorators/indicator_objects.html#decorators-touch-file"><em>touch_file</em></a>:</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+
+<span class="nd">@posttask</span><span class="p">(</span><span class="n">touch_file</span><span class="p">(</span><span class="s">"task_completed.flag"</span><span class="p">))</span>
+<span class="nd">@files</span><span class="p">(</span><span class="bp">None</span><span class="p">,</span> <span class="s">"a.1"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">create_if_necessary</span><span class="p">(</span><span class="n">input_file</span><span class="p">,</span> <span class="n">output_file</span><span class="p">):</span>
+ <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+
+<span class="n">pipeline_run</span><span class="p">([</span><span class="n">create_if_necessary</span><span class="p">])</span>
+</pre></div>
+</div>
+</div></blockquote>
+</div>
+<div class="section" id="adding-several-post-task-actions">
+<h2>Adding several post task actions<a class="headerlink" href="#adding-several-post-task-actions" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>You can, of course, add more than one action to be taken on completion of the
+task, either by stacking up as many <a class="reference internal" href="../../decorators/posttask.html#decorators-posttask"><em>@posttask</em></a> decorators
+as necessary, or by including several functions in the same <strong>@posttask</strong>:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="nd">@posttask</span><span class="p">(</span><span class="n">print_hooray</span><span class="p">,</span> <span class="n">print_whoppee</span><span class="p">)</span>
+<span class="nd">@posttask</span><span class="p">(</span><span class="n">print_hip_hip</span><span class="p">,</span> <span class="n">touch_file</span><span class="p">(</span><span class="s">"sentinel_flag"</span><span class="p">))</span>
+<span class="nd">@files</span><span class="p">(</span><span class="bp">None</span><span class="p">,</span> <span class="s">"a.1"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">your_pipeline_function</span> <span class="p">(</span><span class="n">input_file_names</span><span class="p">,</span> <span class="n">output_file_name</span><span class="p">):</span>
+ <span class="s">""</span>
+</pre></div>
+</div>
+</div></blockquote>
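+<p>For reference, here is a minimal runnable sketch of the example above; the bodies of the three print functions are assumptions added purely for illustration:</p>
+<div class="highlight-python"><div class="highlight"><pre>from ruffus import *
+
+def print_hooray():   print("hooray")
+def print_whoppee():  print("whoppee")
+def print_hip_hip():  print("hip hip")
+
+@posttask(print_hooray, print_whoppee)
+@posttask(print_hip_hip, touch_file("sentinel_flag"))
+@files(None, "a.1")
+def your_pipeline_function(input_file_names, output_file_name):
+    open(output_file_name, "w").close()
+
+pipeline_run([your_pipeline_function])
+</pre></div>
+</div>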
+</div></blockquote>
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="../../contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#"><strong>Chapter 10</strong>: <cite>Signal the completion of each stage of our pipeline with</cite> <strong>@posttask</strong></a><ul>
+<li><a class="reference internal" href="#posttask"><strong>@posttask</strong></a></li>
+<li><a class="reference internal" href="#touch-file"><tt class="docutils literal"><span class="pre">touch_file</span></tt></a></li>
+<li><a class="reference internal" href="#adding-several-post-task-actions">Adding several post task actions</a></li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="merge.html"
+ title="previous chapter"><strong>Chapter 9</strong>: <strong>Merge</strong> <cite>multiple input into a single result</cite></a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="jobs_limit.html"
+ title="next chapter"><strong>Chapter 11</strong>: <cite>Manage concurrency for a specific task with</cite> <strong>@jobs_limit</strong></a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../../_sources/tutorials/manual/posttask.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="../../decorators/decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="../../decorators/originate.html">@originate</a> </li>
+ <li><a href="../../decorators/split.html">@split</a> </li>
+ <li><a href="../../decorators/transform.html">@transform</a> </li>
+ <li><a href="../../decorators/merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="../../decorators/subdivide.html">@subdivide</a> </li>
+ <li><a href="../../decorators/transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="../../decorators/collate.html">@collate</a> </li>
+ <li><a href="../../decorators/collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="../../decorators/graphviz.html">@graphviz</a> </li>
+ <li><a href="../../decorators/mkdir.html">@mkdir</a> </li>
+ <li><a href="../../decorators/follows.html">@follows / mkdir</a> </li>
+ <li><a href="../../decorators/posttask.html">@posttask touch_file</a> </li>
+ <li><a href="../../decorators/active_if.html">@active_if</a> </li>
+ <li><a href="../../decorators/jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="../../decorators/product.html">@product </a> </li>
+ <li><a href="../../decorators/permutations.html">@permutations </a> </li>
+ <li><a href="../../decorators/combinations.html">@combinations </a> </li>
+ <li><a href="../../decorators/combinations_with_replacement.html">@combinations_ _with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="../../decorators/decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="../../decorators/files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="../../decorators/parallel.html">@parallel</a> </li>
+ <li><a href="../../decorators/check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="../../decorators/indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="jobs_limit.html" title="Chapter 11: Manage concurrency for a specific task with @jobs_limit"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="merge.html" title="Chapter 9: Merge multiple input into a single result"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="../new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/tutorials/manual/split.html b/doc/_build/html/tutorials/manual/split.html
new file mode 100644
index 0000000..3359bac
--- /dev/null
+++ b/doc/_build/html/tutorials/manual/split.html
@@ -0,0 +1,352 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>Chapter 7: Splitting up large tasks / files with @split — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../../index.html" />
+ <link rel="next" title="Chapter 8: Applying the same recipe to create many different files with @transform" href="transform.html" />
+ <link rel="prev" title="Chapter 6: Running Tasks and Jobs in parallel" href="parallel_processing.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="transform.html" title="Chapter 8: Applying the same recipe to create many different files with @transform"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="parallel_processing.html" title="Chapter 6: Running Tasks and Jobs in parallel"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="../new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <div class="section" id="manual-split-chapter-num-splitting-up-large-tasks-files-with-split">
+<span id="manual-split"></span><h1><strong>Chapter 7</strong>: <cite>Splitting up large tasks / files with</cite> <strong>@split</strong><a class="headerlink" href="#manual-split-chapter-num-splitting-up-large-tasks-files-with-split" title="Permalink to this headline">¶</a></h1>
+<blockquote>
+<div><table class="hlist"><tr><td><ul class="simple">
+<li><a class="reference internal" href="manual_contents.html#manual"><em>Manual overview</em></a></li>
+</ul>
+</td><td><ul class="simple">
+<li><a class="reference internal" href="../../decorators/split.html#decorators-split"><em>@split</em></a> syntax in detail</li>
+</ul>
+</td></tr></table>
+<p>A common requirement in computational pipelines is to split up a large task into
+small jobs which can be run on different processors, (or sent to a computational
+cluster). Very often, the number of jobs depends dynamically on the size of the
+task, and cannot be known for sure beforehand.</p>
+<p><em>Ruffus</em> uses the <a class="reference internal" href="../../decorators/split.html#decorators-split"><em>@split</em></a> decorator to indicate that
+the <a class="reference internal" href="../../glossary.html#term-task"><em class="xref std std-term">task</em></a> function will produce an indeterminate number of output files.</p>
+</div></blockquote>
+<div class="section" id="split">
+<span id="index-0"></span><h2><strong>@split</strong><a class="headerlink" href="#split" title="Permalink to this headline">¶</a></h2>
+<p>This example is borrowed from <a class="reference internal" href="../simple_tutorial/step5_split.html#simple-tutorial-5th-step"><em>step 5</em></a> of the simple tutorial.</p>
+<blockquote>
+<div><div class="admonition note">
+<p class="first admonition-title">Note</p>
+<p class="last">See <a class="reference internal" href="../simple_tutorial/step5_split_code.html#simple-tutorial-5th-step-code"><em>accompanying Python Code</em></a></p>
+</div>
+</div></blockquote>
+<div class="section" id="splitting-up-a-long-list-of-random-numbers-to-calculate-their-variance">
+<h3>Splitting up a long list of random numbers to calculate their variance<a class="headerlink" href="#splitting-up-a-long-list-of-random-numbers-to-calculate-their-variance" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><table border="1" class="borderless docutils">
+<colgroup>
+<col width="1%" />
+<col width="99%" />
+</colgroup>
+<tbody valign="top">
+<tr class="row-odd"><td><p class="first centered">
+<strong>Step 5 from the tutorial:</strong></p><img alt="../../_images/simple_tutorial_step5_sans_key.png" class="last" src="../../_images/simple_tutorial_step5_sans_key.png" />
+</td>
+<td><blockquote class="first last">
+<div><p>Suppose we had a list of 100,000 random numbers in the file <tt class="docutils literal"><span class="pre">random_numbers.list</span></tt>:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="kn">import</span> <span class="nn">random</span>
+<span class="n">f</span> <span class="o">=</span> <span class="nb">open</span><span class="p">(</span><span class="s">'random_numbers.list'</span><span class="p">,</span> <span class="s">'w'</span><span class="p">)</span>
+<span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">NUMBER_OF_RANDOMS</span><span class="p">):</span>
+ <span class="n">f</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="s">'</span><span class="si">%g</span><span class="se">\n</span><span class="s">'</span> <span class="o">%</span> <span class="p">(</span><span class="n">random</span><span class="o">.</span><span class="n">random</span><span class="p">()</span> <span class="o">*</span> <span class="mf">100.0</span><span class="p">))</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p>We might want to calculate the sample variance more quickly by splitting them
+into <tt class="docutils literal"><span class="pre">NNN</span></tt> parcels of 1000 numbers each and working on them in parallel.
+In this case we know that <tt class="docutils literal"><span class="pre">NNN</span> <span class="pre">==</span> <span class="pre">100</span></tt>, but usually the number of resulting files
+is only apparent after we have finished processing our starting file.</p>
+</div></blockquote>
+</td>
+</tr>
+</tbody>
+</table>
+<p>Our pipeline function needs to take the random numbers file <tt class="docutils literal"><span class="pre">random_numbers.list</span></tt>,
+read the random numbers from it, and write them to a new file every 1000 lines.</p>
+<p>The <em>Ruffus</em> decorator <a class="reference internal" href="../../decorators/split.html#decorators-split"><em>@split</em></a> is designed specifically for
+splitting up <em>inputs</em> into an indeterminate <tt class="docutils literal"><span class="pre">NNN</span></tt> number of <em>outputs</em>:</p>
+<blockquote>
+<div><img alt="../../_images/simple_tutorial_split.png" src="../../_images/simple_tutorial_split.png" />
+</div></blockquote>
+<p>Ruffus will set</p>
+<blockquote>
+<div><div class="line-block">
+<div class="line"><tt class="docutils literal"><span class="pre">input_file_name</span></tt> to <tt class="docutils literal"><span class="pre">"random_numbers.list"</span></tt></div>
+<div class="line"><tt class="docutils literal"><span class="pre">output_files</span></tt> to all files which match <tt class="docutils literal"><span class="pre">*.chunks</span></tt> (i.e. <tt class="docutils literal"><span class="pre">"1.chunks"</span></tt>, <tt class="docutils literal"><span class="pre">"2.chunks"</span></tt> etc.).</div>
+</div>
+</div></blockquote>
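+<p>A minimal sketch of such a task function is shown below; the chunk size and the clean-up logic are assumptions made for illustration, so see the accompanying code for the exact version used in the tutorial:</p>
+<div class="highlight-python"><div class="highlight"><pre>import os
+from ruffus import *
+
+@split("random_numbers.list", "*.chunks")
+def step_4_split_numbers_into_chunks(input_file_name, output_files):
+    # remove any ".chunks" detritus left over from previous runs
+    for old_file in output_files:
+        os.unlink(old_file)
+    # start a new "NNN.chunks" file every 1000 lines
+    output_file = None
+    for line_num, line in enumerate(open(input_file_name)):
+        if line_num % 1000 == 0:
+            if output_file:
+                output_file.close()
+            output_file = open("%d.chunks" % (line_num // 1000 + 1), "w")
+        output_file.write(line)
+    if output_file:
+        output_file.close()
+</pre></div>
+</div>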
+</div></blockquote>
+</div>
+</div>
+<div class="section" id="output-files">
+<span id="manual-split-output-files"></span><h2>Output files<a class="headerlink" href="#output-files" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>The <em>output</em> (second) parameter of <strong>@split</strong> usually contains a
+<a class="reference external" href="http://docs.python.org/library/glob.html"><cite>glob</cite></a> pattern like the <tt class="docutils literal"><span class="pre">*.chunks</span></tt> above.</p>
+<div class="admonition note">
+<p class="first admonition-title">Note</p>
+<p class="last"><strong>Ruffus</strong> is quite relaxed about the contents of the <tt class="docutils literal"><span class="pre">output</span></tt> parameter.
+Strings are treated as file names. Strings containing <a class="reference external" href="http://docs.python.org/library/glob.html"><cite>glob</cite></a> patterns are expanded.
+Other types are passed verbatim to the decorated task function.</p>
+</div>
+<p>The files which match the <a class="reference external" href="http://docs.python.org/library/glob.html"><cite>glob</cite></a> will be passed as the actual parameters to the job
+function. Thus, the first time you run the example code, <tt class="docutils literal"><span class="pre">*.chunks</span></tt> will return an empty list because
+no <tt class="docutils literal"><span class="pre">.chunks</span></tt> files have yet been created, resulting in the following call:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="n">step_4_split_numbers_into_chunks</span> <span class="p">(</span><span class="s">"random_numbers.list"</span><span class="p">,</span> <span class="p">[])</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p>After that <tt class="docutils literal"><span class="pre">*.chunks</span></tt> will match the list of current <tt class="docutils literal"><span class="pre">.chunks</span></tt> files created by
+the previous pipeline run.</p>
+<p>The file names in <em>output</em> are generally out of date or superfluous. They are useful
+mainly for cleaning up detritus from previous runs
+(have a look at <a class="reference internal" href="../simple_tutorial/step5_split_code.html#simple-tutorial-5th-step-code"><em>step_4_split_numbers_into_chunks(...)</em></a>).</p>
+<div class="admonition note">
+<p class="first admonition-title">Note</p>
+<p>It is important, nevertheless, to specify the list of <em>output</em> files correctly.
+Otherwise, dependent tasks will not know what files you have created, and it will
+not be possible to chain the <em>output</em> of this pipeline task automatically into the
+<em>inputs</em> of the next step.</p>
+<p>You can specify multiple <a class="reference external" href="http://docs.python.org/library/glob.html"><cite>glob</cite></a> patterns to match <em>all</em> the files which are the
+result of the splitting task function. These can even cover different directories,
+or groups of file names. This is a more extreme example:</p>
+<blockquote class="last">
+<div><div class="highlight-python"><div class="highlight"><pre><span class="nd">@split</span><span class="p">(</span><span class="s">"input.file"</span><span class="p">,</span> <span class="p">[</span><span class="s">'a*.bits'</span><span class="p">,</span> <span class="s">'b*.pieces'</span><span class="p">,</span> <span class="s">'somewhere_else/c*.stuff'</span><span class="p">])</span>
+<span class="k">def</span> <span class="nf">split_function</span> <span class="p">(</span><span class="n">input_filename</span><span class="p">,</span> <span class="n">output_files</span><span class="p">):</span>
+ <span class="s">"Code to split up 'input.file'"</span>
+</pre></div>
+</div>
+</div></blockquote>
+</div>
+<p>The actual resulting files of this task function are not constrained by the file names
+in the <em>output</em> parameter of the function. The whole point of <strong>@split</strong> is that the number
+of resulting output files cannot be known beforehand, after all.</p>
+</div></blockquote>
+<div class="section" id="example">
+<h3>Example<a class="headerlink" href="#example" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><dl class="docutils">
+<dt>Suppose random_numbers.list can be split into four pieces; this function will then create</dt>
+<dd><tt class="docutils literal"><span class="pre">1.chunks</span></tt>, <tt class="docutils literal"><span class="pre">2.chunks</span></tt>, <tt class="docutils literal"><span class="pre">3.chunks</span></tt>, <tt class="docutils literal"><span class="pre">4.chunks</span></tt></dd>
+</dl>
+<p>Subsequently, we receive a larger <tt class="docutils literal"><span class="pre">random_numbers.list</span></tt> which should be split into 10
+pieces. If the pipeline is called again, the task function receives the following parameters:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="n">step_4_split_numbers_into_chunks</span><span class="p">(</span><span class="s">"random_numbers.list"</span><span class="p">,</span>
+ <span class="p">[</span><span class="s">"1.chunks"</span><span class="p">,</span> <span class="c"># previously created files</span>
+ <span class="s">"2.chunks"</span><span class="p">,</span> <span class="c">#</span>
+ <span class="s">"3.chunks"</span><span class="p">,</span> <span class="c">#</span>
+ <span class="s">"4.chunks"</span> <span class="p">])</span> <span class="c">#</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p>This doesn’t stop the function from creating the extra <tt class="docutils literal"><span class="pre">5.chunks</span></tt>, <tt class="docutils literal"><span class="pre">6.chunks</span></tt> etc.</p>
+<div class="admonition note">
+<p class="first admonition-title">Note</p>
+<p>Any task <strong>@follow</strong>ing and specifying
+<tt class="docutils literal"><span class="pre">step_4_split_numbers_into_chunks(...)</span></tt> as its <em>inputs</em> parameter is going to receive
+<tt class="docutils literal"><span class="pre">1.chunks</span></tt>, <tt class="docutils literal"><span class="pre">...</span></tt>, <tt class="docutils literal"><span class="pre">10.chunks</span></tt> and not merely the first four files.</p>
+<p class="last">In other words, dependent / down-stream tasks which obtain output files automatically
+from the task decorated by <strong>@split</strong> receive the most current file list.
+The <a class="reference external" href="http://docs.python.org/library/glob.html"><cite>glob</cite></a> patterns will be matched again to see exactly what files the task function
+has created in reality <em>after</em> the task completes.</p>
+</div>
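+<p>As a hedged illustration of this behaviour (the task and file names are assumptions), a downstream task that consumes whatever chunks were actually produced might look like:</p>
+<div class="highlight-python"><div class="highlight"><pre>@merge(step_4_split_numbers_into_chunks, "variance.result")
+def calculate_variance(input_file_names, output_file_name):
+    # input_file_names is the *current* list of "*.chunks" files,
+    # re-globbed after the @split task has completed
+    with open(output_file_name, "w") as result:
+        result.write("merged %d chunks\n" % len(input_file_names))
+</pre></div>
+</div>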
+</div></blockquote>
+</div>
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="../../contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#"><strong>Chapter 7</strong>: <cite>Splitting up large tasks / files with</cite> <strong>@split</strong></a><ul>
+<li><a class="reference internal" href="#split"><strong>@split</strong></a><ul>
+<li><a class="reference internal" href="#splitting-up-a-long-list-of-random-numbers-to-calculate-their-variance">Splitting up a long list of random numbers to calculate their variance</a></li>
+</ul>
+</li>
+<li><a class="reference internal" href="#output-files">Output files</a><ul>
+<li><a class="reference internal" href="#example">Example</a></li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="parallel_processing.html"
+ title="previous chapter"><strong>Chapter 6</strong>: <cite>Running Tasks and Jobs in parallel</cite></a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="transform.html"
+ title="next chapter"><strong>Chapter 8</strong>: <cite>Applying the same recipe to create many different files with</cite> <strong>@transform</strong></a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../../_sources/tutorials/manual/split.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="../../decorators/decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="../../decorators/originate.html">@originate</a> </li>
+ <li><a href="../../decorators/split.html">@split</a> </li>
+ <li><a href="../../decorators/transform.html">@transform</a> </li>
+ <li><a href="../../decorators/merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="../../decorators/subdivide.html">@subdivide</a> </li>
+ <li><a href="../../decorators/transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="../../decorators/collate.html">@collate</a> </li>
+ <li><a href="../../decorators/collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="../../decorators/graphviz.html">@graphviz</a> </li>
+ <li><a href="../../decorators/mkdir.html">@mkdir</a> </li>
+ <li><a href="../../decorators/follows.html">@follows / mkdir</a> </li>
+ <li><a href="../../decorators/posttask.html">@posttask touch_file</a> </li>
+ <li><a href="../../decorators/active_if.html">@active_if</a> </li>
+ <li><a href="../../decorators/jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="../../decorators/product.html">@product </a> </li>
+ <li><a href="../../decorators/permutations.html">@permutations </a> </li>
+ <li><a href="../../decorators/combinations.html">@combinations </a> </li>
+ <li><a href="../../decorators/combinations_with_replacement.html">@combinations_ _with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="../../decorators/decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="../../decorators/files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="../../decorators/parallel.html">@parallel</a> </li>
+ <li><a href="../../decorators/check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="../../decorators/indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="transform.html" title="Chapter 8: Applying the same recipe to create many different files with @transform"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="parallel_processing.html" title="Chapter 6: Running Tasks and Jobs in parallel"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="../new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/tutorials/manual/tasks_and_globs_in_inputs.html b/doc/_build/html/tutorials/manual/tasks_and_globs_in_inputs.html
new file mode 100644
index 0000000..9db57ae
--- /dev/null
+++ b/doc/_build/html/tutorials/manual/tasks_and_globs_in_inputs.html
@@ -0,0 +1,418 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>Chapter 4: Chaining pipeline Tasks together automatically — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../../index.html" />
+ <link rel="next" title="Chapter 5: Tracing pipeline parameters" href="tracing_pipeline_parameters.html" />
+ <link rel="prev" title="Chapter 3: Passing parameters to the pipeline with @files" href="files.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="tracing_pipeline_parameters.html" title="Chapter 5: Tracing pipeline parameters"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="files.html" title="Chapter 3: Passing parameters to the pipeline with @files"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="../new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <div class="section" id="manual-tasks-and-globs-in-inputs-chapter-num-chaining-pipeline-tasks-together-automatically">
+<span id="manual-tasks-and-globs-in-inputs"></span><h1><strong>Chapter 4</strong>: Chaining pipeline <cite>Tasks</cite> together automatically<a class="headerlink" href="#manual-tasks-and-globs-in-inputs-chapter-num-chaining-pipeline-tasks-together-automatically" title="Permalink to this headline">¶</a></h1>
+<blockquote>
+<div><ul class="simple">
+<li><a class="reference internal" href="manual_contents.html#manual"><em>Manual overview</em></a></li>
+</ul>
+<p id="index-0">In the previous chapter, we explained that <strong>ruffus</strong> determines the data flow through
+your pipeline by calling your <a class="reference internal" href="../../glossary.html#term-task"><em class="xref std std-term">task</em></a> functions (normal python functions written
+by you) with the right parameters at the right time, making sure that</p>
+<blockquote>
+<div><ol class="arabic simple">
+<li>only out-of-date parts of the pipeline will be re-run</li>
+<li>multiple jobs can be run in parallel (on different processors if possible)</li>
+<li>pipeline stages can be chained together automatically</li>
+</ol>
+</div></blockquote>
+<p>This chapter is devoted to the last item: how the output of one stage of the pipeline
+is piped in as the input of the next stage.</p>
+</div></blockquote>
+<span class="target" id="manual-tasks-as-input"></span><div class="section" id="tasks-in-the-inputs-parameters-implicit-dependencies">
+<span id="index-1"></span><h2>Tasks in the <em>inputs</em> parameters: Implicit dependencies<a class="headerlink" href="#tasks-in-the-inputs-parameters-implicit-dependencies" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p><strong>Ruffus</strong> treats the first two parameters of each job in each task as the <em>inputs</em> and
+<em>outputs</em> parameters respectively. If the <em>inputs</em> parameter contains strings, these
+will be treated as the names of files required by that job.</p>
+<p>If the <em>inputs</em> parameter contains any <a class="reference internal" href="../../glossary.html#term-task"><em class="xref std std-term">task</em></a>s, <strong>Ruffus</strong> will take the output
+from these specified tasks as part of the current <em>inputs</em> parameter. In addition,
+such tasks will be listed as prerequisites, much as if you had included them in a
+separate <tt class="docutils literal"><span class="pre">@follows</span></tt> decorator.</p>
+<p>For example, suppose we wanted to take the output files from <tt class="docutils literal"><span class="pre">task1</span></tt> and feed
+them automatically to <tt class="docutils literal"><span class="pre">task2</span></tt>; we might write the following code:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="n">task1_ouput_files</span> <span class="o">=</span> <span class="p">(</span><span class="s">"task1.output_a"</span><span class="p">,</span> <span class="s">"task1.output_b"</span><span class="p">,</span> <span class="s">"task1.output_c"</span><span class="p">)</span>
+
+<span class="nd">@follows</span><span class="p">(</span><span class="n">task1</span><span class="p">)</span>
+<span class="nd">@files</span><span class="p">(</span><span class="n">task1_ouput_files</span><span class="p">,</span> <span class="s">"task2.output"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">task2</span><span class="p">(</span><span class="nb">input</span><span class="p">,</span> <span class="n">output</span><span class="p">):</span>
+ <span class="k">pass</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p>This can be replaced by the much more concise syntax:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="nd">@files</span><span class="p">(</span><span class="n">task1</span><span class="p">,</span> <span class="s">"task2.output"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">task2</span><span class="p">(</span><span class="nb">input</span><span class="p">,</span> <span class="n">output</span><span class="p">):</span>
+ <span class="k">pass</span>
+</pre></div>
+</div>
+</div></blockquote>
+<dl class="docutils">
+<dt>This means:</dt>
+<dd><ul class="first last simple">
+<li>Take the output from <tt class="docutils literal"><span class="pre">task1</span></tt>, and feed it automatically into <tt class="docutils literal"><span class="pre">task2</span></tt>.</li>
+<li>Also make sure that <tt class="docutils literal"><span class="pre">task1</span></tt> becomes a prerequisite of <tt class="docutils literal"><span class="pre">task2</span></tt>, i.e. <tt class="docutils literal"><span class="pre">task2</span></tt> follows <tt class="docutils literal"><span class="pre">task1</span></tt>.</li>
+</ul>
+</dd>
+</dl>
+<p>In other words, <tt class="docutils literal"><span class="pre">task1</span></tt> and <tt class="docutils literal"><span class="pre">task2</span></tt> have been chained together automatically.
+This is both a great convenience and makes the flow of data through a pipeline much clearer.</p>
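+<p>To make the chained example above runnable end to end, here is a hedged sketch in which the body of <tt class="docutils literal"><span class="pre">task1</span></tt> is an assumption added purely for illustration:</p>
+<div class="highlight-python"><div class="highlight"><pre>from ruffus import *
+
+@files(None, ["task1.output_a", "task1.output_b", "task1.output_c"])
+def task1(input, output_files):
+    for output_file in output_files:
+        open(output_file, "w").close()
+
+@files(task1, "task2.output")
+def task2(input, output):
+    # input == ["task1.output_a", "task1.output_b", "task1.output_c"]
+    open(output, "w").write("\n".join(input))
+
+pipeline_run([task2])
+</pre></div>
+</div>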
+</div></blockquote>
+</div>
+<div class="section" id="refering-to-tasks-by-name-in-the-inputs-parameters">
+<span id="manual-output-from"></span><span id="index-2"></span><h2>Refering to tasks by name in the <em>inputs</em> parameters<a class="headerlink" href="#refering-to-tasks-by-name-in-the-inputs-parameters" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p><a class="reference internal" href="follows.html#manual-follows-out-of-order"><em>Chapter 1</em></a> explains that task functions can be
+defined in any order so long as undefined tasks are referred to by their (fully qualified if
+necessary) function name string.</p>
+<p>You can similarly refer to tasks in the <em>inputs</em> parameter by name, as a text string.
+Normally <strong>Ruffus</strong> assumes that strings are file names. To indicate that that
+you are referring to task function names instead, you need to
+wrap the relevant parameter or (nested) parts of the parameter with the indicator object
+<a class="reference internal" href="../../decorators/indicator_objects.html#decorators-output-from"><em>output_from(“task_name”)</em></a>. Thus,</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="nd">@split</span><span class="p">([</span><span class="s">"a.file"</span><span class="p">,</span> <span class="p">(</span><span class="s">"b.file"</span><span class="p">,</span> <span class="n">output_from</span><span class="p">(</span><span class="s">"task1"</span><span class="p">,</span> <span class="mi">76</span><span class="p">,</span> <span class="s">"task2"</span><spa [...]
+<span class="k">def</span> <span class="nf">task2</span><span class="p">(</span><span class="nb">input</span><span class="p">,</span> <span class="n">output</span><span class="p">):</span>
+ <span class="k">pass</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p>is equivalent to:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="nd">@split</span><span class="p">([</span><span class="s">"a.file"</span><span class="p">,</span> <span class="p">(</span><span class="s">"b.file"</span><span class="p">,</span> <span class="p">(</span><span class="n">task1</span><span class="p">,</span> <span class="mi">76</span><span class="p">,</span> <span class="n">task2</span><span class="p">))],</span> <span class="s">"*.split" [...]
+<span class="k">def</span> <span class="nf">task2</span><span class="p">(</span><span class="nb">input</span><span class="p">,</span> <span class="n">output</span><span class="p">):</span>
+ <span class="k">pass</span>
+</pre></div>
+</div>
+</div></blockquote>
+</div></blockquote>
+</div>
+<div class="section" id="globs-in-the-inputs-parameters">
+<span id="manual-globs-as-input"></span><span id="index-3"></span><h2>Globs in the <em>inputs</em> parameters<a class="headerlink" href="#globs-in-the-inputs-parameters" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div>As a syntactic convenience, <strong>Ruffus</strong> also allows you to specify a
+<a class="reference external" href="http://docs.python.org/library/glob.html"><cite>glob</cite></a> pattern (e.g. <tt class="docutils literal"><span class="pre">*.txt</span></tt>) in the
+<em>inputs</em> parameter; it will be expanded automatically to the matching
+file names. This applies to any string within <em>inputs</em> which contains any of the characters <tt class="docutils literal"><span class="pre">*?[]</span></tt>.</div></blockquote>
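+<p>For example (a hedged sketch; the file names are assumptions), a glob can be used directly as the <em>inputs</em> parameter:</p>
+<div class="highlight-python"><div class="highlight"><pre>from ruffus import *
+
+@merge("*.txt", "all_text_files.summary")
+def summarise_text_files(input_file_names, output_file_name):
+    # "*.txt" has been expanded to the list of matching file names
+    open(output_file_name, "w").write("\n".join(input_file_names))
+</pre></div>
+</div>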
+</div>
+<div class="section" id="mixing-globs-tasks-and-files-as-inputs">
+<span id="manual-mixing-tasks-globs-files"></span><span id="index-4"></span><h2>Mixing globs, tasks and files as <strong>inputs</strong><a class="headerlink" href="#mixing-globs-tasks-and-files-as-inputs" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p><strong>Ruffus</strong> is very flexible in allowing you to mix
+<a class="reference external" href="http://docs.python.org/library/glob.html"><cite>glob</cite></a> patterns, references to tasks and file names
+in the data structures you pass as the <strong>inputs</strong> parameters.</p>
+<p>Suppose, in the previous example,</p>
+<blockquote>
+<div><ul>
+<li><dl class="first docutils">
+<dt>that <tt class="docutils literal"><span class="pre">task1</span></tt> produces the files</dt>
+<dd><div class="first last highlight-python"><div class="highlight"><pre><span class="s">"task1.output_a"</span>
+<span class="s">"task1.output_b"</span>
+<span class="s">"task1.output_c"</span>
+</pre></div>
+</div>
+</dd>
+</dl>
+</li>
+<li><dl class="first docutils">
+<dt>that the following additional files are also present</dt>
+<dd><div class="first last highlight-python"><div class="highlight"><pre><span class="s">"extra.a"</span>
+<span class="s">"extra.c"</span>
+</pre></div>
+</div>
+</dd>
+</dl>
+</li>
+</ul>
+</div></blockquote>
+<p>Then,</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="nd">@files</span><span class="p">([</span><span class="s">"1_more.file"</span><span class="p">,</span> <span class="s">"2_more.file"</span><span class="p">,</span> <span class="n">task1</span><span class="p">,</span> <span class="s">"extra.*"</span><span class="p">],</span> <span class="s">"task2.output"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">task2</span><span class="p">(</span><span class="nb">input</span><span class="p">,</span> <span class="n">output</span><span class="p">):</span>
+ <span class="k">pass</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p>would result in the combination of the specified file name, the expansion of the <a class="reference external" href="http://docs.python.org/library/glob.html"><cite>glob</cite></a>,
+and the results from the previous task:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="nb">input</span> <span class="o">==</span> <span class="p">[</span>
+ <span class="s">"1_more.file"</span> <span class="p">,</span> <span class="c"># specified file</span>
+ <span class="s">"2_more.file"</span> <span class="p">,</span> <span class="c"># specified file</span>
+ <span class="s">"task1.output_a"</span><span class="p">,</span> <span class="c"># from previous task</span>
+ <span class="s">"task1.output_b"</span><span class="p">,</span> <span class="c"># from previous task</span>
+ <span class="s">"task1.output_c"</span><span class="p">,</span> <span class="c"># from previous task</span>
+ <span class="s">"extra.a"</span> <span class="p">,</span> <span class="c"># from glob expansion</span>
+ <span class="s">"extra.c"</span> <span class="p">,</span> <span class="c"># from glob expansion</span>
+ <span class="p">]</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p>In other words, <a class="reference external" href="http://docs.python.org/library/glob.html"><cite>glob</cite></a> patterns and tasks are expanded “in place” when they are part of
+python lists, sets, or tuples.</p>
+</div></blockquote>
+</div>
+<div class="section" id="appending-globs-or-tasks-to-pre-existing-lists-sets-or-tuples">
+<span id="manual-appending-tasks-globs-to-lists-sets-tuples"></span><h2>Appending globs or tasks to pre-existing lists, sets or tuples<a class="headerlink" href="#appending-globs-or-tasks-to-pre-existing-lists-sets-or-tuples" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>Sometimes we want the <em>inputs</em> parameter to be a combination of <a class="reference external" href="http://docs.python.org/library/glob.html"><cite>glob</cite></a>s and tasks,
+and an existing list of file names.</p>
+<p>To elaborate on the above example, suppose we have a list of files:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="n">file_list</span> <span class="o">=</span> <span class="p">[</span> <span class="s">"1_more.file"</span><span class="p">,</span>
+ <span class="s">"2_more.file"</span><span class="p">]</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p>Now we want the input to <tt class="docutils literal"><span class="pre">task2</span></tt> to be:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="n">file_list</span> <span class="o">+</span> <span class="n">task1</span> <span class="o">+</span> <span class="s">"extra.*"</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p>The closest we can come to expressing this in Python syntax is to turn <tt class="docutils literal"><span class="pre">task1</span></tt> and the <a class="reference external" href="http://docs.python.org/library/glob.html"><cite>glob</cite></a>
+into lists first and then add them together:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="nd">@files</span><span class="p">(</span><span class="n">file_list</span> <span class="o">+</span> <span class="p">[</span><span class="n">task1</span><span class="p">]</span> <span class="o">+</span> <span class="p">[</span><span class="s">"extra.*"</span><span class="p">],</span> <span class="s">"task2.output"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">task2</span><span class="p">(</span><span class="nb">input</span><span class="p">,</span> <span class="n">output</span><span class="p">):</span>
+ <span class="k">pass</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p>The same also works with tuples:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="n">file_list</span> <span class="o">=</span> <span class="p">(</span> <span class="s">"1_more.file"</span><span class="p">,</span>
+ <span class="s">"2_more.file"</span><span class="p">)</span>
+
+<span class="nd">@files</span><span class="p">(</span><span class="n">file_list</span> <span class="o">+</span> <span class="p">(</span><span class="n">task1</span><span class="p">,</span> <span class="s">"extra.*"</span><span class="p">),</span> <span class="s">"task2.output"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">task2</span><span class="p">(</span><span class="nb">input</span><span class="p">,</span> <span class="n">output</span><span class="p">):</span>
+ <span class="k">pass</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p>and sets (using the set union operator <tt class="docutils literal"><span class="pre">|</span></tt>):</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="n">file_list</span> <span class="o">=</span> <span class="nb">set</span><span class="p">([</span> <span class="s">"1_more.file"</span><span class="p">,</span>
+ <span class="s">"2_more.file"</span><span class="p">])</span>
+
+<span class="nd">@files</span><span class="p">(</span><span class="n">file_list</span> <span class="o">|</span> <span class="nb">set</span><span class="p">([</span><span class="n">task1</span> <span class="o">+</span> <span class="s">"extra.*"</span><span class="p">]),</span> <span class="s">"task2.output"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">task2</span><span class="p">(</span><span class="nb">input</span><span class="p">,</span> <span class="n">output</span><span class="p">):</span>
+ <span class="k">pass</span>
+</pre></div>
+</div>
+</div></blockquote>
+</div></blockquote>
+</div>
+<div class="section" id="understanding-complex-inputs-and-outputs-parameters">
+<span id="manual-understanding-complex-inputs"></span><h2>Understanding complex <em>inputs</em> and <em>outputs</em> parameters<a class="headerlink" href="#understanding-complex-inputs-and-outputs-parameters" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>In all cases, <strong>Ruffus</strong> tries to do the right thing, and to make the simple or
+obvious case require the simplest, least onerous syntax.</p>
+<p>If <strong>Ruffus</strong> sometimes does not behave the way you expect, please write to the authors:
+it may be a bug!</p>
+<p>In all other cases, the best thing to do is to write your <strong>Ruffus</strong> specifications and
+check the results of <a class="reference internal" href="../../pipeline_functions.html#pipeline-functions-pipeline-printout"><em>pipeline_printout</em></a>
+to make sure that your wishes are properly
+reflected in the parameters sent to your pipelined tasks.</p>
+<p>In other words, read the <a class="reference internal" href="tracing_pipeline_parameters.html#manual-tracing-pipeline-parameters"><em>next chapter</em></a>!</p>
+</div></blockquote>
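+<p>For example (a hedged sketch; the verbosity level is an arbitrary choice, and <tt class="docutils literal"><span class="pre">task2</span></tt> stands in for your own target task), you can inspect the parameters <strong>Ruffus</strong> would pass to each job without running anything:</p>
+<div class="highlight-python"><div class="highlight"><pre>import sys
+from ruffus import pipeline_printout
+
+# show which jobs would be run, and with which input / output parameters
+pipeline_printout(sys.stdout, [task2], verbose = 3)
+</pre></div>
+</div>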
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="../../contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#"><strong>Chapter 4</strong>: Chaining pipeline <cite>Tasks</cite> together automatically</a><ul>
+<li><a class="reference internal" href="#tasks-in-the-inputs-parameters-implicit-dependencies">Tasks in the <em>inputs</em> parameters: Implicit dependencies</a></li>
+<li><a class="reference internal" href="#refering-to-tasks-by-name-in-the-inputs-parameters">Refering to tasks by name in the <em>inputs</em> parameters</a></li>
+<li><a class="reference internal" href="#globs-in-the-inputs-parameters">Globs in the <em>inputs</em> parameters</a></li>
+<li><a class="reference internal" href="#mixing-globs-tasks-and-files-as-inputs">Mixing globs, tasks and files as <strong>inputs</strong></a></li>
+<li><a class="reference internal" href="#appending-globs-or-tasks-to-pre-existing-lists-sets-or-tuples">Appending globs or tasks to pre-existing lists, sets or tuples</a></li>
+<li><a class="reference internal" href="#understanding-complex-inputs-and-outputs-parameters">Understanding complex <em>inputs</em> and <em>outputs</em> parameters</a></li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="files.html"
+ title="previous chapter"><strong>Chapter 3</strong>: <cite>Passing parameters to the pipeline with</cite> <strong>@files</strong></a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="tracing_pipeline_parameters.html"
+ title="next chapter"><strong>Chapter 5</strong>: Tracing pipeline parameters</a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../../_sources/tutorials/manual/tasks_and_globs_in_inputs.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="../../decorators/decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="../../decorators/originate.html">@originate</a> </li>
+ <li><a href="../../decorators/split.html">@split</a> </li>
+ <li><a href="../../decorators/transform.html">@transform</a> </li>
+ <li><a href="../../decorators/merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="../../decorators/subdivide.html">@subdivide</a> </li>
+ <li><a href="../../decorators/transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="../../decorators/collate.html">@collate</a> </li>
+ <li><a href="../../decorators/collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="../../decorators/graphviz.html">@graphviz</a> </li>
+ <li><a href="../../decorators/mkdir.html">@mkdir</a> </li>
+ <li><a href="../../decorators/follows.html">@follows / mkdir</a> </li>
+ <li><a href="../../decorators/posttask.html">@posttask touch_file</a> </li>
+ <li><a href="../../decorators/active_if.html">@active_if</a> </li>
+ <li><a href="../../decorators/jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="../../decorators/product.html">@product </a> </li>
+ <li><a href="../../decorators/permutations.html">@permutations </a> </li>
+ <li><a href="../../decorators/combinations.html">@combinations </a> </li>
+ <li><a href="../../decorators/combinations_with_replacement.html">@combinations_ _with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="../../decorators/decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="../../decorators/files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="../../decorators/parallel.html">@parallel</a> </li>
+ <li><a href="../../decorators/check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="../../decorators/indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="tracing_pipeline_parameters.html" title="Chapter 5: Tracing pipeline parameters"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="files.html" title="Chapter 3: Passing parameters to the pipeline with @files"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="../new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/tutorials/manual/tasks_as_recipes.html b/doc/_build/html/tutorials/manual/tasks_as_recipes.html
new file mode 100644
index 0000000..b50a914
--- /dev/null
+++ b/doc/_build/html/tutorials/manual/tasks_as_recipes.html
@@ -0,0 +1,353 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>Chapter 2: Tasks and Recipes — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../../index.html" />
+ <link rel="next" title="Chapter 3: Passing parameters to the pipeline with @files" href="files.html" />
+ <link rel="prev" title="Chapter 1 : Arranging tasks into a pipeline with @follows" href="follows.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="files.html" title="Chapter 3: Passing parameters to the pipeline with @files"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="follows.html" title="Chapter 1 : Arranging tasks into a pipeline with @follows"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="../new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <div class="section" id="manual-tasks-as-recipes-chapter-num-tasks-and-recipes">
+<span id="manual-tasks-as-recipes"></span><h1><strong>Chapter 2</strong>: <cite>Tasks and Recipes</cite><a class="headerlink" href="#manual-tasks-as-recipes-chapter-num-tasks-and-recipes" title="Permalink to this headline">¶</a></h1>
+<blockquote>
+<div><ul class="simple">
+<li><a class="reference internal" href="manual_contents.html#manual"><em>Manual overview</em></a></li>
+</ul>
+<div class="line-block" id="index-0">
+<div class="line">The python functions which do the actual work of each stage or
+<a class="reference internal" href="../../glossary.html#term-task"><em class="xref std std-term">task</em></a> of a <strong>Ruffus</strong> pipeline are written by you.</div>
+<div class="line">The role of <strong>Ruffus</strong> is to make sure these functions are called in the right order,
+with the right parameters, running in parallel using multiprocessing if desired.</div>
+</div>
+<p><strong>Ruffus</strong> manages the data flowing through your pipeline by supplying the correct
+parameters to your pipeline functions. In this way, you will get the following features
+for free:</p>
+<blockquote>
+<div><ol class="arabic simple">
+<li>only out-of-date parts of the pipeline will be re-run</li>
+<li>multiple jobs can be run in parallel (on different processors if possible)</li>
+<li>pipeline stages can be chained together automatically</li>
+</ol>
+</div></blockquote>
+<p>Much of the functionality of <strong>ruffus</strong> involves determining the data flow through
+your pipeline, by governing how the output of one stage of the pipeline is supplied
+as parameters to the functions of the next.</p>
+</div></blockquote>
+<div class="section" id="skip-jobs-which-are-up-to-date">
+<span id="manual-skip-up-to-date"></span><span id="index-1"></span><h2>Skip jobs which are up to date<a class="headerlink" href="#skip-jobs-which-are-up-to-date" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>Very often it will be necessary to re-run a computational pipeline because part of the
+data has changed. <strong>Ruffus</strong> will run only those stages of the pipeline
+which are absolutely necessary.</p>
+<p>By default, <strong>Ruffus</strong> uses file modification times to see which parts of the pipeline
+are out of date, and which <a class="reference internal" href="../../glossary.html#term-task"><em class="xref std std-term">task</em></a>s need to be run again. This is so convenient that
+even if a pipeline is not file-based (if it, for example, uses database tables instead),
+it may be worthwhile to use dummy “sentinel” files to manage the stages of a pipeline.</p>
+<p>(It is also possible, as we shall
+see later, to add custom functions to determine which parts of the pipeline are out
+of date: see <a class="reference internal" href="../../decorators/parallel.html#decorators-parallel"><em>@parallel</em></a> and
+<a class="reference internal" href="../../decorators/check_if_uptodate.html#decorators-check-if-uptodate"><em>@check_if_uptodate</em></a>.)</p>
+</div></blockquote>
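+<p>As a minimal sketch (the sentinel file name and the check logic here are hypothetical, not part of the manual),
+such a custom function passed to <strong>@check_if_uptodate</strong> receives the same parameters as the job and
+returns whether the job needs to run, together with a reason:</p>
+<div class="highlight-python"><pre>import os
+from ruffus import files, check_if_uptodate, pipeline_run
+
+# hypothetical rule: re-run only when the sentinel file is missing
+def sentinel_missing(input_file, output_file):
+    if not os.path.exists(output_file):
+        return True, "Missing sentinel %s" % output_file
+    return False, "Sentinel %s already exists" % output_file
+
+@files(None, "stage1.sentinel")
+@check_if_uptodate(sentinel_missing)
+def run_database_stage(input_file, output_file):
+    # ... update the database tables here, then touch the sentinel ...
+    open(output_file, "w").close()
+
+pipeline_run([run_database_stage])</pre>
+</div>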
+<span class="target" id="manual-io-parameters"><span id="index-2"></span></span></div>
+<div class="section" id="inputs-and-outputs-parameters">
+<span id="index-3"></span><h2><em>Inputs</em> and <em>Outputs</em> parameters<a class="headerlink" href="#inputs-and-outputs-parameters" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p><strong>Ruffus</strong> treats the first two parameters of each job in each task as the <em>inputs</em> and
+<em>outputs</em> parameters respectively. If these parameters are strings, or are sequences
+which contain strings, these will be treated as the names of files required by and
+produced by that job. The presence and modification times of the <em>inputs</em> and <em>outputs</em> files
+will be used to check if it is necessary to rerun the job.</p>
+<p>Apart from this, <strong>Ruffus</strong> imposes no other restrictions on the parameters for jobs, which
+are passed verbatim to task functions.</p>
+<p>Most of the time, it is sensible to stick with file names (strings) in the <em>inputs</em> and
+<em>outputs</em> parameters but <strong>Ruffus</strong> does not try to second-guess what sort of data you
+will be passing through your pipelines (except that strings represent file names).</p>
+<p>Thus, given the following over-elaborate parameters (parameter passing will be discussed in
+more detail from <a class="reference internal" href="files.html#manual-files"><em>Chapter 3</em></a>):</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="p">[</span> <span class="p">[[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">3</span><span class="p">],</span> <span class="s">"afile.name"</span><span class="p">,</span> <span class="p">(</span><span class="s">"bfile.name"</span><span class="p">,</span> <span class="mi">72</span><span class="p">)],</span>
+ <span class="p">[[</span><span class="mi">56</span><span class="p">,</span> <span class="mf">3.3</span><span class="p">],</span> <span class="nb">set</span><span class="p">(</span><span class="n">custom_object</span><span class="p">(),</span> <span class="s">"output.file"</span><span class="p">)],</span>
+ <span class="mf">33.3</span><span class="p">,</span>
+ <span class="s">"oops"</span><span class="p">]</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p>This will be passed <cite>“as is”</cite> to your task function:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="n">do_something</span><span class="p">([[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">3</span><span class="p">],</span> <span class="s">"afile.name"</span><span class="p">,</span> <span class="p">(</span><span class="s">"bfile.name"</span><span class="p">,</span> <span class="mi">72</span><span class="p">)],</span> <span class="c"># input</span>
+ <span class="p">[[</span><span class="mi">56</span><span class="p">,</span> <span class="mf">3.3</span><span class="p">],</span> <span class="nb">set</span><span class="p">(</span><span class="n">custom_object</span><span class="p">(),</span> <span class="s">"output.file"</span><span class="p">)],</span> <span class="c"># output</span>
+ <span class="mf">33.3</span><span class="p">,</span> <span class="c"># extra parameter</span>
+ <span class="s">"oops"</span><span class="p">)</span> <span class="c"># extra parameter</span>
+</pre></div>
+</div>
+<p><strong>Ruffus</strong> will interpret this as:</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="n">Input_parameter</span> <span class="o">=</span> <span class="p">[[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">3</span><span class="p">],</span> <span class="s">"afile.name"</span><span class="p">,</span> <span class="p">(</span><span class="s">"bfile.name"</span><span class="p">,</span> <span class="mi">72</span><span class="p">)]</span>
+<span class="n">Output_parameter</span> <span class="o">=</span> <span class="p">[[</span><span class="mi">56</span><span class="p">,</span> <span class="mf">3.3</span><span class="p">],</span> <span class="nb">set</span><span class="p">(</span><span class="n">custom_object</span><span class="p">(),</span> <span class="s">"output.file"</span><span class="p">)]</span>
+<span class="n">Other_parameter_1</span> <span class="o">=</span> <span class="mf">33.3</span>
+<span class="n">Other_parameter_2</span> <span class="o">=</span> <span class="s">"oops"</span>
+</pre></div>
+</div>
+<p><strong>Ruffus</strong> disregards the <em>structure</em> of your data, only identifying the (nested) strings.
+Thus there are 2 input files:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="s">"afile.name"</span>
+<span class="s">"bfile.name"</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p>and 1 output file:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="s">"output.file"</span>
+</pre></div>
+</div>
+</div></blockquote>
+</div></blockquote>
+</div></blockquote>
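+<p>A more down-to-earth sketch (the file names are illustrative only) of how the first two parameters of a job
+become the <em>inputs</em> and <em>outputs</em>, while everything else is passed through untouched:</p>
+<div class="highlight-python"><pre>from ruffus import files, pipeline_run
+
+# Only "numbers.input" and "numbers.output" take part in the up-to-date
+# check; 33.3 is handed to the function verbatim as an extra parameter.
+@files("numbers.input", "numbers.output", 33.3)
+def do_something(input_file, output_file, scaling_factor):
+    with open(output_file, "w") as out:
+        for line in open(input_file):
+            out.write("%s\n" % (float(line) * scaling_factor))
+
+# "numbers.input" must already exist, otherwise a MissingInputFileError is raised
+pipeline_run([do_something])</pre>
+</div>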
+</div>
+<div class="section" id="checking-if-files-are-up-to-date">
+<span id="manual-skip-up-to-date-rules"></span><span id="index-4"></span><h2>Checking if files are up to date<a class="headerlink" href="#checking-if-files-are-up-to-date" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>The following simple rules are used by <strong>Ruffus</strong>.</p>
+<ol class="arabic">
+<li><p class="first">The pipeline stage will be rerun if:</p>
+<blockquote>
+<div><ul class="simple">
+<li>any of the <em>inputs</em> files are new (newer than the <em>output</em> files), or</li>
+<li>any of the <em>output</em> files are missing</li>
+</ul>
+</div></blockquote>
+</li>
+<li><p class="first">In addition, it is possible to run jobs which create files from scratch.</p>
+<blockquote>
+<div><ul class="simple">
+<li>If no <em>inputs</em> file names are supplied, the job will only run if any <em>output</em> file is missing.</li>
+</ul>
+</div></blockquote>
+</li>
+<li><p class="first">Finally, if no <em>outputs</em> file names are supplied, the job will always run.</p>
+</li>
+</ol>
+<p>The <a class="reference internal" href="files.html#manual-files-example"><em>example</em></a> in the next chapter shows how this works in practice.</p>
+</div></blockquote>
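+<p>In the meantime, here is a minimal sketch of the second rule, using <strong>@originate</strong>
+(the file names are made up): a job with no <em>inputs</em> re-runs only while its <em>output</em> file is missing.</p>
+<div class="highlight-python"><pre>from ruffus import originate, pipeline_run
+
+# No inputs: each job runs only if its output file does not yet exist
+@originate(["a.start", "b.start"])
+def create_initial_files(output_file):
+    open(output_file, "w").close()
+
+pipeline_run([create_initial_files])
+# a second pipeline_run() would skip both jobs as up to date</pre>
+</div>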
+</div>
+<div class="section" id="missing-files">
+<span id="index-5"></span><h2>Missing files<a class="headerlink" href="#missing-files" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>If the <em>inputs</em> files for a job are missing, the task function will have no way
+to produce its <em>output</em>. In this case, a <tt class="docutils literal"><span class="pre">MissingInputFileError</span></tt> exception will be raised
+automatically. For example,</p>
+<blockquote>
+<div><div class="highlight-python"><pre>task.MissingInputFileError: No way to run job: Input file ['a.1'] does not exist
+for Job = ["a.1" -> "a.2", "A file"]</pre>
+</div>
+</div></blockquote>
+</div></blockquote>
+</div>
+<div class="section" id="caveats-timestamp-resolution">
+<span id="index-6"></span><h2>Caveats: Timestamp resolution<a class="headerlink" href="#caveats-timestamp-resolution" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><div class="line-block">
+<div class="line">Note that modification times have precision to the nearest second under some older file systems
+(ext2/ext3?). This may also be true for networked file systems.</div>
+<div class="line"><strong>Ruffus</strong> is very conservative, and assumes that files with <em>exactly</em> the same date stamp might have been
+created in the wrong order, and will treat the job as out-of-date. This would result in some
+jobs re-running unnecessarily, simply because an underlying coarse-grained file system does not
+distinguish between successively created files with sufficient accuracy.</div>
+</div>
+<p>To get around this, <strong>Ruffus</strong> makes sure that each task is punctuated by a 1 second pause
+(via <tt class="docutils literal"><span class="pre">time.sleep()</span></tt>). If this is gets in the way, and you are using a modern file system with
+nanosecond timestamp resolution, you can turn off the delay by setting
+<tt class="docutils literal"><span class="pre">one_second_per_job</span></tt> to <tt class="docutils literal"><span class="pre">False</span></tt> in <a class="reference internal" href="../../pipeline_functions.html#pipeline-functions-pipeline-run"><em>pipeline_run</em></a></p>
+<p>Later versions of <strong>Ruffus</strong> will allow file modification times to be saved at higher precision
+in a log file or database to get around this.</p>
+</div></blockquote>
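+<p>To switch off the pause described above, the call might look like this (a sketch; the task is just a
+stand-in for the last task of your own pipeline):</p>
+<div class="highlight-python"><pre>from ruffus import originate, pipeline_run
+
+@originate("timestamp.test")
+def final_task(output_file):
+    open(output_file, "w").close()
+
+# Skip the one second pause between tasks on file systems with
+# sub-second modification time resolution
+pipeline_run([final_task], one_second_per_job=False)</pre>
+</div>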
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="../../contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#"><strong>Chapter 2</strong>: <cite>Tasks and Recipes</cite></a><ul>
+<li><a class="reference internal" href="#skip-jobs-which-are-up-to-date">Skip jobs which are up to date</a></li>
+<li><a class="reference internal" href="#inputs-and-outputs-parameters"><em>Inputs</em> and <em>Outputs</em> parameters</a></li>
+<li><a class="reference internal" href="#checking-if-files-are-up-to-date">Checking if files are up to date</a></li>
+<li><a class="reference internal" href="#missing-files">Missing files</a></li>
+<li><a class="reference internal" href="#caveats-timestamp-resolution">Caveats: Timestamp resolution</a></li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="follows.html"
+ title="previous chapter"><strong>Chapter 1</strong> : <cite>Arranging tasks into a pipeline with</cite> <strong>@follows</strong></a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="files.html"
+ title="next chapter"><strong>Chapter 3</strong>: <cite>Passing parameters to the pipeline with</cite> <strong>@files</strong></a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../../_sources/tutorials/manual/tasks_as_recipes.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="../../decorators/decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="../../decorators/originate.html">@originate</a> </li>
+ <li><a href="../../decorators/split.html">@split</a> </li>
+ <li><a href="../../decorators/transform.html">@transform</a> </li>
+ <li><a href="../../decorators/merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="../../decorators/subdivide.html">@subdivide</a> </li>
+ <li><a href="../../decorators/transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="../../decorators/collate.html">@collate</a> </li>
+ <li><a href="../../decorators/collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="../../decorators/graphviz.html">@graphviz</a> </li>
+ <li><a href="../../decorators/mkdir.html">@mkdir</a> </li>
+ <li><a href="../../decorators/follows.html">@follows / mkdir</a> </li>
+ <li><a href="../../decorators/posttask.html">@posttask touch_file</a> </li>
+ <li><a href="../../decorators/active_if.html">@active_if</a> </li>
+ <li><a href="../../decorators/jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="../../decorators/product.html">@product </a> </li>
+ <li><a href="../../decorators/permutations.html">@permutations </a> </li>
+ <li><a href="../../decorators/combinations.html">@combinations </a> </li>
+ <li><a href="../../decorators/combinations_with_replacement.html">@combinations_ _with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="../../decorators/decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="../../decorators/files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="../../decorators/parallel.html">@parallel</a> </li>
+ <li><a href="../../decorators/check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="../../decorators/indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="files.html" title="Chapter 3: Passing parameters to the pipeline with @files"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="follows.html" title="Chapter 1 : Arranging tasks into a pipeline with @follows"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="../new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/tutorials/manual/tracing_pipeline_parameters.html b/doc/_build/html/tutorials/manual/tracing_pipeline_parameters.html
new file mode 100644
index 0000000..cabfc98
--- /dev/null
+++ b/doc/_build/html/tutorials/manual/tracing_pipeline_parameters.html
@@ -0,0 +1,271 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>Chapter 5: Tracing pipeline parameters — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../../index.html" />
+ <link rel="next" title="Chapter 6: Running Tasks and Jobs in parallel" href="parallel_processing.html" />
+ <link rel="prev" title="Chapter 4: Chaining pipeline Tasks together automatically" href="tasks_and_globs_in_inputs.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="parallel_processing.html" title="Chapter 6: Running Tasks and Jobs in parallel"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="tasks_and_globs_in_inputs.html" title="Chapter 4: Chaining pipeline Tasks together automatically"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="../new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <div class="section" id="manual-tracing-pipeline-parameters-chapter-num-tracing-pipeline-parameters">
+<span id="manual-tracing-pipeline-parameters"></span><h1><strong>Chapter 5</strong>: Tracing pipeline parameters<a class="headerlink" href="#manual-tracing-pipeline-parameters-chapter-num-tracing-pipeline-parameters" title="Permalink to this headline">¶</a></h1>
+<blockquote>
+<div><ul class="simple">
+<li><a class="reference internal" href="manual_contents.html#manual"><em>Manual overview</em></a></li>
+</ul>
+<p id="index-0">The trickiest part of developing pipelines is understanding how your
+data flows through the pipeline.</p>
+<p>In <strong>Ruffus</strong>, your data is passed from one task function to another down
+the pipeline by the chain of linked parameters. Sometimes, it may be difficult to
+choose the right <strong>Ruffus</strong> syntax at first, or to understand which parameters in
+what format are being passed to your function.</p>
+<p>Whether you are learning how to use <strong>ruffus</strong>, or trying out a new
+feature in <strong>ruffus</strong>, or just have a horrendously complicated pipeline
+to debug (we have colleagues with >100 criss-crossing pipelined stages),
+your best friend is <a class="reference internal" href="../../pipeline_functions.html#pipeline-functions-pipeline-printout"><em>pipeline_printout(...)</em></a>.</p>
+<p><strong>pipeline_printout</strong> displays the parameters which would be passed to each task function
+for each job in your pipeline. In other words, it traces in detail how each of the functions in
+the pipeline is called.</p>
+<p>It makes good sense to alternate between calls to <strong>pipeline_printout</strong> and <strong>pipeline_run</strong>
+in the development of <strong>Ruffus</strong> pipelines (perhaps with the use of a command-line option),
+so that you always know exactly how the pipeline is being invoked.</p>
+</div></blockquote>
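+<p>One way to do this is sketched below with a hypothetical <tt class="docutils literal"><span class="pre">--just_print</span></tt> flag
+(<tt class="docutils literal"><span class="pre">second_task</span></tt> stands for the final task of your pipeline):</p>
+<div class="highlight-python"><pre>import sys
+from argparse import ArgumentParser
+from ruffus import pipeline_printout, pipeline_run
+
+parser = ArgumentParser()
+parser.add_argument("--just_print", action="store_true",
+                    help="Show which jobs would run, without running them")
+args = parser.parse_args()
+
+if args.just_print:
+    # trace the pipeline instead of running it
+    pipeline_printout(sys.stdout, [second_task], verbose=3)
+else:
+    pipeline_run([second_task])</pre>
+</div>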
+<div class="section" id="printing-out-which-jobs-will-be-run">
+<h2>Printing out which jobs will be run<a class="headerlink" href="#printing-out-which-jobs-will-be-run" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p><strong>pipeline_printout</strong> is called in exactly the same way as <strong>pipeline_run</strong> but
+instead of running the pipeline, it just prints the tasks which are and are not up-to-date.</p>
+<p>The <tt class="docutils literal"><span class="pre">verbose</span></tt> parameter controls how much detail is displayed.</p>
+<blockquote>
+<div><div class="highlight-python"><pre>verbose = 0 : prints nothing
+verbose = 1 : logs warnings and tasks which are not up-to-date and which will be run
+verbose = 2 : logs doc strings for task functions as well
+verbose = 3 : logs job parameters for jobs which are out-of-date
+verbose = 4 : logs list of up-to-date tasks but parameters for out-of-date jobs
+verbose = 5 : logs parameters for all jobs whether up-to-date or not
+verbose = 10: logs messages useful only for debugging ruffus pipeline code</pre>
+</div>
+</div></blockquote>
+<p>Let us take the two-step <a class="reference internal" href="../simple_tutorial/step3_run_pipeline_code.html#simple-tutorial-3nd-step-code"><em>pipeline</em></a> from
+the tutorial. <a class="reference internal" href="../../pipeline_functions.html#pipeline-functions-pipeline-printout"><em>Pipeline_printout(...)</em></a>
+by default merely lists the two tasks which will be run in the pipeline:</p>
+<blockquote>
+<div><img alt="../../_images/simple_tutorial_pipeline_printout1.png" src="../../_images/simple_tutorial_pipeline_printout1.png" />
+</div></blockquote>
+<p>To see the input and output parameters of out-of-date jobs in the pipeline, we can increase the verbosity from the default (<tt class="docutils literal"><span class="pre">1</span></tt>) to <tt class="docutils literal"><span class="pre">3</span></tt>:</p>
+<blockquote>
+<div><img alt="../../_images/simple_tutorial_pipeline_printout2.png" src="../../_images/simple_tutorial_pipeline_printout2.png" />
+</div></blockquote>
+<dl class="docutils">
+<dt>This is very useful for checking that the input and output parameters have been specified</dt>
+<dd>correctly.</dd>
+</dl>
+</div></blockquote>
+</div>
+<div class="section" id="determining-which-jobs-are-out-of-date-or-not">
+<h2>Determining which jobs are out-of-date or not<a class="headerlink" href="#determining-which-jobs-are-out-of-date-or-not" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>It is often useful to see which tasks are or are not up-to-date. For example, if we
+were to run the pipeline in full, and then modify one of the intermediate files, the
+pipeline would be partially out of date.</p>
+<p>Let us start by running the pipeline in full but then modify <tt class="docutils literal"><span class="pre">job1.stage1</span></tt> so that the second task is no longer up-to-date:</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="n">pipeline_run</span><span class="p">([</span><span class="n">second_task</span><span class="p">])</span>
+
+<span class="c"># modify job1.stage1</span>
+<span class="nb">open</span><span class="p">(</span><span class="s">"job1.stage1"</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
+</pre></div>
+</div>
+<p>At a verbosity of <tt class="docutils literal"><span class="pre">5</span></tt>, even jobs which are up-to-date will be displayed.
+We can now see that there is only one job in <tt class="docutils literal"><span class="pre">second_task(...)</span></tt> which needs to be re-run
+because <tt class="docutils literal"><span class="pre">job1.stage1</span></tt> has been modified after <tt class="docutils literal"><span class="pre">job1.stage2</span></tt> (highlighted in blue):</p>
+<blockquote>
+<div><img alt="../../_images/simple_tutorial_pipeline_printout3.png" src="../../_images/simple_tutorial_pipeline_printout3.png" />
+</div></blockquote>
+</div></blockquote>
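+<p>Continuing the code above, that display would be produced with the following call
+(a sketch; only the output stream and the verbosity level are added):</p>
+<div class="highlight-python"><pre>import sys
+from ruffus import pipeline_printout
+
+# verbose=5: show the parameters of every job, up to date or not,
+# so the single out-of-date job in second_task stands out
+pipeline_printout(sys.stdout, [second_task], verbose=5)</pre>
+</div>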
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="../../contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#"><strong>Chapter 5</strong>: Tracing pipeline parameters</a><ul>
+<li><a class="reference internal" href="#printing-out-which-jobs-will-be-run">Printing out which jobs will be run</a></li>
+<li><a class="reference internal" href="#determining-which-jobs-are-out-of-date-or-not">Determining which jobs are out-of-date or not</a></li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="tasks_and_globs_in_inputs.html"
+ title="previous chapter"><strong>Chapter 4</strong>: Chaining pipeline <cite>Tasks</cite> together automatically</a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="parallel_processing.html"
+ title="next chapter"><strong>Chapter 6</strong>: <cite>Running Tasks and Jobs in parallel</cite></a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../../_sources/tutorials/manual/tracing_pipeline_parameters.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="../../decorators/decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="../../decorators/originate.html">@originate</a> </li>
+ <li><a href="../../decorators/split.html">@split</a> </li>
+ <li><a href="../../decorators/transform.html">@transform</a> </li>
+ <li><a href="../../decorators/merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="../../decorators/subdivide.html">@subdivide</a> </li>
+ <li><a href="../../decorators/transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="../../decorators/collate.html">@collate</a> </li>
+ <li><a href="../../decorators/collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="../../decorators/graphviz.html">@graphviz</a> </li>
+ <li><a href="../../decorators/mkdir.html">@mkdir</a> </li>
+ <li><a href="../../decorators/follows.html">@follows / mkdir</a> </li>
+ <li><a href="../../decorators/posttask.html">@posttask touch_file</a> </li>
+ <li><a href="../../decorators/active_if.html">@active_if</a> </li>
+ <li><a href="../../decorators/jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="../../decorators/product.html">@product </a> </li>
+ <li><a href="../../decorators/permutations.html">@permutations </a> </li>
+ <li><a href="../../decorators/combinations.html">@combinations </a> </li>
+ <li><a href="../../decorators/combinations_with_replacement.html">@combinations_ _with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="../../decorators/decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="../../decorators/files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="../../decorators/parallel.html">@parallel</a> </li>
+ <li><a href="../../decorators/check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="../../decorators/indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="parallel_processing.html" title="Chapter 6: Running Tasks and Jobs in parallel"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="tasks_and_globs_in_inputs.html" title="Chapter 4: Chaining pipeline Tasks together automatically"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="../new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/tutorials/manual/transform.html b/doc/_build/html/tutorials/manual/transform.html
new file mode 100644
index 0000000..91e73f8
--- /dev/null
+++ b/doc/_build/html/tutorials/manual/transform.html
@@ -0,0 +1,395 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>Chapter 8: Applying the same recipe to create many different files with @transform — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../../index.html" />
+ <link rel="next" title="Chapter 9: Merge multiple input into a single result" href="merge.html" />
+ <link rel="prev" title="Chapter 7: Splitting up large tasks / files with @split" href="split.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="merge.html" title="Chapter 9: Merge multiple input into a single result"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="split.html" title="Chapter 7: Splitting up large tasks / files with @split"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="../new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <div class="section" id="manual-transform-chapter-num-applying-the-same-recipe-to-create-many-different-files-with-transform">
+<span id="manual-transform"></span><h1><strong>Chapter 8</strong>: <cite>Applying the same recipe to create many different files with</cite> <strong>@transform</strong><a class="headerlink" href="#manual-transform-chapter-num-applying-the-same-recipe-to-create-many-different-files-with-transform" title="Permalink to this headline">¶</a></h1>
+<blockquote>
+<div><table class="hlist"><tr><td><ul class="simple">
+<li><a class="reference internal" href="manual_contents.html#manual"><em>Manual overview</em></a></li>
+</ul>
+</td><td><ul class="simple">
+<li><a class="reference internal" href="../../decorators/transform.html#decorators-transform"><em>@transform</em></a> syntax in detail</li>
+</ul>
+</td></tr></table>
+<p>Sometimes you have a list of data files which you want to send to the
+same pipelined function, applying the same operation to each.
+The best way to manage this is to produce a corresponding
+list of results files:</p>
+<blockquote>
+<div><div class="line-block">
+<div class="line">Compiling c source files might <em>@transform</em> an <tt class="docutils literal"><span class="pre">a.c</span></tt> file to an <tt class="docutils literal"><span class="pre">a.o</span></tt> file.</div>
+<div class="line">A <tt class="docutils literal"><span class="pre">grep</span></tt> operation might <em>@transform</em> a <tt class="docutils literal"><span class="pre">plays.king_lear.txt</span></tt> file to an <tt class="docutils literal"><span class="pre">plays.king_lear.counts</span></tt> file.</div>
+</div>
+</div></blockquote>
+<p><em>Ruffus</em> uses the <a class="reference internal" href="../../decorators/transform.html#decorators-transform"><em>@transform</em></a> decorator for this purpose.</p>
+<p>When you <strong>@transform</strong> your data from one file type to another, you are not restricted just
+to changing the file suffix. We shall see how, with the full power of regular
+expressions behind you, you can sort the resulting
+data into different directories, add indices and so on.</p>
+</div></blockquote>
+<div class="section" id="transform">
+<span id="index-0"></span><h2><strong>@transform</strong><a class="headerlink" href="#transform" title="Permalink to this headline">¶</a></h2>
+<div class="section" id="worked-example-calculating-sums-and-sum-of-squares-in-parallel">
+<h3>Worked example: calculating sums and sum of squares in parallel<a class="headerlink" href="#worked-example-calculating-sums-and-sum-of-squares-in-parallel" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><p>This example is borrowed from <a class="reference internal" href="../simple_tutorial/step5_split.html#simple-tutorial-5th-step"><em>step 5</em></a> of the simple tutorial.</p>
+<blockquote>
+<div><div class="admonition note">
+<p class="first admonition-title">Note</p>
+<p class="last">See <a class="reference internal" href="../simple_tutorial/step5_split_code.html#simple-tutorial-5th-step-code"><em>example code here</em></a></p>
+</div>
+</div></blockquote>
+<p>Given a set of files, each with a set of random numbers, we want to calculate thier
+sums and sum of squares. The easiest way to do this is by providing a recipe for
+transforming a <tt class="docutils literal"><span class="pre">*.chunk</span></tt> file containing a list of numbers into a <tt class="docutils literal"><span class="pre">*.sums</span></tt> file
+with our sums and sum of squares.</p>
+<p><em>Ruffus</em> magically takes care of applying the same recipe (task function) to all the different
+data files in parallel.</p>
+<blockquote>
+<div><img alt="../../_images/simple_tutorial_transform.png" src="../../_images/simple_tutorial_transform.png" />
+</div></blockquote>
+<p>The <a class="reference internal" href="../../decorators/transform.html#decorators-transform"><em>@transform</em></a> decorator tells <em>Ruffus</em> to take files from the step 4 task (i.e. <tt class="docutils literal"><span class="pre">*.chunks</span></tt>),
+and produce files with the <tt class="docutils literal"><span class="pre">.sums</span></tt> suffix instead of the
+<tt class="docutils literal"><span class="pre">.chunks</span></tt> ending.</p>
+<dl class="docutils">
+<dt>Thus if <tt class="docutils literal"><span class="pre">step_4_split_numbers_into_chunks</span></tt> created</dt>
+<dd><div class="first last highlight-python"><div class="highlight"><pre><span class="s">"1.chunks"</span>
+<span class="s">"2.chunks"</span>
+<span class="s">"3.chunks"</span>
+</pre></div>
+</div>
+</dd>
+</dl>
+<p>This would result in the following function calls:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="n">step_5_calculate_sum_of_squares</span> <span class="p">(</span><span class="s">"1.chunk"</span><span class="p">,</span> <span class="s">"1.sums"</span><span class="p">)</span>
+<span class="n">step_5_calculate_sum_of_squares</span> <span class="p">(</span><span class="s">"2.chunk"</span><span class="p">,</span> <span class="s">"2.sums"</span><span class="p">)</span>
+<span class="n">step_5_calculate_sum_of_squares</span> <span class="p">(</span><span class="s">"3.chunk"</span><span class="p">,</span> <span class="s">"3.sums"</span><span class="p">)</span>
+
+<span class="c"># etc...</span>
+</pre></div>
+</div>
+</div></blockquote>
+</div></blockquote>
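+<p>A sketch of the decorator described above (the suffix strings follow the file names listed earlier;
+the arithmetic inside the function is illustrative, not the tutorial code):</p>
+<div class="highlight-python"><pre>from ruffus import transform, suffix
+
+# step_4_split_numbers_into_chunks is the upstream task from step 4
+@transform(step_4_split_numbers_into_chunks, suffix(".chunks"), ".sums")
+def step_5_calculate_sum_of_squares(input_file_name, output_file_name):
+    the_sum = 0.0
+    sum_squared = 0.0
+    for line in open(input_file_name):
+        value = float(line.rstrip())
+        the_sum += value
+        sum_squared += value * value
+    with open(output_file_name, "w") as out:
+        out.write("%s\n%s\n" % (the_sum, sum_squared))</pre>
+</div>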
+</div>
+</div>
+<div class="section" id="using-suffix-to-change-give-each-output-file-a-new-suffix">
+<h2>Using <a class="reference internal" href="../../decorators/indicator_objects.html#decorators-suffix"><em>suffix(...)</em></a> to change give each output file a new suffix<a class="headerlink" href="#using-suffix-to-change-give-each-output-file-a-new-suffix" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>The <a class="reference internal" href="../../decorators/indicator_objects.html#decorators-suffix"><em>suffix</em></a> specification indicates that</p>
+<blockquote>
+<div><ul class="simple">
+<li>only filenames ending with the suffix term (e.g. <tt class="docutils literal"><span class="pre">.chunk</span></tt>) should be considered</li>
+<li>The text matching the suffix term should be replaced with the string in the output pattern.</li>
+</ul>
+</div></blockquote>
+<p>This example assumes that both the <em>inputs</em> and the <em>outputs</em> each consist of a single string, but
+<strong>Ruffus</strong> places no such constraints on the data flowing through your pipeline.</p>
+<blockquote>
+<div><ul class="simple">
+<li>If there are multiple file names (strings) contained within each <em>inputs</em> parameter,
+then only the first will be used to generate the <em>output</em></li>
+<li>Each string that is encountered in each <em>output</em> parameter will be used for suffix replacement.</li>
+</ul>
+</div></blockquote>
+</div></blockquote>
+<div class="section" id="an-example-with-more-complex-data-structures">
+<h3>An example with more complex data structures<a class="headerlink" href="#an-example-with-more-complex-data-structures" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><p>This will become much clearer with this example:</p>
+<blockquote>
+<div><div class="highlight-python"><pre> inputs = [
+ ["file1.ext", 10 ], #job 1
+ [37.0, "file2.wrong_extension",
+ "file2_ignored.ext"], #job 2
+ "file3.ext" #job 3
+ ]
+
+@transform(inputs, suffix(".ext"), [(".ext1", ), ".ext2"])
+def pipelinetask (input_file_name, output_file_name):
+ ""</pre>
+</div>
+</div></blockquote>
+<div class="line-block">
+<div class="line">Granted, it may seem rather odd that the <em>inputs</em> parameter including numbers as well
+as file names, but <strong>Ruffus</strong> does not second guess how you wish to arrange your pipelines.</div>
+<div class="line"><tt class="docutils literal"><span class="pre">inputs</span></tt> contains the parameters for three jobs.</div>
+<div class="line">In each case, the first file name string encountered will be used to generate the <em>output</em> parameter:</div>
+</div>
+<blockquote>
+<div><img alt="../../_images/manual_transform_complex_outputs.png" src="../../_images/manual_transform_complex_outputs.png" />
+<div class="admonition note">
+<p class="first admonition-title">Note</p>
+<p class="last">The first filename in the prospective job #2 does not have the <tt class="docutils literal"><span class="pre">.ext</span></tt> suffix so this job will be eliminated.</p>
+</div>
+</div></blockquote>
+<p>Thus, the original code:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="nd">@transform</span><span class="p">(</span><span class="n">inputs</span><span class="p">,</span> <span class="n">suffix</span><span class="p">(</span><span class="s">".ext"</span><span class="p">),</span> <span class="p">[(</span><span class="mi">15</span><span class="p">,</span> <span class="s">".ext1"</span><span class="p">),</span> <span class="s">".ext2"</span><span class="p">] [...]
+<span class="k">def</span> <span class="nf">pipelinetask</span> <span class="p">(</span><span class="n">input_file_name</span><span class="p">,</span> <span class="n">output_file_name</span><span class="p">):</span>
+ <span class="s">""</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p>is equivalent to calling:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="n">pipelinetask</span><span class="p">([</span><span class="s">"file1.ext"</span><span class="p">,</span> <span class="mi">10</span><span class="p">],</span> <span class="p">[(</span><span class="mi">15</span><span class="p">,</span> <span class="s">'file1.ext1'</span><span class="p">),</span> <span class="s">'file1.ext2'</span><span class="p">])</span> <span class="c"># job 1</span>
+<span class="n">pipelinetask</span><span class="p">(</span><span class="s">"file3.ext"</span><span class="p">,</span> <span class="p">[(</span><span class="mi">15</span><span class="p">,</span> <span class="s">'file3.ext1'</span><span class="p">),</span> <span class="s">'file3.ext2'</span><span class="p">])</span> <span class="c"># job 3</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p>Hopefully, your code will be simpler than this rather pathological case!</p>
+</div></blockquote>
+</div>
+</div>
+<div class="section" id="regular-expressions-regex-provide-maximum-flexibility">
+<h2>Regular expressions <a class="reference internal" href="../../decorators/indicator_objects.html#decorators-regex"><em>regex(...)</em></a> provide maximum flexibility<a class="headerlink" href="#regular-expressions-regex-provide-maximum-flexibility" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>Exactly the same function could be written using regular expressions:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="nd">@transform</span><span class="p">(</span><span class="n">inputs</span><span class="p">,</span> <span class="n">regex</span><span class="p">(</span><span class="s">".ext"</span><span class="p">),</span> <span class="p">[(</span><span class="mi">15</span><span class="p">,</span> <span class="s">".ext1"</span><span class="p">),</span> <span class="s">".ext2"</span><span class="p">])</span>
+<span class="k">def</span> <span class="nf">pipelinetask</span> <span class="p">(</span><span class="n">input_file_name</span><span class="p">,</span> <span class="n">output_file_name</span><span class="p">):</span>
+ <span class="s">""</span>
+</pre></div>
+</div>
+</div></blockquote>
+<div class="line-block">
+<div class="line">However, regular expressions are not limited to suffix matches.</div>
+<div class="line">We can sort our <em>ouputs</em> to different subdirectories, depending on category.</div>
+<div class="line"><br /></div>
+<div class="line">Our example starts off with data file for different zoo animals.</div>
+<div class="line">We are only interested in mammals, and we would like the files of each species to</div>
+<div class="line">end up in its own directory after processing.</div>
+<div class="line">Starting with these species files:</div>
+</div>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="s">"mammals.tiger.wild.animals"</span>
+<span class="s">"mammals.lion.wild.animals"</span>
+<span class="s">"mammals.lion.handreared.animals"</span>
+<span class="s">"mammals.dog.tame.animals"</span>
+<span class="s">"mammals.dog.wild.animals"</span>
+<span class="s">"reptiles.crocodile.wild.animals"</span>
+</pre></div>
+</div>
+</div></blockquote>
+<dl class="docutils">
+<dt>Then, the following:</dt>
+<dd><img alt="../../_images/manual_transform.png" class="first last" src="../../_images/manual_transform.png" />
+</dd>
+<dt>will put each captured mammal in its own directory:</dt>
+<dd><div class="first last highlight-python"><div class="highlight"><pre><span class="gp">>>> </span><span class="n">pipeline_run</span><span class="p">([</span><span class="n">capture_mammals</span><span class="p">])</span>
+<span class="go"> Job = [mammals.dog.tame.animals -> dog/dog.tame.in_my_zoo, dog] completed</span>
+<span class="go"> Job = [mammals.dog.wild.animals -> dog/dog.wild.in_my_zoo, dog] completed</span>
+<span class="go"> Job = [mammals.lion.handreared.animals -> lion/lion.handreared.in_my_zoo, lion] completed</span>
+<span class="go"> Job = [mammals.lion.wild.animals -> lion/lion.wild.in_my_zoo, lion] completed</span>
+<span class="go"> Job = [mammals.tiger.wild.animals -> tiger/tiger.wild.in_my_zoo, tiger] completed</span>
+<span class="go">Completed Task = capture_mammals</span>
+</pre></div>
+</div>
+</dd>
+</dl>
+<div class="admonition note">
+<p class="first admonition-title">Note</p>
+<p class="last">The code can be found <a class="reference internal" href="transform_code.html#manual-transform-code"><em>here</em></a></p>
+</div>
+</div></blockquote>
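+<p>The decorator behind that output might be sketched as follows (the regular expression and the
+upstream task name are assumptions reconstructed from the job names above):</p>
+<div class="highlight-python"><pre>import os
+from ruffus import transform, regex
+
+# "mammals.lion.wild.animals"  becomes  "lion/lion.wild.in_my_zoo",
+# with the species name ("lion") passed as an extra parameter
+@transform(create_species_files,
+           regex(r"mammals\.([^.]+)\.(.+\.)animals"),
+           r"\1/\1.\2in_my_zoo",
+           r"\1")
+def capture_mammals(input_file, output_file, species):
+    # in the real example the per-species directory would be created first
+    if not os.path.isdir(species):
+        os.makedirs(species)
+    open(output_file, "w").close()</pre>
+</div>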
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="../../contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#"><strong>Chapter 8</strong>: <cite>Applying the same recipe to create many different files with</cite> <strong>@transform</strong></a><ul>
+<li><a class="reference internal" href="#transform"><strong>@transform</strong></a><ul>
+<li><a class="reference internal" href="#worked-example-calculating-sums-and-sum-of-squares-in-parallel">Worked example: calculating sums and sum of squares in parallel</a></li>
+</ul>
+</li>
+<li><a class="reference internal" href="#using-suffix-to-change-give-each-output-file-a-new-suffix">Using <tt class="docutils literal"><span class="pre">suffix(...)</span></tt> to change give each output file a new suffix</a><ul>
+<li><a class="reference internal" href="#an-example-with-more-complex-data-structures">An example with more complex data structures</a></li>
+</ul>
+</li>
+<li><a class="reference internal" href="#regular-expressions-regex-provide-maximum-flexibility">Regular expressions <tt class="docutils literal"><span class="pre">regex(...)</span></tt> provide maximum flexibility</a></li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="split.html"
+ title="previous chapter"><strong>Chapter 7</strong>: <cite>Splitting up large tasks / files with</cite> <strong>@split</strong></a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="merge.html"
+ title="next chapter"><strong>Chapter 9</strong>: <strong>Merge</strong> <cite>multiple input into a single result</cite></a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../../_sources/tutorials/manual/transform.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="../../decorators/decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="../../decorators/originate.html">@originate</a> </li>
+ <li><a href="../../decorators/split.html">@split</a> </li>
+ <li><a href="../../decorators/transform.html">@transform</a> </li>
+ <li><a href="../../decorators/merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="../../decorators/subdivide.html">@subdivide</a> </li>
+ <li><a href="../../decorators/transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="../../decorators/collate.html">@collate</a> </li>
+ <li><a href="../../decorators/collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="../../decorators/graphviz.html">@graphviz</a> </li>
+ <li><a href="../../decorators/mkdir.html">@mkdir</a> </li>
+ <li><a href="../../decorators/follows.html">@follows / mkdir</a> </li>
+ <li><a href="../../decorators/posttask.html">@posttask touch_file</a> </li>
+ <li><a href="../../decorators/active_if.html">@active_if</a> </li>
+ <li><a href="../../decorators/jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="../../decorators/product.html">@product </a> </li>
+ <li><a href="../../decorators/permutations.html">@permutations </a> </li>
+ <li><a href="../../decorators/combinations.html">@combinations </a> </li>
+                                        <li><a href="../../decorators/combinations_with_replacement.html">@combinations_with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="../../decorators/decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="../../decorators/files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="../../decorators/parallel.html">@parallel</a> </li>
+ <li><a href="../../decorators/check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="../../decorators/indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="merge.html" title="Chapter 9: Merge multiple input into a single result"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="split.html" title="Chapter 7: Splitting up large tasks / files with @split"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="../new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/tutorials/manual/transform_code.html b/doc/_build/html/tutorials/manual/transform_code.html
new file mode 100644
index 0000000..52e19b7
--- /dev/null
+++ b/doc/_build/html/tutorials/manual/transform_code.html
@@ -0,0 +1,261 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+    <title>Code for Chapter 8: Applying the same recipe to create many different files — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../../index.html" />
+ <link rel="next" title="Code for Step 2: Passing parameters to the pipeline" href="../simple_tutorial/step2_code.html" />
+ <link rel="prev" title="Code for Chapter 10: Generating parameters on the fly" href="onthefly_code.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="../simple_tutorial/step2_code.html" title="Code for Step 2: Passing parameters to the pipeline"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="onthefly_code.html" title="Code for Chapter 10: Generating parameters on the fly"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="../new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <div class="section" id="code-for-chapter-6-applying-the-same-recipe-to-create-many-different-files">
+<span id="manual-transform-code"></span><h1>Code for Chapter 6: Applying the same recipe to create many different files<a class="headerlink" href="#code-for-chapter-6-applying-the-same-recipe-to-create-many-different-files" title="Permalink to this headline">¶</a></h1>
+<blockquote>
+<div><ul class="simple">
+<li><a class="reference internal" href="manual_contents.html#manual"><em>Manual overview</em></a></li>
+<li><a class="reference internal" href="../../decorators/transform.html#decorators-transform"><em>@transform syntax in detail</em></a></li>
+<li><a class="reference internal" href="transform.html#manual-transform"><em>Back</em></a></li>
+</ul>
+<div class="line-block">
+<div class="line">Our example starts off with data file for different zoo animals.</div>
+<div class="line">We are only interested in mammals, and we would like the files of each species to</div>
+<div class="line">end up in its own directory after processing.</div>
+</div>
+</div></blockquote>
+<div class="section" id="code">
+<h2>Code<a class="headerlink" href="#code" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="c">#</span>
+<span class="c"># Start with species files</span>
+<span class="c">#</span>
+<span class="nb">open</span><span class="p">(</span><span class="s">"mammals.tiger.wild.animals"</span> <span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+<span class="nb">open</span><span class="p">(</span><span class="s">"mammals.lion.wild.animals"</span> <span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+<span class="nb">open</span><span class="p">(</span><span class="s">"mammals.lion.handreared.animals"</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+<span class="nb">open</span><span class="p">(</span><span class="s">"mammals.dog.tame.animals"</span> <span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+<span class="nb">open</span><span class="p">(</span><span class="s">"mammals.dog.wild.animals"</span> <span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+<span class="nb">open</span><span class="p">(</span><span class="s">"reptiles.crocodile.wild.animals"</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+
+<span class="c">#</span>
+<span class="c"># create destinations for each species</span>
+<span class="c">#</span>
+<span class="kn">import</span> <span class="nn">os</span>
+<span class="k">for</span> <span class="n">s</span> <span class="ow">in</span> <span class="p">(</span><span class="s">"tiger"</span><span class="p">,</span> <span class="s">"lion"</span><span class="p">,</span> <span class="s">"dog"</span><span class="p">):</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">exists</span><span class="p">(</span><span class="n">s</span><span class="p">):</span>
+ <span class="n">os</span><span class="o">.</span><span class="n">mkdir</span><span class="p">(</span><span class="n">s</span><span class="p">)</span>
+
+
+<span class="c">#</span>
+<span class="c"># Now summarise files in directories organised by species</span>
+<span class="c">#</span>
+<span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+<span class="nd">@transform</span><span class="p">(</span><span class="s">'*.animals'</span><span class="p">,</span>
+ <span class="n">regex</span><span class="p">(</span><span class="s">r'mammals\.(.+)\.(.+)\.animals'</span><span class="p">),</span> <span class="c"># save species and wild/tame</span>
+ <span class="s">r'\1/\1.\2.in_my_zoo'</span><span class="p">,</span> <span class="c"># same species go together</span>
+ <span class="s">r'\1'</span><span class="p">)</span> <span class="c"># extra species name</span>
+<span class="k">def</span> <span class="nf">capture_mammals</span><span class="p">(</span><span class="n">infile</span><span class="p">,</span> <span class="n">outfile</span><span class="p">,</span> <span class="n">species</span><span class="p">):</span>
+ <span class="nb">open</span><span class="p">(</span><span class="n">outfile</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="nb">open</span><span class="p">(</span><span class="n">infile</span><span class="p">)</span><span class="o">.</span><span class="n">read</span><span class="p">()</span> <span class="o">+</span> <span class="s">"</span><span cla [...]
+
+<span class="n">pipeline_run</span><span class="p">([</span><span class="n">capture_mammals</span><span class="p">])</span>
+</pre></div>
+</div>
+</div></blockquote>
+</div>
+<div class="section" id="resulting-output">
+<h2>Resulting Output<a class="headerlink" href="#resulting-output" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="gp">>>> </span><span class="n">pipeline_run</span><span class="p">([</span><span class="n">capture_mammals</span><span class="p">])</span>
+<span class="go"> Job = [mammals.dog.tame.animals -> dog/dog.tame.in_my_zoo, dog] completed</span>
+<span class="go"> Job = [mammals.dog.wild.animals -> dog/dog.wild.in_my_zoo, dog] completed</span>
+<span class="go"> Job = [mammals.lion.handreared.animals -> lion/lion.handreared.in_my_zoo, lion] completed</span>
+<span class="go"> Job = [mammals.lion.wild.animals -> lion/lion.wild.in_my_zoo, lion] completed</span>
+<span class="go"> Job = [mammals.tiger.wild.animals -> tiger/tiger.wild.in_my_zoo, tiger] completed</span>
+<span class="go">Completed Task = capture_mammals</span>
+</pre></div>
+</div>
+</div></blockquote>
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="../../contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#">Code for Chapter 6: Applying the same recipe to create many different files</a><ul>
+<li><a class="reference internal" href="#code">Code</a></li>
+<li><a class="reference internal" href="#resulting-output">Resulting Output</a></li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="onthefly_code.html"
+ title="previous chapter">Code for Chapter 10: Generating parameters on the fly</a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="../simple_tutorial/step2_code.html"
+ title="next chapter">Code for Step 2: Passing parameters to the pipeline</a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../../_sources/tutorials/manual/transform_code.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="../../decorators/decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="../../decorators/originate.html">@originate</a> </li>
+ <li><a href="../../decorators/split.html">@split</a> </li>
+ <li><a href="../../decorators/transform.html">@transform</a> </li>
+ <li><a href="../../decorators/merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="../../decorators/subdivide.html">@subdivide</a> </li>
+ <li><a href="../../decorators/transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="../../decorators/collate.html">@collate</a> </li>
+ <li><a href="../../decorators/collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="../../decorators/graphviz.html">@graphviz</a> </li>
+ <li><a href="../../decorators/mkdir.html">@mkdir</a> </li>
+ <li><a href="../../decorators/follows.html">@follows / mkdir</a> </li>
+ <li><a href="../../decorators/posttask.html">@posttask touch_file</a> </li>
+ <li><a href="../../decorators/active_if.html">@active_if</a> </li>
+ <li><a href="../../decorators/jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="../../decorators/product.html">@product </a> </li>
+ <li><a href="../../decorators/permutations.html">@permutations </a> </li>
+ <li><a href="../../decorators/combinations.html">@combinations </a> </li>
+                                        <li><a href="../../decorators/combinations_with_replacement.html">@combinations_with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="../../decorators/decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="../../decorators/files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="../../decorators/parallel.html">@parallel</a> </li>
+ <li><a href="../../decorators/check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="../../decorators/indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="../simple_tutorial/step2_code.html" title="Code for Step 2: Passing parameters to the pipeline"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="onthefly_code.html" title="Code for Chapter 10: Generating parameters on the fly"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="../new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/tutorials/new_tutorial/active_if.html b/doc/_build/html/tutorials/new_tutorial/active_if.html
new file mode 100644
index 0000000..9c071f2
--- /dev/null
+++ b/doc/_build/html/tutorials/new_tutorial/active_if.html
@@ -0,0 +1,330 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>Chapter 18: Turning parts of the pipeline on and off at runtime with @active_if — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../../index.html" />
+ <link rel="next" title="Chapter 19: Signal the completion of each stage of our pipeline with @posttask" href="posttask.html" />
+ <link rel="prev" title="Chapter 17: @combinations, @permutations and all versus all @product" href="combinatorics.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="posttask.html" title="Chapter 19: Signal the completion of each stage of our pipeline with @posttask"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="combinatorics.html" title="Chapter 17: @combinations, @permutations and all versus all @product"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="introduction.html">Manual</a> / </li>
+ <li><a href="manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <div class="section" id="new-manual-active-if-chapter-num-turning-parts-of-the-pipeline-on-and-off-at-runtime-with-active-if">
+<span id="new-manual-active-if"></span><span id="index-0"></span><h1><strong>Chapter 18</strong>: Turning parts of the pipeline on and off at runtime with <a class="reference internal" href="../../decorators/active_if.html#decorators-active-if"><em>@active_if</em></a><a class="headerlink" href="#new-manual-active-if-chapter-num-turning-parts-of-the-pipeline-on-and-off-at-runtime-with-active-if" title="Permalink to this headline">¶</a></h1>
+<div class="admonition seealso">
+<p class="first admonition-title">See also</p>
+<ul class="last simple">
+<li><a class="reference internal" href="manual_contents.html#new-manual-table-of-contents"><em>Manual Table of Contents</em></a></li>
+<li><a class="reference internal" href="../../decorators/active_if.html#decorators-active-if"><em>@active_if syntax in detail</em></a></li>
+</ul>
+</div>
+<div class="section" id="overview">
+<h2>Overview<a class="headerlink" href="#overview" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>It is sometimes useful to be able to switch parts of a pipeline on and off. For example, a pipeline
+might have two different code paths depending on the type of data it is being asked to analyse.</p>
+<p>One surprisingly easy way to do this is to use a Python <tt class="docutils literal"><span class="pre">if</span></tt> statement around particular task functions:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre> <span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+
+<span class="hll"> <span class="n">run_task1</span> <span class="o">=</span> <span class="bp">True</span>
+</span>
+<span class="hll"> <span class="nd">@originate</span><span class="p">([</span><span class="s">'a.foo'</span><span class="p">,</span> <span class="s">'b.foo'</span><span class="p">])</span>
+</span> <span class="k">def</span> <span class="nf">create_files</span><span class="p">(</span><span class="n">output_file</span><span class="p">):</span>
+ <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+
+
+ <span class="k">if</span> <span class="n">run_task1</span><span class="p">:</span>
+ <span class="c"># might not run</span>
+ <span class="nd">@transform</span><span class="p">(</span><span class="n">create_files</span><span class="p">,</span> <span class="n">suffix</span><span class="p">(</span><span class="s">".foo"</span><span class="p">),</span> <span class="s">".bar"</span><span class="p">)</span>
+ <span class="k">def</span> <span class="nf">foobar</span><span class="p">(</span><span class="n">input_file</span><span class="p">,</span> <span class="n">output_file</span><span class="p">):</span>
+ <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+
+
+ <span class="nd">@transform</span><span class="p">(</span><span class="n">foobar</span><span class="p">,</span> <span class="n">suffix</span><span class="p">(</span><span class="s">".bar"</span><span class="p">),</span> <span class="s">".result"</span><span class="p">)</span>
+ <span class="k">def</span> <span class="nf">wrap_up</span><span class="p">(</span><span class="n">input_file</span><span class="p">,</span> <span class="n">output_file</span><span class="p">):</span>
+ <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+
+
+ <span class="n">pipeline_run</span><span class="p">()</span>
+</pre></div>
+</div>
+</div></blockquote>
+<dl class="docutils">
+<dt>This simple solution has a number of drawbacks:</dt>
+<dd><ol class="first last arabic simple">
+<li>The on/off decision is a one-off event that happens when the script is loaded. Ideally, we
+would like more flexibility and to postpone the decision until <tt class="docutils literal"><span class="pre">pipeline_run()</span></tt> is invoked.</li>
+<li>When <tt class="docutils literal"><span class="pre">if</span></tt> is false, the entire task function becomes invisible, and if there are any
+downstream tasks, as in the above example, <em>Ruffus</em> will complain loudly about
+missing dependencies.</li>
+</ol>
+</dd>
+</dl>
+</div></blockquote>
+</div>
+<div class="section" id="active-if-controls-the-state-of-tasks">
+<h2><a class="reference internal" href="../../decorators/active_if.html#decorators-active-if"><em>@active_if</em></a> controls the state of tasks<a class="headerlink" href="#active-if-controls-the-state-of-tasks" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><ul class="simple">
+<li>Switches tasks on and off at run time depending on the parameters passed to <tt class="docutils literal"><span class="pre">@active_if</span></tt></li>
+<li>Evaluated each time <tt class="docutils literal"><span class="pre">pipeline_run</span></tt>, <tt class="docutils literal"><span class="pre">pipeline_printout</span></tt> or <tt class="docutils literal"><span class="pre">pipeline_printout_graph</span></tt> is called.</li>
+<li>Dormant tasks behave as if they are up to date and have no output.</li>
+</ul>
+<p>The design and initial implementation were contributed by Jacob Biesinger.</p>
+<p>The following example shows its flexibility and syntax:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+<span class="n">run_if_true_1</span> <span class="o">=</span> <span class="bp">True</span>
+<span class="n">run_if_true_2</span> <span class="o">=</span> <span class="bp">False</span>
+<span class="n">run_if_true_3</span> <span class="o">=</span> <span class="bp">True</span>
+
+
+<span class="c">#</span>
+<span class="c"># task1</span>
+<span class="c">#</span>
+<span class="nd">@originate</span><span class="p">([</span><span class="s">'a.foo'</span><span class="p">,</span> <span class="s">'b.foo'</span><span class="p">])</span>
+<span class="k">def</span> <span class="nf">create_files</span><span class="p">(</span><span class="n">outfile</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> create_files</span>
+<span class="sd"> """</span>
+ <span class="nb">open</span><span class="p">(</span><span class="n">outfile</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="n">outfile</span> <span class="o">+</span> <span class="s">"</span><span class="se">\n</span><span class="s">"</span><span class="p">)</span>
+
+<span class="c">#</span>
+<span class="c"># Only runs if all three run_if_true conditions are met</span>
+<span class="c">#</span>
+<span class="hll"><span class="c"># @active_if determines if task is active</span>
+</span><span class="nd">@active_if</span><span class="p">(</span><span class="n">run_if_true_1</span><span class="p">,</span> <span class="k">lambda</span><span class="p">:</span> <span class="n">run_if_true_2</span><span class="p">)</span>
+<span class="nd">@active_if</span><span class="p">(</span><span class="n">run_if_true_3</span><span class="p">)</span>
+<span class="nd">@transform</span><span class="p">(</span><span class="n">create_files</span><span class="p">,</span> <span class="n">suffix</span><span class="p">(</span><span class="s">".foo"</span><span class="p">),</span> <span class="s">".bar"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">this_task_might_be_inactive</span><span class="p">(</span><span class="n">infile</span><span class="p">,</span> <span class="n">outfile</span><span class="p">):</span>
+ <span class="nb">open</span><span class="p">(</span><span class="n">outfile</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="s">"</span><span class="si">%s</span><span class="s"> -> </span><span class="si">%s</span><span class="se">\n</span><span class="s">"</span> <span class="o">%</span> <span class="p">(</span><span class="n">infile</span> [...]
+
+
+<span class="c"># @active_if switches off task because run_if_true_2 == False</span>
+<span class="n">pipeline_run</span><span class="p">(</span><span class="n">verbose</span> <span class="o">=</span> <span class="mi">3</span><span class="p">)</span>
+
+<span class="c"># @active_if switches on task because all run_if_true conditions are met</span>
+<span class="n">run_if_true_2</span> <span class="o">=</span> <span class="bp">True</span>
+<span class="n">pipeline_run</span><span class="p">(</span><span class="n">verbose</span> <span class="o">=</span> <span class="mi">3</span><span class="p">)</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p>The task starts off inactive:</p>
+<blockquote>
+<div><div class="highlight-pycon"><div class="highlight"><pre><span class="hll"><span class="gp">>>> </span><span class="c"># @active_if switches off task "this_task_might_be_inactive" because run_if_true_2 == False</span>
+</span><span class="gp">>>> </span><span class="n">pipeline_run</span><span class="p">(</span><span class="n">verbose</span> <span class="o">=</span> <span class="mi">3</span><span class="p">)</span>
+
+<span class="go">Task enters queue = create_files</span>
+<span class="go">create_files</span>
+<span class="go"> Job = [None -> a.foo] Missing file [a.foo]</span>
+<span class="go"> Job = [None -> b.foo] Missing file [b.foo]</span>
+<span class="go"> Job = [None -> a.foo] completed</span>
+<span class="go"> Job = [None -> b.foo] completed</span>
+<span class="go">Completed Task = create_files</span>
+<span class="go">Inactive Task = this_task_might_be_inactive</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p>Now turn on the task:</p>
+<blockquote>
+<div><div class="highlight-pycon"><div class="highlight"><pre><span class="hll"><span class="gp">>>> </span><span class="c"># @active_if switches on task "this_task_might_be_inactive" because all run_if_true conditions are met</span>
+</span><span class="gp">>>> </span><span class="n">run_if_true_2</span> <span class="o">=</span> <span class="bp">True</span>
+<span class="gp">>>> </span><span class="n">pipeline_run</span><span class="p">(</span><span class="n">verbose</span> <span class="o">=</span> <span class="mi">3</span><span class="p">)</span>
+
+<span class="go">Task enters queue = this_task_might_be_inactive</span>
+
+<span class="go"> Job = [a.foo -> a.bar] Missing file [a.bar]</span>
+<span class="go"> Job = [b.foo -> b.bar] Missing file [b.bar]</span>
+<span class="go"> Job = [a.foo -> a.bar] completed</span>
+<span class="go"> Job = [b.foo -> b.bar] completed</span>
+<span class="go">Completed Task = this_task_might_be_inactive</span>
+</pre></div>
+</div>
+</div></blockquote>
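+<p>Because conditions wrapped in a callable (such as the lambda above) are re-evaluated every time
+<tt class="docutils literal"><span class="pre">pipeline_run()</span></tt> is called, the on/off decision can
+also be deferred to the command line. Here is a minimal sketch, assuming a made-up
+<tt class="docutils literal"><span class="pre">--run_optional_task</span></tt> flag:</p>
+<div class="highlight-python"><pre>from ruffus import *
+import argparse
+
+parser = argparse.ArgumentParser()
+parser.add_argument("--run_optional_task", action="store_true")
+options = parser.parse_args()
+
+@originate(['a.foo', 'b.foo'])
+def create_files(outfile):
+    open(outfile, "w").write(outfile + "\n")
+
+# the lambda is only evaluated when pipeline_run() is called,
+# so the task is active only if --run_optional_task was given
+@active_if(lambda: options.run_optional_task)
+@transform(create_files, suffix(".foo"), ".bar")
+def optional_task(infile, outfile):
+    open(outfile, "w")
+
+pipeline_run(verbose = 3)
+</pre></div>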
+</div></blockquote>
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="../../contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#"><strong>Chapter 18</strong>: Turning parts of the pipeline on and off at runtime with <tt class="docutils literal"><span class="pre">@active_if</span></tt></a><ul>
+<li><a class="reference internal" href="#overview">Overview</a></li>
+<li><a class="reference internal" href="#active-if-controls-the-state-of-tasks"><tt class="docutils literal"><span class="pre">@active_if</span></tt> controls the state of tasks</a></li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="combinatorics.html"
+ title="previous chapter"><strong>Chapter 17</strong>: <tt class="docutils literal"><span class="pre">@combinations</span></tt>, <tt class="docutils literal"><span class="pre">@permutations</span></tt> and all versus all <tt class="docutils literal"><span class="pre">@product</span></tt></a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="posttask.html"
+ title="next chapter"><strong>Chapter 19</strong>: Signal the completion of each stage of our pipeline with <tt class="docutils literal"><span class="pre">@posttask</span></tt></a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../../_sources/tutorials/new_tutorial/active_if.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="../../decorators/decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="../../decorators/originate.html">@originate</a> </li>
+ <li><a href="../../decorators/split.html">@split</a> </li>
+ <li><a href="../../decorators/transform.html">@transform</a> </li>
+ <li><a href="../../decorators/merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="../../decorators/subdivide.html">@subdivide</a> </li>
+ <li><a href="../../decorators/transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="../../decorators/collate.html">@collate</a> </li>
+ <li><a href="../../decorators/collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="../../decorators/graphviz.html">@graphviz</a> </li>
+ <li><a href="../../decorators/mkdir.html">@mkdir</a> </li>
+ <li><a href="../../decorators/follows.html">@follows / mkdir</a> </li>
+ <li><a href="../../decorators/posttask.html">@posttask touch_file</a> </li>
+ <li><a href="../../decorators/active_if.html">@active_if</a> </li>
+ <li><a href="../../decorators/jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="../../decorators/product.html">@product </a> </li>
+ <li><a href="../../decorators/permutations.html">@permutations </a> </li>
+ <li><a href="../../decorators/combinations.html">@combinations </a> </li>
+                                        <li><a href="../../decorators/combinations_with_replacement.html">@combinations_with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="../../decorators/decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="../../decorators/files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="../../decorators/parallel.html">@parallel</a> </li>
+ <li><a href="../../decorators/check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="../../decorators/indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="posttask.html" title="Chapter 19: Signal the completion of each stage of our pipeline with @posttask"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="combinatorics.html" title="Chapter 17: @combinations, @permutations and all versus all @product"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="introduction.html">Manual</a> / </li>
+ <li><a href="manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/tutorials/new_tutorial/check_if_uptodate.html b/doc/_build/html/tutorials/new_tutorial/check_if_uptodate.html
new file mode 100644
index 0000000..c127c80
--- /dev/null
+++ b/doc/_build/html/tutorials/new_tutorial/check_if_uptodate.html
@@ -0,0 +1,267 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>Chapter 23: Esoteric: Writing custom functions to decide which jobs are up to date with @check_if_uptodate — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../../index.html" />
+ <link rel="next" title="Appendix 1: Flow Chart Colours with pipeline_printout_graph(...)" href="flowchart_colours.html" />
+ <link rel="prev" title="Chapter 22: Esoteric: Running jobs in parallel without files using @parallel" href="parallel.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="flowchart_colours.html" title="Appendix 1: Flow Chart Colours with pipeline_printout_graph(...)"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="parallel.html" title="Chapter 22: Esoteric: Running jobs in parallel without files using @parallel"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="introduction.html">Manual</a> / </li>
+ <li><a href="manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <div class="section" id="new-manual-check-if-uptodate-chapter-num-esoteric-writing-custom-functions-to-decide-which-jobs-are-up-to-date-with-check-if-uptodate">
+<span id="new-manual-check-if-uptodate"></span><span id="index-0"></span><h1><strong>Chapter 23</strong>: Esoteric: Writing custom functions to decide which jobs are up to date with <a class="reference internal" href="../../decorators/check_if_uptodate.html#decorators-check-if-uptodate"><em>@check_if_uptodate</em></a><a class="headerlink" href="#new-manual-check-if-uptodate-chapter-num-esoteric-writing-custom-functions-to-decide-which-jobs-are-up-to-date-with-check-if-uptodate" title [...]
+<div class="admonition seealso">
+<p class="first admonition-title">See also</p>
+<ul class="last simple">
+<li><a class="reference internal" href="manual_contents.html#new-manual-table-of-contents"><em>Manual Table of Contents</em></a></li>
+<li><a class="reference internal" href="../../decorators/check_if_uptodate.html#decorators-check-if-uptodate"><em>@check_if_uptodate syntax in detail</em></a></li>
+</ul>
+</div>
+<div class="section" id="check-if-uptodate-manual-dependency-checking">
+<h2><strong>@check_if_uptodate</strong> : Manual dependency checking<a class="headerlink" href="#check-if-uptodate-manual-dependency-checking" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><dl class="docutils">
+<dt>Tasks specified with most decorators, such as</dt>
+<dd><ul class="first last simple">
+<li><a class="reference internal" href="../../decorators/split.html#decorators-split"><em>@split</em></a></li>
+<li><a class="reference internal" href="../../decorators/transform.html#decorators-transform"><em>@transform</em></a></li>
+<li><a class="reference internal" href="../../decorators/merge.html#decorators-merge"><em>@merge</em></a></li>
+<li><a class="reference internal" href="../../decorators/collate.html#decorators-collate"><em>@collate</em></a></li>
+<li><a class="reference internal" href="../../decorators/subdivide.html#decorators-subdivide"><em>@collate</em></a></li>
+</ul>
+</dd>
+</dl>
+<p>have automatic dependency checking based on file modification times.</p>
+<p>Sometimes, you might want to have more control over whether to run jobs, especially
+if a task does not rely on or produce files (e.g. with <a class="reference internal" href="../../decorators/parallel.html#decorators-parallel"><em>@parallel</em></a>).</p>
+<p>You can write your own custom function to decide whether to run a job.
+This takes the same parameters as your task function, and needs to return a
+tuple saying whether an update is required, and why (i.e. <tt class="docutils literal"><span class="pre">tuple(bool,</span> <span class="pre">str)</span></tt>).</p>
+<p>This simple example, which creates the file <tt class="docutils literal"><span class="pre">"a.1"</span></tt> if it does not exist:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+<span class="nd">@originate</span><span class="p">(</span><span class="s">"a.1"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">create_if_necessary</span><span class="p">(</span><span class="n">output_file</span><span class="p">):</span>
+ <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+
+<span class="n">pipeline_run</span><span class="p">([])</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p>could be rewritten more laboriously as:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+<span class="kn">import</span> <span class="nn">os</span>
+<span class="k">def</span> <span class="nf">check_file_exists</span><span class="p">(</span><span class="n">input_file</span><span class="p">,</span> <span class="n">output_file</span><span class="p">):</span>
+ <span class="k">if</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">exists</span><span class="p">(</span><span class="n">output_file</span><span class="p">):</span>
+ <span class="k">return</span> <span class="bp">False</span><span class="p">,</span> <span class="s">"File already exists"</span>
+ <span class="k">return</span> <span class="bp">True</span><span class="p">,</span> <span class="s">"</span><span class="si">%s</span><span class="s"> is missing"</span> <span class="o">%</span> <span class="n">output_file</span>
+
+<span class="nd">@parallel</span><span class="p">([[</span><span class="bp">None</span><span class="p">,</span> <span class="s">"a.1"</span><span class="p">]])</span>
+<span class="nd">@check_if_uptodate</span><span class="p">(</span><span class="n">check_file_exists</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">create_if_necessary</span><span class="p">(</span><span class="n">input_file</span><span class="p">,</span> <span class="n">output_file</span><span class="p">):</span>
+ <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+
+<span class="n">pipeline_run</span><span class="p">([</span><span class="n">create_if_necessary</span><span class="p">])</span>
+</pre></div>
+</div>
+</div></blockquote>
+<dl class="docutils">
+<dt>Both produce the same output:</dt>
+<dd><div class="first last highlight-python"><pre>Task = create_if_necessary
+ Job = [null, "a.1"] completed</pre>
+</div>
+</dd>
+</dl>
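+<p>The same mechanism can express whatever rule suits your data. For example, here is a
+sketch of a checker that also forces a re-run when the output file exists but is empty
+(for instance after an interrupted write); the name
+<tt class="docutils literal"><span class="pre">check_not_empty</span></tt> is made up for illustration:</p>
+<div class="highlight-python"><pre>from ruffus import *
+import os
+
+def check_not_empty(input_file, output_file):
+    # same signature as the task function; returns tuple(bool, str)
+    if not os.path.exists(output_file):
+        return True, "%s is missing" % output_file
+    if os.path.getsize(output_file) == 0:
+        return True, "%s is empty" % output_file
+    return False, "File already exists and is not empty"
+
+@parallel([[None, "a.1"]])
+@check_if_uptodate(check_not_empty)
+def create_if_necessary(input_file, output_file):
+    open(output_file, "w").write("some content\n")
+
+pipeline_run([create_if_necessary])
+</pre></div>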
+</div></blockquote>
+<div class="admonition note">
+<p class="first admonition-title">Note</p>
+<p>The function specified by <a class="reference internal" href="../../decorators/check_if_uptodate.html#decorators-check-if-uptodate"><em>@check_if_uptodate</em></a> can be called
+more than once for each job.</p>
+<p class="last">See the <a class="reference internal" href="dependencies.html#new-manual-dependencies"><em>description here</em></a> of how <em>Ruffus</em> decides which tasks to run.</p>
+</div>
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="../../contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#"><strong>Chapter 23</strong>: Esoteric: Writing custom functions to decide which jobs are up to date with <tt class="docutils literal"><span class="pre">@check_if_uptodate</span></tt></a><ul>
+<li><a class="reference internal" href="#check-if-uptodate-manual-dependency-checking"><strong>@check_if_uptodate</strong> : Manual dependency checking</a></li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="parallel.html"
+ title="previous chapter"><strong>Chapter 22</strong>: Esoteric: Running jobs in parallel without files using <tt class="docutils literal"><span class="pre">@parallel</span></tt></a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="flowchart_colours.html"
+ title="next chapter"><strong>Appendix 1</strong>: Flow Chart Colours with <tt class="docutils literal"><span class="pre">pipeline_printout_graph(...)</span></tt></a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../../_sources/tutorials/new_tutorial/check_if_uptodate.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="../../decorators/decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="../../decorators/originate.html">@originate</a> </li>
+ <li><a href="../../decorators/split.html">@split</a> </li>
+ <li><a href="../../decorators/transform.html">@transform</a> </li>
+ <li><a href="../../decorators/merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="../../decorators/subdivide.html">@subdivide</a> </li>
+ <li><a href="../../decorators/transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="../../decorators/collate.html">@collate</a> </li>
+ <li><a href="../../decorators/collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="../../decorators/graphviz.html">@graphviz</a> </li>
+ <li><a href="../../decorators/mkdir.html">@mkdir</a> </li>
+ <li><a href="../../decorators/follows.html">@follows / mkdir</a> </li>
+ <li><a href="../../decorators/posttask.html">@posttask touch_file</a> </li>
+ <li><a href="../../decorators/active_if.html">@active_if</a> </li>
+ <li><a href="../../decorators/jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="../../decorators/product.html">@product </a> </li>
+ <li><a href="../../decorators/permutations.html">@permutations </a> </li>
+ <li><a href="../../decorators/combinations.html">@combinations </a> </li>
+                                        <li><a href="../../decorators/combinations_with_replacement.html">@combinations_with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="../../decorators/decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="../../decorators/files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="../../decorators/parallel.html">@parallel</a> </li>
+ <li><a href="../../decorators/check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="../../decorators/indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="flowchart_colours.html" title="Appendix 1: Flow Chart Colours with pipeline_printout_graph(...)"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="parallel.html" title="Chapter 22: Esoteric: Running jobs in parallel without files using @parallel"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="introduction.html">Manual</a> / </li>
+ <li><a href="manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/tutorials/new_tutorial/checkpointing.html b/doc/_build/html/tutorials/new_tutorial/checkpointing.html
new file mode 100644
index 0000000..2c39a8d
--- /dev/null
+++ b/doc/_build/html/tutorials/new_tutorial/checkpointing.html
@@ -0,0 +1,538 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>Chapter 10: Checkpointing: Interrupted Pipelines and Exceptions — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../../index.html" />
+ <link rel="next" title="Chapter 11: Pipeline topologies and a compendium of Ruffus decorators" href="decorators_compendium.html" />
+ <link rel="prev" title="Chapter 9: Preparing directories for output with @mkdir()" href="mkdir.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="decorators_compendium.html" title="Chapter 11: Pipeline topologies and a compendium of Ruffus decorators"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="mkdir.html" title="Chapter 9: Preparing directories for output with @mkdir()"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="introduction.html">Manual</a> / </li>
+ <li><a href="manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <div class="section" id="new-manual-checkpointing-chapter-num-checkpointing-interrupted-pipelines-and-exceptions">
+<span id="new-manual-checkpointing"></span><span id="index-0"></span><h1><strong>Chapter 10</strong>: Checkpointing: Interrupted Pipelines and Exceptions<a class="headerlink" href="#new-manual-checkpointing-chapter-num-checkpointing-interrupted-pipelines-and-exceptions" title="Permalink to this headline">¶</a></h1>
+<div class="admonition seealso">
+<p class="first admonition-title">See also</p>
+<ul class="last simple">
+<li><a class="reference internal" href="manual_contents.html#new-manual-table-of-contents"><em>Manual Table of Contents</em></a></li>
+</ul>
+</div>
+<div class="admonition note">
+<p class="first admonition-title">Note</p>
+<p>Remember to look at the example code:</p>
+<ul class="last simple">
+<li><a class="reference internal" href="checkpointing_code.html#new-manual-checkpointing-code"><em>Chapter 10: Python Code for Checkpointing: Interrupted Pipelines and Exceptions</em></a></li>
+</ul>
+</div>
+<div class="section" id="overview">
+<h2>Overview<a class="headerlink" href="#overview" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><a class="reference internal image-reference" href="../../_images/theoretical_pipeline_schematic.png"><img alt="../../_images/theoretical_pipeline_schematic.png" src="../../_images/theoretical_pipeline_schematic.png" style="width: 610.0px; height: 71.0px;" /></a>
+<p>Computational pipelines transform your data in stages until the final result is produced.</p>
+<p>By default, <em>Ruffus</em> uses file modification times for the <strong>Input</strong> and <strong>Output</strong> files to determine
+whether each stage of a pipeline is up-to-date or not. But what happens when the task
+function is interrupted, whether from the command line or by error, halfway through writing the output?</p>
+<p>In this case, the half-formed, truncated and corrupt <strong>Output</strong> file will look newer than its <strong>Input</strong> and hence appear up-to-date.</p>
+</div></blockquote>
+</div>
+<div class="section" id="interrupting-tasks">
+<span id="new-manual-interrupting-tasks"></span><span id="index-1"></span><h2>Interrupting tasks<a class="headerlink" href="#interrupting-tasks" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>Let us try with an example:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+<span class="kn">import</span> <span class="nn">sys</span><span class="o">,</span> <span class="nn">time</span>
+
+<span class="c"># create initial files</span>
+<span class="nd">@originate</span><span class="p">([</span><span class="s">'job1.start'</span><span class="p">])</span>
+<span class="k">def</span> <span class="nf">create_initial_files</span><span class="p">(</span><span class="n">output_file</span><span class="p">):</span>
+ <span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span> <span class="k">as</span> <span class="n">oo</span><span class="p">:</span> <span class="k">pass</span>
+
+
+<span class="c">#---------------------------------------------------------------</span>
+<span class="c">#</span>
+<span class="c"># long task to interrupt</span>
+<span class="c">#</span>
+<span class="nd">@transform</span><span class="p">(</span><span class="n">create_initial_files</span><span class="p">,</span> <span class="n">suffix</span><span class="p">(</span><span class="s">".start"</span><span class="p">),</span> <span class="s">".output"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">long_task</span><span class="p">(</span><span class="n">input_files</span><span class="p">,</span> <span class="n">output_file</span><span class="p">):</span>
+ <span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span> <span class="k">as</span> <span class="n">ff</span><span class="p">:</span>
+ <span class="n">ff</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="s">"Unfinished..."</span><span class="p">)</span>
+ <span class="c"># sleep for 2 seconds here so you can interrupt me</span>
+ <span class="n">sys</span><span class="o">.</span><span class="n">stderr</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="s">"Job started. Press ^C to interrupt me now...</span><span class="se">\n</span><span class="s">"</span><span class="p">)</span>
+<span class="hll"> <span class="n">time</span><span class="o">.</span><span class="n">sleep</span><span class="p">(</span><span class="mi">2</span><span class="p">)</span>
+</span> <span class="n">ff</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="s">"</span><span class="se">\n</span><span class="s">Finished"</span><span class="p">)</span>
+ <span class="n">sys</span><span class="o">.</span><span class="n">stderr</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="s">"Job completed.</span><span class="se">\n</span><span class="s">"</span><span class="p">)</span>
+
+
+<span class="c"># Run</span>
+<span class="n">pipeline_run</span><span class="p">([</span><span class="n">long_task</span><span class="p">])</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p>When this script runs, it pauses in the middle with this message:</p>
+<div class="highlight-python"><pre>Job started. Press ^C to interrupt me now...</pre>
+</div>
+<p>If you interrupt the script by pressing Control-C at this point, you will see that <tt class="docutils literal"><span class="pre">job1.output</span></tt> contains only <tt class="docutils literal"><span class="pre">Unfinished...</span></tt>.
+However, if you rerun the interrupted pipeline, <em>Ruffus</em> ignores the corrupt, incomplete file:</p>
+<blockquote>
+<div><div class="highlight-pycon"><div class="highlight"><pre><span class="gp">>>> </span><span class="n">pipeline_run</span><span class="p">([</span><span class="n">long_task</span><span class="p">])</span>
+<span class="go">Job started. Press ^C to interrupt me now...</span>
+<span class="go">Job completed</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p>And if you had run <tt class="docutils literal"><span class="pre">pipeline_printout</span></tt>:</p>
+<blockquote>
+<div><div class="highlight-pycon"><div class="highlight"><pre><span class="gp">>>> </span><span class="n">pipeline_printout</span><span class="p">(</span><span class="n">sys</span><span class="o">.</span><span class="n">stdout</span><span class="p">,</span> <span class="p">[</span><span class="n">long_task</span><span class="p">],</span> <span class="n">verbose</span><span class="o">=</span><span class="mi">3</span><span class="p">)</span>
+<span class="go">________________________________________</span>
+<span class="go">Tasks which will be run:</span>
+
+<span class="go">Task = long_task</span>
+<span class="go"> Job = [job1.start</span>
+<span class="go"> -> job1.output]</span>
+<span class="hll"><span class="go"> # Job needs update: Previous incomplete run leftover: [job1.output]</span>
+</span></pre></div>
+</div>
+</div></blockquote>
+<p>We can see that <em>Ruffus</em> magically knows that the previous run was incomplete, and that <tt class="docutils literal"><span class="pre">job1.output</span></tt> is detritus that needs to be discarded.</p>
+</div></blockquote>
+</div>
+<div class="section" id="checkpointing-only-log-completed-jobs">
+<span id="new-manual-logging-completed-jobs"></span><h2>Checkpointing: only log completed jobs<a class="headerlink" href="#checkpointing-only-log-completed-jobs" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>All is revealed if you look in the working directory. <em>Ruffus</em> has created a file called <tt class="docutils literal"><span class="pre">.ruffus_history.sqlite</span></tt>.
+In this <a class="reference external" href="https://sqlite.org/">SQLite</a> database, <em>Ruffus</em> logs only those files which are the result of a completed job;
+all other files are suspect.
+This checkpoint database is a fail-safe, not a substitute for checking file modification times. If the <strong>Input</strong> or <strong>Output</strong> files are
+modified, the pipeline will rerun.</p>
+<p>By default, <em>Ruffus</em> saves only file timestamps to the SQLite database but you can also add a checksum of the pipeline task function body or parameters.
+This behaviour can be controlled by setting the <tt class="docutils literal"><span class="pre">checksum_level</span></tt> parameter
+in <tt class="docutils literal"><span class="pre">pipeline_run()</span></tt>. For example, if you do not want to save any timestamps or checksums:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="n">pipeline_run</span><span class="p">(</span><span class="n">checksum_level</span> <span class="o">=</span> <span class="mi">0</span><span class="p">)</span>
+
+<span class="n">CHECKSUM_FILE_TIMESTAMPS</span> <span class="o">=</span> <span class="mi">0</span> <span class="c"># only rerun when the file timestamps are out of date (classic mode)</span>
+<span class="n">CHECKSUM_HISTORY_TIMESTAMPS</span> <span class="o">=</span> <span class="mi">1</span> <span class="c"># Default: also rerun when the history shows a job as being out of date</span>
+<span class="n">CHECKSUM_FUNCTIONS</span> <span class="o">=</span> <span class="mi">2</span> <span class="c"># also rerun when function body has changed</span>
+<span class="n">CHECKSUM_FUNCTIONS_AND_PARAMS</span> <span class="o">=</span> <span class="mi">3</span> <span class="c"># also rerun when function parameters or function body change</span>
+</pre></div>
+</div>
+</div></blockquote>
+<div class="admonition note">
+<p class="first admonition-title">Note</p>
+<p class="last">Checksums are calculated from the <a class="reference external" href="http://docs.python.org/2/library/pickle.html">pickled</a> string for the function code and parameters.
+If pickling fails, Ruffus will degrade gracefully to saving just the timestamp in the SQLite database.</p>
+</div>
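+<p>For example, a minimal sketch (reusing the <tt class="docutils literal"><span class="pre">long_task</span></tt> example above, and passing the value from the list above directly as an integer) of asking <em>Ruffus</em> to also rerun jobs when the task function body or its parameters change:</p>
+<div class="highlight-python"><pre># a minimal sketch: rerun jobs whenever the task function body or its
+# parameters change, as well as when timestamps are out of date
+# (3 == CHECKSUM_FUNCTIONS_AND_PARAMS in the list above)
+pipeline_run([long_task], checksum_level = 3)</pre>
+</div>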
+</div></blockquote>
+</div>
+<div class="section" id="do-not-share-the-same-checkpoint-file-across-for-multiple-pipelines">
+<span id="new-manual-history-files-cannot-be-shared"></span><h2>Do not share the same checkpoint file across for multiple pipelines!<a class="headerlink" href="#do-not-share-the-same-checkpoint-file-across-for-multiple-pipelines" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>The name of the Ruffus Python script is not saved in the checkpoint file alongside timestamps and checksums.
+That means that you can rename your pipeline source code file without having to rerun the pipeline!
+The tradeoff is that if multiple pipelines are run from the same directory, and save their histories to the
+same SQLite database file, and if their file names overlap (all of these are bad ideas anyway!), this is
+bound to be a source of confusion.</p>
+<p>Luckily, the name and path of the checkpoint file can also be changed for each pipeline.</p>
+</div></blockquote>
+</div>
+<div class="section" id="setting-checkpoint-file-names">
+<span id="new-manual-changing-history-file-name"></span><h2>Setting checkpoint file names<a class="headerlink" href="#setting-checkpoint-file-names" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><div class="admonition warning">
+<p class="first admonition-title">Warning</p>
+<p>Some file systems do not appear to support SQLite at all:</p>
+<p>There are reports that SQLite databases have <a class="reference external" href="http://beets.radbox.org/blog/sqlite-nightmare.html">file locking problems</a> on Lustre.</p>
+<p class="last">The best solution would be to keep the SQLite database on an alternate compatible file system away from the working directory if possible.</p>
+</div>
+</div></blockquote>
+<div class="section" id="environment-variable-default-ruffus-history-file">
+<h3>environment variable <tt class="docutils literal"><span class="pre">DEFAULT_RUFFUS_HISTORY_FILE</span></tt><a class="headerlink" href="#environment-variable-default-ruffus-history-file" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><p>The name of the checkpoint file is the value of the environment variable <tt class="docutils literal"><span class="pre">DEFAULT_RUFFUS_HISTORY_FILE</span></tt>.</p>
+<blockquote>
+<div>export DEFAULT_RUFFUS_HISTORY_FILE=/some/where/.ruffus_history.sqlite</div></blockquote>
+<p>This gives considerable flexibility, and allows a system-wide policy to be set so that all Ruffus checkpoint files are saved to consistent, predictable paths.</p>
+<div class="admonition note">
+<p class="first admonition-title">Note</p>
+<p class="last">It is your responsibility to make sure that the requisite destination directories for the checkpoint files exist beforehand!</p>
+</div>
+<p>If this environment variable is not set, the checkpoint file defaults to <tt class="docutils literal"><span class="pre">.ruffus_history.sqlite</span></tt> in your working directory.</p>
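+<p>As a sketch, the variable can also be set from within Python before the pipeline functions are called (this assumes <em>Ruffus</em> reads <tt class="docutils literal"><span class="pre">DEFAULT_RUFFUS_HISTORY_FILE</span></tt> at the time <tt class="docutils literal"><span class="pre">pipeline_run()</span></tt> is invoked rather than at import time):</p>
+<div class="highlight-python"><pre>import os
+
+# sketch only: point the checkpoint file at the path used in the export above
+os.environ["DEFAULT_RUFFUS_HISTORY_FILE"] = "/some/where/.ruffus_history.sqlite"
+
+pipeline_run([long_task])</pre>
+</div>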
+</div></blockquote>
+</div>
+<div class="section" id="setting-the-checkpoint-file-name-manually">
+<h3>Setting the checkpoint file name manually<a class="headerlink" href="#setting-the-checkpoint-file-name-manually" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><p>This checkpoint file name can always be overridden as a parameter to Ruffus functions:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="n">pipeline_run</span><span class="p">(</span><span class="n">history_file</span> <span class="o">=</span> <span class="s">"XXX"</span><span class="p">)</span>
+<span class="n">pipeline_printout</span><span class="p">(</span><span class="n">history_file</span> <span class="o">=</span> <span class="s">"XXX"</span><span class="p">)</span>
+<span class="n">pipeline_printout_graph</span><span class="p">(</span><span class="n">history_file</span> <span class="o">=</span> <span class="s">"XXX"</span><span class="p">)</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p>There is also built-in support in <tt class="docutils literal"><span class="pre">Ruffus.cmdline</span></tt>, so if you use this module, you can simply add to your command line:</p>
+<blockquote>
+<div><div class="highlight-bash"><div class="highlight"><pre><span class="c"># use a custom checkpoint file</span>
+myscript --checksum_file_name .myscript.ruffus_history.sqlite
+</pre></div>
+</div>
+</div></blockquote>
+<p>This takes precedence over everything else.</p>
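+<p>As a sketch, assuming the standard <tt class="docutils literal"><span class="pre">Ruffus.cmdline</span></tt> boilerplate described elsewhere in this manual (<tt class="docutils literal"><span class="pre">cmdline.get_argparse()</span></tt> and <tt class="docutils literal"><span class="pre">cmdline.run()</span></tt>), the checkpoint file named on the command line is forwarded to the pipeline for you:</p>
+<div class="highlight-python"><pre>from ruffus import *
+
+# sketch only: the usual Ruffus.cmdline boilerplate
+parser = cmdline.get_argparse(description = "Pipeline with a custom checkpoint file")
+options = parser.parse_args()
+
+# ... task functions decorated with @originate, @transform, etc. ...
+
+# cmdline.run() passes the --checksum_file_name option (among others)
+# on to pipeline_run()
+cmdline.run(options)</pre>
+</div>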
+</div></blockquote>
+</div>
+</div>
+<div class="section" id="useful-checkpoint-file-name-policies-default-ruffus-history-file">
+<h2>Useful checkpoint file name policies with <tt class="docutils literal"><span class="pre">DEFAULT_RUFFUS_HISTORY_FILE</span></tt><a class="headerlink" href="#useful-checkpoint-file-name-policies-default-ruffus-history-file" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div>If the pipeline script is called <tt class="docutils literal"><span class="pre">test/bin/scripts/run.me.py</span></tt>, then these are the resulting checkpoint file locations:</div></blockquote>
+<div class="section" id="example-1-same-directory-different-name">
+<h3>Example 1: same directory, different name<a class="headerlink" href="#example-1-same-directory-different-name" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><p>If the environment variable is:</p>
+<div class="highlight-bash"><div class="highlight"><pre><span class="nb">export </span><span class="nv">DEFAULT_RUFFUS_HISTORY_FILE</span><span class="o">=</span>.<span class="o">{</span>basename<span class="o">}</span>.ruffus_history.sqlite
+</pre></div>
+</div>
+<p>Then the job checkpoint database for <tt class="docutils literal"><span class="pre">run.me.py</span></tt> will be <tt class="docutils literal"><span class="pre">.run.me.ruffus_history.sqlite</span></tt></p>
+<div class="highlight-bash"><div class="highlight"><pre>/test/bin/scripts/run.me.py
+/test/bin/scripts/.run.me.ruffus_history.sqlite
+</pre></div>
+</div>
+</div></blockquote>
+</div>
+<div class="section" id="example-2-different-directory-same-name">
+<h3>Example 2: Different directory, same name<a class="headerlink" href="#example-2-different-directory-same-name" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><div class="highlight-bash"><div class="highlight"><pre><span class="nb">export </span><span class="nv">DEFAULT_RUFFUS_HISTORY_FILE</span><span class="o">=</span>/common/path/for/job_history/.<span class="o">{</span>basename<span class="o">}</span>.ruffus_history.sqlite
+</pre></div>
+</div>
+<div class="highlight-bash"><div class="highlight"><pre>/common/path/for/job_history/.run.me.ruffus_history.sqlite
+</pre></div>
+</div>
+</div></blockquote>
+</div>
+<div class="section" id="example-2-different-directory-same-name-but-keep-one-level-of-subdirectory-to-disambiguate">
+<h3>Example 3: Different directory, same name but keep one level of subdirectory to disambiguate<a class="headerlink" href="#example-2-different-directory-same-name-but-keep-one-level-of-subdirectory-to-disambiguate" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><div class="highlight-bash"><div class="highlight"><pre><span class="nb">export </span><span class="nv">DEFAULT_RUFFUS_HISTORY_FILE</span><span class="o">=</span>/common/path/for/job_history/<span class="o">{</span>subdir<span class="o">[</span>0<span class="o">]}</span>/.<span class="o">{</span>basename<span class="o">}</span>.ruffus_history.sqlite
+</pre></div>
+</div>
+<div class="highlight-bash"><div class="highlight"><pre>/common/path/for/job_history/scripts/.run.me.ruffus_history.sqlite
+</pre></div>
+</div>
+</div></blockquote>
+</div>
+<div class="section" id="example-2-nested-in-common-directory">
+<h3>Example 4: nested in common directory<a class="headerlink" href="#example-2-nested-in-common-directory" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><div class="highlight-bash"><div class="highlight"><pre><span class="nb">export </span><span class="nv">DEFAULT_RUFFUS_HISTORY_FILE</span><span class="o">=</span>/common/path/for/job_history/<span class="o">{</span>path<span class="o">}</span>/.<span class="o">{</span>basename<span class="o">}</span>.ruffus_history.sqlite
+</pre></div>
+</div>
+<div class="highlight-bash"><div class="highlight"><pre>/common/path/for/job_history/test/bin/scripts/.run.me.ruffus_history.sqlite
+</pre></div>
+</div>
+</div></blockquote>
+</div>
+</div>
+<div class="section" id="regenerating-the-checkpoint-file">
+<span id="new-manual-regenerating-history-file"></span><span id="index-2"></span><h2>Regenerating the checkpoint file<a class="headerlink" href="#regenerating-the-checkpoint-file" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>Occasionally you may need to re-generate the checkpoint file.</p>
+<p>This could be necessary:</p>
+<blockquote>
+<div><ul class="simple">
+<li>because you are upgrading from a previous version of Ruffus without checkpoint file support</li>
+<li>on the rare occasions when the SQLite file becomes corrupted and has to be deleted</li>
+<li>if you wish to circumvent the file checking of Ruffus after making some manual changes!</li>
+</ul>
+</div></blockquote>
+<p>To do this, it is only necessary to call <tt class="docutils literal"><span class="pre">pipeline_run</span></tt> appropriately:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="n">CHECKSUM_REGENERATE</span> <span class="o">=</span> <span class="mi">2</span>
+<span class="n">pipeline</span><span class="p">(</span><span class="n">touch_files_only</span> <span class="o">=</span> <span class="n">CHECKSUM_REGENERATE</span><span class="p">)</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p>Similarly, if you are using <tt class="docutils literal"><span class="pre">Ruffus.cmdline</span></tt>, you can call:</p>
+<blockquote>
+<div><div class="highlight-bash"><div class="highlight"><pre>myscript --recreate_database
+</pre></div>
+</div>
+</div></blockquote>
+<p>Note that this regenerates the checkpoint file to reflect the existing <em>Input</em> and <em>Output</em> files on disk.
+In other words, the onus is on you to make sure there are no half-formed, corrupt files. On the other hand,
+the pipeline does not need to have been previously run successfully for this to work. Essentially, Ruffus
+pretends to run the pipeline, logging all the files with consistent file modification times, and stopping
+at the first task which appears out of date or incomplete.</p>
+</div></blockquote>
+</div>
+<div class="section" id="rules-for-determining-if-files-are-up-to-date">
+<span id="new-manual-skip-up-to-date-rules"></span><span id="index-3"></span><h2>Rules for determining if files are up to date<a class="headerlink" href="#rules-for-determining-if-files-are-up-to-date" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>The following simple rules are used by <em>Ruffus</em>; a short sketch illustrating rule 2 follows the list.</p>
+<ol class="arabic">
+<li><p class="first">The pipeline stage will be rerun if:</p>
+<blockquote>
+<div><ul class="simple">
+<li>any of the <strong>Input</strong> files are newer than any of the <strong>Output</strong> files</li>
+<li>any of the <strong>Output</strong> files are missing</li>
+</ul>
+</div></blockquote>
+</li>
+<li><p class="first">In addition, it is possible to run jobs which create files from scratch.</p>
+<blockquote>
+<div><ul class="simple">
+<li>If no <strong>Input</strong> file names are supplied, the job will only run if any <em>output</em> file is missing.</li>
+</ul>
+</div></blockquote>
+</li>
+<li><p class="first">Finally, if no <strong>Output</strong> file names are supplied, the job will always run.</p>
+</li>
+</ol>
+</div></blockquote>
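+<p>The following minimal sketch (with hypothetical file names) illustrates rule 2: a task created with <tt class="docutils literal"><span class="pre">@originate</span></tt> supplies no <strong>Input</strong> files, so each job runs only when its <strong>Output</strong> file is missing:</p>
+<div class="highlight-python"><pre>from ruffus import *
+
+# sketch only, with hypothetical file names:
+# no Input files are supplied, so each job runs only if its
+# Output file ("seed.1" or "seed.2") is missing (rule 2 above)
+@originate(["seed.1", "seed.2"])
+def make_seed_files(output_file):
+    with open(output_file, "w") as oo:
+        pass
+
+pipeline_run([make_seed_files])</pre>
+</div>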
+</div>
+<div class="section" id="missing-files-generate-exceptions">
+<span id="index-4"></span><h2>Missing files generate exceptions<a class="headerlink" href="#missing-files-generate-exceptions" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>If the <em>Input</em> files for a job are missing, the task function will have no way
+to produce its <em>Output</em>. In this case, a <tt class="docutils literal"><span class="pre">MissingInputFileError</span></tt> exception will be raised
+automatically. For example:</p>
+<blockquote>
+<div><div class="highlight-python"><pre>task.MissingInputFileError: No way to run job: Input file ['a.1'] does not exist
+for Job = ["a.1" -> "a.2", "A file"]</pre>
+</div>
+</div></blockquote>
+</div></blockquote>
+</div>
+<div class="section" id="caveats-coarse-timestamp-resolution">
+<span id="index-5"></span><h2>Caveats: Coarse Timestamp resolution<a class="headerlink" href="#caveats-coarse-timestamp-resolution" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>Note that modification times have precision only to the nearest second under some older file systems
+(ext2/ext3?). This may also be true for networked file systems.</p>
+<p><em>Ruffus</em> supplements the file system time resolution by independently recording the timestamp at
+full OS resolution (usually to at least the millisecond) at job completion, when presumably the <strong>Output</strong>
+files will have been created.</p>
+<p>However, <em>Ruffus</em> only does this if the discrepancy between file time and system time is less than a second
+(due to poor file system timestamp resolution). If there are large mismatches between the two, due for example
+to network time slippage, misconfiguration etc, <em>Ruffus</em> reverts to using the file system time and adds a one second
+delay between jobs (via <tt class="docutils literal"><span class="pre">time.sleep()</span></tt>) to make sure input and output file stamps are different.</p>
+<p>If you know that your file system has coarse-grained timestamp resolution, you can always revert to this very conservative behaviour,
+at the price of some annoying 1s pauses, by setting <a class="reference internal" href="../../pipeline_functions.html#pipeline-functions-pipeline-run"><em>pipeline_run(one_second_per_job = True)</em></a>.</p>
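+<p>For example, reusing the <tt class="docutils literal"><span class="pre">long_task</span></tt> example from the start of this chapter:</p>
+<div class="highlight-python"><pre># conservative behaviour for file systems with coarse timestamps:
+# pause for one second between jobs so that Input and Output
+# modification times can never be identical
+pipeline_run([long_task], one_second_per_job = True)</pre>
+</div>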
+</div></blockquote>
+</div>
+<div class="section" id="flag-files-checkpointing-for-the-paranoid">
+<span id="index-6"></span><h2>Flag files: Checkpointing for the paranoid<a class="headerlink" href="#flag-files-checkpointing-for-the-paranoid" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>One other way of checkpointing your pipelines is to create an extra “flag” file as an additional
+<strong>Output</strong> file name. The flag file is only created or updated when everything else in the
+job has completed successfully and been written to disk. A missing or out-of-date flag file is then
+a sign to Ruffus that the task never completed properly in the first place.</p>
+<p>This used to be by far the best way of performing checkpointing in Ruffus and is still
+the most bulletproof way of proceeding. For example, even the loss or corruption
+of the checkpoint file would not affect things greatly.</p>
+<p>Nevertheless, flag files are largely superfluous in modern <em>Ruffus</em>.</p>
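+<p>A minimal sketch of the idea, reusing <tt class="docutils literal"><span class="pre">create_initial_files</span></tt> from the example above (the flag file name is hypothetical): the flag file is listed as an extra <strong>Output</strong> file and only written once the real output is complete.</p>
+<div class="highlight-python"><pre># sketch only: ".output.completed" is an extra Output "flag" file
+# which is only written once the real output is complete
+@transform(create_initial_files, suffix(".start"),
+           [".output", ".output.completed"])
+def long_task_with_flag(input_file, output_files):
+    real_output, flag_file = output_files
+    with open(real_output, "w") as oo:
+        oo.write("Finished")
+    # only now mark the job as properly completed
+    with open(flag_file, "w") as flag:
+        pass</pre>
+</div>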
+</div></blockquote>
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="../../contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#"><strong>Chapter 10</strong>: Checkpointing: Interrupted Pipelines and Exceptions</a><ul>
+<li><a class="reference internal" href="#overview">Overview</a></li>
+<li><a class="reference internal" href="#interrupting-tasks">Interrupting tasks</a></li>
+<li><a class="reference internal" href="#checkpointing-only-log-completed-jobs">Checkpointing: only log completed jobs</a></li>
+<li><a class="reference internal" href="#do-not-share-the-same-checkpoint-file-across-for-multiple-pipelines">Do not share the same checkpoint file across for multiple pipelines!</a></li>
+<li><a class="reference internal" href="#setting-checkpoint-file-names">Setting checkpoint file names</a><ul>
+<li><a class="reference internal" href="#environment-variable-default-ruffus-history-file">environment variable <tt class="docutils literal"><span class="pre">DEFAULT_RUFFUS_HISTORY_FILE</span></tt></a></li>
+<li><a class="reference internal" href="#setting-the-checkpoint-file-name-manually">Setting the checkpoint file name manually</a></li>
+</ul>
+</li>
+<li><a class="reference internal" href="#useful-checkpoint-file-name-policies-default-ruffus-history-file">Useful checkpoint file name policies <tt class="docutils literal"><span class="pre">DEFAULT_RUFFUS_HISTORY_FILE</span></tt></a><ul>
+<li><a class="reference internal" href="#example-1-same-directory-different-name">Example 1: same directory, different name</a></li>
+<li><a class="reference internal" href="#example-2-different-directory-same-name">Example 2: Different directory, same name</a></li>
+<li><a class="reference internal" href="#example-2-different-directory-same-name-but-keep-one-level-of-subdirectory-to-disambiguate">Example 2: Different directory, same name but keep one level of subdirectory to disambiguate</a></li>
+<li><a class="reference internal" href="#example-2-nested-in-common-directory">Example 2: nested in common directory</a></li>
+</ul>
+</li>
+<li><a class="reference internal" href="#regenerating-the-checkpoint-file">Regenerating the checkpoint file</a></li>
+<li><a class="reference internal" href="#rules-for-determining-if-files-are-up-to-date">Rules for determining if files are up to date</a></li>
+<li><a class="reference internal" href="#missing-files-generate-exceptions">Missing files generate exceptions</a></li>
+<li><a class="reference internal" href="#caveats-coarse-timestamp-resolution">Caveats: Coarse Timestamp resolution</a></li>
+<li><a class="reference internal" href="#flag-files-checkpointing-for-the-paranoid">Flag files: Checkpointing for the paranoid</a></li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="mkdir.html"
+ title="previous chapter"><strong>Chapter 9</strong>: Preparing directories for output with <tt class="docutils literal"><span class="pre">@mkdir()</span></tt></a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="decorators_compendium.html"
+ title="next chapter"><strong>Chapter 11</strong>: Pipeline topologies and a compendium of <em>Ruffus</em> decorators</a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../../_sources/tutorials/new_tutorial/checkpointing.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="../../decorators/decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="../../decorators/originate.html">@originate</a> </li>
+ <li><a href="../../decorators/split.html">@split</a> </li>
+ <li><a href="../../decorators/transform.html">@transform</a> </li>
+ <li><a href="../../decorators/merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="../../decorators/subdivide.html">@subdivide</a> </li>
+ <li><a href="../../decorators/transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="../../decorators/collate.html">@collate</a> </li>
+ <li><a href="../../decorators/collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="../../decorators/graphviz.html">@graphviz</a> </li>
+ <li><a href="../../decorators/mkdir.html">@mkdir</a> </li>
+ <li><a href="../../decorators/follows.html">@follows / mkdir</a> </li>
+ <li><a href="../../decorators/posttask.html">@posttask touch_file</a> </li>
+ <li><a href="../../decorators/active_if.html">@active_if</a> </li>
+ <li><a href="../../decorators/jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="../../decorators/product.html">@product </a> </li>
+ <li><a href="../../decorators/permutations.html">@permutations </a> </li>
+ <li><a href="../../decorators/combinations.html">@combinations </a> </li>
+          <li><a href="../../decorators/combinations_with_replacement.html">@combinations_with_replacement </a>      </li>
+ </ul>
+
+
+ <h4><a href="../../decorators/decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="../../decorators/files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="../../decorators/parallel.html">@parallel</a> </li>
+ <li><a href="../../decorators/check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="../../decorators/indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="decorators_compendium.html" title="Chapter 11: Pipeline topologies and a compendium of Ruffus decorators"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="mkdir.html" title="Chapter 9: Preparing directories for output with @mkdir()"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="introduction.html">Manual</a> / </li>
+ <li><a href="manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/tutorials/new_tutorial/checkpointing_code.html b/doc/_build/html/tutorials/new_tutorial/checkpointing_code.html
new file mode 100644
index 0000000..04b3b91
--- /dev/null
+++ b/doc/_build/html/tutorials/new_tutorial/checkpointing_code.html
@@ -0,0 +1,211 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>Chapter 10: Python Code for Checkpointing: Interrupted Pipelines and Exceptions — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../../index.html" />
+ <link rel="next" title="Chapter 12: Python Code for Splitting up large tasks / files with @split" href="split_code.html" />
+ <link rel="prev" title="Chapter 9: Python Code for Preparing directories for output with @mkdir()" href="mkdir_code.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="split_code.html" title="Chapter 12: Python Code for Splitting up large tasks / files with @split"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="mkdir_code.html" title="Chapter 9: Python Code for Preparing directories for output with @mkdir()"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="introduction.html">Manual</a> / </li>
+ <li><a href="manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <div class="section" id="new-manual-checkpointing-chapter-num-python-code-for-checkpointing-interrupted-pipelines-and-exceptions">
+<span id="new-manual-checkpointing-code"></span><h1><strong>Chapter 10</strong>: Python Code for Checkpointing: Interrupted Pipelines and Exceptions<a class="headerlink" href="#new-manual-checkpointing-chapter-num-python-code-for-checkpointing-interrupted-pipelines-and-exceptions" title="Permalink to this headline">¶</a></h1>
+<div class="admonition seealso">
+<p class="first admonition-title">See also</p>
+<ul class="last simple">
+<li><a class="reference internal" href="manual_contents.html#new-manual-table-of-contents"><em>Manual Table of Contents</em></a></li>
+<li><a class="reference internal" href="checkpointing.html#new-manual-checkpointing"><em>Back to |new_manual.checkpointing.chapter_num|: Interrupted Pipelines and Exceptions</em></a></li>
+</ul>
+</div>
+<div class="section" id="code-for-ref-suffix-decorators-suffix-example">
+<h2>Code for the <tt class="docutils literal"><span class="pre">suffix()</span></tt> example<a class="headerlink" href="#code-for-ref-suffix-decorators-suffix-example" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+</pre></div>
+</div>
+</div></blockquote>
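+<p>For convenience, here is a sketch of the complete interruptible example from Chapter 10 (the same code shown in that chapter):</p>
+<div class="highlight-python"><pre>from ruffus import *
+import sys, time
+
+# create initial files
+@originate(['job1.start'])
+def create_initial_files(output_file):
+    with open(output_file, "w") as oo: pass
+
+
+# long task to interrupt
+@transform(create_initial_files, suffix(".start"), ".output")
+def long_task(input_files, output_file):
+    with open(output_file, "w") as ff:
+        ff.write("Unfinished...")
+        # sleep for 2 seconds here so you can interrupt me
+        sys.stderr.write("Job started. Press ^C to interrupt me now...\n")
+        time.sleep(2)
+        ff.write("\nFinished")
+        sys.stderr.write("Job completed.\n")
+
+
+# Run
+pipeline_run([long_task])</pre>
+</div>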
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="../../contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#"><strong>Chapter 10</strong>: Python Code for Checkpointing: Interrupted Pipelines and Exceptions</a><ul>
+<li><a class="reference internal" href="#code-for-ref-suffix-decorators-suffix-example">Code for .:ref:<cite>suffix() <decorators.suffix></cite> example</a></li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="mkdir_code.html"
+ title="previous chapter"><strong>Chapter 9</strong>: Python Code for Preparing directories for output with <tt class="docutils literal"><span class="pre">@mkdir()</span></tt></a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="split_code.html"
+ title="next chapter"><strong>Chapter 12</strong>: Python Code for Splitting up large tasks / files with <strong>@split</strong></a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../../_sources/tutorials/new_tutorial/checkpointing_code.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="../../decorators/decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="../../decorators/originate.html">@originate</a> </li>
+ <li><a href="../../decorators/split.html">@split</a> </li>
+ <li><a href="../../decorators/transform.html">@transform</a> </li>
+ <li><a href="../../decorators/merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="../../decorators/subdivide.html">@subdivide</a> </li>
+ <li><a href="../../decorators/transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="../../decorators/collate.html">@collate</a> </li>
+ <li><a href="../../decorators/collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="../../decorators/graphviz.html">@graphviz</a> </li>
+ <li><a href="../../decorators/mkdir.html">@mkdir</a> </li>
+ <li><a href="../../decorators/follows.html">@follows / mkdir</a> </li>
+ <li><a href="../../decorators/posttask.html">@posttask touch_file</a> </li>
+ <li><a href="../../decorators/active_if.html">@active_if</a> </li>
+ <li><a href="../../decorators/jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="../../decorators/product.html">@product </a> </li>
+ <li><a href="../../decorators/permutations.html">@permutations </a> </li>
+ <li><a href="../../decorators/combinations.html">@combinations </a> </li>
+          <li><a href="../../decorators/combinations_with_replacement.html">@combinations_with_replacement </a>      </li>
+ </ul>
+
+
+ <h4><a href="../../decorators/decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="../../decorators/files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="../../decorators/parallel.html">@parallel</a> </li>
+ <li><a href="../../decorators/check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="../../decorators/indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="split_code.html" title="Chapter 12: Python Code for Splitting up large tasks / files with @split"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="mkdir_code.html" title="Chapter 9: Python Code for Preparing directories for output with @mkdir()"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="introduction.html">Manual</a> / </li>
+ <li><a href="manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/tutorials/new_tutorial/combinatorics.html b/doc/_build/html/tutorials/new_tutorial/combinatorics.html
new file mode 100644
index 0000000..6c369af
--- /dev/null
+++ b/doc/_build/html/tutorials/new_tutorial/combinatorics.html
@@ -0,0 +1,618 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>Chapter 17: @combinations, @permutations and all versus all @product — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../../index.html" />
+ <link rel="next" title="Chapter 18: Turning parts of the pipeline on and off at runtime with @active_if" href="active_if.html" />
+ <link rel="prev" title="Chapter 16: @subdivide tasks to run efficiently and regroup with @collate" href="subdivide_collate.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="active_if.html" title="Chapter 18: Turning parts of the pipeline on and off at runtime with @active_if"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="subdivide_collate.html" title="Chapter 16: @subdivide tasks to run efficiently and regroup with @collate"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="introduction.html">Manual</a> / </li>
+ <li><a href="manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <div class="section" id="new-manual-combinatorics-chapter-num-combinations-permutations-and-all-versus-all-product">
+<span id="new-manual-combinatorics"></span><span id="index-0"></span><h1><strong>Chapter 17</strong>: <a class="reference internal" href="../../decorators/combinations.html#decorators-combinations"><em>@combinations</em></a>, <a class="reference internal" href="../../decorators/permutations.html#decorators-permutations"><em>@permutations</em></a> and all versus all <a class="reference internal" href="../../decorators/product.html#decorators-product"><em>@product</em></a><a cl [...]
+<div class="admonition seealso">
+<p class="first admonition-title">See also</p>
+<ul class="last simple">
+<li><a class="reference internal" href="manual_contents.html#new-manual-table-of-contents"><em>Manual Table of Contents</em></a></li>
+<li><a class="reference internal" href="../../decorators/combinations_with_replacement.html#decorators-combinations-with-replacement"><em>@combinations_with_replacement</em></a></li>
+<li><a class="reference internal" href="../../decorators/combinations.html#decorators-combinations"><em>@combinations</em></a></li>
+<li><a class="reference internal" href="../../decorators/permutations.html#decorators-permutations"><em>@permutations</em></a></li>
+<li><a class="reference internal" href="../../decorators/product.html#decorators-product"><em>@product</em></a></li>
+<li><a class="reference internal" href="../../decorators/indicator_objects.html#decorators-formatter"><em>formatter()</em></a></li>
+</ul>
+</div>
+<div class="admonition note">
+<p class="first admonition-title">Note</p>
+<p>Remember to look at the example code:</p>
+<blockquote class="last">
+<div><ul class="simple">
+<li><a class="reference internal" href="combinatorics_code.html#new-manual-combinatorics-code"><em>Chapter 17: Python Code for @combinations, @permutations and all versus all @product</em></a></li>
+</ul>
+</div></blockquote>
+</div>
+<div class="section" id="overview">
+<h2>Overview<a class="headerlink" href="#overview" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>A surprising number of computational problems involve some sort of all-versus-all calculation.
+Previously, this would have required all the parameters to be supplied using a custom function
+on the fly with <a class="reference internal" href="../../decorators/files_ex.html#decorators-files-on-the-fly"><em>@files</em></a>.</p>
+<p>From version 2.4, <em>Ruffus</em> supports <a class="reference internal" href="../../decorators/combinations_with_replacement.html#decorators-combinations-with-replacement"><em>@combinations_with_replacement</em></a>,
+<a class="reference internal" href="../../decorators/combinations.html#decorators-combinations"><em>@combinations</em></a>, <a class="reference internal" href="../../decorators/permutations.html#decorators-permutations"><em>@permutations</em></a>,
+<a class="reference internal" href="../../decorators/product.html#decorators-product"><em>@product</em></a>.</p>
+<p>These provide, as far as possible, all the functionality of the identically named combinatorics iterators
+in the standard Python <a class="reference external" href="http://docs.python.org/2/library/itertools.html">itertools</a>
+module.</p>
+</div></blockquote>
+</div>
+<div class="section" id="generating-output-with-formatter">
+<h2>Generating output with <a class="reference internal" href="../../decorators/indicator_objects.html#decorators-formatter"><em>formatter()</em></a><a class="headerlink" href="#generating-output-with-formatter" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>String replacement always takes place via <a class="reference internal" href="../../decorators/indicator_objects.html#decorators-formatter"><em>formatter()</em></a>. Unfortunately,
+the other <em>Ruffus</em> workhorses of <a class="reference internal" href="../../decorators/indicator_objects.html#decorators-regex"><em>regex()</em></a> and <a class="reference internal" href="../../decorators/indicator_objects.html#decorators-suffix"><em>suffix()</em></a>
+do not have sufficient syntactic flexibility.</p>
+<p>Each combinatorics decorator deals with multiple sets of inputs whether this might be:</p>
+<blockquote>
+<div><ul class="simple">
+<li>a self-self comparison (such as <a class="reference internal" href="../../decorators/combinations_with_replacement.html#decorators-combinations-with-replacement"><em>@combinations_with_replacement</em></a>,
+<a class="reference internal" href="../../decorators/combinations.html#decorators-combinations"><em>@combinations</em></a>, <a class="reference internal" href="../../decorators/permutations.html#decorators-permutations"><em>@permutations</em></a>) or,</li>
+<li>a self-other comparison (<a class="reference internal" href="../../decorators/product.html#decorators-product"><em>@product</em></a>)</li>
+</ul>
+</div></blockquote>
+<p>The replacement strings thus require an extra level of indirection to refer to
+parsed components.</p>
+<blockquote>
+<div><ol class="arabic simple">
+<li>The first level refers to which <em>set</em> of inputs.</li>
+<li>The second level refers to which input file in any particular <em>set</em> of inputs.</li>
+</ol>
+</div></blockquote>
+<p>For example, if the <em>inputs</em> are <strong>[A1,A2],[B1,B2],[C1,C2] vs [P1,P2],[Q1,Q2],[R1,R2] vs [X1,X2],[Y1,Y2],[Z1,Z2]</strong>,
+then <tt class="docutils literal"><span class="pre">'{basename[2][0]}'</span></tt> is the <a class="reference external" href="http://docs.python.org/2/library/os.path.html#os.path.basename">basename</a> for</p>
+<blockquote>
+<div><ul class="simple">
+<li>the third set of inputs (<strong>X,Y,Z</strong>) and</li>
+<li>the first file name string in each <strong>Input</strong> of that set (<strong>X1, Y1, Z1</strong>)</li>
+</ul>
+</div></blockquote>
+</div></blockquote>
+</div>
+<div class="section" id="all-vs-all-comparisons-with-product">
+<span id="new-manual-product"></span><h2>All vs all comparisons with <a class="reference internal" href="../../decorators/product.html#decorators-product"><em>@product</em></a><a class="headerlink" href="#all-vs-all-comparisons-with-product" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p><a class="reference internal" href="../../decorators/product.html#decorators-product"><em>@product</em></a> generates the Cartesian <strong>product</strong> between sets of input files,
+i.e. all vs all comparisons.</p>
+<p>The effect is analogous to a nested for loop.</p>
+<p><a class="reference internal" href="../../decorators/product.html#decorators-product"><em>@product</em></a> can be useful, for example, in bioinformatics for finding
+the corresponding genes (orthologues) for a set of proteins in multiple species.</p>
+<div class="highlight-pycon"><div class="highlight"><pre><span class="gp">>>> </span><span class="kn">from</span> <span class="nn">itertools</span> <span class="kn">import</span> <span class="n">product</span>
+<span class="hll"><span class="gp">>>> </span><span class="c"># product('ABC', 'XYZ') --> AX AY AZ BX BY BZ CX CY CZ</span>
+</span><span class="gp">>>> </span><span class="p">[</span> <span class="s">""</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">a</span><span class="p">)</span> <span class="k">for</span> <span class="n">a</span> <span class="ow">in</span> <span class="n">product</span><span class="p">(</span><span class="s">'ABC'</span><span class="p">,</span> <span class="s">'XYZ'</span><span class="p">)]</span>
+<span class="go">['AX', 'AY', 'AZ', 'BX', 'BY', 'BZ', 'CX', 'CY', 'CZ']</span>
+</pre></div>
+</div>
+<p>This example calculates the <strong>@product</strong> of the <strong>A,B</strong>, <strong>P,Q</strong> and <strong>X,Y</strong> files:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+<span class="kn">from</span> <span class="nn">ruffus.combinatorics</span> <span class="kn">import</span> <span class="o">*</span>
+
+<span class="hll"><span class="c"># Three sets of initial files</span>
+</span><span class="nd">@originate</span><span class="p">([</span> <span class="s">'a.start'</span><span class="p">,</span> <span class="s">'b.start'</span><span class="p">])</span>
+<span class="k">def</span> <span class="nf">create_initial_files_ab</span><span class="p">(</span><span class="n">output_file</span><span class="p">):</span>
+ <span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span> <span class="k">as</span> <span class="n">oo</span><span class="p">:</span> <span class="k">pass</span>
+
+<span class="nd">@originate</span><span class="p">([</span> <span class="s">'p.start'</span><span class="p">,</span> <span class="s">'q.start'</span><span class="p">])</span>
+<span class="k">def</span> <span class="nf">create_initial_files_pq</span><span class="p">(</span><span class="n">output_file</span><span class="p">):</span>
+ <span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span> <span class="k">as</span> <span class="n">oo</span><span class="p">:</span> <span class="k">pass</span>
+
+<span class="nd">@originate</span><span class="p">([</span> <span class="p">[</span><span class="s">'x.1_start'</span><span class="p">,</span> <span class="s">'x.2_start'</span><span class="p">],</span>
+ <span class="p">[</span><span class="s">'y.1_start'</span><span class="p">,</span> <span class="s">'y.2_start'</span><span class="p">]</span> <span class="p">])</span>
+<span class="k">def</span> <span class="nf">create_initial_files_xy</span><span class="p">(</span><span class="n">output_file</span><span class="p">):</span>
+ <span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span> <span class="k">as</span> <span class="n">oo</span><span class="p">:</span> <span class="k">pass</span>
+<span class="hll">
+</span><span class="c"># @product</span>
+<span class="hll"><span class="nd">@product</span><span class="p">(</span> <span class="n">create_initial_files_ab</span><span class="p">,</span> <span class="c"># Input</span>
+</span> <span class="n">formatter</span><span class="p">(</span><span class="s">"(.start)$"</span><span class="p">),</span> <span class="c"># match input file set # 1</span>
+
+<span class="hll"> <span class="n">create_initial_files_pq</span><span class="p">,</span> <span class="c"># Input</span>
+</span> <span class="n">formatter</span><span class="p">(</span><span class="s">"(.start)$"</span><span class="p">),</span> <span class="c"># match input file set # 2</span>
+
+<span class="hll"> <span class="n">create_initial_files_xy</span><span class="p">,</span> <span class="c"># Input</span>
+</span> <span class="n">formatter</span><span class="p">(</span><span class="s">"(.start)$"</span><span class="p">),</span> <span class="c"># match input file set # 3</span>
+<span class="hll">
+</span><span class="hll"> <span class="s">"{path[0][0]}/"</span> <span class="c"># Output Replacement string</span>
+</span><span class="hll"> <span class="s">"{basename[0][0]}_vs_"</span> <span class="c">#</span>
+</span><span class="hll"> <span class="s">"{basename[1][0]}_vs_"</span> <span class="c">#</span>
+</span> <span class="s">"{basename[2][0]}.product"</span><span class="p">,</span> <span class="c">#</span>
+<span class="hll">
+</span> <span class="s">"{path[0][0]}"</span><span class="p">,</span> <span class="c"># Extra parameter: path for 1st set of files, 1st file name</span>
+<span class="hll">
+</span><span class="hll"> <span class="p">[</span><span class="s">"{basename[0][0]}"</span><span class="p">,</span> <span class="c"># Extra parameter: basename for 1st set of files, 1st file name</span>
+</span><span class="hll"> <span class="s">"{basename[1][0]}"</span><span class="p">,</span> <span class="c"># 2nd</span>
+</span> <span class="s">"{basename[2][0]}"</span><span class="p">,</span> <span class="c"># 3rd</span>
+ <span class="p">])</span>
+<span class="k">def</span> <span class="nf">product_task</span><span class="p">(</span><span class="n">input_file</span><span class="p">,</span> <span class="n">output_parameter</span><span class="p">,</span> <span class="n">shared_path</span><span class="p">,</span> <span class="n">basenames</span><span class="p">):</span>
+ <span class="k">print</span> <span class="s">"# basenames = "</span><span class="p">,</span> <span class="s">" "</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">basenames</span><span class="p">)</span>
+ <span class="k">print</span> <span class="s">"input_parameter = "</span><span class="p">,</span> <span class="n">input_file</span>
+ <span class="k">print</span> <span class="s">"output_parameter = "</span><span class="p">,</span> <span class="n">output_parameter</span><span class="p">,</span> <span class="s">"</span><span class="se">\n</span><span class="s">"</span>
+
+
+<span class="c">#</span>
+<span class="c"># Run</span>
+<span class="c">#</span>
+<span class="n">pipeline_run</span><span class="p">(</span><span class="n">verbose</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p>This results in:</p>
+<blockquote>
+<div><div class="highlight-pycon"><div class="highlight"><pre><span class="gp">>>> </span><span class="n">pipeline_run</span><span class="p">(</span><span class="n">verbose</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
+<span class="hll">
+</span><span class="go"># basenames = a p x</span>
+<span class="go">input_parameter = ('a.start', 'p.start', 'x.start')</span>
+<span class="go">output_parameter = /home/lg/temp/a_vs_p_vs_x.product</span>
+<span class="hll">
+</span><span class="go"># basenames = a p y</span>
+<span class="go">input_parameter = ('a.start', 'p.start', 'y.start')</span>
+<span class="go">output_parameter = /home/lg/temp/a_vs_p_vs_y.product</span>
+<span class="hll">
+</span><span class="go"># basenames = a q x</span>
+<span class="go">input_parameter = ('a.start', 'q.start', 'x.start')</span>
+<span class="go">output_parameter = /home/lg/temp/a_vs_q_vs_x.product</span>
+<span class="hll">
+</span><span class="go"># basenames = a q y</span>
+<span class="go">input_parameter = ('a.start', 'q.start', 'y.start')</span>
+<span class="go">output_parameter = /home/lg/temp/a_vs_q_vs_y.product</span>
+<span class="hll">
+</span><span class="go"># basenames = b p x</span>
+<span class="go">input_parameter = ('b.start', 'p.start', 'x.start')</span>
+<span class="go">output_parameter = /home/lg/temp/b_vs_p_vs_x.product</span>
+<span class="hll">
+</span><span class="go"># basenames = b p y</span>
+<span class="go">input_parameter = ('b.start', 'p.start', 'y.start')</span>
+<span class="go">output_parameter = /home/lg/temp/b_vs_p_vs_y.product</span>
+<span class="hll">
+</span><span class="go"># basenames = b q x</span>
+<span class="go">input_parameter = ('b.start', 'q.start', 'x.start')</span>
+<span class="go">output_parameter = /home/lg/temp/b_vs_q_vs_x.product</span>
+<span class="hll">
+</span><span class="go"># basenames = b q y</span>
+<span class="go">input_parameter = ('b.start', 'q.start', 'y.start')</span>
+<span class="go">output_parameter = /home/lg/temp/b_vs_q_vs_y.product</span>
+</pre></div>
+</div>
+</div></blockquote>
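+<p>The nested indices in the replacement strings above follow an <em>[input set][file within that set]</em> convention:
+for example, <tt class="docutils literal"><span class="pre">{basename[2][0]}</span></tt> is the basename of the first file
+contributed by the third set of inputs. As a rough, plain-Python sketch of the substitution behind the 2 × 2 × 2 = 8 jobs
+above (illustrative only: the file lists are simplified stand-ins, and this is not how <em>Ruffus</em> performs the
+substitution internally):</p>
+<div class="highlight-python"><div class="highlight"><pre>import os
+from itertools import product
+
+# Simplified stand-ins for the three sets of input files in the example above
+ab = ['a.start', 'b.start']
+pq = ['p.start', 'q.start']
+xy = ['x.start', 'y.start']
+
+for job_inputs in product(ab, pq, xy):
+    # basename[set][file]: here each set contributes a single file (index 0)
+    basenames = [os.path.splitext(os.path.basename(f))[0] for f in job_inputs]
+    print "%s_vs_%s_vs_%s.product" % tuple(basenames)   # e.g. a_vs_p_vs_x.product
+</pre></div>
+</div>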
+</div></blockquote>
+</div>
+<div class="section" id="permute-all-k-tuple-orderings-of-inputs-without-repeats-using-permutations">
+<span id="new-manual-permutations"></span><h2>Permute all k-tuple orderings of inputs without repeats using <a class="reference internal" href="../../decorators/permutations.html#decorators-permutations"><em>@permutations</em></a><a class="headerlink" href="#permute-all-k-tuple-orderings-of-inputs-without-repeats-using-permutations" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><dl class="docutils">
+<dt>Generates the <strong>permutations</strong> of all the elements of the <strong>Input</strong> (e.g. <strong>A B C D</strong>):</dt>
+<dd><ul class="first last simple">
+<li>r-length tuples of <em>input</em> elements</li>
+<li>excluding repeated elements (<strong>A A</strong>)</li>
+<li>where the order within each tuple is significant (both <strong>A B</strong> and <strong>B A</strong> are generated).</li>
+</ul>
+</dd>
+</dl>
+<div class="highlight-pycon"><div class="highlight"><pre><span class="gp">>>> </span><span class="kn">from</span> <span class="nn">itertools</span> <span class="kn">import</span> <span class="n">permutations</span>
+<span class="hll"><span class="gp">>>> </span><span class="c"># permutations('ABCD', 2) --> AB AC AD BA BC BD CA CB CD DA DB DC</span>
+</span><span class="gp">>>> </span><span class="p">[</span> <span class="s">""</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">a</span><span class="p">)</span> <span class="k">for</span> <span class="n">a</span> <span class="ow">in</span> <span class="n">permutations</span><span class="p">(</span><span class="s">"ABCD"</span><span class="p">,</span> <span class="mi">2</span><span class="p">)]</span>
+<span class="go">['AB', 'AC', 'AD', 'BA', 'BC', 'BD', 'CA', 'CB', 'CD', 'DA', 'DB', 'DC']</span>
+</pre></div>
+</div>
+<p>The following example calculates the <strong>@permutations</strong> of the <strong>A,B,C,D</strong> files:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+<span class="kn">from</span> <span class="nn">ruffus.combinatorics</span> <span class="kn">import</span> <span class="o">*</span>
+
+<span class="c"># initial file pairs</span>
+<span class="nd">@originate</span><span class="p">([</span> <span class="p">[</span><span class="s">'A.1_start'</span><span class="p">,</span> <span class="s">'A.2_start'</span><span class="p">],</span>
+ <span class="p">[</span><span class="s">'B.1_start'</span><span class="p">,</span> <span class="s">'B.2_start'</span><span class="p">],</span>
+ <span class="p">[</span><span class="s">'C.1_start'</span><span class="p">,</span> <span class="s">'C.2_start'</span><span class="p">],</span>
+ <span class="p">[</span><span class="s">'D.1_start'</span><span class="p">,</span> <span class="s">'D.2_start'</span><span class="p">]])</span>
+<span class="k">def</span> <span class="nf">create_initial_files_ABCD</span><span class="p">(</span><span class="n">output_files</span><span class="p">):</span>
+ <span class="k">for</span> <span class="n">output_file</span> <span class="ow">in</span> <span class="n">output_files</span><span class="p">:</span>
+ <span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span> <span class="k">as</span> <span class="n">oo</span><span class="p">:</span> <span class="k">pass</span>
+
+<span class="hll"><span class="c"># @permutations</span>
+</span><span class="nd">@permutations</span><span class="p">(</span><span class="n">create_initial_files_ABCD</span><span class="p">,</span> <span class="c"># Input</span>
+ <span class="n">formatter</span><span class="p">(),</span> <span class="c"># match input files</span>
+
+<span class="hll"> <span class="c"># tuple of 2 at a time</span>
+</span> <span class="mi">2</span><span class="p">,</span>
+
+<span class="hll"> <span class="c"># Output Replacement string</span>
+</span> <span class="s">"{path[0][0]}/"</span>
+ <span class="s">"{basename[0][1]}_vs_"</span>
+ <span class="s">"{basename[1][1]}.permutations"</span><span class="p">,</span>
+
+<span class="hll"> <span class="c"># Extra parameter: path for 1st set of files, 1st file name</span>
+</span> <span class="s">"{path[0][0]}"</span><span class="p">,</span>
+
+<span class="hll"> <span class="c"># Extra parameter</span>
+</span><span class="hll"> <span class="p">[</span><span class="s">"{basename[0][0]}"</span><span class="p">,</span> <span class="c"># basename for 1st set of files, 1st file name</span>
+</span><span class="hll"> <span class="s">"{basename[1][0]}"</span><span class="p">,</span> <span class="c"># 2nd</span>
+</span> <span class="p">])</span>
+<span class="k">def</span> <span class="nf">permutations_task</span><span class="p">(</span><span class="n">input_file</span><span class="p">,</span> <span class="n">output_parameter</span><span class="p">,</span> <span class="n">shared_path</span><span class="p">,</span> <span class="n">basenames</span><span class="p">):</span>
+ <span class="k">print</span> <span class="s">" - "</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">basenames</span><span class="p">)</span>
+
+
+<span class="c">#</span>
+<span class="c"># Run</span>
+<span class="c">#</span>
+<span class="n">pipeline_run</span><span class="p">(</span><span class="n">verbose</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p>This results in:</p>
+<blockquote>
+<div><div class="highlight-pycon"><div class="highlight"><pre><span class="gp">>>> </span><span class="n">pipeline_run</span><span class="p">(</span><span class="n">verbose</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
+
+<span class="go">A - B</span>
+<span class="go">A - C</span>
+<span class="go">A - D</span>
+<span class="go">B - A</span>
+<span class="go">B - C</span>
+<span class="go">B - D</span>
+<span class="go">C - A</span>
+<span class="go">C - B</span>
+<span class="go">C - D</span>
+<span class="go">D - A</span>
+<span class="go">D - B</span>
+<span class="go">D - C</span>
+</pre></div>
+</div>
+</div></blockquote>
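+<p>As expected, taking the 4 inputs 2 at a time, where order matters and repeats are excluded, gives 4 × 3 = 12 jobs.</p>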
+</div></blockquote>
+</div>
+<div class="section" id="select-unordered-k-tuples-within-inputs-excluding-repeated-elements-using-combinations">
+<span id="new-manual-combinations"></span><h2>Select unordered k-tuples within inputs excluding repeated elements using <a class="reference internal" href="../../decorators/combinations.html#decorators-combinations"><em>@combinations</em></a><a class="headerlink" href="#select-unordered-k-tuples-within-inputs-excluding-repeated-elements-using-combinations" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><dl class="docutils">
+<dt>Generates the <strong>combinations</strong> of all the elements of the <strong>Input</strong> (e.g. <strong>A B C D</strong>):</dt>
+<dd><ul class="first last simple">
+<li>r-length tuples of <em>input</em> elements</li>
+<li>without repeated elements (<strong>A A</strong>)</li>
+<li>where the order within each tuple is irrelevant (either <strong>A B</strong> or <strong>B A</strong> is generated, but not both).</li>
+</ul>
+</dd>
+</dl>
+<p><a class="reference internal" href="../../decorators/combinations.html#decorators-combinations"><em>@combinations</em></a> can be useful, for example, in calculating a transition probability matrix
+for a set of states. The diagonals are meaningless “self-self” transitions which are excluded.</p>
+<div class="highlight-pycon"><div class="highlight"><pre><span class="gp">>>> </span><span class="kn">from</span> <span class="nn">itertools</span> <span class="kn">import</span> <span class="n">combinations</span>
+<span class="hll"><span class="gp">>>> </span><span class="c"># combinations('ABCD', 3) --> ABC ABD ACD BCD</span>
+</span><span class="gp">>>> </span><span class="p">[</span> <span class="s">""</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">a</span><span class="p">)</span> <span class="k">for</span> <span class="n">a</span> <span class="ow">in</span> <span class="n">combinations</span><span class="p">(</span><span class="s">"ABCD"</span><span class="p">,</span> <span class="mi">3</span><span class="p">)]</span>
+<span class="go">['ABC', 'ABD', 'ACD', 'BCD']</span>
+</pre></div>
+</div>
+<p>This example calculates the <strong>@combinations</strong> of the <strong>A,B,C,D</strong> files:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+<span class="kn">from</span> <span class="nn">ruffus.combinatorics</span> <span class="kn">import</span> <span class="o">*</span>
+
+<span class="c"># initial file pairs</span>
+<span class="nd">@originate</span><span class="p">([</span> <span class="p">[</span><span class="s">'A.1_start'</span><span class="p">,</span> <span class="s">'A.2_start'</span><span class="p">],</span>
+ <span class="p">[</span><span class="s">'B.1_start'</span><span class="p">,</span> <span class="s">'B.2_start'</span><span class="p">],</span>
+ <span class="p">[</span><span class="s">'C.1_start'</span><span class="p">,</span> <span class="s">'C.2_start'</span><span class="p">],</span>
+ <span class="p">[</span><span class="s">'D.1_start'</span><span class="p">,</span> <span class="s">'D.2_start'</span><span class="p">]])</span>
+<span class="k">def</span> <span class="nf">create_initial_files_ABCD</span><span class="p">(</span><span class="n">output_files</span><span class="p">):</span>
+ <span class="k">for</span> <span class="n">output_file</span> <span class="ow">in</span> <span class="n">output_files</span><span class="p">:</span>
+ <span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span> <span class="k">as</span> <span class="n">oo</span><span class="p">:</span> <span class="k">pass</span>
+
+<span class="hll"><span class="c"># @combinations</span>
+</span><span class="nd">@combinations</span><span class="p">(</span><span class="n">create_initial_files_ABCD</span><span class="p">,</span> <span class="c"># Input</span>
+ <span class="n">formatter</span><span class="p">(),</span> <span class="c"># match input files</span>
+
+<span class="hll"> <span class="c"># tuple of 3 at a time</span>
+</span> <span class="mi">3</span><span class="p">,</span>
+
+<span class="hll"> <span class="c"># Output Replacement string</span>
+</span> <span class="s">"{path[0][0]}/"</span>
+ <span class="s">"{basename[0][1]}_vs_"</span>
+ <span class="s">"{basename[1][1]}_vs_"</span>
+ <span class="s">"{basename[2][1]}.combinations"</span><span class="p">,</span>
+<span class="hll">
+</span> <span class="c"># Extra parameter: path for 1st set of files, 1st file name</span>
+ <span class="s">"{path[0][0]}"</span><span class="p">,</span>
+<span class="hll">
+</span><span class="hll"> <span class="c"># Extra parameter</span>
+</span><span class="hll"> <span class="p">[</span><span class="s">"{basename[0][0]}"</span><span class="p">,</span> <span class="c"># basename for 1st set of files, 1st file name</span>
+</span> <span class="s">"{basename[1][0]}"</span><span class="p">,</span> <span class="c"># 2nd</span>
+ <span class="s">"{basename[2][0]}"</span><span class="p">,</span> <span class="c"># 3rd</span>
+ <span class="p">])</span>
+<span class="k">def</span> <span class="nf">combinations_task</span><span class="p">(</span><span class="n">input_file</span><span class="p">,</span> <span class="n">output_parameter</span><span class="p">,</span> <span class="n">shared_path</span><span class="p">,</span> <span class="n">basenames</span><span class="p">):</span>
+ <span class="k">print</span> <span class="s">" - "</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">basenames</span><span class="p">)</span>
+
+
+<span class="c">#</span>
+<span class="c"># Run</span>
+<span class="c">#</span>
+<span class="n">pipeline_run</span><span class="p">(</span><span class="n">verbose</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p>This results in:</p>
+<blockquote>
+<div><div class="highlight-pycon"><div class="highlight"><pre><span class="gp">>>> </span><span class="n">pipeline_run</span><span class="p">(</span><span class="n">verbose</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
+<span class="go">A - B - C</span>
+<span class="go">A - B - D</span>
+<span class="go">A - C - D</span>
+<span class="go">B - C - D</span>
+</pre></div>
+</div>
+</div></blockquote>
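+<p>Only 4 jobs are run: choosing 3 of the 4 inputs without regard to order gives 4 choose 3 = 4.</p>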
+</div></blockquote>
+</div>
+<div class="section" id="select-unordered-k-tuples-within-inputs-including-repeated-elements-with-combinations-with-replacement">
+<span id="new-manual-combinations-with-replacement"></span><h2>Select unordered k-tuples within inputs <em>including</em> repeated elements with <a class="reference internal" href="../../decorators/combinations_with_replacement.html#decorators-combinations-with-replacement"><em>@combinations_with_replacement</em></a><a class="headerlink" href="#select-unordered-k-tuples-within-inputs-including-repeated-elements-with-combinations-with-replacement" title="Permalink to this headline">¶< [...]
+<blockquote>
+<div><dl class="docutils">
+<dt>Generates the <strong>combinations_with_replacement</strong> of all the elements of the <strong>Input</strong> (e.g. <strong>A B C D</strong>):</dt>
+<dd><ul class="first last simple">
+<li>r-length tuples of <em>input</em> elements</li>
+<li>including repeated elements (<strong>A A</strong>)</li>
+<li>where the order within each tuple is irrelevant (either <strong>A B</strong> or <strong>B A</strong> is generated, but not both).</li>
+</ul>
+</dd>
+</dl>
+<p><a class="reference internal" href="../../decorators/combinations_with_replacement.html#decorators-combinations-with-replacement"><em>@combinations_with_replacement</em></a> can be useful,
+for example, in bioinformatics for finding evolutionary relationships between genetic elements such as proteins
+and genes. Self-self comparisons can be used as a baseline for scaling similarity scores.</p>
+<div class="highlight-pycon"><div class="highlight"><pre><span class="gp">>>> </span><span class="kn">from</span> <span class="nn">itertools</span> <span class="kn">import</span> <span class="n">combinations_with_replacement</span>
+<span class="hll"><span class="gp">>>> </span><span class="c"># combinations_with_replacement('ABCD', 2) --> AA AB AC AD BB BC BD CC CD DD</span>
+</span><span class="gp">>>> </span><span class="p">[</span> <span class="s">""</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">a</span><span class="p">)</span> <span class="k">for</span> <span class="n">a</span> <span class="ow">in</span> <span class="n">combinations_with_replacement</span><span class="p">(</span><span class="s">'ABCD'</span><span class="p">,</span> <span class="mi">2</span><span class="p">)]</span>
+<span class="go">['AA', 'AB', 'AC', 'AD', 'BB', 'BC', 'BD', 'CC', 'CD', 'DD']</span>
+</pre></div>
+</div>
+<p>This example calculates the <strong>@combinations_with_replacement</strong> of the <strong>A,B,C,D</strong> files:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+<span class="kn">from</span> <span class="nn">ruffus.combinatorics</span> <span class="kn">import</span> <span class="o">*</span>
+
+<span class="c"># initial file pairs</span>
+<span class="nd">@originate</span><span class="p">([</span> <span class="p">[</span><span class="s">'A.1_start'</span><span class="p">,</span> <span class="s">'A.2_start'</span><span class="p">],</span>
+ <span class="p">[</span><span class="s">'B.1_start'</span><span class="p">,</span> <span class="s">'B.2_start'</span><span class="p">],</span>
+ <span class="p">[</span><span class="s">'C.1_start'</span><span class="p">,</span> <span class="s">'C.2_start'</span><span class="p">],</span>
+ <span class="p">[</span><span class="s">'D.1_start'</span><span class="p">,</span> <span class="s">'D.2_start'</span><span class="p">]])</span>
+<span class="k">def</span> <span class="nf">create_initial_files_ABCD</span><span class="p">(</span><span class="n">output_files</span><span class="p">):</span>
+ <span class="k">for</span> <span class="n">output_file</span> <span class="ow">in</span> <span class="n">output_files</span><span class="p">:</span>
+ <span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span> <span class="k">as</span> <span class="n">oo</span><span class="p">:</span> <span class="k">pass</span>
+
+<span class="hll"><span class="c"># @combinations_with_replacement</span>
+</span><span class="nd">@combinations_with_replacement</span><span class="p">(</span><span class="n">create_initial_files_ABCD</span><span class="p">,</span> <span class="c"># Input</span>
+ <span class="n">formatter</span><span class="p">(),</span> <span class="c"># match input files</span>
+
+<span class="hll"> <span class="c"># tuple of 2 at a time</span>
+</span> <span class="mi">2</span><span class="p">,</span>
+
+<span class="hll"> <span class="c"># Output Replacement string</span>
+</span> <span class="s">"{path[0][0]}/"</span>
+ <span class="s">"{basename[0][1]}_vs_"</span>
+ <span class="s">"{basename[1][1]}.combinations_with_replacement"</span><span class="p">,</span>
+
+<span class="hll"> <span class="c"># Extra parameter: path for 1st set of files, 1st file name</span>
+</span> <span class="s">"{path[0][0]}"</span><span class="p">,</span>
+
+<span class="hll"> <span class="c"># Extra parameter</span>
+</span><span class="hll"> <span class="p">[</span><span class="s">"{basename[0][0]}"</span><span class="p">,</span> <span class="c"># basename for 1st set of files, 1st file name</span>
+</span><span class="hll"> <span class="s">"{basename[1][0]}"</span><span class="p">,</span> <span class="c"># 2rd</span>
+</span> <span class="p">])</span>
+<span class="k">def</span> <span class="nf">combinations_with_replacement_task</span><span class="p">(</span><span class="n">input_file</span><span class="p">,</span> <span class="n">output_parameter</span><span class="p">,</span> <span class="n">shared_path</span><span class="p">,</span> <span class="n">basenames</span><span class="p">):</span>
+ <span class="k">print</span> <span class="s">" - "</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">basenames</span><span class="p">)</span>
+
+
+<span class="c">#</span>
+<span class="c"># Run</span>
+<span class="c">#</span>
+<span class="n">pipeline_run</span><span class="p">(</span><span class="n">verbose</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p>This results in:</p>
+<blockquote>
+<div><div class="highlight-pycon"><div class="highlight"><pre><span class="gp">>>> </span><span class="n">pipeline_run</span><span class="p">(</span><span class="n">verbose</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
+<span class="go">A - A</span>
+<span class="go">A - B</span>
+<span class="go">A - C</span>
+<span class="go">A - D</span>
+<span class="go">B - B</span>
+<span class="go">B - C</span>
+<span class="go">B - D</span>
+<span class="go">C - C</span>
+<span class="go">C - D</span>
+<span class="go">D - D</span>
+</pre></div>
+</div>
+</div></blockquote>
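+<p>Ten jobs are run: 4 inputs taken 2 at a time, ignoring order but allowing self-pairings, gives (4 × 5) / 2 = 10.</p>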
+</div></blockquote>
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="../../contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#"><strong>Chapter 17</strong>: <tt class="docutils literal"><span class="pre">@combinations</span></tt>, <tt class="docutils literal"><span class="pre">@permutations</span></tt> and all versus all <tt class="docutils literal"><span class="pre">@product</span></tt></a><ul>
+<li><a class="reference internal" href="#overview">Overview</a></li>
+<li><a class="reference internal" href="#generating-output-with-formatter">Generating output with <tt class="docutils literal"><span class="pre">formatter()</span></tt></a></li>
+<li><a class="reference internal" href="#all-vs-all-comparisons-with-product">All vs all comparisons with <tt class="docutils literal"><span class="pre">@product</span></tt></a></li>
+<li><a class="reference internal" href="#permute-all-k-tuple-orderings-of-inputs-without-repeats-using-permutations">Permute all k-tuple orderings of inputs without repeats using <tt class="docutils literal"><span class="pre">@permutations</span></tt></a></li>
+<li><a class="reference internal" href="#select-unordered-k-tuples-within-inputs-excluding-repeated-elements-using-combinations">Select unordered k-tuples within inputs excluding repeated elements using <tt class="docutils literal"><span class="pre">@combinations</span></tt></a></li>
+<li><a class="reference internal" href="#select-unordered-k-tuples-within-inputs-including-repeated-elements-with-combinations-with-replacement">Select unordered k-tuples within inputs <em>including</em> repeated elements with <tt class="docutils literal"><span class="pre">@combinations_with_replacement</span></tt></a></li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="subdivide_collate.html"
+ title="previous chapter"><strong>Chapter 16</strong>: <tt class="docutils literal"><span class="pre">@subdivide</span></tt> tasks to run efficiently and regroup with <tt class="docutils literal"><span class="pre">@collate</span></tt></a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="active_if.html"
+ title="next chapter"><strong>Chapter 18</strong>: Turning parts of the pipeline on and off at runtime with <tt class="docutils literal"><span class="pre">@active_if</span></tt></a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../../_sources/tutorials/new_tutorial/combinatorics.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="../../decorators/decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="../../decorators/originate.html">@originate</a> </li>
+ <li><a href="../../decorators/split.html">@split</a> </li>
+ <li><a href="../../decorators/transform.html">@transform</a> </li>
+ <li><a href="../../decorators/merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="../../decorators/subdivide.html">@subdivide</a> </li>
+ <li><a href="../../decorators/transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="../../decorators/collate.html">@collate</a> </li>
+ <li><a href="../../decorators/collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="../../decorators/graphviz.html">@graphviz</a> </li>
+ <li><a href="../../decorators/mkdir.html">@mkdir</a> </li>
+ <li><a href="../../decorators/follows.html">@follows / mkdir</a> </li>
+ <li><a href="../../decorators/posttask.html">@posttask touch_file</a> </li>
+ <li><a href="../../decorators/active_if.html">@active_if</a> </li>
+ <li><a href="../../decorators/jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="../../decorators/product.html">@product </a> </li>
+ <li><a href="../../decorators/permutations.html">@permutations </a> </li>
+ <li><a href="../../decorators/combinations.html">@combinations </a> </li>
+ <li><a href="../../decorators/combinations_with_replacement.html">@combinations_with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="../../decorators/decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="../../decorators/files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="../../decorators/parallel.html">@parallel</a> </li>
+ <li><a href="../../decorators/check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="../../decorators/indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="active_if.html" title="Chapter 18: Turning parts of the pipeline on and off at runtime with @active_if"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="subdivide_collate.html" title="Chapter 16: @subdivide tasks to run efficiently and regroup with @collate"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="introduction.html">Manual</a> / </li>
+ <li><a href="manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/tutorials/new_tutorial/combinatorics_code.html b/doc/_build/html/tutorials/new_tutorial/combinatorics_code.html
new file mode 100644
index 0000000..92b5ac0
--- /dev/null
+++ b/doc/_build/html/tutorials/new_tutorial/combinatorics_code.html
@@ -0,0 +1,488 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>Chapter 17: Python Code for @combinations, @permutations and all versus all @product — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../../index.html" />
+ <link rel="next" title="Chapter 20: Python Code for Manipulating task inputs via string substitution using inputs() and add_inputs()" href="inputs_code.html" />
+ <link rel="prev" title="Chapter 16: Python Code for @subdivide tasks to run efficiently and regroup with @collate" href="subdivide_collate_code.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="inputs_code.html" title="Chapter 20: Python Code for Manipulating task inputs via string substitution using inputs() and add_inputs()"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="subdivide_collate_code.html" title="Chapter 16: Python Code for @subdivide tasks to run efficiently and regroup with @collate"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="introduction.html">Manual</a> / </li>
+ <li><a href="manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <div class="section" id="new-manual-combinatorics-chapter-num-python-code-for-combinations-permutations-and-all-versus-all-product">
+<span id="new-manual-combinatorics-code"></span><h1><strong>Chapter 17</strong>: Python Code for <a class="reference internal" href="../../decorators/combinations.html#decorators-combinations"><em>@combinations</em></a>, <a class="reference internal" href="../../decorators/permutations.html#decorators-permutations"><em>@permutations</em></a> and all versus all <a class="reference internal" href="../../decorators/product.html#decorators-product"><em>@product</em></a><a class=" [...]
+<div class="admonition seealso">
+<p class="first admonition-title">See also</p>
+<ul class="last simple">
+<li><a class="reference internal" href="manual_contents.html#new-manual-table-of-contents"><em>Manual Table of Contents</em></a></li>
+<li><a class="reference internal" href="../../decorators/combinations_with_replacement.html#decorators-combinations-with-replacement"><em>@combinations_with_replacement</em></a></li>
+<li><a class="reference internal" href="../../decorators/combinations.html#decorators-combinations"><em>@combinations</em></a></li>
+<li><a class="reference internal" href="../../decorators/permutations.html#decorators-permutations"><em>@permutations</em></a></li>
+<li><a class="reference internal" href="../../decorators/product.html#decorators-product"><em>@product</em></a></li>
+<li>Back to <strong>Chapter 17</strong>: <a class="reference internal" href="combinatorics.html#new-manual-combinatorics"><em>@combinations, @permutations and all versus all @product</em></a></li>
+</ul>
+</div>
+<div class="section" id="example-code-for-product">
+<h2>Example code for <a class="reference internal" href="../../decorators/product.html#decorators-product"><em>@product</em></a><a class="headerlink" href="#example-code-for-product" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+<span class="kn">from</span> <span class="nn">ruffus.combinatorics</span> <span class="kn">import</span> <span class="o">*</span>
+
+<span class="hll"><span class="c"># Three sets of initial files</span>
+</span><span class="nd">@originate</span><span class="p">([</span> <span class="s">'a.start'</span><span class="p">,</span> <span class="s">'b.start'</span><span class="p">])</span>
+<span class="k">def</span> <span class="nf">create_initial_files_ab</span><span class="p">(</span><span class="n">output_file</span><span class="p">):</span>
+ <span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span> <span class="k">as</span> <span class="n">oo</span><span class="p">:</span> <span class="k">pass</span>
+
+<span class="nd">@originate</span><span class="p">([</span> <span class="s">'p.start'</span><span class="p">,</span> <span class="s">'q.start'</span><span class="p">])</span>
+<span class="k">def</span> <span class="nf">create_initial_files_pq</span><span class="p">(</span><span class="n">output_file</span><span class="p">):</span>
+ <span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span> <span class="k">as</span> <span class="n">oo</span><span class="p">:</span> <span class="k">pass</span>
+
+<span class="nd">@originate</span><span class="p">([</span> <span class="p">[</span><span class="s">'x.1_start'</span><span class="p">,</span> <span class="s">'x.2_start'</span><span class="p">],</span>
+ <span class="p">[</span><span class="s">'y.1_start'</span><span class="p">,</span> <span class="s">'y.2_start'</span><span class="p">]</span> <span class="p">])</span>
+<span class="k">def</span> <span class="nf">create_initial_files_xy</span><span class="p">(</span><span class="n">output_file</span><span class="p">):</span>
+ <span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span> <span class="k">as</span> <span class="n">oo</span><span class="p">:</span> <span class="k">pass</span>
+<span class="hll">
+</span><span class="c"># @product</span>
+<span class="hll"><span class="nd">@product</span><span class="p">(</span> <span class="n">create_initial_files_ab</span><span class="p">,</span> <span class="c"># Input</span>
+</span> <span class="n">formatter</span><span class="p">(</span><span class="s">"(.start)$"</span><span class="p">),</span> <span class="c"># match input file set # 1</span>
+
+<span class="hll"> <span class="n">create_initial_files_pq</span><span class="p">,</span> <span class="c"># Input</span>
+</span> <span class="n">formatter</span><span class="p">(</span><span class="s">"(.start)$"</span><span class="p">),</span> <span class="c"># match input file set # 2</span>
+
+<span class="hll"> <span class="n">create_initial_files_xy</span><span class="p">,</span> <span class="c"># Input</span>
+</span> <span class="n">formatter</span><span class="p">(</span><span class="s">"(.start)$"</span><span class="p">),</span> <span class="c"># match input file set # 3</span>
+<span class="hll">
+</span><span class="hll"> <span class="s">"{path[0][0]}/"</span> <span class="c"># Output Replacement string</span>
+</span><span class="hll"> <span class="s">"{basename[0][0]}_vs_"</span> <span class="c">#</span>
+</span><span class="hll"> <span class="s">"{basename[1][0]}_vs_"</span> <span class="c">#</span>
+</span> <span class="s">"{basename[2][0]}.product"</span><span class="p">,</span> <span class="c">#</span>
+<span class="hll">
+</span> <span class="s">"{path[0][0]}"</span><span class="p">,</span> <span class="c"># Extra parameter: path for 1st set of files, 1st file name</span>
+<span class="hll">
+</span><span class="hll"> <span class="p">[</span><span class="s">"{basename[0][0]}"</span><span class="p">,</span> <span class="c"># Extra parameter: basename for 1st set of files, 1st file name</span>
+</span><span class="hll"> <span class="s">"{basename[1][0]}"</span><span class="p">,</span> <span class="c"># 2nd</span>
+</span> <span class="s">"{basename[2][0]}"</span><span class="p">,</span> <span class="c"># 3rd</span>
+ <span class="p">])</span>
+<span class="k">def</span> <span class="nf">product_task</span><span class="p">(</span><span class="n">input_file</span><span class="p">,</span> <span class="n">output_parameter</span><span class="p">,</span> <span class="n">shared_path</span><span class="p">,</span> <span class="n">basenames</span><span class="p">):</span>
+ <span class="k">print</span> <span class="s">"# basenames = "</span><span class="p">,</span> <span class="s">" "</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">basenames</span><span class="p">)</span>
+ <span class="k">print</span> <span class="s">"input_parameter = "</span><span class="p">,</span> <span class="n">input_file</span>
+ <span class="k">print</span> <span class="s">"output_parameter = "</span><span class="p">,</span> <span class="n">output_parameter</span><span class="p">,</span> <span class="s">"</span><span class="se">\n</span><span class="s">"</span>
+
+
+<span class="c">#</span>
+<span class="c"># Run</span>
+<span class="c">#</span>
+<span class="n">pipeline_run</span><span class="p">(</span><span class="n">verbose</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
+</pre></div>
+</div>
+<p>This results in:</p>
+<blockquote>
+<div><div class="highlight-pycon"><div class="highlight"><pre><span class="gp">>>> </span><span class="n">pipeline_run</span><span class="p">(</span><span class="n">verbose</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
+<span class="hll">
+</span><span class="go"># basenames = a p x</span>
+<span class="go">input_parameter = ('a.start', 'p.start', 'x.start')</span>
+<span class="go">output_parameter = /home/lg/temp/a_vs_p_vs_x.product</span>
+<span class="hll">
+</span><span class="go"># basenames = a p y</span>
+<span class="go">input_parameter = ('a.start', 'p.start', 'y.start')</span>
+<span class="go">output_parameter = /home/lg/temp/a_vs_p_vs_y.product</span>
+<span class="hll">
+</span><span class="go"># basenames = a q x</span>
+<span class="go">input_parameter = ('a.start', 'q.start', 'x.start')</span>
+<span class="go">output_parameter = /home/lg/temp/a_vs_q_vs_x.product</span>
+<span class="hll">
+</span><span class="go"># basenames = a q y</span>
+<span class="go">input_parameter = ('a.start', 'q.start', 'y.start')</span>
+<span class="go">output_parameter = /home/lg/temp/a_vs_q_vs_y.product</span>
+<span class="hll">
+</span><span class="go"># basenames = b p x</span>
+<span class="go">input_parameter = ('b.start', 'p.start', 'x.start')</span>
+<span class="go">output_parameter = /home/lg/temp/b_vs_p_vs_x.product</span>
+<span class="hll">
+</span><span class="go"># basenames = b p y</span>
+<span class="go">input_parameter = ('b.start', 'p.start', 'y.start')</span>
+<span class="go">output_parameter = /home/lg/temp/b_vs_p_vs_y.product</span>
+<span class="hll">
+</span><span class="go"># basenames = b q x</span>
+<span class="go">input_parameter = ('b.start', 'q.start', 'x.start')</span>
+<span class="go">output_parameter = /home/lg/temp/b_vs_q_vs_x.product</span>
+<span class="hll">
+</span><span class="go"># basenames = b q y</span>
+<span class="go">input_parameter = ('b.start', 'q.start', 'y.start')</span>
+<span class="go">output_parameter = /home/lg/temp/b_vs_q_vs_y.product</span>
+</pre></div>
+</div>
+</div></blockquote>
+</div></blockquote>
+</div>
+<div class="section" id="example-code-for-permutations">
+<h2>Example code for <a class="reference internal" href="../../decorators/permutations.html#decorators-permutations"><em>@permutations</em></a><a class="headerlink" href="#example-code-for-permutations" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+<span class="kn">from</span> <span class="nn">ruffus.combinatorics</span> <span class="kn">import</span> <span class="o">*</span>
+
+<span class="c"># initial file pairs</span>
+<span class="nd">@originate</span><span class="p">([</span> <span class="p">[</span><span class="s">'A.1_start'</span><span class="p">,</span> <span class="s">'A.2_start'</span><span class="p">],</span>
+ <span class="p">[</span><span class="s">'B.1_start'</span><span class="p">,</span> <span class="s">'B.2_start'</span><span class="p">],</span>
+ <span class="p">[</span><span class="s">'C.1_start'</span><span class="p">,</span> <span class="s">'C.2_start'</span><span class="p">],</span>
+ <span class="p">[</span><span class="s">'D.1_start'</span><span class="p">,</span> <span class="s">'D.2_start'</span><span class="p">]])</span>
+<span class="k">def</span> <span class="nf">create_initial_files_ABCD</span><span class="p">(</span><span class="n">output_files</span><span class="p">):</span>
+ <span class="k">for</span> <span class="n">output_file</span> <span class="ow">in</span> <span class="n">output_files</span><span class="p">:</span>
+ <span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span> <span class="k">as</span> <span class="n">oo</span><span class="p">:</span> <span class="k">pass</span>
+
+<span class="hll"><span class="c"># @permutations</span>
+</span><span class="nd">@permutations</span><span class="p">(</span><span class="n">create_initial_files_ABCD</span><span class="p">,</span> <span class="c"># Input</span>
+ <span class="n">formatter</span><span class="p">(),</span> <span class="c"># match input files</span>
+
+<span class="hll"> <span class="c"># tuple of 2 at a time</span>
+</span> <span class="mi">2</span><span class="p">,</span>
+
+<span class="hll"> <span class="c"># Output Replacement string</span>
+</span> <span class="s">"{path[0][0]}/"</span>
+ <span class="s">"{basename[0][1]}_vs_"</span>
+ <span class="s">"{basename[1][1]}.permutations"</span><span class="p">,</span>
+
+<span class="hll"> <span class="c"># Extra parameter: path for 1st set of files, 1st file name</span>
+</span> <span class="s">"{path[0][0]}"</span><span class="p">,</span>
+
+<span class="hll"> <span class="c"># Extra parameter</span>
+</span><span class="hll"> <span class="p">[</span><span class="s">"{basename[0][0]}"</span><span class="p">,</span> <span class="c"># basename for 1st set of files, 1st file name</span>
+</span><span class="hll"> <span class="s">"{basename[1][0]}"</span><span class="p">,</span> <span class="c"># 2nd</span>
+</span> <span class="p">])</span>
+<span class="k">def</span> <span class="nf">permutations_task</span><span class="p">(</span><span class="n">input_file</span><span class="p">,</span> <span class="n">output_parameter</span><span class="p">,</span> <span class="n">shared_path</span><span class="p">,</span> <span class="n">basenames</span><span class="p">):</span>
+ <span class="k">print</span> <span class="s">" - "</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">basenames</span><span class="p">)</span>
+
+
+<span class="c">#</span>
+<span class="c"># Run</span>
+<span class="c">#</span>
+<span class="n">pipeline_run</span><span class="p">(</span><span class="n">verbose</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
+</pre></div>
+</div>
+<p>This results in:</p>
+<blockquote>
+<div><div class="highlight-pycon"><div class="highlight"><pre><span class="gp">>>> </span><span class="n">pipeline_run</span><span class="p">(</span><span class="n">verbose</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
+
+<span class="go">A - B</span>
+<span class="go">A - C</span>
+<span class="go">A - D</span>
+<span class="go">B - A</span>
+<span class="go">B - C</span>
+<span class="go">B - D</span>
+<span class="go">C - A</span>
+<span class="go">C - B</span>
+<span class="go">C - D</span>
+<span class="go">D - A</span>
+<span class="go">D - B</span>
+<span class="go">D - C</span>
+</pre></div>
+</div>
+</div></blockquote>
+</div></blockquote>
+</div>
+<div class="section" id="example-code-for-combinations">
+<h2>Example code for <a class="reference internal" href="../../decorators/combinations.html#decorators-combinations"><em>@combinations</em></a><a class="headerlink" href="#example-code-for-combinations" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+<span class="kn">from</span> <span class="nn">ruffus.combinatorics</span> <span class="kn">import</span> <span class="o">*</span>
+
+<span class="c"># initial file pairs</span>
+<span class="nd">@originate</span><span class="p">([</span> <span class="p">[</span><span class="s">'A.1_start'</span><span class="p">,</span> <span class="s">'A.2_start'</span><span class="p">],</span>
+ <span class="p">[</span><span class="s">'B.1_start'</span><span class="p">,</span> <span class="s">'B.2_start'</span><span class="p">],</span>
+ <span class="p">[</span><span class="s">'C.1_start'</span><span class="p">,</span> <span class="s">'C.2_start'</span><span class="p">],</span>
+ <span class="p">[</span><span class="s">'D.1_start'</span><span class="p">,</span> <span class="s">'D.2_start'</span><span class="p">]])</span>
+<span class="k">def</span> <span class="nf">create_initial_files_ABCD</span><span class="p">(</span><span class="n">output_files</span><span class="p">):</span>
+ <span class="k">for</span> <span class="n">output_file</span> <span class="ow">in</span> <span class="n">output_files</span><span class="p">:</span>
+ <span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span> <span class="k">as</span> <span class="n">oo</span><span class="p">:</span> <span class="k">pass</span>
+
+<span class="hll"><span class="c"># @combinations</span>
+</span><span class="nd">@combinations</span><span class="p">(</span><span class="n">create_initial_files_ABCD</span><span class="p">,</span> <span class="c"># Input</span>
+ <span class="n">formatter</span><span class="p">(),</span> <span class="c"># match input files</span>
+
+<span class="hll"> <span class="c"># tuple of 3 at a time</span>
+</span> <span class="mi">3</span><span class="p">,</span>
+
+<span class="hll"> <span class="c"># Output Replacement string</span>
+</span> <span class="s">"{path[0][0]}/"</span>
+ <span class="s">"{basename[0][1]}_vs_"</span>
+ <span class="s">"{basename[1][1]}_vs_"</span>
+ <span class="s">"{basename[2][1]}.combinations"</span><span class="p">,</span>
+<span class="hll">
+</span> <span class="c"># Extra parameter: path for 1st set of files, 1st file name</span>
+ <span class="s">"{path[0][0]}"</span><span class="p">,</span>
+<span class="hll">
+</span><span class="hll"> <span class="c"># Extra parameter</span>
+</span><span class="hll"> <span class="p">[</span><span class="s">"{basename[0][0]}"</span><span class="p">,</span> <span class="c"># basename for 1st set of files, 1st file name</span>
+</span> <span class="s">"{basename[1][0]}"</span><span class="p">,</span> <span class="c"># 2nd</span>
+ <span class="s">"{basename[2][0]}"</span><span class="p">,</span> <span class="c"># 3rd</span>
+ <span class="p">])</span>
+<span class="k">def</span> <span class="nf">combinations_task</span><span class="p">(</span><span class="n">input_file</span><span class="p">,</span> <span class="n">output_parameter</span><span class="p">,</span> <span class="n">shared_path</span><span class="p">,</span> <span class="n">basenames</span><span class="p">):</span>
+ <span class="k">print</span> <span class="s">" - "</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">basenames</span><span class="p">)</span>
+
+
+<span class="c">#</span>
+<span class="c"># Run</span>
+<span class="c">#</span>
+<span class="n">pipeline_run</span><span class="p">(</span><span class="n">verbose</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
+</pre></div>
+</div>
+<p>This results in:</p>
+<blockquote>
+<div><div class="highlight-pycon"><div class="highlight"><pre><span class="gp">>>> </span><span class="n">pipeline_run</span><span class="p">(</span><span class="n">verbose</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
+<span class="go">A - B - C</span>
+<span class="go">A - B - D</span>
+<span class="go">A - C - D</span>
+<span class="go">B - C - D</span>
+</pre></div>
+</div>
+</div></blockquote>
+</div></blockquote>
+</div>
+<div class="section" id="example-code-for-combinations-with-replacement">
+<h2>Example code for <a class="reference internal" href="../../decorators/combinations_with_replacement.html#decorators-combinations-with-replacement"><em>@combinations_with_replacement</em></a><a class="headerlink" href="#example-code-for-combinations-with-replacement" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+<span class="kn">from</span> <span class="nn">ruffus.combinatorics</span> <span class="kn">import</span> <span class="o">*</span>
+
+<span class="c"># initial file pairs</span>
+<span class="nd">@originate</span><span class="p">([</span> <span class="p">[</span><span class="s">'A.1_start'</span><span class="p">,</span> <span class="s">'A.2_start'</span><span class="p">],</span>
+ <span class="p">[</span><span class="s">'B.1_start'</span><span class="p">,</span> <span class="s">'B.2_start'</span><span class="p">],</span>
+ <span class="p">[</span><span class="s">'C.1_start'</span><span class="p">,</span> <span class="s">'C.2_start'</span><span class="p">],</span>
+ <span class="p">[</span><span class="s">'D.1_start'</span><span class="p">,</span> <span class="s">'D.2_start'</span><span class="p">]])</span>
+<span class="k">def</span> <span class="nf">create_initial_files_ABCD</span><span class="p">(</span><span class="n">output_files</span><span class="p">):</span>
+ <span class="k">for</span> <span class="n">output_file</span> <span class="ow">in</span> <span class="n">output_files</span><span class="p">:</span>
+ <span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span> <span class="k">as</span> <span class="n">oo</span><span class="p">:</span> <span class="k">pass</span>
+
+<span class="hll"><span class="c"># @combinations_with_replacement</span>
+</span><span class="nd">@combinations_with_replacement</span><span class="p">(</span><span class="n">create_initial_files_ABCD</span><span class="p">,</span> <span class="c"># Input</span>
+ <span class="n">formatter</span><span class="p">(),</span> <span class="c"># match input files</span>
+
+<span class="hll"> <span class="c"># tuple of 2 at a time</span>
+</span> <span class="mi">2</span><span class="p">,</span>
+
+<span class="hll"> <span class="c"># Output Replacement string</span>
+</span> <span class="s">"{path[0][0]}/"</span>
+ <span class="s">"{basename[0][1]}_vs_"</span>
+ <span class="s">"{basename[1][1]}.combinations_with_replacement"</span><span class="p">,</span>
+
+<span class="hll"> <span class="c"># Extra parameter: path for 1st set of files, 1st file name</span>
+</span> <span class="s">"{path[0][0]}"</span><span class="p">,</span>
+
+<span class="hll"> <span class="c"># Extra parameter</span>
+</span><span class="hll"> <span class="p">[</span><span class="s">"{basename[0][0]}"</span><span class="p">,</span> <span class="c"># basename for 1st set of files, 1st file name</span>
+</span><span class="hll"> <span class="s">"{basename[1][0]}"</span><span class="p">,</span> <span class="c"># 2nd</span>
+</span> <span class="p">])</span>
+<span class="k">def</span> <span class="nf">combinations_with_replacement_task</span><span class="p">(</span><span class="n">input_file</span><span class="p">,</span> <span class="n">output_parameter</span><span class="p">,</span> <span class="n">shared_path</span><span class="p">,</span> <span class="n">basenames</span><span class="p">):</span>
+ <span class="k">print</span> <span class="s">" - "</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">basenames</span><span class="p">)</span>
+
+
+<span class="c">#</span>
+<span class="c"># Run</span>
+<span class="c">#</span>
+<span class="n">pipeline_run</span><span class="p">(</span><span class="n">verbose</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
+</pre></div>
+</div>
+<p>This results in:</p>
+<blockquote>
+<div><div class="highlight-pycon"><div class="highlight"><pre><span class="gp">>>> </span><span class="n">pipeline_run</span><span class="p">(</span><span class="n">verbose</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
+<span class="go">A - A</span>
+<span class="go">A - B</span>
+<span class="go">A - C</span>
+<span class="go">A - D</span>
+<span class="go">B - B</span>
+<span class="go">B - C</span>
+<span class="go">B - D</span>
+<span class="go">C - C</span>
+<span class="go">C - D</span>
+<span class="go">D - D</span>
+</pre></div>
+</div>
+</div></blockquote>
+</div></blockquote>
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="../../contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#"><strong>Chapter 17</strong>: Python Code for <tt class="docutils literal"><span class="pre">@combinations</span></tt>, <tt class="docutils literal"><span class="pre">@permutations</span></tt> and all versus all <tt class="docutils literal"><span class="pre">@product</span></tt></a><ul>
+<li><a class="reference internal" href="#example-code-for-product">Example code for <tt class="docutils literal"><span class="pre">@product</span></tt></a></li>
+<li><a class="reference internal" href="#example-code-for-permutations">Example code for <tt class="docutils literal"><span class="pre">@permutations</span></tt></a></li>
+<li><a class="reference internal" href="#example-code-for-combinations">Example code for <tt class="docutils literal"><span class="pre">@combinations</span></tt></a></li>
+<li><a class="reference internal" href="#example-code-for-combinations-with-replacement">Example code for <tt class="docutils literal"><span class="pre">@combinations_with_replacement</span></tt></a></li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="subdivide_collate_code.html"
+ title="previous chapter"><strong>Chapter 16</strong>: Python Code for <tt class="docutils literal"><span class="pre">@subdivide</span></tt> tasks to run efficiently and regroup with <tt class="docutils literal"><span class="pre">@collate</span></tt></a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="inputs_code.html"
+ title="next chapter"><strong>Chapter 20</strong>: Python Code for Manipulating task inputs via string substitution using <tt class="docutils literal"><span class="pre">inputs()</span></tt> and <tt class="docutils literal"><span class="pre">add_inputs()</span></tt></a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../../_sources/tutorials/new_tutorial/combinatorics_code.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="../../decorators/decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="../../decorators/originate.html">@originate</a> </li>
+ <li><a href="../../decorators/split.html">@split</a> </li>
+ <li><a href="../../decorators/transform.html">@transform</a> </li>
+ <li><a href="../../decorators/merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="../../decorators/subdivide.html">@subdivide</a> </li>
+ <li><a href="../../decorators/transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="../../decorators/collate.html">@collate</a> </li>
+ <li><a href="../../decorators/collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="../../decorators/graphviz.html">@graphviz</a> </li>
+ <li><a href="../../decorators/mkdir.html">@mkdir</a> </li>
+ <li><a href="../../decorators/follows.html">@follows / mkdir</a> </li>
+ <li><a href="../../decorators/posttask.html">@posttask touch_file</a> </li>
+ <li><a href="../../decorators/active_if.html">@active_if</a> </li>
+ <li><a href="../../decorators/jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="../../decorators/product.html">@product </a> </li>
+ <li><a href="../../decorators/permutations.html">@permutations </a> </li>
+ <li><a href="../../decorators/combinations.html">@combinations </a> </li>
+ <li><a href="../../decorators/combinations_with_replacement.html">@combinations_with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="../../decorators/decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="../../decorators/files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="../../decorators/parallel.html">@parallel</a> </li>
+ <li><a href="../../decorators/check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="../../decorators/indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="inputs_code.html" title="Chapter 20: Python Code for Manipulating task inputs via string substitution using inputs() and add_inputs()"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="subdivide_collate_code.html" title="Chapter 16: Python Code for @subdivide tasks to run efficiently and regroup with @collate"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="introduction.html">Manual</a> / </li>
+ <li><a href="manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/tutorials/new_tutorial/command_line.html b/doc/_build/html/tutorials/new_tutorial/command_line.html
new file mode 100644
index 0000000..87d4fff
--- /dev/null
+++ b/doc/_build/html/tutorials/new_tutorial/command_line.html
@@ -0,0 +1,515 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>Chapter 6: Running Ruffus from the command line with ruffus.cmdline — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../../index.html" />
+ <link rel="next" title="Chapter 7: Displaying the pipeline visually with pipeline_printout_graph(...)" href="pipeline_printout_graph.html" />
+ <link rel="prev" title="Chapter 5: Understanding how your pipeline works with pipeline_printout(...)" href="pipeline_printout.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="pipeline_printout_graph.html" title="Chapter 7: Displaying the pipeline visually with pipeline_printout_graph(...)"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="pipeline_printout.html" title="Chapter 5: Understanding how your pipeline works with pipeline_printout(...)"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="introduction.html">Manual</a> / </li>
+ <li><a href="manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="#">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <div class="section" id="new-manual-cmdline-chapter-num-running-ruffus-from-the-command-line-with-ruffus-cmdline">
+<span id="new-manual-cmdline"></span><span id="index-0"></span><h1><strong>Chapter 6</strong>: Running <em>Ruffus</em> from the command line with ruffus.cmdline<a class="headerlink" href="#new-manual-cmdline-chapter-num-running-ruffus-from-the-command-line-with-ruffus-cmdline" title="Permalink to this headline">¶</a></h1>
+<div class="admonition seealso">
+<p class="first admonition-title">See also</p>
+<ul class="last simple">
+<li><a class="reference internal" href="manual_contents.html#new-manual-table-of-contents"><em>Manual table of Contents</em></a></li>
+</ul>
+</div>
+<p>We find that much of our <em>Ruffus</em> pipeline code is built on the same template and this is generally
+a good place to start developing a new pipeline.</p>
+<p>From version 2.4, <em>Ruffus</em> includes an optional <tt class="docutils literal"><span class="pre">Ruffus.cmdline</span></tt> module that provides
+support for a set of common command line arguments. This makes writing <em>Ruffus</em> pipelines much more pleasant.</p>
+<div class="section" id="template-for-argparse">
+<span id="new-manual-cmdline-setup-logging"></span><span id="new-manual-cmdline-run"></span><span id="new-manual-cmdline-get-argparse"></span><h2>Template for <a class="reference external" href="http://docs.python.org/2.7/library/argparse.html">argparse</a><a class="headerlink" href="#template-for-argparse" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>All you need to do is copy these 6 lines</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="kn">import</span> <span class="nn">ruffus.cmdline</span> <span class="kn">as</span> <span class="nn">cmdline</span>
+
+<span class="n">parser</span> <span class="o">=</span> <span class="n">cmdline</span><span class="o">.</span><span class="n">get_argparse</span><span class="p">(</span><span class="n">description</span><span class="o">=</span><span class="s">'WHAT DOES THIS PIPELINE DO?'</span><span class="p">)</span>
+
+<span class="hll"><span class="c"># <<<---- add your own command line options like --input_file here</span>
+</span><span class="c"># parser.add_argument("--input_file")</span>
+
+<span class="n">options</span> <span class="o">=</span> <span class="n">parser</span><span class="o">.</span><span class="n">parse_args</span><span class="p">()</span>
+
+<span class="c"># standard python logger which can be synchronised across concurrent Ruffus tasks</span>
+<span class="n">logger</span><span class="p">,</span> <span class="n">logger_mutex</span> <span class="o">=</span> <span class="n">cmdline</span><span class="o">.</span><span class="n">setup_logging</span> <span class="p">(</span><span class="n">__name__</span><span class="p">,</span> <span class="n">options</span><span class="o">.</span><span class="n">log_file</span><span class="p">,</span> <span class="n">options</span><span class="o">.</span><span class="n">verbose</span><span class="p">)</span>
+
+<span class="hll"><span class="c"># <<<---- pipelined functions go here</span>
+</span>
+<span class="n">cmdline</span><span class="o">.</span><span class="n">run</span> <span class="p">(</span><span class="n">options</span><span class="p">)</span>
+</pre></div>
+</div>
+<p>We recommend the standard <a class="reference external" href="http://docs.python.org/2.7/library/argparse.html">argparse</a> module,
+but the deprecated <a class="reference external" href="http://docs.python.org/2.7/library/optparse.html">optparse</a> module works as well. (See <a class="reference internal" href="#code-template-optparse"><em>below</em></a> for the template.)</p>
+</div></blockquote>
+</div>
+<div class="section" id="command-line-arguments">
+<h2>Command Line Arguments<a class="headerlink" href="#command-line-arguments" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p><tt class="docutils literal"><span class="pre">Ruffus.cmdline</span></tt> by default provides these predefined options:</p>
+<blockquote>
+<div><div class="highlight-bash"><div class="highlight"><pre>-v, --verbose
+ --version
+-L, --log_file
+
+<span class="hll"> <span class="c"># tasks</span>
+</span>-T, --target_tasks
+ --forced_tasks
+-j, --jobs
+ --use_threads
+
+
+<span class="hll"> <span class="c"># printout</span>
+</span>-n, --just_print
+
+<span class="hll"> <span class="c"># flow chart</span>
+</span> --flowchart
+ --key_legend_in_graph
+ --draw_graph_horizontally
+ --flowchart_format
+
+
+<span class="hll"> <span class="c"># check sum</span>
+</span> --touch_files_only
+ --checksum_file_name
+ --recreate_database
+</pre></div>
+</div>
+</div></blockquote>
+</div></blockquote>
+</div>
+<div class="section" id="logging">
+<h2>1) Logging<a class="headerlink" href="#logging" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>The script provides for logging both to the command line:</p>
+<blockquote>
+<div><div class="highlight-bash"><div class="highlight"><pre>myscript -v
+myscript --verbose
+</pre></div>
+</div>
+</div></blockquote>
+<p>and an optional log file:</p>
+<blockquote>
+<div><div class="highlight-bash"><div class="highlight"><pre><span class="c"># keep tabs on yourself</span>
+myscript --log_file /var/log/secret.logbook
+</pre></div>
+</div>
+</div></blockquote>
+<p>Logging is ignored if neither <tt class="docutils literal"><span class="pre">--verbose</span></tt> nor <tt class="docutils literal"><span class="pre">--log_file</span></tt> is specified on the command line.</p>
+<p><tt class="docutils literal"><span class="pre">Ruffus.cmdline</span></tt> automatically allows you to write to a shared log file via a proxy from multiple processes.
+However, you do need to use the logging mutex (the <tt class="docutils literal"><span class="pre">logger_mutex</span></tt> returned by <tt class="docutils literal"><span class="pre">setup_logging</span></tt> above) for the log files to be synchronised properly across different jobs:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="k">with</span> <span class="n">logging_mutex</span><span class="p">:</span>
+
+ <span class="n">logger_proxy</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s">"Look Ma. No hands"</span><span class="p">)</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p>Logging is set up so that you can write</p>
+</div></blockquote>
+<div class="section" id="a-only-to-the-log-file">
+<h3>A) Only to the log file:<a class="headerlink" href="#a-only-to-the-log-file" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="n">logger</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s">"A message"</span><span class="p">)</span>
+</pre></div>
+</div>
+</div></blockquote>
+</div>
+<div class="section" id="b-only-to-the-display">
+<h3>B) Only to the display:<a class="headerlink" href="#b-only-to-the-display" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s">"A message"</span><span class="p">)</span>
+</pre></div>
+</div>
+</div></blockquote>
+</div>
+<div class="section" id="c-to-both-simultaneously">
+<span id="new-manual-cmdline-message"></span><h3>C) To both simultaneously:<a class="headerlink" href="#c-to-both-simultaneously" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="kn">from</span> <span class="nn">ruffus.cmdline</span> <span class="kn">import</span> <span class="n">MESSAGE</span>
+
+<span class="n">logger</span><span class="o">.</span><span class="n">log</span><span class="p">(</span><span class="n">MESSAGE</span><span class="p">,</span> <span class="s">"A message"</span><span class="p">)</span>
+</pre></div>
+</div>
+</div></blockquote>
+</div>
+</div>
+<div class="section" id="tracing-pipeline-progress">
+<h2>2) Tracing pipeline progress<a class="headerlink" href="#tracing-pipeline-progress" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>This is extremely useful for understanding what is happening with your pipeline, for example which tasks and which
+jobs are up to date.</p>
+<p>See <a class="reference internal" href="pipeline_printout.html#new-manual-pipeline-printout"><em>Chapter 5: Understanding how your pipeline works with pipeline_printout(...)</em></a></p>
+<p>To trace the pipeline, call the script with the following options:</p>
+<blockquote>
+<div><div class="highlight-bash"><div class="highlight"><pre><span class="c"># well-mannered, reserved</span>
+myscript --just_print
+myscript -n
+
+or
+
+<span class="c"># extremely loquacious</span>
+myscript --just_print --verbose 5
+myscript -n -v5
+</pre></div>
+</div>
+</div></blockquote>
+<p>Increasing levels of verbosity (<tt class="docutils literal"><span class="pre">--verbose</span></tt> to <tt class="docutils literal"><span class="pre">--verbose</span> <span class="pre">5</span></tt>) provide more detailed output</p>
+</div></blockquote>
+</div>
+<div class="section" id="printing-a-flowchart">
+<h2>3) Printing a flowchart<a class="headerlink" href="#printing-a-flowchart" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>This is the subject of <a class="reference internal" href="pipeline_printout_graph.html#new-manual-pipeline-printout-graph"><em>Chapter 7: Displaying the pipeline visually with pipeline_printout_graph(...)</em></a>.</p>
+<p>Flowcharts can be specified using the following option:</p>
+<blockquote>
+<div><div class="highlight-bash"><div class="highlight"><pre>myscript --flowchart xxxchart.svg
+</pre></div>
+</div>
+</div></blockquote>
+<p>The extension of the flowchart file indicates what format the flowchart should take,
+for example, <tt class="docutils literal"><span class="pre">svg</span></tt>, <tt class="docutils literal"><span class="pre">jpg</span></tt> etc.</p>
+<p>Override with <tt class="docutils literal"><span class="pre">--flowchart_format</span></tt></p>
+</div></blockquote>
+</div>
+<div class="section" id="running-in-parallel-on-multiple-processors">
+<h2>4) Running in parallel on multiple processors<a class="headerlink" href="#running-in-parallel-on-multiple-processors" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>Optionally specify the number of parallel strands of execution and the last <em>target</em> task to run.
+The pipeline will run starting from any out-of-date tasks which precede the <em>target</em> and proceed no further
+beyond the <em>target</em>.</p>
+<blockquote>
+<div><div class="highlight-bash"><div class="highlight"><pre>myscript --jobs 15 --target_tasks <span class="s2">"final_task"</span>
+myscript -j 15
+</pre></div>
+</div>
+</div></blockquote>
+</div></blockquote>
+</div>
+<div class="section" id="setup-checkpointing-so-that-ruffus-knows-which-files-are-out-of-date">
+<h2>5) Setup checkpointing so that <em>Ruffus</em> knows which files are out of date<a class="headerlink" href="#setup-checkpointing-so-that-ruffus-knows-which-files-are-out-of-date" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>The <a class="reference internal" href="checkpointing.html#new-manual-checkpointing"><em>checkpoint file</em></a> defaults to the value of the
+<tt class="docutils literal"><span class="pre">DEFAULT_RUFFUS_HISTORY_FILE</span></tt> environment variable.</p>
+<p>If this is not set, it falls back to <tt class="docutils literal"><span class="pre">.ruffus_history.sqlite</span></tt> in the current working directory.</p>
+<p>Either default can be overridden on the command line:</p>
+<blockquote>
+<div><div class="highlight-bash"><div class="highlight"><pre>myscript --checksum_file_name mychecksum.sqlite
+</pre></div>
+</div>
+</div></blockquote>
+</div></blockquote>
+<div class="section" id="recreating-checkpoints">
+<h3>Recreating checkpoints<a class="headerlink" href="#recreating-checkpoints" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><p>Create or update the checkpoint file so that all existing files in completed jobs appear up to date.</p>
+<p>This will stop sensibly if the current state is incomplete or inconsistent:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="n">myscript</span> <span class="o">--</span><span class="n">recreate_database</span>
+</pre></div>
+</div>
+</div></blockquote>
+</div></blockquote>
+</div>
+<div class="section" id="touch-files">
+<h3>Touch files<a class="headerlink" href="#touch-files" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><p>As far as possible, create empty files with the correct timestamp to make the pipeline appear up to date.</p>
+<div class="highlight-bash"><div class="highlight"><pre>myscript --touch_files_only
+</pre></div>
+</div>
+</div></blockquote>
+</div>
+</div>
+<div class="section" id="skipping-specified-options">
+<h2>6) Skipping specified options<a class="headerlink" href="#skipping-specified-options" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>Note that particular options can be skipped (not added to the command line) if they conflict with your own options, for example:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="c"># see below for how to use get_argparse</span>
+<span class="n">parser</span> <span class="o">=</span> <span class="n">cmdline</span><span class="o">.</span><span class="n">get_argparse</span><span class="p">(</span> <span class="n">description</span><span class="o">=</span><span class="s">'WHAT DOES THIS PIPELINE DO?'</span><span class="p">,</span>
+<span class="hll"> <span class="c"># Exclude the following options: --log_file --key_legend_in_graph</span>
+</span> <span class="n">ignored_args</span> <span class="o">=</span> <span class="p">[</span><span class="s">"log_file"</span><span class="p">,</span> <span class="s">"key_legend_in_graph"</span><span class="p">])</span>
+</pre></div>
+</div>
+</div></blockquote>
+</div></blockquote>
+</div>
+<div class="section" id="specifying-verbosity-and-abbreviating-long-paths">
+<h2>7) Specifying verbosity and abbreviating long paths<a class="headerlink" href="#specifying-verbosity-and-abbreviating-long-paths" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>The verbosity can be specified on the command line</p>
+<blockquote>
+<div><div class="highlight-bash"><div class="highlight"><pre>myscript --verbose 5
+
+<span class="c"># verbosity of 5 + 1 = 6</span>
+myscript --verbose 5 --verbose
+
+<span class="c"># verbosity reset to 2</span>
+myscript --verbose 5 --verbose --verbose 2
+</pre></div>
+</div>
+</div></blockquote>
+<p>If the printed paths are too long and need to be abbreviated, or, alternatively, if you want to see the full absolute paths of your input and output parameters,
+you can specify an extension to the verbosity. See the manual discussion of <a class="reference internal" href="pipeline_printout.html#new-manual-pipeline-printout-verbose-abbreviated-path"><em>verbose_abbreviated_path</em></a> for
+more details. This is specified as <tt class="docutils literal"><span class="pre">--verbose</span> <span class="pre">VERBOSITY:VERBOSE_ABBREVIATED_PATH</span></tt>. (No spaces!)</p>
+<p>For example:</p>
+<blockquote>
+<div><div class="highlight-bash"><div class="highlight"><pre> <span class="c"># verbosity of 4</span>
+ myscript.py --verbose 4
+
+<span class="hll"> <span class="c"># display three levels of nested directories</span>
+</span> myscript.py --verbose 4:3
+
+<span class="hll"> <span class="c"># restrict input and output parameters to 60 letters</span>
+</span> myscript.py --verbose 4:-60
+</pre></div>
+</div>
+</div></blockquote>
+</div></blockquote>
+</div>
+<div class="section" id="displaying-the-version">
+<h2>8) Displaying the version<a class="headerlink" href="#displaying-the-version" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>Note that the version for your script will default to <tt class="docutils literal"><span class="pre">"%(prog)s</span> <span class="pre">1.0"</span></tt> unless specified:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="n">parser</span> <span class="o">=</span> <span class="n">cmdline</span><span class="o">.</span><span class="n">get_argparse</span><span class="p">(</span> <span class="n">description</span><span class="o">=</span><span class="s">'WHAT DOES THIS PIPELINE DO?'</span><span class="p">,</span>
+ <span class="n">version</span> <span class="o">=</span> <span class="s">"my_programme.py v. 2.23"</span><span class="p">)</span>
+</pre></div>
+</div>
+</div></blockquote>
+</div></blockquote>
+</div>
+<div class="section" id="template-for-optparse">
+<span id="code-template-optparse"></span><h2>Template for <a class="reference external" href="http://docs.python.org/2.7/library/optparse.html">optparse</a><a class="headerlink" href="#template-for-optparse" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>deprecated since python 2.7</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="c">#</span>
+<span class="c"># Using optparse (deprecated since python 2.7)</span>
+<span class="c">#</span>
+<span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+
+<span class="n">parser</span> <span class="o">=</span> <span class="n">cmdline</span><span class="o">.</span><span class="n">get_optgparse</span><span class="p">(</span><span class="n">version</span><span class="o">=</span><span class="s">"%prog 1.0"</span><span class="p">,</span> <span class="n">usage</span> <span class="o">=</span> <span class="s">"</span><span class="se">\n\n</span><span class="s"> %prog [options]"</span><span class="p">)</span>
+
+<span class="hll"><span class="c"># <<<---- add your own command line options like --input_file here</span>
+</span><span class="c"># parser.add_option("-i", "--input_file", dest="input_file", help="Input file")</span>
+
+<span class="p">(</span><span class="n">options</span><span class="p">,</span> <span class="n">remaining_args</span><span class="p">)</span> <span class="o">=</span> <span class="n">parser</span><span class="o">.</span><span class="n">parse_args</span><span class="p">()</span>
+
+<span class="c"># logger which can be passed to ruffus tasks</span>
+<span class="n">logger</span><span class="p">,</span> <span class="n">logger_mutex</span> <span class="o">=</span> <span class="n">cmdline</span><span class="o">.</span><span class="n">setup_logging</span> <span class="p">(</span><span class="s">"this_program"</span><span class="p">,</span> <span class="n">options</span><span class="o">.</span><span class="n">log_file</span><span class="p">,</span> <span class="n">options</span><span class="o">.</span><span class="n">verbose</span><span class="p">)</span>
+
+<span class="hll"><span class="c"># <<<---- pipelined functions go here</span>
+</span>
+<span class="n">cmdline</span><span class="o">.</span><span class="n">run</span> <span class="p">(</span><span class="n">options</span><span class="p">)</span>
+</pre></div>
+</div>
+</div></blockquote>
+</div></blockquote>
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="../../contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#"><strong>Chapter 6</strong>: Running <em>Ruffus</em> from the command line with ruffus.cmdline</a><ul>
+<li><a class="reference internal" href="#template-for-argparse">Template for argparse</a></li>
+<li><a class="reference internal" href="#command-line-arguments">Command Line Arguments</a></li>
+<li><a class="reference internal" href="#logging">1) Logging</a><ul>
+<li><a class="reference internal" href="#a-only-to-the-log-file">A) Only to the log file:</a></li>
+<li><a class="reference internal" href="#b-only-to-the-display">B) Only to the display:</a></li>
+<li><a class="reference internal" href="#c-to-both-simultaneously">C) To both simultaneously:</a></li>
+</ul>
+</li>
+<li><a class="reference internal" href="#tracing-pipeline-progress">2) Tracing pipeline progress</a></li>
+<li><a class="reference internal" href="#printing-a-flowchart">3) Printing a flowchart</a></li>
+<li><a class="reference internal" href="#running-in-parallel-on-multiple-processors">4) Running in parallel on multiple processors</a></li>
+<li><a class="reference internal" href="#setup-checkpointing-so-that-ruffus-knows-which-files-are-out-of-date">5) Setup checkpointing so that <em>Ruffus</em> knows which files are out of date</a><ul>
+<li><a class="reference internal" href="#recreating-checkpoints">Recreating checkpoints</a></li>
+<li><a class="reference internal" href="#touch-files">Touch files</a></li>
+</ul>
+</li>
+<li><a class="reference internal" href="#skipping-specified-options">6) Skipping specified options</a></li>
+<li><a class="reference internal" href="#specifying-verbosity-and-abbreviating-long-paths">7) Specifying verbosity and abbreviating long paths</a></li>
+<li><a class="reference internal" href="#displaying-the-version">8) Displaying the version</a></li>
+<li><a class="reference internal" href="#template-for-optparse">Template for optparse</a></li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="pipeline_printout.html"
+ title="previous chapter"><strong>Chapter 5</strong>: Understanding how your pipeline works with <tt class="docutils literal"><span class="pre">pipeline_printout(...)</span></tt></a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="pipeline_printout_graph.html"
+ title="next chapter"><strong>Chapter 7</strong>: Displaying the pipeline visually with <tt class="docutils literal"><span class="pre">pipeline_printout_graph(...)</span></tt></a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../../_sources/tutorials/new_tutorial/command_line.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="../../decorators/decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="../../decorators/originate.html">@originate</a> </li>
+ <li><a href="../../decorators/split.html">@split</a> </li>
+ <li><a href="../../decorators/transform.html">@transform</a> </li>
+ <li><a href="../../decorators/merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="../../decorators/subdivide.html">@subdivide</a> </li>
+ <li><a href="../../decorators/transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="../../decorators/collate.html">@collate</a> </li>
+ <li><a href="../../decorators/collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="../../decorators/graphviz.html">@graphviz</a> </li>
+ <li><a href="../../decorators/mkdir.html">@mkdir</a> </li>
+ <li><a href="../../decorators/follows.html">@follows / mkdir</a> </li>
+ <li><a href="../../decorators/posttask.html">@posttask touch_file</a> </li>
+ <li><a href="../../decorators/active_if.html">@active_if</a> </li>
+ <li><a href="../../decorators/jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="../../decorators/product.html">@product </a> </li>
+ <li><a href="../../decorators/permutations.html">@permutations </a> </li>
+ <li><a href="../../decorators/combinations.html">@combinations </a> </li>
+ <li><a href="../../decorators/combinations_with_replacement.html">@combinations_with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="../../decorators/decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="../../decorators/files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="../../decorators/parallel.html">@parallel</a> </li>
+ <li><a href="../../decorators/check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="../../decorators/indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="pipeline_printout_graph.html" title="Chapter 7: Displaying the pipeline visually with pipeline_printout_graph(...)"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="pipeline_printout.html" title="Chapter 5: Understanding how your pipeline works with pipeline_printout(...)"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="introduction.html">Manual</a> / </li>
+ <li><a href="manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="#">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/tutorials/new_tutorial/decorators_compendium.html b/doc/_build/html/tutorials/new_tutorial/decorators_compendium.html
new file mode 100644
index 0000000..4e1fa6c
--- /dev/null
+++ b/doc/_build/html/tutorials/new_tutorial/decorators_compendium.html
@@ -0,0 +1,364 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>Chapter 11: Pipeline topologies and a compendium of Ruffus decorators — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../../index.html" />
+ <link rel="next" title="Chapter 12: Splitting up large tasks / files with @split" href="split.html" />
+ <link rel="prev" title="Chapter 10: Checkpointing: Interrupted Pipelines and Exceptions" href="checkpointing.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="split.html" title="Chapter 12: Splitting up large tasks / files with @split"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="checkpointing.html" title="Chapter 10: Checkpointing: Interrupted Pipelines and Exceptions"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="introduction.html">Manual</a> / </li>
+ <li><a href="manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <div class="section" id="new-manual-decorators-compendium-chapter-num-pipeline-topologies-and-a-compendium-of-ruffus-decorators">
+<span id="new-manual-decorators-compendium"></span><span id="index-0"></span><h1><strong>Chapter 11</strong>: Pipeline topologies and a compendium of <em>Ruffus</em> decorators<a class="headerlink" href="#new-manual-decorators-compendium-chapter-num-pipeline-topologies-and-a-compendium-of-ruffus-decorators" title="Permalink to this headline">¶</a></h1>
+<div class="admonition seealso">
+<p class="first admonition-title">See also</p>
+<ul class="last simple">
+<li><a class="reference internal" href="manual_contents.html#new-manual-table-of-contents"><em>Manual Table of Contents</em></a></li>
+<li><a class="reference internal" href="../../decorators/decorators.html#decorators"><em>decorators</em></a></li>
+</ul>
+</div>
+<div class="section" id="overview">
+<h2>Overview<a class="headerlink" href="#overview" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>Computational pipelines transform your data in stages until the final result is produced.</p>
+<p>You can visualise your pipeline data flowing like water down a system of pipes.
+<em>Ruffus</em> has many ways of joining up your pipes to create different topologies.</p>
+<div class="admonition note">
+<p class="first admonition-title">Note</p>
+<p><strong>The best way to design a pipeline is to:</strong></p>
+<blockquote class="last">
+<div><ul class="simple">
+<li><strong>Write down the file names of the data as it flows across your pipeline.</strong></li>
+<li><strong>Draw lines between the file names to show how they should be connected together.</strong></li>
+</ul>
+</div></blockquote>
+</div>
+</div></blockquote>
+</div>
+<div class="section" id="transform">
+<h2><a class="reference internal" href="../../decorators/transform.html#decorators-transform"><em>@transform</em></a><a class="headerlink" href="#transform" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>So far, our data files have been flowing through our pipelines independently in lockstep.</p>
+<a class="reference internal image-reference" href="../../_images/bestiary_transform.png"><img alt="../../_images/bestiary_transform.png" src="../../_images/bestiary_transform.png" style="width: 733.5px; height: 262.0px;" /></a>
+<p>If we drew a graph of the data files moving through the pipeline, all of our flowcharts would look something like this.</p>
+<p>The <a class="reference internal" href="../../decorators/transform.html#decorators-transform"><em>@transform</em></a> decorator connects up your data files in 1 to 1 operations, ensuring that for every <strong>Input</strong>, a corresponding <strong>Output</strong> is
+generated, ready to go into the next pipeline stage. If we start with three sets of starting data, we would end up with three final sets of results.</p>
+</div></blockquote>
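+<blockquote>
+<div><p>For illustration only, here is a minimal, hypothetical <tt class="docutils literal"><span class="pre">@transform</span></tt> pipeline; the <tt class="docutils literal"><span class="pre">*.start</span></tt> file names are invented for this sketch:</p>
+<div class="highlight-python"><div class="highlight"><pre>from ruffus import *
+
+# invented starting files for this sketch
+@originate(["a.start", "b.start", "c.start"])
+def create_initial_files(output_file):
+    with open(output_file, "w") as f:
+        pass
+
+# 1 to 1: every "*.start" Input gets a corresponding "*.processed" Output
+@transform(create_initial_files, suffix(".start"), ".processed")
+def process_file(input_file, output_file):
+    with open(output_file, "w") as f:
+        pass
+
+pipeline_run()
+</pre></div>
+</div>
+</div></blockquote>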
+</div>
+<div class="section" id="a-bestiary-of-ruffus-decorators">
+<h2>A bestiary of <em>Ruffus</em> decorators<a class="headerlink" href="#a-bestiary-of-ruffus-decorators" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>Very often, we would like to transform our data in more complex ways; this is where the other <em>Ruffus</em> decorators come in.</p>
+<a class="reference internal image-reference" href="../../_images/bestiary_decorators.png"><img alt="../../_images/bestiary_decorators.png" src="../../_images/bestiary_decorators.png" style="width: 1359.5px; height: 294.5px;" /></a>
+</div></blockquote>
+</div>
+<div class="section" id="originate">
+<h2><a class="reference internal" href="../../decorators/originate.html#decorators-originate"><em>@originate</em></a><a class="headerlink" href="#originate" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><ul class="simple">
+<li>Introduced in <strong>Chapter 3</strong> <a class="reference internal" href="transform_in_parallel.html#new-manual-transform-in-parallel"><em>More on @transform-ing data and @originate</em></a>,
+<a class="reference internal" href="../../decorators/originate.html#decorators-originate"><em>@originate</em></a> generates <strong>Output</strong> files from scratch without the benefits of any <strong>Input</strong> files.</li>
+</ul>
+</div></blockquote>
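+<blockquote>
+<div><p>A minimal sketch with invented file names: each listed <strong>Output</strong> file becomes its own job and is written from scratch:</p>
+<div class="highlight-python"><div class="highlight"><pre>from ruffus import *
+
+# @originate invents its Output from scratch: no Input needed
+@originate(["first.start", "second.start"])
+def create_from_scratch(output_file):
+    with open(output_file, "w") as f:
+        f.write("made from scratch\n")
+
+pipeline_run()
+</pre></div>
+</div>
+</div></blockquote>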
+</div>
+<div class="section" id="merge">
+<h2><a class="reference internal" href="../../decorators/merge.html#decorators-merge"><em>@merge</em></a><a class="headerlink" href="#merge" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><ul class="simple">
+<li>A <strong>many to one</strong> operator.</li>
+<li>The last decorator at the far right to the figure, <a class="reference internal" href="../../decorators/merge.html#decorators-merge"><em>@merge</em></a> merges multiple <strong>Input</strong> into one <strong>Output</strong>.</li>
+</ul>
+</div></blockquote>
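+<blockquote>
+<div><p>A minimal, hypothetical sketch of a <strong>many to one</strong> step; the <tt class="docutils literal"><span class="pre">*.count</span></tt> file names are invented:</p>
+<div class="highlight-python"><div class="highlight"><pre>from ruffus import *
+
+@originate(["x.count", "y.count", "z.count"])
+def make_counts(output_file):
+    with open(output_file, "w") as f:
+        f.write("1\n")
+
+# many to one: every "*.count" file feeds into a single summary
+@merge(make_counts, "all.summary")
+def summarise_counts(input_files, output_file):
+    with open(output_file, "w") as out:
+        for input_file in input_files:
+            out.write(open(input_file).read())
+
+pipeline_run()
+</pre></div>
+</div>
+</div></blockquote>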
+</div>
+<div class="section" id="split">
+<h2><a class="reference internal" href="../../decorators/split.html#decorators-split"><em>@split</em></a><a class="headerlink" href="#split" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><ul class="simple">
+<li>A <strong>one to many</strong> operator,</li>
+<li><a class="reference internal" href="../../decorators/split.html#decorators-split"><em>@split</em></a> is the evil twin of <a class="reference internal" href="../../decorators/merge.html#decorators-merge"><em>@merge</em></a>. It takes a single set of <strong>Input</strong> and splits them into multiple smaller pieces.</li>
+<li>The best part of <a class="reference internal" href="../../decorators/split.html#decorators-split"><em>@split</em></a> is that we don't necessarily have to decide ahead of time <em>how many</em> smaller pieces it should produce. If we encounter a larger file,
+we might need to split it up into more fragments for greater parallelism.</li>
+<li>Since <a class="reference internal" href="../../decorators/split.html#decorators-split"><em>@split</em></a> is a <strong>one to many</strong> operator, if you pass it <strong>many</strong> inputs (e.g. via <a class="reference internal" href="../../decorators/transform.html#decorators-transform"><em>@transform</em></a>), it performs an implicit <a class="reference internal" href="../../decorators/merge.html#decorators-merge"><em>@merge</em></a> step to make one
+set of <strong>Input</strong> that you can redistribute into a different number of pieces. If you are looking to split <em>each</em> <strong>Input</strong> into further smaller fragments, then you
+need <a class="reference internal" href="../../decorators/subdivide.html#decorators-subdivide"><em>@subdivide</em></a></li>
+</ul>
+</div></blockquote>
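+<blockquote>
+<div><p>A rough sketch with invented file names; because the <strong>Output</strong> is the glob <tt class="docutils literal"><span class="pre">big.*.chunk</span></tt>, the number of pieces is only decided when the task actually runs:</p>
+<div class="highlight-python"><div class="highlight"><pre>import glob
+import os
+from ruffus import *
+
+@originate(["big.data"])
+def make_big_file(output_file):
+    with open(output_file, "w") as f:
+        f.write("line\n" * 10)
+
+# one to many: "big.*.chunk" is a glob, so the chunk count is decided at run time
+@split(make_big_file, "big.*.chunk")
+def split_big_file(input_files, output_files):
+    # clean up chunks left over from a previous, possibly larger, run
+    for old_chunk in glob.glob("big.*.chunk"):
+        os.unlink(old_chunk)
+    # ruffus may hand us a single file name or a list of names
+    if not isinstance(input_files, (list, tuple)):
+        input_files = [input_files]
+    lines = []
+    for file_name in input_files:
+        lines.extend(open(file_name).readlines())
+    # three lines per chunk: how many chunks there are depends entirely on the data
+    for chunk, start in enumerate(range(0, len(lines), 3)):
+        with open("big.%d.chunk" % chunk, "w") as f:
+            f.writelines(lines[start:start + 3])
+
+pipeline_run()
+</pre></div>
+</div>
+</div></blockquote>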
+</div>
+<div class="section" id="subdivide">
+<h2><a class="reference internal" href="../../decorators/subdivide.html#decorators-subdivide"><em>@subdivide</em></a><a class="headerlink" href="#subdivide" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><ul class="simple">
+<li>A <strong>many to even more</strong> operator.</li>
+<li>It takes each of multiple <strong>Input</strong>, and further subdivides them.</li>
+<li>Uses <a class="reference internal" href="../../decorators/indicator_objects.html#decorators-suffix"><em>suffix()</em></a>, <a class="reference internal" href="../../decorators/indicator_objects.html#decorators-formatter"><em>formatter()</em></a> or <a class="reference internal" href="../../decorators/indicator_objects.html#decorators-regex"><em>regex()</em></a> to generate <strong>Output</strong> names from its <strong>Input</strong> files but, like <a class="reference internal" href="../../decorators/split.html#decorators-split"><em>@split</em></a>, it does not need to decide ahead of time
+<em>how many</em> smaller pieces each <strong>Input</strong> should be further divided into. For example, a large <strong>Input</strong> file might be subdivided into 7 pieces, while the next job might
+split its <strong>Input</strong> into just 4 pieces.</li>
+</ul>
+</div></blockquote>
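+<blockquote>
+<div><p>A hypothetical sketch with invented file names; the <strong>Output</strong> glob and the extra file name stem are both built with <tt class="docutils literal"><span class="pre">formatter()</span></tt>, and each job decides its own number of pieces:</p>
+<div class="highlight-python"><div class="highlight"><pre>import glob
+import os
+from ruffus import *
+
+@originate(["a.block", "b.block"])
+def make_blocks(output_file):
+    with open(output_file, "w") as f:
+        f.write("some data\n" * 4)
+
+# many to even more: each "*.block" becomes its own set of "*.piece" files
+@subdivide(make_blocks,
+           formatter(),
+           "{path[0]}/{basename[0]}.*.piece",   # Output: glob of pieces
+           "{path[0]}/{basename[0]}")           # Extra: stem for piece names
+def subdivide_blocks(input_file, output_files, output_stem):
+    # remove pieces left over from a previous run
+    for old_piece in glob.glob(output_stem + ".*.piece"):
+        os.unlink(old_piece)
+    # one piece per line: each job chooses its own number of pieces
+    for i, line in enumerate(open(input_file)):
+        with open("%s.%d.piece" % (output_stem, i), "w") as f:
+            f.write(line)
+
+pipeline_run()
+</pre></div>
+</div>
+</div></blockquote>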
+</div>
+<div class="section" id="collate">
+<h2><a class="reference internal" href="../../decorators/collate.html#decorators-collate"><em>@collate</em></a><a class="headerlink" href="#collate" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><ul class="simple">
+<li>A <strong>many to fewer</strong> operator.</li>
+<li><a class="reference internal" href="../../decorators/collate.html#decorators-collate"><em>@collate</em></a> is the opposite twin of <tt class="docutils literal"><span class="pre">@subdivide</span></tt>: it takes multiple <strong>Input</strong> and groups or collates them into bundles of <strong>Output</strong>.</li>
+<li><a class="reference internal" href="../../decorators/collate.html#decorators-collate"><em>@collate</em></a> uses <a class="reference internal" href="../../decorators/indicator_objects.html#decorators-formatter"><em>formatter()</em></a> or <a class="reference internal" href="../../decorators/indicator_objects.html#decorators-regex"><em>regex()</em></a> to generate <strong>Output</strong> names.</li>
+<li>All <strong>Input</strong> files which map to the same <strong>Output</strong> are grouped together into one job (one task function call) which
+produces one <strong>Output</strong>.</li>
+</ul>
+</div></blockquote>
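+<blockquote>
+<div><p>A minimal sketch with invented file names; the regular expression groups every file sharing the same <tt class="docutils literal"><span class="pre">sampleN</span></tt> prefix into a single job:</p>
+<div class="highlight-python"><div class="highlight"><pre>from ruffus import *
+
+# hypothetical per-sample, per-chunk files
+@originate(["sample1.1.part", "sample1.2.part",
+            "sample2.1.part", "sample2.2.part"])
+def make_parts(output_file):
+    with open(output_file, "w") as f:
+        pass
+
+# many to fewer: all "sampleN.*.part" files regroup into "sampleN.merged"
+@collate(make_parts, regex(r"(.+)\.\d+\.part"), r"\1.merged")
+def regroup_parts(input_files, output_file):
+    with open(output_file, "w") as out:
+        out.write("\n".join(input_files) + "\n")
+
+pipeline_run()
+</pre></div>
+</div>
+</div></blockquote>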
+</div>
+<div class="section" id="combinatorics">
+<h2>Combinatorics<a class="headerlink" href="#combinatorics" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>More rarely, we need to generate a set of <strong>Output</strong> based on a combination or permutation or product of the <strong>Input</strong>.</p>
+<p>For example, in bioinformatics, we might need to look for all instances of a set of genes in the genomes of a number of different species.
+In other words, we need to find the <a class="reference internal" href="../../decorators/product.html#decorators-product"><em>@product</em></a> of XXX genes x YYY species.</p>
+<p><em>Ruffus</em> provides decorators modelled on the "Combinatoric generators" in the Standard Python <a class="reference external" href="http://docs.python.org/2/library/itertools.html">itertools</a> library.</p>
+<p>To use combinatoric decorators, you need to explicitly include them from <em>Ruffus</em>:</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="kn">import</span> <span class="nn">ruffus</span>
+<span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+<span class="kn">from</span> <span class="nn">ruffus.combinatorics</span> <span class="kn">import</span> <span class="o">*</span>
+</pre></div>
+</div>
+<a class="reference internal image-reference" href="../../_images/bestiary_combinatorics.png"><img alt="../../_images/bestiary_combinatorics.png" src="../../_images/bestiary_combinatorics.png" style="width: 1360.5px; height: 271.5px;" /></a>
+</div></blockquote>
+</div>
+<div class="section" id="product">
+<h2><a class="reference internal" href="../../decorators/product.html#decorators-product"><em>@product</em></a><a class="headerlink" href="#product" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><ul class="simple">
+<li>Given several sets of <strong>Input</strong>, it generates all versus all <strong>Output</strong>. For example, if there are four sets of <strong>Input</strong> files, <a class="reference internal" href="../../decorators/product.html#decorators-product"><em>@product</em></a> will generate <tt class="docutils literal"><span class="pre">WWW</span> <span class="pre">x</span> <span class="pre">XXX</span> <span class="pre">x</span> <span class="pre">YYY</span> <span class="pre">x</spa [...]
+<li>Uses <a class="reference internal" href="../../decorators/transform.html#decorators-transform"><em>formatter</em></a> to generate unique <strong>Output</strong> names from components parsed from <em>any</em> parts of <em>any</em> specified files in
+all <strong>Input</strong> sets. In the above example, this allows the generation of <tt class="docutils literal"><span class="pre">WWW</span> <span class="pre">x</span> <span class="pre">XXX</span> <span class="pre">x</span> <span class="pre">YYY</span> <span class="pre">x</span> <span class="pre">ZZZ</span></tt> unique names.</li>
+</ul>
+</div></blockquote>
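+<blockquote>
+<div><p>A hypothetical all versus all sketch with invented file names; two genes x two genomes produces four jobs:</p>
+<div class="highlight-python"><div class="highlight"><pre>from ruffus import *
+from ruffus.combinatorics import *
+
+@originate(["cat.gene", "dog.gene"])
+def make_genes(output_file):
+    with open(output_file, "w") as f:
+        pass
+
+@originate(["mouse.genome", "fly.genome"])
+def make_genomes(output_file):
+    with open(output_file, "w") as f:
+        pass
+
+# all versus all: every gene is matched against every genome
+@product(make_genes,    formatter(),
+         make_genomes,  formatter(),
+         "{basename[0][0]}_vs_{basename[1][0]}.match")
+def match_gene_to_genome(input_files, output_file):
+    with open(output_file, "w") as f:
+        pass
+
+pipeline_run()
+</pre></div>
+</div>
+</div></blockquote>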
+</div>
+<div class="section" id="combinations">
+<h2><a class="reference internal" href="../../decorators/combinations.html#decorators-combinations"><em>@combinations</em></a><a class="headerlink" href="#combinations" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><ul class="simple">
+<li>Given one set of <strong>Input</strong>, it generates the combinations of r-length tuples among them.</li>
+<li>Uses <a class="reference internal" href="../../decorators/transform.html#decorators-transform"><em>formatter</em></a> to generate unique <strong>Output</strong> names from components parsed from <em>any</em> parts of <em>any</em> specified files in all <strong>Input</strong> sets.</li>
+<li>For example, given <strong>Input</strong> called <tt class="docutils literal"><span class="pre">A</span></tt>, <tt class="docutils literal"><span class="pre">B</span></tt> and <tt class="docutils literal"><span class="pre">C</span></tt>, it will generate: <tt class="docutils literal"><span class="pre">A-B</span></tt>, <tt class="docutils literal"><span class="pre">A-C</span></tt>, <tt class="docutils literal"><span class="pre">B-C</span></tt></li>
+<li>The order of <strong>Input</strong> items is ignored so either <tt class="docutils literal"><span class="pre">A-B</span></tt> or <tt class="docutils literal"><span class="pre">B-A</span></tt> will be included, not both</li>
+<li>Self-vs-self combinations (<tt class="docutils literal"><span class="pre">A-A</span></tt>) are excluded.</li>
+</ul>
+</div></blockquote>
+</div>
+<div class="section" id="combinations-with-replacement">
+<h2><a class="reference internal" href="../../decorators/combinations_with_replacement.html#decorators-combinations-with-replacement"><em>@combinations_with_replacement</em></a><a class="headerlink" href="#combinations-with-replacement" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><ul class="simple">
+<li>Given one set of <strong>Input</strong>, it generates the combinations of r-length tuples among them, but includes self-vs-self combinations.</li>
+<li>Uses <a class="reference internal" href="../../decorators/transform.html#decorators-transform"><em>formatter</em></a> to generate unique <strong>Output</strong> names from components parsed from <em>any</em> parts of <em>any</em> specified files in all <strong>Input</strong> sets.</li>
+<li>For example, given <strong>Input</strong> called <tt class="docutils literal"><span class="pre">A</span></tt>, <tt class="docutils literal"><span class="pre">B</span></tt> and <tt class="docutils literal"><span class="pre">C</span></tt>, it will generate: <tt class="docutils literal"><span class="pre">A-A</span></tt>, <tt class="docutils literal"><span class="pre">A-B</span></tt>, <tt class="docutils literal"><span class="pre">A-C</span></tt>, <tt class="docutils literal"><span class="pre">B-B</span></tt>, <tt class="docutils literal"><span class="pre">B-C</span></tt>, <tt class="docutils literal"><span class="pre">C-C</span></tt></li>
+</ul>
+</div></blockquote>
+</div>
+<div class="section" id="permutations">
+<h2><a class="reference internal" href="../../decorators/permutations.html#decorators-permutations"><em>@permutations</em></a><a class="headerlink" href="#permutations" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><ul class="simple">
+<li>Given one set of <strong>Input</strong>, it generates the permutations of r-length tuples among them. This excludes self-vs-self combinations but includes all orderings (<tt class="docutils literal"><span class="pre">A-B</span></tt> and <tt class="docutils literal"><span class="pre">B-A</span></tt>).</li>
+<li>Uses <a class="reference internal" href="../../decorators/transform.html#decorators-transform"><em>formatter</em></a> to generate unique <strong>Output</strong> names from components parsed from <em>any</em> parts of <em>any</em> specified files in all <strong>Input</strong> sets.</li>
+<li>For example, given <strong>Input</strong> called <tt class="docutils literal"><span class="pre">A</span></tt>, <tt class="docutils literal"><span class="pre">B</span></tt> and <tt class="docutils literal"><span class="pre">C</span></tt>, it will generate: <tt class="docutils literal"><span class="pre">A-B</span></tt>, <tt class="docutils literal"><span class="pre">A-C</span></tt>, <tt class="docutils literal"><span class="pre">B-A</span></tt>, <tt class="docutils literal"><span class="pre">B-C</span></tt>, <tt class="docutils literal"><span class="pre">C-A</span></tt>, <tt class="docutils literal"><span class="pre">C-B</span></tt></li>
+</ul>
+</div></blockquote>
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="../../contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#"><strong>Chapter 11</strong>: Pipeline topologies and a compendium of <em>Ruffus</em> decorators</a><ul>
+<li><a class="reference internal" href="#overview">Overview</a></li>
+<li><a class="reference internal" href="#transform"><tt class="docutils literal"><span class="pre">@transform</span></tt></a></li>
+<li><a class="reference internal" href="#a-bestiary-of-ruffus-decorators">A bestiary of <em>Ruffus</em> decorators</a></li>
+<li><a class="reference internal" href="#originate"><tt class="docutils literal"><span class="pre">@originate</span></tt></a></li>
+<li><a class="reference internal" href="#merge"><tt class="docutils literal"><span class="pre">@merge</span></tt></a></li>
+<li><a class="reference internal" href="#split"><tt class="docutils literal"><span class="pre">@split</span></tt></a></li>
+<li><a class="reference internal" href="#subdivide"><tt class="docutils literal"><span class="pre">@subdivide</span></tt></a></li>
+<li><a class="reference internal" href="#collate"><tt class="docutils literal"><span class="pre">@collate</span></tt></a></li>
+<li><a class="reference internal" href="#combinatorics">Combinatorics</a></li>
+<li><a class="reference internal" href="#product"><tt class="docutils literal"><span class="pre">@product</span></tt></a></li>
+<li><a class="reference internal" href="#combinations"><tt class="docutils literal"><span class="pre">@combinations</span></tt></a></li>
+<li><a class="reference internal" href="#combinations-with-replacement"><tt class="docutils literal"><span class="pre">@combinations_with_replacement</span></tt></a></li>
+<li><a class="reference internal" href="#permutations"><tt class="docutils literal"><span class="pre">@permutations</span></tt></a></li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="checkpointing.html"
+ title="previous chapter"><strong>Chapter 10</strong>: Checkpointing: Interrupted Pipelines and Exceptions</a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="split.html"
+ title="next chapter"><strong>Chapter 12</strong>: Splitting up large tasks / files with <strong>@split</strong></a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../../_sources/tutorials/new_tutorial/decorators_compendium.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="../../decorators/decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="../../decorators/originate.html">@originate</a> </li>
+ <li><a href="../../decorators/split.html">@split</a> </li>
+ <li><a href="../../decorators/transform.html">@transform</a> </li>
+ <li><a href="../../decorators/merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="../../decorators/subdivide.html">@subdivide</a> </li>
+ <li><a href="../../decorators/transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="../../decorators/collate.html">@collate</a> </li>
+ <li><a href="../../decorators/collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="../../decorators/graphviz.html">@graphviz</a> </li>
+ <li><a href="../../decorators/mkdir.html">@mkdir</a> </li>
+ <li><a href="../../decorators/follows.html">@follows / mkdir</a> </li>
+ <li><a href="../../decorators/posttask.html">@posttask touch_file</a> </li>
+ <li><a href="../../decorators/active_if.html">@active_if</a> </li>
+ <li><a href="../../decorators/jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="../../decorators/product.html">@product </a> </li>
+ <li><a href="../../decorators/permutations.html">@permutations </a> </li>
+ <li><a href="../../decorators/combinations.html">@combinations </a> </li>
+                        <li><a href="../../decorators/combinations_with_replacement.html">@combinations_with_replacement </a>    </li>
+ </ul>
+
+
+ <h4><a href="../../decorators/decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="../../decorators/files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="../../decorators/parallel.html">@parallel</a> </li>
+ <li><a href="../../decorators/check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="../../decorators/indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="split.html" title="Chapter 12: Splitting up large tasks / files with @split"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="checkpointing.html" title="Chapter 10: Checkpointing: Interrupted Pipelines and Exceptions"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="introduction.html">Manual</a> / </li>
+ <li><a href="manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/tutorials/new_tutorial/dependencies.html b/doc/_build/html/tutorials/new_tutorial/dependencies.html
new file mode 100644
index 0000000..6bdbdca
--- /dev/null
+++ b/doc/_build/html/tutorials/new_tutorial/dependencies.html
@@ -0,0 +1,289 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>Appendix 2: How dependency is checked — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../../index.html" />
+ <link rel="next" title="Appendix 3: Exceptions thrown inside pipelines" href="exceptions.html" />
+ <link rel="prev" title="Appendix 1: Flow Chart Colours with pipeline_printout_graph(...)" href="flowchart_colours.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="exceptions.html" title="Appendix 3: Exceptions thrown inside pipelines"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="flowchart_colours.html" title="Appendix 1: Flow Chart Colours with pipeline_printout_graph(...)"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="introduction.html">Manual</a> / </li>
+ <li><a href="manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <div class="section" id="new-manual-dependencies-chapter-num-how-dependency-is-checked">
+<span id="new-manual-dependencies"></span><span id="index-0"></span><h1><strong>Appendix 2</strong>: How dependency is checked<a class="headerlink" href="#new-manual-dependencies-chapter-num-how-dependency-is-checked" title="Permalink to this headline">¶</a></h1>
+<div class="admonition seealso">
+<p class="first admonition-title">See also</p>
+<ul class="last simple">
+<li><a class="reference internal" href="manual_contents.html#new-manual-table-of-contents"><em>Manual Table of Contents</em></a></li>
+</ul>
+</div>
+<div class="section" id="overview">
+<h2>Overview<a class="headerlink" href="#overview" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>How does <em>Ruffus</em> decide how to run your pipeline?</p>
+<blockquote>
+<div><ul class="simple">
+<li>In which order should pipelined functions be called?</li>
+<li>Which parts of the pipeline are up-to-date and do not need to be rerun?</li>
+</ul>
+</div></blockquote>
+</div></blockquote>
+<div class="section" id="running-all-out-of-date-tasks-and-dependents">
+<h3>Running all out-of-date tasks and dependents<a class="headerlink" href="#running-all-out-of-date-tasks-and-dependents" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><a class="reference internal image-reference" href="../../_images/manual_dependencies_flowchart_intro.png"><img alt="../../_images/manual_dependencies_flowchart_intro.png" src="../../_images/manual_dependencies_flowchart_intro.png" style="width: 275.0px; height: 222.0px;" /></a>
+<p>By default, <em>Ruffus</em> will</p>
+<blockquote>
+<div><ul class="simple">
+<li>build a flow chart (dependency tree) of pipelined tasks (functions)</li>
+<li>start from the most ancestral tasks with the fewest dependencies (<tt class="docutils literal"><span class="pre">task1</span></tt> and <tt class="docutils literal"><span class="pre">task4</span></tt> in the flowchart above).</li>
+<li>walk up the tree to find the first incomplete / out-of-date tasks (i.e. <tt class="docutils literal"><span class="pre">task3</span></tt> and <tt class="docutils literal"><span class="pre">task5</span></tt>).</li>
+<li>start running from there</li>
+</ul>
+</div></blockquote>
+<p>All down-stream (dependent) tasks will be re-run anyway, so we don’t have to test
+whether they are up-to-date or not.</p>
+<div class="admonition note" id="new-manual-dependencies-checking-multiple-times">
+<p class="first admonition-title">Note</p>
+<p>This means that <em>Ruffus</em> <em>may</em> ask any task whether its jobs are out of date more than once:</p>
+<blockquote class="last">
+<div><ul class="simple">
+<li>once when deciding which parts of the pipeline have to be run</li>
+<li>once just before executing the task.</li>
+</ul>
+</div></blockquote>
+</div>
+<p><em>Ruffus</em> tries to be clever / efficient, and does the minimal amount of querying.</p>
+</div></blockquote>
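+<p>A quick way to see these decisions before committing to a run is to print the pipeline
+instead of running it. A minimal sketch (<tt class="docutils literal"><span class="pre">final_task</span></tt>
+stands for the last task function of your own pipeline):</p>
+<div class="highlight-python"><pre>import sys
+from ruffus import *
+
+# list which tasks / jobs Ruffus considers incomplete or out of date,
+# without actually running anything
+pipeline_printout(sys.stdout, [final_task], verbose = 3)
+</pre>
+</div>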
+</div>
+<div class="section" id="forced-reruns">
+<span id="new-manual-dependencies-forced-reruns"></span><h3>Forced Reruns<a class="headerlink" href="#forced-reruns" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><p>Even if a pipeline stage appears to be up to date,
+you can always force the pipeline to rerun from one or more specified task functions.</p>
+<p>This is particularly useful, for example, if the pipeline data hasn’t changed but
+the analysis or computational code has.</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="n">pipeline_run</span><span class="p">(</span><span class="n">forcedtorun_tasks</span> <span class="o">=</span> <span class="p">[</span><span class="n">up_to_date_task1</span><span class="p">])</span>
+</pre></div>
+</div>
+<p>will run all tasks from <tt class="docutils literal"><span class="pre">up_to_date_task1</span></tt> to <tt class="docutils literal"><span class="pre">final_task</span></tt></p>
+</div></blockquote>
+<p>Both the “target” and the “forced” lists can include as many tasks as you wish. All dependencies
+are still carried out and out-of-date jobs rerun.</p>
+</div></blockquote>
+</div>
+<div class="section" id="esoteric-option-minimal-reruns">
+<span id="new-manual-dependencies-minimal-reruns"></span><h3>Esoteric option: Minimal Reruns<a class="headerlink" href="#esoteric-option-minimal-reruns" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><p>In the above example, if we were to delete the results of <tt class="docutils literal"><span class="pre">up_to_date_task1</span></tt>, <em>Ruffus</em>
+would rerun <tt class="docutils literal"><span class="pre">up_to_date_task1</span></tt>, <tt class="docutils literal"><span class="pre">up_to_date_task2</span></tt> and <tt class="docutils literal"><span class="pre">task3</span></tt>.</p>
+<p>However, you might argue that so long as <tt class="docutils literal"><span class="pre">up_to_date_task2</span></tt> is up-to-date, and it
+is the only necessary prerequisite for <tt class="docutils literal"><span class="pre">task3</span></tt>, we should not be concerned about
+<tt class="docutils literal"><span class="pre">up_to_date_task1</span></tt>.</p>
+<p>This is enabled with:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="n">pipeline_run</span><span class="p">([</span><span class="n">task6</span><span class="p">],</span> <span class="n">gnu_make_maximal_rebuild_mode</span> <span class="o">=</span> <span class="bp">False</span><span class="p">)</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p>This option walks down the dependency tree and proceeds no further when it encounters
+an up-to-date task (<tt class="docutils literal"><span class="pre">up_to_date_task2</span></tt>), regardless of the state of anything beyond it.</p>
+<p>This rather dangerous option is useful if you don’t want to keep all the intermediate
+files/results from upstream tasks. The pipeline will then skip any incomplete
+tasks which precede an up-to-date result.</p>
+<p>This is seldom what you intend, and you should always check that the appropriate stages
+of the pipeline are executed in the flowchart output.</p>
+</div></blockquote>
+</div>
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="../../contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#"><strong>Appendix 2</strong>: How dependency is checked</a><ul>
+<li><a class="reference internal" href="#overview">Overview</a><ul>
+<li><a class="reference internal" href="#running-all-out-of-date-tasks-and-dependents">Running all out-of-date tasks and dependents</a></li>
+<li><a class="reference internal" href="#forced-reruns">Forced Reruns</a></li>
+<li><a class="reference internal" href="#esoteric-option-minimal-reruns">Esoteric option: Minimal Reruns</a></li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="flowchart_colours.html"
+ title="previous chapter"><strong>Appendix 1</strong>: Flow Chart Colours with <tt class="docutils literal"><span class="pre">pipeline_printout_graph(...)</span></tt></a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="exceptions.html"
+ title="next chapter"><strong>Appendix 3</strong>: Exceptions thrown inside pipelines</a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../../_sources/tutorials/new_tutorial/dependencies.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="../../decorators/decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="../../decorators/originate.html">@originate</a> </li>
+ <li><a href="../../decorators/split.html">@split</a> </li>
+ <li><a href="../../decorators/transform.html">@transform</a> </li>
+ <li><a href="../../decorators/merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="../../decorators/subdivide.html">@subdivide</a> </li>
+ <li><a href="../../decorators/transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="../../decorators/collate.html">@collate</a> </li>
+ <li><a href="../../decorators/collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="../../decorators/graphviz.html">@graphviz</a> </li>
+ <li><a href="../../decorators/mkdir.html">@mkdir</a> </li>
+ <li><a href="../../decorators/follows.html">@follows / mkdir</a> </li>
+ <li><a href="../../decorators/posttask.html">@posttask touch_file</a> </li>
+ <li><a href="../../decorators/active_if.html">@active_if</a> </li>
+ <li><a href="../../decorators/jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="../../decorators/product.html">@product </a> </li>
+ <li><a href="../../decorators/permutations.html">@permutations </a> </li>
+ <li><a href="../../decorators/combinations.html">@combinations </a> </li>
+                        <li><a href="../../decorators/combinations_with_replacement.html">@combinations_with_replacement </a>    </li>
+ </ul>
+
+
+ <h4><a href="../../decorators/decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="../../decorators/files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="../../decorators/parallel.html">@parallel</a> </li>
+ <li><a href="../../decorators/check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="../../decorators/indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="exceptions.html" title="Appendix 3: Exceptions thrown inside pipelines"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="flowchart_colours.html" title="Appendix 1: Flow Chart Colours with pipeline_printout_graph(...)"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="introduction.html">Manual</a> / </li>
+ <li><a href="manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/tutorials/new_tutorial/deprecated_files.html b/doc/_build/html/tutorials/new_tutorial/deprecated_files.html
new file mode 100644
index 0000000..b48e0ba
--- /dev/null
+++ b/doc/_build/html/tutorials/new_tutorial/deprecated_files.html
@@ -0,0 +1,411 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>Appendix 5: @files: Deprecated syntax — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../../index.html" />
+ <link rel="next" title="Appendix 6: @files_re: Deprecated syntax using regular expressions" href="deprecated_files_re.html" />
+ <link rel="prev" title="Appendix 4: Names exported from Ruffus" href="list_of_ruffus_names.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="deprecated_files_re.html" title="Appendix 6: @files_re: Deprecated syntax using regular expressions"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="list_of_ruffus_names.html" title="Appendix 4: Names exported from Ruffus"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="introduction.html">Manual</a> / </li>
+ <li><a href="manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <div class="section" id="new-manual-deprecated-files-chapter-num-files-deprecated-syntax">
+<span id="new-manual-deprecated-files"></span><span id="index-0"></span><h1><strong>Appendix 5</strong>: <strong>@files</strong>: Deprecated syntax<a class="headerlink" href="#new-manual-deprecated-files-chapter-num-files-deprecated-syntax" title="Permalink to this headline">¶</a></h1>
+<div class="admonition warning">
+<p class="first admonition-title">Warning</p>
+<ul class="last">
+<li><p class="first"><strong>This is deprecated syntax</strong></p>
+<p><strong>which is no longer supported and</strong></p>
+<p><strong>should NOT be used in new code.</strong></p>
+</li>
+</ul>
+</div>
+<div class="admonition seealso">
+<p class="first admonition-title">See also</p>
+<ul class="last simple">
+<li><a class="reference internal" href="manual_contents.html#new-manual-table-of-contents"><em>Manual Table of Contents</em></a></li>
+<li><a class="reference internal" href="../../decorators/decorators.html#decorators"><em>decorators</em></a></li>
+<li><a class="reference internal" href="../../decorators/files.html#decorators-files"><em>@files</em></a> syntax in detail</li>
+</ul>
+</div>
+<div class="section" id="overview">
+<h2>Overview<a class="headerlink" href="#overview" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><div class="line-block">
+<div class="line">The python functions which do the actual work of each stage or
+<a class="reference internal" href="../../glossary.html#term-task"><em class="xref std std-term">task</em></a> of a <em>Ruffus</em> pipeline are written by you.</div>
+<div class="line">The role of <em>Ruffus</em> is to make sure these functions are called in the right order,
+with the right parameters, running in parallel using multiprocessing if desired.</div>
+</div>
+<p>The easiest way to specify parameters to <em>Ruffus</em> <a class="reference internal" href="../../glossary.html#term-task"><em class="xref std std-term">task</em></a> functions is to use
+the <a class="reference internal" href="../../decorators/files.html#decorators-files"><em>@files</em></a> decorator.</p>
+</div></blockquote>
+</div>
+<div class="section" id="files">
+<span id="index-1"></span><h2><strong>@files</strong><a class="headerlink" href="#files" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>Running this code:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+
+<span class="nd">@files</span><span class="p">(</span><span class="s">'a.1'</span><span class="p">,</span> <span class="p">[</span><span class="s">'a.2'</span><span class="p">,</span> <span class="s">'b.2'</span><span class="p">],</span> <span class="s">'A file'</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">single_job_io_task</span><span class="p">(</span><span class="n">infile</span><span class="p">,</span> <span class="n">outfiles</span><span class="p">,</span> <span class="n">text</span><span class="p">):</span>
+ <span class="k">for</span> <span class="n">o</span> <span class="ow">in</span> <span class="n">outfiles</span><span class="p">:</span> <span class="nb">open</span><span class="p">(</span><span class="n">o</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+
+<span class="c"># prepare input file</span>
+<span class="nb">open</span><span class="p">(</span><span class="s">'a.1'</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+
+<span class="n">pipeline_run</span><span class="p">()</span>
+</pre></div>
+</div>
+<dl class="docutils">
+<dt>Is equivalent to calling:</dt>
+<dd><div class="first last highlight-python"><div class="highlight"><pre><span class="n">single_job_io_task</span><span class="p">(</span><span class="s">'a.1'</span><span class="p">,</span> <span class="p">[</span><span class="s">'a.2'</span><span class="p">,</span> <span class="s">'b.2'</span><span class="p">],</span> <span class="s">'A file'</span><span class="p">)</span>
+</pre></div>
+</div>
+</dd>
+<dt>And produces:</dt>
+<dd><div class="first last highlight-python"><div class="highlight"><pre><span class="gp">>>> </span><span class="n">pipeline_run</span><span class="p">()</span>
+<span class="go"> Job = [a.1 -> [a.2, b.2], A file] completed</span>
+<span class="go">Completed Task = single_job_io_task</span>
+</pre></div>
+</div>
+</dd>
+</dl>
+</div></blockquote>
+<p><em>Ruffus</em> will automatically check if your task is up to date. The second time <a class="reference internal" href="../../pipeline_functions.html#pipeline-functions-pipeline-run"><em>pipeline_run()</em></a>
+is called, nothing will happen. But if you update <tt class="docutils literal"><span class="pre">a.1</span></tt>, the task will rerun:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="gp">>>> </span><span class="nb">open</span><span class="p">(</span><span class="s">'a.1'</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+<span class="gp">>>> </span><span class="n">pipeline_run</span><span class="p">()</span>
+<span class="go"> Job = [a.1 -> [a.2, b.2], A file] completed</span>
+<span class="go">Completed Task = single_job_io_task</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p>See <a class="reference internal" href="checkpointing.html#new-manual-skip-up-to-date-rules"><em>chapter 2</em></a> for a more in-depth discussion of how <em>Ruffus</em>
+decides which parts of the pipeline are complete and up-to-date.</p>
+</div></blockquote>
+</div>
+<div class="section" id="running-the-same-code-on-different-parameters-in-parallel">
+<span id="new-manual-files-parallel"></span><span id="index-2"></span><h2>Running the same code on different parameters in parallel<a class="headerlink" href="#running-the-same-code-on-different-parameters-in-parallel" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>Your pipeline may require the same function to be called multiple times on independent parameters.
+In that case, you can supply all the parameter sets to @files; each will be sent to a separate job that
+may run in parallel if possible. <em>Ruffus</em> will check if each separate <a class="reference internal" href="../../glossary.html#term-job"><em class="xref std std-term">job</em></a> is up-to-date using
+the <em>inputs</em> and <em>outputs</em> (first two) parameters (See the <a class="reference internal" href="transform_in_parallel.html#new-manual-only-rerun-out-of-date"><em>Up-to-date jobs are not re-run unnecessarily</em></a> ).</p>
+<p>For example, if a sequence
+(e.g. a list or tuple) of 5 sets of parameters is passed to <strong>@files</strong>,
+there will be 5 separate jobs:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+<span class="n">parameters</span> <span class="o">=</span> <span class="p">[</span>
+                    <span class="p">[</span> <span class="s">'job1.file'</span>         <span class="p">],</span> <span class="c"># 1st job</span>
+                    <span class="p">[</span> <span class="s">'job2.file'</span><span class="p">,</span> <span class="mi">4</span> <span class="p">],</span> <span class="c"># 2nd job</span>
+                    <span class="p">[</span> <span class="s">'job3.file'</span><span class="p">,</span> <span class="p">[</span><span class="mi">3</span><span class="p">,</span> <span class="mi">2</span><span class="p">]</span> <span class="p">],</span> <span class="c"># 3rd job</span>
+                    <span class="p">[</span> <span class="mi">67</span><span class="p">,</span> <span class="p">[</span><span class="mi">13</span><span class="p">,</span> <span class="s">'job4.file'</span><span class="p">]</span> <span class="p">],</span> <span class="c"># 4th job</span>
+                    <span class="p">[</span> <span class="s">'job5.file'</span>         <span class="p">],</span> <span class="c"># 5th job</span>
+                    <span class="p">]</span>
+<span class="nd">@files</span><span class="p">(</span><span class="n">parameters</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">task_file</span><span class="p">(</span><span class="o">*</span><span class="n">params</span><span class="p">):</span>
+ <span class="s">""</span>
+</pre></div>
+</div>
+</div></blockquote>
+<div class="line-block">
+<div class="line"><em>Ruffus</em> creates as many jobs as there are elements in <tt class="docutils literal"><span class="pre">parameters</span></tt>.</div>
+<div class="line">In turn, each of these elements consist of series of parameters which will be
+passed to each separate job.</div>
+</div>
+<p>Thus the above code is equivalent to calling:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="n">task_file</span><span class="p">(</span><span class="s">'job1.file'</span><span class="p">)</span>
+<span class="n">task_file</span><span class="p">(</span><span class="s">'job2.file'</span><span class="p">,</span> <span class="mi">4</span><span class="p">)</span>
+<span class="n">task_file</span><span class="p">(</span><span class="s">'job3.file'</span><span class="p">,</span> <span class="p">[</span><span class="mi">3</span><span class="p">,</span> <span class="mi">2</span><span class="p">])</span>
+<span class="n">task_file</span><span class="p">(</span><span class="mi">67</span><span class="p">,</span> <span class="p">[</span><span class="mi">13</span><span class="p">,</span> <span class="s">'job4.file'</span><span class="p">])</span>
+<span class="n">task_file</span><span class="p">(</span><span class="s">'job5.file'</span><span class="p">)</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p>What <tt class="docutils literal"><span class="pre">task_file()</span></tt> does with these parameters is up to you!</p>
+<p>The only constraint on the parameters is that <em>Ruffus</em> will treat any first
+parameter of each job as the <em>inputs</em> and any second as the <em>output</em>. Any
+strings in the <em>inputs</em> or <em>output</em> parameters (including those nested in sequences)
+will be treated as file names.</p>
+<p>Thus, to pick the parameters out of one of the above jobs:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="n">task_file</span><span class="p">(</span><span class="mi">67</span><span class="p">,</span> <span class="p">[</span><span class="mi">13</span><span class="p">,</span> <span class="s">'job4.file'</span><span class="p">])</span>
+</pre></div>
+</div>
+<div class="line-block">
+<div class="line"><em>inputs</em> == <tt class="docutils literal"><span class="pre">67</span></tt></div>
+<div class="line"><em>outputs</em> == <tt class="docutils literal"><span class="pre">[13,</span> <span class="pre">'job4.file']</span></tt></div>
+<div class="line"><br /></div>
+<div class="line-block">
+<div class="line">The solitary output filename is <tt class="docutils literal"><span class="pre">job4.file</span></tt></div>
+</div>
+</div>
+</div></blockquote>
+</div></blockquote>
+<div class="section" id="checking-if-jobs-are-up-to-date">
+<span id="new-manual-files-example"></span><span id="new-manual-files-is-uptodate"></span><span id="index-3"></span><h3>Checking if jobs are up to date<a class="headerlink" href="#checking-if-jobs-are-up-to-date" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><div class="line-block">
+<div class="line">Usually we do not want to run all the stages in a pipeline but only where
+the input data has changed or is no longer up to date.</div>
+<div class="line">One easy way to do this is to check the modification times for files produced
+at each stage of the pipeline.</div>
+</div>
+<div class="line-block">
+<div class="line">Let us first create our starting files <tt class="docutils literal"><span class="pre">a.1</span></tt> and <tt class="docutils literal"><span class="pre">b.1</span></tt></div>
+<div class="line">We can then run the following pipeline function to create</div>
+</div>
+<blockquote>
+<div><ul class="simple">
+<li><tt class="docutils literal"><span class="pre">a.2</span></tt> from <tt class="docutils literal"><span class="pre">a.1</span></tt> and</li>
+<li><tt class="docutils literal"><span class="pre">b.2</span></tt> from <tt class="docutils literal"><span class="pre">b.1</span></tt></li>
+</ul>
+<div class="highlight-python"><div class="highlight"><pre><span class="c"># create starting files</span>
+<span class="nb">open</span><span class="p">(</span><span class="s">"a.1"</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+<span class="nb">open</span><span class="p">(</span><span class="s">"b.1"</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+
+
+<span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+<span class="n">parameters</span> <span class="o">=</span> <span class="p">[</span>
+ <span class="p">[</span> <span class="s">'a.1'</span><span class="p">,</span> <span class="s">'a.2'</span><span class="p">,</span> <span class="s">'A file'</span><span class="p">],</span> <span class="c"># 1st job</span>
+ <span class="p">[</span> <span class="s">'b.1'</span><span class="p">,</span> <span class="s">'b.2'</span><span class="p">,</span> <span class="s">'B file'</span><span class="p">],</span> <span class="c"># 2nd job</span>
+ <span class="p">]</span>
+
+<span class="nd">@files</span><span class="p">(</span><span class="n">parameters</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">parallel_io_task</span><span class="p">(</span><span class="n">infile</span><span class="p">,</span> <span class="n">outfile</span><span class="p">,</span> <span class="n">text</span><span class="p">):</span>
+ <span class="c"># copy infile contents to outfile</span>
+ <span class="n">infile_text</span> <span class="o">=</span> <span class="nb">open</span><span class="p">(</span><span class="n">infile</span><span class="p">)</span><span class="o">.</span><span class="n">read</span><span class="p">()</span>
+ <span class="n">f</span> <span class="o">=</span> <span class="nb">open</span><span class="p">(</span><span class="n">outfile</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="n">infile_text</span> <span class="o">+</span> <span class="s">"</span><span class="se">\n</span><span class="s">"</span> <span class="o">+</span> <span class="n">text</spa [...]
+
+<span class="n">pipeline_run</span><span class="p">()</span>
+</pre></div>
+</div>
+</div></blockquote>
+<dl class="docutils">
+<dt>This produces the following output:</dt>
+<dd><div class="first last highlight-python"><div class="highlight"><pre><span class="gp">>>> </span><span class="n">pipeline_run</span><span class="p">()</span>
+<span class="go"> Job = [a.1 -> a.2, A file] completed</span>
+<span class="go"> Job = [b.1 -> b.2, B file] completed</span>
+<span class="go">Completed Task = parallel_io_task</span>
+</pre></div>
+</div>
+</dd>
+</dl>
+<div class="line-block">
+<div class="line">If you called <a class="reference internal" href="../../pipeline_functions.html#pipeline-functions-pipeline-run"><em>pipeline_run()</em></a> again, nothing would happen because the files are up to date:</div>
+<div class="line"><tt class="docutils literal"><span class="pre">a.2</span></tt> is more recent than <tt class="docutils literal"><span class="pre">a.1</span></tt> and</div>
+<div class="line"><tt class="docutils literal"><span class="pre">b.2</span></tt> is more recent than <tt class="docutils literal"><span class="pre">b.1</span></tt></div>
+</div>
+<dl class="docutils">
+<dt>However, if you subsequently modified <tt class="docutils literal"><span class="pre">a.1</span></tt> again:</dt>
+<dd><div class="first last highlight-python"><div class="highlight"><pre><span class="nb">open</span><span class="p">(</span><span class="s">"a.1"</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+<span class="n">pipeline_run</span><span class="p">(</span><span class="n">verbose</span> <span class="o">=</span> <span class="mi">1</span><span class="p">)</span>
+</pre></div>
+</div>
+</dd>
+</dl>
+<p>you would see the following:</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="gp">>>> </span><span class="n">pipeline_run</span><span class="p">([</span><span class="n">parallel_io_task</span><span class="p">])</span>
+<span class="go">Task = parallel_io_task</span>
+<span class="go"> Job = ["a.1" -> "a.2", "A file"] completed</span>
+<span class="go"> Job = ["b.1" -> "b.2", "B file"] unnecessary: already up to date</span>
+<span class="go">Completed Task = parallel_io_task</span>
+</pre></div>
+</div>
+<p>The 2nd job is up to date and will be skipped.</p>
+</div></blockquote>
+</div>
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="../../contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#"><strong>Appendix 5</strong>: <strong>@files</strong>: Deprecated syntax</a><ul>
+<li><a class="reference internal" href="#overview">Overview</a></li>
+<li><a class="reference internal" href="#files"><strong>@files</strong></a></li>
+<li><a class="reference internal" href="#running-the-same-code-on-different-parameters-in-parallel">Running the same code on different parameters in parallel</a><ul>
+<li><a class="reference internal" href="#checking-if-jobs-are-up-to-date">Checking if jobs are up to date</a></li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="list_of_ruffus_names.html"
+ title="previous chapter"><strong>Appendix 4</strong>: Names exported from Ruffus</a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="deprecated_files_re.html"
+ title="next chapter"><strong>Appendix 6</strong>: <strong>@files_re</strong>: Deprecated <cite>syntax using regular expressions</cite></a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../../_sources/tutorials/new_tutorial/deprecated_files.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="../../decorators/decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="../../decorators/originate.html">@originate</a> </li>
+ <li><a href="../../decorators/split.html">@split</a> </li>
+ <li><a href="../../decorators/transform.html">@transform</a> </li>
+ <li><a href="../../decorators/merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="../../decorators/subdivide.html">@subdivide</a> </li>
+ <li><a href="../../decorators/transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="../../decorators/collate.html">@collate</a> </li>
+ <li><a href="../../decorators/collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="../../decorators/graphviz.html">@graphviz</a> </li>
+ <li><a href="../../decorators/mkdir.html">@mkdir</a> </li>
+ <li><a href="../../decorators/follows.html">@follows / mkdir</a> </li>
+ <li><a href="../../decorators/posttask.html">@posttask touch_file</a> </li>
+ <li><a href="../../decorators/active_if.html">@active_if</a> </li>
+ <li><a href="../../decorators/jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="../../decorators/product.html">@product </a> </li>
+ <li><a href="../../decorators/permutations.html">@permutations </a> </li>
+ <li><a href="../../decorators/combinations.html">@combinations </a> </li>
+                        <li><a href="../../decorators/combinations_with_replacement.html">@combinations_with_replacement </a>    </li>
+ </ul>
+
+
+ <h4><a href="../../decorators/decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="../../decorators/files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="../../decorators/parallel.html">@parallel</a> </li>
+ <li><a href="../../decorators/check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="../../decorators/indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="deprecated_files_re.html" title="Appendix 6: @files_re: Deprecated syntax using regular expressions"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="list_of_ruffus_names.html" title="Appendix 4: Names exported from Ruffus"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="introduction.html">Manual</a> / </li>
+ <li><a href="manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/tutorials/new_tutorial/deprecated_files_re.html b/doc/_build/html/tutorials/new_tutorial/deprecated_files_re.html
new file mode 100644
index 0000000..531a83e
--- /dev/null
+++ b/doc/_build/html/tutorials/new_tutorial/deprecated_files_re.html
@@ -0,0 +1,319 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>Appendix 6: @files_re: Deprecated syntax using regular expressions — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../../index.html" />
+ <link rel="next" title="Chapter 1: Python Code for An introduction to basic Ruffus syntax" href="introduction_code.html" />
+ <link rel="prev" title="Appendix 5: @files: Deprecated syntax" href="deprecated_files.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="introduction_code.html" title="Chapter 1: Python Code for An introduction to basic Ruffus syntax"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="deprecated_files.html" title="Appendix 5: @files: Deprecated syntax"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="introduction.html">Manual</a> / </li>
+ <li><a href="manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <div class="section" id="new-manual-deprecated-files-re-chapter-num-files-re-deprecated-syntax-using-regular-expressions">
+<span id="new-manual-deprecated-files-re"></span><span id="index-0"></span><h1><strong>Appendix 6</strong>: <strong>@files_re</strong>: Deprecated <cite>syntax using regular expressions</cite><a class="headerlink" href="#new-manual-deprecated-files-re-chapter-num-files-re-deprecated-syntax-using-regular-expressions" title="Permalink to this headline">¶</a></h1>
+<div class="admonition warning">
+<p class="first admonition-title">Warning</p>
+<ul class="last">
+<li><p class="first"><strong>This is deprecated syntax</strong></p>
+<p><strong>which is no longer supported and</strong></p>
+<p><strong>should NOT be used in new code.</strong></p>
+</li>
+</ul>
+</div>
+<div class="admonition seealso">
+<p class="first admonition-title">See also</p>
+<ul class="last simple">
+<li><a class="reference internal" href="manual_contents.html#new-manual-table-of-contents"><em>Manual Table of Contents</em></a></li>
+<li><a class="reference internal" href="../../decorators/decorators.html#decorators"><em>decorators</em></a></li>
+<li><a class="reference internal" href="../../decorators/files_re.html#decorators-files-re"><em>@files_re</em></a> syntax in detail</li>
+</ul>
+</div>
+<div class="section" id="overview">
+<h2>Overview<a class="headerlink" href="#overview" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p><strong>@files_re</strong> combines the functionality of @transform, @collate and @merge in
+one overloaded decorator.</p>
+<p>This is why its use is discouraged: the <strong>@files_re</strong> syntax is far too overloaded
+and context-dependent to support its myriad of different functions cleanly.</p>
+<p>The following documentation is provided to help maintain historical <em>Ruffus</em> usage.</p>
+</div></blockquote>
+<div class="section" id="transforming-input-and-output-filenames">
+<h3>Transforming input and output filenames<a class="headerlink" href="#transforming-input-and-output-filenames" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><p>For example, the following code takes files from
+the previous pipeline task, and makes new output parameters with the <tt class="docutils literal"><span class="pre">.sums</span></tt> suffix
+in place of the <tt class="docutils literal"><span class="pre">.chunks</span></tt> suffix:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="nd">@transform</span><span class="p">(</span><span class="n">step_4_split_numbers_into_chunks</span><span class="p">,</span> <span class="n">suffix</span><span class="p">(</span><span class="s">".chunks"</span><span class="p">),</span> <span class="s">".sums"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">step_5_calculate_sum_of_squares</span> <span class="p">(</span><span class="n">input_file_name</span><span class="p">,</span> <span class="n">output_file_name</span><span class="p">):</span>
+ <span class="c">#</span>
+ <span class="c"># calculate sums and sums of squares for all values in the input_file_name</span>
+ <span class="c"># writing to output_file_name</span>
+ <span class="s">""</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p>This can be written using @files_re equivalently:</p>
+<blockquote>
+<div><div class="highlight-python"><pre>@files_re(step_4_split_numbers_into_chunks, r".chunks", r".sums")
+def step_5_calculate_sum_of_squares (input_file_name, output_file_name):
+""</pre>
+</div>
+</div></blockquote>
+</div></blockquote>
+<span class="target" id="new-manual-files-re-combine"></span></div>
+<div class="section" id="collating-many-inputs-into-a-single-output">
+<span id="index-1"></span><h3>Collating many <em>inputs</em> into a single <em>output</em><a class="headerlink" href="#collating-many-inputs-into-a-single-output" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><p>Similarly, the following code collects <strong>inputs</strong>
+from the same species in the same directory:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="nd">@collate</span><span class="p">(</span><span class="s">'*.animals'</span><span class="p">,</span> <span class="c"># inputs = all *.animal files</span>
+ <span class="n">regex</span><span class="p">(</span><span class="s">r'mammals.([^.]+)'</span><span class="p">),</span> <span class="c"># regular expression</span>
+ <span class="s">r'\1/animals.in_my_zoo'</span><span class="p">,</span> <span class="c"># single output file per species</span>
+ <span class="s">r'\1'</span> <span class="p">)</span> <span class="c"># species name</span>
+<span class="k">def</span> <span class="nf">capture_mammals</span><span class="p">(</span><span class="n">infiles</span><span class="p">,</span> <span class="n">outfile</span><span class="p">,</span> <span class="n">species</span><span class="p">):</span>
+ <span class="c"># summarise all animals of this species</span>
+ <span class="s">""</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p>This can be written equivalently using @files_re with the <a class="reference internal" href="../../decorators/indicator_objects.html#decorators-combine"><em>combine</em></a> indicator:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="nd">@files_re</span><span class="p">(</span><span class="s">'*.animals'</span><span class="p">,</span> <span class="c"># inputs = all *.animal files</span>
+ <span class="s">r'mammals.([^.]+)'</span><span class="p">,</span> <span class="c"># regular expression</span>
+ <span class="n">combine</span><span class="p">(</span><span class="s">r'\1/animals.in_my_zoo'</span><span class="p">),</span> <span class="c"># single output file per species</span>
+ <span class="s">r'\1'</span> <span class="p">)</span> <span class="c"># species name</span>
+<span class="k">def</span> <span class="nf">capture_mammals</span><span class="p">(</span><span class="n">infiles</span><span class="p">,</span> <span class="n">outfile</span><span class="p">,</span> <span class="n">species</span><span class="p">):</span>
+ <span class="c"># summarise all animals of this species</span>
+ <span class="s">""</span>
+</pre></div>
+</div>
+</div></blockquote>
+</div></blockquote>
+</div>
+<div class="section" id="generating-input-and-output-parameter-using-regular-expresssions">
+<h3>Generating <em>input</em> and <em>output</em> parameters using regular expressions<a class="headerlink" href="#generating-input-and-output-parameter-using-regular-expresssions" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><p>The following code generates additional
+<em>input</em> prerequisite file names which match the original <em>input</em> files.</p>
+<p>We want each job of our <tt class="docutils literal"><span class="pre">analyse()</span></tt> function to get corresponding pairs
+of <tt class="docutils literal"><span class="pre">xx.chunks</span></tt> and <tt class="docutils literal"><span class="pre">xx.red_indian</span></tt> files when</p>
+<blockquote>
+<div><p><tt class="docutils literal"><span class="pre">*.chunks</span></tt> are generated by the task function <tt class="docutils literal"><span class="pre">split_up_problem()</span></tt> and
+<tt class="docutils literal"><span class="pre">*.red_indian</span></tt> are generated by the task function <tt class="docutils literal"><span class="pre">make_red_indians()</span></tt>:</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="nd">@follows</span><span class="p">(</span><span class="n">make_red_indians</span><span class="p">)</span>
+<span class="nd">@transform</span><span class="p">(</span><span class="n">split_up_problem</span><span class="p">,</span> <span class="c"># starting set of *inputs*</span>
+ <span class="n">regex</span><span class="p">(</span><span class="s">r"(.*).chunks"</span><span class="p">),</span> <span class="c"># regular expression</span>
+ <span class="n">inputs</span><span class="p">([</span><span class="s">r"\g<0>"</span><span class="p">,</span> <span class="c"># xx.chunks</span>
+ <span class="s">r"\1.red_indian"</span><span class="p">]),</span> <span class="c"># important.file</span>
+ <span class="s">r"\1.results"</span> <span class="c"># xx.results</span>
+ <span class="p">)</span>
+<span class="k">def</span> <span class="nf">analyse</span><span class="p">(</span><span class="n">input_filenames</span><span class="p">,</span> <span class="n">output_file_name</span><span class="p">):</span>
+ <span class="s">"Do analysis here"</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p>The equivalent code using @files_re looks very similar:</p>
+<blockquote>
+<div><div class="highlight-python"><pre>@follows(make_red_indians)
+ at files_re( split_up_problem, # starting set of *inputs*
+ r"(.*).chunks", # regular expression
+ [r"\g<0>", # xx.chunks
+ r"\1.red_indian"]), # important.file
+ r"\1.results") # xx.results
+def analyse(input_filenames, output_file_name):
+ "Do analysis here"</pre>
+</div>
+</div></blockquote>
+</div></blockquote>
+</div>
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="../../contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#"><strong>Appendix 6</strong>: <strong>@files_re</strong>: Deprecated <cite>syntax using regular expressions</cite></a><ul>
+<li><a class="reference internal" href="#overview">Overview</a><ul>
+<li><a class="reference internal" href="#transforming-input-and-output-filenames">Transforming input and output filenames</a></li>
+<li><a class="reference internal" href="#collating-many-inputs-into-a-single-output">Collating many <em>inputs</em> into a single <em>output</em></a></li>
+<li><a class="reference internal" href="#generating-input-and-output-parameter-using-regular-expresssions">Generating <em>input</em> and <em>output</em> parameter using regular expresssions</a></li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="deprecated_files.html"
+ title="previous chapter"><strong>Appendix 5</strong>: <strong>@files</strong>: Deprecated syntax</a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="introduction_code.html"
+ title="next chapter"><strong>Chapter 1</strong>: Python Code for An introduction to basic Ruffus syntax</a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../../_sources/tutorials/new_tutorial/deprecated_files_re.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="../../decorators/decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="../../decorators/originate.html">@originate</a> </li>
+ <li><a href="../../decorators/split.html">@split</a> </li>
+ <li><a href="../../decorators/transform.html">@transform</a> </li>
+ <li><a href="../../decorators/merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="../../decorators/subdivide.html">@subdivide</a> </li>
+ <li><a href="../../decorators/transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="../../decorators/collate.html">@collate</a> </li>
+ <li><a href="../../decorators/collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="../../decorators/graphviz.html">@graphviz</a> </li>
+ <li><a href="../../decorators/mkdir.html">@mkdir</a> </li>
+ <li><a href="../../decorators/follows.html">@follows / mkdir</a> </li>
+ <li><a href="../../decorators/posttask.html">@posttask touch_file</a> </li>
+ <li><a href="../../decorators/active_if.html">@active_if</a> </li>
+ <li><a href="../../decorators/jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="../../decorators/product.html">@product </a> </li>
+ <li><a href="../../decorators/permutations.html">@permutations </a> </li>
+ <li><a href="../../decorators/combinations.html">@combinations </a> </li>
+ <li><a href="../../decorators/combinations_with_replacement.html">@combinations_ _with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="../../decorators/decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="../../decorators/files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="../../decorators/parallel.html">@parallel</a> </li>
+ <li><a href="../../decorators/check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="../../decorators/indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="introduction_code.html" title="Chapter 1: Python Code for An introduction to basic Ruffus syntax"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="deprecated_files.html" title="Appendix 5: @files: Deprecated syntax"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="introduction.html">Manual</a> / </li>
+ <li><a href="manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/tutorials/new_tutorial/exceptions.html b/doc/_build/html/tutorials/new_tutorial/exceptions.html
new file mode 100644
index 0000000..1f10281
--- /dev/null
+++ b/doc/_build/html/tutorials/new_tutorial/exceptions.html
@@ -0,0 +1,359 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>Appendix 3: Exceptions thrown inside pipelines — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../../index.html" />
+ <link rel="next" title="Appendix 4: Names exported from Ruffus" href="list_of_ruffus_names.html" />
+ <link rel="prev" title="Appendix 2: How dependency is checked" href="dependencies.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="list_of_ruffus_names.html" title="Appendix 4: Names exported from Ruffus"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="dependencies.html" title="Appendix 2: How dependency is checked"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="introduction.html">Manual</a> / </li>
+ <li><a href="manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <div class="section" id="new-manual-exceptions-chapter-num-exceptions-thrown-inside-pipelines">
+<span id="new-manual-exceptions"></span><span id="index-0"></span><h1><strong>Appendix 3</strong>: Exceptions thrown inside pipelines<a class="headerlink" href="#new-manual-exceptions-chapter-num-exceptions-thrown-inside-pipelines" title="Permalink to this headline">¶</a></h1>
+<div class="section" id="overview">
+<h2>Overview<a class="headerlink" href="#overview" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>The goal for <em>Ruffus</em> is that exceptions should just work <em>out-of-the-box</em> without any fuss.
+This is especially important for exceptions raised by your code, which may be running
+in a different process. Often multiple parallel operations (jobs or tasks) fail at the
+same time. <em>Ruffus</em> will forward each of these exceptions with its traceback so you
+can jump straight to the offending line.</p>
+<p>This example shows separate exceptions from two jobs running in parallel:</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+
+<span class="nd">@originate</span><span class="p">([</span><span class="s">"a.start"</span><span class="p">,</span> <span class="s">"b.start"</span><span class="p">,</span> <span class="s">"c.start"</span><span class="p">,</span> <span class="s">"d.start"</span><span class="p">,</span> <span class="s">"e.start"</span><span class="p">])</span>
+<span class="k">def</span> <span class="nf">throw_exceptions_here</span><span class="p">(</span><span class="n">output_file</span><span class="p">):</span>
+ <span class="k">raise</span> <span class="ne">Exception</span><span class="p">(</span><span class="s">"OOPS"</span><span class="p">)</span>
+
+<span class="n">pipeline_run</span><span class="p">(</span><span class="n">multiprocess</span> <span class="o">=</span> <span class="mi">2</span><span class="p">)</span>
+</pre></div>
+</div>
+<div class="highlight-pycon"><div class="highlight"><pre><span class="go"> >>> pipeline_run(multiprocess = 2)</span>
+
+<span class="go"> ruffus.ruffus_exceptions.RethrownJobError:</span>
+
+<span class="hll"><span class="go"> Original exceptions:</span>
+</span>
+<span class="go"> Exception #1</span>
+<span class="go"> 'exceptions.Exception(OOPS)' raised in ...</span>
+<span class="go"> Task = def throw_exceptions_here(...):</span>
+<span class="go"> Job = [None -> b.start]</span>
+
+<span class="go"> Traceback (most recent call last):</span>
+<span class="go"> File "/usr/local/lib/python2.7/dist-packages/ruffus/task.py", line 685, in run_pooled_job_without_exceptions</span>
+<span class="go"> return_value = job_wrapper(param, user_defined_work_func, register_cleanup, touch_files_only)</span>
+<span class="go"> File "/usr/local/lib/python2.7/dist-packages/ruffus/task.py", line 549, in job_wrapper_output_files</span>
+<span class="go"> job_wrapper_io_files(param, user_defined_work_func, register_cleanup, touch_files_only, output_files_only = True)</span>
+<span class="go"> File "/usr/local/lib/python2.7/dist-packages/ruffus/task.py", line 504, in job_wrapper_io_files</span>
+<span class="go"> ret_val = user_defined_work_func(*(param[1:]))</span>
+<span class="go"> File "<stdin>", line 3, in throw_exceptions_here</span>
+<span class="go"> Exception: OOPS</span>
+<span class="hll">
+</span>
+<span class="go"> Exception #2</span>
+<span class="go"> 'exceptions.Exception(OOPS)' raised in ...</span>
+<span class="go"> Task = def throw_exceptions_here(...):</span>
+<span class="go"> Job = [None -> a.start]</span>
+
+<span class="go"> Traceback (most recent call last):</span>
+<span class="go"> File "/usr/local/lib/python2.7/dist-packages/ruffus/task.py", line 685, in run_pooled_job_without_exceptions</span>
+<span class="go"> return_value = job_wrapper(param, user_defined_work_func, register_cleanup, touch_files_only)</span>
+<span class="go"> File "/usr/local/lib/python2.7/dist-packages/ruffus/task.py", line 549, in job_wrapper_output_files</span>
+<span class="go"> job_wrapper_io_files(param, user_defined_work_func, register_cleanup, touch_files_only, output_files_only = True)</span>
+<span class="go"> File "/usr/local/lib/python2.7/dist-packages/ruffus/task.py", line 504, in job_wrapper_io_files</span>
+<span class="go"> ret_val = user_defined_work_func(*(param[1:]))</span>
+<span class="go"> File "<stdin>", line 3, in throw_exceptions_here</span>
+<span class="go"> Exception: OOPS</span>
+
+
+<span class="go">.. image:: ../../images/manual_exceptions.png</span>
+</pre></div>
+</div>
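+<p>If you need to handle these accumulated errors programmatically rather than let them terminate
+the script, the <tt class="docutils literal"><span class="pre">RethrownJobError</span></tt> shown above can be caught around
+<tt class="docutils literal"><span class="pre">pipeline_run(...)</span></tt>. The following is only a minimal sketch: the import path is taken
+from the exception name printed above, and the handling here simply prints the error.</p>
+<blockquote>
+<div><div class="highlight-python"><pre>from ruffus import *
+from ruffus.ruffus_exceptions import RethrownJobError
+
+@originate(["a.start", "b.start"])
+def throw_exceptions_here(output_file):
+    raise Exception("OOPS")
+
+try:
+    pipeline_run(multiprocess = 2)
+except RethrownJobError as e:
+    # one entry per failed job; log or inspect before deciding what to do next
+    print(e)</pre>
+</div>
+</div></blockquote>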
+</div></blockquote>
+<span class="target" id="new-manual-exceptions-multiple-errors"></span></div>
+<div class="section" id="pipelines-running-in-parallel-accumulate-exceptions">
+<span id="index-1"></span><h2>Pipelines running in parallel accumulate Exceptions<a class="headerlink" href="#pipelines-running-in-parallel-accumulate-exceptions" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>As shown above, by default <em>Ruffus</em> accumulates <tt class="docutils literal"><span class="pre">NN</span></tt> exceptions before interrupting the pipeline prematurely, where
+<tt class="docutils literal"><span class="pre">NN</span></tt> is the specified parallelism for <a class="reference internal" href="../../pipeline_functions.html#pipeline-functions-pipeline-run"><em>pipeline_run(multiprocess = NN)</em></a></p>
+<p>This seems a fair tradeoff between being able to gather detailed error information for
+running jobs, and not wasting too much time on a pipeline that is going to fail anyway.</p>
+</div></blockquote>
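+<p>For example, a minimal sketch reusing the <tt class="docutils literal"><span class="pre">throw_exceptions_here()</span></tt> task from above: with
+<tt class="docutils literal"><span class="pre">multiprocess</span> <span class="pre">=</span> <span class="pre">3</span></tt>, three such exceptions are gathered before the pipeline is interrupted.</p>
+<blockquote>
+<div><div class="highlight-python"><pre>from ruffus import *
+
+@originate(["a.start", "b.start", "c.start", "d.start", "e.start"])
+def throw_exceptions_here(output_file):
+    raise Exception("OOPS")
+
+# parallelism of 3: three exceptions are accumulated before the
+# pipeline is interrupted and RethrownJobError is raised
+pipeline_run(multiprocess = 3)</pre>
+</div>
+</div></blockquote>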
+</div>
+<div class="section" id="terminate-pipeline-immediately-upon-exceptions">
+<h2>Terminate pipeline immediately upon Exceptions<a class="headerlink" href="#terminate-pipeline-immediately-upon-exceptions" title="Permalink to this headline">¶</a></h2>
+<div class="section" id="set-pipeline-run-exceptions-terminate-immediately-true">
+<h3>Set <a class="reference internal" href="../../pipeline_functions.html#pipeline-functions-pipeline-run"><em>pipeline_run(exceptions_terminate_immediately = True)</em></a><a class="headerlink" href="#set-pipeline-run-exceptions-terminate-immediately-true" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><p>To have all exceptions interrupt the pipeline immediately, invoke:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="n">pipeline_run</span><span class="p">(</span><span class="n">exceptions_terminate_immediately</span> <span class="o">=</span> <span class="bp">True</span><span class="p">)</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p>For example, with this change, only a single exception will be thrown before the pipeline is interrupted:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+
+<span class="nd">@originate</span><span class="p">([</span><span class="s">"a.start"</span><span class="p">,</span> <span class="s">"b.start"</span><span class="p">,</span> <span class="s">"c.start"</span><span class="p">,</span> <span class="s">"d.start"</span><span class="p">,</span> <span class="s">"e.start"</span><span class="p">])</span>
+<span class="k">def</span> <span class="nf">throw_exceptions_here</span><span class="p">(</span><span class="n">output_file</span><span class="p">):</span>
+ <span class="k">raise</span> <span class="ne">Exception</span><span class="p">(</span><span class="s">"OOPS"</span><span class="p">)</span>
+
+<span class="n">pipeline_run</span><span class="p">(</span><span class="n">multiprocess</span> <span class="o">=</span> <span class="mi">2</span><span class="p">,</span> <span class="n">exceptions_terminate_immediately</span> <span class="o">=</span> <span class="bp">True</span><span class="p">)</span>
+</pre></div>
+</div>
+</div></blockquote>
+<div class="highlight-pycon"><div class="highlight"><pre><span class="go"> >>> pipeline_run(multiprocess = 2)</span>
+
+<span class="go"> ruffus.ruffus_exceptions.RethrownJobError:</span>
+
+<span class="hll"><span class="go"> Original exception:</span>
+</span>
+<span class="go"> Exception #1</span>
+<span class="go"> 'exceptions.Exception(OOPS)' raised in ...</span>
+<span class="go"> Task = def throw_exceptions_here(...):</span>
+<span class="go"> Job = [None -> a.start]</span>
+
+<span class="go"> Traceback (most recent call last):</span>
+<span class="go"> [Tedious traceback snipped out!!!....]</span>
+<span class="go"> Exception: OOPS</span>
+</pre></div>
+</div>
+</div></blockquote>
+</div>
+<div class="section" id="raise-ruffus-jobsignalledbreak">
+<h3>raise <tt class="docutils literal"><span class="pre">Ruffus.JobSignalledBreak</span></tt><a class="headerlink" href="#raise-ruffus-jobsignalledbreak" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><p>The same can be accomplished on a finer scale by throwing the <tt class="docutils literal"><span class="pre">Ruffus.JobSignalledBreak</span></tt> Exception. Unlike
+other exceptions, this causes an immediate halt in pipeline execution. If there are other exceptions in play at that
+point, they will be rethrown in the main process but no new exceptions will be added.</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+
+<span class="nd">@originate</span><span class="p">([</span><span class="s">"a.start"</span><span class="p">,</span> <span class="s">"b.start"</span><span class="p">,</span> <span class="s">"c.start"</span><span class="p">,</span> <span class="s">"d.start"</span><span class="p">,</span> <span class="s">"e.start"</span><span class="p">])</span>
+<span class="k">def</span> <span class="nf">throw_exceptions_here</span><span class="p">(</span><span class="n">output_file</span><span class="p">):</span>
+ <span class="k">raise</span> <span class="n">JobSignalledBreak</span><span class="p">(</span><span class="s">"OOPS"</span><span class="p">)</span>
+
+<span class="n">pipeline_run</span><span class="p">(</span><span class="n">multiprocess</span> <span class="o">=</span> <span class="mi">2</span><span class="p">)</span>
+</pre></div>
+</div>
+</div></blockquote>
+</div></blockquote>
+</div>
+</div>
+<div class="section" id="display-exceptions-as-they-occur">
+<h2>Display exceptions as they occur<a class="headerlink" href="#display-exceptions-as-they-occur" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>In the following example, the jobs throw exceptions
+at staggered two-second intervals. With <tt class="docutils literal"><span class="pre">log_exceptions</span> <span class="pre">=</span> <span class="pre">True</span></tt>, the
+exceptions are displayed as they occur even though the pipeline continues running.</p>
+<p>logger.error(...) will be invoked with the string representation of each exception and its associated stack trace.</p>
+<p>The default logger prints to sys.stderr, but as usual it can be changed to any logger from the logging module, or any compatible object, via
+<a class="reference internal" href="../../pipeline_functions.html#pipeline-functions-pipeline-run"><em>pipeline_run(logger = XXX)</em></a>.</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+<span class="kn">import</span> <span class="nn">time</span><span class="o">,</span> <span class="nn">os</span>
+
+<span class="nd">@originate</span><span class="p">([</span><span class="s">"1.start"</span><span class="p">,</span> <span class="s">"2.start"</span><span class="p">,</span> <span class="s">"3.start"</span><span class="p">,</span> <span class="s">"4.start"</span><span class="p">,</span> <span class="s">"5.start"</span><span class="p">])</span>
+<span class="k">def</span> <span class="nf">throw_exceptions_here</span><span class="p">(</span><span class="n">output_file</span><span class="p">):</span>
+ <span class="n">delay</span> <span class="o">=</span> <span class="nb">int</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">splitext</span><span class="p">(</span><span class="n">output_file</span><span class="p">)[</span><span class="mi">0</span><span class="p">])</span>
+ <span class="n">time</span><span class="o">.</span><span class="n">sleep</span><span class="p">(</span><span class="n">delay</span> <span class="o">*</span> <span class="mi">2</span><span class="p">)</span>
+ <span class="k">raise</span> <span class="n">JobSignalledBreak</span><span class="p">(</span><span class="s">"OOPS"</span><span class="p">)</span>
+
+<span class="n">pipeline_run</span><span class="p">(</span><span class="n">log_exceptions</span> <span class="o">=</span> <span class="bp">True</span><span class="p">,</span> <span class="n">multiprocess</span> <span class="o">=</span> <span class="mi">5</span><span class="p">)</span>
+</pre></div>
+</div>
+</div></blockquote>
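+<p>The following is a minimal sketch of plugging in a standard logging-module logger in place of the
+default (the logger name <tt class="docutils literal"><span class="pre">"my_pipeline"</span></tt> and the example task are arbitrary illustrations):</p>
+<blockquote>
+<div><div class="highlight-python"><pre>import logging
+from ruffus import *
+
+@originate(["1.start", "2.start", "3.start"])
+def throw_exceptions_here(output_file):
+    raise Exception("OOPS")
+
+# any logger from the logging module (or a compatible object) can be
+# passed to pipeline_run() in place of the default stderr logger
+my_logger = logging.getLogger("my_pipeline")
+my_logger.addHandler(logging.StreamHandler())
+my_logger.setLevel(logging.DEBUG)
+
+pipeline_run(log_exceptions = True, logger = my_logger, multiprocess = 3)</pre>
+</div>
+</div></blockquote>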
+</div></blockquote>
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="../../contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#"><strong>Appendix 3</strong>: Exceptions thrown inside pipelines</a><ul>
+<li><a class="reference internal" href="#overview">Overview</a></li>
+<li><a class="reference internal" href="#pipelines-running-in-parallel-accumulate-exceptions">Pipelines running in parallel accumulate Exceptions</a></li>
+<li><a class="reference internal" href="#terminate-pipeline-immediately-upon-exceptions">Terminate pipeline immediately upon Exceptions</a><ul>
+<li><a class="reference internal" href="#set-pipeline-run-exceptions-terminate-immediately-true">Set <tt class="docutils literal"><span class="pre">pipeline_run(exceptions_terminate_immediately</span> <span class="pre">=</span> <span class="pre">True)</span></tt></a></li>
+<li><a class="reference internal" href="#raise-ruffus-jobsignalledbreak">raise <tt class="docutils literal"><span class="pre">Ruffus.JobSignalledBreak</span></tt></a></li>
+</ul>
+</li>
+<li><a class="reference internal" href="#display-exceptions-as-they-occur">Display exceptions as they occur</a></li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="dependencies.html"
+ title="previous chapter"><strong>Appendix 2</strong>: How dependency is checked</a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="list_of_ruffus_names.html"
+ title="next chapter"><strong>Appendix 4</strong>: Names exported from Ruffus</a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../../_sources/tutorials/new_tutorial/exceptions.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="../../decorators/decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="../../decorators/originate.html">@originate</a> </li>
+ <li><a href="../../decorators/split.html">@split</a> </li>
+ <li><a href="../../decorators/transform.html">@transform</a> </li>
+ <li><a href="../../decorators/merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="../../decorators/subdivide.html">@subdivide</a> </li>
+ <li><a href="../../decorators/transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="../../decorators/collate.html">@collate</a> </li>
+ <li><a href="../../decorators/collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="../../decorators/graphviz.html">@graphviz</a> </li>
+ <li><a href="../../decorators/mkdir.html">@mkdir</a> </li>
+ <li><a href="../../decorators/follows.html">@follows / mkdir</a> </li>
+ <li><a href="../../decorators/posttask.html">@posttask touch_file</a> </li>
+ <li><a href="../../decorators/active_if.html">@active_if</a> </li>
+ <li><a href="../../decorators/jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="../../decorators/product.html">@product </a> </li>
+ <li><a href="../../decorators/permutations.html">@permutations </a> </li>
+ <li><a href="../../decorators/combinations.html">@combinations </a> </li>
+ <li><a href="../../decorators/combinations_with_replacement.html">@combinations_ _with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="../../decorators/decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="../../decorators/files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="../../decorators/parallel.html">@parallel</a> </li>
+ <li><a href="../../decorators/check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="../../decorators/indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="list_of_ruffus_names.html" title="Appendix 4: Names exported from Ruffus"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="dependencies.html" title="Appendix 2: How dependency is checked"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="introduction.html">Manual</a> / </li>
+ <li><a href="manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/tutorials/new_tutorial/flowchart_colours.html b/doc/_build/html/tutorials/new_tutorial/flowchart_colours.html
new file mode 100644
index 0000000..f130c47
--- /dev/null
+++ b/doc/_build/html/tutorials/new_tutorial/flowchart_colours.html
@@ -0,0 +1,241 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>Appendix 1: Flow Chart Colours with pipeline_printout_graph(...) — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../../index.html" />
+ <link rel="next" title="Appendix 2: How dependency is checked" href="dependencies.html" />
+ <link rel="prev" title="Chapter 23: Esoteric: Writing custom functions to decide which jobs are up to date with @check_if_uptodate" href="check_if_uptodate.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="dependencies.html" title="Appendix 2: How dependency is checked"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="check_if_uptodate.html" title="Chapter 23: Esoteric: Writing custom functions to decide which jobs are up to date with @check_if_uptodate"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="introduction.html">Manual</a> / </li>
+ <li><a href="manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <div class="section" id="new-manual-flowchart-colours-chapter-num-flow-chart-colours-with-pipeline-printout-graph">
+<span id="new-manual-flowchart-colours"></span><span id="index-0"></span><h1><strong>Appendix 1</strong>: Flow Chart Colours with <a class="reference internal" href="../../pipeline_functions.html#pipeline-functions-pipeline-printout-graph"><em>pipeline_printout_graph(...)</em></a><a class="headerlink" href="#new-manual-flowchart-colours-chapter-num-flow-chart-colours-with-pipeline-printout-graph" title="Permalink to this headline">¶</a></h1>
+<div class="admonition seealso">
+<p class="first admonition-title">See also</p>
+<ul class="last simple">
+<li><a class="reference internal" href="manual_contents.html#new-manual-table-of-contents"><em>Manual Table of Contents</em></a></li>
+<li><a class="reference internal" href="../../pipeline_functions.html#pipeline-functions-pipeline-printout-graph"><em>pipeline_printout_graph(...)</em></a></li>
+<li><a class="reference download internal" href="../../_downloads/play_with_colours.py"><tt class="xref download docutils literal"><span class="pre">Download</span> <span class="pre">code</span></tt></a></li>
+<li><a class="reference internal" href="flowchart_colours_code.html#new-manual-flowchart-colours-code"><em>Code</em></a> for experimenting with colours</li>
+</ul>
+</div>
+<div class="section" id="flowchart-colours">
+<h2>Flowchart colours<a class="headerlink" href="#flowchart-colours" title="Permalink to this headline">¶</a></h2>
+<p>The appearance of <em>Ruffus</em> flowcharts produced by <a class="reference internal" href="../../pipeline_functions.html#pipeline-functions-pipeline-printout-graph"><em>pipeline_printout_graph</em></a>
+can be extensively customised.</p>
+<p>This is mainly controlled by the <a class="reference internal" href="../../pipeline_functions.html#pipeline-functions-pipeline-printout-graph-user-colour-scheme"><em>user_colour_scheme</em></a> (note UK spelling of “colour”) parameter.</p>
+<p>Example:</p>
+<blockquote>
+<div><p>Use colour scheme index = 1</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="n">pipeline_printout_graph</span> <span class="p">(</span><span class="s">"flowchart.svg"</span><span class="p">,</span> <span class="s">"svg"</span><span class="p">,</span> <span class="p">[</span><span class="n">final_task</span><span class="p">],</span>
+ <span class="n">user_colour_scheme</span> <span class="o">=</span> <span class="p">{</span>
+ <span class="s">"colour_scheme_index"</span> <span class="p">:</span><span class="mi">1</span><span class="p">,</span>
+ <span class="s">"Pipeline"</span> <span class="p">:{</span><span class="s">"fontcolor"</span> <span class="p">:</span> <span class="s">'"#FF3232"'</span> <span class="p">},</span>
+ <span class="s">"Key"</span> <span class="p">:{</span><span class="s">"fontcolor"</span> <span class="p">:</span> <span class="s">"Red"</span><span class="p">,</span>
+ <span class="s">"fillcolor"</span> <span class="p">:</span> <span class="s">'"#F6F4F4"'</span> <span class="p">},</span>
+ <span class="s">"Task to run"</span> <span class="p">:{</span><span class="s">"linecolor"</span> <span class="p">:</span> <span class="s">'"#0044A0"'</span> <span class="p">},</span>
+ <span class="s">"Final target"</span> <span class="p">:{</span><span class="s">"fillcolor"</span> <span class="p">:</span> <span class="s">'"#EFA03B"'</span><span class="p">,</span>
+ <span class="s">"fontcolor"</span> <span class="p">:</span> <span class="s">"black"</span><span class="p">,</span>
+ <span class="s">"dashed"</span> <span class="p">:</span> <span class="mi">0</span> <span class="p">}</span>
+ <span class="p">})</span>
+</pre></div>
+</div>
+</div></blockquote>
+<dl class="docutils">
+<dt>There are 8 colour schemes, selected by setting <tt class="docutils literal"><span class="pre">"colour_scheme_index"</span></tt>:</dt>
+<dd><div class="first last highlight-python"><div class="highlight"><pre><span class="n">pipeline_printout_graph</span> <span class="p">(</span><span class="s">"flowchart.svg"</span><span class="p">,</span> <span class="s">"svg"</span><span class="p">,</span> <span class="p">[</span><span class="n">final_task</span><span class="p">],</span>
+ <span class="n">user_colour_scheme</span> <span class="o">=</span> <span class="p">{</span><span class="s">"colour_scheme_index"</span> <span class="p">:</span><span class="mi">6</span><span class="p">})</span>
+</pre></div>
+</div>
+</dd>
+</dl>
+<p>These colours were chosen after many fierce arguments between the authors and their friends, and much
+inspiration from <a class="reference external" href="http://kuler.adobe.com/#create/fromacolor">http://kuler.adobe.com/#create/fromacolor</a>. Please
+feel free to submit any additional sets of colours for our consideration.</p>
+<p>(Click here for image in <a class="reference download internal" href="../../_downloads/flowchart_colour_schemes.svg"><tt class="xref download docutils literal"><span class="pre">svg</span></tt></a>.)</p>
+<img alt="../../_images/flowchart_colour_schemes.png" src="../../_images/flowchart_colour_schemes.png" />
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="../../contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#"><strong>Appendix 1</strong>: Flow Chart Colours with <tt class="docutils literal"><span class="pre">pipeline_printout_graph(...)</span></tt></a><ul>
+<li><a class="reference internal" href="#flowchart-colours">Flowchart colours</a></li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="check_if_uptodate.html"
+ title="previous chapter"><strong>Chapter 23</strong>: Esoteric: Writing custom functions to decide which jobs are up to date with <tt class="docutils literal"><span class="pre">@check_if_uptodate</span></tt></a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="dependencies.html"
+ title="next chapter"><strong>Appendix 2</strong>: How dependency is checked</a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../../_sources/tutorials/new_tutorial/flowchart_colours.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="../../decorators/decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="../../decorators/originate.html">@originate</a> </li>
+ <li><a href="../../decorators/split.html">@split</a> </li>
+ <li><a href="../../decorators/transform.html">@transform</a> </li>
+ <li><a href="../../decorators/merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="../../decorators/subdivide.html">@subdivide</a> </li>
+ <li><a href="../../decorators/transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="../../decorators/collate.html">@collate</a> </li>
+ <li><a href="../../decorators/collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="../../decorators/graphviz.html">@graphviz</a> </li>
+ <li><a href="../../decorators/mkdir.html">@mkdir</a> </li>
+ <li><a href="../../decorators/follows.html">@follows / mkdir</a> </li>
+ <li><a href="../../decorators/posttask.html">@posttask touch_file</a> </li>
+ <li><a href="../../decorators/active_if.html">@active_if</a> </li>
+ <li><a href="../../decorators/jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="../../decorators/product.html">@product </a> </li>
+ <li><a href="../../decorators/permutations.html">@permutations </a> </li>
+ <li><a href="../../decorators/combinations.html">@combinations </a> </li>
+ <li><a href="../../decorators/combinations_with_replacement.html">@combinations_ _with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="../../decorators/decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="../../decorators/files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="../../decorators/parallel.html">@parallel</a> </li>
+ <li><a href="../../decorators/check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="../../decorators/indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="dependencies.html" title="Appendix 2: How dependency is checked"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="check_if_uptodate.html" title="Chapter 23: Esoteric: Writing custom functions to decide which jobs are up to date with @check_if_uptodate"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="introduction.html">Manual</a> / </li>
+ <li><a href="manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/tutorials/new_tutorial/flowchart_colours_code.html b/doc/_build/html/tutorials/new_tutorial/flowchart_colours_code.html
new file mode 100644
index 0000000..9813442
--- /dev/null
+++ b/doc/_build/html/tutorials/new_tutorial/flowchart_colours_code.html
@@ -0,0 +1,473 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>Appendix 1: Python code for Flow Chart Colours with pipeline_printout_graph(...) — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../../index.html" />
+ <link rel="next" title="Cheat Sheet" href="../../cheatsheet.html" />
+ <link rel="prev" title="Chapter 21: Esoteric: Python Code for Generating parameters on the fly with @files" href="onthefly_code.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="../../cheatsheet.html" title="Cheat Sheet"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="onthefly_code.html" title="Chapter 21: Esoteric: Python Code for Generating parameters on the fly with @files"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="introduction.html">Manual</a> / </li>
+ <li><a href="manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <div class="section" id="new-manual-flowchart-colours-chapter-num-python-code-for-flow-chart-colours-with-pipeline-printout-graph">
+<span id="new-manual-flowchart-colours-code"></span><span id="index-0"></span><h1><strong>Appendix 1</strong>: Python code for Flow Chart Colours with <a class="reference internal" href="../../pipeline_functions.html#pipeline-functions-pipeline-printout-graph"><em>pipeline_printout_graph(...)</em></a><a class="headerlink" href="#new-manual-flowchart-colours-chapter-num-python-code-for-flow-chart-colours-with-pipeline-printout-graph" title="Permalink to this headline">¶</a></h1>
+<div class="admonition seealso">
+<p class="first admonition-title">See also</p>
+<ul class="simple">
+<li><a class="reference internal" href="manual_contents.html#new-manual-table-of-contents"><em>Manual Table of Contents</em></a></li>
+<li><a class="reference internal" href="../../pipeline_functions.html#pipeline-functions-pipeline-printout-graph"><em>pipeline_printout_graph(...)</em></a></li>
+<li><a class="reference download internal" href="../../_downloads/play_with_colours.py"><tt class="xref download docutils literal"><span class="pre">Download</span> <span class="pre">code</span></tt></a></li>
+<li>Back to <a class="reference internal" href="flowchart_colours.html#new-manual-flowchart-colours"><em>Flowchart colours</em></a></li>
+</ul>
+<p class="last">This example shows how flowchart colours can be customised.</p>
+</div>
+<div class="section" id="code">
+<h2>Code<a class="headerlink" href="#code" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="c">#!/usr/bin/env python</span>
+<span class="sd">"""</span>
+
+<span class="sd"> play_with_colours.py</span>
+<span class="sd"> [--log_file PATH]</span>
+<span class="sd"> [--verbose]</span>
+
+<span class="sd">"""</span>
+
+<span class="c">################################################################################</span>
+<span class="c">#</span>
+<span class="c"># play_with_colours.py</span>
+<span class="c">#</span>
+<span class="c">#</span>
+<span class="c"># Copyright (c) 7/13/2010 Leo Goodstadt</span>
+<span class="c">#</span>
+<span class="c"># Permission is hereby granted, free of charge, to any person obtaining a copy</span>
+<span class="c"># of this software and associated documentation files (the "Software"), to deal</span>
+<span class="c"># in the Software without restriction, including without limitation the rights</span>
+<span class="c"># to use, copy, modify, merge, publish, distribute, sublicense, and/or sell</span>
+<span class="c"># copies of the Software, and to permit persons to whom the Software is</span>
+<span class="c"># furnished to do so, subject to the following conditions:</span>
+<span class="c">#</span>
+<span class="c"># The above copyright notice and this permission notice shall be included in</span>
+<span class="c"># all copies or substantial portions of the Software.</span>
+<span class="c">#</span>
+<span class="c"># THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR</span>
+<span class="c"># IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,</span>
+<span class="c"># FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE</span>
+<span class="c"># AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER</span>
+<span class="c"># LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,</span>
+<span class="c"># OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN</span>
+<span class="c"># THE SOFTWARE.</span>
+<span class="c">#################################################################################</span>
+
+<span class="kn">import</span> <span class="nn">sys</span><span class="o">,</span> <span class="nn">os</span>
+
+
+<span class="c">#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888</span>
+
+<span class="c"># options</span>
+
+
+<span class="c">#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888</span>
+
+
+<span class="kn">from</span> <span class="nn">optparse</span> <span class="kn">import</span> <span class="n">OptionParser</span>
+<span class="kn">import</span> <span class="nn">StringIO</span>
+
+<span class="n">parser</span> <span class="o">=</span> <span class="n">OptionParser</span><span class="p">(</span><span class="n">version</span><span class="o">=</span><span class="s">"%play_with_colours 1.0"</span><span class="p">,</span>
+ <span class="n">usage</span> <span class="o">=</span> <span class="s">"</span><span class="se">\n\n</span><span class="s"> play_with_colours "</span>
+ <span class="s">"--flowchart FILE [options] "</span>
+ <span class="s">"[--colour_scheme_index INT ] "</span>
+ <span class="s">"[--key_legend_in_graph]"</span><span class="p">)</span>
+
+<span class="c">#</span>
+<span class="c"># pipeline</span>
+<span class="c">#</span>
+<span class="n">parser</span><span class="o">.</span><span class="n">add_option</span><span class="p">(</span><span class="s">"--flowchart"</span><span class="p">,</span> <span class="n">dest</span><span class="o">=</span><span class="s">"flowchart"</span><span class="p">,</span>
+ <span class="n">metavar</span><span class="o">=</span><span class="s">"FILE"</span><span class="p">,</span>
+ <span class="nb">type</span><span class="o">=</span><span class="s">"string"</span><span class="p">,</span>
+ <span class="n">help</span><span class="o">=</span><span class="s">"Don't actually run any commands; just print the pipeline "</span>
+ <span class="s">"as a flowchart."</span><span class="p">)</span>
+<span class="n">parser</span><span class="o">.</span><span class="n">add_option</span><span class="p">(</span><span class="s">"--colour_scheme_index"</span><span class="p">,</span> <span class="n">dest</span><span class="o">=</span><span class="s">"colour_scheme_index"</span><span class="p">,</span>
+ <span class="n">metavar</span><span class="o">=</span><span class="s">"INTEGER"</span><span class="p">,</span>
+ <span class="nb">type</span><span class="o">=</span><span class="s">"int"</span><span class="p">,</span>
+ <span class="n">help</span><span class="o">=</span><span class="s">"Index of colour scheme for flow chart."</span><span class="p">)</span>
+<span class="n">parser</span><span class="o">.</span><span class="n">add_option</span><span class="p">(</span><span class="s">"--key_legend_in_graph"</span><span class="p">,</span> <span class="n">dest</span><span class="o">=</span><span class="s">"key_legend_in_graph"</span><span class="p">,</span>
+ <span class="n">action</span><span class="o">=</span><span class="s">"store_true"</span><span class="p">,</span> <span class="n">default</span><span class="o">=</span><span class="bp">False</span><span class="p">,</span>
+ <span class="n">help</span><span class="o">=</span><span class="s">"Print out legend and key for dependency graph."</span><span class="p">)</span>
+
+<span class="p">(</span><span class="n">options</span><span class="p">,</span> <span class="n">remaining_args</span><span class="p">)</span> <span class="o">=</span> <span class="n">parser</span><span class="o">.</span><span class="n">parse_args</span><span class="p">()</span>
+<span class="k">if</span> <span class="ow">not</span> <span class="n">options</span><span class="o">.</span><span class="n">flowchart</span><span class="p">:</span>
+ <span class="k">raise</span> <span class="ne">Exception</span><span class="p">(</span><span class="s">"Missing mandatory parameter: --flowchart.</span><span class="se">\n</span><span class="s">"</span><span class="p">)</span>
+
+
+
+
+<span class="c">#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888</span>
+
+<span class="c"># imports</span>
+
+
+<span class="c">#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888</span>
+
+<span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+<span class="kn">from</span> <span class="nn">ruffus.ruffus_exceptions</span> <span class="kn">import</span> <span class="n">JobSignalledBreak</span>
+
+
+<span class="c">#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888</span>
+
+<span class="c"># Pipeline</span>
+
+
+<span class="c">#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888</span>
+
+
+<span class="c">#</span>
+<span class="c"># up to date tasks</span>
+<span class="c">#</span>
+<span class="nd">@check_if_uptodate</span> <span class="p">(</span><span class="k">lambda</span> <span class="p">:</span> <span class="p">(</span><span class="bp">False</span><span class="p">,</span> <span class="s">""</span><span class="p">))</span>
+<span class="k">def</span> <span class="nf">Up_to_date_task1</span><span class="p">(</span><span class="n">infile</span><span class="p">,</span> <span class="n">outfile</span><span class="p">):</span>
+ <span class="k">pass</span>
+
+<span class="nd">@check_if_uptodate</span> <span class="p">(</span><span class="k">lambda</span> <span class="p">:</span> <span class="p">(</span><span class="bp">False</span><span class="p">,</span> <span class="s">""</span><span class="p">))</span>
+<span class="nd">@follows</span><span class="p">(</span><span class="n">Up_to_date_task1</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">Up_to_date_task2</span><span class="p">(</span><span class="n">infile</span><span class="p">,</span> <span class="n">outfile</span><span class="p">):</span>
+ <span class="k">pass</span>
+
+<span class="nd">@check_if_uptodate</span> <span class="p">(</span><span class="k">lambda</span> <span class="p">:</span> <span class="p">(</span><span class="bp">False</span><span class="p">,</span> <span class="s">""</span><span class="p">))</span>
+<span class="nd">@follows</span><span class="p">(</span><span class="n">Up_to_date_task2</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">Up_to_date_task3</span><span class="p">(</span><span class="n">infile</span><span class="p">,</span> <span class="n">outfile</span><span class="p">):</span>
+ <span class="k">pass</span>
+
+
+<span class="nd">@check_if_uptodate</span> <span class="p">(</span><span class="k">lambda</span> <span class="p">:</span> <span class="p">(</span><span class="bp">False</span><span class="p">,</span> <span class="s">""</span><span class="p">))</span>
+<span class="nd">@follows</span><span class="p">(</span><span class="n">Up_to_date_task3</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">Up_to_date_final_target</span><span class="p">(</span><span class="n">infile</span><span class="p">,</span> <span class="n">outfile</span><span class="p">):</span>
+ <span class="k">pass</span>
+
+
+<span class="c">#</span>
+<span class="c"># Explicitly specified</span>
+<span class="c">#</span>
+<span class="nd">@check_if_uptodate</span> <span class="p">(</span><span class="k">lambda</span> <span class="p">:</span> <span class="p">(</span><span class="bp">False</span><span class="p">,</span> <span class="s">""</span><span class="p">))</span>
+<span class="nd">@follows</span><span class="p">(</span><span class="n">Up_to_date_task1</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">Explicitly_specified_task</span><span class="p">(</span><span class="n">infile</span><span class="p">,</span> <span class="n">outfile</span><span class="p">):</span>
+ <span class="k">pass</span>
+
+
+
+<span class="c">#</span>
+<span class="c"># Tasks to run</span>
+<span class="c">#</span>
+<span class="nd">@follows</span><span class="p">(</span><span class="n">Explicitly_specified_task</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">Task_to_run1</span><span class="p">(</span><span class="n">infile</span><span class="p">,</span> <span class="n">outfile</span><span class="p">):</span>
+ <span class="k">pass</span>
+
+
+<span class="nd">@follows</span><span class="p">(</span><span class="n">Task_to_run1</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">Task_to_run2</span><span class="p">(</span><span class="n">infile</span><span class="p">,</span> <span class="n">outfile</span><span class="p">):</span>
+ <span class="k">pass</span>
+
+<span class="nd">@follows</span><span class="p">(</span><span class="n">Task_to_run2</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">Task_to_run3</span><span class="p">(</span><span class="n">infile</span><span class="p">,</span> <span class="n">outfile</span><span class="p">):</span>
+ <span class="k">pass</span>
+
+<span class="nd">@check_if_uptodate</span> <span class="p">(</span><span class="k">lambda</span> <span class="p">:</span> <span class="p">(</span><span class="bp">False</span><span class="p">,</span> <span class="s">""</span><span class="p">))</span>
+<span class="nd">@follows</span><span class="p">(</span><span class="n">Task_to_run2</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">Up_to_date_task_forced_to_rerun</span><span class="p">(</span><span class="n">infile</span><span class="p">,</span> <span class="n">outfile</span><span class="p">):</span>
+ <span class="k">pass</span>
+
+
+<span class="c">#</span>
+<span class="c"># Final target</span>
+<span class="c">#</span>
+<span class="nd">@follows</span><span class="p">(</span><span class="n">Up_to_date_task_forced_to_rerun</span><span class="p">,</span> <span class="n">Task_to_run3</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">Final_target</span><span class="p">(</span><span class="n">infile</span><span class="p">,</span> <span class="n">outfile</span><span class="p">):</span>
+ <span class="k">pass</span>
+
+<span class="c">#</span>
+<span class="c"># Ignored downstream</span>
+<span class="c">#</span>
+<span class="nd">@follows</span><span class="p">(</span><span class="n">Final_target</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">Downstream_task1_ignored</span><span class="p">(</span><span class="n">infile</span><span class="p">,</span> <span class="n">outfile</span><span class="p">):</span>
+ <span class="k">pass</span>
+
+<span class="nd">@follows</span><span class="p">(</span><span class="n">Final_target</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">Downstream_task2_ignored</span><span class="p">(</span><span class="n">infile</span><span class="p">,</span> <span class="n">outfile</span><span class="p">):</span>
+ <span class="k">pass</span>
+
+
+
+
+
+
+
+
+
+
+
+<span class="c">#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888</span>
+
+<span class="c"># Main logic</span>
+
+
+<span class="c">#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888</span>
+<span class="kn">from</span> <span class="nn">collections</span> <span class="kn">import</span> <span class="n">defaultdict</span>
+<span class="n">custom_flow_chart_colour_scheme</span> <span class="o">=</span> <span class="n">defaultdict</span><span class="p">(</span><span class="nb">dict</span><span class="p">)</span>
+
+<span class="c">#</span>
+<span class="c"># Base chart on this overall colour scheme index</span>
+<span class="c">#</span>
+<span class="n">custom_flow_chart_colour_scheme</span><span class="p">[</span><span class="s">"colour_scheme_index"</span><span class="p">]</span> <span class="o">=</span> <span class="n">options</span><span class="o">.</span><span class="n">colour_scheme_index</span>
+
+<span class="c">#</span>
+<span class="c"># Overriding colours</span>
+<span class="c">#</span>
+<span class="k">if</span> <span class="n">options</span><span class="o">.</span><span class="n">colour_scheme_index</span> <span class="o">==</span> <span class="bp">None</span><span class="p">:</span>
+ <span class="n">custom_flow_chart_colour_scheme</span><span class="p">[</span><span class="s">"Vicious cycle"</span><span class="p">][</span><span class="s">"linecolor"</span><span class="p">]</span> <span class="o">=</span> <span class="s">'"#FF3232"'</span>
+ <span class="n">custom_flow_chart_colour_scheme</span><span class="p">[</span><span class="s">"Pipeline"</span><span class="p">][</span><span class="s">"fontcolor"</span><span class="p">]</span> <span class="o">=</span> <span class="s">'"#FF3232"'</span>
+ <span class="n">custom_flow_chart_colour_scheme</span><span class="p">[</span><span class="s">"Key"</span><span class="p">][</span><span class="s">"fontcolor"</span><span class="p">]</span> <span class="o">=</span> <span class="s">"black"</span>
+ <span class="n">custom_flow_chart_colour_scheme</span><span class="p">[</span><span class="s">"Key"</span><span class="p">][</span><span class="s">"fillcolor"</span><span class="p">]</span> <span class="o">=</span> <span class="s">'"#F6F4F4"'</span>
+ <span class="n">custom_flow_chart_colour_scheme</span><span class="p">[</span><span class="s">"Task to run"</span><span class="p">][</span><span class="s">"linecolor"</span><span class="p">]</span> <span class="o">=</span> <span class="s">'"#0044A0"'</span>
+ <span class="n">custom_flow_chart_colour_scheme</span><span class="p">[</span><span class="s">"Up-to-date"</span><span class="p">][</span><span class="s">"linecolor"</span><span class="p">]</span> <span class="o">=</span> <span class="s">"gray"</span>
+ <span class="n">custom_flow_chart_colour_scheme</span><span class="p">[</span><span class="s">"Final target"</span><span class="p">][</span><span class="s">"fillcolor"</span><span class="p">]</span> <span class="o">=</span> <span class="s">'"#EFA03B"'</span>
+ <span class="n">custom_flow_chart_colour_scheme</span><span class="p">[</span><span class="s">"Final target"</span><span class="p">][</span><span class="s">"fontcolor"</span><span class="p">]</span> <span class="o">=</span> <span class="s">"black"</span>
+ <span class="n">custom_flow_chart_colour_scheme</span><span class="p">[</span><span class="s">"Final target"</span><span class="p">][</span><span class="s">"color"</span><span class="p">]</span> <span class="o">=</span> <span class="s">"black"</span>
+ <span class="n">custom_flow_chart_colour_scheme</span><span class="p">[</span><span class="s">"Final target"</span><span class="p">][</span><span class="s">"dashed"</span><span class="p">]</span> <span class="o">=</span> <span class="mi">0</span>
+ <span class="n">custom_flow_chart_colour_scheme</span><span class="p">[</span><span class="s">"Vicious cycle"</span><span class="p">][</span><span class="s">"fillcolor"</span><span class="p">]</span> <span class="o">=</span> <span class="s">'"#FF3232"'</span>
+ <span class="n">custom_flow_chart_colour_scheme</span><span class="p">[</span><span class="s">"Vicious cycle"</span><span class="p">][</span><span class="s">"fontcolor"</span><span class="p">]</span> <span class="o">=</span> <span class="s">'white'</span>
+ <span class="n">custom_flow_chart_colour_scheme</span><span class="p">[</span><span class="s">"Vicious cycle"</span><span class="p">][</span><span class="s">"color"</span><span class="p">]</span> <span class="o">=</span> <span class="s">"white"</span>
+ <span class="n">custom_flow_chart_colour_scheme</span><span class="p">[</span><span class="s">"Vicious cycle"</span><span class="p">][</span><span class="s">"dashed"</span><span class="p">]</span> <span class="o">=</span> <span class="mi">0</span>
+ <span class="n">custom_flow_chart_colour_scheme</span><span class="p">[</span><span class="s">"Up-to-date task"</span><span class="p">][</span><span class="s">"fillcolor"</span><span class="p">]</span> <span class="o">=</span> <span class="s">'"#B8CC6E"'</span>
+ <span class="n">custom_flow_chart_colour_scheme</span><span class="p">[</span><span class="s">"Up-to-date task"</span><span class="p">][</span><span class="s">"fontcolor"</span><span class="p">]</span> <span class="o">=</span> <span class="s">'"#006000"'</span>
+ <span class="n">custom_flow_chart_colour_scheme</span><span class="p">[</span><span class="s">"Up-to-date task"</span><span class="p">][</span><span class="s">"color"</span><span class="p">]</span> <span class="o">=</span> <span class="s">'"#006000"'</span>
+ <span class="n">custom_flow_chart_colour_scheme</span><span class="p">[</span><span class="s">"Up-to-date task"</span><span class="p">][</span><span class="s">"dashed"</span><span class="p">]</span> <span class="o">=</span> <span class="mi">0</span>
+ <span class="n">custom_flow_chart_colour_scheme</span><span class="p">[</span><span class="s">"Down stream"</span><span class="p">][</span><span class="s">"fillcolor"</span><span class="p">]</span> <span class="o">=</span> <span class="s">"white"</span>
+ <span class="n">custom_flow_chart_colour_scheme</span><span class="p">[</span><span class="s">"Down stream"</span><span class="p">][</span><span class="s">"fontcolor"</span><span class="p">]</span> <span class="o">=</span> <span class="s">"gray"</span>
+ <span class="n">custom_flow_chart_colour_scheme</span><span class="p">[</span><span class="s">"Down stream"</span><span class="p">][</span><span class="s">"color"</span><span class="p">]</span> <span class="o">=</span> <span class="s">"gray"</span>
+ <span class="n">custom_flow_chart_colour_scheme</span><span class="p">[</span><span class="s">"Down stream"</span><span class="p">][</span><span class="s">"dashed"</span><span class="p">]</span> <span class="o">=</span> <span class="mi">0</span>
+ <span class="n">custom_flow_chart_colour_scheme</span><span class="p">[</span><span class="s">"Explicitly specified task"</span><span class="p">][</span><span class="s">"fillcolor"</span><span class="p">]</span> <span class="o">=</span> <span class="s">"transparent"</span>
+ <span class="n">custom_flow_chart_colour_scheme</span><span class="p">[</span><span class="s">"Explicitly specified task"</span><span class="p">][</span><span class="s">"fontcolor"</span><span class="p">]</span> <span class="o">=</span> <span class="s">"black"</span>
+ <span class="n">custom_flow_chart_colour_scheme</span><span class="p">[</span><span class="s">"Explicitly specified task"</span><span class="p">][</span><span class="s">"color"</span><span class="p">]</span> <span class="o">=</span> <span class="s">"black"</span>
+ <span class="n">custom_flow_chart_colour_scheme</span><span class="p">[</span><span class="s">"Explicitly specified task"</span><span class="p">][</span><span class="s">"dashed"</span><span class="p">]</span> <span class="o">=</span> <span class="mi">0</span>
+ <span class="n">custom_flow_chart_colour_scheme</span><span class="p">[</span><span class="s">"Task to run"</span><span class="p">][</span><span class="s">"fillcolor"</span><span class="p">]</span> <span class="o">=</span> <span class="s">'"#EBF3FF"'</span>
+ <span class="n">custom_flow_chart_colour_scheme</span><span class="p">[</span><span class="s">"Task to run"</span><span class="p">][</span><span class="s">"fontcolor"</span><span class="p">]</span> <span class="o">=</span> <span class="s">'"#0044A0"'</span>
+ <span class="n">custom_flow_chart_colour_scheme</span><span class="p">[</span><span class="s">"Task to run"</span><span class="p">][</span><span class="s">"color"</span><span class="p">]</span> <span class="o">=</span> <span class="s">'"#0044A0"'</span>
+ <span class="n">custom_flow_chart_colour_scheme</span><span class="p">[</span><span class="s">"Task to run"</span><span class="p">][</span><span class="s">"dashed"</span><span class="p">]</span> <span class="o">=</span> <span class="mi">0</span>
+ <span class="n">custom_flow_chart_colour_scheme</span><span class="p">[</span><span class="s">"Up-to-date task forced to rerun"</span><span class="p">][</span><span class="s">"fillcolor"</span><span class="p">]</span> <span class="o">=</span> <span class="s">'transparent'</span>
+ <span class="n">custom_flow_chart_colour_scheme</span><span class="p">[</span><span class="s">"Up-to-date task forced to rerun"</span><span class="p">][</span><span class="s">"fontcolor"</span><span class="p">]</span> <span class="o">=</span> <span class="s">'"#0044A0"'</span>
+ <span class="n">custom_flow_chart_colour_scheme</span><span class="p">[</span><span class="s">"Up-to-date task forced to rerun"</span><span class="p">][</span><span class="s">"color"</span><span class="p">]</span> <span class="o">=</span> <span class="s">'"#0044A0"'</span>
+ <span class="n">custom_flow_chart_colour_scheme</span><span class="p">[</span><span class="s">"Up-to-date task forced to rerun"</span><span class="p">][</span><span class="s">"dashed"</span><span class="p">]</span> <span class="o">=</span> <span class="mi">1</span>
+ <span class="n">custom_flow_chart_colour_scheme</span><span class="p">[</span><span class="s">"Up-to-date Final target"</span><span class="p">][</span><span class="s">"fillcolor"</span><span class="p">]</span> <span class="o">=</span> <span class="s">'"#EFA03B"'</span>
+ <span class="n">custom_flow_chart_colour_scheme</span><span class="p">[</span><span class="s">"Up-to-date Final target"</span><span class="p">][</span><span class="s">"fontcolor"</span><span class="p">]</span> <span class="o">=</span> <span class="s">'"#006000"'</span>
+ <span class="n">custom_flow_chart_colour_scheme</span><span class="p">[</span><span class="s">"Up-to-date Final target"</span><span class="p">][</span><span class="s">"color"</span><span class="p">]</span> <span class="o">=</span> <span class="s">'"#006000"'</span>
+ <span class="n">custom_flow_chart_colour_scheme</span><span class="p">[</span><span class="s">"Up-to-date Final target"</span><span class="p">][</span><span class="s">"dashed"</span><span class="p">]</span> <span class="o">=</span> <span class="mi">0</span>
+
+<span class="k">if</span> <span class="n">__name__</span> <span class="o">==</span> <span class="s">'__main__'</span><span class="p">:</span>
+ <span class="n">pipeline_printout_graph</span> <span class="p">(</span>
+
+ <span class="nb">open</span><span class="p">(</span><span class="n">options</span><span class="o">.</span><span class="n">flowchart</span><span class="p">,</span> <span class="s">"w"</span><span class="p">),</span>
+ <span class="c"># use flowchart file name extension to decide flowchart format</span>
+ <span class="c"># e.g. svg, jpg etc.</span>
+ <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">splitext</span><span class="p">(</span><span class="n">options</span><span class="o">.</span><span class="n">flowchart</span><span class="p">)[</span><span class="mi">1</span><span class="p">][</span><span class="mi">1</span><span class="p">:],</span>
+
+ <span class="c"># final targets</span>
+ <span class="p">[</span><span class="n">Final_target</span><span class="p">,</span> <span class="n">Up_to_date_final_target</span><span class="p">],</span>
+
+ <span class="c"># Explicitly specified tasks</span>
+ <span class="p">[</span><span class="n">Explicitly_specified_task</span><span class="p">],</span>
+
+ <span class="c"># Do we want key legend</span>
+ <span class="n">no_key_legend</span> <span class="o">=</span> <span class="ow">not</span> <span class="n">options</span><span class="o">.</span><span class="n">key_legend_in_graph</span><span class="p">,</span>
+
+ <span class="c"># Print all the task types whether used or not</span>
+ <span class="n">minimal_key_legend</span> <span class="o">=</span> <span class="bp">False</span><span class="p">,</span>
+
+ <span class="n">user_colour_scheme</span> <span class="o">=</span> <span class="n">custom_flow_chart_colour_scheme</span><span class="p">,</span>
+ <span class="n">pipeline_name</span> <span class="o">=</span> <span class="s">"Colour schemes"</span><span class="p">)</span>
+</pre></div>
+</div>
+</div></blockquote>
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="../../contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#"><strong>Appendix 1</strong>: Python code for Flow Chart Colours with <tt class="docutils literal"><span class="pre">pipeline_printout_graph(...)</span></tt></a><ul>
+<li><a class="reference internal" href="#code">Code</a></li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="onthefly_code.html"
+ title="previous chapter"><strong>Chapter 21</strong>: Esoteric: Python Code for Generating parameters on the fly with <tt class="docutils literal"><span class="pre">@files</span></tt></a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="../../cheatsheet.html"
+ title="next chapter">Cheat Sheet</a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../../_sources/tutorials/new_tutorial/flowchart_colours_code.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="../../decorators/decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="../../decorators/originate.html">@originate</a> </li>
+ <li><a href="../../decorators/split.html">@split</a> </li>
+ <li><a href="../../decorators/transform.html">@transform</a> </li>
+ <li><a href="../../decorators/merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="../../decorators/subdivide.html">@subdivide</a> </li>
+ <li><a href="../../decorators/transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="../../decorators/collate.html">@collate</a> </li>
+ <li><a href="../../decorators/collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="../../decorators/graphviz.html">@graphviz</a> </li>
+ <li><a href="../../decorators/mkdir.html">@mkdir</a> </li>
+ <li><a href="../../decorators/follows.html">@follows / mkdir</a> </li>
+ <li><a href="../../decorators/posttask.html">@posttask touch_file</a> </li>
+ <li><a href="../../decorators/active_if.html">@active_if</a> </li>
+ <li><a href="../../decorators/jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="../../decorators/product.html">@product </a> </li>
+ <li><a href="../../decorators/permutations.html">@permutations </a> </li>
+ <li><a href="../../decorators/combinations.html">@combinations </a> </li>
+ <li><a href="../../decorators/combinations_with_replacement.html">@combinations_ _with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="../../decorators/decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="../../decorators/files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="../../decorators/parallel.html">@parallel</a> </li>
+ <li><a href="../../decorators/check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="../../decorators/indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="../../cheatsheet.html" title="Cheat Sheet"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="onthefly_code.html" title="Chapter 21: Esoteric: Python Code for Generating parameters on the fly with @files"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="introduction.html">Manual</a> / </li>
+ <li><a href="manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/tutorials/new_tutorial/inputs.html b/doc/_build/html/tutorials/new_tutorial/inputs.html
new file mode 100644
index 0000000..63cad9a
--- /dev/null
+++ b/doc/_build/html/tutorials/new_tutorial/inputs.html
@@ -0,0 +1,426 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>Chapter 20: Manipulating task inputs via string substitution using inputs() and add_inputs() — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../../index.html" />
+ <link rel="next" title="Chapter 21: Esoteric: Generating parameters on the fly with @files" href="onthefly.html" />
+ <link rel="prev" title="Chapter 19: Signal the completion of each stage of our pipeline with @posttask" href="posttask.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="onthefly.html" title="Chapter 21: Esoteric: Generating parameters on the fly with @files"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="posttask.html" title="Chapter 19: Signal the completion of each stage of our pipeline with @posttask"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="introduction.html">Manual</a> / </li>
+ <li><a href="manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <div class="section" id="new-manual-inputs-chapter-num-manipulating-task-inputs-via-string-substitution-using-inputs-and-add-inputs">
+<span id="new-manual-inputs"></span><span id="index-0"></span><h1><strong>Chapter 20</strong>: Manipulating task inputs via string substitution using <a class="reference internal" href="../../decorators/indicator_objects.html#decorators-inputs"><em>inputs()</em></a> and <a class="reference internal" href="../../decorators/indicator_objects.html#decorators-add-inputs"><em>add_inputs()</em></a><a class="headerlink" href="#new-manual-inputs-chapter-num-manipulating-task-inputs-via-string-s [...]
+<div class="admonition seealso">
+<p class="first admonition-title">See also</p>
+<ul class="last simple">
+<li><a class="reference internal" href="manual_contents.html#new-manual-table-of-contents"><em>Manual Table of Contents</em></a></li>
+<li><a class="reference internal" href="../../decorators/indicator_objects.html#decorators-inputs"><em>inputs()</em></a> syntax</li>
+<li><a class="reference internal" href="../../decorators/indicator_objects.html#decorators-add-inputs"><em>add_inputs()</em></a> syntax</li>
+</ul>
+</div>
+<div class="admonition note">
+<p class="first admonition-title">Note</p>
+<p>Remember to look at the example code:</p>
+<blockquote class="last">
+<div><ul class="simple">
+<li><a class="reference internal" href="inputs_code.html#new-manual-inputs-code"><em>Chapter 20: Python Code for Manipulating task inputs via string substitution using inputs() and add_inputs()</em></a></li>
+</ul>
+</div></blockquote>
+</div>
+<div class="section" id="overview">
+<h2>Overview<a class="headerlink" href="#overview" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>The previous chapters have been described how <em>Ruffus</em> allows the <strong>Output</strong> names for each job
+to be generated from the <em>Input</em> names via string substitution. This is how <em>Ruffus</em> can
+automatically chain multiple tasks in a pipeline together seamlessly.</p>
+<p>Sometimes it is useful to be able to modify the <strong>Input</strong> by string substitution
+as well. There are two situations where this additional flexibility is needed:</p>
+<blockquote>
+<div><ol class="arabic simple">
+<li>You need to add additional prequisites or filenames to the <strong>Input</strong> of every single job</li>
+<li>You need to add additional <strong>Input</strong> file names which are some variant of the existing ones.</li>
+</ol>
+</div></blockquote>
+<p>Both will be much more obvious with some examples</p>
+</div></blockquote>
+</div>
+<div class="section" id="adding-additional-input-prerequisites-per-job-with-add-inputs">
+<h2>Adding additional <em>input</em> prerequisites per job with <a class="reference internal" href="../../decorators/indicator_objects.html#decorators-add-inputs"><em>add_inputs()</em></a><a class="headerlink" href="#adding-additional-input-prerequisites-per-job-with-add-inputs" title="Permalink to this headline">¶</a></h2>
+<div class="section" id="example-compiling-c-code">
+<h3>1. Example: compiling c++ code<a class="headerlink" href="#example-compiling-c-code" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><p>Let us first compile some c++ (<tt class="docutils literal"><span class="pre">"*.cpp"</span></tt>) files using plain <a class="reference internal" href="../../decorators/transform.html#decorators-transform"><em>@transform</em></a> syntax:</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="c"># source files exist before our pipeline</span>
+<span class="n">source_files</span> <span class="o">=</span> <span class="p">[</span><span class="s">"hasty.cpp"</span><span class="p">,</span> <span class="s">"tasty.cpp"</span><span class="p">,</span> <span class="s">"messy.cpp"</span><span class="p">]</span>
+<span class="k">for</span> <span class="n">source_file</span> <span class="ow">in</span> <span class="n">source_files</span><span class="p">:</span>
+ <span class="nb">open</span><span class="p">(</span><span class="n">source_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+
+<span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+
+<span class="nd">@transform</span><span class="p">(</span><span class="n">source_files</span><span class="p">,</span> <span class="n">suffix</span><span class="p">(</span><span class="s">".cpp"</span><span class="p">),</span> <span class="s">".o"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">compile</span><span class="p">(</span><span class="n">input_filename</span><span class="p">,</span> <span class="n">output_file</span><span class="p">):</span>
+ <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+
+<span class="n">pipeline_run</span><span class="p">()</span>
+</pre></div>
+</div>
+</div></blockquote>
+</div>
+<div class="section" id="example-adding-a-common-header-file-with-add-inputs">
+<h3>2. Example: Adding a common header file with <a class="reference internal" href="../../decorators/indicator_objects.html#decorators-add-inputs"><em>add_inputs()</em></a><a class="headerlink" href="#example-adding-a-common-header-file-with-add-inputs" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="c"># source files exist before our pipeline</span>
+<span class="n">source_files</span> <span class="o">=</span> <span class="p">[</span><span class="s">"hasty.cpp"</span><span class="p">,</span> <span class="s">"tasty.cpp"</span><span class="p">,</span> <span class="s">"messy.cpp"</span><span class="p">]</span>
+<span class="k">for</span> <span class="n">source_file</span> <span class="ow">in</span> <span class="n">source_files</span><span class="p">:</span>
+ <span class="nb">open</span><span class="p">(</span><span class="n">source_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+
+<span class="c"># common (universal) header exists before our pipeline</span>
+<span class="nb">open</span><span class="p">(</span><span class="s">"universal.h"</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+
+<span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+
+<span class="hll"><span class="c"># make header files</span>
+</span><span class="nd">@transform</span><span class="p">(</span><span class="n">source_files</span><span class="p">,</span> <span class="n">suffix</span><span class="p">(</span><span class="s">".cpp"</span><span class="p">),</span> <span class="s">".h"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">create_matching_headers</span><span class="p">(</span><span class="n">input_file</span><span class="p">,</span> <span class="n">output_file</span><span class="p">):</span>
+ <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+
+<span class="nd">@transform</span><span class="p">(</span><span class="n">source_files</span><span class="p">,</span> <span class="n">suffix</span><span class="p">(</span><span class="s">".cpp"</span><span class="p">),</span>
+<span class="hll"> <span class="c"># add header to the input of every job</span>
+</span> <span class="n">add_inputs</span><span class="p">(</span><span class="s">"universal.h"</span><span class="p">,</span>
+<span class="hll"> <span class="c"># add result of task create_matching_headers to the input of every job</span>
+</span> <span class="n">create_matching_headers</span><span class="p">),</span>
+ <span class="s">".o"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">compile</span><span class="p">(</span><span class="n">input_filename</span><span class="p">,</span> <span class="n">output_file</span><span class="p">):</span>
+ <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+
+<span class="n">pipeline_run</span><span class="p">()</span>
+
+ <span class="o">>>></span> <span class="n">pipeline_run</span><span class="p">()</span>
+ <span class="n">Job</span> <span class="o">=</span> <span class="p">[</span><span class="n">hasty</span><span class="o">.</span><span class="n">cpp</span> <span class="o">-></span> <span class="n">hasty</span><span class="o">.</span><span class="n">h</span><span class="p">]</span> <span class="n">completed</span>
+ <span class="n">Job</span> <span class="o">=</span> <span class="p">[</span><span class="n">messy</span><span class="o">.</span><span class="n">cpp</span> <span class="o">-></span> <span class="n">messy</span><span class="o">.</span><span class="n">h</span><span class="p">]</span> <span class="n">completed</span>
+ <span class="n">Job</span> <span class="o">=</span> <span class="p">[</span><span class="n">tasty</span><span class="o">.</span><span class="n">cpp</span> <span class="o">-></span> <span class="n">tasty</span><span class="o">.</span><span class="n">h</span><span class="p">]</span> <span class="n">completed</span>
+ <span class="n">Completed</span> <span class="n">Task</span> <span class="o">=</span> <span class="n">create_matching_headers</span>
+ <span class="n">Job</span> <span class="o">=</span> <span class="p">[[</span><span class="n">hasty</span><span class="o">.</span><span class="n">cpp</span><span class="p">,</span> <span class="n">universal</span><span class="o">.</span><span class="n">h</span><span class="p">,</span> <span class="n">hasty</span><span class="o">.</span><span class="n">h</span><span class="p">,</span> <span class="n">messy</span><span class="o">.</span><span class="n">h</span><span class="p">,</sp [...]
+ <span class="n">Job</span> <span class="o">=</span> <span class="p">[[</span><span class="n">messy</span><span class="o">.</span><span class="n">cpp</span><span class="p">,</span> <span class="n">universal</span><span class="o">.</span><span class="n">h</span><span class="p">,</span> <span class="n">hasty</span><span class="o">.</span><span class="n">h</span><span class="p">,</span> <span class="n">messy</span><span class="o">.</span><span class="n">h</span><span class="p">,</sp [...]
+ <span class="n">Job</span> <span class="o">=</span> <span class="p">[[</span><span class="n">tasty</span><span class="o">.</span><span class="n">cpp</span><span class="p">,</span> <span class="n">universal</span><span class="o">.</span><span class="n">h</span><span class="p">,</span> <span class="n">hasty</span><span class="o">.</span><span class="n">h</span><span class="p">,</span> <span class="n">messy</span><span class="o">.</span><span class="n">h</span><span class="p">,</sp [...]
+ <span class="n">Completed</span> <span class="n">Task</span> <span class="o">=</span> <span class="nb">compile</span>
+</pre></div>
+</div>
+</div></blockquote>
+</div>
+<div class="section" id="example-additional-input-can-be-tasks">
+<h3>3. Example: Additional <em>Input</em> can be tasks<a class="headerlink" href="#example-additional-input-can-be-tasks" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><p>We can also add a task name to <a class="reference internal" href="../../decorators/indicator_objects.html#decorators-add-inputs"><em>add_inputs()</em></a>.
+This chains the <strong>Output</strong>, i.e. run time results, of any previous task as
+an additional <strong>Input</strong> to every single job in the task.</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="hll"><span class="c"># make header files</span>
+</span><span class="nd">@transform</span><span class="p">(</span><span class="n">source_files</span><span class="p">,</span> <span class="n">suffix</span><span class="p">(</span><span class="s">".cpp"</span><span class="p">),</span> <span class="s">".h"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">create_matching_headers</span><span class="p">(</span><span class="n">input_file</span><span class="p">,</span> <span class="n">output_file</span><span class="p">):</span>
+ <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+
+<span class="nd">@transform</span><span class="p">(</span><span class="n">source_files</span><span class="p">,</span> <span class="n">suffix</span><span class="p">(</span><span class="s">".cpp"</span><span class="p">),</span>
+<span class="hll"> <span class="c"># add header to the input of every job</span>
+</span> <span class="n">add_inputs</span><span class="p">(</span><span class="s">"universal.h"</span><span class="p">,</span>
+<span class="hll"> <span class="c"># add result of task create_matching_headers to the input of every job</span>
+</span> <span class="n">create_matching_headers</span><span class="p">),</span>
+ <span class="s">".o"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">compile</span><span class="p">(</span><span class="n">input_filenames</span><span class="p">,</span> <span class="n">output_file</span><span class="p">):</span>
+ <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+
+<span class="n">pipeline_run</span><span class="p">()</span>
+</pre></div>
+</div>
+<div class="highlight-python"><div class="highlight"><pre><span class="gp">>>> </span><span class="n">pipeline_run</span><span class="p">()</span>
+<span class="go"> Job = [[hasty.cpp, universal.h, hasty.h, messy.h, tasty.h] -> hasty.o] completed</span>
+<span class="go"> Job = [[messy.cpp, universal.h, hasty.h, messy.h, tasty.h] -> messy.o] completed</span>
+<span class="go"> Job = [[tasty.cpp, universal.h, hasty.h, messy.h, tasty.h] -> tasty.o] completed</span>
+<span class="go">Completed Task = compile</span>
+</pre></div>
+</div>
+</div></blockquote>
+</div></blockquote>
+</div>
+<div class="section" id="example-add-corresponding-files-using-add-inputs-with-formatter-or-regex">
+<h3>4. Example: Add corresponding files using <a class="reference internal" href="../../decorators/indicator_objects.html#decorators-add-inputs"><em>add_inputs()</em></a> with <a class="reference internal" href="../../decorators/indicator_objects.html#decorators-formatter"><em>formatter</em></a> or <a class="reference internal" href="../../decorators/indicator_objects.html#decorators-regex"><em>regex</em></a><a class="headerlink" href="#example-add-corresponding-files-using-add-inputs-wi [...]
+<blockquote>
+<div><p>The previous example created headers corresponding to our source files and added them
+as the <strong>Input</strong> to the compilation. That is generally not what you want. Instead,
+what is generally need is a way to</p>
+<blockquote>
+<div><ol class="arabic simple">
+<li>Look up the exact corresponding header for the <em>specific</em> job, and not add all
+possible files to all jobs in a task. When compiling <tt class="docutils literal"><span class="pre">hasty.cpp</span></tt>, we just need
+to add <tt class="docutils literal"><span class="pre">hasty.h</span></tt> (and <tt class="docutils literal"><span class="pre">universal.h</span></tt>).</li>
+<li>Add a pre-existing file name (<tt class="docutils literal"><span class="pre">hasty.h</span></tt> already exists. Don’t create it via
+another task.)</li>
+</ol>
+</div></blockquote>
+<p>This is a surprisingly common requirement: In bioinformatics sometimes DNA or RNA
+sequence files come singly in <a class="reference external" href="http://en.wikipedia.org/wiki/FASTQ_format">*.fastq</a>
+and sometimes in <a class="reference external" href="http://en.wikipedia.org/wiki/DNA_sequencing_theory#Pairwise_end-sequencing">matching pairs</a>:
+<tt class="docutils literal"><span class="pre">*1.fastq,</span> <span class="pre">*2.fastq</span></tt> etc. In the latter case, we often need to make sure that both
+sequence files are being processed in tandem. One way is to take one file name (<tt class="docutils literal"><span class="pre">*1.fastq</span></tt>)
+and look up the other.</p>
+<blockquote>
+<div><a class="reference internal" href="../../decorators/indicator_objects.html#decorators-add-inputs"><em>add_inputs()</em></a> uses standard <em>Ruffus</em> string substitution
+via <a class="reference internal" href="../../decorators/indicator_objects.html#decorators-formatter"><em>formatter</em></a> and <a class="reference internal" href="../../decorators/indicator_objects.html#decorators-regex"><em>regex</em></a> to lookup (generate) <strong>Input</strong> file names.
+(As a rule <a class="reference internal" href="../../decorators/indicator_objects.html#decorators-suffix"><em>suffix</em></a> only substitutes <strong>Output</strong> file names.)</div></blockquote>
+<div class="highlight-python"><div class="highlight"><pre><span class="nd">@transform</span><span class="p">(</span> <span class="n">source_files</span><span class="p">,</span>
+ <span class="n">formatter</span><span class="p">(</span><span class="s">".cpp$"</span><span class="p">),</span>
+<span class="hll"> <span class="c"># corresponding header for each source file</span>
+</span> <span class="n">add_inputs</span><span class="p">(</span><span class="s">"{basename[0]}.h"</span><span class="p">,</span>
+<span class="hll"> <span class="c"># add header to the input of every job</span>
+</span> <span class="s">"universal.h"</span><span class="p">),</span>
+ <span class="s">"{basename[0]}.o"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">compile</span><span class="p">(</span><span class="n">input_filenames</span><span class="p">,</span> <span class="n">output_file</span><span class="p">):</span>
+ <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+</pre></div>
+</div>
+<p>This script gives the following output</p>
+<blockquote>
+<div><div class="highlight-pycon"><div class="highlight"><pre><span class="gp">>>> </span><span class="n">pipeline_run</span><span class="p">()</span>
+<span class="go"> Job = [[hasty.cpp, hasty.h, universal.h] -> hasty.o] completed</span>
+<span class="go"> Job = [[messy.cpp, messy.h, universal.h] -> messy.o] completed</span>
+<span class="go"> Job = [[tasty.cpp, tasty.h, universal.h] -> tasty.o] completed</span>
+<span class="go">Completed Task = compile</span>
+</pre></div>
+</div>
+</div></blockquote>
+</div></blockquote>
+</div>
+</div>
+<div class="section" id="replacing-all-input-parameters-with-inputs">
+<h2>Replacing all input parameters with <a class="reference internal" href="../../decorators/indicator_objects.html#decorators-inputs"><em>inputs()</em></a><a class="headerlink" href="#replacing-all-input-parameters-with-inputs" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div>The previous examples all <em>added</em> to the set of <strong>Input</strong> file names.
+Sometimes it is necessary to replace all the <strong>Input</strong> parameters altogether.</div></blockquote>
+<div class="section" id="example-running-matching-python-scripts-using-inputs">
+<h3>5. Example: Running matching python scripts using <a class="reference internal" href="../../decorators/indicator_objects.html#decorators-inputs"><em>inputs()</em></a><a class="headerlink" href="#example-running-matching-python-scripts-using-inputs" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><p>Here is a contrived example: we wish to find all cython/python files which have been
+compiled into corresponding c++ source files.
+Instead of compiling the c++, we shall invoke the corresponding python scripts.</p>
+<p>Given three c++ files and their corresponding python scripts:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="nd">@transform</span><span class="p">(</span> <span class="n">source_files</span><span class="p">,</span>
+ <span class="n">formatter</span><span class="p">(</span><span class="s">".cpp$"</span><span class="p">),</span>
+
+<span class="hll"> <span class="c"># corresponding python file for each source file</span>
+</span> <span class="n">inputs</span><span class="p">(</span><span class="s">"{basename[0]}.py"</span><span class="p">),</span>
+
+ <span class="s">"{basename[0]}.results"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">run_corresponding_python</span><span class="p">(</span><span class="n">input_filenames</span><span class="p">,</span> <span class="n">output_file</span><span class="p">):</span>
+ <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p>The <em>Ruffus</em> code will call each python script corresponding to their c++ counterpart:</p>
+<blockquote>
+<div><div class="highlight-pycon"><div class="highlight"><pre><span class="gp">>>> </span><span class="n">pipeline_run</span><span class="p">()</span>
+<span class="go"> Job = [hasty.py -> hasty.results] completed</span>
+<span class="go"> Job = [messy.py -> messy.results] completed</span>
+<span class="go"> Job = [tasty.py -> tasty.results] completed</span>
+<span class="go">Completed Task = run_corresponding_python</span>
+</pre></div>
+</div>
+</div></blockquote>
+</div></blockquote>
+</div>
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="../../contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#"><strong>Chapter 20</strong>: Manipulating task inputs via string substitution using <tt class="docutils literal"><span class="pre">inputs()</span></tt> and <tt class="docutils literal"><span class="pre">add_inputs()</span></tt></a><ul>
+<li><a class="reference internal" href="#overview">Overview</a></li>
+<li><a class="reference internal" href="#adding-additional-input-prerequisites-per-job-with-add-inputs">Adding additional <em>input</em> prerequisites per job with <tt class="docutils literal"><span class="pre">add_inputs()</span></tt></a><ul>
+<li><a class="reference internal" href="#example-compiling-c-code">1. Example: compiling c++ code</a></li>
+<li><a class="reference internal" href="#example-adding-a-common-header-file-with-add-inputs">2. Example: Adding a common header file with <tt class="docutils literal"><span class="pre">add_inputs()</span></tt></a></li>
+<li><a class="reference internal" href="#example-additional-input-can-be-tasks">3. Example: Additional <em>Input</em> can be tasks</a></li>
+<li><a class="reference internal" href="#example-add-corresponding-files-using-add-inputs-with-formatter-or-regex">4. Example: Add corresponding files using <tt class="docutils literal"><span class="pre">add_inputs()</span></tt> with <tt class="docutils literal"><span class="pre">formatter</span></tt> or <tt class="docutils literal"><span class="pre">regex</span></tt></a></li>
+</ul>
+</li>
+<li><a class="reference internal" href="#replacing-all-input-parameters-with-inputs">Replacing all input parameters with <tt class="docutils literal"><span class="pre">inputs()</span></tt></a><ul>
+<li><a class="reference internal" href="#example-running-matching-python-scripts-using-inputs">5. Example: Running matching python scripts using <tt class="docutils literal"><span class="pre">inputs()</span></tt></a></li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="posttask.html"
+ title="previous chapter"><strong>Chapter 19</strong>: Signal the completion of each stage of our pipeline with <tt class="docutils literal"><span class="pre">@posttask</span></tt></a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="onthefly.html"
+ title="next chapter"><strong>Chapter 21</strong>: Esoteric: Generating parameters on the fly with <tt class="docutils literal"><span class="pre">@files</span></tt></a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../../_sources/tutorials/new_tutorial/inputs.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="../../decorators/decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="../../decorators/originate.html">@originate</a> </li>
+ <li><a href="../../decorators/split.html">@split</a> </li>
+ <li><a href="../../decorators/transform.html">@transform</a> </li>
+ <li><a href="../../decorators/merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="../../decorators/subdivide.html">@subdivide</a> </li>
+ <li><a href="../../decorators/transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="../../decorators/collate.html">@collate</a> </li>
+ <li><a href="../../decorators/collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="../../decorators/graphviz.html">@graphviz</a> </li>
+ <li><a href="../../decorators/mkdir.html">@mkdir</a> </li>
+ <li><a href="../../decorators/follows.html">@follows / mkdir</a> </li>
+ <li><a href="../../decorators/posttask.html">@posttask touch_file</a> </li>
+ <li><a href="../../decorators/active_if.html">@active_if</a> </li>
+ <li><a href="../../decorators/jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="../../decorators/product.html">@product </a> </li>
+ <li><a href="../../decorators/permutations.html">@permutations </a> </li>
+ <li><a href="../../decorators/combinations.html">@combinations </a> </li>
+ <li><a href="../../decorators/combinations_with_replacement.html">@combinations_ _with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="../../decorators/decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="../../decorators/files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="../../decorators/parallel.html">@parallel</a> </li>
+ <li><a href="../../decorators/check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="../../decorators/indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="onthefly.html" title="Chapter 21: Esoteric: Generating parameters on the fly with @files"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="posttask.html" title="Chapter 19: Signal the completion of each stage of our pipeline with @posttask"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="introduction.html">Manual</a> / </li>
+ <li><a href="manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/tutorials/new_tutorial/inputs_code.html b/doc/_build/html/tutorials/new_tutorial/inputs_code.html
new file mode 100644
index 0000000..c349f09
--- /dev/null
+++ b/doc/_build/html/tutorials/new_tutorial/inputs_code.html
@@ -0,0 +1,409 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>Chapter 20: Python Code for Manipulating task inputs via string substitution using inputs() and add_inputs() — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../../index.html" />
+ <link rel="next" title="Chapter 21: Esoteric: Python Code for Generating parameters on the fly with @files" href="onthefly_code.html" />
+ <link rel="prev" title="Chapter 17: Python Code for @combinations, @permutations and all versus all @product" href="combinatorics_code.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="onthefly_code.html" title="Chapter 21: Esoteric: Python Code for Generating parameters on the fly with @files"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="combinatorics_code.html" title="Chapter 17: Python Code for @combinations, @permutations and all versus all @product"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="introduction.html">Manual</a> / </li>
+ <li><a href="manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <div class="section" id="new-manual-inputs-chapter-num-python-code-for-manipulating-task-inputs-via-string-substitution-using-inputs-and-add-inputs">
+<span id="new-manual-inputs-code"></span><h1><strong>Chapter 20</strong>: Python Code for Manipulating task inputs via string substitution using <a class="reference internal" href="../../decorators/indicator_objects.html#decorators-inputs"><em>inputs()</em></a> and <a class="reference internal" href="../../decorators/indicator_objects.html#decorators-add-inputs"><em>add_inputs()</em></a><a class="headerlink" href="#new-manual-inputs-chapter-num-python-code-for-manipulating-task-inputs- [...]
+<div class="admonition seealso">
+<p class="first admonition-title">See also</p>
+<ul class="last simple">
+<li><a class="reference internal" href="manual_contents.html#new-manual-table-of-contents"><em>Manual Table of Contents</em></a></li>
+<li><a class="reference internal" href="../../decorators/indicator_objects.html#decorators-inputs"><em>inputs()</em></a> syntax</li>
+<li><a class="reference internal" href="../../decorators/indicator_objects.html#decorators-add-inputs"><em>add_inputs()</em></a> syntax</li>
+<li>Back to <strong>Chapter 20</strong>: <a class="reference internal" href="inputs.html#new-manual-inputs"><em>Manipulating task inputs via string substitution</em></a></li>
+</ul>
+</div>
+<div class="section" id="example-code-for-adding-additional-input-prerequisites-per-job-with-add-inputs">
+<h2>Example code for adding additional <em>input</em> prerequisites per job with <a class="reference internal" href="../../decorators/indicator_objects.html#decorators-add-inputs"><em>add_inputs()</em></a><a class="headerlink" href="#example-code-for-adding-additional-input-prerequisites-per-job-with-add-inputs" title="Permalink to this headline">¶</a></h2>
+<div class="section" id="example-compiling-c-code">
+<span id="new-manual-inputs-example1"></span><h3>1. Example: compiling c++ code<a class="headerlink" href="#example-compiling-c-code" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="c"># source files exist before our pipeline</span>
+<span class="n">source_files</span> <span class="o">=</span> <span class="p">[</span><span class="s">"hasty.cpp"</span><span class="p">,</span> <span class="s">"tasty.cpp"</span><span class="p">,</span> <span class="s">"messy.cpp"</span><span class="p">]</span>
+<span class="k">for</span> <span class="n">source_file</span> <span class="ow">in</span> <span class="n">source_files</span><span class="p">:</span>
+ <span class="nb">open</span><span class="p">(</span><span class="n">source_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+
+<span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+
+<span class="nd">@transform</span><span class="p">(</span><span class="n">source_files</span><span class="p">,</span> <span class="n">suffix</span><span class="p">(</span><span class="s">".cpp"</span><span class="p">),</span> <span class="s">".o"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">compile</span><span class="p">(</span><span class="n">input_filename</span><span class="p">,</span> <span class="n">output_file</span><span class="p">):</span>
+ <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+
+<span class="n">pipeline_run</span><span class="p">()</span>
+</pre></div>
+</div>
+<p>Giving:</p>
+<blockquote>
+<div><div class="highlight-pycon"><div class="highlight"><pre><span class="gp">>>> </span><span class="n">pipeline_run</span><span class="p">()</span>
+<span class="go"> Job = [hasty.cpp -> hasty.o] completed</span>
+<span class="go"> Job = [messy.cpp -> messy.o] completed</span>
+<span class="go"> Job = [tasty.cpp -> tasty.o] completed</span>
+<span class="go">Completed Task = compile</span>
+</pre></div>
+</div>
+</div></blockquote>
+</div></blockquote>
+</div>
+<div class="section" id="example-adding-a-common-header-file-with-add-inputs">
+<span id="new-manual-inputs-example2"></span><h3>2. Example: Adding a common header file with <a class="reference internal" href="../../decorators/indicator_objects.html#decorators-add-inputs"><em>add_inputs()</em></a><a class="headerlink" href="#example-adding-a-common-header-file-with-add-inputs" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="c"># source files exist before our pipeline</span>
+<span class="n">source_files</span> <span class="o">=</span> <span class="p">[</span><span class="s">"hasty.cpp"</span><span class="p">,</span> <span class="s">"tasty.cpp"</span><span class="p">,</span> <span class="s">"messy.cpp"</span><span class="p">]</span>
+<span class="k">for</span> <span class="n">source_file</span> <span class="ow">in</span> <span class="n">source_files</span><span class="p">:</span>
+ <span class="nb">open</span><span class="p">(</span><span class="n">source_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+
+<span class="c"># common (universal) header exists before our pipeline</span>
+<span class="nb">open</span><span class="p">(</span><span class="s">"universal.h"</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+
+<span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+
+<span class="nd">@transform</span><span class="p">(</span> <span class="n">source_files</span><span class="p">,</span> <span class="n">suffix</span><span class="p">(</span><span class="s">".cpp"</span><span class="p">),</span>
+<span class="hll"> <span class="c"># add header to the input of every job</span>
+</span> <span class="n">add_inputs</span><span class="p">(</span><span class="s">"universal.h"</span><span class="p">),</span>
+ <span class="s">".o"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">compile</span><span class="p">(</span><span class="n">input_filename</span><span class="p">,</span> <span class="n">output_file</span><span class="p">):</span>
+ <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+
+<span class="n">pipeline_run</span><span class="p">()</span>
+</pre></div>
+</div>
+<p>Giving:</p>
+<blockquote>
+<div><div class="highlight-pycon"><div class="highlight"><pre><span class="gp">>>> </span><span class="n">pipeline_run</span><span class="p">()</span>
+<span class="go"> Job = [[hasty.cpp, universal.h] -> hasty.o] completed</span>
+<span class="go"> Job = [[messy.cpp, universal.h] -> messy.o] completed</span>
+<span class="go"> Job = [[tasty.cpp, universal.h] -> tasty.o] completed</span>
+<span class="go">Completed Task = compile</span>
+</pre></div>
+</div>
+</div></blockquote>
+</div></blockquote>
+</div>
+<div class="section" id="example-additional-input-can-be-tasks">
+<span id="new-manual-inputs-example3"></span><h3>3. Example: Additional <em>Input</em> can be tasks<a class="headerlink" href="#example-additional-input-can-be-tasks" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="c"># source files exist before our pipeline</span>
+<span class="n">source_files</span> <span class="o">=</span> <span class="p">[</span><span class="s">"hasty.cpp"</span><span class="p">,</span> <span class="s">"tasty.cpp"</span><span class="p">,</span> <span class="s">"messy.cpp"</span><span class="p">]</span>
+<span class="k">for</span> <span class="n">source_file</span> <span class="ow">in</span> <span class="n">source_files</span><span class="p">:</span>
+ <span class="nb">open</span><span class="p">(</span><span class="n">source_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+
+<span class="c"># common (universal) header exists before our pipeline</span>
+<span class="nb">open</span><span class="p">(</span><span class="s">"universal.h"</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+
+<span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+
+<span class="hll"><span class="c"># make header files</span>
+</span><span class="nd">@transform</span><span class="p">(</span><span class="n">source_files</span><span class="p">,</span> <span class="n">suffix</span><span class="p">(</span><span class="s">".cpp"</span><span class="p">),</span> <span class="s">".h"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">create_matching_headers</span><span class="p">(</span><span class="n">input_file</span><span class="p">,</span> <span class="n">output_file</span><span class="p">):</span>
+ <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+
+<span class="nd">@transform</span><span class="p">(</span><span class="n">source_files</span><span class="p">,</span> <span class="n">suffix</span><span class="p">(</span><span class="s">".cpp"</span><span class="p">),</span>
+<span class="hll"> <span class="c"># add header to the input of every job</span>
+</span> <span class="n">add_inputs</span><span class="p">(</span><span class="s">"universal.h"</span><span class="p">,</span>
+<span class="hll"> <span class="c"># add result of task create_matching_headers to the input of every job</span>
+</span> <span class="n">create_matching_headers</span><span class="p">),</span>
+ <span class="s">".o"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">compile</span><span class="p">(</span><span class="n">input_filename</span><span class="p">,</span> <span class="n">output_file</span><span class="p">):</span>
+ <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+
+<span class="n">pipeline_run</span><span class="p">()</span>
+</pre></div>
+</div>
+<p>Giving:</p>
+<blockquote>
+<div><div class="highlight-pycon"><div class="highlight"><pre><span class="gp">>>> </span><span class="n">pipeline_run</span><span class="p">()</span>
+<span class="go"> Job = [hasty.cpp -> hasty.h] completed</span>
+<span class="go"> Job = [messy.cpp -> messy.h] completed</span>
+<span class="go"> Job = [tasty.cpp -> tasty.h] completed</span>
+<span class="go">Completed Task = create_matching_headers</span>
+<span class="go"> Job = [[hasty.cpp, universal.h, hasty.h, messy.h, tasty.h] -> hasty.o] completed</span>
+<span class="go"> Job = [[messy.cpp, universal.h, hasty.h, messy.h, tasty.h] -> messy.o] completed</span>
+<span class="go"> Job = [[tasty.cpp, universal.h, hasty.h, messy.h, tasty.h] -> tasty.o] completed</span>
+<span class="go">Completed Task = compile</span>
+</pre></div>
+</div>
+</div></blockquote>
+</div></blockquote>
+</div>
+<div class="section" id="example-add-corresponding-files-using-add-inputs-with-formatter-or-regex">
+<span id="new-manual-inputs-example4"></span><h3>4. Example: Add corresponding files using <a class="reference internal" href="../../decorators/indicator_objects.html#decorators-add-inputs"><em>add_inputs()</em></a> with <a class="reference internal" href="../../decorators/indicator_objects.html#decorators-formatter"><em>formatter</em></a> or <a class="reference internal" href="../../decorators/indicator_objects.html#decorators-regex"><em>regex</em></a><a class="headerlink" href="#exampl [...]
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="c"># source files exist before our pipeline</span>
+<span class="n">source_files</span> <span class="o">=</span> <span class="p">[</span><span class="s">"hasty.cpp"</span><span class="p">,</span> <span class="s">"tasty.cpp"</span><span class="p">,</span> <span class="s">"messy.cpp"</span><span class="p">]</span>
+<span class="n">header_files</span> <span class="o">=</span> <span class="p">[</span><span class="s">"hasty.h"</span><span class="p">,</span> <span class="s">"tasty.h"</span><span class="p">,</span> <span class="s">"messy.h"</span><span class="p">]</span>
+<span class="k">for</span> <span class="n">source_file</span> <span class="ow">in</span> <span class="n">source_files</span> <span class="o">+</span> <span class="n">header_files</span><span class="p">:</span>
+ <span class="nb">open</span><span class="p">(</span><span class="n">source_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+
+<span class="c"># common (universal) header exists before our pipeline</span>
+<span class="nb">open</span><span class="p">(</span><span class="s">"universal.h"</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+
+<span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+<span class="hll">
+</span><span class="nd">@transform</span><span class="p">(</span> <span class="n">source_files</span><span class="p">,</span>
+ <span class="n">formatter</span><span class="p">(</span><span class="s">".cpp$"</span><span class="p">),</span>
+ <span class="c"># corresponding header for each source file</span>
+ <span class="n">add_inputs</span><span class="p">(</span><span class="s">"{basename[0]}.h"</span><span class="p">,</span>
+ <span class="c"># add header to the input of every job</span>
+<span class="hll"> <span class="s">"universal.h"</span><span class="p">),</span>
+</span> <span class="s">"{basename[0]}.o"</span><span class="p">)</span>
+<span class="hll"><span class="k">def</span> <span class="nf">compile</span><span class="p">(</span><span class="n">input_filename</span><span class="p">,</span> <span class="n">output_file</span><span class="p">):</span>
+</span> <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+
+<span class="n">pipeline_run</span><span class="p">()</span>
+</pre></div>
+</div>
+<p>Giving:</p>
+<blockquote>
+<div><div class="highlight-pycon"><div class="highlight"><pre><span class="gp">>>> </span><span class="n">pipeline_run</span><span class="p">()</span>
+<span class="go"> Job = [[hasty.cpp, hasty.h, universal.h] -> hasty.o] completed</span>
+<span class="go"> Job = [[messy.cpp, messy.h, universal.h] -> messy.o] completed</span>
+<span class="go"> Job = [[tasty.cpp, tasty.h, universal.h] -> tasty.o] completed</span>
+<span class="go">Completed Task = compile</span>
+</pre></div>
+</div>
+</div></blockquote>
+</div></blockquote>
+</div>
+</div>
+<div class="section" id="example-code-for-replacing-all-input-parameters-with-inputs">
+<h2>Example code for replacing all input parameters with <a class="reference internal" href="../../decorators/indicator_objects.html#decorators-inputs"><em>inputs()</em></a><a class="headerlink" href="#example-code-for-replacing-all-input-parameters-with-inputs" title="Permalink to this headline">¶</a></h2>
+<div class="section" id="example-running-matching-python-scripts-using-inputs">
+<span id="new-manual-inputs-example5"></span><h3>5. Example: Running matching python scripts using <a class="reference internal" href="../../decorators/indicator_objects.html#decorators-inputs"><em>inputs()</em></a><a class="headerlink" href="#example-running-matching-python-scripts-using-inputs" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="c"># source files exist before our pipeline</span>
+<span class="n">source_files</span> <span class="o">=</span> <span class="p">[</span><span class="s">"hasty.cpp"</span><span class="p">,</span> <span class="s">"tasty.cpp"</span><span class="p">,</span> <span class="s">"messy.cpp"</span><span class="p">]</span>
+<span class="n">python_files</span> <span class="o">=</span> <span class="p">[</span><span class="s">"hasty.py"</span><span class="p">,</span> <span class="s">"tasty.py"</span><span class="p">,</span> <span class="s">"messy.py"</span><span class="p">]</span>
+<span class="k">for</span> <span class="n">source_file</span> <span class="ow">in</span> <span class="n">source_files</span> <span class="o">+</span> <span class="n">python_files</span><span class="p">:</span>
+ <span class="nb">open</span><span class="p">(</span><span class="n">source_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+
+<span class="c"># common (universal) header exists before our pipeline</span>
+<span class="nb">open</span><span class="p">(</span><span class="s">"universal.h"</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+
+<span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+<span class="hll">
+</span><span class="nd">@transform</span><span class="p">(</span> <span class="n">source_files</span><span class="p">,</span>
+ <span class="n">formatter</span><span class="p">(</span><span class="s">".cpp$"</span><span class="p">),</span>
+ <span class="c"># corresponding python file for each source file</span>
+ <span class="n">inputs</span><span class="p">(</span><span class="s">"{basename[0]}.py"</span><span class="p">),</span>
+
+<span class="hll"> <span class="s">"{basename[0]}.results"</span><span class="p">)</span>
+</span><span class="k">def</span> <span class="nf">run_corresponding_python</span><span class="p">(</span><span class="n">input_filenames</span><span class="p">,</span> <span class="n">output_file</span><span class="p">):</span>
+<span class="hll"> <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+</span>
+
+<span class="n">pipeline_run</span><span class="p">()</span>
+</pre></div>
+</div>
+<p>Giving:</p>
+<blockquote>
+<div><div class="highlight-pycon"><div class="highlight"><pre><span class="gp">>>> </span><span class="n">pipeline_run</span><span class="p">()</span>
+<span class="go"> Job = [hasty.py -> hasty.results] completed</span>
+<span class="go"> Job = [messy.py -> messy.results] completed</span>
+<span class="go"> Job = [tasty.py -> tasty.results] completed</span>
+<span class="go">Completed Task = run_corresponding_python</span>
+</pre></div>
+</div>
+</div></blockquote>
+</div></blockquote>
+</div>
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="../../contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#"><strong>Chapter 20</strong>: Python Code for Manipulating task inputs via string substitution using <tt class="docutils literal"><span class="pre">inputs()</span></tt> and <tt class="docutils literal"><span class="pre">add_inputs()</span></tt></a><ul>
+<li><a class="reference internal" href="#example-code-for-adding-additional-input-prerequisites-per-job-with-add-inputs">Example code for adding additional <em>input</em> prerequisites per job with <tt class="docutils literal"><span class="pre">add_inputs()</span></tt></a><ul>
+<li><a class="reference internal" href="#example-compiling-c-code">1. Example: compiling c++ code</a></li>
+<li><a class="reference internal" href="#example-adding-a-common-header-file-with-add-inputs">2. Example: Adding a common header file with <tt class="docutils literal"><span class="pre">add_inputs()</span></tt></a></li>
+<li><a class="reference internal" href="#example-additional-input-can-be-tasks">3. Example: Additional <em>Input</em> can be tasks</a></li>
+<li><a class="reference internal" href="#example-add-corresponding-files-using-add-inputs-with-formatter-or-regex">4. Example: Add corresponding files using <tt class="docutils literal"><span class="pre">add_inputs()</span></tt> with <tt class="docutils literal"><span class="pre">formatter</span></tt> or <tt class="docutils literal"><span class="pre">regex</span></tt></a></li>
+</ul>
+</li>
+<li><a class="reference internal" href="#example-code-for-replacing-all-input-parameters-with-inputs">Example code for replacing all input parameters with <tt class="docutils literal"><span class="pre">inputs()</span></tt></a><ul>
+<li><a class="reference internal" href="#example-running-matching-python-scripts-using-inputs">5. Example: Running matching python scripts using <tt class="docutils literal"><span class="pre">inputs()</span></tt></a></li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="combinatorics_code.html"
+ title="previous chapter"><strong>Chapter 17</strong>: Python Code for <tt class="docutils literal"><span class="pre">@combinations</span></tt>, <tt class="docutils literal"><span class="pre">@permutations</span></tt> and all versus all <tt class="docutils literal"><span class="pre">@product</span></tt></a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="onthefly_code.html"
+ title="next chapter"><strong>Chapter 21</strong>: Esoteric: Python Code for Generating parameters on the fly with <tt class="docutils literal"><span class="pre">@files</span></tt></a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../../_sources/tutorials/new_tutorial/inputs_code.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="../../decorators/decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="../../decorators/originate.html">@originate</a> </li>
+ <li><a href="../../decorators/split.html">@split</a> </li>
+ <li><a href="../../decorators/transform.html">@transform</a> </li>
+ <li><a href="../../decorators/merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="../../decorators/subdivide.html">@subdivide</a> </li>
+ <li><a href="../../decorators/transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="../../decorators/collate.html">@collate</a> </li>
+ <li><a href="../../decorators/collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="../../decorators/graphviz.html">@graphviz</a> </li>
+ <li><a href="../../decorators/mkdir.html">@mkdir</a> </li>
+ <li><a href="../../decorators/follows.html">@follows / mkdir</a> </li>
+ <li><a href="../../decorators/posttask.html">@posttask touch_file</a> </li>
+ <li><a href="../../decorators/active_if.html">@active_if</a> </li>
+ <li><a href="../../decorators/jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="../../decorators/product.html">@product </a> </li>
+ <li><a href="../../decorators/permutations.html">@permutations </a> </li>
+ <li><a href="../../decorators/combinations.html">@combinations </a> </li>
+                            <li><a href="../../decorators/combinations_with_replacement.html">@combinations_with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="../../decorators/decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="../../decorators/files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="../../decorators/parallel.html">@parallel</a> </li>
+ <li><a href="../../decorators/check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="../../decorators/indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="onthefly_code.html" title="Chapter 21: Esoteric: Python Code for Generating parameters on the fly with @files"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="combinatorics_code.html" title="Chapter 17: Python Code for @combinations, @permutations and all versus all @product"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="introduction.html">Manual</a> / </li>
+ <li><a href="manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/tutorials/new_tutorial/introduction.html b/doc/_build/html/tutorials/new_tutorial/introduction.html
new file mode 100644
index 0000000..2f3a508
--- /dev/null
+++ b/doc/_build/html/tutorials/new_tutorial/introduction.html
@@ -0,0 +1,554 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>Chapter 1: An introduction to basic Ruffus syntax — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../../index.html" />
+ <link rel="next" title="Chapter 2: Transforming data in a pipeline with @transform" href="transform.html" />
+ <link rel="prev" title="Ruffus Manual: List of Chapters and Example code" href="manual_contents.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="transform.html" title="Chapter 2: Transforming data in a pipeline with @transform"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="manual_contents.html" title="Ruffus Manual: List of Chapters and Example code"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="#">Manual</a> / </li>
+ <li><a href="manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <p><span class="raw-html"><style> .blue {color:blue} </style></span></p>
+<p><span class="raw-html"><style> .highlight-red {color:red} </style></span></p>
+<div class="section" id="new-manual-introduction-chapter-num-an-introduction-to-basic-ruffus-syntax">
+<span id="new-manual-introduction"></span><span id="index-0"></span><h1><strong>Chapter 1</strong>: An introduction to basic <em>Ruffus</em> syntax<a class="headerlink" href="#new-manual-introduction-chapter-num-an-introduction-to-basic-ruffus-syntax" title="Permalink to this headline">¶</a></h1>
+<div class="admonition seealso">
+<p class="first admonition-title">See also</p>
+<ul class="last simple">
+<li><a class="reference internal" href="manual_contents.html#new-manual-table-of-contents"><em>Manual Table of Contents</em></a></li>
+</ul>
+</div>
+<div class="section" id="overview">
+<h2>Overview<a class="headerlink" href="#overview" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><a class="reference internal image-reference" href="../../_images/theoretical_pipeline_schematic.png"><img alt="../../_images/theoretical_pipeline_schematic.png" src="../../_images/theoretical_pipeline_schematic.png" style="width: 610.0px; height: 71.0px;" /></a>
+<p>Computational pipelines transform your data in stages until the final result is produced.
+One easy way to understand pipelines is by imagining your data flowing across a series of
+pipes until it reaches its final destination. Even quite complicated processes can be
+broken into simple stages. Of course, it helps to visualise the whole process.</p>
+<p><em>Ruffus</em> is a way of automating the plumbing in your pipeline: You supply the python functions
+which perform the data transformation, and tell <em>Ruffus</em> how these pipeline <tt class="docutils literal"><span class="pre">task</span></tt> functions
+are connected up. <em>Ruffus</em> will make sure that the right data flows down your pipeline in the
+right way at the right time.</p>
+<div class="admonition note">
+<p class="first admonition-title">Note</p>
+<p class="last"><em>Ruffus</em> refers to each stage of your pipeline as a <a class="reference internal" href="../../glossary.html#term-task"><em class="xref std std-term">task</em></a>.</p>
+</div>
+</div></blockquote>
+<span class="target" id="new-manual-introduction-import"></span></div>
+<div class="section" id="importing-ruffus">
+<span id="index-1"></span><h2>Importing <em>Ruffus</em><a class="headerlink" href="#importing-ruffus" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>The most convenient way to use <em>Ruffus</em> is to import the various names directly:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p>This will allow <em>Ruffus</em> terms to be used directly in your code. This is also
+the style we have adopted for this manual.</p>
+<dl class="docutils">
+<dt>If any of these clash with names in your code, you can use qualified names instead:</dt>
+<dd><div class="first last highlight-python"><div class="highlight"><pre><span class="kn">import</span> <span class="nn">ruffus</span>
+
+<span class="n">ruffus</span><span class="o">.</span><span class="n">pipeline_printout</span><span class="p">(</span><span class="s">"..."</span><span class="p">)</span>
+</pre></div>
+</div>
+</dd>
+</dl>
+<p><em>Ruffus</em> uses only standard python syntax.</p>
+<p>There is no need to install anything extra or to have your script “preprocessed” to run
+your pipeline.</p>
+</div></blockquote>
+</div>
+<div class="section" id="ruffus-decorators">
+<h2><em>Ruffus</em> <a class="reference external" href="https://docs.python.org/2/glossary.html#term-decorator">decorators</a><a class="headerlink" href="#ruffus-decorators" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>To let <em>Ruffus</em> know which python functions are part of your pipeline,
+they need to be tagged or annotated using
+<em>Ruffus</em> <a class="reference external" href="https://docs.python.org/2/glossary.html#term-decorator">decorators</a> .</p>
+<p><a class="reference external" href="https://docs.python.org/2/glossary.html#term-decorator">Decorators</a> have been part of the Python language since version 2.4.
+Common examples from the standard library include <a class="reference external" href="https://docs.python.org/2/library/functions.html#staticmethod">@staticmethod</a> and
+<a class="reference external" href="https://docs.python.org/2/library/functions.html#classmethod">classmethod</a>.</p>
+<p><a class="reference external" href="https://docs.python.org/2/glossary.html#term-decorator">decorators</a> start with a <tt class="docutils literal"><span class="pre">@</span></tt>
+prefix, and take a number of parameters in parentheses, much like in a function call.</p>
+<p><a class="reference external" href="https://docs.python.org/2/glossary.html#term-decorator">decorators</a> are placed before a normal python function.</p>
+<blockquote>
+<div><img alt="../../_images/tutorial_step1_decorator_syntax.png" src="../../_images/tutorial_step1_decorator_syntax.png" />
+</div></blockquote>
+<p>Multiple decorators can be stacked as necessary, in any order:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="nd">@follows</span><span class="p">(</span><span class="n">first_task</span><span class="p">)</span>
+<span class="nd">@follows</span><span class="p">(</span><span class="n">another_task</span><span class="p">)</span>
+<span class="nd">@originate</span><span class="p">(</span><span class="nb">range</span><span class="p">(</span><span class="mi">5</span><span class="p">))</span>
+<span class="k">def</span> <span class="nf">second_task</span><span class="p">():</span>
+ <span class="s">""</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p><em>Ruffus</em> <a class="reference external" href="https://docs.python.org/2/glossary.html#term-decorator">decorators</a> do not
+otherwise alter the underlying function. These can still be called normally.</p>
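+<p>For example, in the following minimal sketch (added here purely for illustration; the task and file
+names are hypothetical, and it assumes the behaviour just described), the decorated function is invoked
+by hand exactly like an undecorated function:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre>from ruffus import *
+
+@transform(["start.txt"], suffix(".txt"), ".copy")
+def copy_task(input_file, output_file):
+    # the decorator registers the task with Ruffus but leaves the function body unchanged
+    open(output_file, "w").write(open(input_file).read())
+
+# create the starting file, then call the task directly, outside of pipeline_run()
+open("start.txt", "w")
+copy_task("start.txt", "start.copy")
+</pre></div>
+</div>
+</div></blockquote>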
+</div></blockquote>
+</div>
+<div class="section" id="your-first-ruffus-pipeline">
+<h2>Your first <em>Ruffus</em> pipeline<a class="headerlink" href="#your-first-ruffus-pipeline" title="Permalink to this headline">¶</a></h2>
+<div class="section" id="write-down-the-file-names">
+<h3>1. Write down the file names<a class="headerlink" href="#write-down-the-file-names" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><p><em>Ruffus</em> is designed for data moving through a computational pipeline as a series of files.</p>
+<p>It is also possible to use <em>Ruffus</em> pipelines without using intermediate data files but for your
+first efforts, it is probably best not to subvert its canonical design.</p>
+<p>The first thing when designing a new <em>Ruffus</em> pipeline is to sketch out the set of file names for
+the pipeline on paper:</p>
+<blockquote>
+<div><a class="reference internal image-reference" href="../../_images/tutorial_ruffus_files.jpg"><img alt="../../_images/tutorial_ruffus_files.jpg" src="../../_images/tutorial_ruffus_files.jpg" style="width: 600.0px; height: 212.5px;" /></a>
+</div></blockquote>
+<dl class="docutils">
+<dt>Here we have a number of DNA sequence files (<tt class="docutils literal"><span class="pre">*.fasta</span></tt>)</dt>
+<dd><ol class="first last arabic simple">
+<li>mapped to a genome (<tt class="docutils literal"><span class="pre">*.sam</span></tt>), and</li>
+<li>compressed (<tt class="docutils literal"><span class="pre">*.bam</span></tt>) before being</li>
+<li>summarised statistically (<tt class="docutils literal"><span class="pre">*.statistics</span></tt>)</li>
+</ol>
+</dd>
+</dl>
+<p>The first striking thing is that all of the files follow the same <strong>consistent naming scheme</strong>.</p>
+<div class="admonition note">
+<p class="first admonition-title">Note</p>
+<p><span class="highlight-red">The most important part of a Ruffus pipeline is to have a consistent naming scheme for your files.</span></p>
+<p class="last">This allows you to build sane pipelines.</p>
+</div>
+<p>In this case, files at the same stage share the same file extension (e.g. <tt class="docutils literal"><span class="pre">.sam</span></tt>).
+This is usually the simplest and most sensible choice. (We shall see in later chapters
+that <em>Ruffus</em> supports more complicated naming patterns so long as they are consistent.)</p>
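+<p>As an illustrative sketch (not part of the original example), the extension alone tells us which
+stage each file has reached:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre># a consistent scheme: the stage is encoded entirely in the file extension
+starting_files = ["a.fasta", "b.fasta", "c.fasta"]
+
+#   a.fasta  ->  a.sam  ->  a.bam  ->  a.statistics
+#   b.fasta  ->  b.sam  ->  b.bam  ->  b.statistics
+#   c.fasta  ->  c.sam  ->  c.bam  ->  c.statistics
+</pre></div>
+</div>
+</div></blockquote>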
+</div></blockquote>
+</div>
+<div class="section" id="write-the-python-functions-for-each-stage">
+<h3>2. Write the python functions for each stage<a class="headerlink" href="#write-the-python-functions-for-each-stage" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><p>Next, we can sketch out the python functions which do the actual work for the pipeline.</p>
+<blockquote>
+<div><div class="admonition note">
+<p class="first admonition-title">Note</p>
+<ol class="last arabic">
+<li><p class="first"><span class="highlight-red">These are normal python functions with the important proviso that</span></p>
+<blockquote>
+<div><ol class="arabic simple">
+<li>The first parameter contains the <strong>Input</strong> (file names)</li>
+<li>The second parameter contains the <strong>Output</strong> (file names)</li>
+</ol>
+<p>You can otherwise supply as many parameters as required.</p>
+</div></blockquote>
+</li>
+<li><p class="first"><span class="highlight-red">Each python function should only take a</span> <em>Single</em> <strong>Input</strong> at a time</p>
+<blockquote>
+<div><p>All the parallelism in your pipeline should be handled by <em>Ruffus</em>. Make sure
+each function analyses one thing at a time.</p>
+</div></blockquote>
+</li>
+</ol>
+</div>
+</div></blockquote>
+<p><em>Ruffus</em> refers to a pipelined function as a <a class="reference internal" href="../../glossary.html#term-task"><em class="xref std std-term">task</em></a>.</p>
+<p>The code for our three task functions look something like:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="c">#</span>
+<span class="hll"><span class="c"># STAGE 1 fasta->sam</span>
+</span><span class="c">#</span>
+<span class="hll"><span class="k">def</span> <span class="nf">map_dna_sequence</span><span class="p">(</span><span class="n">input_file</span><span class="p">,</span> <span class="c"># 1st parameter is Input</span>
+</span><span class="hll"> <span class="n">output_file</span><span class="p">):</span> <span class="c"># 2nd parameter is Output</span>
+</span> <span class="sd">"""</span>
+<span class="sd"> Sketch of real mapping function</span>
+<span class="sd"> We can do the mapping ourselves</span>
+<span class="sd"> or call some other programme:</span>
+<span class="sd"> os.system("stampy %s %s..." % (input_file, output_file))</span>
+<span class="sd"> """</span>
+ <span class="n">ii</span> <span class="o">=</span> <span class="nb">open</span><span class="p">(</span><span class="n">input_file</span><span class="p">)</span>
+ <span class="n">oo</span> <span class="o">=</span> <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+</pre></div>
+</div>
+<div class="highlight-python"><div class="highlight"><pre><span class="c">#</span>
+<span class="hll"><span class="c"># STAGE 2 sam->bam</span>
+</span><span class="c">#</span>
+<span class="k">def</span> <span class="nf">compress_sam_file</span><span class="p">(</span><span class="n">input_file</span><span class="p">,</span> <span class="c"># Input parameter</span>
+ <span class="n">output_file</span><span class="p">):</span> <span class="c"># Output parameter</span>
+ <span class="sd">"""</span>
+<span class="sd"> Sketch of real compression function</span>
+<span class="sd"> """</span>
+ <span class="n">ii</span> <span class="o">=</span> <span class="nb">open</span><span class="p">(</span><span class="n">input_file</span><span class="p">)</span>
+ <span class="n">oo</span> <span class="o">=</span> <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+</pre></div>
+</div>
+<div class="highlight-python"><div class="highlight"><pre><span class="c">#</span>
+<span class="hll"><span class="c"># STAGE 3 bam->statistics</span>
+</span><span class="c">#</span>
+<span class="k">def</span> <span class="nf">summarise_bam_file</span><span class="p">(</span><span class="n">input_file</span><span class="p">,</span> <span class="c"># Input parameter</span>
+ <span class="n">output_file</span><span class="p">,</span> <span class="c"># Output parameter</span>
+ <span class="n">extra_stats_parameter</span><span class="p">):</span> <span class="c"># Any number of extra parameters as required</span>
+ <span class="sd">"""</span>
+<span class="sd"> Sketch of real analysis function</span>
+<span class="sd"> """</span>
+ <span class="n">ii</span> <span class="o">=</span> <span class="nb">open</span><span class="p">(</span><span class="n">input_file</span><span class="p">)</span>
+ <span class="n">oo</span> <span class="o">=</span> <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p>If we were calling our functions manually, without the benefit of <em>Ruffus</em>, we would need
+the following sequence of calls:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="c"># STAGE 1</span>
+<span class="n">map_dna_sequence</span><span class="p">(</span><span class="s">"a.fasta"</span><span class="p">,</span> <span class="s">"a.sam"</span><span class="p">)</span>
+<span class="n">map_dna_sequence</span><span class="p">(</span><span class="s">"b.fasta"</span><span class="p">,</span> <span class="s">"b.sam"</span><span class="p">)</span>
+<span class="n">map_dna_sequence</span><span class="p">(</span><span class="s">"c.fasta"</span><span class="p">,</span> <span class="s">"c.sam"</span><span class="p">)</span>
+
+<span class="c"># STAGE 2</span>
+<span class="n">compress_sam_file</span><span class="p">(</span><span class="s">"a.sam"</span><span class="p">,</span> <span class="s">"a.bam"</span><span class="p">)</span>
+<span class="n">compress_sam_file</span><span class="p">(</span><span class="s">"b.sam"</span><span class="p">,</span> <span class="s">"b.bam"</span><span class="p">)</span>
+<span class="n">compress_sam_file</span><span class="p">(</span><span class="s">"c.sam"</span><span class="p">,</span> <span class="s">"c.bam"</span><span class="p">)</span>
+
+<span class="c"># STAGE 3</span>
+<span class="n">summarise_bam_file</span><span class="p">(</span><span class="s">"a.bam"</span><span class="p">,</span> <span class="s">"a.statistics"</span><span class="p">)</span>
+<span class="n">summarise_bam_file</span><span class="p">(</span><span class="s">"b.bam"</span><span class="p">,</span> <span class="s">"b.statistics"</span><span class="p">)</span>
+<span class="n">summarise_bam_file</span><span class="p">(</span><span class="s">"c.bam"</span><span class="p">,</span> <span class="s">"c.statistics"</span><span class="p">)</span>
+</pre></div>
+</div>
+</div></blockquote>
+</div></blockquote>
+</div>
+<div class="section" id="link-the-python-functions-into-a-pipeline">
+<h3>3. Link the python functions into a pipeline<a class="headerlink" href="#link-the-python-functions-into-a-pipeline" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><p><em>Ruffus</em> makes exactly the same function calls on your behalf. First, however, we need to
+tell <em>Ruffus</em> what the arguments should be for each call.</p>
+<ul class="simple">
+<li>The <strong>Input</strong> is easy: This is either the starting file set (<tt class="docutils literal"><span class="pre">*.fasta</span></tt>) or whatever is produced
+by the previous stage.</li>
+<li>The <strong>Output</strong> file name is the same as the <strong>Input</strong> but with the appropriate extension.</li>
+</ul>
+<p>These are specified using the <em>Ruffus</em> <a class="reference internal" href="../../decorators/transform.html#decorators-transform"><em>@transform</em></a> decorator as follows:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+
+<span class="n">starting_files</span> <span class="o">=</span> <span class="p">[</span><span class="s">"a.fasta"</span><span class="p">,</span> <span class="s">"b.fasta"</span><span class="p">,</span> <span class="s">"c.fasta"</span><span class="p">]</span>
+
+<span class="c">#</span>
+<span class="hll"><span class="c"># STAGE 1 fasta->sam</span>
+</span><span class="hll"><span class="c">#</span>
+</span><span class="hll"><span class="nd">@transform</span><span class="p">(</span><span class="n">starting_files</span><span class="p">,</span> <span class="c"># Input = starting files</span>
+</span> <span class="n">suffix</span><span class="p">(</span><span class="s">".fasta"</span><span class="p">),</span> <span class="c"># suffix = .fasta</span>
+ <span class="s">".sam"</span><span class="p">)</span> <span class="c"># Output suffix = .sam</span>
+<span class="k">def</span> <span class="nf">map_dna_sequence</span><span class="p">(</span><span class="n">input_file</span><span class="p">,</span>
+ <span class="n">output_file</span><span class="p">):</span>
+ <span class="n">ii</span> <span class="o">=</span> <span class="nb">open</span><span class="p">(</span><span class="n">input_file</span><span class="p">)</span>
+ <span class="n">oo</span> <span class="o">=</span> <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+
+<span class="c">#</span>
+<span class="hll"><span class="c"># STAGE 2 sam->bam</span>
+</span><span class="hll"><span class="c">#</span>
+</span><span class="hll"><span class="nd">@transform</span><span class="p">(</span><span class="n">map_dna_sequence</span><span class="p">,</span> <span class="c"># Input = previous stage</span>
+</span> <span class="n">suffix</span><span class="p">(</span><span class="s">".sam"</span><span class="p">),</span> <span class="c"># suffix = .sam</span>
+ <span class="s">".bam"</span><span class="p">)</span> <span class="c"># Output suffix = .bam</span>
+<span class="k">def</span> <span class="nf">compress_sam_file</span><span class="p">(</span><span class="n">input_file</span><span class="p">,</span>
+ <span class="n">output_file</span><span class="p">):</span>
+ <span class="n">ii</span> <span class="o">=</span> <span class="nb">open</span><span class="p">(</span><span class="n">input_file</span><span class="p">)</span>
+ <span class="n">oo</span> <span class="o">=</span> <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+
+<span class="c">#</span>
+<span class="c"># STAGE 3 bam->statistics</span>
+<span class="hll"><span class="c">#</span>
+</span><span class="hll"><span class="nd">@transform</span><span class="p">(</span><span class="n">compress_sam_file</span><span class="p">,</span> <span class="c"># Input = previous stage</span>
+</span><span class="hll"> <span class="n">suffix</span><span class="p">(</span><span class="s">".bam"</span><span class="p">),</span> <span class="c"># suffix = .bam</span>
+</span> <span class="s">".statistics"</span><span class="p">,</span> <span class="c"># Output suffix = .statistics</span>
+ <span class="s">"use_linear_model"</span><span class="p">)</span> <span class="c"># Extra statistics parameter</span>
+<span class="k">def</span> <span class="nf">summarise_bam_file</span><span class="p">(</span><span class="n">input_file</span><span class="p">,</span>
+ <span class="n">output_file</span><span class="p">,</span>
+ <span class="n">extra_stats_parameter</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Sketch of real analysis function</span>
+<span class="sd"> """</span>
+ <span class="n">ii</span> <span class="o">=</span> <span class="nb">open</span><span class="p">(</span><span class="n">input_file</span><span class="p">)</span>
+ <span class="n">oo</span> <span class="o">=</span> <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+</pre></div>
+</div>
+</div></blockquote>
+</div></blockquote>
+</div>
+<div class="section" id="transform-syntax">
+<h3>4. @transform syntax<a class="headerlink" href="#transform-syntax" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><ol class="arabic">
+<li><div class="first line-block">
+<div class="line">The 1st parameter for <a class="reference internal" href="../../decorators/transform.html#decorators-transform"><em>@transform</em></a> is the <strong>Input</strong>.</div>
+<div class="line">This is either the set of starting data or the name of the previous pipeline function.</div>
+<div class="line"><em>Ruffus</em> <em>chains</em> together the stages of a pipeline by linking the <strong>Output</strong> of the previous stage into the <strong>Input</strong> of the next.</div>
+</div>
+</li>
+<li><div class="first line-block">
+<div class="line">The 2nd parameter is the current <a class="reference internal" href="../../decorators/indicator_objects.html#decorators-suffix"><em>suffix</em></a></div>
+<div class="line">(i.e. our <strong>Input</strong> file extensions of <tt class="docutils literal"><span class="pre">".fasta"</span></tt> or <tt class="docutils literal"><span class="pre">".sam"</span></tt> or <tt class="docutils literal"><span class="pre">".bam"</span></tt>)</div>
+</div>
+</li>
+<li><div class="first line-block">
+<div class="line">The 3rd parameter is what we want our <strong>Output</strong> file name to be after <a class="reference internal" href="../../decorators/indicator_objects.html#decorators-suffix"><em>suffix</em></a> string substitution (e.g. <tt class="docutils literal"><span class="pre">.fasta</span> <span class="pre">-</span> <span class="pre">></span> <span class="pre">.sam</span></tt>).</div>
+<div class="line">This works because we are using a sane naming scheme for our data files.</div>
+</div>
+</li>
+<li><p class="first">Other parameters can be passed to <tt class="docutils literal"><span class="pre">@transform</span></tt> and they will be forwarded to our python
+pipeline function.</p>
+</li>
+</ol>
+<p>The functions that do the actual work of each stage of the pipeline remain unchanged.
+The role of <em>Ruffus</em> is to make sure each one is called in the right order,
+with the right parameters, and in parallel (using multiprocessing if desired).</p>
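+<p>As a self-contained illustration of these parameter roles (the file name <tt class="docutils literal"><span class="pre">x.fasta</span></tt>
+and the function name below are invented for this sketch and are not part of the tutorial pipeline):</p>
+<div class="highlight-python"><pre># sketch only: shows which @transform parameter plays which role
+from ruffus import *
+
+@transform(["x.fasta"],        # 1. Input: starting files or the previous task
+           suffix(".fasta"),   # 2. the current Input suffix
+           ".sam",             # 3. what replaces the suffix in the Output name
+           "extra_argument")   # 4. extra parameters are forwarded unchanged
+def illustrative_stage(input_file, output_file, extra_parameter):
+    open(output_file, "w").close()
+</pre>
+</div>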
+</div></blockquote>
+</div>
+<div class="section" id="run-the-pipeline">
+<span id="new-manual-pipeline-run"></span><span id="index-2"></span><h3>5. Run the pipeline!<a class="headerlink" href="#run-the-pipeline" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><div class="admonition note">
+<p class="first admonition-title">Note</p>
+<p><strong>Key Ruffus Terminology</strong>:</p>
+<p>A <a class="reference internal" href="../../glossary.html#term-task"><em class="xref std std-term">task</em></a> is an annotated python function which represents a recipe or stage of your pipeline.</p>
+<p>A <a class="reference internal" href="../../glossary.html#term-job"><em class="xref std std-term">job</em></a> is each time your recipe is applied to a piece of data, i.e. each time <em>Ruffus</em> calls your function.</p>
+<p class="last">Each <strong>task</strong> or pipeline recipe can thus have many <strong>jobs</strong> each of which can work in parallel on different data.</p>
+</div>
+<p>Now we can run the pipeline with the <em>Ruffus</em> function <a class="reference internal" href="../../pipeline_functions.html#pipeline-functions-pipeline-run"><em>pipeline_run</em></a>:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="n">pipeline_run</span><span class="p">()</span>
+</pre></div>
+</div>
+<p>This produces three sets of results in parallel, as you might expect:</p>
+<div class="highlight-pycon"><div class="highlight"><pre><span class="gp">>>> </span><span class="n">pipeline_run</span><span class="p">()</span>
+<span class="go"> Job = [a.fasta -> a.sam] completed</span>
+<span class="go"> Job = [b.fasta -> b.sam] completed</span>
+<span class="go"> Job = [c.fasta -> c.sam] completed</span>
+<span class="go">Completed Task = map_dna_sequence</span>
+<span class="go"> Job = [a.sam -> a.bam] completed</span>
+<span class="go"> Job = [b.sam -> b.bam] completed</span>
+<span class="go"> Job = [c.sam -> c.bam] completed</span>
+<span class="go">Completed Task = compress_sam_file</span>
+<span class="go"> Job = [a.bam -> a.statistics, use_linear_model] completed</span>
+<span class="go"> Job = [b.bam -> b.statistics, use_linear_model] completed</span>
+<span class="go"> Job = [c.bam -> c.statistics, use_linear_model] completed</span>
+<span class="go">Completed Task = summarise_bam_file</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p>To work out which functions to call, <a class="reference internal" href="../../pipeline_functions.html#pipeline-functions-pipeline-run"><em>pipeline_run</em></a>
+finds the <strong>last</strong> <a class="reference internal" href="../../glossary.html#term-task"><em class="xref std std-term">task</em></a> function of your pipeline, then
+works out all the other functions this depends on, working backwards up the chain of
+dependencies automatically.</p>
+<p>We can specify this end point of the pipeline explicitly:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="gp">>>> </span><span class="n">pipeline_run</span><span class="p">(</span><span class="n">target_tasks</span> <span class="o">=</span> <span class="p">[</span><span class="n">summarise_bam_file</span><span class="p">])</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p>This allows us to run only part of the pipeline, for example:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="gp">>>> </span><span class="n">pipeline_run</span><span class="p">(</span><span class="n">target_tasks</span> <span class="o">=</span> <span class="p">[</span><span class="n">compress_sam_file</span><span class="p">])</span>
+</pre></div>
+</div>
+</div></blockquote>
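+<p>The parallelism mentioned above is also controlled from <tt class="docutils literal"><span class="pre">pipeline_run()</span></tt>.
+A brief sketch (the process count of 4 is arbitrary) uses the <tt class="docutils literal"><span class="pre">multiprocess</span></tt>
+parameter; see the later chapter on multiprocessing for details:</p>
+<div class="highlight-python"><pre># run up to 4 jobs at the same time; the pipeline definition is unchanged
+pipeline_run(multiprocess = 4)
+</pre>
+</div>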
+</div></blockquote>
+<div class="admonition note">
+<p class="first admonition-title">Note</p>
+<p class="last">The <a class="reference internal" href="introduction_code.html#new-manual-introduction-code"><em>example code</em></a> can be copied and pasted into a python
+command shell.</p>
+</div>
+</div>
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="../../contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#"><strong>Chapter 1</strong>: An introduction to basic <em>Ruffus</em> syntax</a><ul>
+<li><a class="reference internal" href="#overview">Overview</a></li>
+<li><a class="reference internal" href="#importing-ruffus">Importing <em>Ruffus</em></a></li>
+<li><a class="reference internal" href="#ruffus-decorators"><em>Ruffus</em> decorators</a></li>
+<li><a class="reference internal" href="#your-first-ruffus-pipeline">Your first <em>Ruffus</em> pipeline</a><ul>
+<li><a class="reference internal" href="#write-down-the-file-names">1. Write down the file names</a></li>
+<li><a class="reference internal" href="#write-the-python-functions-for-each-stage">2. Write the python functions for each stage</a></li>
+<li><a class="reference internal" href="#link-the-python-functions-into-a-pipeline">3. Link the python functions into a pipeline</a></li>
+<li><a class="reference internal" href="#transform-syntax">4. @transform syntax</a></li>
+<li><a class="reference internal" href="#run-the-pipeline">5. Run the pipeline!</a></li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="manual_contents.html"
+ title="previous chapter"><strong>Ruffus</strong> Manual: List of Chapters and Example code</a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="transform.html"
+ title="next chapter"><strong>Chapter 2</strong>: Transforming data in a pipeline with <tt class="docutils literal"><span class="pre">@transform</span></tt></a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../../_sources/tutorials/new_tutorial/introduction.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="../../decorators/decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="../../decorators/originate.html">@originate</a> </li>
+ <li><a href="../../decorators/split.html">@split</a> </li>
+ <li><a href="../../decorators/transform.html">@transform</a> </li>
+ <li><a href="../../decorators/merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="../../decorators/subdivide.html">@subdivide</a> </li>
+ <li><a href="../../decorators/transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="../../decorators/collate.html">@collate</a> </li>
+ <li><a href="../../decorators/collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="../../decorators/graphviz.html">@graphviz</a> </li>
+ <li><a href="../../decorators/mkdir.html">@mkdir</a> </li>
+ <li><a href="../../decorators/follows.html">@follows / mkdir</a> </li>
+ <li><a href="../../decorators/posttask.html">@posttask touch_file</a> </li>
+ <li><a href="../../decorators/active_if.html">@active_if</a> </li>
+ <li><a href="../../decorators/jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="../../decorators/product.html">@product </a> </li>
+ <li><a href="../../decorators/permutations.html">@permutations </a> </li>
+ <li><a href="../../decorators/combinations.html">@combinations </a> </li>
+ <li><a href="../../decorators/combinations_with_replacement.html">@combinations_ _with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="../../decorators/decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="../../decorators/files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="../../decorators/parallel.html">@parallel</a> </li>
+ <li><a href="../../decorators/check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="../../decorators/indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="transform.html" title="Chapter 2: Transforming data in a pipeline with @transform"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="manual_contents.html" title="Ruffus Manual: List of Chapters and Example code"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="#">Manual</a> / </li>
+ <li><a href="manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/tutorials/new_tutorial/introduction_code.html b/doc/_build/html/tutorials/new_tutorial/introduction_code.html
new file mode 100644
index 0000000..6a05a71
--- /dev/null
+++ b/doc/_build/html/tutorials/new_tutorial/introduction_code.html
@@ -0,0 +1,285 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>Chapter 1: Python Code for An introduction to basic Ruffus syntax — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../../index.html" />
+ <link rel="next" title="Chapter 1: Python Code for Transforming data in a pipeline with @transform" href="transform_code.html" />
+ <link rel="prev" title="Appendix 6: @files_re: Deprecated syntax using regular expressions" href="deprecated_files_re.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="transform_code.html" title="Chapter 1: Python Code for Transforming data in a pipeline with @transform"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="deprecated_files_re.html" title="Appendix 6: @files_re: Deprecated syntax using regular expressions"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="introduction.html">Manual</a> / </li>
+ <li><a href="manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <div class="section" id="new-manual-introduction-chapter-num-python-code-for-an-introduction-to-basic-ruffus-syntax">
+<span id="new-manual-introduction-code"></span><h1><strong>Chapter 1</strong>: Python Code for An introduction to basic Ruffus syntax<a class="headerlink" href="#new-manual-introduction-chapter-num-python-code-for-an-introduction-to-basic-ruffus-syntax" title="Permalink to this headline">¶</a></h1>
+<div class="admonition seealso">
+<p class="first admonition-title">See also</p>
+<ul class="last simple">
+<li><a class="reference internal" href="manual_contents.html#new-manual-table-of-contents"><em>Manual Table of Contents</em></a></li>
+<li><a class="reference internal" href="../../decorators/transform.html#decorators-transform"><em>@transform syntax in detail</em></a></li>
+<li>Back to <strong>Chapter 1</strong>: <a class="reference internal" href="introduction.html#new-manual-introduction"><em>An introduction to basic Ruffus syntax</em></a></li>
+</ul>
+</div>
+<div class="section" id="your-first-ruffus-script">
+<h2>Your first Ruffus script<a class="headerlink" href="#your-first-ruffus-script" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="p">::</span>
+
+<span class="c">#</span>
+<span class="c"># The starting data files would normally exist beforehand!</span>
+<span class="c"># We create some empty files for this example</span>
+<span class="c">#</span>
+<span class="n">starting_files</span> <span class="o">=</span> <span class="p">[</span><span class="s">"a.fasta"</span><span class="p">,</span> <span class="s">"b.fasta"</span><span class="p">,</span> <span class="s">"c.fasta"</span><span class="p">]</span>
+
+<span class="k">for</span> <span class="n">ff</span> <span class="ow">in</span> <span class="n">starting_files</span><span class="p">:</span>
+ <span class="nb">open</span><span class="p">(</span><span class="n">ff</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+
+
+<span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+
+<span class="c">#</span>
+<span class="c"># STAGE 1 fasta->sam</span>
+<span class="c">#</span>
+<span class="nd">@transform</span><span class="p">(</span><span class="n">starting_files</span><span class="p">,</span> <span class="c"># Input = starting files</span>
+ <span class="n">suffix</span><span class="p">(</span><span class="s">".fasta"</span><span class="p">),</span> <span class="c"># suffix = .fasta</span>
+ <span class="s">".sam"</span><span class="p">)</span> <span class="c"># Output suffix = .sam</span>
+<span class="k">def</span> <span class="nf">map_dna_sequence</span><span class="p">(</span><span class="n">input_file</span><span class="p">,</span>
+ <span class="n">output_file</span><span class="p">):</span>
+ <span class="n">ii</span> <span class="o">=</span> <span class="nb">open</span><span class="p">(</span><span class="n">input_file</span><span class="p">)</span>
+ <span class="n">oo</span> <span class="o">=</span> <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+
+<span class="c">#</span>
+<span class="c"># STAGE 2 sam->bam</span>
+<span class="c">#</span>
+<span class="nd">@transform</span><span class="p">(</span><span class="n">map_dna_sequence</span><span class="p">,</span> <span class="c"># Input = previous stage</span>
+ <span class="n">suffix</span><span class="p">(</span><span class="s">".sam"</span><span class="p">),</span> <span class="c"># suffix = .sam</span>
+ <span class="s">".bam"</span><span class="p">)</span> <span class="c"># Output suffix = .bam</span>
+<span class="k">def</span> <span class="nf">compress_sam_file</span><span class="p">(</span><span class="n">input_file</span><span class="p">,</span>
+ <span class="n">output_file</span><span class="p">):</span>
+ <span class="n">ii</span> <span class="o">=</span> <span class="nb">open</span><span class="p">(</span><span class="n">input_file</span><span class="p">)</span>
+ <span class="n">oo</span> <span class="o">=</span> <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+
+<span class="c">#</span>
+<span class="c"># STAGE 3 bam->statistics</span>
+<span class="c">#</span>
+<span class="nd">@transform</span><span class="p">(</span><span class="n">compress_sam_file</span><span class="p">,</span> <span class="c"># Input = previous stage</span>
+ <span class="n">suffix</span><span class="p">(</span><span class="s">".bam"</span><span class="p">),</span> <span class="c"># suffix = .bam</span>
+ <span class="s">".statistics"</span><span class="p">,</span> <span class="c"># Output suffix = .statistics</span>
+ <span class="s">"use_linear_model"</span><span class="p">)</span> <span class="c"># Extra statistics parameter</span>
+<span class="k">def</span> <span class="nf">summarise_bam_file</span><span class="p">(</span><span class="n">input_file</span><span class="p">,</span>
+ <span class="n">output_file</span><span class="p">,</span>
+ <span class="n">extra_stats_parameter</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Sketch of real analysis function</span>
+<span class="sd"> """</span>
+ <span class="n">ii</span> <span class="o">=</span> <span class="nb">open</span><span class="p">(</span><span class="n">input_file</span><span class="p">)</span>
+ <span class="n">oo</span> <span class="o">=</span> <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+
+<span class="n">pipeline_run</span><span class="p">()</span>
+</pre></div>
+</div>
+</div></blockquote>
+</div>
+<div class="section" id="resulting-output">
+<h2>Resulting Output<a class="headerlink" href="#resulting-output" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="gp">>>> </span><span class="n">pipeline_run</span><span class="p">()</span>
+<span class="go"> Job = [a.fasta -> a.sam] completed</span>
+<span class="go"> Job = [b.fasta -> b.sam] completed</span>
+<span class="go"> Job = [c.fasta -> c.sam] completed</span>
+<span class="go">Completed Task = map_dna_sequence</span>
+<span class="go"> Job = [a.sam -> a.bam] completed</span>
+<span class="go"> Job = [b.sam -> b.bam] completed</span>
+<span class="go"> Job = [c.sam -> c.bam] completed</span>
+<span class="go">Completed Task = compress_sam_file</span>
+<span class="go"> Job = [a.bam -> a.statistics, use_linear_model] completed</span>
+<span class="go"> Job = [b.bam -> b.statistics, use_linear_model] completed</span>
+<span class="go"> Job = [c.bam -> c.statistics, use_linear_model] completed</span>
+<span class="go">Completed Task = summarise_bam_file</span>
+</pre></div>
+</div>
+</div></blockquote>
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="../../contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#"><strong>Chapter 1</strong>: Python Code for An introduction to basic Ruffus syntax</a><ul>
+<li><a class="reference internal" href="#your-first-ruffus-script">Your first Ruffus script</a></li>
+<li><a class="reference internal" href="#resulting-output">Resulting Output</a></li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="deprecated_files_re.html"
+ title="previous chapter"><strong>Appendix 6</strong>: <strong>@files_re</strong>: Deprecated <cite>syntax using regular expressions</cite></a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="transform_code.html"
+ title="next chapter"><strong>Chapter 1</strong>: Python Code for Transforming data in a pipeline with <tt class="docutils literal"><span class="pre">@transform</span></tt></a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../../_sources/tutorials/new_tutorial/introduction_code.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="../../decorators/decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="../../decorators/originate.html">@originate</a> </li>
+ <li><a href="../../decorators/split.html">@split</a> </li>
+ <li><a href="../../decorators/transform.html">@transform</a> </li>
+ <li><a href="../../decorators/merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="../../decorators/subdivide.html">@subdivide</a> </li>
+ <li><a href="../../decorators/transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="../../decorators/collate.html">@collate</a> </li>
+ <li><a href="../../decorators/collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="../../decorators/graphviz.html">@graphviz</a> </li>
+ <li><a href="../../decorators/mkdir.html">@mkdir</a> </li>
+ <li><a href="../../decorators/follows.html">@follows / mkdir</a> </li>
+ <li><a href="../../decorators/posttask.html">@posttask touch_file</a> </li>
+ <li><a href="../../decorators/active_if.html">@active_if</a> </li>
+ <li><a href="../../decorators/jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="../../decorators/product.html">@product </a> </li>
+ <li><a href="../../decorators/permutations.html">@permutations </a> </li>
+ <li><a href="../../decorators/combinations.html">@combinations </a> </li>
+ <li><a href="../../decorators/combinations_with_replacement.html">@combinations_ _with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="../../decorators/decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="../../decorators/files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="../../decorators/parallel.html">@parallel</a> </li>
+ <li><a href="../../decorators/check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="../../decorators/indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="transform_code.html" title="Chapter 1: Python Code for Transforming data in a pipeline with @transform"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="deprecated_files_re.html" title="Appendix 6: @files_re: Deprecated syntax using regular expressions"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="introduction.html">Manual</a> / </li>
+ <li><a href="manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/tutorials/new_tutorial/list_of_ruffus_names.html b/doc/_build/html/tutorials/new_tutorial/list_of_ruffus_names.html
new file mode 100644
index 0000000..8b685d2
--- /dev/null
+++ b/doc/_build/html/tutorials/new_tutorial/list_of_ruffus_names.html
@@ -0,0 +1,288 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>Appendix 4: Names exported from Ruffus — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../../index.html" />
+ <link rel="next" title="Appendix 5: @files: Deprecated syntax" href="deprecated_files.html" />
+ <link rel="prev" title="Appendix 3: Exceptions thrown inside pipelines" href="exceptions.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="deprecated_files.html" title="Appendix 5: @files: Deprecated syntax"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="exceptions.html" title="Appendix 3: Exceptions thrown inside pipelines"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="introduction.html">Manual</a> / </li>
+ <li><a href="manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <div class="section" id="new-manual-ruffus-names-chapter-num-names-exported-from-ruffus">
+<span id="new-manual-ruffus-names"></span><span id="index-0"></span><h1><strong>Appendix 4</strong>: Names exported from Ruffus<a class="headerlink" href="#new-manual-ruffus-names-chapter-num-names-exported-from-ruffus" title="Permalink to this headline">¶</a></h1>
+<div class="admonition seealso">
+<p class="first admonition-title">See also</p>
+<ul class="last simple">
+<li><a class="reference internal" href="manual_contents.html#new-manual-table-of-contents"><em>Manual Table of Contents</em></a></li>
+</ul>
+</div>
+<div class="section" id="ruffus-names">
+<h2>Ruffus Names<a class="headerlink" href="#ruffus-names" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>This is a list of all the names <em>Ruffus</em> makes available:</p>
+<table border="1" class="docutils">
+<colgroup>
+<col width="28%" />
+<col width="72%" />
+</colgroup>
+<thead valign="bottom">
+<tr class="row-odd"><th class="head">Category</th>
+<th class="head">Manual</th>
+</tr>
+</thead>
+<tbody valign="top">
+<tr class="row-even"><td><strong>Pipeline functions</strong></td>
+<td><div class="first last line-block">
+<div class="line"><a class="reference internal" href="../../pipeline_functions.html#pipeline-functions-pipeline-printout"><em>pipeline_printout()</em></a> (<a class="reference internal" href="pipeline_printout.html#new-manual-pipeline-printout"><em>Manual</em></a>)</div>
+<div class="line"><a class="reference internal" href="../../pipeline_functions.html#pipeline-functions-pipeline-printout-graph"><em>pipeline_printout()</em></a> (<a class="reference internal" href="pipeline_printout_graph.html#new-manual-pipeline-printout-graph"><em>Manual</em></a>)</div>
+<div class="line"><a class="reference internal" href="../../pipeline_functions.html#pipeline-functions-pipeline-run"><em>pipeline_printout()</em></a> (<a class="reference internal" href="introduction.html#new-manual-pipeline-run"><em>Manual</em></a>)</div>
+</div>
+</td>
+</tr>
+<tr class="row-odd"><td><strong>Decorators</strong></td>
+<td><div class="first last line-block">
+<div class="line"><a class="reference internal" href="../../decorators/active_if.html#decorators-active-if"><em>@active_if</em></a> (<a class="reference internal" href="active_if.html#new-manual-active-if"><em>Manual</em></a>)</div>
+<div class="line"><a class="reference internal" href="../../decorators/check_if_uptodate.html#decorators-check-if-uptodate"><em>@check_if_uptodate</em></a> (<a class="reference internal" href="check_if_uptodate.html#new-manual-check-if-uptodate"><em>Manual</em></a>)</div>
+<div class="line"><a class="reference internal" href="../../decorators/collate.html#decorators-collate"><em>@collate</em></a> (<a class="reference internal" href="subdivide_collate.html#new-manual-collate"><em>Manual</em></a>)</div>
+<div class="line"><a class="reference internal" href="../../decorators/files.html#decorators-files"><em>@files</em></a> (<a class="reference internal" href="deprecated_files.html#new-manual-deprecated-files"><em>Manual</em></a>)</div>
+<div class="line"><a class="reference internal" href="../../decorators/follows.html#decorators-follows"><em>@follows</em></a> (<a class="reference internal" href="transform_in_parallel.html#new-manual-follows"><em>Manual</em></a>)</div>
+<div class="line"><a class="reference internal" href="../../decorators/jobs_limit.html#decorators-jobs-limit"><em>@jobs_limit</em></a> (<a class="reference internal" href="multiprocessing.html#new-manual-jobs-limit"><em>Manual</em></a>)</div>
+<div class="line"><a class="reference internal" href="../../decorators/merge.html#decorators-merge"><em>@merge</em></a> (<a class="reference internal" href="merge.html#new-manual-merge"><em>Manual</em></a>)</div>
+<div class="line"><a class="reference internal" href="../../decorators/mkdir.html#decorators-mkdir"><em>@mkdir</em></a> (<a class="reference internal" href="mkdir.html#new-manual-mkdir"><em>Manual</em></a>)</div>
+<div class="line"><a class="reference internal" href="../../decorators/originate.html#decorators-originate"><em>@originate</em></a> (<a class="reference internal" href="originate.html#new-manual-originate"><em>Manual</em></a>)</div>
+<div class="line"><a class="reference internal" href="../../decorators/parallel.html#decorators-parallel"><em>@parallel</em></a> (<a class="reference internal" href="parallel.html#new-manual-deprecated-parallel"><em>Manual</em></a>)</div>
+<div class="line"><a class="reference internal" href="../../decorators/posttask.html#decorators-posttask"><em>@posttask</em></a> (<a class="reference internal" href="posttask.html#new-manual-posttask"><em>Manual</em></a>)</div>
+<div class="line"><a class="reference internal" href="../../decorators/split.html#decorators-split"><em>@split</em></a> (<a class="reference internal" href="split.html#new-manual-split"><em>Manual</em></a>)</div>
+<div class="line"><a class="reference internal" href="../../decorators/subdivide.html#decorators-subdivide"><em>@subdivide</em></a> (<a class="reference internal" href="subdivide_collate.html#new-manual-subdivide"><em>Manual</em></a>)</div>
+<div class="line"><a class="reference internal" href="../../decorators/transform.html#decorators-transform"><em>@transform</em></a> (<a class="reference internal" href="transform.html#new-manual-transform"><em>Manual</em></a>)</div>
+<div class="line"><a class="reference internal" href="../../decorators/files_re.html#decorators-files-re"><em>@files_re</em></a> (<a class="reference internal" href="deprecated_files_re.html#new-manual-deprecated-files-re"><em>Manual</em></a>)</div>
+</div>
+</td>
+</tr>
+<tr class="row-even"><td><strong>Loggers</strong></td>
+<td><div class="first last line-block">
+<div class="line">stderr_logger</div>
+<div class="line">black_hole_logger</div>
+</div>
+</td>
+</tr>
+<tr class="row-odd"><td><strong>Parameter disambiguating Indicators</strong></td>
+<td><div class="first last line-block">
+<div class="line"><a class="reference internal" href="../../decorators/indicator_objects.html#decorators-suffix"><em>suffix</em></a> (<a class="reference internal" href="output_file_names.html#new-manual-suffix"><em>Manual</em></a>)</div>
+<div class="line"><a class="reference internal" href="../../decorators/indicator_objects.html#decorators-regex"><em>regex</em></a> (<a class="reference internal" href="output_file_names.html#new-manual-regex"><em>Manual</em></a>)</div>
+<div class="line"><a class="reference internal" href="../../decorators/indicator_objects.html#decorators-formatter"><em>formatter</em></a> (<a class="reference internal" href="output_file_names.html#new-manual-formatter"><em>Manual</em></a>)</div>
+<div class="line"><a class="reference internal" href="../../decorators/indicator_objects.html#decorators-inputs"><em>inputs</em></a> (<a class="reference internal" href="inputs.html#new-manual-inputs"><em>Manual</em></a>)</div>
+<div class="line"><a class="reference internal" href="../../decorators/indicator_objects.html#decorators-add-inputs"><em>inputs</em></a> (<a class="reference internal" href="inputs.html#new-manual-inputs"><em>Manual</em></a>)</div>
+<div class="line"><a class="reference internal" href="../../decorators/indicator_objects.html#decorators-touch-file"><em>touch_file</em></a> (<a class="reference internal" href="posttask.html#new-manual-posttask-touch-file"><em>Manual</em></a>)</div>
+<div class="line"><a class="reference internal" href="../../decorators/indicator_objects.html#decorators-combine"><em>combine</em></a></div>
+<div class="line"><a class="reference internal" href="../../decorators/follows.html#decorators-follows-mkdir"><em>mkdir</em></a> (<a class="reference internal" href="transform_in_parallel.html#new-manual-follows-mkdir"><em>Manual</em></a>)</div>
+<div class="line"><a class="reference internal" href="../../decorators/indicator_objects.html#decorators-output-from"><em>output_from</em></a> (<a class="reference internal" href="transform_in_parallel.html#new-manual-output-from"><em>Manual</em></a>)</div>
+</div>
+</td>
+</tr>
+<tr class="row-even"><td><strong>Decorators in ruffus.combinatorics</strong></td>
+<td><div class="first last line-block">
+<div class="line"><a class="reference internal" href="../../decorators/combinations.html#decorators-combinations"><em>@combinations</em></a> (<a class="reference internal" href="combinatorics.html#new-manual-combinations"><em>Manual</em></a>)</div>
+<div class="line"><a class="reference internal" href="../../decorators/combinations_with_replacement.html#decorators-combinations-with-replacement"><em>@combinations_with_replacement</em></a> (<a class="reference internal" href="combinatorics.html#new-manual-combinations-with-replacement"><em>Manual</em></a>)</div>
+<div class="line"><a class="reference internal" href="../../decorators/permutations.html#decorators-permutations"><em>@permutations</em></a> (<a class="reference internal" href="combinatorics.html#new-manual-permutations"><em>Manual</em></a>)</div>
+<div class="line"><a class="reference internal" href="../../decorators/product.html#decorators-product"><em>@product</em></a> (<a class="reference internal" href="combinatorics.html#new-manual-product"><em>Manual</em></a>)</div>
+</div>
+</td>
+</tr>
+<tr class="row-odd"><td><strong>Decorators in ruffus.cmdline</strong></td>
+<td><div class="first last line-block">
+<div class="line"><a class="reference internal" href="command_line.html#new-manual-cmdline-get-argparse"><em>get_argparse</em></a></div>
+<div class="line"><a class="reference internal" href="command_line.html#new-manual-cmdline-setup-logging"><em>setup_logging</em></a></div>
+<div class="line"><a class="reference internal" href="command_line.html#new-manual-cmdline-run"><em>run</em></a></div>
+<div class="line"><a class="reference internal" href="command_line.html#new-manual-cmdline-message"><em>MESSAGE</em></a></div>
+</div>
+</td>
+</tr>
+</tbody>
+</table>
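+<p>As a brief sketch of how these names are typically brought into scope (exactly which imports
+you need depends on which decorators you use; the combinatorics decorators live in the
+<tt class="docutils literal"><span class="pre">ruffus.combinatorics</span></tt> sub-module listed above):</p>
+<div class="highlight-python"><pre># core decorators, indicator objects, loggers and pipeline functions
+from ruffus import *
+
+# @product, @permutations, @combinations, @combinations_with_replacement
+from ruffus.combinatorics import *
+</pre>
+</div>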
+</div></blockquote>
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="../../contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#"><strong>Appendix 4</strong>: Names exported from Ruffus</a><ul>
+<li><a class="reference internal" href="#ruffus-names">Ruffus Names</a></li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="exceptions.html"
+ title="previous chapter"><strong>Appendix 3</strong>: Exceptions thrown inside pipelines</a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="deprecated_files.html"
+ title="next chapter"><strong>Appendix 5</strong>: <strong>@files</strong>: Deprecated syntax</a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../../_sources/tutorials/new_tutorial/list_of_ruffus_names.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="../../decorators/decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="../../decorators/originate.html">@originate</a> </li>
+ <li><a href="../../decorators/split.html">@split</a> </li>
+ <li><a href="../../decorators/transform.html">@transform</a> </li>
+ <li><a href="../../decorators/merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="../../decorators/subdivide.html">@subdivide</a> </li>
+ <li><a href="../../decorators/transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="../../decorators/collate.html">@collate</a> </li>
+ <li><a href="../../decorators/collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="../../decorators/graphviz.html">@graphviz</a> </li>
+ <li><a href="../../decorators/mkdir.html">@mkdir</a> </li>
+ <li><a href="../../decorators/follows.html">@follows / mkdir</a> </li>
+ <li><a href="../../decorators/posttask.html">@posttask touch_file</a> </li>
+ <li><a href="../../decorators/active_if.html">@active_if</a> </li>
+ <li><a href="../../decorators/jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="../../decorators/product.html">@product </a> </li>
+ <li><a href="../../decorators/permutations.html">@permutations </a> </li>
+ <li><a href="../../decorators/combinations.html">@combinations </a> </li>
+ <li><a href="../../decorators/combinations_with_replacement.html">@combinations_ _with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="../../decorators/decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="../../decorators/files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="../../decorators/parallel.html">@parallel</a> </li>
+ <li><a href="../../decorators/check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="../../decorators/indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="deprecated_files.html" title="Appendix 5: @files: Deprecated syntax"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="exceptions.html" title="Appendix 3: Exceptions thrown inside pipelines"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="introduction.html">Manual</a> / </li>
+ <li><a href="manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/tutorials/new_tutorial/logging.html b/doc/_build/html/tutorials/new_tutorial/logging.html
new file mode 100644
index 0000000..313e6ec
--- /dev/null
+++ b/doc/_build/html/tutorials/new_tutorial/logging.html
@@ -0,0 +1,383 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>Chapter 15: Logging progress through a pipeline — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../../index.html" />
+ <link rel="next" title="Chapter 16: @subdivide tasks to run efficiently and regroup with @collate" href="subdivide_collate.html" />
+ <link rel="prev" title="Chapter 14: Multiprocessing, drmaa and Computation Clusters" href="multiprocessing.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="subdivide_collate.html" title="Chapter 16: @subdivide tasks to run efficiently and regroup with @collate"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="multiprocessing.html" title="Chapter 14: Multiprocessing, drmaa and Computation Clusters"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="introduction.html">Manual</a> / </li>
+ <li><a href="manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <div class="section" id="new-manual-logging-chapter-num-logging-progress-through-a-pipeline">
+<span id="new-manual-logging"></span><span id="index-0"></span><h1><strong>Chapter 15</strong>: Logging progress through a pipeline<a class="headerlink" href="#new-manual-logging-chapter-num-logging-progress-through-a-pipeline" title="Permalink to this headline">¶</a></h1>
+<div class="admonition seealso">
+<p class="first admonition-title">See also</p>
+<ul class="last simple">
+<li><a class="reference internal" href="manual_contents.html#new-manual-table-of-contents"><em>Manual Table of Contents</em></a></li>
+</ul>
+</div>
+<div class="admonition note">
+<p class="first admonition-title">Note</p>
+<p class="last">Remember to look at the <a class="reference internal" href="logging_code.html#new-manual-logging-code"><em>example code</em></a></p>
+</div>
+<div class="section" id="overview">
+<h2>Overview<a class="headerlink" href="#overview" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>There are two parts to logging with <strong>Ruffus</strong>:</p>
+<ul>
+<li><p class="first">Logging progress through the pipeline</p>
+<blockquote>
+<div><p>This produces the sort of output displayed in this manual:</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="gp">>>> </span><span class="n">pipeline_run</span><span class="p">([</span><span class="n">parallel_io_task</span><span class="p">])</span>
+<span class="go">Task = parallel_io_task</span>
+<span class="go"> Job = ["a.1" -> "a.2", "A file"] completed</span>
+<span class="go"> Job = ["b.1" -> "b.2", "B file"] unnecessary: already up to date</span>
+<span class="go">Completed Task = parallel_io_task</span>
+</pre></div>
+</div>
+</div></blockquote>
+</li>
+<li><p class="first">Logging your own messages from within your pipelined functions.</p>
+<blockquote>
+<div><p>Because <strong>Ruffus</strong> may run each task function in a separate process on a separate
+CPU (multiprocessing), some care is needed to send and
+synchronise your log messages across process boundaries.</p>
+</div></blockquote>
+</li>
+</ul>
+<p>We shall deal with these in turn.</p>
+</div></blockquote>
+</div>
+<div class="section" id="logging-task-job-completion">
+<span id="new-manual-logging-pipeline"></span><h2>Logging task/job completion<a class="headerlink" href="#logging-task-job-completion" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>By default, <em>Ruffus</em> logs each task and each job as it is completed to
+<a class="reference external" href="http://docs.python.org/2/library/sys.html#sys.stderr">sys.stderr</a>.</p>
+<p>This default is equivalent to passing the built-in logger explicitly: <a class="reference internal" href="../../pipeline_functions.html#pipeline-functions-pipeline-run"><em>pipeline_run(logger = stderr_logger)</em></a>.</p>
+<p>To turn off all tracking messages as the pipeline runs, either set <tt class="docutils literal"><span class="pre">verbose</span> <span class="pre">=</span> <span class="pre">0</span></tt> or
+use the aptly named Ruffus <tt class="docutils literal"><span class="pre">black_hole_logger</span></tt>:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="n">pipeline_run</span><span class="p">(</span><span class="n">logger</span> <span class="o">=</span> <span class="n">black_hole_logger</span><span class="p">)</span>
+</pre></div>
+</div>
+</div></blockquote>
+</div></blockquote>
+<div class="section" id="controlling-logging-verbosity">
+<span id="index-1"></span><h3>Controlling logging verbosity<a class="headerlink" href="#controlling-logging-verbosity" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><p><a class="reference internal" href="../../pipeline_functions.html#pipeline-functions-pipeline-run"><em>pipeline_run()</em></a> currently has five levels of verbosity, set by the optional <tt class="docutils literal"><span class="pre">verbose</span></tt>
+parameter which defaults to 1:</p>
+<blockquote>
+<div><div class="highlight-python"><pre>verbose = 0: nothing
+verbose = 1: logs completed jobs/tasks;
+verbose = 2: logs up to date jobs in incomplete tasks
+verbose = 3: logs reason for running job
+verbose = 4: logs messages useful only for debugging ruffus pipeline code</pre>
+</div>
+<p><tt class="docutils literal"><span class="pre">verbose</span></tt> > <tt class="docutils literal"><span class="pre">5</span></tt> are intended for debugging <strong>Ruffus</strong> by the developers and the details
+are liable to change from release to release</p>
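+<p>For example, a brief sketch (the level of 3 chosen here is arbitrary) that asks <em>Ruffus</em> to
+explain why each job is being rerun:</p>
+<div class="highlight-python"><pre># verbose = 3 also logs the reason each job is (re)run
+pipeline_run(verbose = 3)
+</pre>
+</div>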
+</div></blockquote>
+</div></blockquote>
+</div>
+</div>
+<div class="section" id="use-ruffus-cmdline">
+<span id="index-2"></span><h2>Use <a class="reference internal" href="command_line.html#new-manual-cmdline"><em>ruffus.cmdline</em></a><a class="headerlink" href="#use-ruffus-cmdline" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>As always, it is easiest to use <a class="reference internal" href="command_line.html#new-manual-cmdline"><em>ruffus.cmdline</em></a>.</p>
+<p>Set your script to</p>
+<blockquote>
+<div><ul class="simple">
+<li>write messages to <tt class="docutils literal"><span class="pre">STDERR</span></tt> with the <tt class="docutils literal"><span class="pre">--verbose</span></tt> option and</li>
+<li>to a log file with the <tt class="docutils literal"><span class="pre">--log_file</span></tt> option.</li>
+</ul>
+<div class="highlight-python"><div class="highlight"><pre> <span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+
+<span class="hll"> <span class="c"># Python logger which can be synchronised across concurrent Ruffus tasks</span>
+</span> <span class="n">logger</span><span class="p">,</span> <span class="n">logger_mutex</span> <span class="o">=</span> <span class="n">cmdline</span><span class="o">.</span><span class="n">setup_logging</span> <span class="p">(</span><span class="n">__name__</span><span class="p">,</span> <span class="n">options</span><span class="o">.</span><span class="n">log_file</span><span class="p">,</span> <span class="n">options</span><span class="o">.</span><span class="n">verbose</span><spa [...]
+
+ <span class="nd">@transform</span><span class="p">(</span> <span class="p">[</span><span class="s">"job1.input"</span><span class="p">],</span> <span class="n">suffix</span><span class="p">(</span><span class="s">".input"</span><span class="p">),</span> <span class="s">".output1"</span><span class="p">),</span>
+ <span class="k">def</span> <span class="nf">first_task</span><span class="p">(</span><span class="n">input_file</span><span class="p">,</span> <span class="n">output_file</span><span class="p">):</span>
+ <span class="k">pass</span>
+
+ <span class="n">pipeline_run</span><span class="p">(</span><span class="n">logger</span><span class="o">=</span><span class="n">logger</span><span class="p">)</span>
+</pre></div>
+</div>
+</div></blockquote>
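+<p>The snippet above leaves out the command-line parsing that defines <tt class="docutils literal"><span class="pre">options</span></tt>.
+A fuller, self-contained sketch following the same <tt class="docutils literal"><span class="pre">ruffus.cmdline</span></tt> pattern
+(the description string is arbitrary; note the <tt class="docutils literal"><span class="pre">@transform(...)</span></tt> line ends
+without a trailing comma) looks like this:</p>
+<div class="highlight-python"><pre>from ruffus import *
+
+# standard Ruffus-aware argument parser, providing --verbose, --log_file etc.
+parser = cmdline.get_argparse(description = "An example pipeline")
+options = parser.parse_args()
+
+# Python logger (plus mutex) which can be shared across concurrent Ruffus jobs
+logger, logger_mutex = cmdline.setup_logging(__name__, options.log_file, options.verbose)
+
+@transform(["job1.input"], suffix(".input"), ".output1")
+def first_task(input_file, output_file):
+    open(output_file, "w").close()
+
+# run the pipeline according to the chosen command-line options
+cmdline.run(options)
+</pre>
+</div>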
+</div></blockquote>
+</div>
+<div class="section" id="customising-logging">
+<span id="index-3"></span><h2>Customising logging<a class="headerlink" href="#customising-logging" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>You can also specify exactly how logging works by providing a <a class="reference external" href="http://docs.python.org/library/logging.html">logging</a> object
+to <a class="reference internal" href="../../pipeline_functions.html#pipeline-functions-pipeline-run"><em>pipeline_run()</em></a> .
+This log object should have <tt class="docutils literal"><span class="pre">debug()</span></tt> and <tt class="docutils literal"><span class="pre">info()</span></tt> methods.</p>
+<p>Instead of writing your own, it is usually more convenient to use the python
+<a class="reference external" href="http://docs.python.org/library/logging.html">logging</a>
+module which provides logging classes with rich functionality.</p>
+<p>The <a class="reference internal" href="logging_code.html#new-manual-logging-code"><em>example code</em></a> sets up a logger to a rotating set of files</p>
+</div></blockquote>
+</div>
+<div class="section" id="log-your-own-messages">
+<span id="new-manual-logging-per-job"></span><span id="index-4"></span><h2>Log your own messages<a class="headerlink" href="#log-your-own-messages" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>You need to take a little care when logging your custom messages <em>within</em> your pipeline.</p>
+<ul class="simple">
+<li>If your Ruffus pipeline may run in parallel, make sure that logging is synchronised.</li>
+<li>If your Ruffus pipeline may run across separate processes, send your logging object across process boundaries.</li>
+</ul>
+<p><a class="reference external" href="http://docs.python.org/library/logging.html">logging</a> objects cannot be
+<a class="reference external" href="http://docs.python.org/library/pickle.html">pickled</a> and shared naively across
+processes. Instead, we need to create proxies which forward the logging to a single
+shared log.</p>
+<p>The <a class="reference internal" href="../../proxy_logger.html#proxy-logger"><em>ruffus.proxy_logger</em></a> module provides an easy way to share
+<a class="reference external" href="http://docs.python.org/library/logging.html">logging</a> objects among
+jobs. This requires just two simple steps:</p>
+</div></blockquote>
+<div class="admonition note">
+<p class="first admonition-title">Note</p>
+<ul class="last simple">
+<li>This is a good template for sharing <a class="reference external" href="http://docs.python.org/2/library/pickle.html#what-can-be-pickled-and-unpickled">non-picklable objects</a>
+across processes.</li>
+</ul>
+</div>
+<div class="section" id="set-up-logging">
+<span id="new-manual-sharing-proxy-object"></span><h3>1. Set up logging<a class="headerlink" href="#set-up-logging" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><p>Things are easiest if you are using <tt class="docutils literal"><span class="pre">ruffus.cmdline</span></tt>:</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="c"># standard python logger which can be synchronised across concurrent Ruffus tasks</span>
+<span class="n">logger</span><span class="p">,</span> <span class="n">logger_mutex</span> <span class="o">=</span> <span class="n">cmdline</span><span class="o">.</span><span class="n">setup_logging</span> <span class="p">(</span><span class="n">__name__</span><span class="p">,</span> <span class="n">options</span><span class="o">.</span><span class="n">log_file</span><span class="p">,</span> <span class="n">options</span><span class="o">.</span><span class="n">verbose</span><span class= [...]
+</pre></div>
+</div>
+<p>Otherwise, manually:</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="kn">from</span> <span class="nn">ruffus.proxy_logger</span> <span class="kn">import</span> <span class="o">*</span>
+<span class="p">(</span><span class="n">logger</span><span class="p">,</span>
+ <span class="n">logging_mutex</span><span class="p">)</span> <span class="o">=</span> <span class="n">make_shared_logger_and_proxy</span> <span class="p">(</span><span class="n">setup_std_shared_logger</span><span class="p">,</span>
+ <span class="s">"my_logger"</span><span class="p">,</span>
+ <span class="p">{</span><span class="s">"file_name"</span> <span class="p">:</span><span class="s">"/my/lg.log"</span><span class="p">})</span>
+</pre></div>
+</div>
+</div></blockquote>
+</div>
+<div class="section" id="share-the-proxy">
+<h3>2. Share the proxy<a class="headerlink" href="#share-the-proxy" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><p>Now, pass:</p>
+<blockquote>
+<div><ul class="simple">
+<li><tt class="docutils literal"><span class="pre">logger</span></tt> (which forwards logging calls across jobs) and</li>
+<li><tt class="docutils literal"><span class="pre">logging_mutex</span></tt> (which prevents messages from jobs that log simultaneously
+from being jumbled together)</li>
+</ul>
+</div></blockquote>
+<p>to each job:</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="nd">@transform</span><span class="p">(</span> <span class="n">initial_file</span><span class="p">,</span>
+ <span class="n">suffix</span><span class="p">(</span><span class="s">".input"</span><span class="p">),</span>
+ <span class="s">".output1"</span><span class="p">,</span>
+<span class="hll"> <span class="n">logger</span><span class="p">,</span> <span class="n">logging_mutex</span><span class="p">),</span> <span class="c"># pass log and synchronisation as parameters</span>
+</span><span class="k">def</span> <span class="nf">first_task</span><span class="p">(</span><span class="n">input_file</span><span class="p">,</span> <span class="n">output_file</span><span class="p">,</span>
+<span class="hll"> <span class="n">logger</span><span class="p">,</span> <span class="n">logging_mutex</span><span class="p">):</span> <span class="c"># pass log and synchronisation as parameters</span>
+</span> <span class="k">pass</span>
+
+<span class="hll"> <span class="c"># synchronise logging</span>
+</span> <span class="k">with</span> <span class="n">logging_mutex</span><span class="p">:</span>
+ <span class="n">logger</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s">"Here we go logging..."</span><span class="p">)</span>
+</pre></div>
+</div>
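+<p>The proxy only matters once jobs actually run in separate processes. A brief sketch
+(the process count of 4 is an arbitrary choice for illustration):</p>
+<div class="highlight-python"><div class="highlight"><pre># run jobs in 4 separate processes; each job's calls to logger.info()
+# are forwarded by the proxy back to the single shared log
+pipeline_run(multiprocess = 4, logger = logger)
+</pre></div>
+</div>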
+</div></blockquote>
+</div>
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="../../contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#"><strong>Chapter 15</strong>: Logging progress through a pipeline</a><ul>
+<li><a class="reference internal" href="#overview">Overview</a></li>
+<li><a class="reference internal" href="#logging-task-job-completion">Logging task/job completion</a><ul>
+<li><a class="reference internal" href="#controlling-logging-verbosity">Controlling logging verbosity</a></li>
+</ul>
+</li>
+<li><a class="reference internal" href="#use-ruffus-cmdline">Use <tt class="docutils literal"><span class="pre">ruffus.cmdline</span></tt></a></li>
+<li><a class="reference internal" href="#customising-logging">Customising logging</a></li>
+<li><a class="reference internal" href="#log-your-own-messages">Log your own messages</a><ul>
+<li><a class="reference internal" href="#set-up-logging">1. Set up logging</a></li>
+<li><a class="reference internal" href="#share-the-proxy">2. Share the proxy</a></li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="multiprocessing.html"
+ title="previous chapter"><strong>Chapter 14</strong>: Multiprocessing, <tt class="docutils literal"><span class="pre">drmaa</span></tt> and Computation Clusters</a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="subdivide_collate.html"
+ title="next chapter"><strong>Chapter 16</strong>: <tt class="docutils literal"><span class="pre">@subdivide</span></tt> tasks to run efficiently and regroup with <tt class="docutils literal"><span class="pre">@collate</span></tt></a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../../_sources/tutorials/new_tutorial/logging.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="../../decorators/decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="../../decorators/originate.html">@originate</a> </li>
+ <li><a href="../../decorators/split.html">@split</a> </li>
+ <li><a href="../../decorators/transform.html">@transform</a> </li>
+ <li><a href="../../decorators/merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="../../decorators/subdivide.html">@subdivide</a> </li>
+ <li><a href="../../decorators/transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="../../decorators/collate.html">@collate</a> </li>
+ <li><a href="../../decorators/collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="../../decorators/graphviz.html">@graphviz</a> </li>
+ <li><a href="../../decorators/mkdir.html">@mkdir</a> </li>
+ <li><a href="../../decorators/follows.html">@follows / mkdir</a> </li>
+ <li><a href="../../decorators/posttask.html">@posttask touch_file</a> </li>
+ <li><a href="../../decorators/active_if.html">@active_if</a> </li>
+ <li><a href="../../decorators/jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="../../decorators/product.html">@product </a> </li>
+ <li><a href="../../decorators/permutations.html">@permutations </a> </li>
+ <li><a href="../../decorators/combinations.html">@combinations </a> </li>
+ <li><a href="../../decorators/combinations_with_replacement.html">@combinations_ _with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="../../decorators/decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="../../decorators/files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="../../decorators/parallel.html">@parallel</a> </li>
+ <li><a href="../../decorators/check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="../../decorators/indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="subdivide_collate.html" title="Chapter 16: @subdivide tasks to run efficiently and regroup with @collate"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="multiprocessing.html" title="Chapter 14: Multiprocessing, drmaa and Computation Clusters"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="introduction.html">Manual</a> / </li>
+ <li><a href="manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/tutorials/new_tutorial/logging_code.html b/doc/_build/html/tutorials/new_tutorial/logging_code.html
new file mode 100644
index 0000000..1e483ed
--- /dev/null
+++ b/doc/_build/html/tutorials/new_tutorial/logging_code.html
@@ -0,0 +1,243 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>Chapter 15: Python Code for Logging progress through a pipeline — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../../index.html" />
+ <link rel="next" title="Chapter 16: Python Code for @subdivide tasks to run efficiently and regroup with @collate" href="subdivide_collate_code.html" />
+ <link rel="prev" title="Chapter 14: Python Code for Multiprocessing, drmaa and Computation Clusters" href="multiprocessing_code.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="subdivide_collate_code.html" title="Chapter 16: Python Code for @subdivide tasks to run efficiently and regroup with @collate"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="multiprocessing_code.html" title="Chapter 14: Python Code for Multiprocessing, drmaa and Computation Clusters"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="introduction.html">Manual</a> / </li>
+ <li><a href="manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <div class="section" id="new-manual-logging-chapter-num-python-code-for-logging-progress-through-a-pipeline">
+<span id="new-manual-logging-code"></span><h1><strong>Chapter 15</strong>: Python Code for Logging progress through a pipeline<a class="headerlink" href="#new-manual-logging-chapter-num-python-code-for-logging-progress-through-a-pipeline" title="Permalink to this headline">¶</a></h1>
+<div class="admonition seealso">
+<p class="first admonition-title">See also</p>
+<ul class="last simple">
+<li><a class="reference internal" href="manual_contents.html#new-manual-table-of-contents"><em>Manual Table of Contents</em></a></li>
+<li>Back to <strong>Chapter 15</strong>: <a class="reference internal" href="logging.html#new-manual-logging"><em>Logging progress through a pipeline</em></a></li>
+</ul>
+</div>
+<div class="section" id="rotating-set-of-file-logs">
+<h2>Rotating set of file logs<a class="headerlink" href="#rotating-set-of-file-logs" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="kn">import</span> <span class="nn">logging</span>
+<span class="kn">import</span> <span class="nn">logging.handlers</span>
+
+<span class="n">LOG_FILENAME</span> <span class="o">=</span> <span class="s">'/tmp/ruffus.log'</span>
+
+<span class="c"># Set up a specific logger with our desired output level</span>
+<span class="n">logger</span> <span class="o">=</span> <span class="n">logging</span><span class="o">.</span><span class="n">getLogger</span><span class="p">(</span><span class="s">'My_Ruffus_logger'</span><span class="p">)</span>
+<span class="n">logger</span><span class="o">.</span><span class="n">setLevel</span><span class="p">(</span><span class="n">logging</span><span class="o">.</span><span class="n">DEBUG</span><span class="p">)</span>
+
+<span class="hll"><span class="c"># Rotate a set of 5 log files every 2kb</span>
+</span><span class="n">handler</span> <span class="o">=</span> <span class="n">logging</span><span class="o">.</span><span class="n">handlers</span><span class="o">.</span><span class="n">RotatingFileHandler</span><span class="p">(</span>
+ <span class="n">LOG_FILENAME</span><span class="p">,</span> <span class="n">maxBytes</span><span class="o">=</span><span class="mi">2000</span><span class="p">,</span> <span class="n">backupCount</span><span class="o">=</span><span class="mi">5</span><span class="p">)</span>
+
+<span class="hll"><span class="c"># Add the log message handler to the logger</span>
+</span><span class="n">logger</span><span class="o">.</span><span class="n">addHandler</span><span class="p">(</span><span class="n">handler</span><span class="p">)</span>
+
+<span class="hll"><span class="c"># Ruffus pipeline</span>
+</span><span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+
+<span class="c"># Start with some initial data file of yours...</span>
+<span class="n">initial_file</span> <span class="o">=</span> <span class="s">"job1.input"</span>
+<span class="nb">open</span><span class="p">(</span><span class="n">initial_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+
+<span class="nd">@transform</span><span class="p">(</span> <span class="n">initial_file</span><span class="p">,</span>
+ <span class="n">suffix</span><span class="p">(</span><span class="s">".input"</span><span class="p">),</span>
+            <span class="s">".output1"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">first_task</span><span class="p">(</span><span class="n">input_file</span><span class="p">,</span> <span class="n">output_file</span><span class="p">):</span>
+ <span class="s">"Some detailed description"</span>
+ <span class="k">pass</span>
+
+<span class="hll"><span class="c"># use our custom logging object</span>
+</span><span class="n">pipeline_run</span><span class="p">(</span><span class="n">logger</span><span class="o">=</span><span class="n">logger</span><span class="p">)</span>
+<span class="k">print</span> <span class="nb">open</span><span class="p">(</span><span class="s">"/tmp/ruffus.log"</span><span class="p">)</span><span class="o">.</span><span class="n">read</span><span class="p">()</span>
+</pre></div>
+</div>
+</div></blockquote>
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="../../contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#"><strong>Chapter 15</strong>: Python Code for Logging progress through a pipeline</a><ul>
+<li><a class="reference internal" href="#rotating-set-of-file-logs">Rotating set of file logs</a></li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="multiprocessing_code.html"
+ title="previous chapter"><strong>Chapter 14</strong>: Python Code for Multiprocessing, <tt class="docutils literal"><span class="pre">drmaa</span></tt> and Computation Clusters</a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="subdivide_collate_code.html"
+ title="next chapter"><strong>Chapter 16</strong>: Python Code for <tt class="docutils literal"><span class="pre">@subdivide</span></tt> tasks to run efficiently and regroup with <tt class="docutils literal"><span class="pre">@collate</span></tt></a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../../_sources/tutorials/new_tutorial/logging_code.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="../../decorators/decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="../../decorators/originate.html">@originate</a> </li>
+ <li><a href="../../decorators/split.html">@split</a> </li>
+ <li><a href="../../decorators/transform.html">@transform</a> </li>
+ <li><a href="../../decorators/merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="../../decorators/subdivide.html">@subdivide</a> </li>
+ <li><a href="../../decorators/transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="../../decorators/collate.html">@collate</a> </li>
+ <li><a href="../../decorators/collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="../../decorators/graphviz.html">@graphviz</a> </li>
+ <li><a href="../../decorators/mkdir.html">@mkdir</a> </li>
+ <li><a href="../../decorators/follows.html">@follows / mkdir</a> </li>
+ <li><a href="../../decorators/posttask.html">@posttask touch_file</a> </li>
+ <li><a href="../../decorators/active_if.html">@active_if</a> </li>
+ <li><a href="../../decorators/jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="../../decorators/product.html">@product </a> </li>
+ <li><a href="../../decorators/permutations.html">@permutations </a> </li>
+ <li><a href="../../decorators/combinations.html">@combinations </a> </li>
+ <li><a href="../../decorators/combinations_with_replacement.html">@combinations_ _with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="../../decorators/decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="../../decorators/files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="../../decorators/parallel.html">@parallel</a> </li>
+ <li><a href="../../decorators/check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="../../decorators/indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="subdivide_collate_code.html" title="Chapter 16: Python Code for @subdivide tasks to run efficiently and regroup with @collate"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="multiprocessing_code.html" title="Chapter 14: Python Code for Multiprocessing, drmaa and Computation Clusters"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="introduction.html">Manual</a> / </li>
+ <li><a href="manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/tutorials/new_tutorial/manual_contents.html b/doc/_build/html/tutorials/new_tutorial/manual_contents.html
new file mode 100644
index 0000000..56d737b
--- /dev/null
+++ b/doc/_build/html/tutorials/new_tutorial/manual_contents.html
@@ -0,0 +1,244 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>Ruffus Manual: List of Chapters and Example code — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../../index.html" />
+ <link rel="next" title="Chapter 1: An introduction to basic Ruffus syntax" href="introduction.html" />
+ <link rel="prev" title="Installation" href="../../installation.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="introduction.html" title="Chapter 1: An introduction to basic Ruffus syntax"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="../../installation.html" title="Installation"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="introduction.html">Manual</a> / </li>
+ <li><a href="#">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <div class="section" id="ruffus-manual-list-of-chapters-and-example-code">
+<span id="new-manual-table-of-contents"></span><h1><strong>Ruffus</strong> Manual: List of Chapters and Example code<a class="headerlink" href="#ruffus-manual-list-of-chapters-and-example-code" title="Permalink to this headline">¶</a></h1>
+<blockquote>
+<div><p>Download as <a class="reference download internal" href="../../_downloads/ruffus.pdf"><tt class="xref download docutils literal"><span class="pre">pdf</span></tt></a>.</p>
+<ul class="simple">
+<li><strong>Chapter 1</strong>: <a class="reference internal" href="introduction.html#new-manual-introduction"><em>An introduction to basic Ruffus syntax</em></a></li>
+<li><strong>Chapter 2</strong>: <a class="reference internal" href="transform.html#new-manual-transform"><em>Transforming data in a pipeline with @transform</em></a></li>
+<li><strong>Chapter 3</strong>: <a class="reference internal" href="transform_in_parallel.html#new-manual-transform-in-parallel"><em>More on @transform-ing data</em></a></li>
+<li><strong>Chapter 4</strong>: <a class="reference internal" href="originate.html#new-manual-originate"><em>Creating files with @originate</em></a></li>
+<li><strong>Chapter 5</strong>: <a class="reference internal" href="pipeline_printout.html#new-manual-pipeline-printout"><em>Understanding how your pipeline works with pipeline_printout()</em></a></li>
+<li><strong>Chapter 6</strong>: <a class="reference internal" href="command_line.html#new-manual-cmdline"><em>Running Ruffus from the command line with ruffus.cmdline</em></a></li>
+<li><strong>Chapter 7</strong>: <a class="reference internal" href="pipeline_printout_graph.html#new-manual-pipeline-printout-graph"><em>Displaying the pipeline visually with pipeline_printout_graph()</em></a></li>
+<li><strong>Chapter 8</strong>: <a class="reference internal" href="output_file_names.html#new-manual-output-file-names"><em>Specifying output file names with formatter() and regex()</em></a></li>
+<li><strong>Chapter 9</strong>: <a class="reference internal" href="mkdir.html#new-manual-mkdir"><em>Preparing directories for output with @mkdir</em></a></li>
+<li><strong>Chapter 10</strong>: <a class="reference internal" href="checkpointing.html#new-manual-checkpointing"><em>Checkpointing: Interrupted Pipelines and Exceptions</em></a></li>
+<li><strong>Chapter 11</strong>: <a class="reference internal" href="decorators_compendium.html#new-manual-decorators-compendium"><em>Pipeline topologies and a compendium of Ruffus decorators</em></a></li>
+<li><strong>Chapter 12</strong>: <a class="reference internal" href="split.html#new-manual-split"><em>Splitting up large tasks / files with @split</em></a></li>
+<li><strong>Chapter 13</strong>: <a class="reference internal" href="merge.html#new-manual-merge"><em>@merge multiple input into a single result</em></a></li>
+<li><strong>Chapter 14</strong>: <a class="reference internal" href="multiprocessing.html#new-manual-multiprocessing"><em>Multiprocessing, drmaa and Computation Clusters</em></a></li>
+<li><strong>Chapter 15</strong>: <a class="reference internal" href="logging.html#new-manual-logging"><em>Logging progress through a pipeline</em></a></li>
+<li><strong>Chapter 16</strong>: <a class="reference internal" href="subdivide_collate.html#new-manual-subdivide-collate"><em>@subdivide tasks to run efficiently and regroup with @collate</em></a></li>
+<li><strong>Chapter 17</strong>: <a class="reference internal" href="combinatorics.html#new-manual-combinatorics"><em>@combinations, @permutations and all versus all @product</em></a></li>
+<li><strong>Chapter 18</strong>: <a class="reference internal" href="active_if.html#new-manual-active-if"><em>Turning parts of the pipeline on and off at runtime with @active_if</em></a></li>
+<li><strong>Chapter 19</strong>: <a class="reference internal" href="posttask.html#new-manual-posttask"><em>Signal the completion of each stage of our pipeline with @posttask</em></a></li>
+<li><strong>Chapter 20</strong>: <a class="reference internal" href="inputs.html#new-manual-inputs"><em>Manipulating task inputs via string substitution with inputs() and add_inputs()</em></a></li>
+<li><strong>Chapter 21</strong>: <a class="reference internal" href="onthefly.html#new-manual-on-the-fly"><em>Esoteric: Generating parameters on the fly with @files</em></a></li>
+<li><strong>Chapter 22</strong>: <a class="reference internal" href="parallel.html#new-manual-deprecated-parallel"><em>Esoteric: Running jobs in parallel without files using @parallel</em></a></li>
+<li><strong>Chapter 23</strong>: <a class="reference internal" href="check_if_uptodate.html#new-manual-check-if-uptodate"><em>Esoteric: Writing custom functions to decide which jobs are up to date with @check_if_uptodate</em></a></li>
+<li><strong>Appendix 1</strong> <a class="reference internal" href="flowchart_colours.html#new-manual-flowchart-colours"><em>Flow Chart Colours with pipeline_printout_graph</em></a></li>
+<li><strong>Appendix 2</strong> <a class="reference internal" href="dependencies.html#new-manual-dependencies"><em>Under the hood: How dependency works</em></a></li>
+<li><strong>Appendix 3</strong> <a class="reference internal" href="exceptions.html#new-manual-exceptions"><em>Exceptions thrown inside pipelines</em></a></li>
+<li><strong>Appendix 4</strong> <a class="reference internal" href="list_of_ruffus_names.html#new-manual-ruffus-names"><em>Names (keywords) exported from Ruffus</em></a></li>
+<li><strong>Appendix 5</strong>: <a class="reference internal" href="deprecated_files.html#new-manual-deprecated-files"><em>Legacy and deprecated syntax @files</em></a></li>
+<li><strong>Appendix 6</strong>: <a class="reference internal" href="deprecated_files_re.html#new-manual-deprecated-files-re"><em>Legacy and deprecated syntax @files_re</em></a></li>
+</ul>
+</div></blockquote>
+<p><strong>Ruffus</strong> Manual: List of Example Code for Each Chapter:</p>
+<blockquote>
+<div><ul class="simple">
+<li><a class="reference internal" href="introduction_code.html#new-manual-introduction-code"><em>Chapter 1: Python Code for An introduction to basic Ruffus syntax</em></a></li>
+<li><a class="reference internal" href="transform_code.html#new-manual-transform-code"><em>Chapter 2: Python Code for Transforming data in a pipeline with @transform</em></a></li>
+<li><a class="reference internal" href="transform_in_parallel_code.html#new-manual-transform-in-parallel-code"><em>Chapter 3: Python Code for More on @transform-ing data</em></a></li>
+<li><a class="reference internal" href="originate_code.html#new-manual-originate-code"><em>Chapter 4: Python Code for Creating files with @originate</em></a></li>
+<li><a class="reference internal" href="pipeline_printout_code.html#new-manual-pipeline-printout-code"><em>Chapter 5: Python Code for Understanding how your pipeline works with pipeline_printout(...)</em></a></li>
+<li><a class="reference internal" href="pipeline_printout_graph_code.html#new-manual-pipeline-printout-graph-code"><em>Chapter 7: Python Code for Displaying the pipeline visually with pipeline_printout_graph(...)</em></a></li>
+<li><a class="reference internal" href="output_file_names_code.html#new-manual-output-file-names-code"><em>Chapter 8: Python Code for Specifying output file names with formatter() and regex()</em></a></li>
+<li><a class="reference internal" href="mkdir_code.html#new-manual-mkdir-code"><em>Chapter 9: Python Code for Preparing directories for output with @mkdir()</em></a></li>
+<li><a class="reference internal" href="checkpointing_code.html#new-manual-checkpointing-code"><em>Chapter 10: Python Code for Checkpointing: Interrupted Pipelines and Exceptions</em></a></li>
+<li><a class="reference internal" href="split_code.html#new-manual-split-code"><em>Chapter 12: Python Code for Splitting up large tasks / files with @split</em></a></li>
+<li><a class="reference internal" href="merge_code.html#new-manual-merge-code"><em>Chapter 13: Python Code for @merge multiple input into a single result</em></a></li>
+<li><a class="reference internal" href="multiprocessing_code.html#new-manual-multiprocessing-code"><em>Chapter 14: Python Code for Multiprocessing, drmaa and Computation Clusters</em></a></li>
+<li><a class="reference internal" href="logging_code.html#new-manual-logging-code"><em>Chapter 15: Python Code for Logging progress through a pipeline</em></a></li>
+<li><a class="reference internal" href="subdivide_collate_code.html#new-manual-subdivide-collate-code"><em>Chapter 16: Python Code for @subdivide tasks to run efficiently and regroup with @collate</em></a></li>
+<li><a class="reference internal" href="combinatorics_code.html#new-manual-combinatorics-code"><em>Chapter 17: Python Code for @combinations, @permutations and all versus all @product</em></a></li>
+<li><a class="reference internal" href="inputs_code.html#new-manual-inputs-code"><em>Chapter 20: Python Code for Manipulating task inputs via string substitution using inputs() and add_inputs()</em></a></li>
+<li><a class="reference internal" href="onthefly_code.html#new-manual-on-the-fly-code"><em>Chapter 21: Esoteric: Python Code for Generating parameters on the fly with @files</em></a></li>
+</ul>
+</div></blockquote>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="../../installation.html"
+ title="previous chapter">Installation</a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="introduction.html"
+ title="next chapter"><strong>Chapter 1</strong>: An introduction to basic <em>Ruffus</em> syntax</a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../../_sources/tutorials/new_tutorial/manual_contents.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="../../decorators/decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="../../decorators/originate.html">@originate</a> </li>
+ <li><a href="../../decorators/split.html">@split</a> </li>
+ <li><a href="../../decorators/transform.html">@transform</a> </li>
+ <li><a href="../../decorators/merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="../../decorators/subdivide.html">@subdivide</a> </li>
+ <li><a href="../../decorators/transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="../../decorators/collate.html">@collate</a> </li>
+ <li><a href="../../decorators/collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="../../decorators/graphviz.html">@graphviz</a> </li>
+ <li><a href="../../decorators/mkdir.html">@mkdir</a> </li>
+ <li><a href="../../decorators/follows.html">@follows / mkdir</a> </li>
+ <li><a href="../../decorators/posttask.html">@posttask touch_file</a> </li>
+ <li><a href="../../decorators/active_if.html">@active_if</a> </li>
+ <li><a href="../../decorators/jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="../../decorators/product.html">@product </a> </li>
+ <li><a href="../../decorators/permutations.html">@permutations </a> </li>
+ <li><a href="../../decorators/combinations.html">@combinations </a> </li>
+ <li><a href="../../decorators/combinations_with_replacement.html">@combinations_ _with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="../../decorators/decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="../../decorators/files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="../../decorators/parallel.html">@parallel</a> </li>
+ <li><a href="../../decorators/check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="../../decorators/indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="introduction.html" title="Chapter 1: An introduction to basic Ruffus syntax"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="../../installation.html" title="Installation"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="introduction.html">Manual</a> / </li>
+ <li><a href="#">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/tutorials/new_tutorial/merge.html b/doc/_build/html/tutorials/new_tutorial/merge.html
new file mode 100644
index 0000000..ce7ab4d
--- /dev/null
+++ b/doc/_build/html/tutorials/new_tutorial/merge.html
@@ -0,0 +1,298 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>Chapter 13: @merge multiple input into a single result — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../../index.html" />
+ <link rel="next" title="Chapter 14: Multiprocessing, drmaa and Computation Clusters" href="multiprocessing.html" />
+ <link rel="prev" title="Chapter 12: Splitting up large tasks / files with @split" href="split.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="multiprocessing.html" title="Chapter 14: Multiprocessing, drmaa and Computation Clusters"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="split.html" title="Chapter 12: Splitting up large tasks / files with @split"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="introduction.html">Manual</a> / </li>
+ <li><a href="manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <div class="section" id="new-manual-merge-chapter-num-merge-multiple-input-into-a-single-result">
+<span id="new-manual-merge"></span><span id="index-0"></span><h1><strong>Chapter 13</strong>: <tt class="docutils literal"><span class="pre">@merge</span></tt> multiple input into a single result<a class="headerlink" href="#new-manual-merge-chapter-num-merge-multiple-input-into-a-single-result" title="Permalink to this headline">¶</a></h1>
+<div class="admonition seealso">
+<p class="first admonition-title">See also</p>
+<ul class="last simple">
+<li><a class="reference internal" href="manual_contents.html#new-manual-table-of-contents"><em>Manual Table of Contents</em></a></li>
+<li><a class="reference internal" href="../../decorators/merge.html#decorators-merge"><em>@merge</em></a> syntax</li>
+<li><a class="reference internal" href="merge_code.html#new-manual-merge-code"><em>Example code for this chapter</em></a></li>
+</ul>
+</div>
+<div class="section" id="overview-of-merge">
+<h2>Overview of <a class="reference internal" href="../../decorators/merge.html#decorators-merge"><em>@merge</em></a><a class="headerlink" href="#overview-of-merge" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>The <a class="reference internal" href="split.html#new-manual-split"><em>previous chapter</em></a> explained how <strong>Ruffus</strong> allows large
+jobs to be split into small pieces with <a class="reference internal" href="../../decorators/split.html#decorators-split"><em>@split</em></a> and analysed
+in parallel using, for example, our old friend <a class="reference internal" href="../../decorators/transform.html#decorators-transform"><em>@transform</em></a>.</p>
+<p>Having done this, our next task is to recombine the fragments into a seamless whole.</p>
+<p>This is the role of the <a class="reference internal" href="../../decorators/merge.html#decorators-merge"><em>@merge</em></a> decorator.</p>
+</div></blockquote>
+</div>
+<div class="section" id="merge-is-a-many-to-one-operator">
+<h2><a class="reference internal" href="../../decorators/merge.html#decorators-merge"><em>@merge</em></a> is a many to one operator<a class="headerlink" href="#merge-is-a-many-to-one-operator" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p><a class="reference internal" href="../../decorators/merge.html#decorators-merge"><em>@merge</em></a> takes multiple <em>inputs</em> and produces a single <em>output</em>. <strong>Ruffus</strong>
+is again agnostic as to the sort of data contained within this single <em>output</em>. It can be a single
+(string) file name, an arbitrarily complicated nested structure with numbers, objects etc.,
+or even a list.</p>
+<p>The main thing is that downstream tasks will interpret this output as a single entity leading to a single
+job.</p>
+<p><a class="reference internal" href="../../decorators/split.html#decorators-split"><em>@split</em></a> and <a class="reference internal" href="../../decorators/merge.html#decorators-merge"><em>@merge</em></a> are, in other words, about network topology.</p>
+<p>Because of this, <a class="reference internal" href="../../decorators/merge.html#decorators-merge"><em>@merge</em></a> is also very useful for summarising progress
+in our pipeline. At selected key points, we can gather data from a multitude of disparate <em>inputs</em>
+and <a class="reference internal" href="../../decorators/merge.html#decorators-merge"><em>@merge</em></a> them into a single set of summaries.</p>
+</div></blockquote>
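+<p>In skeleton form (the file names here are invented for illustration; the full worked example
+follows in the next section), a many to one step looks like this:</p>
+<div class="highlight-python"><div class="highlight"><pre>from ruffus import *
+
+# all matching inputs arrive together as a single list,
+# and the task produces exactly one output
+@merge(["part.1.summary", "part.2.summary", "part.3.summary"], "all.summary")
+def summarise(input_file_names, output_file_name):
+    with open(output_file_name, "w") as output_file:
+        for input_file_name in input_file_names:
+            output_file.write(open(input_file_name).read())
+</pre></div>
+</div>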
+</div>
+<div class="section" id="example-combining-partial-solutions-calculating-variances">
+<h2>Example: Combining partial solutions: Calculating variances<a class="headerlink" href="#example-combining-partial-solutions-calculating-variances" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>In the <a class="reference internal" href="split.html#new-manual-split"><em>previous chapter</em></a>, we had almost completed all the pieces of our flowchart:</p>
+<a class="reference internal image-reference" href="../../_images/manual_split_merge_example.jpg"><img alt="../../_images/manual_split_merge_example.jpg" src="../../_images/manual_split_merge_example.jpg" style="width: 251.1px; height: 197.4px;" /></a>
+<p>What remains is to take the partial solutions from the different <tt class="docutils literal"><span class="pre">.sums</span></tt> files
+and turn these into the variance as follows:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="n">variance</span> <span class="o">=</span> <span class="p">(</span><span class="n">sum_squared</span> <span class="o">-</span> <span class="nb">sum</span> <span class="o">*</span> <span class="nb">sum</span> <span class="o">/</span> <span class="n">N</span><span class="p">)</span><span class="o">/</span><span class="n">N</span>
+</pre></div>
+</div>
+<p>where <tt class="docutils literal"><span class="pre">N</span></tt> is the number of values</p>
+<p>See the <a class="reference external" href="http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance">wikipedia</a> entry for a discussion of
+why this is a very naive approach.</p>
+</div></blockquote>
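+<p>As a quick numerical sanity check of the formula (a small sketch, not part of the pipeline code; the eight values below are made up):</p>
+<div class="highlight-python"><div class="highlight"><pre>values = [2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0]
+
+N           = len(values)
+sum_values  = sum(values)
+sum_squared = sum(v * v for v in values)
+
+# naive (population) variance from the running sums
+variance = (sum_squared - sum_values * sum_values / N) / N
+print(variance)        # 4.0
+</pre></div>
+</div>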
+<p>To do this, all we have to do is iterate through all the values in <tt class="docutils literal"><span class="pre">*.sums</span></tt>,
+add up the <tt class="docutils literal"><span class="pre">sums</span></tt> and <tt class="docutils literal"><span class="pre">sum_squared</span></tt>, and apply the above (naive) formula.</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="c">#</span>
+<span class="hll"><span class="c"># @merge files together</span>
+</span><span class="c">#</span>
+<span class="nd">@merge</span><span class="p">(</span><span class="n">sum_of_squares</span><span class="p">,</span> <span class="s">"variance.result"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">calculate_variance</span> <span class="p">(</span><span class="n">input_file_names</span><span class="p">,</span> <span class="n">output_file_name</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Calculate variance naively</span>
+<span class="sd"> """</span>
+ <span class="c">#</span>
+ <span class="c"># initialise variables</span>
+ <span class="c">#</span>
+ <span class="n">all_sum_squared</span> <span class="o">=</span> <span class="mf">0.0</span>
+ <span class="n">all_sum</span> <span class="o">=</span> <span class="mf">0.0</span>
+ <span class="n">all_cnt_values</span> <span class="o">=</span> <span class="mf">0.0</span>
+ <span class="c">#</span>
+    <span class="c"># add up all the sum_squared, sum and cnt_values from all the chunks</span>
+ <span class="c">#</span>
+ <span class="k">for</span> <span class="n">input_file_name</span> <span class="ow">in</span> <span class="n">input_file_names</span><span class="p">:</span>
+ <span class="n">sum_squared</span><span class="p">,</span> <span class="nb">sum</span><span class="p">,</span> <span class="n">cnt_values</span> <span class="o">=</span> <span class="nb">map</span><span class="p">(</span><span class="nb">float</span><span class="p">,</span> <span class="nb">open</span><span class="p">(</span><span class="n">input_file_name</span><span class="p">)</span><span class="o">.</span><span class="n">readlines</span><span class="p">())</span>
+ <span class="n">all_sum_squared</span> <span class="o">+=</span> <span class="n">sum_squared</span>
+ <span class="n">all_sum</span> <span class="o">+=</span> <span class="nb">sum</span>
+ <span class="n">all_cnt_values</span> <span class="o">+=</span> <span class="n">cnt_values</span>
+ <span class="n">all_mean</span> <span class="o">=</span> <span class="n">all_sum</span> <span class="o">/</span> <span class="n">all_cnt_values</span>
+ <span class="n">variance</span> <span class="o">=</span> <span class="p">(</span><span class="n">all_sum_squared</span> <span class="o">-</span> <span class="n">all_sum</span> <span class="o">*</span> <span class="n">all_mean</span><span class="p">)</span><span class="o">/</span><span class="p">(</span><span class="n">all_cnt_values</span><span class="p">)</span>
+ <span class="c">#</span>
+ <span class="c"># print output</span>
+ <span class="c">#</span>
+ <span class="nb">open</span><span class="p">(</span><span class="n">output_file_name</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="s">"</span><span class="si">%s</span><span class="se">\n</span><span class="s">"</span> <span class="o">%</span> <span class="n">variance</span><span class="p">)</span>
+</pre></div>
+</div>
+<p>This results in the following equivalent function call:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="n">calculate_variance</span> <span class="p">([</span><span class="s">"1.sums"</span><span class="p">,</span> <span class="s">"2.sums"</span><span class="p">,</span> <span class="s">"3.sums"</span><span class="p">,</span>
+<span class="hll"> <span class="s">"4.sums"</span><span class="p">,</span> <span class="s">"5.sums"</span><span class="p">,</span> <span class="s">"6.sums"</span><span class="p">,</span>
+</span>                     <span class="s">"7.sums"</span><span class="p">,</span> <span class="s">"8.sums"</span><span class="p">,</span> <span class="s">"9.sums"</span><span class="p">,</span> <span class="s">"10.sums"</span><span class="p">],</span> <span class="s">"variance.result"</span><span class="p">)</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p>and the following display:</p>
+<blockquote>
+<div><div class="highlight-pycon"><div class="highlight"><pre><span class="gp">>>> </span><span class="n">pipeline_run</span><span class="p">()</span>
+<span class="go"> Job = [[1.sums, 10.sums, 2.sums, 3.sums, 4.sums, 5.sums, 6.sums, 7.sums, 8.sums, 9.sums] -> variance.result] completed</span>
+<span class="go">Completed Task = calculate_variance</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p>The final result is in <tt class="docutils literal"><span class="pre">variance.result</span></tt></p>
+<p>Have a look at the <a class="reference internal" href="merge_code.html#new-manual-merge-code"><em>complete example code for this chapter</em></a>.</p>
+</div></blockquote>
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="../../contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#"><strong>Chapter 13</strong>: <tt class="docutils literal"><span class="pre">@merge</span></tt> multiple input into a single result</a><ul>
+<li><a class="reference internal" href="#overview-of-merge">Overview of <tt class="docutils literal"><span class="pre">@merge</span></tt></a></li>
+<li><a class="reference internal" href="#merge-is-a-many-to-one-operator"><tt class="docutils literal"><span class="pre">@merge</span></tt> is a many to one operator</a></li>
+<li><a class="reference internal" href="#example-combining-partial-solutions-calculating-variances">Example: Combining partial solutions: Calculating variances</a></li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="split.html"
+ title="previous chapter"><strong>Chapter 12</strong>: Splitting up large tasks / files with <strong>@split</strong></a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="multiprocessing.html"
+ title="next chapter"><strong>Chapter 14</strong>: Multiprocessing, <tt class="docutils literal"><span class="pre">drmaa</span></tt> and Computation Clusters</a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../../_sources/tutorials/new_tutorial/merge.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="../../decorators/decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="../../decorators/originate.html">@originate</a> </li>
+ <li><a href="../../decorators/split.html">@split</a> </li>
+ <li><a href="../../decorators/transform.html">@transform</a> </li>
+ <li><a href="../../decorators/merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="../../decorators/subdivide.html">@subdivide</a> </li>
+ <li><a href="../../decorators/transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="../../decorators/collate.html">@collate</a> </li>
+ <li><a href="../../decorators/collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="../../decorators/graphviz.html">@graphviz</a> </li>
+ <li><a href="../../decorators/mkdir.html">@mkdir</a> </li>
+ <li><a href="../../decorators/follows.html">@follows / mkdir</a> </li>
+ <li><a href="../../decorators/posttask.html">@posttask touch_file</a> </li>
+ <li><a href="../../decorators/active_if.html">@active_if</a> </li>
+ <li><a href="../../decorators/jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="../../decorators/product.html">@product </a> </li>
+ <li><a href="../../decorators/permutations.html">@permutations </a> </li>
+ <li><a href="../../decorators/combinations.html">@combinations </a> </li>
+                                        <li><a href="../../decorators/combinations_with_replacement.html">@combinations_with_replacement </a>    </li>
+ </ul>
+
+
+ <h4><a href="../../decorators/decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="../../decorators/files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="../../decorators/parallel.html">@parallel</a> </li>
+ <li><a href="../../decorators/check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="../../decorators/indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="multiprocessing.html" title="Chapter 14: Multiprocessing, drmaa and Computation Clusters"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="split.html" title="Chapter 12: Splitting up large tasks / files with @split"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="introduction.html">Manual</a> / </li>
+ <li><a href="manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/tutorials/new_tutorial/merge_code.html b/doc/_build/html/tutorials/new_tutorial/merge_code.html
new file mode 100644
index 0000000..08404a2
--- /dev/null
+++ b/doc/_build/html/tutorials/new_tutorial/merge_code.html
@@ -0,0 +1,335 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>Chapter 13: Python Code for @merge multiple input into a single result — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../../index.html" />
+ <link rel="next" title="Chapter 14: Python Code for Multiprocessing, drmaa and Computation Clusters" href="multiprocessing_code.html" />
+ <link rel="prev" title="Chapter 12: Python Code for Splitting up large tasks / files with @split" href="split_code.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="multiprocessing_code.html" title="Chapter 14: Python Code for Multiprocessing, drmaa and Computation Clusters"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="split_code.html" title="Chapter 12: Python Code for Splitting up large tasks / files with @split"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="introduction.html">Manual</a> / </li>
+ <li><a href="manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <div class="section" id="new-manual-merge-chapter-num-python-code-for-merge-multiple-input-into-a-single-result">
+<span id="new-manual-merge-code"></span><h1><strong>Chapter 13</strong>: Python Code for <tt class="docutils literal"><span class="pre">@merge</span></tt> multiple input into a single result<a class="headerlink" href="#new-manual-merge-chapter-num-python-code-for-merge-multiple-input-into-a-single-result" title="Permalink to this headline">¶</a></h1>
+<div class="admonition seealso">
+<p class="first admonition-title">See also</p>
+<ul class="last simple">
+<li><a class="reference internal" href="manual_contents.html#new-manual-table-of-contents"><em>Manual Table of Contents</em></a></li>
+<li><a class="reference internal" href="../../decorators/merge.html#decorators-merge"><em>@merge syntax in detail</em></a></li>
+<li>Back to <strong>Chapter 13</strong>: <a class="reference internal" href="merge.html#new-manual-merge"><em>@merge multiple input into a single result</em></a></li>
+</ul>
+</div>
+<div class="section" id="splitting-large-jobs">
+<h2>Combining partial solutions: Calculating variances<a class="headerlink" href="#splitting-large-jobs" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+
+<span class="n">NUMBER_OF_RANDOMS</span> <span class="o">=</span> <span class="mi">10000</span>
+<span class="n">CHUNK_SIZE</span> <span class="o">=</span> <span class="mi">1000</span>
+
+
+<span class="kn">import</span> <span class="nn">random</span><span class="o">,</span> <span class="nn">os</span><span class="o">,</span> <span class="nn">glob</span>
+
+<span class="c">#---------------------------------------------------------------</span>
+<span class="c">#</span>
+<span class="c"># Create random numbers</span>
+<span class="c">#</span>
+<span class="nd">@originate</span><span class="p">(</span><span class="s">"random_numbers.list"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">create_random_numbers</span><span class="p">(</span><span class="n">output_file_name</span><span class="p">):</span>
+ <span class="n">f</span> <span class="o">=</span> <span class="nb">open</span><span class="p">(</span><span class="n">output_file_name</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+ <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">NUMBER_OF_RANDOMS</span><span class="p">):</span>
+ <span class="n">f</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="s">"</span><span class="si">%g</span><span class="se">\n</span><span class="s">"</span> <span class="o">%</span> <span class="p">(</span><span class="n">random</span><span class="o">.</span><span class="n">random</span><span class="p">()</span> <span class="o">*</span> <span class="mf">100.0</span><span class="p">))</span>
+
+<span class="c">#---------------------------------------------------------------</span>
+<span class="c">#</span>
+<span class="c"># split initial file</span>
+<span class="c">#</span>
+<span class="nd">@split</span><span class="p">(</span><span class="n">create_random_numbers</span><span class="p">,</span> <span class="s">"*.chunks"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">split_problem</span> <span class="p">(</span><span class="n">input_file_names</span><span class="p">,</span> <span class="n">output_files</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> splits random numbers file into xxx files of chunk_size each</span>
+<span class="sd"> """</span>
+ <span class="c">#</span>
+ <span class="c"># clean up any files from previous runs</span>
+ <span class="c">#</span>
+ <span class="c">#for ff in glob.glob("*.chunks"):</span>
+    <span class="k">for</span> <span class="n">ff</span> <span class="ow">in</span> <span class="n">output_files</span><span class="p">:</span>
+ <span class="n">os</span><span class="o">.</span><span class="n">unlink</span><span class="p">(</span><span class="n">ff</span><span class="p">)</span>
+ <span class="c">#</span>
+ <span class="c">#</span>
+ <span class="c"># create new file every chunk_size lines and</span>
+ <span class="c"># copy each line into current file</span>
+ <span class="c">#</span>
+ <span class="n">output_file</span> <span class="o">=</span> <span class="bp">None</span>
+ <span class="n">cnt_files</span> <span class="o">=</span> <span class="mi">0</span>
+ <span class="k">for</span> <span class="n">input_file_name</span> <span class="ow">in</span> <span class="n">input_file_names</span><span class="p">:</span>
+ <span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="n">line</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="nb">open</span><span class="p">(</span><span class="n">input_file_name</span><span class="p">)):</span>
+ <span class="k">if</span> <span class="n">i</span> <span class="o">%</span> <span class="n">CHUNK_SIZE</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
+ <span class="n">cnt_files</span> <span class="o">+=</span> <span class="mi">1</span>
+ <span class="n">output_file</span> <span class="o">=</span> <span class="nb">open</span><span class="p">(</span><span class="s">"</span><span class="si">%d</span><span class="s">.chunks"</span> <span class="o">%</span> <span class="n">cnt_files</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+ <span class="n">output_file</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="n">line</span><span class="p">)</span>
+
+<span class="c">#---------------------------------------------------------------</span>
+<span class="c">#</span>
+<span class="c"># Calculate sum and sum of squares for each chunk file</span>
+<span class="c">#</span>
+<span class="nd">@transform</span><span class="p">(</span><span class="n">split_problem</span><span class="p">,</span> <span class="n">suffix</span><span class="p">(</span><span class="s">".chunks"</span><span class="p">),</span> <span class="s">".sums"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">sum_of_squares</span> <span class="p">(</span><span class="n">input_file_name</span><span class="p">,</span> <span class="n">output_file_name</span><span class="p">):</span>
+ <span class="n">output</span> <span class="o">=</span> <span class="nb">open</span><span class="p">(</span><span class="n">output_file_name</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+ <span class="n">sum_squared</span><span class="p">,</span> <span class="nb">sum</span> <span class="o">=</span> <span class="p">[</span><span class="mf">0.0</span><span class="p">,</span> <span class="mf">0.0</span><span class="p">]</span>
+ <span class="n">cnt_values</span> <span class="o">=</span> <span class="mi">0</span>
+ <span class="k">for</span> <span class="n">line</span> <span class="ow">in</span> <span class="nb">open</span><span class="p">(</span><span class="n">input_file_name</span><span class="p">):</span>
+ <span class="n">cnt_values</span> <span class="o">+=</span> <span class="mi">1</span>
+ <span class="n">val</span> <span class="o">=</span> <span class="nb">float</span><span class="p">(</span><span class="n">line</span><span class="o">.</span><span class="n">rstrip</span><span class="p">())</span>
+ <span class="n">sum_squared</span> <span class="o">+=</span> <span class="n">val</span> <span class="o">*</span> <span class="n">val</span>
+ <span class="nb">sum</span> <span class="o">+=</span> <span class="n">val</span>
+    <span class="n">output</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="s">"</span><span class="si">%s</span><span class="se">\n</span><span class="si">%s</span><span class="se">\n</span><span class="si">%d</span><span class="se">\n</span><span class="s">"</span> <span class="o">%</span> <span class="p">(</span><span class="nb">repr</span><span class="p">(</span><span class="n">sum_squared</span><span class="p">),</span> <span class="nb">repr</span><span class="p">(</span><span class="nb">sum</span><span class="p">),</span> <span class="n">cnt_values</span><span class="p">))</span>
+
+<span class="c">#---------------------------------------------------------------</span>
+<span class="c">#</span>
+<span class="c"># Calculate variance from sums</span>
+<span class="c">#</span>
+<span class="nd">@merge</span><span class="p">(</span><span class="n">sum_of_squares</span><span class="p">,</span> <span class="s">"variance.result"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">calculate_variance</span> <span class="p">(</span><span class="n">input_file_names</span><span class="p">,</span> <span class="n">output_file_name</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Calculate variance naively</span>
+<span class="sd"> """</span>
+ <span class="c">#</span>
+ <span class="c"># initialise variables</span>
+ <span class="c">#</span>
+ <span class="n">all_sum_squared</span> <span class="o">=</span> <span class="mf">0.0</span>
+ <span class="n">all_sum</span> <span class="o">=</span> <span class="mf">0.0</span>
+ <span class="n">all_cnt_values</span> <span class="o">=</span> <span class="mf">0.0</span>
+ <span class="c">#</span>
+    <span class="c"># add up the sum_squared, sum and cnt_values from all the chunks</span>
+ <span class="c">#</span>
+ <span class="k">for</span> <span class="n">input_file_name</span> <span class="ow">in</span> <span class="n">input_file_names</span><span class="p">:</span>
+ <span class="n">sum_squared</span><span class="p">,</span> <span class="nb">sum</span><span class="p">,</span> <span class="n">cnt_values</span> <span class="o">=</span> <span class="nb">map</span><span class="p">(</span><span class="nb">float</span><span class="p">,</span> <span class="nb">open</span><span class="p">(</span><span class="n">input_file_name</span><span class="p">)</span><span class="o">.</span><span class="n">readlines</span><span class="p">())</span>
+ <span class="n">all_sum_squared</span> <span class="o">+=</span> <span class="n">sum_squared</span>
+ <span class="n">all_sum</span> <span class="o">+=</span> <span class="nb">sum</span>
+ <span class="n">all_cnt_values</span> <span class="o">+=</span> <span class="n">cnt_values</span>
+ <span class="n">all_mean</span> <span class="o">=</span> <span class="n">all_sum</span> <span class="o">/</span> <span class="n">all_cnt_values</span>
+ <span class="n">variance</span> <span class="o">=</span> <span class="p">(</span><span class="n">all_sum_squared</span> <span class="o">-</span> <span class="n">all_sum</span> <span class="o">*</span> <span class="n">all_mean</span><span class="p">)</span><span class="o">/</span><span class="p">(</span><span class="n">all_cnt_values</span><span class="p">)</span>
+ <span class="c">#</span>
+ <span class="c"># print output</span>
+ <span class="c">#</span>
+ <span class="nb">open</span><span class="p">(</span><span class="n">output_file_name</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="s">"</span><span class="si">%s</span><span class="se">\n</span><span class="s">"</span> <span class="o">%</span> <span class="n">variance</span><span class="p">)</span>
+
+<span class="c">#---------------------------------------------------------------</span>
+<span class="c">#</span>
+<span class="c"># Run</span>
+<span class="c">#</span>
+<span class="n">pipeline_run</span><span class="p">()</span>
+</pre></div>
+</div>
+</div></blockquote>
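+<p><tt class="docutils literal"><span class="pre">calculate_variance()</span></tt> uses the textbook single-pass identity: the population variance equals the mean of the squares minus the square of the mean, which is what <tt class="docutils literal"><span class="pre">(all_sum_squared - all_sum * all_mean) / all_cnt_values</span></tt> computes. The following self-contained snippet (a sketch, not part of the pipeline) checks that formula against a direct two-pass calculation:</p>
+<div class="highlight-python"><div class="highlight"><pre># quick sanity check of the single-pass variance formula used above
+values = [1.5, 2.0, 3.25, 4.0]
+
+# single pass: accumulate the sum of squares and the sum, as the pipeline does
+sum_sq = sum(v * v for v in values)
+total  = sum(values)
+n      = float(len(values))
+mean   = total / n
+naive_variance  = (sum_sq - total * mean) / n
+
+# direct two-pass population variance for comparison
+direct_variance = sum((v - mean) ** 2 for v in values) / n
+
+assert abs(naive_variance - direct_variance) &lt; 1e-9
+</pre></div>
+</div>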
+</div>
+<div class="section" id="resulting-output">
+<h2>Resulting Output<a class="headerlink" href="#resulting-output" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="gp">>>> </span><span class="n">pipeline_run</span><span class="p">()</span>
+<span class="go"> Job = [None -> random_numbers.list] completed</span>
+<span class="go">Completed Task = create_random_numbers</span>
+<span class="go"> Job = [[random_numbers.list] -> *.chunks] completed</span>
+<span class="go">Completed Task = split_problem</span>
+<span class="go"> Job = [1.chunks -> 1.sums] completed</span>
+<span class="go"> Job = [10.chunks -> 10.sums] completed</span>
+<span class="go"> Job = [2.chunks -> 2.sums] completed</span>
+<span class="go"> Job = [3.chunks -> 3.sums] completed</span>
+<span class="go"> Job = [4.chunks -> 4.sums] completed</span>
+<span class="go"> Job = [5.chunks -> 5.sums] completed</span>
+<span class="go"> Job = [6.chunks -> 6.sums] completed</span>
+<span class="go"> Job = [7.chunks -> 7.sums] completed</span>
+<span class="go"> Job = [8.chunks -> 8.sums] completed</span>
+<span class="go"> Job = [9.chunks -> 9.sums] completed</span>
+<span class="go">Completed Task = sum_of_squares</span>
+<span class="go"> Job = [[1.sums, 10.sums, 2.sums, 3.sums, 4.sums, 5.sums, 6.sums, 7.sums, 8.sums, 9.sums] -> variance.result] completed</span>
+<span class="go">Completed Task = calculate_variance</span>
+</pre></div>
+</div>
+</div></blockquote>
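+<p>As a rough sanity check: <tt class="docutils literal"><span class="pre">create_random_numbers()</span></tt> draws uniform values in the interval [0, 100), whose theoretical variance is 100**2 / 12 (approximately 833.3), so the number written to <tt class="docutils literal"><span class="pre">variance.result</span></tt> should be close to that. A possible spot check (a sketch, run after the pipeline has finished):</p>
+<div class="highlight-python"><div class="highlight"><pre># uniform values in [0, 100) have theoretical variance 100**2 / 12 = 833.33...
+calculated = float(open("variance.result").read())
+print "calculated = %.2f, theoretical = %.2f" % (calculated, 100.0 ** 2 / 12)
+</pre></div>
+</div>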
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="../../contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#"><strong>Chapter 13</strong>: Python Code for <tt class="docutils literal"><span class="pre">@merge</span></tt> multiple input into a single result</a><ul>
+<li><a class="reference internal" href="#splitting-large-jobs">Combining partial solutions: Calculating variances</a></li>
+<li><a class="reference internal" href="#resulting-output">Resulting Output</a></li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="split_code.html"
+ title="previous chapter"><strong>Chapter 12</strong>: Python Code for Splitting up large tasks / files with <strong>@split</strong></a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="multiprocessing_code.html"
+ title="next chapter"><strong>Chapter 14</strong>: Python Code for Multiprocessing, <tt class="docutils literal"><span class="pre">drmaa</span></tt> and Computation Clusters</a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../../_sources/tutorials/new_tutorial/merge_code.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="../../decorators/decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="../../decorators/originate.html">@originate</a> </li>
+ <li><a href="../../decorators/split.html">@split</a> </li>
+ <li><a href="../../decorators/transform.html">@transform</a> </li>
+ <li><a href="../../decorators/merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="../../decorators/subdivide.html">@subdivide</a> </li>
+ <li><a href="../../decorators/transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="../../decorators/collate.html">@collate</a> </li>
+ <li><a href="../../decorators/collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="../../decorators/graphviz.html">@graphviz</a> </li>
+ <li><a href="../../decorators/mkdir.html">@mkdir</a> </li>
+ <li><a href="../../decorators/follows.html">@follows / mkdir</a> </li>
+ <li><a href="../../decorators/posttask.html">@posttask touch_file</a> </li>
+ <li><a href="../../decorators/active_if.html">@active_if</a> </li>
+ <li><a href="../../decorators/jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="../../decorators/product.html">@product </a> </li>
+ <li><a href="../../decorators/permutations.html">@permutations </a> </li>
+ <li><a href="../../decorators/combinations.html">@combinations </a> </li>
+                                        <li><a href="../../decorators/combinations_with_replacement.html">@combinations_with_replacement </a>    </li>
+ </ul>
+
+
+ <h4><a href="../../decorators/decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="../../decorators/files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="../../decorators/parallel.html">@parallel</a> </li>
+ <li><a href="../../decorators/check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="../../decorators/indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="multiprocessing_code.html" title="Chapter 14: Python Code for Multiprocessing, drmaa and Computation Clusters"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="split_code.html" title="Chapter 12: Python Code for Splitting up large tasks / files with @split"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="introduction.html">Manual</a> / </li>
+ <li><a href="manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/tutorials/new_tutorial/mkdir.html b/doc/_build/html/tutorials/new_tutorial/mkdir.html
new file mode 100644
index 0000000..c9755b8
--- /dev/null
+++ b/doc/_build/html/tutorials/new_tutorial/mkdir.html
@@ -0,0 +1,334 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>Chapter 9: Preparing directories for output with @mkdir() — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../../index.html" />
+ <link rel="next" title="Chapter 10: Checkpointing: Interrupted Pipelines and Exceptions" href="checkpointing.html" />
+ <link rel="prev" title="Chapter 8: Specifying output file names with formatter() and regex()" href="output_file_names.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="checkpointing.html" title="Chapter 10: Checkpointing: Interrupted Pipelines and Exceptions"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="output_file_names.html" title="Chapter 8: Specifying output file names with formatter() and regex()"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="introduction.html">Manual</a> / </li>
+ <li><a href="manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <div class="section" id="new-manual-mkdir-chapter-num-preparing-directories-for-output-with-mkdir">
+<span id="new-manual-mkdir"></span><span id="index-0"></span><h1><strong>Chapter 9</strong>: Preparing directories for output with <a class="reference internal" href="../../decorators/mkdir.html#decorators-mkdir"><em>@mkdir()</em></a><a class="headerlink" href="#new-manual-mkdir-chapter-num-preparing-directories-for-output-with-mkdir" title="Permalink to this headline">¶</a></h1>
+<div class="admonition seealso">
+<p class="first admonition-title">See also</p>
+<ul class="last simple">
+<li><a class="reference internal" href="manual_contents.html#new-manual-table-of-contents"><em>Manual Table of Contents</em></a></li>
+<li><a class="reference internal" href="../../decorators/follows.html#decorators-follows"><em>@follows(mkdir()) syntax in detail</em></a></li>
+<li><a class="reference internal" href="../../decorators/mkdir.html#decorators-mkdir"><em>@mkdir syntax in detail</em></a></li>
+</ul>
+</div>
+<div class="admonition note">
+<p class="first admonition-title">Note</p>
+<p>Remember to look at the example code:</p>
+<blockquote class="last">
+<div><ul class="simple">
+<li><a class="reference internal" href="mkdir_code.html#new-manual-mkdir-code"><em>Chapter 9: Python Code for Preparing directories for output with @mkdir()</em></a></li>
+</ul>
+</div></blockquote>
+</div>
+<div class="section" id="overview">
+<h2>Overview<a class="headerlink" href="#overview" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>In <strong>Chapter 3</strong>, we saw that we could use <a class="reference internal" href="transform_in_parallel.html#new-manual-follows-mkdir"><em>@follows(mkdir())</em></a> to
+ensure that output directories exist:</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="c">#</span>
+<span class="c"># create_new_files() @follows mkdir</span>
+<span class="c">#</span>
+<span class="hll"><span class="nd">@follows</span><span class="p">(</span><span class="n">mkdir</span><span class="p">(</span><span class="s">"output/results/here"</span><span class="p">))</span>
+</span><span class="nd">@originate</span><span class="p">([</span><span class="s">"output/results/here/a.start_file"</span><span class="p">,</span>
+ <span class="s">"output/results/here/b.start_file"</span><span class="p">])</span>
+<span class="k">def</span> <span class="nf">create_new_files</span><span class="p">(</span><span class="n">output_file_pair</span><span class="p">):</span>
+ <span class="k">pass</span>
+</pre></div>
+</div>
+<p>This ensures that the decorated task follows (<a class="reference internal" href="transform_in_parallel.html#new-manual-follows-mkdir"><em>@follows</em></a>) the
+making of the specified directory (<tt class="docutils literal"><span class="pre">mkdir()</span></tt>).</p>
+<p>Sometimes, however, the <strong>Output</strong> is intended not for any single directory, but for a group
+of destinations that depends on the parsed contents of the <strong>Input</strong> paths.</p>
+</div></blockquote>
+</div>
+<div class="section" id="creating-directories-after-string-substitution-in-a-zoo">
+<h2>Creating directories after string substitution in a zoo...<a class="headerlink" href="#creating-directories-after-string-substitution-in-a-zoo" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>You may remember <a class="reference internal" href="output_file_names.html#new-manual-output-file-names-formatter-zoo"><em>this example</em></a> from <strong>Chapter 8</strong>:</p>
+<p>We want to feed the denizens of a zoo. The original file names are spread over several directories and we
+group their food supply by the <em>clade</em> of the animal in the following manner:</p>
+<blockquote>
+<div><a class="reference internal image-reference" href="../../_images/simple_tutorial_zoo_animals_formatter_example.jpg"><img alt="../../_images/simple_tutorial_zoo_animals_formatter_example.jpg" src="../../_images/simple_tutorial_zoo_animals_formatter_example.jpg" style="width: 915.5px; height: 208.5px;" /></a>
+<div class="highlight-python"><div class="highlight"><pre><span class="c"># Put different animals in different directories depending on their clade</span>
+<span class="nd">@transform</span><span class="p">(</span><span class="n">create_initial_files</span><span class="p">,</span> <span class="c"># Input</span>
+
+ <span class="n">formatter</span><span class="p">(</span><span class="s">".+/(?P<clade>\w+).(?P<tame>\w+).animals"</span><span class="p">),</span> <span class="c"># Only animals: ignore plants!</span>
+
+ <span class="s">"{subpath[0][1]}/{clade[0]}/{tame[0]}.{subdir[0][0]}.food"</span><span class="p">,</span> <span class="c"># Replacement</span>
+
+ <span class="s">"{subpath[0][1]}/{clade[0]}"</span><span class="p">,</span> <span class="c"># new_directory</span>
+ <span class="s">"{subdir[0][0]}"</span><span class="p">,</span> <span class="c"># animal_name</span>
+ <span class="s">"{tame[0]}"</span><span class="p">)</span> <span class="c"># tameness</span>
+<span class="k">def</span> <span class="nf">feed</span><span class="p">(</span><span class="n">input_file</span><span class="p">,</span> <span class="n">output_file</span><span class="p">,</span> <span class="n">new_directory</span><span class="p">,</span> <span class="n">animal_name</span><span class="p">,</span> <span class="n">tameness</span><span class="p">):</span>
+ <span class="k">print</span> <span class="s">"</span><span class="si">%40s</span><span class="s"> -> </span><span class="si">%90s</span><span class="s">"</span> <span class="o">%</span> <span class="p">(</span><span class="n">input_file</span><span class="p">,</span> <span class="n">output_file</span><span class="p">)</span>
+<span class="hll"> <span class="c"># this blows up</span>
+</span><span class="hll"> <span class="c"># open(output_file, "w")</span>
+</span></pre></div>
+</div>
+</div></blockquote>
+<p>The example code from <strong>Chapter 8</strong> is, however, incomplete. If we were to actually create the specified
+files, we would realise that we had forgotten to create the destination directories <tt class="docutils literal"><span class="pre">reptiles</span></tt> and <tt class="docutils literal"><span class="pre">mammals</span></tt> first!</p>
+</div></blockquote>
+<div class="section" id="using-formatter">
+<h3>using <a class="reference internal" href="../../decorators/indicator_objects.html#decorators-formatter"><em>formatter()</em></a><a class="headerlink" href="#using-formatter" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><p>We could of course create directories manually.
+However, that would be tedious and error prone, and we have already gone to some lengths
+to parse out the directories for <a class="reference internal" href="../../decorators/transform.html#decorators-transform"><em>@transform</em></a>.
+Why don’t we use the same logic to make the directories?</p>
+<p>Can you see the parallels between the syntax for <a class="reference internal" href="../../decorators/mkdir.html#decorators-mkdir"><em>@mkdir</em></a> and <a class="reference internal" href="../../decorators/transform.html#decorators-transform"><em>@transform</em></a>?</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="c"># create directories for each clade</span>
+<span class="nd">@mkdir</span><span class="p">(</span> <span class="n">create_initial_files</span><span class="p">,</span> <span class="c"># Input</span>
+
+ <span class="n">formatter</span><span class="p">(</span><span class="s">".+/(?P<clade>\w+).(?P<tame>\w+).animals"</span><span class="p">),</span> <span class="c"># Only animals: ignore plants!</span>
+          <span class="s">"{subpath[0][1]}/{clade[0]}"</span><span class="p">)</span>         <span class="c"># new_directory</span>
+
+<span class="c"># Put animals of each clade in the same directory</span>
+<span class="nd">@transform</span><span class="p">(</span><span class="n">create_initial_files</span><span class="p">,</span> <span class="c"># Input</span>
+
+ <span class="n">formatter</span><span class="p">(</span><span class="s">".+/(?P<clade>\w+).(?P<tame>\w+).animals"</span><span class="p">),</span> <span class="c"># Only animals: ignore plants!</span>
+
+ <span class="s">"{subpath[0][1]}/{clade[0]}/{tame[0]}.{subdir[0][0]}.food"</span><span class="p">,</span> <span class="c"># Replacement</span>
+
+ <span class="s">"{subpath[0][1]}/{clade[0]}"</span><span class="p">,</span> <span class="c"># new_directory</span>
+ <span class="s">"{subdir[0][0]}"</span><span class="p">,</span> <span class="c"># animal_name</span>
+ <span class="s">"{tame[0]}"</span><span class="p">)</span> <span class="c"># tameness</span>
+<span class="k">def</span> <span class="nf">feed</span><span class="p">(</span><span class="n">input_file</span><span class="p">,</span> <span class="n">output_file</span><span class="p">,</span> <span class="n">new_directory</span><span class="p">,</span> <span class="n">animal_name</span><span class="p">,</span> <span class="n">tameness</span><span class="p">):</span>
+ <span class="k">print</span> <span class="s">"</span><span class="si">%40s</span><span class="s"> -> </span><span class="si">%90s</span><span class="s">"</span> <span class="o">%</span> <span class="p">(</span><span class="n">input_file</span><span class="p">,</span> <span class="n">output_file</span><span class="p">)</span>
+ <span class="c"># this works now</span>
+ <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+</pre></div>
+</div>
+</div></blockquote>
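+<p>Before running the pipeline for real, it can be reassuring to preview which jobs are pending, and therefore which clade directories would be made, with a dry run. A minimal sketch, assuming the task definitions above are in scope:</p>
+<div class="highlight-python"><div class="highlight"><pre>import sys
+
+# print which jobs are out of date, without running anything;
+# the directory-making jobs added by @mkdir should appear here too
+pipeline_printout(sys.stdout, [feed], verbose=3)
+</pre></div>
+</div>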
+<p>See the <a class="reference internal" href="mkdir_code.html#new-manual-mkdir-code"><em>example code</em></a></p>
+</div></blockquote>
+</div>
+<div class="section" id="using-regex">
+<h3>using <a class="reference internal" href="../../decorators/indicator_objects.html#decorators-regex"><em>regex()</em></a><a class="headerlink" href="#using-regex" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><p>If you are particularly fond of using regular expression to parse file paths,
+you could also use <a class="reference internal" href="../../decorators/indicator_objects.html#decorators-regex"><em>regex()</em></a>:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="c"># create directories for each clade</span>
+<span class="nd">@mkdir</span><span class="p">(</span> <span class="n">create_initial_files</span><span class="p">,</span> <span class="c"># Input</span>
+
+ <span class="n">regex</span><span class="p">(</span><span class="s">r"(.*?)/?(\w+)/(?P<clade>\w+).(?P<tame>\w+).animals"</span><span class="p">),</span> <span class="c"># Only animals: ignore plants!</span>
+ <span class="s">r"\1/\g<clade>"</span><span class="p">)</span> <span class="c"># new_directory</span>
+
+<span class="c"># Put animals of each clade in the same directory</span>
+<span class="nd">@transform</span><span class="p">(</span><span class="n">create_initial_files</span><span class="p">,</span> <span class="c"># Input</span>
+
+ <span class="n">formatter</span><span class="p">(</span><span class="s">".+/(?P<clade>\w+).(?P<tame>\w+).animals"</span><span class="p">),</span> <span class="c"># Only animals: ignore plants!</span>
+
+ <span class="s">"{subpath[0][1]}/{clade[0]}/{tame[0]}.{subdir[0][0]}.food"</span><span class="p">,</span> <span class="c"># Replacement</span>
+
+ <span class="s">"{subpath[0][1]}/{clade[0]}"</span><span class="p">,</span> <span class="c"># new_directory</span>
+ <span class="s">"{subdir[0][0]}"</span><span class="p">,</span> <span class="c"># animal_name</span>
+ <span class="s">"{tame[0]}"</span><span class="p">)</span> <span class="c"># tameness</span>
+<span class="k">def</span> <span class="nf">feed</span><span class="p">(</span><span class="n">input_file</span><span class="p">,</span> <span class="n">output_file</span><span class="p">,</span> <span class="n">new_directory</span><span class="p">,</span> <span class="n">animal_name</span><span class="p">,</span> <span class="n">tameness</span><span class="p">):</span>
+ <span class="k">print</span> <span class="s">"</span><span class="si">%40s</span><span class="s"> -> </span><span class="si">%90s</span><span class="s">"</span> <span class="o">%</span> <span class="p">(</span><span class="n">input_file</span><span class="p">,</span> <span class="n">output_file</span><span class="p">)</span>
+ <span class="c"># this works now</span>
+ <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+</pre></div>
+</div>
+</div></blockquote>
+</div></blockquote>
+</div>
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="../../contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#"><strong>Chapter 9</strong>: Preparing directories for output with <tt class="docutils literal"><span class="pre">@mkdir()</span></tt></a><ul>
+<li><a class="reference internal" href="#overview">Overview</a></li>
+<li><a class="reference internal" href="#creating-directories-after-string-substitution-in-a-zoo">Creating directories after string substitution in a zoo...</a><ul>
+<li><a class="reference internal" href="#using-formatter">using <tt class="docutils literal"><span class="pre">formatter()</span></tt></a></li>
+<li><a class="reference internal" href="#using-regex">using <tt class="docutils literal"><span class="pre">regex()</span></tt></a></li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="output_file_names.html"
+ title="previous chapter"><strong>Chapter 8</strong>: Specifying output file names with <tt class="docutils literal"><span class="pre">formatter()</span></tt> and <tt class="docutils literal"><span class="pre">regex()</span></tt></a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="checkpointing.html"
+ title="next chapter"><strong>Chapter 10</strong>: Checkpointing: Interrupted Pipelines and Exceptions</a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../../_sources/tutorials/new_tutorial/mkdir.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="../../decorators/decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="../../decorators/originate.html">@originate</a> </li>
+ <li><a href="../../decorators/split.html">@split</a> </li>
+ <li><a href="../../decorators/transform.html">@transform</a> </li>
+ <li><a href="../../decorators/merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="../../decorators/subdivide.html">@subdivide</a> </li>
+ <li><a href="../../decorators/transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="../../decorators/collate.html">@collate</a> </li>
+ <li><a href="../../decorators/collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="../../decorators/graphviz.html">@graphviz</a> </li>
+ <li><a href="../../decorators/mkdir.html">@mkdir</a> </li>
+ <li><a href="../../decorators/follows.html">@follows / mkdir</a> </li>
+ <li><a href="../../decorators/posttask.html">@posttask touch_file</a> </li>
+ <li><a href="../../decorators/active_if.html">@active_if</a> </li>
+ <li><a href="../../decorators/jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="../../decorators/product.html">@product </a> </li>
+ <li><a href="../../decorators/permutations.html">@permutations </a> </li>
+ <li><a href="../../decorators/combinations.html">@combinations </a> </li>
+                                        <li><a href="../../decorators/combinations_with_replacement.html">@combinations_with_replacement </a>    </li>
+ </ul>
+
+
+ <h4><a href="../../decorators/decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="../../decorators/files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="../../decorators/parallel.html">@parallel</a> </li>
+ <li><a href="../../decorators/check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="../../decorators/indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="checkpointing.html" title="Chapter 10: Checkpointing: Interrupted Pipelines and Exceptions"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="output_file_names.html" title="Chapter 8: Specifying output file names with formatter() and regex()"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="introduction.html">Manual</a> / </li>
+ <li><a href="manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/tutorials/new_tutorial/mkdir_code.html b/doc/_build/html/tutorials/new_tutorial/mkdir_code.html
new file mode 100644
index 0000000..b228bf6
--- /dev/null
+++ b/doc/_build/html/tutorials/new_tutorial/mkdir_code.html
@@ -0,0 +1,300 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>Chapter 9: Python Code for Preparing directories for output with @mkdir() — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../../index.html" />
+ <link rel="next" title="Chapter 10: Python Code for Checkpointing: Interrupted Pipelines and Exceptions" href="checkpointing_code.html" />
+ <link rel="prev" title="Chapter 8: Python Code for Specifying output file names with formatter() and regex()" href="output_file_names_code.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="checkpointing_code.html" title="Chapter 10: Python Code for Checkpointing: Interrupted Pipelines and Exceptions"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="output_file_names_code.html" title="Chapter 8: Python Code for Specifying output file names with formatter() and regex()"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="introduction.html">Manual</a> / </li>
+ <li><a href="manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <div class="section" id="new-manual-mkdir-chapter-num-python-code-for-preparing-directories-for-output-with-mkdir">
+<span id="new-manual-mkdir-code"></span><h1><strong>Chapter 9</strong>: Python Code for Preparing directories for output with <a class="reference internal" href="../../decorators/mkdir.html#decorators-mkdir"><em>@mkdir()</em></a><a class="headerlink" href="#new-manual-mkdir-chapter-num-python-code-for-preparing-directories-for-output-with-mkdir" title="Permalink to this headline">¶</a></h1>
+<div class="admonition seealso">
+<p class="first admonition-title">See also</p>
+<ul class="last simple">
+<li><a class="reference internal" href="manual_contents.html#new-manual-table-of-contents"><em>Manual Table of Contents</em></a></li>
+<li><a class="reference internal" href="../../decorators/mkdir.html#decorators-mkdir"><em>mkdir()</em></a> syntax</li>
+<li><a class="reference internal" href="../../decorators/indicator_objects.html#decorators-formatter"><em>formatter()</em></a> syntax</li>
+<li><a class="reference internal" href="../../decorators/indicator_objects.html#decorators-regex"><em>regex()</em></a> syntax</li>
+<li>Back to <strong>Chapter 9</strong>: <a class="reference internal" href="mkdir.html#new-manual-mkdir"><em>Preparing directories for output with @mkdir()</em></a></li>
+</ul>
+</div>
+<div class="section" id="code-for-formatter-zoo-example">
+<h2>Code for <a class="reference internal" href="../../decorators/indicator_objects.html#decorators-formatter"><em>formatter()</em></a> Zoo example<a class="headerlink" href="#code-for-formatter-zoo-example" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+
+<span class="c"># Make directories</span>
+<span class="nd">@mkdir</span><span class="p">([</span><span class="s">"tiger"</span><span class="p">,</span> <span class="s">"lion"</span><span class="p">,</span> <span class="s">"dog"</span><span class="p">,</span> <span class="s">"crocodile"</span><span class="p">,</span> <span class="s">"rose"</span><span class="p">])</span>
+<span class="nd">@originate</span><span class="p">(</span>
+ <span class="c"># List of animals and plants</span>
+ <span class="p">[</span> <span class="s">"tiger/mammals.wild.animals"</span><span class="p">,</span>
+ <span class="s">"lion/mammals.wild.animals"</span><span class="p">,</span>
+ <span class="s">"lion/mammals.handreared.animals"</span><span class="p">,</span>
+ <span class="s">"dog/mammals.tame.animals"</span><span class="p">,</span>
+ <span class="s">"dog/mammals.wild.animals"</span><span class="p">,</span>
+ <span class="s">"crocodile/reptiles.wild.animals"</span><span class="p">,</span>
+ <span class="s">"rose/flowering.handreared.plants"</span><span class="p">])</span>
+<span class="k">def</span> <span class="nf">create_initial_files</span><span class="p">(</span><span class="n">output_file</span><span class="p">):</span>
+ <span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span> <span class="k">as</span> <span class="n">oo</span><span class="p">:</span> <span class="k">pass</span>
+
+
+<span class="c"># create directories for each clade</span>
+<span class="nd">@mkdir</span><span class="p">(</span> <span class="n">create_initial_files</span><span class="p">,</span> <span class="c"># Input</span>
+
+ <span class="n">formatter</span><span class="p">(</span><span class="s">".+/(?P<clade>\w+).(?P<tame>\w+).animals"</span><span class="p">),</span> <span class="c"># Only animals: ignore plants!</span>
+
+ <span class="s">"{subpath[0][1]}/{clade[0]}"</span><span class="p">)</span> <span class="c"># new_directory</span>
+<span class="c"># Put different animals in different directories depending on their clade</span>
+<span class="nd">@transform</span><span class="p">(</span><span class="n">create_initial_files</span><span class="p">,</span> <span class="c"># Input</span>
+
+ <span class="n">formatter</span><span class="p">(</span><span class="s">".+/(?P<clade>\w+).(?P<tame>\w+).animals"</span><span class="p">),</span> <span class="c"># Only animals: ignore plants!</span>
+
+ <span class="s">"{subpath[0][1]}/{clade[0]}/{tame[0]}.{subdir[0][0]}.food"</span><span class="p">,</span> <span class="c"># Replacement</span>
+
+ <span class="s">"{subpath[0][1]}/{clade[0]}"</span><span class="p">,</span> <span class="c"># new_directory</span>
+ <span class="s">"{subdir[0][0]}"</span><span class="p">,</span> <span class="c"># animal_name</span>
+ <span class="s">"{tame[0]}"</span><span class="p">)</span> <span class="c"># tameness</span>
+<span class="k">def</span> <span class="nf">feed</span><span class="p">(</span><span class="n">input_file</span><span class="p">,</span> <span class="n">output_file</span><span class="p">,</span> <span class="n">new_directory</span><span class="p">,</span> <span class="n">animal_name</span><span class="p">,</span> <span class="n">tameness</span><span class="p">):</span>
+ <span class="k">print</span> <span class="s">"</span><span class="si">%40s</span><span class="s"> -> </span><span class="si">%90s</span><span class="s">"</span> <span class="o">%</span> <span class="p">(</span><span class="n">input_file</span><span class="p">,</span> <span class="n">output_file</span><span class="p">)</span>
+ <span class="c"># this works now</span>
+ <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+
+
+<span class="n">pipeline_run</span><span class="p">(</span><span class="n">verbose</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
+</pre></div>
+</div>
+</div></blockquote>
+</div>
+<div class="section" id="code-for-regex-zoo-example">
+<h2>Code for <a class="reference internal" href="../../decorators/indicator_objects.html#decorators-regex"><em>regex()</em></a> Zoo example<a class="headerlink" href="#code-for-regex-zoo-example" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+
+<span class="c"># Make directories</span>
+<span class="nd">@mkdir</span><span class="p">([</span><span class="s">"tiger"</span><span class="p">,</span> <span class="s">"lion"</span><span class="p">,</span> <span class="s">"dog"</span><span class="p">,</span> <span class="s">"crocodile"</span><span class="p">,</span> <span class="s">"rose"</span><span class="p">])</span>
+<span class="nd">@originate</span><span class="p">(</span>
+ <span class="c"># List of animals and plants</span>
+ <span class="p">[</span> <span class="s">"tiger/mammals.wild.animals"</span><span class="p">,</span>
+ <span class="s">"lion/mammals.wild.animals"</span><span class="p">,</span>
+ <span class="s">"lion/mammals.handreared.animals"</span><span class="p">,</span>
+ <span class="s">"dog/mammals.tame.animals"</span><span class="p">,</span>
+ <span class="s">"dog/mammals.wild.animals"</span><span class="p">,</span>
+ <span class="s">"crocodile/reptiles.wild.animals"</span><span class="p">,</span>
+ <span class="s">"rose/flowering.handreared.plants"</span><span class="p">])</span>
+<span class="k">def</span> <span class="nf">create_initial_files</span><span class="p">(</span><span class="n">output_file</span><span class="p">):</span>
+ <span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span> <span class="k">as</span> <span class="n">oo</span><span class="p">:</span> <span class="k">pass</span>
+
+
+<span class="c"># create directories for each clade</span>
+<span class="nd">@mkdir</span><span class="p">(</span> <span class="n">create_initial_files</span><span class="p">,</span> <span class="c"># Input</span>
+
+ <span class="n">regex</span><span class="p">(</span><span class="s">r"(.*?/?)(\w+)/(?P<clade>\w+).(?P<tame>\w+).animals"</span><span class="p">),</span> <span class="c"># Only animals: ignore plants!</span>
+ <span class="s">r"\g<clade>"</span><span class="p">)</span> <span class="c"># new_directory</span>
+<span class="c"># Put different animals in different directories depending on their clade</span>
+<span class="nd">@transform</span><span class="p">(</span><span class="n">create_initial_files</span><span class="p">,</span> <span class="c"># Input</span>
+
+ <span class="n">regex</span><span class="p">(</span><span class="s">r"(.*?/?)(\w+)/(?P<clade>\w+).(?P<tame>\w+).animals"</span><span class="p">),</span> <span class="c"># Only animals: ignore plants!</span>
+
+ <span class="s">r"\1\g<clade>/\g<tame>.\2.food"</span><span class="p">,</span> <span class="c"># Replacement</span>
+
+ <span class="s">r"\1\g<clade>"</span><span class="p">,</span> <span class="c"># new_directory</span>
+ <span class="s">r"\2"</span><span class="p">,</span> <span class="c"># animal_name</span>
+ <span class="s">"\g<tame>"</span><span class="p">)</span> <span class="c"># tameness</span>
+<span class="k">def</span> <span class="nf">feed</span><span class="p">(</span><span class="n">input_file</span><span class="p">,</span> <span class="n">output_file</span><span class="p">,</span> <span class="n">new_directory</span><span class="p">,</span> <span class="n">animal_name</span><span class="p">,</span> <span class="n">tameness</span><span class="p">):</span>
+ <span class="k">print</span> <span class="s">"</span><span class="si">%40s</span><span class="s"> -> </span><span class="si">%90s</span><span class="s">"</span> <span class="o">%</span> <span class="p">(</span><span class="n">input_file</span><span class="p">,</span> <span class="n">output_file</span><span class="p">)</span>
+ <span class="c"># this works now</span>
+ <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+
+
+<span class="n">pipeline_run</span><span class="p">(</span><span class="n">verbose</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
+</pre></div>
+</div>
+</div></blockquote>
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="../../contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#"><strong>Chapter 9</strong>: Python Code for Preparing directories for output with <tt class="docutils literal"><span class="pre">@mkdir()</span></tt></a><ul>
+<li><a class="reference internal" href="#code-for-formatter-zoo-example">Code for <tt class="docutils literal"><span class="pre">formatter()</span></tt> Zoo example</a></li>
+<li><a class="reference internal" href="#code-for-regex-zoo-example">Code for <tt class="docutils literal"><span class="pre">regex()</span></tt> Zoo example</a></li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="output_file_names_code.html"
+ title="previous chapter"><strong>Chapter 8</strong>: Python Code for Specifying output file names with <tt class="docutils literal"><span class="pre">formatter()</span></tt> and <tt class="docutils literal"><span class="pre">regex()</span></tt></a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="checkpointing_code.html"
+ title="next chapter"><strong>Chapter 10</strong>: Python Code for Checkpointing: Interrupted Pipelines and Exceptions</a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../../_sources/tutorials/new_tutorial/mkdir_code.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="../../decorators/decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="../../decorators/originate.html">@originate</a> </li>
+ <li><a href="../../decorators/split.html">@split</a> </li>
+ <li><a href="../../decorators/transform.html">@transform</a> </li>
+ <li><a href="../../decorators/merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="../../decorators/subdivide.html">@subdivide</a> </li>
+ <li><a href="../../decorators/transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="../../decorators/collate.html">@collate</a> </li>
+ <li><a href="../../decorators/collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="../../decorators/graphviz.html">@graphviz</a> </li>
+ <li><a href="../../decorators/mkdir.html">@mkdir</a> </li>
+ <li><a href="../../decorators/follows.html">@follows / mkdir</a> </li>
+ <li><a href="../../decorators/posttask.html">@posttask touch_file</a> </li>
+ <li><a href="../../decorators/active_if.html">@active_if</a> </li>
+ <li><a href="../../decorators/jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="../../decorators/product.html">@product </a> </li>
+ <li><a href="../../decorators/permutations.html">@permutations </a> </li>
+ <li><a href="../../decorators/combinations.html">@combinations </a> </li>
+ <li><a href="../../decorators/combinations_with_replacement.html">@combinations_ _with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="../../decorators/decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="../../decorators/files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="../../decorators/parallel.html">@parallel</a> </li>
+ <li><a href="../../decorators/check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="../../decorators/indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="checkpointing_code.html" title="Chapter 10: Python Code for Checkpointing: Interrupted Pipelines and Exceptions"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="output_file_names_code.html" title="Chapter 8: Python Code for Specifying output file names with formatter() and regex()"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="introduction.html">Manual</a> / </li>
+ <li><a href="manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/tutorials/new_tutorial/multiprocessing.html b/doc/_build/html/tutorials/new_tutorial/multiprocessing.html
new file mode 100644
index 0000000..3fa388c
--- /dev/null
+++ b/doc/_build/html/tutorials/new_tutorial/multiprocessing.html
@@ -0,0 +1,445 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>Chapter 14: Multiprocessing, drmaa and Computation Clusters — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../../index.html" />
+ <link rel="next" title="Chapter 15: Logging progress through a pipeline" href="logging.html" />
+ <link rel="prev" title="Chapter 13: @merge multiple input into a single result" href="merge.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="logging.html" title="Chapter 15: Logging progress through a pipeline"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="merge.html" title="Chapter 13: @merge multiple input into a single result"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="introduction.html">Manual</a> / </li>
+ <li><a href="manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <div class="section" id="new-manual-multiprocessing-chapter-num-multiprocessing-drmaa-and-computation-clusters">
+<span id="new-manual-multiprocessing"></span><span id="index-0"></span><h1><strong>Chapter 14</strong>: Multiprocessing, <tt class="docutils literal"><span class="pre">drmaa</span></tt> and Computation Clusters<a class="headerlink" href="#new-manual-multiprocessing-chapter-num-multiprocessing-drmaa-and-computation-clusters" title="Permalink to this headline">¶</a></h1>
+<div class="admonition seealso">
+<p class="first admonition-title">See also</p>
+<ul class="last simple">
+<li><a class="reference internal" href="manual_contents.html#new-manual-table-of-contents"><em>Manual Table of Contents</em></a></li>
+<li><a class="reference internal" href="../../decorators/jobs_limit.html#decorators-jobs-limit"><em>@jobs_limit</em></a> syntax</li>
+<li><a class="reference internal" href="../../pipeline_functions.html#pipeline-functions-pipeline-run"><em>pipeline_run()</em></a> syntax</li>
+<li><a class="reference internal" href="../../drmaa_wrapper_functions.html#drmaa-wrapper-run-job"><em>drmaa_wrapper.run_job()</em></a> syntax</li>
+</ul>
+</div>
+<div class="admonition note">
+<p class="first admonition-title">Note</p>
+<p>Remember to look at the example code:</p>
+<blockquote class="last">
+<div><ul class="simple">
+<li><a class="reference internal" href="multiprocessing_code.html#new-manual-multiprocessing-code"><em>Chapter 14: Python Code for Multiprocessing, drmaa and Computation Clusters</em></a></li>
+</ul>
+</div></blockquote>
+</div>
+<div class="section" id="overview">
+<h2>Overview<a class="headerlink" href="#overview" title="Permalink to this headline">¶</a></h2>
+<div class="section" id="multi-processing">
+<span id="index-1"></span><h3>Multi Processing<a class="headerlink" href="#multi-processing" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><p><em>Ruffus</em> uses python <a class="reference external" href="http://docs.python.org/library/multiprocessing.html">multiprocessing</a> to run
+each job in a separate process.</p>
+<p>This means that jobs do <em>not</em> necessarily complete in the order of the defined parameters.
+Task hierarchies are, of course, inviolate: upstream tasks always run before their downstream, dependent tasks.</p>
+<p>Tasks that are independent (i.e. do not precede each other) may be run in parallel as well.</p>
+<p>The number of concurrent jobs can be set in <a class="reference internal" href="../../pipeline_functions.html#pipeline-functions-pipeline-run"><em>pipeline_run</em></a>:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="n">pipeline_run</span><span class="p">([</span><span class="n">parallel_task</span><span class="p">],</span> <span class="n">multiprocess</span> <span class="o">=</span> <span class="mi">5</span><span class="p">)</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p>If <tt class="docutils literal"><span class="pre">multiprocess</span></tt> is set to 1, then jobs will be run on a single process.</p>
+</div></blockquote>
+</div>
+<div class="section" id="data-sharing">
+<span id="index-2"></span><h3>Data sharing<a class="headerlink" href="#data-sharing" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><p>Running jobs in separate processes allows <em>Ruffus</em> to make full use of the multiple
+processors in modern computers. However, some <a class="reference external" href="http://docs.python.org/library/multiprocessing.html#multiprocessing-programming">multiprocessing guidelines</a>
+should be borne in mind when writing <em>Ruffus</em> pipelines. In particular:</p>
+<ul class="simple">
+<li>Try not to pass large amounts of data between jobs, or at least be aware that this has to be marshalled
+across process boundaries.</li>
+<li>Only data which can be <a class="reference external" href="http://docs.python.org/library/pickle.html">pickled</a> can be passed as
+parameters to <em>Ruffus</em> task functions. Happily, that applies to almost every native Python data type.
+The rare unpicklable object (an open file handle, a database connection, a lambda) will cause python to complain (fail) loudly when <em>Ruffus</em> pipelines
+are run; see the sketch below.</li>
+</ul>
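+<p>A minimal sketch of the distinction (the file names, the <tt class="docutils literal"><span class="pre">extra_params</span></tt> dictionary
+and the task itself are illustrative only, not part of any Ruffus example):</p>
+<div class="highlight-python"><div class="highlight"><pre>from ruffus import *
+import pickle
+
+# plain dicts, lists, strings and numbers pickle cleanly,
+# so they can safely cross process boundaries as extra task parameters
+extra_params = {"threshold": 0.5, "labels": ["wild", "tame"]}
+pickle.dumps(extra_params)      # sanity check: no exception means it is picklable
+
+@transform(["a.input"], suffix(".input"), ".output", extra_params)
+def run_picklable(input_file, output_file, params):
+    # params arrives as an ordinary dict inside the worker process
+    open(output_file, "w").close()
+
+# by contrast, an open file handle, database connection or lambda cannot be pickled:
+# passing one as an extra parameter makes pipeline_run(multiprocess = N) fail loudly
+</pre></div>
+</div>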
+</div></blockquote>
+</div>
+</div>
+<div class="section" id="restricting-parallelism-with-jobs-limit">
+<span id="new-manual-jobs-limit"></span><span id="index-3"></span><h2>Restricting parallelism with <a class="reference internal" href="../../decorators/jobs_limit.html#decorators-jobs-limit"><em>@jobs_limit</em></a><a class="headerlink" href="#restricting-parallelism-with-jobs-limit" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>Calling <a class="reference internal" href="../../pipeline_functions.html#pipeline-functions-pipeline-run"><em>pipeline_run(multiprocess = NNN)</em></a> allows
+multiple jobs (from multiple independent tasks) to be run in parallel. However, there
+are some operations that consume so many resources that we might want them to run
+with reduced concurrency or none at all.</p>
+<p>For example, we might want to download some files via FTP, but the server restricts the number of
+requests from each IP address. Even if the rest of the pipeline is running 100 jobs in
+parallel, the FTP downloads must be restricted to 2 files at a time. We would
+like to keep the rest of the pipeline running as is, but have this one operation run serially
+or with limited concurrency.</p>
+<ul class="simple">
+<li><a class="reference internal" href="../../pipeline_functions.html#pipeline-functions-pipeline-run"><em>pipeline_run(multiprocess = NNN)</em></a> sets the pipeline-wide concurrency but</li>
+<li><a class="reference internal" href="../../decorators/jobs_limit.html#decorators-jobs-limit"><em>@jobs_limit(MMM)</em></a> sets concurrency at <tt class="docutils literal"><span class="pre">MMM</span></tt> only for jobs in the decorated task.</li>
+</ul>
+<p>The optional name (e.g. <tt class="docutils literal"><span class="pre">@jobs_limit(3,</span> <span class="pre">"ftp_download_limit")</span></tt>) allows the same limit to
+be shared across multiple tasks. To be pedantic: a limit of <tt class="docutils literal"><span class="pre">3</span></tt> jobs at a time would be applied
+across all tasks which have a <tt class="docutils literal"><span class="pre">@jobs_limit</span></tt> named <tt class="docutils literal"><span class="pre">"ftp_download_limit"</span></tt>.</p>
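+<p>A minimal sketch of a shared, named limit (the task names and file suffixes are
+illustrative only; the full worked example is linked below):</p>
+<div class="highlight-python"><div class="highlight"><pre>from ruffus import *
+
+@originate(["a.url", "b.url", "c.url", "d.url"])
+def make_url_files(output_file):
+    open(output_file, "w").close()
+
+# both download tasks name the same limit, so at most 2 of their jobs
+# run at any one time, counted across the two tasks together
+@jobs_limit(2, "ftp_download_limit")
+@transform(make_url_files, suffix(".url"), ".fasta")
+def download_sequences(input_file, output_file):
+    open(output_file, "w").close()
+
+@jobs_limit(2, "ftp_download_limit")
+@transform(make_url_files, suffix(".url"), ".gff")
+def download_annotations(input_file, output_file):
+    open(output_file, "w").close()
+
+# the rest of the pipeline may still run up to 100 jobs in parallel
+pipeline_run(multiprocess = 100, verbose = 0)
+</pre></div>
+</div>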
+<p>The <a class="reference internal" href="multiprocessing_code.html#new-manual-multiprocessing-code"><em>example code</em></a> uses up to 10 processes across the
+pipeline, but runs the <tt class="docutils literal"><span class="pre">stage1_big</span></tt> and <tt class="docutils literal"><span class="pre">stage1_small</span></tt> tasks 3 at a time (shared across
+both tasks). <tt class="docutils literal"><span class="pre">stage2</span></tt> jobs run 5 at a time.</p>
+</div></blockquote>
+</div>
+<div class="section" id="using-drmaa-to-dispatch-work-to-computational-clusters-or-grid-engines-from-ruffus-jobs">
+<span id="new-manual-ruffus-drmaa-wrapper-run-job"></span><h2>Using <tt class="docutils literal"><span class="pre">drmaa</span></tt> to dispatch work to Computational Clusters or Grid engines from Ruffus jobs<a class="headerlink" href="#using-drmaa-to-dispatch-work-to-computational-clusters-or-grid-engines-from-ruffus-jobs" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>Ruffus has been widely used to manage work on computational clusters or grid engines. Though Ruffus
+task functions cannot (yet!) run natively and transparently on remote cluster nodes, it is trivial
+to dispatch work across the cluster.</p>
+<p>From version 2.4 onwards, Ruffus includes an optional helper module which interacts with
+<a class="reference external" href="https://github.com/drmaa-python/drmaa-python">python bindings</a> for the widely used <a class="reference external" href="http://en.wikipedia.org/wiki/DRMAA">drmaa</a>
+Open Grid Forum API specification. This allows Ruffus jobs to dispatch work to a computational cluster and wait until it completes.</p>
+<p>Here are the necessary steps:</p>
+</div></blockquote>
+<div class="section" id="use-a-shared-drmaa-session">
+<h3>1) Use a shared drmaa session:<a class="headerlink" href="#use-a-shared-drmaa-session" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><p>Before your pipeline runs:</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="c">#</span>
+<span class="c"># start shared drmaa session for all jobs / tasks in pipeline</span>
+<span class="c">#</span>
+<span class="kn">import</span> <span class="nn">drmaa</span>
+<span class="n">drmaa_session</span> <span class="o">=</span> <span class="n">drmaa</span><span class="o">.</span><span class="n">Session</span><span class="p">()</span>
+<span class="n">drmaa_session</span><span class="o">.</span><span class="n">initialize</span><span class="p">()</span>
+</pre></div>
+</div>
+<p>Cleanup after your pipeline completes:</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="c">#</span>
+<span class="c"># pipeline functions go here</span>
+<span class="c">#</span>
+<span class="k">if</span> <span class="n">__name__</span> <span class="o">==</span> <span class="s">'__main__'</span><span class="p">:</span>
+ <span class="n">drmaa_session</span><span class="o">.</span><span class="n">exit</span><span class="p">()</span>
+</pre></div>
+</div>
+</div></blockquote>
+</div>
+<div class="section" id="import-ruffus-drmaa-wrapper">
+<h3>2) import <tt class="docutils literal"><span class="pre">ruffus.drmaa_wrapper</span></tt><a class="headerlink" href="#import-ruffus-drmaa-wrapper" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><ul class="simple">
+<li>The optional <tt class="docutils literal"><span class="pre">ruffus.drmaa_wrapper</span></tt> module needs to be imported explicitly:</li>
+</ul>
+<div class="highlight-python"><div class="highlight"><pre><span class="hll"><span class="c"># imported ruffus.drmaa_wrapper explicitly</span>
+</span><span class="kn">from</span> <span class="nn">ruffus.drmaa_wrapper</span> <span class="kn">import</span> <span class="n">run_job</span><span class="p">,</span> <span class="n">error_drmaa_job</span>
+</pre></div>
+</div>
+</div></blockquote>
+</div>
+<div class="section" id="call-drmaa-wrapper-run-job">
+<h3>3) call <a class="reference internal" href="../../drmaa_wrapper_functions.html#drmaa-wrapper-run-job"><em>drmaa_wrapper.run_job()</em></a><a class="headerlink" href="#call-drmaa-wrapper-run-job" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><p><a class="reference internal" href="../../drmaa_wrapper_functions.html#drmaa-wrapper-run-job"><em>drmaa_wrapper.run_job()</em></a> dispatches the work to a cluster node within a normal Ruffus job and waits for completion</p>
+<p>This is the equivalent of <a class="reference external" href="http://docs.python.org/2/library/os.html#os.system">os.system</a> or
+<a class="reference external" href="http://docs.python.org/2/library/subprocess.html#subprocess.check_call">subprocess.check_output</a> but the code will run remotely as specified:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="hll"> <span class="c"># ruffus.drmaa_wrapper.run_job</span>
+</span> <span class="n">stdout_res</span><span class="p">,</span> <span class="n">stderr_res</span> <span class="o">=</span> <span class="n">run_job</span><span class="p">(</span><span class="n">cmd_str</span> <span class="o">=</span> <span class="s">"touch "</span> <span class="o">+</span> <span class="n">output_file</span><span class="p">,</span>
+ <span class="n">job_name</span> <span class="o">=</span> <span class="n">job_name</span><span class="p">,</span>
+ <span class="n">logger</span> <span class="o">=</span> <span class="n">logger</span><span class="p">,</span>
+ <span class="n">drmaa_session</span> <span class="o">=</span> <span class="n">drmaa_session</span><span class="p">,</span>
+ <span class="n">run_locally</span> <span class="o">=</span> <span class="n">options</span><span class="o">.</span><span class="n">local_run</span><span class="p">,</span>
+ <span class="n">job_other_options</span> <span class="o">=</span> <span class="n">job_other_options</span><span class="p">)</span>
+</pre></div>
+</div>
+<p>The complete code is available <a class="reference internal" href="multiprocessing_code.html#using-ruffus-drmaa-wrapper"><em>here</em></a></p>
+</div></blockquote>
+<ul class="simple">
+<li><a class="reference internal" href="../../drmaa_wrapper_functions.html#drmaa-wrapper-run-job"><em>drmaa_wrapper.run_job()</em></a> is a convenience wrapper around the <a class="reference external" href="https://github.com/drmaa-python/drmaa-python">python drmaa bindings</a>
+<a class="reference external" href="http://drmaa-python.readthedocs.org/en/latest/tutorials.html#waiting-for-a-job">RunJob</a> function.
+It takes care of writing drmaa <em>job templates</em> for you.</li>
+<li>Each call creates a separate drmaa <em>job template</em>.</li>
+</ul>
+</div></blockquote>
+</div>
+<div class="section" id="use-multithread-pipeline-run-multithread-nnn">
+<h3>4) Use multithread: <a class="reference internal" href="../../pipeline_functions.html#pipeline-functions-pipeline-run"><em>pipeline_run(multithread = NNN)</em></a><a class="headerlink" href="#use-multithread-pipeline-run-multithread-nnn" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><div class="admonition warning">
+<p class="first admonition-title">Warning</p>
+<p><a class="reference internal" href="../../drmaa_wrapper_functions.html#drmaa-wrapper-run-job"><em>drmaa_wrapper.run_job()</em></a></p>
+<blockquote class="last">
+<div><p><strong>requires</strong> <tt class="docutils literal"><span class="pre">pipeline_run</span></tt> <a class="reference internal" href="../../pipeline_functions.html#pipeline-functions-pipeline-run"><em>(multithread = NNN)</em></a></p>
+<p><strong>and will not work with</strong> <tt class="docutils literal"><span class="pre">pipeline_run</span></tt> <a class="reference internal" href="../../pipeline_functions.html#pipeline-functions-pipeline-run"><em>(multiprocess = NNN)</em></a></p>
+</div></blockquote>
+</div>
+<dl class="docutils">
+<dt>Using multithreading rather than multiprocessing</dt>
+<dd><ul class="first simple">
+<li>allows the drmaa session to be shared</li>
+<li>prevents “processing storms” which lock up the queue submission node when hundreds or thousands of grid engine / cluster commands complete at the same time.</li>
+</ul>
+<div class="highlight-python"><div class="highlight"><pre><span class="n">pipeline_run</span> <span class="p">(</span><span class="o">...</span><span class="p">,</span> <span class="n">multithread</span> <span class="o">=</span> <span class="n">NNN</span><span class="p">,</span> <span class="o">...</span><span class="p">)</span>
+</pre></div>
+</div>
+<p>or if you are using ruffus.cmdline:</p>
+<div class="last highlight-python"><div class="highlight"><pre><span class="n">cmdline</span><span class="o">.</span><span class="n">run</span> <span class="p">(</span><span class="n">options</span><span class="p">,</span> <span class="n">multithread</span> <span class="o">=</span> <span class="n">options</span><span class="o">.</span><span class="n">jobs</span><span class="p">)</span>
+</pre></div>
+</div>
+</dd>
+</dl>
+<p>Normally, multithreading reduces the amount of parallelism in python because of the python <a class="reference external" href="http://en.wikipedia.org/wiki/Global_Interpreter_Lock">Global Interpreter Lock (GIL)</a>.
+However, as the workload runs almost entirely on another computer (i.e. a cluster / grid engine node) with a separate python interpreter, any cost-benefit calculations of this sort are moot.</p>
+</div></blockquote>
+</div>
+<div class="section" id="develop-locally">
+<h3>5) Develop locally<a class="headerlink" href="#develop-locally" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><p><a class="reference internal" href="../../drmaa_wrapper_functions.html#drmaa-wrapper-run-job"><em>drmaa_wrapper.run_job()</em></a> provides two convenience parameters for developing grid engine pipelines:</p>
+<ul>
+<li><p class="first">commands can run locally, i.e. on the local machine rather than on cluster nodes:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="n">run_job</span><span class="p">(</span><span class="n">cmd_str</span><span class="p">,</span> <span class="n">run_locally</span> <span class="o">=</span> <span class="bp">True</span><span class="p">)</span>
+</pre></div>
+</div>
+</div></blockquote>
+</li>
+<li><p class="first">Output files can be <a class="reference external" href="http://en.wikipedia.org/wiki/Touch_(Unix)">touch</a>ed, i.e. given the appearance of the work having being done without actually running the commands</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="n">run_job</span><span class="p">(</span><span class="n">cmd_str</span><span class="p">,</span> <span class="n">touch_only</span> <span class="o">=</span> <span class="bp">True</span><span class="p">)</span>
+</pre></div>
+</div>
+</div></blockquote>
+</li>
+</ul>
+</div></blockquote>
+</div>
+</div>
+<div class="section" id="forcing-a-pipeline-to-appear-up-to-date">
+<span id="new-manual-pipeline-run-touch"></span><span id="index-4"></span><h2>Forcing a pipeline to appear up to date<a class="headerlink" href="#forcing-a-pipeline-to-appear-up-to-date" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>Sometimes, we <em>know</em> that a pipeline has run to completion and that everything is up-to-date. However, Ruffus still insists, on the basis
+of file modification times, that the pipeline needs to be rerun.</p>
+<p>For example, sometimes a trivial accounting modification needs to be made to a data file.
+Even though you know that this changes nothing in practice, Ruffus will detect the modification and
+ask to rerun everything from that point forwards.</p>
+<p>One way to convince Ruffus that everything is fine is to manually <a class="reference external" href="http://en.wikipedia.org/wiki/Touch_(Unix)">touch</a>
+all subsequent data files one by one in sequence so that the file timestamps follow the appropriate progression.</p>
+<p>You can also ask <em>Ruffus</em> to do this automatically for you by running the pipeline in <a class="reference external" href="http://en.wikipedia.org/wiki/Touch_(Unix)">touch</a>
+mode:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="n">pipeline_run</span><span class="p">(</span> <span class="n">touch_files_only</span> <span class="o">=</span> <span class="bp">True</span><span class="p">)</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p><a class="reference internal" href="../../pipeline_functions.html#pipeline-functions-pipeline-run"><em>pipeline_run</em></a> will run your pipeline script normally working backwards from any specified final target, or else the
+last task in the pipeline. It works out where it should begin running, i.e. with the first out-of-date data files.
+After that point, instead of calling your pipeline task functions, each missing or out-of-date file is
+<a class="reference external" href="http://en.wikipedia.org/wiki/Touch_(Unix)">touch-ed</a> in turn so that the file modification dates
+follow on successively.</p>
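+<p>For example (a sketch only; <tt class="docutils literal"><span class="pre">final_task</span></tt> stands for whichever task function ends your pipeline):</p>
+<div class="highlight-python"><div class="highlight"><pre># touch everything needed to bring final_task up to date,
+# without running any of the task bodies
+pipeline_run([final_task], touch_files_only = True)
+</pre></div>
+</div>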
+<p>This turns out to be a useful way to check that your pipeline runs correctly, by creating a series of dummy (empty) files.
+However, <em>Ruffus</em> cannot read your mind to know which files <a class="reference internal" href="../../decorators/split.html#decorators-split"><em>@split</em></a> or
+<a class="reference internal" href="../../decorators/subdivide.html#decorators-subdivide"><em>@subdivide</em></a> tasks would have created.</p>
+<p>Using <a class="reference internal" href="command_line.html#new-manual-cmdline"><em>ruffus.cmdline</em></a> from version 2.4, you can just specify:</p>
+<blockquote>
+<div><div class="highlight-bash"><div class="highlight"><pre>your script --touch_files_only <span class="o">[</span>--other_options_of_your_own_etc<span class="o">]</span>
+</pre></div>
+</div>
+</div></blockquote>
+</div></blockquote>
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="../../contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#"><strong>Chapter 14</strong>: Multiprocessing, <tt class="docutils literal"><span class="pre">drmaa</span></tt> and Computation Clusters</a><ul>
+<li><a class="reference internal" href="#overview">Overview</a><ul>
+<li><a class="reference internal" href="#multi-processing">Multi Processing</a></li>
+<li><a class="reference internal" href="#data-sharing">Data sharing</a></li>
+</ul>
+</li>
+<li><a class="reference internal" href="#restricting-parallelism-with-jobs-limit">Restricting parallelism with <tt class="docutils literal"><span class="pre">@jobs_limit</span></tt></a></li>
+<li><a class="reference internal" href="#using-drmaa-to-dispatch-work-to-computational-clusters-or-grid-engines-from-ruffus-jobs">Using <tt class="docutils literal"><span class="pre">drmaa</span></tt> to dispatch work to Computational Clusters or Grid engines from Ruffus jobs</a><ul>
+<li><a class="reference internal" href="#use-a-shared-drmaa-session">1) Use a shared drmaa session:</a></li>
+<li><a class="reference internal" href="#import-ruffus-drmaa-wrapper">2) import <tt class="docutils literal"><span class="pre">ruffus.drmaa_wrapper</span></tt></a></li>
+<li><a class="reference internal" href="#call-drmaa-wrapper-run-job">3) call <tt class="docutils literal"><span class="pre">drmaa_wrapper.run_job()</span></tt></a></li>
+<li><a class="reference internal" href="#use-multithread-pipeline-run-multithread-nnn">4) Use multithread: <tt class="docutils literal"><span class="pre">pipeline_run(multithread</span> <span class="pre">=</span> <span class="pre">NNN)</span></tt></a></li>
+<li><a class="reference internal" href="#develop-locally">5) Develop locally</a></li>
+</ul>
+</li>
+<li><a class="reference internal" href="#forcing-a-pipeline-to-appear-up-to-date">Forcing a pipeline to appear up to date</a></li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="merge.html"
+ title="previous chapter"><strong>Chapter 13</strong>: <tt class="docutils literal"><span class="pre">@merge</span></tt> multiple input into a single result</a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="logging.html"
+ title="next chapter"><strong>Chapter 15</strong>: Logging progress through a pipeline</a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../../_sources/tutorials/new_tutorial/multiprocessing.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="../../decorators/decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="../../decorators/originate.html">@originate</a> </li>
+ <li><a href="../../decorators/split.html">@split</a> </li>
+ <li><a href="../../decorators/transform.html">@transform</a> </li>
+ <li><a href="../../decorators/merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="../../decorators/subdivide.html">@subdivide</a> </li>
+ <li><a href="../../decorators/transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="../../decorators/collate.html">@collate</a> </li>
+ <li><a href="../../decorators/collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="../../decorators/graphviz.html">@graphviz</a> </li>
+ <li><a href="../../decorators/mkdir.html">@mkdir</a> </li>
+ <li><a href="../../decorators/follows.html">@follows / mkdir</a> </li>
+ <li><a href="../../decorators/posttask.html">@posttask touch_file</a> </li>
+ <li><a href="../../decorators/active_if.html">@active_if</a> </li>
+ <li><a href="../../decorators/jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="../../decorators/product.html">@product </a> </li>
+ <li><a href="../../decorators/permutations.html">@permutations </a> </li>
+ <li><a href="../../decorators/combinations.html">@combinations </a> </li>
+ <li><a href="../../decorators/combinations_with_replacement.html">@combinations_ _with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="../../decorators/decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="../../decorators/files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="../../decorators/parallel.html">@parallel</a> </li>
+ <li><a href="../../decorators/check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="../../decorators/indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="logging.html" title="Chapter 15: Logging progress through a pipeline"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="merge.html" title="Chapter 13: @merge multiple input into a single result"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="introduction.html">Manual</a> / </li>
+ <li><a href="manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/tutorials/new_tutorial/multiprocessing_code.html b/doc/_build/html/tutorials/new_tutorial/multiprocessing_code.html
new file mode 100644
index 0000000..c4f33bb
--- /dev/null
+++ b/doc/_build/html/tutorials/new_tutorial/multiprocessing_code.html
@@ -0,0 +1,365 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>Chapter 14: Python Code for Multiprocessing, drmaa and Computation Clusters — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../../index.html" />
+ <link rel="next" title="Chapter 15: Python Code for Logging progress through a pipeline" href="logging_code.html" />
+ <link rel="prev" title="Chapter 13: Python Code for @merge multiple input into a single result" href="merge_code.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="logging_code.html" title="Chapter 15: Python Code for Logging progress through a pipeline"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="merge_code.html" title="Chapter 13: Python Code for @merge multiple input into a single result"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="introduction.html">Manual</a> / </li>
+ <li><a href="manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <div class="section" id="new-manual-multiprocessing-chapter-num-python-code-for-multiprocessing-drmaa-and-computation-clusters">
+<span id="new-manual-multiprocessing-code"></span><h1><strong>Chapter 14</strong>: Python Code for Multiprocessing, <tt class="docutils literal"><span class="pre">drmaa</span></tt> and Computation Clusters<a class="headerlink" href="#new-manual-multiprocessing-chapter-num-python-code-for-multiprocessing-drmaa-and-computation-clusters" title="Permalink to this headline">¶</a></h1>
+<div class="admonition seealso">
+<p class="first admonition-title">See also</p>
+<ul class="last simple">
+<li><a class="reference internal" href="manual_contents.html#new-manual-table-of-contents"><em>Manual Table of Contents</em></a></li>
+<li><a class="reference internal" href="../../decorators/jobs_limit.html#decorators-jobs-limit"><em>@jobs_limit</em></a> syntax</li>
+<li><a class="reference internal" href="../../pipeline_functions.html#pipeline-functions-pipeline-run"><em>pipeline_run()</em></a> syntax</li>
+<li><a class="reference internal" href="../../drmaa_wrapper_functions.html#drmaa-wrapper-run-job"><em>drmaa_wrapper.run_job()</em></a> syntax</li>
+<li>Back to <strong>Chapter 14</strong>: <a class="reference internal" href="multiprocessing.html#new-manual-multiprocessing"><em>Multiprocessing, drmaa and Computation Clusters</em></a></li>
+</ul>
+</div>
+<div class="section" id="jobs-limit">
+<h2><a class="reference internal" href="../../decorators/jobs_limit.html#decorators-jobs-limit"><em>@jobs_limit</em></a><a class="headerlink" href="#jobs-limit" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><ul class="simple">
+<li>The first 2 tasks share a limit of 3 concurrent jobs, counted across both tasks</li>
+<li>The final task is limited to 5 concurrent jobs</li>
+<li>The entire pipeline is limited to a (theoretical) maximum of 10 concurrent jobs</li>
+</ul>
+<div class="highlight-python"><div class="highlight"><pre><span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+<span class="kn">import</span> <span class="nn">time</span>
+
+<span class="c"># make list of 10 files</span>
+<span class="nd">@split</span><span class="p">(</span><span class="bp">None</span><span class="p">,</span> <span class="s">"*stage1"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">make_files</span><span class="p">(</span><span class="n">input_files</span><span class="p">,</span> <span class="n">output_files</span><span class="p">):</span>
+ <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">10</span><span class="p">):</span>
+ <span class="k">if</span> <span class="n">i</span> <span class="o"><</span> <span class="mi">5</span><span class="p">:</span>
+ <span class="nb">open</span><span class="p">(</span><span class="s">"</span><span class="si">%d</span><span class="s">.small_stage1"</span> <span class="o">%</span> <span class="n">i</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="nb">open</span><span class="p">(</span><span class="s">"</span><span class="si">%d</span><span class="s">.big_stage1"</span> <span class="o">%</span> <span class="n">i</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+<span class="hll">
+</span><span class="nd">@jobs_limit</span><span class="p">(</span><span class="mi">3</span><span class="p">,</span> <span class="s">"ftp_download_limit"</span><span class="p">)</span>
+<span class="nd">@transform</span><span class="p">(</span><span class="n">make_files</span><span class="p">,</span> <span class="n">suffix</span><span class="p">(</span><span class="s">".small_stage1"</span><span class="p">),</span> <span class="s">".stage2"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">stage1_small</span><span class="p">(</span><span class="n">input_file</span><span class="p">,</span> <span class="n">output_file</span><span class="p">):</span>
+ <span class="k">print</span> <span class="s">"FTP downloading </span><span class="si">%s</span><span class="s"> ->Start"</span> <span class="o">%</span> <span class="n">input_file</span>
+<span class="hll"> <span class="n">time</span><span class="o">.</span><span class="n">sleep</span><span class="p">(</span><span class="mi">2</span><span class="p">)</span>
+</span> <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+ <span class="k">print</span> <span class="s">"FTP downloading </span><span class="si">%s</span><span class="s"> ->Finished"</span> <span class="o">%</span> <span class="n">input_file</span>
+
+<span class="nd">@jobs_limit</span><span class="p">(</span><span class="mi">3</span><span class="p">,</span> <span class="s">"ftp_download_limit"</span><span class="p">)</span>
+<span class="hll"><span class="nd">@transform</span><span class="p">(</span><span class="n">make_files</span><span class="p">,</span> <span class="n">suffix</span><span class="p">(</span><span class="s">".big_stage1"</span><span class="p">),</span> <span class="s">".stage2"</span><span class="p">)</span>
+</span><span class="k">def</span> <span class="nf">stage1_big</span><span class="p">(</span><span class="n">input_file</span><span class="p">,</span> <span class="n">output_file</span><span class="p">):</span>
+ <span class="k">print</span> <span class="s">"FTP downloading </span><span class="si">%s</span><span class="s"> ->Start"</span> <span class="o">%</span> <span class="n">input_file</span>
+ <span class="n">time</span><span class="o">.</span><span class="n">sleep</span><span class="p">(</span><span class="mi">2</span><span class="p">)</span>
+ <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+ <span class="k">print</span> <span class="s">"FTP downloading </span><span class="si">%s</span><span class="s"> ->Finished"</span> <span class="o">%</span> <span class="n">input_file</span>
+
+<span class="nd">@jobs_limit</span><span class="p">(</span><span class="mi">5</span><span class="p">)</span>
+<span class="nd">@transform</span><span class="p">([</span><span class="n">stage1_small</span><span class="p">,</span> <span class="n">stage1_big</span><span class="p">],</span> <span class="n">suffix</span><span class="p">(</span><span class="s">".stage2"</span><span class="p">),</span> <span class="s">".stage3"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">stage2</span><span class="p">(</span><span class="n">input_file</span><span class="p">,</span> <span class="n">output_file</span><span class="p">):</span>
+ <span class="k">print</span> <span class="s">"Processing stage2 </span><span class="si">%s</span><span class="s"> ->Start"</span> <span class="o">%</span> <span class="n">input_file</span>
+ <span class="n">time</span><span class="o">.</span><span class="n">sleep</span><span class="p">(</span><span class="mi">2</span><span class="p">)</span>
+ <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+ <span class="k">print</span> <span class="s">"Processing stage2 </span><span class="si">%s</span><span class="s"> ->Finished"</span> <span class="o">%</span> <span class="n">input_file</span>
+
+<span class="n">pipeline_run</span><span class="p">(</span><span class="n">multiprocess</span> <span class="o">=</span> <span class="mi">10</span><span class="p">,</span> <span class="n">verbose</span> <span class="o">=</span> <span class="mi">0</span><span class="p">)</span>
+</pre></div>
+</div>
+<p>Giving:</p>
+<div class="highlight-pycon"><div class="highlight"><pre><span class="gp">>>> </span><span class="n">pipeline_run</span><span class="p">(</span><span class="n">multiprocess</span> <span class="o">=</span> <span class="mi">10</span><span class="p">,</span> <span class="n">verbose</span> <span class="o">=</span> <span class="mi">0</span><span class="p">)</span>
+
+<span class="hll"><span class="gp">>>> </span><span class="c"># 3 jobs at a time, interleaved</span>
+</span><span class="go">FTP downloading 5.big_stage1 ->Start</span>
+<span class="go">FTP downloading 6.big_stage1 ->Start</span>
+<span class="go">FTP downloading 7.big_stage1 ->Start</span>
+<span class="go">FTP downloading 5.big_stage1 ->Finished</span>
+<span class="go">FTP downloading 8.big_stage1 ->Start</span>
+<span class="go">FTP downloading 6.big_stage1 ->Finished</span>
+<span class="go">FTP downloading 9.big_stage1 ->Start</span>
+<span class="go">FTP downloading 7.big_stage1 ->Finished</span>
+<span class="go">FTP downloading 0.small_stage1 ->Start</span>
+<span class="go">FTP downloading 8.big_stage1 ->Finished</span>
+<span class="go">FTP downloading 1.small_stage1 ->Start</span>
+<span class="go">FTP downloading 9.big_stage1 ->Finished</span>
+<span class="go">FTP downloading 2.small_stage1 ->Start</span>
+<span class="go">FTP downloading 0.small_stage1 ->Finished</span>
+<span class="go">FTP downloading 3.small_stage1 ->Start</span>
+<span class="go">FTP downloading 1.small_stage1 ->Finished</span>
+<span class="go">FTP downloading 4.small_stage1 ->Start</span>
+<span class="go">FTP downloading 2.small_stage1 ->Finished</span>
+<span class="go">FTP downloading 3.small_stage1 ->Finished</span>
+<span class="go">FTP downloading 4.small_stage1 ->Finished</span>
+
+<span class="hll"><span class="gp">>>> </span><span class="c"># 5 jobs at a time, interleaved</span>
+</span><span class="go">Processing stage2 0.stage2 ->Start</span>
+<span class="go">Processing stage2 1.stage2 ->Start</span>
+<span class="go">Processing stage2 2.stage2 ->Start</span>
+<span class="go">Processing stage2 3.stage2 ->Start</span>
+<span class="go">Processing stage2 4.stage2 ->Start</span>
+<span class="go">Processing stage2 0.stage2 ->Finished</span>
+<span class="go">Processing stage2 5.stage2 ->Start</span>
+<span class="go">Processing stage2 1.stage2 ->Finished</span>
+<span class="go">Processing stage2 6.stage2 ->Start</span>
+<span class="go">Processing stage2 2.stage2 ->Finished</span>
+<span class="go">Processing stage2 4.stage2 ->Finished</span>
+<span class="go">Processing stage2 7.stage2 ->Start</span>
+<span class="go">Processing stage2 8.stage2 ->Start</span>
+<span class="go">Processing stage2 3.stage2 ->Finished</span>
+<span class="go">Processing stage2 9.stage2 ->Start</span>
+<span class="go">Processing stage2 5.stage2 ->Finished</span>
+<span class="go">Processing stage2 7.stage2 ->Finished</span>
+<span class="go">Processing stage2 6.stage2 ->Finished</span>
+<span class="go">Processing stage2 8.stage2 ->Finished</span>
+<span class="go">Processing stage2 9.stage2 ->Finished</span>
+</pre></div>
+</div>
+</div></blockquote>
+</div>
+<div class="section" id="using-ruffus-drmaa-wrapper">
+<span id="id1"></span><h2>Using <tt class="docutils literal"><span class="pre">ruffus.drmaa_wrapper</span></tt><a class="headerlink" href="#using-ruffus-drmaa-wrapper" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="c">#!/usr/bin/python</span>
+<span class="n">job_queue_name</span> <span class="o">=</span> <span class="s">"YOUR_QUEUE_NAME_GOES_HERE"</span>
+<span class="n">job_other_options</span> <span class="o">=</span> <span class="s">"-P YOUR_PROJECT_NAME_GOES_HERE"</span>
+
+<span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+<span class="kn">from</span> <span class="nn">ruffus.drmaa_wrapper</span> <span class="kn">import</span> <span class="n">run_job</span><span class="p">,</span> <span class="n">error_drmaa_job</span>
+
+<span class="n">parser</span> <span class="o">=</span> <span class="n">cmdline</span><span class="o">.</span><span class="n">get_argparse</span><span class="p">(</span><span class="n">description</span><span class="o">=</span><span class="s">'WHAT DOES THIS PIPELINE DO?'</span><span class="p">)</span>
+
+<span class="n">options</span> <span class="o">=</span> <span class="n">parser</span><span class="o">.</span><span class="n">parse_args</span><span class="p">()</span>
+
+<span class="c"># logger which can be passed to multiprocessing ruffus tasks</span>
+<span class="n">logger</span><span class="p">,</span> <span class="n">logger_mutex</span> <span class="o">=</span> <span class="n">cmdline</span><span class="o">.</span><span class="n">setup_logging</span> <span class="p">(</span><span class="n">__name__</span><span class="p">,</span> <span class="n">options</span><span class="o">.</span><span class="n">log_file</span><span class="p">,</span> <span class="n">options</span><span class="o">.</span><span class="n">verbose</span><span class= [...]
+
+
+<span class="c">#</span>
+<span class="hll"><span class="c"># start shared drmaa session for all jobs / tasks in pipeline</span>
+</span><span class="c">#</span>
+<span class="kn">import</span> <span class="nn">drmaa</span>
+<span class="n">drmaa_session</span> <span class="o">=</span> <span class="n">drmaa</span><span class="o">.</span><span class="n">Session</span><span class="p">()</span>
+<span class="n">drmaa_session</span><span class="o">.</span><span class="n">initialize</span><span class="p">()</span>
+
+<span class="nd">@originate</span><span class="p">([</span><span class="s">"1.chromosome"</span><span class="p">,</span> <span class="s">"X.chromosome"</span><span class="p">],</span>
+ <span class="n">logger</span><span class="p">,</span> <span class="n">logger_mutex</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">create_test_files</span><span class="p">(</span><span class="n">output_file</span><span class="p">):</span>
+ <span class="k">try</span><span class="p">:</span>
+ <span class="n">stdout_res</span><span class="p">,</span> <span class="n">stderr_res</span> <span class="o">=</span> <span class="s">""</span><span class="p">,</span><span class="s">""</span>
+        <span class="c"># job_queue_name and job_other_options are set at module level above</span>
+
+ <span class="c">#</span>
+<span class="hll"> <span class="c"># ruffus.drmaa_wrapper.run_job</span>
+</span> <span class="c">#</span>
+ <span class="n">stdout_res</span><span class="p">,</span> <span class="n">stderr_res</span> <span class="o">=</span> <span class="n">run_job</span><span class="p">(</span><span class="n">cmd_str</span> <span class="o">=</span> <span class="s">"touch "</span> <span class="o">+</span> <span class="n">output_file</span><span class="p">,</span>
+                                     <span class="n">job_name</span>          <span class="o">=</span> <span class="s">"create_test_files"</span><span class="p">,</span>
+ <span class="n">logger</span> <span class="o">=</span> <span class="n">logger</span><span class="p">,</span>
+ <span class="n">drmaa_session</span> <span class="o">=</span> <span class="n">drmaa_session</span><span class="p">,</span>
+ <span class="n">run_locally</span> <span class="o">=</span> <span class="n">options</span><span class="o">.</span><span class="n">local_run</span><span class="p">,</span>
+ <span class="n">job_queue_name</span> <span class="o">=</span> <span class="n">job_queue_name</span><span class="p">,</span>
+ <span class="n">job_other_options</span> <span class="o">=</span> <span class="n">job_other_options</span><span class="p">)</span>
+
+ <span class="c"># relay all the stdout, stderr, drmaa output to diagnose failures</span>
+ <span class="k">except</span> <span class="n">error_drmaa_job</span> <span class="k">as</span> <span class="n">err</span><span class="p">:</span>
+ <span class="k">raise</span> <span class="ne">Exception</span><span class="p">(</span><span class="s">"</span><span class="se">\n</span><span class="s">"</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="nb">map</span><span class="p">(</span><span class="nb">str</span><span class="p">,</span>
+                                             <span class="p">[</span><span class="s">"Failed to run:"</span><span class="p">,</span>
+                                              <span class="n">cmd_str</span><span class="p">,</span>
+                                              <span class="n">err</span><span class="p">,</span>
+                                              <span class="n">stdout_res</span><span class="p">,</span>
+                                              <span class="n">stderr_res</span><span class="p">])))</span>
+
+
+<span class="k">if</span> <span class="n">__name__</span> <span class="o">==</span> <span class="s">'__main__'</span><span class="p">:</span>
+ <span class="n">cmdline</span><span class="o">.</span><span class="n">run</span> <span class="p">(</span><span class="n">options</span><span class="p">,</span> <span class="n">multithread</span> <span class="o">=</span> <span class="n">options</span><span class="o">.</span><span class="n">jobs</span><span class="p">)</span>
+<span class="hll"> <span class="c"># cleanup drmaa</span>
+</span> <span class="n">drmaa_session</span><span class="o">.</span><span class="n">exit</span><span class="p">()</span>
+</pre></div>
+</div>
+</div></blockquote>
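+<p>Note that, as written above, an exception escaping <tt class="docutils literal"><span class="pre">cmdline.run()</span></tt>
+would skip the final <tt class="docutils literal"><span class="pre">drmaa_session.exit()</span></tt>. A minimal sketch of a
+more robust closing block, assuming the same names as in the example:</p>
+<div class="highlight-python"><pre>if __name__ == '__main__':
+    try:
+        cmdline.run (options, multithread = options.jobs)
+    finally:
+        # always tear down the shared drmaa session, even if the pipeline fails
+        drmaa_session.exit()</pre>
+</div>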
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="../../contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#"><strong>Chapter 14</strong>: Python Code for Multiprocessing, <tt class="docutils literal"><span class="pre">drmaa</span></tt> and Computation Clusters</a><ul>
+<li><a class="reference internal" href="#jobs-limit"><tt class="docutils literal"><span class="pre">@jobs_limit</span></tt></a></li>
+<li><a class="reference internal" href="#using-ruffus-drmaa-wrapper">Using <tt class="docutils literal"><span class="pre">ruffus.drmaa_wrapper</span></tt></a></li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="merge_code.html"
+ title="previous chapter"><strong>Chapter 13</strong>: Python Code for <tt class="docutils literal"><span class="pre">@merge</span></tt> multiple input into a single result</a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="logging_code.html"
+ title="next chapter"><strong>Chapter 15</strong>: Python Code for Logging progress through a pipeline</a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../../_sources/tutorials/new_tutorial/multiprocessing_code.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="../../decorators/decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="../../decorators/originate.html">@originate</a> </li>
+ <li><a href="../../decorators/split.html">@split</a> </li>
+ <li><a href="../../decorators/transform.html">@transform</a> </li>
+ <li><a href="../../decorators/merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="../../decorators/subdivide.html">@subdivide</a> </li>
+ <li><a href="../../decorators/transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="../../decorators/collate.html">@collate</a> </li>
+ <li><a href="../../decorators/collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="../../decorators/graphviz.html">@graphviz</a> </li>
+ <li><a href="../../decorators/mkdir.html">@mkdir</a> </li>
+ <li><a href="../../decorators/follows.html">@follows / mkdir</a> </li>
+ <li><a href="../../decorators/posttask.html">@posttask touch_file</a> </li>
+ <li><a href="../../decorators/active_if.html">@active_if</a> </li>
+ <li><a href="../../decorators/jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="../../decorators/product.html">@product </a> </li>
+ <li><a href="../../decorators/permutations.html">@permutations </a> </li>
+ <li><a href="../../decorators/combinations.html">@combinations </a> </li>
+                                        <li><a href="../../decorators/combinations_with_replacement.html">@combinations_with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="../../decorators/decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="../../decorators/files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="../../decorators/parallel.html">@parallel</a> </li>
+ <li><a href="../../decorators/check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="../../decorators/indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="logging_code.html" title="Chapter 15: Python Code for Logging progress through a pipeline"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="merge_code.html" title="Chapter 13: Python Code for @merge multiple input into a single result"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="introduction.html">Manual</a> / </li>
+ <li><a href="manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/tutorials/new_tutorial/onthefly.html b/doc/_build/html/tutorials/new_tutorial/onthefly.html
new file mode 100644
index 0000000..da47624
--- /dev/null
+++ b/doc/_build/html/tutorials/new_tutorial/onthefly.html
@@ -0,0 +1,363 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>Chapter 21: Esoteric: Generating parameters on the fly with @files — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../../index.html" />
+ <link rel="next" title="Chapter 22: Esoteric: Running jobs in parallel without files using @parallel" href="parallel.html" />
+ <link rel="prev" title="Chapter 20: Manipulating task inputs via string substitution using inputs() and add_inputs()" href="inputs.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="parallel.html" title="Chapter 22: Esoteric: Running jobs in parallel without files using @parallel"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="inputs.html" title="Chapter 20: Manipulating task inputs via string substitution using inputs() and add_inputs()"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="introduction.html">Manual</a> / </li>
+ <li><a href="manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <div class="section" id="new-manual-on-the-fly-chapter-num-esoteric-generating-parameters-on-the-fly-with-files">
+<span id="new-manual-on-the-fly"></span><span id="index-0"></span><h1><strong>Chapter 21</strong>: Esoteric: Generating parameters on the fly with <a class="reference internal" href="../../decorators/files_ex.html#decorators-files-on-the-fly"><em>@files</em></a><a class="headerlink" href="#new-manual-on-the-fly-chapter-num-esoteric-generating-parameters-on-the-fly-with-files" title="Permalink to this headline">¶</a></h1>
+<div class="admonition seealso">
+<p class="first admonition-title">See also</p>
+<ul class="last simple">
+<li><a class="reference internal" href="manual_contents.html#new-manual-table-of-contents"><em>Manual Table of Contents</em></a></li>
+<li><a class="reference internal" href="../../decorators/files_ex.html#decorators-files-on-the-fly"><em>@files on-the-fly syntax in detail</em></a></li>
+</ul>
+</div>
+<div class="admonition note">
+<p class="first admonition-title">Note</p>
+<p>Remember to look at the example code:</p>
+<ul class="last simple">
+<li><a class="reference internal" href="onthefly_code.html#new-manual-on-the-fly-code"><em>Chapter 21: Esoteric: Python Code for Generating parameters on the fly with @files</em></a></li>
+</ul>
+</div>
+<div class="section" id="overview">
+<h2>Overview<a class="headerlink" href="#overview" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>The different <em>Ruffus</em> <a class="reference internal" href="../../decorators/decorators.html#decorators"><em>decorators</em></a> connect up different tasks and
+generate <em>Output</em> (file names) from your <em>Input</em> in all sorts of different ways.</p>
+<p>Sometimes, however, none of them do <em>quite</em> what you need, and it becomes
+necessary to generate your own <em>Input</em> and <em>Output</em> parameters on the fly.</p>
+<p>This extra flexibility comes at the cost of writing more of the plumbing yourself,
+but you can continue to leverage the rest of <em>Ruffus</em> functionality, such as
+checking whether files are up to date.</p>
+</div></blockquote>
+</div>
+<div class="section" id="files-syntax">
+<span id="index-1"></span><h2><a class="reference internal" href="../../decorators/files_ex.html#decorators-files-on-the-fly"><em>@files</em></a> syntax<a class="headerlink" href="#files-syntax" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>To generate parameters on the fly, use the <a class="reference internal" href="../../decorators/files_ex.html#decorators-files-on-the-fly"><em>@files</em></a> decorator
+with a <a class="reference internal" href="../../glossary.html#term-generator"><em class="xref std std-term">generator</em></a> function which yields one list / tuple of parameters per job.</p>
+<p>For example:</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+
+<span class="hll"><span class="c"># generator function</span>
+</span><span class="k">def</span> <span class="nf">generate_parameters_on_the_fly</span><span class="p">():</span>
+ <span class="sd">"""</span>
+<span class="sd"> returns one list of parameters per job</span>
+<span class="sd"> """</span>
+ <span class="n">parameters</span> <span class="o">=</span> <span class="p">[</span>
+ <span class="p">[</span><span class="s">'A.input'</span><span class="p">,</span> <span class="s">'A.output'</span><span class="p">,</span> <span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">)],</span> <span class="c"># 1st job</span>
+ <span class="p">[</span><span class="s">'B.input'</span><span class="p">,</span> <span class="s">'B.output'</span><span class="p">,</span> <span class="p">(</span><span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">)],</span> <span class="c"># 2nd job</span>
+ <span class="p">[</span><span class="s">'C.input'</span><span class="p">,</span> <span class="s">'C.output'</span><span class="p">,</span> <span class="p">(</span><span class="mi">5</span><span class="p">,</span> <span class="mi">6</span><span class="p">)],</span> <span class="c"># 3rd job</span>
+ <span class="p">]</span>
+ <span class="k">for</span> <span class="n">job_parameters</span> <span class="ow">in</span> <span class="n">parameters</span><span class="p">:</span>
+ <span class="k">yield</span> <span class="n">job_parameters</span>
+
+<span class="hll"><span class="c"># tell ruffus that parameters should be generated on the fly</span>
+</span><span class="nd">@files</span><span class="p">(</span><span class="n">generate_parameters_on_the_fly</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">pipeline_task</span><span class="p">(</span><span class="nb">input</span><span class="p">,</span> <span class="n">output</span><span class="p">,</span> <span class="n">extra</span><span class="p">):</span>
+ <span class="nb">open</span><span class="p">(</span><span class="n">output</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="nb">open</span><span class="p">(</span><span class="nb">input</span><span class="p">)</span><span class="o">.</span><span class="n">read</span><span class="p">())</span>
+    <span class="n">sys</span><span class="o">.</span><span class="n">stderr</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="s">"</span><span class="si">%d</span><span class="s"> + </span><span class="si">%d</span><span class="s"> => </span><span class="si">%d</span><span class="se">\n</span><span class="s">"</span> <span class="o">%</span> <span class="p">(</span><span class="n">extra</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="n">extra</span><span class="p">[</span><span class="mi">1</span><span class="p">],</span> <span class="n">extra</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="o">+</span> <span class="n">extra</span><span class="p">[</span><span class="mi">1</span><span class="p">]))</span>
+
+<span class="n">pipeline_run</span><span class="p">()</span>
+</pre></div>
+</div>
+<p>Produces:</p>
+<blockquote>
+<div><div class="highlight-pycon"><div class="highlight"><pre>
+</pre></div>
+</div>
+<dl class="docutils">
+<dt>Task = parallel_task</dt>
+<dd>1 + 2 = 3
+Job = [“A”, 1, 2] completed
+3 + 4 = 7
+Job = [“B”, 3, 4] completed
+5 + 6 = 11
+Job = [“C”, 5, 6] completed</dd>
+</dl>
+</div></blockquote>
+<div class="admonition note">
+<p class="first admonition-title">Note</p>
+<p class="last">Be aware that the parameter generating function may be invoked
+<a class="reference internal" href="dependencies.html#new-manual-dependencies-checking-multiple-times"><em>more than once</em></a>:
+* The first time to check if this part of the pipeline is up-to-date.
+* The second time when the pipeline task function is run.</p>
+</div>
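+<p>Because of this, it is safest to keep the generator free of side effects and let it
+recompute its parameters on every call. A minimal sketch, reusing the file names from
+the example above:</p>
+<div class="highlight-python"><pre>def generate_parameters_on_the_fly():
+    # Recomputed on every call: Ruffus may invoke this generator more than once,
+    # so avoid mutating global state or consuming one-shot resources here.
+    for input_file, output_file, extras in [("A.input", "A.output", (1, 2)),
+                                            ("B.input", "B.output", (3, 4))]:
+        yield input_file, output_file, extras</pre>
+</div>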
+<p>The resulting custom <em>inputs</em>, <em>outputs</em> parameters per job are
+treated normally for the purposes of checking to see if jobs are up-to-date and
+need to be re-run.</p>
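+<p>For example, a dry run with <tt class="docutils literal"><span class="pre">pipeline_printout()</span></tt> calls the
+generator and reports which of the yielded jobs are out of date, without running anything
+(a minimal sketch; the verbose level is an arbitrary choice):</p>
+<div class="highlight-python"><pre>import sys
+# List the out-of-date jobs that pipeline_run() would execute, using the
+# same generated parameters; higher verbose levels show individual jobs.
+pipeline_printout(sys.stdout, [pipeline_task], verbose = 3)</pre>
+</div>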
+</div></blockquote>
+</div>
+<div class="section" id="a-cartesian-product-all-vs-all-example">
+<h2>A Cartesian Product, all vs all example<a class="headerlink" href="#a-cartesian-product-all-vs-all-example" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>The <a class="reference internal" href="onthefly_code.html#new-manual-on-the-fly-code"><em>accompanying example</em></a> provides a more realistic reason why
+you would want to generate parameters on the fly. It is a fun piece of code, which generates
+N x M combinations from two sets of files as the <em>inputs</em> of a pipeline stage.</p>
+<p>The <em>inputs</em> / <em>outputs</em> filenames are generated by a pair of nested for-loops to produce
+the N (outside loop) x M (inside loop) combinations, with the appropriate parameters
+for each job <tt class="docutils literal"><span class="pre">yield</span></tt>ed per iteration of the inner loop. The gist of this is:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="c">#_________________________________________________________________________________________</span>
+<span class="c">#</span>
+<span class="hll"><span class="c"># Generator function</span>
+</span><span class="c">#</span>
+<span class="c"># N x M jobs</span>
+<span class="c">#_________________________________________________________________________________________</span>
+<span class="k">def</span> <span class="nf">generate_simulation_params</span> <span class="p">():</span>
+ <span class="sd">"""</span>
+<span class="sd"> Custom function to generate</span>
+<span class="sd"> file names for gene/gwas simulation study</span>
+<span class="sd"> """</span>
+ <span class="k">for</span> <span class="n">sim_file</span> <span class="ow">in</span> <span class="n">get_simulation_files</span><span class="p">():</span>
+ <span class="k">for</span> <span class="p">(</span><span class="n">gene</span><span class="p">,</span> <span class="n">gwas</span><span class="p">)</span> <span class="ow">in</span> <span class="n">get_gene_gwas_file_pairs</span><span class="p">():</span>
+ <span class="n">result_file</span> <span class="o">=</span> <span class="s">"</span><span class="si">%s</span><span class="s">.</span><span class="si">%s</span><span class="s">.results"</span> <span class="o">%</span> <span class="p">(</span><span class="n">gene</span><span class="p">,</span> <span class="n">sim_file</span><span class="p">)</span>
+ <span class="k">yield</span> <span class="p">(</span><span class="n">gene</span><span class="p">,</span> <span class="n">gwas</span><span class="p">,</span> <span class="n">sim_file</span><span class="p">),</span> <span class="n">result_file</span>
+
+
+
+<span class="nd">@files</span><span class="p">(</span><span class="n">generate_simulation_params</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">gwas_simulation</span><span class="p">(</span><span class="n">input_files</span><span class="p">,</span> <span class="n">output_file</span><span class="p">):</span>
+ <span class="s">"..."</span>
+</pre></div>
+</div>
+<dl class="docutils">
+<dt>If <tt class="docutils literal"><span class="pre">get_gene_gwas_file_pairs()</span></tt> produces:</dt>
+<dd><div class="first last highlight-python"><div class="highlight"><pre><span class="p">[</span><span class="s">'a.sim'</span><span class="p">,</span> <span class="s">'b.sim'</span><span class="p">,</span> <span class="s">'c.sim'</span><span class="p">]</span>
+</pre></div>
+</div>
+</dd>
+<dt>and <tt class="docutils literal"><span class="pre">get_gene_gwas_file_pairs()</span></tt> produces:</dt>
+<dd><div class="first last highlight-python"><div class="highlight"><pre><span class="p">[(</span><span class="s">'1.gene'</span><span class="p">,</span> <span class="s">'1.gwas'</span><span class="p">),</span> <span class="p">(</span><span class="s">'2.gene'</span><span class="p">,</span> <span class="s">'2.gwas'</span><span class="p">)]</span>
+</pre></div>
+</div>
+</dd>
+</dl>
+<p>then we would end up with <tt class="docutils literal"><span class="pre">3</span></tt> x <tt class="docutils literal"><span class="pre">2</span></tt> = <tt class="docutils literal"><span class="pre">6</span></tt> jobs and the following equivalent function calls:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="n">gwas_simulation</span><span class="p">((</span><span class="s">'1.gene'</span><span class="p">,</span> <span class="s">'1.gwas'</span><span class="p">,</span> <span class="s">'a.sim'</span><span class="p">),</span> <span class="s">"1.gene.a.sim.results"</span><span class="p">)</span>
+<span class="n">gwas_simulation</span><span class="p">((</span><span class="s">'2.gene'</span><span class="p">,</span> <span class="s">'2.gwas'</span><span class="p">,</span> <span class="s">'a.sim'</span><span class="p">),</span> <span class="s">"2.gene.a.sim.results"</span><span class="p">)</span>
+<span class="n">gwas_simulation</span><span class="p">((</span><span class="s">'1.gene'</span><span class="p">,</span> <span class="s">'1.gwas'</span><span class="p">,</span> <span class="s">'b.sim'</span><span class="p">),</span> <span class="s">"1.gene.b.sim.results"</span><span class="p">)</span>
+<span class="n">gwas_simulation</span><span class="p">((</span><span class="s">'2.gene'</span><span class="p">,</span> <span class="s">'2.gwas'</span><span class="p">,</span> <span class="s">'b.sim'</span><span class="p">),</span> <span class="s">"2.gene.b.sim.results"</span><span class="p">)</span>
+<span class="n">gwas_simulation</span><span class="p">((</span><span class="s">'1.gene'</span><span class="p">,</span> <span class="s">'1.gwas'</span><span class="p">,</span> <span class="s">'c.sim'</span><span class="p">),</span> <span class="s">"1.gene.c.sim.results"</span><span class="p">)</span>
+<span class="n">gwas_simulation</span><span class="p">((</span><span class="s">'2.gene'</span><span class="p">,</span> <span class="s">'2.gwas'</span><span class="p">,</span> <span class="s">'c.sim'</span><span class="p">),</span> <span class="s">"2.gene.c.sim.results"</span><span class="p">)</span>
+</pre></div>
+</div>
+</div></blockquote>
+</div></blockquote>
+<p>The <a class="reference internal" href="onthefly_code.html#new-manual-on-the-fly-code"><em>accompanying code</em></a> looks slightly more complicated because
+of some extra bookkeeping.</p>
+<p>You can compare this approach with the alternative of using <a class="reference internal" href="../../decorators/product.html#decorators-product"><em>@product</em></a>:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="c">#_________________________________________________________________________________________</span>
+<span class="c">#</span>
+<span class="hll"><span class="c"># N x M jobs</span>
+</span><span class="c">#_________________________________________________________________________________________</span>
+<span class="nd">@product</span><span class="p">(</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">simulation_data_dir</span><span class="p">,</span> <span class="s">"*.simulation"</span><span class="p">),</span>
+ <span class="n">formatter</span><span class="p">(),</span>
+
+ <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">gene_data_dir</span><span class="p">,</span> <span class="s">"*.gene"</span><span class="p">),</span>
+ <span class="n">formatter</span><span class="p">(),</span>
+
+                 <span class="c"># add gwas as an input: looks like *.gene but with a different extension</span>
+                 <span class="n">add_inputs</span><span class="p">(</span><span class="s">"{path[1][0]}/{basename[1][0]}.gwas"</span><span class="p">),</span>
+
+ <span class="s">"{basename[0][0]}.{basename[1][0]}.results"</span><span class="p">)</span> <span class="c"># output file</span>
+<span class="k">def</span> <span class="nf">gwas_simulation</span><span class="p">(</span><span class="n">input_files</span><span class="p">,</span> <span class="n">output_file</span><span class="p">):</span>
+ <span class="s">"..."</span>
+</pre></div>
+</div>
+</div></blockquote>
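+<p>Whichever formulation you choose, the task is run in the usual way and up-to-date
+checking behaves the same. A minimal invocation sketch (the number of concurrent jobs is
+illustrative):</p>
+<div class="highlight-python"><pre># Sketch only: run the N x M task with several concurrent jobs.
+pipeline_run([gwas_simulation], multiprocess = 4)</pre>
+</div>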
+</div></blockquote>
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="../../contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#"><strong>Chapter 21</strong>: Esoteric: Generating parameters on the fly with <tt class="docutils literal"><span class="pre">@files</span></tt></a><ul>
+<li><a class="reference internal" href="#overview">Overview</a></li>
+<li><a class="reference internal" href="#files-syntax"><tt class="docutils literal"><span class="pre">@files</span></tt> syntax</a></li>
+<li><a class="reference internal" href="#a-cartesian-product-all-vs-all-example">A Cartesian Product, all vs all example</a></li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="inputs.html"
+ title="previous chapter"><strong>Chapter 20</strong>: Manipulating task inputs via string substitution using <tt class="docutils literal"><span class="pre">inputs()</span></tt> and <tt class="docutils literal"><span class="pre">add_inputs()</span></tt></a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="parallel.html"
+ title="next chapter"><strong>Chapter 22</strong>: Esoteric: Running jobs in parallel without files using <tt class="docutils literal"><span class="pre">@parallel</span></tt></a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../../_sources/tutorials/new_tutorial/onthefly.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="../../decorators/decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="../../decorators/originate.html">@originate</a> </li>
+ <li><a href="../../decorators/split.html">@split</a> </li>
+ <li><a href="../../decorators/transform.html">@transform</a> </li>
+ <li><a href="../../decorators/merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="../../decorators/subdivide.html">@subdivide</a> </li>
+ <li><a href="../../decorators/transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="../../decorators/collate.html">@collate</a> </li>
+ <li><a href="../../decorators/collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="../../decorators/graphviz.html">@graphviz</a> </li>
+ <li><a href="../../decorators/mkdir.html">@mkdir</a> </li>
+ <li><a href="../../decorators/follows.html">@follows / mkdir</a> </li>
+ <li><a href="../../decorators/posttask.html">@posttask touch_file</a> </li>
+ <li><a href="../../decorators/active_if.html">@active_if</a> </li>
+ <li><a href="../../decorators/jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="../../decorators/product.html">@product </a> </li>
+ <li><a href="../../decorators/permutations.html">@permutations </a> </li>
+ <li><a href="../../decorators/combinations.html">@combinations </a> </li>
+                                        <li><a href="../../decorators/combinations_with_replacement.html">@combinations_with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="../../decorators/decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="../../decorators/files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="../../decorators/parallel.html">@parallel</a> </li>
+ <li><a href="../../decorators/check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="../../decorators/indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="parallel.html" title="Chapter 22: Esoteric: Running jobs in parallel without files using @parallel"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="inputs.html" title="Chapter 20: Manipulating task inputs via string substitution using inputs() and add_inputs()"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="introduction.html">Manual</a> / </li>
+ <li><a href="manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/tutorials/new_tutorial/onthefly_code.html b/doc/_build/html/tutorials/new_tutorial/onthefly_code.html
new file mode 100644
index 0000000..57a038b
--- /dev/null
+++ b/doc/_build/html/tutorials/new_tutorial/onthefly_code.html
@@ -0,0 +1,514 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>Chapter 21: Esoteric: Python Code for Generating parameters on the fly with @files — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../../index.html" />
+ <link rel="next" title="Appendix 1: Python code for Flow Chart Colours with pipeline_printout_graph(...)" href="flowchart_colours_code.html" />
+ <link rel="prev" title="Chapter 20: Python Code for Manipulating task inputs via string substitution using inputs() and add_inputs()" href="inputs_code.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="flowchart_colours_code.html" title="Appendix 1: Python code for Flow Chart Colours with pipeline_printout_graph(...)"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="inputs_code.html" title="Chapter 20: Python Code for Manipulating task inputs via string substitution using inputs() and add_inputs()"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="introduction.html">Manual</a> / </li>
+ <li><a href="manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <div class="section" id="new-manual-on-the-fly-chapter-num-esoteric-python-code-for-generating-parameters-on-the-fly-with-files">
+<span id="new-manual-on-the-fly-code"></span><h1><strong>Chapter 21</strong>: Esoteric: Python Code for Generating parameters on the fly with <a class="reference internal" href="../../decorators/files_ex.html#decorators-files-on-the-fly"><em>@files</em></a><a class="headerlink" href="#new-manual-on-the-fly-chapter-num-esoteric-python-code-for-generating-parameters-on-the-fly-with-files" title="Permalink to this headline">¶</a></h1>
+<div class="admonition seealso">
+<p class="first admonition-title">See also</p>
+<ul class="last simple">
+<li><a class="reference internal" href="manual_contents.html#new-manual-table-of-contents"><em>Manual Table of Contents</em></a></li>
+<li><a class="reference internal" href="../../decorators/files_ex.html#decorators-files-on-the-fly"><em>@files on-the-fly syntax in detail</em></a></li>
+<li>Back to <strong>Chapter 21</strong>: <a class="reference internal" href="onthefly.html#new-manual-on-the-fly"><em>Generating parameters on the fly</em></a></li>
+</ul>
+</div>
+<div class="section" id="introduction">
+<h2>Introduction<a class="headerlink" href="#introduction" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><div class="line-block">
+<div class="line">This script takes N pairs of input file pairs (with the suffices .gene and .gwas)</div>
+<div class="line">and runs them against M sets of simulation data (with the suffix .simulation)</div>
+<div class="line">A summary per input file pair is then produced</div>
+</div>
+<p>In pseudo-code:</p>
+<blockquote>
+<div><p>STEP_1:</p>
+<div class="highlight-python"><pre>for n_file in NNN_pairs_of_input_files:
+ for m_file in MMM_simulation_data:
+
+ [n_file.gene,
+ n_file.gwas,
+ m_file.simulation] -> n_file.m_file.simulation_res</pre>
+</div>
+<p>STEP_2:</p>
+<div class="highlight-python"><pre>for n_file in NNN_pairs_of_input_files:
+
+ n_file.*.simulation_res -> n_file.mean</pre>
+</div>
+<div class="line-block">
+<div class="line">n = CNT_GENE_GWAS_FILES</div>
+<div class="line">m = CNT_SIMULATION_FILES</div>
+</div>
+</div></blockquote>
+</div></blockquote>
+</div>
+<div class="section" id="code">
+<h2>Code<a class="headerlink" href="#code" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+<span class="kn">import</span> <span class="nn">os</span>
+
+<span class="c">#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888</span>
+
+<span class="c"># constants</span>
+
+<span class="c">#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888</span>
+<span class="n">working_dir</span> <span class="o">=</span> <span class="s">"temp_NxM"</span>
+<span class="n">simulation_data_dir</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">working_dir</span><span class="p">,</span> <span class="s">"simulation"</span><span class="p">)</span>
+<span class="n">gene_data_dir</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">working_dir</span><span class="p">,</span> <span class="s">"gene"</span><span class="p">)</span>
+<span class="n">CNT_GENE_GWAS_FILES</span> <span class="o">=</span> <span class="mi">2</span>
+<span class="n">CNT_SIMULATION_FILES</span> <span class="o">=</span> <span class="mi">3</span>
+
+
+
+<span class="c">#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888</span>
+
+<span class="c"># imports</span>
+
+<span class="c">#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888</span>
+<span class="kn">import</span> <span class="nn">os</span><span class="o">,</span> <span class="nn">sys</span>
+<span class="kn">from</span> <span class="nn">itertools</span> <span class="kn">import</span> <span class="n">izip</span>
+<span class="kn">import</span> <span class="nn">glob</span>
+<span class="c">#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888</span>
+
+<span class="c"># Functions</span>
+
+
+<span class="c">#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888</span>
+
+<span class="c">#_________________________________________________________________________________________</span>
+<span class="c">#</span>
+<span class="c"># get gene gwas file pairs</span>
+<span class="c">#</span>
+<span class="c">#_________________________________________________________________________________________</span>
+<span class="k">def</span> <span class="nf">get_gene_gwas_file_pairs</span><span class="p">(</span> <span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Helper function to get all *.gene, *.gwas from the direction specified</span>
+<span class="sd"> in --gene_data_dir</span>
+
+<span class="sd"> Returns</span>
+<span class="sd"> file pairs with both .gene and .gwas extensions,</span>
+<span class="sd"> corresponding roots (no extension) of each file</span>
+<span class="sd"> """</span>
+ <span class="n">gene_files</span> <span class="o">=</span> <span class="n">glob</span><span class="o">.</span><span class="n">glob</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">gene_data_dir</span><span class="p">,</span> <span class="s">"*.gene"</span><span class="p">))</span>
+ <span class="n">gwas_files</span> <span class="o">=</span> <span class="n">glob</span><span class="o">.</span><span class="n">glob</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">gene_data_dir</span><span class="p">,</span> <span class="s">"*.gwas"</span><span class="p">))</span>
+ <span class="c">#</span>
+ <span class="n">common_roots</span> <span class="o">=</span> <span class="nb">set</span><span class="p">(</span><span class="nb">map</span><span class="p">(</span><span class="k">lambda</span> <span class="n">x</span><span class="p">:</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">splitext</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o" [...]
+ <span class="n">common_roots</span> <span class="o">&=</span><span class="nb">set</span><span class="p">(</span><span class="nb">map</span><span class="p">(</span><span class="k">lambda</span> <span class="n">x</span><span class="p">:</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">splitext</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">path</span><span class [...]
+ <span class="n">common_roots</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="n">common_roots</span><span class="p">)</span>
+ <span class="c">#</span>
+ <span class="n">p</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="p">;</span> <span class="n">g_dir</span> <span class="o">=</span> <span class="n">gene_data_dir</span>
+ <span class="n">file_pairs</span> <span class="o">=</span> <span class="p">[[</span><span class="n">p</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">g_dir</span><span class="p">,</span> <span class="n">x</span> <span class="o">+</span> <span class="s">".gene"</span><span class="p">),</span> <span class="n">p</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">g_dir</span><span cla [...]
+ <span class="k">return</span> <span class="n">file_pairs</span><span class="p">,</span> <span class="n">common_roots</span>
+
+<span class="c">#_________________________________________________________________________________________</span>
+<span class="c">#</span>
+<span class="c"># get simulation files</span>
+<span class="c">#</span>
+<span class="c">#_________________________________________________________________________________________</span>
+<span class="k">def</span> <span class="nf">get_simulation_files</span><span class="p">(</span> <span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Helper function to get all *.simulation from the direction specified</span>
+<span class="sd"> in --simulation_data_dir</span>
+<span class="sd"> Returns</span>
+<span class="sd"> file with .simulation extensions,</span>
+<span class="sd"> corresponding roots (no extension) of each file</span>
+<span class="sd"> """</span>
+ <span class="n">simulation_files</span> <span class="o">=</span> <span class="n">glob</span><span class="o">.</span><span class="n">glob</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">simulation_data_dir</span><span class="p">,</span> <span class="s">"*.simulation"</span><span class="p">))</span>
+ <span class="n">simulation_roots</span> <span class="o">=</span><span class="nb">map</span><span class="p">(</span><span class="k">lambda</span> <span class="n">x</span><span class="p">:</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">splitext</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">split</span><span class [...]
+ <span class="k">return</span> <span class="n">simulation_files</span><span class="p">,</span> <span class="n">simulation_roots</span>
+
+
+
+<span class="c">#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888</span>
+
+<span class="c"># Main logic</span>
+
+
+<span class="c">#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888</span>
+
+
+
+
+
+
+
+<span class="c">#_________________________________________________________________________________________</span>
+<span class="c">#</span>
+<span class="c"># setup_simulation_data</span>
+<span class="c">#</span>
+<span class="c">#_________________________________________________________________________________________</span>
+
+<span class="c">#</span>
+<span class="c"># mkdir: makes sure output directories exist before task</span>
+<span class="c">#</span>
+<span class="nd">@follows</span><span class="p">(</span><span class="n">mkdir</span><span class="p">(</span><span class="n">gene_data_dir</span><span class="p">,</span> <span class="n">simulation_data_dir</span><span class="p">))</span>
+<span class="k">def</span> <span class="nf">setup_simulation_data</span> <span class="p">():</span>
+ <span class="sd">"""</span>
+<span class="sd"> create simulation files</span>
+<span class="sd"> """</span>
+ <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">CNT_GENE_GWAS_FILES</span><span class="p">):</span>
+ <span class="nb">open</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">gene_data_dir</span><span class="p">,</span> <span class="s">"</span><span class="si">%03d</span><span class="s">.gene"</span> <span class="o">%</span> <span class="n">i</span><span class="p">),</span> <span class="s">"w"</span><span class="p">)</span>
+ <span class="nb">open</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">gene_data_dir</span><span class="p">,</span> <span class="s">"</span><span class="si">%03d</span><span class="s">.gwas"</span> <span class="o">%</span> <span class="n">i</span><span class="p">),</span> <span class="s">"w"</span><span class="p">)</span>
+ <span class="c">#</span>
+ <span class="c"># gene files without corresponding gwas and vice versa</span>
+ <span class="nb">open</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">gene_data_dir</span><span class="p">,</span> <span class="s">"orphan1.gene"</span><span class="p">),</span> <span class="s">"w"</span><span class="p">)</span>
+ <span class="nb">open</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">gene_data_dir</span><span class="p">,</span> <span class="s">"orphan2.gwas"</span><span class="p">),</span> <span class="s">"w"</span><span class="p">)</span>
+ <span class="nb">open</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">gene_data_dir</span><span class="p">,</span> <span class="s">"orphan3.gwas"</span><span class="p">),</span> <span class="s">"w"</span><span class="p">)</span>
+ <span class="c">#</span>
+ <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">CNT_SIMULATION_FILES</span><span class="p">):</span>
+        <span class="nb">open</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">simulation_data_dir</span><span class="p">,</span> <span class="s">"</span><span class="si">%03d</span><span class="s">.simulation"</span> <span class="o">%</span> <span class="n">i</span><span class="p">),</span> <span class="s">"w"</span><span class="p">)</span>
+
+
+
+
+<span class="c">#_________________________________________________________________________________________</span>
+<span class="c">#</span>
+<span class="c"># cleanup_simulation_data</span>
+<span class="c">#</span>
+<span class="c">#_________________________________________________________________________________________</span>
+<span class="k">def</span> <span class="nf">try_rmdir</span> <span class="p">(</span><span class="n">d</span><span class="p">):</span>
+ <span class="k">if</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">exists</span><span class="p">(</span><span class="n">d</span><span class="p">):</span>
+ <span class="k">try</span><span class="p">:</span>
+ <span class="n">os</span><span class="o">.</span><span class="n">rmdir</span><span class="p">(</span><span class="n">d</span><span class="p">)</span>
+ <span class="k">except</span> <span class="ne">OSError</span><span class="p">:</span>
+ <span class="n">sys</span><span class="o">.</span><span class="n">stderr</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="s">"Warning:</span><span class="se">\t</span><span class="si">%s</span><span class="s"> is not empty and will not be removed.</span><span class="se">\n</span><span class="s">"</span> <span class="o">%</span> <span class="n">d</span><span class="p">)</span>
+
+
+
+<span class="k">def</span> <span class="nf">cleanup_simulation_data</span> <span class="p">():</span>
+ <span class="sd">"""</span>
+<span class="sd"> cleanup files</span>
+<span class="sd"> """</span>
+ <span class="n">sys</span><span class="o">.</span><span class="n">stderr</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="s">"Cleanup working directory and simulation files.</span><span class="se">\n</span><span class="s">"</span><span class="p">)</span>
+ <span class="c">#</span>
+ <span class="c"># cleanup gene and gwas files</span>
+ <span class="c">#</span>
+ <span class="k">for</span> <span class="n">f</span> <span class="ow">in</span> <span class="n">glob</span><span class="o">.</span><span class="n">glob</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">gene_data_dir</span><span class="p">,</span> <span class="s">"*.gene"</span><span class="p">)):</span>
+ <span class="n">os</span><span class="o">.</span><span class="n">unlink</span><span class="p">(</span><span class="n">f</span><span class="p">)</span>
+ <span class="k">for</span> <span class="n">f</span> <span class="ow">in</span> <span class="n">glob</span><span class="o">.</span><span class="n">glob</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">gene_data_dir</span><span class="p">,</span> <span class="s">"*.gwas"</span><span class="p">)):</span>
+ <span class="n">os</span><span class="o">.</span><span class="n">unlink</span><span class="p">(</span><span class="n">f</span><span class="p">)</span>
+ <span class="n">try_rmdir</span><span class="p">(</span><span class="n">gene_data_dir</span><span class="p">)</span>
+ <span class="c">#</span>
+ <span class="c"># cleanup simulation</span>
+ <span class="c">#</span>
+ <span class="k">for</span> <span class="n">f</span> <span class="ow">in</span> <span class="n">glob</span><span class="o">.</span><span class="n">glob</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">simulation_data_dir</span><span class="p">,</span> <span class="s">"*.simulation"</span><span class="p">)):</span>
+ <span class="n">os</span><span class="o">.</span><span class="n">unlink</span><span class="p">(</span><span class="n">f</span><span class="p">)</span>
+ <span class="n">try_rmdir</span><span class="p">(</span><span class="n">simulation_data_dir</span><span class="p">)</span>
+ <span class="c">#</span>
+ <span class="c"># cleanup working_dir</span>
+ <span class="c">#</span>
+    <span class="k">for</span> <span class="n">f</span> <span class="ow">in</span> <span class="n">glob</span><span class="o">.</span><span class="n">glob</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">working_dir</span><span class="p">,</span> <span class="s">"simulation_results"</span><span class="p">,</span> <span class="s">"*.simulation_res"</span><span class="p">)):</span>
+ <span class="n">os</span><span class="o">.</span><span class="n">unlink</span><span class="p">(</span><span class="n">f</span><span class="p">)</span>
+ <span class="n">try_rmdir</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">working_dir</span><span class="p">,</span> <span class="s">"simulation_results"</span><span class="p">))</span>
+ <span class="c">#</span>
+ <span class="k">for</span> <span class="n">f</span> <span class="ow">in</span> <span class="n">glob</span><span class="o">.</span><span class="n">glob</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">working_dir</span><span class="p">,</span> <span class="s">"*.mean"</span><span class="p">)):</span>
+ <span class="n">os</span><span class="o">.</span><span class="n">unlink</span><span class="p">(</span><span class="n">f</span><span class="p">)</span>
+ <span class="n">try_rmdir</span><span class="p">(</span><span class="n">working_dir</span><span class="p">)</span>
+
+
+<span class="c">#_________________________________________________________________________________________</span>
+<span class="c">#</span>
+<span class="c"># Step 1:</span>
+<span class="c">#</span>
+<span class="c"># for n_file in NNN_pairs_of_input_files:</span>
+<span class="c"># for m_file in MMM_simulation_data:</span>
+<span class="c">#</span>
+<span class="c"># [n_file.gene,</span>
+<span class="c"># n_file.gwas,</span>
+<span class="c"># m_file.simulation] -> working_dir/n_file.m_file.simulation_res</span>
+<span class="c">#</span>
+<span class="c">#_________________________________________________________________________________________</span>
+<span class="k">def</span> <span class="nf">generate_simulation_params</span> <span class="p">():</span>
+ <span class="sd">"""</span>
+<span class="sd"> Custom function to generate</span>
+<span class="sd"> file names for gene/gwas simulation study</span>
+<span class="sd"> """</span>
+ <span class="n">simulation_files</span><span class="p">,</span> <span class="n">simulation_file_roots</span> <span class="o">=</span> <span class="n">get_simulation_files</span><span class="p">()</span>
+ <span class="n">gene_gwas_file_pairs</span><span class="p">,</span> <span class="n">gene_gwas_file_roots</span> <span class="o">=</span> <span class="n">get_gene_gwas_file_pairs</span><span class="p">()</span>
+ <span class="c">#</span>
+ <span class="k">for</span> <span class="n">sim_file</span><span class="p">,</span> <span class="n">sim_file_root</span> <span class="ow">in</span> <span class="n">izip</span><span class="p">(</span><span class="n">simulation_files</span><span class="p">,</span> <span class="n">simulation_file_roots</span><span class="p">):</span>
+ <span class="k">for</span> <span class="p">(</span><span class="n">gene</span><span class="p">,</span> <span class="n">gwas</span><span class="p">),</span> <span class="n">gene_file_root</span> <span class="ow">in</span> <span class="n">izip</span><span class="p">(</span><span class="n">gene_gwas_file_pairs</span><span class="p">,</span> <span class="n">gene_gwas_file_roots</span><span class="p">):</span>
+ <span class="c">#</span>
+ <span class="n">result_file</span> <span class="o">=</span> <span class="s">"</span><span class="si">%s</span><span class="s">.</span><span class="si">%s</span><span class="s">.simulation_res"</span> <span class="o">%</span> <span class="p">(</span><span class="n">gene_file_root</span><span class="p">,</span> <span class="n">sim_file_root</span><span class="p">)</span>
+ <span class="n">result_file_path</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">working_dir</span><span class="p">,</span> <span class="s">"simulation_results"</span><span class="p">,</span> <span class="n">result_file</span><span class="p">)</span>
+ <span class="c">#</span>
+ <span class="k">yield</span> <span class="p">[</span><span class="n">gene</span><span class="p">,</span> <span class="n">gwas</span><span class="p">,</span> <span class="n">sim_file</span><span class="p">],</span> <span class="n">result_file_path</span><span class="p">,</span> <span class="n">gene_file_root</span><span class="p">,</span> <span class="n">sim_file_root</span><span class="p">,</span> <span class="n">result_file</span>
+
+
+
+<span class="c">#</span>
+<span class="c"># mkdir: makes sure output directories exist before task</span>
+<span class="c">#</span>
+<span class="nd">@follows</span><span class="p">(</span><span class="n">mkdir</span><span class="p">(</span><span class="n">working_dir</span><span class="p">,</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">working_dir</span><span class="p">,</span> <span class="s">"simulation_results"</span><span class="p">)))</span>
+<span class="nd">@files</span><span class="p">(</span><span class="n">generate_simulation_params</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">gwas_simulation</span><span class="p">(</span><span class="n">input_files</span><span class="p">,</span> <span class="n">result_file_path</span><span class="p">,</span> <span class="n">gene_file_root</span><span class="p">,</span> <span class="n">sim_file_root</span><span class="p">,</span> <span class="n">result_file</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Dummy calculation of gene gwas vs simulation data</span>
+<span class="sd"> Normally runs in parallel on a computational cluster</span>
+<span class="sd"> """</span>
+ <span class="p">(</span><span class="n">gene_file</span><span class="p">,</span>
+ <span class="n">gwas_file</span><span class="p">,</span>
+ <span class="n">simulation_data_file</span><span class="p">)</span> <span class="o">=</span> <span class="n">input_files</span>
+ <span class="c">#</span>
+ <span class="n">simulation_res_file</span> <span class="o">=</span> <span class="nb">open</span><span class="p">(</span><span class="n">result_file_path</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+ <span class="n">simulation_res_file</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="s">"</span><span class="si">%s</span><span class="s"> + </span><span class="si">%s</span><span class="s"> -> </span><span class="si">%s</span><span class="se">\n</span><span class="s">"</span> <span class="o">%</span> <span class="p">(</span><span class="n">gene_file_root</span><span class="p">,</span> <span class="n">sim_file_root</span><spa [...]
+
+
+<span class="c">#_________________________________________________________________________________________</span>
+<span class="c">#</span>
+<span class="c"># Step 2:</span>
+<span class="c">#</span>
+<span class="c"># Statistical summary per gene/gwas file pair</span>
+<span class="c">#</span>
+<span class="c"># for n_file in NNN_pairs_of_input_files:</span>
+<span class="c"># working_dir/simulation_results/n.*.simulation_res</span>
+<span class="c"># -> working_dir/n.mean</span>
+<span class="c">#</span>
+<span class="c">#_________________________________________________________________________________________</span>
+
+
+<span class="nd">@collate</span><span class="p">(</span><span class="n">gwas_simulation</span><span class="p">,</span> <span class="n">regex</span><span class="p">(</span><span class="s">r"simulation_results/(\d+).\d+.simulation_res"</span><span class="p">),</span> <span class="s">r"\1.mean"</span><span class="p">)</span>
+<span class="nd">@posttask</span><span class="p">(</span><span class="k">lambda</span> <span class="p">:</span> <span class="n">sys</span><span class="o">.</span><span class="n">stdout</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="s">"</span><span class="se">\n</span><span class="s">OK</span><span class="se">\n</span><span class="s">"</span><span class="p">))</span>
+<span class="k">def</span> <span class="nf">statistical_summary</span> <span class="p">(</span><span class="n">result_files</span><span class="p">,</span> <span class="n">summary_file</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Simulate statistical summary</span>
+<span class="sd"> """</span>
+ <span class="n">summary_file</span> <span class="o">=</span> <span class="nb">open</span><span class="p">(</span><span class="n">summary_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+ <span class="k">for</span> <span class="n">f</span> <span class="ow">in</span> <span class="n">result_files</span><span class="p">:</span>
+ <span class="n">summary_file</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="nb">open</span><span class="p">(</span><span class="n">f</span><span class="p">)</span><span class="o">.</span><span class="n">read</span><span class="p">())</span>
+
+
+
+<span class="n">pipeline_run</span><span class="p">([</span><span class="n">setup_simulation_data</span><span class="p">],</span> <span class="n">multiprocess</span> <span class="o">=</span> <span class="mi">5</span><span class="p">,</span> <span class="n">verbose</span> <span class="o">=</span> <span class="mi">2</span><span class="p">)</span>
+<span class="n">pipeline_run</span><span class="p">([</span><span class="n">statistical_summary</span><span class="p">],</span> <span class="n">multiprocess</span> <span class="o">=</span> <span class="mi">5</span><span class="p">,</span> <span class="n">verbose</span> <span class="o">=</span> <span class="mi">2</span><span class="p">)</span>
+
+<span class="c"># uncomment to printout flowchar</span>
+<span class="c">#</span>
+<span class="c"># pipeline_printout(sys.stdout, [statistical_summary], verbose=2)</span>
+<span class="c"># graph_printout ("flowchart.jpg", "jpg", [statistical_summary])</span>
+<span class="c">#</span>
+
+<span class="n">cleanup_simulation_data</span> <span class="p">()</span>
+</pre></div>
+</div>
+</div></blockquote>
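+<p>As a footnote to the example above, here is a minimal sketch (with hypothetical file
+names, not part of the simulation study) of the same on-the-fly pattern: the generator
+passed to <tt class="docutils literal"><span class="pre">@files</span></tt> is called each time the pipeline runs and yields one
+parameter list per job.</p>
+<div class="highlight-python"><pre>from ruffus import *
+
+# Minimal sketch (hypothetical file names) of generating parameters on the fly:
+# the generator yields one [input, output] parameter list per job.
+# Input is None here, so each job simply creates its output file.
+def generate_params_on_the_fly():
+    for i in range(3):
+        yield [None, "on_the_fly.%d.output" % i]
+
+@files(generate_params_on_the_fly)
+def create_output(input_file, output_file):
+    open(output_file, "w").close()
+
+pipeline_run([create_output])</pre>
+</div>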
+</div>
+<div class="section" id="resulting-output">
+<h2>Resulting Output<a class="headerlink" href="#resulting-output" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="gp">>>> </span><span class="n">pipeline_run</span><span class="p">([</span><span class="n">setup_simulation_data</span><span class="p">],</span> <span class="n">multiprocess</span> <span class="o">=</span> <span class="mi">5</span><span class="p">,</span> <span class="n">verbose</span> <span class="o">=</span> <span class="mi">2</span><span class="p">)</span>
+<span class="go"> Make directories [temp_NxM/gene, temp_NxM/simulation] completed</span>
+<span class="go">Completed Task = setup_simulation_data_mkdir_1</span>
+<span class="go"> Job completed</span>
+<span class="go">Completed Task = setup_simulation_data</span>
+
+
+<span class="gp">>>> </span><span class="n">pipeline_run</span><span class="p">([</span><span class="n">statistical_summary</span><span class="p">],</span> <span class="n">multiprocess</span> <span class="o">=</span> <span class="mi">5</span><span class="p">,</span> <span class="n">verbose</span> <span class="o">=</span> <span class="mi">2</span><span class="p">)</span>
+<span class="go"> Make directories [temp_NxM, temp_NxM/simulation_results] completed</span>
+<span class="go">Completed Task = gwas_simulation_mkdir_1</span>
+<span class="go"> Job = [[temp_NxM/gene/001.gene, temp_NxM/gene/001.gwas, temp_NxM/simulation/000.simulation] -> temp_NxM/simulation_results/001.000.simulation_res, 001, 000, 001.000.simulation_res] completed</span>
+<span class="go"> Job = [[temp_NxM/gene/000.gene, temp_NxM/gene/000.gwas, temp_NxM/simulation/000.simulation] -> temp_NxM/simulation_results/000.000.simulation_res, 000, 000, 000.000.simulation_res] completed</span>
+<span class="go"> Job = [[temp_NxM/gene/001.gene, temp_NxM/gene/001.gwas, temp_NxM/simulation/001.simulation] -> temp_NxM/simulation_results/001.001.simulation_res, 001, 001, 001.001.simulation_res] completed</span>
+<span class="go"> Job = [[temp_NxM/gene/000.gene, temp_NxM/gene/000.gwas, temp_NxM/simulation/001.simulation] -> temp_NxM/simulation_results/000.001.simulation_res, 000, 001, 000.001.simulation_res] completed</span>
+<span class="go"> Job = [[temp_NxM/gene/000.gene, temp_NxM/gene/000.gwas, temp_NxM/simulation/002.simulation] -> temp_NxM/simulation_results/000.002.simulation_res, 000, 002, 000.002.simulation_res] completed</span>
+<span class="go"> Job = [[temp_NxM/gene/001.gene, temp_NxM/gene/001.gwas, temp_NxM/simulation/002.simulation] -> temp_NxM/simulation_results/001.002.simulation_res, 001, 002, 001.002.simulation_res] completed</span>
+<span class="go">Completed Task = gwas_simulation</span>
+<span class="go"> Job = [[temp_NxM/simulation_results/000.000.simulation_res, temp_NxM/simulation_results/000.001.simulation_res, temp_NxM/simulation_results/000.002.simulation_res] -> temp_NxM/000.mean] completed</span>
+<span class="go"> Job = [[temp_NxM/simulation_results/001.000.simulation_res, temp_NxM/simulation_results/001.001.simulation_res, temp_NxM/simulation_results/001.002.simulation_res] -> temp_NxM/001.mean] completed</span>
+</pre></div>
+</div>
+</div></blockquote>
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="../../contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#"><strong>Chapter 21</strong>: Esoteric: Python Code for Generating parameters on the fly with <tt class="docutils literal"><span class="pre">@files</span></tt></a><ul>
+<li><a class="reference internal" href="#introduction">Introduction</a></li>
+<li><a class="reference internal" href="#code">Code</a></li>
+<li><a class="reference internal" href="#resulting-output">Resulting Output</a></li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="inputs_code.html"
+ title="previous chapter"><strong>Chapter 20</strong>: Python Code for Manipulating task inputs via string substitution using <tt class="docutils literal"><span class="pre">inputs()</span></tt> and <tt class="docutils literal"><span class="pre">add_inputs()</span></tt></a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="flowchart_colours_code.html"
+ title="next chapter"><strong>Appendix 1</strong>: Python code for Flow Chart Colours with <tt class="docutils literal"><span class="pre">pipeline_printout_graph(...)</span></tt></a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../../_sources/tutorials/new_tutorial/onthefly_code.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="../../decorators/decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="../../decorators/originate.html">@originate</a> </li>
+ <li><a href="../../decorators/split.html">@split</a> </li>
+ <li><a href="../../decorators/transform.html">@transform</a> </li>
+ <li><a href="../../decorators/merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="../../decorators/subdivide.html">@subdivide</a> </li>
+ <li><a href="../../decorators/transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="../../decorators/collate.html">@collate</a> </li>
+ <li><a href="../../decorators/collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="../../decorators/graphviz.html">@graphviz</a> </li>
+ <li><a href="../../decorators/mkdir.html">@mkdir</a> </li>
+ <li><a href="../../decorators/follows.html">@follows / mkdir</a> </li>
+ <li><a href="../../decorators/posttask.html">@posttask touch_file</a> </li>
+ <li><a href="../../decorators/active_if.html">@active_if</a> </li>
+ <li><a href="../../decorators/jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="../../decorators/product.html">@product </a> </li>
+ <li><a href="../../decorators/permutations.html">@permutations </a> </li>
+ <li><a href="../../decorators/combinations.html">@combinations </a> </li>
+ <li><a href="../../decorators/combinations_with_replacement.html">@combinations_ _with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="../../decorators/decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="../../decorators/files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="../../decorators/parallel.html">@parallel</a> </li>
+ <li><a href="../../decorators/check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="../../decorators/indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="flowchart_colours_code.html" title="Appendix 1: Python code for Flow Chart Colours with pipeline_printout_graph(...)"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="inputs_code.html" title="Chapter 20: Python Code for Manipulating task inputs via string substitution using inputs() and add_inputs()"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="introduction.html">Manual</a> / </li>
+ <li><a href="manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/tutorials/new_tutorial/originate.html b/doc/_build/html/tutorials/new_tutorial/originate.html
new file mode 100644
index 0000000..f075dca
--- /dev/null
+++ b/doc/_build/html/tutorials/new_tutorial/originate.html
@@ -0,0 +1,265 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>Chapter 4: Creating files with @originate — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../../index.html" />
+ <link rel="next" title="Chapter 5: Understanding how your pipeline works with pipeline_printout(...)" href="pipeline_printout.html" />
+ <link rel="prev" title="Chapter 3: More on @transform-ing data" href="transform_in_parallel.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="pipeline_printout.html" title="Chapter 5: Understanding how your pipeline works with pipeline_printout(...)"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="transform_in_parallel.html" title="Chapter 3: More on @transform-ing data"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="introduction.html">Manual</a> / </li>
+ <li><a href="manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <div class="section" id="new-manual-originate-chapter-num-creating-files-with-originate">
+<span id="new-manual-originate"></span><span id="index-0"></span><h1><strong>Chapter 4</strong>: Creating files with <tt class="docutils literal"><span class="pre">@originate</span></tt><a class="headerlink" href="#new-manual-originate-chapter-num-creating-files-with-originate" title="Permalink to this headline">¶</a></h1>
+<div class="admonition seealso">
+<p class="first admonition-title">See also</p>
+<ul class="last simple">
+<li><a class="reference internal" href="manual_contents.html#new-manual-table-of-contents"><em>Manual Table of Contents</em></a></li>
+<li><a class="reference internal" href="../../decorators/originate.html#decorators-originate"><em>@originate syntax in detail</em></a></li>
+</ul>
+</div>
+<div class="admonition note">
+<p class="first admonition-title">Note</p>
+<p>Remember to look at the example code:</p>
+<ul class="last simple">
+<li><a class="reference internal" href="originate_code.html#new-manual-originate-code"><em>Chapter 4: Python Code for Creating files with @originate</em></a></li>
+</ul>
+</div>
+<div class="section" id="simplifying-our-example-with-originate">
+<h2>Simplifying our example with <a class="reference internal" href="../../decorators/originate.html#decorators-originate"><em>@originate</em></a><a class="headerlink" href="#simplifying-our-example-with-originate" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>Our previous pipeline example started off with a set of files which we had to create first.</p>
+<p>This is a common task: pipelines have to start <em>somewhere</em>.</p>
+<p>Ideally, though, we would only want to create these starting files if they didn’t already exist. In other words, we want a sort of <tt class="docutils literal"><span class="pre">@transform</span></tt> which makes files from nothing (<tt class="docutils literal"><span class="pre">None</span></tt>?).</p>
+<p>This is exactly what <a class="reference internal" href="../../decorators/originate.html#decorators-originate"><em>@originate</em></a> helps you to do.</p>
+<p>Rewriting our pipeline with <a class="reference internal" href="../../decorators/originate.html#decorators-originate"><em>@originate</em></a> gives the following three steps:</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+
+<span class="c">#---------------------------------------------------------------</span>
+<span class="c"># create initial files</span>
+<span class="c">#</span>
+<span class="hll"><span class="nd">@originate</span><span class="p">([</span> <span class="p">[</span><span class="s">'job1.a.start'</span><span class="p">,</span> <span class="s">'job1.b.start'</span><span class="p">],</span>
+</span> <span class="p">[</span><span class="s">'job2.a.start'</span><span class="p">,</span> <span class="s">'job2.b.start'</span><span class="p">],</span>
+ <span class="p">[</span><span class="s">'job3.a.start'</span><span class="p">,</span> <span class="s">'job3.b.start'</span><span class="p">]</span> <span class="p">])</span>
+<span class="k">def</span> <span class="nf">create_initial_file_pairs</span><span class="p">(</span><span class="n">output_files</span><span class="p">):</span>
+ <span class="c"># create both files as necessary</span>
+ <span class="k">for</span> <span class="n">output_file</span> <span class="ow">in</span> <span class="n">output_files</span><span class="p">:</span>
+ <span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span> <span class="k">as</span> <span class="n">oo</span><span class="p">:</span> <span class="k">pass</span>
+
+<span class="c">#---------------------------------------------------------------</span>
+<span class="c"># first task</span>
+<span class="nd">@transform</span><span class="p">(</span><span class="n">create_initial_file_pairs</span><span class="p">,</span> <span class="n">suffix</span><span class="p">(</span><span class="s">".start"</span><span class="p">),</span> <span class="s">".output.1"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">first_task</span><span class="p">(</span><span class="n">input_files</span><span class="p">,</span> <span class="n">output_file</span><span class="p">):</span>
+ <span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">):</span> <span class="k">pass</span>
+
+
+<span class="c">#---------------------------------------------------------------</span>
+<span class="c"># second task</span>
+<span class="nd">@transform</span><span class="p">(</span><span class="n">first_task</span><span class="p">,</span> <span class="n">suffix</span><span class="p">(</span><span class="s">".output.1"</span><span class="p">),</span> <span class="s">".output.2"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">second_task</span><span class="p">(</span><span class="n">input_files</span><span class="p">,</span> <span class="n">output_file</span><span class="p">):</span>
+ <span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">):</span> <span class="k">pass</span>
+
+<span class="c">#</span>
+<span class="c"># Run</span>
+<span class="c">#</span>
+<span class="n">pipeline_run</span><span class="p">([</span><span class="n">second_task</span><span class="p">])</span>
+</pre></div>
+</div>
+<div class="highlight-python"><pre> Job = [None -> [job1.a.start, job1.b.start]] completed
+ Job = [None -> [job2.a.start, job2.b.start]] completed
+ Job = [None -> [job3.a.start, job3.b.start]] completed
+Completed Task = create_initial_file_pairs
+ Job = [[job1.a.start, job1.b.start] -> job1.a.output.1] completed
+ Job = [[job2.a.start, job2.b.start] -> job2.a.output.1] completed
+ Job = [[job3.a.start, job3.b.start] -> job3.a.output.1] completed
+Completed Task = first_task
+ Job = [job1.a.output.1 -> job1.a.output.2] completed
+ Job = [job2.a.output.1 -> job2.a.output.2] completed
+ Job = [job3.a.output.1 -> job3.a.output.2] completed
+Completed Task = second_task</pre>
+</div>
+</div></blockquote>
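+<p>As a side note, a minimal sketch that is not part of the example above (the file
+names below are hypothetical): if each job should create a single file rather than a
+pair of files, <tt class="docutils literal"><span class="pre">@originate</span></tt> can be given a flat list of file names, one output
+file, and hence one job, per element.</p>
+<div class="highlight-python"><pre>from ruffus import *
+
+# Sketch (hypothetical file names): a flat list passed to @originate
+# gives one output file, and therefore one job, per list element.
+@originate(['job1.start', 'job2.start', 'job3.start'])
+def create_initial_files(output_file):
+    open(output_file, "w").close()
+
+pipeline_run([create_initial_files])</pre>
+</div>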
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="../../contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#"><strong>Chapter 4</strong>: Creating files with <tt class="docutils literal"><span class="pre">@originate</span></tt></a><ul>
+<li><a class="reference internal" href="#simplifying-our-example-with-originate">Simplifying our example with <tt class="docutils literal"><span class="pre">@originate</span></tt></a></li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="transform_in_parallel.html"
+ title="previous chapter"><strong>Chapter 3</strong>: More on <tt class="docutils literal"><span class="pre">@transform</span></tt>-ing data</a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="pipeline_printout.html"
+ title="next chapter"><strong>Chapter 5</strong>: Understanding how your pipeline works with <tt class="docutils literal"><span class="pre">pipeline_printout(...)</span></tt></a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../../_sources/tutorials/new_tutorial/originate.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="../../decorators/decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="../../decorators/originate.html">@originate</a> </li>
+ <li><a href="../../decorators/split.html">@split</a> </li>
+ <li><a href="../../decorators/transform.html">@transform</a> </li>
+ <li><a href="../../decorators/merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="../../decorators/subdivide.html">@subdivide</a> </li>
+ <li><a href="../../decorators/transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="../../decorators/collate.html">@collate</a> </li>
+ <li><a href="../../decorators/collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="../../decorators/graphviz.html">@graphviz</a> </li>
+ <li><a href="../../decorators/mkdir.html">@mkdir</a> </li>
+ <li><a href="../../decorators/follows.html">@follows / mkdir</a> </li>
+ <li><a href="../../decorators/posttask.html">@posttask touch_file</a> </li>
+ <li><a href="../../decorators/active_if.html">@active_if</a> </li>
+ <li><a href="../../decorators/jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="../../decorators/product.html">@product </a> </li>
+ <li><a href="../../decorators/permutations.html">@permutations </a> </li>
+ <li><a href="../../decorators/combinations.html">@combinations </a> </li>
+ <li><a href="../../decorators/combinations_with_replacement.html">@combinations_ _with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="../../decorators/decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="../../decorators/files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="../../decorators/parallel.html">@parallel</a> </li>
+ <li><a href="../../decorators/check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="../../decorators/indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="pipeline_printout.html" title="Chapter 5: Understanding how your pipeline works with pipeline_printout(...)"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="transform_in_parallel.html" title="Chapter 3: More on @transform-ing data"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="introduction.html">Manual</a> / </li>
+ <li><a href="manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/tutorials/new_tutorial/originate_code.html b/doc/_build/html/tutorials/new_tutorial/originate_code.html
new file mode 100644
index 0000000..5399d61
--- /dev/null
+++ b/doc/_build/html/tutorials/new_tutorial/originate_code.html
@@ -0,0 +1,260 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>Chapter 4: Python Code for Creating files with @originate — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../../index.html" />
+ <link rel="next" title="Chapter 5: Python Code for Understanding how your pipeline works with pipeline_printout(...)" href="pipeline_printout_code.html" />
+ <link rel="prev" title="Chapter 3: Python Code for More on @transform-ing data" href="transform_in_parallel_code.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="pipeline_printout_code.html" title="Chapter 5: Python Code for Understanding how your pipeline works with pipeline_printout(...)"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="transform_in_parallel_code.html" title="Chapter 3: Python Code for More on @transform-ing data"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="introduction.html">Manual</a> / </li>
+ <li><a href="manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <div class="section" id="new-manual-originate-chapter-num-python-code-for-creating-files-with-originate">
+<span id="new-manual-originate-code"></span><h1><strong>Chapter 4</strong>: Python Code for Creating files with <tt class="docutils literal"><span class="pre">@originate</span></tt><a class="headerlink" href="#new-manual-originate-chapter-num-python-code-for-creating-files-with-originate" title="Permalink to this headline">¶</a></h1>
+<div class="admonition seealso">
+<p class="first admonition-title">See also</p>
+<ul class="last simple">
+<li><a class="reference internal" href="manual_contents.html#new-manual-table-of-contents"><em>Manual Table of Contents</em></a></li>
+<li><a class="reference internal" href="../../decorators/transform.html#decorators-transform"><em>@transform syntax in detail</em></a></li>
+<li>Back to <strong>Chapter 4</strong>: <a class="reference internal" href="originate.html#new-manual-originate"><em>@originate</em></a></li>
+</ul>
+</div>
+<div class="section" id="using-originate">
+<h2>Using <tt class="docutils literal"><span class="pre">@originate</span></tt><a class="headerlink" href="#using-originate" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+
+<span class="c">#---------------------------------------------------------------</span>
+<span class="c"># create initial files</span>
+<span class="c">#</span>
+<span class="nd">@originate</span><span class="p">([</span> <span class="p">[</span><span class="s">'job1.a.start'</span><span class="p">,</span> <span class="s">'job1.b.start'</span><span class="p">],</span>
+ <span class="p">[</span><span class="s">'job2.a.start'</span><span class="p">,</span> <span class="s">'job2.b.start'</span><span class="p">],</span>
+ <span class="p">[</span><span class="s">'job3.a.start'</span><span class="p">,</span> <span class="s">'job3.b.start'</span><span class="p">]</span> <span class="p">])</span>
+<span class="k">def</span> <span class="nf">create_initial_file_pairs</span><span class="p">(</span><span class="n">output_files</span><span class="p">):</span>
+ <span class="c"># create both files as necessary</span>
+ <span class="k">for</span> <span class="n">output_file</span> <span class="ow">in</span> <span class="n">output_files</span><span class="p">:</span>
+ <span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span> <span class="k">as</span> <span class="n">oo</span><span class="p">:</span> <span class="k">pass</span>
+
+<span class="c">#---------------------------------------------------------------</span>
+<span class="c"># first task</span>
+<span class="nd">@transform</span><span class="p">(</span><span class="n">create_initial_file_pairs</span><span class="p">,</span> <span class="n">suffix</span><span class="p">(</span><span class="s">".start"</span><span class="p">),</span> <span class="s">".output.1"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">first_task</span><span class="p">(</span><span class="n">input_files</span><span class="p">,</span> <span class="n">output_file</span><span class="p">):</span>
+ <span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">):</span> <span class="k">pass</span>
+
+
+<span class="c">#---------------------------------------------------------------</span>
+<span class="c"># second task</span>
+<span class="nd">@transform</span><span class="p">(</span><span class="n">first_task</span><span class="p">,</span> <span class="n">suffix</span><span class="p">(</span><span class="s">".output.1"</span><span class="p">),</span> <span class="s">".output.2"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">second_task</span><span class="p">(</span><span class="n">input_files</span><span class="p">,</span> <span class="n">output_file</span><span class="p">):</span>
+ <span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">):</span> <span class="k">pass</span>
+
+<span class="c">#</span>
+<span class="c"># Run</span>
+<span class="c">#</span>
+<span class="n">pipeline_run</span><span class="p">([</span><span class="n">second_task</span><span class="p">])</span>
+</pre></div>
+</div>
+</div></blockquote>
+</div>
+<div class="section" id="resulting-output">
+<h2>Resulting Output<a class="headerlink" href="#resulting-output" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><div class="highlight-python"><pre> Job = [None -> [job1.a.start, job1.b.start]] completed
+ Job = [None -> [job2.a.start, job2.b.start]] completed
+ Job = [None -> [job3.a.start, job3.b.start]] completed
+Completed Task = create_initial_file_pairs
+ Job = [[job1.a.start, job1.b.start] -> job1.a.output.1] completed
+ Job = [[job2.a.start, job2.b.start] -> job2.a.output.1] completed
+ Job = [[job3.a.start, job3.b.start] -> job3.a.output.1] completed
+Completed Task = first_task
+ Job = [job1.a.output.1 -> job1.a.output.2] completed
+ Job = [job2.a.output.1 -> job2.a.output.2] completed
+ Job = [job3.a.output.1 -> job3.a.output.2] completed
+Completed Task = second_task</pre>
+</div>
+</div></blockquote>
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="../../contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#"><strong>Chapter 4</strong>: Python Code for Creating files with <tt class="docutils literal"><span class="pre">@originate</span></tt></a><ul>
+<li><a class="reference internal" href="#using-originate">Using <tt class="docutils literal"><span class="pre">@originate</span></tt></a></li>
+<li><a class="reference internal" href="#resulting-output">Resulting Output</a></li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="transform_in_parallel_code.html"
+ title="previous chapter"><strong>Chapter 3</strong>: Python Code for More on <tt class="docutils literal"><span class="pre">@transform</span></tt>-ing data</a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="pipeline_printout_code.html"
+ title="next chapter"><strong>Chapter 5</strong>: Python Code for Understanding how your pipeline works with <tt class="docutils literal"><span class="pre">pipeline_printout(...)</span></tt></a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../../_sources/tutorials/new_tutorial/originate_code.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="../../decorators/decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="../../decorators/originate.html">@originate</a> </li>
+ <li><a href="../../decorators/split.html">@split</a> </li>
+ <li><a href="../../decorators/transform.html">@transform</a> </li>
+ <li><a href="../../decorators/merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="../../decorators/subdivide.html">@subdivide</a> </li>
+ <li><a href="../../decorators/transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="../../decorators/collate.html">@collate</a> </li>
+ <li><a href="../../decorators/collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="../../decorators/graphviz.html">@graphviz</a> </li>
+ <li><a href="../../decorators/mkdir.html">@mkdir</a> </li>
+ <li><a href="../../decorators/follows.html">@follows / mkdir</a> </li>
+ <li><a href="../../decorators/posttask.html">@posttask touch_file</a> </li>
+ <li><a href="../../decorators/active_if.html">@active_if</a> </li>
+ <li><a href="../../decorators/jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="../../decorators/product.html">@product </a> </li>
+ <li><a href="../../decorators/permutations.html">@permutations </a> </li>
+ <li><a href="../../decorators/combinations.html">@combinations </a> </li>
+ <li><a href="../../decorators/combinations_with_replacement.html">@combinations_ _with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="../../decorators/decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="../../decorators/files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="../../decorators/parallel.html">@parallel</a> </li>
+ <li><a href="../../decorators/check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="../../decorators/indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="pipeline_printout_code.html" title="Chapter 5: Python Code for Understanding how your pipeline works with pipeline_printout(...)"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="transform_in_parallel_code.html" title="Chapter 3: Python Code for More on @transform-ing data"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="introduction.html">Manual</a> / </li>
+ <li><a href="manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/tutorials/new_tutorial/output_file_names.html b/doc/_build/html/tutorials/new_tutorial/output_file_names.html
new file mode 100644
index 0000000..fe19952
--- /dev/null
+++ b/doc/_build/html/tutorials/new_tutorial/output_file_names.html
@@ -0,0 +1,697 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>Chapter 8: Specifying output file names with formatter() and regex() — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../../index.html" />
+ <link rel="next" title="Chapter 9: Preparing directories for output with @mkdir()" href="mkdir.html" />
+ <link rel="prev" title="Chapter 7: Displaying the pipeline visually with pipeline_printout_graph(...)" href="pipeline_printout_graph.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="mkdir.html" title="Chapter 9: Preparing directories for output with @mkdir()"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="pipeline_printout_graph.html" title="Chapter 7: Displaying the pipeline visually with pipeline_printout_graph(...)"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="introduction.html">Manual</a> / </li>
+ <li><a href="manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <div class="section" id="new-manual-output-file-names-chapter-num-specifying-output-file-names-with-formatter-and-regex">
+<span id="new-manual-output-file-names"></span><span id="index-0"></span><h1><strong>Chapter 8</strong>: Specifying output file names with <a class="reference internal" href="../../decorators/indicator_objects.html#decorators-formatter"><em>formatter()</em></a> and <a class="reference internal" href="../../decorators/indicator_objects.html#decorators-regex"><em>regex()</em></a><a class="headerlink" href="#new-manual-output-file-names-chapter-num-specifying-output-file-names-with-formatte [...]
+<div class="admonition seealso">
+<p class="first admonition-title">See also</p>
+<ul class="last simple">
+<li><a class="reference internal" href="manual_contents.html#new-manual-table-of-contents"><em>Manual Table of Contents</em></a></li>
+<li><a class="reference internal" href="../../decorators/indicator_objects.html#decorators-suffix"><em>suffix()</em></a> syntax</li>
+<li><a class="reference internal" href="../../decorators/indicator_objects.html#decorators-formatter"><em>formatter()</em></a> syntax</li>
+<li><a class="reference internal" href="../../decorators/indicator_objects.html#decorators-regex"><em>regex()</em></a> syntax</li>
+</ul>
+</div>
+<div class="admonition note">
+<p class="first admonition-title">Note</p>
+<p>Remember to look at the example code:</p>
+<ul class="last simple">
+<li><a class="reference internal" href="output_file_names_code.html#new-manual-output-file-names-code"><em>Chapter 8: Python Code for Specifying output file names with formatter() and regex()</em></a></li>
+</ul>
+</div>
+<div class="section" id="review">
+<h2>Review<a class="headerlink" href="#review" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><a class="reference internal image-reference" href="../../_images/theoretical_pipeline_schematic.png"><img alt="../../_images/theoretical_pipeline_schematic.png" src="../../_images/theoretical_pipeline_schematic.png" style="width: 610.0px; height: 71.0px;" /></a>
+<p>Computational pipelines transform your data in stages until the final result is produced.
+The most straightforward way to use Ruffus is to hold the intermediate results after each stage
+in a series of files with related file names.</p>
+<p>Part of telling Ruffus how these pipeline stages or <a class="reference internal" href="../../glossary.html#term-task"><em class="xref std std-term">task</em></a> functions are connected
+together is to write simple rules for how the file names for each stage follow on from each other.
+Ruffus helps you to specify these file naming rules.</p>
+<div class="admonition note">
+<p class="first admonition-title">Note</p>
+<p><strong>The best way to design a pipeline is to:</strong></p>
+<blockquote class="last">
+<div><ul class="simple">
+<li><strong>Write down the file names of the data as it flows across your pipeline.</strong>
+Do these file names follow a <em>pattern</em>?</li>
+<li><strong>Write down the names of the functions which transform the data at each stage of the pipeline.</strong></li>
+</ul>
+</div></blockquote>
+</div>
+</div></blockquote>
+</div>
+<div class="section" id="a-different-file-name-suffix-for-each-pipeline-stage">
+<span id="new-manual-suffix"></span><h2>A different file name <a class="reference internal" href="../../decorators/indicator_objects.html#decorators-suffix"><em>suffix()</em></a> for each pipeline stage<a class="headerlink" href="#a-different-file-name-suffix-for-each-pipeline-stage" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>The easiest and cleanest way to write Ruffus pipelines is to use a different suffix
+for each stage of your pipeline.</p>
+<p>We used this approach in <a class="reference internal" href="introduction.html#new-manual-introduction"><em>Chapter 1: An introduction to basic Ruffus syntax</em></a> and in <a class="reference internal" href="transform_in_parallel_code.html#new-manual-transform-in-parallel-code"><em>code</em></a> from <a class="reference internal" href="transform_in_parallel.html#new-manual-transform-in-parallel"><em>Chapter 3: More on @transform-ing data</em></a>:</p>
+<div class="highlight-bash"><div class="highlight"><pre><span class="hll"> <span class="c">#Task Name: File suffices</span>
+</span> _________________________ ______________________
+ create_initial_file_pairs *.start
+ first_task *.output.1
+ second_task *.output.2
+</pre></div>
+</div>
+<p>There is a long-standing convention of using file suffices to denote file type: for example, a <strong>“compile”</strong> task might convert <strong>source</strong> files of type <tt class="docutils literal"><span class="pre">*.c</span></tt> to <strong>object</strong> files of type <tt class="docutils literal"><span class="pre">*.o</span></tt>.</p>
+<dl class="docutils">
+<dt>We can think of Ruffus tasks as comprising:</dt>
+<dd><ul class="first last simple">
+<li>recipes in <tt class="docutils literal"><span class="pre">@transform(...)</span></tt> for transforming file names: changing <tt class="docutils literal"><span class="pre">.c</span></tt> to a <tt class="docutils literal"><span class="pre">.o</span></tt> (e.g. <tt class="docutils literal"><span class="pre">AA.c</span> <span class="pre">-></span> <span class="pre">AA.o</span></tt> <tt class="docutils literal"><span class="pre">BB.c</span> <span class="pre">-></span> <span cla [...]
+<li>recipes in a task function <tt class="docutils literal"><span class="pre">def</span> <span class="pre">foo_bar()</span></tt> for transforming your data: from <strong>source</strong> <tt class="docutils literal"><span class="pre">.c</span></tt> to <strong>object</strong> <tt class="docutils literal"><span class="pre">.o</span></tt></li>
+</ul>
+</dd>
+</dl>
+<p>Let us review the Ruffus syntax for doing this:</p>
+<blockquote>
+<div><div class="highlight-bash"><div class="highlight"><pre><span class="hll">@transform<span class="o">(</span> create_initial_file_pairs, <span class="c"># Input: Name of previous task(s)</span>
+</span><span class="hll"> suffix<span class="o">(</span><span class="s2">".start"</span><span class="o">)</span>, <span class="c"># Matching suffix</span>
+</span><span class="hll"> <span class="s2">".output.1"</span><span class="o">)</span> <span class="c"># Replacement string</span>
+</span>def first_task<span class="o">(</span>input_files, output_file<span class="o">)</span>:
+ with open<span class="o">(</span>output_file, <span class="s2">"w"</span><span class="o">)</span>: pass
+</pre></div>
+</div>
+</div></blockquote>
+<ol class="arabic">
+<li><p class="first"><strong>Input</strong>:</p>
+<blockquote>
+<div><dl class="docutils">
+<dt>The first parameter for <tt class="docutils literal"><span class="pre">@transform</span></tt> can be a mixture of one or more:</dt>
+<dd><ul class="first last simple">
+<li>previous tasks (e.g. <tt class="docutils literal"><span class="pre">create_initial_file_pairs</span></tt>)</li>
+<li>file names (all python strings are treated as paths)</li>
+<li>glob specifications (e.g. <tt class="docutils literal"><span class="pre">*.c</span></tt>, <tt class="docutils literal"><span class="pre">/my/path/*.foo</span></tt>; see the short sketch after this list)</li>
+</ul>
+</dd>
+</dl>
+<p>Each element provides an input for the task. So if the previous task <tt class="docutils literal"><span class="pre">create_initial_file_pairs</span></tt> has five outputs, the next <tt class="docutils literal"><span class="pre">@transform</span></tt> task will accept
+these as five separate inputs leading to five independent jobs.</p>
+</div></blockquote>
+</li>
+<li><p class="first"><a class="reference internal" href="../../decorators/indicator_objects.html#decorators-suffix"><em>suffix()</em></a>:</p>
+<blockquote>
+<div><p>The second parameter <tt class="docutils literal"><span class="pre">suffix(".start")</span></tt> must match the end of the first string in each input.
+For example, if <tt class="docutils literal"><span class="pre">create_initial_file_pairs</span></tt> produces the list <tt class="docutils literal"><span class="pre">['job1.a.start',</span> <span class="pre">'job1.b.start']</span></tt>, then <tt class="docutils literal"><span class="pre">suffix(".start")</span></tt> is matched against the first string, i.e. <tt class="docutils literal"><span class="pre">'job1.a.start'</span></tt>.
+If the input is a nested structure, it is iterated through recursively to find the first string.</p>
+<div class="admonition note">
+<p class="first admonition-title">Note</p>
+<p class="last">Inputs which do not match the suffix are discarded altogether.</p>
+</div>
+</div></blockquote>
+</li>
+<li><p class="first"><strong>Replacement</strong>:</p>
+<blockquote>
+<div><p>The third parameter is the replacement for the suffix.
+The pair of input strings in the example above produces the following output parameter:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="n">input_parameters</span> <span class="o">=</span> <span class="p">[</span><span class="s">'job1.a.start'</span><span class="p">,</span> <span class="s">'job1.b.start'</span><span class="p">]</span>
+<span class="n">matching_input</span> <span class="o">=</span> <span class="s">'job1.a.start'</span>
+<span class="n">output_parameter</span> <span class="o">=</span> <span class="s">'job1.a.output.1'</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p>When the pipeline is run, this results in the following equivalent call to <tt class="docutils literal"><span class="pre">first_task(...)</span></tt>:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="n">first_task</span><span class="p">([</span><span class="s">'job1.a.start'</span><span class="p">,</span> <span class="s">'job1.b.start'</span><span class="p">],</span> <span class="s">'job1.a.output.1'</span><span class="p">):</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p>The replacement parameter can itself be a list or an arbitrarily complicated structure:</p>
+<blockquote>
+<div><div class="highlight-bash"><div class="highlight"><pre><span class="hll">@transform<span class="o">(</span>create_initial_file_pairs, <span class="c"># Input</span>
+</span><span class="hll"> suffix<span class="o">(</span><span class="s2">".a.start"</span><span class="o">)</span>, <span class="c"># Matching suffix</span>
+</span><span class="hll"> <span class="o">[</span><span class="s2">".output.a.1"</span>, <span class="s2">".output.b.1"</span>, 45<span class="o">])</span> <span class="c"># Replacement list</span>
+</span>def first_task<span class="o">(</span>input_files, output_parameters<span class="o">)</span>:
+ print <span class="s2">"input_parameters = "</span>, input_files
+ print <span class="s2">"output_parameters = "</span>, output_parameters
+</pre></div>
+</div>
+</div></blockquote>
+<p>In this case, all the strings are used as replacements, other values are left untouched, and we obtain the following:</p>
+<blockquote>
+<div><div class="highlight-bash"><div class="highlight"><pre><span class="hll"><span class="c"># job #1</span>
+</span><span class="nv">input</span> <span class="o">=</span> <span class="o">[</span><span class="s1">'job1.a.start'</span>, <span class="s1">'job1.b.start'</span><span class="o">]</span>
+<span class="nv">output</span> <span class="o">=</span> <span class="o">[</span><span class="s1">'job1.output.a.1'</span>, <span class="s1">'job1.output.b.1'</span>, 45<span class="o">]</span>
+
+<span class="hll"><span class="c"># job #2</span>
+</span><span class="nv">input</span> <span class="o">=</span> <span class="o">[</span><span class="s1">'job2.a.start'</span>, <span class="s1">'job2.b.start'</span><span class="o">]</span>
+<span class="nv">output</span> <span class="o">=</span> <span class="o">[</span><span class="s1">'job2.output.a.1'</span>, <span class="s1">'job2.output.b.1'</span>, 45<span class="o">]</span>
+
+<span class="hll"><span class="c"># job #3</span>
+</span><span class="nv">input</span> <span class="o">=</span> <span class="o">[</span><span class="s1">'job3.a.start'</span>, <span class="s1">'job3.b.start'</span><span class="o">]</span>
+<span class="nv">output</span> <span class="o">=</span> <span class="o">[</span><span class="s1">'job3.output.a.1'</span>, <span class="s1">'job3.output.b.1'</span>, 45<span class="o">]</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p>Note how the task function is called with the value <tt class="docutils literal"><span class="pre">45</span></tt> <em>verbatim</em> because it is not a string.</p>
+</div></blockquote>
+</li>
+</ol>
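+<p>As a minimal sketch of the glob form of <strong>Input</strong> mentioned in point 1 (this example is not part of the original text; the task name <tt class="docutils literal"><span class="pre">glob_task</span></tt> is hypothetical and the <tt class="docutils literal"><span class="pre">*.start</span></tt> pattern is simply reused from the examples above), every file on disk matching the glob becomes one job:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre>from ruffus import *
+
+# Sketch only: every "*.start" file found on disk becomes one job;
+# suffix(".start") is replaced by ".output.1" to name each output.
+@transform("*.start",
+           suffix(".start"),
+           ".output.1")
+def glob_task(input_file, output_file):
+    open(output_file, "w").close()
+
+pipeline_run()
+</pre></div>
+</div>
+</div></blockquote>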
+</div></blockquote>
+</div>
+<div class="section" id="formatter-manipulates-pathnames-and-regular-expression">
+<span id="new-manual-formatter"></span><h2><a class="reference internal" href="../../decorators/indicator_objects.html#decorators-formatter"><em>formatter()</em></a> manipulates pathnames and regular expression<a class="headerlink" href="#formatter-manipulates-pathnames-and-regular-expression" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p><a class="reference internal" href="../../decorators/indicator_objects.html#decorators-suffix"><em>suffix()</em></a> replacement is the cleanest and easiest way to generate suitable output file names for each stage in a pipeline.
+Often, however, we require more complicated manipulations to specify our file names.
+For example,</p>
+<blockquote>
+<div><ul class="simple">
+<li>It is common to have to change directories from a <em>data</em> directory to a <em>working</em> directory as the first step of a pipeline.</li>
+<li>Data management can be simplified by separating the files from each pipeline stage into their own directory.</li>
+<li>Information may have to be decoded from data file names, e.g. <tt class="docutils literal"><span class="pre">"experiment373.IBM.03March2002.txt"</span></tt></li>
+</ul>
+</div></blockquote>
+<p>Though <a class="reference internal" href="../../decorators/indicator_objects.html#decorators-formatter"><em>formatter()</em></a> is much more powerful, the principle and syntax are the same:
+we take string elements from the <strong>Input</strong> and perform some replacements to generate the <strong>Output</strong> parameters.</p>
+<p><a class="reference internal" href="../../decorators/indicator_objects.html#decorators-formatter"><em>formatter()</em></a></p>
+<blockquote>
+<div><ul class="simple">
+<li>Allows easy manipulation of path subcomponents in the style of <a class="reference external" href="http://docs.python.org/2/library/os.path.html#os.path.split">os.path.split()</a>, and <a class="reference external" href="http://docs.python.org/2/library/os.path.html#os.path.basename">os.path.basename</a></li>
+<li>Uses familiar python <a class="reference external" href="http://docs.python.org/2/library/string.html#string-formatting">string.format</a> syntax (See <a class="reference external" href="http://docs.python.org/2/library/string.html#format-examples">string.format examples</a>. )</li>
+<li>Supports optional regular expression (<a class="reference external" href="http://docs.python.org/2/library/re.html#re.MatchObject.group">re</a>) matches including named captures.</li>
+<li>Can refer to any file path (i.e. python string) in each input and is not limited like <a class="reference internal" href="../../decorators/indicator_objects.html#decorators-suffix"><em>suffix()</em></a> to the first string.</li>
+<li>Can even refer to individual letters within a match.</li>
+</ul>
+</div></blockquote>
+</div></blockquote>
+<div class="section" id="path-name-components">
+<h3>Path name components<a class="headerlink" href="#path-name-components" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><p><a class="reference internal" href="../../decorators/indicator_objects.html#decorators-formatter"><em>formatter()</em></a> breaks down each input pathname into path name components which can then be recombined in whichever way by the replacement string.</p>
+<p>Given an example string of :</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="n">input_string</span> <span class="o">=</span> <span class="s">"/directory/to/a/file.name.ext"</span>
+<span class="n">formatter</span><span class="p">()</span>
+</pre></div>
+</div>
+<p>the path components are:</p>
+<ul class="simple">
+<li><tt class="docutils literal"><span class="pre">basename</span></tt>: The <a class="reference external" href="http://docs.python.org/2/library/os.path.html#os.path.basename">base name</a> <em>excluding</em> <a class="reference external" href="http://docs.python.org/2/library/os.path.html#os.path.splitext">extension</a>, <tt class="docutils literal"><span class="pre">"file.name"</span></tt></li>
+<li><tt class="docutils literal"><span class="pre">ext</span></tt> : The <a class="reference external" href="http://docs.python.org/2/library/os.path.html#os.path.splitext">extension</a>, <tt class="docutils literal"><span class="pre">".ext"</span></tt></li>
+<li><tt class="docutils literal"><span class="pre">path</span></tt> : The <a class="reference external" href="http://docs.python.org/2/library/os.path.html#os.path.dirname">dirname</a>, <tt class="docutils literal"><span class="pre">"/directory/to/a"</span></tt></li>
+<li><tt class="docutils literal"><span class="pre">subdir</span></tt> : A list of sub-directories in the <tt class="docutils literal"><span class="pre">path</span></tt> in reverse order, <tt class="docutils literal"><span class="pre">["a",</span> <span class="pre">"to",</span> <span class="pre">"directory",</span> <span class="pre">"/"]</span></tt></li>
+<li><tt class="docutils literal"><span class="pre">subpath</span></tt> : A list of descending sub-paths in reverse order, <tt class="docutils literal"><span class="pre">["/directory/to/a",</span> <span class="pre">"/directory/to",</span> <span class="pre">"/directory",</span> <span class="pre">"/"]</span></tt></li>
+</ul>
+</div></blockquote>
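+<p>As a rough guide (a sketch for illustration using only the standard library, not how Ruffus is implemented internally), the first three components correspond to familiar <tt class="docutils literal"><span class="pre">os.path</span></tt> calls, and the <tt class="docutils literal"><span class="pre">subpath</span></tt> / <tt class="docutils literal"><span class="pre">subdir</span></tt> lists can be reproduced by walking up the directory tree:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre>import os.path
+
+input_string = "/directory/to/a/file.name.ext"
+
+path          = os.path.dirname(input_string)               # "/directory/to/a"
+basename, ext = os.path.splitext(os.path.basename(input_string))
+                                                            # "file.name", ".ext"
+
+# walk up the directory tree to reproduce the documented subpath / subdir lists
+subpath, p = [], path
+while True:
+    subpath.append(p)
+    parent = os.path.dirname(p)
+    if parent == p:
+        break
+    p = parent
+# subpath == ['/directory/to/a', '/directory/to', '/directory', '/']
+subdir = [os.path.basename(p) or "/" for p in subpath]
+# subdir  == ['a', 'to', 'directory', '/']
+</pre></div>
+</div>
+</div></blockquote>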
+<p>The replacement string refers to these components by using python <a class="reference external" href="http://docs.python.org/2/library/string.html#string-formatting">string.format</a> style curly braces, e.g. <tt class="docutils literal"><span class="pre">"{NAME}"</span></tt>.</p>
+<p>Components from the Nth input string are referred to by index, for example:</p>
+<blockquote>
+<div><ul class="simple">
+<li><tt class="docutils literal"><span class="pre">"{ext[0]}"</span></tt> is the extension of the first file name string in <strong>Input</strong>.</li>
+<li><tt class="docutils literal"><span class="pre">"{basename[1]}"</span></tt> is the basename of the second file name in <strong>Input</strong>.</li>
+<li><tt class="docutils literal"><span class="pre">"{basename[1][0:3]}"</span></tt> are the first three letters from the basename of the second file name in <strong>Input</strong>.</li>
+</ul>
+</div></blockquote>
+<p><tt class="docutils literal"><span class="pre">subdir</span></tt>, <tt class="docutils literal"><span class="pre">subpath</span></tt> were designed to help you navigate directory hierachies with the minimum of fuss.
+For example, you might want to graft a hierachical path to another location:
+<tt class="docutils literal"><span class="pre">"{subpath[0][2]}/from/{subdir[0][0]}/{basename[0]}"</span></tt> neatly replaces just one directory (<tt class="docutils literal"><span class="pre">"to"</span></tt>) in the path with another (<tt class="docutils literal"><span class="pre">"from"</span></tt>):</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="n">replacement_string</span> <span class="o">=</span> <span class="s">"{subpath[0][2]}/from/{subdir[0][0]}/{basename[0]}"</span>
+
+<span class="n">input_string</span> <span class="o">=</span> <span class="s">"/directory/to/a/file.name.ext"</span>
+<span class="n">result_string</span> <span class="o">=</span> <span class="s">"/directory/from/a/file.name.ext"</span>
+</pre></div>
+</div>
+</div></blockquote>
+</div></blockquote>
+</div>
+<div class="section" id="filter-and-parse-using-regular-expressions">
+<span id="new-manual-formatter-regex"></span><h3>Filter and parse using regular expressions<a class="headerlink" href="#filter-and-parse-using-regular-expressions" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><p><a class="reference external" href="http://docs.python.org/2/library/re.html#re.MatchObject.group">Regular expression</a> matches can be used with the similar syntax.
+Our example string can be parsed using the following regular expression:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="n">input_string</span> <span class="o">=</span> <span class="s">"/directory/to/a/file.name.ext"</span>
+<span class="n">formatter</span><span class="p">(</span><span class="s">r"/directory/(.+)/(?P<MYFILENAME>)\.ext"</span><span class="p">)</span>
+</pre></div>
+</div>
+<p>We capture part of the path using <tt class="docutils literal"><span class="pre">(.+)</span></tt>, and the base name using <tt class="docutils literal"><span class="pre">(?P<MYFILENAME>.+)</span></tt>.
+These <a class="reference external" href="http://docs.python.org/2/library/re.html#re.MatchObject.group">matching subgroups</a> can be referred to by index
+but, for greater clarity, the second (named) capture group can also be referred to by name, i.e. <tt class="docutils literal"><span class="pre">{MYFILENAME}</span></tt>.</p>
+</div></blockquote>
+<p>The regular expression components for the first string can thus be referred to as follows:</p>
+<blockquote>
+<div><ul class="simple">
+<li><tt class="docutils literal"><span class="pre">{0[0]}</span></tt> : The entire match captured by index, <tt class="docutils literal"><span class="pre">"/directory/to/a/file.name.ext"</span></tt></li>
+<li><tt class="docutils literal"><span class="pre">{1[0]}</span></tt> : The first match captured by index, <tt class="docutils literal"><span class="pre">"to/a"</span></tt></li>
+<li><tt class="docutils literal"><span class="pre">{2[0]}</span></tt> : The second match captured by index, <tt class="docutils literal"><span class="pre">"file.name"</span></tt></li>
+<li><tt class="docutils literal"><span class="pre">{MYFILENAME[0]}</span></tt> : The match captured by name, <tt class="docutils literal"><span class="pre">"file.name"</span></tt></li>
+</ul>
+</div></blockquote>
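+<p>For illustration only (Ruffus performs the equivalent match internally), the same values can be reproduced with the standard <tt class="docutils literal"><span class="pre">re</span></tt> module:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre>import re
+
+input_string = "/directory/to/a/file.name.ext"
+m = re.search(r"/directory/(.+)/(?P<MYFILENAME>.+)\.ext", input_string)
+
+m.group(0)             # "/directory/to/a/file.name.ext"   i.e. "{0[0]}"
+m.group(1)             # "to/a"                            i.e. "{1[0]}"
+m.group(2)             # "file.name"                       i.e. "{2[0]}"
+m.group("MYFILENAME")  # "file.name"                       i.e. "{MYFILENAME[0]}"
+</pre></div>
+</div>
+</div></blockquote>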
+<p>If each input consists of a list of paths such as <tt class="docutils literal"><span class="pre">['job1.a.start',</span> <span class="pre">'job1.b.start',</span> <span class="pre">'job1.c.start']</span></tt>, we can match each of them separately
+by using as many regular expressions as necessary. For example:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="n">input_string</span> <span class="o">=</span> <span class="p">[</span><span class="s">'job1.a.start'</span><span class="p">,</span> <span class="s">'job1.b.start'</span><span class="p">,</span> <span class="s">'job1.c.start'</span><span class="p">]</span>
+<span class="c"># Regular expression matches for 1st, 2nd but not 3rd element</span>
+<span class="n">formatter</span><span class="p">(</span><span class="s">".+a.start"</span><span class="p">,</span> <span class="s">"b.start$"</span><span class="p">)</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p>Or if you only wanted regular expression matches for the second file name (string), pad with <tt class="docutils literal"><span class="pre">None</span></tt>:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="n">input_string</span> <span class="o">=</span> <span class="p">[</span><span class="s">'job1.a.start'</span><span class="p">,</span> <span class="s">'job1.b.start'</span><span class="p">,</span> <span class="s">'job1.c.start'</span><span class="p">]</span>
+<span class="c"># Regular expression matches for 2nd but not 1st or 3rd elements</span>
+<span class="n">formatter</span><span class="p">(</span><span class="bp">None</span><span class="p">,</span> <span class="s">"b.start$"</span><span class="p">)</span>
+</pre></div>
+</div>
+</div></blockquote>
+</div></blockquote>
+</div>
+<div class="section" id="using-transform-with-formatter">
+<h3>Using <a class="reference internal" href="../../decorators/transform.html#decorators-transform"><em>@transform()</em></a> with <a class="reference internal" href="../../decorators/indicator_objects.html#decorators-formatter"><em>formatter()</em></a><a class="headerlink" href="#using-transform-with-formatter" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><p>We can put these together in the following example:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+
+<span class="c"># create initial files</span>
+<span class="nd">@originate</span><span class="p">([</span> <span class="p">[</span><span class="s">'job1.a.start'</span><span class="p">,</span> <span class="s">'job1.b.start'</span><span class="p">],</span>
+ <span class="p">[</span><span class="s">'job2.a.start'</span><span class="p">,</span> <span class="s">'job2.b.start'</span><span class="p">],</span>
+ <span class="p">[</span><span class="s">'job3.a.start'</span><span class="p">,</span> <span class="s">'job3.c.start'</span><span class="p">]</span> <span class="p">])</span>
+<span class="k">def</span> <span class="nf">create_initial_file_pairs</span><span class="p">(</span><span class="n">output_files</span><span class="p">):</span>
+ <span class="c"># create both files as necessary</span>
+ <span class="k">for</span> <span class="n">output_file</span> <span class="ow">in</span> <span class="n">output_files</span><span class="p">:</span>
+ <span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span> <span class="k">as</span> <span class="n">oo</span><span class="p">:</span> <span class="k">pass</span>
+
+
+<span class="c">#---------------------------------------------------------------</span>
+<span class="c">#</span>
+<span class="c"># formatter</span>
+<span class="c">#</span>
+
+<span class="c"># first task</span>
+<span class="nd">@transform</span><span class="p">(</span><span class="n">create_initial_file_pairs</span><span class="p">,</span> <span class="c"># Input</span>
+
+<span class="hll"> <span class="n">formatter</span><span class="p">(</span><span class="s">".+/job(?P<JOBNUMBER>\d+).a.start"</span><span class="p">,</span> <span class="c"># Extract job number</span>
+</span><span class="hll"> <span class="s">".+/job[123].b.start"</span><span class="p">),</span> <span class="c"># Match only "b" files</span>
+</span>
+ <span class="p">[</span><span class="s">"{path[0]}/jobs{JOBNUMBER[0]}.output.a.1"</span><span class="p">,</span> <span class="c"># Replacement list</span>
+ <span class="s">"{path[1]}/jobs{JOBNUMBER[0]}.output.b.1"</span><span class="p">,</span> <span class="mi">45</span><span class="p">])</span>
+<span class="k">def</span> <span class="nf">first_task</span><span class="p">(</span><span class="n">input_files</span><span class="p">,</span> <span class="n">output_parameters</span><span class="p">):</span>
+ <span class="k">print</span> <span class="s">"input_parameters = "</span><span class="p">,</span> <span class="n">input_files</span>
+ <span class="k">print</span> <span class="s">"output_parameters = "</span><span class="p">,</span> <span class="n">output_parameters</span>
+
+
+<span class="c">#</span>
+<span class="c"># Run</span>
+<span class="c">#</span>
+<span class="n">pipeline_run</span><span class="p">(</span><span class="n">verbose</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
+</pre></div>
+</div>
+<p>This produces:</p>
+<div class="highlight-pycon"><div class="highlight"><pre><span class="go">input_parameters = ['job1.a.start',</span>
+<span class="go"> 'job1.b.start']</span>
+<span class="go">output_parameters = ['/home/lg/src/temp/jobs1.output.a.1',</span>
+<span class="go"> '/home/lg/src/temp/jobs1.output.b.1', 45]</span>
+
+<span class="go">input_parameters = ['job2.a.start',</span>
+<span class="go"> 'job2.b.start']</span>
+<span class="go">output_parameters = ['/home/lg/src/temp/jobs2.output.a.1',</span>
+<span class="go"> '/home/lg/src/temp/jobs2.output.b.1', 45]</span>
+</pre></div>
+</div>
+<p>Notice that <tt class="docutils literal"><span class="pre">job3</span></tt> has <tt class="docutils literal"><span class="pre">'job3.c.start'</span></tt> as the second file.
+This fails to match the regular expression and is discarded.</p>
+<div class="admonition note">
+<p class="first admonition-title">Note</p>
+<p>Inputs whose file names fail to match the regular expressions are quietly ignored.</p>
+<p><a class="reference internal" href="../../decorators/indicator_objects.html#decorators-formatter"><em>formatter()</em></a> regular expressions are thus very useful in filtering out all
+files which do not match your specified criteria.</p>
+<p class="last">If your some of your task inputs have a mixture of different file types, a simple <tt class="docutils literal"><span class="pre">Formatter(".txt$")</span></tt>, for example, will make
+your code a lot simpler...</p>
+</div>
+</div></blockquote>
+</div></blockquote>
+</div>
+<div class="section" id="string-substitution-for-extra-arguments">
+<h3>String substitution for “extra” arguments<a class="headerlink" href="#string-substitution-for-extra-arguments" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><p>The first two arguments for Ruffus task functions are special because they are the <strong>Input</strong> and <strong>Output</strong>
+parameters which link different stages of a pipeline.</p>
+<p>Python strings in these arguments are names of data files whose modification times indicate whether the pipeline is up to date or not.</p>
+<p>Other arguments to task functions are not passed down the pipeline but are consumed by the task itself.
+Any python strings they contain do not need to be file names. These extra arguments are very useful
+for passing data to pipelined tasks, such as shared values, loggers, program options, etc.</p>
+<p>One helpful feature is that strings in these extra arguments are also subject to <a class="reference internal" href="../../decorators/indicator_objects.html#decorators-formatter"><em>formatter()</em></a> string substitution.
+This means you can leverage the parsing capabilities of Ruffus to decode information about the pipeline data files,
+such as the directories you are running in and parts of the file name.</p>
+<p>For example, if we wanted to know which files go with which “job number” in the previous example:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+
+<span class="c"># create initial files</span>
+<span class="nd">@originate</span><span class="p">([</span> <span class="p">[</span><span class="s">'job1.a.start'</span><span class="p">,</span> <span class="s">'job1.b.start'</span><span class="p">],</span>
+ <span class="p">[</span><span class="s">'job2.a.start'</span><span class="p">,</span> <span class="s">'job2.b.start'</span><span class="p">],</span>
+ <span class="p">[</span><span class="s">'job3.a.start'</span><span class="p">,</span> <span class="s">'job3.c.start'</span><span class="p">]</span> <span class="p">])</span>
+<span class="k">def</span> <span class="nf">create_initial_file_pairs</span><span class="p">(</span><span class="n">output_files</span><span class="p">):</span>
+ <span class="k">for</span> <span class="n">output_file</span> <span class="ow">in</span> <span class="n">output_files</span><span class="p">:</span>
+ <span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span> <span class="k">as</span> <span class="n">oo</span><span class="p">:</span> <span class="k">pass</span>
+
+
+<span class="c">#---------------------------------------------------------------</span>
+<span class="c">#</span>
+<span class="c"># print job number as an extra argument</span>
+<span class="c">#</span>
+
+<span class="c"># first task</span>
+<span class="nd">@transform</span><span class="p">(</span><span class="n">create_initial_file_pairs</span><span class="p">,</span> <span class="c"># Input</span>
+
+ <span class="n">formatter</span><span class="p">(</span><span class="s">".+/job(?P<JOBNUMBER>\d+).a.start"</span><span class="p">,</span> <span class="c"># Extract job number</span>
+<span class="hll"> <span class="s">".+/job[123].b.start"</span><span class="p">),</span> <span class="c"># Match only "b" files</span>
+</span><span class="hll">
+</span> <span class="p">[</span><span class="s">"{path[0]}/jobs{JOBNUMBER[0]}.output.a.1"</span><span class="p">,</span> <span class="c"># Replacement list</span>
+ <span class="s">"{path[1]}/jobs{JOBNUMBER[0]}.output.b.1"</span><span class="p">],</span>
+
+                 <span class="s">"{JOBNUMBER[0]}"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">first_task</span><span class="p">(</span><span class="n">input_files</span><span class="p">,</span> <span class="n">output_parameters</span><span class="p">,</span> <span class="n">job_number</span><span class="p">):</span>
+ <span class="k">print</span> <span class="n">job_number</span><span class="p">,</span> <span class="s">":"</span><span class="p">,</span> <span class="n">input_files</span>
+
+
+<span class="n">pipeline_run</span><span class="p">(</span><span class="n">verbose</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
+</pre></div>
+</div>
+<div class="highlight-python"><div class="highlight"><pre><span class="gp">>>> </span><span class="n">pipeline_run</span><span class="p">(</span><span class="n">verbose</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
+<span class="go">1 : ['job1.a.start', 'job1.b.start']</span>
+<span class="go">2 : ['job2.a.start', 'job2.b.start']</span>
+</pre></div>
+</div>
+</div></blockquote>
+</div></blockquote>
+</div>
+<div class="section" id="changing-directories-using-formatter-in-a-zoo">
+<span id="new-manual-output-file-names-formatter-zoo"></span><h3>Changing directories using <a class="reference internal" href="../../decorators/indicator_objects.html#decorators-formatter"><em>formatter()</em></a> in a zoo...<a class="headerlink" href="#changing-directories-using-formatter-in-a-zoo" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><p>Here is a more fun example. We would like to feed the denizens of a zoo. Unfortunately, the file names for
+these are spread over several directories. Ideally, we would like their food supply to be grouped more
+sensibly. And, of course, we only want to feed the animals, not the plants.</p>
+<p>I have colour coded the input and output files for this task to show how we would like to rearrange them:</p>
+<blockquote>
+<div><a class="reference internal image-reference" href="../../_images/simple_tutorial_zoo_animals_formatter_example.jpg"><img alt="../../_images/simple_tutorial_zoo_animals_formatter_example.jpg" src="../../_images/simple_tutorial_zoo_animals_formatter_example.jpg" style="width: 915.5px; height: 208.5px;" /></a>
+<div class="highlight-python"><div class="highlight"><pre><span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+
+<span class="c"># Make directories</span>
+<span class="nd">@mkdir</span><span class="p">([</span><span class="s">"tiger"</span><span class="p">,</span> <span class="s">"lion"</span><span class="p">,</span> <span class="s">"dog"</span><span class="p">,</span> <span class="s">"crocodile"</span><span class="p">,</span> <span class="s">"rose"</span><span class="p">])</span>
+
+<span class="nd">@originate</span><span class="p">(</span>
+<span class="hll"> <span class="c"># List of animals and plants</span>
+</span> <span class="p">[</span> <span class="s">"tiger/mammals.wild.animals"</span><span class="p">,</span>
+ <span class="s">"lion/mammals.wild.animals"</span><span class="p">,</span>
+ <span class="s">"lion/mammals.handreared.animals"</span><span class="p">,</span>
+ <span class="s">"dog/mammals.tame.animals"</span><span class="p">,</span>
+ <span class="s">"dog/mammals.wild.animals"</span><span class="p">,</span>
+ <span class="s">"crocodile/reptiles.wild.animals"</span><span class="p">,</span>
+ <span class="s">"rose/flowering.handreared.plants"</span><span class="p">])</span>
+<span class="k">def</span> <span class="nf">create_initial_files</span><span class="p">(</span><span class="n">output_file</span><span class="p">):</span>
+ <span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span> <span class="k">as</span> <span class="n">oo</span><span class="p">:</span> <span class="k">pass</span>
+
+
+<span class="c"># Put different animals in different directories depending on their clade</span>
+<span class="nd">@transform</span><span class="p">(</span><span class="n">create_initial_files</span><span class="p">,</span> <span class="c"># Input</span>
+
+<span class="hll"> <span class="n">formatter</span><span class="p">(</span><span class="s">".+/(?P<clade>\w+).(?P<tame>\w+).animals"</span><span class="p">),</span> <span class="c"># Only animals: ignore plants!</span>
+</span>
+ <span class="s">"{subpath[0][1]}/{clade[0]}/{tame[0]}.{subdir[0][0]}.food"</span><span class="p">,</span> <span class="c"># Replacement</span>
+
+<span class="hll"> <span class="s">"{subpath[0][1]}/{clade[0]}"</span><span class="p">,</span> <span class="c"># new_directory</span>
+</span><span class="hll"> <span class="s">"{subdir[0][0]}"</span><span class="p">,</span> <span class="c"># animal_name</span>
+</span><span class="hll"> <span class="s">"{tame[0]}"</span><span class="p">)</span> <span class="c"># tameness</span>
+</span><span class="k">def</span> <span class="nf">feed</span><span class="p">(</span><span class="n">input_file</span><span class="p">,</span> <span class="n">output_file</span><span class="p">,</span> <span class="n">new_directory</span><span class="p">,</span> <span class="n">animal_name</span><span class="p">,</span> <span class="n">tameness</span><span class="p">):</span>
+ <span class="k">print</span> <span class="s">"Food for the {tameness:11s} {animal_name:9s} = {output_file:90s} will be placed in {new_directory}"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="o">**</span><span class="nb">locals</span><span class="p">())</span>
+
+<span class="n">pipeline_run</span><span class="p">(</span><span class="n">verbose</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
+</pre></div>
+</div>
+<p>We can see that the food for each animal is now grouped by clade in the same directory, which makes a lot more sense...</p>
+<p>Note how we used <tt class="docutils literal"><span class="pre">subpath[0][1]</span></tt> to go one level up the directory tree when building the new file name.</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="gp">>>> </span><span class="n">pipeline_run</span><span class="p">(</span><span class="n">verbose</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
+<span class="go">Food for the wild crocodile = ./reptiles/wild.crocodile.food will be placed in ./reptiles</span>
+<span class="go">Food for the tame dog = ./mammals/tame.dog.food will be placed in ./mammals</span>
+<span class="go">Food for the wild dog = ./mammals/wild.dog.food will be placed in ./mammals</span>
+<span class="go">Food for the handreared lion = ./mammals/handreared.lion.food will be placed in ./mammals</span>
+<span class="go">Food for the wild lion = ./mammals/wild.lion.food will be placed in ./mammals</span>
+<span class="go">Food for the wild tiger = ./mammals/wild.tiger.food will be placed in ./mammals</span>
+</pre></div>
+</div>
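+<p>To make the indexing concrete, here is roughly what the replacement sees for one input, based on the output shown above (a sketch, assuming the pipeline is run from the current directory):</p>
+<div class="highlight-python"><div class="highlight"><pre># input file     : "tiger/mammals.wild.animals"
+#
+# clade[0]       == "mammals"      from (?P<clade>\w+)
+# tame[0]        == "wild"         from (?P<tame>\w+)
+# subdir[0][0]   == "tiger"        the animal's own directory
+# subpath[0][1]  == "."            one directory level above "tiger"
+#
+# "{subpath[0][1]}/{clade[0]}/{tame[0]}.{subdir[0][0]}.food"
+#                              -->  "./mammals/wild.tiger.food"
+</pre></div>
+</div>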
+</div></blockquote>
+</div></blockquote>
+</div>
+</div>
+<div class="section" id="regex-manipulates-via-regular-expressions">
+<span id="new-manual-regex"></span><h2><a class="reference internal" href="../../decorators/indicator_objects.html#decorators-regex"><em>regex()</em></a> manipulates via regular expressions<a class="headerlink" href="#regex-manipulates-via-regular-expressions" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>If you are a hard-core regular expression fan, you may want to use <a class="reference internal" href="../../decorators/indicator_objects.html#decorators-regex"><em>regex()</em></a> instead of <a class="reference internal" href="../../decorators/indicator_objects.html#decorators-suffix"><em>suffix()</em></a> or <a class="reference internal" href="../../decorators/indicator_objects.html#decorators-formatter"><em>formatter()</em></a>.</p>
+<div class="admonition note">
+<p class="first admonition-title">Note</p>
+<p><a class="reference internal" href="../../decorators/indicator_objects.html#decorators-regex"><em>regex()</em></a> uses regular expressions like <a class="reference internal" href="../../decorators/indicator_objects.html#decorators-formatter"><em>formatter()</em></a> but</p>
+<blockquote>
+<div><ul class="simple">
+<li>It only matches the first file name in the input. As described above, <a class="reference internal" href="../../decorators/indicator_objects.html#decorators-formatter"><em>formatter()</em></a> can match any one or more of the input filename strings.</li>
+<li>It does not understand file paths so you may have to perform your own directory / file name parsing.</li>
+<li>String replacement uses syntax borrowed from <a class="reference external" href="http://docs.python.org/2/library/re.html#re.sub">re.sub()</a>, rather than building a result from parsed regular expression (and file path) components</li>
+</ul>
+</div></blockquote>
+<p class="last">In general <a class="reference internal" href="../../decorators/indicator_objects.html#decorators-formatter"><em>formatter()</em></a> is more powerful and was introduced from version 2.4 is intended to be a more user friendly replacement for <a class="reference internal" href="../../decorators/indicator_objects.html#decorators-regex"><em>regex()</em></a>.</p>
+</div>
+<p>Let us see how the previous zoo example looks with <a class="reference internal" href="../../decorators/indicator_objects.html#decorators-regex"><em>regex()</em></a>:</p>
+<blockquote>
+<div><p><a class="reference internal" href="../../decorators/indicator_objects.html#decorators-formatter"><em>formatter()</em></a> code:</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="c"># Put different animals in different directories depending on their clade</span>
+<span class="nd">@transform</span><span class="p">(</span><span class="n">create_initial_files</span><span class="p">,</span> <span class="c"># Input</span>
+
+<span class="hll"> <span class="n">formatter</span><span class="p">(</span><span class="s">".+/(?P<clade>\w+).(?P<tame>\w+).animals"</span><span class="p">),</span> <span class="c"># Only animals: ignore plants!</span>
+</span>
+<span class="hll"> <span class="s">"{subpath[0][1]}/{clade[0]}/{tame[0]}.{subdir[0][0]}.food"</span><span class="p">,</span> <span class="c"># Replacement</span>
+</span>
+ <span class="s">"{subpath[0][1]}/{clade[0]}"</span><span class="p">,</span> <span class="c"># new_directory</span>
+ <span class="s">"{subdir[0][0]}"</span><span class="p">,</span> <span class="c"># animal_name</span>
+ <span class="s">"{tame[0]}"</span><span class="p">)</span> <span class="c"># tameness</span>
+<span class="k">def</span> <span class="nf">feed</span><span class="p">(</span><span class="n">input_file</span><span class="p">,</span> <span class="n">output_file</span><span class="p">,</span> <span class="n">new_directory</span><span class="p">,</span> <span class="n">animal_name</span><span class="p">,</span> <span class="n">tameness</span><span class="p">):</span>
+ <span class="k">print</span> <span class="s">"Food for the {tameness:11s} {animal_name:9s} = {output_file:90s} will be placed in {new_directory}"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="o">**</span><span class="nb">locals</span><span class="p">())</span>
+</pre></div>
+</div>
+<p><a class="reference internal" href="../../decorators/indicator_objects.html#decorators-regex"><em>regex()</em></a> code:</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="c"># Put different animals in different directories depending on their clade</span>
+<span class="nd">@transform</span><span class="p">(</span><span class="n">create_initial_files</span><span class="p">,</span> <span class="c"># Input</span>
+
+<span class="hll"> <span class="n">regex</span><span class="p">(</span><span class="s">r"(.*?/?)(\w+)/(?P<clade>\w+).(?P<tame>\w+).animals"</span><span class="p">),</span> <span class="c"># Only animals: ignore plants!</span>
+</span>
+<span class="hll"> <span class="s">r"\1/\g<clade>/\g<tame>.\2.food"</span><span class="p">,</span> <span class="c"># Replacement</span>
+</span>
+ <span class="s">r"\1/\g<clade>"</span><span class="p">,</span> <span class="c"># new_directory</span>
+ <span class="s">r"\2"</span><span class="p">,</span> <span class="c"># animal_name</span>
+ <span class="s">"\g<tame>"</span><span class="p">)</span> <span class="c"># tameness</span>
+<span class="k">def</span> <span class="nf">feed</span><span class="p">(</span><span class="n">input_file</span><span class="p">,</span> <span class="n">output_file</span><span class="p">,</span> <span class="n">new_directory</span><span class="p">,</span> <span class="n">animal_name</span><span class="p">,</span> <span class="n">tameness</span><span class="p">):</span>
+ <span class="k">print</span> <span class="s">"Food for the {tameness:11s} {animal_name:9s} = {output_file:90s} will be placed in {new_directory}"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="o">**</span><span class="nb">locals</span><span class="p">())</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p>The regular expression to parse the input file path safely was a bit hairy to write, and it is not
+clear that it handles all edge conditions (e.g. files in the root directory). Apart from that, if the
+limitations of <a class="reference internal" href="../../decorators/indicator_objects.html#decorators-regex"><em>regex()</em></a> do not preclude its use, then the two approaches
+are not so different in practice.</p>
+</div></blockquote>
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="../../contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#"><strong>Chapter 8</strong>: Specifying output file names with <tt class="docutils literal"><span class="pre">formatter()</span></tt> and <tt class="docutils literal"><span class="pre">regex()</span></tt></a><ul>
+<li><a class="reference internal" href="#review">Review</a></li>
+<li><a class="reference internal" href="#a-different-file-name-suffix-for-each-pipeline-stage">A different file name <tt class="docutils literal"><span class="pre">suffix()</span></tt> for each pipeline stage</a></li>
+<li><a class="reference internal" href="#formatter-manipulates-pathnames-and-regular-expression"><tt class="docutils literal"><span class="pre">formatter()</span></tt> manipulates pathnames and regular expression</a><ul>
+<li><a class="reference internal" href="#path-name-components">Path name components</a></li>
+<li><a class="reference internal" href="#filter-and-parse-using-regular-expressions">Filter and parse using regular expressions</a></li>
+<li><a class="reference internal" href="#using-transform-with-formatter">Using <tt class="docutils literal"><span class="pre">@transform()</span></tt> with <tt class="docutils literal"><span class="pre">formatter()</span></tt></a></li>
+<li><a class="reference internal" href="#string-substitution-for-extra-arguments">string substitution for “extra” arguments</a></li>
+<li><a class="reference internal" href="#changing-directories-using-formatter-in-a-zoo">Changing directories using <tt class="docutils literal"><span class="pre">formatter()</span></tt> in a zoo...</a></li>
+</ul>
+</li>
+<li><a class="reference internal" href="#regex-manipulates-via-regular-expressions"><tt class="docutils literal"><span class="pre">regex()</span></tt> manipulates via regular expressions</a></li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="pipeline_printout_graph.html"
+ title="previous chapter"><strong>Chapter 7</strong>: Displaying the pipeline visually with <tt class="docutils literal"><span class="pre">pipeline_printout_graph(...)</span></tt></a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="mkdir.html"
+ title="next chapter"><strong>Chapter 9</strong>: Preparing directories for output with <tt class="docutils literal"><span class="pre">@mkdir()</span></tt></a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../../_sources/tutorials/new_tutorial/output_file_names.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="../../decorators/decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="../../decorators/originate.html">@originate</a> </li>
+ <li><a href="../../decorators/split.html">@split</a> </li>
+ <li><a href="../../decorators/transform.html">@transform</a> </li>
+ <li><a href="../../decorators/merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="../../decorators/subdivide.html">@subdivide</a> </li>
+ <li><a href="../../decorators/transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="../../decorators/collate.html">@collate</a> </li>
+ <li><a href="../../decorators/collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="../../decorators/graphviz.html">@graphviz</a> </li>
+ <li><a href="../../decorators/mkdir.html">@mkdir</a> </li>
+ <li><a href="../../decorators/follows.html">@follows / mkdir</a> </li>
+ <li><a href="../../decorators/posttask.html">@posttask touch_file</a> </li>
+ <li><a href="../../decorators/active_if.html">@active_if</a> </li>
+ <li><a href="../../decorators/jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="../../decorators/product.html">@product </a> </li>
+ <li><a href="../../decorators/permutations.html">@permutations </a> </li>
+ <li><a href="../../decorators/combinations.html">@combinations </a> </li>
+ <li><a href="../../decorators/combinations_with_replacement.html">@combinations_ _with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="../../decorators/decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="../../decorators/files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="../../decorators/parallel.html">@parallel</a> </li>
+ <li><a href="../../decorators/check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="../../decorators/indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="mkdir.html" title="Chapter 9: Preparing directories for output with @mkdir()"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="pipeline_printout_graph.html" title="Chapter 7: Displaying the pipeline visually with pipeline_printout_graph(...)"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="introduction.html">Manual</a> / </li>
+ <li><a href="manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/tutorials/new_tutorial/output_file_names_code.html b/doc/_build/html/tutorials/new_tutorial/output_file_names_code.html
new file mode 100644
index 0000000..d5d27ec
--- /dev/null
+++ b/doc/_build/html/tutorials/new_tutorial/output_file_names_code.html
@@ -0,0 +1,432 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>Chapter 8: Python Code for Specifying output file names with formatter() and regex() — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../../index.html" />
+ <link rel="next" title="Chapter 9: Python Code for Preparing directories for output with @mkdir()" href="mkdir_code.html" />
+ <link rel="prev" title="Chapter 7: Python Code for Displaying the pipeline visually with pipeline_printout_graph(...)" href="pipeline_printout_graph_code.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="mkdir_code.html" title="Chapter 9: Python Code for Preparing directories for output with @mkdir()"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="pipeline_printout_graph_code.html" title="Chapter 7: Python Code for Displaying the pipeline visually with pipeline_printout_graph(...)"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="introduction.html">Manual</a> / </li>
+ <li><a href="manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <div class="section" id="new-manual-output-file-names-chapter-num-python-code-for-specifying-output-file-names-with-formatter-and-regex">
+<span id="new-manual-output-file-names-code"></span><h1><strong>Chapter 8</strong>: Python Code for Specifying output file names with <a class="reference internal" href="../../decorators/indicator_objects.html#decorators-formatter"><em>formatter()</em></a> and <a class="reference internal" href="../../decorators/indicator_objects.html#decorators-regex"><em>regex()</em></a><a class="headerlink" href="#new-manual-output-file-names-chapter-num-python-code-for-specifying-output-file-names-wi [...]
+<div class="admonition seealso">
+<p class="first admonition-title">See also</p>
+<ul class="last simple">
+<li><a class="reference internal" href="manual_contents.html#new-manual-table-of-contents"><em>Manual Table of Contents</em></a></li>
+<li><a class="reference internal" href="../../decorators/indicator_objects.html#decorators-suffix"><em>suffix()</em></a> syntax</li>
+<li><a class="reference internal" href="../../decorators/indicator_objects.html#decorators-formatter"><em>formatter()</em></a> syntax</li>
+<li><a class="reference internal" href="../../decorators/indicator_objects.html#decorators-regex"><em>regex()</em></a> syntax</li>
+<li>Back to <strong>Chapter 8</strong>: <a class="reference internal" href="output_file_names.html#new-manual-output-file-names"><em>Specifying output file names</em></a></li>
+</ul>
+</div>
+<div class="section" id="example-code-for-suffix">
+<h2>Example Code for <a class="reference internal" href="../../decorators/indicator_objects.html#decorators-suffix"><em>suffix()</em></a><a class="headerlink" href="#example-code-for-suffix" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+
+<span class="c">#---------------------------------------------------------------</span>
+<span class="c"># create initial files</span>
+<span class="c">#</span>
+<span class="nd">@originate</span><span class="p">([</span> <span class="p">[</span><span class="s">'job1.a.start'</span><span class="p">,</span> <span class="s">'job1.b.start'</span><span class="p">],</span>
+ <span class="p">[</span><span class="s">'job2.a.start'</span><span class="p">,</span> <span class="s">'job2.b.start'</span><span class="p">],</span>
+ <span class="p">[</span><span class="s">'job3.a.start'</span><span class="p">,</span> <span class="s">'job3.b.start'</span><span class="p">]</span> <span class="p">])</span>
+<span class="k">def</span> <span class="nf">create_initial_file_pairs</span><span class="p">(</span><span class="n">output_files</span><span class="p">):</span>
+ <span class="c"># create both files as necessary</span>
+ <span class="k">for</span> <span class="n">output_file</span> <span class="ow">in</span> <span class="n">output_files</span><span class="p">:</span>
+ <span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span> <span class="k">as</span> <span class="n">oo</span><span class="p">:</span> <span class="k">pass</span>
+
+<span class="c">#---------------------------------------------------------------</span>
+<span class="c">#</span>
+<span class="c"># suffix</span>
+<span class="c">#</span>
+<span class="nd">@transform</span><span class="p">(</span><span class="n">create_initial_file_pairs</span><span class="p">,</span> <span class="c"># name of previous task(s) (or list of files, or a glob)</span>
+ <span class="n">suffix</span><span class="p">(</span><span class="s">".start"</span><span class="p">),</span> <span class="c"># matching suffix of the "input file"</span>
+ <span class="p">[</span><span class="s">".output.a.1"</span><span class="p">,</span> <span class="mi">45</span><span class="p">,</span> <span class="s">".output.b.1"</span><span class="p">])</span> <span class="c"># resulting suffix</span>
+<span class="k">def</span> <span class="nf">first_task</span><span class="p">(</span><span class="n">input_files</span><span class="p">,</span> <span class="n">output_parameters</span><span class="p">):</span>
+ <span class="k">print</span> <span class="s">" input_parameters = "</span><span class="p">,</span> <span class="n">input_files</span>
+ <span class="k">print</span> <span class="s">" output_parameters = "</span><span class="p">,</span> <span class="n">output_parameters</span>
+
+
+<span class="c">#</span>
+<span class="c"># Run</span>
+<span class="c">#</span>
+<span class="n">pipeline_run</span><span class="p">([</span><span class="n">first_task</span><span class="p">])</span>
+</pre></div>
+</div>
+</div></blockquote>
+</div>
+<div class="section" id="example-code-for-formatter">
+<h2>Example Code for <a class="reference internal" href="../../decorators/indicator_objects.html#decorators-formatter"><em>formatter()</em></a><a class="headerlink" href="#example-code-for-formatter" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+
+<span class="c"># create initial files</span>
+<span class="nd">@originate</span><span class="p">([</span> <span class="p">[</span><span class="s">'job1.a.start'</span><span class="p">,</span> <span class="s">'job1.b.start'</span><span class="p">],</span>
+ <span class="p">[</span><span class="s">'job2.a.start'</span><span class="p">,</span> <span class="s">'job2.b.start'</span><span class="p">],</span>
+ <span class="p">[</span><span class="s">'job3.a.start'</span><span class="p">,</span> <span class="s">'job3.c.start'</span><span class="p">]</span> <span class="p">])</span>
+<span class="k">def</span> <span class="nf">create_initial_file_pairs</span><span class="p">(</span><span class="n">output_files</span><span class="p">):</span>
+ <span class="c"># create both files as necessary</span>
+ <span class="k">for</span> <span class="n">output_file</span> <span class="ow">in</span> <span class="n">output_files</span><span class="p">:</span>
+ <span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span> <span class="k">as</span> <span class="n">oo</span><span class="p">:</span> <span class="k">pass</span>
+
+
+<span class="c">#---------------------------------------------------------------</span>
+<span class="c">#</span>
+<span class="c"># formatter</span>
+<span class="c">#</span>
+
+<span class="c"># first task</span>
+<span class="nd">@transform</span><span class="p">(</span><span class="n">create_initial_file_pairs</span><span class="p">,</span> <span class="c"># Input</span>
+
+ <span class="n">formatter</span><span class="p">(</span><span class="s">".+/job(?P<JOBNUMBER>\d+).a.start"</span><span class="p">,</span> <span class="c"># Extract job number</span>
+ <span class="s">".+/job[123].b.start"</span><span class="p">),</span> <span class="c"># Match only "b" files</span>
+
+ <span class="p">[</span><span class="s">"{path[0]}/jobs{JOBNUMBER[0]}.output.a.1"</span><span class="p">,</span> <span class="c"># Replacement list</span>
+ <span class="s">"{path[1]}/jobs{JOBNUMBER[0]}.output.b.1"</span><span class="p">,</span> <span class="mi">45</span><span class="p">])</span>
+<span class="k">def</span> <span class="nf">first_task</span><span class="p">(</span><span class="n">input_files</span><span class="p">,</span> <span class="n">output_parameters</span><span class="p">):</span>
+ <span class="k">print</span> <span class="s">"input_parameters = "</span><span class="p">,</span> <span class="n">input_files</span>
+ <span class="k">print</span> <span class="s">"output_parameters = "</span><span class="p">,</span> <span class="n">output_parameters</span>
+
+
+<span class="c">#</span>
+<span class="c"># Run</span>
+<span class="c">#</span>
+<span class="n">pipeline_run</span><span class="p">(</span><span class="n">verbose</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
+</pre></div>
+</div>
+</div></blockquote>
+</div>
+<div class="section" id="example-code-for-formatter-with-replacements-in-extra-arguments">
+<h2>Example Code for <a class="reference internal" href="../../decorators/indicator_objects.html#decorators-formatter"><em>formatter()</em></a> with replacements in <em>extra</em> arguments<a class="headerlink" href="#example-code-for-formatter-with-replacements-in-extra-arguments" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+
+<span class="c"># create initial files</span>
+<span class="nd">@originate</span><span class="p">([</span> <span class="p">[</span><span class="s">'job1.a.start'</span><span class="p">,</span> <span class="s">'job1.b.start'</span><span class="p">],</span>
+ <span class="p">[</span><span class="s">'job2.a.start'</span><span class="p">,</span> <span class="s">'job2.b.start'</span><span class="p">],</span>
+ <span class="p">[</span><span class="s">'job3.a.start'</span><span class="p">,</span> <span class="s">'job3.c.start'</span><span class="p">]</span> <span class="p">])</span>
+<span class="k">def</span> <span class="nf">create_initial_file_pairs</span><span class="p">(</span><span class="n">output_files</span><span class="p">):</span>
+ <span class="k">for</span> <span class="n">output_file</span> <span class="ow">in</span> <span class="n">output_files</span><span class="p">:</span>
+ <span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span> <span class="k">as</span> <span class="n">oo</span><span class="p">:</span> <span class="k">pass</span>
+
+
+<span class="c">#---------------------------------------------------------------</span>
+<span class="c">#</span>
+<span class="c"># print job number as an extra argument</span>
+<span class="c">#</span>
+
+<span class="c"># first task</span>
+<span class="nd">@transform</span><span class="p">(</span><span class="n">create_initial_file_pairs</span><span class="p">,</span> <span class="c"># Input</span>
+
+ <span class="n">formatter</span><span class="p">(</span><span class="s">".+/job(?P<JOBNUMBER>\d+).a.start"</span><span class="p">,</span> <span class="c"># Extract job number</span>
+ <span class="s">".+/job[123].b.start"</span><span class="p">),</span> <span class="c"># Match only "b" files</span>
+
+ <span class="p">[</span><span class="s">"{path[0]}/jobs{JOBNUMBER[0]}.output.a.1"</span><span class="p">,</span> <span class="c"># Replacement list</span>
+ <span class="s">"{path[1]}/jobs{JOBNUMBER[0]}.output.b.1"</span><span class="p">],</span>
+
+ <span class="s">"{JOBNUMBER[0]}"</span>
+<span class="k">def</span> <span class="nf">first_task</span><span class="p">(</span><span class="n">input_files</span><span class="p">,</span> <span class="n">output_parameters</span><span class="p">,</span> <span class="n">job_number</span><span class="p">):</span>
+ <span class="k">print</span> <span class="n">job_number</span><span class="p">,</span> <span class="s">":"</span><span class="p">,</span> <span class="n">input_files</span>
+
+
+<span class="n">pipeline_run</span><span class="p">(</span><span class="n">verbose</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
+</pre></div>
+</div>
+</div></blockquote>
+</div>
+<div class="section" id="example-code-for-formatter-in-zoos">
+<h2>Example Code for <a class="reference internal" href="../../decorators/indicator_objects.html#decorators-formatter"><em>formatter()</em></a> in Zoos<a class="headerlink" href="#example-code-for-formatter-in-zoos" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+
+<span class="c"># Make directories</span>
+<span class="nd">@mkdir</span><span class="p">([</span><span class="s">"tiger"</span><span class="p">,</span> <span class="s">"lion"</span><span class="p">,</span> <span class="s">"dog"</span><span class="p">,</span> <span class="s">"crocodile"</span><span class="p">,</span> <span class="s">"rose"</span><span class="p">])</span>
+
+<span class="nd">@originate</span><span class="p">(</span>
+ <span class="c"># List of animals and plants</span>
+ <span class="p">[</span> <span class="s">"tiger/mammals.wild.animals"</span><span class="p">,</span>
+ <span class="s">"lion/mammals.wild.animals"</span><span class="p">,</span>
+ <span class="s">"lion/mammals.handreared.animals"</span><span class="p">,</span>
+ <span class="s">"dog/mammals.tame.animals"</span><span class="p">,</span>
+ <span class="s">"dog/mammals.wild.animals"</span><span class="p">,</span>
+ <span class="s">"crocodile/reptiles.wild.animals"</span><span class="p">,</span>
+ <span class="s">"rose/flowering.handreared.plants"</span><span class="p">])</span>
+<span class="k">def</span> <span class="nf">create_initial_files</span><span class="p">(</span><span class="n">output_file</span><span class="p">):</span>
+ <span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span> <span class="k">as</span> <span class="n">oo</span><span class="p">:</span> <span class="k">pass</span>
+
+
+<span class="c"># Put different animals in different directories depending on their clade</span>
+<span class="nd">@transform</span><span class="p">(</span><span class="n">create_initial_files</span><span class="p">,</span> <span class="c"># Input</span>
+
+ <span class="n">formatter</span><span class="p">(</span><span class="s">".+/(?P<clade>\w+).(?P<tame>\w+).animals"</span><span class="p">),</span> <span class="c"># Only animals: ignore plants!</span>
+
+ <span class="s">"{subpath[0][1]}/{clade[0]}/{tame[0]}.{subdir[0][0]}.food"</span><span class="p">,</span> <span class="c"># Replacement</span>
+
+ <span class="s">"{subpath[0][1]}/{clade[0]}"</span><span class="p">,</span> <span class="c"># new_directory</span>
+ <span class="s">"{subdir[0][0]}"</span><span class="p">,</span> <span class="c"># animal_name</span>
+ <span class="s">"{tame[0]}"</span><span class="p">)</span> <span class="c"># tameness</span>
+<span class="k">def</span> <span class="nf">feed</span><span class="p">(</span><span class="n">input_file</span><span class="p">,</span> <span class="n">output_file</span><span class="p">,</span> <span class="n">new_directory</span><span class="p">,</span> <span class="n">animal_name</span><span class="p">,</span> <span class="n">tameness</span><span class="p">):</span>
+ <span class="k">print</span> <span class="s">"Food for the {tameness:11s} {animal_name:9s} = {output_file:90s} will be placed in {new_directory}"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="o">**</span><span class="nb">locals</span><span class="p">())</span>
+
+
+<span class="n">pipeline_run</span><span class="p">(</span><span class="n">verbose</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
+
+
+<span class="n">Results</span> <span class="ow">in</span><span class="p">:</span>
+
+<span class="p">::</span>
+
+ <span class="o">>>></span> <span class="n">pipeline_run</span><span class="p">(</span><span class="n">verbose</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
+ <span class="n">Food</span> <span class="k">for</span> <span class="n">the</span> <span class="n">wild</span> <span class="n">crocodile</span> <span class="o">=</span> <span class="o">./</span><span class="n">reptiles</span><span class="o">/</span><span class="n">wild</span><span class="o">.</span><span class="n">crocodile</span><span class="o">.</span><span class="n">food</span> <span class="n">will</span> <span class="n">be</span> <span class="n">placed</span> <span class="o [...]
+ <span class="n">Food</span> <span class="k">for</span> <span class="n">the</span> <span class="n">tame</span> <span class="n">dog</span> <span class="o">=</span> <span class="o">./</span><span class="n">mammals</span><span class="o">/</span><span class="n">tame</span><span class="o">.</span><span class="n">dog</span><span class="o">.</span><span class="n">food</span> <span class="n">will</span> <span class="n">be</span> <span class="n">placed</span> <span class="o [...]
+ <span class="n">Food</span> <span class="k">for</span> <span class="n">the</span> <span class="n">wild</span> <span class="n">dog</span> <span class="o">=</span> <span class="o">./</span><span class="n">mammals</span><span class="o">/</span><span class="n">wild</span><span class="o">.</span><span class="n">dog</span><span class="o">.</span><span class="n">food</span> <span class="n">will</span> <span class="n">be</span> <span class="n">placed</span> <span class="o [...]
+ <span class="n">Food</span> <span class="k">for</span> <span class="n">the</span> <span class="n">handreared</span> <span class="n">lion</span> <span class="o">=</span> <span class="o">./</span><span class="n">mammals</span><span class="o">/</span><span class="n">handreared</span><span class="o">.</span><span class="n">lion</span><span class="o">.</span><span class="n">food</span> <span class="n">will</span> <span class="n">be</span> <span class="n">placed</span> <span class="o [...]
+ <span class="n">Food</span> <span class="k">for</span> <span class="n">the</span> <span class="n">wild</span> <span class="n">lion</span> <span class="o">=</span> <span class="o">./</span><span class="n">mammals</span><span class="o">/</span><span class="n">wild</span><span class="o">.</span><span class="n">lion</span><span class="o">.</span><span class="n">food</span> <span class="n">will</span> <span class="n">be</span> <span class="n">placed</span> <span class="o [...]
+ <span class="n">Food</span> <span class="k">for</span> <span class="n">the</span> <span class="n">wild</span> <span class="n">tiger</span> <span class="o">=</span> <span class="o">./</span><span class="n">mammals</span><span class="o">/</span><span class="n">wild</span><span class="o">.</span><span class="n">tiger</span><span class="o">.</span><span class="n">food</span> <span class="n">will</span> <span class="n">be</span> <span class="n">placed</span> <span class="o [...]
+</pre></div>
+</div>
+</div></blockquote>
+</div>
+<div class="section" id="example-code-for-regex-in-zoos">
+<h2>Example Code for <a class="reference internal" href="../../decorators/indicator_objects.html#decorators-regex"><em>regex()</em></a> in zoos<a class="headerlink" href="#example-code-for-regex-in-zoos" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+
+<span class="c"># Make directories</span>
+<span class="nd">@mkdir</span><span class="p">([</span><span class="s">"tiger"</span><span class="p">,</span> <span class="s">"lion"</span><span class="p">,</span> <span class="s">"dog"</span><span class="p">,</span> <span class="s">"crocodile"</span><span class="p">,</span> <span class="s">"rose"</span><span class="p">])</span>
+
+<span class="nd">@originate</span><span class="p">(</span>
+ <span class="c"># List of animals and plants</span>
+ <span class="p">[</span> <span class="s">"tiger/mammals.wild.animals"</span><span class="p">,</span>
+ <span class="s">"lion/mammals.wild.animals"</span><span class="p">,</span>
+ <span class="s">"lion/mammals.handreared.animals"</span><span class="p">,</span>
+ <span class="s">"dog/mammals.tame.animals"</span><span class="p">,</span>
+ <span class="s">"dog/mammals.wild.animals"</span><span class="p">,</span>
+ <span class="s">"crocodile/reptiles.wild.animals"</span><span class="p">,</span>
+ <span class="s">"rose/flowering.handreared.plants"</span><span class="p">])</span>
+<span class="k">def</span> <span class="nf">create_initial_files</span><span class="p">(</span><span class="n">output_file</span><span class="p">):</span>
+ <span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span> <span class="k">as</span> <span class="n">oo</span><span class="p">:</span> <span class="k">pass</span>
+
+
+
+<span class="c"># Put different animals in different directories depending on their clade</span>
+<span class="nd">@transform</span><span class="p">(</span><span class="n">create_initial_files</span><span class="p">,</span> <span class="c"># Input</span>
+
+ <span class="n">regex</span><span class="p">(</span><span class="s">r"(.*?/?)(\w+)/(?P<clade>\w+).(?P<tame>\w+).animals"</span><span class="p">),</span> <span class="c"># Only animals: ignore plants!</span>
+
+ <span class="s">r"\1/\g<clade>/\g<tame>.\2.food"</span><span class="p">,</span> <span class="c"># Replacement</span>
+
+ <span class="s">r"\1/\g<clade>"</span><span class="p">,</span> <span class="c"># new_directory</span>
+ <span class="s">r"\2"</span><span class="p">,</span> <span class="c"># animal_name</span>
+ <span class="s">"\g<tame>"</span><span class="p">)</span> <span class="c"># tameness</span>
+<span class="k">def</span> <span class="nf">feed</span><span class="p">(</span><span class="n">input_file</span><span class="p">,</span> <span class="n">output_file</span><span class="p">,</span> <span class="n">new_directory</span><span class="p">,</span> <span class="n">animal_name</span><span class="p">,</span> <span class="n">tameness</span><span class="p">):</span>
+ <span class="k">print</span> <span class="s">"Food for the {tameness:11s} {animal_name:9s} = {output_file:90s} will be placed in {new_directory}"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="o">**</span><span class="nb">locals</span><span class="p">())</span>
+
+
+<span class="n">pipeline_run</span><span class="p">(</span><span class="n">verbose</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
+
+
+<span class="n">Results</span> <span class="ow">in</span><span class="p">:</span>
+
+<span class="p">::</span>
+
+ <span class="o">>>></span> <span class="n">pipeline_run</span><span class="p">(</span><span class="n">verbose</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
+ <span class="n">Food</span> <span class="k">for</span> <span class="n">the</span> <span class="n">wild</span> <span class="n">crocodile</span> <span class="o">=</span> <span class="n">reptiles</span><span class="o">/</span><span class="n">wild</span><span class="o">.</span><span class="n">crocodile</span><span class="o">.</span><span class="n">food</span> <span class="n">will</span> <span class="n">be</span> <span class="n">placed</span> <span class="ow">in</span> <span class= [...]
+ <span class="n">Food</span> <span class="k">for</span> <span class="n">the</span> <span class="n">tame</span> <span class="n">dog</span> <span class="o">=</span> <span class="n">mammals</span><span class="o">/</span><span class="n">tame</span><span class="o">.</span><span class="n">dog</span><span class="o">.</span><span class="n">food</span> <span class="n">will</span> <span class="n">be</span> <span class="n">placed</span> <span class="ow">in</span> <span class= [...]
+ <span class="n">Food</span> <span class="k">for</span> <span class="n">the</span> <span class="n">wild</span> <span class="n">dog</span> <span class="o">=</span> <span class="n">mammals</span><span class="o">/</span><span class="n">wild</span><span class="o">.</span><span class="n">dog</span><span class="o">.</span><span class="n">food</span> <span class="n">will</span> <span class="n">be</span> <span class="n">placed</span> <span class="ow">in</span> <span class= [...]
+ <span class="n">Food</span> <span class="k">for</span> <span class="n">the</span> <span class="n">handreared</span> <span class="n">lion</span> <span class="o">=</span> <span class="n">mammals</span><span class="o">/</span><span class="n">handreared</span><span class="o">.</span><span class="n">lion</span><span class="o">.</span><span class="n">food</span> <span class="n">will</span> <span class="n">be</span> <span class="n">placed</span> <span class="ow">in</span> <span class= [...]
+ <span class="n">Food</span> <span class="k">for</span> <span class="n">the</span> <span class="n">wild</span> <span class="n">lion</span> <span class="o">=</span> <span class="n">mammals</span><span class="o">/</span><span class="n">wild</span><span class="o">.</span><span class="n">lion</span><span class="o">.</span><span class="n">food</span> <span class="n">will</span> <span class="n">be</span> <span class="n">placed</span> <span class="ow">in</span> <span class= [...]
+ <span class="n">Food</span> <span class="k">for</span> <span class="n">the</span> <span class="n">wild</span> <span class="n">tiger</span> <span class="o">=</span> <span class="n">mammals</span><span class="o">/</span><span class="n">wild</span><span class="o">.</span><span class="n">tiger</span><span class="o">.</span><span class="n">food</span> <span class="n">will</span> <span class="n">be</span> <span class="n">placed</span> <span class="ow">in</span> <span class= [...]
+</pre></div>
+</div>
+</div></blockquote>
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="../../contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#"><strong>Chapter 8</strong>: Python Code for Specifying output file names with <tt class="docutils literal"><span class="pre">formatter()</span></tt> and <tt class="docutils literal"><span class="pre">regex()</span></tt></a><ul>
+<li><a class="reference internal" href="#example-code-for-suffix">Example Code for <tt class="docutils literal"><span class="pre">suffix()</span></tt></a></li>
+<li><a class="reference internal" href="#example-code-for-formatter">Example Code for <tt class="docutils literal"><span class="pre">formatter()</span></tt></a></li>
+<li><a class="reference internal" href="#example-code-for-formatter-with-replacements-in-extra-arguments">Example Code for <tt class="docutils literal"><span class="pre">formatter()</span></tt> with replacements in <em>extra</em> arguments</a></li>
+<li><a class="reference internal" href="#example-code-for-formatter-in-zoos">Example Code for <tt class="docutils literal"><span class="pre">formatter()</span></tt> in Zoos</a></li>
+<li><a class="reference internal" href="#example-code-for-regex-in-zoos">Example Code for <tt class="docutils literal"><span class="pre">regex()</span></tt> in zoos</a></li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="pipeline_printout_graph_code.html"
+ title="previous chapter"><strong>Chapter 7</strong>: Python Code for Displaying the pipeline visually with <tt class="docutils literal"><span class="pre">pipeline_printout_graph(...)</span></tt></a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="mkdir_code.html"
+ title="next chapter"><strong>Chapter 9</strong>: Python Code for Preparing directories for output with <tt class="docutils literal"><span class="pre">@mkdir()</span></tt></a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../../_sources/tutorials/new_tutorial/output_file_names_code.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="../../decorators/decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="../../decorators/originate.html">@originate</a> </li>
+ <li><a href="../../decorators/split.html">@split</a> </li>
+ <li><a href="../../decorators/transform.html">@transform</a> </li>
+ <li><a href="../../decorators/merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="../../decorators/subdivide.html">@subdivide</a> </li>
+ <li><a href="../../decorators/transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="../../decorators/collate.html">@collate</a> </li>
+ <li><a href="../../decorators/collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="../../decorators/graphviz.html">@graphviz</a> </li>
+ <li><a href="../../decorators/mkdir.html">@mkdir</a> </li>
+ <li><a href="../../decorators/follows.html">@follows / mkdir</a> </li>
+ <li><a href="../../decorators/posttask.html">@posttask touch_file</a> </li>
+ <li><a href="../../decorators/active_if.html">@active_if</a> </li>
+ <li><a href="../../decorators/jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="../../decorators/product.html">@product </a> </li>
+ <li><a href="../../decorators/permutations.html">@permutations </a> </li>
+ <li><a href="../../decorators/combinations.html">@combinations </a> </li>
+ <li><a href="../../decorators/combinations_with_replacement.html">@combinations_ _with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="../../decorators/decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="../../decorators/files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="../../decorators/parallel.html">@parallel</a> </li>
+ <li><a href="../../decorators/check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="../../decorators/indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="mkdir_code.html" title="Chapter 9: Python Code for Preparing directories for output with @mkdir()"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="pipeline_printout_graph_code.html" title="Chapter 7: Python Code for Displaying the pipeline visually with pipeline_printout_graph(...)"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="introduction.html">Manual</a> / </li>
+ <li><a href="manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/tutorials/new_tutorial/parallel.html b/doc/_build/html/tutorials/new_tutorial/parallel.html
new file mode 100644
index 0000000..0ddcf08
--- /dev/null
+++ b/doc/_build/html/tutorials/new_tutorial/parallel.html
@@ -0,0 +1,246 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>Chapter 22: Esoteric: Running jobs in parallel without files using @parallel — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../../index.html" />
+ <link rel="next" title="Chapter 23: Esoteric: Writing custom functions to decide which jobs are up to date with @check_if_uptodate" href="check_if_uptodate.html" />
+ <link rel="prev" title="Chapter 21: Esoteric: Generating parameters on the fly with @files" href="onthefly.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="check_if_uptodate.html" title="Chapter 23: Esoteric: Writing custom functions to decide which jobs are up to date with @check_if_uptodate"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="onthefly.html" title="Chapter 21: Esoteric: Generating parameters on the fly with @files"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="introduction.html">Manual</a> / </li>
+ <li><a href="manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <div class="section" id="new-manual-parallel-chapter-num-esoteric-running-jobs-in-parallel-without-files-using-parallel">
+<span id="new-manual-deprecated-parallel"></span><span id="index-0"></span><h1><strong>Chapter 22</strong>: Esoteric: Running jobs in parallel without files using <a class="reference internal" href="../../decorators/parallel.html#decorators-parallel"><em>@parallel</em></a><a class="headerlink" href="#new-manual-parallel-chapter-num-esoteric-running-jobs-in-parallel-without-files-using-parallel" title="Permalink to this headline">¶</a></h1>
+<div class="admonition seealso">
+<p class="first admonition-title">See also</p>
+<ul class="last simple">
+<li><a class="reference internal" href="manual_contents.html#new-manual-table-of-contents"><em>Manual Table of Contents</em></a></li>
+<li><a class="reference internal" href="../../decorators/parallel.html#decorators-parallel"><em>@parallel</em></a> syntax in detail</li>
+</ul>
+</div>
+<div class="section" id="parallel">
+<h2><strong>@parallel</strong><a class="headerlink" href="#parallel" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p><strong>@parallel</strong> supplies parameters for multiple <strong>jobs</strong> exactly like <a class="reference internal" href="deprecated_files.html#new-manual-deprecated-files"><em>@files</em></a> except that:</p>
+<blockquote>
+<div><ol class="arabic simple">
+<li>The first two parameters are not treated as <em>inputs</em> and <em>outputs</em> parameters,
+and strings are not assumed to be file names</li>
+<li>Thus no up-to-date checking of each job is performed using <em>inputs</em> and <em>outputs</em> files</li>
+<li>No expansion of <a class="reference external" href="http://docs.python.org/library/glob.html"><cite>glob</cite></a> patterns or <em>output</em> from previous tasks is carried out.</li>
+</ol>
+</div></blockquote>
+<p>This syntax is most useful when a pipeline stage does not involve creating or consuming any files, and
+you wish to forego the conveniences of <a class="reference internal" href="deprecated_files.html#new-manual-deprecated-files"><em>@files</em></a>, <a class="reference internal" href="transform.html#new-manual-transform"><em>@transform</em></a> etc.</p>
+<p>The following code performs some arithmetic in parallel:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="kn">import</span> <span class="nn">sys</span>
+<span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+<span class="n">parameters</span> <span class="o">=</span> <span class="p">[</span>
+ <span class="p">[</span><span class="s">'A'</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">],</span> <span class="c"># 1st job</span>
+ <span class="p">[</span><span class="s">'B'</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">],</span> <span class="c"># 2nd job</span>
+ <span class="p">[</span><span class="s">'C'</span><span class="p">,</span> <span class="mi">5</span><span class="p">,</span> <span class="mi">6</span><span class="p">],</span> <span class="c"># 3rd job</span>
+ <span class="p">]</span>
+<span class="nd">@parallel</span><span class="p">(</span><span class="n">parameters</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">parallel_task</span><span class="p">(</span><span class="n">name</span><span class="p">,</span> <span class="n">param1</span><span class="p">,</span> <span class="n">param2</span><span class="p">):</span>
+ <span class="n">sys</span><span class="o">.</span><span class="n">stderr</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="s">" Parallel task </span><span class="si">%s</span><span class="s">: "</span> <span class="o">%</span> <span class="n">name</span><span class="p">)</span>
+ <span class="n">sys</span><span class="o">.</span><span class="n">stderr</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="s">"</span><span class="si">%d</span><span class="s"> + </span><span class="si">%d</span><span class="s"> = </span><span class="si">%d</span><span class="se">\n</span><span class="s">"</span> <span class="o">%</span> <span class="p">(</span><span class="n">param1</span><span class="p">,</span> <span class="n" [...]
+
+<span class="n">pipeline_run</span><span class="p">([</span><span class="n">parallel_task</span><span class="p">])</span>
+</pre></div>
+</div>
+<p>produces the following:</p>
+<div class="highlight-python"><pre>Task = parallel_task
+ Parallel task A: 1 + 2 = 3
+ Job = ["A", 1, 2] completed
+ Parallel task B: 3 + 4 = 7
+ Job = ["B", 3, 4] completed
+ Parallel task C: 5 + 6 = 11
+ Job = ["C", 5, 6] completed</pre>
+</div>
+</div></blockquote>
+</div></blockquote>
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="../../contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#"><strong>Chapter 22</strong>: Esoteric: Running jobs in parallel without files using <tt class="docutils literal"><span class="pre">@parallel</span></tt></a><ul>
+<li><a class="reference internal" href="#parallel"><strong>@parallel</strong></a></li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="onthefly.html"
+ title="previous chapter"><strong>Chapter 21</strong>: Esoteric: Generating parameters on the fly with <tt class="docutils literal"><span class="pre">@files</span></tt></a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="check_if_uptodate.html"
+ title="next chapter"><strong>Chapter 23</strong>: Esoteric: Writing custom functions to decide which jobs are up to date with <tt class="docutils literal"><span class="pre">@check_if_uptodate</span></tt></a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../../_sources/tutorials/new_tutorial/parallel.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="../../decorators/decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="../../decorators/originate.html">@originate</a> </li>
+ <li><a href="../../decorators/split.html">@split</a> </li>
+ <li><a href="../../decorators/transform.html">@transform</a> </li>
+ <li><a href="../../decorators/merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="../../decorators/subdivide.html">@subdivide</a> </li>
+ <li><a href="../../decorators/transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="../../decorators/collate.html">@collate</a> </li>
+ <li><a href="../../decorators/collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="../../decorators/graphviz.html">@graphviz</a> </li>
+ <li><a href="../../decorators/mkdir.html">@mkdir</a> </li>
+ <li><a href="../../decorators/follows.html">@follows / mkdir</a> </li>
+ <li><a href="../../decorators/posttask.html">@posttask touch_file</a> </li>
+ <li><a href="../../decorators/active_if.html">@active_if</a> </li>
+ <li><a href="../../decorators/jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="../../decorators/product.html">@product </a> </li>
+ <li><a href="../../decorators/permutations.html">@permutations </a> </li>
+ <li><a href="../../decorators/combinations.html">@combinations </a> </li>
+ <li><a href="../../decorators/combinations_with_replacement.html">@combinations_ _with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="../../decorators/decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="../../decorators/files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="../../decorators/parallel.html">@parallel</a> </li>
+ <li><a href="../../decorators/check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="../../decorators/indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="check_if_uptodate.html" title="Chapter 23: Esoteric: Writing custom functions to decide which jobs are up to date with @check_if_uptodate"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="onthefly.html" title="Chapter 21: Esoteric: Generating parameters on the fly with @files"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="introduction.html">Manual</a> / </li>
+ <li><a href="manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/tutorials/new_tutorial/pipeline_printout.html b/doc/_build/html/tutorials/new_tutorial/pipeline_printout.html
new file mode 100644
index 0000000..1168059
--- /dev/null
+++ b/doc/_build/html/tutorials/new_tutorial/pipeline_printout.html
@@ -0,0 +1,381 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>Chapter 5: Understanding how your pipeline works with pipeline_printout(...) — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../../index.html" />
+ <link rel="next" title="Chapter 6: Running Ruffus from the command line with ruffus.cmdline" href="command_line.html" />
+ <link rel="prev" title="Chapter 4: Creating files with @originate" href="originate.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="command_line.html" title="Chapter 6: Running Ruffus from the command line with ruffus.cmdline"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="originate.html" title="Chapter 4: Creating files with @originate"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="introduction.html">Manual</a> / </li>
+ <li><a href="manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <div class="section" id="new-manual-pipeline-printout-chapter-num-understanding-how-your-pipeline-works-with-pipeline-printout">
+<span id="new-manual-pipeline-printout"></span><span id="index-0"></span><h1><strong>Chapter 5</strong>: Understanding how your pipeline works with <a class="reference internal" href="../../pipeline_functions.html#pipeline-functions-pipeline-printout"><em>pipeline_printout(...)</em></a><a class="headerlink" href="#new-manual-pipeline-printout-chapter-num-understanding-how-your-pipeline-works-with-pipeline-printout" title="Permalink to this headline">¶</a></h1>
+<div class="admonition seealso">
+<p class="first admonition-title">See also</p>
+<ul class="last simple">
+<li><a class="reference internal" href="manual_contents.html#new-manual-table-of-contents"><em>Manual Table of Contents</em></a></li>
+<li><a class="reference internal" href="../../pipeline_functions.html#pipeline-functions-pipeline-printout"><em>pipeline_printout(...)</em></a> syntax</li>
+<li><a class="reference internal" href="pipeline_printout_code.html#new-manual-pipeline-printout-code"><em>Python Code for this chapter</em></a></li>
+</ul>
+</div>
+<div class="admonition note">
+<p class="first admonition-title">Note</p>
+<ul class="last simple">
+<li><strong>Whether you are learning or developing ruffus pipelines, your best friend is</strong> <a class="reference internal" href="../../pipeline_functions.html#pipeline-functions-pipeline-printout"><em>pipeline_printout(...)</em></a>.
+<strong>This shows the exact parameters and files as they are passed through the pipeline.</strong></li>
+<li><strong>We also</strong> <em>strongly</em> <strong>recommend you use the</strong> <tt class="docutils literal"><span class="pre">ruffus.cmdline</span></tt> <strong>convenience module, which</strong>
+<strong>will take care of all the command line arguments for you. See</strong> <a class="reference internal" href="command_line.html#new-manual-cmdline"><em>Chapter 6: Running Ruffus from the command line with ruffus.cmdline</em></a>.</li>
+</ul>
+</div>
+<div class="section" id="printing-out-which-jobs-will-be-run">
+<h2>Printing out which jobs will be run<a class="headerlink" href="#printing-out-which-jobs-will-be-run" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p><a class="reference internal" href="../../pipeline_functions.html#pipeline-functions-pipeline-printout"><em>pipeline_printout(...)</em></a> takes the same parameters as pipeline_run but just prints
+the tasks which are and are not up-to-date.</p>
+<p>The <tt class="docutils literal"><span class="pre">verbose</span></tt> parameter controls how much detail is displayed.</p>
+<p>Let us take the pipelined code we previously wrote in
+<strong>Chapter 3</strong> <a class="reference internal" href="transform_in_parallel_code.html#new-manual-transform-in-parallel-code"><em>More on @transform-ing data and @originate</em></a>
+but call <a class="reference internal" href="../../pipeline_functions.html#pipeline-functions-pipeline-printout"><em>pipeline_printout(...)</em></a> instead of
+<a class="reference internal" href="../../pipeline_functions.html#pipeline-functions-pipeline-run"><em>pipeline_run(...)</em></a>.
+This lists the tasks which will be run in the pipeline:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="gp">>>> </span><span class="kn">import</span> <span class="nn">sys</span>
+<span class="gp">>>> </span><span class="n">pipeline_printout</span><span class="p">(</span><span class="n">sys</span><span class="o">.</span><span class="n">stdout</span><span class="p">,</span> <span class="p">[</span><span class="n">second_task</span><span class="p">])</span>
+
+<span class="go">________________________________________</span>
+<span class="go">Tasks which will be run:</span>
+
+<span class="go">Task = create_initial_file_pairs</span>
+<span class="go">Task = first_task</span>
+<span class="go">Task = second_task</span>
+<span class="go">________________________________________</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p>To see the input and output parameters of each job in the pipeline, try increasing the verbosity from the default (<tt class="docutils literal"><span class="pre">1</span></tt>) to <tt class="docutils literal"><span class="pre">3</span></tt>
+(See <a class="reference internal" href="pipeline_printout_code.html#new-manual-pipeline-printout-code"><em>code</em></a>)</p>
+<p>This is very useful for checking that the input and output parameters have been specified correctly.</p>
+</div></blockquote>
+</div>
+<div class="section" id="determining-which-jobs-are-out-of-date-or-not">
+<h2>Determining which jobs are out-of-date or not<a class="headerlink" href="#determining-which-jobs-are-out-of-date-or-not" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>It is often useful to see which tasks are or are not up-to-date. For example, if we
+were to run the pipeline in full, and then modify one of the intermediate files, the
+pipeline would be partially out of date.</p>
+<p>Let us start by running the pipeline in full, and then modify <tt class="docutils literal"><span class="pre">job1.a.output.1</span></tt> so that the second task appears out-of-date:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="n">pipeline_run</span><span class="p">([</span><span class="n">second_task</span><span class="p">])</span>
+
+<span class="hll"><span class="c"># "touch" job1.stage1</span>
+</span><span class="nb">open</span><span class="p">(</span><span class="s">"job1.a.output.1"</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p>Run <a class="reference internal" href="../../pipeline_functions.html#pipeline-functions-pipeline-printout"><em>pipeline_printout(...)</em></a> with a verbosity of <tt class="docutils literal"><span class="pre">5</span></tt>.</p>
+<p>This will tell you exactly why <tt class="docutils literal"><span class="pre">second_task(...)</span></tt> needs to be re-run:
+because <tt class="docutils literal"><span class="pre">job1.a.output.1</span></tt> has a file modification time <em>after</em> <tt class="docutils literal"><span class="pre">job1.a.output.2</span></tt> (highlighted):</p>
+<blockquote>
+<div><div class="highlight-pycon"><div class="highlight"><pre><span class="gp">>>> </span><span class="n">pipeline_printout</span><span class="p">(</span><span class="n">sys</span><span class="o">.</span><span class="n">stdout</span><span class="p">,</span> <span class="p">[</span><span class="n">second_task</span><span class="p">],</span> <span class="n">verbose</span> <span class="o">=</span> <span class="mi">5</span><span class="p">)</span>
+
+<span class="go">________________________________________</span>
+<span class="go">Tasks which are up-to-date:</span>
+
+<span class="go">Task = create_initial_file_pairs</span>
+<span class="go">Task = first_task</span>
+
+<span class="hll"><span class="go">________________________________________</span>
+</span>
+<span class="go">________________________________________</span>
+<span class="go">Tasks which will be run:</span>
+
+<span class="go">Task = second_task</span>
+<span class="go"> Job = [job1.a.output.1</span>
+<span class="go"> -> job1.a.output.2]</span>
+<span class="gp">>>> </span><span class="c"># File modification times shown for out of date files</span>
+<span class="go"> Job needs update:</span>
+<span class="go"> Input files:</span>
+<span class="go"> * 22 Jul 2014 15:29:19.33: job1.a.output.1</span>
+<span class="go"> Output files:</span>
+<span class="go"> * 22 Jul 2014 15:29:07.53: job1.a.output.2</span>
+
+<span class="go"> Job = [job2.a.output.1</span>
+<span class="go"> -> job2.a.output.2]</span>
+<span class="go"> Job = [job3.a.output.1</span>
+<span class="go"> -> job3.a.output.2]</span>
+
+<span class="go">________________________________________</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p>N.B. At a verbosity of 5, even jobs which are up-to-date in <tt class="docutils literal"><span class="pre">second_task</span></tt> are displayed.</p>
+</div></blockquote>
+</div>
+<div class="section" id="verbosity-levels">
+<h2>Verbosity levels<a class="headerlink" href="#verbosity-levels" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>The verbosity levels for <a class="reference internal" href="../../pipeline_functions.html#pipeline-functions-pipeline-printout"><em>pipeline_printout(...)</em></a> and <a class="reference internal" href="../../pipeline_functions.html#pipeline-functions-pipeline-run"><em>pipeline_run(...)</em></a>
+can be specified from <tt class="docutils literal"><span class="pre">verbose</span> <span class="pre">=</span> <span class="pre">0</span></tt> (print out nothing) to the extreme verbosity of <tt class="docutils literal"><span class="pre">verbose=6</span></tt>. A verbosity of above 10 is reserved for the internal
+debugging of Ruffus</p>
+<blockquote>
+<div><ul class="simple">
+<li>level <strong>0</strong> : <em>nothing</em></li>
+<li>level <strong>1</strong> : <em>Out-of-date Task names</em></li>
+<li>level <strong>2</strong> : <em>All Tasks (including any task function docstrings)</em></li>
+<li>level <strong>3</strong> : <em>Out-of-date Jobs in Out-of-date Tasks, no explanation</em></li>
+<li>level <strong>4</strong> : <em>Out-of-date Jobs in Out-of-date Tasks, with explanations and warnings</em></li>
+<li>level <strong>5</strong> : <em>All Jobs in Out-of-date Tasks (up-to-date Tasks are only listed by name)</em></li>
+<li>level <strong>6</strong> : <em>All jobs in All Tasks whether out of date or not</em></li>
+<li>level <strong>10</strong>: <em>logs messages useful only for debugging ruffus pipeline code</em></li>
+</ul>
+</div></blockquote>
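+<p>For example (a minimal sketch, again assuming the <tt class="docutils literal"><span class="pre">second_task</span></tt> pipeline from the previous sections), the same <tt class="docutils literal"><span class="pre">verbose</span></tt> values apply to both functions:</p>
+<div class="highlight-python"><pre>import sys
+from ruffus import *
+
+# print every job in every task, whether out of date or not
+pipeline_printout(sys.stdout, [second_task], verbose = 6)
+
+# run the pipeline, logging all task names (and any docstrings)
+pipeline_run([second_task], verbose = 2)</pre>
+</div>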
+</div></blockquote>
+</div>
+<div class="section" id="abbreviating-long-file-paths-with-verbose-abbreviated-path">
+<span id="new-manual-pipeline-printout-verbose-abbreviated-path"></span><h2>Abbreviating long file paths with <tt class="docutils literal"><span class="pre">verbose_abbreviated_path</span></tt><a class="headerlink" href="#abbreviating-long-file-paths-with-verbose-abbreviated-path" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>Pipelines often produce interminable lists of deeply nested filenames. It would be nice to be able to abbreviate this
+to just enough information to follow the progress.</p>
+<p>The <tt class="docutils literal"><span class="pre">verbose_abbreviated_path</span></tt> parameter specifies that <a class="reference internal" href="../../pipeline_functions.html#pipeline-functions-pipeline-printout"><em>pipeline_printout(...)</em></a> and <a class="reference internal" href="../../pipeline_functions.html#pipeline-functions-pipeline-run"><em>pipeline_run(...)</em></a> only display</p>
+<blockquote>
+<div><ol class="arabic">
+<li><p class="first">the <tt class="docutils literal"><span class="pre">NNN</span></tt> th top level sub-directories to be included, or that</p>
+</li>
+<li><p class="first">the message to be truncated to a specified <tt class="docutils literal"><span class="pre">`MMM</span></tt> characters (to fit onto a line, for example). <tt class="docutils literal"><span class="pre">MMM</span></tt> is specified by setting <tt class="docutils literal"><span class="pre">verbose_abbreviated_path</span> <span class="pre">=</span> <span class="pre">-MMM</span></tt>, i.e. negative values.</p>
+<p>Note that the number of characters specified is just the separate lengths of the input and output parameters,
+not the entire indented line. You many need to specify a smaller limit that you expect (e.g. <tt class="docutils literal"><span class="pre">60</span></tt> rather than <cite>80</cite>)</p>
+</li>
+</ol>
+<div class="highlight-python"><div class="highlight"><pre><span class="n">pipeline_printout</span><span class="p">(</span><span class="n">verbose_abbreviated_path</span> <span class="o">=</span> <span class="n">NNN</span><span class="p">)</span>
+<span class="n">pipeline_run</span><span class="p">(</span><span class="n">verbose_abbreviated_path</span> <span class="o">=</span> <span class="o">-</span><span class="n">MMM</span><span class="p">)</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p><tt class="docutils literal"><span class="pre">verbose_abbreviated_path</span></tt> defaults to <tt class="docutils literal"><span class="pre">2</span></tt></p>
+<p>For example:</p>
+<blockquote>
+<div><p>Given <tt class="docutils literal"><span class="pre">["aa/bb/cc/dddd.txt",</span> <span class="pre">"aaa/bbbb/cccc/eeed/eeee/ffff/gggg.txt"]</span></tt></p>
+<div class="highlight-python"><div class="highlight"><pre><span class="hll"> <span class="c"># Original relative paths</span>
+</span> <span class="s">"[aa/bb/cc/dddd.txt, aaa/bbbb/cccc/eeed/eeee/ffff/gggg.txt]"</span>
+
+<span class="hll"> <span class="c"># Full abspath</span>
+</span> <span class="n">verbose_abbreviated_path</span> <span class="o">=</span> <span class="mi">0</span>
+ <span class="s">"[/test/ruffus/src/aa/bb/cc/dddd.txt, /test/ruffus/src/aaa/bbbb/cccc/eeed/eeee/ffff/gggg.txt]"</span>
+
+<span class="hll"> <span class="c"># Specifed level of nested directories</span>
+</span> <span class="n">verbose_abbreviated_path</span> <span class="o">=</span> <span class="mi">1</span>
+ <span class="s">"[.../dddd.txt, .../gggg.txt]"</span>
+
+ <span class="n">verbose_abbreviated_path</span> <span class="o">=</span> <span class="mi">2</span>
+ <span class="s">"[.../cc/dddd.txt, .../ffff/gggg.txt]"</span>
+
+ <span class="n">verbose_abbreviated_path</span> <span class="o">=</span> <span class="mi">3</span>
+ <span class="s">"[.../bb/cc/dddd.txt, .../eeee/ffff/gggg.txt]"</span>
+
+
+<span class="hll"> <span class="c"># Truncated to MMM characters</span>
+</span> <span class="n">verbose_abbreviated_path</span> <span class="o">=</span> <span class="o">-</span><span class="mi">60</span>
+ <span class="s">"<???> /bb/cc/dddd.txt, aaa/bbbb/cccc/eeed/eeee/ffff/gggg.txt]"</span>
+</pre></div>
+</div>
+</div></blockquote>
+</div></blockquote>
+</div>
+<div class="section" id="getting-a-list-of-all-tasks-in-a-pipeline">
+<h2>Getting a list of all tasks in a pipeline<a class="headerlink" href="#getting-a-list-of-all-tasks-in-a-pipeline" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>If you just want a list of all tasks (Ruffus-decorated function names), you can
+simply run <a class="reference internal" href="../../pipeline_functions.html#pipeline-functions-pipeline-get-task-names"><em>pipeline_get_task_names(...)</em></a>.</p>
+<p>This doesn’t touch any pipeline code or even check to see if the pipeline is connected up properly.</p>
+<p>However, it is sometimes useful to allow users at the command line to choose from a list of
+possible tasks as a target.</p>
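+<p>For example, a hedged sketch (an illustration only; the <tt class="docutils literal"><span class="pre">--target</span></tt> option and the <tt class="docutils literal"><span class="pre">argparse</span></tt> wiring are assumptions of this sketch, not Ruffus features) of offering the task names as command-line choices:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre>import argparse
+from ruffus import pipeline_get_task_names, pipeline_run
+
+# ... Ruffus-decorated tasks would be defined above this point ...
+
+parser = argparse.ArgumentParser()
+parser.add_argument("--target", choices = pipeline_get_task_names(),
+                    help = "run the pipeline up to this task")
+args = parser.parse_args()
+if args.target:
+    pipeline_run([args.target])
+</pre></div>
+</div>
+</div></blockquote>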
+</div></blockquote>
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="../../contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#"><strong>Chapter 5</strong>: Understanding how your pipeline works with <tt class="docutils literal"><span class="pre">pipeline_printout(...)</span></tt></a><ul>
+<li><a class="reference internal" href="#printing-out-which-jobs-will-be-run">Printing out which jobs will be run</a></li>
+<li><a class="reference internal" href="#determining-which-jobs-are-out-of-date-or-not">Determining which jobs are out-of-date or not</a></li>
+<li><a class="reference internal" href="#verbosity-levels">Verbosity levels</a></li>
+<li><a class="reference internal" href="#abbreviating-long-file-paths-with-verbose-abbreviated-path">Abbreviating long file paths with <tt class="docutils literal"><span class="pre">verbose_abbreviated_path</span></tt></a></li>
+<li><a class="reference internal" href="#getting-a-list-of-all-tasks-in-a-pipeline">Getting a list of all tasks in a pipeline</a></li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="originate.html"
+ title="previous chapter"><strong>Chapter 4</strong>: Creating files with <tt class="docutils literal"><span class="pre">@originate</span></tt></a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="command_line.html"
+ title="next chapter"><strong>Chapter 6</strong>: Running <em>Ruffus</em> from the command line with ruffus.cmdline</a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../../_sources/tutorials/new_tutorial/pipeline_printout.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="../../decorators/decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="../../decorators/originate.html">@originate</a> </li>
+ <li><a href="../../decorators/split.html">@split</a> </li>
+ <li><a href="../../decorators/transform.html">@transform</a> </li>
+ <li><a href="../../decorators/merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="../../decorators/subdivide.html">@subdivide</a> </li>
+ <li><a href="../../decorators/transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="../../decorators/collate.html">@collate</a> </li>
+ <li><a href="../../decorators/collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="../../decorators/graphviz.html">@graphviz</a> </li>
+ <li><a href="../../decorators/mkdir.html">@mkdir</a> </li>
+ <li><a href="../../decorators/follows.html">@follows / mkdir</a> </li>
+ <li><a href="../../decorators/posttask.html">@posttask touch_file</a> </li>
+ <li><a href="../../decorators/active_if.html">@active_if</a> </li>
+ <li><a href="../../decorators/jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="../../decorators/product.html">@product </a> </li>
+ <li><a href="../../decorators/permutations.html">@permutations </a> </li>
+ <li><a href="../../decorators/combinations.html">@combinations </a> </li>
+ <li><a href="../../decorators/combinations_with_replacement.html">@combinations_ _with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="../../decorators/decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="../../decorators/files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="../../decorators/parallel.html">@parallel</a> </li>
+ <li><a href="../../decorators/check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="../../decorators/indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="command_line.html" title="Chapter 6: Running Ruffus from the command line with ruffus.cmdline"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="originate.html" title="Chapter 4: Creating files with @originate"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="introduction.html">Manual</a> / </li>
+ <li><a href="manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/tutorials/new_tutorial/pipeline_printout_code.html b/doc/_build/html/tutorials/new_tutorial/pipeline_printout_code.html
new file mode 100644
index 0000000..33a35e0
--- /dev/null
+++ b/doc/_build/html/tutorials/new_tutorial/pipeline_printout_code.html
@@ -0,0 +1,396 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>Chapter 5: Python Code for Understanding how your pipeline works with pipeline_printout(...) — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../../index.html" />
+ <link rel="next" title="Chapter 7: Python Code for Displaying the pipeline visually with pipeline_printout_graph(...)" href="pipeline_printout_graph_code.html" />
+ <link rel="prev" title="Chapter 4: Python Code for Creating files with @originate" href="originate_code.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="pipeline_printout_graph_code.html" title="Chapter 7: Python Code for Displaying the pipeline visually with pipeline_printout_graph(...)"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="originate_code.html" title="Chapter 4: Python Code for Creating files with @originate"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="introduction.html">Manual</a> / </li>
+ <li><a href="manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <div class="section" id="new-manual-pipeline-printout-chapter-num-python-code-for-understanding-how-your-pipeline-works-with-pipeline-printout">
+<span id="new-manual-pipeline-printout-code"></span><h1><strong>Chapter 5</strong>: Python Code for Understanding how your pipeline works with <a class="reference internal" href="../../pipeline_functions.html#pipeline-functions-pipeline-printout"><em>pipeline_printout(...)</em></a><a class="headerlink" href="#new-manual-pipeline-printout-chapter-num-python-code-for-understanding-how-your-pipeline-works-with-pipeline-printout" title="Permalink to this headline">¶</a></h1>
+<div class="admonition seealso">
+<p class="first admonition-title">See also</p>
+<ul class="last simple">
+<li><a class="reference internal" href="manual_contents.html#new-manual-table-of-contents"><em>Manual Table of Contents</em></a></li>
+<li><a class="reference internal" href="../../pipeline_functions.html#pipeline-functions-pipeline-printout"><em>pipeline_printout(...)</em></a> syntax</li>
+<li>Back to <strong>Chapter 5</strong>: <a class="reference internal" href="pipeline_printout.html#new-manual-pipeline-printout"><em>Understanding how your pipeline works</em></a></li>
+</ul>
+</div>
+<div class="section" id="display-the-initial-state-of-the-pipeline">
+<h2>Display the initial state of the pipeline<a class="headerlink" href="#display-the-initial-state-of-the-pipeline" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+<span class="kn">import</span> <span class="nn">sys</span>
+
+<span class="c">#---------------------------------------------------------------</span>
+<span class="c"># create initial files</span>
+<span class="c">#</span>
+<span class="nd">@originate</span><span class="p">([</span> <span class="p">[</span><span class="s">'job1.a.start'</span><span class="p">,</span> <span class="s">'job1.b.start'</span><span class="p">],</span>
+ <span class="p">[</span><span class="s">'job2.a.start'</span><span class="p">,</span> <span class="s">'job2.b.start'</span><span class="p">],</span>
+ <span class="p">[</span><span class="s">'job3.a.start'</span><span class="p">,</span> <span class="s">'job3.b.start'</span><span class="p">]</span> <span class="p">])</span>
+<span class="k">def</span> <span class="nf">create_initial_file_pairs</span><span class="p">(</span><span class="n">output_files</span><span class="p">):</span>
+ <span class="c"># create both files as necessary</span>
+ <span class="k">for</span> <span class="n">output_file</span> <span class="ow">in</span> <span class="n">output_files</span><span class="p">:</span>
+ <span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span> <span class="k">as</span> <span class="n">oo</span><span class="p">:</span> <span class="k">pass</span>
+
+<span class="c">#---------------------------------------------------------------</span>
+<span class="c"># first task</span>
+<span class="nd">@transform</span><span class="p">(</span><span class="n">create_initial_file_pairs</span><span class="p">,</span> <span class="n">suffix</span><span class="p">(</span><span class="s">".start"</span><span class="p">),</span> <span class="s">".output.1"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">first_task</span><span class="p">(</span><span class="n">input_files</span><span class="p">,</span> <span class="n">output_file</span><span class="p">):</span>
+ <span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">):</span> <span class="k">pass</span>
+
+
+<span class="c">#---------------------------------------------------------------</span>
+<span class="c"># second task</span>
+<span class="nd">@transform</span><span class="p">(</span><span class="n">first_task</span><span class="p">,</span> <span class="n">suffix</span><span class="p">(</span><span class="s">".output.1"</span><span class="p">),</span> <span class="s">".output.2"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">second_task</span><span class="p">(</span><span class="n">input_files</span><span class="p">,</span> <span class="n">output_file</span><span class="p">):</span>
+ <span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">):</span> <span class="k">pass</span>
+
+<span class="n">pipeline_printout</span><span class="p">(</span><span class="n">sys</span><span class="o">.</span><span class="n">stdout</span><span class="p">,</span> <span class="p">[</span><span class="n">second_task</span><span class="p">],</span> <span class="n">verbose</span> <span class="o">=</span> <span class="mi">1</span><span class="p">)</span>
+<span class="n">pipeline_printout</span><span class="p">(</span><span class="n">sys</span><span class="o">.</span><span class="n">stdout</span><span class="p">,</span> <span class="p">[</span><span class="n">second_task</span><span class="p">],</span> <span class="n">verbose</span> <span class="o">=</span> <span class="mi">3</span><span class="p">)</span>
+</pre></div>
+</div>
+</div></blockquote>
+</div>
+<div class="section" id="normal-output">
+<h2>Normal Output<a class="headerlink" href="#normal-output" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="gp">>>> </span><span class="n">pipeline_printout</span><span class="p">(</span><span class="n">sys</span><span class="o">.</span><span class="n">stdout</span><span class="p">,</span> <span class="p">[</span><span class="n">second_task</span><span class="p">],</span> <span class="n">verbose</span> <span class="o">=</span> <span class="mi">1</span><span class="p">)</span>
+
+<span class="go">________________________________________</span>
+<span class="go">Tasks which will be run:</span>
+
+<span class="go">Task = create_initial_file_pairs</span>
+<span class="go">Task = first_task</span>
+<span class="go">Task = second_task</span>
+</pre></div>
+</div>
+</div></blockquote>
+</div>
+<div class="section" id="high-verbosity-output">
+<h2>High Verbosity Output<a class="headerlink" href="#high-verbosity-output" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="gp">>>> </span><span class="n">pipeline_printout</span><span class="p">(</span><span class="n">sys</span><span class="o">.</span><span class="n">stdout</span><span class="p">,</span> <span class="p">[</span><span class="n">second_task</span><span class="p">],</span> <span class="n">verbose</span> <span class="o">=</span> <span class="mi">4</span><span class="p">)</span>
+
+<span class="go">________________________________________</span>
+<span class="go">Tasks which will be run:</span>
+
+<span class="go">Task = create_initial_file_pairs</span>
+<span class="go"> Job = [None</span>
+<span class="go"> -> job1.a.start</span>
+<span class="go"> -> job1.b.start]</span>
+<span class="go"> Job needs update: Missing files [job1.a.start, job1.b.start]</span>
+<span class="go"> Job = [None</span>
+<span class="go"> -> job2.a.start</span>
+<span class="go"> -> job2.b.start]</span>
+<span class="go"> Job needs update: Missing files [job2.a.start, job2.b.start]</span>
+<span class="go"> Job = [None</span>
+<span class="go"> -> job3.a.start</span>
+<span class="go"> -> job3.b.start]</span>
+<span class="go"> Job needs update: Missing files [job3.a.start, job3.b.start]</span>
+
+<span class="go">Task = first_task</span>
+<span class="go"> Job = [[job1.a.start, job1.b.start]</span>
+<span class="go"> -> job1.a.output.1]</span>
+<span class="go"> Job needs update: Missing files [job1.a.start, job1.b.start, job1.a.output.1]</span>
+<span class="go"> Job = [[job2.a.start, job2.b.start]</span>
+<span class="go"> -> job2.a.output.1]</span>
+<span class="go"> Job needs update: Missing files [job2.a.start, job2.b.start, job2.a.output.1]</span>
+<span class="go"> Job = [[job3.a.start, job3.b.start]</span>
+<span class="go"> -> job3.a.output.1]</span>
+<span class="go"> Job needs update: Missing files [job3.a.start, job3.b.start, job3.a.output.1]</span>
+
+<span class="go">Task = second_task</span>
+<span class="go"> Job = [job1.a.output.1</span>
+<span class="go"> -> job1.a.output.2]</span>
+<span class="go"> Job needs update: Missing files [job1.a.output.1, job1.a.output.2]</span>
+<span class="go"> Job = [job2.a.output.1</span>
+<span class="go"> -> job2.a.output.2]</span>
+<span class="go"> Job needs update: Missing files [job2.a.output.1, job2.a.output.2]</span>
+<span class="go"> Job = [job3.a.output.1</span>
+<span class="go"> -> job3.a.output.2]</span>
+<span class="go"> Job needs update: Missing files [job3.a.output.1, job3.a.output.2]</span>
+
+<span class="go">________________________________________</span>
+</pre></div>
+</div>
+</div></blockquote>
+</div>
+<div class="section" id="display-the-partially-up-to-date-pipeline">
+<h2>Display the partially up-to-date pipeline<a class="headerlink" href="#display-the-partially-up-to-date-pipeline" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>Run the pipeline, modify <tt class="docutils literal"><span class="pre">job1.a.output.1</span></tt> so that the second task is no longer up-to-date
+and print out the pipeline state again:</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="gp">>>> </span><span class="n">pipeline_run</span><span class="p">([</span><span class="n">second_task</span><span class="p">],</span> <span class="n">verbose</span><span class="o">=</span><span class="mi">3</span><span class="p">)</span>
+<span class="go">Task enters queue = create_initial_file_pairs</span>
+<span class="go"> Job = [None -> [job1.a.start, job1.b.start]]</span>
+<span class="go"> Job = [None -> [job2.a.start, job2.b.start]]</span>
+<span class="go"> Job = [None -> [job3.a.start, job3.b.start]]</span>
+<span class="go"> Job = [None -> [job1.a.start, job1.b.start]] completed</span>
+<span class="go"> Job = [None -> [job2.a.start, job2.b.start]] completed</span>
+<span class="go"> Job = [None -> [job3.a.start, job3.b.start]] completed</span>
+<span class="go">Completed Task = create_initial_file_pairs</span>
+<span class="go">Task enters queue = first_task</span>
+<span class="go"> Job = [[job1.a.start, job1.b.start] -> job1.a.output.1]</span>
+<span class="go"> Job = [[job2.a.start, job2.b.start] -> job2.a.output.1]</span>
+<span class="go"> Job = [[job3.a.start, job3.b.start] -> job3.a.output.1]</span>
+<span class="go"> Job = [[job1.a.start, job1.b.start] -> job1.a.output.1] completed</span>
+<span class="go"> Job = [[job2.a.start, job2.b.start] -> job2.a.output.1] completed</span>
+<span class="go"> Job = [[job3.a.start, job3.b.start] -> job3.a.output.1] completed</span>
+<span class="go">Completed Task = first_task</span>
+<span class="go">Task enters queue = second_task</span>
+<span class="go"> Job = [job1.a.output.1 -> job1.a.output.2]</span>
+<span class="go"> Job = [job2.a.output.1 -> job2.a.output.2]</span>
+<span class="go"> Job = [job3.a.output.1 -> job3.a.output.2]</span>
+<span class="go"> Job = [job1.a.output.1 -> job1.a.output.2] completed</span>
+<span class="go"> Job = [job2.a.output.1 -> job2.a.output.2] completed</span>
+<span class="go"> Job = [job3.a.output.1 -> job3.a.output.2] completed</span>
+<span class="go">Completed Task = second_task</span>
+
+
+<span class="go"># modify job1.stage1</span>
+<span class="gp">>>> </span><span class="nb">open</span><span class="p">(</span><span class="s">"job1.a.output.1"</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
+</pre></div>
+</div>
+<p>At a verbosity of 6, even jobs which are up-to-date will be displayed:</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="gp">>>> </span><span class="n">pipeline_printout</span><span class="p">(</span><span class="n">sys</span><span class="o">.</span><span class="n">stdout</span><span class="p">,</span> <span class="p">[</span><span class="n">second_task</span><span class="p">],</span> <span class="n">verbose</span> <span class="o">=</span> <span class="mi">6</span><span class="p">)</span>
+
+<span class="go">________________________________________</span>
+<span class="go">Tasks which are up-to-date:</span>
+
+<span class="go">Task = create_initial_file_pairs</span>
+<span class="go"> Job = [None</span>
+<span class="go"> -> job1.a.start</span>
+<span class="go"> -> job1.b.start]</span>
+<span class="go"> Job = [None</span>
+<span class="go"> -> job2.a.start</span>
+<span class="go"> -> job2.b.start]</span>
+<span class="go"> Job = [None</span>
+<span class="go"> -> job3.a.start</span>
+<span class="go"> -> job3.b.start]</span>
+
+<span class="go">Task = first_task</span>
+<span class="go"> Job = [[job1.a.start, job1.b.start]</span>
+<span class="go"> -> job1.a.output.1]</span>
+<span class="go"> Job = [[job2.a.start, job2.b.start]</span>
+<span class="go"> -> job2.a.output.1]</span>
+<span class="go"> Job = [[job3.a.start, job3.b.start]</span>
+<span class="go"> -> job3.a.output.1]</span>
+
+<span class="go">________________________________________</span>
+
+
+
+<span class="go">________________________________________</span>
+<span class="go">Tasks which will be run:</span>
+
+<span class="go">Task = second_task</span>
+<span class="go"> Job = [job1.a.output.1</span>
+<span class="go"> -> job1.a.output.2]</span>
+<span class="go"> Job needs update:</span>
+<span class="go"> Input files:</span>
+<span class="go"> * 22 Jul 2014 15:29:19.33: job1.a.output.1</span>
+<span class="go"> Output files:</span>
+<span class="go"> * 22 Jul 2014 15:29:07.53: job1.a.output.2</span>
+
+<span class="go"> Job = [job2.a.output.1</span>
+<span class="go"> -> job2.a.output.2]</span>
+<span class="go"> Job = [job3.a.output.1</span>
+<span class="go"> -> job3.a.output.2]</span>
+
+<span class="go">________________________________________</span>
+</pre></div>
+</div>
+<p>We can now see that there is only one job in “second_task” which needs to be re-run,
+because ‘job1.a.output.1’ has been modified after ‘job1.a.output.2’.</p>
+</div></blockquote>
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="../../contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#"><strong>Chapter 5</strong>: Python Code for Understanding how your pipeline works with <tt class="docutils literal"><span class="pre">pipeline_printout(...)</span></tt></a><ul>
+<li><a class="reference internal" href="#display-the-initial-state-of-the-pipeline">Display the initial state of the pipeline</a></li>
+<li><a class="reference internal" href="#normal-output">Normal Output</a></li>
+<li><a class="reference internal" href="#high-verbosity-output">High Verbosity Output</a></li>
+<li><a class="reference internal" href="#display-the-partially-up-to-date-pipeline">Display the partially up-to-date pipeline</a></li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="originate_code.html"
+ title="previous chapter"><strong>Chapter 4</strong>: Python Code for Creating files with <tt class="docutils literal"><span class="pre">@originate</span></tt></a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="pipeline_printout_graph_code.html"
+ title="next chapter"><strong>Chapter 7</strong>: Python Code for Displaying the pipeline visually with <tt class="docutils literal"><span class="pre">pipeline_printout_graph(...)</span></tt></a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../../_sources/tutorials/new_tutorial/pipeline_printout_code.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="../../decorators/decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="../../decorators/originate.html">@originate</a> </li>
+ <li><a href="../../decorators/split.html">@split</a> </li>
+ <li><a href="../../decorators/transform.html">@transform</a> </li>
+ <li><a href="../../decorators/merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="../../decorators/subdivide.html">@subdivide</a> </li>
+ <li><a href="../../decorators/transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="../../decorators/collate.html">@collate</a> </li>
+ <li><a href="../../decorators/collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="../../decorators/graphviz.html">@graphviz</a> </li>
+ <li><a href="../../decorators/mkdir.html">@mkdir</a> </li>
+ <li><a href="../../decorators/follows.html">@follows / mkdir</a> </li>
+ <li><a href="../../decorators/posttask.html">@posttask touch_file</a> </li>
+ <li><a href="../../decorators/active_if.html">@active_if</a> </li>
+ <li><a href="../../decorators/jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="../../decorators/product.html">@product </a> </li>
+ <li><a href="../../decorators/permutations.html">@permutations </a> </li>
+ <li><a href="../../decorators/combinations.html">@combinations </a> </li>
+ <li><a href="../../decorators/combinations_with_replacement.html">@combinations_ _with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="../../decorators/decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="../../decorators/files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="../../decorators/parallel.html">@parallel</a> </li>
+ <li><a href="../../decorators/check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="../../decorators/indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="pipeline_printout_graph_code.html" title="Chapter 7: Python Code for Displaying the pipeline visually with pipeline_printout_graph(...)"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="originate_code.html" title="Chapter 4: Python Code for Creating files with @originate"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="introduction.html">Manual</a> / </li>
+ <li><a href="manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/tutorials/new_tutorial/pipeline_printout_graph.html b/doc/_build/html/tutorials/new_tutorial/pipeline_printout_graph.html
new file mode 100644
index 0000000..24fe151
--- /dev/null
+++ b/doc/_build/html/tutorials/new_tutorial/pipeline_printout_graph.html
@@ -0,0 +1,341 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>Chapter 7: Displaying the pipeline visually with pipeline_printout_graph(...) — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../../index.html" />
+ <link rel="next" title="Chapter 8: Specifying output file names with formatter() and regex()" href="output_file_names.html" />
+ <link rel="prev" title="Chapter 6: Running Ruffus from the command line with ruffus.cmdline" href="command_line.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="output_file_names.html" title="Chapter 8: Specifying output file names with formatter() and regex()"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="command_line.html" title="Chapter 6: Running Ruffus from the command line with ruffus.cmdline"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="introduction.html">Manual</a> / </li>
+ <li><a href="manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <div class="section" id="new-manual-pipeline-printout-graph-chapter-num-displaying-the-pipeline-visually-with-pipeline-printout-graph">
+<span id="new-manual-pipeline-printout-graph"></span><span id="index-0"></span><h1><strong>Chapter 7</strong>: Displaying the pipeline visually with <a class="reference internal" href="../../pipeline_functions.html#pipeline-functions-pipeline-printout-graph"><em>pipeline_printout_graph(...)</em></a><a class="headerlink" href="#new-manual-pipeline-printout-graph-chapter-num-displaying-the-pipeline-visually-with-pipeline-printout-graph" title="Permalink to this headline">¶</a></h1>
+<div class="admonition seealso">
+<p class="first admonition-title">See also</p>
+<ul class="last simple">
+<li><a class="reference internal" href="manual_contents.html#new-manual-table-of-contents"><em>Manual Table of Contents</em></a></li>
+<li><a class="reference internal" href="../../pipeline_functions.html#pipeline-functions-pipeline-printout-graph"><em>pipeline_printout_graph(...)</em></a> syntax</li>
+<li><a class="reference internal" href="../../decorators/graphviz.html#decorators-graphviz"><em>@graphviz(...)</em></a> syntax</li>
+</ul>
+</div>
+<div class="admonition note">
+<p class="first admonition-title">Note</p>
+<p>Remember to look at the example code:</p>
+<ul class="last simple">
+<li><a class="reference internal" href="pipeline_printout_graph_code.html#new-manual-pipeline-printout-graph-code"><em>Chapter 7: Python Code for Displaying the pipeline visually with pipeline_printout_graph(...)</em></a></li>
+</ul>
+</div>
+<div class="section" id="printing-out-a-flowchart-of-our-pipeline">
+<h2>Printing out a flowchart of our pipeline<a class="headerlink" href="#printing-out-a-flowchart-of-our-pipeline" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>It is all very well being able to trace the data flow through the pipeline as text.
+Sometimes, however, we need a bit of eye-candy!</p>
+<p>We can see a flowchart for our fledgling pipeline by executing:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="n">pipeline_printout_graph</span> <span class="p">(</span> <span class="s">'flowchart.svg'</span><span class="p">,</span>
+ <span class="s">'svg'</span><span class="p">,</span>
+ <span class="p">[</span><span class="n">second_task</span><span class="p">],</span>
+ <span class="n">no_key_legend</span> <span class="o">=</span> <span class="bp">False</span><span class="p">)</span>
+</pre></div>
+</div>
+<a class="reference internal image-reference" href="../../_images/simple_tutorial_stage5_flowchart.png"><img alt="../../_images/simple_tutorial_stage5_flowchart.png" src="../../_images/simple_tutorial_stage5_flowchart.png" style="width: 588.0px; height: 318.5px;" /></a>
+</div></blockquote>
+<p>Flowcharts can be printed in a large number of formats including <tt class="docutils literal"><span class="pre">jpg</span></tt>, <tt class="docutils literal"><span class="pre">svg</span></tt>, <tt class="docutils literal"><span class="pre">png</span></tt> and <tt class="docutils literal"><span class="pre">pdf</span></tt>.</p>
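+<p>For example (a sketch reusing the <tt class="docutils literal"><span class="pre">second_task</span></tt> pipeline from the example code; the output file names are arbitrary):</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre>pipeline_printout_graph("flowchart.png", "png", [second_task])
+pipeline_printout_graph("flowchart.pdf", "pdf", [second_task])
+</pre></div>
+</div>
+</div></blockquote>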
+<div class="admonition note">
+<p class="first admonition-title">Note</p>
+<p>Flowcharts rely on the <tt class="docutils literal"><span class="pre">dot</span></tt> programme from <a class="reference external" href="http://www.graphviz.org/">Graphviz</a>.</p>
+<p class="last">Please make sure this is installed.</p>
+</div>
+<p>There are 8 standard colour schemes, but you can further customise all the colours to your satisfaction:</p>
+<blockquote>
+<div><img alt="../../_images/flowchart_colour_schemes.png" src="../../_images/flowchart_colour_schemes.png" />
+</div></blockquote>
+<p>See <a class="reference internal" href="flowchart_colours.html#new-manual-flowchart-colours"><em>here</em></a> for example code.</p>
+</div></blockquote>
+</div>
+<div class="section" id="command-line-options-made-easier-with-ruffus-cmdline">
+<h2>Command line options made easier with <tt class="docutils literal"><span class="pre">ruffus.cmdline</span></tt><a class="headerlink" href="#command-line-options-made-easier-with-ruffus-cmdline" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>If you are using <tt class="docutils literal"><span class="pre">ruffus.cmdline</span></tt>, then you can easily ask for a flowchart from the command line:</p>
+<blockquote>
+<div><div class="highlight-bash"><div class="highlight"><pre>your_script.py --flowchart pipeline_flow_chart.png
+</pre></div>
+</div>
+</div></blockquote>
+<p>The output format is deduced from the extension but can be specified manually:</p>
+<blockquote>
+<div><div class="highlight-bash"><div class="highlight"><pre><span class="c"># specify format. Otherwise, deduced from the extension</span>
+your_script.py --flowchart pipeline_flow_chart.png --flowchart_format png
+</pre></div>
+</div>
+</div></blockquote>
+<p>Print the flow chart horizontally or vertically...</p>
+<blockquote>
+<div><div class="highlight-bash"><div class="highlight"><pre><span class="c"># flowchart proceeds from left to right , rather than from top to bottom</span>
+your_script.py --flowchart pipeline_flow_chart.png --draw_graph_horizontally
+</pre></div>
+</div>
+</div></blockquote>
+<p>...with or without a key legend</p>
+<blockquote>
+<div><div class="highlight-bash"><div class="highlight"><pre><span class="c"># Draw key legend</span>
+your_script.py --flowchart pipeline_flow_chart.png --key_legend_in_graph
+</pre></div>
+</div>
+</div></blockquote>
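+<p>All of these options assume that the script itself has been wired up with <tt class="docutils literal"><span class="pre">ruffus.cmdline</span></tt>. A minimal skeleton (a sketch only, not the manual's own example; the task and file name are made up) might look like:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre>from ruffus import *
+
+# cmdline is made available by "from ruffus import *"; it adds the standard
+# Ruffus options, including --flowchart and --flowchart_format
+parser = cmdline.get_argparse(description = "Example pipeline")
+options = parser.parse_args()
+
+@originate("example.start")
+def make_start_file(output_file):
+    open(output_file, "w").close()
+
+cmdline.run(options)
+</pre></div>
+</div>
+</div></blockquote>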
+</div></blockquote>
+</div>
+<div class="section" id="horribly-complicated-pipelines">
+<h2>Horribly complicated pipelines!<a class="headerlink" href="#horribly-complicated-pipelines" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>Flowcharts are especially useful if you have really complicated pipelines, such as</p>
+<blockquote>
+<div><a class="reference internal image-reference" href="../../_images/simple_tutorial_complex_flowchart.png"><img alt="../../_images/simple_tutorial_complex_flowchart.png" src="../../_images/simple_tutorial_complex_flowchart.png" style="width: 389.2px; height: 656.6px;" /></a>
+</div></blockquote>
+</div></blockquote>
+</div>
+<div class="section" id="circular-dependency-errors-in-pipelines">
+<h2>Circular dependency errors in pipelines!<a class="headerlink" href="#circular-dependency-errors-in-pipelines" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>Flowcharts are even more useful if the pipeline is not set up properly and vicious circular dependencies
+are present:</p>
+<blockquote>
+<div><a class="reference internal image-reference" href="../../_images/simple_tutorial_complex_flowchart_error.png"><img alt="../../_images/simple_tutorial_complex_flowchart_error.png" src="../../_images/simple_tutorial_complex_flowchart_error.png" style="width: 389.2px; height: 656.6px;" /></a>
+</div></blockquote>
+</div></blockquote>
+</div>
+<div class="section" id="graphviz-customising-the-appearance-of-each-task">
+<h2><tt class="docutils literal"><span class="pre">@graphviz</span></tt>: Customising the appearance of each task<a class="headerlink" href="#graphviz-customising-the-appearance-of-each-task" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>The graphic for each task can be further customised as you please by adding
+<a class="reference external" href="http://www.graphviz.org/doc/info/attrs.html">graphviz attributes</a> such as the URL, shape or colour
+directly to that node using the <tt class="docutils literal"><span class="pre">@graphviz</span></tt> decorator.</p>
+<p>For example, we can customise the graphic for <tt class="docutils literal"><span class="pre">myTask()</span></tt> to look like:</p>
+<blockquote>
+<div><a class="reference internal image-reference" href="../../_images/history_html_flowchart2.png"><img alt="../../_images/history_html_flowchart2.png" src="../../_images/history_html_flowchart2.png" style="width: 336.6px; height: 316.5px;" /></a>
+</div></blockquote>
+<p>by adding the requisite attributes as follows:</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="nd">@graphviz</span><span class="p">(</span><span class="n">URL</span><span class="o">=</span><span class="s">'"http://cnn.com"'</span><span class="p">,</span> <span class="n">fillcolor</span> <span class="o">=</span> <span class="s">'"#FFCCCC"'</span><span class="p">,</span>
+ <span class="n">color</span> <span class="o">=</span> <span class="s">'"#FF0000"'</span><span class="p">,</span> <span class="n">pencolor</span><span class="o">=</span><span class="s">'"#FF0000"'</span><span class="p">,</span> <span class="n">fontcolor</span><span class="o">=</span><span class="s">'"#4B6000"'</span><span class="p">,</span>
+ <span class="n">label_suffix</span> <span class="o">=</span> <span class="s">"???"</span><span class="p">,</span> <span class="n">label_prefix</span> <span class="o">=</span> <span class="s">"What is this?<BR/> "</span><span class="p">,</span>
+ <span class="n">label</span> <span class="o">=</span> <span class="s">"<What <FONT COLOR=</span><span class="se">\"</span><span class="s">red</span><span class="se">\"</span><span class="s">>is</FONT>this>"</span><span class="p">,</span>
+ <span class="n">shape</span><span class="o">=</span> <span class="s">"component"</span><span class="p">,</span> <span class="n">height</span> <span class="o">=</span> <span class="mf">1.5</span><span class="p">,</span> <span class="n">peripheries</span> <span class="o">=</span> <span class="mi">5</span><span class="p">,</span>
+ <span class="n">style</span><span class="o">=</span><span class="s">"dashed"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">Up_to_date_task2</span><span class="p">(</span><span class="n">infile</span><span class="p">,</span> <span class="n">outfile</span><span class="p">):</span>
+ <span class="k">pass</span>
+
+<span class="c"># Can use dictionary if you wish...</span>
+<span class="n">graphviz_params</span> <span class="o">=</span> <span class="p">{</span><span class="s">"URL"</span><span class="p">:</span><span class="s">"http://cnn.com"</span><span class="p">,</span> <span class="s">"fontcolor"</span><span class="p">:</span> <span class="s">'"#FF00FF"'</span><span class="p">}</span>
+<span class="nd">@graphviz</span><span class="p">(</span><span class="o">**</span><span class="n">graphviz_params</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">myTask</span><span class="p">(</span><span class="nb">input</span><span class="p">,</span><span class="n">output</span><span class="p">):</span>
+ <span class="k">pass</span>
+</pre></div>
+</div>
+<p>You can even use HTML formatting in task names, including specifying line wraps (as in the above example),
+using the <tt class="docutils literal"><span class="pre">label</span></tt> parameter. However, HTML labels <strong>must</strong> be enclosed in <tt class="docutils literal"><span class="pre"><</span></tt> and <tt class="docutils literal"><span class="pre">></span></tt>.</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="n">label</span> <span class="o">=</span> <span class="s">"<Line <BR/> wrapped task_name()>"</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p>Otherwise, you can also opt to keep the task name and wrap it with a prefix and suffix:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="n">label_suffix</span> <span class="o">=</span> <span class="s">"??? "</span><span class="p">,</span> <span class="n">label_prefix</span> <span class="o">=</span> <span class="s">": What is this?"</span>
+</pre></div>
+</div>
+</div></blockquote>
+<dl class="docutils">
+<dt>The <tt class="docutils literal"><span class="pre">URL</span></tt> attribute allows the generation of clickable svg, and also client / server</dt>
+<dd>side image maps usable in web pages.
+See <a class="reference external" href="http://www.graphviz.org/content/output-formats#dimap">Graphviz documentation</a></dd>
+</dl>
+</div></blockquote>
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="../../contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#"><strong>Chapter 7</strong>: Displaying the pipeline visually with <tt class="docutils literal"><span class="pre">pipeline_printout_graph(...)</span></tt></a><ul>
+<li><a class="reference internal" href="#printing-out-a-flowchart-of-our-pipeline">Printing out a flowchart of our pipeline</a></li>
+<li><a class="reference internal" href="#command-line-options-made-easier-with-ruffus-cmdline">Command line options made easier with <tt class="docutils literal"><span class="pre">ruffus.cmdline</span></tt></a></li>
+<li><a class="reference internal" href="#horribly-complicated-pipelines">Horribly complicated pipelines!</a></li>
+<li><a class="reference internal" href="#circular-dependency-errors-in-pipelines">Circular dependency errors in pipelines!</a></li>
+<li><a class="reference internal" href="#graphviz-customising-the-appearance-of-each-task"><tt class="docutils literal"><span class="pre">@graphviz</span></tt>: Customising the appearance of each task</a></li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="command_line.html"
+ title="previous chapter"><strong>Chapter 6</strong>: Running <em>Ruffus</em> from the command line with ruffus.cmdline</a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="output_file_names.html"
+ title="next chapter"><strong>Chapter 8</strong>: Specifying output file names with <tt class="docutils literal"><span class="pre">formatter()</span></tt> and <tt class="docutils literal"><span class="pre">regex()</span></tt></a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../../_sources/tutorials/new_tutorial/pipeline_printout_graph.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="../../decorators/decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="../../decorators/originate.html">@originate</a> </li>
+ <li><a href="../../decorators/split.html">@split</a> </li>
+ <li><a href="../../decorators/transform.html">@transform</a> </li>
+ <li><a href="../../decorators/merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="../../decorators/subdivide.html">@subdivide</a> </li>
+ <li><a href="../../decorators/transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="../../decorators/collate.html">@collate</a> </li>
+ <li><a href="../../decorators/collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="../../decorators/graphviz.html">@graphviz</a> </li>
+ <li><a href="../../decorators/mkdir.html">@mkdir</a> </li>
+ <li><a href="../../decorators/follows.html">@follows / mkdir</a> </li>
+ <li><a href="../../decorators/posttask.html">@posttask touch_file</a> </li>
+ <li><a href="../../decorators/active_if.html">@active_if</a> </li>
+ <li><a href="../../decorators/jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="../../decorators/product.html">@product </a> </li>
+ <li><a href="../../decorators/permutations.html">@permutations </a> </li>
+ <li><a href="../../decorators/combinations.html">@combinations </a> </li>
+ <li><a href="../../decorators/combinations_with_replacement.html">@combinations_ _with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="../../decorators/decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="../../decorators/files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="../../decorators/parallel.html">@parallel</a> </li>
+ <li><a href="../../decorators/check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="../../decorators/indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="output_file_names.html" title="Chapter 8: Specifying output file names with formatter() and regex()"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="command_line.html" title="Chapter 6: Running Ruffus from the command line with ruffus.cmdline"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="introduction.html">Manual</a> / </li>
+ <li><a href="manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/tutorials/new_tutorial/pipeline_printout_graph_code.html b/doc/_build/html/tutorials/new_tutorial/pipeline_printout_graph_code.html
new file mode 100644
index 0000000..55024b1
--- /dev/null
+++ b/doc/_build/html/tutorials/new_tutorial/pipeline_printout_graph_code.html
@@ -0,0 +1,364 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>Chapter 7: Python Code for Displaying the pipeline visually with pipeline_printout_graph(...) — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../../index.html" />
+ <link rel="next" title="Chapter 8: Python Code for Specifying output file names with formatter() and regex()" href="output_file_names_code.html" />
+ <link rel="prev" title="Chapter 5: Python Code for Understanding how your pipeline works with pipeline_printout(...)" href="pipeline_printout_code.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="output_file_names_code.html" title="Chapter 8: Python Code for Specifying output file names with formatter() and regex()"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="pipeline_printout_code.html" title="Chapter 5: Python Code for Understanding how your pipeline works with pipeline_printout(...)"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="introduction.html">Manual</a> / </li>
+ <li><a href="manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <div class="section" id="new-manual-pipeline-printout-graph-chapter-num-python-code-for-displaying-the-pipeline-visually-with-pipeline-printout-graph">
+<span id="new-manual-pipeline-printout-graph-code"></span><h1><strong>Chapter 7</strong>: Python Code for Displaying the pipeline visually with <a class="reference internal" href="../../pipeline_functions.html#pipeline-functions-pipeline-printout-graph"><em>pipeline_printout_graph(...)</em></a><a class="headerlink" href="#new-manual-pipeline-printout-graph-chapter-num-python-code-for-displaying-the-pipeline-visually-with-pipeline-printout-graph" title="Permalink to this headline">¶</a></h1>
+<div class="admonition seealso">
+<p class="first admonition-title">See also</p>
+<ul class="last simple">
+<li><a class="reference internal" href="manual_contents.html#new-manual-table-of-contents"><em>Manual Table of Contents</em></a></li>
+<li><a class="reference internal" href="../../pipeline_functions.html#pipeline-functions-pipeline-printout-graph"><em>pipeline_printout_graph(...)</em></a> syntax</li>
+<li>Back to <strong>Chapter 7</strong>: <a class="reference internal" href="pipeline_printout_graph.html#new-manual-pipeline-printout-graph"><em>Displaying the pipeline visually</em></a></li>
+</ul>
+</div>
+<div class="section" id="code">
+<h2>Code<a class="headerlink" href="#code" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><div class="highlight-python"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre> 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+ 8
+ 9
+10
+11
+12
+13
+14
+15
+16
+17
+18
+19
+20
+21
+22
+23
+24
+25
+26
+27
+28
+29
+30
+31
+32
+33
+34
+35
+36
+37
+38
+39
+40
+41
+42
+43
+44
+45
+46
+47
+48
+49
+50
+51
+52
+53
+54
+55
+56
+57
+58
+59
+60
+61</pre></div></td><td class="code"><div class="highlight"><pre><span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+<span class="kn">import</span> <span class="nn">sys</span>
+
+<span class="c">#---------------------------------------------------------------</span>
+<span class="c"># create initial files</span>
+<span class="c">#</span>
+<span class="nd">@originate</span><span class="p">([</span> <span class="p">[</span><span class="s">'job1.a.start'</span><span class="p">,</span> <span class="s">'job1.b.start'</span><span class="p">],</span>
+ <span class="p">[</span><span class="s">'job2.a.start'</span><span class="p">,</span> <span class="s">'job2.b.start'</span><span class="p">],</span>
+ <span class="p">[</span><span class="s">'job3.a.start'</span><span class="p">,</span> <span class="s">'job3.b.start'</span><span class="p">]</span> <span class="p">])</span>
+<span class="k">def</span> <span class="nf">create_initial_file_pairs</span><span class="p">(</span><span class="n">output_files</span><span class="p">):</span>
+ <span class="c"># create both files as necessary</span>
+ <span class="k">for</span> <span class="n">output_file</span> <span class="ow">in</span> <span class="n">output_files</span><span class="p">:</span>
+ <span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span> <span class="k">as</span> <span class="n">oo</span><span class="p">:</span> <span class="k">pass</span>
+
+<span class="c">#---------------------------------------------------------------</span>
+<span class="c"># first task</span>
+<span class="nd">@transform</span><span class="p">(</span><span class="n">create_initial_file_pairs</span><span class="p">,</span> <span class="n">suffix</span><span class="p">(</span><span class="s">".start"</span><span class="p">),</span> <span class="s">".output.1"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">first_task</span><span class="p">(</span><span class="n">input_files</span><span class="p">,</span> <span class="n">output_file</span><span class="p">):</span>
+ <span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">):</span> <span class="k">pass</span>
+
+
+<span class="c">#---------------------------------------------------------------</span>
+<span class="c"># second task</span>
+<span class="nd">@transform</span><span class="p">(</span><span class="n">first_task</span><span class="p">,</span> <span class="n">suffix</span><span class="p">(</span><span class="s">".output.1"</span><span class="p">),</span> <span class="s">".output.2"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">second_task</span><span class="p">(</span><span class="n">input_files</span><span class="p">,</span> <span class="n">output_file</span><span class="p">):</span>
+ <span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">):</span> <span class="k">pass</span>
+
+<span class="hll"><span class="c"># Print graph before running pipeline</span>
+</span>
+<span class="c">#---------------------------------------------------------------</span>
+<span class="c">#</span>
+<span class="c"># Show flow chart and tasks before running the pipeline</span>
+<span class="c">#</span>
+<span class="k">print</span> <span class="s">"Show flow chart and tasks before running the pipeline"</span>
+<span class="n">pipeline_printout_graph</span> <span class="p">(</span> <span class="nb">open</span><span class="p">(</span><span class="s">"simple_tutorial_stage5_before.png"</span><span class="p">,</span> <span class="s">"w"</span><span class="p">),</span>
+ <span class="s">"png"</span><span class="p">,</span>
+ <span class="p">[</span><span class="n">second_task</span><span class="p">],</span>
+ <span class="n">minimal_key_legend</span><span class="o">=</span><span class="bp">True</span><span class="p">)</span>
+
+<span class="c">#---------------------------------------------------------------</span>
+<span class="c">#</span>
+<span class="c"># Run</span>
+<span class="c">#</span>
+<span class="n">pipeline_run</span><span class="p">([</span><span class="n">second_task</span><span class="p">])</span>
+
+
+<span class="c"># modify job1.stage1</span>
+<span class="nb">open</span><span class="p">(</span><span class="s">"job1.a.output.1"</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
+
+
+<span class="hll"><span class="c"># Print graph after everything apart from ``job1.a.output.1`` is update</span>
+</span>
+<span class="c">#---------------------------------------------------------------</span>
+<span class="c">#</span>
+<span class="c"># Show flow chart and tasks after running the pipeline</span>
+<span class="c">#</span>
+<span class="k">print</span> <span class="s">"Show flow chart and tasks after running the pipeline"</span>
+<span class="n">pipeline_printout_graph</span> <span class="p">(</span> <span class="nb">open</span><span class="p">(</span><span class="s">"simple_tutorial_stage5_after.png"</span><span class="p">,</span> <span class="s">"w"</span><span class="p">),</span>
+ <span class="s">"png"</span><span class="p">,</span>
+ <span class="p">[</span><span class="n">second_task</span><span class="p">],</span>
+ <span class="n">no_key_legend</span><span class="o">=</span><span class="bp">True</span><span class="p">)</span>
+</pre></div>
+</td></tr></table></div>
+</div></blockquote>
+</div>
+<div class="section" id="resulting-flowcharts">
+<h2>Resulting Flowcharts<a class="headerlink" href="#resulting-flowcharts" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><table border="1" class="docutils">
+<colgroup>
+<col width="46%" />
+<col width="54%" />
+</colgroup>
+<tbody valign="top">
+<tr class="row-odd"><td><a class="first reference internal image-reference" href="../../_images/simple_tutorial_stage5_before.png"><img alt="Before running the pipeline" class="align-center" src="../../_images/simple_tutorial_stage5_before.png" style="width: 462.65px; height: 375.25px;" /></a>
+<p class="last centered">
+<strong>Before</strong></p></td>
+<td><a class="first reference internal image-reference" href="../../_images/simple_tutorial_stage5_after.png"><img alt="After running the pipeline" class="align-center" src="../../_images/simple_tutorial_stage5_after.png" style="width: 462.65px; height: 375.25px;" /></a>
+<p class="last centered">
+<strong>After</strong></p></td>
+</tr>
+</tbody>
+</table>
+<table border="1" class="docutils">
+<colgroup>
+<col width="100%" />
+</colgroup>
+<tbody valign="top">
+<tr class="row-odd"><td><a class="first reference internal image-reference" href="../../_images/tutorial_key.png"><img alt="Legend key" class="align-center" src="../../_images/tutorial_key.png" style="width: 1358.0px; height: 117.0px;" /></a>
+<p class="last centered">
+<strong>Legend</strong></p></td>
+</tr>
+</tbody>
+</table>
+</div></blockquote>
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="../../contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#"><strong>Chapter 7</strong>: Python Code for Displaying the pipeline visually with <tt class="docutils literal"><span class="pre">pipeline_printout_graph(...)</span></tt></a><ul>
+<li><a class="reference internal" href="#code">Code</a></li>
+<li><a class="reference internal" href="#resulting-flowcharts">Resulting Flowcharts</a></li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="pipeline_printout_code.html"
+ title="previous chapter"><strong>Chapter 5</strong>: Python Code for Understanding how your pipeline works with <tt class="docutils literal"><span class="pre">pipeline_printout(...)</span></tt></a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="output_file_names_code.html"
+ title="next chapter"><strong>Chapter 8</strong>: Python Code for Specifying output file names with <tt class="docutils literal"><span class="pre">formatter()</span></tt> and <tt class="docutils literal"><span class="pre">regex()</span></tt></a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../../_sources/tutorials/new_tutorial/pipeline_printout_graph_code.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="../../decorators/decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="../../decorators/originate.html">@originate</a> </li>
+ <li><a href="../../decorators/split.html">@split</a> </li>
+ <li><a href="../../decorators/transform.html">@transform</a> </li>
+ <li><a href="../../decorators/merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="../../decorators/subdivide.html">@subdivide</a> </li>
+ <li><a href="../../decorators/transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="../../decorators/collate.html">@collate</a> </li>
+ <li><a href="../../decorators/collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="../../decorators/graphviz.html">@graphviz</a> </li>
+ <li><a href="../../decorators/mkdir.html">@mkdir</a> </li>
+ <li><a href="../../decorators/follows.html">@follows / mkdir</a> </li>
+ <li><a href="../../decorators/posttask.html">@posttask touch_file</a> </li>
+ <li><a href="../../decorators/active_if.html">@active_if</a> </li>
+ <li><a href="../../decorators/jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="../../decorators/product.html">@product </a> </li>
+ <li><a href="../../decorators/permutations.html">@permutations </a> </li>
+ <li><a href="../../decorators/combinations.html">@combinations </a> </li>
+ <li><a href="../../decorators/combinations_with_replacement.html">@combinations_ _with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="../../decorators/decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="../../decorators/files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="../../decorators/parallel.html">@parallel</a> </li>
+ <li><a href="../../decorators/check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="../../decorators/indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="output_file_names_code.html" title="Chapter 8: Python Code for Specifying output file names with formatter() and regex()"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="pipeline_printout_code.html" title="Chapter 5: Python Code for Understanding how your pipeline works with pipeline_printout(...)"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="introduction.html">Manual</a> / </li>
+ <li><a href="manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/tutorials/new_tutorial/posttask.html b/doc/_build/html/tutorials/new_tutorial/posttask.html
new file mode 100644
index 0000000..7e3a9fe
--- /dev/null
+++ b/doc/_build/html/tutorials/new_tutorial/posttask.html
@@ -0,0 +1,299 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>Chapter 19: Signal the completion of each stage of our pipeline with @posttask — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../../index.html" />
+ <link rel="next" title="Chapter 20: Manipulating task inputs via string substitution using inputs() and add_inputs()" href="inputs.html" />
+ <link rel="prev" title="Chapter 18: Turning parts of the pipeline on and off at runtime with @active_if" href="active_if.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="inputs.html" title="Chapter 20: Manipulating task inputs via string substitution using inputs() and add_inputs()"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="active_if.html" title="Chapter 18: Turning parts of the pipeline on and off at runtime with @active_if"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="introduction.html">Manual</a> / </li>
+ <li><a href="manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <div class="section" id="new-manual-posttask-chapter-num-signal-the-completion-of-each-stage-of-our-pipeline-with-posttask">
+<span id="new-manual-posttask"></span><span id="index-0"></span><h1><strong>Chapter 19</strong>: Signal the completion of each stage of our pipeline with <a class="reference internal" href="../../decorators/posttask.html#decorators-posttask"><em>@posttask</em></a><a class="headerlink" href="#new-manual-posttask-chapter-num-signal-the-completion-of-each-stage-of-our-pipeline-with-posttask" title="Permalink to this headline">¶</a></h1>
+<div class="admonition seealso">
+<p class="first admonition-title">See also</p>
+<ul class="last simple">
+<li><a class="reference internal" href="manual_contents.html#new-manual-table-of-contents"><em>Manual Table of Contents</em></a></li>
+<li><a class="reference internal" href="../../decorators/posttask.html#decorators-posttask"><em>@posttask</em></a> syntax</li>
+</ul>
+</div>
+<div class="section" id="overview">
+<h2>Overview<a class="headerlink" href="#overview" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>It is often useful to signal the completion of each task by specifying an
+action to be taken or a function to be called. This can range from
+printing out a message or <a class="reference external" href="http://en.wikipedia.org/wiki/Touch_(Unix)">touching</a> a sentinel file,
+to emailing the author. This is particularly useful if the <a class="reference internal" href="../../glossary.html#term-task"><em class="xref std std-term">task</em></a> is a recipe applied to an unspecified number
+of parameters in parallel in different <a class="reference internal" href="../../glossary.html#term-job"><em class="xref std std-term">job</em></a>s. If the task is never run, or if it
+fails, then, needless to say, no task completion action will happen.</p>
+<p><em>Ruffus</em> uses the <a class="reference internal" href="../../decorators/posttask.html#decorators-posttask"><em>@posttask</em></a> decorator for this purpose.</p>
+</div></blockquote>
+<div class="section" id="posttask">
+<h3><strong>@posttask</strong><a class="headerlink" href="#posttask" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><p>We can signal the completion of each task by specifying
+one or more function(s) using <a class="reference internal" href="../../decorators/posttask.html#decorators-posttask"><em>@posttask</em></a></p>
+<div class="highlight-python"><div class="highlight"><pre><span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+
+<span class="k">def</span> <span class="nf">task_finished</span><span class="p">():</span>
+ <span class="k">print</span> <span class="s">"hooray"</span>
+
+<span class="nd">@posttask</span><span class="p">(</span><span class="n">task_finished</span><span class="p">)</span>
+<span class="nd">@originate</span><span class="p">(</span><span class="s">"a.1"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">create_if_necessary</span><span class="p">(</span><span class="n">output_file</span><span class="p">):</span>
+ <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+
+<span class="n">pipeline_run</span><span class="p">([</span><span class="n">create_if_necessary</span><span class="p">])</span>
+</pre></div>
+</div>
+<p>This is such a short function, we might as well write it in-line:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="nd">@posttask</span><span class="p">(</span><span class="k">lambda</span><span class="p">:</span> <span class="n">sys</span><span class="o">.</span><span class="n">stdout</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="s">"hooray</span><span class="se">\n</span><span class="s">"</span><span class="p">))</span>
+<span class="nd">@originate</span><span class="p">(</span><span class="s">"a.1"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">create_if_necessary</span><span class="p">(</span><span class="n">output_file</span><span class="p">):</span>
+ <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+</pre></div>
+</div>
+</div></blockquote>
+</div></blockquote>
+<div class="admonition note">
+<p class="first admonition-title">Note</p>
+<p class="last">The function(s) provided to <a class="reference internal" href="../../decorators/posttask.html#decorators-posttask"><em>@posttask</em></a> will be called if the pipeline passes
+through a task, even if none of its jobs are run because they are up-to-date.
+This happens when an upstream task is out-of-date, and the execution passes through
+this point in the pipeline. See the example in <a class="reference internal" href="dependencies.html#new-manual-dependencies"><em>Appendix 2: How dependency is checked</em></a>
+of this manual.</p>
+</div>
+</div>
+<div class="section" id="touch-file">
+<span id="new-manual-posttask-touch-file"></span><span id="index-1"></span><h3><a class="reference internal" href="../../decorators/indicator_objects.html#decorators-touch-file"><em>touch_file</em></a><a class="headerlink" href="#touch-file" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><p>One way to note the completion of a task is to create some sort of
+“flag” file. Each stage in a traditional <tt class="docutils literal"><span class="pre">make</span></tt> pipeline would contain a
+<tt class="docutils literal"><span class="pre">touch</span> <span class="pre">completed.flag</span></tt>.</p>
+<p>This is such a useful idiom that <em>Ruffus</em> provides the shorthand <a class="reference internal" href="../../decorators/indicator_objects.html#decorators-touch-file"><em>touch_file</em></a>:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+
+<span class="nd">@posttask</span><span class="p">(</span><span class="n">touch_file</span><span class="p">(</span><span class="s">"task_completed.flag"</span><span class="p">))</span>
+<span class="nd">@files</span><span class="p">(</span><span class="bp">None</span><span class="p">,</span> <span class="s">"a.1"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">create_if_necessary</span><span class="p">(</span><span class="n">input_file</span><span class="p">,</span> <span class="n">output_file</span><span class="p">):</span>
+ <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+
+<span class="n">pipeline_run</span><span class="p">()</span>
+</pre></div>
+</div>
+</div></blockquote>
+</div></blockquote>
+</div>
+<div class="section" id="adding-several-post-task-actions">
+<h3>Adding several post task actions<a class="headerlink" href="#adding-several-post-task-actions" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><p>You can, of course, add more than one action to be taken on completion of the
+task, either by stacking up as many <a class="reference internal" href="../../decorators/posttask.html#decorators-posttask"><em>@posttask</em></a> decorators
+as necessary, or by including several functions in the same <a class="reference internal" href="../../decorators/posttask.html#decorators-posttask"><em>@posttask</em></a>:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+
+<span class="nd">@posttask</span><span class="p">(</span><span class="n">print_hooray</span><span class="p">,</span> <span class="n">print_whoppee</span><span class="p">)</span>
+<span class="nd">@posttask</span><span class="p">(</span><span class="n">print_hip_hip</span><span class="p">,</span> <span class="n">touch_file</span><span class="p">(</span><span class="s">"sentinel_flag"</span><span class="p">))</span>
+<span class="nd">@originate</span><span class="p">(</span><span class="s">"a.1"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">create_if_necessary</span><span class="p">(</span><span class="n">output_file</span><span class="p">):</span>
+ <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+
+<span class="n">pipeline_run</span><span class="p">()</span>
+</pre></div>
+</div>
+</div></blockquote>
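+<p>The snippet above assumes that <tt class="docutils literal"><span class="pre">print_hooray</span></tt>, <tt class="docutils literal"><span class="pre">print_whoppee</span></tt> and <tt class="docutils literal"><span class="pre">print_hip_hip</span></tt> have already been defined. They are not part of <em>Ruffus</em>: any callables taking no arguments will do. Hypothetical definitions that would let the snippet run as written might be:</p>
+<div class="highlight-python"><div class="highlight"><pre># hypothetical helper functions: any callables taking no arguments will do
+def print_hooray():
+    print "hooray"
+
+def print_whoppee():
+    print "whoppee"
+
+def print_hip_hip():
+    print "hip hip"
+</pre></div>
+</div>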
+</div></blockquote>
+</div>
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="../../contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#"><strong>Chapter 19</strong>: Signal the completion of each stage of our pipeline with <tt class="docutils literal"><span class="pre">@posttask</span></tt></a><ul>
+<li><a class="reference internal" href="#overview">Overview</a><ul>
+<li><a class="reference internal" href="#posttask"><strong>@posttask</strong></a></li>
+<li><a class="reference internal" href="#touch-file"><tt class="docutils literal"><span class="pre">touch_file</span></tt></a></li>
+<li><a class="reference internal" href="#adding-several-post-task-actions">Adding several post task actions</a></li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="active_if.html"
+ title="previous chapter"><strong>Chapter 18</strong>: Turning parts of the pipeline on and off at runtime with <tt class="docutils literal"><span class="pre">@active_if</span></tt></a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="inputs.html"
+ title="next chapter"><strong>Chapter 20</strong>: Manipulating task inputs via string substitution using <tt class="docutils literal"><span class="pre">inputs()</span></tt> and <tt class="docutils literal"><span class="pre">add_inputs()</span></tt></a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../../_sources/tutorials/new_tutorial/posttask.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="../../decorators/decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="../../decorators/originate.html">@originate</a> </li>
+ <li><a href="../../decorators/split.html">@split</a> </li>
+ <li><a href="../../decorators/transform.html">@transform</a> </li>
+ <li><a href="../../decorators/merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="../../decorators/subdivide.html">@subdivide</a> </li>
+ <li><a href="../../decorators/transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="../../decorators/collate.html">@collate</a> </li>
+ <li><a href="../../decorators/collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="../../decorators/graphviz.html">@graphviz</a> </li>
+ <li><a href="../../decorators/mkdir.html">@mkdir</a> </li>
+ <li><a href="../../decorators/follows.html">@follows / mkdir</a> </li>
+ <li><a href="../../decorators/posttask.html">@posttask touch_file</a> </li>
+ <li><a href="../../decorators/active_if.html">@active_if</a> </li>
+ <li><a href="../../decorators/jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="../../decorators/product.html">@product </a> </li>
+ <li><a href="../../decorators/permutations.html">@permutations </a> </li>
+ <li><a href="../../decorators/combinations.html">@combinations </a> </li>
+ <li><a href="../../decorators/combinations_with_replacement.html">@combinations_ _with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="../../decorators/decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="../../decorators/files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="../../decorators/parallel.html">@parallel</a> </li>
+ <li><a href="../../decorators/check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="../../decorators/indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="inputs.html" title="Chapter 20: Manipulating task inputs via string substitution using inputs() and add_inputs()"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="active_if.html" title="Chapter 18: Turning parts of the pipeline on and off at runtime with @active_if"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="introduction.html">Manual</a> / </li>
+ <li><a href="manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/tutorials/new_tutorial/split.html b/doc/_build/html/tutorials/new_tutorial/split.html
new file mode 100644
index 0000000..95027f0
--- /dev/null
+++ b/doc/_build/html/tutorials/new_tutorial/split.html
@@ -0,0 +1,393 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>Chapter 12: Splitting up large tasks / files with @split — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../../index.html" />
+ <link rel="next" title="Chapter 13: @merge multiple input into a single result" href="merge.html" />
+ <link rel="prev" title="Chapter 11: Pipeline topologies and a compendium of Ruffus decorators" href="decorators_compendium.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="merge.html" title="Chapter 13: @merge multiple input into a single result"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="decorators_compendium.html" title="Chapter 11: Pipeline topologies and a compendium of Ruffus decorators"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="introduction.html">Manual</a> / </li>
+ <li><a href="manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <div class="section" id="new-manual-split-chapter-num-splitting-up-large-tasks-files-with-split">
+<span id="new-manual-split"></span><span id="index-0"></span><h1><strong>Chapter 12</strong>: Splitting up large tasks / files with <strong>@split</strong><a class="headerlink" href="#new-manual-split-chapter-num-splitting-up-large-tasks-files-with-split" title="Permalink to this headline">¶</a></h1>
+<div class="admonition seealso">
+<p class="first admonition-title">See also</p>
+<ul class="last simple">
+<li><a class="reference internal" href="manual_contents.html#new-manual-table-of-contents"><em>Manual Table of Contents</em></a></li>
+<li><a class="reference internal" href="../../decorators/split.html#decorators-split"><em>@split</em></a> syntax</li>
+<li><a class="reference internal" href="split_code.html#new-manual-split-code"><em>Example code for this chapter</em></a></li>
+</ul>
+</div>
+<div class="section" id="overview">
+<h2>Overview<a class="headerlink" href="#overview" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>A common requirement in computational pipelines is to split up a large task into
+small jobs which can be run on different processors, (or sent to a computational
+cluster). Very often, the number of jobs depends dynamically on the size of the
+task, and cannot be known beforehand.</p>
+<p><em>Ruffus</em> uses the <a class="reference internal" href="../../decorators/split.html#decorators-split"><em>@split</em></a> decorator to indicate that
+the <a class="reference internal" href="../../glossary.html#term-task"><em class="xref std std-term">task</em></a> function will produce an indeterminate number of independent <em>Outputs</em> from a single <em>Input</em>.</p>
+</div></blockquote>
+</div>
+<div class="section" id="example-calculate-variance-for-a-large-list-of-numbers-in-parallel">
+<h2>Example: Calculate variance for a large list of numbers in parallel<a class="headerlink" href="#example-calculate-variance-for-a-large-list-of-numbers-in-parallel" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>Suppose we wanted to calculate the <a class="reference external" href="http://en.wikipedia.org/wiki/Variance">variance</a> for
+100,000 numbers: how can we parallelise the calculation so that we can get an answer as
+speedily as possible?</p>
+<p>We need to</p>
+<blockquote>
+<div><ul class="simple">
+<li>break down the problem into manageable chunks</li>
+<li>solve these in parallel, possibly on a computational cluster and then</li>
+<li>merge the partial solutions back together for a final result.</li>
+</ul>
+</div></blockquote>
+<p>To complicate things, we usually do not want to hard-code the number of parallel chunks beforehand.
+The degree of parallelism is often only apparent as we process our data.</p>
+<p><strong>Ruffus</strong> was designed to solve such problems which are common, for example, in bioinformatics and genomics.</p>
+<p>A flowchart for our variance problem might look like this:</p>
+<a class="reference internal image-reference" href="../../_images/manual_split_merge_example.jpg"><img alt="../../_images/manual_split_merge_example.jpg" src="../../_images/manual_split_merge_example.jpg" style="width: 251.1px; height: 197.4px;" /></a>
+<p>(In this toy example, we create our own starting data in <tt class="docutils literal"><span class="pre">create_random_numbers()</span></tt>.)</p>
+</div></blockquote>
+</div>
+<div class="section" id="output-files-for-split">
+<h2>Output files for <a class="reference internal" href="../../decorators/split.html#decorators-split"><em>@split</em></a><a class="headerlink" href="#output-files-for-split" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>The <em>Ruffus</em> decorator <a class="reference internal" href="../../decorators/split.html#decorators-split"><em>@split</em></a> is designed specifically with this run-time flexibility in mind:</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="nd">@split</span><span class="p">(</span><span class="n">create_random_numbers</span><span class="p">,</span> <span class="s">"*.chunks"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">split_problem</span> <span class="p">(</span><span class="n">input_file_names</span><span class="p">,</span> <span class="n">output_files</span><span class="p">):</span>
+ <span class="k">pass</span>
+</pre></div>
+</div>
+<p>This will split the incoming <tt class="docutils literal"><span class="pre">input_file_names</span></tt> into <tt class="docutils literal"><span class="pre">NNN</span></tt> <em>outputs</em>, where <tt class="docutils literal"><span class="pre">NNN</span></tt> is not predetermined.</p>
+<p>The <em>output</em> (second) parameter of <a class="reference internal" href="../../decorators/split.html#decorators-split"><em>@split</em></a> often contains a <a class="reference external" href="http://docs.python.org/library/glob.html"><cite>glob</cite></a> pattern like the <tt class="docutils literal"><span class="pre">*.chunks</span></tt> above.</p>
+<p>Only <strong>after</strong> the task function has completed, will Ruffus match the <strong>Output</strong> parameter (<tt class="docutils literal"><span class="pre">*.chunks</span></tt>)
+against the files which have been created by <tt class="docutils literal"><span class="pre">split_problem()</span></tt> (e.g. <tt class="docutils literal"><span class="pre">1.chunks</span></tt>, <tt class="docutils literal"><span class="pre">2.chunks</span></tt>, <tt class="docutils literal"><span class="pre">3.chunks</span></tt>)</p>
+</div></blockquote>
+</div>
+<div class="section" id="be-careful-in-specifying-output-globs">
+<h2>Be careful in specifying <strong>Output</strong> globs<a class="headerlink" href="#be-careful-in-specifying-output-globs" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>Note that it is your responsibility to keep the <strong>Output</strong> specification tight enough so that Ruffus does not
+pick up extraneous files.</p>
+<p>You can specify multiple <a class="reference external" href="http://docs.python.org/library/glob.html"><cite>glob</cite></a> patterns to match <em>all</em> the files which are the
+result of the splitting task function. These can even cover different directories,
+or groups of file names. This is a more extreme example:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="nd">@split</span><span class="p">(</span><span class="s">"input.file"</span><span class="p">,</span> <span class="p">[</span><span class="s">'a*.bits'</span><span class="p">,</span> <span class="s">'b*.pieces'</span><span class="p">,</span> <span class="s">'somewhere_else/c*.stuff'</span><span class="p">])</span>
+<span class="k">def</span> <span class="nf">split_function</span> <span class="p">(</span><span class="n">input_filename</span><span class="p">,</span> <span class="n">output_files</span><span class="p">):</span>
+ <span class="s">"Code to split up 'input.file'"</span>
+</pre></div>
+</div>
+</div></blockquote>
+</div></blockquote>
+</div>
+<div class="section" id="clean-up-previous-pipeline-runs">
+<h2>Clean up previous pipeline runs<a class="headerlink" href="#clean-up-previous-pipeline-runs" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>Problems arise when the current directory contains the results of previous pipeline runs.</p>
+<ul class="simple">
+<li>For example, if the previous analysis involved a large data set, there might be 3 chunks: <tt class="docutils literal"><span class="pre">1.chunks</span></tt>, <tt class="docutils literal"><span class="pre">2.chunks</span></tt>, <tt class="docutils literal"><span class="pre">3.chunks</span></tt>.</li>
+<li>In the current analysis, there might be a smaller data set which divides into only 2 chunks, <tt class="docutils literal"><span class="pre">1.chunks</span></tt> and <tt class="docutils literal"><span class="pre">2.chunks</span></tt>.</li>
+<li>Unfortunately, <tt class="docutils literal"><span class="pre">3.chunks</span></tt> from the previous run is still hanging around and will be included erroneously by the glob <tt class="docutils literal"><span class="pre">*.chunks</span></tt>.</li>
+</ul>
+<div class="admonition warning">
+<p class="first admonition-title">Warning</p>
+<p class="last"><strong>Your first duty in</strong> <a class="reference internal" href="../../decorators/split.html#decorators-split"><em>@split</em></a> <strong>tasks functions should be to clean up</strong></p>
+</div>
+<p>To help you clean up thoroughly, Ruffus initialises the <strong>output</strong> parameter to all files which match the specification.</p>
+<p>The first order of business is thus invariably to clean up (delete with <tt class="docutils literal"><span class="pre">os.unlink</span></tt>) all the files in <strong>Output</strong>.</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="c">#---------------------------------------------------------------</span>
+<span class="c">#</span>
+<span class="c"># split initial file</span>
+<span class="c">#</span>
+<span class="nd">@split</span><span class="p">(</span><span class="n">create_random_numbers</span><span class="p">,</span> <span class="s">"*.chunks"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">split_problem</span> <span class="p">(</span><span class="n">input_file_names</span><span class="p">,</span> <span class="n">output_files</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> splits random numbers file into xxx files of chunk_size each</span>
+<span class="sd"> """</span>
+ <span class="c">#</span>
+<span class="hll"> <span class="c"># clean up any files from previous runs</span>
+</span> <span class="c">#</span>
+ <span class="c">#for ff in glob.glob("*.chunks"):</span>
+ <span class="k">for</span> <span class="n">ff</span> <span class="ow">in</span> <span class="n">input_file_names</span><span class="p">:</span>
+ <span class="n">os</span><span class="o">.</span><span class="n">unlink</span><span class="p">(</span><span class="n">ff</span><span class="p">)</span>
+</pre></div>
+</div>
+<p>(The first time you run the example code, <tt class="docutils literal"><span class="pre">*.chunks</span></tt> will initialise <tt class="docutils literal"><span class="pre">output_files</span></tt> to an empty list.)</p>
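+<p>After the cleanup, the rest of the task function writes out the new chunk files. The following is a simplified sketch of how this might be done; it assumes a hypothetical <tt class="docutils literal"><span class="pre">chunk_size</span></tt> constant and a toy stand-in for <tt class="docutils literal"><span class="pre">create_random_numbers()</span></tt>, and the <a class="reference internal" href="split_code.html#new-manual-split-code"><em>example code for this chapter</em></a> contains the full version:</p>
+<div class="highlight-python"><div class="highlight"><pre>from ruffus import *
+import random, os
+
+chunk_size = 1000                     # hypothetical: how many numbers per chunk file
+
+@originate("random_numbers.list")     # simplified stand-in for create_random_numbers()
+def create_random_numbers(output_file_name):
+    with open(output_file_name, "w") as oo:
+        for ii in range(10000):
+            oo.write("%d\n" % random.randint(0, 100))
+
+@split(create_random_numbers, "*.chunks")
+def split_problem(input_file_names, output_files):
+    # clean up chunk files left over from previous runs
+    for ff in output_files:
+        os.unlink(ff)
+    # re-write the numbers, chunk_size lines per "NNN.chunks" file
+    with open(input_file_names) as input_file:
+        numbers = input_file.readlines()
+    for chunk_index, start in enumerate(range(0, len(numbers), chunk_size)):
+        with open("%d.chunks" % (chunk_index + 1), "w") as output_file:
+            output_file.writelines(numbers[start:start + chunk_size])
+
+pipeline_run([split_problem])
+</pre></div>
+</div>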
+</div></blockquote>
+</div>
+<div class="section" id="to-many">
+<span id="new-manual-split-one-to-many"></span><h2>1 to many<a class="headerlink" href="#to-many" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p><a class="reference internal" href="../../decorators/split.html#decorators-split"><em>@split</em></a> is a one to many operator because its
+outputs are a list of <em>independent</em> items.</p>
+<p>If <a class="reference internal" href="../../decorators/split.html#decorators-split"><em>@split</em></a> generates 5 files, then this will lead to 5 jobs downstream.</p>
+<p>This means we can just connect our old friend <a class="reference internal" href="../../decorators/transform.html#decorators-transform"><em>@transform</em></a> to our pipeline
+and the results of <a class="reference internal" href="../../decorators/split.html#decorators-split"><em>@split</em></a> will be analysed in parallel. This code should look
+familiar:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="c">#---------------------------------------------------------------</span>
+<span class="c">#</span>
+<span class="c"># Calculate sum and sum of squares for each chunk file</span>
+<span class="c">#</span>
+<span class="nd">@transform</span><span class="p">(</span><span class="n">split_problem</span><span class="p">,</span> <span class="n">suffix</span><span class="p">(</span><span class="s">".chunks"</span><span class="p">),</span> <span class="s">".sums"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">sum_of_squares</span> <span class="p">(</span><span class="n">input_file_name</span><span class="p">,</span> <span class="n">output_file_name</span><span class="p">):</span>
+ <span class="k">pass</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p>Which results in output like this:</p>
+<blockquote>
+<div><div class="highlight-pycon"><div class="highlight"><pre><span class="gp">>>> </span><span class="n">pipeline_run</span><span class="p">()</span>
+<span class="go"> Job = [[random_numbers.list] -> *.chunks] completed</span>
+<span class="go">Completed Task = split_problem</span>
+<span class="go"> Job = [1.chunks -> 1.sums] completed</span>
+<span class="go"> Job = [10.chunks -> 10.sums] completed</span>
+<span class="go"> Job = [2.chunks -> 2.sums] completed</span>
+<span class="go"> Job = [3.chunks -> 3.sums] completed</span>
+<span class="go"> Job = [4.chunks -> 4.sums] completed</span>
+<span class="go"> Job = [5.chunks -> 5.sums] completed</span>
+<span class="go"> Job = [6.chunks -> 6.sums] completed</span>
+<span class="go"> Job = [7.chunks -> 7.sums] completed</span>
+<span class="go"> Job = [8.chunks -> 8.sums] completed</span>
+<span class="go"> Job = [9.chunks -> 9.sums] completed</span>
+<span class="go">Completed Task = sum_of_squares</span>
+</pre></div>
+</div>
+</div></blockquote>
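+<p>For completeness, here is a simplified sketch of what the body of <tt class="docutils literal"><span class="pre">sum_of_squares()</span></tt> might contain (not the chapter&#8217;s actual example code): each job reads the numbers in one chunk and writes out the sum, the sum of squares and the count, ready for a later merge step to combine into the final variance.</p>
+<div class="highlight-python"><div class="highlight"><pre>@transform(split_problem, suffix(".chunks"), ".sums")
+def sum_of_squares(input_file_name, output_file_name):
+    # one job per chunk: summarise this chunk of numbers
+    total, total_squared, count = 0.0, 0.0, 0
+    with open(input_file_name) as ii:
+        for line in ii:
+            value = float(line)
+            total += value
+            total_squared += value * value
+            count += 1
+    with open(output_file_name, "w") as oo:
+        oo.write("%f\n%f\n%d\n" % (total, total_squared, count))
+</pre></div>
+</div>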
+<p>Have a look at the <a class="reference internal" href="split_code.html#new-manual-split-code"><em>Example code for this chapter</em></a></p>
+</div></blockquote>
+</div>
+<div class="section" id="nothing-to-many">
+<span id="new-manual-split-nothing-to-many"></span><h2>Nothing to many<a class="headerlink" href="#nothing-to-many" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>Normally we would use <a class="reference internal" href="originate.html#new-manual-originate"><em>@originate</em></a> to create files from
+scratch, for example at the beginning of the pipeline.</p>
+<p>However, sometimes, it is not possible to determine ahead of time how many files you
+will be creating from scratch. <a class="reference internal" href="../../decorators/split.html#decorators-split"><em>@split</em></a> can also be useful even in such cases:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="kn">from</span> <span class="nn">random</span> <span class="kn">import</span> <span class="n">randint</span>
+<span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+<span class="kn">import</span> <span class="nn">os</span>
+
+<span class="c"># Create between 2 and 5 files</span>
+<span class="hll"><span class="nd">@split</span><span class="p">(</span><span class="bp">None</span><span class="p">,</span> <span class="s">"*.start"</span><span class="p">)</span>
+</span><span class="k">def</span> <span class="nf">create_initial_files</span><span class="p">(</span><span class="n">no_input_file</span><span class="p">,</span> <span class="n">output_files</span><span class="p">):</span>
+ <span class="c"># cleanup first</span>
+ <span class="k">for</span> <span class="n">oo</span> <span class="ow">in</span> <span class="n">output_files</span><span class="p">:</span>
+ <span class="n">os</span><span class="o">.</span><span class="n">unlink</span><span class="p">(</span><span class="n">oo</span><span class="p">)</span>
+ <span class="c"># make new files</span>
+ <span class="k">for</span> <span class="n">ii</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">randint</span><span class="p">(</span><span class="mi">2</span><span class="p">,</span><span class="mi">5</span><span class="p">)):</span>
+ <span class="nb">open</span><span class="p">(</span><span class="s">"</span><span class="si">%d</span><span class="s">.start"</span> <span class="o">%</span> <span class="n">ii</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+
+<span class="nd">@transform</span><span class="p">(</span><span class="n">create_initial_files</span><span class="p">,</span> <span class="n">suffix</span><span class="p">(</span><span class="s">".start"</span><span class="p">),</span> <span class="s">".processed"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">process_files</span><span class="p">(</span><span class="n">input_file</span><span class="p">,</span> <span class="n">output_file</span><span class="p">):</span>
+ <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+
+<span class="n">pipeline_run</span><span class="p">()</span>
+</pre></div>
+</div>
+<p>Giving:</p>
+<div class="highlight-pycon"><div class="highlight"><pre><span class="gp">>>> </span><span class="n">pipeline_run</span><span class="p">()</span>
+<span class="go"> Job = [None -> *.start] completed</span>
+<span class="go">Completed Task = create_initial_files</span>
+<span class="go"> Job = [0.start -> 0.processed] completed</span>
+<span class="go"> Job = [1.start -> 1.processed] completed</span>
+<span class="go">Completed Task = process_files</span>
+</pre></div>
+</div>
+</div></blockquote>
+</div></blockquote>
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="../../contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#"><strong>Chapter 12</strong>: Splitting up large tasks / files with <strong>@split</strong></a><ul>
+<li><a class="reference internal" href="#overview">Overview</a></li>
+<li><a class="reference internal" href="#example-calculate-variance-for-a-large-list-of-numbers-in-parallel">Example: Calculate variance for a large list of numbers in parallel</a></li>
+<li><a class="reference internal" href="#output-files-for-split">Output files for <tt class="docutils literal"><span class="pre">@split</span></tt></a></li>
+<li><a class="reference internal" href="#be-careful-in-specifying-output-globs">Be careful in specifying <strong>Output</strong> globs</a></li>
+<li><a class="reference internal" href="#clean-up-previous-pipeline-runs">Clean up previous pipeline runs</a></li>
+<li><a class="reference internal" href="#to-many">1 to many</a></li>
+<li><a class="reference internal" href="#nothing-to-many">Nothing to many</a></li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="decorators_compendium.html"
+ title="previous chapter"><strong>Chapter 11</strong>: Pipeline topologies and a compendium of <em>Ruffus</em> decorators</a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="merge.html"
+ title="next chapter"><strong>Chapter 13</strong>: <tt class="docutils literal"><span class="pre">@merge</span></tt> multiple input into a single result</a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../../_sources/tutorials/new_tutorial/split.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="../../decorators/decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="../../decorators/originate.html">@originate</a> </li>
+ <li><a href="../../decorators/split.html">@split</a> </li>
+ <li><a href="../../decorators/transform.html">@transform</a> </li>
+ <li><a href="../../decorators/merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="../../decorators/subdivide.html">@subdivide</a> </li>
+ <li><a href="../../decorators/transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="../../decorators/collate.html">@collate</a> </li>
+ <li><a href="../../decorators/collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="../../decorators/graphviz.html">@graphviz</a> </li>
+ <li><a href="../../decorators/mkdir.html">@mkdir</a> </li>
+ <li><a href="../../decorators/follows.html">@follows / mkdir</a> </li>
+ <li><a href="../../decorators/posttask.html">@posttask touch_file</a> </li>
+ <li><a href="../../decorators/active_if.html">@active_if</a> </li>
+ <li><a href="../../decorators/jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="../../decorators/product.html">@product </a> </li>
+ <li><a href="../../decorators/permutations.html">@permutations </a> </li>
+ <li><a href="../../decorators/combinations.html">@combinations </a> </li>
+ <li><a href="../../decorators/combinations_with_replacement.html">@combinations_ _with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="../../decorators/decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="../../decorators/files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="../../decorators/parallel.html">@parallel</a> </li>
+ <li><a href="../../decorators/check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="../../decorators/indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="merge.html" title="Chapter 13: @merge multiple input into a single result"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="decorators_compendium.html" title="Chapter 11: Pipeline topologies and a compendium of Ruffus decorators"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="introduction.html">Manual</a> / </li>
+ <li><a href="manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/tutorials/new_tutorial/split_code.html b/doc/_build/html/tutorials/new_tutorial/split_code.html
new file mode 100644
index 0000000..c5d16e4
--- /dev/null
+++ b/doc/_build/html/tutorials/new_tutorial/split_code.html
@@ -0,0 +1,303 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>Chapter 12: Python Code for Splitting up large tasks / files with @split — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../../index.html" />
+ <link rel="next" title="Chapter 13: Python Code for @merge multiple input into a single result" href="merge_code.html" />
+ <link rel="prev" title="Chapter 10: Python Code for Checkpointing: Interrupted Pipelines and Exceptions" href="checkpointing_code.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="merge_code.html" title="Chapter 13: Python Code for @merge multiple input into a single result"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="checkpointing_code.html" title="Chapter 10: Python Code for Checkpointing: Interrupted Pipelines and Exceptions"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="introduction.html">Manual</a> / </li>
+ <li><a href="manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <div class="section" id="new-manual-split-chapter-num-python-code-for-splitting-up-large-tasks-files-with-split">
+<span id="new-manual-split-code"></span><h1><strong>Chapter 12</strong>: Python Code for Splitting up large tasks / files with <strong>@split</strong><a class="headerlink" href="#new-manual-split-chapter-num-python-code-for-splitting-up-large-tasks-files-with-split" title="Permalink to this headline">¶</a></h1>
+<div class="admonition seealso">
+<p class="first admonition-title">See also</p>
+<ul class="last simple">
+<li><a class="reference internal" href="manual_contents.html#new-manual-table-of-contents"><em>Manual Table of Contents</em></a></li>
+<li><a class="reference internal" href="../../decorators/split.html#decorators-split"><em>@split syntax in detail</em></a></li>
+<li>Back to <strong>Chapter 12</strong>: <a class="reference internal" href="split.html#new-manual-split"><em>Splitting up large tasks / files with @split</em></a></li>
+</ul>
+</div>
+<div class="section" id="splitting-large-jobs">
+<h2>Splitting large jobs<a class="headerlink" href="#splitting-large-jobs" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+
+<span class="n">NUMBER_OF_RANDOMS</span> <span class="o">=</span> <span class="mi">10000</span>
+<span class="n">CHUNK_SIZE</span> <span class="o">=</span> <span class="mi">1000</span>
+
+
+<span class="kn">import</span> <span class="nn">random</span><span class="o">,</span> <span class="nn">os</span><span class="o">,</span> <span class="nn">glob</span>
+
+<span class="c">#---------------------------------------------------------------</span>
+<span class="c">#</span>
+<span class="c"># Create random numbers</span>
+<span class="c">#</span>
+<span class="nd">@originate</span><span class="p">(</span><span class="s">"random_numbers.list"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">create_random_numbers</span><span class="p">(</span><span class="n">output_file_name</span><span class="p">):</span>
+ <span class="n">f</span> <span class="o">=</span> <span class="nb">open</span><span class="p">(</span><span class="n">output_file_name</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+ <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">NUMBER_OF_RANDOMS</span><span class="p">):</span>
+ <span class="n">f</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="s">"</span><span class="si">%g</span><span class="se">\n</span><span class="s">"</span> <span class="o">%</span> <span class="p">(</span><span class="n">random</span><span class="o">.</span><span class="n">random</span><span class="p">()</span> <span class="o">*</span> <span class="mf">100.0</span><span class="p">))</span>
+
+<span class="c">#---------------------------------------------------------------</span>
+<span class="c">#</span>
+<span class="c"># split initial file</span>
+<span class="c">#</span>
+<span class="nd">@split</span><span class="p">(</span><span class="n">create_random_numbers</span><span class="p">,</span> <span class="s">"*.chunks"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">split_problem</span> <span class="p">(</span><span class="n">input_file_names</span><span class="p">,</span> <span class="n">output_files</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> splits random numbers file into xxx files of chunk_size each</span>
+<span class="sd"> """</span>
+ <span class="c">#</span>
+ <span class="c"># clean up any files from previous runs</span>
+ <span class="c">#</span>
+ <span class="c">#for ff in glob.glob("*.chunks"):</span>
+    <span class="k">for</span> <span class="n">ff</span> <span class="ow">in</span> <span class="n">output_files</span><span class="p">:</span>
+ <span class="n">os</span><span class="o">.</span><span class="n">unlink</span><span class="p">(</span><span class="n">ff</span><span class="p">)</span>
+ <span class="c">#</span>
+ <span class="c">#</span>
+ <span class="c"># create new file every chunk_size lines and</span>
+ <span class="c"># copy each line into current file</span>
+ <span class="c">#</span>
+ <span class="n">output_file</span> <span class="o">=</span> <span class="bp">None</span>
+ <span class="n">cnt_files</span> <span class="o">=</span> <span class="mi">0</span>
+ <span class="k">for</span> <span class="n">input_file_name</span> <span class="ow">in</span> <span class="n">input_file_names</span><span class="p">:</span>
+ <span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="n">line</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="nb">open</span><span class="p">(</span><span class="n">input_file_name</span><span class="p">)):</span>
+ <span class="k">if</span> <span class="n">i</span> <span class="o">%</span> <span class="n">CHUNK_SIZE</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
+ <span class="n">cnt_files</span> <span class="o">+=</span> <span class="mi">1</span>
+ <span class="n">output_file</span> <span class="o">=</span> <span class="nb">open</span><span class="p">(</span><span class="s">"</span><span class="si">%d</span><span class="s">.chunks"</span> <span class="o">%</span> <span class="n">cnt_files</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+ <span class="n">output_file</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="n">line</span><span class="p">)</span>
+
+<span class="c">#---------------------------------------------------------------</span>
+<span class="c">#</span>
+<span class="c"># Calculate sum and sum of squares for each chunk file</span>
+<span class="c">#</span>
+<span class="nd">@transform</span><span class="p">(</span><span class="n">split_problem</span><span class="p">,</span> <span class="n">suffix</span><span class="p">(</span><span class="s">".chunks"</span><span class="p">),</span> <span class="s">".sums"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">sum_of_squares</span> <span class="p">(</span><span class="n">input_file_name</span><span class="p">,</span> <span class="n">output_file_name</span><span class="p">):</span>
+ <span class="n">output</span> <span class="o">=</span> <span class="nb">open</span><span class="p">(</span><span class="n">output_file_name</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+ <span class="n">sum_squared</span><span class="p">,</span> <span class="nb">sum</span> <span class="o">=</span> <span class="p">[</span><span class="mf">0.0</span><span class="p">,</span> <span class="mf">0.0</span><span class="p">]</span>
+ <span class="n">cnt_values</span> <span class="o">=</span> <span class="mi">0</span>
+ <span class="k">for</span> <span class="n">line</span> <span class="ow">in</span> <span class="nb">open</span><span class="p">(</span><span class="n">input_file_name</span><span class="p">):</span>
+ <span class="n">cnt_values</span> <span class="o">+=</span> <span class="mi">1</span>
+ <span class="n">val</span> <span class="o">=</span> <span class="nb">float</span><span class="p">(</span><span class="n">line</span><span class="o">.</span><span class="n">rstrip</span><span class="p">())</span>
+ <span class="n">sum_squared</span> <span class="o">+=</span> <span class="n">val</span> <span class="o">*</span> <span class="n">val</span>
+ <span class="nb">sum</span> <span class="o">+=</span> <span class="n">val</span>
+ <span class="n">output</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="s">"</span><span class="si">%s</span><span class="se">\n</span><span class="si">%s</span><span class="se">\n</span><span class="si">%d</span><span class="se">\n</span><span class="s">"</span> <span class="o">%</span> <span class="p">(</span><span class="nb">repr</span><span class="p">(</span><span class="n">sum_squared</span><span class="p">),</span> <span c [...]
+
+<span class="c">#---------------------------------------------------------------</span>
+<span class="c">#</span>
+<span class="c"># Run</span>
+<span class="c">#</span>
+<span class="n">pipeline_run</span><span class="p">()</span>
+</pre></div>
+</div>
+</div></blockquote>
+</div>
+<div class="section" id="resulting-output">
+<h2>Resulting Output<a class="headerlink" href="#resulting-output" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="gp">>>> </span><span class="n">pipeline_run</span><span class="p">()</span>
+<span class="go"> Job = [None -> random_numbers.list] completed</span>
+<span class="go">Completed Task = create_random_numbers</span>
+<span class="go"> Job = [[random_numbers.list] -> *.chunks] completed</span>
+<span class="go">Completed Task = split_problem</span>
+<span class="go"> Job = [1.chunks -> 1.sums] completed</span>
+<span class="go"> Job = [10.chunks -> 10.sums] completed</span>
+<span class="go"> Job = [2.chunks -> 2.sums] completed</span>
+<span class="go"> Job = [3.chunks -> 3.sums] completed</span>
+<span class="go"> Job = [4.chunks -> 4.sums] completed</span>
+<span class="go"> Job = [5.chunks -> 5.sums] completed</span>
+<span class="go"> Job = [6.chunks -> 6.sums] completed</span>
+<span class="go"> Job = [7.chunks -> 7.sums] completed</span>
+<span class="go"> Job = [8.chunks -> 8.sums] completed</span>
+<span class="go"> Job = [9.chunks -> 9.sums] completed</span>
+<span class="go">Completed Task = sum_of_squares</span>
+</pre></div>
+</div>
+</div></blockquote>
+</div>
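+<p>The <tt class="docutils literal"><span class="pre">*.sums</span></tt> files produced above exist only to be recombined
+downstream; the full <tt class="docutils literal"><span class="pre">@merge</span></tt> step is covered in the next chapter.
+Purely as a sketch (the task name <tt class="docutils literal"><span class="pre">calculate_variance</span></tt> and the output
+file name <tt class="docutils literal"><span class="pre">variance.result</span></tt> are illustrative placeholders, not part of
+the example above), the per-chunk sums could be combined into a single variance like this, assuming the
+three-value format written by <tt class="docutils literal"><span class="pre">sum_of_squares()</span></tt>:</p>
+<div class="highlight-python"><pre>@merge(sum_of_squares, "variance.result")
+def calculate_variance(input_file_names, output_file_name):
+    """
+    Combine the per-chunk sums:  var = E[x^2] - (E[x])^2
+    """
+    all_sum_squared = 0.0
+    all_sum         = 0.0
+    all_cnt_values  = 0.0
+    for input_file_name in input_file_names:
+        # each .sums file holds sum of squares, sum and count on separate lines
+        sum_squared, sum_values, cnt_values = map(float, open(input_file_name).read().split())
+        all_sum_squared += sum_squared
+        all_sum         += sum_values
+        all_cnt_values  += cnt_values
+    variance = (all_sum_squared - all_sum * all_sum / all_cnt_values) / all_cnt_values
+    open(output_file_name, "w").write("%s\n" % variance)</pre>
+</div>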
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="../../contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#"><strong>Chapter 12</strong>: Python Code for Splitting up large tasks / files with <strong>@split</strong></a><ul>
+<li><a class="reference internal" href="#splitting-large-jobs">Splitting large jobs</a></li>
+<li><a class="reference internal" href="#resulting-output">Resulting Output</a></li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="checkpointing_code.html"
+ title="previous chapter"><strong>Chapter 10</strong>: Python Code for Checkpointing: Interrupted Pipelines and Exceptions</a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="merge_code.html"
+ title="next chapter"><strong>Chapter 13</strong>: Python Code for <tt class="docutils literal"><span class="pre">@merge</span></tt> multiple input into a single result</a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../../_sources/tutorials/new_tutorial/split_code.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="../../decorators/decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="../../decorators/originate.html">@originate</a> </li>
+ <li><a href="../../decorators/split.html">@split</a> </li>
+ <li><a href="../../decorators/transform.html">@transform</a> </li>
+ <li><a href="../../decorators/merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="../../decorators/subdivide.html">@subdivide</a> </li>
+ <li><a href="../../decorators/transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="../../decorators/collate.html">@collate</a> </li>
+ <li><a href="../../decorators/collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="../../decorators/graphviz.html">@graphviz</a> </li>
+ <li><a href="../../decorators/mkdir.html">@mkdir</a> </li>
+ <li><a href="../../decorators/follows.html">@follows / mkdir</a> </li>
+ <li><a href="../../decorators/posttask.html">@posttask touch_file</a> </li>
+ <li><a href="../../decorators/active_if.html">@active_if</a> </li>
+ <li><a href="../../decorators/jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="../../decorators/product.html">@product </a> </li>
+ <li><a href="../../decorators/permutations.html">@permutations </a> </li>
+ <li><a href="../../decorators/combinations.html">@combinations </a> </li>
+ <li><a href="../../decorators/combinations_with_replacement.html">@combinations_ _with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="../../decorators/decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="../../decorators/files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="../../decorators/parallel.html">@parallel</a> </li>
+ <li><a href="../../decorators/check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="../../decorators/indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="merge_code.html" title="Chapter 13: Python Code for @merge multiple input into a single result"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="checkpointing_code.html" title="Chapter 10: Python Code for Checkpointing: Interrupted Pipelines and Exceptions"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="introduction.html">Manual</a> / </li>
+ <li><a href="manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/tutorials/new_tutorial/subdivide_collate.html b/doc/_build/html/tutorials/new_tutorial/subdivide_collate.html
new file mode 100644
index 0000000..1a61d01
--- /dev/null
+++ b/doc/_build/html/tutorials/new_tutorial/subdivide_collate.html
@@ -0,0 +1,397 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>Chapter 16: @subdivide tasks to run efficiently and regroup with @collate — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../../index.html" />
+ <link rel="next" title="Chapter 17: @combinations, @permutations and all versus all @product" href="combinatorics.html" />
+ <link rel="prev" title="Chapter 15: Logging progress through a pipeline" href="logging.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="combinatorics.html" title="Chapter 17: @combinations, @permutations and all versus all @product"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="logging.html" title="Chapter 15: Logging progress through a pipeline"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="introduction.html">Manual</a> / </li>
+ <li><a href="manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <div class="section" id="new-manual-subdivide-collate-chapter-num-subdivide-tasks-to-run-efficiently-and-regroup-with-collate">
+<span id="new-manual-subdivide-collate"></span><span id="index-0"></span><h1><strong>Chapter 16</strong>: <a class="reference internal" href="../../decorators/subdivide.html#decorators-subdivide"><em>@subdivide</em></a> tasks to run efficiently and regroup with <a class="reference internal" href="../../decorators/collate.html#decorators-collate"><em>@collate</em></a><a class="headerlink" href="#new-manual-subdivide-collate-chapter-num-subdivide-tasks-to-run-efficiently-and-regrou [...]
+<div class="admonition seealso">
+<p class="first admonition-title">See also</p>
+<ul class="last simple">
+<li><a class="reference internal" href="manual_contents.html#new-manual-table-of-contents"><em>Manual Table of Contents</em></a></li>
+<li><a class="reference internal" href="../../decorators/subdivide.html#decorators-subdivide"><em>@subdivide</em></a> syntax</li>
+<li><a class="reference internal" href="../../decorators/collate.html#decorators-collate"><em>@collate</em></a> syntax</li>
+</ul>
+</div>
+<div class="section" id="overview">
+<h2>Overview<a class="headerlink" href="#overview" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>In <strong>Chapter 12</strong> and <strong>Chapter 13</strong>, we saw how a large
+task can be <a class="reference internal" href="split.html#new-manual-split"><em>@split</em></a> into small jobs to be analysed efficiently
+in parallel. Ruffus can then <a class="reference internal" href="merge.html"><em>@merge</em></a> these back together
+to give a single, unified result.</p>
+<p>This assumes that your pipeline is processing one item at a time. Usually, however, we
+will have, for example, 10 large pieces of data in play, each of which has to be
+subdivided into smaller pieces for analysis before being put back together.</p>
+<p>This is the role of <a class="reference internal" href="../../decorators/subdivide.html#decorators-subdivide"><em>@subdivide</em></a> and <a class="reference internal" href="../../decorators/collate.html#decorators-collate"><em>@collate</em></a>.</p>
+<p>Like <a class="reference internal" href="../../decorators/split.html#decorators-split"><em>@split</em></a>, the number of output files
+<a class="reference internal" href="../../decorators/subdivide.html#decorators-subdivide"><em>@subdivide</em></a> produces for <em>each</em> <strong>Input</strong> is not predetermined.</p>
+<p>On the other hand, these output files should be named in such a way that they can
+later be grouped back together using <a class="reference internal" href="../../decorators/collate.html#decorators-collate"><em>@collate</em></a>.</p>
+<p>This will be clearer with some worked examples.</p>
+</div></blockquote>
+</div>
+<div class="section" id="subdivide-in-parallel">
+<span id="new-manual-subdivide"></span><h2><a class="reference internal" href="../../decorators/subdivide.html#decorators-subdivide"><em>@subdivide</em></a> in parallel<a class="headerlink" href="#subdivide-in-parallel" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>Let us start with 3 files containing varying numbers of lines. We wish to process these files two
+lines at a time, but we do not know ahead of time how long each file is:</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+<span class="kn">import</span> <span class="nn">os</span><span class="o">,</span> <span class="nn">random</span><span class="o">,</span> <span class="nn">sys</span>
+<span class="hll">
+</span><span class="c"># Create files with a random number of lines</span>
+<span class="hll"><span class="nd">@originate</span><span class="p">([</span><span class="s">"a.start"</span><span class="p">,</span>
+</span> <span class="s">"b.start"</span><span class="p">,</span>
+ <span class="s">"c.start"</span><span class="p">])</span>
+<span class="k">def</span> <span class="nf">create_test_files</span><span class="p">(</span><span class="n">output_file</span><span class="p">):</span>
+ <span class="n">cnt_lines</span> <span class="o">=</span> <span class="n">random</span><span class="o">.</span><span class="n">randint</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span><span class="mi">3</span><span class="p">)</span> <span class="o">*</span> <span class="mi">2</span>
+ <span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span> <span class="k">as</span> <span class="n">oo</span><span class="p">:</span>
+ <span class="k">for</span> <span class="n">ii</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">cnt_lines</span><span class="p">):</span>
+ <span class="n">oo</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="s">"data item = </span><span class="si">%d</span><span class="se">\n</span><span class="s">"</span> <span class="o">%</span> <span class="n">ii</span><span class="p">)</span>
+ <span class="k">print</span> <span class="s">" </span><span class="si">%s</span><span class="s"> has </span><span class="si">%d</span><span class="s"> lines"</span> <span class="o">%</span> <span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="n">cnt_lines</span><span class="p">)</span>
+
+
+<span class="c">#</span>
+<span class="c"># subdivide the input files into NNN fragment files of 2 lines each</span>
+<span class="c">#</span>
+<span class="nd">@subdivide</span><span class="p">(</span> <span class="n">create_test_files</span><span class="p">,</span>
+ <span class="n">formatter</span><span class="p">(),</span>
+ <span class="s">"{path[0]}/{basename[0]}.*.fragment"</span><span class="p">,</span>
+ <span class="s">"{path[0]}/{basename[0]}"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">subdivide_files</span><span class="p">(</span><span class="n">input_file</span><span class="p">,</span> <span class="n">output_files</span><span class="p">,</span> <span class="n">output_file_name_stem</span><span class="p">):</span>
+ <span class="c">#</span>
+ <span class="c"># cleanup any previous results</span>
+ <span class="c">#</span>
+ <span class="k">for</span> <span class="n">oo</span> <span class="ow">in</span> <span class="n">output_files</span><span class="p">:</span>
+ <span class="n">os</span><span class="o">.</span><span class="n">unlink</span><span class="p">(</span><span class="n">oo</span><span class="p">)</span>
+ <span class="c">#</span>
+ <span class="c"># Output files contain two lines each</span>
+ <span class="c"># (new output files every even line)</span>
+ <span class="c">#</span>
+ <span class="n">cnt_output_files</span> <span class="o">=</span> <span class="mi">0</span>
+ <span class="k">for</span> <span class="n">ii</span><span class="p">,</span> <span class="n">line</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="nb">open</span><span class="p">(</span><span class="n">input_file</span><span class="p">)):</span>
+ <span class="k">if</span> <span class="n">ii</span> <span class="o">%</span> <span class="mi">2</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
+ <span class="n">cnt_output_files</span> <span class="o">+=</span> <span class="mi">1</span>
+ <span class="n">output_file_name</span> <span class="o">=</span> <span class="s">"</span><span class="si">%s</span><span class="s">.</span><span class="si">%d</span><span class="s">.fragment"</span> <span class="o">%</span> <span class="p">(</span><span class="n">output_file_name_stem</span><span class="p">,</span> <span class="n">cnt_output_files</span><span class="p">)</span>
+ <span class="n">output_file</span> <span class="o">=</span> <span class="nb">open</span><span class="p">(</span><span class="n">output_file_name</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+ <span class="k">print</span> <span class="s">" Subdivide </span><span class="si">%s</span><span class="s"> -> </span><span class="si">%s</span><span class="s">"</span> <span class="o">%</span> <span class="p">(</span><span class="n">input_file</span><span class="p">,</span> <span class="n">output_file_name</span><span class="p">)</span>
+ <span class="n">output_file</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="n">line</span><span class="p">)</span>
+
+
+<span class="c">#</span>
+<span class="c"># Analyse each fragment independently</span>
+<span class="c">#</span>
+<span class="nd">@transform</span><span class="p">(</span><span class="n">subdivide_files</span><span class="p">,</span> <span class="n">suffix</span><span class="p">(</span><span class="s">".fragment"</span><span class="p">),</span> <span class="s">".analysed"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">analyse_fragments</span><span class="p">(</span><span class="n">input_file</span><span class="p">,</span> <span class="n">output_file</span><span class="p">):</span>
+ <span class="k">print</span> <span class="s">" Analysing </span><span class="si">%s</span><span class="s"> -> </span><span class="si">%s</span><span class="s">"</span> <span class="o">%</span> <span class="p">(</span><span class="n">input_file</span><span class="p">,</span> <span class="n">output_file</span><span class="p">)</span>
+ <span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span> <span class="k">as</span> <span class="n">oo</span><span class="p">:</span>
+ <span class="k">for</span> <span class="n">line</span> <span class="ow">in</span> <span class="nb">open</span><span class="p">(</span><span class="n">input_file</span><span class="p">):</span>
+ <span class="n">oo</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="s">"analysed "</span> <span class="o">+</span> <span class="n">line</span><span class="p">)</span>
+</pre></div>
+</div>
+<p>This produces the following output:</p>
+<div class="highlight-pycon"><div class="highlight"><pre><span class="gp">>>> </span><span class="n">pipeline_run</span><span class="p">(</span><span class="n">verbose</span> <span class="o">=</span> <span class="mi">1</span><span class="p">)</span>
+<span class="go"> a.start has 2 lines</span>
+<span class="go"> Job = [None -> a.start] completed</span>
+<span class="go"> b.start has 6 lines</span>
+<span class="go"> Job = [None -> b.start] completed</span>
+<span class="go"> c.start has 6 lines</span>
+<span class="go"> Job = [None -> c.start] completed</span>
+<span class="hll"><span class="go">Completed Task = create_test_files</span>
+</span>
+<span class="go"> Subdivide a.start -> /home/lg/temp/a.1.fragment</span>
+<span class="go"> Job = [a.start -> a.*.fragment, a] completed</span>
+
+<span class="go"> Subdivide b.start -> /home/lg/temp/b.1.fragment</span>
+<span class="go"> Subdivide b.start -> /home/lg/temp/b.2.fragment</span>
+<span class="go"> Subdivide b.start -> /home/lg/temp/b.3.fragment</span>
+<span class="go"> Job = [b.start -> b.*.fragment, b] completed</span>
+
+<span class="go"> Subdivide c.start -> /home/lg/temp/c.1.fragment</span>
+<span class="go"> Subdivide c.start -> /home/lg/temp/c.2.fragment</span>
+<span class="hll"><span class="go"> Subdivide c.start -> /home/lg/temp/c.3.fragment</span>
+</span><span class="go"> Job = [c.start -> c.*.fragment, c] completed</span>
+
+<span class="go">Completed Task = subdivide_files</span>
+
+<span class="go"> Analysing /home/lg/temp/a.1.fragment -> /home/lg/temp/a.1.analysed</span>
+<span class="go"> Job = [a.1.fragment -> a.1.analysed] completed</span>
+<span class="go"> Analysing /home/lg/temp/b.1.fragment -> /home/lg/temp/b.1.analysed</span>
+<span class="go"> Job = [b.1.fragment -> b.1.analysed] completed</span>
+
+<span class="go"> [ ...SEE EXAMPLE CODE FOR MORE LINES ...]</span>
+
+<span class="go">Completed Task = analyse_fragments</span>
+</pre></div>
+</div>
+<p><tt class="docutils literal"><span class="pre">a.start</span></tt> has two lines and results in a single <tt class="docutils literal"><span class="pre">.fragment</span></tt> file,
+while there are 3 <tt class="docutils literal"><span class="pre">b.*.fragment</span></tt> files because <tt class="docutils literal"><span class="pre">b.start</span></tt> has 6 lines.
+Whatever their origin, all of the different fragment files are treated equally
+in <tt class="docutils literal"><span class="pre">analyse_fragments()</span></tt> and processed (in parallel) in the same way.</p>
+</div></blockquote>
+</div>
+<div class="section" id="grouping-using-collate">
+<span id="new-manual-collate"></span><h2>Grouping using <a class="reference internal" href="../../decorators/collate.html#decorators-collate"><em>@collate</em></a><a class="headerlink" href="#grouping-using-collate" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>All that is left in our example is to reassemble the analysed fragments back together into
+3 sets of results corresponding to the original 3 pieces of starting data.</p>
+<p>This is straightforward by eye: the file names all have the same pattern: <tt class="docutils literal"><span class="pre">[abc].*.analysed</span></tt>:</p>
+<blockquote>
+<div><div class="highlight-python"><pre>a.1.analysed -> a.final_result
+b.1.analysed -> b.final_result
+b.2.analysed -> ..
+b.3.analysed -> ..
+c.1.analysed -> c.final_result
+c.2.analysed -> ..</pre>
+</div>
+</div></blockquote>
+<p><a class="reference internal" href="../../decorators/collate.html#decorators-collate"><em>@collate</em></a> does something similar:</p>
+<blockquote>
+<div><ol class="arabic simple">
+<li>Specify a string substitution e.g. <tt class="docutils literal"><span class="pre">c.??.analysed</span> <span class="pre">-></span> <span class="pre">c.final_result</span></tt> and</li>
+<li>Ask <em>ruffus</em> to group together any <strong>Input</strong> (e.g. <tt class="docutils literal"><span class="pre">c.1.analysed</span></tt>, <tt class="docutils literal"><span class="pre">c.2.analysed</span></tt>)
+that will result in the same <strong>Output</strong> (e.g. <tt class="docutils literal"><span class="pre">c.final_result</span></tt>)</li>
+</ol>
+<div class="highlight-python"><div class="highlight"><pre><span class="c">#</span>
+<span class="c"># ``XXX.??.analysed -> XXX.final_result``</span>
+<span class="hll"><span class="c"># Group results using original names</span>
+</span><span class="c">#</span>
+<span class="hll"><span class="nd">@collate</span><span class="p">(</span> <span class="n">analyse_fragments</span><span class="p">,</span>
+</span>
+ <span class="c"># split file name into [abc].NUMBER.analysed</span>
+ <span class="n">formatter</span><span class="p">(</span><span class="s">"/(?P<NAME>[abc]+)\.\d+\.analysed$"</span><span class="p">),</span>
+
+ <span class="s">"{path[0]}/{NAME[0]}.final_result"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">recombine_analyses</span><span class="p">(</span><span class="n">input_file_names</span><span class="p">,</span> <span class="n">output_file</span><span class="p">):</span>
+ <span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span> <span class="k">as</span> <span class="n">oo</span><span class="p">:</span>
+ <span class="k">for</span> <span class="n">input_file</span> <span class="ow">in</span> <span class="n">input_file_names</span><span class="p">:</span>
+ <span class="k">print</span> <span class="s">" Recombine </span><span class="si">%s</span><span class="s"> -> </span><span class="si">%s</span><span class="s">"</span> <span class="o">%</span> <span class="p">(</span><span class="n">input_file</span><span class="p">,</span> <span class="n">output_file</span><span class="p">)</span>
+ <span class="k">for</span> <span class="n">line</span> <span class="ow">in</span> <span class="nb">open</span><span class="p">(</span><span class="n">input_file</span><span class="p">):</span>
+ <span class="n">oo</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="n">line</span><span class="p">)</span>
+</pre></div>
+</div>
+<p>This produces the following output:</p>
+<div class="highlight-pycon"><div class="highlight"><pre><span class="go"> Recombine /home/lg/temp/a.1.analysed -> /home/lg/temp/a.final_result</span>
+<span class="go"> Job = [[a.1.analysed] -> a.final_result] completed</span>
+<span class="go"> Recombine /home/lg/temp/b.1.analysed -> /home/lg/temp/b.final_result</span>
+<span class="go"> Recombine /home/lg/temp/b.2.analysed -> /home/lg/temp/b.final_result</span>
+<span class="go"> Recombine /home/lg/temp/b.3.analysed -> /home/lg/temp/b.final_result</span>
+<span class="go"> Job = [[b.1.analysed, b.2.analysed, b.3.analysed] -> b.final_result] completed</span>
+<span class="go"> Recombine /home/lg/temp/c.1.analysed -> /home/lg/temp/c.final_result</span>
+<span class="go"> Recombine /home/lg/temp/c.2.analysed -> /home/lg/temp/c.final_result</span>
+<span class="go"> Recombine /home/lg/temp/c.3.analysed -> /home/lg/temp/c.final_result</span>
+<span class="go"> Job = [[c.1.analysed, c.2.analysed, c.3.analysed] -> c.final_result] completed</span>
+<span class="hll"><span class="go">Completed Task = recombine_analyses</span>
+</span></pre></div>
+</div>
+</div></blockquote>
+<div class="admonition warning">
+<p class="first admonition-title">Warning</p>
+<ul class="last">
+<li><p class="first"><strong>Input</strong> file names are grouped together in no guaranteed order.</p>
+<blockquote>
+<div><p>For example, the fragment files may not be sent to <tt class="docutils literal"><span class="pre">recombine_analyses(input_file_names,</span> <span class="pre">...)</span></tt>
+in alphabetical or any other useful order.</p>
+<p>You may want to sort <strong>Input</strong> before concatenation (see the sketch after this warning).</p>
+</div></blockquote>
+</li>
+<li><p class="first">All <strong>Input</strong> are grouped together if they have both the same <strong>Output</strong> <em>and</em> <strong>Extra</strong>
+parameters. If any string substitution is specified in any of the other <strong>Extra</strong> parameters
+to <a class="reference internal" href="../../decorators/collate.html#decorators-collate"><em>@collate</em></a>, these substitutions must give the same answers for all <strong>Input</strong>
+in the same group.</p>
+</li>
+</ul>
+</div>
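+<p>As the warning above notes, the grouped <strong>Input</strong> file names arrive in no guaranteed order.
+Here is a minimal sketch of how the body of <tt class="docutils literal"><span class="pre">recombine_analyses()</span></tt>
+shown earlier could sort them before concatenation (plain alphabetical sorting is an assumption here; any
+deterministic key will do):</p>
+<div class="highlight-python"><pre>def recombine_analyses(input_file_names, output_file):
+    with open(output_file, "w") as oo:
+        # sort the grouped Input so the concatenated result is deterministic
+        for input_file in sorted(input_file_names):
+            for line in open(input_file):
+                oo.write(line)</pre>
+</div>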
+</div></blockquote>
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="../../contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#"><strong>Chapter 16</strong>: <tt class="docutils literal"><span class="pre">@subdivide</span></tt> tasks to run efficiently and regroup with <tt class="docutils literal"><span class="pre">@collate</span></tt></a><ul>
+<li><a class="reference internal" href="#overview">Overview</a></li>
+<li><a class="reference internal" href="#subdivide-in-parallel"><tt class="docutils literal"><span class="pre">@subdivide</span></tt> in parallel</a></li>
+<li><a class="reference internal" href="#grouping-using-collate">Grouping using <tt class="docutils literal"><span class="pre">@collate</span></tt></a></li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="logging.html"
+ title="previous chapter"><strong>Chapter 15</strong>: Logging progress through a pipeline</a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="combinatorics.html"
+ title="next chapter"><strong>Chapter 17</strong>: <tt class="docutils literal"><span class="pre">@combinations</span></tt>, <tt class="docutils literal"><span class="pre">@permutations</span></tt> and all versus all <tt class="docutils literal"><span class="pre">@product</span></tt></a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../../_sources/tutorials/new_tutorial/subdivide_collate.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="../../decorators/decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="../../decorators/originate.html">@originate</a> </li>
+ <li><a href="../../decorators/split.html">@split</a> </li>
+ <li><a href="../../decorators/transform.html">@transform</a> </li>
+ <li><a href="../../decorators/merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="../../decorators/subdivide.html">@subdivide</a> </li>
+ <li><a href="../../decorators/transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="../../decorators/collate.html">@collate</a> </li>
+ <li><a href="../../decorators/collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="../../decorators/graphviz.html">@graphviz</a> </li>
+ <li><a href="../../decorators/mkdir.html">@mkdir</a> </li>
+ <li><a href="../../decorators/follows.html">@follows / mkdir</a> </li>
+ <li><a href="../../decorators/posttask.html">@posttask touch_file</a> </li>
+ <li><a href="../../decorators/active_if.html">@active_if</a> </li>
+ <li><a href="../../decorators/jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="../../decorators/product.html">@product </a> </li>
+ <li><a href="../../decorators/permutations.html">@permutations </a> </li>
+ <li><a href="../../decorators/combinations.html">@combinations </a> </li>
+ <li><a href="../../decorators/combinations_with_replacement.html">@combinations_ _with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="../../decorators/decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="../../decorators/files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="../../decorators/parallel.html">@parallel</a> </li>
+ <li><a href="../../decorators/check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="../../decorators/indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="combinatorics.html" title="Chapter 17: @combinations, @permutations and all versus all @product"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="logging.html" title="Chapter 15: Logging progress through a pipeline"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="introduction.html">Manual</a> / </li>
+ <li><a href="manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/tutorials/new_tutorial/subdivide_collate_code.html b/doc/_build/html/tutorials/new_tutorial/subdivide_collate_code.html
new file mode 100644
index 0000000..8bd9472
--- /dev/null
+++ b/doc/_build/html/tutorials/new_tutorial/subdivide_collate_code.html
@@ -0,0 +1,341 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>Chapter 16: Python Code for @subdivide tasks to run efficiently and regroup with @collate — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../../index.html" />
+ <link rel="next" title="Chapter 17: Python Code for @combinations, @permutations and all versus all @product" href="combinatorics_code.html" />
+ <link rel="prev" title="Chapter 15: Python Code for Logging progress through a pipeline" href="logging_code.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="combinatorics_code.html" title="Chapter 17: Python Code for @combinations, @permutations and all versus all @product"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="logging_code.html" title="Chapter 15: Python Code for Logging progress through a pipeline"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="introduction.html">Manual</a> / </li>
+ <li><a href="manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <div class="section" id="new-manual-subdivide-collate-chapter-num-python-code-for-subdivide-tasks-to-run-efficiently-and-regroup-with-collate">
+<span id="new-manual-subdivide-collate-code"></span><h1><strong>Chapter 16</strong>: Python Code for <a class="reference internal" href="../../decorators/subdivide.html#decorators-subdivide"><em>@subdivide</em></a> tasks to run efficiently and regroup with <a class="reference internal" href="../../decorators/collate.html#decorators-collate"><em>@collate</em></a><a class="headerlink" href="#new-manual-subdivide-collate-chapter-num-python-code-for-subdivide-tasks-to-run-efficiently [...]
+<div class="admonition seealso">
+<p class="first admonition-title">See also</p>
+<ul class="last simple">
+<li><a class="reference internal" href="manual_contents.html#new-manual-table-of-contents"><em>Manual Table of Contents</em></a></li>
+<li><a class="reference internal" href="../../decorators/jobs_limit.html#decorators-jobs-limit"><em>@jobs_limit</em></a> syntax</li>
+<li><a class="reference internal" href="../../pipeline_functions.html#pipeline-functions-pipeline-run"><em>pipeline_run()</em></a> syntax</li>
+<li><a class="reference internal" href="../../drmaa_wrapper_functions.html#drmaa-wrapper-run-job"><em>drmaa_wrapper.run_job()</em></a> syntax</li>
+<li>Back to <strong>Chapter 16</strong>: <a class="reference internal" href="subdivide_collate.html#new-manual-subdivide-collate"><em>@subdivide tasks to run efficiently and regroup with @collate</em></a></li>
+</ul>
+</div>
+<div class="section" id="subdivide-and-regroup-with-collate-example">
+<h2><a class="reference internal" href="../../decorators/subdivide.html#decorators-subdivide"><em>@subdivide</em></a> and regroup with <a class="reference internal" href="../../decorators/collate.html#decorators-collate"><em>@collate</em></a> example<a class="headerlink" href="#subdivide-and-regroup-with-collate-example" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+<span class="kn">import</span> <span class="nn">os</span><span class="o">,</span> <span class="nn">random</span><span class="o">,</span> <span class="nn">sys</span>
+
+<span class="c"># Create files with a random number of lines</span>
+<span class="nd">@originate</span><span class="p">([</span><span class="s">"a.start"</span><span class="p">,</span>
+ <span class="s">"b.start"</span><span class="p">,</span>
+ <span class="s">"c.start"</span><span class="p">])</span>
+<span class="k">def</span> <span class="nf">create_test_files</span><span class="p">(</span><span class="n">output_file</span><span class="p">):</span>
+ <span class="n">cnt_lines</span> <span class="o">=</span> <span class="n">random</span><span class="o">.</span><span class="n">randint</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span><span class="mi">3</span><span class="p">)</span> <span class="o">*</span> <span class="mi">2</span>
+ <span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span> <span class="k">as</span> <span class="n">oo</span><span class="p">:</span>
+ <span class="k">for</span> <span class="n">ii</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">cnt_lines</span><span class="p">):</span>
+ <span class="n">oo</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="s">"data item = </span><span class="si">%d</span><span class="se">\n</span><span class="s">"</span> <span class="o">%</span> <span class="n">ii</span><span class="p">)</span>
+ <span class="k">print</span> <span class="s">" </span><span class="si">%s</span><span class="s"> has </span><span class="si">%d</span><span class="s"> lines"</span> <span class="o">%</span> <span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="n">cnt_lines</span><span class="p">)</span>
+
+
+<span class="c">#</span>
+<span class="hll"><span class="c"># subdivide the input files into NNN fragment files of 2 lines each</span>
+</span><span class="c">#</span>
+<span class="nd">@subdivide</span><span class="p">(</span> <span class="n">create_test_files</span><span class="p">,</span>
+ <span class="n">formatter</span><span class="p">(),</span>
+ <span class="s">"{path[0]}/{basename[0]}.*.fragment"</span><span class="p">,</span>
+ <span class="s">"{path[0]}/{basename[0]}"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">subdivide_files</span><span class="p">(</span><span class="n">input_file</span><span class="p">,</span> <span class="n">output_files</span><span class="p">,</span> <span class="n">output_file_name_stem</span><span class="p">):</span>
+ <span class="c">#</span>
+ <span class="c"># cleanup any previous results</span>
+ <span class="c">#</span>
+ <span class="k">for</span> <span class="n">oo</span> <span class="ow">in</span> <span class="n">output_files</span><span class="p">:</span>
+ <span class="n">os</span><span class="o">.</span><span class="n">unlink</span><span class="p">(</span><span class="n">oo</span><span class="p">)</span>
+ <span class="c">#</span>
+ <span class="c"># Output files contain two lines each</span>
+ <span class="c"># (new output files every even line)</span>
+ <span class="c">#</span>
+ <span class="n">cnt_output_files</span> <span class="o">=</span> <span class="mi">0</span>
+ <span class="k">for</span> <span class="n">ii</span><span class="p">,</span> <span class="n">line</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="nb">open</span><span class="p">(</span><span class="n">input_file</span><span class="p">)):</span>
+ <span class="k">if</span> <span class="n">ii</span> <span class="o">%</span> <span class="mi">2</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
+ <span class="n">cnt_output_files</span> <span class="o">+=</span> <span class="mi">1</span>
+ <span class="n">output_file_name</span> <span class="o">=</span> <span class="s">"</span><span class="si">%s</span><span class="s">.</span><span class="si">%d</span><span class="s">.fragment"</span> <span class="o">%</span> <span class="p">(</span><span class="n">output_file_name_stem</span><span class="p">,</span> <span class="n">cnt_output_files</span><span class="p">)</span>
+ <span class="n">output_file</span> <span class="o">=</span> <span class="nb">open</span><span class="p">(</span><span class="n">output_file_name</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+ <span class="k">print</span> <span class="s">" Subdivide </span><span class="si">%s</span><span class="s"> -> </span><span class="si">%s</span><span class="s">"</span> <span class="o">%</span> <span class="p">(</span><span class="n">input_file</span><span class="p">,</span> <span class="n">output_file_name</span><span class="p">)</span>
+ <span class="n">output_file</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="n">line</span><span class="p">)</span>
+
+
+<span class="c">#</span>
+<span class="c"># Analyse each fragment independently</span>
+<span class="c">#</span>
+<span class="nd">@transform</span><span class="p">(</span><span class="n">subdivide_files</span><span class="p">,</span> <span class="n">suffix</span><span class="p">(</span><span class="s">".fragment"</span><span class="p">),</span> <span class="s">".analysed"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">analyse_fragments</span><span class="p">(</span><span class="n">input_file</span><span class="p">,</span> <span class="n">output_file</span><span class="p">):</span>
+ <span class="k">print</span> <span class="s">" Analysing </span><span class="si">%s</span><span class="s"> -> </span><span class="si">%s</span><span class="s">"</span> <span class="o">%</span> <span class="p">(</span><span class="n">input_file</span><span class="p">,</span> <span class="n">output_file</span><span class="p">)</span>
+ <span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span> <span class="k">as</span> <span class="n">oo</span><span class="p">:</span>
+ <span class="k">for</span> <span class="n">line</span> <span class="ow">in</span> <span class="nb">open</span><span class="p">(</span><span class="n">input_file</span><span class="p">):</span>
+ <span class="n">oo</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="s">"analysed "</span> <span class="o">+</span> <span class="n">line</span><span class="p">)</span>
+
+
+<span class="c">#</span>
+<span class="c"># Group results using original names</span>
+<span class="c">#</span>
+<span class="nd">@collate</span><span class="p">(</span> <span class="n">analyse_fragments</span><span class="p">,</span>
+
+ <span class="c"># split file name into [abc].NUMBER.analysed</span>
+ <span class="n">formatter</span><span class="p">(</span><span class="s">"/(?P<NAME>[abc]+)\.\d+\.analysed$"</span><span class="p">),</span>
+
+ <span class="s">"{path[0]}/{NAME[0]}.final_result"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">recombine_analyses</span><span class="p">(</span><span class="n">input_file_names</span><span class="p">,</span> <span class="n">output_file</span><span class="p">):</span>
+ <span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span> <span class="k">as</span> <span class="n">oo</span><span class="p">:</span>
+ <span class="k">for</span> <span class="n">input_file</span> <span class="ow">in</span> <span class="n">input_file_names</span><span class="p">:</span>
+ <span class="k">print</span> <span class="s">" Recombine </span><span class="si">%s</span><span class="s"> -> </span><span class="si">%s</span><span class="s">"</span> <span class="o">%</span> <span class="p">(</span><span class="n">input_file</span><span class="p">,</span> <span class="n">output_file</span><span class="p">)</span>
+ <span class="k">for</span> <span class="n">line</span> <span class="ow">in</span> <span class="nb">open</span><span class="p">(</span><span class="n">input_file</span><span class="p">):</span>
+ <span class="n">oo</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="n">line</span><span class="p">)</span>
+
+
+
+
+<span class="c">#pipeline_printout(sys.stdout, verbose = 3)</span>
+
+
+<span class="n">pipeline_run</span><span class="p">(</span><span class="n">verbose</span> <span class="o">=</span> <span class="mi">1</span><span class="p">)</span>
+</pre></div>
+</div>
+<p>Results in</p>
+<div class="highlight-pycon"><div class="highlight"><pre><span class="gp">>>> </span><span class="n">pipeline_run</span><span class="p">(</span><span class="n">verbose</span> <span class="o">=</span> <span class="mi">1</span><span class="p">)</span>
+
+<span class="go"> a.start has 2 lines</span>
+<span class="go"> Job = [None -> a.start] completed</span>
+<span class="go"> b.start has 6 lines</span>
+<span class="go"> Job = [None -> b.start] completed</span>
+<span class="go"> c.start has 6 lines</span>
+<span class="go"> Job = [None -> c.start] completed</span>
+<span class="go">Completed Task = create_test_files</span>
+
+<span class="go"> Subdivide a.start -> /home/lg/temp/a.1.fragment</span>
+<span class="go"> Job = [a.start -> a.*.fragment, a] completed</span>
+<span class="go"> Subdivide b.start -> /home/lg/temp/b.1.fragment</span>
+<span class="go"> Subdivide b.start -> /home/lg/temp/b.2.fragment</span>
+<span class="go"> Subdivide b.start -> /home/lg/temp/b.3.fragment</span>
+<span class="go"> Job = [b.start -> b.*.fragment, b] completed</span>
+<span class="go"> Subdivide c.start -> /home/lg/temp/c.1.fragment</span>
+<span class="go"> Subdivide c.start -> /home/lg/temp/c.2.fragment</span>
+<span class="go"> Subdivide c.start -> /home/lg/temp/c.3.fragment</span>
+<span class="go"> Job = [c.start -> c.*.fragment, c] completed</span>
+<span class="go">Completed Task = subdivide_files</span>
+
+<span class="go"> Analysing /home/lg/temp/a.1.fragment -> /home/lg/temp/a.1.analysed</span>
+<span class="go"> Job = [a.1.fragment -> a.1.analysed] completed</span>
+<span class="go"> Analysing /home/lg/temp/b.1.fragment -> /home/lg/temp/b.1.analysed</span>
+<span class="go"> Job = [b.1.fragment -> b.1.analysed] completed</span>
+<span class="go"> Analysing /home/lg/temp/b.2.fragment -> /home/lg/temp/b.2.analysed</span>
+<span class="go"> Job = [b.2.fragment -> b.2.analysed] completed</span>
+<span class="go"> Analysing /home/lg/temp/b.3.fragment -> /home/lg/temp/b.3.analysed</span>
+<span class="go"> Job = [b.3.fragment -> b.3.analysed] completed</span>
+<span class="go"> Analysing /home/lg/temp/c.1.fragment -> /home/lg/temp/c.1.analysed</span>
+<span class="go"> Job = [c.1.fragment -> c.1.analysed] completed</span>
+<span class="go"> Analysing /home/lg/temp/c.2.fragment -> /home/lg/temp/c.2.analysed</span>
+<span class="go"> Job = [c.2.fragment -> c.2.analysed] completed</span>
+<span class="go"> Analysing /home/lg/temp/c.3.fragment -> /home/lg/temp/c.3.analysed</span>
+<span class="go"> Job = [c.3.fragment -> c.3.analysed] completed</span>
+<span class="go">Completed Task = analyse_fragments</span>
+
+<span class="go"> Recombine /home/lg/temp/a.1.analysed -> /home/lg/temp/a.final_result</span>
+<span class="go"> Job = [[a.1.analysed] -> a.final_result] completed</span>
+<span class="go"> Recombine /home/lg/temp/b.1.analysed -> /home/lg/temp/b.final_result</span>
+<span class="go"> Recombine /home/lg/temp/b.2.analysed -> /home/lg/temp/b.final_result</span>
+<span class="go"> Recombine /home/lg/temp/b.3.analysed -> /home/lg/temp/b.final_result</span>
+<span class="go"> Job = [[b.1.analysed, b.2.analysed, b.3.analysed] -> b.final_result] completed</span>
+<span class="go"> Recombine /home/lg/temp/c.1.analysed -> /home/lg/temp/c.final_result</span>
+<span class="go"> Recombine /home/lg/temp/c.2.analysed -> /home/lg/temp/c.final_result</span>
+<span class="go"> Recombine /home/lg/temp/c.3.analysed -> /home/lg/temp/c.final_result</span>
+<span class="go"> Job = [[c.1.analysed, c.2.analysed, c.3.analysed] -> c.final_result] completed</span>
+<span class="go">Completed Task = recombine_analyses</span>
+</pre></div>
+</div>
+</div></blockquote>
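+<p>Because <tt class="docutils literal"><span class="pre">create_test_files()</span></tt> uses <tt class="docutils literal"><span class="pre">random.randint()</span></tt>, the number of <tt class="docutils literal"><span class="pre">.fragment</span></tt> files
+(and therefore the exact output shown above) will vary from run to run. For a reproducible demonstration, one optional tweak
+(not part of the original example) is to seed the random number generator before running the pipeline:</p>
+<div class="highlight-python"><div class="highlight"><pre>import random
+
+# Fix the seed so create_test_files() always writes the same number of lines
+# (any integer seed will do; this is purely for reproducibility)
+random.seed(42)
+
+pipeline_run(verbose = 1)
+</pre></div>
+</div>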
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="../../contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#"><strong>Chapter 16</strong>: Python Code for <tt class="docutils literal"><span class="pre">@subdivide</span></tt> tasks to run efficiently and regroup with <tt class="docutils literal"><span class="pre">@collate</span></tt></a><ul>
+<li><a class="reference internal" href="#subdivide-and-regroup-with-collate-example"><tt class="docutils literal"><span class="pre">@subdivide</span></tt> and regroup with <tt class="docutils literal"><span class="pre">@collate</span></tt> example</a></li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="logging_code.html"
+ title="previous chapter"><strong>Chapter 15</strong>: Python Code for Logging progress through a pipeline</a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="combinatorics_code.html"
+ title="next chapter"><strong>Chapter 17</strong>: Python Code for <tt class="docutils literal"><span class="pre">@combinations</span></tt>, <tt class="docutils literal"><span class="pre">@permutations</span></tt> and all versus all <tt class="docutils literal"><span class="pre">@product</span></tt></a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../../_sources/tutorials/new_tutorial/subdivide_collate_code.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="../../decorators/decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="../../decorators/originate.html">@originate</a> </li>
+ <li><a href="../../decorators/split.html">@split</a> </li>
+ <li><a href="../../decorators/transform.html">@transform</a> </li>
+ <li><a href="../../decorators/merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="../../decorators/subdivide.html">@subdivide</a> </li>
+ <li><a href="../../decorators/transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="../../decorators/collate.html">@collate</a> </li>
+ <li><a href="../../decorators/collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="../../decorators/graphviz.html">@graphviz</a> </li>
+ <li><a href="../../decorators/mkdir.html">@mkdir</a> </li>
+ <li><a href="../../decorators/follows.html">@follows / mkdir</a> </li>
+ <li><a href="../../decorators/posttask.html">@posttask touch_file</a> </li>
+ <li><a href="../../decorators/active_if.html">@active_if</a> </li>
+ <li><a href="../../decorators/jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="../../decorators/product.html">@product </a> </li>
+ <li><a href="../../decorators/permutations.html">@permutations </a> </li>
+ <li><a href="../../decorators/combinations.html">@combinations </a> </li>
+        <li><a href="../../decorators/combinations_with_replacement.html">@combinations_with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="../../decorators/decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="../../decorators/files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="../../decorators/parallel.html">@parallel</a> </li>
+ <li><a href="../../decorators/check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="../../decorators/indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="combinatorics_code.html" title="Chapter 17: Python Code for @combinations, @permutations and all versus all @product"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="logging_code.html" title="Chapter 15: Python Code for Logging progress through a pipeline"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="introduction.html">Manual</a> / </li>
+ <li><a href="manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/tutorials/new_tutorial/transform.html b/doc/_build/html/tutorials/new_tutorial/transform.html
new file mode 100644
index 0000000..6b6343f
--- /dev/null
+++ b/doc/_build/html/tutorials/new_tutorial/transform.html
@@ -0,0 +1,375 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>Chapter 2: Transforming data in a pipeline with @transform — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../../index.html" />
+ <link rel="next" title="Chapter 3: More on @transform-ing data" href="transform_in_parallel.html" />
+ <link rel="prev" title="Chapter 1: An introduction to basic Ruffus syntax" href="introduction.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="transform_in_parallel.html" title="Chapter 3: More on @transform-ing data"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="introduction.html" title="Chapter 1: An introduction to basic Ruffus syntax"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="introduction.html">Manual</a> / </li>
+ <li><a href="manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <div class="section" id="new-manual-transform-chapter-num-transforming-data-in-a-pipeline-with-transform">
+<span id="new-manual-transform"></span><span id="index-0"></span><h1><strong>Chapter 2</strong>: Transforming data in a pipeline with <a class="reference internal" href="../../decorators/transform.html#decorators-transform"><em>@transform</em></a><a class="headerlink" href="#new-manual-transform-chapter-num-transforming-data-in-a-pipeline-with-transform" title="Permalink to this headline">¶</a></h1>
+<div class="admonition seealso">
+<p class="first admonition-title">See also</p>
+<ul class="last simple">
+<li><a class="reference internal" href="manual_contents.html#new-manual-table-of-contents"><em>Manual Table of Contents</em></a></li>
+<li><a class="reference internal" href="../../decorators/transform.html#decorators-transform"><em>@transform</em></a> syntax</li>
+</ul>
+</div>
+<div class="admonition note">
+<p class="first admonition-title">Note</p>
+<p>Remember to look at the example code:</p>
+<ul class="last simple">
+<li><a class="reference internal" href="transform_code.html#new-manual-transform-code"><em>Chapter 2: Python Code for Transforming data in a pipeline with @transform</em></a></li>
+</ul>
+</div>
+<div class="section" id="review">
+<h2>Review<a class="headerlink" href="#review" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><a class="reference internal image-reference" href="../../_images/theoretical_pipeline_schematic.png"><img alt="../../_images/theoretical_pipeline_schematic.png" src="../../_images/theoretical_pipeline_schematic.png" style="width: 610.0px; height: 71.0px;" /></a>
+<p>Computational pipelines transform your data in stages until the final result is produced.
+Ruffus automates the plumbing in your pipeline. You supply the Python functions which perform the data transformation,
+and tell Ruffus how these pipeline stages or <a class="reference internal" href="../../glossary.html#term-task"><em class="xref std std-term">task</em></a> functions are connected together.</p>
+<div class="admonition note">
+<p class="first admonition-title">Note</p>
+<p><strong>The best way to design a pipeline is to:</strong></p>
+<blockquote class="last">
+<div><ul class="simple">
+<li><strong>write down the file names of the data as it flows through your pipeline</strong></li>
+<li><strong>write down the names of the functions which transform the data at each stage of the pipeline</strong> (see the sketch below this note).</li>
+</ul>
+</div></blockquote>
+</div>
+</div></blockquote>
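+<p>As a concrete (and entirely hypothetical) illustration of this planning step, the sketch below uses invented file and
+function names; the point is only that each arrow in the written-down plan becomes one pipeline stage:</p>
+<div class="highlight-python"><div class="highlight"><pre># Written-down plan (hypothetical names):
+#
+#   *.fastq  ->  *.sam  ->  *.bam
+#       map_reads()  compress_alignments()
+
+from ruffus import *
+
+# create two empty starting files so this sketch can be run as-is
+for name in ("a.fastq", "b.fastq"):
+    open(name, "w").close()
+
+@transform(["a.fastq", "b.fastq"], suffix(".fastq"), ".sam")
+def map_reads(input_file, output_file):
+    open(output_file, "w").close()          # placeholder for the real work
+
+@transform(map_reads, suffix(".sam"), ".bam")
+def compress_alignments(input_file, output_file):
+    open(output_file, "w").close()          # placeholder for the real work
+
+pipeline_run()
+</pre></div>
+</div>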
+</div>
+<div class="section" id="task-functions-as-recipes">
+<h2>Task functions as recipes<a class="headerlink" href="#task-functions-as-recipes" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>Each <a class="reference internal" href="../../glossary.html#term-task"><em class="xref std std-term">task</em></a> function of the pipeline is a recipe or
+<a class="reference external" href="http://www.gnu.org/software/make/manual/make.html#Rule-Introduction">rule</a>
+which can be applied repeatedly to our data.</p>
+<p>For example, one can have</p>
+<blockquote>
+<div><ul class="simple">
+<li>a <tt class="docutils literal"><span class="pre">compile()</span></tt> <em>task</em> which will compile any number of source code files, or</li>
+<li>a <tt class="docutils literal"><span class="pre">count_lines()</span></tt> <em>task</em> which will count the number of lines in any file, or</li>
+<li>an <tt class="docutils literal"><span class="pre">align_dna()</span></tt> <em>task</em> which will align the DNA of many chromosomes.</li>
+</ul>
+</div></blockquote>
+</div></blockquote>
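+<p>For instance, a <tt class="docutils literal"><span class="pre">count_lines()</span></tt> task like the one mentioned above might look like the
+following sketch (the <tt class="docutils literal"><span class="pre">.txt</span></tt> file names are made up for illustration):</p>
+<div class="highlight-python"><div class="highlight"><pre>from ruffus import *
+
+# create two small input files so the sketch is self-contained
+for name in ("a.txt", "b.txt"):
+    with open(name, "w") as ff:
+        ff.write("one line\nanother line\n")
+
+# one recipe, applied independently to every input file
+@transform(["a.txt", "b.txt"], suffix(".txt"), ".line_count")
+def count_lines(input_file, output_file):
+    cnt = sum(1 for line in open(input_file))
+    with open(output_file, "w") as oo:
+        oo.write("%d\n" % cnt)
+
+pipeline_run()
+</pre></div>
+</div>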
+</div>
+<div class="section" id="transform-is-a-1-to-1-operation">
+<span id="index-1"></span><h2><a class="reference internal" href="../../decorators/transform.html#decorators-transform"><em>@transform</em></a> is a 1 to 1 operation<a class="headerlink" href="#transform-is-a-1-to-1-operation" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p><tt class="docutils literal"><span class="pre">@transform</span></tt> is a 1:1 operation because for each input, it generates one output.</p>
+<a class="reference internal image-reference" href="../../_images/transform_1_to_1_example.png"><img alt="../../_images/transform_1_to_1_example.png" src="../../_images/transform_1_to_1_example.png" style="width: 535.5px; height: 371.0px;" /></a>
+<p>This is obvious when you count the number of jobs at each step. In our example pipeline, there are always
+three jobs moving in step through each stage (<a class="reference internal" href="../../glossary.html#term-task"><em class="xref std std-term">task</em></a>) of the pipeline.</p>
+<p>Each <strong>Input</strong> or <strong>Output</strong> is not, however, limited to a single filename. Each job can accept, for example,
+a pair of files as its <strong>Input</strong>, or produce several files, a dictionary or numbers as its <strong>Output</strong>.</p>
+<p>When each job outputs a pair of files, this does not generate two jobs downstream. It just means that the successive
+<a class="reference internal" href="../../glossary.html#term-task"><em class="xref std std-term">task</em></a> in the pipeline will receive a list or tuple of files as its input parameter.</p>
+<div class="admonition note">
+<p class="first admonition-title">Note</p>
+<p>The different sorts of decorators in Ruffus determine the <em>topology</em> of your pipeline,
+i.e. how the jobs from different tasks are linked together.</p>
+<p><a class="reference internal" href="../../decorators/transform.html#decorators-transform"><em>@transform</em></a> always generates one <strong>Output</strong> for one <strong>Input</strong>.</p>
+<p>In the later parts of the tutorial, we will encounter more decorators which can <em>split up</em>, or <em>join together</em> or <em>group</em> inputs.</p>
+<p class="last">In other words, using other decorators <strong>Input</strong> and <strong>Output</strong> can have <strong>many to one</strong>, <strong>many to many</strong> etc. relationships.</p>
+</div>
+</div></blockquote>
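+<p>The following sketch (with made-up file and task names) illustrates the point in the note above: each job of the middle task
+produces a <em>pair</em> of files, yet the downstream task still receives exactly one <strong>Input</strong> per job, namely that pair:</p>
+<div class="highlight-python"><div class="highlight"><pre>from ruffus import *
+
+@originate(["x.start", "y.start"])
+def make_start(output_file):
+    open(output_file, "w").close()
+
+# one Input -> one Output, but each Output is a *pair* of files
+@transform(make_start, suffix(".start"), [".left", ".right"])
+def split_into_pair(input_file, output_files):
+    for oo in output_files:
+        open(oo, "w").close()
+
+# still one job per upstream job: the Input is the pair [x.left, x.right]
+# (suffix() matches the first file name, i.e. the one ending in ".left")
+@transform(split_into_pair, suffix(".left"), ".combined")
+def use_pair(input_files, output_file):
+    open(output_file, "w").close()
+
+pipeline_run()
+</pre></div>
+</div>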
+<div class="section" id="a-pair-of-files-as-the-input">
+<h3>A pair of files as the <strong>Input</strong><a class="headerlink" href="#a-pair-of-files-as-the-input" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><p>Let us rewrite our previous example so that the <strong>Input</strong> of the first task
+are <a class="reference external" href="http://en.wikipedia.org/wiki/DNA_sequencing_theory#Pairwise_end-sequencing">matching pairs</a>
+of DNA sequence files, processed in tandem.</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+
+<span class="n">starting_files</span> <span class="o">=</span> <span class="p">[(</span><span class="s">"a.1.fastq"</span><span class="p">,</span> <span class="s">"a.2.fastq"</span><span class="p">),</span>
+ <span class="p">(</span><span class="s">"b.1.fastq"</span><span class="p">,</span> <span class="s">"b.2.fastq"</span><span class="p">),</span>
+ <span class="p">(</span><span class="s">"c.1.fastq"</span><span class="p">,</span> <span class="s">"c.2.fastq"</span><span class="p">)]</span>
+<span class="hll">
+</span><span class="hll"><span class="c">#</span>
+</span><span class="hll"><span class="c"># STAGE 1 fasta->sam</span>
+</span><span class="c">#</span>
+<span class="nd">@transform</span><span class="p">(</span><span class="n">starting_files</span><span class="p">,</span> <span class="c"># Input = starting files</span>
+ <span class="n">suffix</span><span class="p">(</span><span class="s">".1.fastq"</span><span class="p">),</span> <span class="c"># suffix = .1.fastq</span>
+ <span class="s">".sam"</span><span class="p">)</span> <span class="c"># Output suffix = .sam</span>
+<span class="k">def</span> <span class="nf">map_dna_sequence</span><span class="p">(</span><span class="n">input_files</span><span class="p">,</span>
+ <span class="n">output_file</span><span class="p">):</span>
+ <span class="c"># remember there are two input files now</span>
+ <span class="n">ii1</span> <span class="o">=</span> <span class="nb">open</span><span class="p">(</span><span class="n">input_files</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span>
+<span class="hll"> <span class="n">ii2</span> <span class="o">=</span> <span class="nb">open</span><span class="p">(</span><span class="n">input_files</span><span class="p">[</span><span class="mi">1</span><span class="p">])</span>
+</span><span class="hll"> <span class="n">oo</span> <span class="o">=</span> <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+</span></pre></div>
+</div>
+</div></blockquote>
+<p>The only changes are to the first task:</p>
+<blockquote>
+<div><div class="highlight-pycon"><div class="highlight"><pre><span class="go">pipeline_run()</span>
+<span class="go"> Job = [[a.1.fastq, a.2.fastq] -> a.sam] completed</span>
+<span class="go"> Job = [[b.1.fastq, b.2.fastq] -> b.sam] completed</span>
+<span class="go"> Job = [[c.1.fastq, c.2.fastq] -> c.sam] completed</span>
+<span class="go">Completed Task = map_dna_sequence</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p><a class="reference internal" href="../../decorators/indicator_objects.html#decorators-suffix"><em>suffix</em></a> always matches only the first file name in each <strong>Input</strong>.</p>
+</div></blockquote>
+</div>
+</div>
+<div class="section" id="input-and-output-parameters">
+<span id="index-2"></span><h2><strong>Input</strong> and <strong>Output</strong> parameters<a class="headerlink" href="#input-and-output-parameters" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p><strong>Ruffus</strong> chains together different tasks by taking the <strong>Output</strong> from one job
+and automatically plugging it in as the <strong>Input</strong> of the next.</p>
+<p>The first two parameters of each job are the <strong>Input</strong> and <strong>Output</strong> parameters respectively.</p>
+<p>In the above example, we have:</p>
+<blockquote>
+<div><div class="highlight-pycon"><div class="highlight"><pre><span class="gp">>>> </span><span class="n">pipeline_run</span><span class="p">()</span>
+<span class="go"> Job = [a.bam -> a.statistics, use_linear_model] completed</span>
+<span class="go"> Job = [b.bam -> b.statistics, use_linear_model] completed</span>
+<span class="go"> Job = [c.bam -> c.statistics, use_linear_model] completed</span>
+<span class="go">Completed Task = summarise_bam_file</span>
+</pre></div>
+</div>
+<table border="1" class="docutils">
+<caption>Parameters for <tt class="docutils literal"><span class="pre">summarise_bam_file()</span></tt></caption>
+<colgroup>
+<col width="20%" />
+<col width="25%" />
+<col width="56%" />
+</colgroup>
+<thead valign="bottom">
+<tr class="row-odd"><th class="head"><strong>Inputs</strong></th>
+<th class="head"><strong>Outputs</strong></th>
+<th class="head"><strong>Extra</strong></th>
+</tr>
+</thead>
+<tbody valign="top">
+<tr class="row-even"><td><tt class="docutils literal"><span class="pre">"a.bam"</span></tt></td>
+<td><tt class="docutils literal"><span class="pre">"a.statistics"</span></tt></td>
+<td><tt class="docutils literal"><span class="pre">"use_linear_model"</span></tt></td>
+</tr>
+<tr class="row-odd"><td><tt class="docutils literal"><span class="pre">"b.bam"</span></tt></td>
+<td><tt class="docutils literal"><span class="pre">"b.statistics"</span></tt></td>
+<td><tt class="docutils literal"><span class="pre">"use_linear_model"</span></tt></td>
+</tr>
+<tr class="row-even"><td><tt class="docutils literal"><span class="pre">"c.bam"</span></tt></td>
+<td><tt class="docutils literal"><span class="pre">"c.statistics"</span></tt></td>
+<td><tt class="docutils literal"><span class="pre">"use_linear_model"</span></tt></td>
+</tr>
+</tbody>
+</table>
+</div></blockquote>
+<p><strong>Extra</strong> parameters are for the consumption of <tt class="docutils literal"><span class="pre">summarise_bam_file()</span></tt> and will not be passed on to the next task.</p>
+<p>Ruffus was designed for pipelines which save intermediate data in files. This is not
+compulsory, but saving your data in files at each step provides many advantages:</p>
+<blockquote>
+<div><ol class="arabic simple">
+<li>Ruffus can use file system time stamps to check if your pipeline is up to date</li>
+<li>Your data is persistent across runs</li>
+<li>This is a good way to pass large amounts of data across processes and computational nodes</li>
+</ol>
+</div></blockquote>
+<p>Nevertheless, <em>all</em> the <a class="reference internal" href="../../glossary.html#term-task"><em class="xref std std-term">task</em></a> parameters can include anything which suits your workflow, from lists of files, to numbers,
+sets or tuples. <em>Ruffus</em> imposes few constraints on what <em>you</em>
+would like to send to each stage of your pipeline.</p>
+<p><em>Ruffus</em> does, however, assume that if the <strong>Input</strong> and <strong>Output</strong> parameters contain strings, these will be interpreted as file names
+required by and produced by that job. As we shall see, the modification times of these file names
+indicate whether that part of the pipeline is up to date or needs to be rerun.</p>
+</div></blockquote>
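+<p>A convenient way to see how <em>Ruffus</em> has interpreted the strings in your <strong>Input</strong> and <strong>Output</strong> parameters,
+and which jobs it therefore considers out of date, is <tt class="docutils literal"><span class="pre">pipeline_printout()</span></tt>, which lists the jobs that
+<em>would</em> be run without actually running anything:</p>
+<div class="highlight-python"><div class="highlight"><pre>import sys
+
+# Preview the pipeline without running it; higher verbose values give
+# progressively more detail about which jobs are out of date and why.
+pipeline_printout(sys.stdout, verbose = 3)
+</pre></div>
+</div>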
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="../../contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#"><strong>Chapter 2</strong>: Transforming data in a pipeline with <tt class="docutils literal"><span class="pre">@transform</span></tt></a><ul>
+<li><a class="reference internal" href="#review">Review</a></li>
+<li><a class="reference internal" href="#task-functions-as-recipes">Task functions as recipes</a></li>
+<li><a class="reference internal" href="#transform-is-a-1-to-1-operation"><tt class="docutils literal"><span class="pre">@transform</span></tt> is a 1 to 1 operation</a><ul>
+<li><a class="reference internal" href="#a-pair-of-files-as-the-input">A pair of files as the <strong>Input</strong></a></li>
+</ul>
+</li>
+<li><a class="reference internal" href="#input-and-output-parameters"><strong>Input</strong> and <strong>Output</strong> parameters</a></li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="introduction.html"
+ title="previous chapter"><strong>Chapter 1</strong>: An introduction to basic <em>Ruffus</em> syntax</a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="transform_in_parallel.html"
+ title="next chapter"><strong>Chapter 3</strong>: More on <tt class="docutils literal"><span class="pre">@transform</span></tt>-ing data</a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../../_sources/tutorials/new_tutorial/transform.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="../../decorators/decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="../../decorators/originate.html">@originate</a> </li>
+ <li><a href="../../decorators/split.html">@split</a> </li>
+ <li><a href="../../decorators/transform.html">@transform</a> </li>
+ <li><a href="../../decorators/merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="../../decorators/subdivide.html">@subdivide</a> </li>
+ <li><a href="../../decorators/transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="../../decorators/collate.html">@collate</a> </li>
+ <li><a href="../../decorators/collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="../../decorators/graphviz.html">@graphviz</a> </li>
+ <li><a href="../../decorators/mkdir.html">@mkdir</a> </li>
+ <li><a href="../../decorators/follows.html">@follows / mkdir</a> </li>
+ <li><a href="../../decorators/posttask.html">@posttask touch_file</a> </li>
+ <li><a href="../../decorators/active_if.html">@active_if</a> </li>
+ <li><a href="../../decorators/jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="../../decorators/product.html">@product </a> </li>
+ <li><a href="../../decorators/permutations.html">@permutations </a> </li>
+ <li><a href="../../decorators/combinations.html">@combinations </a> </li>
+        <li><a href="../../decorators/combinations_with_replacement.html">@combinations_with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="../../decorators/decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="../../decorators/files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="../../decorators/parallel.html">@parallel</a> </li>
+ <li><a href="../../decorators/check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="../../decorators/indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="transform_in_parallel.html" title="Chapter 3: More on @transform-ing data"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="introduction.html" title="Chapter 1: An introduction to basic Ruffus syntax"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="introduction.html">Manual</a> / </li>
+ <li><a href="manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/tutorials/new_tutorial/transform_code.html b/doc/_build/html/tutorials/new_tutorial/transform_code.html
new file mode 100644
index 0000000..e210228
--- /dev/null
+++ b/doc/_build/html/tutorials/new_tutorial/transform_code.html
@@ -0,0 +1,289 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+    <title>Chapter 2: Python Code for Transforming data in a pipeline with @transform — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../../index.html" />
+ <link rel="next" title="Chapter 3: Python Code for More on @transform-ing data" href="transform_in_parallel_code.html" />
+ <link rel="prev" title="Chapter 1: Python Code for An introduction to basic Ruffus syntax" href="introduction_code.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="transform_in_parallel_code.html" title="Chapter 3: Python Code for More on @transform-ing data"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="introduction_code.html" title="Chapter 1: Python Code for An introduction to basic Ruffus syntax"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="introduction.html">Manual</a> / </li>
+ <li><a href="manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <div class="section" id="new-manual-introduction-chapter-num-python-code-for-transforming-data-in-a-pipeline-with-transform">
+<span id="new-manual-transform-code"></span><h1><strong>Chapter 2</strong>: Python Code for Transforming data in a pipeline with <tt class="docutils literal"><span class="pre">@transform</span></tt><a class="headerlink" href="#new-manual-introduction-chapter-num-python-code-for-transforming-data-in-a-pipeline-with-transform" title="Permalink to this headline">¶</a></h1>
+<div class="admonition seealso">
+<p class="first admonition-title">See also</p>
+<ul class="last simple">
+<li><a class="reference internal" href="manual_contents.html#new-manual-table-of-contents"><em>Manual Table of Contents</em></a></li>
+<li><a class="reference internal" href="../../decorators/transform.html#decorators-transform"><em>@transform syntax in detail</em></a></li>
+<li>Back to <strong>Chapter 2</strong>: <a class="reference internal" href="transform.html#new-manual-transform"><em>Transforming data in a pipeline with @transform</em></a></li>
+</ul>
+</div>
+<div class="section" id="your-first-ruffus-script">
+<h2>Your first Ruffus script<a class="headerlink" href="#your-first-ruffus-script" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="c">#</span>
+<span class="c"># The starting data files would normally exist beforehand!</span>
+<span class="c"># We create some empty files for this example</span>
+<span class="c">#</span>
+<span class="n">starting_files</span> <span class="o">=</span> <span class="p">[(</span><span class="s">"a.1.fastq"</span><span class="p">,</span> <span class="s">"a.2.fastq"</span><span class="p">),</span>
+ <span class="p">(</span><span class="s">"b.1.fastq"</span><span class="p">,</span> <span class="s">"b.2.fastq"</span><span class="p">),</span>
+ <span class="p">(</span><span class="s">"c.1.fastq"</span><span class="p">,</span> <span class="s">"c.2.fastq"</span><span class="p">)]</span>
+
+
+<span class="k">for</span> <span class="n">ff_pair</span> <span class="ow">in</span> <span class="n">starting_files</span><span class="p">:</span>
+ <span class="nb">open</span><span class="p">(</span><span class="n">ff_pair</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="s">"w"</span><span class="p">)</span>
+ <span class="nb">open</span><span class="p">(</span><span class="n">ff_pair</span><span class="p">[</span><span class="mi">1</span><span class="p">],</span> <span class="s">"w"</span><span class="p">)</span>
+
+
+<span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+
+<span class="c">#</span>
+<span class="c"># STAGE 1 fasta->sam</span>
+<span class="c">#</span>
+<span class="nd">@transform</span><span class="p">(</span><span class="n">starting_files</span><span class="p">,</span> <span class="c"># Input = starting files</span>
+ <span class="n">suffix</span><span class="p">(</span><span class="s">".1.fastq"</span><span class="p">),</span> <span class="c"># suffix = .1.fastq</span>
+ <span class="s">".sam"</span><span class="p">)</span> <span class="c"># Output suffix = .sam</span>
+<span class="k">def</span> <span class="nf">map_dna_sequence</span><span class="p">(</span><span class="n">input_files</span><span class="p">,</span>
+ <span class="n">output_file</span><span class="p">):</span>
+ <span class="c"># remember there are two input files now</span>
+ <span class="n">ii1</span> <span class="o">=</span> <span class="nb">open</span><span class="p">(</span><span class="n">input_files</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span>
+ <span class="n">ii2</span> <span class="o">=</span> <span class="nb">open</span><span class="p">(</span><span class="n">input_files</span><span class="p">[</span><span class="mi">1</span><span class="p">])</span>
+ <span class="n">oo</span> <span class="o">=</span> <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+
+<span class="c">#</span>
+<span class="c"># STAGE 2 sam->bam</span>
+<span class="c">#</span>
+<span class="nd">@transform</span><span class="p">(</span><span class="n">map_dna_sequence</span><span class="p">,</span> <span class="c"># Input = previous stage</span>
+ <span class="n">suffix</span><span class="p">(</span><span class="s">".sam"</span><span class="p">),</span> <span class="c"># suffix = .sam</span>
+ <span class="s">".bam"</span><span class="p">)</span> <span class="c"># Output suffix = .bam</span>
+<span class="k">def</span> <span class="nf">compress_sam_file</span><span class="p">(</span><span class="n">input_file</span><span class="p">,</span>
+ <span class="n">output_file</span><span class="p">):</span>
+ <span class="n">ii</span> <span class="o">=</span> <span class="nb">open</span><span class="p">(</span><span class="n">input_file</span><span class="p">)</span>
+ <span class="n">oo</span> <span class="o">=</span> <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+
+<span class="c">#</span>
+<span class="c"># STAGE 3 bam->statistics</span>
+<span class="c">#</span>
+<span class="nd">@transform</span><span class="p">(</span><span class="n">compress_sam_file</span><span class="p">,</span> <span class="c"># Input = previous stage</span>
+ <span class="n">suffix</span><span class="p">(</span><span class="s">".bam"</span><span class="p">),</span> <span class="c"># suffix = .bam</span>
+ <span class="s">".statistics"</span><span class="p">,</span> <span class="c"># Output suffix = .statistics</span>
+ <span class="s">"use_linear_model"</span><span class="p">)</span> <span class="c"># Extra statistics parameter</span>
+<span class="k">def</span> <span class="nf">summarise_bam_file</span><span class="p">(</span><span class="n">input_file</span><span class="p">,</span>
+ <span class="n">output_file</span><span class="p">,</span>
+ <span class="n">extra_stats_parameter</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Sketch of real analysis function</span>
+<span class="sd"> """</span>
+ <span class="n">ii</span> <span class="o">=</span> <span class="nb">open</span><span class="p">(</span><span class="n">input_file</span><span class="p">)</span>
+ <span class="n">oo</span> <span class="o">=</span> <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+
+<span class="n">pipeline_run</span><span class="p">()</span>
+</pre></div>
+</div>
+</div></blockquote>
+</div>
+<div class="section" id="resulting-output">
+<h2>Resulting Output<a class="headerlink" href="#resulting-output" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="gp">>>> </span><span class="n">pipeline_run</span><span class="p">()</span>
+<span class="go"> Job = [[a.1.fastq, a.2.fastq] -> a.sam] completed</span>
+<span class="go"> Job = [[b.1.fastq, b.2.fastq] -> b.sam] completed</span>
+<span class="go"> Job = [[c.1.fastq, c.2.fastq] -> c.sam] completed</span>
+<span class="go">Completed Task = map_dna_sequence</span>
+<span class="go"> Job = [a.sam -> a.bam] completed</span>
+<span class="go"> Job = [b.sam -> b.bam] completed</span>
+<span class="go"> Job = [c.sam -> c.bam] completed</span>
+<span class="go">Completed Task = compress_sam_file</span>
+<span class="go"> Job = [a.bam -> a.statistics, use_linear_model] completed</span>
+<span class="go"> Job = [b.bam -> b.statistics, use_linear_model] completed</span>
+<span class="go"> Job = [c.bam -> c.statistics, use_linear_model] completed</span>
+<span class="go">Completed Task = summarise_bam_file</span>
+</pre></div>
+</div>
+</div></blockquote>
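+<p>Because the <strong>Input</strong> and <strong>Output</strong> strings are treated as file names, running the script a second time does no work
+as long as every output file is newer than its inputs. As a small optional experiment (not part of the original script), you could
+update the timestamp of one starting file and preview what <em>Ruffus</em> would redo:</p>
+<div class="highlight-python"><div class="highlight"><pre>import os, sys
+
+# "touch" one of the starting files so its downstream jobs become out of date
+os.utime("a.1.fastq", None)
+
+# preview (without running anything) which jobs Ruffus would now rerun
+pipeline_printout(sys.stdout, verbose = 3)
+</pre></div>
+</div>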
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="../../contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#"><strong>Chapter 2</strong>: Python Code for Transforming data in a pipeline with <tt class="docutils literal"><span class="pre">@transform</span></tt></a><ul>
+<li><a class="reference internal" href="#your-first-ruffus-script">Your first Ruffus script</a></li>
+<li><a class="reference internal" href="#resulting-output">Resulting Output</a></li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="introduction_code.html"
+ title="previous chapter"><strong>Chapter 1</strong>: Python Code for An introduction to basic Ruffus syntax</a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="transform_in_parallel_code.html"
+ title="next chapter"><strong>Chapter 3</strong>: Python Code for More on <tt class="docutils literal"><span class="pre">@transform</span></tt>-ing data</a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../../_sources/tutorials/new_tutorial/transform_code.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="../../decorators/decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="../../decorators/originate.html">@originate</a> </li>
+ <li><a href="../../decorators/split.html">@split</a> </li>
+ <li><a href="../../decorators/transform.html">@transform</a> </li>
+ <li><a href="../../decorators/merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="../../decorators/subdivide.html">@subdivide</a> </li>
+ <li><a href="../../decorators/transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="../../decorators/collate.html">@collate</a> </li>
+ <li><a href="../../decorators/collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="../../decorators/graphviz.html">@graphviz</a> </li>
+ <li><a href="../../decorators/mkdir.html">@mkdir</a> </li>
+ <li><a href="../../decorators/follows.html">@follows / mkdir</a> </li>
+ <li><a href="../../decorators/posttask.html">@posttask touch_file</a> </li>
+ <li><a href="../../decorators/active_if.html">@active_if</a> </li>
+ <li><a href="../../decorators/jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="../../decorators/product.html">@product </a> </li>
+ <li><a href="../../decorators/permutations.html">@permutations </a> </li>
+ <li><a href="../../decorators/combinations.html">@combinations </a> </li>
+        <li><a href="../../decorators/combinations_with_replacement.html">@combinations_with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="../../decorators/decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="../../decorators/files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="../../decorators/parallel.html">@parallel</a> </li>
+ <li><a href="../../decorators/check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="../../decorators/indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="transform_in_parallel_code.html" title="Chapter 3: Python Code for More on @transform-ing data"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="introduction_code.html" title="Chapter 1: Python Code for An introduction to basic Ruffus syntax"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="introduction.html">Manual</a> / </li>
+ <li><a href="manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/tutorials/new_tutorial/transform_in_parallel.html b/doc/_build/html/tutorials/new_tutorial/transform_in_parallel.html
new file mode 100644
index 0000000..30c13fa
--- /dev/null
+++ b/doc/_build/html/tutorials/new_tutorial/transform_in_parallel.html
@@ -0,0 +1,530 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>Chapter 3: More on @transform-ing data — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../../index.html" />
+ <link rel="next" title="Chapter 4: Creating files with @originate" href="originate.html" />
+ <link rel="prev" title="Chapter 2: Transforming data in a pipeline with @transform" href="transform.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="originate.html" title="Chapter 4: Creating files with @originate"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="transform.html" title="Chapter 2: Transforming data in a pipeline with @transform"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="introduction.html">Manual</a> / </li>
+ <li><a href="manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <div class="section" id="new-manual-transform-in-parallel-chapter-num-more-on-transform-ing-data">
+<span id="new-manual-transform-in-parallel"></span><span id="index-0"></span><h1><strong>Chapter 3</strong>: More on <tt class="docutils literal"><span class="pre">@transform</span></tt>-ing data<a class="headerlink" href="#new-manual-transform-in-parallel-chapter-num-more-on-transform-ing-data" title="Permalink to this headline">¶</a></h1>
+<div class="admonition seealso">
+<p class="first admonition-title">See also</p>
+<ul class="last simple">
+<li><a class="reference internal" href="manual_contents.html#new-manual-table-of-contents"><em>Manual Table of Contents</em></a></li>
+<li><a class="reference internal" href="../../decorators/transform.html#decorators-transform"><em>@transform</em></a> syntax</li>
+</ul>
+</div>
+<div class="admonition note">
+<p class="first admonition-title">Note</p>
+<p>Remember to look at the example code:</p>
+<ul class="last simple">
+<li><a class="reference internal" href="transform_in_parallel_code.html#new-manual-transform-in-parallel-code"><em>Chapter 3: Python Code for More on @transform-ing data</em></a></li>
+</ul>
+</div>
+<div class="section" id="review">
+<h2>Review<a class="headerlink" href="#review" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><a class="reference internal image-reference" href="../../_images/theoretical_pipeline_schematic.png"><img alt="../../_images/theoretical_pipeline_schematic.png" src="../../_images/theoretical_pipeline_schematic.png" style="width: 610.0px; height: 71.0px;" /></a>
+<p>Computational pipelines transform your data in stages until the final result is produced.
+<em>Ruffus</em> automates the plumbing in your pipeline. You supply the python functions which perform the data transformation,
+and tell <em>Ruffus</em> how these pipeline stages or <a class="reference internal" href="../../glossary.html#term-task"><em class="xref std std-term">task</em></a> functions are connected together.</p>
+<div class="admonition note">
+<p class="first admonition-title">Note</p>
+<p><strong>The best way to design a pipeline is to:</strong></p>
+<blockquote class="last">
+<div><ul class="simple">
+<li><strong>write down the file names of the data as it flows through your pipeline</strong></li>
+<li><strong>write down the names of the functions which transform the data at each stage of the pipeline.</strong></li>
+</ul>
+</div></blockquote>
+</div>
+<p><a class="reference internal" href="introduction.html#new-manual-introduction"><em>Chapter 1: An introduction to basic Ruffus syntax</em></a> described the bare bones of a simple <em>Ruffus</em> pipeline.</p>
+<p>Using the <em>Ruffus</em> <a class="reference internal" href="../../decorators/transform.html#decorators-transform"><em>@transform</em></a> decorator, we were able to
+specify the data files moving through our pipeline so that our specified task functions
+could be invoked.</p>
+<p>This may seem like a lot of effort and complication for something so simple: a couple of
+simple python function calls we could have invoked ourselves.
+However, by letting <em>Ruffus</em> manage your pipeline parameters, you will get the following features
+for free:</p>
+<blockquote>
+<div><ol class="arabic simple">
+<li>Only out-of-date parts of the pipeline will be re-run</li>
+<li>Multiple jobs can be run in parallel (on different processors if possible)</li>
+<li>Pipeline stages can be chained together automatically. This means you can apply your
+pipeline just as easily to 1000 files as to 3.</li>
+</ol>
+</div></blockquote>
+</div></blockquote>
+</div>
+<div class="section" id="running-pipelines-in-parallel">
+<h2>Running pipelines in parallel<a class="headerlink" href="#running-pipelines-in-parallel" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>Even though three sets of files have been specified for our initial pipeline and can be
+processed completely independently, by default <em>Ruffus</em> runs each of them in succession.</p>
+<p>To ask <em>Ruffus</em> to run them in parallel, all you have to do is to add a <tt class="docutils literal"><span class="pre">multiprocess</span></tt> parameter to <tt class="docutils literal"><span class="pre">pipeline_run</span></tt>:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="gp">>>> </span><span class="n">pipeline_run</span><span class="p">(</span><span class="n">multiprocess</span> <span class="o">=</span> <span class="mi">5</span><span class="p">)</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p>In this case, we are telling <em>Ruffus</em> to run a maximum of 5 jobs at the same time. Since we only have
+three sets of data, that is as much parallelism as we are going to get...</p>
+</div></blockquote>
+</div>
+<div class="section" id="up-to-date-jobs-are-not-re-run-unnecessarily">
+<span id="new-manual-only-rerun-out-of-date"></span><h2>Up-to-date jobs are not re-run unnecessarily<a class="headerlink" href="#up-to-date-jobs-are-not-re-run-unnecessarily" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>A job will be run only if the output file timestamps are out of date.
+If you ran our example code a second time, nothing would happen because all the work is already complete.</p>
+<p>We can check the details by asking <em>Ruffus</em> for more <tt class="docutils literal"><span class="pre">verbose</span></tt> output:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="gp">>>> </span><span class="n">pipeline_run</span><span class="p">(</span><span class="n">verbose</span> <span class="o">=</span> <span class="mi">4</span><span class="p">)</span>
+<span class="go"> Task = map_dna_sequence</span>
+<span class="go"> All jobs up to date</span>
+<span class="go"> Task = compress_sam_file</span>
+<span class="go"> All jobs up to date</span>
+<span class="go"> Task = summarise_bam_file</span>
+<span class="go"> All jobs up to date</span>
+</pre></div>
+</div>
+</div></blockquote>
+<dl class="docutils">
+<dt>Nothing happens because:</dt>
+<dd><ul class="first last simple">
+<li><tt class="docutils literal"><span class="pre">a.sam</span></tt> was created later than <tt class="docutils literal"><span class="pre">a.1.fastq</span></tt> and <tt class="docutils literal"><span class="pre">a.2.fastq</span></tt>, and</li>
+<li><tt class="docutils literal"><span class="pre">a.bam</span></tt> was created later than <tt class="docutils literal"><span class="pre">a.sam</span></tt> and</li>
+<li><tt class="docutils literal"><span class="pre">a.statistics</span></tt> was created later than <tt class="docutils literal"><span class="pre">a.bam</span></tt>.</li>
+</ul>
+</dd>
+</dl>
+<p>and so on...</p>
+<dl class="docutils">
+<dt>Let us see what happens if we recreate the file <tt class="docutils literal"><span class="pre">a.1.fastq</span></tt> so that one of the original data files appears to be out of date:</dt>
+<dd><div class="first last highlight-python"><div class="highlight"><pre><span class="nb">open</span><span class="p">(</span><span class="s">"a.1.fastq"</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+<span class="n">pipeline_run</span><span class="p">(</span><span class="n">multiprocess</span> <span class="o">=</span> <span class="mi">5</span><span class="p">)</span>
+</pre></div>
+</div>
+</dd>
+</dl>
+<p>The up-to-date jobs are cleverly ignored and only the out-of-date files are reprocessed.</p>
+<blockquote>
+<div><div class="highlight-pycon"><div class="highlight"><pre><span class="go"> >>> open("a.1.fastq", "w")</span>
+<span class="go"> >>> pipeline_run(verbose=2)</span>
+<span class="hll"><span class="go"> Job = [[b.1.fastq, b.2.fastq] -> b.sam] # unnecessary: already up to date</span>
+</span><span class="hll"><span class="go"> Job = [[c.1.fastq, c.2.fastq] -> c.sam] # unnecessary: already up to date</span>
+</span><span class="go"> Job = [[a.1.fastq, a.2.fastq] -> a.sam] completed</span>
+<span class="go"> Completed Task = map_dna_sequence</span>
+<span class="hll"><span class="go"> Job = [b.sam -> b.bam] # unnecessary: already up to date</span>
+</span><span class="hll"><span class="go"> Job = [c.sam -> c.bam] # unnecessary: already up to date</span>
+</span><span class="go"> Job = [a.sam -> a.bam] completed</span>
+<span class="go"> Completed Task = compress_sam_file</span>
+<span class="hll"><span class="go"> Job = [b.bam -> b.statistics, use_linear_model] # unnecessary: already up to date</span>
+</span><span class="hll"><span class="go"> Job = [c.bam -> c.statistics, use_linear_model] # unnecessary: already up to date</span>
+</span><span class="go"> Job = [a.bam -> a.statistics, use_linear_model] completed</span>
+<span class="go"> Completed Task = summarise_bam_file</span>
+</pre></div>
+</div>
+</div></blockquote>
+</div></blockquote>
+</div>
+<div class="section" id="defining-pipeline-tasks-out-of-order">
+<span id="new-manual-output-from"></span><span id="index-1"></span><h2>Defining pipeline tasks out of order<a class="headerlink" href="#defining-pipeline-tasks-out-of-order" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>The examples so far assume that all your pipelined tasks are defined in order
+(<tt class="docutils literal"><span class="pre">first_task</span></tt> before <tt class="docutils literal"><span class="pre">second_task</span></tt>). This is usually the most sensible way to arrange your code.</p>
+<p>If you wish to refer to tasks which are not yet defined, you can do so by quoting the function name as a string and wrapping
+it with the <a class="reference internal" href="../../decorators/indicator_objects.html#decorators-indicator-objects"><em>indicator class</em></a> <a class="reference internal" href="../../decorators/indicator_objects.html#decorators-output-from"><em>output_from(...)</em></a> so that <em>Ruffus</em>
+knows this is a <a class="reference internal" href="../../glossary.html#term-task"><em class="xref std std-term">task</em></a> name, not a file name:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre> <span class="c">#---------------------------------------------------------------</span>
+ <span class="c">#</span>
+ <span class="c"># second task</span>
+ <span class="c">#</span>
+<span class="hll"> <span class="c"># task name string wrapped in output_from(...)</span>
+</span> <span class="nd">@transform</span><span class="p">(</span><span class="n">output_from</span><span class="p">(</span><span class="s">"first_task"</span><span class="p">),</span> <span class="n">suffix</span><span class="p">(</span><span class="s">".output.1"</span><span class="p">),</span> <span class="s">".output2"</span><span class="p">)</span>
+ <span class="k">def</span> <span class="nf">second_task</span><span class="p">(</span><span class="n">input_files</span><span class="p">,</span> <span class="n">output_file</span><span class="p">):</span>
+ <span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">):</span> <span class="k">pass</span>
+
+
+ <span class="c">#---------------------------------------------------------------</span>
+ <span class="c">#</span>
+ <span class="c"># first task</span>
+ <span class="c">#</span>
+ <span class="nd">@transform</span><span class="p">(</span><span class="n">first_task_params</span><span class="p">,</span> <span class="n">suffix</span><span class="p">(</span><span class="s">".start"</span><span class="p">),</span>
+ <span class="p">[</span><span class="s">".output.1"</span><span class="p">,</span>
+ <span class="s">".output.extra.1"</span><span class="p">],</span>
+ <span class="s">"some_extra.string.for_example"</span><span class="p">,</span> <span class="mi">14</span><span class="p">)</span>
+ <span class="k">def</span> <span class="nf">first_task</span><span class="p">(</span><span class="n">input_files</span><span class="p">,</span> <span class="n">output_file_pair</span><span class="p">,</span>
+ <span class="n">extra_parameter_str</span><span class="p">,</span> <span class="n">extra_parameter_num</span><span class="p">):</span>
+ <span class="k">for</span> <span class="n">output_file</span> <span class="ow">in</span> <span class="n">output_file_pair</span><span class="p">:</span>
+ <span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">):</span>
+ <span class="k">pass</span>
+
+
+ <span class="c">#---------------------------------------------------------------</span>
+ <span class="c">#</span>
+ <span class="c"># Run</span>
+ <span class="c">#</span>
+ <span class="n">pipeline_run</span><span class="p">([</span><span class="n">second_task</span><span class="p">])</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p>You can also refer to tasks (functions) in other modules, in which case the fully
+qualified name must be used:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="nd">@transform</span><span class="p">(</span><span class="n">output_from</span><span class="p">(</span><span class="s">"other_module.first_task"</span><span class="p">),</span> <span class="n">suffix</span><span class="p">(</span><span class="s">".output.1"</span><span class="p">),</span> <span class="s">".output2"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">second_task</span><span class="p">(</span><span class="n">input_files</span><span class="p">,</span> <span class="n">output_file</span><span class="p">):</span>
+ <span class="k">pass</span>
+</pre></div>
+</div>
+</div></blockquote>
+</div></blockquote>
+</div>
+<div class="section" id="multiple-dependencies">
+<span id="new-manual-transform-multiple-dependencies"></span><span id="index-2"></span><h2>Multiple dependencies<a class="headerlink" href="#multiple-dependencies" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>Each task can depend on more than one antecedent simply by chaining to a list in <a class="reference internal" href="../../decorators/transform.html#decorators-transform"><em>@transform</em></a></p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre> <span class="c">#</span>
+<span class="hll"> <span class="c"># third_task depends on both first_task() and second_task()</span>
+</span> <span class="c">#</span>
+ <span class="nd">@transform</span><span class="p">([</span><span class="n">first_task</span><span class="p">,</span> <span class="n">second_task</span><span class="p">],</span> <span class="n">suffix</span><span class="p">(</span><span class="s">".output.1"</span><span class="p">),</span> <span class="s">".output2"</span><span class="p">)</span>
+ <span class="k">def</span> <span class="nf">third_task</span><span class="p">(</span><span class="n">input_files</span><span class="p">,</span> <span class="n">output_file</span><span class="p">):</span>
+ <span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">):</span> <span class="k">pass</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p><tt class="docutils literal"><span class="pre">third_task()</span></tt> depends on and follows both <tt class="docutils literal"><span class="pre">first_task()</span></tt> and <tt class="docutils literal"><span class="pre">second_task()</span></tt>. However, these latter two tasks are independent of each other
+and can and will run in parallel. This can be shown clearly in our example if we add a little randomness to the run time of each job:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="n">time</span><span class="o">.</span><span class="n">sleep</span><span class="p">(</span><span class="n">random</span><span class="o">.</span><span class="n">random</span><span class="p">())</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p>The execution of <tt class="docutils literal"><span class="pre">first_task()</span></tt> and <tt class="docutils literal"><span class="pre">second_task()</span></tt> jobs will be interleaved and they finish in no particular order:</p>
+<blockquote>
+<div><div class="highlight-pycon"><div class="highlight"><pre><span class="gp">>>> </span><span class="n">pipeline_run</span><span class="p">([</span><span class="n">third_task</span><span class="p">],</span> <span class="n">multiprocess</span> <span class="o">=</span> <span class="mi">6</span><span class="p">)</span>
+<span class="go"> Job = [[job3.a.start, job3.b.start] -> [job3.a.output.1, job3.a.output.extra.1], some_extra.string.for_example, 14] completed</span>
+<span class="go"> Job = [[job6.a.start, job6.b.start] -> [job6.a.output.1, job6.a.output.extra.1], some_extra.string.for_example, 14] completed</span>
+<span class="go"> Job = [[job1.a.start, job1.b.start] -> [job1.a.output.1, job1.a.output.extra.1], some_extra.string.for_example, 14] completed</span>
+<span class="go"> Job = [[job4.a.start, job4.b.start] -> [job4.a.output.1, job4.a.output.extra.1], some_extra.string.for_example, 14] completed</span>
+<span class="go"> Job = [[job5.a.start, job5.b.start] -> [job5.a.output.1, job5.a.output.extra.1], some_extra.string.for_example, 14] completed</span>
+<span class="go">Completed Task = second_task</span>
+<span class="go"> Job = [[job2.a.start, job2.b.start] -> [job2.a.output.1, job2.a.output.extra.1], some_extra.string.for_example, 14] completed</span>
+</pre></div>
+</div>
+</div></blockquote>
+<div class="admonition note">
+<p class="first admonition-title">Note</p>
+<p class="last">See the <a class="reference internal" href="transform_in_parallel_code.html#new-manual-transform-multiple-dependencies-code"><em>example code</em></a></p>
+</div>
+</div></blockquote>
+</div>
+<div class="section" id="follows">
+<span id="new-manual-follows"></span><span id="index-3"></span><h2><a class="reference internal" href="../../decorators/follows.html#decorators-follows"><em>@follows</em></a><a class="headerlink" href="#follows" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>If there is some extrinsic reason one non-dependent task has to precede the other, then this can be specified explicitly using <a class="reference internal" href="../../decorators/follows.html#decorators-follows"><em>@follows</em></a>:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre> <span class="c">#</span>
+<span class="hll"> <span class="c"># @follows specifies a preceding task</span>
+</span> <span class="c">#</span>
+ <span class="nd">@follows</span><span class="p">(</span><span class="s">"first_task"</span><span class="p">)</span>
+ <span class="nd">@transform</span><span class="p">(</span><span class="n">second_task_params</span><span class="p">,</span> <span class="n">suffix</span><span class="p">(</span><span class="s">".start"</span><span class="p">),</span>
+ <span class="p">[</span><span class="s">".output.1"</span><span class="p">,</span>
+ <span class="s">".output.extra.1"</span><span class="p">],</span>
+ <span class="s">"some_extra.string.for_example"</span><span class="p">,</span> <span class="mi">14</span><span class="p">)</span>
+ <span class="k">def</span> <span class="nf">second_task</span><span class="p">(</span><span class="n">input_files</span><span class="p">,</span> <span class="n">output_file_pair</span><span class="p">,</span>
+ <span class="n">extra_parameter_str</span><span class="p">,</span> <span class="n">extra_parameter_num</span><span class="p">):</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p><a class="reference internal" href="../../decorators/follows.html#decorators-follows"><em>@follows</em></a> specifies either a preceding task (e.g. <tt class="docutils literal"><span class="pre">first_task</span></tt>), or if
+it has not yet been defined, the name (as a string) of a task function (e.g. <tt class="docutils literal"><span class="pre">"first_task"</span></tt>).</p>
+<p>With the addition of <a class="reference internal" href="../../decorators/follows.html#decorators-follows"><em>@follows</em></a>, all the jobs
+of <tt class="docutils literal"><span class="pre">second_task()</span></tt> start <em>after</em> those from <tt class="docutils literal"><span class="pre">first_task()</span></tt> have finished:</p>
+<blockquote>
+<div><div class="highlight-pycon"><div class="highlight"><pre><span class="gp">>>> </span><span class="n">pipeline_run</span><span class="p">([</span><span class="n">third_task</span><span class="p">],</span> <span class="n">multiprocess</span> <span class="o">=</span> <span class="mi">6</span><span class="p">)</span>
+<span class="go"> Job = [[job2.a.start, job2.b.start] -> [job2.a.output.1, job2.a.output.extra.1], some_extra.string.for_example, 14] completed</span>
+<span class="go"> Job = [[job3.a.start, job3.b.start] -> [job3.a.output.1, job3.a.output.extra.1], some_extra.string.for_example, 14] completed</span>
+<span class="go"> Job = [[job1.a.start, job1.b.start] -> [job1.a.output.1, job1.a.output.extra.1], some_extra.string.for_example, 14] completed</span>
+<span class="go">Completed Task = first_task</span>
+<span class="go"> Job = [[job4.a.start, job4.b.start] -> [job4.a.output.1, job4.a.output.extra.1], some_extra.string.for_example, 14] completed</span>
+<span class="go"> Job = [[job6.a.start, job6.b.start] -> [job6.a.output.1, job6.a.output.extra.1], some_extra.string.for_example, 14] completed</span>
+<span class="go"> Job = [[job5.a.start, job5.b.start] -> [job5.a.output.1, job5.a.output.extra.1], some_extra.string.for_example, 14] completed</span>
+<span class="go">Completed Task = second_task</span>
+</pre></div>
+</div>
+</div></blockquote>
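+<p>Because <tt class="docutils literal"><span class="pre">first_task()</span></tt> is defined before <tt class="docutils literal"><span class="pre">second_task()</span></tt> here, the function object itself could equally be passed to <a class="reference internal" href="../../decorators/follows.html#decorators-follows"><em>@follows</em></a>. A minimal, self-contained sketch (the file names below are only illustrative placeholders):</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre>from ruffus import *
+
+# placeholder input files so that the sketch is runnable
+for f in ("job1.start", "job2.start"):
+    open(f, "w").close()
+
+@transform(["job1.start"], suffix(".start"), ".first")
+def first_task(input_file, output_file):
+    open(output_file, "w").close()
+
+# first_task() is already defined, so the function object itself
+# can be passed to @follows instead of the string "first_task"
+@follows(first_task)
+@transform(["job2.start"], suffix(".start"), ".second")
+def second_task(input_file, output_file):
+    open(output_file, "w").close()
+
+pipeline_run([second_task])
+</pre></div>
+</div>
+</div></blockquote>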
+</div></blockquote>
+</div>
+<div class="section" id="making-directories-automatically-with-follows-and-mkdir">
+<span id="new-manual-follows-mkdir"></span><span id="index-4"></span><h2>Making directories automatically with <a class="reference internal" href="../../decorators/follows.html#decorators-follows"><em>@follows</em></a> and <a class="reference internal" href="../../decorators/mkdir.html#decorators-mkdir"><em>mkdir</em></a><a class="headerlink" href="#making-directories-automatically-with-follows-and-mkdir" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p><a class="reference internal" href="../../decorators/follows.html#decorators-follows"><em>@follows</em></a> is also useful for making sure one or more destination directories
+exist before a task is run.</p>
+<p><em>Ruffus</em> provides syntax to support this, using the special
+<a class="reference internal" href="../../decorators/mkdir.html#decorators-mkdir"><em>mkdir</em></a> indicator class. For example:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="c">#</span>
+<span class="hll"><span class="c"># @follows specifies both a preceding task and a directory name</span>
+</span><span class="c">#</span>
+<span class="nd">@follows</span><span class="p">(</span><span class="s">"first_task"</span><span class="p">,</span> <span class="n">mkdir</span><span class="p">(</span><span class="s">"output/results/here"</span><span class="p">))</span>
+<span class="nd">@transform</span><span class="p">(</span><span class="n">second_task_params</span><span class="p">,</span> <span class="n">suffix</span><span class="p">(</span><span class="s">".start"</span><span class="p">),</span>
+ <span class="p">[</span><span class="s">".output.1"</span><span class="p">,</span>
+ <span class="s">".output.extra.1"</span><span class="p">],</span>
+ <span class="s">"some_extra.string.for_example"</span><span class="p">,</span> <span class="mi">14</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">second_task</span><span class="p">(</span><span class="n">input_files</span><span class="p">,</span> <span class="n">output_file_pair</span><span class="p">,</span>
+ <span class="n">extra_parameter_str</span><span class="p">,</span> <span class="n">extra_parameter_num</span><span class="p">):</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p>Before <tt class="docutils literal"><span class="pre">second_task()</span></tt> is run, the <tt class="docutils literal"><span class="pre">output/results/here</span></tt> directory will be created if necessary.</p>
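+<p>More than one directory can be created in the same way by passing several <a class="reference internal" href="../../decorators/mkdir.html#decorators-mkdir"><em>mkdir</em></a> indicator objects to <a class="reference internal" href="../../decorators/follows.html#decorators-follows"><em>@follows</em></a>. A minimal sketch (the file and directory names below are only illustrative placeholders):</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre>from ruffus import *
+
+open("a.start", "w").close()     # placeholder input file
+
+# both directories are created before the task runs,
+# if they do not already exist
+@follows(mkdir("output"), mkdir("output/results"))
+@transform(["a.start"], suffix(".start"), ".output.1")
+def second_task(input_file, output_file):
+    open(output_file, "w").close()
+
+pipeline_run([second_task])
+</pre></div>
+</div>
+</div></blockquote>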
+</div></blockquote>
+</div>
+<div class="section" id="globs-in-the-input-parameter">
+<span id="new-manual-globs-as-input"></span><span id="index-5"></span><h2>Globs in the <strong>Input</strong> parameter<a class="headerlink" href="#globs-in-the-input-parameter" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><ul class="simple">
+<li>As a syntactic convenience, <em>Ruffus</em> also allows you to specify a <a class="reference external" href="http://docs.python.org/library/glob.html"><cite>glob</cite></a> pattern (e.g. <tt class="docutils literal"><span class="pre">*.txt</span></tt>) in the
+<strong>Input</strong> parameter.</li>
+<li><a class="reference external" href="http://docs.python.org/library/glob.html"><cite>glob</cite></a> patterns will automatically specify all matching file names as the <strong>Input</strong>.</li>
+<li>Any strings within <strong>Input</strong> which contain the characters <tt class="docutils literal"><span class="pre">*?[]</span></tt> will be treated as a <a class="reference external" href="http://docs.python.org/library/glob.html"><cite>glob</cite></a> pattern.</li>
+</ul>
+<p>The first function in our initial <em>Ruffus</em> pipeline example could have been written as:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="c">#</span>
+<span class="c"># STAGE 1 fasta->sam</span>
+<span class="c">#</span>
+<span class="hll"><span class="nd">@transform</span><span class="p">(</span><span class="s">"*.fasta"</span><span class="p">,</span> <span class="c"># Input = glob</span>
+</span> <span class="n">suffix</span><span class="p">(</span><span class="s">".fasta"</span><span class="p">),</span> <span class="c"># suffix = .fasta</span>
+ <span class="s">".sam"</span><span class="p">)</span> <span class="c"># Output suffix = .sam</span>
+<span class="k">def</span> <span class="nf">map_dna_sequence</span><span class="p">(</span><span class="n">input_file</span><span class="p">,</span>
+ <span class="n">output_file</span><span class="p">):</span>
+ <span class="s">""</span>
+</pre></div>
+</div>
+</div></blockquote>
+</div></blockquote>
+</div>
+<div class="section" id="mixing-tasks-and-globs-in-the-input-parameter">
+<span id="index-6"></span><h2>Mixing Tasks and Globs in the <strong>Input</strong> parameter<a class="headerlink" href="#mixing-tasks-and-globs-in-the-input-parameter" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p><a class="reference external" href="http://docs.python.org/library/glob.html"><cite>glob</cite></a> patterns, references to tasks, and file name strings
+can be mixed freely in (nested) python lists and tuples in the <strong>Input</strong> parameter.</p>
+<p>For example, a task function can chain to the <strong>Output</strong> from multiple upstream tasks:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="nd">@transform</span><span class="p">([</span><span class="n">task1</span><span class="p">,</span> <span class="n">task2</span><span class="p">,</span> <span class="c"># Input = multiple tasks</span>
+            <span class="s">"aa*.fasta"</span><span class="p">,</span>  <span class="c">#       + all files matching glob</span>
+            <span class="s">"zz.fasta"</span><span class="p">],</span>  <span class="c">#       + file name</span>
+ <span class="n">suffix</span><span class="p">(</span><span class="s">".fasta"</span><span class="p">),</span> <span class="c"># suffix = .fasta</span>
+ <span class="s">".sam"</span><span class="p">)</span> <span class="c"># Output suffix = .sam</span>
+<span class="k">def</span> <span class="nf">map_dna_sequence</span><span class="p">(</span><span class="n">input_file</span><span class="p">,</span>
+ <span class="n">output_file</span><span class="p">):</span>
+ <span class="s">""</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p>In all cases, <em>Ruffus</em> tries to do the right thing, and to make the simple or
+obvious case require the simplest, least onerous syntax.</p>
+<p>If <em>Ruffus</em> sometimes does not behave the way you expect, please write to the authors:
+it may be a bug!</p>
+<p><a class="reference internal" href="pipeline_printout.html#new-manual-pipeline-printout"><em>Chapter 5: Understanding how your pipeline works with pipeline_printout(...)</em></a> and
+<a class="reference internal" href="command_line.html#new-manual-cmdline"><em>Chapter 6: Running Ruffus from the command line with ruffus.cmdline</em></a> will show you how to
+make sure that your intentions are reflected in <em>Ruffus</em> code.</p>
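+<p>For example, a quick way to check your intentions is a dry run with <tt class="docutils literal"><span class="pre">pipeline_printout(...)</span></tt>, which lists the jobs <em>Ruffus</em> would run without executing anything. A minimal, self-contained sketch (the file names below are only illustrative placeholders):</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre>import sys
+from ruffus import *
+
+open("a.start", "w").close()     # placeholder input file
+
+@transform(["a.start"], suffix(".start"), ".output.1")
+def first_task(input_file, output_file):
+    open(output_file, "w").close()
+
+# print which jobs are up to date and which would be run,
+# without actually running the pipeline
+pipeline_printout(sys.stdout, [first_task], verbose = 3)
+</pre></div>
+</div>
+</div></blockquote>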
+</div></blockquote>
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="../../contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#"><strong>Chapter 3</strong>: More on <tt class="docutils literal"><span class="pre">@transform</span></tt>-ing data</a><ul>
+<li><a class="reference internal" href="#review">Review</a></li>
+<li><a class="reference internal" href="#running-pipelines-in-parallel">Running pipelines in parallel</a></li>
+<li><a class="reference internal" href="#up-to-date-jobs-are-not-re-run-unnecessarily">Up-to-date jobs are not re-run unnecessarily</a></li>
+<li><a class="reference internal" href="#defining-pipeline-tasks-out-of-order">Defining pipeline tasks out of order</a></li>
+<li><a class="reference internal" href="#multiple-dependencies">Multiple dependencies</a></li>
+<li><a class="reference internal" href="#follows"><tt class="docutils literal"><span class="pre">@follows</span></tt></a></li>
+<li><a class="reference internal" href="#making-directories-automatically-with-follows-and-mkdir">Making directories automatically with <tt class="docutils literal"><span class="pre">@follows</span></tt> and <tt class="docutils literal"><span class="pre">mkdir</span></tt></a></li>
+<li><a class="reference internal" href="#globs-in-the-input-parameter">Globs in the <strong>Input</strong> parameter</a></li>
+<li><a class="reference internal" href="#mixing-tasks-and-globs-in-the-input-parameter">Mixing Tasks and Globs in the <strong>Input</strong> parameter</a></li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="transform.html"
+ title="previous chapter"><strong>Chapter 2</strong>: Transforming data in a pipeline with <tt class="docutils literal"><span class="pre">@transform</span></tt></a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="originate.html"
+ title="next chapter"><strong>Chapter 4</strong>: Creating files with <tt class="docutils literal"><span class="pre">@originate</span></tt></a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../../_sources/tutorials/new_tutorial/transform_in_parallel.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="../../decorators/decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="../../decorators/originate.html">@originate</a> </li>
+ <li><a href="../../decorators/split.html">@split</a> </li>
+ <li><a href="../../decorators/transform.html">@transform</a> </li>
+ <li><a href="../../decorators/merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="../../decorators/subdivide.html">@subdivide</a> </li>
+ <li><a href="../../decorators/transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="../../decorators/collate.html">@collate</a> </li>
+ <li><a href="../../decorators/collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="../../decorators/graphviz.html">@graphviz</a> </li>
+ <li><a href="../../decorators/mkdir.html">@mkdir</a> </li>
+ <li><a href="../../decorators/follows.html">@follows / mkdir</a> </li>
+ <li><a href="../../decorators/posttask.html">@posttask touch_file</a> </li>
+ <li><a href="../../decorators/active_if.html">@active_if</a> </li>
+ <li><a href="../../decorators/jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="../../decorators/product.html">@product </a> </li>
+ <li><a href="../../decorators/permutations.html">@permutations </a> </li>
+ <li><a href="../../decorators/combinations.html">@combinations </a> </li>
+ <li><a href="../../decorators/combinations_with_replacement.html">@combinations_with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="../../decorators/decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="../../decorators/files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="../../decorators/parallel.html">@parallel</a> </li>
+ <li><a href="../../decorators/check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="../../decorators/indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="originate.html" title="Chapter 4: Creating files with @originate"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="transform.html" title="Chapter 2: Transforming data in a pipeline with @transform"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="introduction.html">Manual</a> / </li>
+ <li><a href="manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/tutorials/new_tutorial/transform_in_parallel_code.html b/doc/_build/html/tutorials/new_tutorial/transform_in_parallel_code.html
new file mode 100644
index 0000000..67a60ad
--- /dev/null
+++ b/doc/_build/html/tutorials/new_tutorial/transform_in_parallel_code.html
@@ -0,0 +1,561 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>Chapter 3: Python Code for More on @transform-ing data — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../../index.html" />
+ <link rel="next" title="Chapter 4: Python Code for Creating files with @originate" href="originate_code.html" />
+ <link rel="prev" title="Chapter 2: Python Code for Transforming data in a pipeline with @transform" href="transform_code.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="originate_code.html" title="Chapter 4: Python Code for Creating files with @originate"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="transform_code.html" title="Chapter 2: Python Code for Transforming data in a pipeline with @transform"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="introduction.html">Manual</a> / </li>
+ <li><a href="manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <div class="section" id="new-manual-transform-in-parallel-chapter-num-python-code-for-more-on-transform-ing-data">
+<span id="new-manual-transform-in-parallel-code"></span><h1><strong>Chapter 3</strong>: Python Code for More on <tt class="docutils literal"><span class="pre">@transform</span></tt>-ing data<a class="headerlink" href="#new-manual-transform-in-parallel-chapter-num-python-code-for-more-on-transform-ing-data" title="Permalink to this headline">¶</a></h1>
+<div class="admonition seealso">
+<p class="first admonition-title">See also</p>
+<ul class="last simple">
+<li><a class="reference internal" href="manual_contents.html#new-manual-table-of-contents"><em>Manual Table of Contents</em></a></li>
+<li><a class="reference internal" href="../../decorators/transform.html#decorators-transform"><em>@transform syntax in detail</em></a></li>
+<li>Back to <strong>Chapter 3</strong>: <a class="reference internal" href="transform_in_parallel.html#new-manual-transform-in-parallel"><em>More on @transform-ing data</em></a></li>
+</ul>
+</div>
+<div class="section" id="producing-several-items-files-per-job">
+<h2>Producing several items / files per job<a class="headerlink" href="#producing-several-items-files-per-job" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+
+<span class="c">#---------------------------------------------------------------</span>
+<span class="c"># Create pairs of input files</span>
+<span class="c">#</span>
+<span class="n">first_task_params</span> <span class="o">=</span> <span class="p">[</span>
+ <span class="p">[</span><span class="s">'job1.a.start'</span><span class="p">,</span> <span class="s">'job1.b.start'</span><span class="p">],</span>
+ <span class="p">[</span><span class="s">'job2.a.start'</span><span class="p">,</span> <span class="s">'job2.b.start'</span><span class="p">],</span>
+ <span class="p">[</span><span class="s">'job3.a.start'</span><span class="p">,</span> <span class="s">'job3.b.start'</span><span class="p">],</span>
+ <span class="p">]</span>
+
+<span class="k">for</span> <span class="n">input_file_pairs</span> <span class="ow">in</span> <span class="n">first_task_params</span><span class="p">:</span>
+ <span class="k">for</span> <span class="n">input_file</span> <span class="ow">in</span> <span class="n">input_file_pairs</span><span class="p">:</span>
+ <span class="nb">open</span><span class="p">(</span><span class="n">input_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+
+
+<span class="c">#---------------------------------------------------------------</span>
+<span class="c">#</span>
+<span class="c"># first task</span>
+<span class="c">#</span>
+<span class="nd">@transform</span><span class="p">(</span><span class="n">first_task_params</span><span class="p">,</span> <span class="n">suffix</span><span class="p">(</span><span class="s">".start"</span><span class="p">),</span>
+ <span class="p">[</span><span class="s">".output.1"</span><span class="p">,</span>
+ <span class="s">".output.extra.1"</span><span class="p">],</span>
+ <span class="s">"some_extra.string.for_example"</span><span class="p">,</span> <span class="mi">14</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">first_task</span><span class="p">(</span><span class="n">input_files</span><span class="p">,</span> <span class="n">output_file_pair</span><span class="p">,</span>
+ <span class="n">extra_parameter_str</span><span class="p">,</span> <span class="n">extra_parameter_num</span><span class="p">):</span>
+ <span class="k">for</span> <span class="n">output_file</span> <span class="ow">in</span> <span class="n">output_file_pair</span><span class="p">:</span>
+ <span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">):</span>
+ <span class="k">pass</span>
+
+
+<span class="c">#---------------------------------------------------------------</span>
+<span class="c">#</span>
+<span class="c"># second task</span>
+<span class="c">#</span>
+<span class="nd">@transform</span><span class="p">(</span><span class="n">first_task</span><span class="p">,</span> <span class="n">suffix</span><span class="p">(</span><span class="s">".output.1"</span><span class="p">),</span> <span class="s">".output2"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">second_task</span><span class="p">(</span><span class="n">input_files</span><span class="p">,</span> <span class="n">output_file</span><span class="p">):</span>
+ <span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">):</span> <span class="k">pass</span>
+
+
+<span class="c">#---------------------------------------------------------------</span>
+<span class="c">#</span>
+<span class="c"># Run</span>
+<span class="c">#</span>
+<span class="n">pipeline_run</span><span class="p">([</span><span class="n">second_task</span><span class="p">])</span>
+</pre></div>
+</div>
+</div></blockquote>
+<div class="section" id="resulting-output">
+<h3>Resulting Output<a class="headerlink" href="#resulting-output" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="gp">>>> </span><span class="n">pipeline_run</span><span class="p">([</span><span class="n">second_task</span><span class="p">])</span>
+<span class="go"> Job = [[job1.a.start, job1.b.start] -> [job1.a.output.1, job1.a.output.extra.1], some_extra.string.for_example, 14] completed</span>
+<span class="go"> Job = [[job2.a.start, job2.b.start] -> [job2.a.output.1, job2.a.output.extra.1], some_extra.string.for_example, 14] completed</span>
+<span class="go"> Job = [[job3.a.start, job3.b.start] -> [job3.a.output.1, job3.a.output.extra.1], some_extra.string.for_example, 14] completed</span>
+<span class="go">Completed Task = first_task</span>
+<span class="go"> Job = [[job1.a.output.1, job1.a.output.extra.1] -> job1.a.output2] completed</span>
+<span class="go"> Job = [[job2.a.output.1, job2.a.output.extra.1] -> job2.a.output2] completed</span>
+<span class="go"> Job = [[job3.a.output.1, job3.a.output.extra.1] -> job3.a.output2] completed</span>
+<span class="go">Completed Task = second_task</span>
+</pre></div>
+</div>
+</div></blockquote>
+</div>
+</div>
+<div class="section" id="defining-tasks-function-out-of-order">
+<h2>Defining task functions out of order<a class="headerlink" href="#defining-tasks-function-out-of-order" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre> <span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+
+ <span class="c">#---------------------------------------------------------------</span>
+ <span class="c"># Create pairs of input files</span>
+ <span class="c">#</span>
+ <span class="n">first_task_params</span> <span class="o">=</span> <span class="p">[</span>
+ <span class="p">[</span><span class="s">'job1.a.start'</span><span class="p">,</span> <span class="s">'job1.b.start'</span><span class="p">],</span>
+ <span class="p">[</span><span class="s">'job2.a.start'</span><span class="p">,</span> <span class="s">'job2.b.start'</span><span class="p">],</span>
+ <span class="p">[</span><span class="s">'job3.a.start'</span><span class="p">,</span> <span class="s">'job3.b.start'</span><span class="p">],</span>
+ <span class="p">]</span>
+
+ <span class="k">for</span> <span class="n">input_file_pairs</span> <span class="ow">in</span> <span class="n">first_task_params</span><span class="p">:</span>
+ <span class="k">for</span> <span class="n">input_file</span> <span class="ow">in</span> <span class="n">input_file_pairs</span><span class="p">:</span>
+ <span class="nb">open</span><span class="p">(</span><span class="n">input_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+
+
+
+ <span class="c">#---------------------------------------------------------------</span>
+ <span class="c">#</span>
+ <span class="c"># second task defined first</span>
+ <span class="c">#</span>
+<span class="hll"> <span class="c"># task name string wrapped in output_from(...)</span>
+</span> <span class="nd">@transform</span><span class="p">(</span><span class="n">output_from</span><span class="p">(</span><span class="s">"first_task"</span><span class="p">),</span> <span class="n">suffix</span><span class="p">(</span><span class="s">".output.1"</span><span class="p">),</span> <span class="s">".output2"</span><span class="p">)</span>
+ <span class="k">def</span> <span class="nf">second_task</span><span class="p">(</span><span class="n">input_files</span><span class="p">,</span> <span class="n">output_file</span><span class="p">):</span>
+ <span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">):</span> <span class="k">pass</span>
+
+
+ <span class="c">#---------------------------------------------------------------</span>
+ <span class="c">#</span>
+ <span class="c"># first task</span>
+ <span class="c">#</span>
+ <span class="nd">@transform</span><span class="p">(</span><span class="n">first_task_params</span><span class="p">,</span> <span class="n">suffix</span><span class="p">(</span><span class="s">".start"</span><span class="p">),</span>
+ <span class="p">[</span><span class="s">".output.1"</span><span class="p">,</span>
+ <span class="s">".output.extra.1"</span><span class="p">],</span>
+ <span class="s">"some_extra.string.for_example"</span><span class="p">,</span> <span class="mi">14</span><span class="p">)</span>
+ <span class="k">def</span> <span class="nf">first_task</span><span class="p">(</span><span class="n">input_files</span><span class="p">,</span> <span class="n">output_file_pair</span><span class="p">,</span>
+ <span class="n">extra_parameter_str</span><span class="p">,</span> <span class="n">extra_parameter_num</span><span class="p">):</span>
+ <span class="k">for</span> <span class="n">output_file</span> <span class="ow">in</span> <span class="n">output_file_pair</span><span class="p">:</span>
+ <span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">):</span>
+ <span class="k">pass</span>
+
+
+ <span class="c">#---------------------------------------------------------------</span>
+ <span class="c">#</span>
+ <span class="c"># Run</span>
+ <span class="c">#</span>
+ <span class="n">pipeline_run</span><span class="p">([</span><span class="n">second_task</span><span class="p">])</span>
+</pre></div>
+</div>
+</div></blockquote>
+<div class="section" id="id1">
+<h3>Resulting Output<a class="headerlink" href="#id1" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><div class="highlight-pycon"><div class="highlight"><pre><span class="gp">>>> </span><span class="n">pipeline_run</span><span class="p">([</span><span class="n">second_task</span><span class="p">])</span>
+<span class="go"> Job = [[job1.a.start, job1.b.start] -> [job1.a.output.1, job1.a.output.extra.1], some_extra.string.for_example, 14] completed</span>
+<span class="go"> Job = [[job2.a.start, job2.b.start] -> [job2.a.output.1, job2.a.output.extra.1], some_extra.string.for_example, 14] completed</span>
+<span class="go"> Job = [[job3.a.start, job3.b.start] -> [job3.a.output.1, job3.a.output.extra.1], some_extra.string.for_example, 14] completed</span>
+<span class="go">Completed Task = first_task</span>
+<span class="go"> Job = [[job1.a.output.1, job1.a.output.extra.1] -> job1.a.output2] completed</span>
+<span class="go"> Job = [[job2.a.output.1, job2.a.output.extra.1] -> job2.a.output2] completed</span>
+<span class="go"> Job = [[job3.a.output.1, job3.a.output.extra.1] -> job3.a.output2] completed</span>
+<span class="go">Completed Task = second_task</span>
+</pre></div>
+</div>
+</div></blockquote>
+</div>
+</div>
+<div class="section" id="multiple-dependencies">
+<span id="new-manual-transform-multiple-dependencies-code"></span><h2>Multiple dependencies<a class="headerlink" href="#multiple-dependencies" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre> <span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+ <span class="kn">import</span> <span class="nn">time</span>
+ <span class="kn">import</span> <span class="nn">random</span>
+
+ <span class="c">#---------------------------------------------------------------</span>
+ <span class="c"># Create pairs of input files</span>
+ <span class="c">#</span>
+ <span class="n">first_task_params</span> <span class="o">=</span> <span class="p">[</span>
+ <span class="p">[</span><span class="s">'job1.a.start'</span><span class="p">,</span> <span class="s">'job1.b.start'</span><span class="p">],</span>
+ <span class="p">[</span><span class="s">'job2.a.start'</span><span class="p">,</span> <span class="s">'job2.b.start'</span><span class="p">],</span>
+ <span class="p">[</span><span class="s">'job3.a.start'</span><span class="p">,</span> <span class="s">'job3.b.start'</span><span class="p">],</span>
+ <span class="p">]</span>
+ <span class="n">second_task_params</span> <span class="o">=</span> <span class="p">[</span>
+ <span class="p">[</span><span class="s">'job4.a.start'</span><span class="p">,</span> <span class="s">'job4.b.start'</span><span class="p">],</span>
+ <span class="p">[</span><span class="s">'job5.a.start'</span><span class="p">,</span> <span class="s">'job5.b.start'</span><span class="p">],</span>
+ <span class="p">[</span><span class="s">'job6.a.start'</span><span class="p">,</span> <span class="s">'job6.b.start'</span><span class="p">],</span>
+ <span class="p">]</span>
+
+ <span class="k">for</span> <span class="n">input_file_pairs</span> <span class="ow">in</span> <span class="n">first_task_params</span> <span class="o">+</span> <span class="n">second_task_params</span><span class="p">:</span>
+ <span class="k">for</span> <span class="n">input_file</span> <span class="ow">in</span> <span class="n">input_file_pairs</span><span class="p">:</span>
+ <span class="nb">open</span><span class="p">(</span><span class="n">input_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+
+
+
+ <span class="c">#---------------------------------------------------------------</span>
+ <span class="c">#</span>
+ <span class="c"># first task</span>
+ <span class="c">#</span>
+ <span class="nd">@transform</span><span class="p">(</span><span class="n">first_task_params</span><span class="p">,</span> <span class="n">suffix</span><span class="p">(</span><span class="s">".start"</span><span class="p">),</span>
+ <span class="p">[</span><span class="s">".output.1"</span><span class="p">,</span>
+ <span class="s">".output.extra.1"</span><span class="p">],</span>
+ <span class="s">"some_extra.string.for_example"</span><span class="p">,</span> <span class="mi">14</span><span class="p">)</span>
+ <span class="k">def</span> <span class="nf">first_task</span><span class="p">(</span><span class="n">input_files</span><span class="p">,</span> <span class="n">output_file_pair</span><span class="p">,</span>
+ <span class="n">extra_parameter_str</span><span class="p">,</span> <span class="n">extra_parameter_num</span><span class="p">):</span>
+ <span class="k">for</span> <span class="n">output_file</span> <span class="ow">in</span> <span class="n">output_file_pair</span><span class="p">:</span>
+ <span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">):</span>
+ <span class="k">pass</span>
+ <span class="n">time</span><span class="o">.</span><span class="n">sleep</span><span class="p">(</span><span class="n">random</span><span class="o">.</span><span class="n">random</span><span class="p">())</span>
+
+
+
+ <span class="c">#---------------------------------------------------------------</span>
+ <span class="c">#</span>
+ <span class="c"># second task</span>
+ <span class="c">#</span>
+ <span class="nd">@transform</span><span class="p">(</span><span class="n">second_task_params</span><span class="p">,</span> <span class="n">suffix</span><span class="p">(</span><span class="s">".start"</span><span class="p">),</span>
+ <span class="p">[</span><span class="s">".output.1"</span><span class="p">,</span>
+ <span class="s">".output.extra.1"</span><span class="p">],</span>
+ <span class="s">"some_extra.string.for_example"</span><span class="p">,</span> <span class="mi">14</span><span class="p">)</span>
+ <span class="k">def</span> <span class="nf">second_task</span><span class="p">(</span><span class="n">input_files</span><span class="p">,</span> <span class="n">output_file_pair</span><span class="p">,</span>
+ <span class="n">extra_parameter_str</span><span class="p">,</span> <span class="n">extra_parameter_num</span><span class="p">):</span>
+ <span class="k">for</span> <span class="n">output_file</span> <span class="ow">in</span> <span class="n">output_file_pair</span><span class="p">:</span>
+ <span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">):</span>
+ <span class="k">pass</span>
+ <span class="n">time</span><span class="o">.</span><span class="n">sleep</span><span class="p">(</span><span class="n">random</span><span class="o">.</span><span class="n">random</span><span class="p">())</span>
+
+
+<span class="hll"> <span class="c">#---------------------------------------------------------------</span>
+</span> <span class="c">#</span>
+ <span class="c"># third task</span>
+ <span class="c">#</span>
+ <span class="c"># depends on both first_task() and second_task()</span>
+ <span class="nd">@transform</span><span class="p">([</span><span class="n">first_task</span><span class="p">,</span> <span class="n">second_task</span><span class="p">],</span> <span class="n">suffix</span><span class="p">(</span><span class="s">".output.1"</span><span class="p">),</span> <span class="s">".output2"</span><span class="p">)</span>
+ <span class="k">def</span> <span class="nf">third_task</span><span class="p">(</span><span class="n">input_files</span><span class="p">,</span> <span class="n">output_file</span><span class="p">):</span>
+ <span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">):</span> <span class="k">pass</span>
+
+
+ <span class="c">#---------------------------------------------------------------</span>
+ <span class="c">#</span>
+ <span class="c"># Run</span>
+ <span class="c">#</span>
+ <span class="n">pipeline_run</span><span class="p">([</span><span class="n">third_task</span><span class="p">],</span> <span class="n">multiprocess</span> <span class="o">=</span> <span class="mi">6</span><span class="p">)</span>
+</pre></div>
+</div>
+</div></blockquote>
+<div class="section" id="id2">
+<h3>Resulting Output<a class="headerlink" href="#id2" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><div class="highlight-pycon"><div class="highlight"><pre><span class="gp">>>> </span><span class="n">pipeline_run</span><span class="p">([</span><span class="n">third_task</span><span class="p">],</span> <span class="n">multiprocess</span> <span class="o">=</span> <span class="mi">6</span><span class="p">)</span>
+<span class="go"> Job = [[job3.a.start, job3.b.start] -> [job3.a.output.1, job3.a.output.extra.1], some_extra.string.for_example, 14] completed</span>
+<span class="go"> Job = [[job6.a.start, job6.b.start] -> [job6.a.output.1, job6.a.output.extra.1], some_extra.string.for_example, 14] completed</span>
+<span class="go"> Job = [[job1.a.start, job1.b.start] -> [job1.a.output.1, job1.a.output.extra.1], some_extra.string.for_example, 14] completed</span>
+<span class="go"> Job = [[job4.a.start, job4.b.start] -> [job4.a.output.1, job4.a.output.extra.1], some_extra.string.for_example, 14] completed</span>
+<span class="go"> Job = [[job5.a.start, job5.b.start] -> [job5.a.output.1, job5.a.output.extra.1], some_extra.string.for_example, 14] completed</span>
+<span class="go">Completed Task = second_task</span>
+<span class="go"> Job = [[job2.a.start, job2.b.start] -> [job2.a.output.1, job2.a.output.extra.1], some_extra.string.for_example, 14] completed</span>
+<span class="go">Completed Task = first_task</span>
+<span class="go"> Job = [[job1.a.output.1, job1.a.output.extra.1] -> job1.a.output2] completed</span>
+<span class="go"> Job = [[job2.a.output.1, job2.a.output.extra.1] -> job2.a.output2] completed</span>
+<span class="go"> Job = [[job3.a.output.1, job3.a.output.extra.1] -> job3.a.output2] completed</span>
+<span class="go"> Job = [[job4.a.output.1, job4.a.output.extra.1] -> job4.a.output2] completed</span>
+<span class="go"> Job = [[job5.a.output.1, job5.a.output.extra.1] -> job5.a.output2] completed</span>
+<span class="go"> Job = [[job6.a.output.1, job6.a.output.extra.1] -> job6.a.output2] completed</span>
+<span class="go">Completed Task = third_task</span>
+</pre></div>
+</div>
+</div></blockquote>
+</div>
+</div>
+<div class="section" id="multiple-dependencies-after-follows">
+<h2>Multiple dependencies after @follows<a class="headerlink" href="#multiple-dependencies-after-follows" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+<span class="kn">import</span> <span class="nn">time</span>
+<span class="kn">import</span> <span class="nn">random</span>
+
+<span class="c">#---------------------------------------------------------------</span>
+<span class="c"># Create pairs of input files</span>
+<span class="c">#</span>
+<span class="n">first_task_params</span> <span class="o">=</span> <span class="p">[</span>
+ <span class="p">[</span><span class="s">'job1.a.start'</span><span class="p">,</span> <span class="s">'job1.b.start'</span><span class="p">],</span>
+ <span class="p">[</span><span class="s">'job2.a.start'</span><span class="p">,</span> <span class="s">'job2.b.start'</span><span class="p">],</span>
+ <span class="p">[</span><span class="s">'job3.a.start'</span><span class="p">,</span> <span class="s">'job3.b.start'</span><span class="p">],</span>
+ <span class="p">]</span>
+<span class="n">second_task_params</span> <span class="o">=</span> <span class="p">[</span>
+ <span class="p">[</span><span class="s">'job4.a.start'</span><span class="p">,</span> <span class="s">'job4.b.start'</span><span class="p">],</span>
+ <span class="p">[</span><span class="s">'job5.a.start'</span><span class="p">,</span> <span class="s">'job5.b.start'</span><span class="p">],</span>
+ <span class="p">[</span><span class="s">'job6.a.start'</span><span class="p">,</span> <span class="s">'job6.b.start'</span><span class="p">],</span>
+ <span class="p">]</span>
+
+<span class="k">for</span> <span class="n">input_file_pairs</span> <span class="ow">in</span> <span class="n">first_task_params</span> <span class="o">+</span> <span class="n">second_task_params</span><span class="p">:</span>
+ <span class="k">for</span> <span class="n">input_file</span> <span class="ow">in</span> <span class="n">input_file_pairs</span><span class="p">:</span>
+ <span class="nb">open</span><span class="p">(</span><span class="n">input_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+
+
+
+<span class="c">#---------------------------------------------------------------</span>
+<span class="c">#</span>
+<span class="c"># first task</span>
+<span class="c">#</span>
+<span class="nd">@transform</span><span class="p">(</span><span class="n">first_task_params</span><span class="p">,</span> <span class="n">suffix</span><span class="p">(</span><span class="s">".start"</span><span class="p">),</span>
+ <span class="p">[</span><span class="s">".output.1"</span><span class="p">,</span>
+<span class="hll"> <span class="s">".output.extra.1"</span><span class="p">],</span>
+</span> <span class="s">"some_extra.string.for_example"</span><span class="p">,</span> <span class="mi">14</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">first_task</span><span class="p">(</span><span class="n">input_files</span><span class="p">,</span> <span class="n">output_file_pair</span><span class="p">,</span>
+ <span class="n">extra_parameter_str</span><span class="p">,</span> <span class="n">extra_parameter_num</span><span class="p">):</span>
+ <span class="k">for</span> <span class="n">output_file</span> <span class="ow">in</span> <span class="n">output_file_pair</span><span class="p">:</span>
+ <span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">):</span>
+ <span class="k">pass</span>
+ <span class="n">time</span><span class="o">.</span><span class="n">sleep</span><span class="p">(</span><span class="n">random</span><span class="o">.</span><span class="n">random</span><span class="p">())</span>
+
+
+
+<span class="c">#---------------------------------------------------------------</span>
+<span class="c">#</span>
+<span class="c"># second task</span>
+<span class="c">#</span>
+<span class="nd">@follows</span><span class="p">(</span><span class="s">"first_task"</span><span class="p">)</span>
+<span class="nd">@transform</span><span class="p">(</span><span class="n">second_task_params</span><span class="p">,</span> <span class="n">suffix</span><span class="p">(</span><span class="s">".start"</span><span class="p">),</span>
+ <span class="p">[</span><span class="s">".output.1"</span><span class="p">,</span>
+ <span class="s">".output.extra.1"</span><span class="p">],</span>
+ <span class="s">"some_extra.string.for_example"</span><span class="p">,</span> <span class="mi">14</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">second_task</span><span class="p">(</span><span class="n">input_files</span><span class="p">,</span> <span class="n">output_file_pair</span><span class="p">,</span>
+ <span class="n">extra_parameter_str</span><span class="p">,</span> <span class="n">extra_parameter_num</span><span class="p">):</span>
+ <span class="k">for</span> <span class="n">output_file</span> <span class="ow">in</span> <span class="n">output_file_pair</span><span class="p">:</span>
+ <span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">):</span>
+ <span class="k">pass</span>
+ <span class="n">time</span><span class="o">.</span><span class="n">sleep</span><span class="p">(</span><span class="n">random</span><span class="o">.</span><span class="n">random</span><span class="p">())</span>
+
+
+<span class="c">#---------------------------------------------------------------</span>
+<span class="c">#</span>
+<span class="c"># third task</span>
+<span class="c">#</span>
+<span class="c"># depends on both first_task() and second_task()</span>
+<span class="nd">@transform</span><span class="p">([</span><span class="n">first_task</span><span class="p">,</span> <span class="n">second_task</span><span class="p">],</span> <span class="n">suffix</span><span class="p">(</span><span class="s">".output.1"</span><span class="p">),</span> <span class="s">".output2"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">third_task</span><span class="p">(</span><span class="n">input_files</span><span class="p">,</span> <span class="n">output_file</span><span class="p">):</span>
+ <span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">):</span> <span class="k">pass</span>
+
+
+<span class="c">#---------------------------------------------------------------</span>
+<span class="c">#</span>
+<span class="c"># Run</span>
+<span class="c">#</span>
+<span class="n">pipeline_run</span><span class="p">([</span><span class="n">third_task</span><span class="p">],</span> <span class="n">multiprocess</span> <span class="o">=</span> <span class="mi">6</span><span class="p">)</span>
+</pre></div>
+</div>
+</div></blockquote>
+<div class="section" id="resulting-output-first-task-completes-before-second-task">
+<h3>Resulting Output: <tt class="docutils literal"><span class="pre">first_task</span></tt> completes before <tt class="docutils literal"><span class="pre">second_task</span></tt><a class="headerlink" href="#resulting-output-first-task-completes-before-second-task" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><div class="highlight-pycon"><div class="highlight"><pre><span class="gp">>>> </span><span class="n">pipeline_run</span><span class="p">([</span><span class="n">third_task</span><span class="p">],</span> <span class="n">multiprocess</span> <span class="o">=</span> <span class="mi">6</span><span class="p">)</span>
+<span class="go"> Job = [[job2.a.start, job2.b.start] -> [job2.a.output.1, job2.a.output.extra.1], some_extra.string.for_example, 14] completed</span>
+<span class="go"> Job = [[job3.a.start, job3.b.start] -> [job3.a.output.1, job3.a.output.extra.1], some_extra.string.for_example, 14] completed</span>
+<span class="go"> Job = [[job1.a.start, job1.b.start] -> [job1.a.output.1, job1.a.output.extra.1], some_extra.string.for_example, 14] completed</span>
+<span class="go">Completed Task = first_task</span>
+<span class="go"> Job = [[job4.a.start, job4.b.start] -> [job4.a.output.1, job4.a.output.extra.1], some_extra.string.for_example, 14] completed</span>
+<span class="go"> Job = [[job6.a.start, job6.b.start] -> [job6.a.output.1, job6.a.output.extra.1], some_extra.string.for_example, 14] completed</span>
+<span class="go"> Job = [[job5.a.start, job5.b.start] -> [job5.a.output.1, job5.a.output.extra.1], some_extra.string.for_example, 14] completed</span>
+<span class="go">Completed Task = second_task</span>
+<span class="go"> Job = [[job1.a.output.1, job1.a.output.extra.1] -> job1.a.output2] completed</span>
+<span class="go"> Job = [[job2.a.output.1, job2.a.output.extra.1] -> job2.a.output2] completed</span>
+<span class="go"> Job = [[job3.a.output.1, job3.a.output.extra.1] -> job3.a.output2] completed</span>
+<span class="go"> Job = [[job4.a.output.1, job4.a.output.extra.1] -> job4.a.output2] completed</span>
+<span class="go"> Job = [[job5.a.output.1, job5.a.output.extra.1] -> job5.a.output2] completed</span>
+<span class="go"> Job = [[job6.a.output.1, job6.a.output.extra.1] -> job6.a.output2] completed</span>
+</pre></div>
+</div>
+</div></blockquote>
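+<p>Note that <tt class="docutils literal"><span class="pre">@follows("first_task")</span></tt> names the upstream task as a string.
+<em>Ruffus</em> equally accepts the task function object itself (<tt class="docutils literal"><span class="pre">@follows(first_task)</span></tt>);
+the quoted form is mainly useful when the upstream task is defined later in the file.
+The following is only an illustrative sketch, not part of this example: the task and file names are invented.</p>
+<div class="highlight-python"><div class="highlight"><pre>from ruffus import *
+
+# "make_data" is only defined further down in the file, so it is named as a string
+@follows("make_data")
+@transform(["x.start"], suffix(".start"), ".out")
+def use_data(input_file, output_file):
+    open(output_file, "w").close()
+
+@originate("x.start")
+def make_data(output_file):
+    open(output_file, "w").close()
+
+pipeline_run([use_data])
+</pre></div>
+</div>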
+</div>
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="../../contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#"><strong>Chapter 3</strong>: Python Code for More on <tt class="docutils literal"><span class="pre">@transform</span></tt>-ing data</a><ul>
+<li><a class="reference internal" href="#producing-several-items-files-per-job">Producing several items / files per job</a><ul>
+<li><a class="reference internal" href="#resulting-output">Resulting Output</a></li>
+</ul>
+</li>
+<li><a class="reference internal" href="#defining-tasks-function-out-of-order">Defining tasks function out of order</a><ul>
+<li><a class="reference internal" href="#id1">Resulting Output</a></li>
+</ul>
+</li>
+<li><a class="reference internal" href="#multiple-dependencies">Multiple dependencies</a><ul>
+<li><a class="reference internal" href="#id2">Resulting Output</a></li>
+</ul>
+</li>
+<li><a class="reference internal" href="#multiple-dependencies-after-follows">Multiple dependencies after @follows</a><ul>
+<li><a class="reference internal" href="#resulting-output-first-task-completes-before-second-task">Resulting Output: <tt class="docutils literal"><span class="pre">first_task</span></tt> completes before <tt class="docutils literal"><span class="pre">second_task</span></tt></a></li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="transform_code.html"
+ title="previous chapter"><strong>Chapter 1</strong>: Python Code for Transforming data in a pipeline with <tt class="docutils literal"><span class="pre">@transform</span></tt></a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="originate_code.html"
+ title="next chapter"><strong>Chapter 4</strong>: Python Code for Creating files with <tt class="docutils literal"><span class="pre">@originate</span></tt></a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../../_sources/tutorials/new_tutorial/transform_in_parallel_code.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="../../decorators/decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="../../decorators/originate.html">@originate</a> </li>
+ <li><a href="../../decorators/split.html">@split</a> </li>
+ <li><a href="../../decorators/transform.html">@transform</a> </li>
+ <li><a href="../../decorators/merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="../../decorators/subdivide.html">@subdivide</a> </li>
+ <li><a href="../../decorators/transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="../../decorators/collate.html">@collate</a> </li>
+ <li><a href="../../decorators/collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="../../decorators/graphviz.html">@graphviz</a> </li>
+ <li><a href="../../decorators/mkdir.html">@mkdir</a> </li>
+ <li><a href="../../decorators/follows.html">@follows / mkdir</a> </li>
+ <li><a href="../../decorators/posttask.html">@posttask touch_file</a> </li>
+ <li><a href="../../decorators/active_if.html">@active_if</a> </li>
+ <li><a href="../../decorators/jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="../../decorators/product.html">@product </a> </li>
+ <li><a href="../../decorators/permutations.html">@permutations </a> </li>
+ <li><a href="../../decorators/combinations.html">@combinations </a> </li>
+ <li><a href="../../decorators/combinations_with_replacement.html">@combinations_ _with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="../../decorators/decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="../../decorators/files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="../../decorators/parallel.html">@parallel</a> </li>
+ <li><a href="../../decorators/check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="../../decorators/indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="originate_code.html" title="Chapter 4: Python Code for Creating files with @originate"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="transform_code.html" title="Chapter 1: Python Code for Transforming data in a pipeline with @transform"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="introduction.html">Manual</a> / </li>
+ <li><a href="manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/tutorials/simple_tutorial/simple_tutorial.html b/doc/_build/html/tutorials/simple_tutorial/simple_tutorial.html
new file mode 100644
index 0000000..0a0d5fd
--- /dev/null
+++ b/doc/_build/html/tutorials/simple_tutorial/simple_tutorial.html
@@ -0,0 +1,252 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>A simple tutorial: 8 steps to Ruffus — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../../index.html" />
+ <link rel="next" title="Step 1: An introduction to Ruffus pipelines" href="step1_follows.html" />
+ <link rel="prev" title="Recipes" href="../../recipes.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="step1_follows.html" title="Step 1: An introduction to Ruffus pipelines"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="../../recipes.html" title="Recipes"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="../new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <div class="section" id="a-simple-tutorial-8-steps-to-ruffus">
+<span id="simple-tutorial"></span><h1>A simple tutorial: 8 steps to <em>Ruffus</em><a class="headerlink" href="#a-simple-tutorial-8-steps-to-ruffus" title="Permalink to this headline">¶</a></h1>
+<div class="section" id="table-of-contents">
+<h2>Table of Contents<a class="headerlink" href="#table-of-contents" title="Permalink to this headline">¶</a></h2>
+<div class="section" id="features">
+<h3>Features<a class="headerlink" href="#features" title="Permalink to this headline">¶</a></h3>
+<p><strong>Ruffus</strong> provides automatic support for the following; the short code sketch after this list shows where each feature appears in practice:</p>
+<blockquote>
+<div><blockquote>
+<div><ul class="simple">
+<li>Managing dependencies</li>
+<li>Parallel jobs</li>
+<li>Re-starting from arbitrary points, especially after errors</li>
+<li>Display of the pipeline as a flowchart</li>
+<li>Reporting</li>
+</ul>
+</div></blockquote>
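+<p>The sketch below is not part of the tutorial itself; it is only meant to show where each of
+these features surfaces in the API. The task names and file names are invented, and rendering
+the flowchart assumes that graphviz (<tt class="docutils literal"><span class="pre">dot</span></tt>) is installed.</p>
+<div class="highlight-python"><div class="highlight"><pre>import sys
+from ruffus import *
+
+# two trivial chained tasks (hypothetical names and files)
+@originate(["a.start", "b.start"])
+def create_files(output_file):
+    open(output_file, "w").close()
+
+@transform(create_files, suffix(".start"), ".finished")
+def process_files(input_file, output_file):
+    open(output_file, "w").close()
+
+# dependency management + parallel jobs: only out-of-date work is (re-)run
+pipeline_run([process_files], multiprocess=2)
+
+# display the pipeline as a flowchart (uses graphviz)
+pipeline_printout_graph(open("flowchart.svg", "w"), "svg", [process_files])
+
+# reporting: show which tasks and jobs are up to date or would be re-run
+pipeline_printout(sys.stdout, [process_files], verbose=3)
+</pre></div>
+</div>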
+<div class="line-block">
+<div class="line">This tutorial has seven steps which cover all the core functionality of <em>Ruffus</em>.</div>
+<div class="line">Don’t worry if steps 1 and 2 seem a bit slow: Once you get used to <strong>Ruffus</strong>
+steps 4-8 will be a breeze.</div>
+</div>
+<p>You can click on “previous” and “next” at the top and bottom of each page to
+navigate through the tutorial.</p>
+</div></blockquote>
+</div>
+<div class="section" id="the-first-steps-1-4">
+<h3>The first steps (1-4)<a class="headerlink" href="#the-first-steps-1-4" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><p>The first half of the tutorial will show you how to:</p>
+<ol class="arabic simple">
+<li><a class="reference internal" href="step1_follows.html#simple-tutorial-1st-step"><em>Chain tasks (functions) together into a pipeline</em></a></li>
+<li><a class="reference internal" href="step2.html#simple-tutorial-2nd-step"><em>Provide parameters to run jobs in parallel</em></a></li>
+<li><a class="reference internal" href="step3_run_pipeline.html#simple-tutorial-3rd-step"><em>Tracing through your new pipeline</em></a></li>
+<li><a class="reference internal" href="step4_run_pipeline_graphically.html#simple-tutorial-4th-step-graphical"><em>Using flowcharts</em></a></li>
+</ol>
+</div></blockquote>
+</div>
+<div class="section" id="a-worked-example-steps-5-8">
+<h3>A worked example (steps 5-8)<a class="headerlink" href="#a-worked-example-steps-5-8" title="Permalink to this headline">¶</a></h3>
+<blockquote>
+<div><p>The second half of the tutorial is a worked example that calculates
+the sample variance of 10,000 random numbers; a rough sketch of the overall pattern follows the list below. This shows you how to:</p>
+<ol class="arabic simple" start="5">
+<li><a class="reference internal" href="step5_split.html#simple-tutorial-5th-step"><em>Split up a large problem into smaller chunks</em></a></li>
+<li><a class="reference internal" href="step6_transform.html#simple-tutorial-6th-step"><em>Calculate partial solutions in parallel</em></a></li>
+<li><a class="reference internal" href="step7_merge.html#simple-tutorial-7th-step"><em>Re-combine the partial solutions into the final result</em></a></li>
+<li><a class="reference internal" href="step8_posttask.html#simple-tutorial-8th-step"><em>Automatically signal the completion of each step of our pipeline</em></a></li>
+</ol>
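+<p>The worked example follows the classic split / transform / merge shape. The sketch below is
+not the tutorial’s own code: the file names (<tt class="docutils literal"><span class="pre">random_numbers.list</span></tt>,
+<tt class="docutils literal"><span class="pre">*.chunks</span></tt>, <tt class="docutils literal"><span class="pre">variance.result</span></tt>), the chunk size and the arithmetic
+are all invented for illustration.</p>
+<div class="highlight-python"><div class="highlight"><pre>from ruffus import *
+import glob, os
+
+# Step 5: split one big file of numbers into smaller, independently processable chunks
+@split("random_numbers.list", "*.chunks")
+def split_problem(input_file, output_files):
+    for old_chunk in glob.glob("*.chunks"):   # clear chunks left over from previous runs
+        os.unlink(old_chunk)
+    numbers = open(input_file).read().split()
+    for ii in range(0, len(numbers), 1000):
+        chunk = open("%d.chunks" % (ii // 1000), "w")
+        chunk.write(" ".join(numbers[ii:ii + 1000]))
+        chunk.close()
+
+# Step 6: calculate a partial solution (sum, sum of squares, count) per chunk, in parallel
+@transform(split_problem, suffix(".chunks"), ".sums")
+def partial_sums(input_file, output_file):
+    values = [float(xx) for xx in open(input_file).read().split()]
+    summary = open(output_file, "w")
+    summary.write("%f %f %d\n" % (sum(values), sum(xx * xx for xx in values), len(values)))
+    summary.close()
+
+# Step 7: re-combine the partial solutions into a single (population) variance
+@merge(partial_sums, "variance.result")
+def calculate_variance(input_files, output_file):
+    total, total_sq, count = 0.0, 0.0, 0
+    for file_name in input_files:
+        ss, sq, cc = open(file_name).read().split()
+        total, total_sq, count = total + float(ss), total_sq + float(sq), count + int(cc)
+    open(output_file, "w").write("%s\n" % ((total_sq - total * total / count) / count))
+
+pipeline_run([calculate_variance], multiprocess=4)
+</pre></div>
+</div>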
+<p>This covers the core functionality of <em>Ruffus</em>.</p>
+</div></blockquote>
+</div>
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="../../contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#">A simple tutorial: 8 steps to <em>Ruffus</em></a><ul>
+<li><a class="reference internal" href="#table-of-contents">Table of Contents</a><ul>
+<li><a class="reference internal" href="#features">Features</a></li>
+<li><a class="reference internal" href="#the-first-steps-1-4">The first steps (1-4)</a></li>
+<li><a class="reference internal" href="#a-worked-example-steps-5-8">A worked example (steps 5-8)</a></li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="../../recipes.html"
+ title="previous chapter">Recipes</a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="step1_follows.html"
+ title="next chapter">Step 1: An introduction to Ruffus pipelines</a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../../_sources/tutorials/simple_tutorial/simple_tutorial.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="../../decorators/decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="../../decorators/originate.html">@originate</a> </li>
+ <li><a href="../../decorators/split.html">@split</a> </li>
+ <li><a href="../../decorators/transform.html">@transform</a> </li>
+ <li><a href="../../decorators/merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="../../decorators/subdivide.html">@subdivide</a> </li>
+ <li><a href="../../decorators/transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="../../decorators/collate.html">@collate</a> </li>
+ <li><a href="../../decorators/collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="../../decorators/graphviz.html">@graphviz</a> </li>
+ <li><a href="../../decorators/mkdir.html">@mkdir</a> </li>
+ <li><a href="../../decorators/follows.html">@follows / mkdir</a> </li>
+ <li><a href="../../decorators/posttask.html">@posttask touch_file</a> </li>
+ <li><a href="../../decorators/active_if.html">@active_if</a> </li>
+ <li><a href="../../decorators/jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="../../decorators/product.html">@product </a> </li>
+ <li><a href="../../decorators/permutations.html">@permutations </a> </li>
+ <li><a href="../../decorators/combinations.html">@combinations </a> </li>
+ <li><a href="../../decorators/combinations_with_replacement.html">@combinations_ _with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="../../decorators/decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="../../decorators/files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="../../decorators/parallel.html">@parallel</a> </li>
+ <li><a href="../../decorators/check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="../../decorators/indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="step1_follows.html" title="Step 1: An introduction to Ruffus pipelines"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="../../recipes.html" title="Recipes"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="../new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/tutorials/simple_tutorial/simple_tutorial_code.html b/doc/_build/html/tutorials/simple_tutorial/simple_tutorial_code.html
new file mode 100644
index 0000000..550c84d
--- /dev/null
+++ b/doc/_build/html/tutorials/simple_tutorial/simple_tutorial_code.html
@@ -0,0 +1,213 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>Code for the simple tutorial: 8 steps to Ruffus — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../../index.html" />
+ <link rel="next" title="Ruffus Decorators" href="../../decorators/decorators.html" />
+ <link rel="prev" title="Code for the manual tutorial" href="../manual/manual_code.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="../../decorators/decorators.html" title="Ruffus Decorators"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="../manual/manual_code.html" title="Code for the manual tutorial"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="../new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <div class="section" id="code-for-the-simple-tutorial-8-steps-to-ruffus">
+<span id="simple-tutorial-code"></span><h1>Code for the simple tutorial: 8 steps to <em>Ruffus</em><a class="headerlink" href="#code-for-the-simple-tutorial-8-steps-to-ruffus" title="Permalink to this headline">¶</a></h1>
+<blockquote>
+<div><ul class="simple">
+<li><a class="reference internal" href="simple_tutorial.html#simple-tutorial"><em>A simple tutorial</em></a></li>
+</ul>
+</div></blockquote>
+<div class="section" id="table-of-contents">
+<h2>Table of Contents<a class="headerlink" href="#table-of-contents" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><a class="reference internal" href="step1_follows.html#simple-tutorial-1st-step"><em>Chain tasks (functions) together into a pipeline</em></a>
+<a class="reference internal" href="step2.html#simple-tutorial-2nd-step"><em>Provide parameters to run jobs in parallel</em></a>
+<a class="reference internal" href="step3_run_pipeline.html#simple-tutorial-3rd-step"><em>Tracing through your new pipeline</em></a>
+<a class="reference internal" href="step4_run_pipeline_graphically.html#simple-tutorial-4th-step-graphical"><em>Using flowcharts</em></a>
+<a class="reference internal" href="step5_split.html#simple-tutorial-5th-step"><em>Split up a large problem into smaller chunks</em></a>
+<a class="reference internal" href="step6_transform.html#simple-tutorial-6th-step"><em>Calculate partial solutions in parallel</em></a>
+<a class="reference internal" href="step7_merge.html#simple-tutorial-7th-step"><em>Re-combine the partial solutions into the final result</em></a>
+<a class="reference internal" href="step8_posttask.html#simple-tutorial-8th-step"><em>Automatically signal the completion of each step of our pipeline</em></a></div></blockquote>
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="../../contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#">Code for the simple tutorial: 8 steps to <em>Ruffus</em></a><ul>
+<li><a class="reference internal" href="#table-of-contents">Table of Contents</a></li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="../manual/manual_code.html"
+ title="previous chapter">Code for the manual tutorial</a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="../../decorators/decorators.html"
+ title="next chapter">Ruffus Decorators</a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../../_sources/tutorials/simple_tutorial/simple_tutorial_code.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="../../decorators/decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="../../decorators/originate.html">@originate</a> </li>
+ <li><a href="../../decorators/split.html">@split</a> </li>
+ <li><a href="../../decorators/transform.html">@transform</a> </li>
+ <li><a href="../../decorators/merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="../../decorators/subdivide.html">@subdivide</a> </li>
+ <li><a href="../../decorators/transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="../../decorators/collate.html">@collate</a> </li>
+ <li><a href="../../decorators/collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="../../decorators/graphviz.html">@graphviz</a> </li>
+ <li><a href="../../decorators/mkdir.html">@mkdir</a> </li>
+ <li><a href="../../decorators/follows.html">@follows / mkdir</a> </li>
+ <li><a href="../../decorators/posttask.html">@posttask touch_file</a> </li>
+ <li><a href="../../decorators/active_if.html">@active_if</a> </li>
+ <li><a href="../../decorators/jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="../../decorators/product.html">@product </a> </li>
+ <li><a href="../../decorators/permutations.html">@permutations </a> </li>
+ <li><a href="../../decorators/combinations.html">@combinations </a> </li>
+ <li><a href="../../decorators/combinations_with_replacement.html">@combinations_ _with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="../../decorators/decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="../../decorators/files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="../../decorators/parallel.html">@parallel</a> </li>
+ <li><a href="../../decorators/check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="../../decorators/indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="../../decorators/decorators.html" title="Ruffus Decorators"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="../manual/manual_code.html" title="Code for the manual tutorial"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="../new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/tutorials/simple_tutorial/step1_follows.html b/doc/_build/html/tutorials/simple_tutorial/step1_follows.html
new file mode 100644
index 0000000..abb723b
--- /dev/null
+++ b/doc/_build/html/tutorials/simple_tutorial/step1_follows.html
@@ -0,0 +1,395 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>Step 1: An introduction to Ruffus pipelines — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../../index.html" />
+ <link rel="next" title="Step 2: @transform-ing data in a pipeline" href="step2.html" />
+ <link rel="prev" title="A simple tutorial: 8 steps to Ruffus" href="simple_tutorial.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="step2.html" title="Step 2: @transform-ing data in a pipeline"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="simple_tutorial.html" title="A simple tutorial: 8 steps to Ruffus"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="../new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <blockquote id="simple-tutorial-1st-step">
+<div><ul class="simple">
+<li><a class="reference internal" href="simple_tutorial.html#simple-tutorial"><em>Simple tutorial overview</em></a></li>
+</ul>
+</div></blockquote>
+<div class="section" id="step-1-an-introduction-to-ruffus-pipelines">
+<h1>Step 1: An introduction to Ruffus pipelines<a class="headerlink" href="#step-1-an-introduction-to-ruffus-pipelines" title="Permalink to this headline">¶</a></h1>
+<div class="section" id="overview">
+<h2>Overview<a class="headerlink" href="#overview" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><svg version="1.0" id="Layer_1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" x="0" y="0"
+ width="731.3pt"
+ height="83pt"
+ viewBox="0 0 731.3 83">
+ <defs id="defs3287">
+ <marker refX="0" refY="0" orient="auto" id="Arrow2Mend" style="overflow:visible">
+ <path d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z" transform="scale(-0.6,-0.6)" id="path4124" style="fill-rule:evenodd;stroke-width:0.625;stroke-linejoin:round" />
+ </marker>
+ <marker refX="0" refY="0" orient="auto" id="Arrow2Lend" style="overflow:visible">
+ <path d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z" transform="matrix(-1.1,0,0,-1.1,-1.1,0)" id="path4118" style="fill-rule:evenodd;stroke-width:0.625;stroke-linejoin:round" />
+ </marker>
+ <marker refX="0" refY="0" orient="auto" id="Arrow1Lend" style="overflow:visible">
+ <path d="M 0,0 5,-5 -12.5,0 5,5 0,0 z" transform="matrix(-0.8,0,0,-0.8,-10,0)" id="path4100" style="fill-rule:evenodd;stroke:#000000;stroke-width:1pt" />
+ </marker>
+ <marker refX="0" refY="0" orient="auto" id="Arrow2Mend-4" style="overflow:visible">
+ <path d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z" transform="scale(-0.6,-0.6)" id="path4124-8" style="fill-rule:evenodd;stroke-width:0.625;stroke-linejoin:round" />
+ </marker>
+ <marker refX="0" refY="0" orient="auto" id="Arrow2Mend-1" style="overflow:visible">
+ <path d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z" transform="scale(-0.6,-0.6)" id="path4124-1" style="fill-rule:evenodd;stroke-width:0.625;stroke-linejoin:round" />
+ </marker>
+ <marker refX="0" refY="0" orient="auto" id="Arrow2Mend-1-1" style="overflow:visible">
+ <path d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z" transform="scale(-0.6,-0.6)" id="path4124-1-8" style="fill-rule:evenodd;stroke-width:0.625;stroke-linejoin:round" />
+ </marker>
+ <marker refX="0" refY="0" orient="auto" id="Arrow2Mend-1-2" style="overflow:visible">
+ <path d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z" transform="scale(-0.6,-0.6)" id="path4124-1-7" style="fill-rule:evenodd;stroke-width:0.625;stroke-linejoin:round" />
+ </marker>
+ <marker refX="0" refY="0" orient="auto" id="Arrow2Mend-1-23" style="overflow:visible">
+ <path d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z" transform="scale(-0.6,-0.6)" id="path4124-1-3" style="fill-rule:evenodd;stroke-width:0.625;stroke-linejoin:round" />
+ </marker>
+ </defs>
+ <g transform="translate(-14.608261,-32.693481)" id="layer1">
+ <rect width="89.826035" height="65.392792" x="21.063463" y="39.148708" id="rect3309" style="fill:#ffff00;fill-opacity:1;stroke:#ff0000;stroke-width:0.41040453" />
+ <text x="64.540756" y="62.738293" id="text3311" xml:space="preserve" style="font-size:14px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:Monospace;-inkscape-font-specification:Monospace"><tspan x="67.314194" y="62.738293" id="tspan3313" style="font-size:20px;font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;text-align:center; [...]
+ <text x="118.47811" y="104.62877" id="text4956" xml:space="preserve" style="font-size:21.02927971px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#ff0000;fill-opacity:1;stroke:none;font-family:Monospace;-inkscape-font-specification:Monospace"><tspan x="118.47811" y="104.62877" id="tspan4958" style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;fill:#ff0000;font-family [...]
+ <text x="345.62097" y="104.98591" id="text4956-1" xml:space="preserve" style="font-size:21.02927971px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#ff0000;fill-opacity:1;stroke:none;font-family:Monospace;-inkscape-font-specification:Monospace"><tspan x="345.62097" y="104.98591" id="tspan4958-7" style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;fill:#ff0000;font-fa [...]
+ <text x="575.62097" y="103.03347" id="text4956-2" xml:space="preserve" style="font-size:21.02927971px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#ff0000;fill-opacity:1;stroke:none;font-family:Monospace;-inkscape-font-specification:Monospace"><tspan x="575.62097" y="103.03347" id="tspan4958-3" style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;fill:#ff0000;font-fa [...]
+ <path d="m 110.71429,72.362182 87.14285,0" id="path5080" style="fill:#ff0000;stroke:#ff0000;stroke-width:4;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;marker-end:url(#Arrow2Mend-1)" />
+ <rect width="131.88234" height="65.306244" x="206.10674" y="39.191959" id="rect3309-6" style="fill:#ffff00;fill-opacity:1;stroke:#ff0000;stroke-width:0.49695465" />
+ <text x="273.11218" y="65.772057" id="text3311-2-2" xml:space="preserve" style="font-size:14px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:Monospace;-inkscape-font-specification:Monospace"><tspan x="273.11218" y="65.772057" id="tspan3313-4-2" style="font-size:20px;font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;text-align [...]
+ <path d="m 338.57143,72.362177 87.14285,0" id="path5080-9" style="fill:#ff0000;stroke:#ff0000;stroke-width:4;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;marker-end:url(#Arrow2Mend-1)" />
+ <rect width="131.88234" height="65.306244" x="433.96387" y="39.191959" id="rect3309-6-4" style="fill:#ffff00;fill-opacity:1;stroke:#ff0000;stroke-width:0.49695465" />
+ <text x="500.96933" y="65.772057" id="text3311-2-2-1" xml:space="preserve" style="font-size:14px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:Monospace;-inkscape-font-specification:Monospace"><tspan x="500.96933" y="65.772057" id="tspan3313-4-2-1" style="font-size:20px;font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;text-a [...]
+ <path d="m 566.42857,72.362178 87.14285,0" id="path5080-9-8" style="fill:#ff0000;stroke:#ff0000;stroke-width:4;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;marker-end:url(#Arrow2Mend-1)" />
+ <flowRoot id="flowRoot5373" xml:space="preserve" style="font-size:14px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:Monospace;-inkscape-font-specification:Monospace"><flowRegion id="flowRegion5375"><rect width="56.42857" height="339.28571" x="214.28572" y="123.07647" id="rect5377" /></flowRegion><flowPara id="flowPara5379"></flowPara></flowRoot> [...]
+ <text x="700.25507" y="65.071579" id="text3311-2-8" xml:space="preserve" style="font-size:14px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:Monospace;-inkscape-font-specification:Monospace"><tspan x="700.25507" y="65.071579" id="tspan3315-5-7" style="font-size:20px;font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;text-align [...]
+ </g>
+<p>Computational pipelines transform your data in stages until the final result is produced. One easy way to understand pipelines is to imagine your data flowing through a series of pipes until it reaches its final destination. Even quite complicated processes can be simplified if we break them down into simple stages. Of course, it helps if we can visualise the whole process.</p>
+<p>Ruffus is a way of automating the plumbing in your pipeline: you supply the Python functions that perform the data transformations, and tell Ruffus how these pipeline <tt class="docutils literal"><span class="pre">task</span></tt> functions are connected. Ruffus makes sure that the right data flows down your pipeline in the right way at the right time.</p>
+<div class="admonition note">
+<p class="first admonition-title">Note</p>
+<p class="last">Ruffus refers to each stage of your pipeline as a <a class="reference internal" href="../../glossary.html#term-task"><em class="xref std std-term">task</em></a>.</p>
+</div>
+</div></blockquote>
+</div>
+<div class="section" id="a-gentle-introduction-to-ruffus-syntax">
+<h2>A gentle introduction to Ruffus syntax<a class="headerlink" href="#a-gentle-introduction-to-ruffus-syntax" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><div class="line-block">
+<div class="line">Let us start with the usual “Hello World” programme.</div>
+<div class="line">We have the following two python functions which
+we would like to turn into an automatic pipeline:</div>
+</div>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="k">def</span> <span class="nf">first_task</span><span class="p">():</span>
+ <span class="k">print</span> <span class="s">"Hello "</span>
+
+<span class="k">def</span> <span class="nf">second_task</span><span class="p">():</span>
+ <span class="k">print</span> <span class="s">"world"</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p>The simplest <strong>Ruffus</strong> pipeline would look like this:</p>
+<blockquote>
+<div></div></blockquote>
+<svg version="1.0" id="Layer_1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" x="0" y="0"
+ width="411pt"
+ height="166pt"
+ viewBox="0 0 411 166">
+<rect width="193.46577" height="153.25462" x="113.1341" y="6.25" id="rect3523-1" style="fill:#eeffcc" /><g transform="matrix(0.74399708,0,0,0.74399708,123.4741,7.2693622)" id="g6703" style="font-size:14px;font-family:monospace"><text x="0" y="14" id="text6705" xml:space="preserve"><tspan id="tspan6707" style="font-weight:bold;fill:#008000">from</tspan> <tspan id="tspan6709" style="font-weight:bold;fill:#0e84b5">ruffus</tspan> <tspan id="tspan6711" style="font-weight:bold;fill:#008000">im [...]
+<text x="0" y="33" id="text6715" xml:space="preserve" />
+<text x="0" y="52" id="text6717" xml:space="preserve"><tspan id="tspan6719" style="font-weight:bold;fill:#008000">def</tspan> <tspan id="tspan6721" style="font-weight:bold;fill:#0060b0">first_task</tspan>():</text>
+<text x="0" y="71" id="text6723" xml:space="preserve"> <tspan id="tspan6725" style="font-weight:bold;fill:#008000">print</tspan> "Hello "</text>
+<text x="0" y="90" id="text6727" xml:space="preserve" />
+<text x="0" y="109" id="text6729" xml:space="preserve"><tspan id="tspan6731" style="font-weight:bold;fill:#505050">@follows</tspan>(first_task)</text>
+<text x="0" y="128" id="text6733" xml:space="preserve"><tspan id="tspan6735" style="font-weight:bold;fill:#008000">def</tspan> <tspan id="tspan6737" style="font-weight:bold;fill:#0060b0">second_task</tspan>():</text>
+<text x="0" y="147" id="text6739" xml:space="preserve"> <tspan id="tspan6741" style="font-weight:bold;fill:#008000">print</tspan> "world"</text>
+<text x="0" y="166" id="text6743" xml:space="preserve" />
+<text x="0" y="185" id="text6745" xml:space="preserve">pipeline_run([second_task])</text>
+<text x="0" y="204" id="text6747" xml:space="preserve" />
+</g><g transform="matrix(0,-1.0740862,0.50028548,0,83.609122,151.75772)" id="g3645-7" style="fill:#ff0000;stroke:#ff0000;stroke-opacity:1"><line x1="125.896" y1="53.333" x2="125.896" y2="15.667" id="line3647-4" style="fill:#ff0000;stroke:#ff0000;stroke-opacity:1" /><g id="g3649-0" style="fill:#ff0000;stroke:#ff0000;stroke-opacity:1"><line stroke-miterlimit="10" x1="125.896" y1="49.028" x2="125.896" y2="15.667" id="line3651-9" style="fill:#ff0000;stroke:#ff0000;stroke-miterlimit:10;stroke [...]
+<path d="m 295.24733,142.14802 c 0,3.84316 -10.60785,6.95911 -23.6936,6.95911 H 138.38975 c -13.08581,0 -23.69366,-3.11595 -23.69366,-6.95911 l 0,0 c 0,-3.84321 10.60785,-6.95927 23.69366,-6.95927 h 133.16146 c 13.08587,0 23.69612,3.11606 23.69612,6.95927 l 0,0 z" id="path3671-9" style="fill:none;stroke:#ff0000;stroke-width:1.07262194;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none" /><path d="m 254.65378,15.578563 c 0,3.87736 -8.2265,7.02106 -18.37473,7.02106 H 133.00887 c -1 [...]
+<line style="fill:#ff0000;stroke:#ff0000;stroke-width:0.73304141;stroke-opacity:1" id="line3647-8" y2="83.510956" x2="78.575699" y1="83.510956" x1="97.419533" /><g transform="matrix(0,-1.0740862,0.50028548,0,70.737661,218.73401)" id="g3649-6" style="fill:#008000;stroke:#008000;stroke-opacity:1"><line style="fill:#008000;stroke:#008000;stroke-miterlimit:10;stroke-opacity:1" id="line3651-5" y2="15.667" x2="125.896" y1="49.028" x1="125.896" stroke-miterlimit="10" /><g id="g3653-0" style="fi [...]
+<text x="270.09064" y="129.21878" transform="scale(1.1082192,0.90234857)" id="text7608-0" xml:space="preserve" style="font-size:43.24214554px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:Monospace;-inkscape-font-specification:Monospace"><tspan x="270.09064" y="129.21878" id="tspan7610-6">}</tspan></text>
+<text x="330.33087" y="60.88369" id="text7633" xml:space="preserve" style="font-size:15.01670647px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:Monospace;-inkscape-font-specification:Monospace"><tspan x="330.33087" y="60.88369" id="tspan7635" style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;fill:#0000ff;font-family:Ari [...]
+<text x="6.2617145" y="89.451149" id="text7600" xml:space="preserve" style="font-size:15.01670647px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:Monospace;-inkscape-font-specification:Monospace"><tspan x="6.2617145" y="89.451149" id="tspan7602" style="font-size:12.87146473px;fill:#008000;font-family:arial;-inkscape-font-specification:arial">2. Decorate< [...]
+</svg><p>The functions which do the actual work of each stage of the pipeline remain unchanged.
+The role of <strong>Ruffus</strong> is to make sure these functions are called in the right order,
+with the right parameters, running in parallel using multiprocessing if desired.</p>
+<p>There are three simple parts to building a <strong>ruffus</strong> pipeline (the complete example from the figure above is repeated as plain text after this list):</p>
+<blockquote>
+<div><ol class="arabic simple">
+<li>importing ruffus</li>
+<li>“Decorating” functions which are part of the pipeline</li>
+<li>Running the pipeline!</li>
+</ol>
+</div></blockquote>
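+<p>For reference, here is the complete pipeline from the figure above, as plain, copy-and-pasteable text:</p>
+<div class="highlight-python"><div class="highlight"><pre>from ruffus import *
+
+def first_task():
+    print "Hello "
+
+@follows(first_task)
+def second_task():
+    print "world"
+
+pipeline_run([second_task])
+</pre></div>
+</div>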
+</div></blockquote>
+</div>
+<div class="section" id="decorators">
+<span id="index-0"></span><h2>“Decorators”<a class="headerlink" href="#decorators" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>You need to tag or <a class="reference internal" href="../../glossary.html#term-decorator"><em class="xref std std-term">decorate</em></a> existing code to tell <strong>Ruffus</strong> that it is part
+of the pipeline.</p>
+<div class="admonition note">
+<p class="first admonition-title">Note</p>
+<p>python <a class="reference internal" href="../../glossary.html#term-decorator"><em class="xref std std-term">decorator</em></a>s are ways to tag or mark out functions.</p>
+<p>They start with an <tt class="docutils literal"><span class="pre">@</span></tt> prefix and take a number of parameters in parentheses.</p>
+<div class="last"><svg version="1.0" id="Layer_1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" x="0" y="0"
+ width="249.5pt" height="67.5pt" viewBox="0 0 249.5 67.5">
+<g transform="scale(1)">
+ <rect x="4.5" y="14.667" fill="#eeffcc" stroke="#016735" stroke-width="0.25" stroke-miterlimit="10" width="157" height="52.833"/>
+ <rect x="3.25" y="14.667" fill="#eeffcc" width="159.5" height="52.833"/>
+ <text transform="matrix(1 0 0 1 14.5 33.6177)"><tspan x="0" y="0" font-family="'Courier'" font-weight="bold" font-size="12">@follows</tspan><tspan x="57.609" y="0" font-family="'Courier'" font-size="12">(first_task)</tspan><tspan x="0" y="14.4" fill="#006838" font-family="'Courier'" font-weight="bold" font-size="12">def</tspan><tspan x="21.604" y="14.4" font-family="'Courier'" font-size="12"> second_task():</tspan><tspan x="0" y="28.8" font-family="'Courier'" font-size="12"> &qu [...]
+ <path fill="none" stroke="#ED1C24" stroke-miterlimit="10" d="M73.25,29.762c0,4.688-3.731,8.488-8.333,8.488H18.083
+ c-4.602,0-8.333-3.8-8.333-8.488l0,0c0-4.688,3.731-8.488,8.333-8.488h46.834C69.519,21.274,73.25,25.075,73.25,29.762L73.25,29.762
+ z"/>
+ <g>
+ <g>
+ <line fill="none" stroke="#FF0000" stroke-miterlimit="10" x1="74.775" y1="20.142" x2="106" y2="7.5"/>
+ <g>
+ <path fill="#ED1C24" d="M71.978,21.274c1.514-0.044,3.484,0.127,4.854,0.6l-1.689-1.881l-0.095-2.526
+ C74.392,18.759,73.097,20.253,71.978,21.274z"/>
+ </g>
+ </g>
+ </g>
+ <text transform="matrix(1 0 0 1 107.75 11.5)" fill="#FF0000" font-size="12">Decorator</text>
+ <text transform="matrix(1 0 0 1 170.75 50.75)"><tspan x="0" y="0" fill="#0000FF" font-size="12">Normal Python </tspan><tspan x="0" y="14.4" fill="#0000FF" font-size="12">Function</tspan></text>
+ <g>
+ <line fill="#0000FF" x1="166.5" y1="46.5" x2="147" y2="46.5"/>
+ <g>
+ <line fill="none" stroke="#0000FF" stroke-miterlimit="10" x1="166.5" y1="46.5" x2="150.018" y2="46.5"/>
+ <g>
+ <path fill="#0000FF" d="M147,46.5c1.42-0.527,3.182-1.426,4.273-2.378l-0.86,2.378l0.86,2.377
+ C150.182,47.925,148.42,47.026,147,46.5z"/>
+ </g>
+ </g>
+ </g>
+</g>
+</svg></div></div>
+<p>The <strong>ruffus</strong> decorator <a class="reference internal" href="../../decorators/follows.html#decorators-follows"><em>@follows</em></a> makes sure that
+<tt class="docutils literal"><span class="pre">second_task</span></tt> follows <tt class="docutils literal"><span class="pre">first_task</span></tt>.</p>
+<div class="line-block">
+<div class="line">Multiple <a class="reference internal" href="../../glossary.html#term-decorator"><em class="xref std std-term">decorator</em></a>s can be used for each <a class="reference internal" href="../../glossary.html#term-task"><em class="xref std std-term">task</em></a> function to add functionality
+to <em>Ruffus</em> pipeline functions.</div>
+<div class="line">However, the decorated python functions can still be
+called normally, outside of <em>Ruffus</em>.</div>
+<div class="line"><em>Ruffus</em> <a class="reference internal" href="../../glossary.html#term-decorator"><em class="xref std std-term">decorator</em></a>s can be added to (stacked on top of) any function in any order.</div>
+</div>
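+<p>Both points can be illustrated with a minimal sketch (using <tt class="docutils literal"><span class="pre">@posttask</span></tt>, which is described later, purely as an example of stacking a second decorator):</p>
+<div class="highlight-python"><div class="highlight"><pre>import sys
+from ruffus import *
+
+def first_task():
+    print "Hello "
+
+# Ruffus decorators can be stacked on the same task function in any order
+@posttask(lambda: sys.stdout.write("second_task finished\n"))
+@follows(first_task)
+def second_task():
+    print "world"
+
+# the decorated function is still an ordinary python function
+# and can be called directly, outside of Ruffus
+second_task()
+</pre></div>
+</div>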
+<ul class="simple">
+<li><a class="reference internal" href="../manual/follows.html#manual-follows"><em>More on @follows in the in the Ruffus `Manual</em></a></li>
+<li><a class="reference internal" href="../../decorators/follows.html#decorators-follows"><em>@follows syntax in detail</em></a></li>
+</ul>
+</div></blockquote>
+</div>
+<div class="section" id="running-the-pipeline">
+<span id="index-1"></span><h2>Running the pipeline<a class="headerlink" href="#running-the-pipeline" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>We run the pipeline by specifying the <strong>last</strong> stage (<a class="reference internal" href="../../glossary.html#term-task"><em class="xref std std-term">task</em></a> function) of your pipeline.
+Ruffus will know what other functions this depends on, following the appropriate chain of
+dependencies automatically, making sure that the entire pipeline is up-to-date.</p>
+<p>Because <tt class="docutils literal"><span class="pre">second_task</span></tt> depends on <tt class="docutils literal"><span class="pre">first_task</span></tt>, both functions are executed in order.</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="gp">>>> </span><span class="n">pipeline_run</span><span class="p">([</span><span class="n">second_task</span><span class="p">],</span> <span class="n">verbose</span> <span class="o">=</span> <span class="mi">1</span><span class="p">)</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p>Ruffus by default prints out the <tt class="docutils literal"><span class="pre">verbose</span></tt> progress through the pipelined code,
+interleaved with the <strong>Hello</strong> printed by <tt class="docutils literal"><span class="pre">first_task</span></tt> and <strong>world</strong> printed
+by <tt class="docutils literal"><span class="pre">second_task</span></tt>.</p>
+<blockquote>
+<div><svg version="1.0" id="Layer_1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" x="0" y="0"
+ width="375pt" height="108pt" viewBox="0 0 375 108">
+<rect width="359.146" height="95.347786" x="7.8544765" y="6.3284979" id="rect3521" style="fill:none;stroke:#016735;stroke-width:0.18506026;stroke-miterlimit:10" /><rect width="362.35596" height="95.347786" x="6.2499924" y="6.3284979" id="rect3523" style="fill:#eeffcc" />
+<text x="9.2210703" y="18.304934" id="text3345" xml:space="preserve" style="font-size:10.39404392px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:Monospace;-inkscape-font-specification:Monospace"><tspan x="9.2210703" y="18.304934" id="tspan3347"><tspan id="tspan3365" style="font-weight:bold;fill:#ff0000;-inkscape-font-specification:Monospace Bold">>&g [...]
+<text x="392.0932" y="73.633965" transform="scale(0.78097325,1.2804536)" id="text3373" xml:space="preserve" style="font-size:17.92634964px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:Monospace;-inkscape-font-specification:Monospace"><tspan x="392.0932" y="73.633965" id="tspan3375"> </tspan></text>
+</svg></div></blockquote>
+</div></blockquote>
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="../../contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#">Step 1: An introduction to Ruffus pipelines</a><ul>
+<li><a class="reference internal" href="#overview">Overview</a></li>
+<li><a class="reference internal" href="#a-gentle-introduction-to-ruffus-syntax">A gentle introduction to Ruffus syntax</a></li>
+<li><a class="reference internal" href="#decorators">“Decorators”</a></li>
+<li><a class="reference internal" href="#running-the-pipeline">Running the pipeline</a></li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="simple_tutorial.html"
+ title="previous chapter">A simple tutorial: 8 steps to <em>Ruffus</em></a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="step2.html"
+ title="next chapter">Step 2: <tt class="docutils literal"><span class="pre">@transform</span></tt>-ing data in a pipeline</a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../../_sources/tutorials/simple_tutorial/step1_follows.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="../../decorators/decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="../../decorators/originate.html">@originate</a> </li>
+ <li><a href="../../decorators/split.html">@split</a> </li>
+ <li><a href="../../decorators/transform.html">@transform</a> </li>
+ <li><a href="../../decorators/merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="../../decorators/subdivide.html">@subdivide</a> </li>
+ <li><a href="../../decorators/transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="../../decorators/collate.html">@collate</a> </li>
+ <li><a href="../../decorators/collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="../../decorators/graphviz.html">@graphviz</a> </li>
+ <li><a href="../../decorators/mkdir.html">@mkdir</a> </li>
+ <li><a href="../../decorators/follows.html">@follows / mkdir</a> </li>
+ <li><a href="../../decorators/posttask.html">@posttask touch_file</a> </li>
+ <li><a href="../../decorators/active_if.html">@active_if</a> </li>
+ <li><a href="../../decorators/jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="../../decorators/product.html">@product </a> </li>
+ <li><a href="../../decorators/permutations.html">@permutations </a> </li>
+ <li><a href="../../decorators/combinations.html">@combinations </a> </li>
+ <li><a href="../../decorators/combinations_with_replacement.html">@combinations_ _with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="../../decorators/decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="../../decorators/files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="../../decorators/parallel.html">@parallel</a> </li>
+ <li><a href="../../decorators/check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="../../decorators/indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="step2.html" title="Step 2: @transform-ing data in a pipeline"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="simple_tutorial.html" title="A simple tutorial: 8 steps to Ruffus"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="../new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/tutorials/simple_tutorial/step2.html b/doc/_build/html/tutorials/simple_tutorial/step2.html
new file mode 100644
index 0000000..a3148ae
--- /dev/null
+++ b/doc/_build/html/tutorials/simple_tutorial/step2.html
@@ -0,0 +1,689 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>Step 2: @transform-ing data in a pipeline — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../../index.html" />
+ <link rel="next" title="Step 3: Understanding how your pipeline works" href="step3_run_pipeline.html" />
+ <link rel="prev" title="Step 1: An introduction to Ruffus pipelines" href="step1_follows.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="step3_run_pipeline.html" title="Step 3: Understanding how your pipeline works"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="step1_follows.html" title="Step 1: An introduction to Ruffus pipelines"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="../new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <span class="target" id="simple-tutorial-2nd-step"></span><div class="section" id="step-2-transform-ing-data-in-a-pipeline">
+<span id="index-0"></span><h1>Step 2: <tt class="docutils literal"><span class="pre">@transform</span></tt>-ing data in a pipeline<a class="headerlink" href="#step-2-transform-ing-data-in-a-pipeline" title="Permalink to this headline">¶</a></h1>
+<blockquote>
+<div><ul class="simple">
+<li><a class="reference internal" href="simple_tutorial.html#simple-tutorial"><em>Simple tutorial overview</em></a></li>
+<li><a class="reference internal" href="../../decorators/transform.html#decorators-transform"><em>@transform syntax in detail</em></a></li>
+</ul>
+</div></blockquote>
+<div class="admonition note">
+<p class="first admonition-title">Note</p>
+<p>Remember to look at the example code:</p>
+<ul class="last simple">
+<li><a class="reference internal" href="step2_code.html#simple-tutorial-2nd-step-code"><em>Python Code for step 2</em></a></li>
+</ul>
+</div>
+<div class="section" id="overview">
+<h2>Overview<a class="headerlink" href="#overview" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><svg version="1.0" id="Layer_1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" x="0" y="0"
+ width="731.3pt"
+ height="83pt"
+ viewBox="0 0 731.3 83">
+ <defs id="defs3287">
+ <marker refX="0" refY="0" orient="auto" id="Arrow2Mend" style="overflow:visible">
+ <path d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z" transform="scale(-0.6,-0.6)" id="path4124" style="fill-rule:evenodd;stroke-width:0.625;stroke-linejoin:round" />
+ </marker>
+ <marker refX="0" refY="0" orient="auto" id="Arrow2Lend" style="overflow:visible">
+ <path d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z" transform="matrix(-1.1,0,0,-1.1,-1.1,0)" id="path4118" style="fill-rule:evenodd;stroke-width:0.625;stroke-linejoin:round" />
+ </marker>
+ <marker refX="0" refY="0" orient="auto" id="Arrow1Lend" style="overflow:visible">
+ <path d="M 0,0 5,-5 -12.5,0 5,5 0,0 z" transform="matrix(-0.8,0,0,-0.8,-10,0)" id="path4100" style="fill-rule:evenodd;stroke:#000000;stroke-width:1pt" />
+ </marker>
+ <marker refX="0" refY="0" orient="auto" id="Arrow2Mend-4" style="overflow:visible">
+ <path d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z" transform="scale(-0.6,-0.6)" id="path4124-8" style="fill-rule:evenodd;stroke-width:0.625;stroke-linejoin:round" />
+ </marker>
+ <marker refX="0" refY="0" orient="auto" id="Arrow2Mend-1" style="overflow:visible">
+ <path d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z" transform="scale(-0.6,-0.6)" id="path4124-1" style="fill-rule:evenodd;stroke-width:0.625;stroke-linejoin:round" />
+ </marker>
+ <marker refX="0" refY="0" orient="auto" id="Arrow2Mend-1-1" style="overflow:visible">
+ <path d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z" transform="scale(-0.6,-0.6)" id="path4124-1-8" style="fill-rule:evenodd;stroke-width:0.625;stroke-linejoin:round" />
+ </marker>
+ <marker refX="0" refY="0" orient="auto" id="Arrow2Mend-1-2" style="overflow:visible">
+ <path d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z" transform="scale(-0.6,-0.6)" id="path4124-1-7" style="fill-rule:evenodd;stroke-width:0.625;stroke-linejoin:round" />
+ </marker>
+ <marker refX="0" refY="0" orient="auto" id="Arrow2Mend-1-23" style="overflow:visible">
+ <path d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z" transform="scale(-0.6,-0.6)" id="path4124-1-3" style="fill-rule:evenodd;stroke-width:0.625;stroke-linejoin:round" />
+ </marker>
+ </defs>
+ <g transform="translate(-14.608261,-32.693481)" id="layer1">
+ <rect width="89.826035" height="65.392792" x="21.063463" y="39.148708" id="rect3309" style="fill:#ffff00;fill-opacity:1;stroke:#ff0000;stroke-width:0.41040453" />
+ <text x="64.540756" y="62.738293" id="text3311" xml:space="preserve" style="font-size:14px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:Monospace;-inkscape-font-specification:Monospace"><tspan x="67.314194" y="62.738293" id="tspan3313" style="font-size:20px;font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;text-align:center; [...]
+ <text x="118.47811" y="104.62877" id="text4956" xml:space="preserve" style="font-size:21.02927971px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#ff0000;fill-opacity:1;stroke:none;font-family:Monospace;-inkscape-font-specification:Monospace"><tspan x="118.47811" y="104.62877" id="tspan4958" style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;fill:#ff0000;font-family [...]
+ <text x="345.62097" y="104.98591" id="text4956-1" xml:space="preserve" style="font-size:21.02927971px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#ff0000;fill-opacity:1;stroke:none;font-family:Monospace;-inkscape-font-specification:Monospace"><tspan x="345.62097" y="104.98591" id="tspan4958-7" style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;fill:#ff0000;font-fa [...]
+ <text x="575.62097" y="103.03347" id="text4956-2" xml:space="preserve" style="font-size:21.02927971px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#ff0000;fill-opacity:1;stroke:none;font-family:Monospace;-inkscape-font-specification:Monospace"><tspan x="575.62097" y="103.03347" id="tspan4958-3" style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;fill:#ff0000;font-fa [...]
+ <path d="m 110.71429,72.362182 87.14285,0" id="path5080" style="fill:#ff0000;stroke:#ff0000;stroke-width:4;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;marker-end:url(#Arrow2Mend-1)" />
+ <rect width="131.88234" height="65.306244" x="206.10674" y="39.191959" id="rect3309-6" style="fill:#ffff00;fill-opacity:1;stroke:#ff0000;stroke-width:0.49695465" />
+ <text x="273.11218" y="65.772057" id="text3311-2-2" xml:space="preserve" style="font-size:14px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:Monospace;-inkscape-font-specification:Monospace"><tspan x="273.11218" y="65.772057" id="tspan3313-4-2" style="font-size:20px;font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;text-align [...]
+ <path d="m 338.57143,72.362177 87.14285,0" id="path5080-9" style="fill:#ff0000;stroke:#ff0000;stroke-width:4;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;marker-end:url(#Arrow2Mend-1)" />
+ <rect width="131.88234" height="65.306244" x="433.96387" y="39.191959" id="rect3309-6-4" style="fill:#ffff00;fill-opacity:1;stroke:#ff0000;stroke-width:0.49695465" />
+ <text x="500.96933" y="65.772057" id="text3311-2-2-1" xml:space="preserve" style="font-size:14px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:Monospace;-inkscape-font-specification:Monospace"><tspan x="500.96933" y="65.772057" id="tspan3313-4-2-1" style="font-size:20px;font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;text-a [...]
+ <path d="m 566.42857,72.362178 87.14285,0" id="path5080-9-8" style="fill:#ff0000;stroke:#ff0000;stroke-width:4;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;marker-end:url(#Arrow2Mend-1)" />
+ <flowRoot id="flowRoot5373" xml:space="preserve" style="font-size:14px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:Monospace;-inkscape-font-specification:Monospace"><flowRegion id="flowRegion5375"><rect width="56.42857" height="339.28571" x="214.28572" y="123.07647" id="rect5377" /></flowRegion><flowPara id="flowPara5379"></flowPara></flowRoot> [...]
+ <text x="700.25507" y="65.071579" id="text3311-2-8" xml:space="preserve" style="font-size:14px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:Monospace;-inkscape-font-specification:Monospace"><tspan x="700.25507" y="65.071579" id="tspan3315-5-7" style="font-size:20px;font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;text-align [...]
+ </g>
+</svg><p>Computational pipelines transform your data in stages until the final result is produced.
+Ruffus automates the plumbing in your pipeline. You supply the python functions which perform the data transformation,
+and tell Ruffus how these pipeline stages or <a class="reference internal" href="../../glossary.html#term-task"><em class="xref std std-term">task</em></a> functions are connected together.</p>
+<div class="admonition note">
+<p class="first admonition-title">Note</p>
+<p><strong>The best way to design a pipeline is to:</strong></p>
+<blockquote class="last">
+<div><ul class="simple">
+<li><strong>write down the file names of the data as it flows across your pipeline</strong></li>
+<li><strong>write down the names of the functions which transform the data at each stage of the pipeline.</strong></li>
+</ul>
+</div></blockquote>
+</div>
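+<p>For the two-step example developed on this page, such a plan might look like the following sketch (using the file and task names that appear in the sections below):</p>
+<div class="highlight-python"><div class="highlight"><pre># data file names                        task functions
+#
+# job1.input    ->  job1.output1         first_task()
+# job1.output1  ->  job1.output2         second_task()
+</pre></div>
+</div>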
+<p>By letting <strong>Ruffus</strong> manage your pipeline parameters, you will get the following features
+for free:</p>
+<blockquote>
+<div><ol class="arabic simple">
+<li>only out-of-date parts of the pipeline will be re-run</li>
+<li>multiple jobs can be run in parallel (on different processors if possible)</li>
+<li>pipeline stages can be chained together automatically</li>
+</ol>
+</div></blockquote>
+</div></blockquote>
+</div>
+<div class="section" id="transform">
+<h2>@transform<a class="headerlink" href="#transform" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>Let us start with the simplest pipeline with a single <em>input</em> data file <strong>transform</strong>ed
+into a single <em>output</em> file. We will add some arbitrary extra parameters as well.</p>
+<p>The <a class="reference internal" href="../../decorators/transform.html#decorators-transform"><em>@transform</em></a> decorator tells Ruffus that this
+task function <strong>transforms</strong> each and every piece of input data into a corresponding output.</p>
+<blockquote>
+<div>In other words, inputs and outputs have a <strong>1 to 1</strong> relationship.</div></blockquote>
+<div class="admonition note">
+<p class="first admonition-title">Note</p>
+<p>In the second part of the tutorial, we will encounter more decorators which can <em>split up</em>, <em>join together</em> or <em>group</em> inputs.</p>
+<blockquote class="last">
+<div>In other words, inputs and outputs can have <strong>many to one</strong>, <strong>many to many</strong>, etc. relationships.</div></blockquote>
+</div>
+<p>Let us provide <strong>input</strong>s and <strong>output</strong>s to our new pipeline:</p>
+<blockquote>
+<div><svg version="1.0" id="Layer_1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" x="0" y="0"
+ width="385pt"
+ height="210pt"
+ viewBox="0 0 385 210">
+ <defs id="defs3568">
+ <marker refX="0" refY="0" orient="auto" id="Arrow2Mend" style="overflow:visible">
+ <path d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z" transform="scale(-0.6,-0.6)" id="path4497" style="fill-rule:evenodd;stroke-width:0.625;stroke-linejoin:round" />
+ </marker>
+ <marker refX="0" refY="0" orient="auto" id="Arrow1Lend" style="overflow:visible">
+ <path d="M 0,0 5,-5 -12.5,0 5,5 0,0 z" transform="matrix(-0.8,0,0,-0.8,-10,0)" id="path4473" style="fill-rule:evenodd;stroke:#000000;stroke-width:1pt" />
+ </marker>
+ <marker refX="0" refY="0" orient="auto" id="TriangleOutL" style="overflow:visible">
+ <path d="m 5.77,0 -8.65,5 0,-10 8.65,5 z" transform="scale(0.8,0.8)" id="path4612" style="fill-rule:evenodd;stroke:#000000;stroke-width:1pt" />
+ </marker>
+ <marker refX="0" refY="0" orient="auto" id="Arrow2Send" style="overflow:visible">
+ <path d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z" transform="matrix(-0.3,0,0,-0.3,0.69,0)" id="path4503" style="fill-rule:evenodd;stroke-width:0.625;stroke-linejoin:round" />
+ </marker>
+ <marker refX="0" refY="0" orient="auto" id="Arrow1Mend" style="overflow:visible">
+ <path d="M 0,0 5,-5 -12.5,0 5,5 0,0 z" transform="matrix(-0.4,0,0,-0.4,-4,0)" id="path4479" style="fill-rule:evenodd;stroke:#000000;stroke-width:1pt" />
+ </marker>
+ <marker refX="0" refY="0" orient="auto" id="Arrow2Lend" style="overflow:visible">
+ <path d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z" transform="matrix(-1.1,0,0,-1.1,-1.1,0)" id="path4491" style="fill-rule:evenodd;stroke-width:0.625;stroke-linejoin:round" />
+ </marker>
+ </defs>
+ <g transform="matrix(1.0077068,0,0,1,9.8598546,3.6099157)" id="g3519">
+ <rect width="359.146" height="174.006" x="6.244" y="27.667" id="rect3521" style="fill:none;stroke:#016735;stroke-width:0.25;stroke-miterlimit:10" />
+ <rect width="364.86499" height="174.006" x="3.385" y="27.667" id="rect3523" style="fill:#eeffcc" />
+ </g>
+ <path d="m 84.221497,82.954914 c 0,3.583 -3.942,6.488 -8.804,6.488 h -49.481 c -4.862,0 -8.803999,-2.905 -8.803999,-6.488 l 0,0 c 0,-3.582998 3.941999,-6.487998 8.803999,-6.487998 h 49.481 c 4.862,0 8.804,2.905 8.804,6.487998 l 0,0 z" id="path3641" style="opacity:0.2;fill:#ed1c24" />
+ <path d="m 190.77333,82.954914 c 0,3.583 -6.09517,6.488 -13.61411,6.488 h -76.51461 c -7.518948,0 -13.614113,-2.905 -13.614113,-6.488 l 0,0 c 0,-3.582998 6.095165,-6.487998 13.614113,-6.487998 h 76.51322 c 7.51894,0 13.6155,2.905 13.6155,6.487998 l 0,0 z" id="path3671" style="opacity:0.2;fill:#00a14b" />
+ <path d="m 368.12884,82.954914 c 0,3.583 -3.66127,6.488 -8.17701,6.488 h -45.95795 c -4.5166,0 -8.17787,-2.905 -8.17787,-6.488 l 0,0 c 0,-3.582998 3.66127,-6.487998 8.17787,-6.487998 h 45.95795 c 4.51574,0 8.17701,2.905 8.17701,6.487998 l 0,0 z" id="path3687" style="opacity:0.2;fill:#00a14b" />
+ <path d="m 300.38251,97.842914 c 0,3.406006 -2.537,6.167006 -5.667,6.167006 H 90.049497 c -3.129,0 -5.667,-2.761 -5.667,-6.167006 l 0,0 c 0,-3.406 2.537,-6.167 5.667,-6.167 H 294.71651 c 3.129,10e-4 5.666,2.762 5.666,6.167 l 0,0 z" id="path3689" style="opacity:0.2;fill:#00a14b" />
+ <text x="22.156881" y="58.294636" transform="scale(1.0042467,0.99577126)" id="text3295" xml:space="preserve" style="font-size:9.89657974px;font-family:monospace" />
+ <text x="22.156881" y="138.88107" transform="scale(1.0042467,0.99577126)" id="text3317" xml:space="preserve" style="font-size:9.89657974px;font-family:monospace" />
+ <text x="22.156881" y="179.17427" transform="scale(1.0042467,0.99577126)" id="text3327" xml:space="preserve" style="font-size:9.89657974px;font-family:monospace" />
+ <text x="22.156881" y="206.03641" transform="scale(1.0042467,0.99577126)" id="text3331" xml:space="preserve" style="font-size:9.89657974px;font-family:monospace" />
+ <text x="22.156881" y="54.504337" transform="scale(1.0042467,0.99577126)" id="text3285-5" xml:space="preserve" style="font-size:9.89657974px;font-family:monospace" />
+ <g transform="matrix(1,0,0,1.442061,16.381543,-1.5326283)" id="g3645">
+ <line x1="125.896" y1="53.333" x2="125.896" y2="15.667" id="line3647" style="fill:#00ff00" />
+ <g id="g3649">
+ <line stroke-miterlimit="10" x1="125.896" y1="49.028" x2="125.896" y2="15.667" id="line3651" style="fill:none;stroke:#00a651;stroke-miterlimit:10" />
+ <g id="g3653">
+ <polygon points="128.888,48.153 125.897,53.333 122.905,48.153 " id="polygon3655" style="fill:#00a651" />
+ </g>
+ </g>
+ </g>
+ <g transform="matrix(1,0,0,1.3096241,27.941781,-1.0861523)" id="g3659">
+ <line x1="267.23001" y1="70.667" x2="267.23001" y2="15.667" id="line3661" style="fill:#00ff00" />
+ <g id="g3663">
+ <line stroke-miterlimit="10" x1="267.23001" y1="66.361" x2="267.23001" y2="15.667" id="line3665" style="fill:none;stroke:#00a651;stroke-miterlimit:10" />
+ <g id="g3667">
+ <polygon points="270.222,65.486 267.23,70.667 264.238,65.486 " id="polygon3669" style="fill:#00a651" />
+ </g>
+ </g>
+ </g>
+ <g transform="matrix(1,0,0,1.4502473,19.50593,-3.2189853)" id="g3675">
+ <line x1="313.56299" y1="53.333" x2="313.56299" y2="15.667" id="line3677" style="fill:#00ff00" />
+ <g id="g3679">
+ <line stroke-miterlimit="10" x1="313.56299" y1="49.028" x2="313.56299" y2="15.667" id="line3681" style="fill:none;stroke:#00a651;stroke-miterlimit:10" />
+ <g id="g3683">
+ <polygon points="316.556,48.153 313.564,53.333 310.572,48.153 " id="polygon3685" style="fill:#00a651" />
+ </g>
+ </g>
+ </g>
+ <text x="4.0742145" y="13.589844" id="text3629" style="font-size:12px;fill:#ff0000;font-family:ArialMT">Decorator</text>
+ <text x="114.1965" y="15.776917" id="text3643" style="font-size:12px;fill:#00a14b;font-family:ArialMT">Inputs</text>
+ <text x="207.08836" y="15.776917" id="text3657" style="font-size:12px;fill:#00a14b;font-family:ArialMT">Extra parameters</text>
+ <text x="313.42374" y="15.776917" id="text3673" style="font-size:12px;fill:#00a14b;font-family:ArialMT">Outputs</text>
+ <text x="22.156881" y="44.86356" transform="scale(1.0042467,0.99577126)" id="text3285" xml:space="preserve" style="font-size:9.89657974px;font-family:monospace"><tspan id="tspan3287" style="font-weight:bold;fill:#008000">from</tspan> <tspan id="tspan3289" style="font-weight:bold;fill:#0e84b5">ruffus</tspan> <tspan id="tspan3291" style="font-weight:bold;fill:#008000">import</tspan> <tspan id="tspan3293" style="fill:#303030">*</tspan></text>
+ <text x="22.156881" y="64.113289" transform="scale(1.0042467,0.99577126)" id="text3329-1" xml:space="preserve" style="font-size:9.89657974px;font-family:monospace">first_task_params = 'job1.input'</text>
+ <text x="22.156881" y="86.186874" transform="scale(1.0042467,0.99577126)" id="text3297" xml:space="preserve" style="font-size:9.89657974px;font-family:monospace"><tspan id="tspan3299" style="font-weight:bold;fill:#505050">@transform</tspan>(first_task_params, <tspan id="tspan3548" style="font-weight:bold;fill:#ff0000">suffix</tspan>(".input"), ".output1", </text>
+ <text x="22.156881" y="99.61795" transform="scale(1.0042467,0.99577126)" id="text3301" xml:space="preserve" style="font-size:9.89657974px;font-family:monospace"> "some_extra.string.for_example", <tspan id="tspan3303" style="font-weight:bold;fill:#0000d0">14</tspan>)</text>
+ <text x="22.156881" y="113.04904" transform="scale(1.0042467,0.99577126)" id="text3305" xml:space="preserve" style="font-size:9.89657974px;font-family:monospace"><tspan id="tspan3307" style="font-weight:bold;fill:#008000">def</tspan> <tspan id="tspan3309" style="font-weight:bold;fill:#0060b0">first_task</tspan>(input_file, output_file,</text>
+ <text x="22.156881" y="126.48013" transform="scale(1.0042467,0.99577126)" id="text3311" xml:space="preserve" style="font-size:9.89657974px;font-family:monospace"> extra_parameter_str, extra_parameter_num):</text>
+ <text x="22.156881" y="139.91115" transform="scale(1.0042467,0.99577126)" id="text3313" xml:space="preserve" style="font-size:9.89657974px;font-family:monospace"> <tspan id="tspan3315" style="font-weight:bold;fill:#008000">pass</tspan></text>
+ <text x="22.156881" y="166.77328" transform="scale(1.0042467,0.99577126)" id="text3319" xml:space="preserve" style="font-size:9.89657974px;font-family:monospace"><tspan id="tspan3321" style="fill:#808080"># make sure the input file is there</tspan></text>
+ <text x="22.156881" y="180.20436" transform="scale(1.0042467,0.99577126)" id="text3323" xml:space="preserve" style="font-size:9.89657974px;font-family:monospace"><tspan id="tspan3325" style="fill:#007020">open</tspan>('job1.input', "w")</text>
+ <text x="22.156881" y="199.1844" transform="scale(1.0042467,0.99577126)" id="text3329" xml:space="preserve" style="font-size:9.89657974px;font-family:monospace">pipeline_run([first_task])</text>
+ <path d="m 11.798134,17.75418 0,40.879392 11.506636,16.489166" id="path3696" style="fill:none;stroke:#ff0000;stroke-width:1.25536001;stroke-linecap:butt;stroke-linejoin:bevel;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;marker-end:url(#Arrow2Mend)" />
+</svg><p>The <tt class="docutils literal"><span class="pre">@transform</span></tt> decorator tells Ruffus to generate the appropriate arguments for our python function:</p>
+<blockquote>
+<div><ul class="simple">
+<li>The input file name is as given: <tt class="docutils literal"><span class="pre">job1.input</span></tt></li>
+<li>The output file name is the input file name with its <strong>suffix</strong> of <tt class="docutils literal"><span class="pre">.input</span></tt> replaced with <tt class="docutils literal"><span class="pre">.output1</span></tt></li>
+<li>There are two extra parameters, a string and a number.</li>
+</ul>
+</div></blockquote>
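+<p>For reference, the code shown in the figure above, as plain text:</p>
+<div class="highlight-python"><div class="highlight"><pre>from ruffus import *
+
+first_task_params = 'job1.input'
+
+@transform(first_task_params, suffix(".input"), ".output1",
+           "some_extra.string.for_example", 14)
+def first_task(input_file, output_file,
+               extra_parameter_str, extra_parameter_num):
+    pass
+
+# make sure the input file is there
+open('job1.input', "w")
+
+pipeline_run([first_task])
+</pre></div>
+</div>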
+<p>This is exactly equivalent to the following function call:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="n">first_task</span><span class="p">(</span><span class="s">'job1.input'</span><span class="p">,</span> <span class="s">'job1.output1'</span><span class="p">,</span> <span class="s">"some_extra.string.for_example"</span><span class="p">,</span> <span class="mi">14</span><span class="p">)</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p>Even though this (empty) function doesn’t do anything just yet, the output from <strong>Ruffus</strong> <tt class="docutils literal"><span class="pre">pipeline_run</span></tt> will show that this part of the pipeline completed successfully:</p>
+<blockquote>
+<div><svg version="1.0" id="Layer_1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" x="0" y="0" width="374.86499pt" height="92.02504pt" viewBox="0 0 374.86498 92.025041">
+ <rect width="359.146" height="58.154449" x="7.8589935" y="28.798326" id="rect3521" style="fill:none;stroke:#016735;stroke-width:0.14452712;stroke-miterlimit:10" /><rect width="364.86499" height="58.154449" x="4.9999938" y="28.798326" id="rect3523" style="fill:#eeffcc" />
+ <g transform="matrix(1,0,0,0.72872639,-16.352384,4.6212592)" id="g3645">
+ <line x1="125.896" y1="53.333" x2="125.896" y2="15.667" id="line3647" style="fill:#00ff00" />
+ <g id="g3649">
+ <line stroke-miterlimit="10" x1="125.896" y1="49.028" x2="125.896" y2="15.667" id="line3651" style="fill:none;stroke:#00a651;stroke-miterlimit:10" />
+ <g id="g3653">
+ <polygon points="128.888,48.153 125.897,53.333 122.905,48.153 " id="polygon3655" style="fill:#00a651" />
+ </g>
+ </g>
+ </g>
+ <g transform="matrix(1,0,0,0.73146564,-0.38500643,4.5843285)" id="g3659">
+ <line x1="267.23001" y1="70.667" x2="267.23001" y2="15.667" id="line3661" style="fill:#00ff00" />
+ <g id="g3663">
+ <line stroke-miterlimit="10" x1="267.23001" y1="66.361" x2="267.23001" y2="15.667" id="line3665" style="fill:none;stroke:#00a651;stroke-miterlimit:10" />
+ <g id="g3667">
+ <polygon points="267.23,70.667 264.238,65.486 270.222,65.486 " id="polygon3669" style="fill:#00a651" />
+ </g>
+ </g>
+ </g>
+ <path d="m 153.04485,51.262472 c 0,3.583 -4.15868,6.488 -9.28879,6.488 H 91.550787 c -5.130114,0 -9.288794,-2.905 -9.288794,-6.488 l 0,0 c 0,-3.583 4.15868,-6.488 9.288794,-6.488 h 52.204323 c 5.13012,0 9.28974,2.905 9.28974,6.488 l 0,0 z" id="path3671" style="opacity:0.2;fill:#00a14b" />
+ <g transform="matrix(1,0,0,0.72872639,-103.64072,4.6212592)" id="g3675">
+ <line x1="313.56299" y1="53.333" x2="313.56299" y2="15.667" id="line3677" style="fill:#00ff00" />
+ <g id="g3679">
+ <line stroke-miterlimit="10" x1="313.56299" y1="49.028" x2="313.56299" y2="15.667" id="line3681" style="fill:none;stroke:#00a651;stroke-miterlimit:10" />
+ <g id="g3683">
+ <polygon points="316.556,48.153 313.564,53.333 310.572,48.153 " id="polygon3685" style="fill:#00a651" />
+ </g>
+ </g>
+ </g>
+ <path d="m 250.58388,51.262472 c 0,3.583 -4.83746,6.488 -10.80388,6.488 h -60.72201 c -5.96757,0 -10.80503,-2.905 -10.80503,-6.488 l 0,0 c 0,-3.583 4.83746,-6.488 10.80503,-6.488 H 239.78 c 5.96642,0 10.80388,2.905 10.80388,6.488 l 0,0 z" id="path3687" style="opacity:0.2;fill:#00a14b" />
+ <path d="m 295.61399,65.440811 c 0,3.406 -2.537,6.167 -5.667,6.167 H 85.280993 c -3.129,0 -5.667,-2.761 -5.667,-6.167 l 0,0 c 0,-3.406 2.537,-6.167 5.667,-6.167 H 289.94799 c 3.129,10e-4 5.666,2.762 5.666,6.167 l 0,0 z" id="path3689" style="opacity:0.2;fill:#00a14b" />
+ <text x="92.396126" y="13.742188" id="text3643" style="font-size:12px;fill:#00a14b;font-family:ArialMT">Inputs</text>
+ <text x="193.83928" y="13.742188" id="text3673" style="font-size:12px;fill:#00a14b;font-family:ArialMT">Outputs</text>
+ <text x="256.93237" y="13.742188" id="text3657" style="font-size:12px;fill:#00a14b;font-family:ArialMT">Extra parameters</text>
+ <text x="9.3237839" y="40.037392" id="text3040" xml:space="preserve" style="font-size:10.32079887px;font-family:monospace"><tspan id="tspan3042" style="font-weight:bold;fill:#ff0000">>>></tspan> pipeline_run([first_task])</text>
+ <text x="9.3237839" y="54.044189" id="text3046" xml:space="preserve" style="font-size:10.32079887px;font-family:monospace"> Job <tspan id="tspan3048" style="fill:#666666">=</tspan> [job1<tspan id="tspan3050" style="fill:#666666">.</tspan>input <tspan id="tspan3052" style="fill:#666666">-></tspan> job1<tspan id="tspan3056" style="fill:#666666">.</tspan>output1,</text>
+ <text x="58.633194" y="67.193367" id="text3160" xml:space="preserve" style="font-size:10.32079887px;font-family:monospace"> some_extra.string.for_example, 14] completed</text>
+ <text x="9.3237839" y="82.432899" id="text3058" xml:space="preserve" style="font-size:10.32079887px;font-family:monospace">Completed Task <tspan id="tspan3060" style="fill:#666666">=</tspan> first_task</text>
+ </svg></div></blockquote>
+</div></blockquote>
+</div></blockquote>
+</div>
+<div class="section" id="task-functions-as-recipes">
+<h2>Task functions as recipes<a class="headerlink" href="#task-functions-as-recipes" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>This may seem like a lot of effort and complication for something so simple: a normal python function call.
+However, now that we have annotated a task, we can start using it as part of our computational pipeline:</p>
+<p>Each <a class="reference internal" href="../../glossary.html#term-task"><em class="xref std std-term">task</em></a> function of the pipeline is a recipe or
+<a class="reference external" href="http://www.gnu.org/software/make/manual/make.html#Rule-Introduction">rule</a>
+which can be applied repeatedly to our data.</p>
+<p>For example, one can have</p>
+<blockquote>
+<div><ul class="simple">
+<li>a <tt class="docutils literal"><span class="pre">compile()</span></tt> <em>task</em> which will compile any number of source code files, or</li>
+<li>a <tt class="docutils literal"><span class="pre">count_lines()</span></tt> <em>task</em> which will count the number of lines in any file or</li>
+<li>an <tt class="docutils literal"><span class="pre">align_dna()</span></tt> <em>task</em> which will align the DNA of many chromosomes.</li>
+</ul>
+</div></blockquote>
+<div class="admonition note">
+<p class="first admonition-title">Note</p>
+<p><strong>Key Ruffus Terminology</strong>:</p>
+<p>A <a class="reference internal" href="../../glossary.html#term-task"><em class="xref std std-term">task</em></a> is an annotated python function which represents a recipe or stage of your pipeline.</p>
+<p>A <a class="reference internal" href="../../glossary.html#term-job"><em class="xref std std-term">job</em></a> is each time your recipe is applied to a piece of data, i.e. each time Ruffus calls your function.</p>
+<p class="last">Each <strong>task</strong> or pipeline recipe can thus have many <strong>jobs</strong> each of which can work in parallel on different data.</p>
+</div>
+<p>In the original example, we have made a single output file by supplying a single input parameter.
+We shall use much the same syntax to apply the same recipe to <em>multiple</em> input files.
+Instead of providing a single <em>input</em>, and a single <em>output</em>, we are going to specify
+the parameters for <em>three</em> jobs at once:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="c"># previously,</span>
+<span class="c"># first_task_params = 'job1.input'</span>
+<span class="n">first_task_params</span> <span class="o">=</span> <span class="p">[</span>
+    <span class="s">'job1.input'</span><span class="p">,</span>
+    <span class="s">'job2.input'</span><span class="p">,</span>
+    <span class="s">'job3.input'</span>
+    <span class="p">]</span>
+
+<span class="c"># make sure the input files are there</span>
+<span class="nb">open</span><span class="p">(</span><span class="s">'job1.input'</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+<span class="nb">open</span><span class="p">(</span><span class="s">'job2.input'</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+<span class="nb">open</span><span class="p">(</span><span class="s">'job3.input'</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+
+<span class="n">pipeline_run</span><span class="p">([</span><span class="n">first_task</span><span class="p">])</span>
+</pre></div>
+</div>
+</div></blockquote>
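+<p>The <tt class="docutils literal"><span class="pre">@transform</span></tt> decorator and <tt class="docutils literal"><span class="pre">first_task()</span></tt> themselves stay exactly as before; putting the pieces together gives the following sketch:</p>
+<div class="highlight-python"><div class="highlight"><pre>from ruffus import *
+
+first_task_params = [
+    'job1.input',
+    'job2.input',
+    'job3.input',
+    ]
+
+@transform(first_task_params, suffix(".input"), ".output1",
+           "some_extra.string.for_example", 14)
+def first_task(input_file, output_file,
+               extra_parameter_str, extra_parameter_num):
+    pass
+
+# make sure the input files are there
+open('job1.input', "w")
+open('job2.input', "w")
+open('job3.input', "w")
+
+pipeline_run([first_task])
+</pre></div>
+</div>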
+<p>Just by changing the inputs from a single file to a list of three files, we now have a pipeline which runs independently on three pieces of data.
+The results should look familiar:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="gp">>>> </span><span class="n">pipeline_run</span><span class="p">([</span><span class="n">first_task</span><span class="p">])</span>
+<span class="go"> Job = [job1.input -> job1.output1,</span>
+<span class="go"> some_extra.string.for_example, 14] completed</span>
+<span class="go"> Job = [job2.input -> job2.output1,</span>
+<span class="go"> some_extra.string.for_example, 14] completed</span>
+<span class="go"> Job = [job3.input -> job3.output1,</span>
+<span class="go"> some_extra.string.for_example, 14] completed</span>
+<span class="go">Completed Task = first_task</span>
+</pre></div>
+</div>
+</div></blockquote>
+</div></blockquote>
+</div>
+<div class="section" id="multiple-steps">
+<h2>Multiple steps<a class="headerlink" href="#multiple-steps" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>Best of all, it is easy to add another step to our initial pipeline.</p>
+<p>We have to</p>
+<blockquote>
+<div><ul class="simple">
+<li>add another <tt class="docutils literal"><span class="pre">@transform</span></tt> decorated function (<tt class="docutils literal"><span class="pre">second_task()</span></tt>),</li>
+<li>specify <tt class="docutils literal"><span class="pre">first_task()</span></tt> as the source, and</li>
+<li>use a <tt class="docutils literal"><span class="pre">suffix</span></tt> which matches the output from <tt class="docutils literal"><span class="pre">first_task()</span></tt></li>
+</ul>
+<div class="highlight-python"><div class="highlight"><pre><span class="nd">@transform</span><span class="p">(</span><span class="n">first_task</span><span class="p">,</span> <span class="n">suffix</span><span class="p">(</span><span class="s">".output1"</span><span class="p">),</span> <span class="s">".output2"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">second_task</span><span class="p">(</span><span class="n">input_file</span><span class="p">,</span> <span class="n">output_file</span><span class="p">):</span>
+ <span class="c"># make output file</span>
+ <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+</pre></div>
+</div>
+<ul class="simple">
+<li>call <tt class="docutils literal"><span class="pre">pipeline_run()</span></tt> with the correct final task (<tt class="docutils literal"><span class="pre">second_task()</span></tt>), as sketched below</li>
+</ul>
+</div></blockquote>
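+<p>The final call is then simply:</p>
+<div class="highlight-python"><div class="highlight"><pre>pipeline_run([second_task])
+</pre></div>
+</div>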
+<p>The full source code can be found <a class="reference internal" href="step2_code.html#simple-tutorial-2nd-step-code"><em>here</em></a></p>
+<p>With very little effort, we now have three independent pieces of information coursing through our pipeline.
+Because <tt class="docutils literal"><span class="pre">second_task()</span></tt> <em>transforms</em> the output from <tt class="docutils literal"><span class="pre">first_task()</span></tt>, it magically knows its dependencies and
+that it too has to work on three jobs.</p>
+</div></blockquote>
+</div>
+<div class="section" id="multi-tasking">
+<h2>Multi-tasking<a class="headerlink" href="#multi-tasking" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>Though three jobs have been specified, <strong>Ruffus</strong> defaults to running
+each of them successively. With modern CPUs, it is often a lot faster to run parts
+of your pipeline in parallel, all at the same time.</p>
+<p>To do this, all you have to do is to add a multiprocess parameter to pipeline_run:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="gp">>>> </span><span class="n">pipeline_run</span><span class="p">([</span><span class="n">second_task</span><span class="p">],</span> <span class="n">multiprocess</span> <span class="o">=</span> <span class="mi">5</span><span class="p">)</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p>In this case, ruffus will try to run up to 5 jobs at the same time. Since our second
+task only has three jobs, these will be started simultaneously.</p>
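+<p>As a minimal sketch (assuming the <tt class="docutils literal"><span class="pre">second_task()</span></tt> pipeline above, and using only the
+standard library), you could also size the pool from the number of CPUs reported by
+<tt class="docutils literal"><span class="pre">multiprocessing.cpu_count()</span></tt>:</p>
+<div class="highlight-python"><div class="highlight"><pre>
+# Minimal sketch: run up to one job per CPU core.
+# Assumes the first_task()/second_task() pipeline defined above.
+import multiprocessing
+
+n_cpus = multiprocessing.cpu_count()
+
+# Ruffus still respects task dependencies; only independent jobs
+# within a task run at the same time.
+pipeline_run([second_task], multiprocess = n_cpus)
+</pre></div>
+</div>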
+</div></blockquote>
+</div>
+<div class="section" id="up-to-date-jobs-are-not-re-run-unnecessarily">
+<h2>Up-to-date jobs are not re-run unnecessarily<a class="headerlink" href="#up-to-date-jobs-are-not-re-run-unnecessarily" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>A job will be run only if the output file timestamps are out of date.
+If you ran the same code a second time,</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="gp">>>> </span><span class="n">pipeline_run</span><span class="p">([</span><span class="n">second_task</span><span class="p">])</span>
+</pre></div>
+</div>
+</div></blockquote>
+<dl class="docutils">
+<dt>Nothing would happen because:</dt>
+<dd><ul class="first last simple">
+<li><tt class="docutils literal"><span class="pre">job1.output2</span></tt> is more recent than <tt class="docutils literal"><span class="pre">job1.output1</span></tt> and</li>
+<li><tt class="docutils literal"><span class="pre">job2.output2</span></tt> is more recent than <tt class="docutils literal"><span class="pre">job2.output1</span></tt> and</li>
+<li><tt class="docutils literal"><span class="pre">job3.output2</span></tt> is more recent than <tt class="docutils literal"><span class="pre">job3.output1</span></tt>.</li>
+</ul>
+</dd>
+<dt>Let us see what happens when just 1 out of 3 pieces of data is modified</dt>
+<dd><div class="first last highlight-python"><div class="highlight"><pre><span class="nb">open</span><span class="p">(</span><span class="s">"job1.input1"</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+<span class="n">pipeline_run</span><span class="p">([</span><span class="n">second_task</span><span class="p">],</span> <span class="n">verbose</span> <span class="o">=</span><span class="mi">2</span><span class="p">,</span> <span class="n">multiprocess</span> <span class="o">=</span> <span class="mi">5</span><span class="p">)</span>
+</pre></div>
+</div>
+</dd>
+</dl>
+<p>You would see that only the out-of-date jobs (highlighted) have been re-run:</p>
+<blockquote>
+<div><div class="highlight-pycon"><div class="highlight"><pre><span class="gp">>>> </span><span class="n">pipeline_run</span><span class="p">([</span><span class="n">second_task</span><span class="p">],</span> <span class="n">verbose</span> <span class="o">=</span><span class="mi">2</span><span class="p">,</span> <span class="n">multiprocess</span> <span class="o">=</span> <span class="mi">5</span><span class="p">)</span>
+<span class="hll"><span class="go"> Job = [job1.input -> job1.output1, some_extra.string.for_example, 14] completed</span>
+</span><span class="go"> Job = [job3.input -> job3.output1, some_extra.string.for_example, 14] unnecessary: already up to date</span>
+<span class="go"> Job = [job2.input -> job2.output1, some_extra.string.for_example, 14] unnecessary: already up to date</span>
+<span class="go">Completed Task = first_task</span>
+<span class="hll"><span class="go"> Job = [job1.output1 -> job1.output2] completed</span>
+</span><span class="go"> Job = [job2.output1 -> job2.output2] unnecessary: already up to date</span>
+<span class="go"> Job = [job3.output1 -> job3.output2] unnecessary: already up to date</span>
+<span class="go">Completed Task = second_task</span>
+</pre></div>
+</div>
+</div></blockquote>
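+<p>A minimal sketch of the same idea (assuming the <tt class="docutils literal"><span class="pre">job1.input</span></tt> file created earlier):
+updating an input file's timestamp with <tt class="docutils literal"><span class="pre">os.utime()</span></tt> is enough to make its
+downstream jobs out of date again:</p>
+<div class="highlight-python"><div class="highlight"><pre>
+# Minimal sketch (assumes job1.input was created by the earlier example):
+# bump the input file's modification time so its downstream jobs re-run.
+import os
+
+os.utime("job1.input", None)   # set atime/mtime to "now"
+pipeline_run([second_task], verbose = 2, multiprocess = 5)
+</pre></div>
+</div>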
+</div></blockquote>
+</div>
+<div class="section" id="intermediate-files">
+<span id="index-1"></span><h2>Intermediate files<a class="headerlink" href="#intermediate-files" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>In the above examples, the <em>input</em> and <em>output</em> parameters are file names.
+Ruffus was designed for pipelines which save intermediate data in files. This is not
+compulsory, but saving your data in files at each step provides a few advantages:</p>
+<blockquote>
+<div><ol class="arabic simple">
+<li>Ruffus can use file system time stamps to check if your pipeline is up to date</li>
+<li>Your data is persistent across runs</li>
+<li>This is a good way to pass large amounts of data across processes and computational nodes</li>
+</ol>
+</div></blockquote>
+<p>Otherwise, task parameters can be all sorts of data: lists of files, numbers,
+sets or tuples. Ruffus imposes few constraints on what <em>you</em>
+would like to send to each stage of your pipeline.</p>
+<p><strong>Ruffus</strong> does, however, assume that all strings in your <em>input</em> and <em>output</em>
+parameters represent file names.</p>
+<p><em>input</em> parameters that contain a <a class="reference external" href="http://docs.python.org/library/glob.html"><cite>glob</cite></a> pattern (e.g. <tt class="docutils literal"><span class="pre">*.txt</span></tt>) are expanded to the matching file names.</p>
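+<p>For example, a minimal sketch (the <tt class="docutils literal"><span class="pre">*.input</span></tt> pattern and the task name are
+illustrative only) of a task whose <em>input</em> is a glob pattern:</p>
+<div class="highlight-python"><div class="highlight"><pre>
+# Minimal sketch: a glob pattern as *input* is expanded to all matching
+# files, giving one job per "*.input" file in the working directory.
+from ruffus import transform, suffix, pipeline_run
+
+@transform("*.input", suffix(".input"), ".output1")
+def process_each_file(input_file, output_file):
+    open(output_file, "w").close()
+
+pipeline_run([process_each_file])
+</pre></div>
+</div>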
+</div></blockquote>
+</div>
+<div class="section" id="transform-is-a-1-to-1-operation">
+<h2>@transform is a 1 to 1 operation<a class="headerlink" href="#transform-is-a-1-to-1-operation" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p><tt class="docutils literal"><span class="pre">@transform</span></tt> is a 1:1 operation because it keeps the number of jobs constant
+entering and leaving the task. Each job can still accept, for example, a pair of files as its input,
+or generate more than one output file.</p>
+<dl class="docutils">
+<dt>Let us see this in action using the previous example:</dt>
+<dd><ul class="first">
+<li><p class="first"><tt class="docutils literal"><span class="pre">first_task_params</span></tt> is changed to 3 <em>pairs</em> of file names</p>
+</li>
+<li><dl class="first docutils">
+<dt><tt class="docutils literal"><span class="pre">@transform</span></tt> for <tt class="docutils literal"><span class="pre">first_task</span></tt> is modified to produce <em>pairs</em> of file names</dt>
+<dd><ul class="first last simple">
+<li><tt class="docutils literal"><span class="pre">.output.1</span></tt></li>
+<li><tt class="docutils literal"><span class="pre">.output.extra.1</span></tt></li>
+</ul>
+</dd>
+</dl>
+</li>
+</ul>
+<div class="highlight-python"><div class="highlight"><pre><span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+
+<span class="c">#---------------------------------------------------------------</span>
+<span class="c"># Create pairs of input files</span>
+<span class="c">#</span>
+<span class="n">first_task_params</span> <span class="o">=</span> <span class="p">[</span>
+ <span class="p">[</span><span class="s">'job1.a.input'</span><span class="p">,</span> <span class="s">'job1.b.input'</span><span class="p">],</span>
+ <span class="p">[</span><span class="s">'job2.a.input'</span><span class="p">,</span> <span class="s">'job2.b.input'</span><span class="p">],</span>
+ <span class="p">[</span><span class="s">'job3.a.input'</span><span class="p">,</span> <span class="s">'job3.b.input'</span><span class="p">],</span>
+ <span class="p">]</span>
+
+<span class="k">for</span> <span class="n">input_file_pairs</span> <span class="ow">in</span> <span class="n">first_task_params</span><span class="p">:</span>
+ <span class="k">for</span> <span class="n">input_file</span> <span class="ow">in</span> <span class="n">input_file_pairs</span><span class="p">:</span>
+ <span class="nb">open</span><span class="p">(</span><span class="n">input_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+
+
+<span class="c">#---------------------------------------------------------------</span>
+<span class="c">#</span>
+<span class="c"># first task</span>
+<span class="c">#</span>
+<span class="nd">@transform</span><span class="p">(</span><span class="n">first_task_params</span><span class="p">,</span> <span class="n">suffix</span><span class="p">(</span><span class="s">".input"</span><span class="p">),</span>
+ <span class="p">[</span><span class="s">".output.1"</span><span class="p">,</span>
+ <span class="s">".output.extra.1"</span><span class="p">],</span>
+ <span class="s">"some_extra.string.for_example"</span><span class="p">,</span> <span class="mi">14</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">first_task</span><span class="p">(</span><span class="n">input_files</span><span class="p">,</span> <span class="n">output_file_pairs</span><span class="p">,</span>
+ <span class="n">extra_parameter_str</span><span class="p">,</span> <span class="n">extra_parameter_num</span><span class="p">):</span>
+ <span class="c"># make both pairs of output files</span>
+ <span class="k">for</span> <span class="n">output_file</span> <span class="ow">in</span> <span class="n">output_file_pairs</span><span class="p">:</span>
+ <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+
+
+<span class="c">#---------------------------------------------------------------</span>
+<span class="c">#</span>
+<span class="c"># second task</span>
+<span class="c">#</span>
+<span class="nd">@transform</span><span class="p">(</span><span class="n">first_task</span><span class="p">,</span> <span class="n">suffix</span><span class="p">(</span><span class="s">".output.1"</span><span class="p">),</span> <span class="s">".output2"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">second_task</span><span class="p">(</span><span class="n">input_files</span><span class="p">,</span> <span class="n">output_file</span><span class="p">):</span>
+ <span class="c"># make output file</span>
+ <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+
+<span class="c">#---------------------------------------------------------------</span>
+<span class="c">#</span>
+<span class="c"># Run</span>
+<span class="c">#</span>
+<span class="n">pipeline_run</span><span class="p">([</span><span class="n">second_task</span><span class="p">])</span>
+</pre></div>
+</div>
+<p>This gives the following results:</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="gp">>>> </span><span class="n">pipeline_run</span><span class="p">([</span><span class="n">second_task</span><span class="p">])</span>
+</pre></div>
+</div>
+<p class="last">We see that apart from having a file pair where previously there was a single file,
+little else has changed. We still have three pieces of data going through the
+pipeline in three parallel jobs.</p>
+</dd>
+</dl>
+</div></blockquote>
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="../../contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#">Step 2: <tt class="docutils literal"><span class="pre">@transform</span></tt>-ing data in a pipeline</a><ul>
+<li><a class="reference internal" href="#overview">Overview</a></li>
+<li><a class="reference internal" href="#transform">@transform</a></li>
+<li><a class="reference internal" href="#task-functions-as-recipes">Task functions as recipes</a></li>
+<li><a class="reference internal" href="#multiple-steps">Multiple steps</a></li>
+<li><a class="reference internal" href="#multi-tasking">Multi-tasking</a></li>
+<li><a class="reference internal" href="#up-to-date-jobs-are-not-re-run-unnecessarily">Up-to-date jobs are not re-run unnecessarily</a></li>
+<li><a class="reference internal" href="#intermediate-files">Intermediate files</a></li>
+<li><a class="reference internal" href="#transform-is-a-1-to-1-operation">@transform is a 1 to 1 operation</a></li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="step1_follows.html"
+ title="previous chapter">Step 1: An introduction to Ruffus pipelines</a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="step3_run_pipeline.html"
+ title="next chapter">Step 3: Understanding how your pipeline works</a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../../_sources/tutorials/simple_tutorial/step2.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="../../decorators/decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="../../decorators/originate.html">@originate</a> </li>
+ <li><a href="../../decorators/split.html">@split</a> </li>
+ <li><a href="../../decorators/transform.html">@transform</a> </li>
+ <li><a href="../../decorators/merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="../../decorators/subdivide.html">@subdivide</a> </li>
+ <li><a href="../../decorators/transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="../../decorators/collate.html">@collate</a> </li>
+ <li><a href="../../decorators/collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="../../decorators/graphviz.html">@graphviz</a> </li>
+ <li><a href="../../decorators/mkdir.html">@mkdir</a> </li>
+ <li><a href="../../decorators/follows.html">@follows / mkdir</a> </li>
+ <li><a href="../../decorators/posttask.html">@posttask touch_file</a> </li>
+ <li><a href="../../decorators/active_if.html">@active_if</a> </li>
+ <li><a href="../../decorators/jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="../../decorators/product.html">@product </a> </li>
+ <li><a href="../../decorators/permutations.html">@permutations </a> </li>
+ <li><a href="../../decorators/combinations.html">@combinations </a> </li>
+ <li><a href="../../decorators/combinations_with_replacement.html">@combinations_ _with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="../../decorators/decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="../../decorators/files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="../../decorators/parallel.html">@parallel</a> </li>
+ <li><a href="../../decorators/check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="../../decorators/indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="step3_run_pipeline.html" title="Step 3: Understanding how your pipeline works"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="step1_follows.html" title="Step 1: An introduction to Ruffus pipelines"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="../new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/tutorials/simple_tutorial/step2_code.html b/doc/_build/html/tutorials/simple_tutorial/step2_code.html
new file mode 100644
index 0000000..680e034
--- /dev/null
+++ b/doc/_build/html/tutorials/simple_tutorial/step2_code.html
@@ -0,0 +1,264 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>Code for Step 2: Passing parameters to the pipeline — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../../index.html" />
+ <link rel="next" title="Code for Step 3: Displaying the pipeline visually" href="step3_run_pipeline_code.html" />
+ <link rel="prev" title="Code for Chapter 6: Applying the same recipe to create many different files" href="../manual/transform_code.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="step3_run_pipeline_code.html" title="Code for Step 3: Displaying the pipeline visually"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="../manual/transform_code.html" title="Code for Chapter 6: Applying the same recipe to create many different files"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="../new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <div class="section" id="code-for-step-2-passing-parameters-to-the-pipeline">
+<span id="simple-tutorial-2nd-step-code"></span><h1>Code for Step 2: Passing parameters to the pipeline<a class="headerlink" href="#code-for-step-2-passing-parameters-to-the-pipeline" title="Permalink to this headline">¶</a></h1>
+<ul class="simple">
+<li><a class="reference internal" href="simple_tutorial.html#simple-tutorial"><em>Up</em></a></li>
+<li><a class="reference internal" href="step2.html#simple-tutorial-2nd-step"><em>Back</em></a></li>
+<li><a class="reference internal" href="../../decorators/transform.html#decorators-transform"><em>@transform syntax</em></a> in detail</li>
+</ul>
+<div class="section" id="code">
+<h2>Code<a class="headerlink" href="#code" title="Permalink to this headline">¶</a></h2>
+<div class="highlight-python"><div class="highlight"><pre><span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+
+<span class="c">#---------------------------------------------------------------</span>
+<span class="c"># Create input files</span>
+<span class="c">#</span>
+<span class="n">first_task_params</span> <span class="o">=</span> <span class="p">[</span>
+ <span class="s">'job1.input'</span><span class="p">,</span>
+ <span class="s">'job2.input'</span><span class="p">,</span>
+ <span class="s">'job3.input'</span>
+ <span class="p">]</span>
+
+<span class="k">for</span> <span class="n">input_file</span> <span class="ow">in</span> <span class="n">first_task_params</span><span class="p">:</span>
+ <span class="nb">open</span><span class="p">(</span><span class="n">input_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+
+
+<span class="c">#---------------------------------------------------------------</span>
+<span class="c">#</span>
+<span class="c"># first task</span>
+<span class="c">#</span>
+<span class="nd">@transform</span><span class="p">(</span><span class="n">first_task_params</span><span class="p">,</span> <span class="n">suffix</span><span class="p">(</span><span class="s">".input"</span><span class="p">),</span> <span class="s">".output1"</span><span class="p">,</span>
+ <span class="s">"some_extra.string.for_example"</span><span class="p">,</span> <span class="mi">14</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">first_task</span><span class="p">(</span><span class="n">input_file</span><span class="p">,</span> <span class="n">output_file</span><span class="p">,</span>
+ <span class="n">extra_parameter_str</span><span class="p">,</span> <span class="n">extra_parameter_num</span><span class="p">):</span>
+ <span class="c"># make output file</span>
+ <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+
+
+<span class="c">#---------------------------------------------------------------</span>
+<span class="c">#</span>
+<span class="c"># second task</span>
+<span class="c">#</span>
+<span class="nd">@transform</span><span class="p">(</span><span class="n">first_task</span><span class="p">,</span> <span class="n">suffix</span><span class="p">(</span><span class="s">".output1"</span><span class="p">),</span> <span class="s">".output2"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">second_task</span><span class="p">(</span><span class="n">input_file</span><span class="p">,</span> <span class="n">output_file</span><span class="p">):</span>
+ <span class="c"># make output file</span>
+ <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+
+<span class="c">#---------------------------------------------------------------</span>
+<span class="c">#</span>
+<span class="c"># Run</span>
+<span class="c">#</span>
+<span class="n">pipeline_run</span><span class="p">([</span><span class="n">second_task</span><span class="p">])</span>
+</pre></div>
+</div>
+</div>
+<div class="section" id="resulting-output">
+<h2>Resulting Output<a class="headerlink" href="#resulting-output" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="gp">>>> </span><span class="n">pipeline_run</span><span class="p">([</span><span class="n">second_task</span><span class="p">])</span>
+<span class="go"> Job = [job1.input -> job1.output1, some_extra.string.for_example, 14] completed</span>
+<span class="go"> Job = [job2.input -> job2.output1, some_extra.string.for_example, 14] completed</span>
+<span class="go"> Job = [job3.input -> job3.output1, some_extra.string.for_example, 14] completed</span>
+<span class="go">Completed Task = first_task</span>
+<span class="go"> Job = [job1.output1 -> job1.output2] completed</span>
+<span class="go"> Job = [job2.output1 -> job2.output2] completed</span>
+<span class="go"> Job = [job3.output1 -> job3.output2] completed</span>
+<span class="go">Completed Task = second_task</span>
+</pre></div>
+</div>
+</div></blockquote>
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="../../contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#">Code for Step 2: Passing parameters to the pipeline</a><ul>
+<li><a class="reference internal" href="#code">Code</a></li>
+<li><a class="reference internal" href="#resulting-output">Resulting Output</a></li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="../manual/transform_code.html"
+ title="previous chapter">Code for Chapter 6: Applying the same recipe to create many different files</a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="step3_run_pipeline_code.html"
+ title="next chapter">Code for Step 3: Displaying the pipeline visually</a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../../_sources/tutorials/simple_tutorial/step2_code.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="../../decorators/decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="../../decorators/originate.html">@originate</a> </li>
+ <li><a href="../../decorators/split.html">@split</a> </li>
+ <li><a href="../../decorators/transform.html">@transform</a> </li>
+ <li><a href="../../decorators/merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="../../decorators/subdivide.html">@subdivide</a> </li>
+ <li><a href="../../decorators/transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="../../decorators/collate.html">@collate</a> </li>
+ <li><a href="../../decorators/collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="../../decorators/graphviz.html">@graphviz</a> </li>
+ <li><a href="../../decorators/mkdir.html">@mkdir</a> </li>
+ <li><a href="../../decorators/follows.html">@follows / mkdir</a> </li>
+ <li><a href="../../decorators/posttask.html">@posttask touch_file</a> </li>
+ <li><a href="../../decorators/active_if.html">@active_if</a> </li>
+ <li><a href="../../decorators/jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="../../decorators/product.html">@product </a> </li>
+ <li><a href="../../decorators/permutations.html">@permutations </a> </li>
+ <li><a href="../../decorators/combinations.html">@combinations </a> </li>
+ <li><a href="../../decorators/combinations_with_replacement.html">@combinations_ _with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="../../decorators/decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="../../decorators/files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="../../decorators/parallel.html">@parallel</a> </li>
+ <li><a href="../../decorators/check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="../../decorators/indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="step3_run_pipeline_code.html" title="Code for Step 3: Displaying the pipeline visually"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="../manual/transform_code.html" title="Code for Chapter 6: Applying the same recipe to create many different files"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="../new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/tutorials/simple_tutorial/step3_run_pipeline.html b/doc/_build/html/tutorials/simple_tutorial/step3_run_pipeline.html
new file mode 100644
index 0000000..4a4ee86
--- /dev/null
+++ b/doc/_build/html/tutorials/simple_tutorial/step3_run_pipeline.html
@@ -0,0 +1,262 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>Step 3: Understanding how your pipeline works — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../../index.html" />
+ <link rel="next" title="Step 4: Displaying the pipeline visually" href="step4_run_pipeline_graphically.html" />
+ <link rel="prev" title="Step 2: @transform-ing data in a pipeline" href="step2.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="step4_run_pipeline_graphically.html" title="Step 4: Displaying the pipeline visually"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="step2.html" title="Step 2: @transform-ing data in a pipeline"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="../new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <div class="section" id="step-3-understanding-how-your-pipeline-works">
+<span id="simple-tutorial-3rd-step"></span><h1>Step 3: Understanding how your pipeline works<a class="headerlink" href="#step-3-understanding-how-your-pipeline-works" title="Permalink to this headline">¶</a></h1>
+<blockquote>
+<div><ul class="simple">
+<li><a class="reference internal" href="simple_tutorial.html#simple-tutorial"><em>Simple tutorial overview</em></a></li>
+<li><a class="reference internal" href="../../pipeline_functions.html#pipeline-functions"><em>pipeline functions</em></a> in detail</li>
+</ul>
+</div></blockquote>
+<div class="admonition note">
+<p class="first admonition-title">Note</p>
+<p>Remember to look at the example code:</p>
+<ul class="last simple">
+<li><a class="reference internal" href="step3_run_pipeline_code.html#simple-tutorial-3nd-step-code"><em>Python Code for step 3</em></a></li>
+</ul>
+</div>
+<p id="index-0">The trickiest part of developing pipelines is understanding how your
+data flows through the pipeline.</p>
+<p>Parameters and files are passed from one task to another down the chain
+of pipelined functions.</p>
+<p>Whether you are learning how to use <strong>ruffus</strong>, or trying out a new
+feature in <strong>ruffus</strong>, or just have a horrendously complicated pipeline
+to debug (we have colleagues with >100 criss-crossing pipelined stages),
+your best friend is <a class="reference internal" href="../../pipeline_functions.html#pipeline-functions-pipeline-printout"><em>pipeline_printout(...)</em></a></p>
+<div class="section" id="printing-out-which-jobs-will-be-run">
+<h2>Printing out which jobs will be run<a class="headerlink" href="#printing-out-which-jobs-will-be-run" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p><a class="reference internal" href="../../pipeline_functions.html#pipeline-functions-pipeline-printout"><em>pipeline_printout(...)</em></a> takes the same parameters as pipeline_run but just prints
+the tasks which are and are not up-to-date.</p>
+<p>The <tt class="docutils literal"><span class="pre">verbose</span></tt> parameter controls how much detail is displayed.</p>
+<p>Let us take the two step <a class="reference internal" href="step3_run_pipeline_code.html#simple-tutorial-3nd-step-code"><em>pipelined code</em></a> we have previously written,
+but call <a class="reference internal" href="../../pipeline_functions.html#pipeline-functions-pipeline-printout"><em>pipeline_printout(...)</em></a> instead of
+<a class="reference internal" href="../../pipeline_functions.html#pipeline-functions-pipeline-run"><em>pipeline_run(...)</em></a>.
+This lists the tasks which will be run in the pipeline:</p>
+<blockquote>
+<div><img alt="../../_images/simple_tutorial_pipeline_printout11.png" src="../../_images/simple_tutorial_pipeline_printout11.png" />
+</div></blockquote>
+<p>To see the input and output parameters of each job in the pipeline, we can increase the verbosity from the default (1) to 3:</p>
+<blockquote>
+<div><img alt="../../_images/simple_tutorial_pipeline_printout21.png" src="../../_images/simple_tutorial_pipeline_printout21.png" />
+</div></blockquote>
+<p>This is very useful for checking that the input and output parameters have been
+specified correctly.</p>
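+<p>A minimal sketch of the calls themselves (assuming the two-step pipeline from the
+previous step; the full listing is in the linked code page):</p>
+<div class="highlight-python"><div class="highlight"><pre>
+# Minimal sketch: show what *would* run, without running anything.
+import sys
+
+# Summary view (default verbosity of 1).
+pipeline_printout(sys.stdout, [second_task])
+
+# Verbosity 3 also prints each job's input and output parameters.
+pipeline_printout(sys.stdout, [second_task], verbose = 3)
+</pre></div>
+</div>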
+</div></blockquote>
+</div>
+<div class="section" id="determining-which-jobs-are-out-of-date-or-not">
+<h2>Determining which jobs are out-of-date or not<a class="headerlink" href="#determining-which-jobs-are-out-of-date-or-not" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>It is often useful to see which tasks are or are not up-to-date. For example, if we
+were to run the pipeline in full, and then modify one of the intermediate files, the
+pipeline would be partially out of date.</p>
+<p>Let us start by running the pipeline in full, and then modify <tt class="docutils literal"><span class="pre">job1.stage1</span></tt> so that the second task is no longer up-to-date:</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="n">pipeline_run</span><span class="p">([</span><span class="n">second_task</span><span class="p">])</span>
+
+<span class="c"># modify job1.stage1</span>
+<span class="nb">open</span><span class="p">(</span><span class="s">"job1.stage1"</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
+</pre></div>
+</div>
+<p>At a verbosity of 5, even jobs which are up-to-date will be displayed.
+We can now see that there is only one job in <tt class="docutils literal"><span class="pre">second_task(...)</span></tt> which needs to be re-run
+because <tt class="docutils literal"><span class="pre">job1.stage1</span></tt> has been modified after <tt class="docutils literal"><span class="pre">job1.stage2</span></tt> (highlighted in blue):</p>
+<blockquote>
+<div><img alt="../../_images/simple_tutorial_pipeline_printout31.png" src="../../_images/simple_tutorial_pipeline_printout31.png" />
+</div></blockquote>
+</div></blockquote>
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="../../contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#">Step 3: Understanding how your pipeline works</a><ul>
+<li><a class="reference internal" href="#printing-out-which-jobs-will-be-run">Printing out which jobs will be run</a></li>
+<li><a class="reference internal" href="#determining-which-jobs-are-out-of-date-or-not">Determining which jobs are out-of-date or not</a></li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="step2.html"
+ title="previous chapter">Step 2: <tt class="docutils literal"><span class="pre">@transform</span></tt>-ing data in a pipeline</a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="step4_run_pipeline_graphically.html"
+ title="next chapter">Step 4: Displaying the pipeline visually</a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../../_sources/tutorials/simple_tutorial/step3_run_pipeline.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="../../decorators/decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="../../decorators/originate.html">@originate</a> </li>
+ <li><a href="../../decorators/split.html">@split</a> </li>
+ <li><a href="../../decorators/transform.html">@transform</a> </li>
+ <li><a href="../../decorators/merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="../../decorators/subdivide.html">@subdivide</a> </li>
+ <li><a href="../../decorators/transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="../../decorators/collate.html">@collate</a> </li>
+ <li><a href="../../decorators/collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="../../decorators/graphviz.html">@graphviz</a> </li>
+ <li><a href="../../decorators/mkdir.html">@mkdir</a> </li>
+ <li><a href="../../decorators/follows.html">@follows / mkdir</a> </li>
+ <li><a href="../../decorators/posttask.html">@posttask touch_file</a> </li>
+ <li><a href="../../decorators/active_if.html">@active_if</a> </li>
+ <li><a href="../../decorators/jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="../../decorators/product.html">@product </a> </li>
+ <li><a href="../../decorators/permutations.html">@permutations </a> </li>
+ <li><a href="../../decorators/combinations.html">@combinations </a> </li>
+ <li><a href="../../decorators/combinations_with_replacement.html">@combinations_ _with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="../../decorators/decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="../../decorators/files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="../../decorators/parallel.html">@parallel</a> </li>
+ <li><a href="../../decorators/check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="../../decorators/indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="step4_run_pipeline_graphically.html" title="Step 4: Displaying the pipeline visually"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="step2.html" title="Step 2: @transform-ing data in a pipeline"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="../new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/tutorials/simple_tutorial/step3_run_pipeline_code.html b/doc/_build/html/tutorials/simple_tutorial/step3_run_pipeline_code.html
new file mode 100644
index 0000000..7de16b7
--- /dev/null
+++ b/doc/_build/html/tutorials/simple_tutorial/step3_run_pipeline_code.html
@@ -0,0 +1,305 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>Code for Step 3: Displaying the pipeline visually — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../../index.html" />
+ <link rel="next" title="Code for Step 4: Displaying the pipeline visually" href="step4_run_pipeline_graphically_code.html" />
+ <link rel="prev" title="Code for Step 2: Passing parameters to the pipeline" href="step2_code.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="step4_run_pipeline_graphically_code.html" title="Code for Step 4: Displaying the pipeline visually"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="step2_code.html" title="Code for Step 2: Passing parameters to the pipeline"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="../new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <div class="section" id="code-for-step-3-displaying-the-pipeline-visually">
+<span id="simple-tutorial-3nd-step-code"></span><h1>Code for Step 3: Displaying the pipeline visually<a class="headerlink" href="#code-for-step-3-displaying-the-pipeline-visually" title="Permalink to this headline">¶</a></h1>
+<ul class="simple">
+<li><a class="reference internal" href="simple_tutorial.html#simple-tutorial"><em>Simple tutorial overview</em></a></li>
+<li><a class="reference internal" href="../../pipeline_functions.html#pipeline-functions"><em>pipeline functions</em></a> in detail</li>
+<li><a class="reference internal" href="step3_run_pipeline.html#simple-tutorial-3rd-step"><em>Back to Step 3</em></a></li>
+</ul>
+<div class="section" id="display-the-initial-state-of-the-pipeline">
+<h2>Display the initial state of the pipeline<a class="headerlink" href="#display-the-initial-state-of-the-pipeline" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+<span class="kn">import</span> <span class="nn">sys</span>
+
+<span class="c">#---------------------------------------------------------------</span>
+<span class="c">#</span>
+<span class="c"># first task</span>
+<span class="c">#</span>
+<span class="n">task1_param</span> <span class="o">=</span> <span class="p">[</span>
+ <span class="p">[</span> <span class="bp">None</span><span class="p">,</span> <span class="s">'job1.stage1'</span><span class="p">],</span> <span class="c"># 1st job</span>
+ <span class="p">[</span> <span class="bp">None</span><span class="p">,</span> <span class="s">'job2.stage1'</span><span class="p">],</span> <span class="c"># 2nd job</span>
+ <span class="p">]</span>
+
+<span class="nd">@files</span><span class="p">(</span><span class="n">task1_param</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">first_task</span><span class="p">(</span><span class="n">no_input_file</span><span class="p">,</span> <span class="n">output_file</span><span class="p">):</span>
+ <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+ <span class="c">#</span>
+ <span class="c"># pretend we have worked hard</span>
+
+
+<span class="c">#---------------------------------------------------------------</span>
+<span class="c">#</span>
+<span class="c"># second task</span>
+<span class="c">#</span>
+<span class="n">task2_param</span> <span class="o">=</span> <span class="p">[</span>
+ <span class="p">[</span> <span class="s">'job1.stage1'</span><span class="p">,</span> <span class="s">"job1.stage2"</span><span class="p">,</span> <span class="s">" 1st_job"</span><span class="p">],</span> <span class="c"># 1st job</span>
+ <span class="p">[</span> <span class="s">'job2.stage1'</span><span class="p">,</span> <span class="s">"job2.stage2"</span><span class="p">,</span> <span class="s">" 2nd_job"</span><span class="p">],</span> <span class="c"># 2nd job</span>
+ <span class="p">]</span>
+
+<span class="nd">@follows</span><span class="p">(</span><span class="n">first_task</span><span class="p">)</span>
+<span class="nd">@files</span><span class="p">(</span><span class="n">task2_param</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">second_task</span><span class="p">(</span><span class="n">input_file</span><span class="p">,</span> <span class="n">output_file</span><span class="p">,</span> <span class="n">extra_parameter</span><span class="p">):</span>
+ <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+ <span class="k">print</span> <span class="n">extra_parameter</span>
+
+<span class="n">pipeline_printout</span><span class="p">(</span><span class="n">sys</span><span class="o">.</span><span class="n">stdout</span><span class="p">,</span> <span class="p">[</span><span class="n">second_task</span><span class="p">],</span> <span class="n">verbose</span> <span class="o">=</span> <span class="mi">3</span><span class="p">)</span>
+</pre></div>
+</div>
+</div></blockquote>
+</div>
+<div class="section" id="resulting-output">
+<h2>Resulting Output<a class="headerlink" href="#resulting-output" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="gp">>>> </span><span class="n">pipeline_printout</span><span class="p">(</span><span class="n">sys</span><span class="o">.</span><span class="n">stdout</span><span class="p">,</span> <span class="p">[</span><span class="n">second_task</span><span class="p">])</span>
+
+<span class="go"> Task = first_task</span>
+<span class="go"> Job = [None -> job1.stage1]</span>
+<span class="go"> Job = [None -> job2.stage1]</span>
+
+<span class="go"> Task = second_task</span>
+<span class="go"> Job = [job1.stage1 -> job1.stage2, 1st_job]</span>
+<span class="go"> Job = [job2.stage1 -> job2.stage2, 2nd_job]</span>
+</pre></div>
+</div>
+</div></blockquote>
+</div>
+<div class="section" id="display-the-partially-up-to-date-pipeline">
+<h2>Display the partially up-to-date pipeline<a class="headerlink" href="#display-the-partially-up-to-date-pipeline" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>Run the pipeline, modify <tt class="docutils literal"><span class="pre">job1.stage1</span></tt> so that the second task is no longer up-to-date,
+and print out the pipeline state again:</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="n">pipeline_run</span><span class="p">([</span><span class="n">second_task</span><span class="p">])</span>
+
+<span class="c"># modify job1.stage1</span>
+<span class="nb">open</span><span class="p">(</span><span class="s">"job1.stage1"</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
+</pre></div>
+</div>
+<p>At a verbosity of 5, even jobs which are up-to-date will be displayed:</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="gp">>>> </span><span class="n">pipeline_printout</span><span class="p">(</span><span class="n">sys</span><span class="o">.</span><span class="n">stdout</span><span class="p">,</span> <span class="p">[</span><span class="n">second_task</span><span class="p">],</span> <span class="n">verbose</span> <span class="o">=</span> <span class="mi">5</span><span class="p">)</span>
+<span class="go">________________________________________</span>
+<span class="go">Tasks which are up-to-date:</span>
+
+<span class="go">Task = first_task</span>
+<span class="go"> Job = [None</span>
+<span class="go"> ->job1.stage1]</span>
+<span class="go"> Job up-to-date</span>
+<span class="go"> Job = [None</span>
+<span class="go"> ->job2.stage1]</span>
+<span class="go"> Job up-to-date</span>
+
+
+<span class="go">________________________________________</span>
+<span class="go">Tasks which will be run:</span>
+
+<span class="go">Task = second_task</span>
+<span class="go"> Job = [job1.stage1</span>
+<span class="go"> ->job1.stage2, 1st_job]</span>
+<span class="go"> Job needs update: Need update file times= [[(1269025787.0, 'job1.stage1')], [(1269025785.0,</span>
+<span class="go"> 'job1.stage2')]]</span>
+<span class="go"> Job = [job2.stage1</span>
+<span class="go"> ->job2.stage2, 2nd_job]</span>
+<span class="go"> Job up-to-date</span>
+
+<span class="go">________________________________________</span>
+</pre></div>
+</div>
+<p>We can now see that there is only one job in “second_task” which needs to be re-run
+because ‘job1.stage1’ has been modified after ‘job1.stage2’.</p>
+</div></blockquote>
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="../../contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#">Code for Step 3: Displaying the pipeline visually</a><ul>
+<li><a class="reference internal" href="#display-the-initial-state-of-the-pipeline">Display the initial state of the pipeline</a></li>
+<li><a class="reference internal" href="#resulting-output">Resulting Output</a></li>
+<li><a class="reference internal" href="#display-the-partially-up-to-date-pipeline">Display the partially up-to-date pipeline</a></li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="step2_code.html"
+ title="previous chapter">Code for Step 2: Passing parameters to the pipeline</a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="step4_run_pipeline_graphically_code.html"
+ title="next chapter">Code for Step 4: Displaying the pipeline visually</a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../../_sources/tutorials/simple_tutorial/step3_run_pipeline_code.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="../../decorators/decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="../../decorators/originate.html">@originate</a> </li>
+ <li><a href="../../decorators/split.html">@split</a> </li>
+ <li><a href="../../decorators/transform.html">@transform</a> </li>
+ <li><a href="../../decorators/merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="../../decorators/subdivide.html">@subdivide</a> </li>
+ <li><a href="../../decorators/transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="../../decorators/collate.html">@collate</a> </li>
+ <li><a href="../../decorators/collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="../../decorators/graphviz.html">@graphviz</a> </li>
+ <li><a href="../../decorators/mkdir.html">@mkdir</a> </li>
+ <li><a href="../../decorators/follows.html">@follows / mkdir</a> </li>
+ <li><a href="../../decorators/posttask.html">@posttask touch_file</a> </li>
+ <li><a href="../../decorators/active_if.html">@active_if</a> </li>
+ <li><a href="../../decorators/jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="../../decorators/product.html">@product </a> </li>
+ <li><a href="../../decorators/permutations.html">@permutations </a> </li>
+ <li><a href="../../decorators/combinations.html">@combinations </a> </li>
+ <li><a href="../../decorators/combinations_with_replacement.html">@combinations_ _with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="../../decorators/decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="../../decorators/files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="../../decorators/parallel.html">@parallel</a> </li>
+ <li><a href="../../decorators/check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="../../decorators/indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="step4_run_pipeline_graphically_code.html" title="Code for Step 4: Displaying the pipeline visually"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="step2_code.html" title="Code for Step 2: Passing parameters to the pipeline"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="../new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/tutorials/simple_tutorial/step4_run_pipeline_graphically.html b/doc/_build/html/tutorials/simple_tutorial/step4_run_pipeline_graphically.html
new file mode 100644
index 0000000..97fa216
--- /dev/null
+++ b/doc/_build/html/tutorials/simple_tutorial/step4_run_pipeline_graphically.html
@@ -0,0 +1,266 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>Step 4: Displaying the pipeline visually — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../../index.html" />
+ <link rel="next" title="Step 5: Splitting up large tasks / files" href="step5_split.html" />
+ <link rel="prev" title="Step 3: Understanding how your pipeline works" href="step3_run_pipeline.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="step5_split.html" title="Step 5: Splitting up large tasks / files"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="step3_run_pipeline.html" title="Step 3: Understanding how your pipeline works"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="../new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <div class="section" id="step-4-displaying-the-pipeline-visually">
+<span id="simple-tutorial-4th-step-graphical"></span><h1>Step 4: Displaying the pipeline visually<a class="headerlink" href="#step-4-displaying-the-pipeline-visually" title="Permalink to this headline">¶</a></h1>
+<blockquote>
+<div><ul class="simple">
+<li><a class="reference internal" href="simple_tutorial.html#simple-tutorial"><em>Simple tutorial overview</em></a></li>
+<li><a class="reference internal" href="../../pipeline_functions.html#pipeline-functions"><em>pipeline functions</em></a> in detail</li>
+</ul>
+</div></blockquote>
+<div class="admonition note">
+<p class="first admonition-title">Note</p>
+<p>Remember to look at the example code:</p>
+<ul class="last simple">
+<li><a class="reference internal" href="step4_run_pipeline_graphically_code.html#simple-tutorial-4th-step-graphical-code"><em>Python Code for step 4</em></a></li>
+</ul>
+</div>
+<span class="target" id="index-0"></span><div class="section" id="printing-out-a-flowchart-of-our-pipeline">
+<span id="index-1"></span><h2>Printing out a flowchart of our pipeline<a class="headerlink" href="#printing-out-a-flowchart-of-our-pipeline" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>It is all very well being able to trace the data flow through the pipeline.
+Sometimes, however, we need a bit of eye-candy.</p>
+<table border="1" class="borderless docutils">
+<colgroup>
+<col width="1%" />
+<col width="99%" />
+</colgroup>
+<tbody valign="top">
+<tr class="row-odd"><td><img alt="../../_images/simple_tutorial_step4.png" class="first last" src="../../_images/simple_tutorial_step4.png" />
+</td>
+<td><blockquote class="first last">
+<div><dl class="docutils">
+<dt>We can see this flowchart of our fledgling pipeline by executing:</dt>
+<dd><div class="first last highlight-python"><div class="highlight"><pre><span class="n">pipeline_printout_graph</span> <span class="p">(</span> <span class="s">'flowchart.svg'</span><span class="p">,</span>
+ <span class="s">'svg'</span><span class="p">,</span>
+ <span class="p">[</span><span class="n">second_task</span><span class="p">],</span>
+ <span class="n">no_key_legend</span> <span class="o">=</span> <span class="bp">False</span><span class="p">)</span>
+</pre></div>
+</div>
+</dd>
+</dl>
+<p>Flowcharts can be printed in a large number of formats including jpg, svg,
+png and pdf, provided that the <tt class="docutils literal"><span class="pre">dot</span></tt> program from
+<a class="reference external" href="http://www.graphviz.org/">Graphviz</a> is installed.</p>
+<p>For this simple case, we have omitted the legend key which distinguishes between the
+different states of the various tasks. (See below for the legend key.)</p>
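+<p>For example (a sketch only &#8212; the output file names here are illustrative, not part of the
+tutorial code), the same call can write other formats simply by changing the file extension and
+format string:</p>
+<div class="highlight-python"><div class="highlight"><pre># write a PNG version, this time keeping the legend key (the default)
+pipeline_printout_graph("flowchart.png", "png", [second_task])
+
+# or a PDF, suppressing the legend key
+pipeline_printout_graph("flowchart.pdf", "pdf", [second_task], no_key_legend = True)
+</pre></div>
+</div>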
+</div></blockquote>
+</td>
+</tr>
+</tbody>
+</table>
+</div></blockquote>
+</div>
+<div class="section" id="horribly-complicated-pipelines">
+<h2>Horribly complicated pipelines!<a class="headerlink" href="#horribly-complicated-pipelines" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>Flowcharts are especially useful if you have really complicated pipelines, such as</p>
+<blockquote>
+<div><img alt="../../_images/simple_tutorial_complex_flowchart1.png" src="../../_images/simple_tutorial_complex_flowchart1.png" />
+</div></blockquote>
+</div></blockquote>
+</div>
+<div class="section" id="circular-dependency-errors-in-pipelines">
+<h2>Circular dependency errors in pipelines!<a class="headerlink" href="#circular-dependency-errors-in-pipelines" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>Especially if the pipeline is not set up properly and vicious circular dependencies
+are present:</p>
+<blockquote>
+<div><img alt="../../_images/simple_tutorial_complex_flowchart_error1.png" src="../../_images/simple_tutorial_complex_flowchart_error1.png" />
+</div></blockquote>
+</div></blockquote>
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="../../contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#">Step 4: Displaying the pipeline visually</a><ul>
+<li><a class="reference internal" href="#printing-out-a-flowchart-of-our-pipeline">Printing out a flowchart of our pipeline</a></li>
+<li><a class="reference internal" href="#horribly-complicated-pipelines">Horribly complicated pipelines!</a></li>
+<li><a class="reference internal" href="#circular-dependency-errors-in-pipelines">Circular dependency errors in pipelines!</a></li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="step3_run_pipeline.html"
+ title="previous chapter">Step 3: Understanding how your pipeline works</a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="step5_split.html"
+ title="next chapter">Step 5: Splitting up large tasks / files</a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../../_sources/tutorials/simple_tutorial/step4_run_pipeline_graphically.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="../../decorators/decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="../../decorators/originate.html">@originate</a> </li>
+ <li><a href="../../decorators/split.html">@split</a> </li>
+ <li><a href="../../decorators/transform.html">@transform</a> </li>
+ <li><a href="../../decorators/merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="../../decorators/subdivide.html">@subdivide</a> </li>
+ <li><a href="../../decorators/transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="../../decorators/collate.html">@collate</a> </li>
+ <li><a href="../../decorators/collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="../../decorators/graphviz.html">@graphviz</a> </li>
+ <li><a href="../../decorators/mkdir.html">@mkdir</a> </li>
+ <li><a href="../../decorators/follows.html">@follows / mkdir</a> </li>
+ <li><a href="../../decorators/posttask.html">@posttask touch_file</a> </li>
+ <li><a href="../../decorators/active_if.html">@active_if</a> </li>
+ <li><a href="../../decorators/jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="../../decorators/product.html">@product </a> </li>
+ <li><a href="../../decorators/permutations.html">@permutations </a> </li>
+ <li><a href="../../decorators/combinations.html">@combinations </a> </li>
+ <li><a href="../../decorators/combinations_with_replacement.html">@combinations_ _with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="../../decorators/decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="../../decorators/files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="../../decorators/parallel.html">@parallel</a> </li>
+ <li><a href="../../decorators/check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="../../decorators/indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="step5_split.html" title="Step 5: Splitting up large tasks / files"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="step3_run_pipeline.html" title="Step 3: Understanding how your pipeline works"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="../new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/tutorials/simple_tutorial/step4_run_pipeline_graphically_code.html b/doc/_build/html/tutorials/simple_tutorial/step4_run_pipeline_graphically_code.html
new file mode 100644
index 0000000..5841ccb
--- /dev/null
+++ b/doc/_build/html/tutorials/simple_tutorial/step4_run_pipeline_graphically_code.html
@@ -0,0 +1,302 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>Code for Step 4: Displaying the pipeline visually — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../../index.html" />
+ <link rel="next" title="Code for Step 5: Splitting up large tasks / files" href="step5_split_code.html" />
+ <link rel="prev" title="Code for Step 3: Displaying the pipeline visually" href="step3_run_pipeline_code.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="step5_split_code.html" title="Code for Step 5: Splitting up large tasks / files"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="step3_run_pipeline_code.html" title="Code for Step 3: Displaying the pipeline visually"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="../new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <div class="section" id="code-for-step-4-displaying-the-pipeline-visually">
+<span id="simple-tutorial-4th-step-graphical-code"></span><h1>Code for Step 4: Displaying the pipeline visually<a class="headerlink" href="#code-for-step-4-displaying-the-pipeline-visually" title="Permalink to this headline">¶</a></h1>
+<ul class="simple">
+<li><a class="reference internal" href="simple_tutorial.html#simple-tutorial"><em>Simple tutorial overview</em></a></li>
+<li><a class="reference internal" href="../../pipeline_functions.html#pipeline-functions"><em>pipeline functions</em></a> in detail</li>
+<li><a class="reference internal" href="step4_run_pipeline_graphically.html#simple-tutorial-4th-step-graphical"><em>Back to Step 4</em></a></li>
+</ul>
+<div class="section" id="code">
+<h2>Code<a class="headerlink" href="#code" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+<span class="kn">import</span> <span class="nn">time</span>
+
+<span class="c">#---------------------------------------------------------------</span>
+<span class="c">#</span>
+<span class="c"># first task</span>
+<span class="c">#</span>
+<span class="n">task1_param</span> <span class="o">=</span> <span class="p">[</span>
+ <span class="p">[</span> <span class="bp">None</span><span class="p">,</span> <span class="s">'job1.stage1'</span><span class="p">],</span> <span class="c"># 1st job</span>
+ <span class="p">[</span> <span class="bp">None</span><span class="p">,</span> <span class="s">'job2.stage1'</span><span class="p">],</span> <span class="c"># 2nd job</span>
+ <span class="p">]</span>
+
+<span class="nd">@files</span><span class="p">(</span><span class="n">task1_param</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">first_task</span><span class="p">(</span><span class="n">no_input_file</span><span class="p">,</span> <span class="n">output_file</span><span class="p">):</span>
+ <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+
+
+<span class="c">#---------------------------------------------------------------</span>
+<span class="c">#</span>
+<span class="c"># second task</span>
+<span class="c">#</span>
+<span class="n">task2_param</span> <span class="o">=</span> <span class="p">[</span>
+ <span class="p">[</span> <span class="s">'job1.stage1'</span><span class="p">,</span> <span class="s">"job1.stage2"</span><span class="p">,</span> <span class="s">" 1st_job"</span><span class="p">],</span> <span class="c"># 1st job</span>
+ <span class="p">[</span> <span class="s">'job2.stage1'</span><span class="p">,</span> <span class="s">"job2.stage2"</span><span class="p">,</span> <span class="s">" 2nd_job"</span><span class="p">],</span> <span class="c"># 2nd job</span>
+ <span class="p">]</span>
+
+<span class="nd">@follows</span><span class="p">(</span><span class="n">first_task</span><span class="p">)</span>
+<span class="nd">@files</span><span class="p">(</span><span class="n">task2_param</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">second_task</span><span class="p">(</span><span class="n">input_file</span><span class="p">,</span> <span class="n">output_file</span><span class="p">,</span> <span class="n">extra_parameter</span><span class="p">):</span>
+ <span class="nb">open</span><span class="p">(</span><span class="n">output_file</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+ <span class="k">print</span> <span class="n">extra_parameter</span>
+
+<span class="c">#---------------------------------------------------------------</span>
+<span class="c">#</span>
+<span class="c"># Show flow chart and tasks before running the pipeline</span>
+<span class="c">#</span>
+<span class="k">print</span> <span class="s">"Show flow chart and tasks before running the pipeline"</span>
+<span class="n">pipeline_printout_graph</span> <span class="p">(</span> <span class="nb">open</span><span class="p">(</span><span class="s">"flowchart_before.png"</span><span class="p">,</span> <span class="s">"w"</span><span class="p">),</span>
+ <span class="s">"png"</span><span class="p">,</span>
+ <span class="p">[</span><span class="n">second_task</span><span class="p">],</span>
+ <span class="n">no_key_legend</span><span class="o">=</span><span class="bp">True</span><span class="p">)</span>
+
+<span class="c">#---------------------------------------------------------------</span>
+<span class="c">#</span>
+<span class="c"># Run</span>
+<span class="c">#</span>
+<span class="n">pipeline_run</span><span class="p">([</span><span class="n">second_task</span><span class="p">])</span>
+
+
+<span class="c"># modify job1.stage1</span>
+<span class="nb">open</span><span class="p">(</span><span class="s">"job1.stage1"</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
+
+
+<span class="c">#---------------------------------------------------------------</span>
+<span class="c">#</span>
+<span class="c"># Show flow chart and tasks after running the pipeline</span>
+<span class="c">#</span>
+<span class="k">print</span> <span class="s">"Show flow chart and tasks after running the pipeline"</span>
+<span class="n">pipeline_printout_graph</span> <span class="p">(</span> <span class="nb">open</span><span class="p">(</span><span class="s">"flowchart_after.png"</span><span class="p">,</span> <span class="s">"w"</span><span class="p">),</span>
+ <span class="s">"png"</span><span class="p">,</span>
+ <span class="p">[</span><span class="n">second_task</span><span class="p">],</span>
+ <span class="n">no_key_legend</span><span class="o">=</span><span class="bp">True</span><span class="p">)</span>
+</pre></div>
+</div>
+</div></blockquote>
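+<p>As a brief usage note (a sketch based on the call shown on the Step 4 page rather than a new
+API), the first argument to <tt class="docutils literal"><span class="pre">pipeline_printout_graph</span></tt> can also be
+a plain file name instead of an open file object:</p>
+<div class="highlight-python"><div class="highlight"><pre># equivalent to the open(...) form used above
+pipeline_printout_graph("flowchart_before.png", "png", [second_task], no_key_legend = True)
+</pre></div>
+</div>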
+</div>
+<div class="section" id="resulting-flowcharts">
+<h2>Resulting Flowcharts<a class="headerlink" href="#resulting-flowcharts" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><table border="1" class="docutils">
+<colgroup>
+<col width="46%" />
+<col width="54%" />
+</colgroup>
+<tbody valign="top">
+<tr class="row-odd"><td><a class="first reference internal image-reference" href="../../_images/simple_tutorial_stage4_before.png"><img alt="Before running the pipeline" class="align-center" src="../../_images/simple_tutorial_stage4_before.png" style="width: 103.0px; height: 94.5px;" /></a>
+<p class="last centered">
+<strong>Before</strong></p></td>
+<td><a class="first reference internal image-reference" href="../../_images/simple_tutorial_stage4_after.png"><img alt="After running the pipeline" class="align-center" src="../../_images/simple_tutorial_stage4_after.png" style="width: 103.0px; height: 98.5px;" /></a>
+<p class="last centered">
+<strong>After</strong></p></td>
+</tr>
+</tbody>
+</table>
+<table border="1" class="docutils">
+<colgroup>
+<col width="100%" />
+</colgroup>
+<tbody valign="top">
+<tr class="row-odd"><td><a class="first reference internal image-reference" href="../../_images/tutorial_key.jpg"><img alt="Legend key" class="align-center" src="../../_images/tutorial_key.jpg" style="width: 722.25px; height: 78.75px;" /></a>
+<p class="last centered">
+<strong>Legend</strong></p></td>
+</tr>
+</tbody>
+</table>
+</div></blockquote>
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="../../contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#">Code for Step 4: Displaying the pipeline visually</a><ul>
+<li><a class="reference internal" href="#code">Code</a></li>
+<li><a class="reference internal" href="#resulting-flowcharts">Resulting Flowcharts</a></li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="step3_run_pipeline_code.html"
+ title="previous chapter">Code for Step 3: Displaying the pipeline visually</a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="step5_split_code.html"
+ title="next chapter">Code for Step 5: Splitting up large tasks / files</a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../../_sources/tutorials/simple_tutorial/step4_run_pipeline_graphically_code.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="../../decorators/decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="../../decorators/originate.html">@originate</a> </li>
+ <li><a href="../../decorators/split.html">@split</a> </li>
+ <li><a href="../../decorators/transform.html">@transform</a> </li>
+ <li><a href="../../decorators/merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="../../decorators/subdivide.html">@subdivide</a> </li>
+ <li><a href="../../decorators/transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="../../decorators/collate.html">@collate</a> </li>
+ <li><a href="../../decorators/collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="../../decorators/graphviz.html">@graphviz</a> </li>
+ <li><a href="../../decorators/mkdir.html">@mkdir</a> </li>
+ <li><a href="../../decorators/follows.html">@follows / mkdir</a> </li>
+ <li><a href="../../decorators/posttask.html">@posttask touch_file</a> </li>
+ <li><a href="../../decorators/active_if.html">@active_if</a> </li>
+ <li><a href="../../decorators/jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="../../decorators/product.html">@product </a> </li>
+ <li><a href="../../decorators/permutations.html">@permutations </a> </li>
+ <li><a href="../../decorators/combinations.html">@combinations </a> </li>
+ <li><a href="../../decorators/combinations_with_replacement.html">@combinations_ _with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="../../decorators/decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="../../decorators/files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="../../decorators/parallel.html">@parallel</a> </li>
+ <li><a href="../../decorators/check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="../../decorators/indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="step5_split_code.html" title="Code for Step 5: Splitting up large tasks / files"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="step3_run_pipeline_code.html" title="Code for Step 3: Displaying the pipeline visually"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="../new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/tutorials/simple_tutorial/step5_split.html b/doc/_build/html/tutorials/simple_tutorial/step5_split.html
new file mode 100644
index 0000000..3109f10
--- /dev/null
+++ b/doc/_build/html/tutorials/simple_tutorial/step5_split.html
@@ -0,0 +1,297 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>Step 5: Splitting up large tasks / files — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../../index.html" />
+ <link rel="next" title="Step 6: Running jobs in parallel" href="step6_transform.html" />
+ <link rel="prev" title="Step 4: Displaying the pipeline visually" href="step4_run_pipeline_graphically.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="step6_transform.html" title="Step 6: Running jobs in parallel"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="step4_run_pipeline_graphically.html" title="Step 4: Displaying the pipeline visually"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="../new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <span class="target" id="tutorial-split"><span id="simple-tutorial-5th-step"></span></span><div class="section" id="step-5-splitting-up-large-tasks-files">
+<span id="index-0"></span><h1>Step 5: Splitting up large tasks / files<a class="headerlink" href="#step-5-splitting-up-large-tasks-files" title="Permalink to this headline">¶</a></h1>
+<blockquote>
+<div><ul class="simple">
+<li><a class="reference internal" href="simple_tutorial.html#simple-tutorial"><em>Simple tutorial overview</em></a></li>
+<li><a class="reference internal" href="../../decorators/split.html#decorators-split"><em>@split in detail</em></a></li>
+</ul>
+<div class="admonition note">
+<p class="first admonition-title">Note</p>
+<p>Remember to look at the example code:</p>
+<ul class="last simple">
+<li><a class="reference internal" href="step5_split_code.html#simple-tutorial-5th-step-code"><em>Python Code for step 5</em></a></li>
+</ul>
+</div>
+<div class="line-block">
+<div class="line">The second half of this tutorial is a worked example to calculate
+the sample variance of 10,000 random numbers.</div>
+<div class="line">This is similar to many computational projects: we are tackling a big problem
+by splitting it up into many tiny problems solved in parallel. We can then
+merge our piecemeal solutions into our final answer. These
+<a class="reference external" href="http://en.wikipedia.org/wiki/Embarrassingly_parallel">embarassingly parallel</a>
+problems motivated the original design of <strong>Ruffus</strong>.</div>
+</div>
+<p><strong>Ruffus</strong> has three dedicated decorators to handle these problems with ease (a minimal skeleton is sketched after this list):</p>
+<blockquote>
+<div><ul class="simple">
+<li><a class="reference internal" href="../../decorators/split.html#decorators-split"><em>@split</em></a> to break up the big problem</li>
+<li><a class="reference internal" href="../../decorators/split.html#decorators-split"><em>@transfrom</em></a> to solve the parts in parallel</li>
+<li><a class="reference internal" href="../../decorators/split.html#decorators-split"><em>@merge</em></a> to merge our piecemeal solutions into the final answer.</li>
+</ul>
+</div></blockquote>
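+<p>A minimal skeleton of this split / transform / merge pattern might look like the following
+(the file names and suffixes here are purely illustrative, not part of the tutorial code):</p>
+<div class="highlight-python"><div class="highlight"><pre>@split("random_numbers.list", "*.chunks")
+def split_problem(input_file, output_files):
+    "break the big input file into many *.chunks files"
+
+@transform(split_problem, suffix(".chunks"), ".sums")
+def solve_piece(input_file, output_file):
+    "work on each chunk independently, in parallel"
+
+@merge(solve_piece, "variance.result")
+def merge_results(input_files, output_file):
+    "combine the piecemeal answers into the final result"
+</pre></div>
+</div>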
+</div></blockquote>
+<div class="section" id="splitting-up-a-long-list-of-random-numbers-to-calculate-their-variance">
+<h2>Splitting up a long list of random numbers to calculate their variance<a class="headerlink" href="#splitting-up-a-long-list-of-random-numbers-to-calculate-their-variance" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><table border="1" class="borderless docutils">
+<colgroup>
+<col width="1%" />
+<col width="99%" />
+</colgroup>
+<tbody valign="top">
+<tr class="row-odd"><td><p class="first centered">
+<strong>Step 5 from:</strong></p><img alt="../../_images/simple_tutorial_step5.png" class="last" src="../../_images/simple_tutorial_step5.png" />
+</td>
+<td><blockquote class="first last">
+<div><p>Suppose we had a list of 10,000 random numbers in the file <tt class="docutils literal"><span class="pre">random_numbers.list</span></tt>:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="kn">import</span> <span class="nn">random</span>
+<span class="n">f</span> <span class="o">=</span> <span class="nb">open</span><span class="p">(</span><span class="s">'random_numbers.list'</span><span class="p">,</span> <span class="s">'w'</span><span class="p">)</span>
+<span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">NUMBER_OF_RANDOMS</span><span class="p">):</span>
+ <span class="n">f</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="s">'</span><span class="si">%g</span><span class="se">\n</span><span class="s">'</span> <span class="o">%</span> <span class="p">(</span><span class="n">random</span><span class="o">.</span><span class="n">random</span><span class="p">()</span> <span class="o">*</span> <span class="mf">100.0</span><span class="p">))</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p>We might want to calculate the sample variance more quickly by splitting the numbers
+into <tt class="docutils literal"><span class="pre">NNN</span></tt> parcels of 1000 numbers each and working on them in parallel.
+In this case we know that <tt class="docutils literal"><span class="pre">NNN</span> <span class="pre">==</span> <span class="pre">10</span></tt>, but usually the number of resulting files
+is only apparent after we have finished processing our starting file.</p>
+</div></blockquote>
+</td>
+</tr>
+</tbody>
+</table>
+<p>Our pipeline function needs to take the random numbers file <tt class="docutils literal"><span class="pre">random_numbers.list</span></tt>,
+read the random numbers from it, and start writing to a new file every 1000 lines.</p>
+<p>The <em>Ruffus</em> decorator <a class="reference internal" href="../../decorators/split.html#decorators-split"><em>@split</em></a> is designed specifically for
+splitting up input into an indeterminate number (<tt class="docutils literal"><span class="pre">NNN</span></tt>) of output files:</p>
+<blockquote>
+<div><img alt="../../_images/simple_tutorial_split1.png" src="../../_images/simple_tutorial_split1.png" />
+</div></blockquote>
+<p>Ruffus will set</p>
+<blockquote>
+<div><div class="line-block">
+<div class="line"><tt class="docutils literal"><span class="pre">input_file_name</span></tt> to <tt class="docutils literal"><span class="pre">"random_numbers.list"</span></tt></div>
+<div class="line"><tt class="docutils literal"><span class="pre">output_files</span></tt> to all files which match <tt class="docutils literal"><span class="pre">*.chunks</span></tt> (i.e. <tt class="docutils literal"><span class="pre">"1.chunks"</span></tt>, <tt class="docutils literal"><span class="pre">"2.chunks"</span></tt> etc.).</div>
+</div>
+</div></blockquote>
+<p>The first time you run this function, the <tt class="docutils literal"><span class="pre">*.chunks</span></tt> glob will match nothing and <tt class="docutils literal"><span class="pre">output_files</span></tt> will be an empty list, because
+no <tt class="docutils literal"><span class="pre">.chunks</span></tt> files have been created yet, resulting in the following call:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="n">step_5_split_numbers_into_chunks</span> <span class="p">(</span><span class="s">"random_numbers.list"</span><span class="p">,</span> <span class="p">[])</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p>After that <tt class="docutils literal"><span class="pre">*.chunks</span></tt> will match the list of current <tt class="docutils literal"><span class="pre">.chunks</span></tt> files created by
+the previous pipeline run. Some of these files will be out of date or superfluous.
+These file names are usually only useful for removing detritus from previous runs
+(have a look at <a class="reference internal" href="step5_split_code.html#simple-tutorial-5th-step-code"><em>step_5_split_numbers_into_chunks(...)</em></a>).</p>
+<div class="admonition note">
+<p class="first admonition-title">Note</p>
+<p>The great value of correctly specifying the list of <em>output</em> files will become apparent in the next
+step of this tutorial, when we shall see how pipeline tasks can be &#8220;chained&#8221; together conveniently.</p>
+<p>Remember to specify <tt class="docutils literal"><span class="pre">glob</span></tt> patterns which match <em>all</em> the files you are splitting up. You can
+cover different directories or groups of file names by using a list of <tt class="docutils literal"><span class="pre">glob</span></tt> patterns,
+e.g.:</p>
+<div class="last highlight-python"><div class="highlight"><pre><span class="nd">@split</span><span class="p">(</span><span class="s">"input.file"</span><span class="p">,</span> <span class="p">[</span><span class="s">'a*.bits'</span><span class="p">,</span> <span class="s">'b*.pieces'</span><span class="p">,</span> <span class="s">'somewhere_else/c*.stuff'</span><span class="p">])</span>
+<span class="k">def</span> <span class="nf">split_function</span> <span class="p">(</span><span class="n">input_filename</span><span class="p">,</span> <span class="n">output_files</span><span class="p">):</span>
+ <span class="s">"Code to split up 'input.file'"</span>
+</pre></div>
+</div>
+</div>
+</div></blockquote>
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="../../contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#">Step 5: Splitting up large tasks / files</a><ul>
+<li><a class="reference internal" href="#splitting-up-a-long-list-of-random-numbers-to-calculate-their-variance">Splitting up a long list of random numbers to calculate their variance</a></li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="step4_run_pipeline_graphically.html"
+ title="previous chapter">Step 4: Displaying the pipeline visually</a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="step6_transform.html"
+ title="next chapter">Step 6: Running jobs in parallel</a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../../_sources/tutorials/simple_tutorial/step5_split.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="../../decorators/decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="../../decorators/originate.html">@originate</a> </li>
+ <li><a href="../../decorators/split.html">@split</a> </li>
+ <li><a href="../../decorators/transform.html">@transform</a> </li>
+ <li><a href="../../decorators/merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="../../decorators/subdivide.html">@subdivide</a> </li>
+ <li><a href="../../decorators/transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="../../decorators/collate.html">@collate</a> </li>
+ <li><a href="../../decorators/collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="../../decorators/graphviz.html">@graphviz</a> </li>
+ <li><a href="../../decorators/mkdir.html">@mkdir</a> </li>
+ <li><a href="../../decorators/follows.html">@follows / mkdir</a> </li>
+ <li><a href="../../decorators/posttask.html">@posttask touch_file</a> </li>
+ <li><a href="../../decorators/active_if.html">@active_if</a> </li>
+ <li><a href="../../decorators/jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="../../decorators/product.html">@product </a> </li>
+ <li><a href="../../decorators/permutations.html">@permutations </a> </li>
+ <li><a href="../../decorators/combinations.html">@combinations </a> </li>
+ <li><a href="../../decorators/combinations_with_replacement.html">@combinations_ _with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="../../decorators/decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="../../decorators/files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="../../decorators/parallel.html">@parallel</a> </li>
+ <li><a href="../../decorators/check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="../../decorators/indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="step6_transform.html" title="Step 6: Running jobs in parallel"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="step4_run_pipeline_graphically.html" title="Step 4: Displaying the pipeline visually"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="../new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/tutorials/simple_tutorial/step5_split_code.html b/doc/_build/html/tutorials/simple_tutorial/step5_split_code.html
new file mode 100644
index 0000000..6bdbe66
--- /dev/null
+++ b/doc/_build/html/tutorials/simple_tutorial/step5_split_code.html
@@ -0,0 +1,277 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>Code for Step 5: Splitting up large tasks / files — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../../index.html" />
+ <link rel="next" title="Code for Step 6: Running jobs in parallel" href="step6_transform_code.html" />
+ <link rel="prev" title="Code for Step 4: Displaying the pipeline visually" href="step4_run_pipeline_graphically_code.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="step6_transform_code.html" title="Code for Step 6: Running jobs in parallel"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="step4_run_pipeline_graphically_code.html" title="Code for Step 4: Displaying the pipeline visually"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="../new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <span class="target" id="simple-tutorial-5th-step-code"></span><div class="section" id="code-for-step-5-splitting-up-large-tasks-files">
+<h1>Code for Step 5: Splitting up large tasks / files<a class="headerlink" href="#code-for-step-5-splitting-up-large-tasks-files" title="Permalink to this headline">¶</a></h1>
+<blockquote>
+<div><ul class="simple">
+<li><a class="reference internal" href="simple_tutorial.html#simple-tutorial"><em>Simple tutorial overview</em></a></li>
+<li><a class="reference internal" href="../../decorators/split.html#decorators-split"><em>@split in detail</em></a></li>
+<li><a class="reference internal" href="step5_split.html#simple-tutorial-5th-step"><em>back to step 5</em></a></li>
+</ul>
+</div></blockquote>
+<div class="section" id="code">
+<h2>Code<a class="headerlink" href="#code" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="n">NUMBER_OF_RANDOMS</span> <span class="o">=</span> <span class="mi">10000</span>
+<span class="n">CHUNK_SIZE</span> <span class="o">=</span> <span class="mi">1000</span>
+
+
+<span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+<span class="kn">import</span> <span class="nn">time</span>
+
+<span class="kn">import</span> <span class="nn">random</span>
+
+<span class="c">#---------------------------------------------------------------</span>
+<span class="c">#</span>
+<span class="c"># Create random numbers</span>
+<span class="c">#</span>
+<span class="nd">@files</span><span class="p">(</span><span class="bp">None</span><span class="p">,</span> <span class="s">"random_numbers.list"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">create_random_numbers</span><span class="p">(</span><span class="n">input_file_name</span><span class="p">,</span> <span class="n">output_file_name</span><span class="p">):</span>
+ <span class="n">f</span> <span class="o">=</span> <span class="nb">open</span><span class="p">(</span><span class="n">output_file_name</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+ <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">NUMBER_OF_RANDOMS</span><span class="p">):</span>
+ <span class="n">f</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="s">"</span><span class="si">%g</span><span class="se">\n</span><span class="s">"</span> <span class="o">%</span> <span class="p">(</span><span class="n">random</span><span class="o">.</span><span class="n">random</span><span class="p">()</span> <span class="o">*</span> <span class="mf">100.0</span><span class="p">))</span>
+
+<span class="c">#---------------------------------------------------------------</span>
+<span class="c">#</span>
+<span class="c"># Split initial file</span>
+<span class="c">#</span>
+<span class="nd">@follows</span><span class="p">(</span><span class="n">create_random_numbers</span><span class="p">)</span>
+<span class="nd">@split</span><span class="p">(</span><span class="s">"random_numbers.list"</span><span class="p">,</span> <span class="s">"*.chunks"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">step_5_split_numbers_into_chunks</span> <span class="p">(</span><span class="n">input_file_name</span><span class="p">,</span> <span class="n">output_files</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Splits random numbers file into XXX files of CHUNK_SIZE each</span>
+<span class="sd"> """</span>
+ <span class="c">#</span>
+ <span class="c"># clean up files from previous runs</span>
+ <span class="c">#</span>
+ <span class="k">for</span> <span class="n">f</span> <span class="ow">in</span> <span class="n">glob</span><span class="o">.</span><span class="n">glob</span><span class="p">(</span><span class="s">"*.chunks"</span><span class="p">):</span>
+ <span class="n">os</span><span class="o">.</span><span class="n">unlink</span><span class="p">(</span><span class="n">f</span><span class="p">)</span>
+ <span class="c">#</span>
+ <span class="c"># create new file every CHUNK_SIZE lines and</span>
+ <span class="c"># copy each line into current file</span>
+ <span class="c">#</span>
+ <span class="n">output_file</span> <span class="o">=</span> <span class="bp">None</span>
+ <span class="n">cnt_files</span> <span class="o">=</span> <span class="mi">0</span>
+ <span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="n">line</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="nb">open</span><span class="p">(</span><span class="n">input_file_name</span><span class="p">)):</span>
+ <span class="k">if</span> <span class="n">i</span> <span class="o">%</span> <span class="n">CHUNK_SIZE</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
+ <span class="n">cnt_files</span> <span class="o">+=</span> <span class="mi">1</span>
+ <span class="n">output_file</span> <span class="o">=</span> <span class="nb">open</span><span class="p">(</span><span class="s">"</span><span class="si">%d</span><span class="s">.chunks"</span> <span class="o">%</span> <span class="n">cnt_files</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+ <span class="n">output_file</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="n">line</span><span class="p">)</span>
+
+<span class="n">pipeline_run</span><span class="p">([</span><span class="n">step_5_split_numbers_into_chunks</span><span class="p">],</span> <span class="n">verbose</span> <span class="o">=</span> <span class="mi">2</span><span class="p">)</span>
+</pre></div>
+</div>
+</div></blockquote>
+</div>
+<div class="section" id="resulting-output">
+<h2>Resulting Output<a class="headerlink" href="#resulting-output" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="gp">>>> </span><span class="n">pipeline_run</span><span class="p">([</span><span class="n">step_5_split_numbers_into_chunks</span><span class="p">],</span> <span class="n">verbose</span> <span class="o">=</span> <span class="mi">2</span><span class="p">)</span>
+
+<span class="go"> Start Task = create_random_numbers</span>
+
+<span class="go"> Job = [None -> random_numbers.list] Missing file random_numbers.list</span>
+<span class="go"> Job = [None -> random_numbers.list] completed</span>
+<span class="go"> Completed Task = create_random_numbers</span>
+<span class="go"> Start Task = step_5_split_numbers_into_chunks</span>
+<span class="go"> Splits random numbers file into XXX files of CHUNK_SIZE each</span>
+<span class="go"> Job = [random_numbers.list -> *.chunks] Missing output file</span>
+<span class="go"> Job = [random_numbers.list -> *.chunks] completed</span>
+<span class="go"> Completed Task = step_5_split_numbers_into_chunks</span>
+</pre></div>
+</div>
+</div></blockquote>
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="../../contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#">Code for Step 5: Splitting up large tasks / files</a><ul>
+<li><a class="reference internal" href="#code">Code</a></li>
+<li><a class="reference internal" href="#resulting-output">Resulting Output</a></li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="step4_run_pipeline_graphically_code.html"
+ title="previous chapter">Code for Step 4: Displaying the pipeline visually</a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="step6_transform_code.html"
+ title="next chapter">Code for Step 6: Running jobs in parallel</a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../../_sources/tutorials/simple_tutorial/step5_split_code.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="../../decorators/decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="../../decorators/originate.html">@originate</a> </li>
+ <li><a href="../../decorators/split.html">@split</a> </li>
+ <li><a href="../../decorators/transform.html">@transform</a> </li>
+ <li><a href="../../decorators/merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="../../decorators/subdivide.html">@subdivide</a> </li>
+ <li><a href="../../decorators/transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="../../decorators/collate.html">@collate</a> </li>
+ <li><a href="../../decorators/collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="../../decorators/graphviz.html">@graphviz</a> </li>
+ <li><a href="../../decorators/mkdir.html">@mkdir</a> </li>
+ <li><a href="../../decorators/follows.html">@follows / mkdir</a> </li>
+ <li><a href="../../decorators/posttask.html">@posttask touch_file</a> </li>
+ <li><a href="../../decorators/active_if.html">@active_if</a> </li>
+ <li><a href="../../decorators/jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="../../decorators/product.html">@product </a> </li>
+ <li><a href="../../decorators/permutations.html">@permutations </a> </li>
+ <li><a href="../../decorators/combinations.html">@combinations </a> </li>
+ <li><a href="../../decorators/combinations_with_replacement.html">@combinations_ _with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="../../decorators/decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="../../decorators/files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="../../decorators/parallel.html">@parallel</a> </li>
+ <li><a href="../../decorators/check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="../../decorators/indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="step6_transform_code.html" title="Code for Step 6: Running jobs in parallel"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="step4_run_pipeline_graphically_code.html" title="Code for Step 4: Displaying the pipeline visually"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="../new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/tutorials/simple_tutorial/step6_transform.html b/doc/_build/html/tutorials/simple_tutorial/step6_transform.html
new file mode 100644
index 0000000..8f7577e
--- /dev/null
+++ b/doc/_build/html/tutorials/simple_tutorial/step6_transform.html
@@ -0,0 +1,258 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>Step 6: Running jobs in parallel — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../../index.html" />
+ <link rel="next" title="Step 7: Merging results back together" href="step7_merge.html" />
+ <link rel="prev" title="Step 5: Splitting up large tasks / files" href="step5_split.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="step7_merge.html" title="Step 7: Merging results back together"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="step5_split.html" title="Step 5: Splitting up large tasks / files"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="../new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <span class="target" id="tutorial-transform"><span id="simple-tutorial-6th-step"></span></span><div class="section" id="step-6-running-jobs-in-parallel">
+<span id="index-0"></span><h1>Step 6: Running jobs in parallel<a class="headerlink" href="#step-6-running-jobs-in-parallel" title="Permalink to this headline">¶</a></h1>
+<ul class="simple">
+<li><a class="reference internal" href="simple_tutorial.html#simple-tutorial"><em>Simple tutorial overview</em></a></li>
+<li><a class="reference internal" href="../../decorators/transform.html#decorators-transform"><em>@transform in detail</em></a></li>
+</ul>
+<div class="admonition note">
+<p class="first admonition-title">Note</p>
+<p>Remember to look at the example code:</p>
+<ul class="last simple">
+<li><a class="reference internal" href="step6_transform_code.html#simple-tutorial-6th-step-code"><em>Python Code for step 6</em></a></li>
+</ul>
+</div>
+<div class="section" id="calculating-sums-and-sum-of-squares-in-parallel">
+<h2>Calculating sums and sum of squares in parallel<a class="headerlink" href="#calculating-sums-and-sum-of-squares-in-parallel" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>Now that we have many smaller lists of numbers in separate files, we can calculate their sums and
+sum of squares in parallel.</p>
+<p>All we need is a function which takes a <tt class="docutils literal"><span class="pre">*.chunks</span></tt> file, reads the numbers, calculates
+the answers and writes them back out to a corresponding <tt class="docutils literal"><span class="pre">*.sums</span></tt> file.</p>
+<p><em>Ruffus</em> magically takes care of applying this task function to all the different
+data files in parallel.</p>
+<blockquote>
+<div><img alt="../../_images/simple_tutorial_transform1.png" src="../../_images/simple_tutorial_transform1.png" />
+</div></blockquote>
+<div class="line-block">
+<div class="line">The first thing to note about this example is that the <em>input</em> files are not specified
+as a <a class="reference external" href="http://docs.python.org/library/glob.html"><cite>glob</cite></a> (e.g. <tt class="docutils literal"><span class="pre">*.chunk</span></tt>) but as the preceding task.</div>
+<div class="line"><em>Ruffus</em> will take all
+the files produced by <tt class="docutils literal"><span class="pre">step_5_split_numbers_into_chunks()</span></tt> and feed them as the <em>input</em>
+into step 6.</div>
+</div>
+<p>This handy shortcut also means that <strong>Ruffus</strong> knows that <tt class="docutils literal"><span class="pre">step_6_calculate_sum_of_squares</span></tt>
+depends on <tt class="docutils literal"><span class="pre">step_5_split_numbers_into_chunks</span></tt> and an additional <tt class="docutils literal"><span class="pre">@follows</span></tt> directive
+is unnecessary.</p>
+<p>The use of <a class="reference internal" href="../../decorators/transform_ex.html#decorators-transform-suffix-string"><em>suffix</em></a> within the decorator tells
+<em>Ruffus</em> to take all <em>input</em> files with the <tt class="docutils literal"><span class="pre">.chunks</span></tt> suffix and substitute a <tt class="docutils literal"><span class="pre">.sums</span></tt>
+suffix to generate the corresponding <em>output</em> file name.</p>
+<dl class="docutils">
+<dt>Thus if <tt class="docutils literal"><span class="pre">step_5_split_numbers_into_chunks</span></tt> created</dt>
+<dd><div class="first last highlight-python"><div class="highlight"><pre><span class="s">"1.chunks"</span>
+<span class="s">"2.chunks"</span>
+<span class="s">"3.chunks"</span>
+</pre></div>
+</div>
+</dd>
+</dl>
+<p>This would result in the following function calls:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="n">step_6_calculate_sum_of_squares</span> <span class="p">(</span><span class="s">"1.chunk"</span><span class="p">,</span> <span class="s">"1.sums"</span><span class="p">)</span>
+<span class="n">step_6_calculate_sum_of_squares</span> <span class="p">(</span><span class="s">"2.chunk"</span><span class="p">,</span> <span class="s">"2.sums"</span><span class="p">)</span>
+<span class="n">step_6_calculate_sum_of_squares</span> <span class="p">(</span><span class="s">"3.chunk"</span><span class="p">,</span> <span class="s">"3.sums"</span><span class="p">)</span>
+
+<span class="c"># etc...</span>
+</pre></div>
+</div>
+</div></blockquote>
+<div class="admonition note">
+<p class="first admonition-title">Note</p>
+<p class="last">It is possible to generate <em>output</em> filenames using more powerful regular expressions
+as well. See the <a class="reference internal" href="../../decorators/transform.html#decorators-transform"><em>@transform</em></a> syntax documentation for more details.</p>
+</div>
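+<p>As a preview, a minimal sketch of such a task function is shown below. The decorator line matches
+the <a class="reference internal" href="step6_transform_code.html#simple-tutorial-6th-step-code"><em>Python Code for step 6</em></a>;
+the body is a simplified, illustrative version (variable names differ slightly from the full listing):</p>
+<div class="highlight-python"><div class="highlight"><pre># simplified sketch: read one number per line, accumulate the sum and
+# sum of squares, then write the three results to the *.sums file
+@transform(step_5_split_numbers_into_chunks, suffix(".chunks"), ".sums")
+def step_6_calculate_sum_of_squares(input_file_name, output_file_name):
+    sum_squared, sum_values, cnt_values = 0.0, 0.0, 0
+    for line in open(input_file_name):
+        val = float(line.rstrip())
+        sum_squared += val * val
+        sum_values  += val
+        cnt_values  += 1
+    with open(output_file_name, "w") as output_file:
+        # one value per line: sum of squares, sum, count
+        output_file.write("%s\n%s\n%d\n" % (repr(sum_squared), repr(sum_values), cnt_values))
+</pre></div>
+</div>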
+</div></blockquote>
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="../../contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#">Step 6: Running jobs in parallel</a><ul>
+<li><a class="reference internal" href="#calculating-sums-and-sum-of-squares-in-parallel">Calculating sums and sum of squares in parallel</a></li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="step5_split.html"
+ title="previous chapter">Step 5: Splitting up large tasks / files</a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="step7_merge.html"
+ title="next chapter">Step 7: Merging results back together</a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../../_sources/tutorials/simple_tutorial/step6_transform.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="../../decorators/decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="../../decorators/originate.html">@originate</a> </li>
+ <li><a href="../../decorators/split.html">@split</a> </li>
+ <li><a href="../../decorators/transform.html">@transform</a> </li>
+ <li><a href="../../decorators/merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="../../decorators/subdivide.html">@subdivide</a> </li>
+ <li><a href="../../decorators/transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="../../decorators/collate.html">@collate</a> </li>
+ <li><a href="../../decorators/collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="../../decorators/graphviz.html">@graphviz</a> </li>
+ <li><a href="../../decorators/mkdir.html">@mkdir</a> </li>
+ <li><a href="../../decorators/follows.html">@follows / mkdir</a> </li>
+ <li><a href="../../decorators/posttask.html">@posttask touch_file</a> </li>
+ <li><a href="../../decorators/active_if.html">@active_if</a> </li>
+ <li><a href="../../decorators/jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="../../decorators/product.html">@product </a> </li>
+ <li><a href="../../decorators/permutations.html">@permutations </a> </li>
+ <li><a href="../../decorators/combinations.html">@combinations </a> </li>
+ <li><a href="../../decorators/combinations_with_replacement.html">@combinations_ _with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="../../decorators/decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="../../decorators/files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="../../decorators/parallel.html">@parallel</a> </li>
+ <li><a href="../../decorators/check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="../../decorators/indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="step7_merge.html" title="Step 7: Merging results back together"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="step5_split.html" title="Step 5: Splitting up large tasks / files"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="../new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/tutorials/simple_tutorial/step6_transform_code.html b/doc/_build/html/tutorials/simple_tutorial/step6_transform_code.html
new file mode 100644
index 0000000..ca4b7f1
--- /dev/null
+++ b/doc/_build/html/tutorials/simple_tutorial/step6_transform_code.html
@@ -0,0 +1,297 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>Code for Step 6: Running jobs in parallel — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../../index.html" />
+ <link rel="next" title="Code for Step 7: Merging results back together" href="step7_merge_code.html" />
+ <link rel="prev" title="Code for Step 5: Splitting up large tasks / files" href="step5_split_code.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="step7_merge_code.html" title="Code for Step 7: Merging results back together"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="step5_split_code.html" title="Code for Step 5: Splitting up large tasks / files"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="../new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <div class="section" id="code-for-step-6-running-jobs-in-parallel">
+<span id="simple-tutorial-6th-step-code"></span><h1>Code for Step 6: Running jobs in parallel<a class="headerlink" href="#code-for-step-6-running-jobs-in-parallel" title="Permalink to this headline">¶</a></h1>
+<ul class="simple">
+<li><a class="reference internal" href="simple_tutorial.html#simple-tutorial"><em>Simple tutorial overview</em></a></li>
+<li><a class="reference internal" href="../../decorators/transform.html#decorators-transform"><em>@transform in detail</em></a></li>
+<li><a class="reference internal" href="step6_transform.html#simple-tutorial-6th-step"><em>back to step 6</em></a></li>
+</ul>
+<div class="section" id="code">
+<h2>Code<a class="headerlink" href="#code" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="n">NUMBER_OF_RANDOMS</span> <span class="o">=</span> <span class="mi">10000</span>
+<span class="n">CHUNK_SIZE</span> <span class="o">=</span> <span class="mi">1000</span>
+
+
+<span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+<span class="kn">import</span> <span class="nn">time</span>
+
+<span class="kn">import</span> <span class="nn">random</span>
+<span class="kn">import</span> <span class="nn">glob</span>
+
+<span class="c">#---------------------------------------------------------------</span>
+<span class="c">#</span>
+<span class="c"># Create random numbers</span>
+<span class="c">#</span>
+<span class="nd">@files</span><span class="p">(</span><span class="bp">None</span><span class="p">,</span> <span class="s">"random_numbers.list"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">create_random_numbers</span><span class="p">(</span><span class="n">input_file_name</span><span class="p">,</span> <span class="n">output_file_name</span><span class="p">):</span>
+ <span class="n">f</span> <span class="o">=</span> <span class="nb">open</span><span class="p">(</span><span class="n">output_file_name</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+ <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">NUMBER_OF_RANDOMS</span><span class="p">):</span>
+ <span class="n">f</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="s">"</span><span class="si">%g</span><span class="se">\n</span><span class="s">"</span> <span class="o">%</span> <span class="p">(</span><span class="n">random</span><span class="o">.</span><span class="n">random</span><span class="p">()</span> <span class="o">*</span> <span class="mf">100.0</span><span class="p">))</span>
+
+<span class="c">#---------------------------------------------------------------</span>
+<span class="c">#</span>
+<span class="c"># Split initial file</span>
+<span class="c">#</span>
+<span class="nd">@follows</span><span class="p">(</span><span class="n">create_random_numbers</span><span class="p">)</span>
+<span class="nd">@split</span><span class="p">(</span><span class="s">"random_numbers.list"</span><span class="p">,</span> <span class="s">"*.chunks"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">step_5_split_numbers_into_chunks</span> <span class="p">(</span><span class="n">input_file_name</span><span class="p">,</span> <span class="n">output_files</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Splits random numbers file into XXX files of CHUNK_SIZE each</span>
+<span class="sd"> """</span>
+ <span class="c">#</span>
+ <span class="c"># clean up files from previous runs</span>
+ <span class="c">#</span>
+ <span class="k">for</span> <span class="n">f</span> <span class="ow">in</span> <span class="n">glob</span><span class="o">.</span><span class="n">glob</span><span class="p">(</span><span class="s">"*.chunks"</span><span class="p">):</span>
+ <span class="n">os</span><span class="o">.</span><span class="n">unlink</span><span class="p">(</span><span class="n">f</span><span class="p">)</span>
+ <span class="c">#</span>
+ <span class="c">#</span>
+ <span class="c"># create new file every CHUNK_SIZE lines and</span>
+ <span class="c"># copy each line into current file</span>
+ <span class="c">#</span>
+ <span class="n">output_file</span> <span class="o">=</span> <span class="bp">None</span>
+ <span class="n">cnt_files</span> <span class="o">=</span> <span class="mi">0</span>
+ <span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="n">line</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="nb">open</span><span class="p">(</span><span class="n">input_file_name</span><span class="p">)):</span>
+ <span class="k">if</span> <span class="n">i</span> <span class="o">%</span> <span class="n">CHUNK_SIZE</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
+ <span class="n">cnt_files</span> <span class="o">+=</span> <span class="mi">1</span>
+ <span class="n">output_file</span> <span class="o">=</span> <span class="nb">open</span><span class="p">(</span><span class="s">"</span><span class="si">%d</span><span class="s">.chunks"</span> <span class="o">%</span> <span class="n">cnt_files</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+ <span class="n">output_file</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="n">line</span><span class="p">)</span>
+
+<span class="c">#---------------------------------------------------------------</span>
+<span class="c">#</span>
+<span class="c"># Calculate sum and sum of squares for each chunk file</span>
+<span class="c">#</span>
+<span class="nd">@transform</span><span class="p">(</span><span class="n">step_5_split_numbers_into_chunks</span><span class="p">,</span> <span class="n">suffix</span><span class="p">(</span><span class="s">".chunks"</span><span class="p">),</span> <span class="s">".sums"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">step_6_calculate_sum_of_squares</span> <span class="p">(</span><span class="n">input_file_name</span><span class="p">,</span> <span class="n">output_file_name</span><span class="p">):</span>
+ <span class="n">output</span> <span class="o">=</span> <span class="nb">open</span><span class="p">(</span><span class="n">output_file_name</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+ <span class="n">sum_squared</span><span class="p">,</span> <span class="nb">sum</span> <span class="o">=</span> <span class="p">[</span><span class="mf">0.0</span><span class="p">,</span> <span class="mf">0.0</span><span class="p">]</span>
+ <span class="n">cnt_values</span> <span class="o">=</span> <span class="mi">0</span>
+ <span class="k">for</span> <span class="n">line</span> <span class="ow">in</span> <span class="nb">open</span><span class="p">(</span><span class="n">input_file_name</span><span class="p">):</span>
+ <span class="n">cnt_values</span> <span class="o">+=</span> <span class="mi">1</span>
+ <span class="n">val</span> <span class="o">=</span> <span class="nb">float</span><span class="p">(</span><span class="n">line</span><span class="o">.</span><span class="n">rstrip</span><span class="p">())</span>
+ <span class="n">sum_squared</span> <span class="o">+=</span> <span class="n">val</span> <span class="o">*</span> <span class="n">val</span>
+ <span class="nb">sum</span> <span class="o">+=</span> <span class="n">val</span>
+ <span class="n">output</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="s">"</span><span class="si">%s</span><span class="se">\n</span><span class="si">%s</span><span class="se">\n</span><span class="si">%d</span><span class="se">\n</span><span class="s">"</span> <span class="o">%</span> <span class="p">(</span><span class="nb">repr</span><span class="p">(</span><span class="n">sum_squared</span><span class="p">),</span> <span c [...]
+
+<span class="n">pipeline_run</span><span class="p">([</span><span class="n">step_6_calculate_sum_of_squares</span><span class="p">],</span> <span class="n">verbose</span> <span class="o">=</span> <span class="mi">1</span><span class="p">)</span>
+</pre></div>
+</div>
+</div></blockquote>
+</div>
+<div class="section" id="resulting-output">
+<h2>Resulting Output<a class="headerlink" href="#resulting-output" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="gp">>>> </span><span class="n">pipeline_run</span><span class="p">([</span><span class="n">step_6_calculate_sum_of_squares</span><span class="p">],</span> <span class="n">verbose</span> <span class="o">=</span> <span class="mi">1</span><span class="p">)</span>
+<span class="go"> Job = [None -> random_numbers.list] unnecessary: already up to date</span>
+<span class="go">Completed Task = create_random_numbers</span>
+<span class="go"> Job = [random_numbers.list -> *.chunks] unnecessary: already up to date</span>
+<span class="go">Completed Task = step_5_split_numbers_into_chunks</span>
+<span class="go"> Job = [6.chunks -> 6.sums] completed</span>
+<span class="go"> Job = [1.chunks -> 1.sums] completed</span>
+<span class="go"> Job = [4.chunks -> 4.sums] completed</span>
+<span class="go"> Job = [7.chunks -> 7.sums] completed</span>
+<span class="go"> Job = [2.chunks -> 2.sums] completed</span>
+<span class="go"> Job = [9.chunks -> 9.sums] completed</span>
+<span class="go"> Job = [10.chunks -> 10.sums] completed</span>
+<span class="go"> Job = [3.chunks -> 3.sums] completed</span>
+<span class="go"> Job = [5.chunks -> 5.sums] completed</span>
+<span class="go"> Job = [8.chunks -> 8.sums] completed</span>
+<span class="go">Completed Task = step_6_calculate_sum_of_squares</span>
+</pre></div>
+</div>
+</div></blockquote>
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="../../contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#">Code for Step 6: Running jobs in parallel</a><ul>
+<li><a class="reference internal" href="#code">Code</a></li>
+<li><a class="reference internal" href="#resulting-output">Resulting Output</a></li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="step5_split_code.html"
+ title="previous chapter">Code for Step 5: Splitting up large tasks / files</a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="step7_merge_code.html"
+ title="next chapter">Code for Step 7: Merging results back together</a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../../_sources/tutorials/simple_tutorial/step6_transform_code.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="../../decorators/decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="../../decorators/originate.html">@originate</a> </li>
+ <li><a href="../../decorators/split.html">@split</a> </li>
+ <li><a href="../../decorators/transform.html">@transform</a> </li>
+ <li><a href="../../decorators/merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="../../decorators/subdivide.html">@subdivide</a> </li>
+ <li><a href="../../decorators/transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="../../decorators/collate.html">@collate</a> </li>
+ <li><a href="../../decorators/collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="../../decorators/graphviz.html">@graphviz</a> </li>
+ <li><a href="../../decorators/mkdir.html">@mkdir</a> </li>
+ <li><a href="../../decorators/follows.html">@follows / mkdir</a> </li>
+ <li><a href="../../decorators/posttask.html">@posttask touch_file</a> </li>
+ <li><a href="../../decorators/active_if.html">@active_if</a> </li>
+ <li><a href="../../decorators/jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="../../decorators/product.html">@product </a> </li>
+ <li><a href="../../decorators/permutations.html">@permutations </a> </li>
+ <li><a href="../../decorators/combinations.html">@combinations </a> </li>
+ <li><a href="../../decorators/combinations_with_replacement.html">@combinations_ _with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="../../decorators/decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="../../decorators/files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="../../decorators/parallel.html">@parallel</a> </li>
+ <li><a href="../../decorators/check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="../../decorators/indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="step7_merge_code.html" title="Code for Step 7: Merging results back together"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="step5_split_code.html" title="Code for Step 5: Splitting up large tasks / files"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="../new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/tutorials/simple_tutorial/step7_merge.html b/doc/_build/html/tutorials/simple_tutorial/step7_merge.html
new file mode 100644
index 0000000..32fe308
--- /dev/null
+++ b/doc/_build/html/tutorials/simple_tutorial/step7_merge.html
@@ -0,0 +1,244 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>Step 7: Merging results back together — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../../index.html" />
+ <link rel="next" title="Step 8: Signal the completion of each stage of our pipeline" href="step8_posttask.html" />
+ <link rel="prev" title="Step 6: Running jobs in parallel" href="step6_transform.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="step8_posttask.html" title="Step 8: Signal the completion of each stage of our pipeline"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="step6_transform.html" title="Step 6: Running jobs in parallel"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="../new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <span class="target" id="simple-tutorial-7th-step"></span><div class="section" id="step-7-merging-results-back-together">
+<span id="index-0"></span><h1>Step 7: Merging results back together<a class="headerlink" href="#step-7-merging-results-back-together" title="Permalink to this headline">¶</a></h1>
+<ul class="simple">
+<li><a class="reference internal" href="simple_tutorial.html#simple-tutorial"><em>Simple tutorial overview</em></a></li>
+<li><a class="reference internal" href="../../decorators/merge.html#decorators-merge"><em>@merge in detail</em></a></li>
+</ul>
+<div class="admonition note">
+<p class="first admonition-title">Note</p>
+<p>Remember to look at the example code:</p>
+<ul class="last simple">
+<li><a class="reference internal" href="step7_merge_code.html#simple-tutorial-7th-step-code"><em>Python Code for step 7</em></a></li>
+</ul>
+</div>
+<p>Now that we have all the partial solutions in <tt class="docutils literal"><span class="pre">*.sums</span></tt>, we can merge them
+together to generate the final answer: the variance of all 10,000 random
+numbers.</p>
+<div class="section" id="calculating-variances-from-the-sums-and-sum-of-squares-of-all-chunks">
+<h2>Calculating variances from the sums and sum of squares of all chunks<a class="headerlink" href="#calculating-variances-from-the-sums-and-sum-of-squares-of-all-chunks" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>If we add up all the sums and sums of squares we calculated previously, we can
+obtain the variance as follows:</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="n">variance</span> <span class="o">=</span> <span class="p">(</span><span class="n">sum_squared</span> <span class="o">-</span> <span class="nb">sum</span> <span class="o">*</span> <span class="nb">sum</span> <span class="o">/</span> <span class="n">N</span><span class="p">)</span><span class="o">/</span><span class="n">N</span>
+</pre></div>
+</div>
+<p>where <tt class="docutils literal"><span class="pre">N</span></tt> is the number of values</p>
+<p>See the <a class="reference external" href="http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance">wikipedia</a> entry for a discussion of
+why this is a very naive approach!</p>
+<p>To do this, all we have to do is merge together all the values in <tt class="docutils literal"><span class="pre">*.sums</span></tt>, i.e.
+add up the <tt class="docutils literal"><span class="pre">sums</span></tt> and <tt class="docutils literal"><span class="pre">sum_squared</span></tt> for each chunk. We can then apply the above (naive) formula.</p>
+<p>Merging files is straightforward in <strong>Ruffus</strong>:</p>
+<blockquote>
+<div><img alt="../../_images/simple_tutorial_merge1.png" src="../../_images/simple_tutorial_merge1.png" />
+</div></blockquote>
+<p>The <a class="reference internal" href="../../decorators/merge.html#decorators-merge"><em>@merge</em></a> decorator tells <em>Ruffus</em> to take all the files from the step 6 task (i.e. <tt class="docutils literal"><span class="pre">*.sums</span></tt>),
+and produce a single merged file, <tt class="docutils literal"><span class="pre">"variance.result"</span></tt>.</p>
+<dl class="docutils">
+<dt>Thus if <tt class="docutils literal"><span class="pre">step_6_calculate_sum_of_squares</span></tt> created</dt>
+<dd><div class="first last line-block">
+<div class="line"><tt class="docutils literal"><span class="pre">1.sums</span></tt> and</div>
+<div class="line"><tt class="docutils literal"><span class="pre">2.sums</span></tt> etc.</div>
+</div>
+</dd>
+</dl>
+<p>This would result in the following function call:</p>
+<blockquote>
+<div><img alt="../../_images/simple_tutorial_merge2.png" src="../../_images/simple_tutorial_merge2.png" />
+</div></blockquote>
+<p>The final result is, of course, in <tt class="docutils literal"><span class="pre">"variance.result"</span></tt>.</p>
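+<p>A minimal sketch of what such a merging task might look like is shown below. The decorator
+arguments follow the description above; the task body and names are illustrative only, and the
+actual version is in the <a class="reference internal" href="step7_merge_code.html#simple-tutorial-7th-step-code"><em>Python Code for step 7</em></a>:</p>
+<div class="highlight-python"><div class="highlight"><pre># illustrative sketch only, not the exact tutorial code
+@merge(step_6_calculate_sum_of_squares, "variance.result")
+def calculate_variance(input_file_names, output_file_name):
+    all_sum_squared = 0.0
+    all_sum         = 0.0
+    all_cnt_values  = 0.0
+    for input_file_name in input_file_names:
+        # each *.sums file holds sum of squares, sum and count, one per line
+        sum_squared, sums, cnt_values = map(float, open(input_file_name))
+        all_sum_squared += sum_squared
+        all_sum         += sums
+        all_cnt_values  += cnt_values
+    variance = (all_sum_squared - all_sum * all_sum / all_cnt_values) / all_cnt_values
+    open(output_file_name, "w").write("%s\n" % variance)
+</pre></div>
+</div>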
+</div></blockquote>
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="../../contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#">Step 7: Merging results back together</a><ul>
+<li><a class="reference internal" href="#calculating-variances-from-the-sums-and-sum-of-squares-of-all-chunks">Calculating variances from the sums and sum of squares of all chunks</a></li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="step6_transform.html"
+ title="previous chapter">Step 6: Running jobs in parallel</a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="step8_posttask.html"
+ title="next chapter">Step 8: Signal the completion of each stage of our pipeline</a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../../_sources/tutorials/simple_tutorial/step7_merge.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="../../decorators/decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="../../decorators/originate.html">@originate</a> </li>
+ <li><a href="../../decorators/split.html">@split</a> </li>
+ <li><a href="../../decorators/transform.html">@transform</a> </li>
+ <li><a href="../../decorators/merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="../../decorators/subdivide.html">@subdivide</a> </li>
+ <li><a href="../../decorators/transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="../../decorators/collate.html">@collate</a> </li>
+ <li><a href="../../decorators/collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="../../decorators/graphviz.html">@graphviz</a> </li>
+ <li><a href="../../decorators/mkdir.html">@mkdir</a> </li>
+ <li><a href="../../decorators/follows.html">@follows / mkdir</a> </li>
+ <li><a href="../../decorators/posttask.html">@posttask touch_file</a> </li>
+ <li><a href="../../decorators/active_if.html">@active_if</a> </li>
+ <li><a href="../../decorators/jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="../../decorators/product.html">@product </a> </li>
+ <li><a href="../../decorators/permutations.html">@permutations </a> </li>
+ <li><a href="../../decorators/combinations.html">@combinations </a> </li>
+ <li><a href="../../decorators/combinations_with_replacement.html">@combinations_ _with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="../../decorators/decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="../../decorators/files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="../../decorators/parallel.html">@parallel</a> </li>
+ <li><a href="../../decorators/check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="../../decorators/indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="step8_posttask.html" title="Step 8: Signal the completion of each stage of our pipeline"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="step6_transform.html" title="Step 6: Running jobs in parallel"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="../new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/tutorials/simple_tutorial/step7_merge_code.html b/doc/_build/html/tutorials/simple_tutorial/step7_merge_code.html
new file mode 100644
index 0000000..1be0018
--- /dev/null
+++ b/doc/_build/html/tutorials/simple_tutorial/step7_merge_code.html
@@ -0,0 +1,332 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>Code for Step 7: Merging results back together — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../../index.html" />
+ <link rel="next" title="Code for Step 8: Signal the completion of each stage of our pipeline" href="step8_posttask_code.html" />
+ <link rel="prev" title="Code for Step 6: Running jobs in parallel" href="step6_transform_code.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="step8_posttask_code.html" title="Code for Step 8: Signal the completion of each stage of our pipeline"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="step6_transform_code.html" title="Code for Step 6: Running jobs in parallel"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="../new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <div class="section" id="code-for-step-7-merging-results-back-together">
+<span id="simple-tutorial-7th-step-code"></span><h1>Code for Step 7: Merging results back together<a class="headerlink" href="#code-for-step-7-merging-results-back-together" title="Permalink to this headline">¶</a></h1>
+<ul class="simple">
+<li><a class="reference internal" href="simple_tutorial.html#simple-tutorial"><em>Simple tutorial overview</em></a></li>
+<li><a class="reference internal" href="../../decorators/merge.html#decorators-merge"><em>@merge in detail</em></a></li>
+<li><a class="reference internal" href="step7_merge.html#simple-tutorial-7th-step"><em>back to step 7</em></a></li>
+</ul>
+<div class="section" id="code">
+<h2>Code<a class="headerlink" href="#code" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="n">NUMBER_OF_RANDOMS</span> <span class="o">=</span> <span class="mi">10000</span>
+<span class="n">CHUNK_SIZE</span> <span class="o">=</span> <span class="mi">1000</span>
+
+
+<span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+<span class="kn">import</span> <span class="nn">time</span>
+
+<span class="kn">import</span> <span class="nn">random</span>
+<span class="kn">import</span> <span class="nn">glob</span>
+
+<span class="c">#---------------------------------------------------------------</span>
+<span class="c">#</span>
+<span class="c"># Create random numbers</span>
+<span class="c">#</span>
+<span class="nd">@files</span><span class="p">(</span><span class="bp">None</span><span class="p">,</span> <span class="s">"random_numbers.list"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">create_random_numbers</span><span class="p">(</span><span class="n">input_file_name</span><span class="p">,</span> <span class="n">output_file_name</span><span class="p">):</span>
+ <span class="n">f</span> <span class="o">=</span> <span class="nb">open</span><span class="p">(</span><span class="n">output_file_name</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+ <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">NUMBER_OF_RANDOMS</span><span class="p">):</span>
+ <span class="n">f</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="s">"</span><span class="si">%g</span><span class="se">\n</span><span class="s">"</span> <span class="o">%</span> <span class="p">(</span><span class="n">random</span><span class="o">.</span><span class="n">random</span><span class="p">()</span> <span class="o">*</span> <span class="mf">100.0</span><span class="p">))</span>
+
+<span class="c">#---------------------------------------------------------------</span>
+<span class="c">#</span>
+<span class="c"># Split initial file</span>
+<span class="c">#</span>
+<span class="nd">@follows</span><span class="p">(</span><span class="n">create_random_numbers</span><span class="p">)</span>
+<span class="nd">@split</span><span class="p">(</span><span class="s">"random_numbers.list"</span><span class="p">,</span> <span class="s">"*.chunks"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">step_5_split_numbers_into_chunks</span> <span class="p">(</span><span class="n">input_file_name</span><span class="p">,</span> <span class="n">output_files</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Splits random numbers file into XXX files of CHUNK_SIZE each</span>
+<span class="sd"> """</span>
+ <span class="c">#</span>
+ <span class="c"># clean up files from previous runs</span>
+ <span class="c">#</span>
+ <span class="k">for</span> <span class="n">f</span> <span class="ow">in</span> <span class="n">glob</span><span class="o">.</span><span class="n">glob</span><span class="p">(</span><span class="s">"*.chunks"</span><span class="p">):</span>
+ <span class="n">os</span><span class="o">.</span><span class="n">unlink</span><span class="p">(</span><span class="n">f</span><span class="p">)</span>
+ <span class="c">#</span>
+ <span class="c"># create new file every CHUNK_SIZE lines and</span>
+ <span class="c"># copy each line into current file</span>
+ <span class="c">#</span>
+ <span class="n">output_file</span> <span class="o">=</span> <span class="bp">None</span>
+ <span class="n">cnt_files</span> <span class="o">=</span> <span class="mi">0</span>
+ <span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="n">line</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="nb">open</span><span class="p">(</span><span class="n">input_file_name</span><span class="p">)):</span>
+ <span class="k">if</span> <span class="n">i</span> <span class="o">%</span> <span class="n">CHUNK_SIZE</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
+ <span class="n">cnt_files</span> <span class="o">+=</span> <span class="mi">1</span>
+ <span class="n">output_file</span> <span class="o">=</span> <span class="nb">open</span><span class="p">(</span><span class="s">"</span><span class="si">%d</span><span class="s">.chunks"</span> <span class="o">%</span> <span class="n">cnt_files</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+ <span class="n">output_file</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="n">line</span><span class="p">)</span>
+
+<span class="c">#---------------------------------------------------------------</span>
+<span class="c">#</span>
+<span class="c"># Calculate sum and sum of squares for each chunk file</span>
+<span class="c">#</span>
+<span class="nd">@transform</span><span class="p">(</span><span class="n">step_5_split_numbers_into_chunks</span><span class="p">,</span> <span class="n">suffix</span><span class="p">(</span><span class="s">".chunks"</span><span class="p">),</span> <span class="s">".sums"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">step_6_calculate_sum_of_squares</span> <span class="p">(</span><span class="n">input_file_name</span><span class="p">,</span> <span class="n">output_file_name</span><span class="p">):</span>
+ <span class="n">output</span> <span class="o">=</span> <span class="nb">open</span><span class="p">(</span><span class="n">output_file_name</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+ <span class="n">sum_squared</span><span class="p">,</span> <span class="nb">sum</span> <span class="o">=</span> <span class="p">[</span><span class="mf">0.0</span><span class="p">,</span> <span class="mf">0.0</span><span class="p">]</span>
+ <span class="n">cnt_values</span> <span class="o">=</span> <span class="mi">0</span>
+ <span class="k">for</span> <span class="n">line</span> <span class="ow">in</span> <span class="nb">open</span><span class="p">(</span><span class="n">input_file_name</span><span class="p">):</span>
+ <span class="n">cnt_values</span> <span class="o">+=</span> <span class="mi">1</span>
+ <span class="n">val</span> <span class="o">=</span> <span class="nb">float</span><span class="p">(</span><span class="n">line</span><span class="o">.</span><span class="n">rstrip</span><span class="p">())</span>
+ <span class="n">sum_squared</span> <span class="o">+=</span> <span class="n">val</span> <span class="o">*</span> <span class="n">val</span>
+ <span class="nb">sum</span> <span class="o">+=</span> <span class="n">val</span>
+ <span class="n">output</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="s">"</span><span class="si">%s</span><span class="se">\n</span><span class="si">%s</span><span class="se">\n</span><span class="si">%d</span><span class="se">\n</span><span class="s">"</span> <span class="o">%</span> <span class="p">(</span><span class="nb">repr</span><span class="p">(</span><span class="n">sum_squared</span><span class="p">),</span> <span c [...]
+
+<span class="c">#---------------------------------------------------------------</span>
+<span class="c">#</span>
+<span class="c"># Calculate sum and sum of squares for each chunk</span>
+<span class="c">#</span>
+<span class="nd">@merge</span><span class="p">(</span><span class="n">step_6_calculate_sum_of_squares</span><span class="p">,</span> <span class="s">"variance.result"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">step_7_calculate_variance</span> <span class="p">(</span><span class="n">input_file_names</span><span class="p">,</span> <span class="n">output_file_name</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Calculate variance naively</span>
+<span class="sd"> """</span>
+ <span class="n">output</span> <span class="o">=</span> <span class="nb">open</span><span class="p">(</span><span class="n">output_file_name</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+ <span class="c">#</span>
+ <span class="c"># initialise variables</span>
+ <span class="c">#</span>
+ <span class="n">all_sum_squared</span> <span class="o">=</span> <span class="mf">0.0</span>
+ <span class="n">all_sum</span> <span class="o">=</span> <span class="mf">0.0</span>
+ <span class="n">all_cnt_values</span> <span class="o">=</span> <span class="mf">0.0</span>
+ <span class="c">#</span>
+ <span class="c"># added up all the sum_squared, and sum and cnt_values from all the chunks</span>
+ <span class="c">#</span>
+ <span class="k">for</span> <span class="n">input_file_name</span> <span class="ow">in</span> <span class="n">input_file_names</span><span class="p">:</span>
+ <span class="n">sum_squared</span><span class="p">,</span> <span class="nb">sum</span><span class="p">,</span> <span class="n">cnt_values</span> <span class="o">=</span> <span class="nb">map</span><span class="p">(</span><span class="nb">float</span><span class="p">,</span> <span class="nb">open</span><span class="p">(</span><span class="n">input_file_name</span><span class="p">)</span><span class="o">.</span><span class="n">readlines</span><span class="p">())</span>
+ <span class="n">all_sum_squared</span> <span class="o">+=</span> <span class="n">sum_squared</span>
+ <span class="n">all_sum</span> <span class="o">+=</span> <span class="nb">sum</span>
+ <span class="n">all_cnt_values</span> <span class="o">+=</span> <span class="n">cnt_values</span>
+ <span class="n">all_mean</span> <span class="o">=</span> <span class="n">all_sum</span> <span class="o">/</span> <span class="n">all_cnt_values</span>
+ <span class="n">variance</span> <span class="o">=</span> <span class="p">(</span><span class="n">all_sum_squared</span> <span class="o">-</span> <span class="n">all_sum</span> <span class="o">*</span> <span class="n">all_mean</span><span class="p">)</span><span class="o">/</span><span class="p">(</span><span class="n">all_cnt_values</span><span class="p">)</span>
+ <span class="c">#</span>
+ <span class="c"># print output</span>
+ <span class="c">#</span>
+ <span class="k">print</span> <span class="o">>></span><span class="n">output</span><span class="p">,</span> <span class="n">variance</span>
+
+<span class="c">#---------------------------------------------------------------</span>
+<span class="c">#</span>
+<span class="c"># Run</span>
+<span class="c">#</span>
+<span class="n">pipeline_run</span><span class="p">([</span><span class="n">step_7_calculate_variance</span><span class="p">],</span> <span class="p">[</span><span class="n">create_random_numbers</span><span class="p">],</span> <span class="n">verbose</span> <span class="o">=</span> <span class="mi">1</span><span class="p">)</span>
+</pre></div>
+</div>
+</div></blockquote>
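+<p>The merge step works because each chunk only needs to hand on its sum, sum of squares and count:
+the (population) variance can then be recovered as <tt class="docutils literal"><span class="pre">(all_sum_squared - all_sum * all_mean) / all_cnt_values</span></tt>.
+The snippet below is a minimal, self-contained sketch (written for illustration only, not part of the tutorial files)
+that checks this identity against the direct two-pass definition of variance on a small list of numbers:</p>
+<div class="highlight-python"><pre>xs = [1.0, 2.0, 4.0, 8.0]
+
+n = float(len(xs))
+total = sum(xs)                       # sum of the values
+total_sq = sum(x * x for x in xs)     # sum of the squared values
+mean = total / n
+
+# the "naive" one-pass formula used by step_7_calculate_variance
+naive_variance = (total_sq - total * mean) / n
+
+# the direct two-pass definition, for comparison
+direct_variance = sum((x - mean) ** 2 for x in xs) / n
+
+assert round(naive_variance - direct_variance, 9) == 0</pre>
+</div>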
+</div>
+<div class="section" id="resulting-output">
+<h2>Resulting Output<a class="headerlink" href="#resulting-output" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><div class="highlight-python"><pre>pipeline_run([step_7_calculate_variance], [create_random_numbers], verbose = 1)
+ Job = [None -> random_numbers.list] completed
+Completed Task = create_random_numbers
+ Job = [random_numbers.list -> *.chunks] completed
+Completed Task = step_5_split_numbers_into_chunks
+ Job = [6.chunks -> 6.sums] completed
+ Job = [1.chunks -> 1.sums] completed
+ Job = [4.chunks -> 4.sums] completed
+ Job = [7.chunks -> 7.sums] completed
+ Job = [2.chunks -> 2.sums] completed
+ Job = [9.chunks -> 9.sums] completed
+ Job = [10.chunks -> 10.sums] completed
+ Job = [3.chunks -> 3.sums] completed
+ Job = [5.chunks -> 5.sums] completed
+ Job = [8.chunks -> 8.sums] completed
+Completed Task = step_6_calculate_sum_of_squares
+ Job = [[6.sums, 5.sums, 1.sums, 4.sums, 3.sums, 2.sums, 8.sums, 7.sums, 10.sums, 9.sums] -> variance.result] completed
+Completed Task = step_7_calculate_variance</pre>
+</div>
+</div></blockquote>
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="../../contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#">Code for Step 7: Merging results back together</a><ul>
+<li><a class="reference internal" href="#code">Code</a></li>
+<li><a class="reference internal" href="#resulting-output">Resulting Output</a></li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="step6_transform_code.html"
+ title="previous chapter">Code for Step 6: Running jobs in parallel</a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="step8_posttask_code.html"
+ title="next chapter">Code for Step 8: Signal the completion of each stage of our pipeline</a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../../_sources/tutorials/simple_tutorial/step7_merge_code.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="../../decorators/decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="../../decorators/originate.html">@originate</a> </li>
+ <li><a href="../../decorators/split.html">@split</a> </li>
+ <li><a href="../../decorators/transform.html">@transform</a> </li>
+ <li><a href="../../decorators/merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="../../decorators/subdivide.html">@subdivide</a> </li>
+ <li><a href="../../decorators/transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="../../decorators/collate.html">@collate</a> </li>
+ <li><a href="../../decorators/collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="../../decorators/graphviz.html">@graphviz</a> </li>
+ <li><a href="../../decorators/mkdir.html">@mkdir</a> </li>
+ <li><a href="../../decorators/follows.html">@follows / mkdir</a> </li>
+ <li><a href="../../decorators/posttask.html">@posttask touch_file</a> </li>
+ <li><a href="../../decorators/active_if.html">@active_if</a> </li>
+ <li><a href="../../decorators/jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="../../decorators/product.html">@product </a> </li>
+ <li><a href="../../decorators/permutations.html">@permutations </a> </li>
+ <li><a href="../../decorators/combinations.html">@combinations </a> </li>
+ <li><a href="../../decorators/combinations_with_replacement.html">@combinations_ _with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="../../decorators/decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="../../decorators/files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="../../decorators/parallel.html">@parallel</a> </li>
+ <li><a href="../../decorators/check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="../../decorators/indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="step8_posttask_code.html" title="Code for Step 8: Signal the completion of each stage of our pipeline"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="step6_transform_code.html" title="Code for Step 6: Running jobs in parallel"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="../new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/tutorials/simple_tutorial/step8_posttask.html b/doc/_build/html/tutorials/simple_tutorial/step8_posttask.html
new file mode 100644
index 0000000..d7cc75c
--- /dev/null
+++ b/doc/_build/html/tutorials/simple_tutorial/step8_posttask.html
@@ -0,0 +1,292 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>Step 8: Signal the completion of each stage of our pipeline — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../../index.html" />
+ <link rel="next" title="Code for Chapter 9: Checking dependencies to run tasks in order" href="../manual/dependencies_code.html" />
+ <link rel="prev" title="Step 7: Merging results back together" href="step7_merge.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="../manual/dependencies_code.html" title="Code for Chapter 9: Checking dependencies to run tasks in order"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="step7_merge.html" title="Step 7: Merging results back together"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="../new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <div class="section" id="step-8-signal-the-completion-of-each-stage-of-our-pipeline">
+<span id="simple-tutorial-8th-step"></span><span id="index-0"></span><h1>Step 8: Signal the completion of each stage of our pipeline<a class="headerlink" href="#step-8-signal-the-completion-of-each-stage-of-our-pipeline" title="Permalink to this headline">¶</a></h1>
+<ul class="simple">
+<li><a class="reference internal" href="simple_tutorial.html#simple-tutorial"><em>Simple tutorial overview</em></a></li>
+<li><a class="reference internal" href="../../decorators/posttask.html#decorators-posttask"><em>@posttask</em></a> in detail</li>
+</ul>
+<div class="admonition note">
+<p class="first admonition-title">Note</p>
+<p>Remember to look at the example code:</p>
+<ul class="last simple">
+<li><a class="reference internal" href="step8_posttask_code.html#simple-tutorial-8th-step-code"><em>Python Code for step 8</em></a></li>
+</ul>
+</div>
+<p>Let us finish by celebrating the success of our modest pipeline example.</p>
+<div class="section" id="running-some-code-to-show-that-a-stage-of-the-pipeline-has-finished">
+<h2>Running some code to show that a stage of the pipeline has finished<a class="headerlink" href="#running-some-code-to-show-that-a-stage-of-the-pipeline-has-finished" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>A common requirement is to take some extra action when a particular
+<a class="reference internal" href="../../glossary.html#term-task"><em class="xref std std-term">task</em></a> or stage of a pipeline is complete.</p>
+<p>This can range from printing out a message or <tt class="docutils literal"><span class="pre">touching</span></tt> a sentinel file,
+to emailing the author.</p>
+<p>This is particularly useful if the <a class="reference internal" href="../../glossary.html#term-task"><em class="xref std std-term">task</em></a> is a recipe applied to an unspecified number
+of parameters in parallel in different <a class="reference internal" href="../../glossary.html#term-job"><em class="xref std std-term">job</em></a>s.</p>
+<p>The “extra action” can be added to a <em>Ruffus</em> pipeline using the <a class="reference internal" href="../../decorators/posttask.html#decorators-posttask"><em>@posttask</em></a>
+decorator.</p>
+<p>Let us print a “hooray” message to show that we have finished calculating variances.</p>
+<blockquote>
+<div><img alt="../../_images/simple_tutorial_posttask.png" src="../../_images/simple_tutorial_posttask.png" />
+</div></blockquote>
+<p>This is such a short function that we can even write it in-line:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="nd">@posttask</span><span class="p">(</span><span class="k">lambda</span><span class="p">:</span> <span class="n">sys</span><span class="o">.</span><span class="n">stdout</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="s">"hooray</span><span class="se">\n</span><span class="s">"</span><span class="p">))</span>
+<span class="nd">@merge</span><span class="p">(</span><span class="n">step_6_calculate_sum_of_squares</span><span class="p">,</span> <span class="s">"variance.result"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">step_7_calculate_variance</span> <span class="p">(</span><span class="n">input_file_names</span><span class="p">,</span> <span class="n">output_file_name</span><span class="p">):</span>
+ <span class="s">""</span>
+</pre></div>
+</div>
+</div></blockquote>
+</div></blockquote>
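+<p>The lambda above is just a convenient shorthand: any callable that takes no arguments can be passed to
+<a class="reference internal" href="../../decorators/posttask.html#decorators-posttask"><em>@posttask</em></a>.
+As a sketch only (the helper name and log file below are made up for illustration, assuming the same imports and
+upstream tasks as in the step 8 code), the &#8220;extra action&#8221; could just as easily append a time-stamped line to a progress log:</p>
+<div class="highlight-python"><pre>import datetime
+
+def record_completion():
+    # hypothetical helper: note when the variance step finished
+    with open("pipeline_progress.log", "a") as log:
+        log.write("variance step finished at %s\n" % datetime.datetime.now())
+
+@posttask(record_completion)
+@merge(step_6_calculate_sum_of_squares, "variance.result")
+def step_7_calculate_variance (input_file_names, output_file_name):
+    ""</pre>
+</div>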
+</div>
+<div class="section" id="touching-a-sentinel-file-after-finishing-a-pipeline-stage">
+<span id="index-1"></span><h2><em>Touching</em> a sentinel file after finishing a pipeline stage<a class="headerlink" href="#touching-a-sentinel-file-after-finishing-a-pipeline-stage" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><div class="line-block">
+<div class="line">Very often we would like to mark the competion of a pipeline stage by using the
+date/time stamp of a “sentinel” file.</div>
+<div class="line">This is such a common requirement that <em>Ruffus</em> even has special syntax for this
+in the form of <a class="reference internal" href="../../decorators/indicator_objects.html#decorators-touch-file"><em>touch_file</em></a>.</div>
+</div>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="nd">@posttask</span><span class="p">(</span><span class="n">touch_file</span><span class="p">(</span><span class="s">"sentinel_flag"</span><span class="p">))</span>
+<span class="k">def</span> <span class="nf">your_pipeline_function</span> <span class="p">(</span><span class="n">input_file_names</span><span class="p">,</span> <span class="n">output_file_name</span><span class="p">):</span>
+ <span class="s">""</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p>The file <tt class="docutils literal"><span class="pre">sentinel_flag</span></tt> will be created (if it did not exist) or its
+date/time stamp changed to the current time whenever this stage of the pipeline is
+completed.</p>
+</div></blockquote>
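+<p>For readers unfamiliar with the idiom: &#8220;touching&#8221; a file simply creates it if necessary and then resets its
+modification time to now. The sketch below (plain Python, shown only to illustrate the effect on disk; it is not how
+<em>Ruffus</em> is implemented internally) does the equivalent by hand and prints the sentinel file's new time stamp:</p>
+<div class="highlight-python"><pre>import os
+import time
+
+def touch(path):
+    # create the file if it is missing, then set its modification time to "now"
+    open(path, "a").close()
+    os.utime(path, None)
+
+touch("sentinel_flag")
+print(time.ctime(os.path.getmtime("sentinel_flag")))</pre>
+</div>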
+</div>
+<div class="section" id="adding-several-post-task-actions">
+<h2>Adding several post task actions<a class="headerlink" href="#adding-several-post-task-actions" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>You can, of course, add more than one action to be taken on completion of the
+task, either by stacking up <a class="reference internal" href="../../decorators/posttask.html#decorators-posttask"><em>@posttask</em></a> decorators or by passing
+several functions to the same <strong>@posttask</strong>:</p>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="nd">@posttask</span><span class="p">(</span><span class="n">print_hooray</span><span class="p">,</span> <span class="n">print_whopee</span><span class="p">)</span>
+<span class="nd">@posttask</span><span class="p">(</span><span class="n">touch_file</span><span class="p">(</span><span class="s">"sentinel_flag"</span><span class="p">))</span>
+<span class="k">def</span> <span class="nf">your_pipeline_function</span> <span class="p">(</span><span class="n">input_file_names</span><span class="p">,</span> <span class="n">output_file_name</span><span class="p">):</span>
+ <span class="s">""</span>
+</pre></div>
+</div>
+</div></blockquote>
+</div></blockquote>
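+<p>The two helpers named in the fragment above are assumed to be ordinary functions taking no arguments.
+Suitable definitions might look like the following sketch (the bodies are illustrative guesses, written with
+<tt class="docutils literal"><span class="pre">sys.stdout.write</span></tt> so that the fragment stays version-agnostic):</p>
+<div class="highlight-python"><pre>import sys
+
+def print_hooray():
+    sys.stdout.write("hooray\n")
+
+def print_whopee():
+    sys.stdout.write("whopee\n")</pre>
+</div>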
+</div>
+<div class="section" id="finding-out-more-about-ruffus">
+<h2>Finding out more about <strong>Ruffus</strong><a class="headerlink" href="#finding-out-more-about-ruffus" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><p>This wraps up our short tutorial on <strong>Ruffus</strong>.</p>
+<p>Here are a few useful topics you may be interested in:</p>
+<blockquote>
+<div><ul class="simple">
+<li><a class="reference internal" href="../manual/collate.html#manual-collate"><em>How to summarise disparate input by category</em></a></li>
+<li><a class="reference internal" href="../manual/logging.html#manual-logging"><em>How to log pipeline progress</em></a></li>
+<li><a class="reference internal" href="../manual/exceptions.html#manual-exceptions"><em>How exceptions are handled</em></a></li>
+</ul>
+</div></blockquote>
+<p>To find out more about <strong>Ruffus</strong>, you can read the <a class="reference internal" href="../manual/manual_introduction.html#manual-introduction"><em>manual</em></a>
+or just start using <strong>Ruffus</strong>.</p>
+<p>Email the authors at ruffus_lib at llew.org.uk if you have any comments or suggestions.</p>
+<p>Happy pipelining!</p>
+</div></blockquote>
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="../../contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#">Step 8: Signal the completion of each stage of our pipeline</a><ul>
+<li><a class="reference internal" href="#running-some-code-to-show-that-a-stage-of-the-pipeline-has-finished">Running some code to show that a stage of the pipeline has finished</a></li>
+<li><a class="reference internal" href="#touching-a-sentinel-file-after-finishing-a-pipeline-stage"><em>Touching</em> a sentinel file after finishing a pipeline stage</a></li>
+<li><a class="reference internal" href="#adding-several-post-task-actions">Adding several post task actions</a></li>
+<li><a class="reference internal" href="#finding-out-more-about-ruffus">Finding out more about <strong>Ruffus</strong></a></li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="step7_merge.html"
+ title="previous chapter">Step 7: Merging results back together</a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="../manual/dependencies_code.html"
+ title="next chapter">Code for Chapter 9: Checking dependencies to run tasks in order</a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../../_sources/tutorials/simple_tutorial/step8_posttask.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="../../decorators/decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="../../decorators/originate.html">@originate</a> </li>
+ <li><a href="../../decorators/split.html">@split</a> </li>
+ <li><a href="../../decorators/transform.html">@transform</a> </li>
+ <li><a href="../../decorators/merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="../../decorators/subdivide.html">@subdivide</a> </li>
+ <li><a href="../../decorators/transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="../../decorators/collate.html">@collate</a> </li>
+ <li><a href="../../decorators/collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="../../decorators/graphviz.html">@graphviz</a> </li>
+ <li><a href="../../decorators/mkdir.html">@mkdir</a> </li>
+ <li><a href="../../decorators/follows.html">@follows / mkdir</a> </li>
+ <li><a href="../../decorators/posttask.html">@posttask touch_file</a> </li>
+ <li><a href="../../decorators/active_if.html">@active_if</a> </li>
+ <li><a href="../../decorators/jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="../../decorators/product.html">@product </a> </li>
+ <li><a href="../../decorators/permutations.html">@permutations </a> </li>
+ <li><a href="../../decorators/combinations.html">@combinations </a> </li>
+ <li><a href="../../decorators/combinations_with_replacement.html">@combinations_ _with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="../../decorators/decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="../../decorators/files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="../../decorators/parallel.html">@parallel</a> </li>
+ <li><a href="../../decorators/check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="../../decorators/indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="../manual/dependencies_code.html" title="Code for Chapter 9: Checking dependencies to run tasks in order"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="step7_merge.html" title="Step 7: Merging results back together"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="../new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/tutorials/simple_tutorial/step8_posttask_code.html b/doc/_build/html/tutorials/simple_tutorial/step8_posttask_code.html
new file mode 100644
index 0000000..88b30df
--- /dev/null
+++ b/doc/_build/html/tutorials/simple_tutorial/step8_posttask_code.html
@@ -0,0 +1,343 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>Code for Step 8: Signal the completion of each stage of our pipeline — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="../../_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../../',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="../../index.html" />
+ <link rel="prev" title="Code for Step 7: Merging results back together" href="step7_merge_code.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="step7_merge_code.html" title="Code for Step 7: Merging results back together"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="../new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <div class="section" id="code-for-step-8-signal-the-completion-of-each-stage-of-our-pipeline">
+<span id="simple-tutorial-8th-step-code"></span><h1>Code for Step 8: Signal the completion of each stage of our pipeline<a class="headerlink" href="#code-for-step-8-signal-the-completion-of-each-stage-of-our-pipeline" title="Permalink to this headline">¶</a></h1>
+<ul class="simple">
+<li><a class="reference internal" href="simple_tutorial.html#simple-tutorial"><em>Simple tutorial overview</em></a></li>
+<li><a class="reference internal" href="../../decorators/posttask.html#decorators-posttask"><em>@posttask in detail</em></a></li>
+<li><a class="reference internal" href="step8_posttask.html#simple-tutorial-8th-step"><em>back to step 8</em></a></li>
+</ul>
+<div class="section" id="code">
+<h2>Code<a class="headerlink" href="#code" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><div class="highlight-python"><div class="highlight"><pre><span class="n">NUMBER_OF_RANDOMS</span> <span class="o">=</span> <span class="mi">10000</span>
+<span class="n">CHUNK_SIZE</span> <span class="o">=</span> <span class="mi">1000</span>
+<span class="n">working_dir</span> <span class="o">=</span> <span class="s">"temp_tutorial8/"</span>
+
+
+
+<span class="kn">import</span> <span class="nn">time</span><span class="o">,</span> <span class="nn">sys</span><span class="o">,</span> <span class="nn">os</span>
+<span class="kn">from</span> <span class="nn">ruffus</span> <span class="kn">import</span> <span class="o">*</span>
+
+<span class="kn">import</span> <span class="nn">random</span>
+<span class="kn">import</span> <span class="nn">glob</span>
+
+
+
+
+<span class="c">#---------------------------------------------------------------</span>
+<span class="c">#</span>
+<span class="c"># Create random numbers</span>
+<span class="c">#</span>
+<span class="nd">@follows</span><span class="p">(</span><span class="n">mkdir</span><span class="p">(</span><span class="n">working_dir</span><span class="p">))</span>
+<span class="nd">@files</span><span class="p">(</span><span class="bp">None</span><span class="p">,</span> <span class="n">working_dir</span> <span class="o">+</span> <span class="s">"random_numbers.list"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">create_random_numbers</span><span class="p">(</span><span class="n">input_file_name</span><span class="p">,</span> <span class="n">output_file_name</span><span class="p">):</span>
+ <span class="n">f</span> <span class="o">=</span> <span class="nb">open</span><span class="p">(</span><span class="n">output_file_name</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+ <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">NUMBER_OF_RANDOMS</span><span class="p">):</span>
+ <span class="n">f</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="s">"</span><span class="si">%g</span><span class="se">\n</span><span class="s">"</span> <span class="o">%</span> <span class="p">(</span><span class="n">random</span><span class="o">.</span><span class="n">random</span><span class="p">()</span> <span class="o">*</span> <span class="mf">100.0</span><span class="p">))</span>
+
+<span class="c">#---------------------------------------------------------------</span>
+<span class="c">#</span>
+<span class="c"># Split initial file</span>
+<span class="c">#</span>
+<span class="nd">@follows</span><span class="p">(</span><span class="n">create_random_numbers</span><span class="p">)</span>
+<span class="nd">@split</span><span class="p">(</span><span class="n">working_dir</span> <span class="o">+</span> <span class="s">"random_numbers.list"</span><span class="p">,</span> <span class="n">working_dir</span> <span class="o">+</span> <span class="s">"*.chunks"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">step_5_split_numbers_into_chunks</span> <span class="p">(</span><span class="n">input_file_name</span><span class="p">,</span> <span class="n">output_files</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Splits random numbers file into XXX files of CHUNK_SIZE each</span>
+<span class="sd"> """</span>
+ <span class="c">#</span>
+ <span class="c"># clean up files from previous runs</span>
+ <span class="c">#</span>
+ <span class="k">for</span> <span class="n">f</span> <span class="ow">in</span> <span class="n">glob</span><span class="o">.</span><span class="n">glob</span><span class="p">(</span><span class="s">"*.chunks"</span><span class="p">):</span>
+ <span class="n">os</span><span class="o">.</span><span class="n">unlink</span><span class="p">(</span><span class="n">f</span><span class="p">)</span>
+ <span class="c">#</span>
+ <span class="c"># create new file every CHUNK_SIZE lines and</span>
+ <span class="c"># copy each line into current file</span>
+ <span class="c">#</span>
+ <span class="n">output_file</span> <span class="o">=</span> <span class="bp">None</span>
+ <span class="n">cnt_files</span> <span class="o">=</span> <span class="mi">0</span>
+ <span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="n">line</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="nb">open</span><span class="p">(</span><span class="n">input_file_name</span><span class="p">)):</span>
+ <span class="k">if</span> <span class="n">i</span> <span class="o">%</span> <span class="n">CHUNK_SIZE</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
+ <span class="n">cnt_files</span> <span class="o">+=</span> <span class="mi">1</span>
+ <span class="n">output_file</span> <span class="o">=</span> <span class="nb">open</span><span class="p">(</span><span class="n">working_dir</span> <span class="o">+</span> <span class="s">"</span><span class="si">%d</span><span class="s">.chunks"</span> <span class="o">%</span> <span class="n">cnt_files</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+ <span class="n">output_file</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="n">line</span><span class="p">)</span>
+
+<span class="c">#---------------------------------------------------------------</span>
+<span class="c">#</span>
+<span class="c"># Calculate sum and sum of squares for each chunk file</span>
+<span class="c">#</span>
+<span class="nd">@transform</span><span class="p">(</span><span class="n">step_5_split_numbers_into_chunks</span><span class="p">,</span> <span class="n">suffix</span><span class="p">(</span><span class="s">".chunks"</span><span class="p">),</span> <span class="s">".sums"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">step_6_calculate_sum_of_squares</span> <span class="p">(</span><span class="n">input_file_name</span><span class="p">,</span> <span class="n">output_file_name</span><span class="p">):</span>
+ <span class="n">output</span> <span class="o">=</span> <span class="nb">open</span><span class="p">(</span><span class="n">output_file_name</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+ <span class="n">sum_squared</span><span class="p">,</span> <span class="nb">sum</span> <span class="o">=</span> <span class="p">[</span><span class="mf">0.0</span><span class="p">,</span> <span class="mf">0.0</span><span class="p">]</span>
+ <span class="n">cnt_values</span> <span class="o">=</span> <span class="mi">0</span>
+ <span class="k">for</span> <span class="n">line</span> <span class="ow">in</span> <span class="nb">open</span><span class="p">(</span><span class="n">input_file_name</span><span class="p">):</span>
+ <span class="n">cnt_values</span> <span class="o">+=</span> <span class="mi">1</span>
+ <span class="n">val</span> <span class="o">=</span> <span class="nb">float</span><span class="p">(</span><span class="n">line</span><span class="o">.</span><span class="n">rstrip</span><span class="p">())</span>
+ <span class="n">sum_squared</span> <span class="o">+=</span> <span class="n">val</span> <span class="o">*</span> <span class="n">val</span>
+ <span class="nb">sum</span> <span class="o">+=</span> <span class="n">val</span>
+ <span class="n">output</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="s">"</span><span class="si">%s</span><span class="se">\n</span><span class="si">%s</span><span class="se">\n</span><span class="si">%d</span><span class="se">\n</span><span class="s">"</span> <span class="o">%</span> <span class="p">(</span><span class="nb">repr</span><span class="p">(</span><span class="n">sum_squared</span><span class="p">),</span> <span c [...]
+
+
+<span class="k">def</span> <span class="nf">print_hooray_again</span><span class="p">():</span>
+ <span class="k">print</span> <span class="s">"hooray again"</span>
+
+<span class="k">def</span> <span class="nf">print_whoppee_again</span><span class="p">():</span>
+ <span class="k">print</span> <span class="s">"whoppee again"</span>
+
+
+<span class="c">#---------------------------------------------------------------</span>
+<span class="c">#</span>
+<span class="c"># Calculate sum and sum of squares for each chunk</span>
+<span class="c">#</span>
+<span class="nd">@posttask</span><span class="p">(</span><span class="k">lambda</span><span class="p">:</span> <span class="n">sys</span><span class="o">.</span><span class="n">stdout</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="s">"hooray</span><span class="se">\n</span><span class="s">"</span><span class="p">))</span>
+<span class="nd">@posttask</span><span class="p">(</span><span class="n">print_hooray_again</span><span class="p">,</span> <span class="n">print_whoppee_again</span><span class="p">,</span> <span class="n">touch_file</span><span class="p">(</span><span class="s">"done"</span><span class="p">))</span>
+<span class="nd">@merge</span><span class="p">(</span><span class="n">step_6_calculate_sum_of_squares</span><span class="p">,</span> <span class="s">"variance.result"</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">step_7_calculate_variance</span> <span class="p">(</span><span class="n">input_file_names</span><span class="p">,</span> <span class="n">output_file_name</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Calculate variance naively</span>
+<span class="sd"> """</span>
+ <span class="n">output</span> <span class="o">=</span> <span class="nb">open</span><span class="p">(</span><span class="n">output_file_name</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span>
+ <span class="c">#</span>
+ <span class="c"># initialise variables</span>
+ <span class="c">#</span>
+ <span class="n">all_sum_squared</span> <span class="o">=</span> <span class="mf">0.0</span>
+ <span class="n">all_sum</span> <span class="o">=</span> <span class="mf">0.0</span>
+ <span class="n">all_cnt_values</span> <span class="o">=</span> <span class="mf">0.0</span>
+ <span class="c">#</span>
+ <span class="c"># added up all the sum_squared, and sum and cnt_values from all the chunks</span>
+ <span class="c">#</span>
+ <span class="k">for</span> <span class="n">input_file_name</span> <span class="ow">in</span> <span class="n">input_file_names</span><span class="p">:</span>
+ <span class="n">sum_squared</span><span class="p">,</span> <span class="nb">sum</span><span class="p">,</span> <span class="n">cnt_values</span> <span class="o">=</span> <span class="nb">map</span><span class="p">(</span><span class="nb">float</span><span class="p">,</span> <span class="nb">open</span><span class="p">(</span><span class="n">input_file_name</span><span class="p">)</span><span class="o">.</span><span class="n">readlines</span><span class="p">())</span>
+ <span class="n">all_sum_squared</span> <span class="o">+=</span> <span class="n">sum_squared</span>
+ <span class="n">all_sum</span> <span class="o">+=</span> <span class="nb">sum</span>
+ <span class="n">all_cnt_values</span> <span class="o">+=</span> <span class="n">cnt_values</span>
+ <span class="n">all_mean</span> <span class="o">=</span> <span class="n">all_sum</span> <span class="o">/</span> <span class="n">all_cnt_values</span>
+ <span class="n">variance</span> <span class="o">=</span> <span class="p">(</span><span class="n">all_sum_squared</span> <span class="o">-</span> <span class="n">all_sum</span> <span class="o">*</span> <span class="n">all_mean</span><span class="p">)</span><span class="o">/</span><span class="p">(</span><span class="n">all_cnt_values</span><span class="p">)</span>
+ <span class="c">#</span>
+ <span class="c"># print output</span>
+ <span class="c">#</span>
+ <span class="k">print</span> <span class="o">>></span><span class="n">output</span><span class="p">,</span> <span class="n">variance</span>
+
+<span class="c">#---------------------------------------------------------------</span>
+<span class="c">#</span>
+<span class="c"># Run</span>
+<span class="c">#</span>
+<span class="n">pipeline_run</span><span class="p">([</span><span class="n">step_7_calculate_variance</span><span class="p">],</span> <span class="n">verbose</span> <span class="o">=</span> <span class="mi">1</span><span class="p">)</span>
+</pre></div>
+</div>
+</div></blockquote>
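+<p>Once <tt class="docutils literal"><span class="pre">pipeline_run(...)</span></tt> has returned, the post-task actions above should have
+printed their messages and touched a <tt class="docutils literal"><span class="pre">done</span></tt> sentinel in the current directory.
+A small sketch (for illustration only, not part of the tutorial script) of how one might confirm this afterwards:</p>
+<div class="highlight-python"><pre>import os
+
+# the touch_file("done") post-task action should have left a sentinel behind
+assert os.path.exists("done")
+
+# and the merged result should contain the single variance value
+with open("variance.result") as result_file:
+    print("variance = %s" % result_file.read().strip())</pre>
+</div>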
+</div>
+<div class="section" id="resulting-output">
+<h2>Resulting Output<a class="headerlink" href="#resulting-output" title="Permalink to this headline">¶</a></h2>
+<blockquote>
+<div><div class="highlight-python"><pre>>> pipeline_run([step_7_calculate_variance], verbose = 1)
+ Make directories [temp_tutorial8/] completed
+Completed Task = create_random_numbers_mkdir_1
+ Job = [None -> temp_tutorial8/random_numbers.list] completed
+Completed Task = create_random_numbers
+ Job = [temp_tutorial8/random_numbers.list -> temp_tutorial8/*.chunks] completed
+Completed Task = step_5_split_numbers_into_chunks
+ Job = [temp_tutorial8/1.chunks -> temp_tutorial8/1.sums] completed
+ Job = [temp_tutorial8/10.chunks -> temp_tutorial8/10.sums] completed
+ Job = [temp_tutorial8/2.chunks -> temp_tutorial8/2.sums] completed
+ Job = [temp_tutorial8/3.chunks -> temp_tutorial8/3.sums] completed
+ Job = [temp_tutorial8/4.chunks -> temp_tutorial8/4.sums] completed
+ Job = [temp_tutorial8/5.chunks -> temp_tutorial8/5.sums] completed
+ Job = [temp_tutorial8/6.chunks -> temp_tutorial8/6.sums] completed
+ Job = [temp_tutorial8/7.chunks -> temp_tutorial8/7.sums] completed
+ Job = [temp_tutorial8/8.chunks -> temp_tutorial8/8.sums] completed
+ Job = [temp_tutorial8/9.chunks -> temp_tutorial8/9.sums] completed
+Completed Task = step_6_calculate_sum_of_squares
+ Job = [[temp_tutorial8/1.sums, temp_tutorial8/10.sums, temp_tutorial8/2.sums, temp_tutorial8/3.sums, temp_tutorial8/4.sums, temp_tutorial8/5.sums, temp_tutorial8/6.sums, temp_tutorial8/7.sums, temp_tutorial8/8.sums, temp_tutorial8/9.sums] -> variance.result] completed
+hooray again
+whoppee again
+hooray
+Completed Task = step_7_calculate_variance</pre>
+</div>
+</div></blockquote>
+</div>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h3><a href="../../contents.html">Table Of Contents</a></h3>
+ <ul>
+<li><a class="reference internal" href="#">Code for Step 8: Signal the completion of each stage of our pipeline</a><ul>
+<li><a class="reference internal" href="#code">Code</a></li>
+<li><a class="reference internal" href="#resulting-output">Resulting Output</a></li>
+</ul>
+</li>
+</ul>
+
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="step7_merge_code.html"
+ title="previous chapter">Code for Step 7: Merging results back together</a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="../../_sources/tutorials/simple_tutorial/step8_posttask_code.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="../../search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="../../decorators/decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="../../decorators/originate.html">@originate</a> </li>
+ <li><a href="../../decorators/split.html">@split</a> </li>
+ <li><a href="../../decorators/transform.html">@transform</a> </li>
+ <li><a href="../../decorators/merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="../../decorators/subdivide.html">@subdivide</a> </li>
+ <li><a href="../../decorators/transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="../../decorators/collate.html">@collate</a> </li>
+ <li><a href="../../decorators/collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="../../decorators/graphviz.html">@graphviz</a> </li>
+ <li><a href="../../decorators/mkdir.html">@mkdir</a> </li>
+ <li><a href="../../decorators/follows.html">@follows / mkdir</a> </li>
+ <li><a href="../../decorators/posttask.html">@posttask touch_file</a> </li>
+ <li><a href="../../decorators/active_if.html">@active_if</a> </li>
+ <li><a href="../../decorators/jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="../../decorators/decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="../../decorators/product.html">@product </a> </li>
+ <li><a href="../../decorators/permutations.html">@permutations </a> </li>
+ <li><a href="../../decorators/combinations.html">@combinations </a> </li>
+ <li><a href="../../decorators/combinations_with_replacement.html">@combinations_ _with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="../../decorators/decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="../../decorators/files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="../../decorators/parallel.html">@parallel</a> </li>
+ <li><a href="../../decorators/check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="../../decorators/indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="../../pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="../../drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="../../todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="../../genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="step7_merge_code.html" title="Code for Step 7: Merging results back together"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="../../index.html">Home</a> | </li>
+ <li><a href="../../contents.html">Contents</a> | </li>
+ <li><a href="../../installation.html">Install</a> | </li>
+ <li><a href="../new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="../new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="../../faq.html">FAQ</a> | </li>
+ <li><a href="../../cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="../new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="../../gallery.html">Gallery</a> | </li>
+ <li><a href="../../history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
diff --git a/doc/_build/html/why_ruffus.html b/doc/_build/html/why_ruffus.html
new file mode 100644
index 0000000..5ff7beb
--- /dev/null
+++ b/doc/_build/html/why_ruffus.html
@@ -0,0 +1,221 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>Why Ruffus? — ruffus 2.5 documentation</title>
+
+ <link rel="stylesheet" href="_static/ruffus.css" type="text/css" />
+ <link rel="stylesheet" href="_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: './',
+ VERSION: '2.5',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="_static/jquery.js"></script>
+ <script type="text/javascript" src="_static/underscore.js"></script>
+ <script type="text/javascript" src="_static/doctools.js"></script>
+ <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
+ <link rel="top" title="ruffus 2.5 documentation" href="index.html" />
+ <link rel="next" title="Construction of a simple pipeline to run BLAST jobs" href="examples/bioinformatics/index.html" />
+ <link rel="prev" title="Hall of Fame: User contributed flowcharts" href="gallery.html" />
+ </head>
+ <body>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="examples/bioinformatics/index.html" title="Construction of a simple pipeline to run BLAST jobs"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="gallery.html" title="Hall of Fame: User contributed flowcharts"
+ accesskey="P">previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="index.html">Home</a> | </li>
+ <li><a href="contents.html">Contents</a> | </li>
+ <li><a href="installation.html">Install</a> | </li>
+ <li><a href="tutorials/new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="tutorials/new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="faq.html">FAQ</a> | </li>
+ <li><a href="cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="tutorials/new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="gallery.html">Gallery</a> | </li>
+ <li><a href="history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body">
+
+ <div class="section" id="why-ruffus">
+<span id="design-why-ruffus"></span><span id="index-0"></span><h1>Why <em>Ruffus</em>?<a class="headerlink" href="#why-ruffus" title="Permalink to this headline">¶</a></h1>
+<p><strong>Cylindrophis ruffus</strong> is the name of the
+<a class="reference external" href="http://en.wikipedia.org/wiki/Cylindrophis_ruffus">red-tailed pipe snake</a> (bad python-y pun)
+which can be found in <a class="reference external" href="http://www.discoverhongkong.com/eng/index.html">Hong Kong</a> where the original author comes from.</p>
+<p><em>Ruffus</em> is a shy creature, and pretends to be a cobra or a <a class="reference external" href="http://en.wikipedia.org/wiki/File:Bandedkrait.jpg">banded krait</a> by putting up its red tail and ducking its
+head in its coils when startled.</p>
+<table border="1" class="docutils">
+<colgroup>
+<col width="28%" />
+<col width="72%" />
+</colgroup>
+<tbody valign="top">
+<tr class="row-odd"><td><img alt="_images/wikimedia_cyl_ruffus.jpg" class="first last" src="_images/wikimedia_cyl_ruffus.jpg" />
+</td>
+<td><a class="first last reference internal image-reference" href="_images/wikimedia_bandedkrait.jpg"><img alt="_images/wikimedia_bandedkrait.jpg" src="_images/wikimedia_bandedkrait.jpg" style="width: 462.0px; height: 346.5px;" /></a>
+</td>
+</tr>
+<tr class="row-even"><td><ul class="first last simple">
+<li>Not venomous</li>
+<li><a class="reference external" href="http://en.wikipedia.org/wiki/Mostly_Harmless">Mostly Harmless</a></li>
+</ul>
+</td>
+<td><ul class="first last simple">
+<li>Deadly poisonous</li>
+<li><a class="reference external" href="http://en.wikipedia.org/wiki/List_of_races_and_species_in_The_Hitchhiker's_Guide_to_the_Galaxy#Ravenous_Bugblatter_Beast_of_Traal">Seriously unfriendly</a></li>
+</ul>
+</td>
+</tr>
+</tbody>
+</table>
+<p>Be careful not to step on one when running down country park lanes at full speed
+in Hong Kong: this snake is a <a class="reference external" href="http://www.hkras.org/eng/info/hkspp.htm">rare breed</a>!</p>
+<p><em>Ruffus</em> does most of its work at night and sleeps during the day: typical of many (but alas not all) python programmers!</p>
+<p>The original <a class="reference external" href="http://upload.wikimedia.org/wikipedia/commons/a/a1/Cyl_ruffus_061212_2025_tdp.jpg">red-tail pipe</a> and <a class="reference external" href="http://en.wikipedia.org/wiki/File:AB_054_Banded_Krait.JPG">banded krait</a> images are from wikimedia.</p>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar">
+ <div class="sphinxsidebarwrapper">
+ <h4>Previous topic</h4>
+ <p class="topless"><a href="gallery.html"
+ title="previous chapter">Hall of Fame: User contributed flowcharts</a></p>
+ <h4>Next topic</h4>
+ <p class="topless"><a href="examples/bioinformatics/index.html"
+ title="next chapter">Construction of a simple pipeline to run BLAST jobs</a></p>
+ <h3>This Page</h3>
+ <ul class="this-page-menu">
+ <li><a href="_sources/why_ruffus.txt"
+ rel="nofollow">Show Source</a></li>
+ </ul>
+
+<div id="searchbox" style="display: none">
+ <h3>Quick search</h3>
+ <form class="search" action="search.html" method="get">
+ <input type="text" name="q" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="decorators/decorators.html#core">Core:</a></h4>
+ <ul>
+ <li><a href="decorators/originate.html">@originate</a> </li>
+ <li><a href="decorators/split.html">@split</a> </li>
+ <li><a href="decorators/transform.html">@transform</a> </li>
+ <li><a href="decorators/merge.html">@merge</a> </li>
+ </ul>
+
+ <h4><a href="decorators/decorators.html#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="decorators/subdivide.html">@subdivide</a> </li>
+ <li><a href="decorators/transform_ex.html">@transform (add_inputs) </a> </li>
+ <li><a href="decorators/collate.html">@collate</a> </li>
+ <li><a href="decorators/collate_ex.html">@collate (add_inputs)</a> </li>
+ <li><a href="decorators/graphviz.html">@graphviz</a> </li>
+ <li><a href="decorators/mkdir.html">@mkdir</a> </li>
+ <li><a href="decorators/follows.html">@follows / mkdir</a> </li>
+ <li><a href="decorators/posttask.html">@posttask touch_file</a> </li>
+ <li><a href="decorators/active_if.html">@active_if</a> </li>
+ <li><a href="decorators/jobs_limit.html">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="decorators/decorators.html#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="decorators/product.html">@product </a> </li>
+ <li><a href="decorators/permutations.html">@permutations </a> </li>
+ <li><a href="decorators/combinations.html">@combinations </a> </li>
+ <li><a href="decorators/combinations_with_replacement.html">@combinations_ _with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="decorators/decorators.html#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="decorators/files_ex.html">@files (on the fly)</a> </li>
+ <li><a href="decorators/parallel.html">@parallel</a> </li>
+ <li><a href="decorators/check_if_uptodate.html">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="decorators/indicator_objects.html">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="pipeline_functions.html#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="pipeline_functions.html#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="pipeline_functions.html#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="pipeline_functions.html#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="drmaa_wrapper_functions.html#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="todo.html">Future plans</a>
+ </ul>
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="examples/bioinformatics/index.html" title="Construction of a simple pipeline to run BLAST jobs"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="gallery.html" title="Hall of Fame: User contributed flowcharts"
+ >previous</a> |</li>
+ Ruffus v. 2.5
+ <li><a href="index.html">Home</a> | </li>
+ <li><a href="contents.html">Contents</a> | </li>
+ <li><a href="installation.html">Install</a> | </li>
+ <li><a href="tutorials/new_tutorial/introduction.html">Manual</a> / </li>
+ <li><a href="tutorials/new_tutorial/manual_contents.html">(TOC)</a> | </li>
+ <li><a href="faq.html">FAQ</a> | </li>
+ <li><a href="cheatsheet.html">Cheat sheet</a> | </li>
+ <li><a href="tutorials/new_tutorial/command_line.html">Command Line</a> | </li>
+ <li><a href="gallery.html">Gallery</a> | </li>
+ <li><a href="history.html">Latest Changes</a> » </li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2009-2013 Leo Goodstadt.
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ </div>
+ </body>
+</html>
\ No newline at end of file
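
The sidebar of this page also points to drmaa_wrapper.run_job for dispatching commands to a cluster. A hedged sketch follows (the command string and keyword values are assumptions chosen for illustration; run_locally=True executes the command in a local subprocess, so no DRMAA session is needed here):

    # Sketch: running a single shell command via ruffus.drmaa_wrapper.run_job.
    from ruffus.drmaa_wrapper import run_job

    # For real cluster submission a drmaa_session= argument created with the
    # python drmaa package would be passed instead of run_locally=True.
    stdout_lines, stderr_lines = run_job(cmd_str="echo hello from ruffus",
                                         job_name="hello_job",
                                         run_locally=True)
    print("".join(stdout_lines))
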
diff --git a/doc/_build/latex/Makefile b/doc/_build/latex/Makefile
new file mode 100644
index 0000000..6b87ad8
--- /dev/null
+++ b/doc/_build/latex/Makefile
@@ -0,0 +1,66 @@
+# Makefile for Sphinx LaTeX output
+
+ALLDOCS = $(basename $(wildcard *.tex))
+ALLPDF = $(addsuffix .pdf,$(ALLDOCS))
+ALLDVI = $(addsuffix .dvi,$(ALLDOCS))
+
+# Prefix for archive names
+ARCHIVEPREFIX =
+# Additional LaTeX options
+LATEXOPTS =
+
+all: $(ALLPDF)
+all-pdf: $(ALLPDF)
+all-dvi: $(ALLDVI)
+all-ps: all-dvi
+ for f in *.dvi; do dvips $$f; done
+
+all-pdf-ja:
+ for f in *.pdf *.png *.gif *.jpg *.jpeg; do extractbb $$f; done
+ for f in *.tex; do platex -kanji=utf8 $(LATEXOPTS) $$f; done
+ for f in *.tex; do platex -kanji=utf8 $(LATEXOPTS) $$f; done
+ for f in *.tex; do platex -kanji=utf8 $(LATEXOPTS) $$f; done
+ -for f in *.idx; do mendex -U -f -d "`basename $$f .idx`.dic" -s python.ist $$f; done
+ for f in *.tex; do platex -kanji=utf8 $(LATEXOPTS) $$f; done
+ for f in *.tex; do platex -kanji=utf8 $(LATEXOPTS) $$f; done
+ for f in *.dvi; do dvipdfmx $$f; done
+
+zip: all-$(FMT)
+ mkdir $(ARCHIVEPREFIX)docs-$(FMT)
+ cp $(ALLPDF) $(ARCHIVEPREFIX)docs-$(FMT)
+ zip -q -r -9 $(ARCHIVEPREFIX)docs-$(FMT).zip $(ARCHIVEPREFIX)docs-$(FMT)
+ rm -r $(ARCHIVEPREFIX)docs-$(FMT)
+
+tar: all-$(FMT)
+ mkdir $(ARCHIVEPREFIX)docs-$(FMT)
+ cp $(ALLPDF) $(ARCHIVEPREFIX)docs-$(FMT)
+ tar cf $(ARCHIVEPREFIX)docs-$(FMT).tar $(ARCHIVEPREFIX)docs-$(FMT)
+ rm -r $(ARCHIVEPREFIX)docs-$(FMT)
+
+bz2: tar
+ bzip2 -9 -k $(ARCHIVEPREFIX)docs-$(FMT).tar
+
+# The number of LaTeX runs is quite conservative, but I don't expect it
+# to get run often, so the little extra time won't hurt.
+%.dvi: %.tex
+ latex $(LATEXOPTS) '$<'
+ latex $(LATEXOPTS) '$<'
+ latex $(LATEXOPTS) '$<'
+ -makeindex -s python.ist '$(basename $<).idx'
+ latex $(LATEXOPTS) '$<'
+ latex $(LATEXOPTS) '$<'
+
+%.pdf: %.tex
+ pdflatex $(LATEXOPTS) '$<'
+ pdflatex $(LATEXOPTS) '$<'
+ pdflatex $(LATEXOPTS) '$<'
+ -makeindex -s python.ist '$(basename $<).idx'
+ pdflatex $(LATEXOPTS) '$<'
+ pdflatex $(LATEXOPTS) '$<'
+
+clean:
+ rm -f *.dvi *.log *.ind *.aux *.toc *.syn *.idx *.out *.ilg *.pla
+
+.PHONY: all all-pdf all-dvi all-ps clean
+.PHONY: all-pdf-ja
+
diff --git a/doc/_build/latex/bestiary_combinatorics.png b/doc/_build/latex/bestiary_combinatorics.png
new file mode 100644
index 0000000..da876ce
Binary files /dev/null and b/doc/_build/latex/bestiary_combinatorics.png differ
diff --git a/doc/_build/latex/bestiary_decorators.png b/doc/_build/latex/bestiary_decorators.png
new file mode 100644
index 0000000..a4bb53d
Binary files /dev/null and b/doc/_build/latex/bestiary_decorators.png differ
diff --git a/doc/_build/latex/bestiary_transform.png b/doc/_build/latex/bestiary_transform.png
new file mode 100644
index 0000000..b184bfc
Binary files /dev/null and b/doc/_build/latex/bestiary_transform.png differ
diff --git a/doc/_build/latex/examples_bioinformatics_error.png b/doc/_build/latex/examples_bioinformatics_error.png
new file mode 100644
index 0000000..469905e
Binary files /dev/null and b/doc/_build/latex/examples_bioinformatics_error.png differ
diff --git a/doc/_build/latex/examples_bioinformatics_merge.jpg b/doc/_build/latex/examples_bioinformatics_merge.jpg
new file mode 100644
index 0000000..a83a17f
Binary files /dev/null and b/doc/_build/latex/examples_bioinformatics_merge.jpg differ
diff --git a/doc/_build/latex/examples_bioinformatics_pipeline.jpg b/doc/_build/latex/examples_bioinformatics_pipeline.jpg
new file mode 100644
index 0000000..fdd3839
Binary files /dev/null and b/doc/_build/latex/examples_bioinformatics_pipeline.jpg differ
diff --git a/doc/_build/latex/examples_bioinformatics_split.jpg b/doc/_build/latex/examples_bioinformatics_split.jpg
new file mode 100644
index 0000000..4a9c428
Binary files /dev/null and b/doc/_build/latex/examples_bioinformatics_split.jpg differ
diff --git a/doc/_build/latex/examples_bioinformatics_transform.jpg b/doc/_build/latex/examples_bioinformatics_transform.jpg
new file mode 100644
index 0000000..7a5aac5
Binary files /dev/null and b/doc/_build/latex/examples_bioinformatics_transform.jpg differ
diff --git a/doc/_build/latex/flowchart_colour_schemes.png b/doc/_build/latex/flowchart_colour_schemes.png
new file mode 100644
index 0000000..a576cf9
Binary files /dev/null and b/doc/_build/latex/flowchart_colour_schemes.png differ
diff --git a/doc/_build/latex/fncychap.sty b/doc/_build/latex/fncychap.sty
new file mode 100644
index 0000000..9a56c04
--- /dev/null
+++ b/doc/_build/latex/fncychap.sty
@@ -0,0 +1,683 @@
+%%% Copyright Ulf A. Lindgren
+%%%
+%%% Note Permission is granted to modify this file under
+%%% the condition that it is saved using another
+%%% file and package name.
+%%%
+%%% Revision 1.1 (1997)
+%%%
+%%% Jan. 8th Modified package name base date option
+%%% Jan. 22nd Modified FmN and FmTi for error in book.cls
+%%% \MakeUppercase{#}->{\MakeUppercase#}
+%%% Apr. 6th Modified Lenny option to prevent undesired
+%%% skip of line.
+%%% Nov. 8th Fixed \@chapapp for AMS
+%%%
+%%% Revision 1.2 (1998)
+%%%
+%%% Feb. 11th Fixed appendix problem related to Bjarne
+%%% Aug. 11th Fixed problem related to 11pt and 12pt
+%%% suggested by Tomas Lundberg. THANKS!
+%%%
+%%% Revision 1.3 (2004)
+%%% Sep. 20th problem with frontmatter, mainmatter and
+%%% backmatter, pointed out by Lapo Mori
+%%%
+%%% Revision 1.31 (2004)
+%%% Sep. 21st problem with the Rejne definition: stretched text
+%%% caused ugly gaps in the vrule aligned with the title
+%%% text. Kindly pointed out to me by Hendri Adriaens
+%%%
+%%% Revision 1.32 (2005)
+%%% Jun. 23rd compatibility problem with the KOMA class 'scrbook.cls'
+%%% a remedy is a redefinition of '\@schapter' in
+%%% line with that used in KOMA. The problem was pointed
+%%% out to me by Mikkel Holm Olsen
+%%%
+%%% Revision 1.33 (2005)
+%%% Aug. 9th misspelled ``TWELV'' corrected, the error was pointed
+%%% out to me by George Pearson
+%%%
+%%% Revision 1.34 (2007)
+%%% Added an alternative to Lenny provided by Peter
+%%% Osborne (2005-11-28)
+%%% Corrected front, main and back matter, based on input
+%%% from Bas van Gils (2006-04-24)
+%%% Jul. 30th Added Bjornstrup option provided by Jean-Marc
+%%% Francois (2007-01-05).
+%%% Reverted to \MakeUppercase{#} see rev 1.1, solved
+%%% problem with MakeUppercase and MakeLowercase pointed
+%%% out by Marco Feuerstein (2007-06-06)
+
+
+%%% Last modified Jul. 2007
+
+\NeedsTeXFormat{LaTeX2e}[1995/12/01]
+\ProvidesPackage{fncychap}
+ [2007/07/30 v1.34
+ LaTeX package (Revised chapters)]
+
+%%%% For conditional inclusion of color
+\newif\ifusecolor
+\usecolorfalse
+
+
+
+%%%% DEFINITION OF Chapapp variables
+\newcommand{\CNV}{\huge\bfseries}
+\newcommand{\ChNameVar}[1]{\renewcommand{\CNV}{#1}}
+
+
+%%%% DEFINITION OF TheChapter variables
+\newcommand{\CNoV}{\huge\bfseries}
+\newcommand{\ChNumVar}[1]{\renewcommand{\CNoV}{#1}}
+
+\newif\ifUCN
+\UCNfalse
+\newif\ifLCN
+\LCNfalse
+\def\ChNameLowerCase{\LCNtrue\UCNfalse}
+\def\ChNameUpperCase{\UCNtrue\LCNfalse}
+\def\ChNameAsIs{\UCNfalse\LCNfalse}
+
+%%%%% Fix for AMSBook 971008
+
+\@ifundefined{@chapapp}{\let\@chapapp\chaptername}{}
+
+
+%%%%% Fix for Bjarne and appendix 980211
+
+\newif\ifinapp
+\inappfalse
+\renewcommand\appendix{\par
+ \setcounter{chapter}{0}%
+ \setcounter{section}{0}%
+ \inapptrue%
+ \renewcommand\@chapapp{\appendixname}%
+ \renewcommand\thechapter{\@Alph\c@chapter}}
+
+%%%%% Fix for frontmatter, mainmatter, and backmatter 040920
+
+\@ifundefined{@mainmatter}{\newif\if@mainmatter \@mainmattertrue}{}
+
+%%%%%
+
+
+
+\newcommand{\FmN}[1]{%
+\ifUCN
+ {\MakeUppercase{#1}}\LCNfalse
+\else
+ \ifLCN
+ {\MakeLowercase{#1}}\UCNfalse
+ \else #1
+ \fi
+\fi}
+
+
+%%%% DEFINITION OF Title variables
+\newcommand{\CTV}{\Huge\bfseries}
+\newcommand{\ChTitleVar}[1]{\renewcommand{\CTV}{#1}}
+
+%%%% DEFINITION OF the basic rule width
+\newlength{\RW}
+\setlength{\RW}{1pt}
+\newcommand{\ChRuleWidth}[1]{\setlength{\RW}{#1}}
+
+\newif\ifUCT
+\UCTfalse
+\newif\ifLCT
+\LCTfalse
+\def\ChTitleLowerCase{\LCTtrue\UCTfalse}
+\def\ChTitleUpperCase{\UCTtrue\LCTfalse}
+\def\ChTitleAsIs{\UCTfalse\LCTfalse}
+\newcommand{\FmTi}[1]{%
+\ifUCT
+ {\MakeUppercase{#1}}\LCTfalse
+\else
+ \ifLCT
+ {\MakeLowercase{#1}}\UCTfalse
+ \else {#1}
+ \fi
+\fi}
+
+
+
+\newlength{\mylen}
+\newlength{\myhi}
+\newlength{\px}
+\newlength{\py}
+\newlength{\pyy}
+\newlength{\pxx}
+
+
+\def\mghrulefill#1{\leavevmode\leaders\hrule\@height #1\hfill\kern\z@}
+
+\newcommand{\DOCH}{%
+ \CNV\FmN{\@chapapp}\space \CNoV\thechapter
+ \par\nobreak
+ \vskip 20\p@
+ }
+\newcommand{\DOTI}[1]{%
+ \CTV\FmTi{#1}\par\nobreak
+ \vskip 40\p@
+ }
+\newcommand{\DOTIS}[1]{%
+ \CTV\FmTi{#1}\par\nobreak
+ \vskip 40\p@
+ }
+
+%%%%%% SONNY DEF
+
+\DeclareOption{Sonny}{%
+ \ChNameVar{\Large\sf}
+ \ChNumVar{\Huge}
+ \ChTitleVar{\Large\sf}
+ \ChRuleWidth{0.5pt}
+ \ChNameUpperCase
+ \renewcommand{\DOCH}{%
+ \raggedleft
+ \CNV\FmN{\@chapapp}\space \CNoV\thechapter
+ \par\nobreak
+ \vskip 40\p@}
+ \renewcommand{\DOTI}[1]{%
+ \CTV\raggedleft\mghrulefill{\RW}\par\nobreak
+ \vskip 5\p@
+ \CTV\FmTi{#1}\par\nobreak
+ \mghrulefill{\RW}\par\nobreak
+ \vskip 40\p@}
+ \renewcommand{\DOTIS}[1]{%
+ \CTV\raggedleft\mghrulefill{\RW}\par\nobreak
+ \vskip 5\p@
+ \CTV\FmTi{#1}\par\nobreak
+ \mghrulefill{\RW}\par\nobreak
+ \vskip 40\p@}
+}
+
+%%%%%% LENNY DEF
+
+\DeclareOption{Lenny}{%
+
+ \ChNameVar{\fontsize{14}{16}\usefont{OT1}{phv}{m}{n}\selectfont}
+ \ChNumVar{\fontsize{60}{62}\usefont{OT1}{ptm}{m}{n}\selectfont}
+ \ChTitleVar{\Huge\bfseries\rm}
+ \ChRuleWidth{1pt}
+ \renewcommand{\DOCH}{%
+ \settowidth{\px}{\CNV\FmN{\@chapapp}}
+ \addtolength{\px}{2pt}
+ \settoheight{\py}{\CNV\FmN{\@chapapp}}
+ \addtolength{\py}{1pt}
+
+ \settowidth{\mylen}{\CNV\FmN{\@chapapp}\space\CNoV\thechapter}
+ \addtolength{\mylen}{1pt}
+ \settowidth{\pxx}{\CNoV\thechapter}
+ \addtolength{\pxx}{-1pt}
+
+ \settoheight{\pyy}{\CNoV\thechapter}
+ \addtolength{\pyy}{-2pt}
+ \setlength{\myhi}{\pyy}
+ \addtolength{\myhi}{-1\py}
+ \par
+ \parbox[b]{\textwidth}{%
+ \rule[\py]{\RW}{\myhi}%
+ \hskip -\RW%
+ \rule[\pyy]{\px}{\RW}%
+ \hskip -\px%
+ \raggedright%
+ \CNV\FmN{\@chapapp}\space\CNoV\thechapter%
+ \hskip1pt%
+ \mghrulefill{\RW}%
+ \rule{\RW}{\pyy}\par\nobreak%
+ \vskip -\baselineskip%
+ \vskip -\pyy%
+ \hskip \mylen%
+ \mghrulefill{\RW}\par\nobreak%
+ \vskip \pyy}%
+ \vskip 20\p@}
+
+
+ \renewcommand{\DOTI}[1]{%
+ \raggedright
+ \CTV\FmTi{#1}\par\nobreak
+ \vskip 40\p@}
+
+ \renewcommand{\DOTIS}[1]{%
+ \raggedright
+ \CTV\FmTi{#1}\par\nobreak
+ \vskip 40\p@}
+ }
+
+%%%%%% Peter Osbornes' version of LENNY DEF
+
+\DeclareOption{PetersLenny}{%
+
+% five new lengths
+\newlength{\bl} % bottom left : orig \space
+\setlength{\bl}{6pt}
+\newcommand{\BL}[1]{\setlength{\bl}{#1}}
+\newlength{\br} % bottom right : orig 1pt
+\setlength{\br}{1pt}
+\newcommand{\BR}[1]{\setlength{\br}{#1}}
+\newlength{\tl} % top left : orig 2pt
+\setlength{\tl}{2pt}
+\newcommand{\TL}[1]{\setlength{\tl}{#1}}
+\newlength{\trr} % top right :orig 1pt
+\setlength{\trr}{1pt}
+\newcommand{\TR}[1]{\setlength{\trr}{#1}}
+\newlength{\blrule} % top right :orig 1pt
+\setlength{\trr}{0pt}
+\newcommand{\BLrule}[1]{\setlength{\blrule}{#1}}
+
+
+ \ChNameVar{\fontsize{14}{16}\usefont{OT1}{phv}{m}{n}\selectfont}
+ \ChNumVar{\fontsize{60}{62}\usefont{OT1}{ptm}{m}{n}\selectfont}
+ \ChTitleVar{\Huge\bfseries\rm}
+ \ChRuleWidth{1pt}
+\renewcommand{\DOCH}{%
+
+
+%%%%%%% tweaks for 1--9 and A--Z
+\ifcase\c@chapter\relax%
+\or\BL{-3pt}\TL{-4pt}\BR{0pt}\TR{-6pt}%1
+\or\BL{0pt}\TL{-4pt}\BR{2pt}\TR{-4pt}%2
+\or\BL{0pt}\TL{-4pt}\BR{2pt}\TR{-4pt}%3
+\or\BL{0pt}\TL{5pt}\BR{2pt}\TR{-4pt}%4
+\or\BL{0pt}\TL{3pt}\BR{2pt}\TR{-4pt}%5
+\or\BL{-1pt}\TL{0pt}\BR{2pt}\TR{-2pt}%6
+\or\BL{0pt}\TL{-3pt}\BR{2pt}\TR{-2pt}%7
+\or\BL{0pt}\TL{-3pt}\BR{2pt}\TR{-2pt}%8
+\or\BL{0pt}\TL{-3pt}\BR{-4pt}\TR{-2pt}%9
+\or\BL{-3pt}\TL{-3pt}\BR{2pt}\TR{-7pt}%10
+\or\BL{-6pt}\TL{-6pt}\BR{0pt}\TR{-9pt}%11
+\or\BL{-6pt}\TL{-6pt}\BR{2pt}\TR{-7pt}%12
+\or\BL{-5pt}\TL{-5pt}\BR{0pt}\TR{-9pt}%13
+\or\BL{-6pt}\TL{-6pt}\BR{0pt}\TR{-9pt}%14
+\or\BL{-3pt}\TL{-3pt}\BR{3pt}\TR{-6pt}%15
+\or\BL{-3pt}\TL{-3pt}\BR{3pt}\TR{-6pt}%16
+\or\BL{-5pt}\TL{-3pt}\BR{-8pt}\TR{-6pt}%17
+\or\BL{-5pt}\TL{-5pt}\BR{0pt}\TR{-9pt}%18
+\or\BL{-3pt}\TL{-3pt}\BR{-6pt}\TR{-9pt}%19
+\or\BL{0pt}\TL{0pt}\BR{0pt}\TR{-5pt}%20
+\fi
+
+\ifinapp\ifcase\c@chapter\relax%
+\or\BL{0pt}\TL{14pt}\BR{5pt}\TR{-19pt}%A
+\or\BL{0pt}\TL{-5pt}\BR{-3pt}\TR{-8pt}%B
+\or\BL{-3pt}\TL{-2pt}\BR{1pt}\TR{-6pt}\BLrule{0pt}%C
+\or\BL{0pt}\TL{-5pt}\BR{-3pt}\TR{-8pt}\BLrule{0pt}%D
+\or\BL{0pt}\TL{-5pt}\BR{2pt}\TR{-3pt}%E
+\or\BL{0pt}\TL{-5pt}\BR{-10pt}\TR{-1pt}%F
+\or\BL{-3pt}\TL{0pt}\BR{0pt}\TR{-7pt}%G
+\or\BL{0pt}\TL{-5pt}\BR{3pt}\TR{-1pt}%H
+\or\BL{0pt}\TL{-5pt}\BR{3pt}\TR{-1pt}%I
+\or\BL{2pt}\TL{0pt}\BR{-3pt}\TR{1pt}%J
+\or\BL{0pt}\TL{-5pt}\BR{3pt}\TR{-1pt}%K
+\or\BL{0pt}\TL{-5pt}\BR{2pt}\TR{-19pt}%L
+\or\BL{0pt}\TL{-5pt}\BR{3pt}\TR{-1pt}%M
+\or\BL{0pt}\TL{-5pt}\BR{-2pt}\TR{-1pt}%N
+\or\BL{-3pt}\TL{-2pt}\BR{-3pt}\TR{-11pt}%O
+\or\BL{0pt}\TL{-5pt}\BR{-9pt}\TR{-3pt}%P
+\or\BL{-3pt}\TL{-2pt}\BR{-3pt}\TR{-11pt}%Q
+\or\BL{0pt}\TL{-5pt}\BR{4pt}\TR{-8pt}%R
+\or\BL{-2pt}\TL{-2pt}\BR{-2pt}\TR{-7pt}%S
+\or\BL{-3pt}\TL{0pt}\BR{-5pt}\TR{4pt}\BLrule{8pt}%T
+\or\BL{-7pt}\TL{-11pt}\BR{-5pt}\TR{-7pt}\BLrule{0pt}%U
+\or\BL{-14pt}\TL{-5pt}\BR{-14pt}\TR{-1pt}\BLrule{14pt}%V
+\or\BL{-10pt}\TL{-9pt}\BR{-13pt}\TR{-3pt}\BLrule{7pt}%W
+\or\BL{0pt}\TL{-5pt}\BR{3pt}\TR{-1pt}\BLrule{0pt}%X
+\or\BL{-6pt}\TL{-4pt}\BR{-7pt}\TR{1pt}\BLrule{7pt}%Y
+\or\BL{0pt}\TL{-5pt}\BR{3pt}\TR{-1pt}\BLrule{0pt}%Z
+\fi\fi
+%%%%%%%
+ \settowidth{\px}{\CNV\FmN{\@chapapp}}
+ \addtolength{\px}{\tl} %MOD change 2pt to \tl
+ \settoheight{\py}{\CNV\FmN{\@chapapp}}
+ \addtolength{\py}{1pt}
+
+ \settowidth{\mylen}{\CNV\FmN{\@chapapp}\space\CNoV\thechapter}
+ \addtolength{\mylen}{\trr}% MOD change 1pt to \tr
+ \settowidth{\pxx}{\CNoV\thechapter}
+ \addtolength{\pxx}{-1pt}
+
+ \settoheight{\pyy}{\CNoV\thechapter}
+ \addtolength{\pyy}{-2pt}
+ \setlength{\myhi}{\pyy}
+ \addtolength{\myhi}{-1\py}
+ \par
+ \parbox[b]{\textwidth}{%
+ \rule[\py]{\RW}{\myhi}%
+ \hskip -\RW%
+ \rule[\pyy]{\px}{\RW}%
+ \hskip -\px%
+ \raggedright%
+ \CNV\FmN{\@chapapp}\rule{\blrule}{\RW}\hskip\bl\CNoV\thechapter%MOD
+% \CNV\FmN{\@chapapp}\space\CNoV\thechapter %ORIGINAL
+ \hskip\br% %MOD 1pt to \br
+ \mghrulefill{\RW}%
+ \rule{\RW}{\pyy}\par\nobreak%
+ \vskip -\baselineskip%
+ \vskip -\pyy%
+ \hskip \mylen%
+ \mghrulefill{\RW}\par\nobreak%
+ \vskip \pyy}%
+ \vskip 20\p@}
+
+
+ \renewcommand{\DOTI}[1]{%
+ \raggedright
+ \CTV\FmTi{#1}\par\nobreak
+ \vskip 40\p@}
+
+ \renewcommand{\DOTIS}[1]{%
+ \raggedright
+ \CTV\FmTi{#1}\par\nobreak
+ \vskip 40\p@}
+ }
+
+
+%
+
+
+%%%%%% BJORNSTRUP DEF
+
+\DeclareOption{Bjornstrup}{%
+ \usecolortrue
+ % pzc (Zapf Chancery) is nice. ppl (Palatino) is cool too.
+ \ChNumVar{\fontsize{76}{80}\usefont{OT1}{pzc}{m}{n}\selectfont}
+ \ChTitleVar{\raggedleft\Large\sffamily\bfseries}
+
+ \setlength{\myhi}{10pt} % Space between grey box border and text
+ \setlength{\mylen}{\textwidth}
+ \addtolength{\mylen}{-2\myhi}
+ \renewcommand{\DOCH}{%
+ \settowidth{\py}{\CNoV\thechapter}
+ \addtolength{\py}{-10pt} % Amount of space by which the
+% % number is shifted right
+ \fboxsep=0pt%
+ \colorbox[gray]{.85}{\rule{0pt}{40pt}\parbox[b]{\textwidth}{\hfill}}%
+ \kern-\py\raise20pt%
+ \hbox{\color[gray]{.5}\CNoV\thechapter}\\%
+ }
+
+ \renewcommand{\DOTI}[1]{%
+ \nointerlineskip\raggedright%
+ \fboxsep=\myhi%
+ \vskip-1ex%
+ \colorbox[gray]{.85}{\parbox[t]{\mylen}{\CTV\FmTi{#1}}}\par\nobreak%
+ \vskip 40\p@%
+ }
+
+ \renewcommand{\DOTIS}[1]{%
+ \fboxsep=0pt
+ \colorbox[gray]{.85}{\rule{0pt}{40pt}\parbox[b]{\textwidth}{\hfill}}\\%
+ \nointerlineskip\raggedright%
+ \fboxsep=\myhi%
+ \colorbox[gray]{.85}{\parbox[t]{\mylen}{\CTV\FmTi{#1}}}\par\nobreak%
+ \vskip 40\p@%
+ }
+}
+
+
+%%%%%%% GLENN DEF
+
+
+\DeclareOption{Glenn}{%
+ \ChNameVar{\bfseries\Large\sf}
+ \ChNumVar{\Huge}
+ \ChTitleVar{\bfseries\Large\rm}
+ \ChRuleWidth{1pt}
+ \ChNameUpperCase
+ \ChTitleUpperCase
+ \renewcommand{\DOCH}{%
+ \settoheight{\myhi}{\CTV\FmTi{Test}}
+ \setlength{\py}{\baselineskip}
+ \addtolength{\py}{\RW}
+ \addtolength{\py}{\myhi}
+ \setlength{\pyy}{\py}
+ \addtolength{\pyy}{-1\RW}
+
+ \raggedright
+ \CNV\FmN{\@chapapp}\space\CNoV\thechapter
+ \hskip 3pt\mghrulefill{\RW}\rule[-1\pyy]{2\RW}{\py}\par\nobreak}
+
+ \renewcommand{\DOTI}[1]{%
+ \addtolength{\pyy}{-4pt}
+ \settoheight{\myhi}{\CTV\FmTi{#1}}
+ \addtolength{\myhi}{\py}
+ \addtolength{\myhi}{-1\RW}
+ \vskip -1\pyy
+ \rule{2\RW}{\myhi}\mghrulefill{\RW}\hskip 2pt
+ \raggedleft\CTV\FmTi{#1}\par\nobreak
+ \vskip 80\p@}
+
+\newlength{\backskip}
+ \renewcommand{\DOTIS}[1]{%
+% \setlength{\py}{10pt}
+% \setlength{\pyy}{\py}
+% \addtolength{\pyy}{\RW}
+% \setlength{\myhi}{\baselineskip}
+% \addtolength{\myhi}{\pyy}
+% \mghrulefill{\RW}\rule[-1\py]{2\RW}{\pyy}\par\nobreak
+% \addtolength{}{}
+%\vskip -1\baselineskip
+% \rule{2\RW}{\myhi}\mghrulefill{\RW}\hskip 2pt
+% \raggedleft\CTV\FmTi{#1}\par\nobreak
+% \vskip 60\p@}
+%% Fix suggested by Tomas Lundberg
+ \setlength{\py}{25pt} % or whatever you prefer
+ \setlength{\pyy}{\py}
+ \setlength{\backskip}{\py}
+ \addtolength{\backskip}{2pt}
+ \addtolength{\pyy}{\RW}
+ \setlength{\myhi}{\baselineskip}
+ \addtolength{\myhi}{\pyy}
+ \mghrulefill{\RW}\rule[-1\py]{2\RW}{\pyy}\par\nobreak
+ \vskip -1\backskip
+ \rule{2\RW}{\myhi}\mghrulefill{\RW}\hskip 3pt %
+ \raggedleft\CTV\FmTi{#1}\par\nobreak
+ \vskip 40\p@}
+ }
+
+%%%%%%% CONNY DEF
+
+\DeclareOption{Conny}{%
+ \ChNameUpperCase
+ \ChTitleUpperCase
+ \ChNameVar{\centering\Huge\rm\bfseries}
+ \ChNumVar{\Huge}
+ \ChTitleVar{\centering\Huge\rm}
+ \ChRuleWidth{2pt}
+
+ \renewcommand{\DOCH}{%
+ \mghrulefill{3\RW}\par\nobreak
+ \vskip -0.5\baselineskip
+ \mghrulefill{\RW}\par\nobreak
+ \CNV\FmN{\@chapapp}\space \CNoV\thechapter
+ \par\nobreak
+ \vskip -0.5\baselineskip
+ }
+ \renewcommand{\DOTI}[1]{%
+ \mghrulefill{\RW}\par\nobreak
+ \CTV\FmTi{#1}\par\nobreak
+ \vskip 60\p@
+ }
+ \renewcommand{\DOTIS}[1]{%
+ \mghrulefill{\RW}\par\nobreak
+ \CTV\FmTi{#1}\par\nobreak
+ \vskip 60\p@
+ }
+ }
+
+%%%%%%% REJNE DEF
+
+\DeclareOption{Rejne}{%
+
+ \ChNameUpperCase
+ \ChTitleUpperCase
+ \ChNameVar{\centering\Large\rm}
+ \ChNumVar{\Huge}
+ \ChTitleVar{\centering\Huge\rm}
+ \ChRuleWidth{1pt}
+ \renewcommand{\DOCH}{%
+ \settoheight{\py}{\CNoV\thechapter}
+ \parskip=0pt plus 1pt % Set parskip to default, just in case v1.31
+ \addtolength{\py}{-1pt}
+ \CNV\FmN{\@chapapp}\par\nobreak
+ \vskip 20\p@
+ \setlength{\myhi}{2\baselineskip}
+ \setlength{\px}{\myhi}
+ \addtolength{\px}{-1\RW}
+ \rule[-1\px]{\RW}{\myhi}\mghrulefill{\RW}\hskip
+ 10pt\raisebox{-0.5\py}{\CNoV\thechapter}\hskip 10pt\mghrulefill{\RW}\rule[-1\px]{\RW}{\myhi}\par\nobreak
+ \vskip -3\p@% Added -2pt vskip to correct for stretched text v1.31
+ }
+ \renewcommand{\DOTI}[1]{%
+ \setlength{\mylen}{\textwidth}
+ \parskip=0pt plus 1pt % Set parskip to default, just in case v1.31
+ \addtolength{\mylen}{-2\RW}
+ {\vrule width\RW}\parbox{\mylen}{\CTV\FmTi{#1}}{\vrule width\RW}\par\nobreak%
+ \vskip -3pt\rule{\RW}{2\baselineskip}\mghrulefill{\RW}\rule{\RW}{2\baselineskip}%
+ \vskip 60\p@% Added -2pt in vskip to correct for stretched text v1.31
+ }
+ \renewcommand{\DOTIS}[1]{%
+ \setlength{\py}{\fboxrule}
+ \setlength{\fboxrule}{\RW}
+ \setlength{\mylen}{\textwidth}
+ \addtolength{\mylen}{-2\RW}
+ \fbox{\parbox{\mylen}{\vskip 2\baselineskip\CTV\FmTi{#1}\par\nobreak\vskip \baselineskip}}
+ \setlength{\fboxrule}{\py}
+ \vskip 60\p@
+ }
+ }
+
+
+%%%%%%% BJARNE DEF
+
+\DeclareOption{Bjarne}{%
+ \ChNameUpperCase
+ \ChTitleUpperCase
+ \ChNameVar{\raggedleft\normalsize\rm}
+ \ChNumVar{\raggedleft \bfseries\Large}
+ \ChTitleVar{\raggedleft \Large\rm}
+ \ChRuleWidth{1pt}
+
+
+%% Note thechapter -> c@chapter fix appendix bug
+%% Fixed misspelled 12
+
+ \newcounter{AlphaCnt}
+ \newcounter{AlphaDecCnt}
+ \newcommand{\AlphaNo}{%
+ \ifcase\number\theAlphaCnt
+ \ifnum\c@chapter=0
+ ZERO\else{}\fi
+ \or ONE\or TWO\or THREE\or FOUR\or FIVE
+ \or SIX\or SEVEN\or EIGHT\or NINE\or TEN
+ \or ELEVEN\or TWELVE\or THIRTEEN\or FOURTEEN\or FIFTEEN
+ \or SIXTEEN\or SEVENTEEN\or EIGHTEEN\or NINETEEN\fi
+}
+
+ \newcommand{\AlphaDecNo}{%
+ \setcounter{AlphaDecCnt}{0}
+ \@whilenum\number\theAlphaCnt>0\do
+ {\addtocounter{AlphaCnt}{-10}
+ \addtocounter{AlphaDecCnt}{1}}
+ \ifnum\number\theAlphaCnt=0
+ \else
+ \addtocounter{AlphaDecCnt}{-1}
+ \addtocounter{AlphaCnt}{10}
+ \fi
+
+
+ \ifcase\number\theAlphaDecCnt\or TEN\or TWENTY\or THIRTY\or
+ FORTY\or FIFTY\or SIXTY\or SEVENTY\or EIGHTY\or NINETY\fi
+ }
+ \newcommand{\TheAlphaChapter}{%
+
+ \ifinapp
+ \thechapter
+ \else
+ \setcounter{AlphaCnt}{\c@chapter}
+ \ifnum\c@chapter<20
+ \AlphaNo
+ \else
+ \AlphaDecNo\AlphaNo
+ \fi
+ \fi
+ }
+ \renewcommand{\DOCH}{%
+ \mghrulefill{\RW}\par\nobreak
+ \CNV\FmN{\@chapapp}\par\nobreak
+ \CNoV\TheAlphaChapter\par\nobreak
+ \vskip -1\baselineskip\vskip 5pt\mghrulefill{\RW}\par\nobreak
+ \vskip 20\p@
+ }
+ \renewcommand{\DOTI}[1]{%
+ \CTV\FmTi{#1}\par\nobreak
+ \vskip 40\p@
+ }
+ \renewcommand{\DOTIS}[1]{%
+ \CTV\FmTi{#1}\par\nobreak
+ \vskip 40\p@
+ }
+}
+
+\DeclareOption*{%
+ \PackageWarning{fancychapter}{unknown style option}
+ }
+
+\ProcessOptions* \relax
+
+\ifusecolor
+ \RequirePackage{color}
+\fi
+\def\@makechapterhead#1{%
+ \vspace*{50\p@}%
+ {\parindent \z@ \raggedright \normalfont
+ \ifnum \c@secnumdepth >\m@ne
+ \if@mainmatter%%%%% Fix for frontmatter, mainmatter, and backmatter 040920
+ \DOCH
+ \fi
+ \fi
+ \interlinepenalty\@M
+ \if@mainmatter%%%%% Fix for frontmatter, mainmatter, and backmatter 060424
+ \DOTI{#1}%
+ \else%
+ \DOTIS{#1}%
+ \fi
+ }}
+
+
+%%% Begin: To avoid problem with scrbook.cls (fncychap version 1.32)
+
+%%OUT:
+%\def\@schapter#1{\if@twocolumn
+% \@topnewpage[\@makeschapterhead{#1}]%
+% \else
+% \@makeschapterhead{#1}%
+% \@afterheading
+% \fi}
+
+%%IN:
+\def\@schapter#1{%
+\if@twocolumn%
+ \@makeschapterhead{#1}%
+\else%
+ \@makeschapterhead{#1}%
+ \@afterheading%
+\fi}
+
+%%% End: To avoid problem with scrbook.cls (fncychap version 1.32)
+
+\def\@makeschapterhead#1{%
+ \vspace*{50\p@}%
+ {\parindent \z@ \raggedright
+ \normalfont
+ \interlinepenalty\@M
+ \DOTIS{#1}
+ \vskip 40\p@
+ }}
+
+\endinput
+
+
diff --git a/doc/_build/latex/front_page_flowchart.png b/doc/_build/latex/front_page_flowchart.png
new file mode 100644
index 0000000..2c0a8d2
Binary files /dev/null and b/doc/_build/latex/front_page_flowchart.png differ
diff --git a/doc/_build/latex/gallery_big_pipeline.png b/doc/_build/latex/gallery_big_pipeline.png
new file mode 100644
index 0000000..28da4be
Binary files /dev/null and b/doc/_build/latex/gallery_big_pipeline.png differ
diff --git a/doc/_build/latex/gallery_dless.png b/doc/_build/latex/gallery_dless.png
new file mode 100644
index 0000000..e05c9e2
Binary files /dev/null and b/doc/_build/latex/gallery_dless.png differ
diff --git a/doc/_build/latex/gallery_rna_seq.png b/doc/_build/latex/gallery_rna_seq.png
new file mode 100644
index 0000000..19f16ed
Binary files /dev/null and b/doc/_build/latex/gallery_rna_seq.png differ
diff --git a/doc/_build/latex/gallery_snp_annotation.png b/doc/_build/latex/gallery_snp_annotation.png
new file mode 100644
index 0000000..c0e7ba3
Binary files /dev/null and b/doc/_build/latex/gallery_snp_annotation.png differ
diff --git a/doc/_build/latex/gallery_snp_annotation_consequences.png b/doc/_build/latex/gallery_snp_annotation_consequences.png
new file mode 100644
index 0000000..8e223bd
Binary files /dev/null and b/doc/_build/latex/gallery_snp_annotation_consequences.png differ
diff --git a/doc/_build/latex/history_html_flowchart.png b/doc/_build/latex/history_html_flowchart.png
new file mode 100644
index 0000000..a939228
Binary files /dev/null and b/doc/_build/latex/history_html_flowchart.png differ
diff --git a/doc/_build/latex/history_html_flowchart1.png b/doc/_build/latex/history_html_flowchart1.png
new file mode 100644
index 0000000..a939228
Binary files /dev/null and b/doc/_build/latex/history_html_flowchart1.png differ
diff --git a/doc/_build/latex/history_html_flowchart2.png b/doc/_build/latex/history_html_flowchart2.png
new file mode 100644
index 0000000..a939228
Binary files /dev/null and b/doc/_build/latex/history_html_flowchart2.png differ
diff --git a/doc/_build/latex/jobs_limit.png b/doc/_build/latex/jobs_limit.png
new file mode 100644
index 0000000..4caac7d
Binary files /dev/null and b/doc/_build/latex/jobs_limit.png differ
diff --git a/doc/_build/latex/logo.jpg b/doc/_build/latex/logo.jpg
new file mode 100644
index 0000000..4d4b1ab
Binary files /dev/null and b/doc/_build/latex/logo.jpg differ
diff --git a/doc/_build/latex/manual_dependencies_flowchart_intro.png b/doc/_build/latex/manual_dependencies_flowchart_intro.png
new file mode 100644
index 0000000..d95dfe9
Binary files /dev/null and b/doc/_build/latex/manual_dependencies_flowchart_intro.png differ
diff --git a/doc/_build/latex/manual_dependencies_flowchart_intro1.png b/doc/_build/latex/manual_dependencies_flowchart_intro1.png
new file mode 100644
index 0000000..d95dfe9
Binary files /dev/null and b/doc/_build/latex/manual_dependencies_flowchart_intro1.png differ
diff --git a/doc/_build/latex/manual_split_merge_example.jpg b/doc/_build/latex/manual_split_merge_example.jpg
new file mode 100644
index 0000000..ab9e5bb
Binary files /dev/null and b/doc/_build/latex/manual_split_merge_example.jpg differ
diff --git a/doc/_build/latex/pretty_flowchart.png b/doc/_build/latex/pretty_flowchart.png
new file mode 100644
index 0000000..200338a
Binary files /dev/null and b/doc/_build/latex/pretty_flowchart.png differ
diff --git a/doc/_build/latex/python.ist b/doc/_build/latex/python.ist
new file mode 100644
index 0000000..9ffa0f9
--- /dev/null
+++ b/doc/_build/latex/python.ist
@@ -0,0 +1,11 @@
+line_max 100
+headings_flag 1
+heading_prefix " \\bigletter "
+
+preamble "\\begin{theindex}
+\\def\\bigletter#1{{\\Large\\sffamily#1}\\nopagebreak\\vspace{1mm}}
+
+"
+
+symhead_positive "{Symbols}"
+numhead_positive "{Numbers}"
diff --git a/doc/_build/latex/ruffus.aux b/doc/_build/latex/ruffus.aux
new file mode 100644
index 0000000..84ae53d
--- /dev/null
+++ b/doc/_build/latex/ruffus.aux
@@ -0,0 +1,1937 @@
+\relax
+\providecommand\hyper@newdestlabel[2]{}
+\providecommand\HyperFirstAtBeginDocument{\AtBeginDocument}
+\HyperFirstAtBeginDocument{\ifx\hyper@anchor\@undefined
+\global\let\oldcontentsline\contentsline
+\gdef\contentsline#1#2#3#4{\oldcontentsline{#1}{#2}{#3}}
+\global\let\oldnewlabel\newlabel
+\gdef\newlabel#1#2{\newlabelxx{#1}#2}
+\gdef\newlabelxx#1#2#3#4#5#6{\oldnewlabel{#1}{{#2}{#3}}}
+\AtEndDocument{\ifx\hyper@anchor\@undefined
+\let\contentsline\oldcontentsline
+\let\newlabel\oldnewlabel
+\fi}
+\fi}
+\global\let\hyper@last\relax
+\gdef\HyperFirstAtBeginDocument#1{#1}
+\providecommand\HyField@AuxAddToFields[1]{}
+\providecommand\HyField@AuxAddToCoFields[2]{}
+\select@language{english}
+\@writefile{toc}{\select@language{english}}
+\@writefile{lof}{\select@language{english}}
+\@writefile{lot}{\select@language{english}}
+\newlabel{contents::doc}{{}{1}{}{section*.2}{}}
+\@writefile{toc}{\contentsline {chapter}{\numberline {1}Start Here:}{1}{chapter.1}}
+\@writefile{lof}{\addvspace {10\p@ }}
+\@writefile{lot}{\addvspace {10\p@ }}
+\newlabel{contents:glob}{{1}{1}{Start Here:}{chapter.1}{}}
+\newlabel{contents:ruffus-documentation}{{1}{1}{Start Here:}{chapter.1}{}}
+\newlabel{contents:start-here}{{1}{1}{Start Here:}{chapter.1}{}}
+\@writefile{toc}{\contentsline {section}{\numberline {1.1}Installation}{1}{section.1.1}}
+\newlabel{installation:installation}{{1.1}{1}{Installation}{section.1.1}{}}
+\newlabel{installation::doc}{{1.1}{1}{Installation}{section.1.1}{}}
+\newlabel{installation:id1}{{1.1}{1}{Installation}{section.1.1}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.1.1}The easy way}{1}{subsection.1.1.1}}
+\newlabel{installation:the-easy-way}{{1.1.1}{1}{The easy way}{subsection.1.1.1}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.1.2}The most up-to-date code:}{1}{subsection.1.1.2}}
+\newlabel{installation:the-most-up-to-date-code}{{1.1.2}{1}{The most up-to-date code:}{subsection.1.1.2}{}}
+\@writefile{toc}{\contentsline {subsubsection}{Graphical flowcharts}{1}{subsubsection*.3}}
+\newlabel{installation:graphical-flowcharts}{{1.1.2}{1}{Graphical flowcharts}{subsubsection*.3}{}}
+\@writefile{toc}{\contentsline {section}{\numberline {1.2}\textbf {Ruffus} Manual: List of Chapters and Example code}{2}{section.1.2}}
+\newlabel{tutorials/new_tutorial/manual_contents:ruffus-manual-list-of-chapters-and-example-code}{{1.2}{2}{\textbf {Ruffus} Manual: List of Chapters and Example code}{section.1.2}{}}
+\newlabel{tutorials/new_tutorial/manual_contents::doc}{{1.2}{2}{\textbf {Ruffus} Manual: List of Chapters and Example code}{section.1.2}{}}
+\newlabel{tutorials/new_tutorial/manual_contents:new-manual-table-of-contents}{{1.2}{2}{\textbf {Ruffus} Manual: List of Chapters and Example code}{section.1.2}{}}
+\@writefile{toc}{\contentsline {section}{\numberline {1.3}\textbf {Chapter 1}: An introduction to basic \emph {Ruffus} syntax}{3}{section.1.3}}
+\newlabel{tutorials/new_tutorial/introduction:new-manual-introduction}{{1.3}{3}{\textbf {Chapter 1}: An introduction to basic \emph {Ruffus} syntax}{section.1.3}{}}
+\newlabel{tutorials/new_tutorial/introduction:index-0}{{1.3}{3}{\textbf {Chapter 1}: An introduction to basic \emph {Ruffus} syntax}{section.1.3}{}}
+\newlabel{tutorials/new_tutorial/introduction::doc}{{1.3}{3}{\textbf {Chapter 1}: An introduction to basic \emph {Ruffus} syntax}{section.1.3}{}}
+\newlabel{tutorials/new_tutorial/introduction:new-manual-introduction-chapter-num-an-introduction-to-basic-ruffus-syntax}{{1.3}{3}{\textbf {Chapter 1}: An introduction to basic \emph {Ruffus} syntax}{section.1.3}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.3.1}Overview}{3}{subsection.1.3.1}}
+\newlabel{tutorials/new_tutorial/introduction:overview}{{1.3.1}{3}{Overview}{subsection.1.3.1}{}}
+\newlabel{tutorials/new_tutorial/introduction:new-manual-introduction-import}{{1.3.1}{4}{Overview}{section*.4}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.3.2}Importing \emph {Ruffus}}{4}{subsection.1.3.2}}
+\newlabel{tutorials/new_tutorial/introduction:index-1}{{1.3.2}{4}{Importing \emph {Ruffus}}{subsection.1.3.2}{}}
+\newlabel{tutorials/new_tutorial/introduction:importing-ruffus}{{1.3.2}{4}{Importing \emph {Ruffus}}{subsection.1.3.2}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.3.3}\emph {Ruffus} decorators}{4}{subsection.1.3.3}}
+\newlabel{tutorials/new_tutorial/introduction:ruffus-decorators}{{1.3.3}{4}{\emph {Ruffus} decorators}{subsection.1.3.3}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.3.4}Your first \emph {Ruffus} pipeline}{5}{subsection.1.3.4}}
+\newlabel{tutorials/new_tutorial/introduction:your-first-ruffus-pipeline}{{1.3.4}{5}{Your first \emph {Ruffus} pipeline}{subsection.1.3.4}{}}
+\@writefile{toc}{\contentsline {subsubsection}{1. Write down the file names}{5}{subsubsection*.5}}
+\newlabel{tutorials/new_tutorial/introduction:write-down-the-file-names}{{1.3.4}{5}{1. Write down the file names}{subsubsection*.5}{}}
+\@writefile{toc}{\contentsline {subsubsection}{2. Write the python functions for each stage}{5}{subsubsection*.6}}
+\newlabel{tutorials/new_tutorial/introduction:write-the-python-functions-for-each-stage}{{1.3.4}{5}{2. Write the python functions for each stage}{subsubsection*.6}{}}
+\@writefile{toc}{\contentsline {subsubsection}{3. Link the python functions into a pipeline}{7}{subsubsection*.7}}
+\newlabel{tutorials/new_tutorial/introduction:link-the-python-functions-into-a-pipeline}{{1.3.4}{7}{3. Link the python functions into a pipeline}{subsubsection*.7}{}}
+\@writefile{toc}{\contentsline {subsubsection}{4. @transform syntax}{7}{subsubsection*.8}}
+\newlabel{tutorials/new_tutorial/introduction:transform-syntax}{{1.3.4}{7}{4. @transform syntax}{subsubsection*.8}{}}
+\@writefile{toc}{\contentsline {subsubsection}{5. Run the pipeline!}{8}{subsubsection*.9}}
+\newlabel{tutorials/new_tutorial/introduction:run-the-pipeline}{{1.3.4}{8}{5. Run the pipeline!}{subsubsection*.9}{}}
+\newlabel{tutorials/new_tutorial/introduction:index-2}{{1.3.4}{8}{5. Run the pipeline!}{subsubsection*.9}{}}
+\newlabel{tutorials/new_tutorial/introduction:new-manual-pipeline-run}{{1.3.4}{8}{5. Run the pipeline!}{subsubsection*.9}{}}
+\@writefile{toc}{\contentsline {section}{\numberline {1.4}\textbf {Chapter 2}: Transforming data in a pipeline with \emph {@transform}}{9}{section.1.4}}
+\newlabel{tutorials/new_tutorial/transform:index-0}{{1.4}{9}{\textbf {Chapter 2}: Transforming data in a pipeline with \emph {@transform}}{section.1.4}{}}
+\newlabel{tutorials/new_tutorial/transform:new-manual-transform-chapter-num-transforming-data-in-a-pipeline-with-transform}{{1.4}{9}{\textbf {Chapter 2}: Transforming data in a pipeline with \emph {@transform}}{section.1.4}{}}
+\newlabel{tutorials/new_tutorial/transform::doc}{{1.4}{9}{\textbf {Chapter 2}: Transforming data in a pipeline with \emph {@transform}}{section.1.4}{}}
+\newlabel{tutorials/new_tutorial/transform:new-manual-transform}{{1.4}{9}{\textbf {Chapter 2}: Transforming data in a pipeline with \emph {@transform}}{section.1.4}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.4.1}Review}{9}{subsection.1.4.1}}
+\newlabel{tutorials/new_tutorial/transform:review}{{1.4.1}{9}{Review}{subsection.1.4.1}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.4.2}Task functions as recipes}{9}{subsection.1.4.2}}
+\newlabel{tutorials/new_tutorial/transform:task-functions-as-recipes}{{1.4.2}{9}{Task functions as recipes}{subsection.1.4.2}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.4.3}\emph {@transform} is a 1 to 1 operation}{9}{subsection.1.4.3}}
+\newlabel{tutorials/new_tutorial/transform:transform-is-a-1-to-1-operation}{{1.4.3}{9}{\emph {@transform} is a 1 to 1 operation}{subsection.1.4.3}{}}
+\newlabel{tutorials/new_tutorial/transform:index-1}{{1.4.3}{9}{\emph {@transform} is a 1 to 1 operation}{subsection.1.4.3}{}}
+\@writefile{toc}{\contentsline {subsubsection}{A pair of files as the \textbf {Input}}{10}{subsubsection*.10}}
+\newlabel{tutorials/new_tutorial/transform:a-pair-of-files-as-the-input}{{1.4.3}{10}{A pair of files as the \textbf {Input}}{subsubsection*.10}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.4.4}\textbf {Input} and \textbf {Output} parameters}{10}{subsection.1.4.4}}
+\newlabel{tutorials/new_tutorial/transform:input-and-output-parameters}{{1.4.4}{10}{\textbf {Input} and \textbf {Output} parameters}{subsection.1.4.4}{}}
+\newlabel{tutorials/new_tutorial/transform:index-2}{{1.4.4}{10}{\textbf {Input} and \textbf {Output} parameters}{subsection.1.4.4}{}}
+\@writefile{lot}{\contentsline {table}{\numberline {1.1}{\ignorespaces Parameters for summarise\_bam\_file()}}{11}{table.1.1}}
+\@writefile{toc}{\contentsline {section}{\numberline {1.5}\textbf {Chapter 3}: More on \texttt {@transform}-ing data}{11}{section.1.5}}
+\newlabel{tutorials/new_tutorial/transform_in_parallel:new-manual-transform-in-parallel-chapter-num-more-on-transform-ing-data}{{1.5}{11}{\textbf {Chapter 3}: More on \texttt {@transform}-ing data}{section.1.5}{}}
+\newlabel{tutorials/new_tutorial/transform_in_parallel:index-0}{{1.5}{11}{\textbf {Chapter 3}: More on \texttt {@transform}-ing data}{section.1.5}{}}
+\newlabel{tutorials/new_tutorial/transform_in_parallel::doc}{{1.5}{11}{\textbf {Chapter 3}: More on \texttt {@transform}-ing data}{section.1.5}{}}
+\newlabel{tutorials/new_tutorial/transform_in_parallel:new-manual-transform-in-parallel}{{1.5}{11}{\textbf {Chapter 3}: More on \texttt {@transform}-ing data}{section.1.5}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.5.1}Review}{11}{subsection.1.5.1}}
+\newlabel{tutorials/new_tutorial/transform_in_parallel:review}{{1.5.1}{11}{Review}{subsection.1.5.1}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.5.2}Running pipelines in parallel}{12}{subsection.1.5.2}}
+\newlabel{tutorials/new_tutorial/transform_in_parallel:running-pipelines-in-parallel}{{1.5.2}{12}{Running pipelines in parallel}{subsection.1.5.2}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.5.3}Up-to-date jobs are not re-run unnecessarily}{12}{subsection.1.5.3}}
+\newlabel{tutorials/new_tutorial/transform_in_parallel:new-manual-only-rerun-out-of-date}{{1.5.3}{12}{Up-to-date jobs are not re-run unnecessarily}{subsection.1.5.3}{}}
+\newlabel{tutorials/new_tutorial/transform_in_parallel:up-to-date-jobs-are-not-re-run-unnecessarily}{{1.5.3}{12}{Up-to-date jobs are not re-run unnecessarily}{subsection.1.5.3}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.5.4}Defining pipeline tasks out of order}{13}{subsection.1.5.4}}
+\newlabel{tutorials/new_tutorial/transform_in_parallel:new-manual-output-from}{{1.5.4}{13}{Defining pipeline tasks out of order}{subsection.1.5.4}{}}
+\newlabel{tutorials/new_tutorial/transform_in_parallel:index-1}{{1.5.4}{13}{Defining pipeline tasks out of order}{subsection.1.5.4}{}}
+\newlabel{tutorials/new_tutorial/transform_in_parallel:defining-pipeline-tasks-out-of-order}{{1.5.4}{13}{Defining pipeline tasks out of order}{subsection.1.5.4}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.5.5}Multiple dependencies}{14}{subsection.1.5.5}}
+\newlabel{tutorials/new_tutorial/transform_in_parallel:new-manual-transform-multiple-dependencies}{{1.5.5}{14}{Multiple dependencies}{subsection.1.5.5}{}}
+\newlabel{tutorials/new_tutorial/transform_in_parallel:index-2}{{1.5.5}{14}{Multiple dependencies}{subsection.1.5.5}{}}
+\newlabel{tutorials/new_tutorial/transform_in_parallel:multiple-dependencies}{{1.5.5}{14}{Multiple dependencies}{subsection.1.5.5}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.5.6}\emph {@follows}}{14}{subsection.1.5.6}}
+\newlabel{tutorials/new_tutorial/transform_in_parallel:follows}{{1.5.6}{14}{\emph {@follows}}{subsection.1.5.6}{}}
+\newlabel{tutorials/new_tutorial/transform_in_parallel:new-manual-follows}{{1.5.6}{14}{\emph {@follows}}{subsection.1.5.6}{}}
+\newlabel{tutorials/new_tutorial/transform_in_parallel:index-3}{{1.5.6}{14}{\emph {@follows}}{subsection.1.5.6}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.5.7}Making directories automatically with \emph {@follows} and \emph {mkdir}}{15}{subsection.1.5.7}}
+\newlabel{tutorials/new_tutorial/transform_in_parallel:index-4}{{1.5.7}{15}{Making directories automatically with \emph {@follows} and \emph {mkdir}}{subsection.1.5.7}{}}
+\newlabel{tutorials/new_tutorial/transform_in_parallel:new-manual-follows-mkdir}{{1.5.7}{15}{Making directories automatically with \emph {@follows} and \emph {mkdir}}{subsection.1.5.7}{}}
+\newlabel{tutorials/new_tutorial/transform_in_parallel:making-directories-automatically-with-follows-and-mkdir}{{1.5.7}{15}{Making directories automatically with \emph {@follows} and \emph {mkdir}}{subsection.1.5.7}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.5.8}Globs in the \textbf {Input} parameter}{15}{subsection.1.5.8}}
+\newlabel{tutorials/new_tutorial/transform_in_parallel:globs-in-the-input-parameter}{{1.5.8}{15}{Globs in the \textbf {Input} parameter}{subsection.1.5.8}{}}
+\newlabel{tutorials/new_tutorial/transform_in_parallel:index-5}{{1.5.8}{15}{Globs in the \textbf {Input} parameter}{subsection.1.5.8}{}}
+\newlabel{tutorials/new_tutorial/transform_in_parallel:new-manual-globs-as-input}{{1.5.8}{15}{Globs in the \textbf {Input} parameter}{subsection.1.5.8}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.5.9}Mixing Tasks and Globs in the \textbf {Input} parameter}{16}{subsection.1.5.9}}
+\newlabel{tutorials/new_tutorial/transform_in_parallel:index-6}{{1.5.9}{16}{Mixing Tasks and Globs in the \textbf {Input} parameter}{subsection.1.5.9}{}}
+\newlabel{tutorials/new_tutorial/transform_in_parallel:mixing-tasks-and-globs-in-the-input-parameter}{{1.5.9}{16}{Mixing Tasks and Globs in the \textbf {Input} parameter}{subsection.1.5.9}{}}
+\@writefile{toc}{\contentsline {section}{\numberline {1.6}\textbf {Chapter 4}: Creating files with \texttt {@originate}}{16}{section.1.6}}
+\newlabel{tutorials/new_tutorial/originate:index-0}{{1.6}{16}{\textbf {Chapter 4}: Creating files with \texttt {@originate}}{section.1.6}{}}
+\newlabel{tutorials/new_tutorial/originate:new-manual-originate}{{1.6}{16}{\textbf {Chapter 4}: Creating files with \texttt {@originate}}{section.1.6}{}}
+\newlabel{tutorials/new_tutorial/originate:new-manual-originate-chapter-num-creating-files-with-originate}{{1.6}{16}{\textbf {Chapter 4}: Creating files with \texttt {@originate}}{section.1.6}{}}
+\newlabel{tutorials/new_tutorial/originate::doc}{{1.6}{16}{\textbf {Chapter 4}: Creating files with \texttt {@originate}}{section.1.6}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.6.1}Simplifying our example with \emph {@originate}}{16}{subsection.1.6.1}}
+\newlabel{tutorials/new_tutorial/originate:simplifying-our-example-with-originate}{{1.6.1}{16}{Simplifying our example with \emph {@originate}}{subsection.1.6.1}{}}
+\@writefile{toc}{\contentsline {section}{\numberline {1.7}\textbf {Chapter 5}: Understanding how your pipeline works with \emph {pipeline\_printout(...)}}{17}{section.1.7}}
+\newlabel{tutorials/new_tutorial/pipeline_printout:index-0}{{1.7}{17}{\textbf {Chapter 5}: Understanding how your pipeline works with \emph {pipeline\_printout(...)}}{section.1.7}{}}
+\newlabel{tutorials/new_tutorial/pipeline_printout:new-manual-pipeline-printout}{{1.7}{17}{\textbf {Chapter 5}: Understanding how your pipeline works with \emph {pipeline\_printout(...)}}{section.1.7}{}}
+\newlabel{tutorials/new_tutorial/pipeline_printout::doc}{{1.7}{17}{\textbf {Chapter 5}: Understanding how your pipeline works with \emph {pipeline\_printout(...)}}{section.1.7}{}}
+\newlabel{tutorials/new_tutorial/pipeline_printout:new-manual-pipeline-printout-chapter-num-understanding-how-your-pipeline-works-with-pipeline-printout}{{1.7}{17}{\textbf {Chapter 5}: Understanding how your pipeline works with \emph {pipeline\_printout(...)}}{section.1.7}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.7.1}Printing out which jobs will be run}{18}{subsection.1.7.1}}
+\newlabel{tutorials/new_tutorial/pipeline_printout:printing-out-which-jobs-will-be-run}{{1.7.1}{18}{Printing out which jobs will be run}{subsection.1.7.1}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.7.2}Determining which jobs are out-of-date or not}{18}{subsection.1.7.2}}
+\newlabel{tutorials/new_tutorial/pipeline_printout:determining-which-jobs-are-out-of-date-or-not}{{1.7.2}{18}{Determining which jobs are out-of-date or not}{subsection.1.7.2}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.7.3}Verbosity levels}{19}{subsection.1.7.3}}
+\newlabel{tutorials/new_tutorial/pipeline_printout:verbosity-levels}{{1.7.3}{19}{Verbosity levels}{subsection.1.7.3}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.7.4}Abbreviating long file paths with \texttt {verbose\_abbreviated\_path}}{19}{subsection.1.7.4}}
+\newlabel{tutorials/new_tutorial/pipeline_printout:abbreviating-long-file-paths-with-verbose-abbreviated-path}{{1.7.4}{19}{Abbreviating long file paths with \texttt {verbose\_abbreviated\_path}}{subsection.1.7.4}{}}
+\newlabel{tutorials/new_tutorial/pipeline_printout:new-manual-pipeline-printout-verbose-abbreviated-path}{{1.7.4}{19}{Abbreviating long file paths with \texttt {verbose\_abbreviated\_path}}{subsection.1.7.4}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.7.5}Getting a list of all tasks in a pipeline}{20}{subsection.1.7.5}}
+\newlabel{tutorials/new_tutorial/pipeline_printout:getting-a-list-of-all-tasks-in-a-pipeline}{{1.7.5}{20}{Getting a list of all tasks in a pipeline}{subsection.1.7.5}{}}
+\@writefile{toc}{\contentsline {section}{\numberline {1.8}\textbf {Chapter 6}: Running \emph {Ruffus} from the command line with ruffus.cmdline}{20}{section.1.8}}
+\newlabel{tutorials/new_tutorial/command_line:new-manual-cmdline-chapter-num-running-ruffus-from-the-command-line-with-ruffus-cmdline}{{1.8}{20}{\textbf {Chapter 6}: Running \emph {Ruffus} from the command line with ruffus.cmdline}{section.1.8}{}}
+\newlabel{tutorials/new_tutorial/command_line:index-0}{{1.8}{20}{\textbf {Chapter 6}: Running \emph {Ruffus} from the command line with ruffus.cmdline}{section.1.8}{}}
+\newlabel{tutorials/new_tutorial/command_line::doc}{{1.8}{20}{\textbf {Chapter 6}: Running \emph {Ruffus} from the command line with ruffus.cmdline}{section.1.8}{}}
+\newlabel{tutorials/new_tutorial/command_line:new-manual-cmdline}{{1.8}{20}{\textbf {Chapter 6}: Running \emph {Ruffus} from the command line with ruffus.cmdline}{section.1.8}{}}
+\newlabel{tutorials/new_tutorial/command_line:new-manual-cmdline-get-argparse}{{1.8}{20}{\textbf {Chapter 6}: Running \emph {Ruffus} from the command line with ruffus.cmdline}{section*.11}{}}
+\newlabel{tutorials/new_tutorial/command_line:new-manual-cmdline-run}{{1.8}{20}{\textbf {Chapter 6}: Running \emph {Ruffus} from the command line with ruffus.cmdline}{section*.12}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.8.1}Template for argparse}{21}{subsection.1.8.1}}
+\newlabel{tutorials/new_tutorial/command_line:template-for-argparse}{{1.8.1}{21}{Template for argparse}{subsection.1.8.1}{}}
+\newlabel{tutorials/new_tutorial/command_line:new-manual-cmdline-run}{{1.8.1}{21}{Template for argparse}{subsection.1.8.1}{}}
+\newlabel{tutorials/new_tutorial/command_line:new-manual-cmdline-setup-logging}{{1.8.1}{21}{Template for argparse}{subsection.1.8.1}{}}
+\newlabel{tutorials/new_tutorial/command_line:new-manual-cmdline-get-argparse}{{1.8.1}{21}{Template for argparse}{subsection.1.8.1}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.8.2}Command Line Arguments}{21}{subsection.1.8.2}}
+\newlabel{tutorials/new_tutorial/command_line:command-line-arguments}{{1.8.2}{21}{Command Line Arguments}{subsection.1.8.2}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.8.3}1) Logging}{21}{subsection.1.8.3}}
+\newlabel{tutorials/new_tutorial/command_line:logging}{{1.8.3}{21}{1) Logging}{subsection.1.8.3}{}}
+\@writefile{toc}{\contentsline {subsubsection}{A) Only to the log file:}{22}{subsubsection*.13}}
+\newlabel{tutorials/new_tutorial/command_line:a-only-to-the-log-file}{{1.8.3}{22}{A) Only to the log file:}{subsubsection*.13}{}}
+\@writefile{toc}{\contentsline {subsubsection}{B) Only to the display:}{22}{subsubsection*.14}}
+\newlabel{tutorials/new_tutorial/command_line:b-only-to-the-display}{{1.8.3}{22}{B) Only to the display:}{subsubsection*.14}{}}
+\@writefile{toc}{\contentsline {subsubsection}{C) To both simultaneously:}{22}{subsubsection*.15}}
+\newlabel{tutorials/new_tutorial/command_line:c-to-both-simultaneously}{{1.8.3}{22}{C) To both simultaneously:}{subsubsection*.15}{}}
+\newlabel{tutorials/new_tutorial/command_line:new-manual-cmdline-message}{{1.8.3}{22}{C) To both simultaneously:}{subsubsection*.15}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.8.4}2) Tracing pipeline progress}{22}{subsection.1.8.4}}
+\newlabel{tutorials/new_tutorial/command_line:tracing-pipeline-progress}{{1.8.4}{22}{2) Tracing pipeline progress}{subsection.1.8.4}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.8.5}3) Printing a flowchart}{23}{subsection.1.8.5}}
+\newlabel{tutorials/new_tutorial/command_line:printing-a-flowchart}{{1.8.5}{23}{3) Printing a flowchart}{subsection.1.8.5}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.8.6}4) Running in parallel on multiple processors}{23}{subsection.1.8.6}}
+\newlabel{tutorials/new_tutorial/command_line:running-in-parallel-on-multiple-processors}{{1.8.6}{23}{4) Running in parallel on multiple processors}{subsection.1.8.6}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.8.7}5) Set up checkpointing so that \emph {Ruffus} knows which files are out of date}{23}{subsection.1.8.7}}
+\newlabel{tutorials/new_tutorial/command_line:setup-checkpointing-so-that-ruffus-knows-which-files-are-out-of-date}{{1.8.7}{23}{5) Set up checkpointing so that \emph {Ruffus} knows which files are out of date}{subsection.1.8.7}{}}
+\@writefile{toc}{\contentsline {subsubsection}{Recreating checkpoints}{23}{subsubsection*.16}}
+\newlabel{tutorials/new_tutorial/command_line:recreating-checkpoints}{{1.8.7}{23}{Recreating checkpoints}{subsubsection*.16}{}}
+\@writefile{toc}{\contentsline {subsubsection}{Touch files}{23}{subsubsection*.17}}
+\newlabel{tutorials/new_tutorial/command_line:touch-files}{{1.8.7}{23}{Touch files}{subsubsection*.17}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.8.8}6) Skipping specified options}{23}{subsection.1.8.8}}
+\newlabel{tutorials/new_tutorial/command_line:skipping-specified-options}{{1.8.8}{23}{6) Skipping specified options}{subsection.1.8.8}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.8.9}7) Specifying verbosity and abbreviating long paths}{24}{subsection.1.8.9}}
+\newlabel{tutorials/new_tutorial/command_line:specifying-verbosity-and-abbreviating-long-paths}{{1.8.9}{24}{7) Specifying verbosity and abbreviating long paths}{subsection.1.8.9}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.8.10}8) Displaying the version}{24}{subsection.1.8.10}}
+\newlabel{tutorials/new_tutorial/command_line:displaying-the-version}{{1.8.10}{24}{8) Displaying the version}{subsection.1.8.10}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.8.11}Template for optparse}{24}{subsection.1.8.11}}
+\newlabel{tutorials/new_tutorial/command_line:code-template-optparse}{{1.8.11}{24}{Template for optparse}{subsection.1.8.11}{}}
+\newlabel{tutorials/new_tutorial/command_line:template-for-optparse}{{1.8.11}{24}{Template for optparse}{subsection.1.8.11}{}}
+\@writefile{toc}{\contentsline {section}{\numberline {1.9}\textbf {Chapter 7}: Displaying the pipeline visually with \emph {pipeline\_printout\_graph(...)}}{25}{section.1.9}}
+\newlabel{tutorials/new_tutorial/pipeline_printout_graph:index-0}{{1.9}{25}{\textbf {Chapter 7}: Displaying the pipeline visually with \emph {pipeline\_printout\_graph(...)}}{section.1.9}{}}
+\newlabel{tutorials/new_tutorial/pipeline_printout_graph:new-manual-pipeline-printout-graph-chapter-num-displaying-the-pipeline-visually-with-pipeline-printout-graph}{{1.9}{25}{\textbf {Chapter 7}: Displaying the pipeline visually with \emph {pipeline\_printout\_graph(...)}}{section.1.9}{}}
+\newlabel{tutorials/new_tutorial/pipeline_printout_graph::doc}{{1.9}{25}{\textbf {Chapter 7}: Displaying the pipeline visually with \emph {pipeline\_printout\_graph(...)}}{section.1.9}{}}
+\newlabel{tutorials/new_tutorial/pipeline_printout_graph:new-manual-pipeline-printout-graph}{{1.9}{25}{\textbf {Chapter 7}: Displaying the pipeline visually with \emph {pipeline\_printout\_graph(...)}}{section.1.9}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.9.1}Printing out a flowchart of our pipeline}{25}{subsection.1.9.1}}
+\newlabel{tutorials/new_tutorial/pipeline_printout_graph:printing-out-a-flowchart-of-our-pipeline}{{1.9.1}{25}{Printing out a flowchart of our pipeline}{subsection.1.9.1}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.9.2}Command line options made easier with \texttt {ruffus.cmdline}}{26}{subsection.1.9.2}}
+\newlabel{tutorials/new_tutorial/pipeline_printout_graph:command-line-options-made-easier-with-ruffus-cmdline}{{1.9.2}{26}{Command line options made easier with \texttt {ruffus.cmdline}}{subsection.1.9.2}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.9.3}Horribly complicated pipelines!}{26}{subsection.1.9.3}}
+\newlabel{tutorials/new_tutorial/pipeline_printout_graph:horribly-complicated-pipelines}{{1.9.3}{26}{Horribly complicated pipelines!}{subsection.1.9.3}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.9.4}Circular dependency errors in pipelines!}{27}{subsection.1.9.4}}
+\newlabel{tutorials/new_tutorial/pipeline_printout_graph:circular-dependency-errors-in-pipelines}{{1.9.4}{27}{Circular dependency errors in pipelines!}{subsection.1.9.4}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.9.5}\texttt {@graphviz}: Customising the appearance of each task}{28}{subsection.1.9.5}}
+\newlabel{tutorials/new_tutorial/pipeline_printout_graph:graphviz-customising-the-appearance-of-each-task}{{1.9.5}{28}{\texttt {@graphviz}: Customising the appearance of each task}{subsection.1.9.5}{}}
+\@writefile{toc}{\contentsline {section}{\numberline {1.10}\textbf {Chapter 8}: Specifying output file names with \emph {formatter()} and \emph {regex()}}{29}{section.1.10}}
+\newlabel{tutorials/new_tutorial/output_file_names:new-manual-output-file-names-chapter-num-specifying-output-file-names-with-formatter-and-regex}{{1.10}{29}{\textbf {Chapter 8}: Specifying output file names with \emph {formatter()} and \emph {regex()}}{section.1.10}{}}
+\newlabel{tutorials/new_tutorial/output_file_names:index-0}{{1.10}{29}{\textbf {Chapter 8}: Specifying output file names with \emph {formatter()} and \emph {regex()}}{section.1.10}{}}
+\newlabel{tutorials/new_tutorial/output_file_names:new-manual-output-file-names}{{1.10}{29}{\textbf {Chapter 8}: Specifying output file names with \emph {formatter()} and \emph {regex()}}{section.1.10}{}}
+\newlabel{tutorials/new_tutorial/output_file_names::doc}{{1.10}{29}{\textbf {Chapter 8}: Specifying output file names with \emph {formatter()} and \emph {regex()}}{section.1.10}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.10.1}Review}{29}{subsection.1.10.1}}
+\newlabel{tutorials/new_tutorial/output_file_names:review}{{1.10.1}{29}{Review}{subsection.1.10.1}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.10.2}A different file name \emph {suffix()} for each pipeline stage}{30}{subsection.1.10.2}}
+\newlabel{tutorials/new_tutorial/output_file_names:new-manual-suffix}{{1.10.2}{30}{A different file name \emph {suffix()} for each pipeline stage}{subsection.1.10.2}{}}
+\newlabel{tutorials/new_tutorial/output_file_names:a-different-file-name-suffix-for-each-pipeline-stage}{{1.10.2}{30}{A different file name \emph {suffix()} for each pipeline stage}{subsection.1.10.2}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.10.3}\emph {formatter()} manipulates pathnames and regular expressions}{31}{subsection.1.10.3}}
+\newlabel{tutorials/new_tutorial/output_file_names:formatter-manipulates-pathnames-and-regular-expression}{{1.10.3}{31}{\emph {formatter()} manipulates pathnames and regular expressions}{subsection.1.10.3}{}}
+\newlabel{tutorials/new_tutorial/output_file_names:new-manual-formatter}{{1.10.3}{31}{\emph {formatter()} manipulates pathnames and regular expressions}{subsection.1.10.3}{}}
+\@writefile{toc}{\contentsline {subsubsection}{Path name components}{32}{subsubsection*.18}}
+\newlabel{tutorials/new_tutorial/output_file_names:path-name-components}{{1.10.3}{32}{Path name components}{subsubsection*.18}{}}
+\@writefile{toc}{\contentsline {subsubsection}{Filter and parse using regular expressions}{33}{subsubsection*.19}}
+\newlabel{tutorials/new_tutorial/output_file_names:new-manual-formatter-regex}{{1.10.3}{33}{Filter and parse using regular expressions}{subsubsection*.19}{}}
+\newlabel{tutorials/new_tutorial/output_file_names:filter-and-parse-using-regular-expressions}{{1.10.3}{33}{Filter and parse using regular expressions}{subsubsection*.19}{}}
+\@writefile{toc}{\contentsline {subsubsection}{Using \emph {@transform()} with \emph {formatter()}}{33}{subsubsection*.20}}
+\newlabel{tutorials/new_tutorial/output_file_names:using-transform-with-formatter}{{1.10.3}{33}{Using \emph {@transform()} with \emph {formatter()}}{subsubsection*.20}{}}
+\@writefile{toc}{\contentsline {subsubsection}{string substitution for ``extra'' arguments}{34}{subsubsection*.21}}
+\newlabel{tutorials/new_tutorial/output_file_names:string-substitution-for-extra-arguments}{{1.10.3}{34}{string substitution for ``extra'' arguments}{subsubsection*.21}{}}
+\@writefile{toc}{\contentsline {subsubsection}{Changing directories using \emph {formatter()} in a zoo...}{35}{subsubsection*.22}}
+\newlabel{tutorials/new_tutorial/output_file_names:changing-directories-using-formatter-in-a-zoo}{{1.10.3}{35}{Changing directories using \emph {formatter()} in a zoo...}{subsubsection*.22}{}}
+\newlabel{tutorials/new_tutorial/output_file_names:new-manual-output-file-names-formatter-zoo}{{1.10.3}{35}{Changing directories using \emph {formatter()} in a zoo...}{subsubsection*.22}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.10.4}\emph {regex()} manipulates via regular expressions}{36}{subsection.1.10.4}}
+\newlabel{tutorials/new_tutorial/output_file_names:regex-manipulates-via-regular-expressions}{{1.10.4}{36}{\emph {regex()} manipulates via regular expressions}{subsection.1.10.4}{}}
+\newlabel{tutorials/new_tutorial/output_file_names:new-manual-regex}{{1.10.4}{36}{\emph {regex()} manipulates via regular expressions}{subsection.1.10.4}{}}
+\@writefile{toc}{\contentsline {section}{\numberline {1.11}\textbf {Chapter 9}: Preparing directories for output with \emph {@mkdir()}}{37}{section.1.11}}
+\newlabel{tutorials/new_tutorial/mkdir:index-0}{{1.11}{37}{\textbf {Chapter 9}: Preparing directories for output with \emph {@mkdir()}}{section.1.11}{}}
+\newlabel{tutorials/new_tutorial/mkdir::doc}{{1.11}{37}{\textbf {Chapter 9}: Preparing directories for output with \emph {@mkdir()}}{section.1.11}{}}
+\newlabel{tutorials/new_tutorial/mkdir:new-manual-mkdir-chapter-num-preparing-directories-for-output-with-mkdir}{{1.11}{37}{\textbf {Chapter 9}: Preparing directories for output with \emph {@mkdir()}}{section.1.11}{}}
+\newlabel{tutorials/new_tutorial/mkdir:new-manual-mkdir}{{1.11}{37}{\textbf {Chapter 9}: Preparing directories for output with \emph {@mkdir()}}{section.1.11}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.11.1}Overview}{37}{subsection.1.11.1}}
+\newlabel{tutorials/new_tutorial/mkdir:overview}{{1.11.1}{37}{Overview}{subsection.1.11.1}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.11.2}Creating directories after string substitution in a zoo...}{38}{subsection.1.11.2}}
+\newlabel{tutorials/new_tutorial/mkdir:creating-directories-after-string-substitution-in-a-zoo}{{1.11.2}{38}{Creating directories after string substitution in a zoo...}{subsection.1.11.2}{}}
+\@writefile{toc}{\contentsline {subsubsection}{using \emph {formatter()}}{38}{subsubsection*.23}}
+\newlabel{tutorials/new_tutorial/mkdir:using-formatter}{{1.11.2}{38}{using \emph {formatter()}}{subsubsection*.23}{}}
+\@writefile{toc}{\contentsline {subsubsection}{using \emph {regex()}}{39}{subsubsection*.24}}
+\newlabel{tutorials/new_tutorial/mkdir:using-regex}{{1.11.2}{39}{using \emph {regex()}}{subsubsection*.24}{}}
+\@writefile{toc}{\contentsline {section}{\numberline {1.12}\textbf {Chapter 10}: Checkpointing: Interrupted Pipelines and Exceptions}{39}{section.1.12}}
+\newlabel{tutorials/new_tutorial/checkpointing:new-manual-checkpointing}{{1.12}{39}{\textbf {Chapter 10}: Checkpointing: Interrupted Pipelines and Exceptions}{section.1.12}{}}
+\newlabel{tutorials/new_tutorial/checkpointing:index-0}{{1.12}{39}{\textbf {Chapter 10}: Checkpointing: Interrupted Pipelines and Exceptions}{section.1.12}{}}
+\newlabel{tutorials/new_tutorial/checkpointing::doc}{{1.12}{39}{\textbf {Chapter 10}: Checkpointing: Interrupted Pipelines and Exceptions}{section.1.12}{}}
+\newlabel{tutorials/new_tutorial/checkpointing:new-manual-checkpointing-chapter-num-checkpointing-interrupted-pipelines-and-exceptions}{{1.12}{39}{\textbf {Chapter 10}: Checkpointing: Interrupted Pipelines and Exceptions}{section.1.12}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.12.1}Overview}{40}{subsection.1.12.1}}
+\newlabel{tutorials/new_tutorial/checkpointing:overview}{{1.12.1}{40}{Overview}{subsection.1.12.1}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.12.2}Interrupting tasks}{40}{subsection.1.12.2}}
+\newlabel{tutorials/new_tutorial/checkpointing:interrupting-tasks}{{1.12.2}{40}{Interrupting tasks}{subsection.1.12.2}{}}
+\newlabel{tutorials/new_tutorial/checkpointing:new-manual-interrupting-tasks}{{1.12.2}{40}{Interrupting tasks}{subsection.1.12.2}{}}
+\newlabel{tutorials/new_tutorial/checkpointing:index-1}{{1.12.2}{40}{Interrupting tasks}{subsection.1.12.2}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.12.3}Checkpointing: only log completed jobs}{41}{subsection.1.12.3}}
+\newlabel{tutorials/new_tutorial/checkpointing:checkpointing-only-log-completed-jobs}{{1.12.3}{41}{Checkpointing: only log completed jobs}{subsection.1.12.3}{}}
+\newlabel{tutorials/new_tutorial/checkpointing:new-manual-logging-completed-jobs}{{1.12.3}{41}{Checkpointing: only log completed jobs}{subsection.1.12.3}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.12.4}Do not share the same checkpoint file across multiple pipelines!}{41}{subsection.1.12.4}}
+\newlabel{tutorials/new_tutorial/checkpointing:new-manual-history-files-cannot-be-shared}{{1.12.4}{41}{Do not share the same checkpoint file across multiple pipelines!}{subsection.1.12.4}{}}
+\newlabel{tutorials/new_tutorial/checkpointing:do-not-share-the-same-checkpoint-file-across-for-multiple-pipelines}{{1.12.4}{41}{Do not share the same checkpoint file across multiple pipelines!}{subsection.1.12.4}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.12.5}Setting checkpoint file names}{41}{subsection.1.12.5}}
+\newlabel{tutorials/new_tutorial/checkpointing:new-manual-changing-history-file-name}{{1.12.5}{41}{Setting checkpoint file names}{subsection.1.12.5}{}}
+\newlabel{tutorials/new_tutorial/checkpointing:setting-checkpoint-file-names}{{1.12.5}{41}{Setting checkpoint file names}{subsection.1.12.5}{}}
+\@writefile{toc}{\contentsline {subsubsection}{environment variable \texttt {DEFAULT\_RUFFUS\_HISTORY\_FILE}}{42}{subsubsection*.25}}
+\newlabel{tutorials/new_tutorial/checkpointing:environment-variable-default-ruffus-history-file}{{1.12.5}{42}{environment variable \texttt {DEFAULT\_RUFFUS\_HISTORY\_FILE}}{subsubsection*.25}{}}
+\@writefile{toc}{\contentsline {subsubsection}{Setting the checkpoint file name manually}{42}{subsubsection*.26}}
+\newlabel{tutorials/new_tutorial/checkpointing:setting-the-checkpoint-file-name-manually}{{1.12.5}{42}{Setting the checkpoint file name manually}{subsubsection*.26}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.12.6}Useful checkpoint file name policies with \texttt {DEFAULT\_RUFFUS\_HISTORY\_FILE}}{42}{subsection.1.12.6}}
+\newlabel{tutorials/new_tutorial/checkpointing:useful-checkpoint-file-name-policies-default-ruffus-history-file}{{1.12.6}{42}{Useful checkpoint file name policies with \texttt {DEFAULT\_RUFFUS\_HISTORY\_FILE}}{subsection.1.12.6}{}}
+\@writefile{toc}{\contentsline {subsubsection}{Example 1: same directory, different name}{42}{subsubsection*.27}}
+\newlabel{tutorials/new_tutorial/checkpointing:example-1-same-directory-different-name}{{1.12.6}{42}{Example 1: same directory, different name}{subsubsection*.27}{}}
+\@writefile{toc}{\contentsline {subsubsection}{Example 2: Different directory, same name}{42}{subsubsection*.28}}
+\newlabel{tutorials/new_tutorial/checkpointing:example-2-different-directory-same-name}{{1.12.6}{42}{Example 2: Different directory, same name}{subsubsection*.28}{}}
+\@writefile{toc}{\contentsline {subsubsection}{Example 3: Different directory, same name but keep one level of subdirectory to disambiguate}{43}{subsubsection*.29}}
+\newlabel{tutorials/new_tutorial/checkpointing:example-2-different-directory-same-name-but-keep-one-level-of-subdirectory-to-disambiguate}{{1.12.6}{43}{Example 3: Different directory, same name but keep one level of subdirectory to disambiguate}{subsubsection*.29}{}}
+\@writefile{toc}{\contentsline {subsubsection}{Example 4: nested in common directory}{43}{subsubsection*.30}}
+\newlabel{tutorials/new_tutorial/checkpointing:example-2-nested-in-common-directory}{{1.12.6}{43}{Example 4: nested in common directory}{subsubsection*.30}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.12.7}Regenerating the checkpoint file}{43}{subsection.1.12.7}}
+\newlabel{tutorials/new_tutorial/checkpointing:new-manual-regenerating-history-file}{{1.12.7}{43}{Regenerating the checkpoint file}{subsection.1.12.7}{}}
+\newlabel{tutorials/new_tutorial/checkpointing:regenerating-the-checkpoint-file}{{1.12.7}{43}{Regenerating the checkpoint file}{subsection.1.12.7}{}}
+\newlabel{tutorials/new_tutorial/checkpointing:index-2}{{1.12.7}{43}{Regenerating the checkpoint file}{subsection.1.12.7}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.12.8}Rules for determining if files are up to date}{43}{subsection.1.12.8}}
+\newlabel{tutorials/new_tutorial/checkpointing:new-manual-skip-up-to-date-rules}{{1.12.8}{43}{Rules for determining if files are up to date}{subsection.1.12.8}{}}
+\newlabel{tutorials/new_tutorial/checkpointing:rules-for-determining-if-files-are-up-to-date}{{1.12.8}{43}{Rules for determining if files are up to date}{subsection.1.12.8}{}}
+\newlabel{tutorials/new_tutorial/checkpointing:index-3}{{1.12.8}{43}{Rules for determining if files are up to date}{subsection.1.12.8}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.12.9}Missing files generate exceptions}{44}{subsection.1.12.9}}
+\newlabel{tutorials/new_tutorial/checkpointing:index-4}{{1.12.9}{44}{Missing files generate exceptions}{subsection.1.12.9}{}}
+\newlabel{tutorials/new_tutorial/checkpointing:missing-files-generate-exceptions}{{1.12.9}{44}{Missing files generate exceptions}{subsection.1.12.9}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.12.10}Caveats: Coarse Timestamp resolution}{44}{subsection.1.12.10}}
+\newlabel{tutorials/new_tutorial/checkpointing:index-5}{{1.12.10}{44}{Caveats: Coarse Timestamp resolution}{subsection.1.12.10}{}}
+\newlabel{tutorials/new_tutorial/checkpointing:caveats-coarse-timestamp-resolution}{{1.12.10}{44}{Caveats: Coarse Timestamp resolution}{subsection.1.12.10}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.12.11}Flag files: Checkpointing for the paranoid}{44}{subsection.1.12.11}}
+\newlabel{tutorials/new_tutorial/checkpointing:index-6}{{1.12.11}{44}{Flag files: Checkpointing for the paranoid}{subsection.1.12.11}{}}
+\newlabel{tutorials/new_tutorial/checkpointing:flag-files-checkpointing-for-the-paranoid}{{1.12.11}{44}{Flag files: Checkpointing for the paranoid}{subsection.1.12.11}{}}
+\@writefile{toc}{\contentsline {section}{\numberline {1.13}\textbf {Chapter 11}: Pipeline topologies and a compendium of \emph {Ruffus} decorators}{44}{section.1.13}}
+\newlabel{tutorials/new_tutorial/decorators_compendium:index-0}{{1.13}{44}{\textbf {Chapter 11}: Pipeline topologies and a compendium of \emph {Ruffus} decorators}{section.1.13}{}}
+\newlabel{tutorials/new_tutorial/decorators_compendium:new-manual-decorators-compendium}{{1.13}{44}{\textbf {Chapter 11}: Pipeline topologies and a compendium of \emph {Ruffus} decorators}{section.1.13}{}}
+\newlabel{tutorials/new_tutorial/decorators_compendium:new-manual-decorators-compendium-chapter-num-pipeline-topologies-and-a-compendium-of-ruffus-decorators}{{1.13}{44}{\textbf {Chapter 11}: Pipeline topologies and a compendium of \emph {Ruffus} decorators}{section.1.13}{}}
+\newlabel{tutorials/new_tutorial/decorators_compendium::doc}{{1.13}{44}{\textbf {Chapter 11}: Pipeline topologies and a compendium of \emph {Ruffus} decorators}{section.1.13}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.13.1}Overview}{44}{subsection.1.13.1}}
+\newlabel{tutorials/new_tutorial/decorators_compendium:overview}{{1.13.1}{44}{Overview}{subsection.1.13.1}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.13.2}\emph {@transform}}{45}{subsection.1.13.2}}
+\newlabel{tutorials/new_tutorial/decorators_compendium:transform}{{1.13.2}{45}{\emph {@transform}}{subsection.1.13.2}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.13.3}A bestiary of \emph {Ruffus} decorators}{45}{subsection.1.13.3}}
+\newlabel{tutorials/new_tutorial/decorators_compendium:a-bestiary-of-ruffus-decorators}{{1.13.3}{45}{A bestiary of \emph {Ruffus} decorators}{subsection.1.13.3}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.13.4}\emph {@originate}}{45}{subsection.1.13.4}}
+\newlabel{tutorials/new_tutorial/decorators_compendium:originate}{{1.13.4}{45}{\emph {@originate}}{subsection.1.13.4}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.13.5}\emph {@merge}}{45}{subsection.1.13.5}}
+\newlabel{tutorials/new_tutorial/decorators_compendium:merge}{{1.13.5}{45}{\emph {@merge}}{subsection.1.13.5}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.13.6}\emph {@split}}{45}{subsection.1.13.6}}
+\newlabel{tutorials/new_tutorial/decorators_compendium:split}{{1.13.6}{45}{\emph {@split}}{subsection.1.13.6}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.13.7}\emph {@subdivide}}{46}{subsection.1.13.7}}
+\newlabel{tutorials/new_tutorial/decorators_compendium:subdivide}{{1.13.7}{46}{\emph {@subdivide}}{subsection.1.13.7}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.13.8}\emph {@collate}}{46}{subsection.1.13.8}}
+\newlabel{tutorials/new_tutorial/decorators_compendium:collate}{{1.13.8}{46}{\emph {@collate}}{subsection.1.13.8}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.13.9}Combinatorics}{46}{subsection.1.13.9}}
+\newlabel{tutorials/new_tutorial/decorators_compendium:combinatorics}{{1.13.9}{46}{Combinatorics}{subsection.1.13.9}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.13.10}\emph {@product}}{47}{subsection.1.13.10}}
+\newlabel{tutorials/new_tutorial/decorators_compendium:product}{{1.13.10}{47}{\emph {@product}}{subsection.1.13.10}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.13.11}\emph {@combinations}}{47}{subsection.1.13.11}}
+\newlabel{tutorials/new_tutorial/decorators_compendium:combinations}{{1.13.11}{47}{\emph {@combinations}}{subsection.1.13.11}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.13.12}\emph {@combinations\_with\_replacement}}{47}{subsection.1.13.12}}
+\newlabel{tutorials/new_tutorial/decorators_compendium:combinations-with-replacement}{{1.13.12}{47}{\emph {@combinations\_with\_replacement}}{subsection.1.13.12}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.13.13}\emph {@permutations}}{47}{subsection.1.13.13}}
+\newlabel{tutorials/new_tutorial/decorators_compendium:permutations}{{1.13.13}{47}{\emph {@permutations}}{subsection.1.13.13}{}}
+\@writefile{toc}{\contentsline {section}{\numberline {1.14}\textbf {Chapter 12}: Splitting up large tasks / files with \textbf {@split}}{47}{section.1.14}}
+\newlabel{tutorials/new_tutorial/split:index-0}{{1.14}{47}{\textbf {Chapter 12}: Splitting up large tasks / files with \textbf {@split}}{section.1.14}{}}
+\newlabel{tutorials/new_tutorial/split:new-manual-split}{{1.14}{47}{\textbf {Chapter 12}: Splitting up large tasks / files with \textbf {@split}}{section.1.14}{}}
+\newlabel{tutorials/new_tutorial/split::doc}{{1.14}{47}{\textbf {Chapter 12}: Splitting up large tasks / files with \textbf {@split}}{section.1.14}{}}
+\newlabel{tutorials/new_tutorial/split:new-manual-split-chapter-num-splitting-up-large-tasks-files-with-split}{{1.14}{47}{\textbf {Chapter 12}: Splitting up large tasks / files with \textbf {@split}}{section.1.14}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.14.1}Overview}{48}{subsection.1.14.1}}
+\newlabel{tutorials/new_tutorial/split:overview}{{1.14.1}{48}{Overview}{subsection.1.14.1}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.14.2}Example: Calculate variance for a large list of numbers in parallel}{48}{subsection.1.14.2}}
+\newlabel{tutorials/new_tutorial/split:example-calculate-variance-for-a-large-list-of-numbers-in-parallel}{{1.14.2}{48}{Example: Calculate variance for a large list of numbers in parallel}{subsection.1.14.2}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.14.3}Output files for \emph {@split}}{48}{subsection.1.14.3}}
+\newlabel{tutorials/new_tutorial/split:output-files-for-split}{{1.14.3}{48}{Output files for \emph {@split}}{subsection.1.14.3}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.14.4}Be careful in specifying \textbf {Output} globs}{49}{subsection.1.14.4}}
+\newlabel{tutorials/new_tutorial/split:be-careful-in-specifying-output-globs}{{1.14.4}{49}{Be careful in specifying \textbf {Output} globs}{subsection.1.14.4}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.14.5}Clean up previous pipeline runs}{49}{subsection.1.14.5}}
+\newlabel{tutorials/new_tutorial/split:clean-up-previous-pipeline-runs}{{1.14.5}{49}{Clean up previous pipeline runs}{subsection.1.14.5}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.14.6}1 to many}{49}{subsection.1.14.6}}
+\newlabel{tutorials/new_tutorial/split:to-many}{{1.14.6}{49}{1 to many}{subsection.1.14.6}{}}
+\newlabel{tutorials/new_tutorial/split:new-manual-split-one-to-many}{{1.14.6}{49}{1 to many}{subsection.1.14.6}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.14.7}Nothing to many}{50}{subsection.1.14.7}}
+\newlabel{tutorials/new_tutorial/split:new-manual-split-nothing-to-many}{{1.14.7}{50}{Nothing to many}{subsection.1.14.7}{}}
+\newlabel{tutorials/new_tutorial/split:nothing-to-many}{{1.14.7}{50}{Nothing to many}{subsection.1.14.7}{}}
+\@writefile{toc}{\contentsline {section}{\numberline {1.15}\textbf {Chapter 13}: \texttt {@merge} multiple input into a single result}{51}{section.1.15}}
+\newlabel{tutorials/new_tutorial/merge:index-0}{{1.15}{51}{\textbf {Chapter 13}: \texttt {@merge} multiple input into a single result}{section.1.15}{}}
+\newlabel{tutorials/new_tutorial/merge:new-manual-merge}{{1.15}{51}{\textbf {Chapter 13}: \texttt {@merge} multiple input into a single result}{section.1.15}{}}
+\newlabel{tutorials/new_tutorial/merge::doc}{{1.15}{51}{\textbf {Chapter 13}: \texttt {@merge} multiple input into a single result}{section.1.15}{}}
+\newlabel{tutorials/new_tutorial/merge:new-manual-merge-chapter-num-merge-multiple-input-into-a-single-result}{{1.15}{51}{\textbf {Chapter 13}: \texttt {@merge} multiple input into a single result}{section.1.15}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.15.1}Overview of \emph {@merge}}{51}{subsection.1.15.1}}
+\newlabel{tutorials/new_tutorial/merge:overview-of-merge}{{1.15.1}{51}{Overview of \emph {@merge}}{subsection.1.15.1}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.15.2}\emph {@merge} is a many to one operator}{51}{subsection.1.15.2}}
+\newlabel{tutorials/new_tutorial/merge:merge-is-a-many-to-one-operator}{{1.15.2}{51}{\emph {@merge} is a many to one operator}{subsection.1.15.2}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.15.3}Example: Combining partial solutions: Calculating variances}{51}{subsection.1.15.3}}
+\newlabel{tutorials/new_tutorial/merge:example-combining-partial-solutions-calculating-variances}{{1.15.3}{51}{Example: Combining partial solutions: Calculating variances}{subsection.1.15.3}{}}
+\@writefile{toc}{\contentsline {section}{\numberline {1.16}\textbf {Chapter 14}: Multiprocessing, \texttt {drmaa} and Computation Clusters}{53}{section.1.16}}
+\newlabel{tutorials/new_tutorial/multiprocessing:index-0}{{1.16}{53}{\textbf {Chapter 14}: Multiprocessing, \texttt {drmaa} and Computation Clusters}{section.1.16}{}}
+\newlabel{tutorials/new_tutorial/multiprocessing::doc}{{1.16}{53}{\textbf {Chapter 14}: Multiprocessing, \texttt {drmaa} and Computation Clusters}{section.1.16}{}}
+\newlabel{tutorials/new_tutorial/multiprocessing:new-manual-multiprocessing}{{1.16}{53}{\textbf {Chapter 14}: Multiprocessing, \texttt {drmaa} and Computation Clusters}{section.1.16}{}}
+\newlabel{tutorials/new_tutorial/multiprocessing:new-manual-multiprocessing-chapter-num-multiprocessing-drmaa-and-computation-clusters}{{1.16}{53}{\textbf {Chapter 14}: Multiprocessing, \texttt {drmaa} and Computation Clusters}{section.1.16}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.16.1}Overview}{53}{subsection.1.16.1}}
+\newlabel{tutorials/new_tutorial/multiprocessing:overview}{{1.16.1}{53}{Overview}{subsection.1.16.1}{}}
+\@writefile{toc}{\contentsline {subsubsection}{Multi Processing}{53}{subsubsection*.31}}
+\newlabel{tutorials/new_tutorial/multiprocessing:multi-processing}{{1.16.1}{53}{Multi Processing}{subsubsection*.31}{}}
+\newlabel{tutorials/new_tutorial/multiprocessing:index-1}{{1.16.1}{53}{Multi Processing}{subsubsection*.31}{}}
+\@writefile{toc}{\contentsline {subsubsection}{Data sharing}{53}{subsubsection*.32}}
+\newlabel{tutorials/new_tutorial/multiprocessing:index-2}{{1.16.1}{53}{Data sharing}{subsubsection*.32}{}}
+\newlabel{tutorials/new_tutorial/multiprocessing:data-sharing}{{1.16.1}{53}{Data sharing}{subsubsection*.32}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.16.2}Restricting parallelism with \emph {@jobs\_limit}}{53}{subsection.1.16.2}}
+\newlabel{tutorials/new_tutorial/multiprocessing:new-manual-jobs-limit}{{1.16.2}{53}{Restricting parallelism with \emph {@jobs\_limit}}{subsection.1.16.2}{}}
+\newlabel{tutorials/new_tutorial/multiprocessing:restricting-parallelism-with-jobs-limit}{{1.16.2}{53}{Restricting parallelism with \emph {@jobs\_limit}}{subsection.1.16.2}{}}
+\newlabel{tutorials/new_tutorial/multiprocessing:index-3}{{1.16.2}{53}{Restricting parallelism with \emph {@jobs\_limit}}{subsection.1.16.2}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.16.3}Using \texttt {drmaa} to dispatch work to Computational Clusters or Grid engines from Ruffus jobs}{54}{subsection.1.16.3}}
+\newlabel{tutorials/new_tutorial/multiprocessing:using-drmaa-to-dispatch-work-to-computational-clusters-or-grid-engines-from-ruffus-jobs}{{1.16.3}{54}{Using \texttt {drmaa} to dispatch work to Computational Clusters or Grid engines from Ruffus jobs}{subsection.1.16.3}{}}
+\newlabel{tutorials/new_tutorial/multiprocessing:new-manual-ruffus-drmaa-wrapper-run-job}{{1.16.3}{54}{Using \texttt {drmaa} to dispatch work to Computational Clusters or Grid engines from Ruffus jobs}{subsection.1.16.3}{}}
+\@writefile{toc}{\contentsline {subsubsection}{1) Use a shared drmaa session:}{54}{subsubsection*.33}}
+\newlabel{tutorials/new_tutorial/multiprocessing:use-a-shared-drmaa-session}{{1.16.3}{54}{1) Use a shared drmaa session:}{subsubsection*.33}{}}
+\@writefile{toc}{\contentsline {subsubsection}{2) import \texttt {ruffus.drmaa\_wrapper}}{54}{subsubsection*.34}}
+\newlabel{tutorials/new_tutorial/multiprocessing:import-ruffus-drmaa-wrapper}{{1.16.3}{54}{2) import \texttt {ruffus.drmaa\_wrapper}}{subsubsection*.34}{}}
+\@writefile{toc}{\contentsline {subsubsection}{3) call \emph {drmaa\_wrapper.run\_job()}}{54}{subsubsection*.35}}
+\newlabel{tutorials/new_tutorial/multiprocessing:call-drmaa-wrapper-run-job}{{1.16.3}{54}{3) call \emph {drmaa\_wrapper.run\_job()}}{subsubsection*.35}{}}
+\@writefile{toc}{\contentsline {subsubsection}{4) Use multithread: \emph {pipeline\_run(multithread = NNN)}}{55}{subsubsection*.36}}
+\newlabel{tutorials/new_tutorial/multiprocessing:use-multithread-pipeline-run-multithread-nnn}{{1.16.3}{55}{4) Use multithread: \emph {pipeline\_run(multithread = NNN)}}{subsubsection*.36}{}}
+\@writefile{toc}{\contentsline {subsubsection}{5) Develop locally}{55}{subsubsection*.37}}
+\newlabel{tutorials/new_tutorial/multiprocessing:develop-locally}{{1.16.3}{55}{5) Develop locally}{subsubsection*.37}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.16.4}Forcing a pipeline to appear up to date}{56}{subsection.1.16.4}}
+\newlabel{tutorials/new_tutorial/multiprocessing:index-4}{{1.16.4}{56}{Forcing a pipeline to appear up to date}{subsection.1.16.4}{}}
+\newlabel{tutorials/new_tutorial/multiprocessing:forcing-a-pipeline-to-appear-up-to-date}{{1.16.4}{56}{Forcing a pipeline to appear up to date}{subsection.1.16.4}{}}
+\newlabel{tutorials/new_tutorial/multiprocessing:new-manual-pipeline-run-touch}{{1.16.4}{56}{Forcing a pipeline to appear up to date}{subsection.1.16.4}{}}
+\@writefile{toc}{\contentsline {section}{\numberline {1.17}\textbf {Chapter 15}: Logging progress through a pipeline}{56}{section.1.17}}
+\newlabel{tutorials/new_tutorial/logging:new-manual-logging-chapter-num-logging-progress-through-a-pipeline}{{1.17}{56}{\textbf {Chapter 15}: Logging progress through a pipeline}{section.1.17}{}}
+\newlabel{tutorials/new_tutorial/logging:index-0}{{1.17}{56}{\textbf {Chapter 15}: Logging progress through a pipeline}{section.1.17}{}}
+\newlabel{tutorials/new_tutorial/logging::doc}{{1.17}{56}{\textbf {Chapter 15}: Logging progress through a pipeline}{section.1.17}{}}
+\newlabel{tutorials/new_tutorial/logging:new-manual-logging}{{1.17}{56}{\textbf {Chapter 15}: Logging progress through a pipeline}{section.1.17}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.17.1}Overview}{56}{subsection.1.17.1}}
+\newlabel{tutorials/new_tutorial/logging:overview}{{1.17.1}{56}{Overview}{subsection.1.17.1}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.17.2}Logging task/job completion}{57}{subsection.1.17.2}}
+\newlabel{tutorials/new_tutorial/logging:new-manual-logging-pipeline}{{1.17.2}{57}{Logging task/job completion}{subsection.1.17.2}{}}
+\newlabel{tutorials/new_tutorial/logging:logging-task-job-completion}{{1.17.2}{57}{Logging task/job completion}{subsection.1.17.2}{}}
+\@writefile{toc}{\contentsline {subsubsection}{Controlling logging verbosity}{57}{subsubsection*.38}}
+\newlabel{tutorials/new_tutorial/logging:controlling-logging-verbosity}{{1.17.2}{57}{Controlling logging verbosity}{subsubsection*.38}{}}
+\newlabel{tutorials/new_tutorial/logging:index-1}{{1.17.2}{57}{Controlling logging verbosity}{subsubsection*.38}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.17.3}Use \emph {ruffus.cmdline}}{57}{subsection.1.17.3}}
+\newlabel{tutorials/new_tutorial/logging:use-ruffus-cmdline}{{1.17.3}{57}{Use \emph {ruffus.cmdline}}{subsection.1.17.3}{}}
+\newlabel{tutorials/new_tutorial/logging:index-2}{{1.17.3}{57}{Use \emph {ruffus.cmdline}}{subsection.1.17.3}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.17.4}Customising logging}{57}{subsection.1.17.4}}
+\newlabel{tutorials/new_tutorial/logging:customising-logging}{{1.17.4}{57}{Customising logging}{subsection.1.17.4}{}}
+\newlabel{tutorials/new_tutorial/logging:index-3}{{1.17.4}{57}{Customising logging}{subsection.1.17.4}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.17.5}Log your own messages}{58}{subsection.1.17.5}}
+\newlabel{tutorials/new_tutorial/logging:log-your-own-messages}{{1.17.5}{58}{Log your own messages}{subsection.1.17.5}{}}
+\newlabel{tutorials/new_tutorial/logging:index-4}{{1.17.5}{58}{Log your own messages}{subsection.1.17.5}{}}
+\newlabel{tutorials/new_tutorial/logging:new-manual-logging-per-job}{{1.17.5}{58}{Log your own messages}{subsection.1.17.5}{}}
+\@writefile{toc}{\contentsline {subsubsection}{1. Set up logging}{58}{subsubsection*.39}}
+\newlabel{tutorials/new_tutorial/logging:new-manual-sharing-proxy-object}{{1.17.5}{58}{1. Set up logging}{subsubsection*.39}{}}
+\newlabel{tutorials/new_tutorial/logging:set-up-logging}{{1.17.5}{58}{1. Set up logging}{subsubsection*.39}{}}
+\@writefile{toc}{\contentsline {subsubsection}{2. Share the proxy}{58}{subsubsection*.40}}
+\newlabel{tutorials/new_tutorial/logging:share-the-proxy}{{1.17.5}{58}{2. Share the proxy}{subsubsection*.40}{}}
+\@writefile{toc}{\contentsline {section}{\numberline {1.18}\textbf {Chapter 16}: \emph {@subdivide} tasks to run efficiently and regroup with \emph {@collate}}{59}{section.1.18}}
+\newlabel{tutorials/new_tutorial/subdivide_collate:new-manual-subdivide-collate-chapter-num-subdivide-tasks-to-run-efficiently-and-regroup-with-collate}{{1.18}{59}{\textbf {Chapter 16}: \emph {@subdivide} tasks to run efficiently and regroup with \emph {@collate}}{section.1.18}{}}
+\newlabel{tutorials/new_tutorial/subdivide_collate:index-0}{{1.18}{59}{\textbf {Chapter 16}: \emph {@subdivide} tasks to run efficiently and regroup with \emph {@collate}}{section.1.18}{}}
+\newlabel{tutorials/new_tutorial/subdivide_collate::doc}{{1.18}{59}{\textbf {Chapter 16}: \emph {@subdivide} tasks to run efficiently and regroup with \emph {@collate}}{section.1.18}{}}
+\newlabel{tutorials/new_tutorial/subdivide_collate:new-manual-subdivide-collate}{{1.18}{59}{\textbf {Chapter 16}: \emph {@subdivide} tasks to run efficiently and regroup with \emph {@collate}}{section.1.18}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.18.1}Overview}{59}{subsection.1.18.1}}
+\newlabel{tutorials/new_tutorial/subdivide_collate:overview}{{1.18.1}{59}{Overview}{subsection.1.18.1}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.18.2}\emph {@subdivide} in parallel}{59}{subsection.1.18.2}}
+\newlabel{tutorials/new_tutorial/subdivide_collate:new-manual-subdivide}{{1.18.2}{59}{\emph {@subdivide} in parallel}{subsection.1.18.2}{}}
+\newlabel{tutorials/new_tutorial/subdivide_collate:subdivide-in-parallel}{{1.18.2}{59}{\emph {@subdivide} in parallel}{subsection.1.18.2}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.18.3}Grouping using \emph {@collate}}{61}{subsection.1.18.3}}
+\newlabel{tutorials/new_tutorial/subdivide_collate:new-manual-collate}{{1.18.3}{61}{Grouping using \emph {@collate}}{subsection.1.18.3}{}}
+\newlabel{tutorials/new_tutorial/subdivide_collate:grouping-using-collate}{{1.18.3}{61}{Grouping using \emph {@collate}}{subsection.1.18.3}{}}
+\@writefile{toc}{\contentsline {section}{\numberline {1.19}\textbf {Chapter 17}: \emph {@combinations}, \emph {@permutations} and all versus all \emph {@product}}{62}{section.1.19}}
+\newlabel{tutorials/new_tutorial/combinatorics:index-0}{{1.19}{62}{\textbf {Chapter 17}: \emph {@combinations}, \emph {@permutations} and all versus all \emph {@product}}{section.1.19}{}}
+\newlabel{tutorials/new_tutorial/combinatorics:new-manual-combinatorics}{{1.19}{62}{\textbf {Chapter 17}: \emph {@combinations}, \emph {@permutations} and all versus all \emph {@product}}{section.1.19}{}}
+\newlabel{tutorials/new_tutorial/combinatorics::doc}{{1.19}{62}{\textbf {Chapter 17}: \emph {@combinations}, \emph {@permutations} and all versus all \emph {@product}}{section.1.19}{}}
+\newlabel{tutorials/new_tutorial/combinatorics:new-manual-combinatorics-chapter-num-combinations-permutations-and-all-versus-all-product}{{1.19}{62}{\textbf {Chapter 17}: \emph {@combinations}, \emph {@permutations} and all versus all \emph {@product}}{section.1.19}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.19.1}Overview}{62}{subsection.1.19.1}}
+\newlabel{tutorials/new_tutorial/combinatorics:overview}{{1.19.1}{62}{Overview}{subsection.1.19.1}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.19.2}Generating output with \emph {formatter()}}{63}{subsection.1.19.2}}
+\newlabel{tutorials/new_tutorial/combinatorics:generating-output-with-formatter}{{1.19.2}{63}{Generating output with \emph {formatter()}}{subsection.1.19.2}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.19.3}All vs all comparisons with \emph {@product}}{63}{subsection.1.19.3}}
+\newlabel{tutorials/new_tutorial/combinatorics:all-vs-all-comparisons-with-product}{{1.19.3}{63}{All vs all comparisons with \emph {@product}}{subsection.1.19.3}{}}
+\newlabel{tutorials/new_tutorial/combinatorics:new-manual-product}{{1.19.3}{63}{All vs all comparisons with \emph {@product}}{subsection.1.19.3}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.19.4}Permute all k-tuple orderings of inputs without repeats using \emph {@permutations}}{65}{subsection.1.19.4}}
+\newlabel{tutorials/new_tutorial/combinatorics:permute-all-k-tuple-orderings-of-inputs-without-repeats-using-permutations}{{1.19.4}{65}{Permute all k-tuple orderings of inputs without repeats using \emph {@permutations}}{subsection.1.19.4}{}}
+\newlabel{tutorials/new_tutorial/combinatorics:new-manual-permutations}{{1.19.4}{65}{Permute all k-tuple orderings of inputs without repeats using \emph {@permutations}}{subsection.1.19.4}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.19.5}Select unordered k-tuples within inputs excluding repeated elements using \emph {@combinations}}{66}{subsection.1.19.5}}
+\newlabel{tutorials/new_tutorial/combinatorics:new-manual-combinations}{{1.19.5}{66}{Select unordered k-tuples within inputs excluding repeated elements using \emph {@combinations}}{subsection.1.19.5}{}}
+\newlabel{tutorials/new_tutorial/combinatorics:select-unordered-k-tuples-within-inputs-excluding-repeated-elements-using-combinations}{{1.19.5}{66}{Select unordered k-tuples within inputs excluding repeated elements using \emph {@combinations}}{subsection.1.19.5}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.19.6}Select unordered k-tuples within inputs \emph {including} repeated elements with \emph {@combinations\_with\_replacement}}{67}{subsection.1.19.6}}
+\newlabel{tutorials/new_tutorial/combinatorics:select-unordered-k-tuples-within-inputs-including-repeated-elements-with-combinations-with-replacement}{{1.19.6}{67}{Select unordered k-tuples within inputs \emph {including} repeated elements with \emph {@combinations\_with\_replacement}}{subsection.1.19.6}{}}
+\newlabel{tutorials/new_tutorial/combinatorics:new-manual-combinations-with-replacement}{{1.19.6}{67}{Select unordered k-tuples within inputs \emph {including} repeated elements with \emph {@combinations\_with\_replacement}}{subsection.1.19.6}{}}
+\@writefile{toc}{\contentsline {section}{\numberline {1.20}\textbf {Chapter 18}: Turning parts of the pipeline on and off at runtime with \emph {@active\_if}}{69}{section.1.20}}
+\newlabel{tutorials/new_tutorial/active_if:new-manual-active-if}{{1.20}{69}{\textbf {Chapter 18}: Turning parts of the pipeline on and off at runtime with \emph {@active\_if}}{section.1.20}{}}
+\newlabel{tutorials/new_tutorial/active_if:index-0}{{1.20}{69}{\textbf {Chapter 18}: Turning parts of the pipeline on and off at runtime with \emph {@active\_if}}{section.1.20}{}}
+\newlabel{tutorials/new_tutorial/active_if::doc}{{1.20}{69}{\textbf {Chapter 18}: Turning parts of the pipeline on and off at runtime with \emph {@active\_if}}{section.1.20}{}}
+\newlabel{tutorials/new_tutorial/active_if:new-manual-active-if-chapter-num-turning-parts-of-the-pipeline-on-and-off-at-runtime-with-active-if}{{1.20}{69}{\textbf {Chapter 18}: Turning parts of the pipeline on and off at runtime with \emph {@active\_if}}{section.1.20}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.20.1}Overview}{69}{subsection.1.20.1}}
+\newlabel{tutorials/new_tutorial/active_if:overview}{{1.20.1}{69}{Overview}{subsection.1.20.1}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.20.2}\emph {@active\_if} controls the state of tasks}{69}{subsection.1.20.2}}
+\newlabel{tutorials/new_tutorial/active_if:active-if-controls-the-state-of-tasks}{{1.20.2}{69}{\emph {@active\_if} controls the state of tasks}{subsection.1.20.2}{}}
+\@writefile{toc}{\contentsline {section}{\numberline {1.21}\textbf {Chapter 19}: Signal the completion of each stage of our pipeline with \emph {@posttask}}{71}{section.1.21}}
+\newlabel{tutorials/new_tutorial/posttask:new-manual-posttask}{{1.21}{71}{\textbf {Chapter 19}: Signal the completion of each stage of our pipeline with \emph {@posttask}}{section.1.21}{}}
+\newlabel{tutorials/new_tutorial/posttask:index-0}{{1.21}{71}{\textbf {Chapter 19}: Signal the completion of each stage of our pipeline with \emph {@posttask}}{section.1.21}{}}
+\newlabel{tutorials/new_tutorial/posttask::doc}{{1.21}{71}{\textbf {Chapter 19}: Signal the completion of each stage of our pipeline with \emph {@posttask}}{section.1.21}{}}
+\newlabel{tutorials/new_tutorial/posttask:new-manual-posttask-chapter-num-signal-the-completion-of-each-stage-of-our-pipeline-with-posttask}{{1.21}{71}{\textbf {Chapter 19}: Signal the completion of each stage of our pipeline with \emph {@posttask}}{section.1.21}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.21.1}Overview}{71}{subsection.1.21.1}}
+\newlabel{tutorials/new_tutorial/posttask:overview}{{1.21.1}{71}{Overview}{subsection.1.21.1}{}}
+\@writefile{toc}{\contentsline {subsubsection}{\textbf {@posttask}}{71}{subsubsection*.41}}
+\newlabel{tutorials/new_tutorial/posttask:posttask}{{1.21.1}{71}{\textbf {@posttask}}{subsubsection*.41}{}}
+\@writefile{toc}{\contentsline {subsubsection}{\emph {touch\_file}}{72}{subsubsection*.42}}
+\newlabel{tutorials/new_tutorial/posttask:touch-file}{{1.21.1}{72}{\emph {touch\_file}}{subsubsection*.42}{}}
+\newlabel{tutorials/new_tutorial/posttask:index-1}{{1.21.1}{72}{\emph {touch\_file}}{subsubsection*.42}{}}
+\newlabel{tutorials/new_tutorial/posttask:new-manual-posttask-touch-file}{{1.21.1}{72}{\emph {touch\_file}}{subsubsection*.42}{}}
+\@writefile{toc}{\contentsline {subsubsection}{Adding several post task actions}{72}{subsubsection*.43}}
+\newlabel{tutorials/new_tutorial/posttask:adding-several-post-task-actions}{{1.21.1}{72}{Adding several post task actions}{subsubsection*.43}{}}
+\@writefile{toc}{\contentsline {section}{\numberline {1.22}\textbf {Chapter 20}: Manipulating task inputs via string substitution using \emph {inputs()} and \emph {add\_inputs()}}{72}{section.1.22}}
+\newlabel{tutorials/new_tutorial/inputs:new-manual-inputs}{{1.22}{72}{\textbf {Chapter 20}: Manipulating task inputs via string substitution using \emph {inputs()} and \emph {add\_inputs()}}{section.1.22}{}}
+\newlabel{tutorials/new_tutorial/inputs:index-0}{{1.22}{72}{\textbf {Chapter 20}: Manipulating task inputs via string substitution using \emph {inputs()} and \emph {add\_inputs()}}{section.1.22}{}}
+\newlabel{tutorials/new_tutorial/inputs:new-manual-inputs-chapter-num-manipulating-task-inputs-via-string-substitution-using-inputs-and-add-inputs}{{1.22}{72}{\textbf {Chapter 20}: Manipulating task inputs via string substitution using \emph {inputs()} and \emph {add\_inputs()}}{section.1.22}{}}
+\newlabel{tutorials/new_tutorial/inputs::doc}{{1.22}{72}{\textbf {Chapter 20}: Manipulating task inputs via string substitution using \emph {inputs()} and \emph {add\_inputs()}}{section.1.22}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.22.1}Overview}{72}{subsection.1.22.1}}
+\newlabel{tutorials/new_tutorial/inputs:overview}{{1.22.1}{72}{Overview}{subsection.1.22.1}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.22.2}Adding additional \emph {input} prerequisites per job with \emph {add\_inputs()}}{73}{subsection.1.22.2}}
+\newlabel{tutorials/new_tutorial/inputs:adding-additional-input-prerequisites-per-job-with-add-inputs}{{1.22.2}{73}{Adding additional \emph {input} prerequisites per job with \emph {add\_inputs()}}{subsection.1.22.2}{}}
+\@writefile{toc}{\contentsline {subsubsection}{1. Example: compiling C++ code}{73}{subsubsection*.44}}
+\newlabel{tutorials/new_tutorial/inputs:example-compiling-c-code}{{1.22.2}{73}{1. Example: compiling C++ code}{subsubsection*.44}{}}
+\@writefile{toc}{\contentsline {subsubsection}{2. Example: Adding a common header file with \emph {add\_inputs()}}{73}{subsubsection*.45}}
+\newlabel{tutorials/new_tutorial/inputs:example-adding-a-common-header-file-with-add-inputs}{{1.22.2}{73}{2. Example: Adding a common header file with \emph {add\_inputs()}}{subsubsection*.45}{}}
+\@writefile{toc}{\contentsline {subsubsection}{3. Example: Additional \emph {Input} can be tasks}{74}{subsubsection*.46}}
+\newlabel{tutorials/new_tutorial/inputs:example-additional-input-can-be-tasks}{{1.22.2}{74}{3. Example: Additional \emph {Input} can be tasks}{subsubsection*.46}{}}
+\@writefile{toc}{\contentsline {subsubsection}{4. Example: Add corresponding files using \emph {add\_inputs()} with \emph {formatter} or \emph {regex}}{74}{subsubsection*.47}}
+\newlabel{tutorials/new_tutorial/inputs:example-add-corresponding-files-using-add-inputs-with-formatter-or-regex}{{1.22.2}{74}{4. Example: Add corresponding files using \emph {add\_inputs()} with \emph {formatter} or \emph {regex}}{subsubsection*.47}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.22.3}Replacing all input parameters with \emph {inputs()}}{75}{subsection.1.22.3}}
+\newlabel{tutorials/new_tutorial/inputs:replacing-all-input-parameters-with-inputs}{{1.22.3}{75}{Replacing all input parameters with \emph {inputs()}}{subsection.1.22.3}{}}
+\@writefile{toc}{\contentsline {subsubsection}{5. Example: Running matching python scripts using \emph {inputs()}}{75}{subsubsection*.48}}
+\newlabel{tutorials/new_tutorial/inputs:example-running-matching-python-scripts-using-inputs}{{1.22.3}{75}{5. Example: Running matching python scripts using \emph {inputs()}}{subsubsection*.48}{}}
+\@writefile{toc}{\contentsline {section}{\numberline {1.23}\textbf {Chapter 21}: Esoteric: Generating parameters on the fly with \emph {@files}}{75}{section.1.23}}
+\newlabel{tutorials/new_tutorial/onthefly:index-0}{{1.23}{75}{\textbf {Chapter 21}: Esoteric: Generating parameters on the fly with \emph {@files}}{section.1.23}{}}
+\newlabel{tutorials/new_tutorial/onthefly::doc}{{1.23}{75}{\textbf {Chapter 21}: Esoteric: Generating parameters on the fly with \emph {@files}}{section.1.23}{}}
+\newlabel{tutorials/new_tutorial/onthefly:new-manual-on-the-fly-chapter-num-esoteric-generating-parameters-on-the-fly-with-files}{{1.23}{75}{\textbf {Chapter 21}: Esoteric: Generating parameters on the fly with \emph {@files}}{section.1.23}{}}
+\newlabel{tutorials/new_tutorial/onthefly:new-manual-on-the-fly}{{1.23}{75}{\textbf {Chapter 21}: Esoteric: Generating parameters on the fly with \emph {@files}}{section.1.23}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.23.1}Overview}{76}{subsection.1.23.1}}
+\newlabel{tutorials/new_tutorial/onthefly:overview}{{1.23.1}{76}{Overview}{subsection.1.23.1}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.23.2}\emph {@files} syntax}{76}{subsection.1.23.2}}
+\newlabel{tutorials/new_tutorial/onthefly:index-1}{{1.23.2}{76}{\emph {@files} syntax}{subsection.1.23.2}{}}
+\newlabel{tutorials/new_tutorial/onthefly:files-syntax}{{1.23.2}{76}{\emph {@files} syntax}{subsection.1.23.2}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.23.3}A Cartesian Product, all vs all example}{77}{subsection.1.23.3}}
+\newlabel{tutorials/new_tutorial/onthefly:a-cartesian-product-all-vs-all-example}{{1.23.3}{77}{A Cartesian Product, all vs all example}{subsection.1.23.3}{}}
+\@writefile{toc}{\contentsline {section}{\numberline {1.24}\textbf {Chapter 22}: Esoteric: Running jobs in parallel without files using \emph {@parallel}}{78}{section.1.24}}
+\newlabel{tutorials/new_tutorial/parallel:new-manual-parallel-chapter-num-esoteric-running-jobs-in-parallel-without-files-using-parallel}{{1.24}{78}{\textbf {Chapter 22}: Esoteric: Running jobs in parallel without files using \emph {@parallel}}{section.1.24}{}}
+\newlabel{tutorials/new_tutorial/parallel:index-0}{{1.24}{78}{\textbf {Chapter 22}: Esoteric: Running jobs in parallel without files using \emph {@parallel}}{section.1.24}{}}
+\newlabel{tutorials/new_tutorial/parallel::doc}{{1.24}{78}{\textbf {Chapter 22}: Esoteric: Running jobs in parallel without files using \emph {@parallel}}{section.1.24}{}}
+\newlabel{tutorials/new_tutorial/parallel:new-manual-deprecated-parallel}{{1.24}{78}{\textbf {Chapter 22}: Esoteric: Running jobs in parallel without files using \emph {@parallel}}{section.1.24}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.24.1}\textbf {@parallel}}{78}{subsection.1.24.1}}
+\newlabel{tutorials/new_tutorial/parallel:parallel}{{1.24.1}{78}{\textbf {@parallel}}{subsection.1.24.1}{}}
+\@writefile{toc}{\contentsline {section}{\numberline {1.25}\textbf {Chapter 23}: Esoteric: Writing custom functions to decide which jobs are up to date with \emph {@check\_if\_uptodate}}{79}{section.1.25}}
+\newlabel{tutorials/new_tutorial/check_if_uptodate:index-0}{{1.25}{79}{\textbf {Chapter 23}: Esoteric: Writing custom functions to decide which jobs are up to date with \emph {@check\_if\_uptodate}}{section.1.25}{}}
+\newlabel{tutorials/new_tutorial/check_if_uptodate:new-manual-check-if-uptodate}{{1.25}{79}{\textbf {Chapter 23}: Esoteric: Writing custom functions to decide which jobs are up to date with \emph {@check\_if\_uptodate}}{section.1.25}{}}
+\newlabel{tutorials/new_tutorial/check_if_uptodate::doc}{{1.25}{79}{\textbf {Chapter 23}: Esoteric: Writing custom functions to decide which jobs are up to date with \emph {@check\_if\_uptodate}}{section.1.25}{}}
+\newlabel{tutorials/new_tutorial/check_if_uptodate:new-manual-check-if-uptodate-chapter-num-esoteric-writing-custom-functions-to-decide-which-jobs-are-up-to-date-with-check-if-uptodate}{{1.25}{79}{\textbf {Chapter 23}: Esoteric: Writing custom functions to decide which jobs are up to date with \emph {@check\_if\_uptodate}}{section.1.25}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.25.1}\textbf {@check\_if\_uptodate}: Manual dependency checking}{79}{subsection.1.25.1}}
+\newlabel{tutorials/new_tutorial/check_if_uptodate:check-if-uptodate-manual-dependency-checking}{{1.25.1}{79}{\textbf {@check\_if\_uptodate}: Manual dependency checking}{subsection.1.25.1}{}}
+\@writefile{toc}{\contentsline {section}{\numberline {1.26}\textbf {Appendix 1}: Flow Chart Colours with \emph {pipeline\_printout\_graph(...)}}{80}{section.1.26}}
+\newlabel{tutorials/new_tutorial/flowchart_colours:index-0}{{1.26}{80}{\textbf {Appendix 1}: Flow Chart Colours with \emph {pipeline\_printout\_graph(...)}}{section.1.26}{}}
+\newlabel{tutorials/new_tutorial/flowchart_colours:new-manual-flowchart-colours-chapter-num-flow-chart-colours-with-pipeline-printout-graph}{{1.26}{80}{\textbf {Appendix 1}: Flow Chart Colours with \emph {pipeline\_printout\_graph(...)}}{section.1.26}{}}
+\newlabel{tutorials/new_tutorial/flowchart_colours::doc}{{1.26}{80}{\textbf {Appendix 1}: Flow Chart Colours with \emph {pipeline\_printout\_graph(...)}}{section.1.26}{}}
+\newlabel{tutorials/new_tutorial/flowchart_colours:new-manual-flowchart-colours}{{1.26}{80}{\textbf {Appendix 1}: Flow Chart Colours with \emph {pipeline\_printout\_graph(...)}}{section.1.26}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.26.1}Flowchart colours}{80}{subsection.1.26.1}}
+\newlabel{tutorials/new_tutorial/flowchart_colours:flowchart-colours}{{1.26.1}{80}{Flowchart colours}{subsection.1.26.1}{}}
+\@writefile{toc}{\contentsline {section}{\numberline {1.27}\textbf {Appendix 2}: How dependency is checked}{81}{section.1.27}}
+\newlabel{tutorials/new_tutorial/dependencies:new-manual-dependencies-chapter-num-how-dependency-is-checked}{{1.27}{81}{\textbf {Appendix 2}: How dependency is checked}{section.1.27}{}}
+\newlabel{tutorials/new_tutorial/dependencies:index-0}{{1.27}{81}{\textbf {Appendix 2}: How dependency is checked}{section.1.27}{}}
+\newlabel{tutorials/new_tutorial/dependencies::doc}{{1.27}{81}{\textbf {Appendix 2}: How dependency is checked}{section.1.27}{}}
+\newlabel{tutorials/new_tutorial/dependencies:new-manual-dependencies}{{1.27}{81}{\textbf {Appendix 2}: How dependency is checked}{section.1.27}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.27.1}Overview}{81}{subsection.1.27.1}}
+\newlabel{tutorials/new_tutorial/dependencies:overview}{{1.27.1}{81}{Overview}{subsection.1.27.1}{}}
+\@writefile{toc}{\contentsline {subsubsection}{Running all out-of-date tasks and dependents}{81}{subsubsection*.49}}
+\newlabel{tutorials/new_tutorial/dependencies:running-all-out-of-date-tasks-and-dependents}{{1.27.1}{81}{Running all out-of-date tasks and dependents}{subsubsection*.49}{}}
+\newlabel{tutorials/new_tutorial/dependencies:new-manual-dependencies-checking-multiple-times}{{1.27.1}{82}{Running all out-of-date tasks and dependents}{section*.50}{}}
+\@writefile{toc}{\contentsline {subsubsection}{Forced Reruns}{82}{subsubsection*.51}}
+\newlabel{tutorials/new_tutorial/dependencies:forced-reruns}{{1.27.1}{82}{Forced Reruns}{subsubsection*.51}{}}
+\newlabel{tutorials/new_tutorial/dependencies:new-manual-dependencies-forced-reruns}{{1.27.1}{82}{Forced Reruns}{subsubsection*.51}{}}
+\@writefile{toc}{\contentsline {subsubsection}{Esoteric option: Minimal Reruns}{82}{subsubsection*.52}}
+\newlabel{tutorials/new_tutorial/dependencies:new-manual-dependencies-minimal-reruns}{{1.27.1}{82}{Esoteric option: Minimal Reruns}{subsubsection*.52}{}}
+\newlabel{tutorials/new_tutorial/dependencies:esoteric-option-minimal-reruns}{{1.27.1}{82}{Esoteric option: Minimal Reruns}{subsubsection*.52}{}}
+\@writefile{toc}{\contentsline {section}{\numberline {1.28}\textbf {Appendix 3}: Exceptions thrown inside pipelines}{83}{section.1.28}}
+\newlabel{tutorials/new_tutorial/exceptions:new-manual-exceptions-chapter-num-exceptions-thrown-inside-pipelines}{{1.28}{83}{\textbf {Appendix 3}: Exceptions thrown inside pipelines}{section.1.28}{}}
+\newlabel{tutorials/new_tutorial/exceptions:new-manual-exceptions}{{1.28}{83}{\textbf {Appendix 3}: Exceptions thrown inside pipelines}{section.1.28}{}}
+\newlabel{tutorials/new_tutorial/exceptions:index-0}{{1.28}{83}{\textbf {Appendix 3}: Exceptions thrown inside pipelines}{section.1.28}{}}
+\newlabel{tutorials/new_tutorial/exceptions::doc}{{1.28}{83}{\textbf {Appendix 3}: Exceptions thrown inside pipelines}{section.1.28}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.28.1}Overview}{83}{subsection.1.28.1}}
+\newlabel{tutorials/new_tutorial/exceptions:overview}{{1.28.1}{83}{Overview}{subsection.1.28.1}{}}
+\newlabel{tutorials/new_tutorial/exceptions:new-manual-exceptions-multiple-errors}{{1.28.1}{84}{Overview}{section*.53}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.28.2}Pipelines running in parallel accumulate Exceptions}{84}{subsection.1.28.2}}
+\newlabel{tutorials/new_tutorial/exceptions:index-1}{{1.28.2}{84}{Pipelines running in parallel accumulate Exceptions}{subsection.1.28.2}{}}
+\newlabel{tutorials/new_tutorial/exceptions:pipelines-running-in-parallel-accumulate-exceptions}{{1.28.2}{84}{Pipelines running in parallel accumulate Exceptions}{subsection.1.28.2}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.28.3}Terminate pipeline immediately upon Exceptions}{84}{subsection.1.28.3}}
+\newlabel{tutorials/new_tutorial/exceptions:terminate-pipeline-immediately-upon-exceptions}{{1.28.3}{84}{Terminate pipeline immediately upon Exceptions}{subsection.1.28.3}{}}
+\@writefile{toc}{\contentsline {subsubsection}{Set \emph {pipeline\_run(exceptions\_terminate\_immediately = True)}}{84}{subsubsection*.54}}
+\newlabel{tutorials/new_tutorial/exceptions:set-pipeline-run-exceptions-terminate-immediately-true}{{1.28.3}{84}{Set \emph {pipeline\_run(exceptions\_terminate\_immediately = True)}}{subsubsection*.54}{}}
+\@writefile{toc}{\contentsline {subsubsection}{raise \texttt {Ruffus.JobSignalledBreak}}{84}{subsubsection*.55}}
+\newlabel{tutorials/new_tutorial/exceptions:raise-ruffus-jobsignalledbreak}{{1.28.3}{84}{raise \texttt {Ruffus.JobSignalledBreak}}{subsubsection*.55}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.28.4}Display exceptions as they occur}{85}{subsection.1.28.4}}
+\newlabel{tutorials/new_tutorial/exceptions:display-exceptions-as-they-occur}{{1.28.4}{85}{Display exceptions as they occur}{subsection.1.28.4}{}}
+\@writefile{toc}{\contentsline {section}{\numberline {1.29}\textbf {Appendix 4}: Names exported from Ruffus}{85}{section.1.29}}
+\newlabel{tutorials/new_tutorial/list_of_ruffus_names:new-manual-ruffus-names-chapter-num-names-exported-from-ruffus}{{1.29}{85}{\textbf {Appendix 4}: Names exported from Ruffus}{section.1.29}{}}
+\newlabel{tutorials/new_tutorial/list_of_ruffus_names:index-0}{{1.29}{85}{\textbf {Appendix 4}: Names exported from Ruffus}{section.1.29}{}}
+\newlabel{tutorials/new_tutorial/list_of_ruffus_names::doc}{{1.29}{85}{\textbf {Appendix 4}: Names exported from Ruffus}{section.1.29}{}}
+\newlabel{tutorials/new_tutorial/list_of_ruffus_names:new-manual-ruffus-names}{{1.29}{85}{\textbf {Appendix 4}: Names exported from Ruffus}{section.1.29}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.29.1}Ruffus Names}{85}{subsection.1.29.1}}
+\newlabel{tutorials/new_tutorial/list_of_ruffus_names:ruffus-names}{{1.29.1}{85}{Ruffus Names}{subsection.1.29.1}{}}
+\@writefile{toc}{\contentsline {section}{\numberline {1.30}\textbf {Appendix 5}: \textbf {@files}: Deprecated syntax}{87}{section.1.30}}
+\newlabel{tutorials/new_tutorial/deprecated_files:new-manual-deprecated-files}{{1.30}{87}{\textbf {Appendix 5}: \textbf {@files}: Deprecated syntax}{section.1.30}{}}
+\newlabel{tutorials/new_tutorial/deprecated_files:index-0}{{1.30}{87}{\textbf {Appendix 5}: \textbf {@files}: Deprecated syntax}{section.1.30}{}}
+\newlabel{tutorials/new_tutorial/deprecated_files::doc}{{1.30}{87}{\textbf {Appendix 5}: \textbf {@files}: Deprecated syntax}{section.1.30}{}}
+\newlabel{tutorials/new_tutorial/deprecated_files:new-manual-deprecated-files-chapter-num-files-deprecated-syntax}{{1.30}{87}{\textbf {Appendix 5}: \textbf {@files}: Deprecated syntax}{section.1.30}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.30.1}Overview}{87}{subsection.1.30.1}}
+\newlabel{tutorials/new_tutorial/deprecated_files:overview}{{1.30.1}{87}{Overview}{subsection.1.30.1}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.30.2}\textbf {@files}}{87}{subsection.1.30.2}}
+\newlabel{tutorials/new_tutorial/deprecated_files:files}{{1.30.2}{87}{\textbf {@files}}{subsection.1.30.2}{}}
+\newlabel{tutorials/new_tutorial/deprecated_files:index-1}{{1.30.2}{87}{\textbf {@files}}{subsection.1.30.2}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.30.3}Running the same code on different parameters in parallel}{88}{subsection.1.30.3}}
+\newlabel{tutorials/new_tutorial/deprecated_files:new-manual-files-parallel}{{1.30.3}{88}{Running the same code on different parameters in parallel}{subsection.1.30.3}{}}
+\newlabel{tutorials/new_tutorial/deprecated_files:index-2}{{1.30.3}{88}{Running the same code on different parameters in parallel}{subsection.1.30.3}{}}
+\newlabel{tutorials/new_tutorial/deprecated_files:running-the-same-code-on-different-parameters-in-parallel}{{1.30.3}{88}{Running the same code on different parameters in parallel}{subsection.1.30.3}{}}
+\newlabel{tutorials/new_tutorial/deprecated_files:new-manual-files-is-uptodate}{{1.30.3}{89}{Running the same code on different parameters in parallel}{section*.56}{}}
+\@writefile{toc}{\contentsline {subsubsection}{Checking if jobs are up to date}{89}{subsubsection*.57}}
+\newlabel{tutorials/new_tutorial/deprecated_files:new-manual-files-is-uptodate}{{1.30.3}{89}{Checking if jobs are up to date}{subsubsection*.57}{}}
+\newlabel{tutorials/new_tutorial/deprecated_files:checking-if-jobs-are-up-to-date}{{1.30.3}{89}{Checking if jobs are up to date}{subsubsection*.57}{}}
+\newlabel{tutorials/new_tutorial/deprecated_files:new-manual-files-example}{{1.30.3}{89}{Checking if jobs are up to date}{subsubsection*.57}{}}
+\newlabel{tutorials/new_tutorial/deprecated_files:index-3}{{1.30.3}{89}{Checking if jobs are up to date}{subsubsection*.57}{}}
+\@writefile{toc}{\contentsline {section}{\numberline {1.31}\textbf {Appendix 6}: \textbf {@files\_re}: Deprecated \emph {syntax using regular expressions}}{90}{section.1.31}}
+\newlabel{tutorials/new_tutorial/deprecated_files_re:new-manual-deprecated-files-re}{{1.31}{90}{\textbf {Appendix 6}: \textbf {@files\_re}: Deprecated \emph {syntax using regular expressions}}{section.1.31}{}}
+\newlabel{tutorials/new_tutorial/deprecated_files_re:index-0}{{1.31}{90}{\textbf {Appendix 6}: \textbf {@files\_re}: Deprecated \emph {syntax using regular expressions}}{section.1.31}{}}
+\newlabel{tutorials/new_tutorial/deprecated_files_re::doc}{{1.31}{90}{\textbf {Appendix 6}: \textbf {@files\_re}: Deprecated \emph {syntax using regular expressions}}{section.1.31}{}}
+\newlabel{tutorials/new_tutorial/deprecated_files_re:new-manual-deprecated-files-re-chapter-num-files-re-deprecated-syntax-using-regular-expressions}{{1.31}{90}{\textbf {Appendix 6}: \textbf {@files\_re}: Deprecated \emph {syntax using regular expressions}}{section.1.31}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.31.1}Overview}{90}{subsection.1.31.1}}
+\newlabel{tutorials/new_tutorial/deprecated_files_re:overview}{{1.31.1}{90}{Overview}{subsection.1.31.1}{}}
+\@writefile{toc}{\contentsline {subsubsection}{Transforming input and output filenames}{90}{subsubsection*.58}}
+\newlabel{tutorials/new_tutorial/deprecated_files_re:transforming-input-and-output-filenames}{{1.31.1}{90}{Transforming input and output filenames}{subsubsection*.58}{}}
+\newlabel{tutorials/new_tutorial/deprecated_files_re:new-manual-files-re-combine}{{1.31.1}{90}{Transforming input and output filenames}{section*.59}{}}
+\@writefile{toc}{\contentsline {subsubsection}{Collating many \emph {inputs} into a single \emph {output}}{91}{subsubsection*.60}}
+\newlabel{tutorials/new_tutorial/deprecated_files_re:collating-many-inputs-into-a-single-output}{{1.31.1}{91}{Collating many \emph {inputs} into a single \emph {output}}{subsubsection*.60}{}}
+\newlabel{tutorials/new_tutorial/deprecated_files_re:index-1}{{1.31.1}{91}{Collating many \emph {inputs} into a single \emph {output}}{subsubsection*.60}{}}
+\@writefile{toc}{\contentsline {subsubsection}{Generating \emph {input} and \emph {output} parameters using regular expressions}{91}{subsubsection*.61}}
+\newlabel{tutorials/new_tutorial/deprecated_files_re:generating-input-and-output-parameter-using-regular-expresssions}{{1.31.1}{91}{Generating \emph {input} and \emph {output} parameters using regular expressions}{subsubsection*.61}{}}
+\@writefile{toc}{\contentsline {section}{\numberline {1.32}\textbf {Chapter 1}: Python Code for An introduction to basic Ruffus syntax}{92}{section.1.32}}
+\newlabel{tutorials/new_tutorial/introduction_code:new-manual-introduction-code}{{1.32}{92}{\textbf {Chapter 1}: Python Code for An introduction to basic Ruffus syntax}{section.1.32}{}}
+\newlabel{tutorials/new_tutorial/introduction_code:new-manual-introduction-chapter-num-python-code-for-an-introduction-to-basic-ruffus-syntax}{{1.32}{92}{\textbf {Chapter 1}: Python Code for An introduction to basic Ruffus syntax}{section.1.32}{}}
+\newlabel{tutorials/new_tutorial/introduction_code::doc}{{1.32}{92}{\textbf {Chapter 1}: Python Code for An introduction to basic Ruffus syntax}{section.1.32}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.32.1}Your first Ruffus script}{92}{subsection.1.32.1}}
+\newlabel{tutorials/new_tutorial/introduction_code:your-first-ruffus-script}{{1.32.1}{92}{Your first Ruffus script}{subsection.1.32.1}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.32.2}Resulting Output}{93}{subsection.1.32.2}}
+\newlabel{tutorials/new_tutorial/introduction_code:resulting-output}{{1.32.2}{93}{Resulting Output}{subsection.1.32.2}{}}
+\@writefile{toc}{\contentsline {section}{\numberline {1.33}\textbf {Chapter 2}: Python Code for Transforming data in a pipeline with \texttt {@transform}}{93}{section.1.33}}
+\newlabel{tutorials/new_tutorial/transform_code:new-manual-transform-code}{{1.33}{93}{\textbf {Chapter 2}: Python Code for Transforming data in a pipeline with \texttt {@transform}}{section.1.33}{}}
+\newlabel{tutorials/new_tutorial/transform_code::doc}{{1.33}{93}{\textbf {Chapter 2}: Python Code for Transforming data in a pipeline with \texttt {@transform}}{section.1.33}{}}
+\newlabel{tutorials/new_tutorial/transform_code:new-manual-introduction-chapter-num-python-code-for-transforming-data-in-a-pipeline-with-transform}{{1.33}{93}{\textbf {Chapter 2}: Python Code for Transforming data in a pipeline with \texttt {@transform}}{section.1.33}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.33.1}Your first Ruffus script}{93}{subsection.1.33.1}}
+\newlabel{tutorials/new_tutorial/transform_code:your-first-ruffus-script}{{1.33.1}{93}{Your first Ruffus script}{subsection.1.33.1}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.33.2}Resulting Output}{94}{subsection.1.33.2}}
+\newlabel{tutorials/new_tutorial/transform_code:resulting-output}{{1.33.2}{94}{Resulting Output}{subsection.1.33.2}{}}
+\@writefile{toc}{\contentsline {section}{\numberline {1.34}\textbf {Chapter 3}: Python Code for More on \texttt {@transform}-ing data}{95}{section.1.34}}
+\newlabel{tutorials/new_tutorial/transform_in_parallel_code:new-manual-transform-in-parallel-code}{{1.34}{95}{\textbf {Chapter 3}: Python Code for More on \texttt {@transform}-ing data}{section.1.34}{}}
+\newlabel{tutorials/new_tutorial/transform_in_parallel_code::doc}{{1.34}{95}{\textbf {Chapter 3}: Python Code for More on \texttt {@transform}-ing data}{section.1.34}{}}
+\newlabel{tutorials/new_tutorial/transform_in_parallel_code:new-manual-transform-in-parallel-chapter-num-python-code-for-more-on-transform-ing-data}{{1.34}{95}{\textbf {Chapter 3}: Python Code for More on \texttt {@transform}-ing data}{section.1.34}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.34.1}Producing several items / files per job}{95}{subsection.1.34.1}}
+\newlabel{tutorials/new_tutorial/transform_in_parallel_code:producing-several-items-files-per-job}{{1.34.1}{95}{Producing several items / files per job}{subsection.1.34.1}{}}
+\@writefile{toc}{\contentsline {subsubsection}{Resulting Output}{96}{subsubsection*.62}}
+\newlabel{tutorials/new_tutorial/transform_in_parallel_code:resulting-output}{{1.34.1}{96}{Resulting Output}{subsubsection*.62}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.34.2}Defining task functions out of order}{96}{subsection.1.34.2}}
+\newlabel{tutorials/new_tutorial/transform_in_parallel_code:defining-tasks-function-out-of-order}{{1.34.2}{96}{Defining task functions out of order}{subsection.1.34.2}{}}
+\@writefile{toc}{\contentsline {subsubsection}{Resulting Output}{97}{subsubsection*.63}}
+\newlabel{tutorials/new_tutorial/transform_in_parallel_code:id1}{{1.34.2}{97}{Resulting Output}{subsubsection*.63}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.34.3}Multiple dependencies}{97}{subsection.1.34.3}}
+\newlabel{tutorials/new_tutorial/transform_in_parallel_code:new-manual-transform-multiple-dependencies-code}{{1.34.3}{97}{Multiple dependencies}{subsection.1.34.3}{}}
+\newlabel{tutorials/new_tutorial/transform_in_parallel_code:multiple-dependencies}{{1.34.3}{97}{Multiple dependencies}{subsection.1.34.3}{}}
+\@writefile{toc}{\contentsline {subsubsection}{Resulting Output}{98}{subsubsection*.64}}
+\newlabel{tutorials/new_tutorial/transform_in_parallel_code:id2}{{1.34.3}{98}{Resulting Output}{subsubsection*.64}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.34.4}Multiple dependencies after @follows}{99}{subsection.1.34.4}}
+\newlabel{tutorials/new_tutorial/transform_in_parallel_code:multiple-dependencies-after-follows}{{1.34.4}{99}{Multiple dependencies after @follows}{subsection.1.34.4}{}}
+\@writefile{toc}{\contentsline {subsubsection}{Resulting Output: \texttt {first\_task} completes before \texttt {second\_task}}{100}{subsubsection*.65}}
+\newlabel{tutorials/new_tutorial/transform_in_parallel_code:resulting-output-first-task-completes-before-second-task}{{1.34.4}{100}{Resulting Output: \texttt {first\_task} completes before \texttt {second\_task}}{subsubsection*.65}{}}
+\@writefile{toc}{\contentsline {section}{\numberline {1.35}\textbf {Chapter 4}: Python Code for Creating files with \texttt {@originate}}{100}{section.1.35}}
+\newlabel{tutorials/new_tutorial/originate_code:new-manual-originate-code}{{1.35}{100}{\textbf {Chapter 4}: Python Code for Creating files with \texttt {@originate}}{section.1.35}{}}
+\newlabel{tutorials/new_tutorial/originate_code::doc}{{1.35}{100}{\textbf {Chapter 4}: Python Code for Creating files with \texttt {@originate}}{section.1.35}{}}
+\newlabel{tutorials/new_tutorial/originate_code:new-manual-originate-chapter-num-python-code-for-creating-files-with-originate}{{1.35}{100}{\textbf {Chapter 4}: Python Code for Creating files with \texttt {@originate}}{section.1.35}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.35.1}Using \texttt {@originate}}{101}{subsection.1.35.1}}
+\newlabel{tutorials/new_tutorial/originate_code:using-originate}{{1.35.1}{101}{Using \texttt {@originate}}{subsection.1.35.1}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.35.2}Resulting Output}{101}{subsection.1.35.2}}
+\newlabel{tutorials/new_tutorial/originate_code:resulting-output}{{1.35.2}{101}{Resulting Output}{subsection.1.35.2}{}}
+\@writefile{toc}{\contentsline {section}{\numberline {1.36}\textbf {Chapter 5}: Python Code for Understanding how your pipeline works with \emph {pipeline\_printout(...)}}{101}{section.1.36}}
+\newlabel{tutorials/new_tutorial/pipeline_printout_code::doc}{{1.36}{101}{\textbf {Chapter 5}: Python Code for Understanding how your pipeline works with \emph {pipeline\_printout(...)}}{section.1.36}{}}
+\newlabel{tutorials/new_tutorial/pipeline_printout_code:new-manual-pipeline-printout-code}{{1.36}{101}{\textbf {Chapter 5}: Python Code for Understanding how your pipeline works with \emph {pipeline\_printout(...)}}{section.1.36}{}}
+\newlabel{tutorials/new_tutorial/pipeline_printout_code:new-manual-pipeline-printout-chapter-num-python-code-for-understanding-how-your-pipeline-works-with-pipeline-printout}{{1.36}{101}{\textbf {Chapter 5}: Python Code for Understanding how your pipeline works with \emph {pipeline\_printout(...)}}{section.1.36}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.36.1}Display the initial state of the pipeline}{102}{subsection.1.36.1}}
+\newlabel{tutorials/new_tutorial/pipeline_printout_code:display-the-initial-state-of-the-pipeline}{{1.36.1}{102}{Display the initial state of the pipeline}{subsection.1.36.1}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.36.2}Normal Output}{102}{subsection.1.36.2}}
+\newlabel{tutorials/new_tutorial/pipeline_printout_code:normal-output}{{1.36.2}{102}{Normal Output}{subsection.1.36.2}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.36.3}High Verbosity Output}{102}{subsection.1.36.3}}
+\newlabel{tutorials/new_tutorial/pipeline_printout_code:high-verbosity-output}{{1.36.3}{102}{High Verbosity Output}{subsection.1.36.3}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.36.4}Display the partially up-to-date pipeline}{103}{subsection.1.36.4}}
+\newlabel{tutorials/new_tutorial/pipeline_printout_code:display-the-partially-up-to-date-pipeline}{{1.36.4}{103}{Display the partially up-to-date pipeline}{subsection.1.36.4}{}}
+\@writefile{toc}{\contentsline {section}{\numberline {1.37}\textbf {Chapter 7}: Python Code for Displaying the pipeline visually with \emph {pipeline\_printout\_graph(...)}}{105}{section.1.37}}
+\newlabel{tutorials/new_tutorial/pipeline_printout_graph_code:new-manual-pipeline-printout-graph-chapter-num-python-code-for-displaying-the-pipeline-visually-with-pipeline-printout-graph}{{1.37}{105}{\textbf {Chapter 7}: Python Code for Displaying the pipeline visually with \emph {pipeline\_printout\_graph(...)}}{section.1.37}{}}
+\newlabel{tutorials/new_tutorial/pipeline_printout_graph_code::doc}{{1.37}{105}{\textbf {Chapter 7}: Python Code for Displaying the pipeline visually with \emph {pipeline\_printout\_graph(...)}}{section.1.37}{}}
+\newlabel{tutorials/new_tutorial/pipeline_printout_graph_code:new-manual-pipeline-printout-graph-code}{{1.37}{105}{\textbf {Chapter 7}: Python Code for Displaying the pipeline visually with \emph {pipeline\_printout\_graph(...)}}{section.1.37}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.37.1}Code}{105}{subsection.1.37.1}}
+\newlabel{tutorials/new_tutorial/pipeline_printout_graph_code:code}{{1.37.1}{105}{Code}{subsection.1.37.1}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.37.2}Resulting Flowcharts}{106}{subsection.1.37.2}}
+\newlabel{tutorials/new_tutorial/pipeline_printout_graph_code:resulting-flowcharts}{{1.37.2}{106}{Resulting Flowcharts}{subsection.1.37.2}{}}
+\@writefile{toc}{\contentsline {section}{\numberline {1.38}\textbf {Chapter 8}: Python Code for Specifying output file names with \emph {formatter()} and \emph {regex()}}{107}{section.1.38}}
+\newlabel{tutorials/new_tutorial/output_file_names_code::doc}{{1.38}{107}{\textbf {Chapter 8}: Python Code for Specifying output file names with \emph {formatter()} and \emph {regex()}}{section.1.38}{}}
+\newlabel{tutorials/new_tutorial/output_file_names_code:new-manual-output-file-names-chapter-num-python-code-for-specifying-output-file-names-with-formatter-and-regex}{{1.38}{107}{\textbf {Chapter 8}: Python Code for Specifying output file names with \emph {formatter()} and \emph {regex()}}{section.1.38}{}}
+\newlabel{tutorials/new_tutorial/output_file_names_code:new-manual-output-file-names-code}{{1.38}{107}{\textbf {Chapter 8}: Python Code for Specifying output file names with \emph {formatter()} and \emph {regex()}}{section.1.38}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.38.1}Example Code for \emph {suffix()}}{107}{subsection.1.38.1}}
+\newlabel{tutorials/new_tutorial/output_file_names_code:example-code-for-suffix}{{1.38.1}{107}{Example Code for \emph {suffix()}}{subsection.1.38.1}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.38.2}Example Code for \emph {formatter()}}{107}{subsection.1.38.2}}
+\newlabel{tutorials/new_tutorial/output_file_names_code:example-code-for-formatter}{{1.38.2}{107}{Example Code for \emph {formatter()}}{subsection.1.38.2}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.38.3}Example Code for \emph {formatter()} with replacements in \emph {extra} arguments}{108}{subsection.1.38.3}}
+\newlabel{tutorials/new_tutorial/output_file_names_code:example-code-for-formatter-with-replacements-in-extra-arguments}{{1.38.3}{108}{Example Code for \emph {formatter()} with replacements in \emph {extra} arguments}{subsection.1.38.3}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.38.4}Example Code for \emph {formatter()} in Zoos}{109}{subsection.1.38.4}}
+\newlabel{tutorials/new_tutorial/output_file_names_code:example-code-for-formatter-in-zoos}{{1.38.4}{109}{Example Code for \emph {formatter()} in Zoos}{subsection.1.38.4}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.38.5}Example Code for \emph {regex()} in zoos}{110}{subsection.1.38.5}}
+\newlabel{tutorials/new_tutorial/output_file_names_code:example-code-for-regex-in-zoos}{{1.38.5}{110}{Example Code for \emph {regex()} in zoos}{subsection.1.38.5}{}}
+\@writefile{toc}{\contentsline {section}{\numberline {1.39}\textbf {Chapter 9}: Python Code for Preparing directories for output with \emph {@mkdir()}}{110}{section.1.39}}
+\newlabel{tutorials/new_tutorial/mkdir_code:new-manual-mkdir-code}{{1.39}{110}{\textbf {Chapter 9}: Python Code for Preparing directories for output with \emph {@mkdir()}}{section.1.39}{}}
+\newlabel{tutorials/new_tutorial/mkdir_code::doc}{{1.39}{110}{\textbf {Chapter 9}: Python Code for Preparing directories for output with \emph {@mkdir()}}{section.1.39}{}}
+\newlabel{tutorials/new_tutorial/mkdir_code:new-manual-mkdir-chapter-num-python-code-for-preparing-directories-for-output-with-mkdir}{{1.39}{110}{\textbf {Chapter 9}: Python Code for Preparing directories for output with \emph {@mkdir()}}{section.1.39}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.39.1}Code for \emph {formatter()} Zoo example}{111}{subsection.1.39.1}}
+\newlabel{tutorials/new_tutorial/mkdir_code:code-for-formatter-zoo-example}{{1.39.1}{111}{Code for \emph {formatter()} Zoo example}{subsection.1.39.1}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.39.2}Code for \emph {regex()} Zoo example}{111}{subsection.1.39.2}}
+\newlabel{tutorials/new_tutorial/mkdir_code:code-for-regex-zoo-example}{{1.39.2}{111}{Code for \emph {regex()} Zoo example}{subsection.1.39.2}{}}
+\@writefile{toc}{\contentsline {section}{\numberline {1.40}\textbf {Chapter 10}: Python Code for Checkpointing: Interrupted Pipelines and Exceptions}{112}{section.1.40}}
+\newlabel{tutorials/new_tutorial/checkpointing_code:new-manual-checkpointing-chapter-num-python-code-for-checkpointing-interrupted-pipelines-and-exceptions}{{1.40}{112}{\textbf {Chapter 10}: Python Code for Checkpointing: Interrupted Pipelines and Exceptions}{section.1.40}{}}
+\newlabel{tutorials/new_tutorial/checkpointing_code::doc}{{1.40}{112}{\textbf {Chapter 10}: Python Code for Checkpointing: Interrupted Pipelines and Exceptions}{section.1.40}{}}
+\newlabel{tutorials/new_tutorial/checkpointing_code:new-manual-checkpointing-code}{{1.40}{112}{\textbf {Chapter 10}: Python Code for Checkpointing: Interrupted Pipelines and Exceptions}{section.1.40}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.40.1}Code for \emph {suffix()} example}{112}{subsection.1.40.1}}
+\newlabel{tutorials/new_tutorial/checkpointing_code:code-for-ref-suffix-decorators-suffix-example}{{1.40.1}{112}{Code for \emph {suffix()} example}{subsection.1.40.1}{}}
+\@writefile{toc}{\contentsline {section}{\numberline {1.41}\textbf {Chapter 12}: Python Code for Splitting up large tasks / files with \textbf {@split}}{113}{section.1.41}}
+\newlabel{tutorials/new_tutorial/split_code:new-manual-split-chapter-num-python-code-for-splitting-up-large-tasks-files-with-split}{{1.41}{113}{\textbf {Chapter 12}: Python Code for Splitting up large tasks / files with \textbf {@split}}{section.1.41}{}}
+\newlabel{tutorials/new_tutorial/split_code::doc}{{1.41}{113}{\textbf {Chapter 12}: Python Code for Splitting up large tasks / files with \textbf {@split}}{section.1.41}{}}
+\newlabel{tutorials/new_tutorial/split_code:new-manual-split-code}{{1.41}{113}{\textbf {Chapter 12}: Python Code for Splitting up large tasks / files with \textbf {@split}}{section.1.41}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.41.1}Splitting large jobs}{113}{subsection.1.41.1}}
+\newlabel{tutorials/new_tutorial/split_code:splitting-large-jobs}{{1.41.1}{113}{Splitting large jobs}{subsection.1.41.1}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.41.2}Resulting Output}{114}{subsection.1.41.2}}
+\newlabel{tutorials/new_tutorial/split_code:resulting-output}{{1.41.2}{114}{Resulting Output}{subsection.1.41.2}{}}
+\@writefile{toc}{\contentsline {section}{\numberline {1.42}\textbf {Chapter 13}: Python Code for \texttt {@merge} multiple input into a single result}{114}{section.1.42}}
+\newlabel{tutorials/new_tutorial/merge_code::doc}{{1.42}{114}{\textbf {Chapter 13}: Python Code for \texttt {@merge} multiple input into a single result}{section.1.42}{}}
+\newlabel{tutorials/new_tutorial/merge_code:new-manual-merge-code}{{1.42}{114}{\textbf {Chapter 13}: Python Code for \texttt {@merge} multiple input into a single result}{section.1.42}{}}
+\newlabel{tutorials/new_tutorial/merge_code:new-manual-merge-chapter-num-python-code-for-merge-multiple-input-into-a-single-result}{{1.42}{114}{\textbf {Chapter 13}: Python Code for \texttt {@merge} multiple input into a single result}{section.1.42}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.42.1}Splitting large jobs}{115}{subsection.1.42.1}}
+\newlabel{tutorials/new_tutorial/merge_code:splitting-large-jobs}{{1.42.1}{115}{Splitting large jobs}{subsection.1.42.1}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.42.2}Resulting Output}{116}{subsection.1.42.2}}
+\newlabel{tutorials/new_tutorial/merge_code:resulting-output}{{1.42.2}{116}{Resulting Output}{subsection.1.42.2}{}}
+\@writefile{toc}{\contentsline {section}{\numberline {1.43}\textbf {Chapter 14}: Python Code for Multiprocessing, \texttt {drmaa} and Computation Clusters}{117}{section.1.43}}
+\newlabel{tutorials/new_tutorial/multiprocessing_code:new-manual-multiprocessing-code}{{1.43}{117}{\textbf {Chapter 14}: Python Code for Multiprocessing, \texttt {drmaa} and Computation Clusters}{section.1.43}{}}
+\newlabel{tutorials/new_tutorial/multiprocessing_code:new-manual-multiprocessing-chapter-num-python-code-for-multiprocessing-drmaa-and-computation-clusters}{{1.43}{117}{\textbf {Chapter 14}: Python Code for Multiprocessing, \texttt {drmaa} and Computation Clusters}{section.1.43}{}}
+\newlabel{tutorials/new_tutorial/multiprocessing_code::doc}{{1.43}{117}{\textbf {Chapter 14}: Python Code for Multiprocessing, \texttt {drmaa} and Computation Clusters}{section.1.43}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.43.1}\emph {@jobs\_limit}}{117}{subsection.1.43.1}}
+\newlabel{tutorials/new_tutorial/multiprocessing_code:jobs-limit}{{1.43.1}{117}{\emph {@jobs\_limit}}{subsection.1.43.1}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.43.2}Using \texttt {ruffus.drmaa\_wrapper}}{119}{subsection.1.43.2}}
+\newlabel{tutorials/new_tutorial/multiprocessing_code:using-ruffus-drmaa-wrapper}{{1.43.2}{119}{Using \texttt {ruffus.drmaa\_wrapper}}{subsection.1.43.2}{}}
+\newlabel{tutorials/new_tutorial/multiprocessing_code:id1}{{1.43.2}{119}{Using \texttt {ruffus.drmaa\_wrapper}}{subsection.1.43.2}{}}
+\@writefile{toc}{\contentsline {section}{\numberline {1.44}\textbf {Chapter 15}: Python Code for Logging progress through a pipeline}{120}{section.1.44}}
+\newlabel{tutorials/new_tutorial/logging_code:new-manual-logging-chapter-num-python-code-for-logging-progress-through-a-pipeline}{{1.44}{120}{\textbf {Chapter 15}: Python Code for Logging progress through a pipeline}{section.1.44}{}}
+\newlabel{tutorials/new_tutorial/logging_code::doc}{{1.44}{120}{\textbf {Chapter 15}: Python Code for Logging progress through a pipeline}{section.1.44}{}}
+\newlabel{tutorials/new_tutorial/logging_code:new-manual-logging-code}{{1.44}{120}{\textbf {Chapter 15}: Python Code for Logging progress through a pipeline}{section.1.44}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.44.1}Rotating set of file logs}{120}{subsection.1.44.1}}
+\newlabel{tutorials/new_tutorial/logging_code:rotating-set-of-file-logs}{{1.44.1}{120}{Rotating set of file logs}{subsection.1.44.1}{}}
+\@writefile{toc}{\contentsline {section}{\numberline {1.45}\textbf {Chapter 16}: Python Code for \emph {@subdivide} tasks to run efficiently and regroup with \emph {@collate}}{121}{section.1.45}}
+\newlabel{tutorials/new_tutorial/subdivide_collate_code:new-manual-subdivide-collate-code}{{1.45}{121}{\textbf {Chapter 16}: Python Code for \emph {@subdivide} tasks to run efficiently and regroup with \emph {@collate}}{section.1.45}{}}
+\newlabel{tutorials/new_tutorial/subdivide_collate_code::doc}{{1.45}{121}{\textbf {Chapter 16}: Python Code for \emph {@subdivide} tasks to run efficiently and regroup with \emph {@collate}}{section.1.45}{}}
+\newlabel{tutorials/new_tutorial/subdivide_collate_code:new-manual-subdivide-collate-chapter-num-python-code-for-subdivide-tasks-to-run-efficiently-and-regroup-with-collate}{{1.45}{121}{\textbf {Chapter 16}: Python Code for \emph {@subdivide} tasks to run efficiently and regroup with \emph {@collate}}{section.1.45}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.45.1}\emph {@subdivide} and regroup with \emph {@collate} example}{121}{subsection.1.45.1}}
+\newlabel{tutorials/new_tutorial/subdivide_collate_code:subdivide-and-regroup-with-collate-example}{{1.45.1}{121}{\emph {@subdivide} and regroup with \emph {@collate} example}{subsection.1.45.1}{}}
+\@writefile{toc}{\contentsline {section}{\numberline {1.46}\textbf {Chapter 17}: Python Code for \emph {@combinations}, \emph {@permutations} and all versus all \emph {@product}}{123}{section.1.46}}
+\newlabel{tutorials/new_tutorial/combinatorics_code:new-manual-combinatorics-chapter-num-python-code-for-combinations-permutations-and-all-versus-all-product}{{1.46}{123}{\textbf {Chapter 17}: Python Code for \emph {@combinations}, \emph {@permutations} and all versus all \emph {@product}}{section.1.46}{}}
+\newlabel{tutorials/new_tutorial/combinatorics_code::doc}{{1.46}{123}{\textbf {Chapter 17}: Python Code for \emph {@combinations}, \emph {@permutations} and all versus all \emph {@product}}{section.1.46}{}}
+\newlabel{tutorials/new_tutorial/combinatorics_code:new-manual-combinatorics-code}{{1.46}{123}{\textbf {Chapter 17}: Python Code for \emph {@combinations}, \emph {@permutations} and all versus all \emph {@product}}{section.1.46}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.46.1}Example code for \emph {@product}}{123}{subsection.1.46.1}}
+\newlabel{tutorials/new_tutorial/combinatorics_code:example-code-for-product}{{1.46.1}{123}{Example code for \emph {@product}}{subsection.1.46.1}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.46.2}Example code for \emph {@permutations}}{125}{subsection.1.46.2}}
+\newlabel{tutorials/new_tutorial/combinatorics_code:example-code-for-permutations}{{1.46.2}{125}{Example code for \emph {@permutations}}{subsection.1.46.2}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.46.3}Example code for \emph {@combinations}}{126}{subsection.1.46.3}}
+\newlabel{tutorials/new_tutorial/combinatorics_code:example-code-for-combinations}{{1.46.3}{126}{Example code for \emph {@combinations}}{subsection.1.46.3}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.46.4}Example code for \emph {@combinations\_with\_replacement}}{127}{subsection.1.46.4}}
+\newlabel{tutorials/new_tutorial/combinatorics_code:example-code-for-combinations-with-replacement}{{1.46.4}{127}{Example code for \emph {@combinations\_with\_replacement}}{subsection.1.46.4}{}}
+\@writefile{toc}{\contentsline {section}{\numberline {1.47}\textbf {Chapter 20}: Python Code for Manipulating task inputs via string substitution using \emph {inputs()} and \emph {add\_inputs()}}{128}{section.1.47}}
+\newlabel{tutorials/new_tutorial/inputs_code:new-manual-inputs-code}{{1.47}{128}{\textbf {Chapter 20}: Python Code for Manipulating task inputs via string substitution using \emph {inputs()} and \emph {add\_inputs()}}{section.1.47}{}}
+\newlabel{tutorials/new_tutorial/inputs_code::doc}{{1.47}{128}{\textbf {Chapter 20}: Python Code for Manipulating task inputs via string substitution using \emph {inputs()} and \emph {add\_inputs()}}{section.1.47}{}}
+\newlabel{tutorials/new_tutorial/inputs_code:new-manual-inputs-chapter-num-python-code-for-manipulating-task-inputs-via-string-substitution-using-inputs-and-add-inputs}{{1.47}{128}{\textbf {Chapter 20}: Python Code for Manipulating task inputs via string substitution using \emph {inputs()} and \emph {add\_inputs()}}{section.1.47}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.47.1}Example code for adding additional \emph {input} prerequisites per job with \emph {add\_inputs()}}{128}{subsection.1.47.1}}
+\newlabel{tutorials/new_tutorial/inputs_code:example-code-for-adding-additional-input-prerequisites-per-job-with-add-inputs}{{1.47.1}{128}{Example code for adding additional \emph {input} prerequisites per job with \emph {add\_inputs()}}{subsection.1.47.1}{}}
+\@writefile{toc}{\contentsline {subsubsection}{1. Example: compiling C++ code}{128}{subsubsection*.66}}
+\newlabel{tutorials/new_tutorial/inputs_code:example-compiling-c-code}{{1.47.1}{128}{1. Example: compiling C++ code}{subsubsection*.66}{}}
+\newlabel{tutorials/new_tutorial/inputs_code:new-manual-inputs-example1}{{1.47.1}{128}{1. Example: compiling C++ code}{subsubsection*.66}{}}
+\@writefile{toc}{\contentsline {subsubsection}{2. Example: Adding a common header file with \emph {add\_inputs()}}{129}{subsubsection*.67}}
+\newlabel{tutorials/new_tutorial/inputs_code:new-manual-inputs-example2}{{1.47.1}{129}{2. Example: Adding a common header file with \emph {add\_inputs()}}{subsubsection*.67}{}}
+\newlabel{tutorials/new_tutorial/inputs_code:example-adding-a-common-header-file-with-add-inputs}{{1.47.1}{129}{2. Example: Adding a common header file with \emph {add\_inputs()}}{subsubsection*.67}{}}
+\@writefile{toc}{\contentsline {subsubsection}{3. Example: Additional \emph {Input} can be tasks}{129}{subsubsection*.68}}
+\newlabel{tutorials/new_tutorial/inputs_code:example-additional-input-can-be-tasks}{{1.47.1}{129}{3. Example: Additional \emph {Input} can be tasks}{subsubsection*.68}{}}
+\newlabel{tutorials/new_tutorial/inputs_code:new-manual-inputs-example3}{{1.47.1}{129}{3. Example: Additional \emph {Input} can be tasks}{subsubsection*.68}{}}
+\@writefile{toc}{\contentsline {subsubsection}{4. Example: Add corresponding files using \emph {add\_inputs()} with \emph {formatter} or \emph {regex}}{130}{subsubsection*.69}}
+\newlabel{tutorials/new_tutorial/inputs_code:new-manual-inputs-example4}{{1.47.1}{130}{4. Example: Add corresponding files using \emph {add\_inputs()} with \emph {formatter} or \emph {regex}}{subsubsection*.69}{}}
+\newlabel{tutorials/new_tutorial/inputs_code:example-add-corresponding-files-using-add-inputs-with-formatter-or-regex}{{1.47.1}{130}{4. Example: Add corresponding files using \emph {add\_inputs()} with \emph {formatter} or \emph {regex}}{subsubsection*.69}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.47.2}Example code for replacing all input parameters with \emph {inputs()}}{130}{subsection.1.47.2}}
+\newlabel{tutorials/new_tutorial/inputs_code:example-code-for-replacing-all-input-parameters-with-inputs}{{1.47.2}{130}{Example code for replacing all input parameters with \emph {inputs()}}{subsection.1.47.2}{}}
+\@writefile{toc}{\contentsline {subsubsection}{5. Example: Running matching python scripts using \emph {inputs()}}{130}{subsubsection*.70}}
+\newlabel{tutorials/new_tutorial/inputs_code:example-running-matching-python-scripts-using-inputs}{{1.47.2}{130}{5. Example: Running matching python scripts using \emph {inputs()}}{subsubsection*.70}{}}
+\newlabel{tutorials/new_tutorial/inputs_code:new-manual-inputs-example5}{{1.47.2}{130}{5. Example: Running matching python scripts using \emph {inputs()}}{subsubsection*.70}{}}
+\@writefile{toc}{\contentsline {section}{\numberline {1.48}\textbf {Chapter 21}: Esoteric: Python Code for Generating parameters on the fly with \emph {@files}}{131}{section.1.48}}
+\newlabel{tutorials/new_tutorial/onthefly_code:new-manual-on-the-fly-code}{{1.48}{131}{\textbf {Chapter 21}: Esoteric: Python Code for Generating parameters on the fly with \emph {@files}}{section.1.48}{}}
+\newlabel{tutorials/new_tutorial/onthefly_code::doc}{{1.48}{131}{\textbf {Chapter 21}: Esoteric: Python Code for Generating parameters on the fly with \emph {@files}}{section.1.48}{}}
+\newlabel{tutorials/new_tutorial/onthefly_code:new-manual-on-the-fly-chapter-num-esoteric-python-code-for-generating-parameters-on-the-fly-with-files}{{1.48}{131}{\textbf {Chapter 21}: Esoteric: Python Code for Generating parameters on the fly with \emph {@files}}{section.1.48}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.48.1}Introduction}{131}{subsection.1.48.1}}
+\newlabel{tutorials/new_tutorial/onthefly_code:introduction}{{1.48.1}{131}{Introduction}{subsection.1.48.1}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.48.2}Code}{132}{subsection.1.48.2}}
+\newlabel{tutorials/new_tutorial/onthefly_code:code}{{1.48.2}{132}{Code}{subsection.1.48.2}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.48.3}Resulting Output}{136}{subsection.1.48.3}}
+\newlabel{tutorials/new_tutorial/onthefly_code:resulting-output}{{1.48.3}{136}{Resulting Output}{subsection.1.48.3}{}}
+\@writefile{toc}{\contentsline {section}{\numberline {1.49}\textbf {Appendix 1}: Python code for Flow Chart Colours with \emph {pipeline\_printout\_graph(...)}}{136}{section.1.49}}
+\newlabel{tutorials/new_tutorial/flowchart_colours_code:index-0}{{1.49}{136}{\textbf {Appendix 1}: Python code for Flow Chart Colours with \emph {pipeline\_printout\_graph(...)}}{section.1.49}{}}
+\newlabel{tutorials/new_tutorial/flowchart_colours_code:new-manual-flowchart-colours-code}{{1.49}{136}{\textbf {Appendix 1}: Python code for Flow Chart Colours with \emph {pipeline\_printout\_graph(...)}}{section.1.49}{}}
+\newlabel{tutorials/new_tutorial/flowchart_colours_code:new-manual-flowchart-colours-chapter-num-python-code-for-flow-chart-colours-with-pipeline-printout-graph}{{1.49}{136}{\textbf {Appendix 1}: Python code for Flow Chart Colours with \emph {pipeline\_printout\_graph(...)}}{section.1.49}{}}
+\newlabel{tutorials/new_tutorial/flowchart_colours_code::doc}{{1.49}{136}{\textbf {Appendix 1}: Python code for Flow Chart Colours with \emph {pipeline\_printout\_graph(...)}}{section.1.49}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.49.1}Code}{137}{subsection.1.49.1}}
+\newlabel{tutorials/new_tutorial/flowchart_colours_code:code}{{1.49.1}{137}{Code}{subsection.1.49.1}{}}
+\@writefile{toc}{\contentsline {chapter}{\numberline {2}Overview:}{143}{chapter.2}}
+\@writefile{lof}{\addvspace {10\p@ }}
+\@writefile{lot}{\addvspace {10\p@ }}
+\newlabel{contents:overview}{{2}{143}{Overview:}{chapter.2}{}}
+\@writefile{toc}{\contentsline {section}{\numberline {2.1}Cheat Sheet}{143}{section.2.1}}
+\newlabel{cheatsheet:cheat-sheet}{{2.1}{143}{Cheat Sheet}{section.2.1}{}}
+\newlabel{cheatsheet::doc}{{2.1}{143}{Cheat Sheet}{section.2.1}{}}
+\newlabel{cheatsheet:id1}{{2.1}{143}{Cheat Sheet}{section.2.1}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {2.1.1}1. Annotate functions with \textbf {Ruffus} decorators}{144}{subsection.2.1.1}}
+\newlabel{cheatsheet:annotate-functions-with-ruffus-decorators}{{2.1.1}{144}{1. Annotate functions with \textbf {Ruffus} decorators}{subsection.2.1.1}{}}
+\@writefile{toc}{\contentsline {subsubsection}{Core}{144}{subsubsection*.71}}
+\newlabel{cheatsheet:core}{{2.1.1}{144}{Core}{subsubsection*.71}{}}
+\@writefile{toc}{\contentsline {subsubsection}{See \emph {Decorators} for a complete list of decorators}{144}{subsubsection*.72}}
+\newlabel{cheatsheet:see-decorators-for-a-complete-list-of-decorators}{{2.1.1}{144}{See \emph {Decorators} for a complete list of decorators}{subsubsection*.72}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {2.1.2}2. Print dependency graph if necessary}{144}{subsection.2.1.2}}
+\newlabel{cheatsheet:print-dependency-graph-if-necessary}{{2.1.2}{144}{2. Print dependency graph if necessary}{subsection.2.1.2}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {2.1.3}3. Run the pipeline}{145}{subsection.2.1.3}}
+\newlabel{cheatsheet:run-the-pipeline}{{2.1.3}{145}{3. Run the pipeline}{subsection.2.1.3}{}}
+\newlabel{pipeline_functions:pipeline-functions}{{2.1.3}{145}{3. Run the pipeline}{section*.73}{}}
+\newlabel{pipeline_functions:pipeline-functions-pipeline-run}{{2.1.3}{145}{3. Run the pipeline}{section*.74}{}}
+\newlabel{pipeline_functions:pipeline-run}{{2.1.3}{145}{3. Run the pipeline}{section*.75}{}}
+\newlabel{pipeline_functions:pipeline-functions-pipeline-printout}{{2.1.3}{145}{3. Run the pipeline}{section*.76}{}}
+\newlabel{pipeline_functions:pipeline-printout}{{2.1.3}{145}{3. Run the pipeline}{section*.77}{}}
+\newlabel{pipeline_functions:pipeline-functions-pipeline-printout-graph}{{2.1.3}{145}{3. Run the pipeline}{section*.78}{}}
+\newlabel{pipeline_functions:pipeline-printout-graph}{{2.1.3}{145}{3. Run the pipeline}{section*.79}{}}
+\newlabel{pipeline_functions:pipeline-functions-pipeline-get-task-names}{{2.1.3}{145}{3. Run the pipeline}{section*.80}{}}
+\newlabel{pipeline_functions:pipeline-get-task-names}{{2.1.3}{145}{3. Run the pipeline}{section*.81}{}}
+\newlabel{pipeline_functions:pipeline-functions-pipeline-run-target-tasks}{{2.1.3}{145}{3. Run the pipeline}{section*.82}{}}
+\newlabel{pipeline_functions:pr-target-tasks}{{2.1.3}{145}{3. Run the pipeline}{section*.83}{}}
+\newlabel{pipeline_functions:pipeline-functions-pipeline-run-forcedtorun-tasks}{{2.1.3}{145}{3. Run the pipeline}{section*.84}{}}
+\newlabel{pipeline_functions:pr-forcedtorun-tasks}{{2.1.3}{145}{3. Run the pipeline}{section*.85}{}}
+\newlabel{pipeline_functions:pipeline-functions-pipeline-run-multiprocess}{{2.1.3}{145}{3. Run the pipeline}{section*.86}{}}
+\newlabel{pipeline_functions:pr-multiprocess}{{2.1.3}{145}{3. Run the pipeline}{section*.87}{}}
+\newlabel{pipeline_functions:pipeline-functions-pipeline-run-logger}{{2.1.3}{145}{3. Run the pipeline}{section*.88}{}}
+\newlabel{pipeline_functions:pr-logger}{{2.1.3}{145}{3. Run the pipeline}{section*.89}{}}
+\newlabel{pipeline_functions:pipeline-functions-pipeline-run-gnu-make}{{2.1.3}{145}{3. Run the pipeline}{section*.90}{}}
+\newlabel{pipeline_functions:pr-gnu-make}{{2.1.3}{145}{3. Run the pipeline}{section*.91}{}}
+\newlabel{pipeline_functions:pipeline-functions-pipeline-run-verbose}{{2.1.3}{145}{3. Run the pipeline}{section*.92}{}}
+\newlabel{pipeline_functions:pr-verbose}{{2.1.3}{145}{3. Run the pipeline}{section*.93}{}}
+\newlabel{pipeline_functions:pipeline-functions-pipeline-run-runtime-data}{{2.1.3}{145}{3. Run the pipeline}{section*.94}{}}
+\newlabel{pipeline_functions:pr-runtime-data}{{2.1.3}{145}{3. Run the pipeline}{section*.95}{}}
+\newlabel{pipeline_functions:pipeline-functions-pipeline-run-one-second-per-job}{{2.1.3}{145}{3. Run the pipeline}{section*.96}{}}
+\newlabel{pipeline_functions:pr-one-second-per-job}{{2.1.3}{145}{3. Run the pipeline}{section*.97}{}}
+\newlabel{pipeline_functions:pipeline-functions-pipeline-run-touch-files-only}{{2.1.3}{145}{3. Run the pipeline}{section*.98}{}}
+\newlabel{pipeline_functions:pr-touch-files-only}{{2.1.3}{145}{3. Run the pipeline}{section*.99}{}}
+\newlabel{pipeline_functions:pipeline-functions-pipeline-run-exceptions-terminate-immediately}{{2.1.3}{145}{3. Run the pipeline}{section*.100}{}}
+\newlabel{pipeline_functions:pr-exceptions-terminate-immediately}{{2.1.3}{145}{3. Run the pipeline}{section*.101}{}}
+\newlabel{pipeline_functions:pipeline-functions-pipeline-run-log-exceptions}{{2.1.3}{145}{3. Run the pipeline}{section*.102}{}}
+\newlabel{pipeline_functions:pr-log-exceptions}{{2.1.3}{145}{3. Run the pipeline}{section*.103}{}}
+\newlabel{pipeline_functions:pipeline-functions-pipeline-run-multithread}{{2.1.3}{145}{3. Run the pipeline}{section*.104}{}}
+\newlabel{pipeline_functions:pr-multithread}{{2.1.3}{145}{3. Run the pipeline}{section*.105}{}}
+\newlabel{pipeline_functions:pipeline-functions-pipeline-run-checksum-level}{{2.1.3}{145}{3. Run the pipeline}{section*.106}{}}
+\newlabel{pipeline_functions:pr-checksum-level}{{2.1.3}{145}{3. Run the pipeline}{section*.107}{}}
+\newlabel{pipeline_functions:pipeline-functions-pipeline-run-history-file}{{2.1.3}{145}{3. Run the pipeline}{section*.108}{}}
+\newlabel{pipeline_functions:pr-history-file}{{2.1.3}{145}{3. Run the pipeline}{section*.109}{}}
+\newlabel{pipeline_functions:pipeline-functions-pipeline-run-verbose-abbreviated-path}{{2.1.3}{145}{3. Run the pipeline}{section*.110}{}}
+\newlabel{pipeline_functions:pr-verbose-abbreviated-path}{{2.1.3}{145}{3. Run the pipeline}{section*.111}{}}
+\newlabel{pipeline_functions:pipeline-functions-pipeline-printout-output-stream}{{2.1.3}{145}{3. Run the pipeline}{section*.112}{}}
+\newlabel{pipeline_functions:pp-output-stream}{{2.1.3}{145}{3. Run the pipeline}{section*.113}{}}
+\newlabel{pipeline_functions:pipeline-functions-pipeline-printout-target-tasks}{{2.1.3}{145}{3. Run the pipeline}{section*.114}{}}
+\newlabel{pipeline_functions:pp-target-tasks}{{2.1.3}{145}{3. Run the pipeline}{section*.115}{}}
+\newlabel{pipeline_functions:pipeline-functions-pipeline-printout-forcedtorun-tasks}{{2.1.3}{145}{3. Run the pipeline}{section*.116}{}}
+\newlabel{pipeline_functions:pp-forcedtorun-tasks}{{2.1.3}{145}{3. Run the pipeline}{section*.117}{}}
+\newlabel{pipeline_functions:pipeline-functions-pipeline-printout-verbose}{{2.1.3}{145}{3. Run the pipeline}{section*.118}{}}
+\newlabel{pipeline_functions:pp-verbose}{{2.1.3}{145}{3. Run the pipeline}{section*.119}{}}
+\newlabel{pipeline_functions:pipeline-functions-pipeline-printout-indent}{{2.1.3}{145}{3. Run the pipeline}{section*.120}{}}
+\newlabel{pipeline_functions:pp-indent}{{2.1.3}{145}{3. Run the pipeline}{section*.121}{}}
+\newlabel{pipeline_functions:pipeline-functions-pipeline-printout-wrap-width}{{2.1.3}{145}{3. Run the pipeline}{section*.122}{}}
+\newlabel{pipeline_functions:pp-wrap-width}{{2.1.3}{145}{3. Run the pipeline}{section*.123}{}}
+\newlabel{pipeline_functions:pipeline-functions-pipeline-printout-gnu-make}{{2.1.3}{145}{3. Run the pipeline}{section*.124}{}}
+\newlabel{pipeline_functions:pp-gnu-make}{{2.1.3}{145}{3. Run the pipeline}{section*.125}{}}
+\newlabel{pipeline_functions:pipeline-functions-pipeline-printout-runtime-data}{{2.1.3}{145}{3. Run the pipeline}{section*.126}{}}
+\newlabel{pipeline_functions:pp-runtime-data}{{2.1.3}{145}{3. Run the pipeline}{section*.127}{}}
+\newlabel{pipeline_functions:pipeline-functions-pipeline-printout-checksum-level}{{2.1.3}{145}{3. Run the pipeline}{section*.128}{}}
+\newlabel{pipeline_functions:pp-checksum-level}{{2.1.3}{145}{3. Run the pipeline}{section*.129}{}}
+\newlabel{pipeline_functions:pipeline-functions-pipeline-printout-history-file}{{2.1.3}{145}{3. Run the pipeline}{section*.130}{}}
+\newlabel{pipeline_functions:pp-history-file}{{2.1.3}{145}{3. Run the pipeline}{section*.131}{}}
+\newlabel{pipeline_functions:pipeline-functions-pipeline-printout-verbose-abbreviated-path}{{2.1.3}{145}{3. Run the pipeline}{section*.132}{}}
+\newlabel{pipeline_functions:pp-verbose-abbreviated-path}{{2.1.3}{145}{3. Run the pipeline}{section*.133}{}}
+\newlabel{pipeline_functions:pipeline-functions-pipeline-printout-graph-stream}{{2.1.3}{145}{3. Run the pipeline}{section*.134}{}}
+\newlabel{pipeline_functions:ppg-stream}{{2.1.3}{145}{3. Run the pipeline}{section*.135}{}}
+\newlabel{pipeline_functions:pipeline-functions-pipeline-printout-graph-output-format}{{2.1.3}{145}{3. Run the pipeline}{section*.136}{}}
+\newlabel{pipeline_functions:ppg-output-format}{{2.1.3}{145}{3. Run the pipeline}{section*.137}{}}
+\newlabel{pipeline_functions:pipeline-functions-pipeline-printout-graph-target-tasks}{{2.1.3}{145}{3. Run the pipeline}{section*.138}{}}
+\newlabel{pipeline_functions:ppg-target-tasks}{{2.1.3}{145}{3. Run the pipeline}{section*.139}{}}
+\newlabel{pipeline_functions:pipeline-functions-pipeline-printout-graph-forcedtorun-tasks}{{2.1.3}{145}{3. Run the pipeline}{section*.140}{}}
+\newlabel{pipeline_functions:ppg-forcedtorun-tasks}{{2.1.3}{145}{3. Run the pipeline}{section*.141}{}}
+\newlabel{pipeline_functions:pipeline-functions-pipeline-printout-graph-draw-vertically}{{2.1.3}{145}{3. Run the pipeline}{section*.142}{}}
+\newlabel{pipeline_functions:ppg-draw-vertically}{{2.1.3}{145}{3. Run the pipeline}{section*.143}{}}
+\newlabel{pipeline_functions:pipeline-functions-pipeline-printout-graph-ignore-upstream-of-target}{{2.1.3}{145}{3. Run the pipeline}{section*.144}{}}
+\newlabel{pipeline_functions:ppg-ignore-upstream-of-target}{{2.1.3}{145}{3. Run the pipeline}{section*.145}{}}
+\newlabel{pipeline_functions:pipeline-functions-pipeline-printout-graph-skip-uptodate-tasks}{{2.1.3}{145}{3. Run the pipeline}{section*.146}{}}
+\newlabel{pipeline_functions:ppg-skip-uptodate-tasks}{{2.1.3}{145}{3. Run the pipeline}{section*.147}{}}
+\newlabel{pipeline_functions:pipeline-functions-pipeline-printout-graph-gnu-make}{{2.1.3}{145}{3. Run the pipeline}{section*.148}{}}
+\newlabel{pipeline_functions:ppg-gnu-make}{{2.1.3}{145}{3. Run the pipeline}{section*.149}{}}
+\newlabel{pipeline_functions:pipeline-functions-pipeline-printout-graph-test-all-task-for-update}{{2.1.3}{145}{3. Run the pipeline}{section*.150}{}}
+\newlabel{pipeline_functions:ppg-test-all-task-for-update}{{2.1.3}{145}{3. Run the pipeline}{section*.151}{}}
+\newlabel{pipeline_functions:pipeline-functions-pipeline-printout-graph-no-key-legend}{{2.1.3}{145}{3. Run the pipeline}{section*.152}{}}
+\newlabel{pipeline_functions:ppg-no-key-legend}{{2.1.3}{145}{3. Run the pipeline}{section*.153}{}}
+\newlabel{pipeline_functions:pipeline-functions-pipeline-printout-graph-minimal-key-legend}{{2.1.3}{145}{3. Run the pipeline}{section*.154}{}}
+\newlabel{pipeline_functions:ppg-minimal-key-legend}{{2.1.3}{145}{3. Run the pipeline}{section*.155}{}}
+\newlabel{pipeline_functions:pipeline-functions-pipeline-printout-graph-pipeline-name}{{2.1.3}{145}{3. Run the pipeline}{section*.156}{}}
+\newlabel{pipeline_functions:ppg-pipeline-name}{{2.1.3}{145}{3. Run the pipeline}{section*.157}{}}
+\newlabel{pipeline_functions:pipeline-functions-pipeline-printout-graph-user-colour-scheme}{{2.1.3}{145}{3. Run the pipeline}{section*.158}{}}
+\newlabel{pipeline_functions:ppg-user-colour-scheme}{{2.1.3}{145}{3. Run the pipeline}{section*.159}{}}
+\newlabel{pipeline_functions:pipeline-functions-pipeline-printout-graph-size}{{2.1.3}{145}{3. Run the pipeline}{section*.160}{}}
+\newlabel{pipeline_functions:ppg-size}{{2.1.3}{145}{3. Run the pipeline}{section*.161}{}}
+\newlabel{pipeline_functions:pipeline-functions-pipeline-printout-graph-dpi}{{2.1.3}{145}{3. Run the pipeline}{section*.162}{}}
+\newlabel{pipeline_functions:ppg-dpi}{{2.1.3}{145}{3. Run the pipeline}{section*.163}{}}
+\newlabel{pipeline_functions:pipeline-functions-pipeline-printout-graph-runtime-data}{{2.1.3}{145}{3. Run the pipeline}{section*.164}{}}
+\newlabel{pipeline_functions:ppg-runtime-data}{{2.1.3}{145}{3. Run the pipeline}{section*.165}{}}
+\newlabel{pipeline_functions:pipeline-functions-pipeline-printout-graph-checksum-level}{{2.1.3}{145}{3. Run the pipeline}{section*.166}{}}
+\newlabel{pipeline_functions:ppg-checksum-level}{{2.1.3}{145}{3. Run the pipeline}{section*.167}{}}
+\@writefile{toc}{\contentsline {section}{\numberline {2.2}Pipeline functions}{145}{section.2.2}}
+\newlabel{pipeline_functions:ppg-history-file}{{2.2}{145}{Pipeline functions}{section.2.2}{}}
+\newlabel{pipeline_functions:pipeline-functions-pipeline-printout-graph-history-file}{{2.2}{145}{Pipeline functions}{section.2.2}{}}
+\newlabel{pipeline_functions::doc}{{2.2}{145}{Pipeline functions}{section.2.2}{}}
+\newlabel{pipeline_functions:id1}{{2.2}{145}{Pipeline functions}{section.2.2}{}}
+\newlabel{pipeline_functions:pipeline-functions-pipeline-run}{{2.2}{145}{Pipeline functions}{section*.168}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {2.2.1}\emph {pipeline\_run}}{145}{subsection.2.2.1}}
+\newlabel{pipeline_functions:id2}{{2.2.1}{145}{\emph {pipeline\_run}}{subsection.2.2.1}{}}
+\newlabel{pipeline_functions:index-0}{{2.2.1}{145}{\emph {pipeline\_run}}{subsection.2.2.1}{}}
+\newlabel{pipeline_functions:pipeline-functions-pipeline-run-target-tasks}{{2.2.1}{145}{\emph {pipeline\_run}}{section*.169}{}}
+\newlabel{pipeline_functions:pipeline-functions-pipeline-run-forcedtorun-tasks}{{2.2.1}{145}{\emph {pipeline\_run}}{section*.170}{}}
+\newlabel{pipeline_functions:pipeline-functions-pipeline-run-multiprocess}{{2.2.1}{145}{\emph {pipeline\_run}}{section*.171}{}}
+\newlabel{pipeline_functions:pipeline-functions-pipeline-run-multithread}{{2.2.1}{145}{\emph {pipeline\_run}}{section*.172}{}}
+\newlabel{pipeline_functions:pipeline-functions-pipeline-run-logger}{{2.2.1}{145}{\emph {pipeline\_run}}{section*.173}{}}
+\newlabel{pipeline_functions:pipeline-functions-pipeline-run-gnu-make}{{2.2.1}{145}{\emph {pipeline\_run}}{section*.174}{}}
+\newlabel{pipeline_functions:pipeline-functions-pipeline-run-verbose}{{2.2.1}{146}{\emph {pipeline\_run}}{section*.175}{}}
+\newlabel{pipeline_functions:pipeline-functions-pipeline-run-runtime-data}{{2.2.1}{146}{\emph {pipeline\_run}}{section*.176}{}}
+\newlabel{pipeline_functions:pipeline-functions-pipeline-run-one-second-per-job}{{2.2.1}{146}{\emph {pipeline\_run}}{section*.177}{}}
+\newlabel{pipeline_functions:pipeline-functions-pipeline-run-touch-files-only}{{2.2.1}{146}{\emph {pipeline\_run}}{section*.178}{}}
+\newlabel{pipeline_functions:pipeline-functions-pipeline-run-exceptions-terminate-immediately}{{2.2.1}{146}{\emph {pipeline\_run}}{section*.179}{}}
+\newlabel{pipeline_functions:pipeline-functions-pipeline-run-log-exceptions}{{2.2.1}{146}{\emph {pipeline\_run}}{section*.180}{}}
+\newlabel{pipeline_functions:pipeline-functions-pipeline-run-history-file}{{2.2.1}{146}{\emph {pipeline\_run}}{section*.181}{}}
+\newlabel{pipeline_functions:pipeline-functions-pipeline-run-checksum-level}{{2.2.1}{146}{\emph {pipeline\_run}}{section*.182}{}}
+\newlabel{pipeline_functions:pipeline-functions-pipeline-run-verbose-abbreviated-path}{{2.2.1}{147}{\emph {pipeline\_run}}{section*.183}{}}
+\newlabel{pipeline_functions:pipeline-functions-pipeline-printout}{{2.2.1}{147}{\emph {pipeline\_run}}{section*.184}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {2.2.2}\emph {pipeline\_printout}}{147}{subsection.2.2.2}}
+\newlabel{pipeline_functions:index-1}{{2.2.2}{147}{\emph {pipeline\_printout}}{subsection.2.2.2}{}}
+\newlabel{pipeline_functions:id3}{{2.2.2}{147}{\emph {pipeline\_printout}}{subsection.2.2.2}{}}
+\newlabel{pipeline_functions:pipeline-functions-pipeline-printout-output-stream}{{2.2.2}{147}{\emph {pipeline\_printout}}{section*.185}{}}
+\newlabel{pipeline_functions:pipeline-functions-pipeline-printout-target-tasks}{{2.2.2}{147}{\emph {pipeline\_printout}}{section*.186}{}}
+\newlabel{pipeline_functions:pipeline-functions-pipeline-printout-forcedtorun-tasks}{{2.2.2}{147}{\emph {pipeline\_printout}}{section*.187}{}}
+\newlabel{pipeline_functions:pipeline-functions-pipeline-printout-verbose}{{2.2.2}{147}{\emph {pipeline\_printout}}{section*.188}{}}
+\newlabel{pipeline_functions:pipeline-functions-pipeline-printout-indent}{{2.2.2}{148}{\emph {pipeline\_printout}}{section*.189}{}}
+\newlabel{pipeline_functions:pipeline-functions-pipeline-printout-gnu-make}{{2.2.2}{148}{\emph {pipeline\_printout}}{section*.190}{}}
+\newlabel{pipeline_functions:pipeline-functions-pipeline-printout-wrap-width}{{2.2.2}{148}{\emph {pipeline\_printout}}{section*.191}{}}
+\newlabel{pipeline_functions:pipeline-functions-pipeline-printout-runtime-data}{{2.2.2}{148}{\emph {pipeline\_printout}}{section*.192}{}}
+\newlabel{pipeline_functions:pipeline-functions-pipeline-printout-history-file}{{2.2.2}{148}{\emph {pipeline\_printout}}{section*.193}{}}
+\newlabel{pipeline_functions:pipeline-functions-pipeline-printout-checksum-level}{{2.2.2}{148}{\emph {pipeline\_printout}}{section*.194}{}}
+\newlabel{pipeline_functions:pipeline-functions-pipeline-printout-verbose-abbreviated-path}{{2.2.2}{148}{\emph {pipeline\_printout}}{section*.195}{}}
+\newlabel{pipeline_functions:pipeline-functions-pipeline-printout-graph}{{2.2.2}{148}{\emph {pipeline\_printout}}{section*.196}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {2.2.3}\emph {pipeline\_printout\_graph}}{148}{subsection.2.2.3}}
+\newlabel{pipeline_functions:id4}{{2.2.3}{148}{\emph {pipeline\_printout\_graph}}{subsection.2.2.3}{}}
+\newlabel{pipeline_functions:index-2}{{2.2.3}{148}{\emph {pipeline\_printout\_graph}}{subsection.2.2.3}{}}
+\newlabel{pipeline_functions:pipeline-functions-pipeline-printout-graph-stream}{{2.2.3}{149}{\emph {pipeline\_printout\_graph}}{section*.197}{}}
+\newlabel{pipeline_functions:pipeline-functions-pipeline-printout-graph-output-format}{{2.2.3}{149}{\emph {pipeline\_printout\_graph}}{section*.198}{}}
+\newlabel{pipeline_functions:pipeline-functions-pipeline-printout-graph-target-tasks}{{2.2.3}{149}{\emph {pipeline\_printout\_graph}}{section*.199}{}}
+\newlabel{pipeline_functions:pipeline-functions-pipeline-printout-graph-forcedtorun-tasks}{{2.2.3}{149}{\emph {pipeline\_printout\_graph}}{section*.200}{}}
+\newlabel{pipeline_functions:pipeline-functions-pipeline-printout-graph-draw-vertically}{{2.2.3}{149}{\emph {pipeline\_printout\_graph}}{section*.201}{}}
+\newlabel{pipeline_functions:pipeline-functions-pipeline-printout-graph-ignore-upstream-of-target}{{2.2.3}{149}{\emph {pipeline\_printout\_graph}}{section*.202}{}}
+\newlabel{pipeline_functions:pipeline-functions-pipeline-printout-graph-skip-uptodate-tasks}{{2.2.3}{149}{\emph {pipeline\_printout\_graph}}{section*.203}{}}
+\newlabel{pipeline_functions:pipeline-functions-pipeline-printout-graph-gnu-make}{{2.2.3}{149}{\emph {pipeline\_printout\_graph}}{section*.204}{}}
+\newlabel{pipeline_functions:pipeline-functions-pipeline-printout-graph-test-all-task-for-update}{{2.2.3}{149}{\emph {pipeline\_printout\_graph}}{section*.205}{}}
+\newlabel{pipeline_functions:pipeline-functions-pipeline-printout-graph-no-key-legend}{{2.2.3}{149}{\emph {pipeline\_printout\_graph}}{section*.206}{}}
+\newlabel{pipeline_functions:pipeline-functions-pipeline-printout-graph-minimal-key-legend}{{2.2.3}{149}{\emph {pipeline\_printout\_graph}}{section*.207}{}}
+\newlabel{pipeline_functions:pipeline-functions-pipeline-printout-graph-user-colour-scheme}{{2.2.3}{149}{\emph {pipeline\_printout\_graph}}{section*.208}{}}
+\newlabel{pipeline_functions:pipeline-functions-pipeline-printout-graph-pipeline-name}{{2.2.3}{150}{\emph {pipeline\_printout\_graph}}{section*.209}{}}
+\newlabel{pipeline_functions:pipeline-functions-pipeline-printout-graph-size}{{2.2.3}{150}{\emph {pipeline\_printout\_graph}}{section*.210}{}}
+\newlabel{pipeline_functions:pipeline-functions-pipeline-printout-graph-dpi}{{2.2.3}{150}{\emph {pipeline\_printout\_graph}}{section*.211}{}}
+\newlabel{pipeline_functions:pipeline-functions-pipeline-printout-graph-runtime-data}{{2.2.3}{150}{\emph {pipeline\_printout\_graph}}{section*.212}{}}
+\newlabel{pipeline_functions:pipeline-functions-pipeline-printout-graph-history-file}{{2.2.3}{150}{\emph {pipeline\_printout\_graph}}{section*.213}{}}
+\newlabel{pipeline_functions:pipeline-functions-pipeline-printout-graph-checksum-level}{{2.2.3}{150}{\emph {pipeline\_printout\_graph}}{section*.214}{}}
+\newlabel{pipeline_functions:pipeline-functions-pipeline-get-task-names}{{2.2.3}{151}{\emph {pipeline\_printout\_graph}}{section*.215}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {2.2.4}\emph {pipeline\_get\_task\_names}}{151}{subsection.2.2.4}}
+\newlabel{pipeline_functions:id5}{{2.2.4}{151}{\emph {pipeline\_get\_task\_names}}{subsection.2.2.4}{}}
+\newlabel{pipeline_functions:index-3}{{2.2.4}{151}{\emph {pipeline\_get\_task\_names}}{subsection.2.2.4}{}}
+\newlabel{drmaa_wrapper_functions:drmaa-functions}{{2.2.4}{151}{\emph {pipeline\_get\_task\_names}}{section*.216}{}}
+\newlabel{drmaa_wrapper_functions:drmaa-wrapper-run-job}{{2.2.4}{151}{\emph {pipeline\_get\_task\_names}}{section*.217}{}}
+\newlabel{drmaa_wrapper_functions:run-job}{{2.2.4}{151}{\emph {pipeline\_get\_task\_names}}{section*.218}{}}
+\newlabel{drmaa_wrapper_functions:drmaa-wrapper-run-job-cmd-str}{{2.2.4}{151}{\emph {pipeline\_get\_task\_names}}{section*.219}{}}
+\newlabel{drmaa_wrapper_functions:dw-cmd-str}{{2.2.4}{151}{\emph {pipeline\_get\_task\_names}}{section*.220}{}}
+\newlabel{drmaa_wrapper_functions:drmaa-wrapper-run-job-job-script-directory}{{2.2.4}{151}{\emph {pipeline\_get\_task\_names}}{section*.221}{}}
+\newlabel{drmaa_wrapper_functions:dw-job-script-directory}{{2.2.4}{151}{\emph {pipeline\_get\_task\_names}}{section*.222}{}}
+\newlabel{drmaa_wrapper_functions:drmaa-wrapper-run-job-job-environment}{{2.2.4}{151}{\emph {pipeline\_get\_task\_names}}{section*.223}{}}
+\newlabel{drmaa_wrapper_functions:dw-job-environment}{{2.2.4}{151}{\emph {pipeline\_get\_task\_names}}{section*.224}{}}
+\newlabel{drmaa_wrapper_functions:drmaa-wrapper-run-job-working-directory}{{2.2.4}{151}{\emph {pipeline\_get\_task\_names}}{section*.225}{}}
+\newlabel{drmaa_wrapper_functions:dw-working-directory}{{2.2.4}{151}{\emph {pipeline\_get\_task\_names}}{section*.226}{}}
+\newlabel{drmaa_wrapper_functions:drmaa-wrapper-run-job-retain-job-scripts}{{2.2.4}{151}{\emph {pipeline\_get\_task\_names}}{section*.227}{}}
+\newlabel{drmaa_wrapper_functions:dw-retain-job-scripts}{{2.2.4}{151}{\emph {pipeline\_get\_task\_names}}{section*.228}{}}
+\newlabel{drmaa_wrapper_functions:drmaa-wrapper-run-job-job-name}{{2.2.4}{151}{\emph {pipeline\_get\_task\_names}}{section*.229}{}}
+\newlabel{drmaa_wrapper_functions:dw-job-name}{{2.2.4}{151}{\emph {pipeline\_get\_task\_names}}{section*.230}{}}
+\newlabel{drmaa_wrapper_functions:drmaa-wrapper-run-job-job-other-options}{{2.2.4}{151}{\emph {pipeline\_get\_task\_names}}{section*.231}{}}
+\newlabel{drmaa_wrapper_functions:dw-job-other-options}{{2.2.4}{151}{\emph {pipeline\_get\_task\_names}}{section*.232}{}}
+\newlabel{drmaa_wrapper_functions:drmaa-wrapper-run-job-logger}{{2.2.4}{151}{\emph {pipeline\_get\_task\_names}}{section*.233}{}}
+\newlabel{drmaa_wrapper_functions:dw-logger}{{2.2.4}{151}{\emph {pipeline\_get\_task\_names}}{section*.234}{}}
+\newlabel{drmaa_wrapper_functions:drmaa-wrapper-run-job-drmaa-session}{{2.2.4}{151}{\emph {pipeline\_get\_task\_names}}{section*.235}{}}
+\newlabel{drmaa_wrapper_functions:dw-drmaa-session}{{2.2.4}{151}{\emph {pipeline\_get\_task\_names}}{section*.236}{}}
+\newlabel{drmaa_wrapper_functions:drmaa-wrapper-run-job-run-locally}{{2.2.4}{151}{\emph {pipeline\_get\_task\_names}}{section*.237}{}}
+\newlabel{drmaa_wrapper_functions:dw-run-locally}{{2.2.4}{151}{\emph {pipeline\_get\_task\_names}}{section*.238}{}}
+\newlabel{drmaa_wrapper_functions:drmaa-wrapper-run-job-output-files}{{2.2.4}{151}{\emph {pipeline\_get\_task\_names}}{section*.239}{}}
+\newlabel{drmaa_wrapper_functions:dw-output-files}{{2.2.4}{151}{\emph {pipeline\_get\_task\_names}}{section*.240}{}}
+\@writefile{toc}{\contentsline {section}{\numberline {2.3}drmaa functions}{151}{section.2.3}}
+\newlabel{drmaa_wrapper_functions:dw-touch-only}{{2.3}{151}{drmaa functions}{section.2.3}{}}
+\newlabel{drmaa_wrapper_functions:drmaa-wrapper-run-job-touch-only}{{2.3}{151}{drmaa functions}{section.2.3}{}}
+\newlabel{drmaa_wrapper_functions::doc}{{2.3}{151}{drmaa functions}{section.2.3}{}}
+\newlabel{drmaa_wrapper_functions:id1}{{2.3}{151}{drmaa functions}{section.2.3}{}}
+\newlabel{drmaa_wrapper_functions:drmaa-wrapper-run-job}{{2.3}{151}{drmaa functions}{section*.241}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {2.3.1}\emph {run\_job}}{151}{subsection.2.3.1}}
+\newlabel{drmaa_wrapper_functions:id2}{{2.3.1}{151}{\emph {run\_job}}{subsection.2.3.1}{}}
+\newlabel{drmaa_wrapper_functions:index-0}{{2.3.1}{151}{\emph {run\_job}}{subsection.2.3.1}{}}
+\newlabel{drmaa_wrapper_functions:drmaa-wrapper-run-job-cmd-str}{{2.3.1}{152}{\emph {run\_job}}{section*.242}{}}
+\newlabel{drmaa_wrapper_functions:drmaa-wrapper-run-job-job-name}{{2.3.1}{152}{\emph {run\_job}}{section*.243}{}}
+\newlabel{drmaa_wrapper_functions:drmaa-wrapper-run-job-job-other-options}{{2.3.1}{152}{\emph {run\_job}}{section*.244}{}}
+\newlabel{drmaa_wrapper_functions:drmaa-wrapper-run-job-job-script-directory}{{2.3.1}{153}{\emph {run\_job}}{section*.245}{}}
+\newlabel{drmaa_wrapper_functions:drmaa-wrapper-run-job-job-environment}{{2.3.1}{153}{\emph {run\_job}}{section*.246}{}}
+\newlabel{drmaa_wrapper_functions:drmaa-wrapper-run-job-working-directory}{{2.3.1}{153}{\emph {run\_job}}{section*.247}{}}
+\newlabel{drmaa_wrapper_functions:drmaa-wrapper-run-job-retain-job-scripts}{{2.3.1}{153}{\emph {run\_job}}{section*.248}{}}
+\newlabel{drmaa_wrapper_functions:drmaa-wrapper-run-job-logger}{{2.3.1}{153}{\emph {run\_job}}{section*.249}{}}
+\newlabel{drmaa_wrapper_functions:drmaa-wrapper-run-job-drmaa-session}{{2.3.1}{153}{\emph {run\_job}}{section*.250}{}}
+\newlabel{drmaa_wrapper_functions:drmaa-wrapper-run-job-run-locally}{{2.3.1}{153}{\emph {run\_job}}{section*.251}{}}
+\newlabel{drmaa_wrapper_functions:drmaa-wrapper-run-job-touch-only}{{2.3.1}{153}{\emph {run\_job}}{section*.252}{}}
+\newlabel{drmaa_wrapper_functions:drmaa-wrapper-run-job-output-files}{{2.3.1}{154}{\emph {run\_job}}{section*.253}{}}
+\@writefile{toc}{\contentsline {section}{\numberline {2.4}Installation}{154}{section.2.4}}
+\newlabel{installation:installation}{{2.4}{154}{Installation}{section.2.4}{}}
+\newlabel{installation::doc}{{2.4}{154}{Installation}{section.2.4}{}}
+\newlabel{installation:id1}{{2.4}{154}{Installation}{section.2.4}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {2.4.1}The easy way}{154}{subsection.2.4.1}}
+\newlabel{installation:the-easy-way}{{2.4.1}{154}{The easy way}{subsection.2.4.1}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {2.4.2}The most up-to-date code:}{154}{subsection.2.4.2}}
+\newlabel{installation:the-most-up-to-date-code}{{2.4.2}{154}{The most up-to-date code:}{subsection.2.4.2}{}}
+\@writefile{toc}{\contentsline {subsubsection}{Graphical flowcharts}{154}{subsubsection*.254}}
+\newlabel{installation:graphical-flowcharts}{{2.4.2}{154}{Graphical flowcharts}{subsubsection*.254}{}}
+\@writefile{toc}{\contentsline {section}{\numberline {2.5}Design \& Architecture}{155}{section.2.5}}
+\newlabel{design:index-0}{{2.5}{155}{Design \& Architecture}{section.2.5}{}}
+\newlabel{design::doc}{{2.5}{155}{Design \& Architecture}{section.2.5}{}}
+\newlabel{design:design-architecture}{{2.5}{155}{Design \& Architecture}{section.2.5}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {2.5.1}\emph {GNU Make}}{155}{subsection.2.5.1}}
+\newlabel{design:gnu-make}{{2.5.1}{155}{\emph {GNU Make}}{subsection.2.5.1}{}}
+\@writefile{toc}{\contentsline {subsubsection}{Deficiencies of \emph {make} / \emph {gmake}}{155}{subsubsection*.255}}
+\newlabel{design:design-make-syntax-ugly}{{2.5.1}{155}{Deficiencies of \emph {make} / \emph {gmake}}{subsubsection*.255}{}}
+\newlabel{design:deficiencies-of-make-gmake}{{2.5.1}{155}{Deficiencies of \emph {make} / \emph {gmake}}{subsubsection*.255}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {2.5.2}\emph {Scons}, \emph {Rake} and other \emph {Make} alternatives}{155}{subsection.2.5.2}}
+\newlabel{design:design-scons-and-rake}{{2.5.2}{155}{\emph {Scons}, \emph {Rake} and other \emph {Make} alternatives}{subsection.2.5.2}{}}
+\newlabel{design:scons-rake-and-other-make-alternatives}{{2.5.2}{155}{\emph {Scons}, \emph {Rake} and other \emph {Make} alternatives}{subsection.2.5.2}{}}
+\@writefile{toc}{\contentsline {subsubsection}{Implicit dependencies: disadvantages of \emph {make} / \emph {scons} / \emph {rake}}{156}{subsubsection*.256}}
+\newlabel{design:design-implicit-dependencies}{{2.5.2}{156}{Implicit dependencies: disadvantages of \emph {make} / \emph {scons} / \emph {rake}}{subsubsection*.256}{}}
+\newlabel{design:implicit-dependencies-disadvantages-of-make-scons-rake}{{2.5.2}{156}{Implicit dependencies: disadvantages of \emph {make} / \emph {scons} / \emph {rake}}{subsubsection*.256}{}}
+\@writefile{toc}{\contentsline {subsubsection}{Explicit dependencies in \emph {Ruffus}}{156}{subsubsection*.257}}
+\newlabel{design:explicit-dependencies-in-ruffus}{{2.5.2}{156}{Explicit dependencies in \emph {Ruffus}}{subsubsection*.257}{}}
+\newlabel{design:design-explicit-dependencies-in-ruffus}{{2.5.2}{156}{Explicit dependencies in \emph {Ruffus}}{subsubsection*.257}{}}
+\@writefile{toc}{\contentsline {subsubsection}{Static dependencies: What \emph {make} / \emph {scons} / \emph {rake} can't do (easily)}{156}{subsubsection*.258}}
+\newlabel{design:static-dependencies-what-make-scons-rake-can-t-do-easily}{{2.5.2}{156}{Static dependencies: What \emph {make} / \emph {scons} / \emph {rake} can't do (easily)}{subsubsection*.258}{}}
+\newlabel{design:design-static-dependencies}{{2.5.2}{156}{Static dependencies: What \emph {make} / \emph {scons} / \emph {rake} can't do (easily)}{subsubsection*.258}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {2.5.3}Managing pipelines stage-by-stage using \textbf {Ruffus}}{157}{subsection.2.5.3}}
+\newlabel{design:managing-pipelines-stage-by-stage-using-ruffus}{{2.5.3}{157}{Managing pipelines stage-by-stage using \textbf {Ruffus}}{subsection.2.5.3}{}}
+\@writefile{toc}{\contentsline {subsubsection}{Disadvantages of the Ruffus design}{158}{subsubsection*.259}}
+\newlabel{design:disadvantages-of-the-ruffus-design}{{2.5.3}{158}{Disadvantages of the Ruffus design}{subsubsection*.259}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {2.5.4}Alternatives to \textbf {Ruffus}}{159}{subsection.2.5.4}}
+\newlabel{design:index-1}{{2.5.4}{159}{Alternatives to \textbf {Ruffus}}{subsection.2.5.4}{}}
+\newlabel{design:alternatives-to-ruffus}{{2.5.4}{159}{Alternatives to \textbf {Ruffus}}{subsection.2.5.4}{}}
+\@writefile{toc}{\contentsline {subsubsection}{Acknowledgements}{159}{subsubsection*.260}}
+\newlabel{design:acknowledgements}{{2.5.4}{159}{Acknowledgements}{subsubsection*.260}{}}
+\newlabel{design:index-2}{{2.5.4}{159}{Acknowledgements}{subsubsection*.260}{}}
+\@writefile{toc}{\contentsline {section}{\numberline {2.6}Major Features added to Ruffus}{160}{section.2.6}}
+\newlabel{history:glob}{{2.6}{160}{Major Features added to Ruffus}{section.2.6}{}}
+\newlabel{history::doc}{{2.6}{160}{Major Features added to Ruffus}{section.2.6}{}}
+\newlabel{history:major-features-added-to-ruffus}{{2.6}{160}{Major Features added to Ruffus}{section.2.6}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {2.6.1}version 2.5RC}{160}{subsection.2.6.1}}
+\newlabel{history:version-2-5rc}{{2.6.1}{160}{version 2.5RC}{subsection.2.6.1}{}}
+\@writefile{toc}{\contentsline {subsubsection}{1) Python3 compatibility (but at least python 2.6 is now required)}{160}{subsubsection*.261}}
+\newlabel{history:python3-compatability-but-at-least-python-2-6-is-now-required}{{2.6.1}{160}{1) Python3 compatibility (but at least python 2.6 is now required)}{subsubsection*.261}{}}
+\@writefile{toc}{\contentsline {subsubsection}{2) Ctrl-C interrupts}{160}{subsubsection*.262}}
+\newlabel{history:ctrl-c-interrupts}{{2.6.1}{160}{2) Ctrl-C interrupts}{subsubsection*.262}{}}
+\@writefile{toc}{\contentsline {subsubsection}{3) Customising flowcharts in pipeline\_printout\_graph() with \texttt {@graphviz}}{160}{subsubsection*.263}}
+\newlabel{history:customising-flowcharts-in-pipeline-printout-graph-with-graphviz}{{2.6.1}{160}{3) Customising flowcharts in pipeline\_printout\_graph() with \texttt {@graphviz}}{subsubsection*.263}{}}
+\@writefile{toc}{\contentsline {subsubsection}{4. Consistent verbosity levels}{161}{subsubsection*.264}}
+\newlabel{history:consistent-verbosity-levels}{{2.6.1}{161}{4. Consistent verbosity levels}{subsubsection*.264}{}}
+\@writefile{toc}{\contentsline {subsubsection}{5. Allow abbreviated paths from \texttt {pipeline\_run} or \texttt {pipeline\_printout}}{162}{subsubsection*.265}}
+\newlabel{history:allow-abbreviated-paths-from-pipeline-run-or-pipeline-printout}{{2.6.1}{162}{5. Allow abbreviated paths from \texttt {pipeline\_run} or \texttt {pipeline\_printout}}{subsubsection*.265}{}}
+\@writefile{toc}{\contentsline {subsubsection}{Other changes}{163}{subsubsection*.266}}
+\newlabel{history:other-changes}{{2.6.1}{163}{Other changes}{subsubsection*.266}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {2.6.2}version 2.4.1}{163}{subsection.2.6.2}}
+\newlabel{history:version-2-4-1}{{2.6.2}{163}{version 2.4.1}{subsection.2.6.2}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {2.6.3}version 2.4}{163}{subsection.2.6.3}}
+\newlabel{history:version-2-4}{{2.6.3}{163}{version 2.4}{subsection.2.6.3}{}}
+\@writefile{toc}{\contentsline {subsubsection}{Additions to \texttt {ruffus} namespace}{163}{subsubsection*.267}}
+\newlabel{history:additions-to-ruffus-namespace}{{2.6.3}{163}{Additions to \texttt {ruffus} namespace}{subsubsection*.267}{}}
+\@writefile{toc}{\contentsline {subsubsection}{Installation: use pip}{163}{subsubsection*.268}}
+\newlabel{history:installation-use-pip}{{2.6.3}{163}{Installation: use pip}{subsubsection*.268}{}}
+\@writefile{toc}{\contentsline {subsubsection}{1) Command Line support}{163}{subsubsection*.269}}
+\newlabel{history:command-line-support}{{2.6.3}{163}{1) Command Line support}{subsubsection*.269}{}}
+\@writefile{toc}{\contentsline {subsubsection}{2) Checkpointing}{164}{subsubsection*.270}}
+\newlabel{history:check-pointing}{{2.6.3}{164}{2) Checkpointing}{subsubsection*.270}{}}
+\@writefile{toc}{\contentsline {subsubsection}{3) \emph {subdivide()} (\emph {syntax})}{164}{subsubsection*.271}}
+\newlabel{history:subdivide-syntax}{{2.6.3}{164}{3) \emph {subdivide()} (\emph {syntax})}{subsubsection*.271}{}}
+\@writefile{toc}{\contentsline {subsubsection}{4) \emph {mkdir()} (\emph {syntax}) with \emph {formatter()}, \emph {suffix()} and \emph {regex()}}{164}{subsubsection*.272}}
+\newlabel{history:mkdir-syntax-with-formatter-suffix-and-regex}{{2.6.3}{164}{4) \emph {mkdir()} (\emph {syntax}) with \emph {formatter()}, \emph {suffix()} and \emph {regex()}}{subsubsection*.272}{}}
+\@writefile{toc}{\contentsline {subsubsection}{5) \emph {originate()} (\emph {syntax})}{164}{subsubsection*.273}}
+\newlabel{history:originate-syntax}{{2.6.3}{164}{5) \emph {originate()} (\emph {syntax})}{subsubsection*.273}{}}
+\@writefile{toc}{\contentsline {subsubsection}{6) New flexible \emph {formatter()} (\emph {syntax}) alternative to \emph {regex()} \& \emph {suffix()}}{165}{subsubsection*.274}}
+\newlabel{history:new-flexible-formatter-syntax-alternative-to-regex-suffix}{{2.6.3}{165}{6) New flexible \emph {formatter()} (\emph {syntax}) alternative to \emph {regex()} \& \emph {suffix()}}{subsubsection*.274}{}}
+\@writefile{toc}{\contentsline {subsubsection}{7) Combinatorics (all vs. all decorators)}{165}{subsubsection*.275}}
+\newlabel{history:combinatorics-all-vs-all-decorators}{{2.6.3}{165}{7) Combinatorics (all vs. all decorators)}{subsubsection*.275}{}}
+\@writefile{toc}{\contentsline {subsubsection}{8) drmaa support and multithreading:}{165}{subsubsection*.276}}
+\newlabel{history:drmaa-support-and-multithreading}{{2.6.3}{165}{8) drmaa support and multithreading:}{subsubsection*.276}{}}
+\@writefile{toc}{\contentsline {subsubsection}{9) \texttt {pipeline\_run(...)} and exceptions}{165}{subsubsection*.277}}
+\newlabel{history:pipeline-run-and-exceptions}{{2.6.3}{165}{9) \texttt {pipeline\_run(...)} and exceptions}{subsubsection*.277}{}}
+\@writefile{toc}{\contentsline {subsubsection}{10) Miscellaneous}{165}{subsubsection*.278}}
+\newlabel{history:miscellaneous}{{2.6.3}{165}{10) Miscellaneous}{subsubsection*.278}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {2.6.4}version 2.3}{166}{subsection.2.6.4}}
+\newlabel{history:version-2-3}{{2.6.4}{166}{version 2.3}{subsection.2.6.4}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {2.6.5}version 2.2}{167}{subsection.2.6.5}}
+\newlabel{history:version-2-2}{{2.6.5}{167}{version 2.2}{subsection.2.6.5}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {2.6.6}version 2.1.1}{168}{subsection.2.6.6}}
+\newlabel{history:version-2-1-1}{{2.6.6}{168}{version 2.1.1}{subsection.2.6.6}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {2.6.7}version 2.1.0}{169}{subsection.2.6.7}}
+\newlabel{history:version-2-1-0}{{2.6.7}{169}{version 2.1.0}{subsection.2.6.7}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {2.6.8}version 2.0.10}{169}{subsection.2.6.8}}
+\newlabel{history:version-2-0-10}{{2.6.8}{169}{version 2.0.10}{subsection.2.6.8}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {2.6.9}version 2.0.9}{170}{subsection.2.6.9}}
+\newlabel{history:version-2-0-9}{{2.6.9}{170}{version 2.0.9}{subsection.2.6.9}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {2.6.10}version 2.0.8}{171}{subsection.2.6.10}}
+\newlabel{history:version-2-0-8}{{2.6.10}{171}{version 2.0.8}{subsection.2.6.10}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {2.6.11}version 2.0.2}{171}{subsection.2.6.11}}
+\newlabel{history:version-2-0-2}{{2.6.11}{171}{version 2.0.2}{subsection.2.6.11}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {2.6.12}version 2.0}{171}{subsection.2.6.12}}
+\newlabel{history:version-2-0}{{2.6.12}{171}{version 2.0}{subsection.2.6.12}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {2.6.13}version 1.1.4}{171}{subsection.2.6.13}}
+\newlabel{history:version-1-1-4}{{2.6.13}{171}{version 1.1.4}{subsection.2.6.13}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {2.6.14}version 1.0.7}{171}{subsection.2.6.14}}
+\newlabel{history:version-1-0-7}{{2.6.14}{171}{version 1.0.7}{subsection.2.6.14}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {2.6.15}version 1.0}{171}{subsection.2.6.15}}
+\newlabel{history:version-1-0}{{2.6.15}{171}{version 1.0}{subsection.2.6.15}{}}
+\@writefile{toc}{\contentsline {section}{\numberline {2.7}Fixed Bugs}{172}{section.2.7}}
+\newlabel{history:fixed-bugs}{{2.7}{172}{Fixed Bugs}{section.2.7}{}}
+\@writefile{toc}{\contentsline {section}{\numberline {2.8}Future Changes to Ruffus}{172}{section.2.8}}
+\newlabel{todo:todo}{{2.8}{172}{Future Changes to Ruffus}{section.2.8}{}}
+\newlabel{todo::doc}{{2.8}{172}{Future Changes to Ruffus}{section.2.8}{}}
+\newlabel{todo:future-changes-to-ruffus}{{2.8}{172}{Future Changes to Ruffus}{section.2.8}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {2.8.1}Todo: pipeline\_printout\_graph should print inactive tasks}{172}{subsection.2.8.1}}
+\newlabel{todo:todo-pipeline-printout-graph-should-print-inactive-tasks}{{2.8.1}{172}{Todo: pipeline\_printout\_graph should print inactive tasks}{subsection.2.8.1}{}}
+\newlabel{todo:todo-inactive-tasks-in-pipeline-printout-graph}{{2.8.1}{172}{Todo: pipeline\_printout\_graph should print inactive tasks}{subsection.2.8.1}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {2.8.2}Todo: Mark input strings as non-file names, and add support for dynamically returned parameters}{172}{subsection.2.8.2}}
+\newlabel{todo:todo-dynamic-strings}{{2.8.2}{172}{Todo: Mark input strings as non-file names, and add support for dynamically returned parameters}{subsection.2.8.2}{}}
+\newlabel{todo:todo-mark-input-strings-as-non-file-names-and-add-support-for-dynamically-returned-parameters}{{2.8.2}{172}{Todo: Mark input strings as non-file names, and add support for dynamically returned parameters}{subsection.2.8.2}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {2.8.3}Todo: Allow ``extra'' parameters to be used in output substitution}{172}{subsection.2.8.3}}
+\newlabel{todo:todo-allow-extra-parameters-to-be-used-in-output-substitution}{{2.8.3}{172}{Todo: Allow ``extra'' parameters to be used in output substitution}{subsection.2.8.3}{}}
+\newlabel{todo:todo-extra-parameters}{{2.8.3}{172}{Todo: Allow ``extra'' parameters to be used in output substitution}{subsection.2.8.3}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {2.8.4}Todo: Extra signalling before and after each task and job}{173}{subsection.2.8.4}}
+\newlabel{todo:todo-extra-signalling-before-and-after-each-task-and-job}{{2.8.4}{173}{Todo: Extra signalling before and after each task and job}{subsection.2.8.4}{}}
+\newlabel{todo:todo-pre-post-job}{{2.8.4}{173}{Todo: Extra signalling before and after each task and job}{subsection.2.8.4}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {2.8.5}Todo: \texttt {@split} / \texttt {@subdivide} returns the actual output created}{173}{subsection.2.8.5}}
+\newlabel{todo:todo-new-decorators}{{2.8.5}{173}{Todo: \texttt {@split} / \texttt {@subdivide} returns the actual output created}{subsection.2.8.5}{}}
+\newlabel{todo:todo-split-subdivide-returns-the-actual-output-created}{{2.8.5}{173}{Todo: \texttt {@split} / \texttt {@subdivide} returns the actual output created}{subsection.2.8.5}{}}
+\@writefile{toc}{\contentsline {subsubsection}{Checkpointing}{174}{subsubsection*.279}}
+\newlabel{todo:checkpointing}{{2.8.5}{174}{Checkpointing}{subsubsection*.279}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {2.8.6}Todo: New decorators}{174}{subsection.2.8.6}}
+\newlabel{todo:id1}{{2.8.6}{174}{Todo: New decorators}{subsection.2.8.6}{}}
+\@writefile{toc}{\contentsline {subsubsection}{Todo: \texttt {@originate}}{174}{subsubsection*.280}}
+\newlabel{todo:todo-originate}{{2.8.6}{174}{Todo: \texttt {@originate}}{subsubsection*.280}{}}
+\@writefile{toc}{\contentsline {subsubsection}{Todo: \texttt {@recombine}}{174}{subsubsection*.281}}
+\newlabel{todo:todo-recombine}{{2.8.6}{174}{Todo: \texttt {@recombine}}{subsubsection*.281}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {2.8.7}Todo: Named parameters in decorators for clarity}{174}{subsection.2.8.7}}
+\newlabel{todo:todo-named-parameters-in-decorators-for-clarity}{{2.8.7}{174}{Todo: Named parameters in decorators for clarity}{subsection.2.8.7}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {2.8.8}Todo: Bioinformatics example to end all examples}{174}{subsection.2.8.8}}
+\newlabel{todo:todo-bioinformatics-example-to-end-all-examples}{{2.8.8}{174}{Todo: Bioinformatics example to end all examples}{subsection.2.8.8}{}}
+\newlabel{todo:todo-bioinformatics-example}{{2.8.8}{174}{Todo: Bioinformatics example to end all examples}{subsection.2.8.8}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {2.8.9}Todo: Allow the next task to start before all jobs in the previous task have finished}{175}{subsection.2.8.9}}
+\newlabel{todo:todo-allow-the-next-task-to-start-before-all-jobs-in-the-previous-task-have-finished}{{2.8.9}{175}{Todo: Allow the next task to start before all jobs in the previous task have finished}{subsection.2.8.9}{}}
+\@writefile{toc}{\contentsline {subsubsection}{Converting to per-job rather than per task dependencies}{175}{subsubsection*.282}}
+\newlabel{todo:converting-to-per-job-rather-than-per-task-dependencies}{{2.8.9}{175}{Converting to per-job rather than per task dependencies}{subsubsection*.282}{}}
+\@writefile{toc}{\contentsline {subsubsection}{Implementation}{175}{subsubsection*.283}}
+\newlabel{todo:implementation}{{2.8.9}{175}{Implementation}{subsubsection*.283}{}}
+\@writefile{toc}{\contentsline {section}{\numberline {2.9}Planned Improvements to Ruffus}{176}{section.2.9}}
+\newlabel{todo:planned-improvements-to-ruffus}{{2.9}{176}{Planned Improvements to Ruffus}{section.2.9}{}}
+\newlabel{todo:todo-run-on-cluster}{{2.9}{176}{Planned Improvements to Ruffus}{section*.284}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {2.9.1}Planned: Running python code (task functions) transparently on remote cluster nodes}{176}{subsection.2.9.1}}
+\newlabel{todo:planned-running-python-code-task-functions-transparently-on-remote-cluster-nodes}{{2.9.1}{176}{Planned: Running python code (task functions) transparently on remote cluster nodes}{subsection.2.9.1}{}}
+\newlabel{todo:todo-job-trickling}{{2.9.1}{177}{Planned: Running python code (task functions) transparently on remote cluster nodes}{section*.285}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {2.9.2}Planned: Custom parameter generator}{177}{subsection.2.9.2}}
+\newlabel{todo:todo-job-trickling}{{2.9.2}{177}{Planned: Custom parameter generator}{subsection.2.9.2}{}}
+\newlabel{todo:planned-custom-parameter-generator}{{2.9.2}{177}{Planned: Custom parameter generator}{subsection.2.9.2}{}}
+\newlabel{todo:todo-custom-parameters}{{2.9.2}{177}{Planned: Custom parameter generator}{subsection.2.9.2}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {2.9.3}Planned: Ruffus GUI interface.}{177}{subsection.2.9.3}}
+\newlabel{todo:planned-ruffus-gui-interface}{{2.9.3}{177}{Planned: Ruffus GUI interface}{subsection.2.9.3}{}}
+\newlabel{todo:todo-gui}{{2.9.3}{177}{Planned: Ruffus GUI interface}{subsection.2.9.3}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {2.9.4}Planned: Non-decorator / Function interface to Ruffus}{177}{subsection.2.9.4}}
+\newlabel{todo:planned-non-decorator-function-interface-to-ruffus}{{2.9.4}{177}{Planned: Non-decorator / Function interface to Ruffus}{subsection.2.9.4}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {2.9.5}Planned: Remove intermediate files}{177}{subsection.2.9.5}}
+\newlabel{todo:planned-remove-intermediate-files}{{2.9.5}{177}{Planned: Remove intermediate files}{subsection.2.9.5}{}}
+\newlabel{todo:todo-intermediate-files}{{2.9.5}{177}{Planned: Remove intermediate files}{subsection.2.9.5}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {2.9.6}Planned: @retry\_on\_error(NUM\_OF\_RETRIES)}{178}{subsection.2.9.6}}
+\newlabel{todo:planned-retry-on-error-num-of-retries}{{2.9.6}{178}{Planned: @retry\_on\_error(NUM\_OF\_RETRIES)}{subsection.2.9.6}{}}
+\newlabel{todo:todo-retry}{{2.9.6}{178}{Planned: @retry\_on\_error(NUM\_OF\_RETRIES)}{subsection.2.9.6}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {2.9.7}Planned: Clean up}{178}{subsection.2.9.7}}
+\newlabel{todo:planned-clean-up}{{2.9.7}{178}{Planned: Clean up}{subsection.2.9.7}{}}
+\newlabel{todo:todo-cleanup}{{2.9.7}{178}{Planned: Clean up}{subsection.2.9.7}{}}
+\@writefile{toc}{\contentsline {section}{\numberline {2.10}Implementation Tips}{179}{section.2.10}}
+\newlabel{implementation_notes:implementation-tips}{{2.10}{179}{Implementation Tips}{section.2.10}{}}
+\newlabel{implementation_notes::doc}{{2.10}{179}{Implementation Tips}{section.2.10}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {2.10.1}Release}{179}{subsection.2.10.1}}
+\newlabel{implementation_notes:release}{{2.10.1}{179}{Release}{subsection.2.10.1}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {2.10.2}dbdict.py}{180}{subsection.2.10.2}}
+\newlabel{implementation_notes:dbdict-py}{{2.10.2}{180}{dbdict.py}{subsection.2.10.2}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {2.10.3}how to write new decorators}{180}{subsection.2.10.3}}
+\newlabel{implementation_notes:how-to-write-new-decorators}{{2.10.3}{180}{how to write new decorators}{subsection.2.10.3}{}}
+\@writefile{toc}{\contentsline {section}{\numberline {2.11}Implementation notes}{181}{section.2.11}}
+\newlabel{implementation_notes:implementation-notes}{{2.11}{181}{Implementation notes}{section.2.11}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {2.11.1}\texttt {Ctrl-C} handling}{181}{subsection.2.11.1}}
+\newlabel{implementation_notes:ctrl-c-handling}{{2.11.1}{181}{\texttt {Ctrl-C} handling}{subsection.2.11.1}{}}
+\newlabel{implementation_notes:todo-misfeatures}{{2.11.1}{181}{\texttt {Ctrl-C} handling}{subsection.2.11.1}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {2.11.2}Python3 compatibility}{182}{subsection.2.11.2}}
+\newlabel{implementation_notes:python3-compatability}{{2.11.2}{182}{Python3 compatibility}{subsection.2.11.2}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {2.11.3}Refactoring: parameter handling}{183}{subsection.2.11.3}}
+\newlabel{implementation_notes:refactoring-parameter-handling}{{2.11.3}{183}{Refactoring: parameter handling}{subsection.2.11.3}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {2.11.4}\texttt {formatter}}{183}{subsection.2.11.4}}
+\newlabel{implementation_notes:formatter}{{2.11.4}{183}{\texttt {formatter}}{subsection.2.11.4}{}}
+\@writefile{toc}{\contentsline {subsubsection}{\texttt {formatter()}: \texttt {regex()} and \texttt {suffix()}}{184}{subsubsection*.286}}
+\newlabel{implementation_notes:formatter-regex-and-suffix}{{2.11.4}{184}{\texttt {formatter()}: \texttt {regex()} and \texttt {suffix()}}{subsubsection*.286}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {2.11.5}@product()}{184}{subsection.2.11.5}}
+\newlabel{implementation_notes:product}{{2.11.5}{184}{@product()}{subsection.2.11.5}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {2.11.6}\texttt {@permutations(...),} \texttt {@combinations(...),} \texttt {@combinations\_with\_replacement(...)}}{185}{subsection.2.11.6}}
+\newlabel{implementation_notes:permutations-combinations-combinations-with-replacement}{{2.11.6}{185}{\texttt {@permutations(...),} \texttt {@combinations(...),} \texttt {@combinations\_with\_replacement(...)}}{subsection.2.11.6}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {2.11.7}drmaa alternatives}{185}{subsection.2.11.7}}
+\newlabel{implementation_notes:drmaa-alternatives}{{2.11.7}{185}{drmaa alternatives}{subsection.2.11.7}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {2.11.8}Task completion monitoring}{185}{subsection.2.11.8}}
+\newlabel{implementation_notes:task-completion-monitoring}{{2.11.8}{185}{Task completion monitoring}{subsection.2.11.8}{}}
+\@writefile{toc}{\contentsline {subsubsection}{How easy is it to abstract out the database?}{185}{subsubsection*.287}}
+\newlabel{implementation_notes:how-easy-is-it-to-abstract-out-the-database}{{2.11.8}{185}{How easy is it to abstract out the database?}{subsubsection*.287}{}}
+\@writefile{toc}{\contentsline {subsubsection}{Can we query the database, get Job history / stats?}{185}{subsubsection*.288}}
+\newlabel{implementation_notes:can-we-query-the-database-get-job-history-stats}{{2.11.8}{185}{Can we query the database, get Job history / stats?}{subsubsection*.288}{}}
+\@writefile{toc}{\contentsline {subsubsection}{What are the run time performance implications?}{186}{subsubsection*.289}}
+\newlabel{implementation_notes:what-are-the-run-time-performance-implications}{{2.11.8}{186}{What are the run time performance implications?}{subsubsection*.289}{}}
+\@writefile{toc}{\contentsline {subsubsection}{Avoid pauses between tasks}{186}{subsubsection*.290}}
+\newlabel{implementation_notes:avoid-pauses-between-tasks}{{2.11.8}{186}{Avoid pauses between tasks}{subsubsection*.290}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {2.11.9}\texttt {@mkdir(...),}}{186}{subsection.2.11.9}}
+\newlabel{implementation_notes:mkdir}{{2.11.9}{186}{\texttt {@mkdir(...),}}{subsection.2.11.9}{}}
+\@writefile{toc}{\contentsline {section}{\numberline {2.12}FAQ}{186}{section.2.12}}
+\newlabel{faq:glob}{{2.12}{186}{FAQ}{section.2.12}{}}
+\newlabel{faq:faq}{{2.12}{186}{FAQ}{section.2.12}{}}
+\newlabel{faq::doc}{{2.12}{186}{FAQ}{section.2.12}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {2.12.1}Citations}{186}{subsection.2.12.1}}
+\newlabel{faq:citations}{{2.12.1}{186}{Citations}{subsection.2.12.1}{}}
+\@writefile{toc}{\contentsline {subsubsection}{Q. How should \emph {Ruffus} be cited in academic publications?}{186}{subsubsection*.291}}
+\newlabel{faq:q-how-should-ruffus-be-cited-in-academic-publications}{{2.12.1}{186}{Q. How should \emph {Ruffus} be cited in academic publications?}{subsubsection*.291}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {2.12.2}Good practices}{186}{subsection.2.12.2}}
+\newlabel{faq:good-practices}{{2.12.2}{186}{Good practices}{subsection.2.12.2}{}}
+\@writefile{toc}{\contentsline {subsubsection}{Q. What is the best way of keeping my data and workings separate?}{186}{subsubsection*.292}}
+\newlabel{faq:q-what-is-the-best-way-of-keeping-my-data-and-workings-separate}{{2.12.2}{186}{Q. What is the best way of keeping my data and workings separate?}{subsubsection*.292}{}}
+\@writefile{toc}{\contentsline {subsubsection}{Q. What is the best way of handling data in file pairs (or triplets etc.)}{187}{subsubsection*.293}}
+\newlabel{faq:q-what-is-the-best-way-of-handling-data-in-file-pairs-or-triplets-etc}{{2.12.2}{187}{Q. What is the best way of handling data in file pairs (or triplets etc.)}{subsubsection*.293}{}}
+\newlabel{faq:faq-paired-files}{{2.12.2}{187}{Q. What is the best way of handling data in file pairs (or triplets etc.)}{subsubsection*.293}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {2.12.3}General}{188}{subsection.2.12.3}}
+\newlabel{faq:general}{{2.12.3}{188}{General}{subsection.2.12.3}{}}
+\@writefile{toc}{\contentsline {subsubsection}{Q. \emph {Ruffus} won't create dependency graphs}{188}{subsubsection*.294}}
+\newlabel{faq:q-ruffus-won-t-create-dependency-graphs}{{2.12.3}{188}{Q. \emph {Ruffus} won't create dependency graphs}{subsubsection*.294}{}}
+\@writefile{toc}{\contentsline {subsubsection}{Q. \emph {Ruffus} seems to be hanging in the same place}{188}{subsubsection*.295}}
+\newlabel{faq:q-ruffus-seems-to-be-hanging-in-the-same-place}{{2.12.3}{188}{Q. \emph {Ruffus} seems to be hanging in the same place}{subsubsection*.295}{}}
+\@writefile{toc}{\contentsline {subsubsection}{Q. Regular expression substitutions don't work}{188}{subsubsection*.296}}
+\newlabel{faq:q-regular-expression-substitutions-don-t-work}{{2.12.3}{188}{Q. Regular expression substitutions don't work}{subsubsection*.296}{}}
+\@writefile{toc}{\contentsline {subsubsection}{Q. How to force a pipeline to appear up to date?}{189}{subsubsection*.297}}
+\newlabel{faq:q-how-to-force-a-pipeline-to-appear-up-to-date}{{2.12.3}{189}{Q. How to force a pipeline to appear up to date?}{subsubsection*.297}{}}
+\@writefile{toc}{\contentsline {subsubsection}{Q. How can I use my own decorators with Ruffus?}{189}{subsubsection*.298}}
+\newlabel{faq:q-how-can-i-use-my-own-decorators-with-ruffus}{{2.12.3}{189}{Q. How can I use my own decorators with Ruffus?}{subsubsection*.298}{}}
+\@writefile{toc}{\contentsline {paragraph}{1. Use @wraps from \texttt {functools} or Michele Simionato's decorator module}{189}{paragraph*.299}}
+\newlabel{faq:use-wraps-from-functools-or-michele-simionato-s-decorator-module}{{2.12.3}{189}{1. Use @wraps from \texttt {functools} or Michele Simionato's decorator module}{paragraph*.299}{}}
+\@writefile{toc}{\contentsline {paragraph}{2. Always call Ruffus decorators first before your own decorators.}{189}{paragraph*.300}}
+\newlabel{faq:always-call-ruffus-decorators-first-before-your-own-decorators}{{2.12.3}{189}{2. Always call Ruffus decorators first before your own decorators}{paragraph*.300}{}}
+\@writefile{toc}{\contentsline {paragraph}{Example decorator:}{190}{paragraph*.301}}
+\newlabel{faq:example-decorator}{{2.12.3}{190}{Example decorator:}{paragraph*.301}{}}
+\@writefile{toc}{\contentsline {paragraph}{1. Using functools @wraps}{190}{paragraph*.302}}
+\newlabel{faq:using-functools-wraps}{{2.12.3}{190}{1. Using functools @wraps}{paragraph*.302}{}}
+\@writefile{toc}{\contentsline {paragraph}{2. Using Michele Simionato's decorator module}{191}{paragraph*.303}}
+\newlabel{faq:using-michele-simionato-s-decorator-module}{{2.12.3}{191}{2. Using Michele Simionato's decorator module}{paragraph*.303}{}}
+\@writefile{toc}{\contentsline {paragraph}{3. By hand, using a callable object}{191}{paragraph*.304}}
+\newlabel{faq:by-hand-using-a-callable-object}{{2.12.3}{191}{3. By hand, using a callable object}{paragraph*.304}{}}
+\@writefile{toc}{\contentsline {subsubsection}{Q. Can a task function in a \emph {Ruffus} pipeline be called normally outside of Ruffus?}{191}{subsubsection*.305}}
+\newlabel{faq:q-can-a-task-function-in-a-ruffus-pipeline-be-called-normally-outside-of-ruffus}{{2.12.3}{191}{Q. Can a task function in a \emph {Ruffus} pipeline be called normally outside of Ruffus?}{subsubsection*.305}{}}
+\@writefile{toc}{\contentsline {subsubsection}{Q. My \emph {Ruffus} tasks create two files at a time. Why is the second one ignored in successive stages of my pipeline?}{191}{subsubsection*.306}}
+\newlabel{faq:q-my-ruffus-tasks-create-two-files-at-a-time-why-is-the-second-one-ignored-in-successive-stages-of-my-pipeline}{{2.12.3}{191}{Q. My \emph {Ruffus} tasks create two files at a time. Why is the second one ignored in successive stages of my pipeline?}{subsubsection*.306}{}}
+\@writefile{toc}{\contentsline {subsubsection}{Q. How can a \emph {Ruffus} task produce output which goes off in different directions?}{192}{subsubsection*.307}}
+\newlabel{faq:q-how-can-a-ruffus-task-produce-output-which-goes-off-in-different-directions}{{2.12.3}{192}{Q. How can a \emph {Ruffus} task produce output which goes off in different directions?}{subsubsection*.307}{}}
+\@writefile{toc}{\contentsline {subsubsection}{Q. Can I call extra code before each job?}{193}{subsubsection*.308}}
+\newlabel{faq:q-can-i-call-extra-code-before-each-job}{{2.12.3}{193}{Q. Can I call extra code before each job?}{subsubsection*.308}{}}
+\@writefile{toc}{\contentsline {subsubsection}{Q. Does \emph {Ruffus} allow checkpointing: to distinguish interrupted and completed results?}{194}{subsubsection*.309}}
+\newlabel{faq:q-does-ruffus-allow-checkpointing-to-distinguish-interrupted-and-completed-results}{{2.12.3}{194}{Q. Does \emph {Ruffus} allow checkpointing: to distinguish interrupted and completed results?}{subsubsection*.309}{}}
+\@writefile{toc}{\contentsline {paragraph}{A. Use the builtin sqlite checkpointing}{194}{paragraph*.310}}
+\newlabel{faq:a-use-the-builtin-sqlite-checkpointing}{{2.12.3}{194}{A. Use the builtin sqlite checkpointing}{paragraph*.310}{}}
+\@writefile{toc}{\contentsline {paragraph}{A. Use a flag file}{194}{paragraph*.311}}
+\newlabel{faq:a-use-a-flag-file}{{2.12.3}{194}{A. Use a flag file}{paragraph*.311}{}}
+\@writefile{toc}{\contentsline {paragraph}{A. Use a temp file}{195}{paragraph*.312}}
+\newlabel{faq:a-use-a-temp-file}{{2.12.3}{195}{A. Use a temp file}{paragraph*.312}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {2.12.4}Windows}{196}{subsection.2.12.4}}
+\newlabel{faq:windows}{{2.12.4}{196}{Windows}{subsection.2.12.4}{}}
+\@writefile{toc}{\contentsline {subsubsection}{Q. Windows seems to spawn \emph {ruffus} processes recursively}{196}{subsubsection*.313}}
+\newlabel{faq:q-windows-seems-to-spawn-ruffus-processes-recursively}{{2.12.4}{196}{Q. Windows seems to spawn \emph {ruffus} processes recursively}{subsubsection*.313}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {2.12.5}Sun Grid Engine / PBS / SLURM etc}{196}{subsection.2.12.5}}
+\newlabel{faq:sun-grid-engine-pbs-slurm-etc}{{2.12.5}{196}{Sun Grid Engine / PBS / SLURM etc}{subsection.2.12.5}{}}
+\@writefile{toc}{\contentsline {subsubsection}{Q. Can Ruffus be used to manage a cluster or grid based pipeline?}{196}{subsubsection*.314}}
+\newlabel{faq:q-can-ruffus-be-used-to-manage-a-cluster-or-grid-based-pipeline}{{2.12.5}{196}{Q. Can Ruffus be used to manage a cluster or grid based pipeline?}{subsubsection*.314}{}}
+\@writefile{toc}{\contentsline {subsubsection}{Q. When I submit lots of jobs via Sun Grid Engine (SGE), the head node occasionally freezes and dies}{197}{subsubsection*.315}}
+\newlabel{faq:q-when-i-submit-lots-of-jobs-via-sun-grid-engine-sge-the-head-node-occassionally-freezes-and-dies}{{2.12.5}{197}{Q. When I submit lots of jobs via Sun Grid Engine (SGE), the head node occasionally freezes and dies}{subsubsection*.315}{}}
+\@writefile{toc}{\contentsline {subsubsection}{Q. Keeping Large intermediate files}{197}{subsubsection*.316}}
+\newlabel{faq:q-keeping-large-intermediate-files}{{2.12.5}{197}{Q. Keeping Large intermediate files}{subsubsection*.316}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {2.12.6}Sharing python objects between Ruffus processes running concurrently}{197}{subsection.2.12.6}}
+\newlabel{faq:sharing-python-objects-between-ruffus-processes-running-concurrently}{{2.12.6}{197}{Sharing python objects between Ruffus processes running concurrently}{subsection.2.12.6}{}}
+\@writefile{toc}{\contentsline {subsubsection}{Can ordinary python objects be shared between processes?}{198}{subsubsection*.317}}
+\newlabel{faq:can-ordinary-python-objects-be-shared-between-processes}{{2.12.6}{198}{Can ordinary python objects be shared between processes?}{subsubsection*.317}{}}
+\@writefile{toc}{\contentsline {subsubsection}{Why am I getting \texttt {PicklingError}?}{198}{subsubsection*.318}}
+\newlabel{faq:why-am-i-getting-picklingerror}{{2.12.6}{198}{Why am I getting \texttt {PicklingError}?}{subsubsection*.318}{}}
+\@writefile{toc}{\contentsline {subsubsection}{How about synchronising python objects in real time?}{199}{subsubsection*.319}}
+\newlabel{faq:id1}{{2.12.6}{199}{How about synchronising python objects in real time?}{subsubsection*.319}{}}
+\newlabel{faq:how-about-synchronising-python-objects-in-real-time}{{2.12.6}{199}{How about synchronising python objects in real time?}{subsubsection*.319}{}}
+\@writefile{toc}{\contentsline {subsubsection}{Can I share and synchronise my own python classes via proxies?}{199}{subsubsection*.320}}
+\newlabel{faq:can-i-share-and-synchronise-my-own-python-classes-via-proxies}{{2.12.6}{199}{Can I share and synchronise my own python classes via proxies?}{subsubsection*.320}{}}
+\@writefile{toc}{\contentsline {subsubsection}{How do I send python objects back and forth without tangling myself in horrible synchronisation code?}{200}{subsubsection*.321}}
+\newlabel{faq:how-do-i-send-python-objects-back-and-forth-without-tangling-myself-in-horrible-synchronisation-code}{{2.12.6}{200}{How do I send python objects back and forth without tangling myself in horrible synchronisation code?}{subsubsection*.321}{}}
+\@writefile{toc}{\contentsline {subsubsection}{How do I share large amounts of data efficiently across processes?}{200}{subsubsection*.322}}
+\newlabel{faq:how-do-i-share-large-amounts-of-data-efficiently-across-processes}{{2.12.6}{200}{How do I share large amounts of data efficiently across processes?}{subsubsection*.322}{}}
+\@writefile{toc}{\contentsline {section}{\numberline {2.13}Glossary}{200}{section.2.13}}
+\newlabel{glossary:glossary}{{2.13}{200}{Glossary}{section.2.13}{}}
+\newlabel{glossary:glob}{{2.13}{200}{Glossary}{section.2.13}{}}
+\newlabel{glossary::doc}{{2.13}{200}{Glossary}{section.2.13}{}}
+\newlabel{glossary:id1}{{2.13}{200}{Glossary}{section*.323}{}}
+\newlabel{glossary:glossary-task}{{2.13}{200}{Glossary}{section*.324}{}}
+\newlabel{glossary:term-task}{{2.13}{200}{Glossary}{section*.325}{}}
+\newlabel{glossary:term-job}{{2.13}{200}{Glossary}{section*.326}{}}
+\newlabel{glossary:term-decorator}{{2.13}{201}{Glossary}{section*.327}{}}
+\newlabel{glossary:term-generator}{{2.13}{201}{Glossary}{section*.328}{}}
+\@writefile{toc}{\contentsline {section}{\numberline {2.14}Hall of Fame: User contributed flowcharts}{201}{section.2.14}}
+\newlabel{gallery:hall-of-fame-user-contributed-flowcharts}{{2.14}{201}{Hall of Fame: User contributed flowcharts}{section.2.14}{}}
+\newlabel{gallery::doc}{{2.14}{201}{Hall of Fame: User contributed flowcharts}{section.2.14}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {2.14.1}RNASeq pipeline}{201}{subsection.2.14.1}}
+\newlabel{gallery:rnaseq-pipeline}{{2.14.1}{201}{RNASeq pipeline}{subsection.2.14.1}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {2.14.2}non-coding evolutionary constraints}{203}{subsection.2.14.2}}
+\newlabel{gallery:non-coding-evolutionary-constraints}{{2.14.2}{203}{non-coding evolutionary constraints}{subsection.2.14.2}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {2.14.3}SNP annotation}{203}{subsection.2.14.3}}
+\newlabel{gallery:snp-annotation}{{2.14.3}{203}{SNP annotation}{subsection.2.14.3}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {2.14.4}ChIP-Seq analysis}{204}{subsection.2.14.4}}
+\newlabel{gallery:chip-seq-analysis}{{2.14.4}{204}{ChIP-Seq analysis}{subsection.2.14.4}{}}
+\@writefile{toc}{\contentsline {section}{\numberline {2.15}Why \emph {Ruffus}?}{204}{section.2.15}}
+\newlabel{why_ruffus:index-0}{{2.15}{204}{Why \emph {Ruffus}?}{section.2.15}{}}
+\newlabel{why_ruffus:why-ruffus}{{2.15}{204}{Why \emph {Ruffus}?}{section.2.15}{}}
+\newlabel{why_ruffus::doc}{{2.15}{204}{Why \emph {Ruffus}?}{section.2.15}{}}
+\newlabel{why_ruffus:design-why-ruffus}{{2.15}{204}{Why \emph {Ruffus}?}{section.2.15}{}}
+\@writefile{toc}{\contentsline {chapter}{\numberline {3}Examples}{207}{chapter.3}}
+\@writefile{lof}{\addvspace {10\p@ }}
+\@writefile{lot}{\addvspace {10\p@ }}
+\newlabel{contents:examples}{{3}{207}{Examples}{chapter.3}{}}
+\@writefile{toc}{\contentsline {section}{\numberline {3.1}Construction of a simple pipeline to run BLAST jobs}{207}{section.3.1}}
+\newlabel{examples/bioinformatics/index:construction-of-a-simple-pipeline-to-run-blast-jobs}{{3.1}{207}{Construction of a simple pipeline to run BLAST jobs}{section.3.1}{}}
+\newlabel{examples/bioinformatics/index::doc}{{3.1}{207}{Construction of a simple pipeline to run BLAST jobs}{section.3.1}{}}
+\newlabel{examples/bioinformatics/index:examples-bioinformatics-part1}{{3.1}{207}{Construction of a simple pipeline to run BLAST jobs}{section.3.1}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {3.1.1}Overview}{207}{subsection.3.1.1}}
+\newlabel{examples/bioinformatics/index:overview}{{3.1.1}{207}{Overview}{subsection.3.1.1}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {3.1.2}Prerequisites}{207}{subsection.3.1.2}}
+\newlabel{examples/bioinformatics/index:prerequisites}{{3.1.2}{207}{Prerequisites}{subsection.3.1.2}{}}
+\@writefile{toc}{\contentsline {subsubsection}{1. Ruffus}{207}{subsubsection*.329}}
+\newlabel{examples/bioinformatics/index:ruffus}{{3.1.2}{207}{1. Ruffus}{subsubsection*.329}{}}
+\@writefile{toc}{\contentsline {subsubsection}{2. BLAST}{208}{subsubsection*.330}}
+\newlabel{examples/bioinformatics/index:blast}{{3.1.2}{208}{2. BLAST}{subsubsection*.330}{}}
+\@writefile{toc}{\contentsline {subsubsection}{3. human refseq sequence database}{208}{subsubsection*.331}}
+\newlabel{examples/bioinformatics/index:human-refseq-sequence-database}{{3.1.2}{208}{3. human refseq sequence database}{subsubsection*.331}{}}
+\@writefile{toc}{\contentsline {subsubsection}{4. test sequences}{208}{subsubsection*.332}}
+\newlabel{examples/bioinformatics/index:test-sequences}{{3.1.2}{208}{4. test sequences}{subsubsection*.332}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {3.1.3}Code}{208}{subsection.3.1.3}}
+\newlabel{examples/bioinformatics/index:code}{{3.1.3}{208}{Code}{subsection.3.1.3}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {3.1.4}Step 1. Splitting up the query sequences}{208}{subsection.3.1.4}}
+\newlabel{examples/bioinformatics/index:step-1-splitting-up-the-query-sequences}{{3.1.4}{208}{Step 1. Splitting up the query sequences}{subsection.3.1.4}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {3.1.5}Step 2. Run BLAST jobs in parallel}{209}{subsection.3.1.5}}
+\newlabel{examples/bioinformatics/index:step-2-run-blast-jobs-in-parallel}{{3.1.5}{209}{Step 2. Run BLAST jobs in parallel}{subsection.3.1.5}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {3.1.6}Step 3. Combining BLAST results}{209}{subsection.3.1.6}}
+\newlabel{examples/bioinformatics/index:step-3-combining-blast-results}{{3.1.6}{209}{Step 3. Combining BLAST results}{subsection.3.1.6}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {3.1.7}Step 4. Running the pipeline}{210}{subsection.3.1.7}}
+\newlabel{examples/bioinformatics/index:step-4-running-the-pipeline}{{3.1.7}{210}{Step 4. Running the pipeline}{subsection.3.1.7}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {3.1.8}Step 5. Testing dependencies}{210}{subsection.3.1.8}}
+\newlabel{examples/bioinformatics/index:step-5-testing-dependencies}{{3.1.8}{210}{Step 5. Testing dependencies}{subsection.3.1.8}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {3.1.9}What is next?}{211}{subsection.3.1.9}}
+\newlabel{examples/bioinformatics/index:what-is-next}{{3.1.9}{211}{What is next?}{subsection.3.1.9}{}}
+\@writefile{toc}{\contentsline {section}{\numberline {3.2}Part 2: A slightly more practical pipeline to run BLAST jobs}{211}{section.3.2}}
+\newlabel{examples/bioinformatics/part2:part-2-a-slightly-more-practical-pipeline-to-run-blasts-jobs}{{3.2}{211}{Part 2: A slightly more practical pipeline to run BLAST jobs}{section.3.2}{}}
+\newlabel{examples/bioinformatics/part2::doc}{{3.2}{211}{Part 2: A slightly more practical pipeline to run BLAST jobs}{section.3.2}{}}
+\newlabel{examples/bioinformatics/part2:examples-bioinformatics-part2}{{3.2}{211}{Part 2: A slightly more practical pipeline to run BLAST jobs}{section.3.2}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {3.2.1}Overview}{211}{subsection.3.2.1}}
+\newlabel{examples/bioinformatics/part2:overview}{{3.2.1}{211}{Overview}{subsection.3.2.1}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {3.2.2}Step 1. Cleaning up any leftover junk from previous pipeline runs}{212}{subsection.3.2.2}}
+\newlabel{examples/bioinformatics/part2:step-1-cleaning-up-any-leftover-junk-from-previous-pipeline-runs}{{3.2.2}{212}{Step 1. Cleaning up any leftover junk from previous pipeline runs}{subsection.3.2.2}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {3.2.3}Step 2. Adding a ``flag'' file to mark successful completion}{212}{subsection.3.2.3}}
+\newlabel{examples/bioinformatics/part2:step-2-adding-a-flag-file-to-mark-successful-completion}{{3.2.3}{212}{Step 2. Adding a ``flag'' file to mark successful completion}{subsection.3.2.3}{}}
+\newlabel{examples/bioinformatics/part2:examples-bioinformatics-part2-step2}{{3.2.3}{212}{Step 2. Adding a ``flag'' file to mark successful completion}{subsection.3.2.3}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {3.2.4}Step 3. Allowing the script to be invoked on the command line}{213}{subsection.3.2.4}}
+\newlabel{examples/bioinformatics/part2:step-3-allowing-the-script-to-be-invoked-on-the-command-line}{{3.2.4}{213}{Step 3. Allowing the script to be invoked on the command line}{subsection.3.2.4}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {3.2.5}Step 4. Printing out a flowchart for the pipeline}{213}{subsection.3.2.5}}
+\newlabel{examples/bioinformatics/part2:step-4-printing-out-a-flowchart-for-the-pipeline}{{3.2.5}{213}{Step 4. Printing out a flowchart for the pipeline}{subsection.3.2.5}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {3.2.6}Step 5. Errors}{213}{subsection.3.2.6}}
+\newlabel{examples/bioinformatics/part2:step-5-errors}{{3.2.6}{213}{Step 5. Errors}{subsection.3.2.6}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {3.2.7}Step 6. Will it run?}{214}{subsection.3.2.7}}
+\newlabel{examples/bioinformatics/part2:step-6-will-it-run}{{3.2.7}{214}{Step 6. Will it run?}{subsection.3.2.7}{}}
+\@writefile{toc}{\contentsline {section}{\numberline {3.3}Ruffus code}{215}{section.3.3}}
+\newlabel{examples/bioinformatics/part1_code:examples-bioinformatics-part1-code}{{3.3}{215}{Ruffus code}{section.3.3}{}}
+\newlabel{examples/bioinformatics/part1_code:ruffus-code}{{3.3}{215}{Ruffus code}{section.3.3}{}}
+\newlabel{examples/bioinformatics/part1_code::doc}{{3.3}{215}{Ruffus code}{section.3.3}{}}
+\@writefile{toc}{\contentsline {section}{\numberline {3.4}Ruffus code}{216}{section.3.4}}
+\newlabel{examples/bioinformatics/part2_code:ruffus-code}{{3.4}{216}{Ruffus code}{section.3.4}{}}
+\newlabel{examples/bioinformatics/part2_code:examples-bioinformatics-part2-code}{{3.4}{216}{Ruffus code}{section.3.4}{}}
+\newlabel{examples/bioinformatics/part2_code::doc}{{3.4}{216}{Ruffus code}{section.3.4}{}}
+\@writefile{toc}{\contentsline {section}{\numberline {3.5}Example code for \emph {FAQ Good practices: ``What is the best way of handling data in file pairs (or triplets etc.)?''}}{220}{section.3.5}}
+\newlabel{examples/paired_end_data.py:example-code-for-faq-good-practices-what-is-the-best-way-of-handling-data-in-file-pairs-or-triplets-etc}{{3.5}{220}{Example code for \emph {FAQ Good practices: ``What is the best way of handling data in file pairs (or triplets etc.)?''}}{section.3.5}{}}
+\newlabel{examples/paired_end_data.py:faq-paired-files-code}{{3.5}{220}{Example code for \emph {FAQ Good practices: ``What is the best way of handling data in file pairs (or triplets etc.)?''}}{section.3.5}{}}
+\newlabel{examples/paired_end_data.py::doc}{{3.5}{220}{Example code for \emph {FAQ Good practices: ``What is the best way of handling data in file pairs (or triplets etc.)?''}}{section.3.5}{}}
+\@writefile{toc}{\contentsline {chapter}{\numberline {4}Reference:}{223}{chapter.4}}
+\@writefile{lof}{\addvspace {10\p@ }}
+\@writefile{lot}{\addvspace {10\p@ }}
+\newlabel{contents:reference}{{4}{223}{Reference:}{chapter.4}{}}
+\@writefile{toc}{\contentsline {section}{\numberline {4.1}Decorators}{223}{section.4.1}}
+\newlabel{contents:decorators}{{4.1}{223}{Decorators}{section.4.1}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {4.1.1}Ruffus Decorators}{223}{subsection.4.1.1}}
+\newlabel{decorators/decorators:glob}{{4.1.1}{223}{Ruffus Decorators}{subsection.4.1.1}{}}
+\newlabel{decorators/decorators::doc}{{4.1.1}{223}{Ruffus Decorators}{subsection.4.1.1}{}}
+\newlabel{decorators/decorators:ruffus-decorators}{{4.1.1}{223}{Ruffus Decorators}{subsection.4.1.1}{}}
+\@writefile{toc}{\contentsline {subsubsection}{\emph {Core}}{223}{subsubsection*.333}}
+\newlabel{decorators/decorators:core}{{4.1.1}{223}{\emph {Core}}{subsubsection*.333}{}}
+\newlabel{decorators/decorators:decorators}{{4.1.1}{223}{\emph {Core}}{subsubsection*.333}{}}
+\@writefile{toc}{\contentsline {subsubsection}{\emph {Combinatorics}}{225}{subsubsection*.334}}
+\newlabel{decorators/decorators:combinatorics}{{4.1.1}{225}{\emph {Combinatorics}}{subsubsection*.334}{}}
+\newlabel{decorators/decorators:decorators-combinatorics}{{4.1.1}{225}{\emph {Combinatorics}}{subsubsection*.334}{}}
+\@writefile{toc}{\contentsline {subsubsection}{\emph {Advanced}}{227}{subsubsection*.335}}
+\newlabel{decorators/decorators:advanced}{{4.1.1}{227}{\emph {Advanced}}{subsubsection*.335}{}}
+\@writefile{toc}{\contentsline {subsubsection}{\emph {Esoteric!}}{228}{subsubsection*.336}}
+\newlabel{decorators/decorators:esoteric}{{4.1.1}{228}{\emph {Esoteric!}}{subsubsection*.336}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {4.1.2}Indicator Objects}{228}{subsection.4.1.2}}
+\newlabel{decorators/indicator_objects:decorators-indicator-objects}{{4.1.2}{228}{Indicator Objects}{subsection.4.1.2}{}}
+\newlabel{decorators/indicator_objects:index-0}{{4.1.2}{228}{Indicator Objects}{subsection.4.1.2}{}}
+\newlabel{decorators/indicator_objects::doc}{{4.1.2}{228}{Indicator Objects}{subsection.4.1.2}{}}
+\newlabel{decorators/indicator_objects:indicator-objects}{{4.1.2}{228}{Indicator Objects}{subsection.4.1.2}{}}
+\@writefile{toc}{\contentsline {subsubsection}{\emph {formatter}}{228}{subsubsection*.337}}
+\newlabel{decorators/indicator_objects:decorators-formatter}{{4.1.2}{228}{\emph {formatter}}{subsubsection*.337}{}}
+\newlabel{decorators/indicator_objects:formatter}{{4.1.2}{228}{\emph {formatter}}{subsubsection*.337}{}}
+\newlabel{decorators/indicator_objects:index-1}{{4.1.2}{228}{\emph {formatter}}{subsubsection*.337}{}}
+\@writefile{toc}{\contentsline {subsubsection}{\emph {suffix}}{231}{subsubsection*.338}}
+\newlabel{decorators/indicator_objects:decorators-suffix}{{4.1.2}{231}{\emph {suffix}}{subsubsection*.338}{}}
+\newlabel{decorators/indicator_objects:index-2}{{4.1.2}{231}{\emph {suffix}}{subsubsection*.338}{}}
+\newlabel{decorators/indicator_objects:suffix}{{4.1.2}{231}{\emph {suffix}}{subsubsection*.338}{}}
+\@writefile{toc}{\contentsline {subsubsection}{\emph {regex}}{232}{subsubsection*.339}}
+\newlabel{decorators/indicator_objects:decorators-regex}{{4.1.2}{232}{\emph {regex}}{subsubsection*.339}{}}
+\newlabel{decorators/indicator_objects:regex}{{4.1.2}{232}{\emph {regex}}{subsubsection*.339}{}}
+\newlabel{decorators/indicator_objects:index-3}{{4.1.2}{232}{\emph {regex}}{subsubsection*.339}{}}
+\@writefile{toc}{\contentsline {subsubsection}{\emph {add\_inputs}}{232}{subsubsection*.340}}
+\newlabel{decorators/indicator_objects:index-4}{{4.1.2}{232}{\emph {add\_inputs}}{subsubsection*.340}{}}
+\newlabel{decorators/indicator_objects:add-inputs}{{4.1.2}{232}{\emph {add\_inputs}}{subsubsection*.340}{}}
+\newlabel{decorators/indicator_objects:decorators-add-inputs}{{4.1.2}{232}{\emph {add\_inputs}}{subsubsection*.340}{}}
+\@writefile{toc}{\contentsline {subsubsection}{\emph {inputs}}{233}{subsubsection*.341}}
+\newlabel{decorators/indicator_objects:decorators-inputs}{{4.1.2}{233}{\emph {inputs}}{subsubsection*.341}{}}
+\newlabel{decorators/indicator_objects:index-5}{{4.1.2}{233}{\emph {inputs}}{subsubsection*.341}{}}
+\newlabel{decorators/indicator_objects:inputs}{{4.1.2}{233}{\emph {inputs}}{subsubsection*.341}{}}
+\@writefile{toc}{\contentsline {subsubsection}{\emph {mkdir}}{234}{subsubsection*.342}}
+\newlabel{decorators/indicator_objects:index-6}{{4.1.2}{234}{\emph {mkdir}}{subsubsection*.342}{}}
+\newlabel{decorators/indicator_objects:mkdir}{{4.1.2}{234}{\emph {mkdir}}{subsubsection*.342}{}}
+\newlabel{decorators/indicator_objects:decorators-indicator-objects-mkdir}{{4.1.2}{234}{\emph {mkdir}}{subsubsection*.342}{}}
+\@writefile{toc}{\contentsline {subsubsection}{\emph {touch\_file}}{234}{subsubsection*.343}}
+\newlabel{decorators/indicator_objects:touch-file}{{4.1.2}{234}{\emph {touch\_file}}{subsubsection*.343}{}}
+\newlabel{decorators/indicator_objects:index-7}{{4.1.2}{234}{\emph {touch\_file}}{subsubsection*.343}{}}
+\newlabel{decorators/indicator_objects:decorators-touch-file}{{4.1.2}{234}{\emph {touch\_file}}{subsubsection*.343}{}}
+\@writefile{toc}{\contentsline {subsubsection}{\emph {output\_from}}{235}{subsubsection*.344}}
+\newlabel{decorators/indicator_objects:index-8}{{4.1.2}{235}{\emph {output\_from}}{subsubsection*.344}{}}
+\newlabel{decorators/indicator_objects:decorators-output-from}{{4.1.2}{235}{\emph {output\_from}}{subsubsection*.344}{}}
+\newlabel{decorators/indicator_objects:output-from}{{4.1.2}{235}{\emph {output\_from}}{subsubsection*.344}{}}
+\@writefile{toc}{\contentsline {subsubsection}{\emph {combine}}{235}{subsubsection*.345}}
+\newlabel{decorators/indicator_objects:combine}{{4.1.2}{235}{\emph {combine}}{subsubsection*.345}{}}
+\newlabel{decorators/indicator_objects:decorators-combine}{{4.1.2}{235}{\emph {combine}}{subsubsection*.345}{}}
+\newlabel{decorators/indicator_objects:index-9}{{4.1.2}{235}{\emph {combine}}{subsubsection*.345}{}}
+\newlabel{decorators/originate:decorators-originate}{{4.1.2}{237}{\emph {combine}}{section*.346}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {4.1.3}@originate}{237}{subsection.4.1.3}}
+\newlabel{decorators/originate:originate}{{4.1.3}{237}{@originate}{subsection.4.1.3}{}}
+\newlabel{decorators/originate::doc}{{4.1.3}{237}{@originate}{subsection.4.1.3}{}}
+\newlabel{decorators/originate:decorators-originate-output-files}{{4.1.3}{237}{@originate}{section*.347}{}}
+\newlabel{decorators/originate:output-files}{{4.1.3}{237}{@originate}{section*.348}{}}
+\@writefile{toc}{\contentsline {subsubsection}{\emph {@originate} ( \emph {output\_files}, {[}\emph {extra\_parameters},...{]} )}{237}{subsubsection*.349}}
+\newlabel{decorators/originate:decorators-originate-extra-parameters}{{4.1.3}{237}{\emph {@originate} ( \emph {output\_files}, {[}\emph {extra\_parameters},...{]} )}{subsubsection*.349}{}}
+\newlabel{decorators/originate:originate-output-files-extra-parameters}{{4.1.3}{237}{\emph {@originate} ( \emph {output\_files}, {[}\emph {extra\_parameters},...{]} )}{subsubsection*.349}{}}
+\newlabel{decorators/originate:extra-parameters}{{4.1.3}{237}{\emph {@originate} ( \emph {output\_files}, {[}\emph {extra\_parameters},...{]} )}{subsubsection*.349}{}}
+\newlabel{decorators/originate:decorators-originate-output-files}{{4.1.3}{237}{\emph {@originate} ( \emph {output\_files}, {[}\emph {extra\_parameters},...{]} )}{section*.350}{}}
+\newlabel{decorators/originate:decorators-originate-extra-parameters}{{4.1.3}{237}{\emph {@originate} ( \emph {output\_files}, {[}\emph {extra\_parameters},...{]} )}{section*.351}{}}
+\newlabel{decorators/split:decorators-split}{{4.1.3}{237}{\emph {@originate} ( \emph {output\_files}, {[}\emph {extra\_parameters},...{]} )}{section*.352}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {4.1.4}@split}{237}{subsection.4.1.4}}
+\newlabel{decorators/split::doc}{{4.1.4}{237}{@split}{subsection.4.1.4}{}}
+\newlabel{decorators/split:split}{{4.1.4}{237}{@split}{subsection.4.1.4}{}}
+\newlabel{decorators/split:decorators-split-tasks-or-file-names}{{4.1.4}{237}{@split}{section*.353}{}}
+\newlabel{decorators/split:tasks-or-file-names}{{4.1.4}{237}{@split}{section*.354}{}}
+\newlabel{decorators/split:decorators-split-extra-parameters}{{4.1.4}{237}{@split}{section*.355}{}}
+\newlabel{decorators/split:extra-parameters}{{4.1.4}{237}{@split}{section*.356}{}}
+\@writefile{toc}{\contentsline {subsubsection}{\emph {@split} ( \emph {tasks\_or\_file\_names}, \emph {output\_files}, {[}\emph {extra\_parameters},...{]} )}{237}{subsubsection*.357}}
+\newlabel{decorators/split:output-files}{{4.1.4}{237}{\emph {@split} ( \emph {tasks\_or\_file\_names}, \emph {output\_files}, {[}\emph {extra\_parameters},...{]} )}{subsubsection*.357}{}}
+\newlabel{decorators/split:decorators-split-output-files}{{4.1.4}{237}{\emph {@split} ( \emph {tasks\_or\_file\_names}, \emph {output\_files}, {[}\emph {extra\_parameters},...{]} )}{subsubsection*.357}{}}
+\newlabel{decorators/split:split-tasks-or-file-names-output-files-extra-parameters}{{4.1.4}{237}{\emph {@split} ( \emph {tasks\_or\_file\_names}, \emph {output\_files}, {[}\emph {extra\_parameters},...{]} )}{subsubsection*.357}{}}
+\newlabel{decorators/split:decorators-split-tasks-or-file-names}{{4.1.4}{237}{\emph {@split} ( \emph {tasks\_or\_file\_names}, \emph {output\_files}, {[}\emph {extra\_parameters},...{]} )}{section*.358}{}}
+\newlabel{decorators/split:decorators-split-output-files}{{4.1.4}{237}{\emph {@split} ( \emph {tasks\_or\_file\_names}, \emph {output\_files}, {[}\emph {extra\_parameters},...{]} )}{section*.359}{}}
+\newlabel{decorators/split:decorators-split-extra-parameters}{{4.1.4}{237}{\emph {@split} ( \emph {tasks\_or\_file\_names}, \emph {output\_files}, {[}\emph {extra\_parameters},...{]} )}{section*.360}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {4.1.5}@split with \texttt {regex(...)}, \texttt {add\_inputs} and \texttt {inputs}}{237}{subsection.4.1.5}}
+\newlabel{decorators/split:split-with-regex-add-inputs-and-inputs}{{4.1.5}{237}{@split with \texttt {regex(...)}, \texttt {add\_inputs} and \texttt {inputs}}{subsection.4.1.5}{}}
+\newlabel{decorators/transform:decorators-transform}{{4.1.5}{237}{@split with \texttt {regex(...)}, \texttt {add\_inputs} and \texttt {inputs}}{section*.361}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {4.1.6}@transform}{237}{subsection.4.1.6}}
+\newlabel{decorators/transform::doc}{{4.1.6}{237}{@transform}{subsection.4.1.6}{}}
+\newlabel{decorators/transform:transform}{{4.1.6}{237}{@transform}{subsection.4.1.6}{}}
+\newlabel{decorators/transform:decorators-transform-tasks-or-file-names}{{4.1.6}{237}{@transform}{section*.362}{}}
+\newlabel{decorators/transform:tasks-or-file-names}{{4.1.6}{237}{@transform}{section*.363}{}}
+\newlabel{decorators/transform:decorators-transform-extra-parameters}{{4.1.6}{237}{@transform}{section*.364}{}}
+\newlabel{decorators/transform:extra-parameters}{{4.1.6}{237}{@transform}{section*.365}{}}
+\newlabel{decorators/transform:decorators-transform-output-pattern}{{4.1.6}{237}{@transform}{section*.366}{}}
+\newlabel{decorators/transform:output-pattern}{{4.1.6}{237}{@transform}{section*.367}{}}
+\newlabel{decorators/transform:decorators-transform-matching-regex}{{4.1.6}{237}{@transform}{section*.368}{}}
+\newlabel{decorators/transform:matching-regex}{{4.1.6}{237}{@transform}{section*.369}{}}
+\newlabel{decorators/transform:decorators-transform-matching-formatter}{{4.1.6}{237}{@transform}{section*.370}{}}
+\newlabel{decorators/transform:matching-formatter}{{4.1.6}{237}{@transform}{section*.371}{}}
+\@writefile{toc}{\contentsline {subsubsection}{\emph {@transform} ( \emph {tasks\_or\_file\_names}, \emph {suffix}\emph {(}\emph {suffix\_string}\emph {)}\textbar {} \emph {regex}\emph {(}\emph {matching\_regex}\emph {)} \textbar {} \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, \emph {output\_pattern}, {[}\emph {extra\_parameters},...{]} )}{237}{subsubsection*.372}}
+\newlabel{decorators/transform:suffix-string}{{4.1.6}{237}{\emph {@transform} ( \emph {tasks\_or\_file\_names}, \emph {suffix}\emph {(}\emph {suffix\_string}\emph {)}\textbar {} \emph {regex}\emph {(}\emph {matching\_regex}\emph {)} \textbar {} \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, \emph {output\_pattern}, {[}\emph {extra\_parameters},...{]} )}{subsubsection*.372}{}}
+\newlabel{decorators/transform:transform-tasks-or-file-names-suffix-suffix-string-regex-matching-regex-formatter-matching-formatter-output-pattern-extra-parameters}{{4.1.6}{237}{\emph {@transform} ( \emph {tasks\_or\_file\_names}, \emph {suffix}\emph {(}\emph {suffix\_string}\emph {)}\textbar {} \emph {regex}\emph {(}\emph {matching\_regex}\emph {)} \textbar {} \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, \emph {output\_pattern}, {[}\emph {extra\_parameters},...{]} )}{ [...]
+\newlabel{decorators/transform:decorators-transform-suffix-string}{{4.1.6}{237}{\emph {@transform} ( \emph {tasks\_or\_file\_names}, \emph {suffix}\emph {(}\emph {suffix\_string}\emph {)}\textbar {} \emph {regex}\emph {(}\emph {matching\_regex}\emph {)} \textbar {} \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, \emph {output\_pattern}, {[}\emph {extra\_parameters},...{]} )}{subsubsection*.372}{}}
+\newlabel{decorators/transform:decorators-transform-tasks-or-file-names}{{4.1.6}{237}{\emph {@transform} ( \emph {tasks\_or\_file\_names}, \emph {suffix}\emph {(}\emph {suffix\_string}\emph {)}\textbar {} \emph {regex}\emph {(}\emph {matching\_regex}\emph {)} \textbar {} \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, \emph {output\_pattern}, {[}\emph {extra\_parameters},...{]} )}{section*.373}{}}
+\newlabel{decorators/transform:decorators-transform-suffix-string}{{4.1.6}{237}{\emph {@transform} ( \emph {tasks\_or\_file\_names}, \emph {suffix}\emph {(}\emph {suffix\_string}\emph {)}\textbar {} \emph {regex}\emph {(}\emph {matching\_regex}\emph {)} \textbar {} \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, \emph {output\_pattern}, {[}\emph {extra\_parameters},...{]} )}{section*.374}{}}
+\newlabel{decorators/transform:decorators-transform-matching-regex}{{4.1.6}{237}{\emph {@transform} ( \emph {tasks\_or\_file\_names}, \emph {suffix}\emph {(}\emph {suffix\_string}\emph {)}\textbar {} \emph {regex}\emph {(}\emph {matching\_regex}\emph {)} \textbar {} \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, \emph {output\_pattern}, {[}\emph {extra\_parameters},...{]} )}{section*.375}{}}
+\newlabel{decorators/transform:decorators-transform-matching-formatter}{{4.1.6}{237}{\emph {@transform} ( \emph {tasks\_or\_file\_names}, \emph {suffix}\emph {(}\emph {suffix\_string}\emph {)}\textbar {} \emph {regex}\emph {(}\emph {matching\_regex}\emph {)} \textbar {} \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, \emph {output\_pattern}, {[}\emph {extra\_parameters},...{]} )}{section*.376}{}}
+\newlabel{decorators/transform:decorators-transform-output-pattern}{{4.1.6}{237}{\emph {@transform} ( \emph {tasks\_or\_file\_names}, \emph {suffix}\emph {(}\emph {suffix\_string}\emph {)}\textbar {} \emph {regex}\emph {(}\emph {matching\_regex}\emph {)} \textbar {} \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, \emph {output\_pattern}, {[}\emph {extra\_parameters},...{]} )}{section*.377}{}}
+\newlabel{decorators/transform:decorators-transform-extra-parameters}{{4.1.6}{237}{\emph {@transform} ( \emph {tasks\_or\_file\_names}, \emph {suffix}\emph {(}\emph {suffix\_string}\emph {)}\textbar {} \emph {regex}\emph {(}\emph {matching\_regex}\emph {)} \textbar {} \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, \emph {output\_pattern}, {[}\emph {extra\_parameters},...{]} )}{section*.378}{}}
+\newlabel{decorators/merge:decorators-merge}{{4.1.6}{237}{\emph {@transform} ( \emph {tasks\_or\_file\_names}, \emph {suffix}\emph {(}\emph {suffix\_string}\emph {)}\textbar {} \emph {regex}\emph {(}\emph {matching\_regex}\emph {)} \textbar {} \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, \emph {output\_pattern}, {[}\emph {extra\_parameters},...{]} )}{section*.379}{}}
+\newlabel{decorators/merge:decorators-merge-tasks-or-file-names}{{4.1.6}{237}{\emph {@transform} ( \emph {tasks\_or\_file\_names}, \emph {suffix}\emph {(}\emph {suffix\_string}\emph {)}\textbar {} \emph {regex}\emph {(}\emph {matching\_regex}\emph {)} \textbar {} \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, \emph {output\_pattern}, {[}\emph {extra\_parameters},...{]} )}{section*.380}{}}
+\newlabel{decorators/merge:tasks-or-file-names}{{4.1.6}{237}{\emph {@transform} ( \emph {tasks\_or\_file\_names}, \emph {suffix}\emph {(}\emph {suffix\_string}\emph {)}\textbar {} \emph {regex}\emph {(}\emph {matching\_regex}\emph {)} \textbar {} \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, \emph {output\_pattern}, {[}\emph {extra\_parameters},...{]} )}{section*.381}{}}
+\newlabel{decorators/merge:decorators-merge-extra-parameters}{{4.1.6}{237}{\emph {@transform} ( \emph {tasks\_or\_file\_names}, \emph {suffix}\emph {(}\emph {suffix\_string}\emph {)}\textbar {} \emph {regex}\emph {(}\emph {matching\_regex}\emph {)} \textbar {} \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, \emph {output\_pattern}, {[}\emph {extra\_parameters},...{]} )}{section*.382}{}}
+\newlabel{decorators/merge:extra-parameters}{{4.1.6}{237}{\emph {@transform} ( \emph {tasks\_or\_file\_names}, \emph {suffix}\emph {(}\emph {suffix\_string}\emph {)}\textbar {} \emph {regex}\emph {(}\emph {matching\_regex}\emph {)} \textbar {} \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, \emph {output\_pattern}, {[}\emph {extra\_parameters},...{]} )}{section*.383}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {4.1.7}@merge}{237}{subsection.4.1.7}}
+\newlabel{decorators/merge:decorators-merge-output-file}{{4.1.7}{237}{@merge}{subsection.4.1.7}{}}
+\newlabel{decorators/merge:merge}{{4.1.7}{237}{@merge}{subsection.4.1.7}{}}
+\newlabel{decorators/merge::doc}{{4.1.7}{237}{@merge}{subsection.4.1.7}{}}
+\newlabel{decorators/merge:output-file}{{4.1.7}{237}{@merge}{subsection.4.1.7}{}}
+\@writefile{toc}{\contentsline {subsubsection}{\emph {@merge} ( \emph {tasks\_or\_file\_names}, \emph {output\_file}, {[}\emph {extra\_parameters},...{]} )}{237}{subsubsection*.384}}
+\newlabel{decorators/merge:merge-tasks-or-file-names-output-file-extra-parameters}{{4.1.7}{237}{\emph {@merge} ( \emph {tasks\_or\_file\_names}, \emph {output\_file}, {[}\emph {extra\_parameters},...{]} )}{subsubsection*.384}{}}
+\newlabel{decorators/merge:decorators-merge-tasks-or-file-names}{{4.1.7}{237}{\emph {@merge} ( \emph {tasks\_or\_file\_names}, \emph {output\_file}, {[}\emph {extra\_parameters},...{]} )}{section*.385}{}}
+\newlabel{decorators/merge:decorators-merge-output-file}{{4.1.7}{237}{\emph {@merge} ( \emph {tasks\_or\_file\_names}, \emph {output\_file}, {[}\emph {extra\_parameters},...{]} )}{section*.386}{}}
+\newlabel{decorators/merge:decorators-merge-extra-parameters}{{4.1.7}{237}{\emph {@merge} ( \emph {tasks\_or\_file\_names}, \emph {output\_file}, {[}\emph {extra\_parameters},...{]} )}{section*.387}{}}
+\newlabel{decorators/subdivide:decorators-subdivide}{{4.1.2}{239}{\emph {combine}}{section*.388}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {4.1.8}@subdivide}{239}{subsection.4.1.8}}
+\newlabel{decorators/subdivide:subdivide}{{4.1.8}{239}{@subdivide}{subsection.4.1.8}{}}
+\newlabel{decorators/subdivide::doc}{{4.1.8}{239}{@subdivide}{subsection.4.1.8}{}}
+\newlabel{decorators/subdivide:decorators-subdivide-tasks-or-file-names}{{4.1.8}{239}{@subdivide}{section*.389}{}}
+\newlabel{decorators/subdivide:tasks-or-file-names}{{4.1.8}{239}{@subdivide}{section*.390}{}}
+\newlabel{decorators/subdivide:decorators-subdivide-extra-parameters}{{4.1.8}{239}{@subdivide}{section*.391}{}}
+\newlabel{decorators/subdivide:extra-parameters}{{4.1.8}{239}{@subdivide}{section*.392}{}}
+\newlabel{decorators/subdivide:decorators-subdivide-output-pattern}{{4.1.8}{239}{@subdivide}{section*.393}{}}
+\newlabel{decorators/subdivide:output-pattern}{{4.1.8}{239}{@subdivide}{section*.394}{}}
+\newlabel{decorators/subdivide:decorators-subdivide-matching-regex}{{4.1.8}{239}{@subdivide}{section*.395}{}}
+\newlabel{decorators/subdivide:matching-regex}{{4.1.8}{239}{@subdivide}{section*.396}{}}
+\newlabel{decorators/subdivide:decorators-subdivide-matching-formatter}{{4.1.8}{239}{@subdivide}{section*.397}{}}
+\newlabel{decorators/subdivide:matching-formatter}{{4.1.8}{239}{@subdivide}{section*.398}{}}
+\@writefile{toc}{\contentsline {subsubsection}{\emph {@subdivide} ( \emph {tasks\_or\_file\_names}, \emph {regex}\emph {(}\emph {matching\_regex}\emph {)} \textbar {} \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, {[} \emph {inputs} \emph {(}\emph {input\_pattern\_or\_glob}\emph {)} \textbar {} \emph {add\_inputs} \emph {(}\emph {input\_pattern\_or\_glob}\emph {)} {]}, \emph {output\_pattern}, {[}\emph {extra\_parameters},...{]} )}{239}{subsubsection*.399}}
+\newlabel{decorators/subdivide:input-pattern-or-glob}{{4.1.8}{239}{\emph {@subdivide} ( \emph {tasks\_or\_file\_names}, \emph {regex}\emph {(}\emph {matching\_regex}\emph {)} \textbar {} \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, {[} \emph {inputs} \emph {(}\emph {input\_pattern\_or\_glob}\emph {)} \textbar {} \emph {add\_inputs} \emph {(}\emph {input\_pattern\_or\_glob}\emph {)} {]}, \emph {output\_pattern}, {[}\emph {extra\_parameters},...{]} )}{subsubsection*.399}{}}
+\newlabel{decorators/subdivide:decorators-subdivide-input-pattern-or-glob}{{4.1.8}{239}{\emph {@subdivide} ( \emph {tasks\_or\_file\_names}, \emph {regex}\emph {(}\emph {matching\_regex}\emph {)} \textbar {} \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, {[} \emph {inputs} \emph {(}\emph {input\_pattern\_or\_glob}\emph {)} \textbar {} \emph {add\_inputs} \emph {(}\emph {input\_pattern\_or\_glob}\emph {)} {]}, \emph {output\_pattern}, {[}\emph {extra\_parameters},...{]} ) [...]
+\newlabel{decorators/subdivide:subdivide-tasks-or-file-names-regex-matching-regex-formatter-matching-formatter-inputs-input-pattern-or-glob-add-inputs-input-pattern-or-glob-output-pattern-extra-parameters}{{4.1.8}{239}{\emph {@subdivide} ( \emph {tasks\_or\_file\_names}, \emph {regex}\emph {(}\emph {matching\_regex}\emph {)} \textbar {} \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, {[} \emph {inputs} \emph {(}\emph {input\_pattern\_or\_glob}\emph {)} \textbar {} \emph { [...]
+\newlabel{decorators/subdivide:decorators-subdivide-tasks-or-file-names}{{4.1.8}{239}{\emph {@subdivide} ( \emph {tasks\_or\_file\_names}, \emph {regex}\emph {(}\emph {matching\_regex}\emph {)} \textbar {} \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, {[} \emph {inputs} \emph {(}\emph {input\_pattern\_or\_glob}\emph {)} \textbar {} \emph {add\_inputs} \emph {(}\emph {input\_pattern\_or\_glob}\emph {)} {]}, \emph {output\_pattern}, {[}\emph {extra\_parameters},...{]} )}{ [...]
+\newlabel{decorators/subdivide:decorators-subdivide-matching-regex}{{4.1.8}{239}{\emph {@subdivide} ( \emph {tasks\_or\_file\_names}, \emph {regex}\emph {(}\emph {matching\_regex}\emph {)} \textbar {} \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, {[} \emph {inputs} \emph {(}\emph {input\_pattern\_or\_glob}\emph {)} \textbar {} \emph {add\_inputs} \emph {(}\emph {input\_pattern\_or\_glob}\emph {)} {]}, \emph {output\_pattern}, {[}\emph {extra\_parameters},...{]} )}{secti [...]
+\newlabel{decorators/subdivide:decorators-subdivide-matching-formatter}{{4.1.8}{239}{\emph {@subdivide} ( \emph {tasks\_or\_file\_names}, \emph {regex}\emph {(}\emph {matching\_regex}\emph {)} \textbar {} \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, {[} \emph {inputs} \emph {(}\emph {input\_pattern\_or\_glob}\emph {)} \textbar {} \emph {add\_inputs} \emph {(}\emph {input\_pattern\_or\_glob}\emph {)} {]}, \emph {output\_pattern}, {[}\emph {extra\_parameters},...{]} )}{s [...]
+\newlabel{decorators/subdivide:decorators-subdivide-output-pattern}{{4.1.8}{239}{\emph {@subdivide} ( \emph {tasks\_or\_file\_names}, \emph {regex}\emph {(}\emph {matching\_regex}\emph {)} \textbar {} \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, {[} \emph {inputs} \emph {(}\emph {input\_pattern\_or\_glob}\emph {)} \textbar {} \emph {add\_inputs} \emph {(}\emph {input\_pattern\_or\_glob}\emph {)} {]}, \emph {output\_pattern}, {[}\emph {extra\_parameters},...{]} )}{secti [...]
+\newlabel{decorators/subdivide:decorators-subdivide-input-pattern-or-glob}{{4.1.8}{239}{\emph {@subdivide} ( \emph {tasks\_or\_file\_names}, \emph {regex}\emph {(}\emph {matching\_regex}\emph {)} \textbar {} \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, {[} \emph {inputs} \emph {(}\emph {input\_pattern\_or\_glob}\emph {)} \textbar {} \emph {add\_inputs} \emph {(}\emph {input\_pattern\_or\_glob}\emph {)} {]}, \emph {output\_pattern}, {[}\emph {extra\_parameters},...{]} ) [...]
+\newlabel{decorators/subdivide:decorators-subdivide-extra-parameters}{{4.1.8}{239}{\emph {@subdivide} ( \emph {tasks\_or\_file\_names}, \emph {regex}\emph {(}\emph {matching\_regex}\emph {)} \textbar {} \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, {[} \emph {inputs} \emph {(}\emph {input\_pattern\_or\_glob}\emph {)} \textbar {} \emph {add\_inputs} \emph {(}\emph {input\_pattern\_or\_glob}\emph {)} {]}, \emph {output\_pattern}, {[}\emph {extra\_parameters},...{]} )}{sec [...]
+\newlabel{decorators/transform_ex:decorators-transform-ex}{{4.1.8}{239}{\emph {@subdivide} ( \emph {tasks\_or\_file\_names}, \emph {regex}\emph {(}\emph {matching\_regex}\emph {)} \textbar {} \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, {[} \emph {inputs} \emph {(}\emph {input\_pattern\_or\_glob}\emph {)} \textbar {} \emph {add\_inputs} \emph {(}\emph {input\_pattern\_or\_glob}\emph {)} {]}, \emph {output\_pattern}, {[}\emph {extra\_parameters},...{]} )}{section*.406}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {4.1.9}@transform with \texttt {add\_inputs} and \texttt {inputs}}{239}{subsection.4.1.9}}
+\newlabel{decorators/transform_ex:transform-with-add-inputs-and-inputs}{{4.1.9}{239}{@transform with \texttt {add\_inputs} and \texttt {inputs}}{subsection.4.1.9}{}}
+\newlabel{decorators/transform_ex::doc}{{4.1.9}{239}{@transform with \texttt {add\_inputs} and \texttt {inputs}}{subsection.4.1.9}{}}
+\newlabel{decorators/transform_ex:decorators-transform-tasks-or-file-names}{{4.1.9}{239}{@transform with \texttt {add\_inputs} and \texttt {inputs}}{section*.407}{}}
+\newlabel{decorators/transform_ex:tasks-or-file-names}{{4.1.9}{239}{@transform with \texttt {add\_inputs} and \texttt {inputs}}{section*.408}{}}
+\newlabel{decorators/transform_ex:decorators-transform-extra-parameters}{{4.1.9}{239}{@transform with \texttt {add\_inputs} and \texttt {inputs}}{section*.409}{}}
+\newlabel{decorators/transform_ex:extra-parameters}{{4.1.9}{239}{@transform with \texttt {add\_inputs} and \texttt {inputs}}{section*.410}{}}
+\newlabel{decorators/transform_ex:decorators-transform-output-pattern}{{4.1.9}{239}{@transform with \texttt {add\_inputs} and \texttt {inputs}}{section*.411}{}}
+\newlabel{decorators/transform_ex:output-pattern}{{4.1.9}{239}{@transform with \texttt {add\_inputs} and \texttt {inputs}}{section*.412}{}}
+\newlabel{decorators/transform_ex:decorators-transform-input-pattern-or-glob}{{4.1.9}{239}{@transform with \texttt {add\_inputs} and \texttt {inputs}}{section*.413}{}}
+\newlabel{decorators/transform_ex:input-pattern-or-glob}{{4.1.9}{239}{@transform with \texttt {add\_inputs} and \texttt {inputs}}{section*.414}{}}
+\newlabel{decorators/transform_ex:decorators-transform-matching-regex}{{4.1.9}{239}{@transform with \texttt {add\_inputs} and \texttt {inputs}}{section*.415}{}}
+\newlabel{decorators/transform_ex:matching-regex}{{4.1.9}{239}{@transform with \texttt {add\_inputs} and \texttt {inputs}}{section*.416}{}}
+\newlabel{decorators/transform_ex:decorators-transform-matching-formatter}{{4.1.9}{239}{@transform with \texttt {add\_inputs} and \texttt {inputs}}{section*.417}{}}
+\newlabel{decorators/transform_ex:matching-formatter}{{4.1.9}{239}{@transform with \texttt {add\_inputs} and \texttt {inputs}}{section*.418}{}}
+\@writefile{toc}{\contentsline {subsubsection}{\emph {@transform} ( \emph {tasks\_or\_file\_names}, \emph {suffix}\emph {(}\emph {suffix\_string}\emph {)}\textbar {} \emph {regex}\emph {(}\emph {matching\_regex}\emph {)} \textbar {} \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, \emph {inputs} \textbar {} \emph {add\_inputs}\emph {(}\emph {input\_pattern\_or\_glob}\emph {)}, \emph {output\_pattern}, {[}\emph {extra\_parameters},...{]} )}{239}{subsubs [...]
+\newlabel{decorators/transform_ex:suffix-string}{{4.1.9}{239}{\emph {@transform} ( \emph {tasks\_or\_file\_names}, \emph {suffix}\emph {(}\emph {suffix\_string}\emph {)}\textbar {} \emph {regex}\emph {(}\emph {matching\_regex}\emph {)} \textbar {} \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, \emph {inputs} \textbar {} \emph {add\_inputs}\emph {(}\emph {input\_pattern\_or\_glob}\emph {)}, \emph {output\_pattern}, {[}\emph {extra\_parameters},...{]} )}{subsubsection*.419}{}}
+\newlabel{decorators/transform_ex:transform-tasks-or-file-names-suffix-suffix-string-regex-matching-regex-formatter-matching-formatter-inputs-add-inputs-input-pattern-or-glob-output-pattern-extra-parameters}{{4.1.9}{239}{\emph {@transform} ( \emph {tasks\_or\_file\_names}, \emph {suffix}\emph {(}\emph {suffix\_string}\emph {)}\textbar {} \emph {regex}\emph {(}\emph {matching\_regex}\emph {)} \textbar {} \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, \emph {inputs} \textb [...]
+\newlabel{decorators/transform_ex:decorators-transform-suffix-string}{{4.1.9}{239}{\emph {@transform} ( \emph {tasks\_or\_file\_names}, \emph {suffix}\emph {(}\emph {suffix\_string}\emph {)}\textbar {} \emph {regex}\emph {(}\emph {matching\_regex}\emph {)} \textbar {} \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, \emph {inputs} \textbar {} \emph {add\_inputs}\emph {(}\emph {input\_pattern\_or\_glob}\emph {)}, \emph {output\_pattern}, {[}\emph {extra\_parameters},...{]} [...]
+\newlabel{decorators/transform_ex:decorators-transform-tasks-or-file-names}{{4.1.9}{239}{\emph {@transform} ( \emph {tasks\_or\_file\_names}, \emph {suffix}\emph {(}\emph {suffix\_string}\emph {)}\textbar {} \emph {regex}\emph {(}\emph {matching\_regex}\emph {)} \textbar {} \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, \emph {inputs} \textbar {} \emph {add\_inputs}\emph {(}\emph {input\_pattern\_or\_glob}\emph {)}, \emph {output\_pattern}, {[}\emph {extra\_parameters},. [...]
+\newlabel{decorators/transform_ex:decorators-transform-suffix-string}{{4.1.9}{239}{\emph {@transform} ( \emph {tasks\_or\_file\_names}, \emph {suffix}\emph {(}\emph {suffix\_string}\emph {)}\textbar {} \emph {regex}\emph {(}\emph {matching\_regex}\emph {)} \textbar {} \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, \emph {inputs} \textbar {} \emph {add\_inputs}\emph {(}\emph {input\_pattern\_or\_glob}\emph {)}, \emph {output\_pattern}, {[}\emph {extra\_parameters},...{]} [...]
+\newlabel{decorators/transform_ex:decorators-transform-matching-regex}{{4.1.9}{239}{\emph {@transform} ( \emph {tasks\_or\_file\_names}, \emph {suffix}\emph {(}\emph {suffix\_string}\emph {)}\textbar {} \emph {regex}\emph {(}\emph {matching\_regex}\emph {)} \textbar {} \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, \emph {inputs} \textbar {} \emph {add\_inputs}\emph {(}\emph {input\_pattern\_or\_glob}\emph {)}, \emph {output\_pattern}, {[}\emph {extra\_parameters},...{]} [...]
+\newlabel{decorators/transform_ex:decorators-transform-matching-formatter}{{4.1.9}{239}{\emph {@transform} ( \emph {tasks\_or\_file\_names}, \emph {suffix}\emph {(}\emph {suffix\_string}\emph {)}\textbar {} \emph {regex}\emph {(}\emph {matching\_regex}\emph {)} \textbar {} \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, \emph {inputs} \textbar {} \emph {add\_inputs}\emph {(}\emph {input\_pattern\_or\_glob}\emph {)}, \emph {output\_pattern}, {[}\emph {extra\_parameters},.. [...]
+\newlabel{decorators/transform_ex:decorators-transform-input-pattern-or-glob}{{4.1.9}{239}{\emph {@transform} ( \emph {tasks\_or\_file\_names}, \emph {suffix}\emph {(}\emph {suffix\_string}\emph {)}\textbar {} \emph {regex}\emph {(}\emph {matching\_regex}\emph {)} \textbar {} \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, \emph {inputs} \textbar {} \emph {add\_inputs}\emph {(}\emph {input\_pattern\_or\_glob}\emph {)}, \emph {output\_pattern}, {[}\emph {extra\_parameters} [...]
+\newlabel{decorators/transform_ex:decorators-transform-output-pattern}{{4.1.9}{239}{\emph {@transform} ( \emph {tasks\_or\_file\_names}, \emph {suffix}\emph {(}\emph {suffix\_string}\emph {)}\textbar {} \emph {regex}\emph {(}\emph {matching\_regex}\emph {)} \textbar {} \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, \emph {inputs} \textbar {} \emph {add\_inputs}\emph {(}\emph {input\_pattern\_or\_glob}\emph {)}, \emph {output\_pattern}, {[}\emph {extra\_parameters},...{]} [...]
+\newlabel{decorators/transform_ex:decorators-transform-extra-parameters}{{4.1.9}{239}{\emph {@transform} ( \emph {tasks\_or\_file\_names}, \emph {suffix}\emph {(}\emph {suffix\_string}\emph {)}\textbar {} \emph {regex}\emph {(}\emph {matching\_regex}\emph {)} \textbar {} \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, \emph {inputs} \textbar {} \emph {add\_inputs}\emph {(}\emph {input\_pattern\_or\_glob}\emph {)}, \emph {output\_pattern}, {[}\emph {extra\_parameters},...{ [...]
+\newlabel{decorators/collate:decorators-collate}{{4.1.9}{239}{\emph {@transform} ( \emph {tasks\_or\_file\_names}, \emph {suffix}\emph {(}\emph {suffix\_string}\emph {)}\textbar {} \emph {regex}\emph {(}\emph {matching\_regex}\emph {)} \textbar {} \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, \emph {inputs} \textbar {} \emph {add\_inputs}\emph {(}\emph {input\_pattern\_or\_glob}\emph {)}, \emph {output\_pattern}, {[}\emph {extra\_parameters},...{]} )}{section*.427}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {4.1.10}@collate}{239}{subsection.4.1.10}}
+\newlabel{decorators/collate::doc}{{4.1.10}{239}{@collate}{subsection.4.1.10}{}}
+\newlabel{decorators/collate:collate}{{4.1.10}{239}{@collate}{subsection.4.1.10}{}}
+\newlabel{decorators/collate:decorators-collate-tasks-or-file-names}{{4.1.10}{239}{@collate}{section*.428}{}}
+\newlabel{decorators/collate:tasks-or-file-names}{{4.1.10}{239}{@collate}{section*.429}{}}
+\newlabel{decorators/collate:decorators-collate-extra-parameters}{{4.1.10}{239}{@collate}{section*.430}{}}
+\newlabel{decorators/collate:extra-parameters}{{4.1.10}{239}{@collate}{section*.431}{}}
+\newlabel{decorators/collate:decorators-collate-output-pattern}{{4.1.10}{239}{@collate}{section*.432}{}}
+\newlabel{decorators/collate:output-pattern}{{4.1.10}{239}{@collate}{section*.433}{}}
+\newlabel{decorators/collate:decorators-collate-matching-regex}{{4.1.10}{239}{@collate}{section*.434}{}}
+\newlabel{decorators/collate:matching-regex}{{4.1.10}{239}{@collate}{section*.435}{}}
+\@writefile{toc}{\contentsline {subsubsection}{\emph {@collate} ( \emph {tasks\_or\_file\_names}, \emph {regex}\emph {(}\emph {matching\_regex}\emph {)} \textbar {} \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, \emph {output\_pattern}, {[}\emph {extra\_parameters},...{]} )}{239}{subsubsection*.436}}
+\newlabel{decorators/collate:collate-tasks-or-file-names-regex-matching-regex-formatter-matching-formatter-output-pattern-extra-parameters}{{4.1.10}{239}{\emph {@collate} ( \emph {tasks\_or\_file\_names}, \emph {regex}\emph {(}\emph {matching\_regex}\emph {)} \textbar {} \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, \emph {output\_pattern}, {[}\emph {extra\_parameters},...{]} )}{subsubsection*.436}{}}
+\newlabel{decorators/collate:matching-formatter}{{4.1.10}{239}{\emph {@collate} ( \emph {tasks\_or\_file\_names}, \emph {regex}\emph {(}\emph {matching\_regex}\emph {)} \textbar {} \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, \emph {output\_pattern}, {[}\emph {extra\_parameters},...{]} )}{subsubsection*.436}{}}
+\newlabel{decorators/collate:decorators-collate-matching-formatter}{{4.1.10}{239}{\emph {@collate} ( \emph {tasks\_or\_file\_names}, \emph {regex}\emph {(}\emph {matching\_regex}\emph {)} \textbar {} \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, \emph {output\_pattern}, {[}\emph {extra\_parameters},...{]} )}{subsubsection*.436}{}}
+\newlabel{decorators/collate:decorators-collate-tasks-or-file-names}{{4.1.10}{239}{\emph {@collate} ( \emph {tasks\_or\_file\_names}, \emph {regex}\emph {(}\emph {matching\_regex}\emph {)} \textbar {} \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, \emph {output\_pattern}, {[}\emph {extra\_parameters},...{]} )}{section*.437}{}}
+\newlabel{decorators/collate:decorators-collate-matching-regex}{{4.1.10}{239}{\emph {@collate} ( \emph {tasks\_or\_file\_names}, \emph {regex}\emph {(}\emph {matching\_regex}\emph {)} \textbar {} \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, \emph {output\_pattern}, {[}\emph {extra\_parameters},...{]} )}{section*.438}{}}
+\newlabel{decorators/collate:decorators-collate-matching-formatter}{{4.1.10}{239}{\emph {@collate} ( \emph {tasks\_or\_file\_names}, \emph {regex}\emph {(}\emph {matching\_regex}\emph {)} \textbar {} \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, \emph {output\_pattern}, {[}\emph {extra\_parameters},...{]} )}{section*.439}{}}
+\newlabel{decorators/collate:decorators-collate-output-pattern}{{4.1.10}{239}{\emph {@collate} ( \emph {tasks\_or\_file\_names}, \emph {regex}\emph {(}\emph {matching\_regex}\emph {)} \textbar {} \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, \emph {output\_pattern}, {[}\emph {extra\_parameters},...{]} )}{section*.440}{}}
+\newlabel{decorators/collate:decorators-collate-extra-parameters}{{4.1.10}{239}{\emph {@collate} ( \emph {tasks\_or\_file\_names}, \emph {regex}\emph {(}\emph {matching\_regex}\emph {)} \textbar {} \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, \emph {output\_pattern}, {[}\emph {extra\_parameters},...{]} )}{section*.441}{}}
+\newlabel{decorators/collate_ex:decorators-collate-ex}{{4.1.10}{239}{\emph {@collate} ( \emph {tasks\_or\_file\_names}, \emph {regex}\emph {(}\emph {matching\_regex}\emph {)} \textbar {} \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, \emph {output\_pattern}, {[}\emph {extra\_parameters},...{]} )}{section*.442}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {4.1.11}@collate with \texttt {add\_inputs} and \texttt {inputs}}{239}{subsection.4.1.11}}
+\newlabel{decorators/collate_ex:collate-with-add-inputs-and-inputs}{{4.1.11}{239}{@collate with \texttt {add\_inputs} and \texttt {inputs}}{subsection.4.1.11}{}}
+\newlabel{decorators/collate_ex::doc}{{4.1.11}{239}{@collate with \texttt {add\_inputs} and \texttt {inputs}}{subsection.4.1.11}{}}
+\newlabel{decorators/collate_ex:decorators-collate-ex-tasks-or-file-names}{{4.1.11}{239}{@collate with \texttt {add\_inputs} and \texttt {inputs}}{section*.443}{}}
+\newlabel{decorators/collate_ex:tasks-or-file-names}{{4.1.11}{239}{@collate with \texttt {add\_inputs} and \texttt {inputs}}{section*.444}{}}
+\newlabel{decorators/collate_ex:decorators-collate-ex-extra-parameters}{{4.1.11}{239}{@collate with \texttt {add\_inputs} and \texttt {inputs}}{section*.445}{}}
+\newlabel{decorators/collate_ex:extra-parameters}{{4.1.11}{239}{@collate with \texttt {add\_inputs} and \texttt {inputs}}{section*.446}{}}
+\newlabel{decorators/collate_ex:decorators-collate-ex-output-pattern}{{4.1.11}{239}{@collate with \texttt {add\_inputs} and \texttt {inputs}}{section*.447}{}}
+\newlabel{decorators/collate_ex:output-pattern}{{4.1.11}{239}{@collate with \texttt {add\_inputs} and \texttt {inputs}}{section*.448}{}}
+\newlabel{decorators/collate_ex:decorators-collate-ex-input-pattern-or-glob}{{4.1.11}{239}{@collate with \texttt {add\_inputs} and \texttt {inputs}}{section*.449}{}}
+\newlabel{decorators/collate_ex:input-pattern-or-glob}{{4.1.11}{239}{@collate with \texttt {add\_inputs} and \texttt {inputs}}{section*.450}{}}
+\newlabel{decorators/collate_ex:decorators-collate-ex-matching-regex}{{4.1.11}{239}{@collate with \texttt {add\_inputs} and \texttt {inputs}}{section*.451}{}}
+\newlabel{decorators/collate_ex:matching-regex}{{4.1.11}{239}{@collate with \texttt {add\_inputs} and \texttt {inputs}}{section*.452}{}}
+\@writefile{toc}{\contentsline {subsubsection}{\emph {@collate} ( \emph {tasks\_or\_file\_names}, \emph {regex}\emph {(}\emph {matching\_regex}\emph {)} \textbar {} \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, {[}\emph {inputs}\emph {(}\emph {input\_pattern\_or\_glob}\emph {)} \textbar {} \emph {add\_inputs}\emph {(}\emph {input\_pattern\_or\_glob}\emph {)}{]} , \emph {output\_pattern}, {[}\emph {extra\_parameters},...{]} )}{239}{subsubsection*.453}}
+\newlabel{decorators/collate_ex:decorators-collate-ex-matching-formatter}{{4.1.11}{239}{\emph {@collate} ( \emph {tasks\_or\_file\_names}, \emph {regex}\emph {(}\emph {matching\_regex}\emph {)} \textbar {} \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, {[}\emph {inputs}\emph {(}\emph {input\_pattern\_or\_glob}\emph {)} \textbar {} \emph {add\_inputs}\emph {(}\emph {input\_pattern\_or\_glob}\emph {)}{]} , \emph {output\_pattern}, {[}\emph {extra\_parameters},...{]} )}{sub [...]
+\newlabel{decorators/collate_ex:matching-formatter}{{4.1.11}{239}{\emph {@collate} ( \emph {tasks\_or\_file\_names}, \emph {regex}\emph {(}\emph {matching\_regex}\emph {)} \textbar {} \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, {[}\emph {inputs}\emph {(}\emph {input\_pattern\_or\_glob}\emph {)} \textbar {} \emph {add\_inputs}\emph {(}\emph {input\_pattern\_or\_glob}\emph {)}{]} , \emph {output\_pattern}, {[}\emph {extra\_parameters},...{]} )}{subsubsection*.453}{}}
+\newlabel{decorators/collate_ex:collate-tasks-or-file-names-regex-matching-regex-formatter-matching-formatter-inputs-input-pattern-or-glob-add-inputs-input-pattern-or-glob-output-pattern-extra-parameters}{{4.1.11}{239}{\emph {@collate} ( \emph {tasks\_or\_file\_names}, \emph {regex}\emph {(}\emph {matching\_regex}\emph {)} \textbar {} \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, {[}\emph {inputs}\emph {(}\emph {input\_pattern\_or\_glob}\emph {)} \textbar {} \emph {add\ [...]
+\newlabel{decorators/collate_ex:decorators-collate-ex-tasks-or-file-names}{{4.1.11}{239}{\emph {@collate} ( \emph {tasks\_or\_file\_names}, \emph {regex}\emph {(}\emph {matching\_regex}\emph {)} \textbar {} \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, {[}\emph {inputs}\emph {(}\emph {input\_pattern\_or\_glob}\emph {)} \textbar {} \emph {add\_inputs}\emph {(}\emph {input\_pattern\_or\_glob}\emph {)}{]} , \emph {output\_pattern}, {[}\emph {extra\_parameters},...{]} )}{se [...]
+\newlabel{decorators/collate_ex:decorators-collate-ex-matching-regex}{{4.1.11}{239}{\emph {@collate} ( \emph {tasks\_or\_file\_names}, \emph {regex}\emph {(}\emph {matching\_regex}\emph {)} \textbar {} \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, {[}\emph {inputs}\emph {(}\emph {input\_pattern\_or\_glob}\emph {)} \textbar {} \emph {add\_inputs}\emph {(}\emph {input\_pattern\_or\_glob}\emph {)}{]} , \emph {output\_pattern}, {[}\emph {extra\_parameters},...{]} )}{section [...]
+\newlabel{decorators/collate_ex:decorators-collate-ex-matching-formatter}{{4.1.11}{239}{\emph {@collate} ( \emph {tasks\_or\_file\_names}, \emph {regex}\emph {(}\emph {matching\_regex}\emph {)} \textbar {} \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, {[}\emph {inputs}\emph {(}\emph {input\_pattern\_or\_glob}\emph {)} \textbar {} \emph {add\_inputs}\emph {(}\emph {input\_pattern\_or\_glob}\emph {)}{]} , \emph {output\_pattern}, {[}\emph {extra\_parameters},...{]} )}{sec [...]
+\newlabel{decorators/collate_ex:decorators-collate-ex-input-pattern-or-glob}{{4.1.11}{239}{\emph {@collate} ( \emph {tasks\_or\_file\_names}, \emph {regex}\emph {(}\emph {matching\_regex}\emph {)} \textbar {} \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, {[}\emph {inputs}\emph {(}\emph {input\_pattern\_or\_glob}\emph {)} \textbar {} \emph {add\_inputs}\emph {(}\emph {input\_pattern\_or\_glob}\emph {)}{]} , \emph {output\_pattern}, {[}\emph {extra\_parameters},...{]} )}{ [...]
+\newlabel{decorators/collate_ex:decorators-collate-ex-output-pattern}{{4.1.11}{239}{\emph {@collate} ( \emph {tasks\_or\_file\_names}, \emph {regex}\emph {(}\emph {matching\_regex}\emph {)} \textbar {} \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, {[}\emph {inputs}\emph {(}\emph {input\_pattern\_or\_glob}\emph {)} \textbar {} \emph {add\_inputs}\emph {(}\emph {input\_pattern\_or\_glob}\emph {)}{]} , \emph {output\_pattern}, {[}\emph {extra\_parameters},...{]} )}{section [...]
+\newlabel{decorators/collate_ex:decorators-collate-ex-extra-parameters}{{4.1.11}{239}{\emph {@collate} ( \emph {tasks\_or\_file\_names}, \emph {regex}\emph {(}\emph {matching\_regex}\emph {)} \textbar {} \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, {[}\emph {inputs}\emph {(}\emph {input\_pattern\_or\_glob}\emph {)} \textbar {} \emph {add\_inputs}\emph {(}\emph {input\_pattern\_or\_glob}\emph {)}{]} , \emph {output\_pattern}, {[}\emph {extra\_parameters},...{]} )}{secti [...]
+\newlabel{decorators/graphviz:decorators-graphviz}{{4.1.11}{239}{\emph {@collate} ( \emph {tasks\_or\_file\_names}, \emph {regex}\emph {(}\emph {matching\_regex}\emph {)} \textbar {} \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, {[}\emph {inputs}\emph {(}\emph {input\_pattern\_or\_glob}\emph {)} \textbar {} \emph {add\_inputs}\emph {(}\emph {input\_pattern\_or\_glob}\emph {)}{]} , \emph {output\_pattern}, {[}\emph {extra\_parameters},...{]} )}{section*.460}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {4.1.12}@graphviz}{239}{subsection.4.1.12}}
+\newlabel{decorators/graphviz::doc}{{4.1.12}{239}{@graphviz}{subsection.4.1.12}{}}
+\newlabel{decorators/graphviz:graphviz}{{4.1.12}{239}{@graphviz}{subsection.4.1.12}{}}
+\@writefile{toc}{\contentsline {subsubsection}{\emph {@graphviz} ( \emph {graphviz\_parameters},...{]} )}{239}{subsubsection*.461}}
+\newlabel{decorators/graphviz:graphviz-graphviz-parameters}{{4.1.12}{239}{\emph {@graphviz} ( \emph {graphviz\_parameters},...{]} )}{subsubsection*.461}{}}
+\newlabel{decorators/graphviz:decorators-graphviz-graphviz-parameters}{{4.1.12}{239}{\emph {@graphviz} ( \emph {graphviz\_parameters},...{]} )}{subsubsection*.461}{}}
+\newlabel{decorators/graphviz:graphviz-parameters}{{4.1.12}{239}{\emph {@graphviz} ( \emph {graphviz\_parameters},...{]} )}{subsubsection*.461}{}}
+\newlabel{decorators/graphviz:decorators-graphviz-graphviz-parameters}{{4.1.12}{239}{\emph {@graphviz} ( \emph {graphviz\_parameters},...{]} )}{section*.462}{}}
+\newlabel{decorators/mkdir:decorators-mkdir}{{4.1.12}{239}{\emph {@graphviz} ( \emph {graphviz\_parameters},...{]} )}{section*.463}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {4.1.13}@mkdir}{239}{subsection.4.1.13}}
+\newlabel{decorators/mkdir:mkdir}{{4.1.13}{239}{@mkdir}{subsection.4.1.13}{}}
+\newlabel{decorators/mkdir::doc}{{4.1.13}{239}{@mkdir}{subsection.4.1.13}{}}
+\newlabel{decorators/mkdir:decorators-mkdir-tasks-or-file-names}{{4.1.13}{239}{@mkdir}{section*.464}{}}
+\newlabel{decorators/mkdir:tasks-or-file-names}{{4.1.13}{239}{@mkdir}{section*.465}{}}
+\newlabel{decorators/mkdir:decorators-mkdir-output-pattern}{{4.1.13}{239}{@mkdir}{section*.466}{}}
+\newlabel{decorators/mkdir:output-pattern}{{4.1.13}{239}{@mkdir}{section*.467}{}}
+\newlabel{decorators/mkdir:decorators-mkdir-matching-regex}{{4.1.13}{239}{@mkdir}{section*.468}{}}
+\newlabel{decorators/mkdir:matching-regex}{{4.1.13}{239}{@mkdir}{section*.469}{}}
+\newlabel{decorators/mkdir:decorators-mkdir-matching-formatter}{{4.1.13}{239}{@mkdir}{section*.470}{}}
+\newlabel{decorators/mkdir:matching-formatter}{{4.1.13}{239}{@mkdir}{section*.471}{}}
+\@writefile{toc}{\contentsline {subsubsection}{\emph {@mkdir} ( \emph {tasks\_or\_file\_names}, \emph {suffix}\emph {(}\emph {suffix\_string}\emph {)}\textbar {} \emph {regex}\emph {(}\emph {matching\_regex}\emph {)} \textbar {} \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, \emph {output\_pattern})}{239}{subsubsection*.472}}
+\newlabel{decorators/mkdir:suffix-string}{{4.1.13}{239}{\emph {@mkdir} ( \emph {tasks\_or\_file\_names}, \emph {suffix}\emph {(}\emph {suffix\_string}\emph {)}\textbar {} \emph {regex}\emph {(}\emph {matching\_regex}\emph {)} \textbar {} \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, \emph {output\_pattern})}{subsubsection*.472}{}}
+\newlabel{decorators/mkdir:mkdir-tasks-or-file-names-suffix-suffix-string-regex-matching-regex-formatter-matching-formatter-output-pattern}{{4.1.13}{239}{\emph {@mkdir} ( \emph {tasks\_or\_file\_names}, \emph {suffix}\emph {(}\emph {suffix\_string}\emph {)}\textbar {} \emph {regex}\emph {(}\emph {matching\_regex}\emph {)} \textbar {} \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, \emph {output\_pattern})}{subsubsection*.472}{}}
+\newlabel{decorators/mkdir:decorators-mkdir-suffix-string}{{4.1.13}{239}{\emph {@mkdir} ( \emph {tasks\_or\_file\_names}, \emph {suffix}\emph {(}\emph {suffix\_string}\emph {)}\textbar {} \emph {regex}\emph {(}\emph {matching\_regex}\emph {)} \textbar {} \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, \emph {output\_pattern})}{subsubsection*.472}{}}
+\newlabel{decorators/mkdir:decorators-mkdir-tasks-or-file-names}{{4.1.13}{239}{\emph {@mkdir} ( \emph {tasks\_or\_file\_names}, \emph {suffix}\emph {(}\emph {suffix\_string}\emph {)}\textbar {} \emph {regex}\emph {(}\emph {matching\_regex}\emph {)} \textbar {} \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, \emph {output\_pattern})}{section*.473}{}}
+\newlabel{decorators/mkdir:decorators-mkdir-suffix-string}{{4.1.13}{239}{\emph {@mkdir} ( \emph {tasks\_or\_file\_names}, \emph {suffix}\emph {(}\emph {suffix\_string}\emph {)}\textbar {} \emph {regex}\emph {(}\emph {matching\_regex}\emph {)} \textbar {} \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, \emph {output\_pattern})}{section*.474}{}}
+\newlabel{decorators/mkdir:decorators-mkdir-matching-regex}{{4.1.13}{239}{\emph {@mkdir} ( \emph {tasks\_or\_file\_names}, \emph {suffix}\emph {(}\emph {suffix\_string}\emph {)}\textbar {} \emph {regex}\emph {(}\emph {matching\_regex}\emph {)} \textbar {} \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, \emph {output\_pattern})}{section*.475}{}}
+\newlabel{decorators/mkdir:decorators-mkdir-matching-formatter}{{4.1.13}{239}{\emph {@mkdir} ( \emph {tasks\_or\_file\_names}, \emph {suffix}\emph {(}\emph {suffix\_string}\emph {)}\textbar {} \emph {regex}\emph {(}\emph {matching\_regex}\emph {)} \textbar {} \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, \emph {output\_pattern})}{section*.476}{}}
+\newlabel{decorators/mkdir:decorators-mkdir-output-pattern}{{4.1.13}{239}{\emph {@mkdir} ( \emph {tasks\_or\_file\_names}, \emph {suffix}\emph {(}\emph {suffix\_string}\emph {)}\textbar {} \emph {regex}\emph {(}\emph {matching\_regex}\emph {)} \textbar {} \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, \emph {output\_pattern})}{section*.477}{}}
+\newlabel{decorators/jobs_limit:decorators-jobs-limit}{{4.1.13}{239}{\emph {@mkdir} ( \emph {tasks\_or\_file\_names}, \emph {suffix}\emph {(}\emph {suffix\_string}\emph {)}\textbar {} \emph {regex}\emph {(}\emph {matching\_regex}\emph {)} \textbar {} \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, \emph {output\_pattern})}{section*.478}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {4.1.14}@jobs\_limit}{239}{subsection.4.1.14}}
+\newlabel{decorators/jobs_limit:jobs-limit}{{4.1.14}{239}{@jobs\_limit}{subsection.4.1.14}{}}
+\newlabel{decorators/jobs_limit::doc}{{4.1.14}{239}{@jobs\_limit}{subsection.4.1.14}{}}
+\newlabel{decorators/jobs_limit:decorators-jobs-limit-maximum-num-of-jobs}{{4.1.14}{239}{@jobs\_limit}{section*.479}{}}
+\newlabel{decorators/jobs_limit:maximum-num-of-jobs}{{4.1.14}{239}{@jobs\_limit}{section*.480}{}}
+\@writefile{toc}{\contentsline {subsubsection}{\emph {@jobs\_limit} ( \emph {maximum\_num\_of\_jobs}, {[} \emph {name} {]})}{239}{subsubsection*.481}}
+\newlabel{decorators/jobs_limit:decorators-jobs-limit-name}{{4.1.14}{239}{\emph {@jobs\_limit} ( \emph {maximum\_num\_of\_jobs}, {[} \emph {name} {]})}{subsubsection*.481}{}}
+\newlabel{decorators/jobs_limit:name}{{4.1.14}{239}{\emph {@jobs\_limit} ( \emph {maximum\_num\_of\_jobs}, {[} \emph {name} {]})}{subsubsection*.481}{}}
+\newlabel{decorators/jobs_limit:jobs-limit-maximum-num-of-jobs-name}{{4.1.14}{239}{\emph {@jobs\_limit} ( \emph {maximum\_num\_of\_jobs}, {[} \emph {name} {]})}{subsubsection*.481}{}}
+\newlabel{decorators/jobs_limit:decorators-jobs-limit-maximum-num-of-jobs}{{4.1.14}{239}{\emph {@jobs\_limit} ( \emph {maximum\_num\_of\_jobs}, {[} \emph {name} {]})}{section*.482}{}}
+\newlabel{decorators/jobs_limit:decorators-jobs-limit-name}{{4.1.14}{239}{\emph {@jobs\_limit} ( \emph {maximum\_num\_of\_jobs}, {[} \emph {name} {]})}{section*.483}{}}
+\newlabel{decorators/posttask:decorators-posttask}{{4.1.14}{239}{\emph {@jobs\_limit} ( \emph {maximum\_num\_of\_jobs}, {[} \emph {name} {]})}{section*.484}{}}
+\newlabel{decorators/posttask:decorators-posttask-function}{{4.1.14}{239}{\emph {@jobs\_limit} ( \emph {maximum\_num\_of\_jobs}, {[} \emph {name} {]})}{section*.485}{}}
+\newlabel{decorators/posttask:function}{{4.1.14}{239}{\emph {@jobs\_limit} ( \emph {maximum\_num\_of\_jobs}, {[} \emph {name} {]})}{section*.486}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {4.1.15}@posttask}{239}{subsection.4.1.15}}
+\newlabel{decorators/posttask:decorators-posttask-file-name}{{4.1.15}{239}{@posttask}{subsection.4.1.15}{}}
+\newlabel{decorators/posttask::doc}{{4.1.15}{239}{@posttask}{subsection.4.1.15}{}}
+\newlabel{decorators/posttask:posttask}{{4.1.15}{239}{@posttask}{subsection.4.1.15}{}}
+\newlabel{decorators/posttask:file-name}{{4.1.15}{239}{@posttask}{subsection.4.1.15}{}}
+\@writefile{toc}{\contentsline {subsubsection}{\emph {@posttask} (\emph {function} \textbar {} \emph {touch\_file}\emph {(}\emph {file\_name}\emph {)})}{239}{subsubsection*.487}}
+\newlabel{decorators/posttask:posttask-function-touch-file-file-name}{{4.1.15}{239}{\emph {@posttask} (\emph {function} \textbar {} \emph {touch\_file}\emph {(}\emph {file\_name}\emph {)})}{subsubsection*.487}{}}
+\newlabel{decorators/posttask:decorators-posttask-function}{{4.1.15}{239}{\emph {@posttask} (\emph {function} \textbar {} \emph {touch\_file}\emph {(}\emph {file\_name}\emph {)})}{section*.488}{}}
+\newlabel{decorators/posttask:decorators-posttask-file-name}{{4.1.15}{239}{\emph {@posttask} (\emph {function} \textbar {} \emph {touch\_file}\emph {(}\emph {file\_name}\emph {)})}{section*.489}{}}
+\newlabel{decorators/active_if:decorators-active-if}{{4.1.15}{239}{\emph {@posttask} (\emph {function} \textbar {} \emph {touch\_file}\emph {(}\emph {file\_name}\emph {)})}{section*.490}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {4.1.16}@active\_if}{239}{subsection.4.1.16}}
+\newlabel{decorators/active_if:active-if}{{4.1.16}{239}{@active\_if}{subsection.4.1.16}{}}
+\newlabel{decorators/active_if::doc}{{4.1.16}{239}{@active\_if}{subsection.4.1.16}{}}
+\@writefile{toc}{\contentsline {subsubsection}{\emph {@active\_if}(on\_or\_off1, {[}on\_or\_off2,...{]})}{239}{subsubsection*.491}}
+\newlabel{decorators/active_if:active-if-on-or-off1-on-or-off2}{{4.1.16}{239}{\emph {@active\_if}(on\_or\_off1, {[}on\_or\_off2,...{]})}{subsubsection*.491}{}}
+\newlabel{decorators/active_if:decorators-active-if-on-or-off}{{4.1.16}{239}{\emph {@active\_if}(on\_or\_off1, {[}on\_or\_off2,...{]})}{subsubsection*.491}{}}
+\newlabel{decorators/active_if:on-or-off}{{4.1.16}{239}{\emph {@active\_if}(on\_or\_off1, {[}on\_or\_off2,...{]})}{subsubsection*.491}{}}
+\newlabel{decorators/active_if:decorators-active-if-on-or-off}{{4.1.16}{239}{\emph {@active\_if}(on\_or\_off1, {[}on\_or\_off2,...{]})}{section*.492}{}}
+\newlabel{decorators/follows:decorators-follows}{{4.1.16}{239}{\emph {@active\_if}(on\_or\_off1, {[}on\_or\_off2,...{]})}{section*.493}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {4.1.17}@follows}{239}{subsection.4.1.17}}
+\newlabel{decorators/follows:follows}{{4.1.17}{239}{@follows}{subsection.4.1.17}{}}
+\newlabel{decorators/follows::doc}{{4.1.17}{239}{@follows}{subsection.4.1.17}{}}
+\newlabel{decorators/follows:decorators-follows-mkdir}{{4.1.17}{239}{@follows}{section*.494}{}}
+\newlabel{decorators/follows:decorators-follows-task}{{4.1.17}{239}{@follows}{section*.495}{}}
+\newlabel{decorators/follows:task}{{4.1.17}{239}{@follows}{section*.496}{}}
+\newlabel{decorators/follows:decorators-follows-task-name}{{4.1.17}{239}{@follows}{section*.497}{}}
+\newlabel{decorators/follows:task-name}{{4.1.17}{239}{@follows}{section*.498}{}}
+\@writefile{toc}{\contentsline {subsubsection}{\emph {@follows}(\emph {task} \textbar {} \emph {``task\_name''} \textbar {} \emph {mkdir} (\emph {directory\_name}), {[}more\_tasks, ...{]})}{239}{subsubsection*.499}}
+\newlabel{decorators/follows:directory-name}{{4.1.17}{239}{\emph {@follows}(\emph {task} \textbar {} \emph {``task\_name''} \textbar {} \emph {mkdir} (\emph {directory\_name}), {[}more\_tasks, ...{]})}{subsubsection*.499}{}}
+\newlabel{decorators/follows:decorators-follows-directory-name}{{4.1.17}{239}{\emph {@follows}(\emph {task} \textbar {} \emph {``task\_name''} \textbar {} \emph {mkdir} (\emph {directory\_name}), {[}more\_tasks, ...{]})}{subsubsection*.499}{}}
+\newlabel{decorators/follows:follows-task-task-name-mkdir-directory-name-more-tasks}{{4.1.17}{239}{\emph {@follows}(\emph {task} \textbar {} \emph {``task\_name''} \textbar {} \emph {mkdir} (\emph {directory\_name}), {[}more\_tasks, ...{]})}{subsubsection*.499}{}}
+\newlabel{decorators/follows:decorators-follows-task}{{4.1.17}{239}{\emph {@follows}(\emph {task} \textbar {} \emph {``task\_name''} \textbar {} \emph {mkdir} (\emph {directory\_name}), {[}more\_tasks, ...{]})}{section*.500}{}}
+\newlabel{decorators/follows:decorators-follows-task-name}{{4.1.17}{239}{\emph {@follows}(\emph {task} \textbar {} \emph {``task\_name''} \textbar {} \emph {mkdir} (\emph {directory\_name}), {[}more\_tasks, ...{]})}{section*.501}{}}
+\newlabel{decorators/follows:decorators-follows-directory-name}{{4.1.17}{239}{\emph {@follows}(\emph {task} \textbar {} \emph {``task\_name''} \textbar {} \emph {mkdir} (\emph {directory\_name}), {[}more\_tasks, ...{]})}{section*.502}{}}
+\newlabel{decorators/product:decorators-product}{{4.1.2}{241}{\emph {combine}}{section*.503}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {4.1.18}@product}{241}{subsection.4.1.18}}
+\newlabel{decorators/product:product}{{4.1.18}{241}{@product}{subsection.4.1.18}{}}
+\newlabel{decorators/product::doc}{{4.1.18}{241}{@product}{subsection.4.1.18}{}}
+\newlabel{decorators/product:decorators-product-tasks-or-file-names}{{4.1.18}{241}{@product}{section*.504}{}}
+\newlabel{decorators/product:tasks-or-file-names}{{4.1.18}{241}{@product}{section*.505}{}}
+\newlabel{decorators/product:decorators-product-extra-parameters}{{4.1.18}{241}{@product}{section*.506}{}}
+\newlabel{decorators/product:extra-parameters}{{4.1.18}{241}{@product}{section*.507}{}}
+\newlabel{decorators/product:decorators-product-output-pattern}{{4.1.18}{241}{@product}{section*.508}{}}
+\newlabel{decorators/product:output-pattern}{{4.1.18}{241}{@product}{section*.509}{}}
+\@writefile{toc}{\contentsline {subsubsection}{\emph {@product} ( \emph {tasks\_or\_file\_names}, \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, {[}\emph {tasks\_or\_file\_names}, \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, ... {]}, \emph {output\_pattern}, {[}\emph {extra\_parameters},...{]} )}{241}{subsubsection*.510}}
+\newlabel{decorators/product:decorators-product-matching-formatter}{{4.1.18}{241}{\emph {@product} ( \emph {tasks\_or\_file\_names}, \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, {[}\emph {tasks\_or\_file\_names}, \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, ... {]}, \emph {output\_pattern}, {[}\emph {extra\_parameters},...{]} )}{subsubsection*.510}{}}
+\newlabel{decorators/product:product-tasks-or-file-names-formatter-matching-formatter-tasks-or-file-names-formatter-matching-formatter-output-pattern-extra-parameters}{{4.1.18}{241}{\emph {@product} ( \emph {tasks\_or\_file\_names}, \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, {[}\emph {tasks\_or\_file\_names}, \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, ... {]}, \emph {output\_pattern}, {[}\emph {extra\_parameters},...{]} )}{subsubsection*.510}{}}
+\newlabel{decorators/product:matching-formatter}{{4.1.18}{241}{\emph {@product} ( \emph {tasks\_or\_file\_names}, \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, {[}\emph {tasks\_or\_file\_names}, \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, ... {]}, \emph {output\_pattern}, {[}\emph {extra\_parameters},...{]} )}{subsubsection*.510}{}}
+\newlabel{decorators/product:decorators-product-tasks-or-file-names}{{4.1.18}{241}{\emph {@product} ( \emph {tasks\_or\_file\_names}, \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, {[}\emph {tasks\_or\_file\_names}, \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, ... {]}, \emph {output\_pattern}, {[}\emph {extra\_parameters},...{]} )}{section*.511}{}}
+\newlabel{decorators/product:decorators-product-matching-formatter}{{4.1.18}{241}{\emph {@product} ( \emph {tasks\_or\_file\_names}, \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, {[}\emph {tasks\_or\_file\_names}, \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, ... {]}, \emph {output\_pattern}, {[}\emph {extra\_parameters},...{]} )}{section*.512}{}}
+\newlabel{decorators/product:decorators-product-output-pattern}{{4.1.18}{241}{\emph {@product} ( \emph {tasks\_or\_file\_names}, \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, {[}\emph {tasks\_or\_file\_names}, \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, ... {]}, \emph {output\_pattern}, {[}\emph {extra\_parameters},...{]} )}{section*.513}{}}
+\newlabel{decorators/product:decorators-product-extra-parameters}{{4.1.18}{241}{\emph {@product} ( \emph {tasks\_or\_file\_names}, \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, {[}\emph {tasks\_or\_file\_names}, \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, ... {]}, \emph {output\_pattern}, {[}\emph {extra\_parameters},...{]} )}{section*.514}{}}
+\newlabel{decorators/permutations:decorators-permutations}{{4.1.18}{241}{\emph {@product} ( \emph {tasks\_or\_file\_names}, \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, {[}\emph {tasks\_or\_file\_names}, \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, ... {]}, \emph {output\_pattern}, {[}\emph {extra\_parameters},...{]} )}{section*.515}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {4.1.19}@permutations}{241}{subsection.4.1.19}}
+\newlabel{decorators/permutations:permutations}{{4.1.19}{241}{@permutations}{subsection.4.1.19}{}}
+\newlabel{decorators/permutations::doc}{{4.1.19}{241}{@permutations}{subsection.4.1.19}{}}
+\newlabel{decorators/permutations:decorators-permutations-tasks-or-file-names}{{4.1.19}{241}{@permutations}{section*.516}{}}
+\newlabel{decorators/permutations:tasks-or-file-names}{{4.1.19}{241}{@permutations}{section*.517}{}}
+\newlabel{decorators/permutations:decorators-permutations-extra-parameters}{{4.1.19}{241}{@permutations}{section*.518}{}}
+\newlabel{decorators/permutations:extra-parameters}{{4.1.19}{241}{@permutations}{section*.519}{}}
+\newlabel{decorators/permutations:decorators-permutations-output-pattern}{{4.1.19}{241}{@permutations}{section*.520}{}}
+\newlabel{decorators/permutations:output-pattern}{{4.1.19}{241}{@permutations}{section*.521}{}}
+\@writefile{toc}{\contentsline {subsubsection}{\emph {@permutations} ( \emph {tasks\_or\_file\_names}, \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, \emph {output\_pattern}, {[}\emph {extra\_parameters},...{]} )}{241}{subsubsection*.522}}
+\newlabel{decorators/permutations:matching-formatter}{{4.1.19}{241}{\emph {@permutations} ( \emph {tasks\_or\_file\_names}, \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, \emph {output\_pattern}, {[}\emph {extra\_parameters},...{]} )}{subsubsection*.522}{}}
+\newlabel{decorators/permutations:decorators-permutations-matching-formatter}{{4.1.19}{241}{\emph {@permutations} ( \emph {tasks\_or\_file\_names}, \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, \emph {output\_pattern}, {[}\emph {extra\_parameters},...{]} )}{subsubsection*.522}{}}
+\newlabel{decorators/permutations:permutations-tasks-or-file-names-formatter-matching-formatter-output-pattern-extra-parameters}{{4.1.19}{241}{\emph {@permutations} ( \emph {tasks\_or\_file\_names}, \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, \emph {output\_pattern}, {[}\emph {extra\_parameters},...{]} )}{subsubsection*.522}{}}
+\newlabel{decorators/permutations:decorators-permutations-tasks-or-file-names}{{4.1.19}{241}{\emph {@permutations} ( \emph {tasks\_or\_file\_names}, \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, \emph {output\_pattern}, {[}\emph {extra\_parameters},...{]} )}{section*.523}{}}
+\newlabel{decorators/permutations:decorators-permutations-matching-formatter}{{4.1.19}{241}{\emph {@permutations} ( \emph {tasks\_or\_file\_names}, \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, \emph {output\_pattern}, {[}\emph {extra\_parameters},...{]} )}{section*.524}{}}
+\newlabel{decorators/permutations:decorators-permutations-output-pattern}{{4.1.19}{241}{\emph {@permutations} ( \emph {tasks\_or\_file\_names}, \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, \emph {output\_pattern}, {[}\emph {extra\_parameters},...{]} )}{section*.525}{}}
+\newlabel{decorators/permutations:decorators-permutations-extra-parameters}{{4.1.19}{241}{\emph {@permutations} ( \emph {tasks\_or\_file\_names}, \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, \emph {output\_pattern}, {[}\emph {extra\_parameters},...{]} )}{section*.526}{}}
+\newlabel{decorators/combinations:decorators-combinations}{{4.1.19}{241}{\emph {@permutations} ( \emph {tasks\_or\_file\_names}, \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, \emph {output\_pattern}, {[}\emph {extra\_parameters},...{]} )}{section*.527}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {4.1.20}@combinations}{241}{subsection.4.1.20}}
+\newlabel{decorators/combinations:combinations}{{4.1.20}{241}{@combinations}{subsection.4.1.20}{}}
+\newlabel{decorators/combinations::doc}{{4.1.20}{241}{@combinations}{subsection.4.1.20}{}}
+\newlabel{decorators/combinations:decorators-combinations-tasks-or-file-names}{{4.1.20}{241}{@combinations}{section*.528}{}}
+\newlabel{decorators/combinations:tasks-or-file-names}{{4.1.20}{241}{@combinations}{section*.529}{}}
+\newlabel{decorators/combinations:decorators-combinations-extra-parameters}{{4.1.20}{241}{@combinations}{section*.530}{}}
+\newlabel{decorators/combinations:extra-parameters}{{4.1.20}{241}{@combinations}{section*.531}{}}
+\newlabel{decorators/combinations:decorators-combinations-output-pattern}{{4.1.20}{241}{@combinations}{section*.532}{}}
+\newlabel{decorators/combinations:output-pattern}{{4.1.20}{241}{@combinations}{section*.533}{}}
+\@writefile{toc}{\contentsline {subsubsection}{\emph {@combinations} ( \emph {tasks\_or\_file\_names}, \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, \emph {output\_pattern}, {[}\emph {extra\_parameters},...{]} )}{241}{subsubsection*.534}}
+\newlabel{decorators/combinations:combinations-tasks-or-file-names-formatter-matching-formatter-output-pattern-extra-parameters}{{4.1.20}{241}{\emph {@combinations} ( \emph {tasks\_or\_file\_names}, \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, \emph {output\_pattern}, {[}\emph {extra\_parameters},...{]} )}{subsubsection*.534}{}}
+\newlabel{decorators/combinations:matching-formatter}{{4.1.20}{241}{\emph {@combinations} ( \emph {tasks\_or\_file\_names}, \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, \emph {output\_pattern}, {[}\emph {extra\_parameters},...{]} )}{subsubsection*.534}{}}
+\newlabel{decorators/combinations:decorators-combinations-matching-formatter}{{4.1.20}{241}{\emph {@combinations} ( \emph {tasks\_or\_file\_names}, \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, \emph {output\_pattern}, {[}\emph {extra\_parameters},...{]} )}{subsubsection*.534}{}}
+\newlabel{decorators/combinations:decorators-combinations-tasks-or-file-names}{{4.1.20}{241}{\emph {@combinations} ( \emph {tasks\_or\_file\_names}, \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, \emph {output\_pattern}, {[}\emph {extra\_parameters},...{]} )}{section*.535}{}}
+\newlabel{decorators/combinations:decorators-combinations-matching-formatter}{{4.1.20}{241}{\emph {@combinations} ( \emph {tasks\_or\_file\_names}, \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, \emph {output\_pattern}, {[}\emph {extra\_parameters},...{]} )}{section*.536}{}}
+\newlabel{decorators/combinations:decorators-combinations-output-pattern}{{4.1.20}{241}{\emph {@combinations} ( \emph {tasks\_or\_file\_names}, \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, \emph {output\_pattern}, {[}\emph {extra\_parameters},...{]} )}{section*.537}{}}
+\newlabel{decorators/combinations:decorators-combinations-extra-parameters}{{4.1.20}{241}{\emph {@combinations} ( \emph {tasks\_or\_file\_names}, \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, \emph {output\_pattern}, {[}\emph {extra\_parameters},...{]} )}{section*.538}{}}
+\newlabel{decorators/combinations_with_replacement:decorators-combinations-with-replacement}{{4.1.20}{241}{\emph {@combinations} ( \emph {tasks\_or\_file\_names}, \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, \emph {output\_pattern}, {[}\emph {extra\_parameters},...{]} )}{section*.539}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {4.1.21}@combinations\_with\_replacement}{241}{subsection.4.1.21}}
+\newlabel{decorators/combinations_with_replacement::doc}{{4.1.21}{241}{@combinations\_with\_replacement}{subsection.4.1.21}{}}
+\newlabel{decorators/combinations_with_replacement:combinations-with-replacement}{{4.1.21}{241}{@combinations\_with\_replacement}{subsection.4.1.21}{}}
+\newlabel{decorators/combinations_with_replacement:decorators-combinations-with-replacement-tasks-or-file-names}{{4.1.21}{241}{@combinations\_with\_replacement}{section*.540}{}}
+\newlabel{decorators/combinations_with_replacement:tasks-or-file-names}{{4.1.21}{241}{@combinations\_with\_replacement}{section*.541}{}}
+\newlabel{decorators/combinations_with_replacement:decorators-combinations-with-replacement-extra-parameters}{{4.1.21}{241}{@combinations\_with\_replacement}{section*.542}{}}
+\newlabel{decorators/combinations_with_replacement:extra-parameters}{{4.1.21}{241}{@combinations\_with\_replacement}{section*.543}{}}
+\newlabel{decorators/combinations_with_replacement:decorators-combinations-with-replacement-output-pattern}{{4.1.21}{241}{@combinations\_with\_replacement}{section*.544}{}}
+\newlabel{decorators/combinations_with_replacement:output-pattern}{{4.1.21}{241}{@combinations\_with\_replacement}{section*.545}{}}
+\@writefile{toc}{\contentsline {subsubsection}{\emph {@combinations\_with\_replacement} ( \emph {tasks\_or\_file\_names}, \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, \emph {output\_pattern}, {[}\emph {extra\_parameters},...{]} )}{241}{subsubsection*.546}}
+\newlabel{decorators/combinations_with_replacement:combinations-with-replacement-tasks-or-file-names-formatter-matching-formatter-output-pattern-extra-parameters}{{4.1.21}{241}{\emph {@combinations\_with\_replacement} ( \emph {tasks\_or\_file\_names}, \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, \emph {output\_pattern}, {[}\emph {extra\_parameters},...{]} )}{subsubsection*.546}{}}
+\newlabel{decorators/combinations_with_replacement:matching-formatter}{{4.1.21}{241}{\emph {@combinations\_with\_replacement} ( \emph {tasks\_or\_file\_names}, \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, \emph {output\_pattern}, {[}\emph {extra\_parameters},...{]} )}{subsubsection*.546}{}}
+\newlabel{decorators/combinations_with_replacement:decorators-combinations-with-replacement-matching-formatter}{{4.1.21}{241}{\emph {@combinations\_with\_replacement} ( \emph {tasks\_or\_file\_names}, \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, \emph {output\_pattern}, {[}\emph {extra\_parameters},...{]} )}{subsubsection*.546}{}}
+\newlabel{decorators/combinations_with_replacement:decorators-combinations-with-replacement-tasks-or-file-names}{{4.1.21}{241}{\emph {@combinations\_with\_replacement} ( \emph {tasks\_or\_file\_names}, \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, \emph {output\_pattern}, {[}\emph {extra\_parameters},...{]} )}{section*.547}{}}
+\newlabel{decorators/combinations_with_replacement:decorators-combinations-with-replacement-matching-formatter}{{4.1.21}{241}{\emph {@combinations\_with\_replacement} ( \emph {tasks\_or\_file\_names}, \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, \emph {output\_pattern}, {[}\emph {extra\_parameters},...{]} )}{section*.548}{}}
+\newlabel{decorators/combinations_with_replacement:decorators-combinations-with-replacement-output-pattern}{{4.1.21}{241}{\emph {@combinations\_with\_replacement} ( \emph {tasks\_or\_file\_names}, \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, \emph {output\_pattern}, {[}\emph {extra\_parameters},...{]} )}{section*.549}{}}
+\newlabel{decorators/combinations_with_replacement:decorators-combinations-with-replacement-extra-parameters}{{4.1.21}{241}{\emph {@combinations\_with\_replacement} ( \emph {tasks\_or\_file\_names}, \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, \emph {output\_pattern}, {[}\emph {extra\_parameters},...{]} )}{section*.550}{}}
+\newlabel{decorators/files_ex:decorators-files-on-the-fly}{{4.1.2}{243}{\emph {combine}}{section*.551}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {4.1.22}Generating parameters on the fly for @files}{243}{subsection.4.1.22}}
+\newlabel{decorators/files_ex:decorators-files-custom-function}{{4.1.22}{243}{Generating parameters on the fly for @files}{subsection.4.1.22}{}}
+\newlabel{decorators/files_ex::doc}{{4.1.22}{243}{Generating parameters on the fly for @files}{subsection.4.1.22}{}}
+\newlabel{decorators/files_ex:custom-function}{{4.1.22}{243}{Generating parameters on the fly for @files}{subsection.4.1.22}{}}
+\newlabel{decorators/files_ex:generating-parameters-on-the-fly-for-files}{{4.1.22}{243}{Generating parameters on the fly for @files}{subsection.4.1.22}{}}
+\@writefile{toc}{\contentsline {subsubsection}{\emph {@files} (\emph {custom\_function})}{243}{subsubsection*.552}}
+\newlabel{decorators/files_ex:files-custom-function}{{4.1.22}{243}{\emph {@files} (\emph {custom\_function})}{subsubsection*.552}{}}
+\newlabel{decorators/files_ex:decorators-files-custom-function}{{4.1.22}{243}{\emph {@files} (\emph {custom\_function})}{section*.553}{}}
+\newlabel{decorators/check_if_uptodate:decorators-check-if-uptodate}{{4.1.22}{243}{\emph {@files} (\emph {custom\_function})}{section*.554}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {4.1.23}@check\_if\_uptodate}{243}{subsection.4.1.23}}
+\newlabel{decorators/check_if_uptodate:decorators-check-if-uptodate-dependency-checking-function}{{4.1.23}{243}{@check\_if\_uptodate}{subsection.4.1.23}{}}
+\newlabel{decorators/check_if_uptodate:check-if-uptodate}{{4.1.23}{243}{@check\_if\_uptodate}{subsection.4.1.23}{}}
+\newlabel{decorators/check_if_uptodate::doc}{{4.1.23}{243}{@check\_if\_uptodate}{subsection.4.1.23}{}}
+\newlabel{decorators/check_if_uptodate:dependency-checking-function}{{4.1.23}{243}{@check\_if\_uptodate}{subsection.4.1.23}{}}
+\@writefile{toc}{\contentsline {subsubsection}{\emph {@check\_if\_uptodate} (\emph {dependency\_checking\_function})}{243}{subsubsection*.555}}
+\newlabel{decorators/check_if_uptodate:check-if-uptodate-dependency-checking-function}{{4.1.23}{243}{\emph {@check\_if\_uptodate} (\emph {dependency\_checking\_function})}{subsubsection*.555}{}}
+\newlabel{decorators/check_if_uptodate:decorators-check-if-uptodate-dependency-checking-function}{{4.1.23}{243}{\emph {@check\_if\_uptodate} (\emph {dependency\_checking\_function})}{section*.556}{}}
+\newlabel{decorators/parallel:decorators-parallel}{{4.1.23}{243}{\emph {@check\_if\_uptodate} (\emph {dependency\_checking\_function})}{section*.557}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {4.1.24}@parallel}{243}{subsection.4.1.24}}
+\newlabel{decorators/parallel::doc}{{4.1.24}{243}{@parallel}{subsection.4.1.24}{}}
+\newlabel{decorators/parallel:parallel}{{4.1.24}{243}{@parallel}{subsection.4.1.24}{}}
+\newlabel{decorators/parallel:decorators-parallel-job-params}{{4.1.24}{243}{@parallel}{section*.558}{}}
+\newlabel{decorators/parallel:job-params}{{4.1.24}{243}{@parallel}{section*.559}{}}
+\@writefile{toc}{\contentsline {subsubsection}{\emph {@parallel} ( {[} {[}\emph {job\_params}, ...{]}, {[}\emph {job\_params}, ...{]}...{]} \textbar {} \emph {parameter\_generating\_function})}{243}{subsubsection*.560}}
+\newlabel{decorators/parallel:decorators-parallel-parameter-generating-function}{{4.1.24}{243}{\emph {@parallel} ( {[} {[}\emph {job\_params}, ...{]}, {[}\emph {job\_params}, ...{]}...{]} \textbar {} \emph {parameter\_generating\_function})}{subsubsection*.560}{}}
+\newlabel{decorators/parallel:parallel-job-params-job-params-parameter-generating-function}{{4.1.24}{243}{\emph {@parallel} ( {[} {[}\emph {job\_params}, ...{]}, {[}\emph {job\_params}, ...{]}...{]} \textbar {} \emph {parameter\_generating\_function})}{subsubsection*.560}{}}
+\newlabel{decorators/parallel:parameter-generating-function}{{4.1.24}{243}{\emph {@parallel} ( {[} {[}\emph {job\_params}, ...{]}, {[}\emph {job\_params}, ...{]}...{]} \textbar {} \emph {parameter\_generating\_function})}{subsubsection*.560}{}}
+\newlabel{decorators/parallel:decorators-parallel-job-params}{{4.1.24}{243}{\emph {@parallel} ( {[} {[}\emph {job\_params}, ...{]}, {[}\emph {job\_params}, ...{]}...{]} \textbar {} \emph {parameter\_generating\_function})}{section*.561}{}}
+\newlabel{decorators/parallel:decorators-parallel-parameter-generating-function}{{4.1.24}{243}{\emph {@parallel} ( {[} {[}\emph {job\_params}, ...{]}, {[}\emph {job\_params}, ...{]}...{]} \textbar {} \emph {parameter\_generating\_function})}{section*.562}{}}
+\newlabel{decorators/files:decorators-files}{{4.1.2}{245}{\emph {combine}}{section*.563}{}}
+\newlabel{decorators/files:decorators-files-input}{{4.1.2}{245}{\emph {combine}}{section*.564}{}}
+\newlabel{decorators/files:input}{{4.1.2}{245}{\emph {combine}}{section*.565}{}}
+\newlabel{decorators/files:decorators-files-input1}{{4.1.2}{245}{\emph {combine}}{section*.566}{}}
+\newlabel{decorators/files:input1}{{4.1.2}{245}{\emph {combine}}{section*.567}{}}
+\newlabel{decorators/files:decorators-files-output}{{4.1.2}{245}{\emph {combine}}{section*.568}{}}
+\newlabel{decorators/files:output}{{4.1.2}{245}{\emph {combine}}{section*.569}{}}
+\newlabel{decorators/files:decorators-files-output1}{{4.1.2}{245}{\emph {combine}}{section*.570}{}}
+\newlabel{decorators/files:output1}{{4.1.2}{245}{\emph {combine}}{section*.571}{}}
+\newlabel{decorators/files:decorators-files-extra-parameters}{{4.1.2}{245}{\emph {combine}}{section*.572}{}}
+\newlabel{decorators/files:extra-parameters}{{4.1.2}{245}{\emph {combine}}{section*.573}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {4.1.25}@files}{245}{subsection.4.1.25}}
+\newlabel{decorators/files:files}{{4.1.25}{245}{@files}{subsection.4.1.25}{}}
+\newlabel{decorators/files:decorators-files-extra-parameters1}{{4.1.25}{245}{@files}{subsection.4.1.25}{}}
+\newlabel{decorators/files::doc}{{4.1.25}{245}{@files}{subsection.4.1.25}{}}
+\newlabel{decorators/files:extra-parameters1}{{4.1.25}{245}{@files}{subsection.4.1.25}{}}
+\@writefile{toc}{\contentsline {subsubsection}{\emph {@files} (\emph {input1}, \emph {output1}, {[}\emph {extra\_parameters1}, ...{]})}{245}{subsubsection*.574}}
+\newlabel{decorators/files:files-input1-output1-extra-parameters1}{{4.1.25}{245}{\emph {@files} (\emph {input1}, \emph {output1}, {[}\emph {extra\_parameters1}, ...{]})}{subsubsection*.574}{}}
+\@writefile{toc}{\contentsline {paragraph}{@files for single jobs}{245}{paragraph*.575}}
+\newlabel{decorators/files:files-for-single-jobs}{{4.1.25}{245}{@files for single jobs}{paragraph*.575}{}}
+\newlabel{decorators/files:decorators-files-input1}{{4.1.25}{245}{@files for single jobs}{section*.576}{}}
+\newlabel{decorators/files:decorators-files-output1}{{4.1.25}{245}{@files for single jobs}{section*.577}{}}
+\newlabel{decorators/files:decorators-files-extra-parameters1}{{4.1.25}{245}{@files for single jobs}{section*.578}{}}
+\@writefile{toc}{\contentsline {subsubsection}{\emph {@files} ( \emph {((} \emph {input}, \emph {output}, {[}\emph {extra\_parameters},...{]} \emph {), (...), ...)} )}{245}{subsubsection*.579}}
+\newlabel{decorators/files:files-input-output-extra-parameters}{{4.1.25}{245}{\emph {@files} ( \emph {((} \emph {input}, \emph {output}, {[}\emph {extra\_parameters},...{]} \emph {), (...), ...)} )}{subsubsection*.579}{}}
+\@writefile{toc}{\contentsline {paragraph}{@files in parallel}{245}{paragraph*.580}}
+\newlabel{decorators/files:files-in-parallel}{{4.1.25}{245}{@files in parallel}{paragraph*.580}{}}
+\newlabel{decorators/files:decorators-files-input}{{4.1.25}{245}{@files in parallel}{section*.581}{}}
+\newlabel{decorators/files:decorators-files-output}{{4.1.25}{245}{@files in parallel}{section*.582}{}}
+\newlabel{decorators/files:decorators-files-extra-parameters}{{4.1.25}{245}{@files in parallel}{section*.583}{}}
+\newlabel{decorators/files:decorators-files-check-up-to-date}{{4.1.25}{245}{@files in parallel}{section*.584}{}}
+\newlabel{decorators/files_re:decorators-files-re}{{4.1.25}{245}{@files in parallel}{section*.585}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {4.1.26}@files\_re}{245}{subsection.4.1.26}}
+\newlabel{decorators/files_re:files-re}{{4.1.26}{245}{@files\_re}{subsection.4.1.26}{}}
+\newlabel{decorators/files_re::doc}{{4.1.26}{245}{@files\_re}{subsection.4.1.26}{}}
+\newlabel{decorators/files_re:decorators-files-re-tasks-or-file-names}{{4.1.26}{245}{@files\_re}{section*.586}{}}
+\newlabel{decorators/files_re:tasks-or-file-names}{{4.1.26}{245}{@files\_re}{section*.587}{}}
+\newlabel{decorators/files_re:decorators-files-re-extra-parameters}{{4.1.26}{245}{@files\_re}{section*.588}{}}
+\newlabel{decorators/files_re:extra-parameters}{{4.1.26}{245}{@files\_re}{section*.589}{}}
+\newlabel{decorators/files_re:decorators-files-re-output-pattern}{{4.1.26}{245}{@files\_re}{section*.590}{}}
+\newlabel{decorators/files_re:output-pattern}{{4.1.26}{245}{@files\_re}{section*.591}{}}
+\newlabel{decorators/files_re:decorators-files-re-input-pattern}{{4.1.26}{245}{@files\_re}{section*.592}{}}
+\newlabel{decorators/files_re:input-pattern}{{4.1.26}{245}{@files\_re}{section*.593}{}}
+\@writefile{toc}{\contentsline {subsubsection}{\emph {@files\_re} (\emph {tasks\_or\_file\_names}, \emph {matching\_regex}, {[}\emph {input\_pattern}{]}, \emph {output\_pattern}, {[}\emph {extra\_parameters},...{]})}{245}{subsubsection*.594}}
+\newlabel{decorators/files_re:files-re-tasks-or-file-names-matching-regex-input-pattern-output-pattern-extra-parameters}{{4.1.26}{245}{\emph {@files\_re} (\emph {tasks\_or\_file\_names}, \emph {matching\_regex}, {[}\emph {input\_pattern}{]}, \emph {output\_pattern}, {[}\emph {extra\_parameters},...{]})}{subsubsection*.594}{}}
+\newlabel{decorators/files_re:matching-regex}{{4.1.26}{245}{\emph {@files\_re} (\emph {tasks\_or\_file\_names}, \emph {matching\_regex}, {[}\emph {input\_pattern}{]}, \emph {output\_pattern}, {[}\emph {extra\_parameters},...{]})}{subsubsection*.594}{}}
+\newlabel{decorators/files_re:decorators-files-re-matching-regex}{{4.1.26}{245}{\emph {@files\_re} (\emph {tasks\_or\_file\_names}, \emph {matching\_regex}, {[}\emph {input\_pattern}{]}, \emph {output\_pattern}, {[}\emph {extra\_parameters},...{]})}{subsubsection*.594}{}}
+\@writefile{toc}{\contentsline {paragraph}{Legacy design now deprecated. We suggest using \emph {@transform()} instead}{245}{paragraph*.595}}
+\newlabel{decorators/files_re:legacy-design-now-deprecated-we-suggest-using-transform-instead}{{4.1.26}{245}{Legacy design now deprecated. We suggest using \emph {@transform()} instead}{paragraph*.595}{}}
+\newlabel{decorators/files_re:decorators-files-re-tasks-or-file-names}{{4.1.26}{245}{Legacy design now deprecated. We suggest using \emph {@transform()} instead}{section*.596}{}}
+\newlabel{decorators/files_re:decorators-files-re-matching-regex}{{4.1.26}{245}{Legacy design now deprecated. We suggest using \emph {@transform()} instead}{section*.597}{}}
+\newlabel{decorators/files_re:decorators-files-re-input-pattern}{{4.1.26}{245}{Legacy design now deprecated. We suggest using \emph {@transform()} instead}{section*.598}{}}
+\newlabel{decorators/files_re:decorators-files-re-output-pattern}{{4.1.26}{245}{Legacy design now deprecated. We suggest using \emph {@transform()} instead}{section*.599}{}}
+\newlabel{decorators/files_re:decorators-files-re-extra-parameters}{{4.1.26}{245}{Legacy design now deprecated. We suggest using \emph {@transform()} instead}{section*.600}{}}
+\@writefile{toc}{\contentsline {section}{\numberline {4.2}Modules:}{246}{section.4.2}}
+\newlabel{contents:modules}{{4.2}{246}{Modules:}{section.4.2}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {4.2.1}ruffus.Task}{246}{subsection.4.2.1}}
+\newlabel{task:ruffus-task}{{4.2.1}{246}{ruffus.Task}{subsection.4.2.1}{}}
+\newlabel{task:glob}{{4.2.1}{246}{ruffus.Task}{subsection.4.2.1}{}}
+\newlabel{task::doc}{{4.2.1}{246}{ruffus.Task}{subsection.4.2.1}{}}
+\@writefile{toc}{\contentsline {subsubsection}{Decorators}{246}{subsubsection*.601}}
+\newlabel{task:decorators}{{4.2.1}{246}{Decorators}{subsubsection*.601}{}}
+\@writefile{toc}{\contentsline {subsubsection}{Pipeline functions}{246}{subsubsection*.602}}
+\newlabel{task:pipeline-functions}{{4.2.1}{246}{Pipeline functions}{subsubsection*.602}{}}
+\@writefile{toc}{\contentsline {paragraph}{pipeline\_run}{246}{paragraph*.603}}
+\newlabel{task:pipeline-run}{{4.2.1}{246}{pipeline\_run}{paragraph*.603}{}}
+\newlabel{task:ruffus.task.pipeline_run}{{4.2.1}{246}{pipeline\_run}{section*.604}{}}
+\@writefile{toc}{\contentsline {paragraph}{pipeline\_printout}{247}{paragraph*.605}}
+\newlabel{task:pipeline-printout}{{4.2.1}{247}{pipeline\_printout}{paragraph*.605}{}}
+\newlabel{task:ruffus.task.pipeline_printout}{{4.2.1}{247}{pipeline\_printout}{section*.606}{}}
+\@writefile{toc}{\contentsline {paragraph}{pipeline\_printout\_graph}{248}{paragraph*.607}}
+\newlabel{task:pipeline-printout-graph}{{4.2.1}{248}{pipeline\_printout\_graph}{paragraph*.607}{}}
+\newlabel{task:ruffus.task.pipeline_printout_graph}{{4.2.1}{248}{pipeline\_printout\_graph}{section*.608}{}}
+\@writefile{toc}{\contentsline {subsubsection}{Logging}{249}{subsubsection*.609}}
+\newlabel{task:id1}{{4.2.1}{249}{Logging}{subsubsection*.609}{}}
+\newlabel{task:ruffus.task.t_black_hole_logger}{{4.2.1}{249}{Logging}{section*.610}{}}
+\newlabel{task:ruffus.task.t_stderr_logger}{{4.2.1}{249}{Logging}{section*.611}{}}
+\@writefile{toc}{\contentsline {subsubsection}{Implementation:}{249}{subsubsection*.612}}
+\newlabel{task:implementation}{{4.2.1}{249}{Implementation:}{subsubsection*.612}{}}
+\@writefile{toc}{\contentsline {paragraph}{Parameter factories:}{249}{paragraph*.613}}
+\newlabel{task:parameter-factories}{{4.2.1}{249}{Parameter factories:}{paragraph*.613}{}}
+\newlabel{task:ruffus.task.merge_param_factory}{{4.2.1}{249}{Parameter factories:}{section*.614}{}}
+\newlabel{task:ruffus.task.collate_param_factory}{{4.2.1}{249}{Parameter factories:}{section*.615}{}}
+\newlabel{task:ruffus.task.transform_param_factory}{{4.2.1}{249}{Parameter factories:}{section*.616}{}}
+\newlabel{task:ruffus.task.files_param_factory}{{4.2.1}{250}{Parameter factories:}{section*.617}{}}
+\newlabel{task:ruffus.task.args_param_factory}{{4.2.1}{250}{Parameter factories:}{section*.618}{}}
+\newlabel{task:ruffus.task.split_param_factory}{{4.2.1}{250}{Parameter factories:}{section*.619}{}}
+\@writefile{toc}{\contentsline {paragraph}{Wrappers around jobs:}{250}{paragraph*.620}}
+\newlabel{task:wrappers-around-jobs}{{4.2.1}{250}{Wrappers around jobs:}{paragraph*.620}{}}
+\newlabel{task:ruffus.task.job_wrapper_generic}{{4.2.1}{250}{Wrappers around jobs:}{section*.621}{}}
+\newlabel{task:ruffus.task.job_wrapper_io_files}{{4.2.1}{250}{Wrappers around jobs:}{section*.622}{}}
+\newlabel{task:ruffus.task.job_wrapper_mkdir}{{4.2.1}{250}{Wrappers around jobs:}{section*.623}{}}
+\@writefile{toc}{\contentsline {paragraph}{Checking if job is update:}{250}{paragraph*.624}}
+\newlabel{task:checking-if-job-is-update}{{4.2.1}{250}{Checking if job is update:}{paragraph*.624}{}}
+\newlabel{task:ruffus.task.needs_update_check_modify_time}{{4.2.1}{250}{Checking if job is update:}{section*.625}{}}
+\newlabel{task:ruffus.task.needs_update_check_directory_missing}{{4.2.1}{250}{Checking if job is update:}{section*.626}{}}
+\@writefile{toc}{\contentsline {subsubsection}{Exceptions and Errors}{251}{subsubsection*.627}}
+\newlabel{task:exceptions-and-errors}{{4.2.1}{251}{Exceptions and Errors}{subsubsection*.627}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {4.2.2}ruffus.proxy\_logger}{251}{subsection.4.2.2}}
+\newlabel{proxy_logger:glob}{{4.2.2}{251}{ruffus.proxy\_logger}{subsection.4.2.2}{}}
+\newlabel{proxy_logger::doc}{{4.2.2}{251}{ruffus.proxy\_logger}{subsection.4.2.2}{}}
+\newlabel{proxy_logger:ruffus-proxy-logger}{{4.2.2}{251}{ruffus.proxy\_logger}{subsection.4.2.2}{}}
+\newlabel{proxy_logger:proxy-logger}{{4.2.2}{251}{ruffus.proxy\_logger}{section*.628}{}}
+\newlabel{proxy_logger:module-ruffus.proxy_logger}{{4.2.2}{251}{ruffus.proxy\_logger}{section*.629}{}}
+\newlabel{proxy_logger:proxy-logger}{{4.2.2}{251}{ruffus.proxy\_logger}{section*.630}{}}
+\@writefile{toc}{\contentsline {subsubsection}{Create proxy for logging for use with multiprocessing}{251}{subsubsection*.631}}
+\newlabel{proxy_logger:create-proxy-for-logging-for-use-with-multiprocessing}{{4.2.2}{251}{Create proxy for logging for use with multiprocessing}{subsubsection*.631}{}}
+\@writefile{toc}{\contentsline {paragraph}{Example 1}{251}{paragraph*.632}}
+\newlabel{proxy_logger:example-1}{{4.2.2}{251}{Example 1}{paragraph*.632}{}}
+\@writefile{toc}{\contentsline {paragraph}{Example 2}{251}{paragraph*.633}}
+\newlabel{proxy_logger:example-2}{{4.2.2}{251}{Example 2}{paragraph*.633}{}}
+\@writefile{toc}{\contentsline {paragraph}{Example 3}{251}{paragraph*.634}}
+\newlabel{proxy_logger:example-3}{{4.2.2}{251}{Example 3}{paragraph*.634}{}}
+\@writefile{toc}{\contentsline {paragraph}{To use:}{252}{paragraph*.635}}
+\newlabel{proxy_logger:to-use}{{4.2.2}{252}{To use:}{paragraph*.635}{}}
+\@writefile{toc}{\contentsline {subsubsection}{Proxies for a log:}{252}{subsubsection*.636}}
+\newlabel{proxy_logger:proxies-for-a-log}{{4.2.2}{252}{Proxies for a log:}{subsubsection*.636}{}}
+\newlabel{proxy_logger:ruffus.proxy_logger.make_shared_logger_and_proxy}{{4.2.2}{252}{Proxies for a log:}{section*.637}{}}
+\@writefile{toc}{\contentsline {subsubsection}{Create a logging object}{252}{subsubsection*.638}}
+\newlabel{proxy_logger:create-a-logging-object}{{4.2.2}{252}{Create a logging object}{subsubsection*.638}{}}
+\newlabel{proxy_logger:ruffus.proxy_logger.setup_std_shared_logger}{{4.2.2}{252}{Create a logging object}{section*.639}{}}
+\@writefile{toc}{\contentsline {chapter}{\numberline {5}Indices and tables}{255}{chapter.5}}
+\@writefile{lof}{\addvspace {10\p@ }}
+\@writefile{lot}{\addvspace {10\p@ }}
+\newlabel{contents:indices-and-tables}{{5}{255}{Indices and tables}{chapter.5}{}}
+\@writefile{toc}{\contentsline {chapter}{Python Module Index}{257}{section*.640}}
diff --git a/doc/_build/latex/ruffus.idx b/doc/_build/latex/ruffus.idx
new file mode 100644
index 0000000..25b519b
--- /dev/null
+++ b/doc/_build/latex/ruffus.idx
@@ -0,0 +1,270 @@
+\indexentry{overview!Tutorial|hyperpage}{3}
+\indexentry{Tutorial!overview|hyperpage}{3}
+\indexentry{importing ruffus|hyperpage}{4}
+\indexentry{pipeline\_run!Tutorial|hyperpage}{8}
+\indexentry{Tutorial!pipeline\_run|hyperpage}{8}
+\indexentry{transform!Tutorial|hyperpage}{8}
+\indexentry{Tutorial!transform|hyperpage}{8}
+\indexentry{one to one @transform!Tutorial|hyperpage}{9}
+\indexentry{Tutorial!one to one @transform|hyperpage}{9}
+\indexentry{input / output parameters!Tutorial|hyperpage}{10}
+\indexentry{Tutorial!input / output parameters|hyperpage}{10}
+\indexentry{transforming in parallel!Tutorial|hyperpage}{11}
+\indexentry{Tutorial!transforming in parallel|hyperpage}{11}
+\indexentry{output\_from!referring to functions before they are defined|hyperpage}{13}
+\indexentry{referring to functions before they are defined!output\_from|hyperpage}{13}
+\indexentry{output\_from!defining tasks out of order|hyperpage}{13}
+\indexentry{defining tasks out of order!output\_from|hyperpage}{13}
+\indexentry{@transform!multiple dependencies|hyperpage}{14}
+\indexentry{multiple dependencies!@transform|hyperpage}{14}
+\indexentry{@follow!imposing order with|hyperpage}{14}
+\indexentry{imposing order with!@follow|hyperpage}{14}
+\indexentry{@follows!mkdir (Manual)|hyperpage}{15}
+\indexentry{mkdir!@follows (Manual)|hyperpage}{15}
+\indexentry{inputs parameters!globs|hyperpage}{15}
+\indexentry{globs!inputs parameters|hyperpage}{15}
+\indexentry{globs in input parameters!Tutorial|hyperpage}{15}
+\indexentry{Tutorial!globs in input parameters|hyperpage}{15}
+\indexentry{Mixing tasks, globs and file names!Tutorial|hyperpage}{15}
+\indexentry{Tutorial!Mixing tasks, globs and file names|hyperpage}{15}
+\indexentry{originate!Tutorial|hyperpage}{16}
+\indexentry{Tutorial!originate|hyperpage}{16}
+\indexentry{pipeline\_printout!Tutorial|hyperpage}{17}
+\indexentry{Tutorial!pipeline\_printout|hyperpage}{17}
+\indexentry{command line!Tutorial|hyperpage}{20}
+\indexentry{Tutorial!command line|hyperpage}{20}
+\indexentry{pipeline\_printout\_graph!Tutorial|hyperpage}{25}
+\indexentry{Tutorial!pipeline\_printout\_graph|hyperpage}{25}
+\indexentry{formatter!Tutorial|hyperpage}{29}
+\indexentry{Tutorial!formatter|hyperpage}{29}
+\indexentry{suffix!Tutorial|hyperpage}{29}
+\indexentry{Tutorial!suffix|hyperpage}{29}
+\indexentry{regex!Tutorial|hyperpage}{29}
+\indexentry{Tutorial!regex|hyperpage}{29}
+\indexentry{output file names!Tutorial|hyperpage}{29}
+\indexentry{Tutorial!output file names|hyperpage}{29}
+\indexentry{mkdir!Tutorial|hyperpage}{37}
+\indexentry{Tutorial!mkdir|hyperpage}{37}
+\indexentry{Up to date!Tutorial|hyperpage}{39}
+\indexentry{Tutorial!Up to date|hyperpage}{39}
+\indexentry{Task completion!Tutorial|hyperpage}{39}
+\indexentry{Tutorial!Task completion|hyperpage}{39}
+\indexentry{Exceptions!Tutorial|hyperpage}{39}
+\indexentry{Tutorial!Exceptions|hyperpage}{39}
+\indexentry{Interrupted Pipeline!Tutorial|hyperpage}{39}
+\indexentry{Tutorial!Interrupted Pipeline|hyperpage}{39}
+\indexentry{Tutorial!interrupting tasks|hyperpage}{40}
+\indexentry{interrupting tasks!Tutorial|hyperpage}{40}
+\indexentry{Tutorial!Regenerating the checkpoint file|hyperpage}{43}
+\indexentry{Regenerating the checkpoint file!Tutorial|hyperpage}{43}
+\indexentry{rules!for rerunning jobs|hyperpage}{43}
+\indexentry{for rerunning jobs!rules|hyperpage}{43}
+\indexentry{Exception!Missing input files|hyperpage}{43}
+\indexentry{Missing input files!Exception|hyperpage}{43}
+\indexentry{Manual!Timestamp resolution|hyperpage}{44}
+\indexentry{Timestamp resolution!Manual|hyperpage}{44}
+\indexentry{Manual!flag files|hyperpage}{44}
+\indexentry{flag files!Manual|hyperpage}{44}
+\indexentry{decorators\_compendium!Tutorial|hyperpage}{44}
+\indexentry{Tutorial!decorators\_compendium|hyperpage}{44}
+\indexentry{split!Tutorial|hyperpage}{47}
+\indexentry{Tutorial!split|hyperpage}{47}
+\indexentry{merge!Tutorial|hyperpage}{51}
+\indexentry{Tutorial!merge|hyperpage}{51}
+\indexentry{multiprocessing!Tutorial|hyperpage}{52}
+\indexentry{Tutorial!multiprocessing|hyperpage}{52}
+\indexentry{pipeline\_run(multiprocess)!Tutorial|hyperpage}{53}
+\indexentry{Tutorial!pipeline\_run(multiprocess)|hyperpage}{53}
+\indexentry{data sharing across processes!Tutorial|hyperpage}{53}
+\indexentry{Tutorial!data sharing across processes|hyperpage}{53}
+\indexentry{@jobs\_limit!Tutorial|hyperpage}{53}
+\indexentry{Tutorial!@jobs\_limit|hyperpage}{53}
+\indexentry{pipeline\_run touch mode!Tutorial|hyperpage}{55}
+\indexentry{Tutorial!pipeline\_run touch mode|hyperpage}{55}
+\indexentry{touch mode pipeline\_run!Tutorial|hyperpage}{55}
+\indexentry{Tutorial!touch mode pipeline\_run|hyperpage}{55}
+\indexentry{logging!Tutorial|hyperpage}{56}
+\indexentry{Tutorial!logging|hyperpage}{56}
+\indexentry{pipeline\_run verbosity!Tutorial|hyperpage}{57}
+\indexentry{Tutorial!pipeline\_run verbosity|hyperpage}{57}
+\indexentry{logging with ruffus.cmdline!Tutorial|hyperpage}{57}
+\indexentry{Tutorial!logging with ruffus.cmdline|hyperpage}{57}
+\indexentry{logging customising!Tutorial|hyperpage}{57}
+\indexentry{Tutorial!logging customising|hyperpage}{57}
+\indexentry{logging your own message!Tutorial|hyperpage}{58}
+\indexentry{Tutorial!logging your own message|hyperpage}{58}
+\indexentry{@subdivide!Tutorial|hyperpage}{59}
+\indexentry{Tutorial!@subdivide|hyperpage}{59}
+\indexentry{@collate!Tutorial|hyperpage}{59}
+\indexentry{Tutorial!@collate|hyperpage}{59}
+\indexentry{combinatorics!Tutorial|hyperpage}{62}
+\indexentry{Tutorial!combinatorics|hyperpage}{62}
+\indexentry{@active\_if!Tutorial|hyperpage}{68}
+\indexentry{Tutorial!@active\_if|hyperpage}{68}
+\indexentry{posttask!Tutorial|hyperpage}{71}
+\indexentry{Tutorial!posttask|hyperpage}{71}
+\indexentry{@posttask!touchfile (Manual)|hyperpage}{71}
+\indexentry{touchfile !@posttask (Manual)|hyperpage}{71}
+\indexentry{inputs!Tutorial|hyperpage}{72}
+\indexentry{Tutorial!inputs|hyperpage}{72}
+\indexentry{add\_inputs!Tutorial|hyperpage}{72}
+\indexentry{Tutorial!add\_inputs|hyperpage}{72}
+\indexentry{string substitution for inputs!Tutorial|hyperpage}{72}
+\indexentry{Tutorial!string substitution for inputs|hyperpage}{72}
+\indexentry{on\_the\_fly!Tutorial|hyperpage}{75}
+\indexentry{Tutorial!on\_the\_fly|hyperpage}{75}
+\indexentry{@files!Tutorial on-the-fly parameter generation|hyperpage}{76}
+\indexentry{Tutorial on-the-fly parameter generation!@files|hyperpage}{76}
+\indexentry{@parallel!Tutorial|hyperpage}{78}
+\indexentry{Tutorial!@parallel|hyperpage}{78}
+\indexentry{check\_if\_uptodate!Tutorial|hyperpage}{79}
+\indexentry{Tutorial!check\_if\_uptodate|hyperpage}{79}
+\indexentry{flowchart colours!Tutorial|hyperpage}{80}
+\indexentry{Tutorial!flowchart colours|hyperpage}{80}
+\indexentry{Checking dependencies!Tutorial|hyperpage}{81}
+\indexentry{Tutorial!Checking dependencies|hyperpage}{81}
+\indexentry{exceptions!Tutorial|hyperpage}{82}
+\indexentry{Tutorial!exceptions|hyperpage}{82}
+\indexentry{signalling|hyperpage}{84}
+\indexentry{interrupts|hyperpage}{84}
+\indexentry{break|hyperpage}{84}
+\indexentry{errors|hyperpage}{84}
+\indexentry{exceptions|hyperpage}{84}
+\indexentry{multiple errors|hyperpage}{84}
+\indexentry{Ruffus names list!Tutorial|hyperpage}{85}
+\indexentry{Tutorial!Ruffus names list|hyperpage}{85}
+\indexentry{deprecated @files!Tutorial|hyperpage}{87}
+\indexentry{Tutorial!deprecated @files|hyperpage}{87}
+\indexentry{@files!Manual|hyperpage}{87}
+\indexentry{Manual!@files|hyperpage}{87}
+\indexentry{@files!in parallel|hyperpage}{88}
+\indexentry{in parallel!@files|hyperpage}{88}
+\indexentry{@files!check if up to date|hyperpage}{89}
+\indexentry{check if up to date!@files|hyperpage}{89}
+\indexentry{deprecated @files\_re!Tutorial|hyperpage}{90}
+\indexentry{Tutorial!deprecated @files\_re|hyperpage}{90}
+\indexentry{combine!Manual|hyperpage}{90}
+\indexentry{Manual!combine|hyperpage}{90}
+\indexentry{flowchart colours!Tutorial|hyperpage}{136}
+\indexentry{Tutorial!flowchart colours|hyperpage}{136}
+\indexentry{pipeline functions!pipeline\_run|hyperpage}{145}
+\indexentry{pipeline\_run!Run pipeline|hyperpage}{145}
+\indexentry{Run pipeline!pipeline\_run|hyperpage}{145}
+\indexentry{pipeline functions!pipeline\_run|hyperpage}{147}
+\indexentry{pipeline\_printout!Printout simulated run of the pipeline|hyperpage}{147}
+\indexentry{Printout simulated run of the pipeline!pipeline\_printout|hyperpage}{147}
+\indexentry{pipeline functions!pipeline\_printout\_graph|hyperpage}{148}
+\indexentry{pipeline\_printout\_graph!print flowchart representation of pipeline functions|hyperpage}{148}
+\indexentry{print flowchart representation of pipeline functions!pipeline\_printout\_graph|hyperpage}{148}
+\indexentry{pipeline functions!pipeline\_get\_task\_names|hyperpage}{151}
+\indexentry{pipeline\_get\_task\_names!print list of task names without running the pipeline|hyperpage}{151}
+\indexentry{print list of task names without running the pipeline!pipeline\_get\_task\_names|hyperpage}{151}
+\indexentry{drmaa !run\_job|hyperpage}{151}
+\indexentry{run\_job!Run drmaa|hyperpage}{151}
+\indexentry{Run drmaa!run\_job|hyperpage}{151}
+\indexentry{Design!Ruffus|hyperpage}{155}
+\indexentry{Ruffus!Design|hyperpage}{155}
+\indexentry{Design!Comparison of Ruffus with alternatives|hyperpage}{159}
+\indexentry{Comparison of Ruffus with alternatives!Design|hyperpage}{159}
+\indexentry{Acknowledgements|hyperpage}{159}
+\indexentry{task|hyperindexformat{\textbf}}{200}
+\indexentry{job|hyperindexformat{\textbf}}{200}
+\indexentry{decorator|hyperindexformat{\textbf}}{201}
+\indexentry{generator|hyperindexformat{\textbf}}{201}
+\indexentry{Ruffus!Etymology|hyperpage}{204}
+\indexentry{Etymology!Ruffus|hyperpage}{204}
+\indexentry{Ruffus!Name origins|hyperpage}{204}
+\indexentry{Name origins!Ruffus|hyperpage}{204}
+\indexentry{Indicator Object (Disambiguating parameters)|hyperpage}{228}
+\indexentry{formatter!Indicator Object (Disambiguating parameters)|hyperpage}{228}
+\indexentry{Indicator Object (Disambiguating parameters)!formatter|hyperpage}{228}
+\indexentry{suffix!Indicator Object (Disambiguating parameters)|hyperpage}{231}
+\indexentry{Indicator Object (Disambiguating parameters)!suffix|hyperpage}{231}
+\indexentry{regex!Indicator Object (Disambiguating parameters)|hyperpage}{231}
+\indexentry{Indicator Object (Disambiguating parameters)!regex|hyperpage}{231}
+\indexentry{add\_inputs!Indicator Object (Adding additional input parameters)|hyperpage}{232}
+\indexentry{Indicator Object (Adding additional input parameters)!add\_inputs|hyperpage}{232}
+\indexentry{inputs!Indicator Object (Replacing input parameters)|hyperpage}{233}
+\indexentry{Indicator Object (Replacing input parameters)!inputs|hyperpage}{233}
+\indexentry{@follows!mkdir (Syntax)|hyperpage}{234}
+\indexentry{mkdir!@follows (Syntax)|hyperpage}{234}
+\indexentry{Indicator Object (Disambiguating parameters)!mkdir|hyperpage}{234}
+\indexentry{@posttask!touch\_file (Syntax)|hyperpage}{234}
+\indexentry{touch\_file!@posttask (Syntax)|hyperpage}{234}
+\indexentry{Indicator Object (Disambiguating parameters)!touch\_file|hyperpage}{234}
+\indexentry{output\_from!Indicator Object (Disambiguating parameters)|hyperpage}{234}
+\indexentry{Indicator Object (Disambiguating parameters)!output\_from|hyperpage}{234}
+\indexentry{@files\_re!combine (Deprecated Syntax)|hyperpage}{235}
+\indexentry{combine!@follows (Deprecated Syntax)|hyperpage}{235}
+\indexentry{Indicator Object (Disambiguating parameters)!combine|hyperpage}{235}
+\indexentry{@originate!Syntax|hyperpage}{237}
+\indexentry{Syntax!@originate|hyperpage}{237}
+\indexentry{@split!Syntax|hyperpage}{237}
+\indexentry{Syntax!@split|hyperpage}{237}
+\indexentry{@transform!Syntax|hyperpage}{237}
+\indexentry{Syntax!@transform|hyperpage}{237}
+\indexentry{@merge!Syntax|hyperpage}{237}
+\indexentry{Syntax!@merge|hyperpage}{237}
+\indexentry{@subdivide!Syntax|hyperpage}{239}
+\indexentry{Syntax!@subdivide|hyperpage}{239}
+\indexentry{@transform, inputs(...)!Syntax|hyperpage}{239}
+\indexentry{Syntax!@transform, inputs(...)|hyperpage}{239}
+\indexentry{@transform, add\_inputs(...)!Syntax|hyperpage}{239}
+\indexentry{Syntax!@transform, add\_inputs(...)|hyperpage}{239}
+\indexentry{@collate!Syntax|hyperpage}{239}
+\indexentry{Syntax!@collate|hyperpage}{239}
+\indexentry{@collate (Advanced Usage)!Syntax|hyperpage}{239}
+\indexentry{Syntax!@collate (Advanced Usage)|hyperpage}{239}
+\indexentry{@collate, inputs(...)!Syntax|hyperpage}{239}
+\indexentry{Syntax!@collate, inputs(...)|hyperpage}{239}
+\indexentry{@collate, add\_inputs(...)!Syntax|hyperpage}{239}
+\indexentry{Syntax!@collate, add\_inputs(...)|hyperpage}{239}
+\indexentry{@graphviz!Syntax|hyperpage}{239}
+\indexentry{Syntax!@graphviz|hyperpage}{239}
+\indexentry{@mkdir!Syntax|hyperpage}{239}
+\indexentry{Syntax!@mkdir|hyperpage}{239}
+\indexentry{@jobs\_limit!Syntax|hyperpage}{239}
+\indexentry{Syntax!@jobs\_limit|hyperpage}{239}
+\indexentry{@posttask!Syntax|hyperpage}{239}
+\indexentry{Syntax!@posttask|hyperpage}{239}
+\indexentry{@active\_if!Syntax|hyperpage}{239}
+\indexentry{Syntax!@active\_if|hyperpage}{239}
+\indexentry{@follows!Syntax|hyperpage}{239}
+\indexentry{Syntax!@follows|hyperpage}{239}
+\indexentry{@product!Syntax|hyperpage}{241}
+\indexentry{Syntax!@product|hyperpage}{241}
+\indexentry{@permutations!Syntax|hyperpage}{241}
+\indexentry{Syntax!@permutations|hyperpage}{241}
+\indexentry{@combinations!Syntax|hyperpage}{241}
+\indexentry{Syntax!@combinations|hyperpage}{241}
+\indexentry{@combinations\_with\_replacement!Syntax|hyperpage}{241}
+\indexentry{Syntax!@combinations\_with\_replacement|hyperpage}{241}
+\indexentry{@files (on-the-fly parameter generation)!Syntax|hyperpage}{243}
+\indexentry{Syntax!@files (on-the-fly parameter generation)|hyperpage}{243}
+\indexentry{@check\_if\_uptodate!Syntax|hyperpage}{243}
+\indexentry{Syntax!@check\_if\_uptodate|hyperpage}{243}
+\indexentry{@parallel!Syntax|hyperpage}{243}
+\indexentry{Syntax!@parallel|hyperpage}{243}
+\indexentry{@files!Syntax|hyperpage}{245}
+\indexentry{Syntax!@files|hyperpage}{245}
+\indexentry{@files\_re!Syntax|hyperpage}{245}
+\indexentry{Syntax!@files\_re|hyperpage}{245}
+\indexentry{pipeline\_run() (in module ruffus.task)|hyperpage}{246}
+\indexentry{pipeline\_printout() (in module ruffus.task)|hyperpage}{247}
+\indexentry{pipeline\_printout\_graph() (in module ruffus.task)|hyperpage}{248}
+\indexentry{t\_black\_hole\_logger (class in ruffus.task)|hyperpage}{249}
+\indexentry{t\_stderr\_logger (class in ruffus.task)|hyperpage}{249}
+\indexentry{merge\_param\_factory() (in module ruffus.task)|hyperpage}{249}
+\indexentry{collate\_param\_factory() (in module ruffus.task)|hyperpage}{249}
+\indexentry{transform\_param\_factory() (in module ruffus.task)|hyperpage}{249}
+\indexentry{files\_param\_factory() (in module ruffus.task)|hyperpage}{250}
+\indexentry{args\_param\_factory() (in module ruffus.task)|hyperpage}{250}
+\indexentry{split\_param\_factory() (in module ruffus.task)|hyperpage}{250}
+\indexentry{job\_wrapper\_generic() (in module ruffus.task)|hyperpage}{250}
+\indexentry{job\_wrapper\_io\_files() (in module ruffus.task)|hyperpage}{250}
+\indexentry{job\_wrapper\_mkdir() (in module ruffus.task)|hyperpage}{250}
+\indexentry{needs\_update\_check\_modify\_time() (in module ruffus.task)|hyperpage}{250}
+\indexentry{needs\_update\_check\_directory\_missing() (in module ruffus.task)|hyperpage}{250}
+\indexentry{ruffus.proxy\_logger (module)|hyperpage}{251}
+\indexentry{make\_shared\_logger\_and\_proxy() (in module ruffus.proxy\_logger)|hyperpage}{252}
+\indexentry{setup\_std\_shared\_logger() (in module ruffus.proxy\_logger)|hyperpage}{252}
diff --git a/doc/_build/latex/ruffus.log b/doc/_build/latex/ruffus.log
new file mode 100644
index 0000000..1df37c2
--- /dev/null
+++ b/doc/_build/latex/ruffus.log
@@ -0,0 +1,3488 @@
+This is pdfTeX, Version 3.1415926-2.5-1.40.14 (TeX Live 2013/Debian) (format=pdflatex 2014.4.24) 6 AUG 2014 17:33
+entering extended mode
+ restricted \write18 enabled.
+ %&-line parsing enabled.
+**ruffus.tex
+(./ruffus.tex
+LaTeX2e <2011/06/27>
+Babel <3.9h> and hyphenation patterns for 2 languages loaded.
+(./sphinxmanual.cls
+Document Class: sphinxmanual 2009/06/02 Document class (Sphinx manual)
+(/usr/share/texlive/texmf-dist/tex/latex/base/report.cls
+Document Class: report 2007/10/19 v1.4h Standard LaTeX document class
+(/usr/share/texlive/texmf-dist/tex/latex/base/size10.clo
+File: size10.clo 2007/10/19 v1.4h Standard LaTeX file (size option)
+)
+\c@part=\count79
+\c@chapter=\count80
+\c@section=\count81
+\c@subsection=\count82
+\c@subsubsection=\count83
+\c@paragraph=\count84
+\c@subparagraph=\count85
+\c@figure=\count86
+\c@table=\count87
+\abovecaptionskip=\skip41
+\belowcaptionskip=\skip42
+\bibindent=\dimen102
+))
+(/usr/share/texlive/texmf-dist/tex/latex/base/inputenc.sty
+Package: inputenc 2008/03/30 v1.1d Input encoding file
+\inpenc@prehook=\toks14
+\inpenc@posthook=\toks15
+
+(/usr/share/texlive/texmf-dist/tex/latex/base/utf8.def
+File: utf8.def 2008/04/05 v1.1m UTF-8 support for inputenc
+Now handling font encoding OML ...
+... no UTF-8 mapping file for font encoding OML
+Now handling font encoding T1 ...
+... processing UTF-8 mapping file for font encoding T1
+
+(/usr/share/texlive/texmf-dist/tex/latex/base/t1enc.dfu
+File: t1enc.dfu 2008/04/05 v1.1m UTF-8 support for inputenc
+ defining Unicode char U+00A1 (decimal 161)
+ defining Unicode char U+00A3 (decimal 163)
+ defining Unicode char U+00AB (decimal 171)
+ defining Unicode char U+00BB (decimal 187)
+ defining Unicode char U+00BF (decimal 191)
+ defining Unicode char U+00C0 (decimal 192)
+ defining Unicode char U+00C1 (decimal 193)
+ defining Unicode char U+00C2 (decimal 194)
+ defining Unicode char U+00C3 (decimal 195)
+ defining Unicode char U+00C4 (decimal 196)
+ defining Unicode char U+00C5 (decimal 197)
+ defining Unicode char U+00C6 (decimal 198)
+ defining Unicode char U+00C7 (decimal 199)
+ defining Unicode char U+00C8 (decimal 200)
+ defining Unicode char U+00C9 (decimal 201)
+ defining Unicode char U+00CA (decimal 202)
+ defining Unicode char U+00CB (decimal 203)
+ defining Unicode char U+00CC (decimal 204)
+ defining Unicode char U+00CD (decimal 205)
+ defining Unicode char U+00CE (decimal 206)
+ defining Unicode char U+00CF (decimal 207)
+ defining Unicode char U+00D0 (decimal 208)
+ defining Unicode char U+00D1 (decimal 209)
+ defining Unicode char U+00D2 (decimal 210)
+ defining Unicode char U+00D3 (decimal 211)
+ defining Unicode char U+00D4 (decimal 212)
+ defining Unicode char U+00D5 (decimal 213)
+ defining Unicode char U+00D6 (decimal 214)
+ defining Unicode char U+00D8 (decimal 216)
+ defining Unicode char U+00D9 (decimal 217)
+ defining Unicode char U+00DA (decimal 218)
+ defining Unicode char U+00DB (decimal 219)
+ defining Unicode char U+00DC (decimal 220)
+ defining Unicode char U+00DD (decimal 221)
+ defining Unicode char U+00DE (decimal 222)
+ defining Unicode char U+00DF (decimal 223)
+ defining Unicode char U+00E0 (decimal 224)
+ defining Unicode char U+00E1 (decimal 225)
+ defining Unicode char U+00E2 (decimal 226)
+ defining Unicode char U+00E3 (decimal 227)
+ defining Unicode char U+00E4 (decimal 228)
+ defining Unicode char U+00E5 (decimal 229)
+ defining Unicode char U+00E6 (decimal 230)
+ defining Unicode char U+00E7 (decimal 231)
+ defining Unicode char U+00E8 (decimal 232)
+ defining Unicode char U+00E9 (decimal 233)
+ defining Unicode char U+00EA (decimal 234)
+ defining Unicode char U+00EB (decimal 235)
+ defining Unicode char U+00EC (decimal 236)
+ defining Unicode char U+00ED (decimal 237)
+ defining Unicode char U+00EE (decimal 238)
+ defining Unicode char U+00EF (decimal 239)
+ defining Unicode char U+00F0 (decimal 240)
+ defining Unicode char U+00F1 (decimal 241)
+ defining Unicode char U+00F2 (decimal 242)
+ defining Unicode char U+00F3 (decimal 243)
+ defining Unicode char U+00F4 (decimal 244)
+ defining Unicode char U+00F5 (decimal 245)
+ defining Unicode char U+00F6 (decimal 246)
+ defining Unicode char U+00F8 (decimal 248)
+ defining Unicode char U+00F9 (decimal 249)
+ defining Unicode char U+00FA (decimal 250)
+ defining Unicode char U+00FB (decimal 251)
+ defining Unicode char U+00FC (decimal 252)
+ defining Unicode char U+00FD (decimal 253)
+ defining Unicode char U+00FE (decimal 254)
+ defining Unicode char U+00FF (decimal 255)
+ defining Unicode char U+0102 (decimal 258)
+ defining Unicode char U+0103 (decimal 259)
+ defining Unicode char U+0104 (decimal 260)
+ defining Unicode char U+0105 (decimal 261)
+ defining Unicode char U+0106 (decimal 262)
+ defining Unicode char U+0107 (decimal 263)
+ defining Unicode char U+010C (decimal 268)
+ defining Unicode char U+010D (decimal 269)
+ defining Unicode char U+010E (decimal 270)
+ defining Unicode char U+010F (decimal 271)
+ defining Unicode char U+0110 (decimal 272)
+ defining Unicode char U+0111 (decimal 273)
+ defining Unicode char U+0118 (decimal 280)
+ defining Unicode char U+0119 (decimal 281)
+ defining Unicode char U+011A (decimal 282)
+ defining Unicode char U+011B (decimal 283)
+ defining Unicode char U+011E (decimal 286)
+ defining Unicode char U+011F (decimal 287)
+ defining Unicode char U+0130 (decimal 304)
+ defining Unicode char U+0131 (decimal 305)
+ defining Unicode char U+0132 (decimal 306)
+ defining Unicode char U+0133 (decimal 307)
+ defining Unicode char U+0139 (decimal 313)
+ defining Unicode char U+013A (decimal 314)
+ defining Unicode char U+013D (decimal 317)
+ defining Unicode char U+013E (decimal 318)
+ defining Unicode char U+0141 (decimal 321)
+ defining Unicode char U+0142 (decimal 322)
+ defining Unicode char U+0143 (decimal 323)
+ defining Unicode char U+0144 (decimal 324)
+ defining Unicode char U+0147 (decimal 327)
+ defining Unicode char U+0148 (decimal 328)
+ defining Unicode char U+014A (decimal 330)
+ defining Unicode char U+014B (decimal 331)
+ defining Unicode char U+0150 (decimal 336)
+ defining Unicode char U+0151 (decimal 337)
+ defining Unicode char U+0152 (decimal 338)
+ defining Unicode char U+0153 (decimal 339)
+ defining Unicode char U+0154 (decimal 340)
+ defining Unicode char U+0155 (decimal 341)
+ defining Unicode char U+0158 (decimal 344)
+ defining Unicode char U+0159 (decimal 345)
+ defining Unicode char U+015A (decimal 346)
+ defining Unicode char U+015B (decimal 347)
+ defining Unicode char U+015E (decimal 350)
+ defining Unicode char U+015F (decimal 351)
+ defining Unicode char U+0160 (decimal 352)
+ defining Unicode char U+0161 (decimal 353)
+ defining Unicode char U+0162 (decimal 354)
+ defining Unicode char U+0163 (decimal 355)
+ defining Unicode char U+0164 (decimal 356)
+ defining Unicode char U+0165 (decimal 357)
+ defining Unicode char U+016E (decimal 366)
+ defining Unicode char U+016F (decimal 367)
+ defining Unicode char U+0170 (decimal 368)
+ defining Unicode char U+0171 (decimal 369)
+ defining Unicode char U+0178 (decimal 376)
+ defining Unicode char U+0179 (decimal 377)
+ defining Unicode char U+017A (decimal 378)
+ defining Unicode char U+017B (decimal 379)
+ defining Unicode char U+017C (decimal 380)
+ defining Unicode char U+017D (decimal 381)
+ defining Unicode char U+017E (decimal 382)
+ defining Unicode char U+200C (decimal 8204)
+ defining Unicode char U+2013 (decimal 8211)
+ defining Unicode char U+2014 (decimal 8212)
+ defining Unicode char U+2018 (decimal 8216)
+ defining Unicode char U+2019 (decimal 8217)
+ defining Unicode char U+201A (decimal 8218)
+ defining Unicode char U+201C (decimal 8220)
+ defining Unicode char U+201D (decimal 8221)
+ defining Unicode char U+201E (decimal 8222)
+ defining Unicode char U+2030 (decimal 8240)
+ defining Unicode char U+2031 (decimal 8241)
+ defining Unicode char U+2039 (decimal 8249)
+ defining Unicode char U+203A (decimal 8250)
+ defining Unicode char U+2423 (decimal 9251)
+)
+Now handling font encoding OT1 ...
+... processing UTF-8 mapping file for font encoding OT1
+
+(/usr/share/texlive/texmf-dist/tex/latex/base/ot1enc.dfu
+File: ot1enc.dfu 2008/04/05 v1.1m UTF-8 support for inputenc
+ defining Unicode char U+00A1 (decimal 161)
+ defining Unicode char U+00A3 (decimal 163)
+ defining Unicode char U+00B8 (decimal 184)
+ defining Unicode char U+00BF (decimal 191)
+ defining Unicode char U+00C5 (decimal 197)
+ defining Unicode char U+00C6 (decimal 198)
+ defining Unicode char U+00D8 (decimal 216)
+ defining Unicode char U+00DF (decimal 223)
+ defining Unicode char U+00E6 (decimal 230)
+ defining Unicode char U+00EC (decimal 236)
+ defining Unicode char U+00ED (decimal 237)
+ defining Unicode char U+00EE (decimal 238)
+ defining Unicode char U+00EF (decimal 239)
+ defining Unicode char U+00F8 (decimal 248)
+ defining Unicode char U+0131 (decimal 305)
+ defining Unicode char U+0141 (decimal 321)
+ defining Unicode char U+0142 (decimal 322)
+ defining Unicode char U+0152 (decimal 338)
+ defining Unicode char U+0153 (decimal 339)
+ defining Unicode char U+2013 (decimal 8211)
+ defining Unicode char U+2014 (decimal 8212)
+ defining Unicode char U+2018 (decimal 8216)
+ defining Unicode char U+2019 (decimal 8217)
+ defining Unicode char U+201C (decimal 8220)
+ defining Unicode char U+201D (decimal 8221)
+)
+Now handling font encoding OMS ...
+... processing UTF-8 mapping file for font encoding OMS
+
+(/usr/share/texlive/texmf-dist/tex/latex/base/omsenc.dfu
+File: omsenc.dfu 2008/04/05 v1.1m UTF-8 support for inputenc
+ defining Unicode char U+00A7 (decimal 167)
+ defining Unicode char U+00B6 (decimal 182)
+ defining Unicode char U+00B7 (decimal 183)
+ defining Unicode char U+2020 (decimal 8224)
+ defining Unicode char U+2021 (decimal 8225)
+ defining Unicode char U+2022 (decimal 8226)
+)
+Now handling font encoding OMX ...
+... no UTF-8 mapping file for font encoding OMX
+Now handling font encoding U ...
+... no UTF-8 mapping file for font encoding U
+ defining Unicode char U+00A9 (decimal 169)
+ defining Unicode char U+00AA (decimal 170)
+ defining Unicode char U+00AE (decimal 174)
+ defining Unicode char U+00BA (decimal 186)
+ defining Unicode char U+02C6 (decimal 710)
+ defining Unicode char U+02DC (decimal 732)
+ defining Unicode char U+200C (decimal 8204)
+ defining Unicode char U+2026 (decimal 8230)
+ defining Unicode char U+2122 (decimal 8482)
+ defining Unicode char U+2423 (decimal 9251)
+))
+ defining Unicode char U+00A0 (decimal 160)
+
+(/usr/share/texlive/texmf-dist/tex/latex/cmap/cmap.sty
+Package: cmap 2008/03/06 v1.0h CMap support: searchable PDF
+)
+(/usr/share/texlive/texmf-dist/tex/latex/base/fontenc.sty
+Package: fontenc 2005/09/27 v1.99g Standard LaTeX package
+
+(/usr/share/texlive/texmf-dist/tex/latex/base/t1enc.def
+File: t1enc.def 2005/09/27 v1.99g Standard LaTeX file
+LaTeX Font Info: Redeclaring font encoding T1 on input line 43.
+)<<t1.cmap>>)
+(/usr/share/texlive/texmf-dist/tex/generic/babel/babel.sty
+Package: babel 2013/12/03 3.9h The Babel package
+
+(/usr/share/texlive/texmf-dist/tex/generic/babel-english/english.ldf
+Language: english 2012/08/20 v3.3p English support from the babel system
+
+(/usr/share/texlive/texmf-dist/tex/generic/babel/babel.def
+File: babel.def 2013/12/03 3.9h Babel common definitions
+\babel@savecnt=\count88
+\U@D=\dimen103
+)
+\l@british = a dialect from \language\l@english
+\l@UKenglish = a dialect from \language\l@english
+\l@canadian = a dialect from \language\l@american
+\l@australian = a dialect from \language\l@british
+\l@newzealand = a dialect from \language\l@british
+))
+(/usr/share/texlive/texmf-dist/tex/latex/psnfss/times.sty
+Package: times 2005/04/12 PSNFSS-v9.2a (SPQR)
+) (./fncychap.sty
+Package: fncychap 2007/07/30 v1.34 LaTeX package (Revised chapters)
+\RW=\skip43
+\mylen=\skip44
+\myhi=\skip45
+\px=\skip46
+\py=\skip47
+\pyy=\skip48
+\pxx=\skip49
+\c@AlphaCnt=\count89
+\c@AlphaDecCnt=\count90
+)
+(/usr/share/texlive/texmf-dist/tex/latex/tools/longtable.sty
+Package: longtable 2004/02/01 v4.11 Multi-page Table package (DPC)
+\LTleft=\skip50
+\LTright=\skip51
+\LTpre=\skip52
+\LTpost=\skip53
+\LTchunksize=\count91
+\LTcapwidth=\dimen104
+\LT@head=\box26
+\LT@firsthead=\box27
+\LT@foot=\box28
+\LT@lastfoot=\box29
+\LT@cols=\count92
+\LT@rows=\count93
+\c@LT@tables=\count94
+\c@LT@chunks=\count95
+\LT@p@ftn=\toks16
+) (./sphinx.sty
+Package: sphinx 2010/01/15 LaTeX package (Sphinx markup)
+
+(/usr/share/texlive/texmf-dist/tex/latex/fancyhdr/fancyhdr.sty
+\fancy@headwidth=\skip54
+\f@ncyO@elh=\skip55
+\f@ncyO@erh=\skip56
+\f@ncyO@olh=\skip57
+\f@ncyO@orh=\skip58
+\f@ncyO@elf=\skip59
+\f@ncyO@erf=\skip60
+\f@ncyO@olf=\skip61
+\f@ncyO@orf=\skip62
+)
+(/usr/share/texlive/texmf-dist/tex/latex/base/textcomp.sty
+Package: textcomp 2005/09/27 v1.99g Standard LaTeX package
+Package textcomp Info: Sub-encoding information:
+(textcomp) 5 = only ISO-Adobe without \textcurrency
+(textcomp) 4 = 5 + \texteuro
+(textcomp) 3 = 4 + \textohm
+(textcomp) 2 = 3 + \textestimated + \textcurrency
+(textcomp) 1 = TS1 - \textcircled - \t
+(textcomp) 0 = TS1 (full)
+(textcomp) Font families with sub-encoding setting implement
+(textcomp) only a restricted character set as indicated.
+(textcomp) Family '?' is the default used for unknown fonts.
+(textcomp) See the documentation for details.
+Package textcomp Info: Setting ? sub-encoding to TS1/1 on input line 71.
+
+(/usr/share/texlive/texmf-dist/tex/latex/base/ts1enc.def
+File: ts1enc.def 2001/06/05 v3.0e (jk/car/fm) Standard LaTeX file
+Now handling font encoding TS1 ...
+... processing UTF-8 mapping file for font encoding TS1
+
+(/usr/share/texlive/texmf-dist/tex/latex/base/ts1enc.dfu
+File: ts1enc.dfu 2008/04/05 v1.1m UTF-8 support for inputenc
+ defining Unicode char U+00A2 (decimal 162)
+ defining Unicode char U+00A3 (decimal 163)
+ defining Unicode char U+00A4 (decimal 164)
+ defining Unicode char U+00A5 (decimal 165)
+ defining Unicode char U+00A6 (decimal 166)
+ defining Unicode char U+00A7 (decimal 167)
+ defining Unicode char U+00A8 (decimal 168)
+ defining Unicode char U+00A9 (decimal 169)
+ defining Unicode char U+00AA (decimal 170)
+ defining Unicode char U+00AC (decimal 172)
+ defining Unicode char U+00AE (decimal 174)
+ defining Unicode char U+00AF (decimal 175)
+ defining Unicode char U+00B0 (decimal 176)
+ defining Unicode char U+00B1 (decimal 177)
+ defining Unicode char U+00B2 (decimal 178)
+ defining Unicode char U+00B3 (decimal 179)
+ defining Unicode char U+00B4 (decimal 180)
+ defining Unicode char U+00B5 (decimal 181)
+ defining Unicode char U+00B6 (decimal 182)
+ defining Unicode char U+00B7 (decimal 183)
+ defining Unicode char U+00B9 (decimal 185)
+ defining Unicode char U+00BA (decimal 186)
+ defining Unicode char U+00BC (decimal 188)
+ defining Unicode char U+00BD (decimal 189)
+ defining Unicode char U+00BE (decimal 190)
+ defining Unicode char U+00D7 (decimal 215)
+ defining Unicode char U+00F7 (decimal 247)
+ defining Unicode char U+0192 (decimal 402)
+ defining Unicode char U+02C7 (decimal 711)
+ defining Unicode char U+02D8 (decimal 728)
+ defining Unicode char U+02DD (decimal 733)
+ defining Unicode char U+0E3F (decimal 3647)
+ defining Unicode char U+2016 (decimal 8214)
+ defining Unicode char U+2020 (decimal 8224)
+ defining Unicode char U+2021 (decimal 8225)
+ defining Unicode char U+2022 (decimal 8226)
+ defining Unicode char U+2030 (decimal 8240)
+ defining Unicode char U+2031 (decimal 8241)
+ defining Unicode char U+203B (decimal 8251)
+ defining Unicode char U+203D (decimal 8253)
+ defining Unicode char U+2044 (decimal 8260)
+ defining Unicode char U+204E (decimal 8270)
+ defining Unicode char U+2052 (decimal 8274)
+ defining Unicode char U+20A1 (decimal 8353)
+ defining Unicode char U+20A4 (decimal 8356)
+ defining Unicode char U+20A6 (decimal 8358)
+ defining Unicode char U+20A9 (decimal 8361)
+ defining Unicode char U+20AB (decimal 8363)
+ defining Unicode char U+20AC (decimal 8364)
+ defining Unicode char U+20B1 (decimal 8369)
+ defining Unicode char U+2103 (decimal 8451)
+ defining Unicode char U+2116 (decimal 8470)
+ defining Unicode char U+2117 (decimal 8471)
+ defining Unicode char U+211E (decimal 8478)
+ defining Unicode char U+2120 (decimal 8480)
+ defining Unicode char U+2122 (decimal 8482)
+ defining Unicode char U+2126 (decimal 8486)
+ defining Unicode char U+2127 (decimal 8487)
+ defining Unicode char U+212E (decimal 8494)
+ defining Unicode char U+2190 (decimal 8592)
+ defining Unicode char U+2191 (decimal 8593)
+ defining Unicode char U+2192 (decimal 8594)
+ defining Unicode char U+2193 (decimal 8595)
+ defining Unicode char U+2329 (decimal 9001)
+ defining Unicode char U+232A (decimal 9002)
+ defining Unicode char U+2422 (decimal 9250)
+ defining Unicode char U+25E6 (decimal 9702)
+ defining Unicode char U+25EF (decimal 9711)
+ defining Unicode char U+266A (decimal 9834)
+))
+LaTeX Info: Redefining \oldstylenums on input line 266.
+Package textcomp Info: Setting cmr sub-encoding to TS1/0 on input line 281.
+Package textcomp Info: Setting cmss sub-encoding to TS1/0 on input line 282.
+Package textcomp Info: Setting cmtt sub-encoding to TS1/0 on input line 283.
+Package textcomp Info: Setting cmvtt sub-encoding to TS1/0 on input line 284.
+Package textcomp Info: Setting cmbr sub-encoding to TS1/0 on input line 285.
+Package textcomp Info: Setting cmtl sub-encoding to TS1/0 on input line 286.
+Package textcomp Info: Setting ccr sub-encoding to TS1/0 on input line 287.
+Package textcomp Info: Setting ptm sub-encoding to TS1/4 on input line 288.
+Package textcomp Info: Setting pcr sub-encoding to TS1/4 on input line 289.
+Package textcomp Info: Setting phv sub-encoding to TS1/4 on input line 290.
+Package textcomp Info: Setting ppl sub-encoding to TS1/3 on input line 291.
+Package textcomp Info: Setting pag sub-encoding to TS1/4 on input line 292.
+Package textcomp Info: Setting pbk sub-encoding to TS1/4 on input line 293.
+Package textcomp Info: Setting pnc sub-encoding to TS1/4 on input line 294.
+Package textcomp Info: Setting pzc sub-encoding to TS1/4 on input line 295.
+Package textcomp Info: Setting bch sub-encoding to TS1/4 on input line 296.
+Package textcomp Info: Setting put sub-encoding to TS1/5 on input line 297.
+Package textcomp Info: Setting uag sub-encoding to TS1/5 on input line 298.
+Package textcomp Info: Setting ugq sub-encoding to TS1/5 on input line 299.
+Package textcomp Info: Setting ul8 sub-encoding to TS1/4 on input line 300.
+Package textcomp Info: Setting ul9 sub-encoding to TS1/4 on input line 301.
+Package textcomp Info: Setting augie sub-encoding to TS1/5 on input line 302.
+Package textcomp Info: Setting dayrom sub-encoding to TS1/3 on input line 303.
+Package textcomp Info: Setting dayroms sub-encoding to TS1/3 on input line 304.
+
+Package textcomp Info: Setting pxr sub-encoding to TS1/0 on input line 305.
+Package textcomp Info: Setting pxss sub-encoding to TS1/0 on input line 306.
+Package textcomp Info: Setting pxtt sub-encoding to TS1/0 on input line 307.
+Package textcomp Info: Setting txr sub-encoding to TS1/0 on input line 308.
+Package textcomp Info: Setting txss sub-encoding to TS1/0 on input line 309.
+Package textcomp Info: Setting txtt sub-encoding to TS1/0 on input line 310.
+Package textcomp Info: Setting lmr sub-encoding to TS1/0 on input line 311.
+Package textcomp Info: Setting lmdh sub-encoding to TS1/0 on input line 312.
+Package textcomp Info: Setting lmss sub-encoding to TS1/0 on input line 313.
+Package textcomp Info: Setting lmssq sub-encoding to TS1/0 on input line 314.
+Package textcomp Info: Setting lmvtt sub-encoding to TS1/0 on input line 315.
+Package textcomp Info: Setting qhv sub-encoding to TS1/0 on input line 316.
+Package textcomp Info: Setting qag sub-encoding to TS1/0 on input line 317.
+Package textcomp Info: Setting qbk sub-encoding to TS1/0 on input line 318.
+Package textcomp Info: Setting qcr sub-encoding to TS1/0 on input line 319.
+Package textcomp Info: Setting qcs sub-encoding to TS1/0 on input line 320.
+Package textcomp Info: Setting qpl sub-encoding to TS1/0 on input line 321.
+Package textcomp Info: Setting qtm sub-encoding to TS1/0 on input line 322.
+Package textcomp Info: Setting qzc sub-encoding to TS1/0 on input line 323.
+Package textcomp Info: Setting qhvc sub-encoding to TS1/0 on input line 324.
+Package textcomp Info: Setting futs sub-encoding to TS1/4 on input line 325.
+Package textcomp Info: Setting futx sub-encoding to TS1/4 on input line 326.
+Package textcomp Info: Setting futj sub-encoding to TS1/4 on input line 327.
+Package textcomp Info: Setting hlh sub-encoding to TS1/3 on input line 328.
+Package textcomp Info: Setting hls sub-encoding to TS1/3 on input line 329.
+Package textcomp Info: Setting hlst sub-encoding to TS1/3 on input line 330.
+Package textcomp Info: Setting hlct sub-encoding to TS1/5 on input line 331.
+Package textcomp Info: Setting hlx sub-encoding to TS1/5 on input line 332.
+Package textcomp Info: Setting hlce sub-encoding to TS1/5 on input line 333.
+Package textcomp Info: Setting hlcn sub-encoding to TS1/5 on input line 334.
+Package textcomp Info: Setting hlcw sub-encoding to TS1/5 on input line 335.
+Package textcomp Info: Setting hlcf sub-encoding to TS1/5 on input line 336.
+Package textcomp Info: Setting pplx sub-encoding to TS1/3 on input line 337.
+Package textcomp Info: Setting pplj sub-encoding to TS1/3 on input line 338.
+Package textcomp Info: Setting ptmx sub-encoding to TS1/4 on input line 339.
+Package textcomp Info: Setting ptmj sub-encoding to TS1/4 on input line 340.
+)
+(/usr/share/texlive/texmf-dist/tex/latex/fancybox/fancybox.sty
+Package: fancybox 2010/05/15 1.4
+
+Style option: `fancybox' v1.4 <2010/05/15> (tvz)
+\@fancybox=\box30
+\shadowsize=\dimen105
+\@Sbox=\box31
+\do@VerbBox=\toks17
+\the@fancyput=\toks18
+\this@fancyput=\toks19
+\EndVerbatimTokens=\toks20
+\Verbatim@Outfile=\write3
+\Verbatim@Infile=\read1
+) (/usr/share/texlive/texmf-dist/tex/latex/titlesec/titlesec.sty
+Package: titlesec 2011/12/15 v2.10.0 Sectioning titles
+\ttl@box=\box32
+\beforetitleunit=\skip63
+\aftertitleunit=\skip64
+\ttl@plus=\dimen106
+\ttl@minus=\dimen107
+\ttl@toksa=\toks21
+\titlewidth=\dimen108
+\titlewidthlast=\dimen109
+\titlewidthfirst=\dimen110
+)
+(./tabulary.sty
+Package: tabulary 2007/10/02 v0.9 tabulary package (DPC)
+ (/usr/share/texlive/texmf-dist/tex/latex/tools/array.sty
+Package: array 2008/09/09 v2.4c Tabular extension package (FMi)
+\col@sep=\dimen111
+\extrarowheight=\dimen112
+\NC@list=\toks22
+\extratabsurround=\skip65
+\backup@length=\skip66
+)
+\TY@count=\count96
+\TY@linewidth=\dimen113
+\tymin=\dimen114
+\tymax=\dimen115
+\TY@tablewidth=\dimen116
+)
+(/usr/share/texlive/texmf-dist/tex/latex/amsmath/amsmath.sty
+Package: amsmath 2013/01/14 v2.14 AMS math features
+\@mathmargin=\skip67
+
+For additional information on amsmath, use the `?' option.
+(/usr/share/texlive/texmf-dist/tex/latex/amsmath/amstext.sty
+Package: amstext 2000/06/29 v2.01
+
+(/usr/share/texlive/texmf-dist/tex/latex/amsmath/amsgen.sty
+File: amsgen.sty 1999/11/30 v2.0
+\@emptytoks=\toks23
+\ex@=\dimen117
+))
+(/usr/share/texlive/texmf-dist/tex/latex/amsmath/amsbsy.sty
+Package: amsbsy 1999/11/29 v1.2d
+\pmbraise@=\dimen118
+)
+(/usr/share/texlive/texmf-dist/tex/latex/amsmath/amsopn.sty
+Package: amsopn 1999/12/14 v2.01 operator names
+)
+\inf@bad=\count97
+LaTeX Info: Redefining \frac on input line 210.
+\uproot@=\count98
+\leftroot@=\count99
+LaTeX Info: Redefining \overline on input line 306.
+\classnum@=\count100
+\DOTSCASE@=\count101
+LaTeX Info: Redefining \ldots on input line 378.
+LaTeX Info: Redefining \dots on input line 381.
+LaTeX Info: Redefining \cdots on input line 466.
+\Mathstrutbox@=\box33
+\strutbox@=\box34
+\big@size=\dimen119
+LaTeX Font Info: Redeclaring font encoding OML on input line 566.
+LaTeX Font Info: Redeclaring font encoding OMS on input line 567.
+\macc@depth=\count102
+\c@MaxMatrixCols=\count103
+\dotsspace@=\muskip10
+\c@parentequation=\count104
+\dspbrk@lvl=\count105
+\tag@help=\toks24
+\row@=\count106
+\column@=\count107
+\maxfields@=\count108
+\andhelp@=\toks25
+\eqnshift@=\dimen120
+\alignsep@=\dimen121
+\tagshift@=\dimen122
+\tagwidth@=\dimen123
+\totwidth@=\dimen124
+\lineht@=\dimen125
+\@envbody=\toks26
+\multlinegap=\skip68
+\multlinetaggap=\skip69
+\mathdisplay@stack=\toks27
+LaTeX Info: Redefining \[ on input line 2665.
+LaTeX Info: Redefining \] on input line 2666.
+)
+(/usr/share/texlive/texmf-dist/tex/latex/base/makeidx.sty
+Package: makeidx 2000/03/29 v1.0m Standard LaTeX package
+)
+(/usr/share/texlive/texmf-dist/tex/latex/framed/framed.sty
+Package: framed 2011/10/22 v 0.96: framed or shaded text with page breaks
+\OuterFrameSep=\skip70
+\fb@frw=\dimen126
+\fb@frh=\dimen127
+\FrameRule=\dimen128
+\FrameSep=\dimen129
+)
+(/usr/share/texlive/texmf-dist/tex/latex/base/ifthen.sty
+Package: ifthen 2001/05/26 v1.1c Standard LaTeX ifthen package (DPC)
+)
+(/usr/share/texlive/texmf-dist/tex/latex/graphics/color.sty
+Package: color 2005/11/14 v1.0j Standard LaTeX Color (DPC)
+
+(/usr/share/texlive/texmf-dist/tex/latex/latexconfig/color.cfg
+File: color.cfg 2007/01/18 v1.5 color configuration of teTeX/TeXLive
+)
+Package color Info: Driver file: pdftex.def on input line 130.
+
+(/usr/share/texlive/texmf-dist/tex/latex/pdftex-def/pdftex.def
+File: pdftex.def 2011/05/27 v0.06d Graphics/color for pdfTeX
+
+(/usr/share/texlive/texmf-dist/tex/generic/oberdiek/infwarerr.sty
+Package: infwarerr 2010/04/08 v1.3 Providing info/warning/error messages (HO)
+)
+(/usr/share/texlive/texmf-dist/tex/generic/oberdiek/ltxcmds.sty
+Package: ltxcmds 2011/11/09 v1.22 LaTeX kernel commands for general use (HO)
+)
+\Gread@gobject=\count109
+))
+(/usr/share/texlive/texmf-dist/tex/latex/fancyvrb/fancyvrb.sty
+Package: fancyvrb 2008/02/07
+
+Style option: `fancyvrb' v2.7a, with DG/SPQR fixes, and firstline=lastline fix
+<2008/02/07> (tvz) (/usr/share/texlive/texmf-dist/tex/latex/graphics/keyval.sty
+Package: keyval 1999/03/16 v1.13 key=value parser (DPC)
+\KV@toks@=\toks28
+)
+\FV@CodeLineNo=\count110
+\FV@InFile=\read2
+\FV@TabBox=\box35
+\c@FancyVerbLine=\count111
+\FV@StepNumber=\count112
+\FV@OutFile=\write4
+) (/usr/share/texlive/texmf-dist/tex/latex/threeparttable/threeparttable.sty
+Package: threeparttable 2003/06/13 v 3.0
+\@tempboxb=\box36
+)
+(/usr/share/texlive/texmf-dist/tex/latex/mdwtools/footnote.sty
+Package: footnote 1997/01/28 1.13 Save footnotes around boxes
+\fn@notes=\box37
+\fn@width=\dimen130
+)
+(/usr/share/texlive/texmf-dist/tex/latex/wrapfig/wrapfig.sty
+\wrapoverhang=\dimen131
+\WF@size=\dimen132
+\c@WF@wrappedlines=\count113
+\WF@box=\box38
+\WF@everypar=\toks29
+Package: wrapfig 2003/01/31 v 3.6
+)
+(/usr/share/texlive/texmf-dist/tex/latex/parskip/parskip.sty
+Package: parskip 2001/04/09 non-zero parskip adjustments
+)
+(/usr/share/texlive/texmf-dist/tex/latex/graphics/graphicx.sty
+Package: graphicx 1999/02/16 v1.0f Enhanced LaTeX Graphics (DPC,SPQR)
+
+(/usr/share/texlive/texmf-dist/tex/latex/graphics/graphics.sty
+Package: graphics 2009/02/05 v1.0o Standard LaTeX Graphics (DPC,SPQR)
+
+(/usr/share/texlive/texmf-dist/tex/latex/graphics/trig.sty
+Package: trig 1999/03/16 v1.09 sin cos tan (DPC)
+)
+(/usr/share/texlive/texmf-dist/tex/latex/latexconfig/graphics.cfg
+File: graphics.cfg 2010/04/23 v1.9 graphics configuration of TeX Live
+)
+Package graphics Info: Driver file: pdftex.def on input line 91.
+)
+\Gin@req@height=\dimen133
+\Gin@req@width=\dimen134
+)
+(/usr/share/texlive/texmf-dist/tex/plain/misc/pdfcolor.tex)
+\distancetoright=\skip71
+\py@argswidth=\skip72
+\py@noticelength=\skip73
+\lineblockindentation=\skip74
+\image@box=\box39
+\image@width=\dimen135
+
+(/usr/share/texlive/texmf-dist/tex/latex/hyperref/hyperref.sty
+Package: hyperref 2012/11/06 v6.83m Hypertext links for LaTeX
+
+(/usr/share/texlive/texmf-dist/tex/generic/oberdiek/hobsub-hyperref.sty
+Package: hobsub-hyperref 2012/05/28 v1.13 Bundle oberdiek, subset hyperref (HO)
+
+
+(/usr/share/texlive/texmf-dist/tex/generic/oberdiek/hobsub-generic.sty
+Package: hobsub-generic 2012/05/28 v1.13 Bundle oberdiek, subset generic (HO)
+Package: hobsub 2012/05/28 v1.13 Construct package bundles (HO)
+Package hobsub Info: Skipping package `infwarerr' (already loaded).
+Package hobsub Info: Skipping package `ltxcmds' (already loaded).
+Package: ifluatex 2010/03/01 v1.3 Provides the ifluatex switch (HO)
+Package ifluatex Info: LuaTeX not detected.
+Package: ifvtex 2010/03/01 v1.5 Detect VTeX and its facilities (HO)
+Package ifvtex Info: VTeX not detected.
+Package: intcalc 2007/09/27 v1.1 Expandable calculations with integers (HO)
+Package: ifpdf 2011/01/30 v2.3 Provides the ifpdf switch (HO)
+Package ifpdf Info: pdfTeX in PDF mode is detected.
+Package: etexcmds 2011/02/16 v1.5 Avoid name clashes with e-TeX commands (HO)
+Package etexcmds Info: Could not find \expanded.
+(etexcmds) That can mean that you are not using pdfTeX 1.50 or
+(etexcmds) that some package has redefined \expanded.
+(etexcmds) In the latter case, load this package earlier.
+Package: kvsetkeys 2012/04/25 v1.16 Key value parser (HO)
+Package: kvdefinekeys 2011/04/07 v1.3 Define keys (HO)
+Package: pdftexcmds 2011/11/29 v0.20 Utility functions of pdfTeX for LuaTeX (HO
+)
+Package pdftexcmds Info: LuaTeX not detected.
+Package pdftexcmds Info: \pdf@primitive is available.
+Package pdftexcmds Info: \pdf@ifprimitive is available.
+Package pdftexcmds Info: \pdfdraftmode found.
+Package: pdfescape 2011/11/25 v1.13 Implements pdfTeX's escape features (HO)
+Package: bigintcalc 2012/04/08 v1.3 Expandable calculations on big integers (HO
+)
+Package: bitset 2011/01/30 v1.1 Handle bit-vector datatype (HO)
+Package: uniquecounter 2011/01/30 v1.2 Provide unlimited unique counter (HO)
+)
+Package hobsub Info: Skipping package `hobsub' (already loaded).
+Package: letltxmacro 2010/09/02 v1.4 Let assignment for LaTeX macros (HO)
+Package: hopatch 2012/05/28 v1.2 Wrapper for package hooks (HO)
+Package: xcolor-patch 2011/01/30 xcolor patch
+Package: atveryend 2011/06/30 v1.8 Hooks at the very end of document (HO)
+Package atveryend Info: \enddocument detected (standard20110627).
+Package: atbegshi 2011/10/05 v1.16 At begin shipout hook (HO)
+Package: refcount 2011/10/16 v3.4 Data extraction from label references (HO)
+Package: hycolor 2011/01/30 v1.7 Color options for hyperref/bookmark (HO)
+)
+(/usr/share/texlive/texmf-dist/tex/generic/ifxetex/ifxetex.sty
+Package: ifxetex 2010/09/12 v0.6 Provides ifxetex conditional
+)
+(/usr/share/texlive/texmf-dist/tex/latex/oberdiek/auxhook.sty
+Package: auxhook 2011/03/04 v1.3 Hooks for auxiliary files (HO)
+)
+(/usr/share/texlive/texmf-dist/tex/latex/oberdiek/kvoptions.sty
+Package: kvoptions 2011/06/30 v3.11 Key value format for package options (HO)
+)
+\@linkdim=\dimen136
+\Hy@linkcounter=\count114
+\Hy@pagecounter=\count115
+
+(/usr/share/texlive/texmf-dist/tex/latex/hyperref/pd1enc.def
+File: pd1enc.def 2012/11/06 v6.83m Hyperref: PDFDocEncoding definition (HO)
+Now handling font encoding PD1 ...
+... no UTF-8 mapping file for font encoding PD1
+)
+\Hy@SavedSpaceFactor=\count116
+
+(/usr/share/texlive/texmf-dist/tex/latex/latexconfig/hyperref.cfg
+File: hyperref.cfg 2002/06/06 v1.2 hyperref configuration of TeXLive
+)
+Package hyperref Info: Option `colorlinks' set `true' on input line 4319.
+Package hyperref Info: Option `breaklinks' set `true' on input line 4319.
+Package hyperref Info: Hyper figures OFF on input line 4443.
+Package hyperref Info: Link nesting OFF on input line 4448.
+Package hyperref Info: Hyper index ON on input line 4451.
+Package hyperref Info: Plain pages OFF on input line 4458.
+Package hyperref Info: Backreferencing OFF on input line 4463.
+Package hyperref Info: Implicit mode ON; LaTeX internals redefined.
+Package hyperref Info: Bookmarks ON on input line 4688.
+\c@Hy@tempcnt=\count117
+
+(/usr/share/texlive/texmf-dist/tex/latex/url/url.sty
+\Urlmuskip=\muskip11
+Package: url 2013/09/16 ver 3.4 Verb mode for urls, etc.
+)
+LaTeX Info: Redefining \url on input line 5041.
+\XeTeXLinkMargin=\dimen137
+\Fld@menulength=\count118
+\Field@Width=\dimen138
+\Fld@charsize=\dimen139
+Package hyperref Info: Hyper figures OFF on input line 6295.
+Package hyperref Info: Link nesting OFF on input line 6300.
+Package hyperref Info: Hyper index ON on input line 6303.
+Package hyperref Info: backreferencing OFF on input line 6310.
+Package hyperref Info: Link coloring ON on input line 6313.
+Package hyperref Info: Link coloring with OCG OFF on input line 6320.
+Package hyperref Info: PDF/A mode OFF on input line 6325.
+LaTeX Info: Redefining \ref on input line 6365.
+LaTeX Info: Redefining \pageref on input line 6369.
+\Hy@abspage=\count119
+\c@Item=\count120
+\c@Hfootnote=\count121
+)
+
+Package hyperref Message: Driver (autodetected): hpdftex.
+
+(/usr/share/texlive/texmf-dist/tex/latex/hyperref/hpdftex.def
+File: hpdftex.def 2012/11/06 v6.83m Hyperref driver for pdfTeX
+\Fld@listcount=\count122
+\c@bookmark@seq@number=\count123
+
+(/usr/share/texlive/texmf-dist/tex/latex/oberdiek/rerunfilecheck.sty
+Package: rerunfilecheck 2011/04/15 v1.7 Rerun checks for auxiliary files (HO)
+Package uniquecounter Info: New unique counter `rerunfilecheck' on input line 2
+82.
+)
+\Hy@SectionHShift=\skip75
+)
+(/usr/share/texlive/texmf-dist/tex/latex/oberdiek/hypcap.sty
+Package: hypcap 2011/02/16 v1.11 Adjusting the anchors of captions (HO)
+)
+\DUlineblockindent=\skip76
+)
+(/usr/share/texlive/texmf-dist/tex/latex/multirow/multirow.sty
+\bigstrutjot=\dimen140
+)
+\@indexfile=\write5
+\openout5 = `ruffus.idx'.
+
+
+Writing index file ruffus.idx
+
+LaTeX Warning: Unused global option(s):
+ [A4].
+
+(./ruffus.aux
+
+LaTeX Warning: Label `tutorials/new_tutorial/command_line:new-manual-cmdline-ru
+n' multiply defined.
+
+
+LaTeX Warning: Label `tutorials/new_tutorial/command_line:new-manual-cmdline-ge
+t-argparse' multiply defined.
+
+
+LaTeX Warning: Label `tutorials/new_tutorial/deprecated_files:new-manual-files-
+is-uptodate' multiply defined.
+
+
+LaTeX Warning: Label `pipeline_functions:pipeline-functions-pipeline-run' multi
+ply defined.
+
+
+LaTeX Warning: Label `pipeline_functions:pipeline-functions-pipeline-run-target
+-tasks' multiply defined.
+
+
+LaTeX Warning: Label `pipeline_functions:pipeline-functions-pipeline-run-forced
+torun-tasks' multiply defined.
+
+
+LaTeX Warning: Label `pipeline_functions:pipeline-functions-pipeline-run-multip
+rocess' multiply defined.
+
+
+LaTeX Warning: Label `pipeline_functions:pipeline-functions-pipeline-run-multit
+hread' multiply defined.
+
+
+LaTeX Warning: Label `pipeline_functions:pipeline-functions-pipeline-run-logger
+' multiply defined.
+
+
+LaTeX Warning: Label `pipeline_functions:pipeline-functions-pipeline-run-gnu-ma
+ke' multiply defined.
+
+
+LaTeX Warning: Label `pipeline_functions:pipeline-functions-pipeline-run-verbos
+e' multiply defined.
+
+
+LaTeX Warning: Label `pipeline_functions:pipeline-functions-pipeline-run-runtim
+e-data' multiply defined.
+
+
+LaTeX Warning: Label `pipeline_functions:pipeline-functions-pipeline-run-one-se
+cond-per-job' multiply defined.
+
+
+LaTeX Warning: Label `pipeline_functions:pipeline-functions-pipeline-run-touch-
+files-only' multiply defined.
+
+
+LaTeX Warning: Label `pipeline_functions:pipeline-functions-pipeline-run-except
+ions-terminate-immediately' multiply defined.
+
+
+LaTeX Warning: Label `pipeline_functions:pipeline-functions-pipeline-run-log-ex
+ceptions' multiply defined.
+
+
+LaTeX Warning: Label `pipeline_functions:pipeline-functions-pipeline-run-histor
+y-file' multiply defined.
+
+
+LaTeX Warning: Label `pipeline_functions:pipeline-functions-pipeline-run-checks
+um-level' multiply defined.
+
+
+LaTeX Warning: Label `pipeline_functions:pipeline-functions-pipeline-run-verbos
+e-abbreviated-path' multiply defined.
+
+
+LaTeX Warning: Label `pipeline_functions:pipeline-functions-pipeline-printout'
+multiply defined.
+
+
+LaTeX Warning: Label `pipeline_functions:pipeline-functions-pipeline-printout-o
+utput-stream' multiply defined.
+
+
+LaTeX Warning: Label `pipeline_functions:pipeline-functions-pipeline-printout-t
+arget-tasks' multiply defined.
+
+
+LaTeX Warning: Label `pipeline_functions:pipeline-functions-pipeline-printout-f
+orcedtorun-tasks' multiply defined.
+
+
+LaTeX Warning: Label `pipeline_functions:pipeline-functions-pipeline-printout-v
+erbose' multiply defined.
+
+
+LaTeX Warning: Label `pipeline_functions:pipeline-functions-pipeline-printout-i
+ndent' multiply defined.
+
+
+LaTeX Warning: Label `pipeline_functions:pipeline-functions-pipeline-printout-g
+nu-make' multiply defined.
+
+
+LaTeX Warning: Label `pipeline_functions:pipeline-functions-pipeline-printout-w
+rap-width' multiply defined.
+
+
+LaTeX Warning: Label `pipeline_functions:pipeline-functions-pipeline-printout-r
+untime-data' multiply defined.
+
+
+LaTeX Warning: Label `pipeline_functions:pipeline-functions-pipeline-printout-h
+istory-file' multiply defined.
+
+
+LaTeX Warning: Label `pipeline_functions:pipeline-functions-pipeline-printout-c
+hecksum-level' multiply defined.
+
+
+LaTeX Warning: Label `pipeline_functions:pipeline-functions-pipeline-printout-v
+erbose-abbreviated-path' multiply defined.
+
+
+LaTeX Warning: Label `pipeline_functions:pipeline-functions-pipeline-printout-g
+raph' multiply defined.
+
+
+LaTeX Warning: Label `pipeline_functions:pipeline-functions-pipeline-printout-g
+raph-stream' multiply defined.
+
+
+LaTeX Warning: Label `pipeline_functions:pipeline-functions-pipeline-printout-g
+raph-output-format' multiply defined.
+
+
+LaTeX Warning: Label `pipeline_functions:pipeline-functions-pipeline-printout-g
+raph-target-tasks' multiply defined.
+
+
+LaTeX Warning: Label `pipeline_functions:pipeline-functions-pipeline-printout-g
+raph-forcedtorun-tasks' multiply defined.
+
+
+LaTeX Warning: Label `pipeline_functions:pipeline-functions-pipeline-printout-g
+raph-draw-vertically' multiply defined.
+
+
+LaTeX Warning: Label `pipeline_functions:pipeline-functions-pipeline-printout-g
+raph-ignore-upstream-of-target' multiply defined.
+
+
+LaTeX Warning: Label `pipeline_functions:pipeline-functions-pipeline-printout-g
+raph-skip-uptodate-tasks' multiply defined.
+
+
+LaTeX Warning: Label `pipeline_functions:pipeline-functions-pipeline-printout-g
+raph-gnu-make' multiply defined.
+
+
+LaTeX Warning: Label `pipeline_functions:pipeline-functions-pipeline-printout-g
+raph-test-all-task-for-update' multiply defined.
+
+
+LaTeX Warning: Label `pipeline_functions:pipeline-functions-pipeline-printout-g
+raph-no-key-legend' multiply defined.
+
+
+LaTeX Warning: Label `pipeline_functions:pipeline-functions-pipeline-printout-g
+raph-minimal-key-legend' multiply defined.
+
+
+LaTeX Warning: Label `pipeline_functions:pipeline-functions-pipeline-printout-g
+raph-user-colour-scheme' multiply defined.
+
+
+LaTeX Warning: Label `pipeline_functions:pipeline-functions-pipeline-printout-g
+raph-pipeline-name' multiply defined.
+
+
+LaTeX Warning: Label `pipeline_functions:pipeline-functions-pipeline-printout-g
+raph-size' multiply defined.
+
+
+LaTeX Warning: Label `pipeline_functions:pipeline-functions-pipeline-printout-g
+raph-dpi' multiply defined.
+
+
+LaTeX Warning: Label `pipeline_functions:pipeline-functions-pipeline-printout-g
+raph-runtime-data' multiply defined.
+
+
+LaTeX Warning: Label `pipeline_functions:pipeline-functions-pipeline-printout-g
+raph-history-file' multiply defined.
+
+
+LaTeX Warning: Label `pipeline_functions:pipeline-functions-pipeline-printout-g
+raph-checksum-level' multiply defined.
+
+
+LaTeX Warning: Label `pipeline_functions:pipeline-functions-pipeline-get-task-n
+ames' multiply defined.
+
+
+LaTeX Warning: Label `drmaa_wrapper_functions:drmaa-wrapper-run-job' multiply d
+efined.
+
+
+LaTeX Warning: Label `drmaa_wrapper_functions:drmaa-wrapper-run-job-cmd-str' mu
+ltiply defined.
+
+
+LaTeX Warning: Label `drmaa_wrapper_functions:drmaa-wrapper-run-job-job-name' m
+ultiply defined.
+
+
+LaTeX Warning: Label `drmaa_wrapper_functions:drmaa-wrapper-run-job-job-other-o
+ptions' multiply defined.
+
+
+LaTeX Warning: Label `drmaa_wrapper_functions:drmaa-wrapper-run-job-job-script-
+directory' multiply defined.
+
+
+LaTeX Warning: Label `drmaa_wrapper_functions:drmaa-wrapper-run-job-job-environ
+ment' multiply defined.
+
+
+LaTeX Warning: Label `drmaa_wrapper_functions:drmaa-wrapper-run-job-working-dir
+ectory' multiply defined.
+
+
+LaTeX Warning: Label `drmaa_wrapper_functions:drmaa-wrapper-run-job-retain-job-
+scripts' multiply defined.
+
+
+LaTeX Warning: Label `drmaa_wrapper_functions:drmaa-wrapper-run-job-logger' mul
+tiply defined.
+
+
+LaTeX Warning: Label `drmaa_wrapper_functions:drmaa-wrapper-run-job-drmaa-sessi
+on' multiply defined.
+
+
+LaTeX Warning: Label `drmaa_wrapper_functions:drmaa-wrapper-run-job-run-locally
+' multiply defined.
+
+
+LaTeX Warning: Label `drmaa_wrapper_functions:drmaa-wrapper-run-job-touch-only'
+ multiply defined.
+
+
+LaTeX Warning: Label `drmaa_wrapper_functions:drmaa-wrapper-run-job-output-file
+s' multiply defined.
+
+
+LaTeX Warning: Label `installation:installation' multiply defined.
+
+
+LaTeX Warning: Label `installation::doc' multiply defined.
+
+
+LaTeX Warning: Label `installation:id1' multiply defined.
+
+
+LaTeX Warning: Label `installation:the-easy-way' multiply defined.
+
+
+LaTeX Warning: Label `installation:the-most-up-to-date-code' multiply defined.
+
+
+LaTeX Warning: Label `installation:graphical-flowcharts' multiply defined.
+
+
+LaTeX Warning: Label `todo:todo-job-trickling' multiply defined.
+
+
+LaTeX Warning: Label `decorators/originate:decorators-originate-output-files' m
+ultiply defined.
+
+
+LaTeX Warning: Label `decorators/originate:decorators-originate-extra-parameter
+s' multiply defined.
+
+
+LaTeX Warning: Label `decorators/split:decorators-split-tasks-or-file-names' mu
+ltiply defined.
+
+
+LaTeX Warning: Label `decorators/split:decorators-split-output-files' multiply
+defined.
+
+
+LaTeX Warning: Label `decorators/split:decorators-split-extra-parameters' multi
+ply defined.
+
+
+LaTeX Warning: Label `decorators/transform:decorators-transform-tasks-or-file-n
+ames' multiply defined.
+
+
+LaTeX Warning: Label `decorators/transform:decorators-transform-suffix-string'
+multiply defined.
+
+
+LaTeX Warning: Label `decorators/transform:decorators-transform-matching-regex'
+ multiply defined.
+
+
+LaTeX Warning: Label `decorators/transform:decorators-transform-matching-format
+ter' multiply defined.
+
+
+LaTeX Warning: Label `decorators/transform:decorators-transform-output-pattern'
+ multiply defined.
+
+
+LaTeX Warning: Label `decorators/transform:decorators-transform-extra-parameter
+s' multiply defined.
+
+
+LaTeX Warning: Label `decorators/merge:decorators-merge-tasks-or-file-names' mu
+ltiply defined.
+
+
+LaTeX Warning: Label `decorators/merge:decorators-merge-output-file' multiply d
+efined.
+
+
+LaTeX Warning: Label `decorators/merge:decorators-merge-extra-parameters' multi
+ply defined.
+
+
+LaTeX Warning: Label `decorators/subdivide:decorators-subdivide-tasks-or-file-n
+ames' multiply defined.
+
+
+LaTeX Warning: Label `decorators/subdivide:decorators-subdivide-matching-regex'
+ multiply defined.
+
+
+LaTeX Warning: Label `decorators/subdivide:decorators-subdivide-matching-format
+ter' multiply defined.
+
+
+LaTeX Warning: Label `decorators/subdivide:decorators-subdivide-output-pattern'
+ multiply defined.
+
+
+LaTeX Warning: Label `decorators/subdivide:decorators-subdivide-input-pattern-o
+r-glob' multiply defined.
+
+
+LaTeX Warning: Label `decorators/subdivide:decorators-subdivide-extra-parameter
+s' multiply defined.
+
+
+LaTeX Warning: Label `decorators/transform_ex:decorators-transform-tasks-or-fil
+e-names' multiply defined.
+
+
+LaTeX Warning: Label `decorators/transform_ex:decorators-transform-suffix-strin
+g' multiply defined.
+
+
+LaTeX Warning: Label `decorators/transform_ex:decorators-transform-matching-reg
+ex' multiply defined.
+
+
+LaTeX Warning: Label `decorators/transform_ex:decorators-transform-matching-for
+matter' multiply defined.
+
+
+LaTeX Warning: Label `decorators/transform_ex:decorators-transform-input-patter
+n-or-glob' multiply defined.
+
+
+LaTeX Warning: Label `decorators/transform_ex:decorators-transform-output-patte
+rn' multiply defined.
+
+
+LaTeX Warning: Label `decorators/transform_ex:decorators-transform-extra-parame
+ters' multiply defined.
+
+
+LaTeX Warning: Label `decorators/collate:decorators-collate-tasks-or-file-names
+' multiply defined.
+
+
+LaTeX Warning: Label `decorators/collate:decorators-collate-matching-regex' mul
+tiply defined.
+
+
+LaTeX Warning: Label `decorators/collate:decorators-collate-matching-formatter'
+ multiply defined.
+
+
+LaTeX Warning: Label `decorators/collate:decorators-collate-output-pattern' mul
+tiply defined.
+
+
+LaTeX Warning: Label `decorators/collate:decorators-collate-extra-parameters' m
+ultiply defined.
+
+
+LaTeX Warning: Label `decorators/collate_ex:decorators-collate-ex-tasks-or-file
+-names' multiply defined.
+
+
+LaTeX Warning: Label `decorators/collate_ex:decorators-collate-ex-matching-rege
+x' multiply defined.
+
+
+LaTeX Warning: Label `decorators/collate_ex:decorators-collate-ex-matching-form
+atter' multiply defined.
+
+
+LaTeX Warning: Label `decorators/collate_ex:decorators-collate-ex-input-pattern
+-or-glob' multiply defined.
+
+
+LaTeX Warning: Label `decorators/collate_ex:decorators-collate-ex-output-patter
+n' multiply defined.
+
+
+LaTeX Warning: Label `decorators/collate_ex:decorators-collate-ex-extra-paramet
+ers' multiply defined.
+
+
+LaTeX Warning: Label `decorators/graphviz:decorators-graphviz-graphviz-paramete
+rs' multiply defined.
+
+
+LaTeX Warning: Label `decorators/mkdir:decorators-mkdir-tasks-or-file-names' mu
+ltiply defined.
+
+
+LaTeX Warning: Label `decorators/mkdir:decorators-mkdir-suffix-string' multiply
+ defined.
+
+
+LaTeX Warning: Label `decorators/mkdir:decorators-mkdir-matching-regex' multipl
+y defined.
+
+
+LaTeX Warning: Label `decorators/mkdir:decorators-mkdir-matching-formatter' mul
+tiply defined.
+
+
+LaTeX Warning: Label `decorators/mkdir:decorators-mkdir-output-pattern' multipl
+y defined.
+
+
+LaTeX Warning: Label `decorators/jobs_limit:decorators-jobs-limit-maximum-num-o
+f-jobs' multiply defined.
+
+
+LaTeX Warning: Label `decorators/jobs_limit:decorators-jobs-limit-name' multipl
+y defined.
+
+
+LaTeX Warning: Label `decorators/posttask:decorators-posttask-function' multipl
+y defined.
+
+
+LaTeX Warning: Label `decorators/posttask:decorators-posttask-file-name' multip
+ly defined.
+
+
+LaTeX Warning: Label `decorators/active_if:decorators-active-if-on-or-off' mult
+iply defined.
+
+
+LaTeX Warning: Label `decorators/follows:decorators-follows-task' multiply defi
+ned.
+
+
+LaTeX Warning: Label `decorators/follows:decorators-follows-task-name' multiply
+ defined.
+
+
+LaTeX Warning: Label `decorators/follows:decorators-follows-directory-name' mul
+tiply defined.
+
+
+LaTeX Warning: Label `decorators/product:decorators-product-tasks-or-file-names
+' multiply defined.
+
+
+LaTeX Warning: Label `decorators/product:decorators-product-matching-formatter'
+ multiply defined.
+
+
+LaTeX Warning: Label `decorators/product:decorators-product-output-pattern' mul
+tiply defined.
+
+
+LaTeX Warning: Label `decorators/product:decorators-product-extra-parameters' m
+ultiply defined.
+
+
+LaTeX Warning: Label `decorators/permutations:decorators-permutations-tasks-or-
+file-names' multiply defined.
+
+
+LaTeX Warning: Label `decorators/permutations:decorators-permutations-matching-
+formatter' multiply defined.
+
+
+LaTeX Warning: Label `decorators/permutations:decorators-permutations-output-pa
+ttern' multiply defined.
+
+
+LaTeX Warning: Label `decorators/permutations:decorators-permutations-extra-par
+ameters' multiply defined.
+
+
+LaTeX Warning: Label `decorators/combinations:decorators-combinations-tasks-or-
+file-names' multiply defined.
+
+
+LaTeX Warning: Label `decorators/combinations:decorators-combinations-matching-
+formatter' multiply defined.
+
+
+LaTeX Warning: Label `decorators/combinations:decorators-combinations-output-pa
+ttern' multiply defined.
+
+
+LaTeX Warning: Label `decorators/combinations:decorators-combinations-extra-par
+ameters' multiply defined.
+
+
+LaTeX Warning: Label `decorators/combinations_with_replacement:decorators-combi
+nations-with-replacement-tasks-or-file-names' multiply defined.
+
+
+LaTeX Warning: Label `decorators/combinations_with_replacement:decorators-combi
+nations-with-replacement-matching-formatter' multiply defined.
+
+
+LaTeX Warning: Label `decorators/combinations_with_replacement:decorators-combi
+nations-with-replacement-output-pattern' multiply defined.
+
+
+LaTeX Warning: Label `decorators/combinations_with_replacement:decorators-combi
+nations-with-replacement-extra-parameters' multiply defined.
+
+
+LaTeX Warning: Label `decorators/files_ex:decorators-files-custom-function' mul
+tiply defined.
+
+
+LaTeX Warning: Label `decorators/check_if_uptodate:decorators-check-if-uptodate
+-dependency-checking-function' multiply defined.
+
+
+LaTeX Warning: Label `decorators/parallel:decorators-parallel-job-params' multi
+ply defined.
+
+
+LaTeX Warning: Label `decorators/parallel:decorators-parallel-parameter-generat
+ing-function' multiply defined.
+
+
+LaTeX Warning: Label `decorators/files:decorators-files-input1' multiply define
+d.
+
+
+LaTeX Warning: Label `decorators/files:decorators-files-output1' multiply defin
+ed.
+
+
+LaTeX Warning: Label `decorators/files:decorators-files-extra-parameters1' mult
+iply defined.
+
+
+LaTeX Warning: Label `decorators/files:decorators-files-input' multiply defined
+.
+
+
+LaTeX Warning: Label `decorators/files:decorators-files-output' multiply define
+d.
+
+
+LaTeX Warning: Label `decorators/files:decorators-files-extra-parameters' multi
+ply defined.
+
+
+LaTeX Warning: Label `decorators/files_re:decorators-files-re-tasks-or-file-nam
+es' multiply defined.
+
+
+LaTeX Warning: Label `decorators/files_re:decorators-files-re-matching-regex' m
+ultiply defined.
+
+
+LaTeX Warning: Label `decorators/files_re:decorators-files-re-input-pattern' mu
+ltiply defined.
+
+
+LaTeX Warning: Label `decorators/files_re:decorators-files-re-output-pattern' m
+ultiply defined.
+
+
+LaTeX Warning: Label `decorators/files_re:decorators-files-re-extra-parameters'
+ multiply defined.
+
+
+LaTeX Warning: Label `proxy_logger:proxy-logger' multiply defined.
+
+)
+\openout1 = `ruffus.aux'.
+
+LaTeX Font Info: Checking defaults for OML/cmm/m/it on input line 117.
+LaTeX Font Info: ... okay on input line 117.
+LaTeX Font Info: Checking defaults for T1/cmr/m/n on input line 117.
+LaTeX Font Info: ... okay on input line 117.
+LaTeX Font Info: Checking defaults for OT1/cmr/m/n on input line 117.
+LaTeX Font Info: ... okay on input line 117.
+LaTeX Font Info: Checking defaults for OMS/cmsy/m/n on input line 117.
+LaTeX Font Info: ... okay on input line 117.
+LaTeX Font Info: Checking defaults for OMX/cmex/m/n on input line 117.
+LaTeX Font Info: ... okay on input line 117.
+LaTeX Font Info: Checking defaults for U/cmr/m/n on input line 117.
+LaTeX Font Info: ... okay on input line 117.
+LaTeX Font Info: Checking defaults for TS1/cmr/m/n on input line 117.
+LaTeX Font Info: Try loading font information for TS1+cmr on input line 117.
+
+ (/usr/share/texlive/texmf-dist/tex/latex/base/ts1cmr.fd
+File: ts1cmr.fd 1999/05/25 v2.5h Standard LaTeX font definitions
+)
+LaTeX Font Info: ... okay on input line 117.
+LaTeX Font Info: Checking defaults for PD1/pdf/m/n on input line 117.
+LaTeX Font Info: ... okay on input line 117.
+LaTeX Font Info: Try loading font information for T1+ptm on input line 117.
+
+(/usr/share/texlive/texmf-dist/tex/latex/psnfss/t1ptm.fd
+File: t1ptm.fd 2001/06/04 font definitions for T1/ptm.
+)
+(/usr/share/texlive/texmf-dist/tex/context/base/supp-pdf.mkii
+[Loading MPS to PDF converter (version 2006.09.02).]
+\scratchcounter=\count124
+\scratchdimen=\dimen141
+\scratchbox=\box40
+\nofMPsegments=\count125
+\nofMParguments=\count126
+\everyMPshowfont=\toks30
+\MPscratchCnt=\count127
+\MPscratchDim=\dimen142
+\MPnumerator=\count128
+\makeMPintoPDFobject=\count129
+\everyMPtoPDFconversion=\toks31
+) (/usr/share/texlive/texmf-dist/tex/latex/oberdiek/epstopdf-base.sty
+Package: epstopdf-base 2010/02/09 v2.5 Base part for package epstopdf
+
+(/usr/share/texlive/texmf-dist/tex/latex/oberdiek/grfext.sty
+Package: grfext 2010/08/19 v1.1 Manage graphics extensions (HO)
+)
+Package grfext Info: Graphics extension search list:
+(grfext) [.png,.pdf,.jpg,.mps,.jpeg,.jbig2,.jb2,.PNG,.PDF,.JPG,.JPE
+G,.JBIG2,.JB2,.eps]
+(grfext) \AppendGraphicsExtensions on input line 452.
+
+(/usr/share/texlive/texmf-dist/tex/latex/latexconfig/epstopdf-sys.cfg
+File: epstopdf-sys.cfg 2010/07/13 v1.3 Configuration of (r)epstopdf for TeX Liv
+e
+))
+\AtBeginShipoutBox=\box41
+Package hyperref Info: Link coloring ON on input line 117.
+
+(/usr/share/texlive/texmf-dist/tex/latex/hyperref/nameref.sty
+Package: nameref 2012/10/27 v2.43 Cross-referencing by name of section
+
+(/usr/share/texlive/texmf-dist/tex/generic/oberdiek/gettitlestring.sty
+Package: gettitlestring 2010/12/03 v1.4 Cleanup title references (HO)
+)
+\c@section@level=\count130
+)
+LaTeX Info: Redefining \ref on input line 117.
+LaTeX Info: Redefining \pageref on input line 117.
+LaTeX Info: Redefining \nameref on input line 117.
+
+(./ruffus.out) (./ruffus.out)
+\@outlinefile=\write6
+\openout6 = `ruffus.out'.
+
+
+Underfull \hbox (badness 10000) in paragraph at lines 120--120
+
+ []
+
+LaTeX Font Info: Try loading font information for T1+phv on input line 120.
+(/usr/share/texlive/texmf-dist/tex/latex/psnfss/t1phv.fd
+File: t1phv.fd 2001/06/04 scalable font definitions for T1/phv.
+)
+LaTeX Font Info: Font shape `T1/phv/bx/n' in size <24.88> not available
+(Font) Font shape `T1/phv/b/n' tried instead on input line 120.
+LaTeX Font Info: Font shape `T1/phv/m/it' in size <17.28> not available
+(Font) Font shape `T1/phv/m/sl' tried instead on input line 120.
+LaTeX Font Info: Font shape `T1/phv/bx/it' in size <17.28> not available
+(Font) Font shape `T1/phv/b/it' tried instead on input line 120.
+LaTeX Font Info: Font shape `T1/phv/b/it' in size <17.28> not available
+(Font) Font shape `T1/phv/b/sl' tried instead on input line 120.
+LaTeX Font Info: Font shape `T1/phv/bx/n' in size <17.28> not available
+(Font) Font shape `T1/phv/b/n' tried instead on input line 120.
+<<ot1.cmap>><<oml.cmap>><<oms.cmap>><<omx.cmap>> [1
+
+{/var/lib/texmf/fonts/map/pdftex/updmap/pdftex.map}] [2
+
+] (./ruffus.toc
+LaTeX Font Info: Font shape `T1/ptm/bx/n' in size <10> not available
+(Font) Font shape `T1/ptm/b/n' tried instead on input line 2.
+LaTeX Font Info: Try loading font information for T1+pcr on input line 24.
+ (/usr/share/texlive/texmf-dist/tex/latex/psnfss/t1pcr.fd
+File: t1pcr.fd 2001/06/04 font definitions for T1/pcr.
+)
+LaTeX Font Info: Font shape `T1/phv/bx/n' in size <10> not available
+(Font) Font shape `T1/phv/b/n' tried instead on input line 242.
+pdfTeX warning (ext4): destination with the same identifier (name{page.i}) has
+been already used, duplicate ignored
+<to be read again>
+ \relax
+l.242 ...elines and Exceptions}{112}{section.1.40}
+ [1
+
+])
+\tf@toc=\write7
+\openout7 = `ruffus.toc'.
+
+pdfTeX warning (ext4): destination with the same identifier (name{page.ii}) has
+ been already used, duplicate ignored
+<to be read again>
+ \relax
+l.120 \tableofcontents
+ [2]
+Chapter 1.
+LaTeX Font Info: Font shape `T1/phv/bx/n' in size <14.4> not available
+(Font) Font shape `T1/phv/b/n' tried instead on input line 125.
+LaTeX Font Info: Font shape `T1/ptm/bx/n' in size <14.4> not available
+(Font) Font shape `T1/ptm/b/n' tried instead on input line 125.
+LaTeX Font Info: Font shape `T1/ptm/bx/n' in size <24.88> not available
+(Font) Font shape `T1/ptm/b/n' tried instead on input line 125.
+LaTeX Font Info: Font shape `T1/phv/bx/n' in size <12> not available
+(Font) Font shape `T1/phv/b/n' tried instead on input line 133.
+LaTeX Font Info: Try loading font information for TS1+ptm on input line 167.
+
+(/usr/share/texlive/texmf-dist/tex/latex/psnfss/ts1ptm.fd
+File: ts1ptm.fd 2001/06/04 font definitions for TS1/ptm.
+) [1
+
+
+] [2]
+LaTeX Font Info: Font shape `T1/phv/bx/it' in size <14.4> not available
+(Font) Font shape `T1/phv/b/it' tried instead on input line 374.
+LaTeX Font Info: Font shape `T1/phv/b/it' in size <14.4> not available
+(Font) Font shape `T1/phv/b/sl' tried instead on input line 374.
+
+<theoretical_pipeline_schematic.png, id=488, 918.43124pt x 106.89937pt>
+File: theoretical_pipeline_schematic.png Graphic file (type png)
+
+<use theoretical_pipeline_schematic.png>
+Package pdftex.def Info: theoretical_pipeline_schematic.png used on input line
+391.
+(pdftex.def) Requested size: 918.429pt x 106.8991pt.
+File: theoretical_pipeline_schematic.png Graphic file (type png)
+
+<use theoretical_pipeline_schematic.png>
+Package pdftex.def Info: theoretical_pipeline_schematic.png used on input line
+391.
+(pdftex.def) Requested size: 419.75494pt x 48.85785pt.
+LaTeX Font Info: Font shape `T1/phv/bx/it' in size <10> not available
+(Font) Font shape `T1/phv/b/it' tried instead on input line 403.
+LaTeX Font Info: Font shape `T1/phv/b/it' in size <10> not available
+(Font) Font shape `T1/phv/b/sl' tried instead on input line 403.
+ [3 <./theoretical_pipeline_schematic.png>]
+LaTeX Font Info: Font shape `T1/phv/bx/it' in size <12> not available
+(Font) Font shape `T1/phv/b/it' tried instead on input line 410.
+LaTeX Font Info: Font shape `T1/phv/b/it' in size <12> not available
+(Font) Font shape `T1/phv/b/sl' tried instead on input line 410.
+LaTeX Font Info: Font shape `T1/pcr/bx/n' in size <9> not available
+(Font) Font shape `T1/pcr/b/n' tried instead on input line 417.
+ <tutorial_step1_decorator_syntax.png, id=503, 310.15875pt x 83.5622pt>
+File: tutorial_step1_decorator_syntax.png Graphic file (type png)
+
+<use tutorial_step1_decorator_syntax.png>
+Package pdftex.def Info: tutorial_step1_decorator_syntax.png used on input line
+ 457.
+(pdftex.def) Requested size: 310.15799pt x 83.56198pt.
+File: tutorial_step1_decorator_syntax.png Graphic file (type png)
+
+<use tutorial_step1_decorator_syntax.png>
+Package pdftex.def Info: tutorial_step1_decorator_syntax.png used on input line
+ 457.
+(pdftex.def) Requested size: 310.15799pt x 83.56198pt.
+ [4 <./tutorial_step1_decorator_syntax.png>] <tutorial_ruffus_files.jpg, id=514
+, 1204.5pt x 426.59375pt>
+File: tutorial_ruffus_files.jpg Graphic file (type jpg)
+
+<use tutorial_ruffus_files.jpg>
+Package pdftex.def Info: tutorial_ruffus_files.jpg used on input line 492.
+(pdftex.def) Requested size: 1204.49707pt x 426.5927pt.
+File: tutorial_ruffus_files.jpg Graphic file (type jpg)
+ <use tutorial_ruffus_files.jpg>
+Package pdftex.def Info: tutorial_ruffus_files.jpg used on input line 492.
+(pdftex.def) Requested size: 375.75494pt x 133.08238pt.
+LaTeX Font Info: Font shape `T1/pcr/bx/n' in size <10> not available
+(Font) Font shape `T1/pcr/b/n' tried instead on input line 495.
+ [5 <./tutorial_ruffus_files.jpg>]
+LaTeX Font Info: Font shape `T1/pcr/m/it' in size <9> not available
+(Font) Font shape `T1/pcr/m/sl' tried instead on input line 563.
+ [6] [7] [8]
+File: theoretical_pipeline_schematic.png Graphic file (type png)
+ <use theoretical_pipeline_schematic.png>
+Package pdftex.def Info: theoretical_pipeline_schematic.png used on input line
+827.
+(pdftex.def) Requested size: 918.429pt x 106.8991pt.
+File: theoretical_pipeline_schematic.png Graphic file (type png)
+
+<use theoretical_pipeline_schematic.png>
+Package pdftex.def Info: theoretical_pipeline_schematic.png used on input line
+827.
+(pdftex.def) Requested size: 419.75494pt x 48.85785pt.
+
+<transform_1_to_1_example.png, id=570, 193.50293pt x 134.06085pt>
+File: transform_1_to_1_example.png Graphic file (type png)
+
+<use transform_1_to_1_example.png>
+Package pdftex.def Info: transform_1_to_1_example.png used on input line 875.
+(pdftex.def) Requested size: 193.50244pt x 134.06052pt.
+File: transform_1_to_1_example.png Graphic file (type png)
+ <use transform_1_to_1_example.png>
+Package pdftex.def Info: transform_1_to_1_example.png used on input line 875.
+(pdftex.def) Requested size: 193.50244pt x 134.06052pt.
+ [9 <./transform_1_to_1_example.png>] [10]
+LaTeX Font Info: Font shape `T1/pcr/bx/n' in size <14.4> not available
+(Font) Font shape `T1/pcr/b/n' tried instead on input line 1032.
+File: theoretical_pipeline_schematic.png Graphic file (type png)
+ <use theoretical_pipeline_schematic.png>
+Package pdftex.def Info: theoretical_pipeline_schematic.png used on input line
+1061.
+(pdftex.def) Requested size: 918.429pt x 106.8991pt.
+File: theoretical_pipeline_schematic.png Graphic file (type png)
+
+<use theoretical_pipeline_schematic.png>
+Package pdftex.def Info: theoretical_pipeline_schematic.png used on input line
+1061.
+(pdftex.def) Requested size: 419.75494pt x 48.85785pt.
+ [11]
+Overfull \hbox (121.9102pt too wide) in paragraph at lines 1159--1160
+[]
+ []
+
+[12] [13] [14] [15] [16] [17] [18]
+LaTeX Font Info: Font shape `T1/pcr/bx/n' in size <12> not available
+(Font) Font shape `T1/pcr/b/n' tried instead on input line 1691.
+
+Underfull \hbox (badness 10000) in paragraph at lines 1697--1698
+[]\T1/ptm/m/n/10 The \T1/pcr/m/n/10 verbose_abbreviated_path \T1/ptm/m/n/10 pa-
+ram-e-ter spec-i-fies that [][]\T1/ptm/m/it/10 pipeline_printout(...)[][] \T1/p
+tm/m/n/10 and
+ []
+
+[19]
+Overfull \hbox (22.18498pt too wide) in paragraph at lines 1722--1723
+[]\T1/ptm/m/n/10 Given \T1/pcr/m/n/10 ["aa/bb/cc/dddd.txt", "aaa/bbbb/cccc/eeed
+/eeee/ffff/gggg.txt"]
+ []
+
+[20] [21] [22] [23] [24]
+Underfull \hbox (badness 10000) in paragraph at lines 2124--2124
+ |[][]\T1/phv/b/n/14.4 Chapter 7: Dis-play-ing the pipeline vi-su-ally with
+ []
+
+<simple_tutorial_stage5_flowchart.png, id=760, 843.15pt x 456.70625pt>
+File: simple_tutorial_stage5_flowchart.png Graphic file (type png)
+
+<use simple_tutorial_stage5_flowchart.png>
+Package pdftex.def Info: simple_tutorial_stage5_flowchart.png used on input lin
+e 2169.
+(pdftex.def) Requested size: 843.14793pt x 456.70512pt.
+File: simple_tutorial_stage5_flowchart.png Graphic file (type png)
+
+<use simple_tutorial_stage5_flowchart.png>
+Package pdftex.def Info: simple_tutorial_stage5_flowchart.png used on input lin
+e 2169.
+(pdftex.def) Requested size: 375.75494pt x 203.53677pt.
+ [25 <./simple_tutorial_stage5_flowchart.png>] <flowchart_colour_schemes.png, i
+d=769, 1324.95pt x 473.77pt>
+File: flowchart_colour_schemes.png Graphic file (type png)
+
+<use flowchart_colour_schemes.png>
+Package pdftex.def Info: flowchart_colour_schemes.png used on input line 2183.
+(pdftex.def) Requested size: 1324.94678pt x 473.76884pt.
+File: flowchart_colour_schemes.png Graphic file (type png)
+ <use flowchart_colour_schemes.png>
+Package pdftex.def Info: flowchart_colour_schemes.png used on input line 2183.
+(pdftex.def) Requested size: 375.75494pt x 134.36076pt.
+
+<simple_tutorial_complex_flowchart.png, id=772, 558.085pt x 941.5175pt>
+File: simple_tutorial_complex_flowchart.png Graphic file (type png)
+
+<use simple_tutorial_complex_flowchart.png>
+Package pdftex.def Info: simple_tutorial_complex_flowchart.png used on input li
+ne 2236.
+(pdftex.def) Requested size: 558.08365pt x 941.51521pt.
+File: simple_tutorial_complex_flowchart.png Graphic file (type png)
+
+<use simple_tutorial_complex_flowchart.png>
+Package pdftex.def Info: simple_tutorial_complex_flowchart.png used on input li
+ne 2236.
+(pdftex.def) Requested size: 375.75494pt x 633.93097pt.
+ [26 <./flowchart_colour_schemes.png>]
+<simple_tutorial_complex_flowchart_error.png, id=780, 558.085pt x 941.5175pt>
+File: simple_tutorial_complex_flowchart_error.png Graphic file (type png)
+
+<use simple_tutorial_complex_flowchart_error.png>
+Package pdftex.def Info: simple_tutorial_complex_flowchart_error.png used on in
+put line 2248.
+(pdftex.def) Requested size: 558.08365pt x 941.51521pt.
+File: simple_tutorial_complex_flowchart_error.png Graphic file (type png)
+
+<use simple_tutorial_complex_flowchart_error.png>
+Package pdftex.def Info: simple_tutorial_complex_flowchart_error.png used on in
+put line 2248.
+(pdftex.def) Requested size: 375.75494pt x 633.93097pt.
+ [27 <./simple_tutorial_complex_flowchart.png>]
+<history_html_flowchart2.png, id=789, 1126.2075pt x 1058.95625pt>
+File: history_html_flowchart2.png Graphic file (type png)
+
+<use history_html_flowchart2.png>
+Package pdftex.def Info: history_html_flowchart2.png used on input line 2263.
+(pdftex.def) Requested size: 1126.20477pt x 1058.95367pt.
+File: history_html_flowchart2.png Graphic file (type png)
+ <use history_html_flowchart2.png>
+Package pdftex.def Info: history_html_flowchart2.png used on input line 2263.
+(pdftex.def) Requested size: 375.75494pt x 353.3185pt.
+ [28 <./simple_tutorial_complex_flowchart_error.png> <./history_html_flowchart2
+.png>]
+File: theoretical_pipeline_schematic.png Graphic file (type png)
+
+<use theoretical_pipeline_schematic.png>
+Package pdftex.def Info: theoretical_pipeline_schematic.png used on input line
+2346.
+(pdftex.def) Requested size: 918.429pt x 106.8991pt.
+File: theoretical_pipeline_schematic.png Graphic file (type png)
+
+<use theoretical_pipeline_schematic.png>
+Package pdftex.def Info: theoretical_pipeline_schematic.png used on input line
+2346.
+(pdftex.def) Requested size: 419.75494pt x 48.85785pt.
+ [29]
+Underfull \hbox (badness 10000) in paragraph at lines 2430--2432
+[]\T1/ptm/m/n/10 Each el-e-ment pro-vides an in-put for the task. So if the pre
+-vi-ous task
+ []
+
+[30]
+Underfull \hbox (badness 7326) in paragraph at lines 2462--2463
+[]\T1/ptm/m/n/10 When the pipeline is run, this re-sults in the fol-low-ing equ
+iv-a-lent call to
+ []
+
+[31]
+Underfull \hbox (badness 10000) in paragraph at lines 2522--2523
+[]\T1/ptm/m/n/10 Information may have to be de-coded from data file names, e.g.
+
+ []
+
+
+Underfull \hbox (badness 5475) in paragraph at lines 2532--2533
+[]\T1/ptm/m/n/10 Allows easy ma-nip-u-la-tion of path sub-com-po-nents in the s
+tyle of [][]os.path.split()[][], and
+ []
+
+[32] [33] [34]
+<simple_tutorial_zoo_animals_formatter_example.jpg, id=865, 441.0879pt x 100.45
+53pt>
+File: simple_tutorial_zoo_animals_formatter_example.jpg Graphic file (type jpg)
+
+ <use simple_tutorial_zoo_animals_formatter_example.jpg>
+Package pdftex.def Info: simple_tutorial_zoo_animals_formatter_example.jpg used
+ on input line 2814.
+(pdftex.def) Requested size: 441.08682pt x 100.45505pt.
+File: simple_tutorial_zoo_animals_formatter_example.jpg Graphic file (type jpg)
+
+
+<use simple_tutorial_zoo_animals_formatter_example.jpg>
+Package pdftex.def Info: simple_tutorial_zoo_animals_formatter_example.jpg used
+ on input line 2814.
+(pdftex.def) Requested size: 375.75494pt x 85.58054pt.
+ [35 <./simple_tutorial_zoo_animals_formatter_example.jpg>] [36] [37]
+File: simple_tutorial_zoo_animals_formatter_example.jpg Graphic file (type jpg)
+
+
+<use simple_tutorial_zoo_animals_formatter_example.jpg>
+Package pdftex.def Info: simple_tutorial_zoo_animals_formatter_example.jpg used
+ on input line 2999.
+(pdftex.def) Requested size: 441.08682pt x 100.45505pt.
+File: simple_tutorial_zoo_animals_formatter_example.jpg Graphic file (type jpg)
+
+
+<use simple_tutorial_zoo_animals_formatter_example.jpg>
+Package pdftex.def Info: simple_tutorial_zoo_animals_formatter_example.jpg used
+ on input line 2999.
+(pdftex.def) Requested size: 375.75494pt x 85.58054pt.
+ [38]
+File: theoretical_pipeline_schematic.png Graphic file (type png)
+
+<use theoretical_pipeline_schematic.png>
+Package pdftex.def Info: theoretical_pipeline_schematic.png used on input line
+3123.
+(pdftex.def) Requested size: 918.429pt x 106.8991pt.
+File: theoretical_pipeline_schematic.png Graphic file (type png)
+
+<use theoretical_pipeline_schematic.png>
+Package pdftex.def Info: theoretical_pipeline_schematic.png used on input line
+3123.
+(pdftex.def) Requested size: 419.75494pt x 48.85785pt.
+ [39] [40]
+Underfull \hbox (badness 10000) in paragraph at lines 3268--3269
+[]\T1/ptm/m/n/10 The name of the check-point file is the value of the en-vi-ron
+-ment vari-able
+ []
+
+[41] [42] [43] [44] <bestiary_transform.png, id=969, 265.05023pt x 94.6737pt>
+File: bestiary_transform.png Graphic file (type png)
+
+<use bestiary_transform.png>
+Package pdftex.def Info: bestiary_transform.png used on input line 3550.
+(pdftex.def) Requested size: 265.04958pt x 94.67346pt.
+File: bestiary_transform.png Graphic file (type png)
+ <use bestiary_transform.png>
+Package pdftex.def Info: bestiary_transform.png used on input line 3550.
+(pdftex.def) Requested size: 265.04958pt x 94.67346pt.
+
+<bestiary_decorators.png, id=971, 491.25533pt x 106.41757pt>
+File: bestiary_decorators.png Graphic file (type png)
+
+<use bestiary_decorators.png>
+Package pdftex.def Info: bestiary_decorators.png used on input line 3564.
+(pdftex.def) Requested size: 491.25412pt x 106.4173pt.
+File: bestiary_decorators.png Graphic file (type png)
+ <use bestiary_decorators.png>
+Package pdftex.def Info: bestiary_decorators.png used on input line 3564.
+(pdftex.def) Requested size: 419.75494pt x 90.92949pt.
+ [45 <./bestiary_transform.png> <./bestiary_decorators.png>]
+<bestiary_combinatorics.png, id=1006, 491.61667pt x 98.10652pt>
+File: bestiary_combinatorics.png Graphic file (type png)
+
+<use bestiary_combinatorics.png>
+Package pdftex.def Info: bestiary_combinatorics.png used on input line 3660.
+(pdftex.def) Requested size: 491.61546pt x 98.10626pt.
+File: bestiary_combinatorics.png Graphic file (type png)
+ <use bestiary_combinatorics.png>
+Package pdftex.def Info: bestiary_combinatorics.png used on input line 3660.
+(pdftex.def) Requested size: 419.75494pt x 83.76816pt.
+ [46 <./bestiary_combinatorics.png>] [47]
+<manual_split_merge_example.jpg, id=1037, 630.10406pt x 495.35063pt>
+File: manual_split_merge_example.jpg Graphic file (type jpg)
+
+<use manual_split_merge_example.jpg>
+Package pdftex.def Info: manual_split_merge_example.jpg used on input line 3785
+.
+(pdftex.def) Requested size: 630.10252pt x 495.34943pt.
+File: manual_split_merge_example.jpg Graphic file (type jpg)
+ <use manual_split_merge_example.jpg>
+Package pdftex.def Info: manual_split_merge_example.jpg used on input line 3785
+.
+(pdftex.def) Requested size: 419.75494pt x 329.98604pt.
+
+[48 <./manual_split_merge_example.jpg>]
+Underfull \hbox (badness 10000) in paragraph at lines 3847--3850
+
+ []
+
+[49] [50]
+File: manual_split_merge_example.jpg Graphic file (type jpg)
+ <use manual_split_merge_example.jpg>
+Package pdftex.def Info: manual_split_merge_example.jpg used on input line 4030
+.
+(pdftex.def) Requested size: 630.10252pt x 495.34943pt.
+File: manual_split_merge_example.jpg Graphic file (type jpg)
+
+<use manual_split_merge_example.jpg>
+Package pdftex.def Info: manual_split_merge_example.jpg used on input line 4030
+.
+(pdftex.def) Requested size: 419.75494pt x 329.98604pt.
+ [51] [52] [53] [54] [55] [56] [57]
+[58] [59]
+Underfull \vbox (badness 3364) detected at line 4791
+ []
+
+[60] [61]
+Underfull \hbox (badness 10000) in paragraph at lines 4872--4874
+[]\T1/ptm/m/n/10 For ex-am-ple, the frag-ment files may not be sent to
+ []
+
+[62]
+Underfull \hbox (badness 10000) in paragraph at lines 4974--4976
+[]\T1/ptm/m/n/10 For ex-am-ple, if the \T1/ptm/m/it/10 in-puts \T1/ptm/m/n/10 a
+re \T1/ptm/b/n/10 [A1,A2],[B1,B2],[C1,C2] vs [P1,P2],[Q1,Q2],[R1,R2] vs
+ []
+
+[63] [64] [65] [66]
+Overfull \hbox (3.81097pt too wide) in paragraph at lines 5287--5287
+[]
+ []
+
+
+Underfull \hbox (badness 10000) in paragraph at lines 5287--5287
+
+ []
+
+[67] [68]
+Underfull \hbox (badness 10000) in paragraph at lines 5454--5455
+[]\T1/ptm/m/n/10 Evaluated each time \T1/pcr/m/n/10 pipeline_run\T1/ptm/m/n/10
+, \T1/pcr/m/n/10 pipeline_printout \T1/ptm/m/n/10 or
+ []
+
+[69]
+Overfull \vbox (1.23709pt too high) detected at line 5536
+ []
+
+[70] [71] [72] [73] [74] [75]
+! FancyVerb Error:
+ Empty verbatim environment
+.
+\FV@Error ... {FancyVerb Error:
+\space \space #1
+}
+
+l.6001 \end{Verbatim}
+
+?
+[76] [77] [78] [79]
+Underfull \hbox (badness 10000) in paragraph at lines 6290--6290
+ |[][]\T1/phv/b/n/14.4 Appendix 1: Flow Chart Colours with
+ []
+
+[80]
+File: flowchart_colour_schemes.png Graphic file (type png)
+ <use flowchart_colour_schemes.png>
+Package pdftex.def Info: flowchart_colour_schemes.png used on input line 6354.
+(pdftex.def) Requested size: 1324.94678pt x 473.76884pt.
+File: flowchart_colour_schemes.png Graphic file (type png)
+ <use flowchart_colour_schemes.png>
+Package pdftex.def Info: flowchart_colour_schemes.png used on input line 6354.
+(pdftex.def) Requested size: 469.755pt x 167.96904pt.
+
+<manual_dependencies_flowchart_intro.png, id=1444, 552.0625pt x 445.665pt>
+File: manual_dependencies_flowchart_intro.png Graphic file (type png)
+
+<use manual_dependencies_flowchart_intro.png>
+Package pdftex.def Info: manual_dependencies_flowchart_intro.png used on input
+line 6390.
+(pdftex.def) Requested size: 552.06114pt x 445.66391pt.
+File: manual_dependencies_flowchart_intro.png Graphic file (type png)
+
+<use manual_dependencies_flowchart_intro.png>
+Package pdftex.def Info: manual_dependencies_flowchart_intro.png used on input
+line 6390.
+(pdftex.def) Requested size: 419.75494pt x 338.87212pt.
+ [81 <./manual_dependencies_flowchart_intro.png>] [82]
+Overfull \vbox (1.25279pt too high) detected at line 6546
+ []
+
+[83] [84]
+Overfull \hbox (4.21735pt too wide) in paragraph at lines 6679--6760
+[][]
+ []
+
+[85]
+Overfull \vbox (42.80713pt too high) has occurred while \output is active []
+
+
+[86]
+Underfull \hbox (badness 10000) in paragraph at lines 6767--6777
+
+ []
+
+[87] [88] [89]
+Underfull \hbox (badness 10000) in paragraph at lines 7025--7035
+
+ []
+
+[90]
+Underfull \hbox (badness 10000) in paragraph at lines 7142--7144
+[]\T1/pcr/m/n/10 *.chunks \T1/ptm/m/n/10 are gen-er-ated by the task func-tion
+\T1/pcr/m/n/10 split_up_problem() \T1/ptm/m/n/10 and
+ []
+
+[91] [92] [93] [94] [95] [96] [97] [98] [99] [100] [101] [102] [103]
+Underfull \vbox (badness 3623) detected at line 8043
+ []
+
+[104] [105]
+<simple_tutorial_stage5_before.png, id=1698, 488.82625pt x 396.48125pt>
+File: simple_tutorial_stage5_before.png Graphic file (type png)
+
+<use simple_tutorial_stage5_before.png>
+Package pdftex.def Info: simple_tutorial_stage5_before.png used on input line 8
+145.
+(pdftex.def) Requested size: 488.82504pt x 396.48027pt.
+File: simple_tutorial_stage5_before.png Graphic file (type png)
+
+<use simple_tutorial_stage5_before.png>
+Package pdftex.def Info: simple_tutorial_stage5_before.png used on input line 8
+145.
+(pdftex.def) Requested size: 199.38615pt x 161.72348pt.
+
+<simple_tutorial_stage5_after.png, id=1699, 488.82625pt x 396.48125pt>
+File: simple_tutorial_stage5_after.png Graphic file (type png)
+
+<use simple_tutorial_stage5_after.png>
+Package pdftex.def Info: simple_tutorial_stage5_after.png used on input line 81
+49.
+(pdftex.def) Requested size: 488.82504pt x 396.48027pt.
+File: simple_tutorial_stage5_after.png Graphic file (type png)
+ <use simple_tutorial_stage5_after.png>
+Package pdftex.def Info: simple_tutorial_stage5_after.png used on input line 81
+49.
+(pdftex.def) Requested size: 199.38615pt x 161.72348pt.
+
+Overfull \hbox (4.21735pt too wide) in paragraph at lines 8142--8154
+[][]
+ []
+
+<tutorial_key.png, id=1700, 2726.185pt x 234.8775pt>
+File: tutorial_key.png Graphic file (type png)
+ <use tutorial_key.png>
+Package pdftex.def Info: tutorial_key.png used on input line 8159.
+(pdftex.def) Requested size: 2726.17838pt x 234.87692pt.
+File: tutorial_key.png Graphic file (type png)
+
+<use tutorial_key.png>
+Package pdftex.def Info: tutorial_key.png used on input line 8159.
+(pdftex.def) Requested size: 398.76591pt x 34.35562pt.
+ [106 <./simple_tutorial_stage5_before.png> <./simple_tutorial_stage5_after.png
+> <./tutorial_key.png (PNG copy)>] [107] [108] [109]
+[110] [111] [112] [113] [114]
+Underfull \vbox (badness 5726) detected at line 8810
+ []
+
+[115] [116] [117]
+Underfull \vbox (badness 3623) detected at line 8968
+ []
+
+[118] [119] [120] [121] [122] [123] [124] [125] [126] [127] [128] [129]
+[130] [131]
+Overfull \vbox (0.94992pt too high) detected at line 10115
+ []
+
+[132]
+Underfull \vbox (badness 10000) detected at line 10115
+ []
+
+
+Underfull \vbox (badness 10000) detected at line 10115
+ []
+
+[133]
+Underfull \vbox (badness 10000) detected at line 10115
+ []
+
+
+Underfull \vbox (badness 10000) detected at line 10115
+ []
+
+[134]
+Underfull \vbox (badness 10000) detected at line 10115
+ []
+
+
+Underfull \vbox (badness 10000) detected at line 10115
+ []
+
+[135] [136] [137]
+Underfull \vbox (badness 10000) detected at line 10436
+ []
+
+
+Underfull \vbox (badness 10000) detected at line 10436
+ []
+
+[138]
+Underfull \vbox (badness 10000) detected at line 10436
+ []
+
+
+Underfull \vbox (badness 10000) detected at line 10436
+ []
+
+[139]
+Underfull \vbox (badness 10000) detected at line 10436
+ []
+
+
+Underfull \vbox (badness 10000) detected at line 10436
+ []
+
+[140] [141] [142
+
+]
+Chapter 2.
+
+Underfull \hbox (badness 10000) in paragraph at lines 10469--10470
+[]|[][]\T1/ptm/m/it/10 @originate[][] \T1/ptm/m/n/10 ( \T1/pcr/m/n/10 output_fi
+les\T1/ptm/m/n/10 ,
+ []
+
+
+Underfull \hbox (badness 10000) in paragraph at lines 10474--10475
+[]|[][]\T1/ptm/m/it/10 @split[][] \T1/ptm/m/n/10 (
+ []
+
+
+Underfull \hbox (badness 10000) in paragraph at lines 10474--10475
+\T1/pcr/m/n/10 tasks_or_file_names\T1/ptm/m/n/10 ,
+ []
+
+
+Underfull \hbox (badness 10000) in paragraph at lines 10474--10475
+\T1/pcr/m/n/10 output_files\T1/ptm/m/n/10 ,
+ []
+
+
+Underfull \hbox (badness 10000) in paragraph at lines 10487--10488
+[]|[][]\T1/ptm/m/it/10 @merge[][]
+ []
+
+
+Underfull \hbox (badness 10000) in paragraph at lines 10487--10488
+\T1/ptm/m/n/10 (\T1/pcr/m/n/10 tasks_or_file_names\T1/ptm/m/n/10 ,
+ []
+
+
+Underfull \hbox (badness 10000) in paragraph at lines 10487--10488
+\T1/pcr/m/n/10 output\T1/ptm/m/n/10 ,
+ []
+
+
+Overfull \hbox (37.08704pt too wide) in paragraph at lines 10493--10494
+\T1/pcr/m/n/10 signal_task_completion_function
+ []
+
+
+Overfull \hbox (14.58386pt too wide) in paragraph at lines 10459--10498
+[]
+ []
+
+[143] [144]
+Underfull \hbox (badness 5548) in paragraph at lines 10558--10559
+\T1/ptm/b/n/10 pipeline_run \T1/ptm/m/n/10 ( [][]\T1/ptm/m/it/10 tar-get_tasks[
+][] \T1/ptm/m/n/10 = [], [][]\T1/ptm/m/it/10 forced-torun_tasks[][] \T1/ptm/m/n
+/10 = [], [][]\T1/ptm/m/it/10 mul-ti-pro-cess[][] \T1/ptm/m/n/10 = 1, [][]\T1/p
+tm/m/it/10 log-ger[][] \T1/ptm/m/n/10 = stderr_logger,
+ []
+
+LaTeX Font Info: Font shape `T1/ptm/bx/it' in size <10> not available
+(Font) Font shape `T1/ptm/b/it' tried instead on input line 10584.
+
+
+Overfull \hbox (143.72897pt too wide) in paragraph at lines 10632--10636
+[] []
+ []
+
+
+Underfull \hbox (badness 10000) in paragraph at lines 10632--10636
+
+ []
+
+[145] [146]
+Underfull \hbox (badness 10000) in paragraph at lines 10788--10789
+[]\T1/ptm/m/n/10 level < 0: In-put / Out-put pa-ram-e-ters are trun-cated to \T
+1/pcr/m/n/10 MMM \T1/ptm/m/n/10 let-ters where
+ []
+
+[147]
+Overfull \hbox (143.72897pt too wide) in paragraph at lines 10903--10907
+[] []
+ []
+
+
+Underfull \hbox (badness 10000) in paragraph at lines 10903--10907
+
+ []
+
+
+Underfull \hbox (badness 10000) in paragraph at lines 10973--10974
+[]\T1/ptm/m/n/10 level < 0: In-put / Out-put pa-ram-e-ters are trun-cated to \T
+1/pcr/m/n/10 MMM \T1/ptm/m/n/10 let-ters where
+ []
+
+[148]
+Overfull \hbox (143.72897pt too wide) in paragraph at lines 11089--11093
+[] []
+ []
+
+
+Underfull \hbox (badness 10000) in paragraph at lines 11089--11093
+
+ []
+
+
+Underfull \hbox (badness 10000) in paragraph at lines 11149--11150
+[]
+ []
+
+
+Overfull \hbox (10.68608pt too wide) in paragraph at lines 11149--11150
+\T1/pcr/m/n/10 'colour_scheme_index'
+ []
+
+
+Underfull \hbox (badness 10000) in paragraph at lines 11163--11164
+[]\T1/pcr/m/n/10 'Explicitly
+ []
+
+
+Underfull \hbox (badness 10000) in paragraph at lines 11172--11173
+[]\T1/pcr/m/n/10 'Up-to-date Final
+ []
+
+
+Underfull \hbox (badness 10000) in paragraph at lines 11175--11176
+[]\T1/pcr/m/n/10 'Up-to-date task
+ []
+
+
+Overfull \hbox (16.88666pt too wide) in paragraph at lines 11140--11250
+[][]
+ []
+
+[149] [150] [151] [152]
+Underfull \hbox (badness 10000) in paragraph at lines 11488--11490
+[]\T1/ptm/m/n/10 Examples for SGE in-clude project name (\T1/pcr/m/n/10 -P proj
+ect_name\T1/ptm/m/n/10 ), par-al-lel en-vi-ron-
+ []
+
+
+Underfull \hbox (badness 10000) in paragraph at lines 11488--11490
+\T1/ptm/m/n/10 ment (\T1/pcr/m/n/10 -pe parallel_environ\T1/ptm/m/n/10 ), ac-co
+unt (\T1/pcr/m/n/10 -A account_string\T1/ptm/m/n/10 ), re-source (\T1/pcr/m/n/1
+0 -l
+ []
+
+[153] [154] [155] [156]
+<front_page_flowchart.png, id=2330, 450.68375pt x 387.4475pt>
+File: front_page_flowchart.png Graphic file (type png)
+
+<use front_page_flowchart.png>
+Package pdftex.def Info: front_page_flowchart.png used on input line 11918.
+(pdftex.def) Requested size: 450.68263pt x 387.44655pt.
+File: front_page_flowchart.png Graphic file (type png)
+ <use front_page_flowchart.png>
+Package pdftex.def Info: front_page_flowchart.png used on input line 11918.
+(pdftex.def) Requested size: 397.75494pt x 341.95988pt.
+ [157] [158 <./front_page_flowchart.png>] [159] [160]
+<history_html_flowchart.png, id=2388, 1126.2075pt x 1058.95625pt>
+File: history_html_flowchart.png Graphic file (type png)
+
+<use history_html_flowchart.png>
+Package pdftex.def Info: history_html_flowchart.png used on input line 12176.
+(pdftex.def) Requested size: 1126.20477pt x 1058.95367pt.
+File: history_html_flowchart.png Graphic file (type png)
+ <use history_html_flowchart.png>
+Package pdftex.def Info: history_html_flowchart.png used on input line 12176.
+(pdftex.def) Requested size: 375.75494pt x 353.3185pt.
+ [161 <./history_html_flowchart.png>]
+Underfull \hbox (badness 10000) in paragraph at lines 12260--12261
+[]\T1/ptm/m/n/10 A to-tal of MMM char-ac-ters, MMM is spec-i-fied by set-ting
+ []
+
+
+Overfull \hbox (22.18498pt too wide) in paragraph at lines 12270--12271
+[]\T1/ptm/m/n/10 Given \T1/pcr/m/n/10 ["aa/bb/cc/dddd.txt", "aaa/bbbb/cccc/eeed
+/eeee/ffff/gggg.txt"]
+ []
+
+[162] [163]
+Underfull \hbox (badness 10000) in paragraph at lines 12394--12395
+[]\T1/ptm/m/n/10 defaults to check-ing file times-tamps stored in the cur-rent
+di-rec-tory
+ []
+
+[164]
+Overfull \hbox (141.51373pt too wide) in paragraph at lines 12549--12549
+[]
+ []
+
+
+Underfull \hbox (badness 10000) in paragraph at lines 12549--12549
+
+ []
+
+[165]
+Underfull \hbox (badness 10000) in paragraph at lines 12582--12584
+[]\T1/ptm/m/n/10 The ex-pres-sions in-side @ac-tive_if are eval-u-ated each tim
+e \T1/pcr/m/n/10 pipeline_run\T1/ptm/m/n/10 ,
+ []
+
+[166]
+
+! LaTeX Error: Too deeply nested.
+
+See the LaTeX manual or LaTeX Companion for explanation.
+Type H <return> for immediate help.
+ ...
+
+l.12697 \begin{Verbatim}[commandchars=\\\{\}]
+
+?
+[167] [168] [169] [170] [171] [172] [173] [174]
+Overfull \hbox (145.57115pt too wide) in paragraph at lines 13363--13363
+[]
+ []
+
+
+Underfull \hbox (badness 10000) in paragraph at lines 13363--13363
+
+ []
+
+[175]
+Underfull \hbox (badness 10000) in paragraph at lines 13488--13493
+[]\T1/ptm/m/n/10 Full ver-sion use libpython-grid? * Chris-tian Wid-mer <[][]ck
+-wid-
+ []
+
+
+Underfull \hbox (badness 10000) in paragraph at lines 13488--13493
+\T1/ptm/m/n/10 mer at gmail.com[][]> * Cheng Soon Ong <[][]cheng-soon.ong at unimelb.
+edu.au[][]> *
+ []
+
+[176] [177] [178]
+Underfull \hbox (badness 10000) in paragraph at lines 13721--13723
+[]\T1/ptm/m/n/10 An \T1/pcr/m/n/10 --remove_empty_parent_directories \T1/ptm/m/
+n/10 op-tion would be sup-ported by
+ []
+
+[179] [180]
+Overfull \hbox (9.93314pt too wide) in paragraph at lines 13883--13885
+[]\T1/ptm/m/n/10 When a sig-nal hap-pens, we need to im-me-di-ately stop \T1/pc
+r/m/n/10 feed_job_params_to_process_pool()
+ []
+
+[181]
+Overfull \hbox (166.61172pt too wide) in paragraph at lines 13994--13996
+[]
+ []
+
+[182]
+Overfull \hbox (19.77505pt too wide) in paragraph at lines 14042--14043
+[]\T1/ptm/m/n/10 E.g. \T1/pcr/m/n/10 name = '/a/b/c/sample1.bam'\T1/ptm/m/n/10
+, \T1/pcr/m/n/10 formatter=r"(.*)(?P<id>\d+)\.(.+)")
+ []
+
+[183]
+Overfull \hbox (100.60896pt too wide) in paragraph at lines 14154--14154
+ |[][]\T1/pcr/b/n/12 @permutations(...), @combinations(...), @combinations_with
+_replacement(...)|
+ []
+
+[184]
+Underfull \hbox (badness 10000) in paragraph at lines 14164--14165
+[]\T1/ptm/m/n/10 use \T1/pcr/m/n/10 combinatorics_type \T1/ptm/m/n/10 to dis-pa
+tch to \T1/pcr/m/n/10 combinatorics.permutations\T1/ptm/m/n/10 ,
+ []
+
+
+Overfull \hbox (53.18501pt too wide) in paragraph at lines 14164--14165
+\T1/pcr/m/n/10 combinatorics.combinations \T1/ptm/m/n/10 and \T1/pcr/m/n/10 com
+binatorics.combinations_with_replacement
+ []
+
+
+Underfull \hbox (badness 10000) in paragraph at lines 14167--14168
+[]\T1/ptm/m/n/10 use \T1/pcr/m/n/10 list_input_param_to_file_name_list \T1/ptm/
+m/n/10 from
+ []
+
+
+Overfull \hbox (44.03113pt too wide) in paragraph at lines 14191--14192
+[]
+ []
+
+
+Underfull \hbox (badness 10000) in paragraph at lines 14236--14237
+[]\T1/ptm/m/n/10 Should be fast: a sin-gle db con-nec-tion is cre-ated and used
+ in-side \T1/pcr/m/n/10 pipeline_run\T1/ptm/m/n/10 ,
+ []
+
+[185] [186] [187] <pretty_flowchart.png, id=2676, 1120.185pt x 180.675pt>
+File: pretty_flowchart.png Graphic file (type png)
+
+<use pretty_flowchart.png>
+Package pdftex.def Info: pretty_flowchart.png used on input line 14394.
+(pdftex.def) Requested size: 1120.18227pt x 180.67455pt.
+File: pretty_flowchart.png Graphic file (type png)
+ <use pretty_flowchart.png>
+Package pdftex.def Info: pretty_flowchart.png used on input line 14394.
+(pdftex.def) Requested size: 375.75494pt x 60.60437pt.
+ [188 <./pretty_flowchart.png>]
+LaTeX Font Info: Font shape `T1/phv/bx/n' in size <9> not available
+(Font) Font shape `T1/phv/b/n' tried instead on input line 14481.
+ [189] [190] [191] [192] [193] [194] [195] [196] [197] [198] [199]
+[200] <logo.jpg, id=2826, 155.85182pt x 47.13261pt>
+File: logo.jpg Graphic file (type jpg)
+ <use logo.jpg>
+Package pdftex.def Info: logo.jpg used on input line 15378.
+(pdftex.def) Requested size: 155.85143pt x 47.13249pt.
+File: logo.jpg Graphic file (type jpg)
+
+<use logo.jpg>
+Package pdftex.def Info: logo.jpg used on input line 15378.
+(pdftex.def) Requested size: 155.85143pt x 47.13249pt.
+ <gallery_rna_seq.png, id=2828, 435.6275pt x 722.7pt>
+File: gallery_rna_seq.png Graphic file (type png)
+
+<use gallery_rna_seq.png>
+Package pdftex.def Info: gallery_rna_seq.png used on input line 15393.
+(pdftex.def) Requested size: 435.62643pt x 722.69823pt.
+File: gallery_rna_seq.png Graphic file (type png)
+ <use gallery_rna_seq.png>
+Package pdftex.def Info: gallery_rna_seq.png used on input line 15393.
+(pdftex.def) Requested size: 419.75494pt x 696.36455pt.
+
+Underfull \hbox (badness 10000) in paragraph at lines 15392--15393
+[]\T1/ptm/m/n/10 Mapping tran-scripts onto genomes us-ing high-throughput se-qu
+enc-ing tech-nolo-gies (\T1/pcr/m/n/10 svg\T1/ptm/m/n/10 ).
+ []
+
+[201 <./logo.jpg>]
+Overfull \vbox (45.93456pt too high) has occurred while \output is active []
+
+
+[202 <./gallery_rna_seq.png>]
+<gallery_dless.png, id=2842, 1046.91125pt x 602.25pt>
+File: gallery_dless.png Graphic file (type png)
+ <use gallery_dless.png>
+Package pdftex.def Info: gallery_dless.png used on input line 15402.
+(pdftex.def) Requested size: 1046.9087pt x 602.24852pt.
+File: gallery_dless.png Graphic file (type png)
+
+<use gallery_dless.png>
+Package pdftex.def Info: gallery_dless.png used on input line 15402.
+(pdftex.def) Requested size: 419.75494pt x 241.46548pt.
+
+Underfull \hbox (badness 10000) in paragraph at lines 15401--15402
+[]\T1/ptm/m/n/10 Non-protein cod-ing evo-lu-tion-ary con-straints in dif-fer-en
+t species (\T1/pcr/m/n/10 svg\T1/ptm/m/n/10 ).
+ []
+
+<gallery_snp_annotation.png, id=2844, 1084.05pt x 526.96875pt>
+File: gallery_snp_annotation.png Graphic file (type png)
+
+<use gallery_snp_annotation.png>
+Package pdftex.def Info: gallery_snp_annotation.png used on input line 15412.
+(pdftex.def) Requested size: 1084.04736pt x 526.96745pt.
+File: gallery_snp_annotation.png Graphic file (type png)
+ <use gallery_snp_annotation.png>
+Package pdftex.def Info: gallery_snp_annotation.png used on input line 15412.
+(pdftex.def) Requested size: 469.755pt x 228.35309pt.
+
+<gallery_snp_annotation_consequences.png, id=2845, 1084.05pt x 526.96875pt>
+File: gallery_snp_annotation_consequences.png Graphic file (type png)
+
+<use gallery_snp_annotation_consequences.png>
+Package pdftex.def Info: gallery_snp_annotation_consequences.png used on input
+line 15414.
+(pdftex.def) Requested size: 1084.04736pt x 526.96745pt.
+File: gallery_snp_annotation_consequences.png Graphic file (type png)
+
+<use gallery_snp_annotation_consequences.png>
+Package pdftex.def Info: gallery_snp_annotation_consequences.png used on input
+line 15414.
+(pdftex.def) Requested size: 469.755pt x 228.35309pt.
+
+Overfull \hbox (176.83836pt too wide) in paragraph at lines 15411--15415
+[]\T1/ptm/m/n/10 Population vari-a-tion across genomes (\T1/pcr/m/n/10 svg\T1/p
+tm/m/n/10 ). []
+ []
+
+
+Overfull \hbox (243.84808pt too wide) in paragraph at lines 15411--15415
+\T1/ptm/m/n/10 Us-ing ``pseudo'' tar-gets to run only part of the pipeline (\T1
+/pcr/m/n/10 svg\T1/ptm/m/n/10 ). []
+ []
+
+[203 <./gallery_dless.png> <./gallery_snp_annotation.png>]
+<gallery_big_pipeline.png, id=2856, 7528.125pt x 836.12375pt>
+File: gallery_big_pipeline.png Graphic file (type png)
+
+<use gallery_big_pipeline.png>
+Package pdftex.def Info: gallery_big_pipeline.png used on input line 15423.
+(pdftex.def) Requested size: 7528.10675pt x 836.1217pt.
+File: gallery_big_pipeline.png Graphic file (type png)
+ <use gallery_big_pipeline.png>
+Package pdftex.def Info: gallery_big_pipeline.png used on input line 15423.
+(pdftex.def) Requested size: 419.75494pt x 46.61847pt.
+
+Overfull \hbox (26.55994pt too wide) in paragraph at lines 15422--15423
+[]\T1/ptm/m/n/10 (\T1/pcr/m/n/10 svg\T1/ptm/m/n/10 ) []
+ []
+
+<wikimedia_cyl_ruffus.jpg, id=2860, 411.5375pt x 348.30125pt>
+File: wikimedia_cyl_ruffus.jpg Graphic file (type jpg)
+
+<use wikimedia_cyl_ruffus.jpg>
+Package pdftex.def Info: wikimedia_cyl_ruffus.jpg used on input line 15439.
+(pdftex.def) Requested size: 411.5365pt x 348.3004pt.
+File: wikimedia_cyl_ruffus.jpg Graphic file (type jpg)
+ <use wikimedia_cyl_ruffus.jpg>
+Package pdftex.def Info: wikimedia_cyl_ruffus.jpg used on input line 15439.
+(pdftex.def) Requested size: 223.13649pt x 188.8505pt.
+
+<wikimedia_bandedkrait.jpg, id=2861, 216.81pt x 162.6075pt>
+File: wikimedia_bandedkrait.jpg Graphic file (type jpg)
+
+<use wikimedia_bandedkrait.jpg>
+Package pdftex.def Info: wikimedia_bandedkrait.jpg used on input line 15441.
+(pdftex.def) Requested size: 216.80946pt x 162.6071pt.
+File: wikimedia_bandedkrait.jpg Graphic file (type jpg)
+ <use wikimedia_bandedkrait.jpg>
+Package pdftex.def Info: wikimedia_bandedkrait.jpg used on input line 15441.
+(pdftex.def) Requested size: 216.80946pt x 162.6071pt.
+
+Overfull \hbox (1.71796pt too wide) in paragraph at lines 15436--15461
+[][]
+ []
+
+[204 <./gallery_snp_annotation_consequences.png> <./gallery_big_pipeline.png>]
+[205 <./wikimedia_cyl_ruffus.jpg> <./wikimedia_bandedkrait.jpg>] [206
+
+]
+Chapter 3.
+[207] <examples_bioinformatics_split.jpg, id=2908, 730.73pt x 307.1475pt>
+File: examples_bioinformatics_split.jpg Graphic file (type jpg)
+
+<use examples_bioinformatics_split.jpg>
+Package pdftex.def Info: examples_bioinformatics_split.jpg used on input line 1
+5625.
+(pdftex.def) Requested size: 730.72821pt x 307.14674pt.
+File: examples_bioinformatics_split.jpg Graphic file (type jpg)
+
+<use examples_bioinformatics_split.jpg>
+Package pdftex.def Info: examples_bioinformatics_split.jpg used on input line 1
+5625.
+(pdftex.def) Requested size: 375.75494pt x 157.94136pt.
+ [208]
+<examples_bioinformatics_transform.jpg, id=2920, 615.29875pt x 185.69376pt>
+File: examples_bioinformatics_transform.jpg Graphic file (type jpg)
+
+<use examples_bioinformatics_transform.jpg>
+Package pdftex.def Info: examples_bioinformatics_transform.jpg used on input li
+ne 15662.
+(pdftex.def) Requested size: 615.29724pt x 185.6933pt.
+File: examples_bioinformatics_transform.jpg Graphic file (type jpg)
+
+<use examples_bioinformatics_transform.jpg>
+Package pdftex.def Info: examples_bioinformatics_transform.jpg used on input li
+ne 15662.
+(pdftex.def) Requested size: 375.75494pt x 113.40335pt.
+ [209 <./examples_bioinformatics_split.jpg> <./examples_bioinformatics_transfor
+m.jpg>]
+<examples_bioinformatics_merge.jpg, id=2929, 691.58376pt x 200.75pt>
+File: examples_bioinformatics_merge.jpg Graphic file (type jpg)
+
+<use examples_bioinformatics_merge.jpg>
+Package pdftex.def Info: examples_bioinformatics_merge.jpg used on input line 1
+5686.
+(pdftex.def) Requested size: 691.58206pt x 200.7495pt.
+File: examples_bioinformatics_merge.jpg Graphic file (type jpg)
+
+<use examples_bioinformatics_merge.jpg>
+Package pdftex.def Info: examples_bioinformatics_merge.jpg used on input line 1
+5686.
+(pdftex.def) Requested size: 375.75494pt x 109.07422pt.
+ [210 <./examples_bioinformatics_merge.jpg>] [211] [212]
+<examples_bioinformatics_pipeline.jpg, id=2960, 602.25pt x 251.94125pt>
+File: examples_bioinformatics_pipeline.jpg Graphic file (type jpg)
+
+<use examples_bioinformatics_pipeline.jpg>
+Package pdftex.def Info: examples_bioinformatics_pipeline.jpg used on input lin
+e 15957.
+(pdftex.def) Requested size: 602.24852pt x 251.94063pt.
+File: examples_bioinformatics_pipeline.jpg Graphic file (type jpg)
+
+<use examples_bioinformatics_pipeline.jpg>
+Package pdftex.def Info: examples_bioinformatics_pipeline.jpg used on input lin
+e 15957.
+(pdftex.def) Requested size: 419.75494pt x 175.60417pt.
+ [213 <./examples_bioinformatics_pipeline.jpg>]
+<examples_bioinformatics_error.png, id=2971, 791.95876pt x 989.6975pt>
+File: examples_bioinformatics_error.png Graphic file (type png)
+
+<use examples_bioinformatics_error.png>
+Package pdftex.def Info: examples_bioinformatics_error.png used on input line 1
+5977.
+(pdftex.def) Requested size: 791.95683pt x 989.69508pt.
+File: examples_bioinformatics_error.png Graphic file (type png)
+
+<use examples_bioinformatics_error.png>
+Package pdftex.def Info: examples_bioinformatics_error.png used on input line 1
+5977.
+(pdftex.def) Requested size: 375.75494pt x 469.58264pt.
+ [214 <./examples_bioinformatics_error.png>] [215] [216]
+Underfull \vbox (badness 10000) detected at line 16313
+ []
+
+
+Underfull \vbox (badness 10000) detected at line 16313
+ []
+
+[217]
+Underfull \vbox (badness 10000) detected at line 16313
+ []
+
+
+Underfull \vbox (badness 10000) detected at line 16313
+ []
+
+[218]
+Underfull \vbox (badness 10000) detected at line 16313
+ []
+
+
+Underfull \vbox (badness 10000) detected at line 16313
+ []
+
+[219] [220]
+Underfull \vbox (badness 10000) detected at line 16438
+ []
+
+
+Underfull \vbox (badness 10000) detected at line 16438
+ []
+
+[221] [222]
+Chapter 4.
+
+Underfull \hbox (badness 10000) in paragraph at lines 16478--16479
+[]\T1/ptm/m/n/10 Invoked onces (a job
+ []
+
+
+Underfull \hbox (badness 10000) in paragraph at lines 16478--16479
+\T1/ptm/m/n/10 cre-ated) per item in the
+ []
+
+
+Overfull \hbox (131.05626pt too wide) in paragraph at lines 16483--16484
+[]
+ []
+
+
+Underfull \hbox (badness 10000) in paragraph at lines 16483--16484
+
+ []
+
+
+Overfull \hbox (230.08627pt too wide) in paragraph at lines 16502--16503
+[]
+ []
+
+
+Underfull \hbox (badness 10000) in paragraph at lines 16502--16503
+
+ []
+
+
+Overfull \hbox (378.77586pt too wide) in paragraph at lines 16518--16519
+[]
+ []
+
+
+Underfull \hbox (badness 10000) in paragraph at lines 16518--16519
+
+ []
+
+
+Overfull \hbox (378.38585pt too wide) in paragraph at lines 16524--16525
+[]
+ []
+
+
+Underfull \hbox (badness 10000) in paragraph at lines 16524--16525
+
+ []
+
+
+Overfull \hbox (394.40544pt too wide) in paragraph at lines 16530--16531
+[]
+ []
+
+
+Underfull \hbox (badness 10000) in paragraph at lines 16530--16531
+
+ []
+
+
+Overfull \hbox (199.87648pt too wide) in paragraph at lines 16546--16547
+[]
+ []
+
+
+Underfull \hbox (badness 10000) in paragraph at lines 16546--16547
+
+ []
+
+
+Overfull \hbox (14.58386pt too wide) in paragraph at lines 16461--16554
+[]
+ []
+
+
+Overfull \hbox (685.4845pt too wide) in paragraph at lines 16575--16576
+[]
+ []
+
+
+Underfull \hbox (badness 10000) in paragraph at lines 16575--16576
+
+ []
+
+
+Overfull \hbox (487.18509pt too wide) in paragraph at lines 16597--16598
+[]
+ []
+
+
+Underfull \hbox (badness 10000) in paragraph at lines 16597--16598
+
+ []
+
+
+Overfull \hbox (488.2952pt too wide) in paragraph at lines 16624--16625
+[]
+ []
+
+
+Underfull \hbox (badness 10000) in paragraph at lines 16624--16625
+
+ []
+
+
+Underfull \hbox (badness 10000) in paragraph at lines 16632--16633
+[]|\T1/ptm/b/n/10 @combinations_with_replacement
+ []
+
+
+Underfull \hbox (badness 10000) in paragraph at lines 16644--16647
+[]\T1/ptm/m/n/10 Generates the \T1/ptm/b/n/10 com-bi-na-
+ []
+
+
+Underfull \hbox (badness 10000) in paragraph at lines 16644--16647
+\T1/ptm/b/n/10 tions_with_replacement\T1/ptm/m/n/10 ,
+ []
+
+
+Overfull \hbox (566.664pt too wide) in paragraph at lines 16651--16652
+[]
+ []
+
+
+Underfull \hbox (badness 10000) in paragraph at lines 16651--16652
+
+ []
+
+
+Overfull \hbox (14.58386pt too wide) in paragraph at lines 16559--16659
+[]
+ []
+
+[223
+
+] [224]
+Overfull \vbox (22.02676pt too high) has occurred while \output is active []
+
+
+[225]
+Overfull \hbox (568.87491pt too wide) in paragraph at lines 16680--16681
+[]
+ []
+
+
+Underfull \hbox (badness 10000) in paragraph at lines 16680--16681
+
+ []
+
+
+Overfull \hbox (594.05441pt too wide) in paragraph at lines 16686--16687
+[]
+ []
+
+
+Underfull \hbox (badness 10000) in paragraph at lines 16686--16687
+
+ []
+
+
+Underfull \hbox (badness 5847) in paragraph at lines 16700--16701
+[]\T1/ptm/m/n/10 Useful for adding ad-di-
+ []
+
+
+Overfull \hbox (571.00478pt too wide) in paragraph at lines 16705--16706
+[]
+ []
+
+
+Underfull \hbox (badness 10000) in paragraph at lines 16705--16706
+
+ []
+
+
+Overfull \hbox (587.02437pt too wide) in paragraph at lines 16711--16712
+[]
+ []
+
+
+Underfull \hbox (badness 10000) in paragraph at lines 16711--16712
+
+ []
+
+
+Underfull \hbox (badness 10000) in paragraph at lines 16722--16723
+\T1/ptm/m/n/10 us-ing reg-u-lar ex-pres-sion
+ []
+
+
+Underfull \hbox (badness 10000) in paragraph at lines 16725--16726
+\T1/ptm/m/n/10 out-put af-ter sub-sti-tu-tion
+ []
+
+
+Overfull \hbox (374.93611pt too wide) in paragraph at lines 16730--16731
+[]
+ []
+
+
+Underfull \hbox (badness 10000) in paragraph at lines 16730--16731
+
+ []
+
+
+Overfull \hbox (543.04506pt too wide) in paragraph at lines 16736--16737
+[]
+ []
+
+
+Underfull \hbox (badness 10000) in paragraph at lines 16736--16737
+
+ []
+
+
+Overfull \hbox (414.9557pt too wide) in paragraph at lines 16742--16743
+[]
+ []
+
+
+Underfull \hbox (badness 10000) in paragraph at lines 16742--16743
+
+ []
+
+
+Overfull \hbox (583.06465pt too wide) in paragraph at lines 16748--16749
+[]
+ []
+
+
+Underfull \hbox (badness 10000) in paragraph at lines 16748--16749
+
+ []
+
+
+Overfull \hbox (25.84651pt too wide) in paragraph at lines 16767--16768
+[]
+ []
+
+
+Underfull \hbox (badness 10000) in paragraph at lines 16767--16768
+
+ []
+
+
+Overfull \hbox (104.12614pt too wide) in paragraph at lines 16773--16774
+[]
+ []
+
+
+Underfull \hbox (badness 10000) in paragraph at lines 16773--16774
+
+ []
+
+
+Overfull \hbox (135.75656pt too wide) in paragraph at lines 16792--16793
+[]
+ []
+
+
+Underfull \hbox (badness 10000) in paragraph at lines 16792--16793
+
+ []
+
+
+Overfull \hbox (99.43596pt too wide) in paragraph at lines 16798--16799
+[]
+ []
+
+
+Underfull \hbox (badness 10000) in paragraph at lines 16798--16799
+
+ []
+
+
+Underfull \hbox (badness 10000) in paragraph at lines 16812--16813
+[]\T1/ptm/m/n/10 Evaluated each time
+ []
+
+
+Underfull \hbox (badness 10000) in paragraph at lines 16812--16813
+[][]\T1/ptm/m/it/10 pipeline_run(...)[][]\T1/ptm/m/n/10 ,
+ []
+
+
+Overfull \hbox (1.28616pt too wide) in paragraph at lines 16812--16813
+[][]\T1/ptm/m/it/10 pipeline_printout_graph(...)[][]
+ []
+
+
+Overfull \hbox (126.9864pt too wide) in paragraph at lines 16820--16821
+[]
+ []
+
+
+Underfull \hbox (badness 10000) in paragraph at lines 16820--16821
+
+ []
+
+
+Underfull \hbox (badness 10000) in paragraph at lines 16837--16838
+[]\T1/ptm/m/n/10 Overrides
+ []
+
+
+Underfull \hbox (badness 10000) in paragraph at lines 16837--16838
+\T1/pcr/m/n/10 multiprocess \T1/ptm/m/n/10 pa-
+ []
+
+
+Overfull \hbox (167.01613pt too wide) in paragraph at lines 16842--16843
+[]
+ []
+
+
+Underfull \hbox (badness 10000) in paragraph at lines 16842--16843
+
+ []
+
+
+Underfull \hbox (badness 10000) in paragraph at lines 16853--16854
+[]\T1/ptm/m/n/10 Generates paths for
+ []
+
+
+Overfull \hbox (256.79636pt too wide) in paragraph at lines 16858--16859
+[]
+ []
+
+
+Underfull \hbox (badness 10000) in paragraph at lines 16858--16859
+
+ []
+
+
+Overfull \hbox (256.40634pt too wide) in paragraph at lines 16864--16865
+[]
+ []
+
+
+Underfull \hbox (badness 10000) in paragraph at lines 16864--16865
+
+ []
+
+
+Overfull \hbox (269.92593pt too wide) in paragraph at lines 16870--16871
+[]
+ []
+
+
+Underfull \hbox (badness 10000) in paragraph at lines 16870--16871
+
+ []
+
+
+Underfull \hbox (badness 10000) in paragraph at lines 16881--16882
+[]\T1/ptm/m/n/10 Customise the graphic
+ []
+
+
+Overfull \hbox (284.47655pt too wide) in paragraph at lines 16886--16887
+[]
+ []
+
+
+Underfull \hbox (badness 10000) in paragraph at lines 16886--16887
+
+ []
+
+
+Overfull \hbox (17.03363pt too wide) in paragraph at lines 16665--16894
+[][]
+ []
+
+[226]
+Overfull \vbox (296.6966pt too high) has occurred while \output is active []
+
+
+[227]
+Overfull \hbox (14.58665pt too wide) in paragraph at lines 16923--16924
+[]
+ []
+
+
+Underfull \hbox (badness 10000) in paragraph at lines 16923--16924
+
+ []
+
+
+Overfull \hbox (104.58665pt too wide) in paragraph at lines 16929--16930
+[]
+ []
+
+
+Underfull \hbox (badness 10000) in paragraph at lines 16929--16930
+
+ []
+
+
+Overfull \hbox (154.18663pt too wide) in paragraph at lines 16935--16936
+[]
+ []
+
+
+Underfull \hbox (badness 10000) in paragraph at lines 16935--16936
+
+ []
+
+
+Underfull \hbox (badness 5817) in paragraph at lines 16949--16950
+[]\T1/ptm/m/n/10 Best used in con-juc-tion
+ []
+
+
+Overfull \hbox (89.54584pt too wide) in paragraph at lines 16954--16955
+[]
+ []
+
+
+Underfull \hbox (badness 10000) in paragraph at lines 16954--16955
+
+ []
+
+
+Overfull \hbox (179.54584pt too wide) in paragraph at lines 16960--16961
+[]
+ []
+
+
+Underfull \hbox (badness 10000) in paragraph at lines 16960--16961
+
+ []
+
+
+Overfull \hbox (153.76628pt too wide) in paragraph at lines 16976--16977
+[]
+ []
+
+
+Underfull \hbox (badness 10000) in paragraph at lines 16976--16977
+
+ []
+
+
+Overfull \hbox (120.90428pt too wide) in paragraph at lines 16985--16987
+[]
+ []
+
+
+Overfull \hbox (351.67638pt too wide) in paragraph at lines 17005--17009
+[]
+ []
+
+
+Overfull \hbox (72.41699pt too wide) in paragraph at lines 17005--17009
+\T1/pcr/m/n/10 input_pattern\T1/ptm/m/n/10 /\T1/pcr/m/n/10 output_pattern
+ []
+
+
+Overfull \hbox (17.03363pt too wide) in paragraph at lines 16901--17015
+[][]
+ []
+
+[228] [229] [230] [231] [232]
+Overfull \hbox (134.36107pt too wide) in paragraph at lines 17391--17392
+[]
+ []
+
+[233] [234]
+Underfull \hbox (badness 10000) in paragraph at lines 17618--17626
+
+ []
+
+[235]
+Underfull \hbox (badness 10000) in paragraph at lines 17956--17958
+[]\T1/ptm/m/n/10 E.g.:\T1/pcr/m/n/10 "a.*" => "a.1",
+ []
+
+
+Underfull \hbox (badness 10000) in paragraph at lines 18129--18131
+[]\T1/ptm/m/n/10 E.g.:\T1/pcr/m/n/10 "a.*" => "a.1",
+ []
+
+
+Overfull \hbox (14.79999pt too wide) in paragraph at lines 18160--18160
+ []
+ []
+
+[236]
+Overfull \vbox (1743.5831pt too high) has occurred while \output is active []
+
+
+[237]
+Underfull \hbox (badness 10000) in paragraph at lines 18204--18206
+[]
+ []
+
+
+Underfull \hbox (badness 10000) in paragraph at lines 18307--18309
+[]\T1/ptm/m/n/10 E.g.:\T1/pcr/m/n/10 "a.*" => "a.1",
+ []
+
+
+Underfull \hbox (badness 10000) in paragraph at lines 18407--18410
+[]\T1/ptm/m/n/10 Output file names and strings in the ex-tra pa-ram-e-ters are
+de-ter-mined from
+ []
+
+
+Underfull \hbox (badness 10000) in paragraph at lines 18488--18490
+[]\T1/ptm/m/n/10 E.g.:\T1/pcr/m/n/10 "a.*" => "a.1",
+ []
+
+
+Underfull \hbox (badness 10000) in paragraph at lines 18636--18639
+[]\T1/ptm/m/n/10 Output file names and strings in the ex-tra pa-ram-e-ters are
+de-ter-mined from
+ []
+
+
+Underfull \hbox (badness 10000) in paragraph at lines 18676--18678
+[]\T1/ptm/m/n/10 E.g.:\T1/pcr/m/n/10 "a.*" => "a.1",
+ []
+
+
+Underfull \hbox (badness 10000) in paragraph at lines 18898--18900
+[]\T1/ptm/m/n/10 E.g.:\T1/pcr/m/n/10 "a.*" => "a.1",
+ []
+
+<history_html_flowchart1.png, id=3418, 1126.2075pt x 1058.95625pt>
+File: history_html_flowchart1.png Graphic file (type png)
+
+<use history_html_flowchart1.png>
+Package pdftex.def Info: history_html_flowchart1.png used on input line 19054.
+(pdftex.def) Requested size: 1126.20477pt x 1058.95367pt.
+File: history_html_flowchart1.png Graphic file (type png)
+ <use history_html_flowchart1.png>
+Package pdftex.def Info: history_html_flowchart1.png used on input line 19054.
+(pdftex.def) Requested size: 374.26575pt x 351.91272pt.
+
+Underfull \hbox (badness 10000) in paragraph at lines 19281--19283
+[]\T1/ptm/m/n/10 E.g.:\T1/pcr/m/n/10 "a.*" => "a.1",
+ []
+
+<jobs_limit.png, id=3439, 501.875pt x 569.12625pt>
+File: jobs_limit.png Graphic file (type png)
+ <use jobs_limit.png>
+Package pdftex.def Info: jobs_limit.png used on input line 19452.
+(pdftex.def) Requested size: 501.87376pt x 569.12486pt.
+File: jobs_limit.png Graphic file (type png)
+
+<use jobs_limit.png>
+Package pdftex.def Info: jobs_limit.png used on input line 19452.
+(pdftex.def) Requested size: 374.26575pt x 424.43813pt.
+
+Underfull \hbox (badness 10000) in paragraph at lines 19564--19565
+[]\T1/ptm/m/n/10 Evaluated each time \T1/pcr/m/n/10 pipeline_run\T1/ptm/m/n/10
+, \T1/pcr/m/n/10 pipeline_printout \T1/ptm/m/n/10 or
+ []
+
+
+Overfull \hbox (14.79999pt too wide) in paragraph at lines 19751--19751
+ []
+ []
+
+[238]
+Overfull \vbox (8478.05183pt too high) has occurred while \output is active []
+
+
+[239 <./history_html_flowchart1.png> <./jobs_limit.png>]
+Underfull \hbox (badness 10000) in paragraph at lines 19934--19936
+[]\T1/ptm/m/n/10 E.g.:\T1/pcr/m/n/10 "a.*" => "a.1",
+ []
+
+
+Underfull \hbox (badness 10000) in paragraph at lines 20109--20111
+[]\T1/ptm/m/n/10 E.g.:\T1/pcr/m/n/10 "a.*" => "a.1",
+ []
+
+
+Underfull \hbox (badness 10000) in paragraph at lines 20279--20281
+[]\T1/ptm/m/n/10 E.g.:\T1/pcr/m/n/10 "a.*" => "a.1",
+ []
+
+
+Underfull \hbox (badness 10000) in paragraph at lines 20453--20455
+[]\T1/ptm/m/n/10 E.g.:\T1/pcr/m/n/10 "a.*" => "a.1",
+ []
+
+
+Overfull \hbox (14.79999pt too wide) in paragraph at lines 20493--20493
+ []
+ []
+
+[240]
+Overfull \vbox (3859.21509pt too high) has occurred while \output is active []
+
+
+[241]
+Overfull \hbox (14.79999pt too wide) in paragraph at lines 20751--20751
+ []
+ []
+
+[242]
+Overfull \vbox (860.27077pt too high) has occurred while \output is active []
+
+
+[243]
+Underfull \hbox (badness 5970) in paragraph at lines 20953--20953
+|[]\T1/phv/b/sl/10 @files_re \T1/phv/b/n/10 (\T1/phv/b/sl/10 tasks_or_file_name
+s\T1/phv/b/n/10 , \T1/phv/b/sl/10 match-ing_regex\T1/phv/b/n/10 , [\T1/phv/b/sl
+/10 in-put_pattern\T1/phv/b/n/10 ], \T1/phv/b/sl/10 out-put_pattern\T1/phv/b/n/
+10 , [\T1/phv/b/sl/10 ex-
+ []
+
+LaTeX Font Info: Font shape `T1/phv/bx/it' in size <9> not available
+(Font) Font shape `T1/phv/b/it' tried instead on input line 20956.
+
+LaTeX Font Info: Font shape `T1/phv/b/it' in size <9> not available
+(Font) Font shape `T1/phv/b/sl' tried instead on input line 20956.
+
+
+Overfull \hbox (5.66pt too wide) in paragraph at lines 20986--20987
+[]
+ []
+
+
+Underfull \hbox (badness 10000) in paragraph at lines 21010--21012
+[]\T1/ptm/m/n/10 E.g.:\T1/pcr/m/n/10 "a.*" => "a.1",
+ []
+
+
+Overfull \hbox (2.65997pt too wide) in paragraph at lines 21075--21076
+[]
+ []
+
+
+Overfull \hbox (14.79999pt too wide) in paragraph at lines 21088--21088
+ []
+ []
+
+[244]
+Overfull \vbox (1022.10062pt too high) has occurred while \output is active []
+
+
+[245]
+Underfull \hbox (badness 10000) in paragraph at lines 21145--21145
+[]\T1/ptm/m/it/10 target_tasks\T1/ptm/m/n/10 , \T1/ptm/m/it/10 forced-torun_tas
+ks=\T1/ptm/m/n/14.4 [ ]\T1/ptm/m/n/10 , \T1/ptm/m/it/10 mul-ti-pro-cess=1\T1/pt
+m/m/n/10 , \T1/ptm/m/it/10 log-
+ []
+
+[246]
+Underfull \hbox (badness 10000) in paragraph at lines 21222--21222
+[]\T1/ptm/m/it/10 output_stream=None\T1/ptm/m/n/10 , \T1/ptm/m/it/10 tar-get_ta
+sks=\T1/ptm/m/n/14.4 [ ]\T1/ptm/m/n/10 , \T1/ptm/m/it/10 forced-
+ []
+
+
+Underfull \hbox (badness 10000) in paragraph at lines 21222--21222
+\T1/ptm/m/it/10 torun_tasks=\T1/ptm/m/n/14.4 [ ]\T1/ptm/m/n/10 , \T1/ptm/m/it/1
+0 ver-bose=None\T1/ptm/m/n/10 , \T1/ptm/m/it/10 in-dent=4\T1/ptm/m/n/10 ,
+ []
+
+
+Underfull \hbox (badness 10000) in paragraph at lines 21222--21222
+\T1/ptm/m/it/10 gnu_make_maximal_rebuild_mode=True\T1/ptm/m/n/10 , \T1/ptm/m/it
+/10 wrap_width=100\T1/ptm/m/n/10 ,
+ []
+
+[247]
+Underfull \hbox (badness 10000) in paragraph at lines 21301--21301
+[]\T1/ptm/m/it/10 stream\T1/ptm/m/n/10 , \T1/ptm/m/it/10 out-put_format=None\T1
+/ptm/m/n/10 , \T1/ptm/m/it/10 tar-get_tasks=\T1/ptm/m/n/14.4 [
+ []
+
+
+Underfull \hbox (badness 10000) in paragraph at lines 21301--21301
+\T1/ptm/m/n/14.4 ]\T1/ptm/m/n/10 , \T1/ptm/m/it/10 forced-torun_tasks=\T1/ptm/m
+/n/14.4 [ ]\T1/ptm/m/n/10 , \T1/ptm/m/it/10 draw_vertically=True\T1/ptm/m/n/10
+,
+ []
+
+
+Underfull \hbox (badness 10000) in paragraph at lines 21301--21301
+\T1/ptm/m/it/10 ig-nore_upstream_of_target=False\T1/ptm/m/n/10 ,
+ []
+
+
+Underfull \hbox (badness 10000) in paragraph at lines 21301--21301
+\T1/ptm/m/it/10 skip_uptodate_tasks=False\T1/ptm/m/n/10 ,
+ []
+
+
+Underfull \hbox (badness 10000) in paragraph at lines 21301--21301
+\T1/ptm/m/it/10 gnu_make_maximal_rebuild_mode=True\T1/ptm/m/n/10 ,
+ []
+
+
+Underfull \hbox (badness 10000) in paragraph at lines 21301--21301
+\T1/ptm/m/it/10 test_all_task_for_update=True\T1/ptm/m/n/10 , \T1/ptm/m/it/10 n
+o_key_legend=False\T1/ptm/m/n/10 ,
+ []
+
+
+Underfull \hbox (badness 8113) in paragraph at lines 21301--21301
+\T1/ptm/m/it/10 pipeline_name='Pipeline:'\T1/ptm/m/n/10 , \T1/ptm/m/it/10 size=
+(11\T1/ptm/m/n/10 , \T1/ptm/m/it/10 8)\T1/ptm/m/n/10 , \T1/ptm/m/it/10 dpi=120\
+T1/ptm/m/n/10 ,
+ []
+
+
+Underfull \hbox (badness 10000) in paragraph at lines 21301--21301
+\T1/ptm/m/it/10 run-time_data=None\T1/ptm/m/n/10 , \T1/ptm/m/it/10 check-sum_le
+vel=None\T1/ptm/m/n/10 , \T1/ptm/m/it/10 his-
+ []
+
+[248]
+Underfull \hbox (badness 10000) in paragraph at lines 21406--21406
+\T1/ptm/m/it/10 ex-tra_input_files_task_globs\T1/ptm/m/n/10 , \T1/ptm/m/it/10 r
+e-place_inputs\T1/ptm/m/n/10 , \T1/ptm/m/it/10 out-
+ []
+
+
+Underfull \hbox (badness 10000) in paragraph at lines 21416--21416
+[]\T1/ptm/m/it/10 input_files_task_globs\T1/ptm/m/n/10 , \T1/ptm/m/it/10 flat-t
+en_input\T1/ptm/m/n/10 ,
+ []
+
+
+Underfull \hbox (badness 10000) in paragraph at lines 21416--21416
+\T1/ptm/m/it/10 file_names_transform\T1/ptm/m/n/10 , \T1/ptm/m/it/10 ex-tra_inp
+ut_files_task_globs\T1/ptm/m/n/10 ,
+ []
+
+[249]
+Underfull \hbox (badness 10000) in paragraph at lines 21424--21424
+[]\T1/ptm/m/it/10 input_files_task_globs\T1/ptm/m/n/10 , \T1/ptm/m/it/10 flat-t
+en_input\T1/ptm/m/n/10 ,
+ []
+
+
+Underfull \hbox (badness 10000) in paragraph at lines 21464--21464
+[]\T1/ptm/m/it/10 input_files_task_globs\T1/ptm/m/n/10 , \T1/ptm/m/it/10 out-pu
+t_files_task_globs\T1/ptm/m/n/10 , \T1/ptm/m/it/10 *ex-
+ []
+
+
+Underfull \hbox (badness 10000) in paragraph at lines 21475--21475
+[]\T1/ptm/m/it/10 param\T1/ptm/m/n/10 , \T1/ptm/m/it/10 user_defined_work_func\
+T1/ptm/m/n/10 , \T1/ptm/m/it/10 reg-is-ter_cleanup\T1/ptm/m/n/10 ,
+ []
+
+
+Underfull \hbox (badness 10000) in paragraph at lines 21483--21483
+[]\T1/ptm/m/it/10 param\T1/ptm/m/n/10 , \T1/ptm/m/it/10 user_defined_work_func\
+T1/ptm/m/n/10 , \T1/ptm/m/it/10 reg-is-ter_cleanup\T1/ptm/m/n/10 ,
+ []
+
+
+Underfull \hbox (badness 10000) in paragraph at lines 21491--21491
+[]\T1/ptm/m/it/10 param\T1/ptm/m/n/10 , \T1/ptm/m/it/10 user_defined_work_func\
+T1/ptm/m/n/10 , \T1/ptm/m/it/10 reg-is-ter_cleanup\T1/ptm/m/n/10 ,
+ []
+
+[250] [251] [252]
+Underfull \hbox (badness 10000) in paragraph at lines 21719--21721
+[]\T1/ptm/b/n/10 args \T1/ptm/m/n/10 -- a dic-tio-nary of pa-ram-e-ters for-war
+ded from
+ []
+
+
+! LaTeX Error: Too deeply nested.
+
+See the LaTeX manual or LaTeX Companion for explanation.
+Type H <return> for immediate help.
+ ...
+
+l.21787 \begin{Verbatim}[commandchars=\\\{\}]
+
+?
+
+! LaTeX Error: Too deeply nested.
+
+See the LaTeX manual or LaTeX Companion for explanation.
+Type H <return> for immediate help.
+ ...
+
+l.21787 \begin{Verbatim}[commandchars=\\\{\}]
+
+?
+[253] [254
+
+]
+Chapter 5.
+[255] [256
+
+] [257
+
+
+]
+No file ruffus.ind.
+Package atveryend Info: Empty hook `BeforeClearDocument' on input line 21827.
+Package atveryend Info: Empty hook `AfterLastShipout' on input line 21827.
+(./ruffus.aux)
+Package atveryend Info: Executing hook `AtVeryEndDocument' on input line 21827.
+
+Package atveryend Info: Executing hook `AtEndAfterFileList' on input line 21827
+.
+Package rerunfilecheck Info: File `ruffus.out' has not changed.
+(rerunfilecheck) Checksum: EF6AADFE414A2F7739402C49BF99432D;7549.
+
+
+LaTeX Warning: There were multiply-defined labels.
+
+Package atveryend Info: Empty hook `AtVeryVeryEnd' on input line 21827.
+ )
+Here is how much of TeX's memory you used:
+ 11999 strings out of 495028
+ 217113 string characters out of 6181497
+ 629820 words of memory out of 5000000
+ 13480 multiletter control sequences out of 15000+600000
+ 85281 words of font info for 90 fonts, out of 8000000 for 9000
+ 15 hyphenation exceptions out of 8191
+ 40i,14n,51p,7137b,640s stack positions out of 5000i,500n,10000p,200000b,80000s
+{/usr/share/texlive/texmf-dist/fonts/enc/dvips/base/8r.enc}</usr/share/texliv
+e/texmf-dist/fonts/type1/urw/courier/ucrb8a.pfb></usr/share/texlive/texmf-dist/
+fonts/type1/urw/courier/ucrr8a.pfb></usr/share/texlive/texmf-dist/fonts/type1/u
+rw/courier/ucrro8a.pfb></usr/share/texlive/texmf-dist/fonts/type1/urw/helvetic/
+uhvb8a.pfb></usr/share/texlive/texmf-dist/fonts/type1/urw/helvetic/uhvbo8a.pfb>
+</usr/share/texlive/texmf-dist/fonts/type1/urw/helvetic/uhvr8a.pfb></usr/share/
+texlive/texmf-dist/fonts/type1/urw/times/utmb8a.pfb></usr/share/texlive/texmf-d
+ist/fonts/type1/urw/times/utmbi8a.pfb></usr/share/texlive/texmf-dist/fonts/type
+1/urw/times/utmr8a.pfb></usr/share/texlive/texmf-dist/fonts/type1/urw/times/utm
+ri8a.pfb>
+Output written on ruffus.pdf (261 pages, 4513083 bytes).
+PDF statistics:
+ 4233 PDF objects out of 4296 (max. 8388607)
+ 3863 compressed objects within 39 object streams
+ 1420 named destinations out of 1440 (max. 500000)
+ 817 words of extra memory for PDF output out of 10000 (max. 10000000)
+
diff --git a/doc/_build/latex/ruffus.out b/doc/_build/latex/ruffus.out
new file mode 100644
index 0000000..b7c99fb
--- /dev/null
+++ b/doc/_build/latex/ruffus.out
@@ -0,0 +1,77 @@
+\BOOKMARK [0][-]{chapter.1}{Start Here:}{}% 1
+\BOOKMARK [1][-]{section.1.1}{Installation}{chapter.1}% 2
+\BOOKMARK [1][-]{section.1.2}{Ruffus Manual: List of Chapters and Example code}{chapter.1}% 3
+\BOOKMARK [1][-]{section.1.3}{Chapter 1: An introduction to basic Ruffus syntax}{chapter.1}% 4
+\BOOKMARK [1][-]{section.1.4}{Chapter 2: Transforming data in a pipeline with @transform}{chapter.1}% 5
+\BOOKMARK [1][-]{section.1.5}{Chapter 3: More on @transform-ing data}{chapter.1}% 6
+\BOOKMARK [1][-]{section.1.6}{Chapter 4: Creating files with @originate}{chapter.1}% 7
+\BOOKMARK [1][-]{section.1.7}{Chapter 5: Understanding how your pipeline works with pipeline\137printout\(...\)}{chapter.1}% 8
+\BOOKMARK [1][-]{section.1.8}{Chapter 6: Running Ruffus from the command line with ruffus.cmdline}{chapter.1}% 9
+\BOOKMARK [1][-]{section.1.9}{Chapter 7: Displaying the pipeline visually with pipeline\137printout\137graph\(...\)}{chapter.1}% 10
+\BOOKMARK [1][-]{section.1.10}{Chapter 8: Specifying output file names with formatter\(\) and regex\(\)}{chapter.1}% 11
+\BOOKMARK [1][-]{section.1.11}{Chapter 9: Preparing directories for output with @mkdir\(\)}{chapter.1}% 12
+\BOOKMARK [1][-]{section.1.12}{Chapter 10: Checkpointing: Interrupted Pipelines and Exceptions}{chapter.1}% 13
+\BOOKMARK [1][-]{section.1.13}{Chapter 11: Pipeline topologies and a compendium of Ruffus decorators}{chapter.1}% 14
+\BOOKMARK [1][-]{section.1.14}{Chapter 12: Splitting up large tasks / files with @split}{chapter.1}% 15
+\BOOKMARK [1][-]{section.1.15}{Chapter 13: @merge multiple input into a single result}{chapter.1}% 16
+\BOOKMARK [1][-]{section.1.16}{Chapter 14: Multiprocessing, drmaa and Computation Clusters}{chapter.1}% 17
+\BOOKMARK [1][-]{section.1.17}{Chapter 15: Logging progress through a pipeline}{chapter.1}% 18
+\BOOKMARK [1][-]{section.1.18}{Chapter 16: @subdivide tasks to run efficiently and regroup with @collate}{chapter.1}% 19
+\BOOKMARK [1][-]{section.1.19}{Chapter 17: @combinations, @permutations and all versus all @product}{chapter.1}% 20
+\BOOKMARK [1][-]{section.1.20}{Chapter 18: Turning parts of the pipeline on and off at runtime with @active\137if}{chapter.1}% 21
+\BOOKMARK [1][-]{section.1.21}{Chapter 19: Signal the completion of each stage of our pipeline with @posttask}{chapter.1}% 22
+\BOOKMARK [1][-]{section.1.22}{Chapter 20: Manipulating task inputs via string substitution using inputs\(\) and add\137inputs\(\)}{chapter.1}% 23
+\BOOKMARK [1][-]{section.1.23}{Chapter 21: Esoteric: Generating parameters on the fly with @files}{chapter.1}% 24
+\BOOKMARK [1][-]{section.1.24}{Chapter 22: Esoteric: Running jobs in parallel without files using @parallel}{chapter.1}% 25
+\BOOKMARK [1][-]{section.1.25}{Chapter 23: Esoteric: Writing custom functions to decide which jobs are up to date with @check\137if\137uptodate}{chapter.1}% 26
+\BOOKMARK [1][-]{section.1.26}{Appendix 1: Flow Chart Colours with pipeline\137printout\137graph\(...\)}{chapter.1}% 27
+\BOOKMARK [1][-]{section.1.27}{Appendix 2: How dependency is checked}{chapter.1}% 28
+\BOOKMARK [1][-]{section.1.28}{Appendix 3: Exceptions thrown inside pipelines}{chapter.1}% 29
+\BOOKMARK [1][-]{section.1.29}{Appendix 4: Names exported from Ruffus}{chapter.1}% 30
+\BOOKMARK [1][-]{section.1.30}{Appendix 5: @files: Deprecated syntax}{chapter.1}% 31
+\BOOKMARK [1][-]{section.1.31}{Appendix 6: @files\137re: Deprecated syntax using regular expressions}{chapter.1}% 32
+\BOOKMARK [1][-]{section.1.32}{Chapter 1: Python Code for An introduction to basic Ruffus syntax}{chapter.1}% 33
+\BOOKMARK [1][-]{section.1.33}{Chapter 1: Python Code for Transforming data in a pipeline with @transform}{chapter.1}% 34
+\BOOKMARK [1][-]{section.1.34}{Chapter 3: Python Code for More on @transform-ing data}{chapter.1}% 35
+\BOOKMARK [1][-]{section.1.35}{Chapter 4: Python Code for Creating files with @originate}{chapter.1}% 36
+\BOOKMARK [1][-]{section.1.36}{Chapter 5: Python Code for Understanding how your pipeline works with pipeline\137printout\(...\)}{chapter.1}% 37
+\BOOKMARK [1][-]{section.1.37}{Chapter 7: Python Code for Displaying the pipeline visually with pipeline\137printout\137graph\(...\)}{chapter.1}% 38
+\BOOKMARK [1][-]{section.1.38}{Chapter 8: Python Code for Specifying output file names with formatter\(\) and regex\(\)}{chapter.1}% 39
+\BOOKMARK [1][-]{section.1.39}{Chapter 9: Python Code for Preparing directories for output with @mkdir\(\)}{chapter.1}% 40
+\BOOKMARK [1][-]{section.1.40}{Chapter 10: Python Code for Checkpointing: Interrupted Pipelines and Exceptions}{chapter.1}% 41
+\BOOKMARK [1][-]{section.1.41}{Chapter 12: Python Code for Splitting up large tasks / files with @split}{chapter.1}% 42
+\BOOKMARK [1][-]{section.1.42}{Chapter 13: Python Code for @merge multiple input into a single result}{chapter.1}% 43
+\BOOKMARK [1][-]{section.1.43}{Chapter 14: Python Code for Multiprocessing, drmaa and Computation Clusters}{chapter.1}% 44
+\BOOKMARK [1][-]{section.1.44}{Chapter 15: Python Code for Logging progress through a pipeline}{chapter.1}% 45
+\BOOKMARK [1][-]{section.1.45}{Chapter 16: Python Code for @subdivide tasks to run efficiently and regroup with @collate}{chapter.1}% 46
+\BOOKMARK [1][-]{section.1.46}{Chapter 17: Python Code for @combinations, @permutations and all versus all @product}{chapter.1}% 47
+\BOOKMARK [1][-]{section.1.47}{Chapter 20: Python Code for Manipulating task inputs via string substitution using inputs\(\) and add\137inputs\(\)}{chapter.1}% 48
+\BOOKMARK [1][-]{section.1.48}{Chapter 21: Esoteric: Python Code for Generating parameters on the fly with @files}{chapter.1}% 49
+\BOOKMARK [1][-]{section.1.49}{Appendix 1: Python code for Flow Chart Colours with pipeline\137printout\137graph\(...\)}{chapter.1}% 50
+\BOOKMARK [0][-]{chapter.2}{Overview:}{}% 51
+\BOOKMARK [1][-]{section.2.1}{Cheat Sheet}{chapter.2}% 52
+\BOOKMARK [1][-]{section.2.2}{Pipeline functions}{chapter.2}% 53
+\BOOKMARK [1][-]{section.2.3}{drmaa functions}{chapter.2}% 54
+\BOOKMARK [1][-]{section.2.4}{Installation}{chapter.2}% 55
+\BOOKMARK [1][-]{section.2.5}{Design \046 Architecture}{chapter.2}% 56
+\BOOKMARK [1][-]{section.2.6}{Major Features added to Ruffus}{chapter.2}% 57
+\BOOKMARK [1][-]{section.2.7}{Fixed Bugs}{chapter.2}% 58
+\BOOKMARK [1][-]{section.2.8}{Future Changes to Ruffus}{chapter.2}% 59
+\BOOKMARK [1][-]{section.2.9}{Planned Improvements to Ruffus}{chapter.2}% 60
+\BOOKMARK [1][-]{section.2.10}{Implementation Tips}{chapter.2}% 61
+\BOOKMARK [1][-]{section.2.11}{Implementation notes}{chapter.2}% 62
+\BOOKMARK [1][-]{section.2.12}{FAQ}{chapter.2}% 63
+\BOOKMARK [1][-]{section.2.13}{Glossary}{chapter.2}% 64
+\BOOKMARK [1][-]{section.2.14}{Hall of Fame: User contributed flowcharts}{chapter.2}% 65
+\BOOKMARK [1][-]{section.2.15}{Why Ruffus?}{chapter.2}% 66
+\BOOKMARK [0][-]{chapter.3}{Examples}{}% 67
+\BOOKMARK [1][-]{section.3.1}{Construction of a simple pipeline to run BLAST jobs}{chapter.3}% 68
+\BOOKMARK [1][-]{section.3.2}{Part 2: A slightly more practical pipeline to run blasts jobs}{chapter.3}% 69
+\BOOKMARK [1][-]{section.3.3}{Ruffus code}{chapter.3}% 70
+\BOOKMARK [1][-]{section.3.4}{Ruffus code}{chapter.3}% 71
+\BOOKMARK [1][-]{section.3.5}{Example code for FAQ Good practices: ``What is the best way of handling data in file pairs \(or triplets etc.\)?''}{chapter.3}% 72
+\BOOKMARK [0][-]{chapter.4}{Reference:}{}% 73
+\BOOKMARK [1][-]{section.4.1}{Decorators}{chapter.4}% 74
+\BOOKMARK [1][-]{section.4.2}{Modules:}{chapter.4}% 75
+\BOOKMARK [0][-]{chapter.5}{Indices and tables}{}% 76
+\BOOKMARK [0][-]{section*.640}{Python Module Index}{}% 77
diff --git a/doc/_build/latex/ruffus.pdf b/doc/_build/latex/ruffus.pdf
new file mode 100644
index 0000000..68836b8
Binary files /dev/null and b/doc/_build/latex/ruffus.pdf differ
diff --git a/doc/_build/latex/ruffus.tex b/doc/_build/latex/ruffus.tex
new file mode 100644
index 0000000..9340ab1
--- /dev/null
+++ b/doc/_build/latex/ruffus.tex
@@ -0,0 +1,21827 @@
+% Generated by Sphinx.
+\def\sphinxdocclass{report}
+\documentclass[A4,10pt,english]{sphinxmanual}
+\usepackage[utf8]{inputenc}
+\DeclareUnicodeCharacter{00A0}{\nobreakspace}
+\usepackage{cmap}
+\usepackage[T1]{fontenc}
+\usepackage{babel}
+\usepackage{times}
+\usepackage[Bjarne]{fncychap}
+\usepackage{longtable}
+\usepackage{sphinx}
+\usepackage{multirow}
+
+
+\title{ruffus Documentation}
+\date{August 06, 2014}
+\release{2.5}
+\author{Leo Goodstadt}
+\newcommand{\sphinxlogo}{}
+\renewcommand{\releasename}{Release}
+\makeindex
+
+\makeatletter
+\def\PYG at reset{\let\PYG at it=\relax \let\PYG at bf=\relax%
+ \let\PYG at ul=\relax \let\PYG at tc=\relax%
+ \let\PYG at bc=\relax \let\PYG at ff=\relax}
+\def\PYG at tok#1{\csname PYG at tok@#1\endcsname}
+\def\PYG at toks#1+{\ifx\relax#1\empty\else%
+ \PYG at tok{#1}\expandafter\PYG at toks\fi}
+\def\PYG at do#1{\PYG at bc{\PYG at tc{\PYG at ul{%
+ \PYG at it{\PYG at bf{\PYG at ff{#1}}}}}}}
+\def\PYG#1#2{\PYG at reset\PYG at toks#1+\relax+\PYG at do{#2}}
+
+\expandafter\def\csname PYG at tok@gd\endcsname{\def\PYG at tc##1{\textcolor[rgb]{0.63,0.00,0.00}{##1}}}
+\expandafter\def\csname PYG at tok@gu\endcsname{\let\PYG at bf=\textbf\def\PYG at tc##1{\textcolor[rgb]{0.50,0.00,0.50}{##1}}}
+\expandafter\def\csname PYG at tok@gt\endcsname{\def\PYG at tc##1{\textcolor[rgb]{0.00,0.27,0.87}{##1}}}
+\expandafter\def\csname PYG at tok@gs\endcsname{\let\PYG at bf=\textbf}
+\expandafter\def\csname PYG at tok@gr\endcsname{\def\PYG at tc##1{\textcolor[rgb]{1.00,0.00,0.00}{##1}}}
+\expandafter\def\csname PYG at tok@cm\endcsname{\let\PYG at it=\textit\def\PYG at tc##1{\textcolor[rgb]{0.25,0.50,0.56}{##1}}}
+\expandafter\def\csname PYG at tok@vg\endcsname{\def\PYG at tc##1{\textcolor[rgb]{0.73,0.38,0.84}{##1}}}
+\expandafter\def\csname PYG at tok@m\endcsname{\def\PYG at tc##1{\textcolor[rgb]{0.13,0.50,0.31}{##1}}}
+\expandafter\def\csname PYG at tok@mh\endcsname{\def\PYG at tc##1{\textcolor[rgb]{0.13,0.50,0.31}{##1}}}
+\expandafter\def\csname PYG at tok@cs\endcsname{\def\PYG at tc##1{\textcolor[rgb]{0.25,0.50,0.56}{##1}}\def\PYG at bc##1{\setlength{\fboxsep}{0pt}\colorbox[rgb]{1.00,0.94,0.94}{\strut ##1}}}
+\expandafter\def\csname PYG at tok@ge\endcsname{\let\PYG at it=\textit}
+\expandafter\def\csname PYG at tok@vc\endcsname{\def\PYG at tc##1{\textcolor[rgb]{0.73,0.38,0.84}{##1}}}
+\expandafter\def\csname PYG at tok@il\endcsname{\def\PYG at tc##1{\textcolor[rgb]{0.13,0.50,0.31}{##1}}}
+\expandafter\def\csname PYG at tok@go\endcsname{\def\PYG at tc##1{\textcolor[rgb]{0.20,0.20,0.20}{##1}}}
+\expandafter\def\csname PYG at tok@cp\endcsname{\def\PYG at tc##1{\textcolor[rgb]{0.00,0.44,0.13}{##1}}}
+\expandafter\def\csname PYG at tok@gi\endcsname{\def\PYG at tc##1{\textcolor[rgb]{0.00,0.63,0.00}{##1}}}
+\expandafter\def\csname PYG at tok@gh\endcsname{\let\PYG at bf=\textbf\def\PYG at tc##1{\textcolor[rgb]{0.00,0.00,0.50}{##1}}}
+\expandafter\def\csname PYG at tok@ni\endcsname{\let\PYG at bf=\textbf\def\PYG at tc##1{\textcolor[rgb]{0.84,0.33,0.22}{##1}}}
+\expandafter\def\csname PYG at tok@nl\endcsname{\let\PYG at bf=\textbf\def\PYG at tc##1{\textcolor[rgb]{0.00,0.13,0.44}{##1}}}
+\expandafter\def\csname PYG at tok@nn\endcsname{\let\PYG at bf=\textbf\def\PYG at tc##1{\textcolor[rgb]{0.05,0.52,0.71}{##1}}}
+\expandafter\def\csname PYG at tok@no\endcsname{\def\PYG at tc##1{\textcolor[rgb]{0.38,0.68,0.84}{##1}}}
+\expandafter\def\csname PYG at tok@na\endcsname{\def\PYG at tc##1{\textcolor[rgb]{0.25,0.44,0.63}{##1}}}
+\expandafter\def\csname PYG at tok@nb\endcsname{\def\PYG at tc##1{\textcolor[rgb]{0.00,0.44,0.13}{##1}}}
+\expandafter\def\csname PYG at tok@nc\endcsname{\let\PYG at bf=\textbf\def\PYG at tc##1{\textcolor[rgb]{0.05,0.52,0.71}{##1}}}
+\expandafter\def\csname PYG at tok@nd\endcsname{\let\PYG at bf=\textbf\def\PYG at tc##1{\textcolor[rgb]{0.33,0.33,0.33}{##1}}}
+\expandafter\def\csname PYG at tok@ne\endcsname{\def\PYG at tc##1{\textcolor[rgb]{0.00,0.44,0.13}{##1}}}
+\expandafter\def\csname PYG at tok@nf\endcsname{\def\PYG at tc##1{\textcolor[rgb]{0.02,0.16,0.49}{##1}}}
+\expandafter\def\csname PYG at tok@si\endcsname{\let\PYG at it=\textit\def\PYG at tc##1{\textcolor[rgb]{0.44,0.63,0.82}{##1}}}
+\expandafter\def\csname PYG at tok@s2\endcsname{\def\PYG at tc##1{\textcolor[rgb]{0.25,0.44,0.63}{##1}}}
+\expandafter\def\csname PYG at tok@vi\endcsname{\def\PYG at tc##1{\textcolor[rgb]{0.73,0.38,0.84}{##1}}}
+\expandafter\def\csname PYG at tok@nt\endcsname{\let\PYG at bf=\textbf\def\PYG at tc##1{\textcolor[rgb]{0.02,0.16,0.45}{##1}}}
+\expandafter\def\csname PYG at tok@nv\endcsname{\def\PYG at tc##1{\textcolor[rgb]{0.73,0.38,0.84}{##1}}}
+\expandafter\def\csname PYG at tok@s1\endcsname{\def\PYG at tc##1{\textcolor[rgb]{0.25,0.44,0.63}{##1}}}
+\expandafter\def\csname PYG at tok@gp\endcsname{\let\PYG at bf=\textbf\def\PYG at tc##1{\textcolor[rgb]{0.78,0.36,0.04}{##1}}}
+\expandafter\def\csname PYG at tok@sh\endcsname{\def\PYG at tc##1{\textcolor[rgb]{0.25,0.44,0.63}{##1}}}
+\expandafter\def\csname PYG at tok@ow\endcsname{\let\PYG at bf=\textbf\def\PYG at tc##1{\textcolor[rgb]{0.00,0.44,0.13}{##1}}}
+\expandafter\def\csname PYG at tok@sx\endcsname{\def\PYG at tc##1{\textcolor[rgb]{0.78,0.36,0.04}{##1}}}
+\expandafter\def\csname PYG at tok@bp\endcsname{\def\PYG at tc##1{\textcolor[rgb]{0.00,0.44,0.13}{##1}}}
+\expandafter\def\csname PYG at tok@c1\endcsname{\let\PYG at it=\textit\def\PYG at tc##1{\textcolor[rgb]{0.25,0.50,0.56}{##1}}}
+\expandafter\def\csname PYG at tok@kc\endcsname{\let\PYG at bf=\textbf\def\PYG at tc##1{\textcolor[rgb]{0.00,0.44,0.13}{##1}}}
+\expandafter\def\csname PYG at tok@c\endcsname{\let\PYG at it=\textit\def\PYG at tc##1{\textcolor[rgb]{0.25,0.50,0.56}{##1}}}
+\expandafter\def\csname PYG at tok@mf\endcsname{\def\PYG at tc##1{\textcolor[rgb]{0.13,0.50,0.31}{##1}}}
+\expandafter\def\csname PYG at tok@err\endcsname{\def\PYG at bc##1{\setlength{\fboxsep}{0pt}\fcolorbox[rgb]{1.00,0.00,0.00}{1,1,1}{\strut ##1}}}
+\expandafter\def\csname PYG at tok@kd\endcsname{\let\PYG at bf=\textbf\def\PYG at tc##1{\textcolor[rgb]{0.00,0.44,0.13}{##1}}}
+\expandafter\def\csname PYG at tok@ss\endcsname{\def\PYG at tc##1{\textcolor[rgb]{0.32,0.47,0.09}{##1}}}
+\expandafter\def\csname PYG at tok@sr\endcsname{\def\PYG at tc##1{\textcolor[rgb]{0.14,0.33,0.53}{##1}}}
+\expandafter\def\csname PYG at tok@mo\endcsname{\def\PYG at tc##1{\textcolor[rgb]{0.13,0.50,0.31}{##1}}}
+\expandafter\def\csname PYG at tok@mi\endcsname{\def\PYG at tc##1{\textcolor[rgb]{0.13,0.50,0.31}{##1}}}
+\expandafter\def\csname PYG at tok@kn\endcsname{\let\PYG at bf=\textbf\def\PYG at tc##1{\textcolor[rgb]{0.00,0.44,0.13}{##1}}}
+\expandafter\def\csname PYG at tok@o\endcsname{\def\PYG at tc##1{\textcolor[rgb]{0.40,0.40,0.40}{##1}}}
+\expandafter\def\csname PYG at tok@kr\endcsname{\let\PYG at bf=\textbf\def\PYG at tc##1{\textcolor[rgb]{0.00,0.44,0.13}{##1}}}
+\expandafter\def\csname PYG at tok@s\endcsname{\def\PYG at tc##1{\textcolor[rgb]{0.25,0.44,0.63}{##1}}}
+\expandafter\def\csname PYG at tok@kp\endcsname{\def\PYG at tc##1{\textcolor[rgb]{0.00,0.44,0.13}{##1}}}
+\expandafter\def\csname PYG at tok@w\endcsname{\def\PYG at tc##1{\textcolor[rgb]{0.73,0.73,0.73}{##1}}}
+\expandafter\def\csname PYG at tok@kt\endcsname{\def\PYG at tc##1{\textcolor[rgb]{0.56,0.13,0.00}{##1}}}
+\expandafter\def\csname PYG at tok@sc\endcsname{\def\PYG at tc##1{\textcolor[rgb]{0.25,0.44,0.63}{##1}}}
+\expandafter\def\csname PYG at tok@sb\endcsname{\def\PYG at tc##1{\textcolor[rgb]{0.25,0.44,0.63}{##1}}}
+\expandafter\def\csname PYG at tok@k\endcsname{\let\PYG at bf=\textbf\def\PYG at tc##1{\textcolor[rgb]{0.00,0.44,0.13}{##1}}}
+\expandafter\def\csname PYG at tok@se\endcsname{\let\PYG at bf=\textbf\def\PYG at tc##1{\textcolor[rgb]{0.25,0.44,0.63}{##1}}}
+\expandafter\def\csname PYG at tok@sd\endcsname{\let\PYG at it=\textit\def\PYG at tc##1{\textcolor[rgb]{0.25,0.44,0.63}{##1}}}
+
+\def\PYGZbs{\char`\\}
+\def\PYGZus{\char`\_}
+\def\PYGZob{\char`\{}
+\def\PYGZcb{\char`\}}
+\def\PYGZca{\char`\^}
+\def\PYGZam{\char`\&}
+\def\PYGZlt{\char`\<}
+\def\PYGZgt{\char`\>}
+\def\PYGZsh{\char`\#}
+\def\PYGZpc{\char`\%}
+\def\PYGZdl{\char`\$}
+\def\PYGZhy{\char`\-}
+\def\PYGZsq{\char`\'}
+\def\PYGZdq{\char`\"}
+\def\PYGZti{\char`\~}
+% for compatibility with earlier versions
+\def\PYGZat{@}
+\def\PYGZlb{[}
+\def\PYGZrb{]}
+\makeatother
+
+\begin{document}
+
+\maketitle
+\tableofcontents
+\phantomsection\label{contents::doc}
+
+
+
+\chapter{Start Here:}
+\label{contents:glob}\label{contents:ruffus-documentation}\label{contents:start-here}
+
+\section{Installation}
+\label{installation:installation}\label{installation::doc}\label{installation:id1}
+\code{Ruffus} is a lightweight python module for building computational pipelines.
+
+
+\subsection{The easy way}
+\label{installation:the-easy-way}\begin{quote}
+
+\emph{Ruffus} is available as an
+\href{http://peak.telecommunity.com/DevCenter/EasyInstall}{easy-install} -able package
+on the \href{http://pypi.python.org/pypi/Sphinx}{Python Package Index}.
+
+\begin{Verbatim}[commandchars=\\\{\}]
+sudo pip install ruffus --upgrade
+\end{Verbatim}
+
+This may also work for older installations
+\begin{enumerate}
+\item {}
+Install setuptools:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+wget peak.telecommunity.com/dist/ez\_setup.py
+sudo python ez\_setup.py
+\end{Verbatim}
+
+\item {}
+Install \emph{Ruffus} automatically:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+easy\_install -U ruffus
+\end{Verbatim}
+
+\end{enumerate}
+\end{quote}
+
+
+\subsection{The most up-to-date code:}
+\label{installation:the-most-up-to-date-code}\begin{itemize}
+\item {}
+\href{https://pypi.python.org/pypi/ruffus}{Download the latest sources} or
+
+\item {}
+Check out the latest code from Google using git:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+git clone https://bunbun68@code.google.com/p/ruffus/ .
+\end{Verbatim}
+
+\item {}
+Bleeding edge Ruffus development takes place on github:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+git clone git@github.com:bunbun/ruffus.git .
+\end{Verbatim}
+
+\item {}
+To install after downloading, change to the unpacked source directory and type:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+python ./setup.py install
+\end{Verbatim}
+
+\end{itemize}
+
+
+\subsubsection{Graphical flowcharts}
+\label{installation:graphical-flowcharts}\begin{quote}
+
+\textbf{Ruffus} relies on the \code{dot} programme from \href{http://www.graphviz.org/}{Graphviz}
+(``Graph visualisation'') to make pretty flowchart representations of your pipelines in multiple
+graphical formats (e.g. \code{png}, \code{jpg}). The cross-platform Graphviz package can be
+\href{http://www.graphviz.org/Download.php}{downloaded here} for Windows,
+Linux, Macs and Solaris; some Linux
+distributions also ship prebuilt packages (a short usage sketch follows at the end of this section).
+\begin{description}
+\item[{For Fedora, try}] \leavevmode
+\begin{Verbatim}[commandchars=\\\{\}]
+yum list 'graphviz*'
+\end{Verbatim}
+
+\item[{For ubuntu / Debian, try}] \leavevmode
+\begin{Verbatim}[commandchars=\\\{\}]
+sudo apt-get install graphviz
+\end{Verbatim}
+
+\end{description}
+\end{quote}
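+
+Once Graphviz is installed, a flowchart can be written straight from a pipeline script.
+The snippet below is only a minimal sketch added for illustration (the trivial
+\code{create\_files} task is made up and is not part of the original manual); it uses the
+\emph{Ruffus} function \code{pipeline\_printout\_graph()} described later in this documentation:
+
+\begin{Verbatim}
+from ruffus import *
+
+# A made-up, trivial task so that there is something to draw
+@originate(["a.start", "b.start"])
+def create_files(output_file):
+    open(output_file, "w").close()
+
+# Write the pipeline out as a PNG flowchart using the Graphviz "dot" programme
+# (the first argument may also be an already opened file / stream)
+pipeline_printout_graph("flowchart.png", "png", [create_files])
+\end{Verbatim}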
+
+
+\section{\textbf{Ruffus} Manual: List of Chapters and Example code}
+\label{tutorials/new_tutorial/manual_contents:ruffus-manual-list-of-chapters-and-example-code}\label{tutorials/new_tutorial/manual_contents::doc}\label{tutorials/new_tutorial/manual_contents:new-manual-table-of-contents}\begin{quote}
+
+Download as \code{pdf}.
+\begin{itemize}
+\item {}
+\textbf{Chapter 1}: {\hyperref[tutorials/new_tutorial/introduction:new-manual-introduction]{\emph{An introduction to basic Ruffus syntax}}}
+
+\item {}
+\textbf{Chapter 2}: {\hyperref[tutorials/new_tutorial/transform:new-manual-transform]{\emph{Transforming data in a pipeline with @transform}}}
+
+\item {}
+\textbf{Chapter 3}: {\hyperref[tutorials/new_tutorial/transform_in_parallel:new-manual-transform-in-parallel]{\emph{More on @transform-ing data}}}
+
+\item {}
+\textbf{Chapter 4}: {\hyperref[tutorials/new_tutorial/originate:new-manual-originate]{\emph{Creating files with @originate}}}
+
+\item {}
+\textbf{Chapter 5}: {\hyperref[tutorials/new_tutorial/pipeline_printout:new-manual-pipeline-printout]{\emph{Understanding how your pipeline works with pipeline\_printout()}}}
+
+\item {}
+\textbf{Chapter 6}: {\hyperref[tutorials/new_tutorial/command_line:new-manual-cmdline]{\emph{Running Ruffus from the command line with ruffus.cmdline}}}
+
+\item {}
+\textbf{Chapter 7}: {\hyperref[tutorials/new_tutorial/pipeline_printout_graph:new-manual-pipeline-printout-graph]{\emph{Displaying the pipeline visually with pipeline\_printout\_graph()}}}
+
+\item {}
+\textbf{Chapter 8}: {\hyperref[tutorials/new_tutorial/output_file_names:new-manual-output-file-names]{\emph{Specifying output file names with formatter() and regex()}}}
+
+\item {}
+\textbf{Chapter 9}: {\hyperref[tutorials/new_tutorial/mkdir:new-manual-mkdir]{\emph{Preparing directories for output with @mkdir}}}
+
+\item {}
+\textbf{Chapter 10}: {\hyperref[tutorials/new_tutorial/checkpointing:new-manual-checkpointing]{\emph{Checkpointing: Interrupted Pipelines and Exceptions}}}
+
+\item {}
+\textbf{Chapter 11}: {\hyperref[tutorials/new_tutorial/decorators_compendium:new-manual-decorators-compendium]{\emph{Pipeline topologies and a compendium of Ruffus decorators}}}
+
+\item {}
+\textbf{Chapter 12}: {\hyperref[tutorials/new_tutorial/split:new-manual-split]{\emph{Splitting up large tasks / files with @split}}}
+
+\item {}
+\textbf{Chapter 13}: {\hyperref[tutorials/new_tutorial/merge:new-manual-merge]{\emph{@merge multiple input into a single result}}}
+
+\item {}
+\textbf{Chapter 14}: {\hyperref[tutorials/new_tutorial/multiprocessing:new-manual-multiprocessing]{\emph{Multiprocessing, drmaa and Computation Clusters}}}
+
+\item {}
+\textbf{Chapter 15}: {\hyperref[tutorials/new_tutorial/logging:new-manual-logging]{\emph{Logging progress through a pipeline}}}
+
+\item {}
+\textbf{Chapter 16}: {\hyperref[tutorials/new_tutorial/subdivide_collate:new-manual-subdivide-collate]{\emph{@subdivide tasks to run efficiently and regroup with @collate}}}
+
+\item {}
+\textbf{Chapter 17}: {\hyperref[tutorials/new_tutorial/combinatorics:new-manual-combinatorics]{\emph{@combinations, @permutations and all versus all @product}}}
+
+\item {}
+\textbf{Chapter 18}: {\hyperref[tutorials/new_tutorial/active_if:new-manual-active-if]{\emph{Turning parts of the pipeline on and off at runtime with @active\_if}}}
+
+\item {}
+\textbf{Chapter 19}: {\hyperref[tutorials/new_tutorial/posttask:new-manual-posttask]{\emph{Signal the completion of each stage of our pipeline with @posttask}}}
+
+\item {}
+\textbf{Chapter 20}: {\hyperref[tutorials/new_tutorial/inputs:new-manual-inputs]{\emph{Manipulating task inputs via string substitution with inputs() and add\_inputs()}}}
+
+\item {}
+\textbf{Chapter 21}: {\hyperref[tutorials/new_tutorial/onthefly:new-manual-on-the-fly]{\emph{Esoteric: Generating parameters on the fly with @files}}}
+
+\item {}
+\textbf{Chapter 22}: {\hyperref[tutorials/new_tutorial/parallel:new-manual-deprecated-parallel]{\emph{Esoteric: Running jobs in parallel without files using @parallel}}}
+
+\item {}
+\textbf{Chapter 23}: {\hyperref[tutorials/new_tutorial/check_if_uptodate:new-manual-check-if-uptodate]{\emph{Esoteric: Writing custom functions to decide which jobs are up to date with @check\_if\_uptodate}}}
+
+\item {}
+\textbf{Appendix 1} {\hyperref[tutorials/new_tutorial/flowchart_colours:new-manual-flowchart-colours]{\emph{Flow Chart Colours with pipeline\_printout\_graph}}}
+
+\item {}
+\textbf{Appendix 2} {\hyperref[tutorials/new_tutorial/dependencies:new-manual-dependencies]{\emph{Under the hood: How dependency works}}}
+
+\item {}
+\textbf{Appendix 3} {\hyperref[tutorials/new_tutorial/exceptions:new-manual-exceptions]{\emph{Exceptions thrown inside pipelines}}}
+
+\item {}
+\textbf{Appendix 4} {\hyperref[tutorials/new_tutorial/list_of_ruffus_names:new-manual-ruffus-names]{\emph{Names (keywords) exported from Ruffus}}}
+
+\item {}
+\textbf{Appendix 5}: {\hyperref[tutorials/new_tutorial/deprecated_files:new-manual-deprecated-files]{\emph{Legacy and deprecated syntax @files}}}
+
+\item {}
+\textbf{Appendix 6}: {\hyperref[tutorials/new_tutorial/deprecated_files_re:new-manual-deprecated-files-re]{\emph{Legacy and deprecated syntax @files\_re}}}
+
+\end{itemize}
+\end{quote}
+
+\textbf{Ruffus} Manual: List of Example Code for Each Chapter:
+\begin{itemize}
+\item {}
+{\hyperref[tutorials/new_tutorial/introduction_code:new-manual-introduction-code]{\emph{Chapter 1: Python Code for An introduction to basic Ruffus syntax}}}
+
+\item {}
+{\hyperref[tutorials/new_tutorial/transform_code:new-manual-transform-code]{\emph{Chapter 2: Python Code for Transforming data in a pipeline with @transform}}}
+
+\item {}
+{\hyperref[tutorials/new_tutorial/transform_in_parallel_code:new-manual-transform-in-parallel-code]{\emph{Chapter 3: Python Code for More on @transform-ing data}}}
+
+\item {}
+{\hyperref[tutorials/new_tutorial/originate_code:new-manual-originate-code]{\emph{Chapter 4: Python Code for Creating files with @originate}}}
+
+\item {}
+{\hyperref[tutorials/new_tutorial/pipeline_printout_code:new-manual-pipeline-printout-code]{\emph{Chapter 5: Python Code for Understanding how your pipeline works with pipeline\_printout(...)}}}
+
+\item {}
+{\hyperref[tutorials/new_tutorial/pipeline_printout_graph_code:new-manual-pipeline-printout-graph-code]{\emph{Chapter 7: Python Code for Displaying the pipeline visually with pipeline\_printout\_graph(...)}}}
+
+\item {}
+{\hyperref[tutorials/new_tutorial/output_file_names_code:new-manual-output-file-names-code]{\emph{Chapter 8: Python Code for Specifying output file names with formatter() and regex()}}}
+
+\item {}
+{\hyperref[tutorials/new_tutorial/mkdir_code:new-manual-mkdir-code]{\emph{Chapter 9: Python Code for Preparing directories for output with @mkdir()}}}
+
+\item {}
+{\hyperref[tutorials/new_tutorial/checkpointing_code:new-manual-checkpointing-code]{\emph{Chapter 10: Python Code for Checkpointing: Interrupted Pipelines and Exceptions}}}
+
+\item {}
+{\hyperref[tutorials/new_tutorial/split_code:new-manual-split-code]{\emph{Chapter 12: Python Code for Splitting up large tasks / files with @split}}}
+
+\item {}
+{\hyperref[tutorials/new_tutorial/merge_code:new-manual-merge-code]{\emph{Chapter 13: Python Code for @merge multiple input into a single result}}}
+
+\item {}
+{\hyperref[tutorials/new_tutorial/multiprocessing_code:new-manual-multiprocessing-code]{\emph{Chapter 14: Python Code for Multiprocessing, drmaa and Computation Clusters}}}
+
+\item {}
+{\hyperref[tutorials/new_tutorial/logging_code:new-manual-logging-code]{\emph{Chapter 15: Python Code for Logging progress through a pipeline}}}
+
+\item {}
+{\hyperref[tutorials/new_tutorial/subdivide_collate_code:new-manual-subdivide-collate-code]{\emph{Chapter 16: Python Code for @subdivide tasks to run efficiently and regroup with @collate}}}
+
+\item {}
+{\hyperref[tutorials/new_tutorial/combinatorics_code:new-manual-combinatorics-code]{\emph{Chapter 17: Python Code for @combinations, @permutations and all versus all @product}}}
+
+\item {}
+{\hyperref[tutorials/new_tutorial/inputs_code:new-manual-inputs-code]{\emph{Chapter 20: Python Code for Manipulating task inputs via string substitution using inputs() and add\_inputs()}}}
+
+\item {}
+{\hyperref[tutorials/new_tutorial/onthefly_code:new-manual-on-the-fly-code]{\emph{Chapter 21: Esoteric: Python Code for Generating parameters on the fly with @files}}}
+
+\end{itemize}
+
+
+
+
+
+\index{overview!Tutorial}\index{Tutorial!overview}
+
+\section{\textbf{Chapter 1}: An introduction to basic \emph{Ruffus} syntax}
+\label{tutorials/new_tutorial/introduction:new-manual-introduction}\label{tutorials/new_tutorial/introduction:index-0}\label{tutorials/new_tutorial/introduction::doc}\label{tutorials/new_tutorial/introduction:new-manual-introduction-chapter-num-an-introduction-to-basic-ruffus-syntax}
+
+\strong{See also:}
+
+\begin{itemize}
+\item {}
+{\hyperref[tutorials/new_tutorial/manual_contents:new-manual-table-of-contents]{\emph{Manual Table of Contents}}}
+
+\end{itemize}
+
+
+
+
+\subsection{Overview}
+\label{tutorials/new_tutorial/introduction:overview}\begin{quote}
+
+\scalebox{0.500000}{\includegraphics{theoretical_pipeline_schematic.png}}
+
+Computational pipelines transform your data in stages until the final result is produced.
+One easy way to understand pipelines is by imagining your data flowing across a series of
+pipes until it reaches its final destination. Even quite complicated processes can be
+broken into simple stages. Of course, it helps to visualise the whole process.
+
+\emph{Ruffus} is a way of automating the plumbing in your pipeline: You supply the python functions
+which perform the data transformation, and tell \emph{Ruffus} how these pipeline \code{task} functions
+are connected up. \emph{Ruffus} will make sure that the right data flows down your pipeline in the
+right way at the right time.
+
+\begin{notice}{note}{Note:}
+\emph{Ruffus} refers to each stage of your pipeline as a {\hyperref[glossary:term-task]{\emph{task}}}.
+\end{notice}
+\end{quote}
+\phantomsection\label{tutorials/new_tutorial/introduction:new-manual-introduction-import}
+\index{importing ruffus}
+
+\subsection{Importing \emph{Ruffus}}
+\label{tutorials/new_tutorial/introduction:index-1}\label{tutorials/new_tutorial/introduction:importing-ruffus}\begin{quote}
+
+The most convenient way to use \emph{Ruffus} is to import the various names directly:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{k+kn}{from} \PYG{n+nn}{ruffus} \PYG{k+kn}{import} \PYG{o}{*}
+\end{Verbatim}
+\end{quote}
+
+This will allow \emph{Ruffus} terms to be used directly in your code. This is also
+the style we have adopted for this manual.
+\begin{description}
+\item[{If any of these clash with names in your code, you can use qualified names instead:}] \leavevmode
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{k+kn}{import} \PYG{n+nn}{ruffus}
+
+\PYG{n}{ruffus}\PYG{o}{.}\PYG{n}{pipeline\PYGZus{}printout}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{...}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\end{Verbatim}
+
+\end{description}
+
+\emph{Ruffus} uses only standard python syntax.
+
+There is no need to install anything extra or to have your script ``preprocessed'' to run
+your pipeline.
+\end{quote}
+
+
+\subsection{\emph{Ruffus} decorators}
+\label{tutorials/new_tutorial/introduction:ruffus-decorators}\begin{quote}
+
+To let \emph{Ruffus} know which python functions are part of your pipeline,
+they need to be tagged or annotated using
+\emph{Ruffus} \href{https://docs.python.org/2/glossary.html\#term-decorator}{decorators} .
+
+\href{https://docs.python.org/2/glossary.html\#term-decorator}{Decorators} have been part of the Python language since version 2.4.
+Common examples from the standard library include \href{https://docs.python.org/2/library/functions.html\#staticmethod}{@staticmethod} and
+\href{https://docs.python.org/2/library/functions.html\#classmethod}{@classmethod}.
+
+\href{https://docs.python.org/2/glossary.html\#term-decorator}{Decorators} start with an \code{@}
+prefix, and take a number of parameters in parentheses, much like a function call.
+
+\href{https://docs.python.org/2/glossary.html\#term-decorator}{Decorators} are placed immediately before a normal python function.
+\begin{quote}
+
+\includegraphics{tutorial_step1_decorator_syntax.png}
+\end{quote}
+
+Multiple decorators can be stacked as necessary, in any order:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n+nd}{@follows}\PYG{p}{(}\PYG{n}{first\PYGZus{}task}\PYG{p}{)}
+\PYG{n+nd}{@follows}\PYG{p}{(}\PYG{n}{another\PYGZus{}task}\PYG{p}{)}
+\PYG{n+nd}{@originate}\PYG{p}{(}\PYG{n+nb}{range}\PYG{p}{(}\PYG{l+m+mi}{5}\PYG{p}{)}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{second\PYGZus{}task}\PYG{p}{(}\PYG{p}{)}\PYG{p}{:}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZdq{}}
+\end{Verbatim}
+\end{quote}
+
+\emph{Ruffus} \href{https://docs.python.org/2/glossary.html\#term-decorator}{decorators} do not
+otherwise alter the underlying function, which can still be called normally, as the short sketch below shows.
+\end{quote}
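+
+For example, here is a tiny sketch added for illustration (using the \code{second\_task}
+function defined just above): because the decorators leave the function untouched, it can be
+called or inspected like any other python function.
+
+\begin{Verbatim}
+# second_task is still a plain python function underneath:
+# it can be called directly, inspected or unit-tested as usual
+second_task()
+print(second_task.__name__)     # "second_task"
+\end{Verbatim}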
+
+
+\subsection{Your first \emph{Ruffus} pipeline}
+\label{tutorials/new_tutorial/introduction:your-first-ruffus-pipeline}
+
+\subsubsection{1. Write down the file names}
+\label{tutorials/new_tutorial/introduction:write-down-the-file-names}\begin{quote}
+
+\emph{Ruffus} is designed for data moving through a computational pipeline as a series of files.
+
+It is also possible to use \emph{Ruffus} pipelines without using intermediate data files but for your
+first efforts, it is probably best not to subvert its canonical design.
+
+The first thing when designing a new \emph{Ruffus} pipeline is to sketch out the set of file names for
+the pipeline on paper:
+\begin{quote}
+
+\scalebox{0.500000}{\includegraphics{tutorial_ruffus_files.jpg}}
+\end{quote}
+\begin{description}
+\item[{Here we have a number of DNA sequence files (\code{*.fasta})}] \leavevmode\begin{enumerate}
+\item {}
+mapped to a genome (\code{*.sam}), and
+
+\item {}
+compressed (\code{*.bam}) before being
+
+\item {}
+summarised statistically (\code{*.statistics})
+
+\end{enumerate}
+
+\end{description}
+
+The first striking thing is that all of the files follow the same \textbf{consistent naming scheme}.
+
+\begin{notice}{note}{Note:}
+\DUspan{highlight-red}{The most important part of a Ruffus pipeline is to have a consistent naming scheme for your files.}
+
+This allows you to build sane pipelines.
+\end{notice}
+
+In this case, all of the files at the same stage share the same file extension (e.g. \code{.sam}).
+This is usually the simplest and most sensible choice. (We shall see in later chapters
+that \emph{Ruffus} supports more complicated naming patterns so long as they are consistent.)
+The short sketch below makes this naming scheme concrete.
+\end{quote}
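+
+To make the naming scheme concrete, here is a small sketch added for illustration
+(the file names are the ones pictured above): each stage simply swaps the file extension.
+
+\begin{Verbatim}
+# One DNA sequence file flowing through the pipeline,
+# renamed at each stage by swapping its extension:
+filename   = "a.fasta"
+mapped     = filename.replace(".fasta", ".sam")         # "a.sam"
+compressed = mapped.replace(".sam", ".bam")             # "a.bam"
+summary    = compressed.replace(".bam", ".statistics")  # "a.statistics"
+\end{Verbatim}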
+
+
+\subsubsection{2. Write the python functions for each stage}
+\label{tutorials/new_tutorial/introduction:write-the-python-functions-for-each-stage}\begin{quote}
+
+Next, we can sketch out the python functions which do the actual work for the pipeline.
+\begin{quote}
+
+\begin{notice}{note}{Note:}\begin{enumerate}
+\item {}
+\DUspan{highlight-red}{These are normal python functions with the important proviso that}
+\begin{quote}
+\begin{enumerate}
+\item {}
+The first parameter contains the \textbf{Input} (file names)
+
+\item {}
+The second parameter contains the \textbf{Output} (file names)
+
+\end{enumerate}
+
+You can otherwise supply as many parameters as required.
+\end{quote}
+
+\item {}
+\DUspan{highlight-red}{Each python function should only take a} \emph{Single} \textbf{Input} at a time
+\begin{quote}
+
+All the parallelism in your pipeline should be handled by \emph{Ruffus}. Make sure
+each function analyses one thing at a time.
+\end{quote}
+
+\end{enumerate}
+\end{notice}
+\end{quote}
+
+\emph{Ruffus} refers to a pipelined function as a {\hyperref[glossary:term-task]{\emph{task}}}.
+
+The code for our three task functions looks something like this:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} STAGE 1 fasta\PYGZhy{}\PYGZgt{}sam}
+\PYG{c}{\PYGZsh{}}
+\PYG{k}{def} \PYG{n+nf}{map\PYGZus{}dna\PYGZus{}sequence}\PYG{p}{(}\PYG{n}{input\PYGZus{}file}\PYG{p}{,} \PYG{c}{\PYGZsh{} 1st parameter is Input}
+ \PYG{n}{output\PYGZus{}file}\PYG{p}{)}\PYG{p}{:} \PYG{c}{\PYGZsh{} 2nd parameter is Output}
+ \PYG{l+s+sd}{\PYGZdq{}\PYGZdq{}\PYGZdq{}}
+\PYG{l+s+sd}{ Sketch of real mapping function}
+\PYG{l+s+sd}{ We can do the mapping ourselves}
+\PYG{l+s+sd}{ or call some other programme:}
+\PYG{l+s+sd}{ os.system(\PYGZdq{}stampy \PYGZpc{}s \PYGZpc{}s...\PYGZdq{} \PYGZpc{} (input\PYGZus{}file, output\PYGZus{}file))}
+\PYG{l+s+sd}{ \PYGZdq{}\PYGZdq{}\PYGZdq{}}
+ \PYG{n}{ii} \PYG{o}{=} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{input\PYGZus{}file}\PYG{p}{)}
+ \PYG{n}{oo} \PYG{o}{=} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\end{Verbatim}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} STAGE 2 sam\PYGZhy{}\PYGZgt{}bam}
+\PYG{c}{\PYGZsh{}}
+\PYG{k}{def} \PYG{n+nf}{compress\PYGZus{}sam\PYGZus{}file}\PYG{p}{(}\PYG{n}{input\PYGZus{}file}\PYG{p}{,} \PYG{c}{\PYGZsh{} Input parameter}
+ \PYG{n}{output\PYGZus{}file}\PYG{p}{)}\PYG{p}{:} \PYG{c}{\PYGZsh{} Output parameter}
+ \PYG{l+s+sd}{\PYGZdq{}\PYGZdq{}\PYGZdq{}}
+\PYG{l+s+sd}{ Sketch of real compression function}
+\PYG{l+s+sd}{ \PYGZdq{}\PYGZdq{}\PYGZdq{}}
+ \PYG{n}{ii} \PYG{o}{=} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{input\PYGZus{}file}\PYG{p}{)}
+ \PYG{n}{oo} \PYG{o}{=} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\end{Verbatim}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} STAGE 3 bam\PYGZhy{}\PYGZgt{}statistics}
+\PYG{c}{\PYGZsh{}}
+\PYG{k}{def} \PYG{n+nf}{summarise\PYGZus{}bam\PYGZus{}file}\PYG{p}{(}\PYG{n}{input\PYGZus{}file}\PYG{p}{,} \PYG{c}{\PYGZsh{} Input parameter}
+ \PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{c}{\PYGZsh{} Output parameter}
+ \PYG{n}{extra\PYGZus{}stats\PYGZus{}parameter}\PYG{p}{)}\PYG{p}{:} \PYG{c}{\PYGZsh{} Any number of extra parameters as required}
+ \PYG{l+s+sd}{\PYGZdq{}\PYGZdq{}\PYGZdq{}}
+\PYG{l+s+sd}{ Sketch of real analysis function}
+\PYG{l+s+sd}{ \PYGZdq{}\PYGZdq{}\PYGZdq{}}
+ \PYG{n}{ii} \PYG{o}{=} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{input\PYGZus{}file}\PYG{p}{)}
+ \PYG{n}{oo} \PYG{o}{=} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\end{Verbatim}
+\end{quote}
+
+If we were calling our functions manually, without the benefit of \emph{Ruffus}, we would need
+the following sequence of calls:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{c}{\PYGZsh{} STAGE 1}
+\PYG{n}{map\PYGZus{}dna\PYGZus{}sequence}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{a.fasta}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{a.sam}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{n}{map\PYGZus{}dna\PYGZus{}sequence}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{b.fasta}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{b.sam}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{n}{map\PYGZus{}dna\PYGZus{}sequence}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{c.fasta}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{c.sam}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+
+\PYG{c}{\PYGZsh{} STAGE 2}
+\PYG{n}{compress\PYGZus{}sam\PYGZus{}file}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{a.sam}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{a.bam}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{n}{compress\PYGZus{}sam\PYGZus{}file}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{b.sam}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{b.bam}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{n}{compress\PYGZus{}sam\PYGZus{}file}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{c.sam}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{c.bam}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+
+\PYG{c}{\PYGZsh{} STAGE 3}
+\PYG{n}{summarise\PYGZus{}bam\PYGZus{}file}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{a.bam}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{a.statistics}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{n}{summarise\PYGZus{}bam\PYGZus{}file}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{b.bam}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{b.statistics}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{n}{summarise\PYGZus{}bam\PYGZus{}file}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{c.bam}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{c.statistics}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\end{Verbatim}
+\end{quote}
+\end{quote}
+
+
+\subsubsection{3. Link the python functions into a pipeline}
+\label{tutorials/new_tutorial/introduction:link-the-python-functions-into-a-pipeline}\begin{quote}
+
+\emph{Ruffus} makes exactly the same function calls on your behalf. However, first, we need to
+tell \emph{Ruffus} what the arguments should be for each of the function calls.
+\begin{itemize}
+\item {}
+The \textbf{Input} is easy: This is either the starting file set (\code{*.fasta}) or whatever is produced
+by the previous stage.
+
+\item {}
+The \textbf{Output} file name is the same as the \textbf{Input} but with the appropriate extension.
+
+\end{itemize}
+
+These are specified using the \emph{Ruffus} {\hyperref[decorators/transform:decorators-transform]{\emph{@transform}}} decorator as follows:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{k+kn}{from} \PYG{n+nn}{ruffus} \PYG{k+kn}{import} \PYG{o}{*}
+
+\PYG{n}{starting\PYGZus{}files} \PYG{o}{=} \PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{a.fasta}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{b.fasta}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{c.fasta}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}
+
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} STAGE 1 fasta\PYGZhy{}\PYGZgt{}sam}
+\PYG{c}{\PYGZsh{}}
+\PYG{n+nd}{@transform}\PYG{p}{(}\PYG{n}{starting\PYGZus{}files}\PYG{p}{,} \PYG{c}{\PYGZsh{} Input = starting files}
+ \PYG{n}{suffix}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.fasta}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{c}{\PYGZsh{} suffix = .fasta}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.sam}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)} \PYG{c}{\PYGZsh{} Output suffix = .sam}
+\PYG{k}{def} \PYG{n+nf}{map\PYGZus{}dna\PYGZus{}sequence}\PYG{p}{(}\PYG{n}{input\PYGZus{}file}\PYG{p}{,}
+ \PYG{n}{output\PYGZus{}file}\PYG{p}{)}\PYG{p}{:}
+ \PYG{n}{ii} \PYG{o}{=} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{input\PYGZus{}file}\PYG{p}{)}
+ \PYG{n}{oo} \PYG{o}{=} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} STAGE 2 sam\PYGZhy{}\PYGZgt{}bam}
+\PYG{c}{\PYGZsh{}}
+\PYG{n+nd}{@transform}\PYG{p}{(}\PYG{n}{map\PYGZus{}dna\PYGZus{}sequence}\PYG{p}{,} \PYG{c}{\PYGZsh{} Input = previous stage}
+ \PYG{n}{suffix}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.sam}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{c}{\PYGZsh{} suffix = .sam}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.bam}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)} \PYG{c}{\PYGZsh{} Output suffix = .bam}
+\PYG{k}{def} \PYG{n+nf}{compress\PYGZus{}sam\PYGZus{}file}\PYG{p}{(}\PYG{n}{input\PYGZus{}file}\PYG{p}{,}
+ \PYG{n}{output\PYGZus{}file}\PYG{p}{)}\PYG{p}{:}
+ \PYG{n}{ii} \PYG{o}{=} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{input\PYGZus{}file}\PYG{p}{)}
+ \PYG{n}{oo} \PYG{o}{=} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} STAGE 3 bam\PYGZhy{}\PYGZgt{}statistics}
+\PYG{c}{\PYGZsh{}}
+\PYG{n+nd}{@transform}\PYG{p}{(}\PYG{n}{compress\PYGZus{}sam\PYGZus{}file}\PYG{p}{,} \PYG{c}{\PYGZsh{} Input = previous stage}
+ \PYG{n}{suffix}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.bam}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{c}{\PYGZsh{} suffix = .bam}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.statistics}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{c}{\PYGZsh{} Output suffix = .statistics}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{use\PYGZus{}linear\PYGZus{}model}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)} \PYG{c}{\PYGZsh{} Extra statistics parameter}
+\PYG{k}{def} \PYG{n+nf}{summarise\PYGZus{}bam\PYGZus{}file}\PYG{p}{(}\PYG{n}{input\PYGZus{}file}\PYG{p}{,}
+ \PYG{n}{output\PYGZus{}file}\PYG{p}{,}
+ \PYG{n}{extra\PYGZus{}stats\PYGZus{}parameter}\PYG{p}{)}\PYG{p}{:}
+ \PYG{l+s+sd}{\PYGZdq{}\PYGZdq{}\PYGZdq{}}
+\PYG{l+s+sd}{ Sketch of real analysis function}
+\PYG{l+s+sd}{ \PYGZdq{}\PYGZdq{}\PYGZdq{}}
+ \PYG{n}{ii} \PYG{o}{=} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{input\PYGZus{}file}\PYG{p}{)}
+ \PYG{n}{oo} \PYG{o}{=} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\end{Verbatim}
+\end{quote}
+\end{quote}
+
+
+\subsubsection{4. @transform syntax}
+\label{tutorials/new_tutorial/introduction:transform-syntax}\begin{quote}
+\begin{enumerate}
+\item {}
+\begin{DUlineblock}{0em}
+\item[] The 1st parameter for {\hyperref[decorators/transform:decorators-transform]{\emph{@transform}}} is the \textbf{Input}.
+\item[] This is either the set of starting data or the name of the previous pipeline function.
+\item[] \emph{Ruffus} \emph{chains} together the stages of a pipeline by linking the \textbf{Output} of the previous stage into the \textbf{Input} of the next.
+\end{DUlineblock}
+
+\item {}
+\begin{DUlineblock}{0em}
+\item[] The 2nd parameter is the current {\hyperref[decorators/indicator_objects:decorators-suffix]{\emph{suffix}}}
+\item[] (i.e. our \textbf{Input} file extensions of \code{".fasta"} or \code{".sam"} or \code{".bam"})
+\end{DUlineblock}
+
+\item {}
+\begin{DUlineblock}{0em}
+\item[] The 3rd parameter is what we want our \textbf{Output} file name to be after {\hyperref[decorators/indicator_objects:decorators-suffix]{\emph{suffix}}} string substitution (e.g. \code{.fasta -\textgreater{} .sam}).
+\item[] This works because we are using a sane naming scheme for our data files.
+\end{DUlineblock}
+
+\item {}
+Other parameters can be passed to \code{@transform} and they will be forwarded to our python
+pipeline function.
+
+\end{enumerate}
+
+The functions that do the actual work of each stage of the pipeline remain unchanged.
+The role of \emph{Ruffus} is to make sure each one is called in the right order,
+with the right parameters, and in parallel if desired (using multiprocessing, as the one-line sketch below shows).
+\end{quote}
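+
+As a one-line sketch (the value \code{4} below is an arbitrary choice, not a recommendation),
+the number of jobs run simultaneously is picked when the pipeline is started,
+not inside the task functions themselves:
+
+\begin{Verbatim}
+# run up to four jobs at a time, each in its own process
+pipeline_run(multiprocess = 4)
+\end{Verbatim}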
+
+\index{pipeline\_run!Tutorial}\index{Tutorial!pipeline\_run}
+
+\subsubsection{5. Run the pipeline!}
+\label{tutorials/new_tutorial/introduction:run-the-pipeline}\label{tutorials/new_tutorial/introduction:index-2}\label{tutorials/new_tutorial/introduction:new-manual-pipeline-run}\begin{quote}
+
+\begin{notice}{note}{Note:}
+\textbf{Key Ruffus Terminology}:
+
+A {\hyperref[glossary:term-task]{\emph{task}}} is an annotated python function which represents a recipe or stage of your pipeline.
+
+A {\hyperref[glossary:term-job]{\emph{job}}} is each time your recipe is applied to a piece of data, i.e. each time \emph{Ruffus} calls your function.
+
+Each \textbf{task} or pipeline recipe can thus have many \textbf{jobs} each of which can work in parallel on different data.
+\end{notice}
+
+Now we can run the pipeline with the \emph{Ruffus} function {\hyperref[pipeline_functions:pipeline-functions-pipeline-run]{\emph{pipeline\_run}}}:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{p}{)}
+\end{Verbatim}
+
+This produces three sets of results in parallel, as you might expect:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{p}{)}
+\PYG{g+go}{ Job = [a.fasta \PYGZhy{}\PYGZgt{} a.sam] completed}
+\PYG{g+go}{ Job = [b.fasta \PYGZhy{}\PYGZgt{} b.sam] completed}
+\PYG{g+go}{ Job = [c.fasta \PYGZhy{}\PYGZgt{} c.sam] completed}
+\PYG{g+go}{Completed Task = map\PYGZus{}dna\PYGZus{}sequence}
+\PYG{g+go}{ Job = [a.sam \PYGZhy{}\PYGZgt{} a.bam] completed}
+\PYG{g+go}{ Job = [b.sam \PYGZhy{}\PYGZgt{} b.bam] completed}
+\PYG{g+go}{ Job = [c.sam \PYGZhy{}\PYGZgt{} c.bam] completed}
+\PYG{g+go}{Completed Task = compress\PYGZus{}sam\PYGZus{}file}
+\PYG{g+go}{ Job = [a.bam \PYGZhy{}\PYGZgt{} a.statistics, use\PYGZus{}linear\PYGZus{}model] completed}
+\PYG{g+go}{ Job = [b.bam \PYGZhy{}\PYGZgt{} b.statistics, use\PYGZus{}linear\PYGZus{}model] completed}
+\PYG{g+go}{ Job = [c.bam \PYGZhy{}\PYGZgt{} c.statistics, use\PYGZus{}linear\PYGZus{}model] completed}
+\PYG{g+go}{Completed Task = summarise\PYGZus{}bam\PYGZus{}file}
+\end{Verbatim}
+\end{quote}
+
+To work out which functions to call, {\hyperref[pipeline_functions:pipeline-functions-pipeline-run]{\emph{pipeline\_run}}}
+finds the \textbf{last} {\hyperref[glossary:term-task]{\emph{task}}} function of your pipeline, then
+automatically determines all the other functions it depends on, working backwards up the chain of
+dependencies.
+
+We can also specify the end point of the pipeline explicitly:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{n}{target\PYGZus{}tasks} \PYG{o}{=} \PYG{p}{[}\PYG{n}{summarise\PYGZus{}bam\PYGZus{}file}\PYG{p}{]}\PYG{p}{)}
+\end{Verbatim}
+\end{quote}
+
+This allows us to run only part of the pipeline, for example:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{n}{target\PYGZus{}tasks} \PYG{o}{=} \PYG{p}{[}\PYG{n}{compress\PYGZus{}sam\PYGZus{}file}\PYG{p}{]}\PYG{p}{)}
+\end{Verbatim}
+\end{quote}
+\end{quote}
+
+\begin{notice}{note}{Note:}
+The {\hyperref[tutorials/new_tutorial/introduction_code:new-manual-introduction-code]{\emph{example code}}} can be copied and pasted into a python
+command shell.
+\end{notice}
+
+\index{transform!Tutorial}\index{Tutorial!transform}
+
+\section{\textbf{Chapter 2}: Transforming data in a pipeline with \emph{@transform}}
+\label{tutorials/new_tutorial/transform:index-0}\label{tutorials/new_tutorial/transform:new-manual-transform-chapter-num-transforming-data-in-a-pipeline-with-transform}\label{tutorials/new_tutorial/transform::doc}\label{tutorials/new_tutorial/transform:new-manual-transform}
+
+\strong{See also:}
+
+\begin{itemize}
+\item {}
+{\hyperref[tutorials/new_tutorial/manual_contents:new-manual-table-of-contents]{\emph{Manual Table of Contents}}}
+
+\item {}
+{\hyperref[decorators/transform:decorators-transform]{\emph{@transform}}} syntax
+
+\end{itemize}
+
+
+
+\begin{notice}{note}{Note:}
+Remember to look at the example code:
+\begin{itemize}
+\item {}
+{\hyperref[tutorials/new_tutorial/transform_code:new-manual-transform-code]{\emph{Chapter 2: Python Code for Transforming data in a pipeline with @transform}}}
+
+\end{itemize}
+\end{notice}
+
+
+\subsection{Review}
+\label{tutorials/new_tutorial/transform:review}\begin{quote}
+
+\scalebox{0.500000}{\includegraphics{theoretical_pipeline_schematic.png}}
+
+Computational pipelines transform your data in stages until the final result is produced.
+Ruffus automates the plumbing in your pipeline. You supply the python functions which perform the data transformation,
+and tell Ruffus how these pipeline stages or {\hyperref[glossary:term-task]{\emph{task}}} functions are connected together.
+
+\begin{notice}{note}{Note:}
+\textbf{The best way to design a pipeline is to:}
+\begin{itemize}
+\item {}
+\textbf{write down the file names of the data as it flows across your pipeline}
+
+\item {}
+\textbf{write down the names of the functions which transform the data at each stage of the pipeline.}
+
+\end{itemize}
+\end{notice}
+\end{quote}
+
+
+\subsection{Task functions as recipes}
+\label{tutorials/new_tutorial/transform:task-functions-as-recipes}\begin{quote}
+
+Each {\hyperref[glossary:term-task]{\emph{task}}} function of the pipeline is a recipe or
+\href{http://www.gnu.org/software/make/manual/make.html\#Rule-Introduction}{rule}
+which can be applied repeatedly to our data.
+
+For example, one can have
+\begin{itemize}
+\item {}
+a \code{compile()} \emph{task} which will compile any number of source code files, or
+
+\item {}
+a \code{count\_lines()} \emph{task} which will count the number of lines in any file or
+
+\item {}
+an \code{align\_dna()} \emph{task} which will align the DNA of many chromosomes.
+
+\end{itemize}
+\end{quote}
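+
+For instance, a \code{count\_lines()} recipe might look like the following sketch
+(an illustration only, with made-up input file names, not code from the manual):
+
+\begin{Verbatim}
+@transform(["a.txt", "b.txt"], suffix(".txt"), ".count")
+def count_lines(input_file, output_file):
+    # the same recipe is applied, job by job, to every input file
+    with open(input_file) as ii, open(output_file, "w") as oo:
+        oo.write("%d\n" % sum(1 for line in ii))
+\end{Verbatim}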
+
+\index{one to one @transform!Tutorial}\index{Tutorial!one to one @transform}
+
+\subsection{\emph{@transform} is a 1 to 1 operation}
+\label{tutorials/new_tutorial/transform:transform-is-a-1-to-1-operation}\label{tutorials/new_tutorial/transform:index-1}\begin{quote}
+
+\code{@transform} is a 1:1 operation because for each input, it generates one output.
+
+\scalebox{0.500000}{\includegraphics{transform_1_to_1_example.png}}
+
+This is obvious when you count the number of jobs at each step. In our example pipeline, there are always
+three jobs moving in step through each stage ({\hyperref[glossary:term-task]{\emph{task}}}).
+
+Each \textbf{Input} or \textbf{Output} is not, however, limited to a single filename. Each job can accept, for example,
+a pair of files as its \textbf{Input}, or produce more than one file, a dictionary, or numbers as its \textbf{Output}.
+
+When a job outputs a pair of files, this does not generate two jobs downstream. It just means that the successive
+{\hyperref[glossary:term-task]{\emph{task}}} in the pipeline will receive a list or tuple of files as its input parameter
+(sketched in code after the note below).
+
+\begin{notice}{note}{Note:}
+The different sorts of decorators in Ruffus determine the \emph{topology} of your pipeline,
+i.e. how the jobs from different tasks are linked together seamlessly.
+
+{\hyperref[decorators/transform:decorators-transform]{\emph{@transform}}} always generates one \textbf{Output} for one \textbf{Input}.
+
+In later parts of the tutorial, we will encounter more decorators which can \emph{split up}, \emph{join together} or \emph{group} inputs.
+
+In other words, using other decorators \textbf{Input} and \textbf{Output} can have \textbf{many to one}, \textbf{many to many} etc. relationships.
+\end{notice}
+\end{quote}
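+
+For example (a made-up sketch, not the manual's own code), a job whose \textbf{Output} is a pair
+of files still counts as a single job; the next task just receives both file names together as
+one \textbf{Input}:
+
+\begin{Verbatim}
+# one job per ".start" file, but each job produces two Output files
+@transform(["x.start", "y.start"], suffix(".start"), [".left", ".right"])
+def split_in_two(input_file, output_files):
+    for output_file in output_files:
+        open(output_file, "w").close()
+
+# still one job per pair: here Input is the list ["x.left", "x.right"]
+@transform(split_in_two, suffix(".left"), ".combined")
+def combine_pair(input_files, output_file):
+    open(output_file, "w").close()
+\end{Verbatim}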
+
+
+\subsubsection{A pair of files as the \textbf{Input}}
+\label{tutorials/new_tutorial/transform:a-pair-of-files-as-the-input}\begin{quote}
+
+Let us rewrite our previous example so that the \textbf{Inputs} of the first task
+are \href{http://en.wikipedia.org/wiki/DNA\_sequencing\_theory\#Pairwise\_end-sequencing}{matching pairs}
+of DNA sequence files, processed in tandem.
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{k+kn}{from} \PYG{n+nn}{ruffus} \PYG{k+kn}{import} \PYG{o}{*}
+
+\PYG{n}{starting\PYGZus{}files} \PYG{o}{=} \PYG{p}{[}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{a.1.fastq}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{a.2.fastq}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,}
+ \PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{b.1.fastq}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{b.2.fastq}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,}
+ \PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{c.1.fastq}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{c.2.fastq}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{]}
+
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} STAGE 1 fasta\PYGZhy{}\PYGZgt{}sam}
+\PYG{c}{\PYGZsh{}}
+\PYG{n+nd}{@transform}\PYG{p}{(}\PYG{n}{starting\PYGZus{}files}\PYG{p}{,} \PYG{c}{\PYGZsh{} Input = starting files}
+ \PYG{n}{suffix}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.1.fastq}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{c}{\PYGZsh{} suffix = .1.fastq}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.sam}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)} \PYG{c}{\PYGZsh{} Output suffix = .sam}
+\PYG{k}{def} \PYG{n+nf}{map\PYGZus{}dna\PYGZus{}sequence}\PYG{p}{(}\PYG{n}{input\PYGZus{}files}\PYG{p}{,}
+ \PYG{n}{output\PYGZus{}file}\PYG{p}{)}\PYG{p}{:}
+ \PYG{c}{\PYGZsh{} remember there are two input files now}
+ \PYG{n}{ii1} \PYG{o}{=} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{input\PYGZus{}files}\PYG{p}{[}\PYG{l+m+mi}{0}\PYG{p}{]}\PYG{p}{)}
+ \PYG{n}{ii2} \PYG{o}{=} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{input\PYGZus{}files}\PYG{p}{[}\PYG{l+m+mi}{1}\PYG{p}{]}\PYG{p}{)}
+ \PYG{n}{oo} \PYG{o}{=} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\end{Verbatim}
+\end{quote}
+
+Only the first task changes. Running the pipeline now gives:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{g+go}{pipeline\PYGZus{}run()}
+\PYG{g+go}{ Job = [[a.1.fastq, a.2.fastq] \PYGZhy{}\PYGZgt{} a.sam] completed}
+\PYG{g+go}{ Job = [[b.1.fastq, b.2.fastq] \PYGZhy{}\PYGZgt{} b.sam] completed}
+\PYG{g+go}{ Job = [[c.1.fastq, c.2.fastq] \PYGZhy{}\PYGZgt{} c.sam] completed}
+\PYG{g+go}{Completed Task = map\PYGZus{}dna\PYGZus{}sequence}
+\end{Verbatim}
+\end{quote}
+
+{\hyperref[decorators/indicator_objects:decorators-suffix]{\emph{suffix}}} always matches only the first file name in each \textbf{Input}.
+\end{quote}
+
+\index{input / output parameters!Tutorial}\index{Tutorial!input / output parameters}
+
+\subsection{\textbf{Input} and \textbf{Output} parameters}
+\label{tutorials/new_tutorial/transform:input-and-output-parameters}\label{tutorials/new_tutorial/transform:index-2}\begin{quote}
+
+\textbf{Ruffus} chains together different tasks by taking the \textbf{Output} from one job
+and plugging it automatically as the \textbf{Input} of the next.
+
+The first two parameters of each job are the \textbf{Input} and \textbf{Output} parameters respectively.
+
+In the above example, we have:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{p}{)}
+\PYG{g+go}{ Job = [a.bam \PYGZhy{}\PYGZgt{} a.statistics, use\PYGZus{}linear\PYGZus{}model] completed}
+\PYG{g+go}{ Job = [b.bam \PYGZhy{}\PYGZgt{} b.statistics, use\PYGZus{}linear\PYGZus{}model] completed}
+\PYG{g+go}{ Job = [c.bam \PYGZhy{}\PYGZgt{} c.statistics, use\PYGZus{}linear\PYGZus{}model] completed}
+\PYG{g+go}{Completed Task = summarise\PYGZus{}bam\PYGZus{}file}
+\end{Verbatim}
+
+
+\begin{threeparttable}
+\capstart\caption{Parameters for summarise\_bam\_file()}
+
+\begin{tabulary}{\linewidth}{|L|L|L|}
+\hline
+\textbf{\relax
+\textbf{Inputs}
+} & \textbf{\relax
+\textbf{Outputs}
+} & \textbf{\relax
+\textbf{Extra}
+}\\\hline
+
+\code{"a.bam"}
+ &
+\code{"a.statistics"}
+ &
+\code{"use\_linear\_model"}
+\\\hline
+
+\code{"b.bam"}
+ &
+\code{"b.statistics"}
+ &
+\code{"use\_linear\_model"}
+\\\hline
+
+\code{"c.bam"}
+ &
+\code{"c.statistics"}
+ &
+\code{"use\_linear\_model"}
+\\\hline
+\end{tabulary}
+
+\end{threeparttable}
+
+\end{quote}
+
+\textbf{Extra} parameters are for the consumption of \code{summarise\_bam\_file()} and will not be passed to the next task.
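+
+As a sketch of how these parameters reach the task function (the decorator and function body below are illustrative reconstructions, not necessarily the original code), the \textbf{Input}, \textbf{Output} and \textbf{Extra} parameters map directly onto the function arguments:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+# sketch: where the Input, Output and Extra parameters land
+@transform(compress_sam_file,      # Input  = Output of the previous task
+           suffix(".bam"),
+           ".statistics",          # Output = ".bam" replaced by ".statistics"
+           "use_linear_model")     # Extra  = passed through unchanged
+def summarise_bam_file(input_file, output_file, extra_parameter):
+    # for the first job: input_file = "a.bam", output_file = "a.statistics"
+    # extra_parameter   = "use_linear_model" for every job
+    with open(output_file, "w"):
+        pass
+\end{Verbatim}
+\end{quote}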
+
+Ruffus was designed for pipelines which save intermediate data in files. This is not
+compulsory but saving your data in files at each step provides many advantages:
+\begin{enumerate}
+\item {}
+Ruffus can use file system time stamps to check if your pipeline is up to date
+
+\item {}
+Your data is persistent across runs
+
+\item {}
+This is a good way to pass large amounts of data across processes and computational nodes
+
+\end{enumerate}
+
+Nevertheless, \emph{all} the {\hyperref[glossary:term-task]{\emph{task}}} parameters can include anything which suits your workflow, from lists of files, to numbers,
+sets or tuples. \emph{Ruffus} imposes few constraints on what \emph{you}
+would like to send to each stage of your pipeline.
+
+\emph{Ruffus} does, however, assume that any strings in the \textbf{Input} and \textbf{Output} parameters are file names
+required by and produced by that job. As we shall see, the modification times of these files
+indicate whether that part of the pipeline is up to date or needs to be rerun.
+\end{quote}
+
+\index{transforming in parallel!Tutorial}\index{Tutorial!transforming in parallel}
+
+\section{\textbf{Chapter 3}: More on \texttt{@transform}-ing data}
+\label{tutorials/new_tutorial/transform_in_parallel:new-manual-transform-in-parallel-chapter-num-more-on-transform-ing-data}\label{tutorials/new_tutorial/transform_in_parallel:index-0}\label{tutorials/new_tutorial/transform_in_parallel::doc}\label{tutorials/new_tutorial/transform_in_parallel:new-manual-transform-in-parallel}
+
+\strong{See also:}
+
+\begin{itemize}
+\item {}
+{\hyperref[tutorials/new_tutorial/manual_contents:new-manual-table-of-contents]{\emph{Manual Table of Contents}}}
+
+\item {}
+{\hyperref[decorators/transform:decorators-transform]{\emph{@transform}}} syntax
+
+\end{itemize}
+
+
+
+\begin{notice}{note}{Note:}
+Remember to look at the example code:
+\begin{itemize}
+\item {}
+{\hyperref[tutorials/new_tutorial/transform_in_parallel_code:new-manual-transform-in-parallel-code]{\emph{Chapter 3: Python Code for More on @transform-ing data}}}
+
+\end{itemize}
+\end{notice}
+
+
+\subsection{Review}
+\label{tutorials/new_tutorial/transform_in_parallel:review}\begin{quote}
+
+\scalebox{0.500000}{\includegraphics{theoretical_pipeline_schematic.png}}
+
+Computational pipelines transform your data in stages until the final result is produced.
+\emph{Ruffus} automates the plumbing in your pipeline. You supply the python functions which perform the data transformation,
+and tell \emph{Ruffus} how these pipeline stages or {\hyperref[glossary:term-task]{\emph{task}}} functions are connected together.
+
+\begin{notice}{note}{Note:}
+\textbf{The best way to design a pipeline is to:}
+\begin{itemize}
+\item {}
+\textbf{write down the file names of the data as it flows across your pipeline}
+
+\item {}
+\textbf{write down the names of functions which transform the data at each stage of the pipeline.}
+
+\end{itemize}
+\end{notice}
+
+{\hyperref[tutorials/new_tutorial/introduction:new-manual-introduction]{\emph{Chapter 1: An introduction to basic Ruffus syntax}}} described the bare bones of a simple \emph{Ruffus} pipeline.
+
+Using the \emph{Ruffus} {\hyperref[decorators/transform:decorators-transform]{\emph{@transform}}} decorator, we were able to
+specify the data files moving through our pipeline so that our specified task functions
+could be invoked.
+
+This may seem like a lot of effort and complication for something so simple: a couple of
+simple python function calls we could have invoked ourselves.
+However, by letting \emph{Ruffus} manage your pipeline parameters, you will get the following features
+for free:
+\begin{enumerate}
+\item {}
+Only out-of-date parts of the pipeline will be re-run
+
+\item {}
+Multiple jobs can be run in parallel (on different processors if possible)
+
+\item {}
+Pipeline stages can be chained together automatically. This means you can apply your
+pipeline just as easily to 1000 files as to 3.
+
+\end{enumerate}
+\end{quote}
+
+
+\subsection{Running pipelines in parallel}
+\label{tutorials/new_tutorial/transform_in_parallel:running-pipelines-in-parallel}\begin{quote}
+
+Even though three sets of files have been specified for our initial pipeline, and they can be
+processed completely independently, by default \emph{Ruffus} runs them one after another.
+
+To ask \emph{Ruffus} to run them in parallel, all you have to do is to add a \code{multiprocess} parameter to \code{pipeline\_run}:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{n}{multiprocess} \PYG{o}{=} \PYG{l+m+mi}{5}\PYG{p}{)}
+\end{Verbatim}
+\end{quote}
+
+In this case, we are telling \emph{Ruffus} to run a maximum of 5 jobs at the same time. Since we only have
+three sets of data, that is as much parallelism as we are going to get...
+\end{quote}
+
+
+\subsection{Up-to-date jobs are not re-run unnecessarily}
+\label{tutorials/new_tutorial/transform_in_parallel:new-manual-only-rerun-out-of-date}\label{tutorials/new_tutorial/transform_in_parallel:up-to-date-jobs-are-not-re-run-unnecessarily}\begin{quote}
+
+A job will be run only if the output file timestamps are out of date.
+If you ran our example code a second time, nothing would happen because all the work is already complete.
+
+We can check the details by asking \emph{Ruffus} for more \code{verbose} output:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{n}{verbose} \PYG{o}{=} \PYG{l+m+mi}{4}\PYG{p}{)}
+\PYG{g+go}{ Task = map\PYGZus{}dna\PYGZus{}sequence}
+\PYG{g+go}{ All jobs up to date}
+\PYG{g+go}{ Task = compress\PYGZus{}sam\PYGZus{}file}
+\PYG{g+go}{ All jobs up to date}
+\PYG{g+go}{ Task = summarise\PYGZus{}bam\PYGZus{}file}
+\PYG{g+go}{ All jobs up to date}
+\end{Verbatim}
+\end{quote}
+\begin{description}
+\item[{Nothing happens because:}] \leavevmode\begin{itemize}
+\item {}
+\code{a.sam} was created later than \code{a.1.fastq} and \code{a.2.fastq}, and
+
+\item {}
+\code{a.bam} was created later than \code{a.sam} and
+
+\item {}
+\code{a.statistics} was created later than \code{a.bam}.
+
+\end{itemize}
+
+\end{description}
+
+and so on...
+\begin{description}
+\item[{Let us see what happens if we recreate the file \code{a.1.fastq} so that one of the original data files appears to have been updated}] \leavevmode
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n+nb}{open}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{a.1.fastq}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{n}{multiprocess} \PYG{o}{=} \PYG{l+m+mi}{5}\PYG{p}{)}
+\end{Verbatim}
+
+\end{description}
+
+The up-to-date jobs are cleverly ignored and only the out-of-date files are reprocessed.
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{g+go}{ \PYGZgt{}\PYGZgt{}\PYGZgt{} open(\PYGZdq{}a.1.fastq\PYGZdq{}, \PYGZdq{}w\PYGZdq{})}
+\PYG{g+go}{ \PYGZgt{}\PYGZgt{}\PYGZgt{} pipeline\PYGZus{}run(verbose=2)}
+\PYG{g+go}{ Job = [[b.1.fastq, b.2.fastq] \PYGZhy{}\PYGZgt{} b.sam] \PYGZsh{} unnecessary: already up to date}
+\PYG{g+go}{ Job = [[c.1.fastq, c.2.fastq] \PYGZhy{}\PYGZgt{} c.sam] \PYGZsh{} unnecessary: already up to date}
+\PYG{g+go}{ Job = [[a.1.fastq, a.2.fastq] \PYGZhy{}\PYGZgt{} a.sam] completed}
+\PYG{g+go}{ Completed Task = map\PYGZus{}dna\PYGZus{}sequence}
+\PYG{g+go}{ Job = [b.sam \PYGZhy{}\PYGZgt{} b.bam] \PYGZsh{} unnecessary: already up to date}
+\PYG{g+go}{ Job = [c.sam \PYGZhy{}\PYGZgt{} c.bam] \PYGZsh{} unnecessary: already up to date}
+\PYG{g+go}{ Job = [a.sam \PYGZhy{}\PYGZgt{} a.bam] completed}
+\PYG{g+go}{ Completed Task = compress\PYGZus{}sam\PYGZus{}file}
+\PYG{g+go}{ Job = [b.bam \PYGZhy{}\PYGZgt{} b.statistics, use\PYGZus{}linear\PYGZus{}model] \PYGZsh{} unnecessary: already up to date}
+\PYG{g+go}{ Job = [c.bam \PYGZhy{}\PYGZgt{} c.statistics, use\PYGZus{}linear\PYGZus{}model] \PYGZsh{} unnecessary: already up to date}
+\PYG{g+go}{ Job = [a.bam \PYGZhy{}\PYGZgt{} a.statistics, use\PYGZus{}linear\PYGZus{}model] completed}
+\PYG{g+go}{ Completed Task = summarise\PYGZus{}bam\PYGZus{}file}
+\end{Verbatim}
+\end{quote}
+\end{quote}
+
+\index{output\_from!referring to functions before they are defined}\index{referring to functions before they are defined!output\_from}\index{output\_from!defining tasks out of order}\index{defining tasks out of order!output\_from}
+
+\subsection{Defining pipeline tasks out of order}
+\label{tutorials/new_tutorial/transform_in_parallel:new-manual-output-from}\label{tutorials/new_tutorial/transform_in_parallel:index-1}\label{tutorials/new_tutorial/transform_in_parallel:defining-pipeline-tasks-out-of-order}\begin{quote}
+
+The examples so far assume that all your pipelined tasks are defined in order
+(\code{first\_task} before \code{second\_task}). This is usually the most sensible way to arrange your code.
+
+If you wish to refer to tasks which are not yet defined, you can do so by quoting the function name as a string and wrapping
+it with the {\hyperref[decorators/indicator_objects:decorators-indicator-objects]{\emph{indicator class}}} {\hyperref[decorators/indicator_objects:decorators-output-from]{\emph{output\_from(...)}}} so that \emph{Ruffus}
+knows that this is a {\hyperref[glossary:term-task]{\emph{task}}} name, not a file name:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+ \PYG{c}{\PYGZsh{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZh [...]
+ \PYG{c}{\PYGZsh{}}
+ \PYG{c}{\PYGZsh{} second task}
+ \PYG{c}{\PYGZsh{}}
+ \PYG{c}{\PYGZsh{} task name string wrapped in output\PYGZus{}from(...)}
+ \PYG{n+nd}{@transform}\PYG{p}{(}\PYG{n}{output\PYGZus{}from}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{first\PYGZus{}task}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{n}{suffix}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.output.1}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.output2}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+ \PYG{k}{def} \PYG{n+nf}{second\PYGZus{}task}\PYG{p}{(}\PYG{n}{input\PYGZus{}files}\PYG{p}{,} \PYG{n}{output\PYGZus{}file}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{with} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{:} \PYG{k}{pass}
+
+
+ \PYG{c}{\PYGZsh{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZh [...]
+ \PYG{c}{\PYGZsh{}}
+ \PYG{c}{\PYGZsh{} first task}
+ \PYG{c}{\PYGZsh{}}
+ \PYG{n+nd}{@transform}\PYG{p}{(}\PYG{n}{first\PYGZus{}task\PYGZus{}params}\PYG{p}{,} \PYG{n}{suffix}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.start}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,}
+ \PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.output.1}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.output.extra.1}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{,}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{some\PYGZus{}extra.string.for\PYGZus{}example}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+m+mi}{14}\PYG{p}{)}
+ \PYG{k}{def} \PYG{n+nf}{first\PYGZus{}task}\PYG{p}{(}\PYG{n}{input\PYGZus{}files}\PYG{p}{,} \PYG{n}{output\PYGZus{}file\PYGZus{}pair}\PYG{p}{,}
+ \PYG{n}{extra\PYGZus{}parameter\PYGZus{}str}\PYG{p}{,} \PYG{n}{extra\PYGZus{}parameter\PYGZus{}num}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{for} \PYG{n}{output\PYGZus{}file} \PYG{o+ow}{in} \PYG{n}{output\PYGZus{}file\PYGZus{}pair}\PYG{p}{:}
+ \PYG{k}{with} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{pass}
+
+
+ \PYG{c}{\PYGZsh{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZh [...]
+ \PYG{c}{\PYGZsh{}}
+ \PYG{c}{\PYGZsh{} Run}
+ \PYG{c}{\PYGZsh{}}
+ \PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{p}{[}\PYG{n}{second\PYGZus{}task}\PYG{p}{]}\PYG{p}{)}
+\end{Verbatim}
+\end{quote}
+
+You can also refer to tasks (functions) in other modules, in which case the fully
+qualified name must be used:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n+nd}{@transform}\PYG{p}{(}\PYG{n}{output\PYGZus{}from}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{other\PYGZus{}module.first\PYGZus{}task}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{n}{suffix}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.output.1}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.output2}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{second\PYGZus{}task}\PYG{p}{(}\PYG{n}{input\PYGZus{}files}\PYG{p}{,} \PYG{n}{output\PYGZus{}file}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{pass}
+\end{Verbatim}
+\end{quote}
+\end{quote}
+
+\index{@transform!multiple dependencies}\index{multiple dependencies!@transform}
+
+\subsection{Multiple dependencies}
+\label{tutorials/new_tutorial/transform_in_parallel:new-manual-transform-multiple-dependencies}\label{tutorials/new_tutorial/transform_in_parallel:index-2}\label{tutorials/new_tutorial/transform_in_parallel:multiple-dependencies}\begin{quote}
+
+Each task can depend on more than one antecedent simply by chaining to a list in {\hyperref[decorators/transform:decorators-transform]{\emph{@transform}}}
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+ \PYG{c}{\PYGZsh{}}
+ \PYG{c}{\PYGZsh{} third\PYGZus{}task depends on both first\PYGZus{}task() and second\PYGZus{}task()}
+ \PYG{c}{\PYGZsh{}}
+ \PYG{n+nd}{@transform}\PYG{p}{(}\PYG{p}{[}\PYG{n}{first\PYGZus{}task}\PYG{p}{,} \PYG{n}{second\PYGZus{}task}\PYG{p}{]}\PYG{p}{,} \PYG{n}{suffix}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.output.1}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.output2}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+ \PYG{k}{def} \PYG{n+nf}{third\PYGZus{}task}\PYG{p}{(}\PYG{n}{input\PYGZus{}files}\PYG{p}{,} \PYG{n}{output\PYGZus{}file}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{with} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{:} \PYG{k}{pass}
+\end{Verbatim}
+\end{quote}
+
+\code{third\_task()} depends on and follows both \code{first\_task()} and \code{second\_task()}. However, these latter two tasks are independent of each other
+and can and will run in parallel. This can be shown clearly for our example if we add a little randomness to the run time of each job:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n}{time}\PYG{o}{.}\PYG{n}{sleep}\PYG{p}{(}\PYG{n}{random}\PYG{o}{.}\PYG{n}{random}\PYG{p}{(}\PYG{p}{)}\PYG{p}{)}
+\end{Verbatim}
+\end{quote}
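+
+As a sketch of where this sleep might sit (the decorator and signature are copied from \code{first\_task()} above; the random delay and imports are purely illustrative):
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+import random
+import time
+
+@transform(first_task_params, suffix(".start"),
+           [".output.1",
+            ".output.extra.1"],
+           "some_extra.string.for_example", 14)
+def first_task(input_files, output_file_pair,
+               extra_parameter_str, extra_parameter_num):
+    # simulate jobs taking different amounts of time to finish
+    time.sleep(random.random())
+    for output_file in output_file_pair:
+        with open(output_file, "w"):
+            pass
+\end{Verbatim}
+\end{quote}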
+
+The execution of \code{first\_task()} and \code{second\_task()} jobs will be interleaved and they finish in no particular order:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{p}{[}\PYG{n}{third\PYGZus{}task}\PYG{p}{]}\PYG{p}{,} \PYG{n}{multiprocess} \PYG{o}{=} \PYG{l+m+mi}{6}\PYG{p}{)}
+\PYG{g+go}{ Job = [[job3.a.start, job3.b.start] \PYGZhy{}\PYGZgt{} [job3.a.output.1, job3.a.output.extra.1], some\PYGZus{}extra.string.for\PYGZus{}example, 14] completed}
+\PYG{g+go}{ Job = [[job6.a.start, job6.b.start] \PYGZhy{}\PYGZgt{} [job6.a.output.1, job6.a.output.extra.1], some\PYGZus{}extra.string.for\PYGZus{}example, 14] completed}
+\PYG{g+go}{ Job = [[job1.a.start, job1.b.start] \PYGZhy{}\PYGZgt{} [job1.a.output.1, job1.a.output.extra.1], some\PYGZus{}extra.string.for\PYGZus{}example, 14] completed}
+\PYG{g+go}{ Job = [[job4.a.start, job4.b.start] \PYGZhy{}\PYGZgt{} [job4.a.output.1, job4.a.output.extra.1], some\PYGZus{}extra.string.for\PYGZus{}example, 14] completed}
+\PYG{g+go}{ Job = [[job5.a.start, job5.b.start] \PYGZhy{}\PYGZgt{} [job5.a.output.1, job5.a.output.extra.1], some\PYGZus{}extra.string.for\PYGZus{}example, 14] completed}
+\PYG{g+go}{Completed Task = second\PYGZus{}task}
+\PYG{g+go}{ Job = [[job2.a.start, job2.b.start] \PYGZhy{}\PYGZgt{} [job2.a.output.1, job2.a.output.extra.1], some\PYGZus{}extra.string.for\PYGZus{}example, 14] completed}
+\end{Verbatim}
+\end{quote}
+
+\begin{notice}{note}{Note:}
+See the {\hyperref[tutorials/new_tutorial/transform_in_parallel_code:new-manual-transform-multiple-dependencies-code]{\emph{example code}}}
+\end{notice}
+\end{quote}
+
+\index{@follow!imposing order with}\index{imposing order with!@follow}
+
+\subsection{\emph{@follows}}
+\label{tutorials/new_tutorial/transform_in_parallel:follows}\label{tutorials/new_tutorial/transform_in_parallel:new-manual-follows}\label{tutorials/new_tutorial/transform_in_parallel:index-3}\begin{quote}
+
+If there is some extrinsic reason one non-dependent task has to precede the other, then this can be specified explicitly using {\hyperref[decorators/follows:decorators-follows]{\emph{@follows}}}:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+ \PYG{c}{\PYGZsh{}}
+ \PYG{c}{\PYGZsh{} @follows specifies a preceding task}
+ \PYG{c}{\PYGZsh{}}
+ \PYG{n+nd}{@follows}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{first\PYGZus{}task}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+ \PYG{n+nd}{@transform}\PYG{p}{(}\PYG{n}{second\PYGZus{}task\PYGZus{}params}\PYG{p}{,} \PYG{n}{suffix}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.start}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,}
+ \PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.output.1}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.output.extra.1}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{,}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{some\PYGZus{}extra.string.for\PYGZus{}example}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+m+mi}{14}\PYG{p}{)}
+ \PYG{k}{def} \PYG{n+nf}{second\PYGZus{}task}\PYG{p}{(}\PYG{n}{input\PYGZus{}files}\PYG{p}{,} \PYG{n}{output\PYGZus{}file\PYGZus{}pair}\PYG{p}{,}
+ \PYG{n}{extra\PYGZus{}parameter\PYGZus{}str}\PYG{p}{,} \PYG{n}{extra\PYGZus{}parameter\PYGZus{}num}\PYG{p}{)}\PYG{p}{:}
+\end{Verbatim}
+\end{quote}
+
+{\hyperref[decorators/follows:decorators-follows]{\emph{@follows}}} specifies either a preceding task (e.g. \code{first\_task}), or if
+it has not yet been defined, the name (as a string) of a task function (e.g. \code{"first\_task"}).
+
+With the addition of {\hyperref[decorators/follows:decorators-follows]{\emph{@follows}}}, all the jobs
+of \code{second\_task()} start \emph{after} those from \code{first\_task()} have finished:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{p}{[}\PYG{n}{third\PYGZus{}task}\PYG{p}{]}\PYG{p}{,} \PYG{n}{multiprocess} \PYG{o}{=} \PYG{l+m+mi}{6}\PYG{p}{)}
+\PYG{g+go}{ Job = [[job2.a.start, job2.b.start] \PYGZhy{}\PYGZgt{} [job2.a.output.1, job2.a.output.extra.1], some\PYGZus{}extra.string.for\PYGZus{}example, 14] completed}
+\PYG{g+go}{ Job = [[job3.a.start, job3.b.start] \PYGZhy{}\PYGZgt{} [job3.a.output.1, job3.a.output.extra.1], some\PYGZus{}extra.string.for\PYGZus{}example, 14] completed}
+\PYG{g+go}{ Job = [[job1.a.start, job1.b.start] \PYGZhy{}\PYGZgt{} [job1.a.output.1, job1.a.output.extra.1], some\PYGZus{}extra.string.for\PYGZus{}example, 14] completed}
+\PYG{g+go}{Completed Task = first\PYGZus{}task}
+\PYG{g+go}{ Job = [[job4.a.start, job4.b.start] \PYGZhy{}\PYGZgt{} [job4.a.output.1, job4.a.output.extra.1], some\PYGZus{}extra.string.for\PYGZus{}example, 14] completed}
+\PYG{g+go}{ Job = [[job6.a.start, job6.b.start] \PYGZhy{}\PYGZgt{} [job6.a.output.1, job6.a.output.extra.1], some\PYGZus{}extra.string.for\PYGZus{}example, 14] completed}
+\PYG{g+go}{ Job = [[job5.a.start, job5.b.start] \PYGZhy{}\PYGZgt{} [job5.a.output.1, job5.a.output.extra.1], some\PYGZus{}extra.string.for\PYGZus{}example, 14] completed}
+\PYG{g+go}{Completed Task = second\PYGZus{}task}
+\end{Verbatim}
+\end{quote}
+\end{quote}
+
+\index{@follows!mkdir (Manual)}\index{mkdir!@follows (Manual)}
+
+\subsection{Making directories automatically with \emph{@follows} and \emph{mkdir}}
+\label{tutorials/new_tutorial/transform_in_parallel:index-4}\label{tutorials/new_tutorial/transform_in_parallel:new-manual-follows-mkdir}\label{tutorials/new_tutorial/transform_in_parallel:making-directories-automatically-with-follows-and-mkdir}\begin{quote}
+
+{\hyperref[decorators/follows:decorators-follows]{\emph{@follows}}} is also useful for making sure one or more destination directories
+exist before a task is run.
+
+\emph{Ruffus} provides special syntax to support this, using the
+{\hyperref[decorators/mkdir:decorators-mkdir]{\emph{mkdir}}} indicator class. For example:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} @follows specifies both a preceding task and a directory name}
+\PYG{c}{\PYGZsh{}}
+\PYG{n+nd}{@follows}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{first\PYGZus{}task}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{n}{mkdir}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{output/results/here}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{)}
+\PYG{n+nd}{@transform}\PYG{p}{(}\PYG{n}{second\PYGZus{}task\PYGZus{}params}\PYG{p}{,} \PYG{n}{suffix}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.start}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,}
+ \PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.output.1}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.output.extra.1}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{,}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{some\PYGZus{}extra.string.for\PYGZus{}example}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+m+mi}{14}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{second\PYGZus{}task}\PYG{p}{(}\PYG{n}{input\PYGZus{}files}\PYG{p}{,} \PYG{n}{output\PYGZus{}file\PYGZus{}pair}\PYG{p}{,}
+ \PYG{n}{extra\PYGZus{}parameter\PYGZus{}str}\PYG{p}{,} \PYG{n}{extra\PYGZus{}parameter\PYGZus{}num}\PYG{p}{)}\PYG{p}{:}
+\end{Verbatim}
+\end{quote}
+
+Before \code{second\_task()} is run, the \code{output/results/here} directory will be created if necessary.
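+
+Several directories can also be created in one go by passing more than one \code{mkdir} indicator to {\hyperref[decorators/follows:decorators-follows]{\emph{@follows}}}. A brief sketch (the directory names are purely illustrative; the rest of the task definition is unchanged):
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+# sketch: create more than one directory before the task runs
+@follows("first_task", mkdir("output"), mkdir("output/results/here"))
+\end{Verbatim}
+\end{quote}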
+\end{quote}
+
+\index{inputs parameters!globs}\index{globs!inputs parameters}\index{globs in input parameters!Tutorial}\index{Tutorial!globs in input parameters}
+
+\subsection{Globs in the \textbf{Input} parameter}
+\label{tutorials/new_tutorial/transform_in_parallel:globs-in-the-input-parameter}\label{tutorials/new_tutorial/transform_in_parallel:index-5}\label{tutorials/new_tutorial/transform_in_parallel:new-manual-globs-as-input}\begin{quote}
+\begin{itemize}
+\item {}
+As a syntactic convenience, \emph{Ruffus} also allows you to specify a \href{http://docs.python.org/library/glob.html}{\emph{glob}} pattern (e.g. \code{*.txt}) in the
+\textbf{Input} parameter.
+
+\item {}
+\href{http://docs.python.org/library/glob.html}{\emph{glob}} patterns automatically specify all matching file names as the \textbf{Input}.
+
+\item {}
+Any strings within \textbf{Input} which contain the characters \code{*?{[}{]}} will be treated as a \href{http://docs.python.org/library/glob.html}{\emph{glob}} pattern.
+
+\end{itemize}
+
+The first function in our initial \emph{Ruffus} pipeline example could have been written as:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} STAGE 1 fasta\PYGZhy{}\PYGZgt{}sam}
+\PYG{c}{\PYGZsh{}}
+\PYG{n+nd}{@transform}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{*.fasta}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{c}{\PYGZsh{} Input = glob}
+ \PYG{n}{suffix}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.fasta}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{c}{\PYGZsh{} suffix = .fasta}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.sam}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)} \PYG{c}{\PYGZsh{} Output suffix = .sam}
+\PYG{k}{def} \PYG{n+nf}{map\PYGZus{}dna\PYGZus{}sequence}\PYG{p}{(}\PYG{n}{input\PYGZus{}file}\PYG{p}{,}
+ \PYG{n}{output\PYGZus{}file}\PYG{p}{)}\PYG{p}{:}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZdq{}}
+\end{Verbatim}
+\end{quote}
+\end{quote}
+
+\index{Mixing tasks, globs and file names!Tutorial}\index{Tutorial!Mixing tasks, globs and file names}
+
+\subsection{Mixing Tasks and Globs in the \textbf{Input} parameter}
+\label{tutorials/new_tutorial/transform_in_parallel:index-6}\label{tutorials/new_tutorial/transform_in_parallel:mixing-tasks-and-globs-in-the-input-parameter}\begin{quote}
+
+\href{http://docs.python.org/library/glob.html}{\emph{glob}} patterns, references to tasks and file names strings
+can be mixed freely in (nested) python lists and tuples in the \textbf{Input} parameter.
+
+For example, a task function can chain to the \textbf{Output} from multiple upstream tasks:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n+nd}{@transform}\PYG{p}{(}\PYG{p}{[}\PYG{n}{task1}\PYG{p}{,} \PYG{n}{task2}\PYG{p}{,} \PYG{c}{\PYGZsh{} Input = multiple tasks}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{aa*.fasta}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{c}{\PYGZsh{}  + all files matching glob}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{zz.fasta}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{,} \PYG{c}{\PYGZsh{}  + file name}
+ \PYG{n}{suffix}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.fasta}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{c}{\PYGZsh{} suffix = .fasta}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.sam}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)} \PYG{c}{\PYGZsh{} Output suffix = .sam}
+\PYG{k}{def} \PYG{n+nf}{map\PYGZus{}dna\PYGZus{}sequence}\PYG{p}{(}\PYG{n}{input\PYGZus{}file}\PYG{p}{,}
+ \PYG{n}{output\PYGZus{}file}\PYG{p}{)}\PYG{p}{:}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZdq{}}
+\end{Verbatim}
+\end{quote}
+
+In all cases, \emph{Ruffus} tries to do the right thing, and to make the simple or
+obvious case require the simplest, least onerous syntax.
+
+If \emph{Ruffus} sometimes does not behave the way you expect, please write to the authors:
+it may be a bug!
+
+{\hyperref[tutorials/new_tutorial/pipeline_printout:new-manual-pipeline-printout]{\emph{Chapter 5: Understanding how your pipeline works with pipeline\_printout(...)}}} and
+{\hyperref[tutorials/new_tutorial/command_line:new-manual-cmdline]{\emph{Chapter 6: Running Ruffus from the command line with ruffus.cmdline}}} will show you how to
+make sure that your intentions are reflected in \emph{Ruffus} code.
+\end{quote}
+
+\index{originate!Tutorial}\index{Tutorial!originate}
+
+\section{\textbf{Chapter 4}: Creating files with \texttt{@originate}}
+\label{tutorials/new_tutorial/originate:index-0}\label{tutorials/new_tutorial/originate:new-manual-originate}\label{tutorials/new_tutorial/originate:new-manual-originate-chapter-num-creating-files-with-originate}\label{tutorials/new_tutorial/originate::doc}
+
+\strong{See also:}
+
+\begin{itemize}
+\item {}
+{\hyperref[tutorials/new_tutorial/manual_contents:new-manual-table-of-contents]{\emph{Manual Table of Contents}}}
+
+\item {}
+{\hyperref[decorators/originate:decorators-originate]{\emph{@originate syntax in detail}}}
+
+\end{itemize}
+
+
+
+\begin{notice}{note}{Note:}
+Remember to look at the example code:
+\begin{itemize}
+\item {}
+{\hyperref[tutorials/new_tutorial/originate_code:new-manual-originate-code]{\emph{Chapter 4: Python Code for Creating files with @originate}}}
+
+\end{itemize}
+\end{notice}
+
+
+\subsection{Simplifying our example with \emph{@originate}}
+\label{tutorials/new_tutorial/originate:simplifying-our-example-with-originate}\begin{quote}
+
+Our previous pipeline example started off with a set of files which we had to create first.
+
+This is a common task: pipelines have to start \emph{somewhere}.
+
+Ideally, though, we would only want to create these starting files if they didn't already exist. In other words, we want a sort of \code{@transform} which makes files from nothing (\code{None}?).
+
+This is exactly what {\hyperref[decorators/originate:decorators-originate]{\emph{@originate}}} helps you to do.
+
+Rewriting our pipeline with {\hyperref[decorators/originate:decorators-originate]{\emph{@originate}}} gives the following three steps:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{k+kn}{from} \PYG{n+nn}{ruffus} \PYG{k+kn}{import} \PYG{o}{*}
+
+\PYG{c}{\PYGZsh{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy [...]
+\PYG{c}{\PYGZsh{} create initial files}
+\PYG{c}{\PYGZsh{}}
+\PYG{n+nd}{@originate}\PYG{p}{(}\PYG{p}{[} \PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job1.a.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job1.b.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]}\PYG{p}{,}
+ \PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job2.a.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job2.b.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]}\PYG{p}{,}
+ \PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job3.a.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job3.b.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]} \PYG{p}{]}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{create\PYGZus{}initial\PYGZus{}file\PYGZus{}pairs}\PYG{p}{(}\PYG{n}{output\PYGZus{}files}\PYG{p}{)}\PYG{p}{:}
+ \PYG{c}{\PYGZsh{} create both files as necessary}
+ \PYG{k}{for} \PYG{n}{output\PYGZus{}file} \PYG{o+ow}{in} \PYG{n}{output\PYGZus{}files}\PYG{p}{:}
+ \PYG{k}{with} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)} \PYG{k}{as} \PYG{n}{oo}\PYG{p}{:} \PYG{k}{pass}
+
+\PYG{c}{\PYGZsh{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy [...]
+\PYG{c}{\PYGZsh{} first task}
+\PYG{n+nd}{@transform}\PYG{p}{(}\PYG{n}{create\PYGZus{}initial\PYGZus{}file\PYGZus{}pairs}\PYG{p}{,} \PYG{n}{suffix}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.start}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.output.1}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{first\PYGZus{}task}\PYG{p}{(}\PYG{n}{input\PYGZus{}files}\PYG{p}{,} \PYG{n}{output\PYGZus{}file}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{with} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{:} \PYG{k}{pass}
+
+
+\PYG{c}{\PYGZsh{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy [...]
+\PYG{c}{\PYGZsh{} second task}
+\PYG{n+nd}{@transform}\PYG{p}{(}\PYG{n}{first\PYGZus{}task}\PYG{p}{,} \PYG{n}{suffix}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.output.1}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.output.2}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{second\PYGZus{}task}\PYG{p}{(}\PYG{n}{input\PYGZus{}files}\PYG{p}{,} \PYG{n}{output\PYGZus{}file}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{with} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{:} \PYG{k}{pass}
+
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} Run}
+\PYG{c}{\PYGZsh{}}
+\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{p}{[}\PYG{n}{second\PYGZus{}task}\PYG{p}{]}\PYG{p}{)}
+\end{Verbatim}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+ Job = [None -\textgreater{} [job1.a.start, job1.b.start]] completed
+ Job = [None -\textgreater{} [job2.a.start, job2.b.start]] completed
+ Job = [None -\textgreater{} [job3.a.start, job3.b.start]] completed
+Completed Task = create\_initial\_file\_pairs
+ Job = [[job1.a.start, job1.b.start] -\textgreater{} job1.a.output.1] completed
+ Job = [[job2.a.start, job2.b.start] -\textgreater{} job2.a.output.1] completed
+ Job = [[job3.a.start, job3.b.start] -\textgreater{} job3.a.output.1] completed
+Completed Task = first\_task
+ Job = [job1.a.output.1 -\textgreater{} job1.a.output.2] completed
+ Job = [job2.a.output.1 -\textgreater{} job2.a.output.2] completed
+ Job = [job3.a.output.1 -\textgreater{} job3.a.output.2] completed
+Completed Task = second\_task
+\end{Verbatim}
+\end{quote}
+
+\index{pipeline\_printout!Tutorial}\index{Tutorial!pipeline\_printout}
+
+\section{\textbf{Chapter 5}: Understanding how your pipeline works with \emph{pipeline\_printout(...)}}
+\label{tutorials/new_tutorial/pipeline_printout:index-0}\label{tutorials/new_tutorial/pipeline_printout:new-manual-pipeline-printout}\label{tutorials/new_tutorial/pipeline_printout::doc}\label{tutorials/new_tutorial/pipeline_printout:new-manual-pipeline-printout-chapter-num-understanding-how-your-pipeline-works-with-pipeline-printout}
+
+\strong{See also:}
+
+\begin{itemize}
+\item {}
+{\hyperref[tutorials/new_tutorial/manual_contents:new-manual-table-of-contents]{\emph{Manual Table of Contents}}}
+
+\item {}
+{\hyperref[pipeline_functions:pipeline-functions-pipeline-printout]{\emph{pipeline\_printout(...)}}} syntax
+
+\item {}
+{\hyperref[tutorials/new_tutorial/pipeline_printout_code:new-manual-pipeline-printout-code]{\emph{Python Code for this chapter}}}
+
+\end{itemize}
+
+
+
+\begin{notice}{note}{Note:}\begin{itemize}
+\item {}
+\textbf{Whether you are learning or developing ruffus pipelines, your best friend is} {\hyperref[pipeline_functions:pipeline-functions-pipeline-printout]{\emph{pipeline\_printout(...)}}}
+\textbf{This shows the exact parameters and files as they are passed through the pipeline.}
+
+\item {}
+\textbf{We also} \emph{strongly} \textbf{recommend you use the} \code{Ruffus.cmdline} \textbf{convenience module which}
+\textbf{will take care of all the command line arguments for you. See} {\hyperref[tutorials/new_tutorial/command_line:new-manual-cmdline]{\emph{Chapter 6: Running Ruffus from the command line with ruffus.cmdline}}}.
+
+\end{itemize}
+\end{notice}
+
+
+\subsection{Printing out which jobs will be run}
+\label{tutorials/new_tutorial/pipeline_printout:printing-out-which-jobs-will-be-run}\begin{quote}
+
+{\hyperref[pipeline_functions:pipeline-functions-pipeline-printout]{\emph{pipeline\_printout(...)}}} takes the same parameters as pipeline\_run but just prints
+the tasks which are and are not up-to-date.
+
+The \code{verbose} parameter controls how much detail is displayed.
+
+Let us take the pipelined code we previously wrote in
+\textbf{Chapter 3} {\hyperref[tutorials/new_tutorial/transform_in_parallel_code:new-manual-transform-in-parallel-code]{\emph{More on @transform-ing data and @originate}}}
+but call {\hyperref[pipeline_functions:pipeline-functions-pipeline-printout]{\emph{pipeline\_printout(...)}}} instead of
+{\hyperref[pipeline_functions:pipeline-functions-pipeline-run]{\emph{pipeline\_run(...)}}}.
+This lists the tasks which will be run in the pipeline:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{k+kn}{import} \PYG{n+nn}{sys}
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{n}{pipeline\PYGZus{}printout}\PYG{p}{(}\PYG{n}{sys}\PYG{o}{.}\PYG{n}{stdout}\PYG{p}{,} \PYG{p}{[}\PYG{n}{second\PYGZus{}task}\PYG{p}{]}\PYG{p}{)}
+
+\PYG{g+go}{\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}}
+\PYG{g+go}{Tasks which will be run:}
+
+\PYG{g+go}{Task = create\PYGZus{}initial\PYGZus{}file\PYGZus{}pairs}
+\PYG{g+go}{Task = first\PYGZus{}task}
+\PYG{g+go}{Task = second\PYGZus{}task}
+\PYG{g+go}{\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}}
+\end{Verbatim}
+\end{quote}
+
+To see the input and output parameters of each job in the pipeline, try increasing the verbosity from the default (\code{1}) to \code{3}
+(See {\hyperref[tutorials/new_tutorial/pipeline_printout_code:new-manual-pipeline-printout-code]{\emph{code}}})
+
+This is very useful for checking that the input and output parameters have been specified correctly.
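+
+For example, a minimal sketch (using the same \code{second\_task} target as above):
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+# verbose = 3 also lists the Input and Output parameters of each out-of-date job
+pipeline_printout(sys.stdout, [second_task], verbose = 3)
+\end{Verbatim}
+\end{quote}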
+\end{quote}
+
+
+\subsection{Determining which jobs are out-of-date or not}
+\label{tutorials/new_tutorial/pipeline_printout:determining-which-jobs-are-out-of-date-or-not}\begin{quote}
+
+It is often useful to see which tasks are or are not up-to-date. For example, if we
+were to run the pipeline in full, and then modify one of the intermediate files, the
+pipeline would be partially out of date.
+
+Let us start by running the pipeline in full, and then modify \code{job1.a.output.1} so that the second task appears out-of-date:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{p}{[}\PYG{n}{second\PYGZus{}task}\PYG{p}{]}\PYG{p}{)}
+
+\PYG{c}{\PYGZsh{} \PYGZdq{}touch\PYGZdq{} job1.a.output.1}
+\PYG{n+nb}{open}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{job1.a.output.1}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{o}{.}\PYG{n}{close}\PYG{p}{(}\PYG{p}{)}
+\end{Verbatim}
+\end{quote}
+
+Run {\hyperref[pipeline_functions:pipeline-functions-pipeline-printout]{\emph{pipeline\_printout(...)}}} with a verbosity of \code{5}.
+
+This will tell you exactly why \code{second\_task(...)} needs to be re-run:
+because \code{job1.a.output.1} has a file modification time \emph{after} \code{job1.a.output.2} (highlighted):
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{n}{pipeline\PYGZus{}printout}\PYG{p}{(}\PYG{n}{sys}\PYG{o}{.}\PYG{n}{stdout}\PYG{p}{,} \PYG{p}{[}\PYG{n}{second\PYGZus{}task}\PYG{p}{]}\PYG{p}{,} \PYG{n}{verbose} \PYG{o}{=} \PYG{l+m+mi}{5}\PYG{p}{)}
+
+\PYG{g+go}{\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}}
+\PYG{g+go}{Tasks which are up\PYGZhy{}to\PYGZhy{}date:}
+
+\PYG{g+go}{Task = create\PYGZus{}initial\PYGZus{}file\PYGZus{}pairs}
+\PYG{g+go}{Task = first\PYGZus{}task}
+
+\PYG{g+go}{\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}}
+
+\PYG{g+go}{\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}}
+\PYG{g+go}{Tasks which will be run:}
+
+\PYG{g+go}{Task = second\PYGZus{}task}
+\PYG{g+go}{ Job = [job1.a.output.1}
+\PYG{g+go}{ \PYGZhy{}\PYGZgt{} job1.a.output.2]}
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{c}{\PYGZsh{} File modification times shown for out of date files}
+\PYG{g+go}{ Job needs update:}
+\PYG{g+go}{ Input files:}
+\PYG{g+go}{ * 22 Jul 2014 15:29:19.33: job1.a.output.1}
+\PYG{g+go}{ Output files:}
+\PYG{g+go}{ * 22 Jul 2014 15:29:07.53: job1.a.output.2}
+
+\PYG{g+go}{ Job = [job2.a.output.1}
+\PYG{g+go}{ \PYGZhy{}\PYGZgt{} job2.a.output.2]}
+\PYG{g+go}{ Job = [job3.a.output.1}
+\PYG{g+go}{ \PYGZhy{}\PYGZgt{} job3.a.output.2]}
+
+\PYG{g+go}{\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}}
+\end{Verbatim}
+\end{quote}
+
+N.B. At a verbosity of 5, even jobs which are up-to-date in \code{second\_task} are displayed.
+\end{quote}
+
+
+\subsection{Verbosity levels}
+\label{tutorials/new_tutorial/pipeline_printout:verbosity-levels}\begin{quote}
+
+The verbosity levels for {\hyperref[pipeline_functions:pipeline-functions-pipeline-printout]{\emph{pipeline\_printout(...)}}} and {\hyperref[pipeline_functions:pipeline-functions-pipeline-run]{\emph{pipeline\_run(...)}}}
+can be specified from \code{verbose = 0} (print out nothing) to the extreme verbosity of \code{verbose = 6}. A verbosity of 10 and above is reserved for the internal
+debugging of Ruffus:
+\begin{itemize}
+\item {}
+level \textbf{0} : \emph{nothing}
+
+\item {}
+level \textbf{1} : \emph{Out-of-date Task names}
+
+\item {}
+level \textbf{2} : \emph{All Tasks (including any task function docstrings)}
+
+\item {}
+level \textbf{3} : \emph{Out-of-date Jobs in Out-of-date Tasks, no explanation}
+
+\item {}
+level \textbf{4} : \emph{Out-of-date Jobs in Out-of-date Tasks, with explanations and warnings}
+
+\item {}
+level \textbf{5} : \emph{All Jobs in Out-of-date Tasks, (include only list of up-to-date tasks)}
+
+\item {}
+level \textbf{6} : \emph{All jobs in All Tasks whether out of date or not}
+
+\item {}
+level \textbf{10}: \emph{logs messages useful only for debugging ruffus pipeline code}
+
+\end{itemize}
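+
+For example, a brief sketch (using the example pipeline's \code{second\_task} as the target; any of the levels above can be substituted):
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+# print only the names of out-of-date tasks
+pipeline_printout(sys.stdout, [second_task], verbose = 1)
+
+# run the pipeline, explaining why each out-of-date job needs to be re-run
+pipeline_run(verbose = 4)
+\end{Verbatim}
+\end{quote}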
+\end{quote}
+
+
+\subsection{Abbreviating long file paths with \texttt{verbose\_abbreviated\_path}}
+\label{tutorials/new_tutorial/pipeline_printout:abbreviating-long-file-paths-with-verbose-abbreviated-path}\label{tutorials/new_tutorial/pipeline_printout:new-manual-pipeline-printout-verbose-abbreviated-path}\begin{quote}
+
+Pipelines often produce interminable lists of deeply nested filenames. It would be nice to be able to abbreviate this
+to just enough information to follow the progress.
+
+The \code{verbose\_abbreviated\_path} parameter specifies that {\hyperref[pipeline_functions:pipeline-functions-pipeline-printout]{\emph{pipeline\_printout(...)}}} and {\hyperref[pipeline_functions:pipeline-functions-pipeline-run]{\emph{pipeline\_run(...)}}} should display either
+\begin{quote}
+\begin{enumerate}
+\item {}
+only the innermost \code{NNN} levels of each nested file path, or
+
+\item {}
+the message truncated to a specified \code{MMM} characters (to fit onto a line, for example). \code{MMM} is specified by setting \code{verbose\_abbreviated\_path = -MMM}, i.e. negative values.
+
+Note that the number of characters specified covers just the separate lengths of the input and output parameters,
+not the entire indented line. You may need to specify a smaller limit than you expect (e.g. \code{60} rather than \code{80}).
+
+\end{enumerate}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n}{pipeline\PYGZus{}printout}\PYG{p}{(}\PYG{n}{verbose\PYGZus{}abbreviated\PYGZus{}path} \PYG{o}{=} \PYG{n}{NNN}\PYG{p}{)}
+\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{n}{verbose\PYGZus{}abbreviated\PYGZus{}path} \PYG{o}{=} \PYG{o}{\PYGZhy{}}\PYG{n}{MMM}\PYG{p}{)}
+\end{Verbatim}
+\end{quote}
+
+\code{verbose\_abbreviated\_path} defaults to \code{2}
+
+For example:
+\begin{quote}
+
+Given \code{{[}"aa/bb/cc/dddd.txt", "aaa/bbbb/cccc/eeed/eeee/ffff/gggg.txt"{]}}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+ \PYG{c}{\PYGZsh{} Original relative paths}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{[aa/bb/cc/dddd.txt, aaa/bbbb/cccc/eeed/eeee/ffff/gggg.txt]}\PYG{l+s}{\PYGZdq{}}
+
+ \PYG{c}{\PYGZsh{} Full abspath}
+ \PYG{n}{verbose\PYGZus{}abbreviated\PYGZus{}path} \PYG{o}{=} \PYG{l+m+mi}{0}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{[/test/ruffus/src/aa/bb/cc/dddd.txt, /test/ruffus/src/aaa/bbbb/cccc/eeed/eeee/ffff/gggg.txt]}\PYG{l+s}{\PYGZdq{}}
+
+ \PYG{c}{\PYGZsh{} Specified level of nested directories}
+ \PYG{n}{verbose\PYGZus{}abbreviated\PYGZus{}path} \PYG{o}{=} \PYG{l+m+mi}{1}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{[.../dddd.txt, .../gggg.txt]}\PYG{l+s}{\PYGZdq{}}
+
+ \PYG{n}{verbose\PYGZus{}abbreviated\PYGZus{}path} \PYG{o}{=} \PYG{l+m+mi}{2}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{[.../cc/dddd.txt, .../ffff/gggg.txt]}\PYG{l+s}{\PYGZdq{}}
+
+ \PYG{n}{verbose\PYGZus{}abbreviated\PYGZus{}path} \PYG{o}{=} \PYG{l+m+mi}{3}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{[.../bb/cc/dddd.txt, .../eeee/ffff/gggg.txt]}\PYG{l+s}{\PYGZdq{}}
+
+
+ \PYG{c}{\PYGZsh{} Truncated to MMM characters}
+ \PYG{n}{verbose\PYGZus{}abbreviated\PYGZus{}path} \PYG{o}{=} \PYG{o}{\PYGZhy{}}\PYG{l+m+mi}{60}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZlt{}???\PYGZgt{} /bb/cc/dddd.txt, aaa/bbbb/cccc/eeed/eeee/ffff/gggg.txt]}\PYG{l+s}{\PYGZdq{}}
+\end{Verbatim}
+\end{quote}
+\end{quote}
+
+
+\subsection{Getting a list of all tasks in a pipeline}
+\label{tutorials/new_tutorial/pipeline_printout:getting-a-list-of-all-tasks-in-a-pipeline}\begin{quote}
+
+If you just want a list of all tasks (Ruffus decorated function names), then you can
+just call {\hyperref[pipeline_functions:pipeline-functions-pipeline-get-task-names]{\emph{pipeline\_get\_task\_names(...)}}}.
+
+This doesn't touch any pipeline code or even check to see if the pipeline is connected up properly.
+
+However, it is sometimes useful to allow users at the command line to choose from a list of
+possible tasks as a target.
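+
+A minimal sketch (the printed list is what one would expect for the example pipeline from the previous chapters; the exact names and order depend on your own pipeline):
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+>>> pipeline_get_task_names()
+['create_initial_file_pairs', 'first_task', 'second_task']
+\end{Verbatim}
+\end{quote}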
+\end{quote}
+
+\index{command line!Tutorial}\index{Tutorial!command line}
+
+\section{\textbf{Chapter 6}: Running \emph{Ruffus} from the command line with ruffus.cmdline}
+\label{tutorials/new_tutorial/command_line:new-manual-cmdline-chapter-num-running-ruffus-from-the-command-line-with-ruffus-cmdline}\label{tutorials/new_tutorial/command_line:index-0}\label{tutorials/new_tutorial/command_line::doc}\label{tutorials/new_tutorial/command_line:new-manual-cmdline}
+
+\strong{See also:}
+
+\begin{itemize}
+\item {}
+{\hyperref[tutorials/new_tutorial/manual_contents:new-manual-table-of-contents]{\emph{Manual table of Contents}}}
+
+\end{itemize}
+
+
+
+We find that much of our \emph{Ruffus} pipeline code is built on the same template and this is generally
+a good place to start developing a new pipeline.
+
+From version 2.4, \emph{Ruffus} includes an optional \code{Ruffus.cmdline} module that provides
+support for a set of common command line arguments. This makes writing \emph{Ruffus} pipelines much more pleasant.
+\phantomsection\label{tutorials/new_tutorial/command_line:new-manual-cmdline-get-argparse}\phantomsection\label{tutorials/new_tutorial/command_line:new-manual-cmdline-run}
+
+\subsection{Template for argparse}
+\label{tutorials/new_tutorial/command_line:template-for-argparse}\label{tutorials/new_tutorial/command_line:new-manual-cmdline-run}\label{tutorials/new_tutorial/command_line:new-manual-cmdline-setup-logging}\label{tutorials/new_tutorial/command_line:new-manual-cmdline-get-argparse}\begin{quote}
+
+All you need to do is copy these 6 lines
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{k+kn}{import} \PYG{n+nn}{ruffus.cmdline} \PYG{k+kn}{as} \PYG{n+nn}{cmdline}
+
+\PYG{n}{parser} \PYG{o}{=} \PYG{n}{cmdline}\PYG{o}{.}\PYG{n}{get\PYGZus{}argparse}\PYG{p}{(}\PYG{n}{description}\PYG{o}{=}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{WHAT DOES THIS PIPELINE DO?}\PYG{l+s}{\PYGZsq{}}\PYG{p}{)}
+
+\PYG{c}{\PYGZsh{} \PYGZlt{}\PYGZlt{}\PYGZlt{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{} add your own command line options like \PYGZhy{}\PYGZhy{}input\PYGZus{}file here}
+\PYG{c}{\PYGZsh{} parser.add\PYGZus{}argument(\PYGZdq{}\PYGZhy{}\PYGZhy{}input\PYGZus{}file\PYGZdq{})}
+
+\PYG{n}{options} \PYG{o}{=} \PYG{n}{parser}\PYG{o}{.}\PYG{n}{parse\PYGZus{}args}\PYG{p}{(}\PYG{p}{)}
+
+\PYG{c}{\PYGZsh{} standard python logger which can be synchronised across concurrent Ruffus tasks}
+\PYG{n}{logger}\PYG{p}{,} \PYG{n}{logger\PYGZus{}mutex} \PYG{o}{=} \PYG{n}{cmdline}\PYG{o}{.}\PYG{n}{setup\PYGZus{}logging} \PYG{p}{(}\PYG{n}{\PYGZus{}\PYGZus{}name\PYGZus{}\PYGZus{}}\PYG{p}{,} \PYG{n}{options}\PYG{o}{.}\PYG{n}{log\PYGZus{}file}\PYG{p}{,} \PYG{n}{options}\PYG{o}{.}\PYG{n}{verbose}\PYG{p}{)}
+
+\PYG{c}{\PYGZsh{} \PYGZlt{}\PYGZlt{}\PYGZlt{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{} pipelined functions go here}
+
+\PYG{n}{cmdline}\PYG{o}{.}\PYG{n}{run} \PYG{p}{(}\PYG{n}{options}\PYG{p}{)}
+\end{Verbatim}
+
+We recommend using the standard \href{http://docs.python.org/2.7/library/argparse.html}{argparse} module,
+but the deprecated \href{http://docs.python.org/2.7/library/optparse.html}{optparse} module works as well. (See {\hyperref[tutorials/new_tutorial/command_line:code-template-optparse]{\emph{below}}} for the template.)
+\end{quote}
+
+
+\subsection{Command Line Arguments}
+\label{tutorials/new_tutorial/command_line:command-line-arguments}\begin{quote}
+
+\code{Ruffus.cmdline} by default provides these predefined options:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYGZhy{}v, \PYGZhy{}\PYGZhy{}verbose
+ \PYGZhy{}\PYGZhy{}version
+\PYGZhy{}L, \PYGZhy{}\PYGZhy{}log\PYGZus{}file
+
+ \PYG{c}{\PYGZsh{} tasks}
+\PYGZhy{}T, \PYGZhy{}\PYGZhy{}target\PYGZus{}tasks
+ \PYGZhy{}\PYGZhy{}forced\PYGZus{}tasks
+\PYGZhy{}j, \PYGZhy{}\PYGZhy{}jobs
+ \PYGZhy{}\PYGZhy{}use\PYGZus{}threads
+
+
+ \PYG{c}{\PYGZsh{} printout}
+\PYGZhy{}n, \PYGZhy{}\PYGZhy{}just\PYGZus{}print
+
+ \PYG{c}{\PYGZsh{} flow chart}
+ \PYGZhy{}\PYGZhy{}flowchart
+ \PYGZhy{}\PYGZhy{}key\PYGZus{}legend\PYGZus{}in\PYGZus{}graph
+ \PYGZhy{}\PYGZhy{}draw\PYGZus{}graph\PYGZus{}horizontally
+ \PYGZhy{}\PYGZhy{}flowchart\PYGZus{}format
+
+
+ \PYG{c}{\PYGZsh{} check sum}
+ \PYGZhy{}\PYGZhy{}touch\PYGZus{}files\PYGZus{}only
+ \PYGZhy{}\PYGZhy{}checksum\PYGZus{}file\PYGZus{}name
+ \PYGZhy{}\PYGZhy{}recreate\PYGZus{}database
+\end{Verbatim}
+\end{quote}
+\end{quote}
+
+
+\subsection{1) Logging}
+\label{tutorials/new_tutorial/command_line:logging}\begin{quote}
+
+The script provides for logging both to the command line:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+myscript \PYGZhy{}v
+myscript \PYGZhy{}\PYGZhy{}verbose
+\end{Verbatim}
+\end{quote}
+
+and an optional log file:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{c}{\PYGZsh{} keep tabs on yourself}
+myscript \PYGZhy{}\PYGZhy{}log\PYGZus{}file /var/log/secret.logbook
+\end{Verbatim}
+\end{quote}
+
+Logging is ignored if neither \code{-{-}verbose} nor \code{-{-}log\_file} is specified on the command line.
+
+\code{Ruffus.cmdline} automatically allows you to write to a shared log file via a proxy from multiple processes.
+However, you do need to use \code{logging\_mutex} for the log files to be synchronised properly across different jobs:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{k}{with} \PYG{n}{logging\PYGZus{}mutex}\PYG{p}{:}
+
+ \PYG{n}{logger\PYGZus{}proxy}\PYG{o}{.}\PYG{n}{info}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{Look Ma. No hands}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\end{Verbatim}
+\end{quote}
+
+Logging is set up so that you can write:
+\end{quote}
+
+
+\subsubsection{A) Only to the log file:}
+\label{tutorials/new_tutorial/command_line:a-only-to-the-log-file}\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n}{logger}\PYG{o}{.}\PYG{n}{info}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{A message}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\end{Verbatim}
+\end{quote}
+
+
+\subsubsection{B) Only to the display:}
+\label{tutorials/new_tutorial/command_line:b-only-to-the-display}\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n}{logger}\PYG{o}{.}\PYG{n}{debug}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{A message}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\end{Verbatim}
+\end{quote}
+
+
+\subsubsection{C) To both simultaneously:}
+\label{tutorials/new_tutorial/command_line:c-to-both-simultaneously}\label{tutorials/new_tutorial/command_line:new-manual-cmdline-message}\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{k+kn}{from} \PYG{n+nn}{ruffus.cmdline} \PYG{k+kn}{import} \PYG{n}{MESSAGE}
+
+\PYG{n}{logger}\PYG{o}{.}\PYG{n}{log}\PYG{p}{(}\PYG{n}{MESSAGE}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{A message}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\end{Verbatim}
+\end{quote}
+
+
+\subsection{2) Tracing pipeline progress}
+\label{tutorials/new_tutorial/command_line:tracing-pipeline-progress}\begin{quote}
+
+This is extremely useful for understanding what is happening with your pipeline, and which tasks and
+jobs are up-to-date, and so on.
+
+See {\hyperref[tutorials/new_tutorial/pipeline_printout:new-manual-pipeline-printout]{\emph{Chapter 5: Understanding how your pipeline works with pipeline\_printout(...)}}}
+
+To trace the pipeline, call the script with the following options:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{c}{\PYGZsh{} well\PYGZhy{}mannered, reserved}
+myscript \PYGZhy{}\PYGZhy{}just\PYGZus{}print
+myscript \PYGZhy{}n
+
+or
+
+\PYG{c}{\PYGZsh{} extremely loquacious}
+myscript \PYGZhy{}\PYGZhy{}just\PYGZus{}print \PYGZhy{}\PYGZhy{}verbose 5
+myscript \PYGZhy{}n \PYGZhy{}v5
+\end{Verbatim}
+\end{quote}
+
+Increasing levels of verbosity (\code{-{-}verbose} to \code{-{-}verbose 5}) provide more detailed output
+\end{quote}
+
+
+\subsection{3) Printing a flowchart}
+\label{tutorials/new_tutorial/command_line:printing-a-flowchart}\begin{quote}
+
+This is the subject of {\hyperref[tutorials/new_tutorial/pipeline_printout_graph:new-manual-pipeline-printout-graph]{\emph{Chapter 7: Displaying the pipeline visually with pipeline\_printout\_graph(...)}}}.
+
+Flowcharts can be specified using the following option:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+myscript \PYGZhy{}\PYGZhy{}flowchart xxxchart.svg
+\end{Verbatim}
+\end{quote}
+
+The extension of the flowchart file indicates what format the flowchart should take,
+for example, \code{svg}, \code{jpg} etc.
+
+This can be overridden with \code{-{-}flowchart\_format}.
+\end{quote}
+
+
+\subsection{4) Running in parallel on multiple processors}
+\label{tutorials/new_tutorial/command_line:running-in-parallel-on-multiple-processors}\begin{quote}
+
+Optionally specify the number of parallel strands of execution and the last \emph{target} task to run.
+The pipeline will run starting from any out-of-date tasks which precede the \emph{target} and proceed no further
+beyond the \emph{target}.
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+myscript \PYGZhy{}\PYGZhy{}jobs 15 \PYGZhy{}\PYGZhy{}target\PYGZus{}tasks \PYG{l+s+s2}{\PYGZdq{}final\PYGZus{}task\PYGZdq{}}
+myscript \PYGZhy{}j 15
+\end{Verbatim}
+\end{quote}
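+
+With \code{ruffus.cmdline}, \code{cmdline.run(options)} translates these options into the equivalent \code{pipeline\_run(...)} call, roughly:
+
+\begin{Verbatim}
+from ruffus import pipeline_run
+
+# roughly "myscript --jobs 15 --target_tasks final_task"
+# (cmdline.run(options) normally does this for you)
+pipeline_run(target_tasks=["final_task"], multiprocess=15)
+\end{Verbatim}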
+\end{quote}
+
+
+\subsection{5) Setup checkpointing so that \emph{Ruffus} knows which files are out of date}
+\label{tutorials/new_tutorial/command_line:setup-checkpointing-so-that-ruffus-knows-which-files-are-out-of-date}\begin{quote}
+
+The {\hyperref[tutorials/new_tutorial/checkpointing:new-manual-checkpointing]{\emph{checkpoint file}}} defaults to the value set in the
+environment variable \code{DEFAULT\_RUFFUS\_HISTORY\_FILE}.
+
+If this is not set, it will default to \code{.ruffus\_history.sqlite} in the current working directory.
+
+Either can be changed on the command line:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+myscript \PYGZhy{}\PYGZhy{}checksum\PYGZus{}file\PYGZus{}name mychecksum.sqlite
+\end{Verbatim}
+\end{quote}
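+
+The fallback order described above can be sketched as follows (this is handled for you by \code{ruffus.cmdline}; \code{pipeline\_run(...)} is assumed here to accept the file name via its \code{history\_file} parameter):
+
+\begin{Verbatim}
+import os
+from ruffus import pipeline_run
+
+# sketch of the documented fallback, not Ruffus internals:
+# use the environment variable if set, otherwise the default
+# name in the current working directory
+history_file = os.environ.get("DEFAULT_RUFFUS_HISTORY_FILE",
+                              ".ruffus_history.sqlite")
+
+pipeline_run(history_file=history_file)
+\end{Verbatim}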
+\end{quote}
+
+
+\subsubsection{Recreating checkpoints}
+\label{tutorials/new_tutorial/command_line:recreating-checkpoints}\begin{quote}
+
+Create or update the checkpoint file so that all existing files in completed jobs appear up to date.
+
+This will stop sensibly if the current state is incomplete or inconsistent:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n}{myscript} \PYG{o}{\PYGZhy{}}\PYG{o}{\PYGZhy{}}\PYG{n}{recreate\PYGZus{}database}
+\end{Verbatim}
+\end{quote}
+\end{quote}
+
+
+\subsubsection{Touch files}
+\label{tutorials/new_tutorial/command_line:touch-files}\begin{quote}
+
+As far as possible, create empty files with the correct timestamp to make the pipeline appear up to date.
+
+\begin{Verbatim}[commandchars=\\\{\}]
+myscript \PYGZhy{}\PYGZhy{}touch\PYGZus{}files\PYGZus{}only
+\end{Verbatim}
+\end{quote}
+
+
+\subsection{6) Skipping specified options}
+\label{tutorials/new_tutorial/command_line:skipping-specified-options}\begin{quote}
+
+Note that particular options can be skipped (not added to the command line) if they conflict with your own options, for example:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{c}{\PYGZsh{} see below for how to use get\PYGZus{}argparse}
+\PYG{n}{parser} \PYG{o}{=} \PYG{n}{cmdline}\PYG{o}{.}\PYG{n}{get\PYGZus{}argparse}\PYG{p}{(} \PYG{n}{description}\PYG{o}{=}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{WHAT DOES THIS PIPELINE DO?}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,}
+ \PYG{c}{\PYGZsh{} Exclude the following options: \PYGZhy{}\PYGZhy{}log\PYGZus{}file \PYGZhy{}\PYGZhy{}key\PYGZus{}legend\PYGZus{}in\PYGZus{}graph}
+ \PYG{n}{ignored\PYGZus{}args} \PYG{o}{=} \PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{log\PYGZus{}file}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{key\PYGZus{}legend\PYGZus{}in\PYGZus{}graph}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{)}
+\end{Verbatim}
+\end{quote}
+\end{quote}
+
+
+\subsection{7) Specifying verbosity and abbreviating long paths}
+\label{tutorials/new_tutorial/command_line:specifying-verbosity-and-abbreviating-long-paths}\begin{quote}
+
+The verbosity can be specified on the command line
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+myscript \PYGZhy{}\PYGZhy{}verbose 5
+
+\PYG{c}{\PYGZsh{} verbosity of 5 + 1 = 6}
+myscript \PYGZhy{}\PYGZhy{}verbose 5 \PYGZhy{}\PYGZhy{}verbose
+
+\PYG{c}{\PYGZsh{} verbosity reset to 2}
+myscript \PYGZhy{}\PYGZhy{}verbose 5 \PYGZhy{}\PYGZhy{}verbose \PYGZhy{}\PYGZhy{}verbose 2
+\end{Verbatim}
+\end{quote}
+
+If the printed paths are too long and need to be abbreviated, or, alternatively, if you want to see the full absolute paths of your input and output parameters,
+you can specify an extension to the verbosity. See the manual discussion of {\hyperref[tutorials/new_tutorial/pipeline_printout:new-manual-pipeline-printout-verbose-abbreviated-path]{\emph{verbose\_abbreviated\_path}}} for
+more details. This is specified as \code{-{-}verbose VERBOSITY:VERBOSE\_ABBREVIATED\_PATH} (no spaces!).
+
+For example:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+ \PYG{c}{\PYGZsh{} verbosity of 4}
+ myscript.py \PYGZhy{}\PYGZhy{}verbose 4
+
+ \PYG{c}{\PYGZsh{} display three levels of nested directories}
+ myscript.py \PYGZhy{}\PYGZhy{}verbose 4:3
+
+ \PYG{c}{\PYGZsh{} restrict input and output parameters to 60 letters}
+ myscript.py \PYGZhy{}\PYGZhy{}verbose 4:\PYGZhy{}60
+\end{Verbatim}
+\end{quote}
+\end{quote}
+
+
+\subsection{8) Displaying the version}
+\label{tutorials/new_tutorial/command_line:displaying-the-version}\begin{quote}
+
+Note that the version for your script will default to \code{"\%(prog)s 1.0"} unless specified:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n}{parser} \PYG{o}{=} \PYG{n}{cmdline}\PYG{o}{.}\PYG{n}{get\PYGZus{}argparse}\PYG{p}{(} \PYG{n}{description}\PYG{o}{=}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{WHAT DOES THIS PIPELINE DO?}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,}
+ \PYG{n}{version} \PYG{o}{=} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{my\PYGZus{}programme.py v. 2.23}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\end{Verbatim}
+\end{quote}
+\end{quote}
+
+
+\subsection{Template for optparse}
+\label{tutorials/new_tutorial/command_line:code-template-optparse}\label{tutorials/new_tutorial/command_line:template-for-optparse}\begin{quote}
+
+\code{optparse} has been deprecated since Python 2.7:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} Using optparse (new in python v 2.6)}
+\PYG{c}{\PYGZsh{}}
+\PYG{k+kn}{from} \PYG{n+nn}{ruffus} \PYG{k+kn}{import} \PYG{o}{*}
+
+\PYG{n}{parser} \PYG{o}{=} \PYG{n}{cmdline}\PYG{o}{.}\PYG{n}{get\PYGZus{}optgparse}\PYG{p}{(}\PYG{n}{version}\PYG{o}{=}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZpc{}}\PYG{l+s}{prog 1.0}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{n}{usage} \PYG{o}{=} \PYG{l+s}{\PYGZdq{}}\PYG{l+s+se}{\PYGZbs{}n}\PYG{l+s+se}{\PYGZbs{}n}\PYG{l+s}{ }\PYG{l+s}{\PYGZpc{}}\PYG{l+s}{prog [options]}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+
+\PYG{c}{\PYGZsh{} \PYGZlt{}\PYGZlt{}\PYGZlt{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{} add your own command line options like \PYGZhy{}\PYGZhy{}input\PYGZus{}file here}
+\PYG{c}{\PYGZsh{} parser.add\PYGZus{}option(\PYGZdq{}\PYGZhy{}i\PYGZdq{}, \PYGZdq{}\PYGZhy{}\PYGZhy{}input\PYGZus{}file\PYGZdq{}, dest=\PYGZdq{}input\PYGZus{}file\PYGZdq{}, help=\PYGZdq{}Input file\PYGZdq{})}
+
+\PYG{p}{(}\PYG{n}{options}\PYG{p}{,} \PYG{n}{remaining\PYGZus{}args}\PYG{p}{)} \PYG{o}{=} \PYG{n}{parser}\PYG{o}{.}\PYG{n}{parse\PYGZus{}args}\PYG{p}{(}\PYG{p}{)}
+
+\PYG{c}{\PYGZsh{} logger which can be passed to ruffus tasks}
+\PYG{n}{logger}\PYG{p}{,} \PYG{n}{logger\PYGZus{}mutex} \PYG{o}{=} \PYG{n}{cmdline}\PYG{o}{.}\PYG{n}{setup\PYGZus{}logging} \PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{this\PYGZus{}program}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{n}{options}\PYG{o}{.}\PYG{n}{log\PYGZus{}file}\PYG{p}{,} \PYG{n}{options}\PYG{o}{.}\PYG{n}{verbose}\PYG{p}{)}
+
+\PYG{c}{\PYGZsh{} \PYGZlt{}\PYGZlt{}\PYGZlt{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{} pipelined functions go here}
+
+\PYG{n}{cmdline}\PYG{o}{.}\PYG{n}{run} \PYG{p}{(}\PYG{n}{options}\PYG{p}{)}
+\end{Verbatim}
+\end{quote}
+\end{quote}
+
+\index{pipeline\_printout\_graph!Tutorial}\index{Tutorial!pipeline\_printout\_graph}
+
+\section{\textbf{Chapter 7}: Displaying the pipeline visually with \emph{pipeline\_printout\_graph(...)}}
+\label{tutorials/new_tutorial/pipeline_printout_graph:index-0}\label{tutorials/new_tutorial/pipeline_printout_graph:new-manual-pipeline-printout-graph-chapter-num-displaying-the-pipeline-visually-with-pipeline-printout-graph}\label{tutorials/new_tutorial/pipeline_printout_graph::doc}\label{tutorials/new_tutorial/pipeline_printout_graph:new-manual-pipeline-printout-graph}
+
+\strong{See also:}
+
+\begin{itemize}
+\item {}
+{\hyperref[tutorials/new_tutorial/manual_contents:new-manual-table-of-contents]{\emph{Manual Table of Contents}}}
+
+\item {}
+{\hyperref[pipeline_functions:pipeline-functions-pipeline-printout-graph]{\emph{pipeline\_printout\_graph(...)}}} syntax
+
+\item {}
+{\hyperref[decorators/graphviz:decorators-graphviz]{\emph{@graphviz(...)}}} syntax
+
+\end{itemize}
+
+
+
+\begin{notice}{note}{Note:}
+Remember to look at the example code:
+\begin{itemize}
+\item {}
+{\hyperref[tutorials/new_tutorial/pipeline_printout_graph_code:new-manual-pipeline-printout-graph-code]{\emph{Chapter 7: Python Code for Displaying the pipeline visually with pipeline\_printout\_graph(...)}}}
+
+\end{itemize}
+\end{notice}
+
+
+\subsection{Printing out a flowchart of our pipeline}
+\label{tutorials/new_tutorial/pipeline_printout_graph:printing-out-a-flowchart-of-our-pipeline}\begin{quote}
+
+It is all very well being able to trace the data flow through the pipeline as text.
+Sometimes, however, we need a bit of eye-candy!
+
+We can see a flowchart for our fledgling pipeline by executing:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n}{pipeline\PYGZus{}printout\PYGZus{}graph} \PYG{p}{(} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{flowchart.svg}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,}
+ \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{svg}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,}
+ \PYG{p}{[}\PYG{n}{second\PYGZus{}task}\PYG{p}{]}\PYG{p}{,}
+ \PYG{n}{no\PYGZus{}key\PYGZus{}legend} \PYG{o}{=} \PYG{n+nb+bp}{False}\PYG{p}{)}
+\end{Verbatim}
+
+\scalebox{0.700000}{\includegraphics{simple_tutorial_stage5_flowchart.png}}
+\end{quote}
+
+Flowcharts can be printed in a large number of formats including \code{jpg}, \code{svg}, \code{png} and \code{pdf}.
+
+\begin{notice}{note}{Note:}
+Flowcharts rely on the \code{dot} programme from \href{http://www.graphviz.org/}{Graphviz}.
+
+Please make sure this is installed.
+\end{notice}
+
+There are 8 standard colour schemes, but you can further customise all the colours to your satisfaction:
+\begin{quote}
+
+\includegraphics{flowchart_colour_schemes.png}
+\end{quote}
+
+See {\hyperref[tutorials/new_tutorial/flowchart_colours:new-manual-flowchart-colours]{\emph{here}}} for example code.
+\end{quote}
+
+
+\subsection{Command line options made easier with \texttt{ruffus.cmdline}}
+\label{tutorials/new_tutorial/pipeline_printout_graph:command-line-options-made-easier-with-ruffus-cmdline}\begin{quote}
+
+If you are using \code{ruffus.cmdline}, then you can easily ask for a flowchart from the command line:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+your\PYGZus{}script.py \PYGZhy{}\PYGZhy{}flowchart pipeline\PYGZus{}flow\PYGZus{}chart.png
+\end{Verbatim}
+\end{quote}
+
+The output format is deduced from the extension but can be specified manually:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{c}{\PYGZsh{} specify format. Otherwise, deduced from the extension}
+your\PYGZus{}script.py \PYGZhy{}\PYGZhy{}flowchart pipeline\PYGZus{}flow\PYGZus{}chart.png \PYGZhy{}\PYGZhy{}flowchart\PYGZus{}format png
+\end{Verbatim}
+\end{quote}
+
+Print the flow chart horizontally or vertically...
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{c}{\PYGZsh{} flowchart proceeds from left to right , rather than from top to bottom}
+your\PYGZus{}script.py \PYGZhy{}\PYGZhy{}flowchart pipeline\PYGZus{}flow\PYGZus{}chart.png \PYGZhy{}\PYGZhy{}draw\PYGZus{}graph\PYGZus{}horizontally
+\end{Verbatim}
+\end{quote}
+
+...with or without a key legend
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{c}{\PYGZsh{} Draw key legend}
+your\PYGZus{}script.py \PYGZhy{}\PYGZhy{}flowchart pipeline\PYGZus{}flow\PYGZus{}chart.png \PYGZhy{}\PYGZhy{}key\PYGZus{}legend\PYGZus{}in\PYGZus{}graph
+\end{Verbatim}
+\end{quote}
+\end{quote}
+
+
+\subsection{Horribly complicated pipelines!}
+\label{tutorials/new_tutorial/pipeline_printout_graph:horribly-complicated-pipelines}\begin{quote}
+
+Flowcharts are especially useful if you have really complicated pipelines, such as
+\begin{quote}
+
+\scalebox{0.700000}{\includegraphics{simple_tutorial_complex_flowchart.png}}
+\end{quote}
+\end{quote}
+
+
+\subsection{Circular dependency errors in pipelines!}
+\label{tutorials/new_tutorial/pipeline_printout_graph:circular-dependency-errors-in-pipelines}\begin{quote}
+
+Especially if the pipeline is not set up properly and vicious circular dependencies
+are present:
+\begin{quote}
+
+\scalebox{0.700000}{\includegraphics{simple_tutorial_complex_flowchart_error.png}}
+\end{quote}
+\end{quote}
+
+
+\subsection{\texttt{@graphviz}: Customising the appearance of each task}
+\label{tutorials/new_tutorial/pipeline_printout_graph:graphviz-customising-the-appearance-of-each-task}\begin{quote}
+
+The graphic for each task can be further customised as you please by adding
+\href{http://www.graphviz.org/doc/info/attrs.html}{graphviz attributes} such as the URL, shape, colour
+directly to that node using the decorator \code{@graphviz}.
+
+For example, we can customise the graphic for \code{myTask()} to look like:
+\begin{quote}
+
+\scalebox{0.300000}{\includegraphics{history_html_flowchart2.png}}
+\end{quote}
+
+by adding the requisite attributes as follows:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n+nd}{@graphviz}\PYG{p}{(}\PYG{n}{URL}\PYG{o}{=}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{http://cnn.com}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{n}{fillcolor} \PYG{o}{=} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZsh{}FFCCCC}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,}
+ \PYG{n}{color} \PYG{o}{=} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZsh{}FF0000}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{n}{pencolor}\PYG{o}{=}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZsh{}FF0000}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{n}{fontcolor}\PYG{o}{=}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZsh{}4B6000}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,}
+ \PYG{n}{label\PYGZus{}suffix} \PYG{o}{=} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{???}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{n}{label\PYGZus{}prefix} \PYG{o}{=} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{What is this?\PYGZlt{}BR/\PYGZgt{} }\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{n}{label} \PYG{o}{=} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZlt{}What \PYGZlt{}FONT COLOR=}\PYG{l+s+se}{\PYGZbs{}\PYGZdq{}}\PYG{l+s}{red}\PYG{l+s+se}{\PYGZbs{}\PYGZdq{}}\PYG{l+s}{\PYGZgt{}is\PYGZlt{}/FONT\PYGZgt{}this\PYGZgt{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{n}{shape}\PYG{o}{=} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{component}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{n}{height} \PYG{o}{=} \PYG{l+m+mf}{1.5}\PYG{p}{,} \PYG{n}{peripheries} \PYG{o}{=} \PYG{l+m+mi}{5}\PYG{p}{,}
+ \PYG{n}{style}\PYG{o}{=}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{dashed}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{Up\PYGZus{}to\PYGZus{}date\PYGZus{}task2}\PYG{p}{(}\PYG{n}{infile}\PYG{p}{,} \PYG{n}{outfile}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{pass}
+
+\PYG{c}{\PYGZsh{} Can use dictionary if you wish...}
+\PYG{n}{graphviz\PYGZus{}params} \PYG{o}{=} \PYG{p}{\PYGZob{}}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{URL}\PYG{l+s}{\PYGZdq{}}\PYG{p}{:}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{http://cnn.com}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{fontcolor}\PYG{l+s}{\PYGZdq{}}\PYG{p}{:} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZsh{}FF00FF}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZsq{}}\PYG{p}{\PYGZcb{}}
+\PYG{n+nd}{@graphviz}\PYG{p}{(}\PYG{o}{*}\PYG{o}{*}\PYG{n}{graphviz\PYGZus{}params}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{myTask}\PYG{p}{(}\PYG{n+nb}{input}\PYG{p}{,}\PYG{n}{output}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{pass}
+\end{Verbatim}
+
+You can even use HTML formatting in task names, including specifying line wraps (as in the above example),
+via the \code{label} parameter. However, HTML labels \textbf{must} be enclosed in \code{\textless{}} and \code{\textgreater{}}.
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n}{label} \PYG{o}{=} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZlt{}Line \PYGZlt{}BR/\PYGZgt{} wrapped task\PYGZus{}name()\PYGZgt{}}\PYG{l+s}{\PYGZdq{}}
+\end{Verbatim}
+\end{quote}
+
+Otherwise, you can also opt to keep the task name and wrap it with a prefix and suffix:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n}{label\PYGZus{}suffix} \PYG{o}{=} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{??? }\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{n}{label\PYGZus{}prefix} \PYG{o}{=} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{: What is this?}\PYG{l+s}{\PYGZdq{}}
+\end{Verbatim}
+\end{quote}
+The \code{URL} attribute allows the generation of clickable svg, and also client / server
+side image maps usable in web pages.
+See the \href{http://www.graphviz.org/content/output-formats\#dimap}{Graphviz documentation}.
+\end{quote}
+
+\index{formatter!Tutorial}\index{Tutorial!formatter}\index{suffix!Tutorial}\index{Tutorial!suffix}\index{regex!Tutorial}\index{Tutorial!regex}\index{output file names!Tutorial}\index{Tutorial!output file names}
+
+\section{\textbf{Chapter 8}: Specifying output file names with \emph{formatter()} and \emph{regex()}}
+\label{tutorials/new_tutorial/output_file_names:new-manual-output-file-names-chapter-num-specifying-output-file-names-with-formatter-and-regex}\label{tutorials/new_tutorial/output_file_names:index-0}\label{tutorials/new_tutorial/output_file_names:new-manual-output-file-names}\label{tutorials/new_tutorial/output_file_names::doc}
+
+\strong{See also:}
+
+\begin{itemize}
+\item {}
+{\hyperref[tutorials/new_tutorial/manual_contents:new-manual-table-of-contents]{\emph{Manual Table of Contents}}}
+
+\item {}
+{\hyperref[decorators/indicator_objects:decorators-suffix]{\emph{suffix()}}} syntax
+
+\item {}
+{\hyperref[decorators/indicator_objects:decorators-formatter]{\emph{formatter()}}} syntax
+
+\item {}
+{\hyperref[decorators/indicator_objects:decorators-regex]{\emph{regex()}}} syntax
+
+\end{itemize}
+
+
+
+\begin{notice}{note}{Note:}
+Remember to look at the example code:
+\begin{itemize}
+\item {}
+{\hyperref[tutorials/new_tutorial/output_file_names_code:new-manual-output-file-names-code]{\emph{Chapter 8: Python Code for Specifying output file names with formatter() and regex()}}}
+
+\end{itemize}
+\end{notice}
+
+
+\subsection{Review}
+\label{tutorials/new_tutorial/output_file_names:review}\begin{quote}
+
+\scalebox{0.500000}{\includegraphics{theoretical_pipeline_schematic.png}}
+
+Computational pipelines transform your data in stages until the final result is produced.
+The most straightforward way to use Ruffus is to hold the intermediate results after each stage
+in a series of files with related file names.
+
+Part of telling Ruffus how these pipeline stages or {\hyperref[glossary:term-task]{\emph{task}}} functions are connected
+together is to write simple rules for how the file names for each stage follow on from each other.
+Ruffus helps you to specify these file naming rules.
+
+\begin{notice}{note}{Note:}
+\textbf{The best way to design a pipeline is to:}
+\begin{itemize}
+\item {}
+\textbf{Write down the file names of the data as it flows across your pipeline.}
+Do these file names follow a \emph{pattern} ?
+
+\item {}
+\textbf{Write down the names of the functions which transform the data at each stage of the pipeline.}
+
+\end{itemize}
+\end{notice}
+\end{quote}
+
+
+\subsection{A different file name \emph{suffix()} for each pipeline stage}
+\label{tutorials/new_tutorial/output_file_names:new-manual-suffix}\label{tutorials/new_tutorial/output_file_names:a-different-file-name-suffix-for-each-pipeline-stage}\begin{quote}
+
+The easiest and cleanest way to write Ruffus pipelines is to use a different suffix
+for each stage of your pipeline.
+
+We used this approach in {\hyperref[tutorials/new_tutorial/introduction:new-manual-introduction]{\emph{Chapter 1: An introduction to basic Ruffus syntax}}} and in {\hyperref[tutorials/new_tutorial/transform_in_parallel_code:new-manual-transform-in-parallel-code]{\emph{code}}} from {\hyperref[tutorials/new_tutorial/transform_in_parallel:new-manual-transform-in-parallel]{\emph{Chapter 3: More on @transform-ing data}}}:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+ \PYG{c}{\PYGZsh{}Task Name: File suffices}
+ \PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{} \PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}
+ create\PYGZus{}initial\PYGZus{}file\PYGZus{}pairs *.start
+ first\PYGZus{}task *.output.1
+ second\PYGZus{}task *.output.2
+\end{Verbatim}
+
+There is a long-standing convention of using file suffixes to denote file type: for example, a \textbf{``compile''} task might convert \textbf{source} files of type \code{*.c} to \textbf{object} files of type \code{*.o}.
+\begin{description}
+\item[{We can think of Ruffus tasks as comprising:}] \leavevmode\begin{itemize}
+\item {}
+recipes in \code{@transform(...)} for transforming file names: changing \code{.c} to a \code{.o} (e.g. \code{AA.c -\textgreater{} AA.o} \code{BB.c -\textgreater{} BB.o})
+
+\item {}
+recipes in a task function \code{def foo\_bar()} for transforming your data: from \textbf{source} \code{.c} to \textbf{object} \code{.o}
+
+\end{itemize}
+
+\end{description}
+
+Let us review the Ruffus syntax for doing this:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+ at transform\PYG{o}{(} create\PYGZus{}initial\PYGZus{}file\PYGZus{}pairs, \PYG{c}{\PYGZsh{} Input: Name of previous task(s)}
+ suffix\PYG{o}{(}\PYG{l+s+s2}{\PYGZdq{}.start\PYGZdq{}}\PYG{o}{)}, \PYG{c}{\PYGZsh{} Matching suffix}
+ \PYG{l+s+s2}{\PYGZdq{}.output.1\PYGZdq{}}\PYG{o}{)} \PYG{c}{\PYGZsh{} Replacement string}
+def first\PYGZus{}task\PYG{o}{(}input\PYGZus{}files, output\PYGZus{}file\PYG{o}{)}:
+ with open\PYG{o}{(}output\PYGZus{}file, \PYG{l+s+s2}{\PYGZdq{}w\PYGZdq{}}\PYG{o}{)}: pass
+\end{Verbatim}
+\end{quote}
+\begin{enumerate}
+\item {}
+\textbf{Input}:
+\begin{quote}
+\begin{description}
+\item[{The first parameter for \code{@transform} can be a mixture of one or more:}] \leavevmode\begin{itemize}
+\item {}
+previous tasks (e.g. \code{create\_initial\_file\_pairs})
+
+\item {}
+file names (all python strings are treated as paths)
+
+\item {}
+glob specifications (e.g \code{*.c}, \code{/my/path/*.foo})
+
+\end{itemize}
+
+\end{description}
+
+Each element provides an input for the task. So if the previous task \code{create\_initial\_file\_pairs} has five outputs, the next \code{@transform} task will accept
+these as five separate inputs leading to five independent jobs.
+\end{quote}
+
+\item {}
+{\hyperref[decorators/indicator_objects:decorators-suffix]{\emph{suffix()}}}:
+\begin{quote}
+
+The second parameter \code{suffix(".start")} must match the end of the first string in each input.
+For example, if \code{create\_initial\_file\_pairs} produces the list \code{{[}'job1.a.start', 'job1.b.start'{]}}, then \code{suffix(".start")} must match the first string, i.e. \code{'job1.a.start'}.
+If the input is a nested structure, it is iterated through recursively to find the first string.
+
+\begin{notice}{note}{Note:}
+Inputs which do not match the suffix are discarded altogether.
+\end{notice}
+\end{quote}
+
+\item {}
+\textbf{Replacement}:
+\begin{quote}
+
+The third parameter is the replacement for the suffix.
+The pair of input strings in the example above produces the following output parameter:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n}{input\PYGZus{}parameters} \PYG{o}{=} \PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job1.a.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job1.b.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]}
+\PYG{n}{matching\PYGZus{}input} \PYG{o}{=} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job1.a.start}\PYG{l+s}{\PYGZsq{}}
+\PYG{n}{output\PYGZus{}parameter} \PYG{o}{=} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job1.a.output.1}\PYG{l+s}{\PYGZsq{}}
+\end{Verbatim}
+\end{quote}
+
+When the pipeline is run, this results in the following equivalent call to \code{first\_task(...)}:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n}{first\PYGZus{}task}\PYG{p}{(}\PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job1.a.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job1.b.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job1.a.output.1}\PYG{l+s}{\PYGZsq{}}\PYG{p}{)}
+\end{Verbatim}
+\end{quote}
+
+The replacement parameter can itself be a list or an arbitrarily complicated structure:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+ at transform\PYG{o}{(}create\PYGZus{}initial\PYGZus{}file\PYGZus{}pairs, \PYG{c}{\PYGZsh{} Input}
+ suffix\PYG{o}{(}\PYG{l+s+s2}{\PYGZdq{}.a.start\PYGZdq{}}\PYG{o}{)}, \PYG{c}{\PYGZsh{} Matching suffix}
+ \PYG{o}{[}\PYG{l+s+s2}{\PYGZdq{}.output.a.1\PYGZdq{}}, \PYG{l+s+s2}{\PYGZdq{}.output.b.1\PYGZdq{}}, 45\PYG{o}{]}\PYG{o}{)} \PYG{c}{\PYGZsh{} Replacement list}
+def first\PYGZus{}task\PYG{o}{(}input\PYGZus{}files, output\PYGZus{}parameters\PYG{o}{)}:
+ print \PYG{l+s+s2}{\PYGZdq{}input\PYGZus{}parameters = \PYGZdq{}}, input\PYGZus{}files
+ print \PYG{l+s+s2}{\PYGZdq{}output\PYGZus{}parameters = \PYGZdq{}}, output\PYGZus{}parameters
+\end{Verbatim}
+\end{quote}
+
+In which case, all the strings are used as replacements, other values are left untouched, and we obtain the following:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{c}{\PYGZsh{} job \PYGZsh{}1}
+\PYG{n+nv}{input} \PYG{o}{=} \PYG{o}{[}\PYG{l+s+s1}{\PYGZsq{}job1.a.start\PYGZsq{}}, \PYG{l+s+s1}{\PYGZsq{}job1.b.start\PYGZsq{}}\PYG{o}{]}
+\PYG{n+nv}{output} \PYG{o}{=} \PYG{o}{[}\PYG{l+s+s1}{\PYGZsq{}job1.output.a.1\PYGZsq{}}, \PYG{l+s+s1}{\PYGZsq{}job1.output.b.1\PYGZsq{}}, 45\PYG{o}{]}
+
+\PYG{c}{\PYGZsh{} job \PYGZsh{}2}
+\PYG{n+nv}{input} \PYG{o}{=} \PYG{o}{[}\PYG{l+s+s1}{\PYGZsq{}job2.a.start\PYGZsq{}}, \PYG{l+s+s1}{\PYGZsq{}job2.b.start\PYGZsq{}}\PYG{o}{]}
+\PYG{n+nv}{output} \PYG{o}{=} \PYG{o}{[}\PYG{l+s+s1}{\PYGZsq{}job2.output.a.1\PYGZsq{}}, \PYG{l+s+s1}{\PYGZsq{}job2.output.b.1\PYGZsq{}}, 45\PYG{o}{]}
+
+\PYG{c}{\PYGZsh{} job \PYGZsh{}3}
+\PYG{n+nv}{input} \PYG{o}{=} \PYG{o}{[}\PYG{l+s+s1}{\PYGZsq{}job3.a.start\PYGZsq{}}, \PYG{l+s+s1}{\PYGZsq{}job3.b.start\PYGZsq{}}\PYG{o}{]}
+\PYG{n+nv}{output} \PYG{o}{=} \PYG{o}{[}\PYG{l+s+s1}{\PYGZsq{}job3.output.a.1\PYGZsq{}}, \PYG{l+s+s1}{\PYGZsq{}job3.output.b.1\PYGZsq{}}, 45\PYG{o}{]}
+\end{Verbatim}
+\end{quote}
+
+Note how the task function is called with the value \code{45} \emph{verbatim} because it is not a string.
+\end{quote}
+
+\end{enumerate}
+\end{quote}
+
+
+\subsection{\emph{formatter()} manipulates pathnames and regular expressions}
+\label{tutorials/new_tutorial/output_file_names:formatter-manipulates-pathnames-and-regular-expression}\label{tutorials/new_tutorial/output_file_names:new-manual-formatter}\begin{quote}
+
+{\hyperref[decorators/indicator_objects:decorators-suffix]{\emph{suffix()}}} replacement is the cleanest and easiest way to generate suitable output file names for each stage in a pipeline.
+Often, however, we require more complicated manipulations to specify our file names.
+For example,
+\begin{itemize}
+\item {}
+It is common to have to change directories from a \emph{data} directory to a \emph{working} directory as the first step of a pipeline.
+
+\item {}
+Data management can be simplified by separating the files from each pipeline stage into their own directory.
+
+\item {}
+Information may have to be decoded from data file names, e.g. \code{"experiment373.IBM.03March2002.txt"}
+
+\end{itemize}
+
+Though {\hyperref[decorators/indicator_objects:decorators-formatter]{\emph{formatter()}}} is much more powerful, the principle and syntax are the same:
+we take string elements from the \textbf{Input} and perform some replacements to generate the \textbf{Output} parameters.
+
+{\hyperref[decorators/indicator_objects:decorators-formatter]{\emph{formatter()}}}
+\begin{itemize}
+\item {}
+Allows easy manipulation of path subcomponents in the style of \href{http://docs.python.org/2/library/os.path.html\#os.path.split}{os.path.split()}, and \href{http://docs.python.org/2/library/os.path.html\#os.path.basename}{os.path.basename}
+
+\item {}
+Uses familiar python \href{http://docs.python.org/2/library/string.html\#string-formatting}{string.format} syntax (see \href{http://docs.python.org/2/library/string.html\#format-examples}{string.format examples}).
+
+\item {}
+Supports optional regular expression (\href{http://docs.python.org/2/library/re.html\#re.MatchObject.group}{re}) matches including named captures.
+
+\item {}
+Can refer to any file path (i.e. python string) in each input and is not limited like {\hyperref[decorators/indicator_objects:decorators-suffix]{\emph{suffix()}}} to the first string.
+
+\item {}
+Can even refer to individual letters within a match
+
+\end{itemize}
+\end{quote}
+
+
+\subsubsection{Path name components}
+\label{tutorials/new_tutorial/output_file_names:path-name-components}\begin{quote}
+
+{\hyperref[decorators/indicator_objects:decorators-formatter]{\emph{formatter()}}} breaks down each input pathname into path name components which can then be recombined in whatever way you choose by the replacement string.
+
+Given an example string of:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n}{input\PYGZus{}string} \PYG{o}{=} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{/directory/to/a/file.name.ext}\PYG{l+s}{\PYGZdq{}}
+\PYG{n}{formatter}\PYG{p}{(}\PYG{p}{)}
+\end{Verbatim}
+
+the path components are:
+\begin{itemize}
+\item {}
+\code{basename}: The \href{http://docs.python.org/2/library/os.path.html\#os.path.basename}{base name} \emph{excluding} \href{http://docs.python.org/2/library/os.path.html\#os.path.splitext}{extension}, \code{"file.name"}
+
+\item {}
+\code{ext} : The \href{http://docs.python.org/2/library/os.path.html\#os.path.splitext}{extension}, \code{".ext"}
+
+\item {}
+\code{path} : The \href{http://docs.python.org/2/library/os.path.html\#os.path.dirname}{dirname}, \code{"/directory/to/a"}
+
+\item {}
+\code{subdir} : A list of sub-directories in the \code{path} in reverse order, \code{{[}"a", "to", "directory", "/"{]}}
+
+\item {}
+\code{subpath} : A list of descending sub-paths in reverse order, \code{{[}"/directory/to/a", "/directory/to", "/directory", "/"{]}}
+
+\end{itemize}
+\end{quote}
+
+The replacement string refers to these components by using python \href{http://docs.python.org/2/library/string.html\#string-formatting}{string.format} style curly braces, e.g. \code{"\{NAME\}"}.
+
+We refer to an element from the Nth input string by index, for example:
+\begin{itemize}
+\item {}
+\code{"\{ext{[}0{]}\}"} is the extension of the first file name string in \textbf{Input}.
+
+\item {}
+\code{"\{basename{[}1{]}\}"} is the basename of the second file name in \textbf{Input}.
+
+\item {}
+\code{"\{basename{[}1{]}{[}0:3{]}\}"} are the first three letters from the basename of the second file name in \textbf{Input}.
+
+\end{itemize}
+
+\code{subdir} and \code{subpath} were designed to help you navigate directory hierarchies with the minimum of fuss.
+For example, you might want to graft a hierarchical path onto another location:
+\code{"\{subpath{[}0{]}{[}2{]}\}/from/\{subdir{[}0{]}{[}0{]}\}/\{basename{[}0{]}\}"} neatly replaces just one directory (\code{"to"}) in the path with another (\code{"from"}):
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n}{replacement\PYGZus{}string} \PYG{o}{=} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}subpath[0][2]\PYGZcb{}/from/\PYGZob{}subdir[0][0]\PYGZcb{}/\PYGZob{}basename[0]\PYGZcb{}}\PYG{l+s}{\PYGZdq{}}
+
+\PYG{n}{input\PYGZus{}string} \PYG{o}{=} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{/directory/to/a/file.name.ext}\PYG{l+s}{\PYGZdq{}}
+\PYG{n}{result\PYGZus{}string} \PYG{o}{=} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{/directory/from/a/file.name.ext}\PYG{l+s}{\PYGZdq{}}
+\end{Verbatim}
+\end{quote}
+\end{quote}
+
+
+\subsubsection{Filter and parse using regular expressions}
+\label{tutorials/new_tutorial/output_file_names:new-manual-formatter-regex}\label{tutorials/new_tutorial/output_file_names:filter-and-parse-using-regular-expressions}\begin{quote}
+
+\href{http://docs.python.org/2/library/re.html\#re.MatchObject.group}{Regular expression} matches can be used with a similar syntax.
+Our example string can be parsed using the following regular expression:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n}{input\PYGZus{}string} \PYG{o}{=} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{/directory/to/a/file.name.ext}\PYG{l+s}{\PYGZdq{}}
+\PYG{n}{formatter}\PYG{p}{(}\PYG{l+s}{r\PYGZdq{}}\PYG{l+s}{/directory/(.+)/(?P\PYGZlt{}MYFILENAME\PYGZgt{}.+)}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{.ext}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\end{Verbatim}
+
+We capture part of the path using \code{(.+)}, and the base name using the named group \code{(?P\textless{}MYFILENAME\textgreater{}.+)}.
+These \href{http://docs.python.org/2/library/re.html\#re.MatchObject.group}{matching subgroups} can be referred to by index,
+but, for greater clarity, the second (named) capture can also be referred to by name, i.e. \code{\{MYFILENAME\}}.
+\end{quote}
+
+The regular expression components for the first string can thus be referred to as follows:
+\begin{itemize}
+\item {}
+\code{\{0{[}0{]}\}} : The entire match captured by index, \code{"/directory/to/a/file.name.ext"}
+
+\item {}
+\code{\{1{[}0{]}\}} : The first match captured by index, \code{"to/a"}
+
+\item {}
+\code{\{2{[}0{]}\}} : The second match captured by index, \code{"file.name"}
+
+\item {}
+\code{\{MYFILENAME{[}0{]}\}} : The match captured by name, \code{"file.name"}
+
+\end{itemize}
+
+If each input consists of a list of paths such as \code{{[}'job1.a.start', 'job1.b.start', 'job1.c.start'{]}}, we can match each of them separately
+by using as many regular expressions as necessary. For example:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n}{input\PYGZus{}string} \PYG{o}{=} \PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job1.a.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job1.b.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job1.c.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]}
+\PYG{c}{\PYGZsh{} Regular expression matches for 1st, 2nd but not 3rd element}
+\PYG{n}{formatter}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.+a.start}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{b.start\PYGZdl{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\end{Verbatim}
+\end{quote}
+
+Or if you only wanted regular expression matches for the second file name (string), pad with \code{None}:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n}{input\PYGZus{}string} \PYG{o}{=} \PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job1.a.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job1.b.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job1.c.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]}
+\PYG{c}{\PYGZsh{} Regular expression matches for 2nd but not 1st or 3rd elements}
+\PYG{n}{formatter}\PYG{p}{(}\PYG{n+nb+bp}{None}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{b.start\PYGZdl{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\end{Verbatim}
+\end{quote}
+\end{quote}
+
+
+\subsubsection{Using \emph{@transform()} with \emph{formatter()}}
+\label{tutorials/new_tutorial/output_file_names:using-transform-with-formatter}\begin{quote}
+
+We can put these together in the following example:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{k+kn}{from} \PYG{n+nn}{ruffus} \PYG{k+kn}{import} \PYG{o}{*}
+
+\PYG{c}{\PYGZsh{} create initial files}
+\PYG{n+nd}{@originate}\PYG{p}{(}\PYG{p}{[} \PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job1.a.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job1.b.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]}\PYG{p}{,}
+ \PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job2.a.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job2.b.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]}\PYG{p}{,}
+ \PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job3.a.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job3.c.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]} \PYG{p}{]}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{create\PYGZus{}initial\PYGZus{}file\PYGZus{}pairs}\PYG{p}{(}\PYG{n}{output\PYGZus{}files}\PYG{p}{)}\PYG{p}{:}
+ \PYG{c}{\PYGZsh{} create both files as necessary}
+ \PYG{k}{for} \PYG{n}{output\PYGZus{}file} \PYG{o+ow}{in} \PYG{n}{output\PYGZus{}files}\PYG{p}{:}
+ \PYG{k}{with} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)} \PYG{k}{as} \PYG{n}{oo}\PYG{p}{:} \PYG{k}{pass}
+
+
+\PYG{c}{\PYGZsh{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}}
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} formatter}
+\PYG{c}{\PYGZsh{}}
+
+\PYG{c}{\PYGZsh{} first task}
+\PYG{n+nd}{@transform}\PYG{p}{(}\PYG{n}{create\PYGZus{}initial\PYGZus{}file\PYGZus{}pairs}\PYG{p}{,} \PYG{c}{\PYGZsh{} Input}
+
+ \PYG{n}{formatter}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.+/job(?P\PYGZlt{}JOBNUMBER\PYGZgt{}}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{d+).a.start}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{c}{\PYGZsh{} Extract job number}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.+/job[123].b.start}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{c}{\PYGZsh{} Match only \PYGZdq{}b\PYGZdq{} files}
+
+ \PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}path[0]\PYGZcb{}/jobs\PYGZob{}JOBNUMBER[0]\PYGZcb{}.output.a.1}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{c}{\PYGZsh{} Replacement list}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}path[1]\PYGZcb{}/jobs\PYGZob{}JOBNUMBER[0]\PYGZcb{}.output.b.1}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+m+mi}{45}\PYG{p}{]}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{first\PYGZus{}task}\PYG{p}{(}\PYG{n}{input\PYGZus{}files}\PYG{p}{,} \PYG{n}{output\PYGZus{}parameters}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{print} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{input\PYGZus{}parameters = }\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{n}{input\PYGZus{}files}
+ \PYG{k}{print} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{output\PYGZus{}parameters = }\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{n}{output\PYGZus{}parameters}
+
+
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} Run}
+\PYG{c}{\PYGZsh{}}
+\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{n}{verbose}\PYG{o}{=}\PYG{l+m+mi}{0}\PYG{p}{)}
+\end{Verbatim}
+
+This produces:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{g+go}{input\PYGZus{}parameters = [\PYGZsq{}job1.a.start\PYGZsq{},}
+\PYG{g+go}{ \PYGZsq{}job1.b.start\PYGZsq{}]}
+\PYG{g+go}{output\PYGZus{}parameters = [\PYGZsq{}/home/lg/src/temp/jobs1.output.a.1\PYGZsq{},}
+\PYG{g+go}{ \PYGZsq{}/home/lg/src/temp/jobs1.output.b.1\PYGZsq{}, 45]}
+
+\PYG{g+go}{input\PYGZus{}parameters = [\PYGZsq{}job2.a.start\PYGZsq{},}
+\PYG{g+go}{ \PYGZsq{}job2.b.start\PYGZsq{}]}
+\PYG{g+go}{output\PYGZus{}parameters = [\PYGZsq{}/home/lg/src/temp/jobs2.output.a.1\PYGZsq{},}
+\PYG{g+go}{ \PYGZsq{}/home/lg/src/temp/jobs2.output.b.1\PYGZsq{}, 45]}
+\end{Verbatim}
+
+Notice that \code{job3} has \code{'job3.c.start'} as the second file.
+This fails to match the regular expression and is discarded.
+
+\begin{notice}{note}{Note:}
+Inputs which fail to match the regular expression are ignored.
+
+{\hyperref[decorators/indicator_objects:decorators-formatter]{\emph{formatter()}}} regular expressions are thus very useful in filtering out all
+files which do not match your specified criteria.
+
+If some of your task inputs have a mixture of different file types, a simple \code{formatter(".txt\$")}, for example, will make
+your code a lot simpler...
+\end{notice}
+\end{quote}
+\end{quote}
+
+
+\subsubsection{String substitution for ``extra'' arguments}
+\label{tutorials/new_tutorial/output_file_names:string-substitution-for-extra-arguments}\begin{quote}
+
+The first two arguments for Ruffus task functions are special because they are the \textbf{Input} and \textbf{Output}
+parameters which link different stages of a pipeline.
+
+Python strings in these arguments are names of data files whose modification times indicate whether the pipeline is up to date or not.
+
+Other arguments to task functions are not passed down the pipeline but consumed.
+Any python strings they contain do not need to be file names. These extra arguments are very useful
+for passing data to pipelined tasks, such as shared values, loggers, programme options etc.
+
+One helpful feature is that strings in these extra arguments are also subject to {\hyperref[decorators/indicator_objects:decorators-formatter]{\emph{formatter()}}} string substitution.
+This means you can leverage the parsing capabilities of Ruffus to decode information about the pipeline data files,
+such as the directories you are running in and parts of the file names.
+
+For example, if we wanted to know which files go with which ``job number'' in the previous example:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{k+kn}{from} \PYG{n+nn}{ruffus} \PYG{k+kn}{import} \PYG{o}{*}
+
+\PYG{c}{\PYGZsh{} create initial files}
+\PYG{n+nd}{@originate}\PYG{p}{(}\PYG{p}{[} \PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job1.a.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job1.b.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]}\PYG{p}{,}
+ \PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job2.a.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job2.b.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]}\PYG{p}{,}
+ \PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job3.a.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job3.c.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]} \PYG{p}{]}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{create\PYGZus{}initial\PYGZus{}file\PYGZus{}pairs}\PYG{p}{(}\PYG{n}{output\PYGZus{}files}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{for} \PYG{n}{output\PYGZus{}file} \PYG{o+ow}{in} \PYG{n}{output\PYGZus{}files}\PYG{p}{:}
+ \PYG{k}{with} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)} \PYG{k}{as} \PYG{n}{oo}\PYG{p}{:} \PYG{k}{pass}
+
+
+\PYG{c}{\PYGZsh{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}}
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} print job number as an extra argument}
+\PYG{c}{\PYGZsh{}}
+
+\PYG{c}{\PYGZsh{} first task}
+\PYG{n+nd}{@transform}\PYG{p}{(}\PYG{n}{create\PYGZus{}initial\PYGZus{}file\PYGZus{}pairs}\PYG{p}{,} \PYG{c}{\PYGZsh{} Input}
+
+ \PYG{n}{formatter}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.+/job(?P\PYGZlt{}JOBNUMBER\PYGZgt{}}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{d+).a.start}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{c}{\PYGZsh{} Extract job number}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.+/job[123].b.start}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{c}{\PYGZsh{} Match only \PYGZdq{}b\PYGZdq{} files}
+
+ \PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}path[0]\PYGZcb{}/jobs\PYGZob{}JOBNUMBER[0]\PYGZcb{}.output.a.1}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{c}{\PYGZsh{} Replacement list}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}path[1]\PYGZcb{}/jobs\PYGZob{}JOBNUMBER[0]\PYGZcb{}.output.b.1}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{,}
+
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}JOBNUMBER[0]\PYGZcb{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{first\PYGZus{}task}\PYG{p}{(}\PYG{n}{input\PYGZus{}files}\PYG{p}{,} \PYG{n}{output\PYGZus{}parameters}\PYG{p}{,} \PYG{n}{job\PYGZus{}number}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{print} \PYG{n}{job\PYGZus{}number}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{:}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{n}{input\PYGZus{}files}
+
+
+\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{n}{verbose}\PYG{o}{=}\PYG{l+m+mi}{0}\PYG{p}{)}
+\end{Verbatim}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{n}{verbose}\PYG{o}{=}\PYG{l+m+mi}{0}\PYG{p}{)}
+\PYG{g+go}{1 : [\PYGZsq{}job1.a.start\PYGZsq{}, \PYGZsq{}job1.b.start\PYGZsq{}]}
+\PYG{g+go}{2 : [\PYGZsq{}job2.a.start\PYGZsq{}, \PYGZsq{}job2.b.start\PYGZsq{}]}
+\end{Verbatim}
+\end{quote}
+\end{quote}
+
+
+\subsubsection{Changing directories using \emph{formatter()} in a zoo...}
+\label{tutorials/new_tutorial/output_file_names:changing-directories-using-formatter-in-a-zoo}\label{tutorials/new_tutorial/output_file_names:new-manual-output-file-names-formatter-zoo}\begin{quote}
+
+Here is a more fun example. We would like to feed the denizens of a zoo. Unfortunately, the file names for
+these are spread over several directories. Ideally, we would like their food supply to be grouped more
+sensibly. And, of course, we only want to feed the animals, not the plants.
+
+I have colour coded the input and output files for this task to show how we would like to rearrange them:
+\begin{quote}
+
+\scalebox{0.500000}{\includegraphics{simple_tutorial_zoo_animals_formatter_example.jpg}}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{k+kn}{from} \PYG{n+nn}{ruffus} \PYG{k+kn}{import} \PYG{o}{*}
+
+\PYG{c}{\PYGZsh{} Make directories}
+\PYG{n+nd}{@mkdir}\PYG{p}{(}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{tiger}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{lion}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{dog}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{crocodile}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{rose}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{)}
+
+\PYG{n+nd}{@originate}\PYG{p}{(}
+ \PYG{c}{\PYGZsh{} List of animals and plants}
+ \PYG{p}{[} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{tiger/mammals.wild.animals}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{lion/mammals.wild.animals}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{lion/mammals.handreared.animals}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{dog/mammals.tame.animals}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{dog/mammals.wild.animals}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{crocodile/reptiles.wild.animals}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{rose/flowering.handreared.plants}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{create\PYGZus{}initial\PYGZus{}files}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{with} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)} \PYG{k}{as} \PYG{n}{oo}\PYG{p}{:} \PYG{k}{pass}
+
+
+\PYG{c}{\PYGZsh{} Put different animals in different directories depending on their clade}
+\PYG{n+nd}{@transform}\PYG{p}{(}\PYG{n}{create\PYGZus{}initial\PYGZus{}files}\PYG{p}{,} \PYG{c}{\PYGZsh{} Input}
+
+ \PYG{n}{formatter}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.+/(?P\PYGZlt{}clade\PYGZgt{}}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{w+).(?P\PYGZlt{}tame\PYGZgt{}}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{w+).animals}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{c}{\PYGZsh{} Only animals: ignore plants!}
+
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}subpath[0][1]\PYGZcb{}/\PYGZob{}clade[0]\PYGZcb{}/\PYGZob{}tame[0]\PYGZcb{}.\PYGZob{}subdir[0][0]\PYGZcb{}.food}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{c}{\PYGZsh{} Replacement}
+
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}subpath[0][1]\PYGZcb{}/\PYGZob{}clade[0]\PYGZcb{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{c}{\PYGZsh{} new\PYGZus{}directory}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}subdir[0][0]\PYGZcb{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{c}{\PYGZsh{} animal\PYGZus{}name}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}tame[0]\PYGZcb{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)} \PYG{c}{\PYGZsh{} tameness}
+\PYG{k}{def} \PYG{n+nf}{feed}\PYG{p}{(}\PYG{n}{input\PYGZus{}file}\PYG{p}{,} \PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{n}{new\PYGZus{}directory}\PYG{p}{,} \PYG{n}{animal\PYGZus{}name}\PYG{p}{,} \PYG{n}{tameness}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{print} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{Food for the \PYGZob{}tameness:11s\PYGZcb{} \PYGZob{}animal\PYGZus{}name:9s\PYGZcb{} = \PYGZob{}output\PYGZus{}file:90s\PYGZcb{} will be placed in \PYGZob{}new\PYGZus{}directory\PYGZcb{}}\PYG{l+s}{\PYGZdq{}}\PYG{o}{.}\PYG{n}{format}\PYG{p}{(}\PYG{o}{*}\PYG{o}{*}\PYG{n+nb}{locals}\PYG{p}{(}\PYG{p}{)}\PYG{p}{)}
+
+\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{n}{verbose}\PYG{o}{=}\PYG{l+m+mi}{0}\PYG{p}{)}
+\end{Verbatim}
+
+We can see that the food for each animal is now grouped by clade in the same directory, which makes a lot more sense...
+
+Note how we used \code{subpath{[}0{]}{[}1{]}} to strip one directory level from the end of the input path when building the new file name.
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{n}{verbose}\PYG{o}{=}\PYG{l+m+mi}{0}\PYG{p}{)}
+\PYG{g+go}{Food for the wild crocodile = ./reptiles/wild.crocodile.food will be placed in ./reptiles}
+\PYG{g+go}{Food for the tame dog = ./mammals/tame.dog.food will be placed in ./mammals}
+\PYG{g+go}{Food for the wild dog = ./mammals/wild.dog.food will be placed in ./mammals}
+\PYG{g+go}{Food for the handreared lion = ./mammals/handreared.lion.food will be placed in ./mammals}
+\PYG{g+go}{Food for the wild lion = ./mammals/wild.lion.food will be placed in ./mammals}
+\PYG{g+go}{Food for the wild tiger = ./mammals/wild.tiger.food will be placed in ./mammals}
+\end{Verbatim}
+\end{quote}
+\end{quote}
+
+
+\subsection{\emph{regex()} manipulates via regular expressions}
+\label{tutorials/new_tutorial/output_file_names:regex-manipulates-via-regular-expressions}\label{tutorials/new_tutorial/output_file_names:new-manual-regex}\begin{quote}
+
+If you are a hard core regular expressions fan, you may want to use {\hyperref[decorators/indicator_objects:decorators-regex]{\emph{regex()}}} instead of {\hyperref[decorators/indicator_objects:decorators-suffix]{\emph{suffix()}}} or {\hyperref[decorators/indicator_objects:decorators-formatter]{\emph{formatter()}}}.
+
+\begin{notice}{note}{Note:}
+{\hyperref[decorators/indicator_objects:decorators-regex]{\emph{regex()}}} uses regular expressions like {\hyperref[decorators/indicator_objects:decorators-formatter]{\emph{formatter()}}} but
+\begin{itemize}
+\item {}
+It only matches the first file name in the input. As described above, {\hyperref[decorators/indicator_objects:decorators-formatter]{\emph{formatter()}}} can match any one or more of the input filename strings.
+
+\item {}
+It does not understand file paths so you may have to perform your own directory / file name parsing.
+
+\item {}
+String replacement uses syntax borrowed from \href{http://docs.python.org/2/library/re.html\#re.sub}{re.sub()}, rather than building a result from parsed regular expression (and file path) components
+
+\end{itemize}
+
+In general, {\hyperref[decorators/indicator_objects:decorators-formatter]{\emph{formatter()}}}, introduced in version 2.4, is more powerful and is intended to be a more user-friendly replacement for {\hyperref[decorators/indicator_objects:decorators-regex]{\emph{regex()}}}.
+\end{notice}
+
+Let us see how the previous zoo example looks with {\hyperref[decorators/indicator_objects:decorators-regex]{\emph{regex()}}}:
+\begin{quote}
+
+{\hyperref[decorators/indicator_objects:decorators-formatter]{\emph{formatter()}}} code:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{c}{\PYGZsh{} Put different animals in different directories depending on their clade}
+\PYG{n+nd}{@transform}\PYG{p}{(}\PYG{n}{create\PYGZus{}initial\PYGZus{}files}\PYG{p}{,} \PYG{c}{\PYGZsh{} Input}
+
+ \PYG{n}{formatter}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.+/(?P\PYGZlt{}clade\PYGZgt{}}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{w+).(?P\PYGZlt{}tame\PYGZgt{}}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{w+).animals}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{c}{\PYGZsh{} Only animals: ignore plants!}
+
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}subpath[0][1]\PYGZcb{}/\PYGZob{}clade[0]\PYGZcb{}/\PYGZob{}tame[0]\PYGZcb{}.\PYGZob{}subdir[0][0]\PYGZcb{}.food}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{c}{\PYGZsh{} Replacement}
+
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}subpath[0][1]\PYGZcb{}/\PYGZob{}clade[0]\PYGZcb{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{c}{\PYGZsh{} new\PYGZus{}directory}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}subdir[0][0]\PYGZcb{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{c}{\PYGZsh{} animal\PYGZus{}name}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}tame[0]\PYGZcb{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)} \PYG{c}{\PYGZsh{} tameness}
+\PYG{k}{def} \PYG{n+nf}{feed}\PYG{p}{(}\PYG{n}{input\PYGZus{}file}\PYG{p}{,} \PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{n}{new\PYGZus{}directory}\PYG{p}{,} \PYG{n}{animal\PYGZus{}name}\PYG{p}{,} \PYG{n}{tameness}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{print} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{Food for the \PYGZob{}tameness:11s\PYGZcb{} \PYGZob{}animal\PYGZus{}name:9s\PYGZcb{} = \PYGZob{}output\PYGZus{}file:90s\PYGZcb{} will be placed in \PYGZob{}new\PYGZus{}directory\PYGZcb{}}\PYG{l+s}{\PYGZdq{}}\PYG{o}{.}\PYG{n}{format}\PYG{p}{(}\PYG{o}{*}\PYG{o}{*}\PYG{n+nb}{locals}\PYG{p}{(}\PYG{p}{)}\PYG{p}{)}
+\end{Verbatim}
+
+{\hyperref[decorators/indicator_objects:decorators-regex]{\emph{regex()}}} code:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{c}{\PYGZsh{} Put different animals in different directories depending on their clade}
+\PYG{n+nd}{@transform}\PYG{p}{(}\PYG{n}{create\PYGZus{}initial\PYGZus{}files}\PYG{p}{,} \PYG{c}{\PYGZsh{} Input}
+
+ \PYG{n}{regex}\PYG{p}{(}\PYG{l+s}{r\PYGZdq{}}\PYG{l+s}{(.*?/?)(}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{w+)/(?P\PYGZlt{}clade\PYGZgt{}}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{w+).(?P\PYGZlt{}tame\PYGZgt{}}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{w+).animals}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{c}{\PYGZsh{} Only animals: ignore plants!}
+
+ \PYG{l+s}{r\PYGZdq{}}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{1/}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{g\PYGZlt{}clade\PYGZgt{}/}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{g\PYGZlt{}tame\PYGZgt{}.}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{2.food}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{c}{\PYGZsh{} Replacement}
+
+ \PYG{l+s}{r\PYGZdq{}}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{1/}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{g\PYGZlt{}clade\PYGZgt{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{c}{\PYGZsh{} new\PYGZus{}directory}
+ \PYG{l+s}{r\PYGZdq{}}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{2}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{c}{\PYGZsh{} animal\PYGZus{}name}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{g\PYGZlt{}tame\PYGZgt{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)} \PYG{c}{\PYGZsh{} tameness}
+\PYG{k}{def} \PYG{n+nf}{feed}\PYG{p}{(}\PYG{n}{input\PYGZus{}file}\PYG{p}{,} \PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{n}{new\PYGZus{}directory}\PYG{p}{,} \PYG{n}{animal\PYGZus{}name}\PYG{p}{,} \PYG{n}{tameness}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{print} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{Food for the \PYGZob{}tameness:11s\PYGZcb{} \PYGZob{}animal\PYGZus{}name:9s\PYGZcb{} = \PYGZob{}output\PYGZus{}file:90s\PYGZcb{} will be placed in \PYGZob{}new\PYGZus{}directory\PYGZcb{}}\PYG{l+s}{\PYGZdq{}}\PYG{o}{.}\PYG{n}{format}\PYG{p}{(}\PYG{o}{*}\PYG{o}{*}\PYG{n+nb}{locals}\PYG{p}{(}\PYG{p}{)}\PYG{p}{)}
+\end{Verbatim}
+\end{quote}
+
+The regular expression to parse the input file path safely was a bit hairy to write, and it is not
+clear that it handles all edge conditions (e.g. files in the root directory). Apart from that, if the
+limitations of {\hyperref[decorators/indicator_objects:decorators-regex]{\emph{regex()}}} do not preclude its use, then the two approaches
+are not so different in practice.
+\end{quote}
+
+\index{mkdir!Tutorial}\index{Tutorial!mkdir}
+
+\section{\textbf{Chapter 9}: Preparing directories for output with \emph{@mkdir()}}
+\label{tutorials/new_tutorial/mkdir:index-0}\label{tutorials/new_tutorial/mkdir::doc}\label{tutorials/new_tutorial/mkdir:new-manual-mkdir-chapter-num-preparing-directories-for-output-with-mkdir}\label{tutorials/new_tutorial/mkdir:new-manual-mkdir}
+
+\strong{See also:}
+
+\begin{itemize}
+\item {}
+{\hyperref[tutorials/new_tutorial/manual_contents:new-manual-table-of-contents]{\emph{Manual Table of Contents}}}
+
+\item {}
+{\hyperref[decorators/follows:decorators-follows]{\emph{@follows(mkdir()) syntax in detail}}}
+
+\item {}
+{\hyperref[decorators/mkdir:decorators-mkdir]{\emph{@mkdir syntax in detail}}}
+
+\end{itemize}
+
+
+
+\begin{notice}{note}{Note:}
+Remember to look at the example code:
+\begin{itemize}
+\item {}
+{\hyperref[tutorials/new_tutorial/mkdir_code:new-manual-mkdir-code]{\emph{Chapter 9: Python Code for Preparing directories for output with @mkdir()}}}
+
+\end{itemize}
+\end{notice}
+
+
+\subsection{Overview}
+\label{tutorials/new_tutorial/mkdir:overview}\begin{quote}
+
+In \textbf{Chapter 3}, we saw that we could use {\hyperref[tutorials/new_tutorial/transform_in_parallel:new-manual-follows-mkdir]{\emph{@follows(mkdir())}}} to
+ensure that output directories exist:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} create\PYGZus{}new\PYGZus{}files() @follows mkdir}
+\PYG{c}{\PYGZsh{}}
+\PYG{n+nd}{@follows}\PYG{p}{(}\PYG{n}{mkdir}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{output/results/here}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{)}
+\PYG{n+nd}{@originate}\PYG{p}{(}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{output/results/here/a.start\PYGZus{}file}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{output/results/here/b.start\PYGZus{}file}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{create\PYGZus{}new\PYGZus{}files}\PYG{p}{(}\PYG{n}{output\PYGZus{}file\PYGZus{}pair}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{pass}
+\end{Verbatim}
+
+This ensures that the decorated task follows ({\hyperref[tutorials/new_tutorial/transform_in_parallel:new-manual-follows-mkdir]{\emph{@follows}}}) the
+making of the specified directory (\code{mkdir()}).
+
+Sometimes, however, the \textbf{Output} is intended not for a single directory but for a group
+of destinations that depend on the parsed contents of the \textbf{Input} paths.
+\end{quote}
+
+
+\subsection{Creating directories after string substitution in a zoo...}
+\label{tutorials/new_tutorial/mkdir:creating-directories-after-string-substitution-in-a-zoo}\begin{quote}
+
+You may remember {\hyperref[tutorials/new_tutorial/output_file_names:new-manual-output-file-names-formatter-zoo]{\emph{this example}}} from \textbf{Chapter 8}:
+
+We want to feed the denizens of a zoo. The original file names are spread over several directories and we
+group their food supply by the \emph{clade} of the animal in the following manner:
+\begin{quote}
+
+\scalebox{0.500000}{\includegraphics{simple_tutorial_zoo_animals_formatter_example.jpg}}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{c}{\PYGZsh{} Put different animals in different directories depending on their clade}
+\PYG{n+nd}{@transform}\PYG{p}{(}\PYG{n}{create\PYGZus{}initial\PYGZus{}files}\PYG{p}{,} \PYG{c}{\PYGZsh{} Input}
+
+ \PYG{n}{formatter}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.+/(?P\PYGZlt{}clade\PYGZgt{}}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{w+).(?P\PYGZlt{}tame\PYGZgt{}}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{w+).animals}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{c}{\PYGZsh{} Only animals: ignore plants!}
+
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}subpath[0][1]\PYGZcb{}/\PYGZob{}clade[0]\PYGZcb{}/\PYGZob{}tame[0]\PYGZcb{}.\PYGZob{}subdir[0][0]\PYGZcb{}.food}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{c}{\PYGZsh{} Replacement}
+
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}subpath[0][1]\PYGZcb{}/\PYGZob{}clade[0]\PYGZcb{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{c}{\PYGZsh{} new\PYGZus{}directory}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}subdir[0][0]\PYGZcb{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{c}{\PYGZsh{} animal\PYGZus{}name}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}tame[0]\PYGZcb{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)} \PYG{c}{\PYGZsh{} tameness}
+\PYG{k}{def} \PYG{n+nf}{feed}\PYG{p}{(}\PYG{n}{input\PYGZus{}file}\PYG{p}{,} \PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{n}{new\PYGZus{}directory}\PYG{p}{,} \PYG{n}{animal\PYGZus{}name}\PYG{p}{,} \PYG{n}{tameness}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{print} \PYG{l+s}{\PYGZdq{}}\PYG{l+s+si}{\PYGZpc{}40s}\PYG{l+s}{ \PYGZhy{}\PYGZgt{} }\PYG{l+s+si}{\PYGZpc{}90s}\PYG{l+s}{\PYGZdq{}} \PYG{o}{\PYGZpc{}} \PYG{p}{(}\PYG{n}{input\PYGZus{}file}\PYG{p}{,} \PYG{n}{output\PYGZus{}file}\PYG{p}{)}
+ \PYG{c}{\PYGZsh{} this blows up}
+ \PYG{c}{\PYGZsh{} open(output\PYGZus{}file, \PYGZdq{}w\PYGZdq{})}
+\end{Verbatim}
+\end{quote}
+
+The example code from \textbf{Chapter 8} is, however, incomplete. If we actually tried to create the specified
+files, we would realise that we had forgotten to create the destination directories \code{reptiles} and \code{mammals} first!
+\end{quote}
+
+
+\subsubsection{using \emph{formatter()}}
+\label{tutorials/new_tutorial/mkdir:using-formatter}\begin{quote}
+
+We could, of course, create the directories manually.
+However, apart from being tedious and error-prone, we have already gone to some lengths
+to parse out the directories for {\hyperref[decorators/transform:decorators-transform]{\emph{@transform}}}.
+Why not use the same logic to make the directories?
+
+Can you see the parallels between the syntax for {\hyperref[decorators/mkdir:decorators-mkdir]{\emph{@mkdir}}} and {\hyperref[decorators/transform:decorators-transform]{\emph{@transform}}}?
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{c}{\PYGZsh{} create directories for each clade}
+\PYG{n+nd}{@mkdir}\PYG{p}{(} \PYG{n}{create\PYGZus{}initial\PYGZus{}files}\PYG{p}{,} \PYG{c}{\PYGZsh{} Input}
+
+ \PYG{n}{formatter}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.+/(?P\PYGZlt{}clade\PYGZgt{}}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{w+).(?P\PYGZlt{}tame\PYGZgt{}}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{w+).animals}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{c}{\PYGZsh{} Only animals: ignore plants!}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}subpath[0][1]\PYGZcb{}/\PYGZob{}clade[0]\PYGZcb{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)} \PYG{c}{\PYGZsh{} new\PYGZus{}directory}
+
+\PYG{c}{\PYGZsh{} Put animals of each clade in the same directory}
+\PYG{n+nd}{@transform}\PYG{p}{(}\PYG{n}{create\PYGZus{}initial\PYGZus{}files}\PYG{p}{,} \PYG{c}{\PYGZsh{} Input}
+
+ \PYG{n}{formatter}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.+/(?P\PYGZlt{}clade\PYGZgt{}}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{w+).(?P\PYGZlt{}tame\PYGZgt{}}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{w+).animals}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{c}{\PYGZsh{} Only animals: ignore plants!}
+
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}subpath[0][1]\PYGZcb{}/\PYGZob{}clade[0]\PYGZcb{}/\PYGZob{}tame[0]\PYGZcb{}.\PYGZob{}subdir[0][0]\PYGZcb{}.food}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{c}{\PYGZsh{} Replacement}
+
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}subpath[0][1]\PYGZcb{}/\PYGZob{}clade[0]\PYGZcb{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{c}{\PYGZsh{} new\PYGZus{}directory}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}subdir[0][0]\PYGZcb{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{c}{\PYGZsh{} animal\PYGZus{}name}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}tame[0]\PYGZcb{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)} \PYG{c}{\PYGZsh{} tameness}
+\PYG{k}{def} \PYG{n+nf}{feed}\PYG{p}{(}\PYG{n}{input\PYGZus{}file}\PYG{p}{,} \PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{n}{new\PYGZus{}directory}\PYG{p}{,} \PYG{n}{animal\PYGZus{}name}\PYG{p}{,} \PYG{n}{tameness}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{print} \PYG{l+s}{\PYGZdq{}}\PYG{l+s+si}{\PYGZpc{}40s}\PYG{l+s}{ \PYGZhy{}\PYGZgt{} }\PYG{l+s+si}{\PYGZpc{}90s}\PYG{l+s}{\PYGZdq{}} \PYG{o}{\PYGZpc{}} \PYG{p}{(}\PYG{n}{input\PYGZus{}file}\PYG{p}{,} \PYG{n}{output\PYGZus{}file}\PYG{p}{)}
+ \PYG{c}{\PYGZsh{} this works now}
+ \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\end{Verbatim}
+\end{quote}
+
+See the {\hyperref[tutorials/new_tutorial/mkdir_code:new-manual-mkdir-code]{\emph{example code}}}
+\end{quote}
+
+
+\subsubsection{using \emph{regex()}}
+\label{tutorials/new_tutorial/mkdir:using-regex}\begin{quote}
+
+If you are particularly fond of using regular expression to parse file paths,
+you could also use {\hyperref[decorators/indicator_objects:decorators-regex]{\emph{regex()}}}:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{c}{\PYGZsh{} create directories for each clade}
+\PYG{n+nd}{@mkdir}\PYG{p}{(} \PYG{n}{create\PYGZus{}initial\PYGZus{}files}\PYG{p}{,} \PYG{c}{\PYGZsh{} Input}
+
+ \PYG{n}{regex}\PYG{p}{(}\PYG{l+s}{r\PYGZdq{}}\PYG{l+s}{(.*?)/?(}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{w+)/(?P\PYGZlt{}clade\PYGZgt{}}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{w+).(?P\PYGZlt{}tame\PYGZgt{}}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{w+).animals}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{c}{\PYGZsh{} Only animals: ignore plants!}
+ \PYG{l+s}{r\PYGZdq{}}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{1/}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{g\PYGZlt{}clade\PYGZgt{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)} \PYG{c}{\PYGZsh{} new\PYGZus{}directory}
+
+\PYG{c}{\PYGZsh{} Put animals of each clade in the same directory}
+\PYG{n+nd}{@transform}\PYG{p}{(}\PYG{n}{create\PYGZus{}initial\PYGZus{}files}\PYG{p}{,} \PYG{c}{\PYGZsh{} Input}
+
+ \PYG{n}{formatter}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.+/(?P\PYGZlt{}clade\PYGZgt{}}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{w+).(?P\PYGZlt{}tame\PYGZgt{}}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{w+).animals}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{c}{\PYGZsh{} Only animals: ignore plants!}
+
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}subpath[0][1]\PYGZcb{}/\PYGZob{}clade[0]\PYGZcb{}/\PYGZob{}tame[0]\PYGZcb{}.\PYGZob{}subdir[0][0]\PYGZcb{}.food}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{c}{\PYGZsh{} Replacement}
+
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}subpath[0][1]\PYGZcb{}/\PYGZob{}clade[0]\PYGZcb{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{c}{\PYGZsh{} new\PYGZus{}directory}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}subdir[0][0]\PYGZcb{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{c}{\PYGZsh{} animal\PYGZus{}name}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}tame[0]\PYGZcb{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)} \PYG{c}{\PYGZsh{} tameness}
+\PYG{k}{def} \PYG{n+nf}{feed}\PYG{p}{(}\PYG{n}{input\PYGZus{}file}\PYG{p}{,} \PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{n}{new\PYGZus{}directory}\PYG{p}{,} \PYG{n}{animal\PYGZus{}name}\PYG{p}{,} \PYG{n}{tameness}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{print} \PYG{l+s}{\PYGZdq{}}\PYG{l+s+si}{\PYGZpc{}40s}\PYG{l+s}{ \PYGZhy{}\PYGZgt{} }\PYG{l+s+si}{\PYGZpc{}90s}\PYG{l+s}{\PYGZdq{}} \PYG{o}{\PYGZpc{}} \PYG{p}{(}\PYG{n}{input\PYGZus{}file}\PYG{p}{,} \PYG{n}{output\PYGZus{}file}\PYG{p}{)}
+ \PYG{c}{\PYGZsh{} this works now}
+ \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\end{Verbatim}
+\end{quote}
+\end{quote}
+
+\index{Up to date!Tutorial}\index{Tutorial!Up to date}\index{Task completion!Tutorial}\index{Tutorial!Task completion}\index{Exceptions!Tutorial}\index{Tutorial!Exceptions}\index{Interrupted Pipeline!Tutorial}\index{Tutorial!Interrupted Pipeline}
+
+\section{\textbf{Chapter 10}: Checkpointing: Interrupted Pipelines and Exceptions}
+\label{tutorials/new_tutorial/checkpointing:new-manual-checkpointing}\label{tutorials/new_tutorial/checkpointing:index-0}\label{tutorials/new_tutorial/checkpointing::doc}\label{tutorials/new_tutorial/checkpointing:new-manual-checkpointing-chapter-num-checkpointing-interrupted-pipelines-and-exceptions}
+
+\strong{See also:}
+
+\begin{itemize}
+\item {}
+{\hyperref[tutorials/new_tutorial/manual_contents:new-manual-table-of-contents]{\emph{Manual Table of Contents}}}
+
+\end{itemize}
+
+
+
+\begin{notice}{note}{Note:}
+Remember to look at the example code:
+\begin{itemize}
+\item {}
+{\hyperref[tutorials/new_tutorial/checkpointing_code:new-manual-checkpointing-code]{\emph{Chapter 10: Python Code for Checkpointing: Interrupted Pipelines and Exceptions}}}
+
+\end{itemize}
+\end{notice}
+
+
+\subsection{Overview}
+\label{tutorials/new_tutorial/checkpointing:overview}\begin{quote}
+
+\scalebox{0.500000}{\includegraphics{theoretical_pipeline_schematic.png}}
+
+Computational pipelines transform your data in stages until the final result is produced.
+
+By default, \emph{Ruffus} uses file modification times for the \textbf{input} and \textbf{output} to determine
+whether each stage of a pipeline is up-to-date or not. But what happens when the task
+function is interrupted, whether from the command line or by error, half way through writing the output?
+
+In this case, the half-formed, truncated and corrupt \textbf{Output} file will look newer than its \textbf{Input} and hence appear to be up-to-date.
+\end{quote}
+
+\index{Tutorial!interrupting tasks}\index{interrupting tasks!Tutorial}
+
+\subsection{Interrupting tasks}
+\label{tutorials/new_tutorial/checkpointing:interrupting-tasks}\label{tutorials/new_tutorial/checkpointing:new-manual-interrupting-tasks}\label{tutorials/new_tutorial/checkpointing:index-1}\begin{quote}
+
+Let us try with an example:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{k+kn}{from} \PYG{n+nn}{ruffus} \PYG{k+kn}{import} \PYG{o}{*}
+\PYG{k+kn}{import} \PYG{n+nn}{sys}\PYG{o}{,} \PYG{n+nn}{time}
+
+\PYG{c}{\PYGZsh{} create initial files}
+\PYG{n+nd}{@originate}\PYG{p}{(}\PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job1.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{create\PYGZus{}initial\PYGZus{}files}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{with} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)} \PYG{k}{as} \PYG{n}{oo}\PYG{p}{:} \PYG{k}{pass}
+
+
+\PYG{c}{\PYGZsh{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy [...]
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} long task to interrupt}
+\PYG{c}{\PYGZsh{}}
+\PYG{n+nd}{@transform}\PYG{p}{(}\PYG{n}{create\PYGZus{}initial\PYGZus{}files}\PYG{p}{,} \PYG{n}{suffix}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.start}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.output}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{long\PYGZus{}task}\PYG{p}{(}\PYG{n}{input\PYGZus{}files}\PYG{p}{,} \PYG{n}{output\PYGZus{}file}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{with} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)} \PYG{k}{as} \PYG{n}{ff}\PYG{p}{:}
+ \PYG{n}{ff}\PYG{o}{.}\PYG{n}{write}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{Unfinished...}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+ \PYG{c}{\PYGZsh{} sleep for 2 seconds here so you can interrupt me}
+ \PYG{n}{sys}\PYG{o}{.}\PYG{n}{stderr}\PYG{o}{.}\PYG{n}{write}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{Job started. Press \PYGZca{}C to interrupt me now...}\PYG{l+s+se}{\PYGZbs{}n}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+ \PYG{n}{time}\PYG{o}{.}\PYG{n}{sleep}\PYG{p}{(}\PYG{l+m+mi}{2}\PYG{p}{)}
+ \PYG{n}{ff}\PYG{o}{.}\PYG{n}{write}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s+se}{\PYGZbs{}n}\PYG{l+s}{Finished}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+ \PYG{n}{sys}\PYG{o}{.}\PYG{n}{stderr}\PYG{o}{.}\PYG{n}{write}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{Job completed.}\PYG{l+s+se}{\PYGZbs{}n}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+
+
+\PYG{c}{\PYGZsh{} Run}
+\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{p}{[}\PYG{n}{long\PYGZus{}task}\PYG{p}{]}\PYG{p}{)}
+\end{Verbatim}
+\end{quote}
+
+When this script runs, it pauses in the middle with this message:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+Job started. Press \textasciicircum{}C to interrupt me now...
+\end{Verbatim}
+
+If you interrupt the script by pressing Control-C at this point, you will see that \code{job1.output} contains only \code{Unfinished...}.
+However, if you rerun the interrupted pipeline, Ruffus ignores the corrupt, incomplete file:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{p}{[}\PYG{n}{long\PYGZus{}task}\PYG{p}{]}\PYG{p}{)}
+\PYG{g+go}{Job started. Press \PYGZca{}C to interrupt me now...}
+\PYG{g+go}{Job completed}
+\end{Verbatim}
+\end{quote}
+
+And if you had run \code{pipeline\_printout}:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{n}{pipeline\PYGZus{}printout}\PYG{p}{(}\PYG{n}{sys}\PYG{o}{.}\PYG{n}{stdout}\PYG{p}{,} \PYG{p}{[}\PYG{n}{long\PYGZus{}task}\PYG{p}{]}\PYG{p}{,} \PYG{n}{verbose}\PYG{o}{=}\PYG{l+m+mi}{3}\PYG{p}{)}
+\PYG{g+go}{\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}}
+\PYG{g+go}{Tasks which will be run:}
+
+\PYG{g+go}{Task = long\PYGZus{}task}
+\PYG{g+go}{ Job = [job1.start}
+\PYG{g+go}{ \PYGZhy{}\PYGZgt{} job1.output]}
+\PYG{g+go}{ \PYGZsh{} Job needs update: Previous incomplete run leftover: [job1.output]}
+\end{Verbatim}
+\end{quote}
+
+We can see that \emph{Ruffus} magically knows that the previous run was incomplete, and that \code{job1.output} is detritus that needs to be discarded.
+\end{quote}
+
+
+\subsection{Checkpointing: only log completed jobs}
+\label{tutorials/new_tutorial/checkpointing:checkpointing-only-log-completed-jobs}\label{tutorials/new_tutorial/checkpointing:new-manual-logging-completed-jobs}\begin{quote}
+
+All is revealed if you look in the working directory: \emph{Ruffus} has created a file called \code{.ruffus\_history.sqlite}.
+In this \href{https://sqlite.org/}{SQLite} database, \emph{Ruffus} logs only those files which are the result of a completed job;
+all other files are suspect.
+This checkpoint database is a fail-safe, not a substitute for checking file modification times: if the \textbf{Input} or \textbf{Output} files are
+modified, the pipeline will still rerun.
+
+By default, \emph{Ruffus} saves only file timestamps to the SQLite database, but you can also add a checksum of the pipeline task function body or parameters.
+This behaviour is controlled by the \code{checksum\_level} parameter
+of \code{pipeline\_run()}. For example, if you do not want to save any timestamps or checksums:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{n}{checksum\PYGZus{}level} \PYG{o}{=} \PYG{l+m+mi}{0}\PYG{p}{)}
+
+\PYG{n}{CHECKSUM\PYGZus{}FILE\PYGZus{}TIMESTAMPS} \PYG{o}{=} \PYG{l+m+mi}{0} \PYG{c}{\PYGZsh{} only rerun when the file timestamps are out of date (classic mode)}
+\PYG{n}{CHECKSUM\PYGZus{}HISTORY\PYGZus{}TIMESTAMPS} \PYG{o}{=} \PYG{l+m+mi}{1} \PYG{c}{\PYGZsh{} Default: also rerun when the history shows a job as being out of date}
+\PYG{n}{CHECKSUM\PYGZus{}FUNCTIONS} \PYG{o}{=} \PYG{l+m+mi}{2} \PYG{c}{\PYGZsh{} also rerun when function body has changed}
+\PYG{n}{CHECKSUM\PYGZus{}FUNCTIONS\PYGZus{}AND\PYGZus{}PARAMS} \PYG{o}{=} \PYG{l+m+mi}{3} \PYG{c}{\PYGZsh{} also rerun when function parameters or function body change}
+\end{Verbatim}
+\end{quote}
+
+\begin{notice}{note}{Note:}
+Checksums are calculated from the \href{http://docs.python.org/2/library/pickle.html}{pickled} string for the function code and parameters.
+If pickling fails, Ruffus will degrade gracefully to saving just the timestamp in the SQLite database.
+\end{notice}
+\end{quote}
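+
+As a concrete illustration, here is a minimal sketch using the constant values listed above (the task and file names are invented and not part of the original example code):
+
+\begin{Verbatim}
+from ruffus import *
+
+@originate(["example.start"])
+def make_start_file(output_file):
+    # if this function body changes, checksum levels 2 and 3 force a rerun
+    open(output_file, "w").close()
+
+# rerun not only when files are out of date, but also when the task
+# function's body or its parameters change (checksum level 3 above)
+pipeline_run([make_start_file], checksum_level = 3)
+\end{Verbatim}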
+
+
+\subsection{Do not share the same checkpoint file across multiple pipelines!}
+\label{tutorials/new_tutorial/checkpointing:new-manual-history-files-cannot-be-shared}\label{tutorials/new_tutorial/checkpointing:do-not-share-the-same-checkpoint-file-across-for-multiple-pipelines}\begin{quote}
+
+The name of the \emph{Ruffus} python script is not saved in the checkpoint file alongside the timestamps and checksums.
+That means that you can rename your pipeline source code file without having to rerun the pipeline!
+The tradeoff is that if multiple pipelines are run from the same directory, save their histories to the
+same SQLite database file, and have overlapping file names (all of these are bad ideas anyway!), this is
+bound to be a source of confusion.
+
+Luckily, the name and path of the checkpoint file can also be changed for each pipeline.
+\end{quote}
+
+
+\subsection{Setting checkpoint file names}
+\label{tutorials/new_tutorial/checkpointing:new-manual-changing-history-file-name}\label{tutorials/new_tutorial/checkpointing:setting-checkpoint-file-names}\begin{quote}
+
+\begin{notice}{warning}{Warning:}
+Some file systems do not appear to support SQLite at all:
+
+There are reports that SQLite databases have \href{http://beets.radbox.org/blog/sqlite-nightmare.html}{file locking problems} on Lustre.
+
+The best solution is to keep the SQLite database on another, compatible file system, away from the working directory, if possible.
+\end{notice}
+\end{quote}
+
+
+\subsubsection{environment variable \texttt{DEFAULT\_RUFFUS\_HISTORY\_FILE}}
+\label{tutorials/new_tutorial/checkpointing:environment-variable-default-ruffus-history-file}\begin{quote}
+
+The name of the checkpoint file is the value of the environment variable \code{DEFAULT\_RUFFUS\_HISTORY\_FILE}.
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+export DEFAULT\_RUFFUS\_HISTORY\_FILE=/some/where/.ruffus\_history.sqlite
+\end{Verbatim}
+\end{quote}
+
+This gives considerable flexibility, and allows a system-wide policy to be set so that all \emph{Ruffus} checkpoint files end up in predictable, logical locations.
+
+\begin{notice}{note}{Note:}
+It is your responsibility to make sure that the requisite destination directories for the checkpoint files exist beforehand!
+\end{notice}
+
+Where this environment variable is not set, the checkpoint file defaults to \code{.ruffus\_history.sqlite} in your working directory.
+\end{quote}
+
+
+\subsubsection{Setting the checkpoint file name manually}
+\label{tutorials/new_tutorial/checkpointing:setting-the-checkpoint-file-name-manually}\begin{quote}
+
+This checkpoint file name can always be overridden as a parameter to Ruffus functions:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{n}{history\PYGZus{}file} \PYG{o}{=} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{XXX}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{n}{pipeline\PYGZus{}printout}\PYG{p}{(}\PYG{n}{history\PYGZus{}file} \PYG{o}{=} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{XXX}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{n}{pipeline\PYGZus{}printout\PYGZus{}graph}\PYG{p}{(}\PYG{n}{history\PYGZus{}file} \PYG{o}{=} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{XXX}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\end{Verbatim}
+\end{quote}
+
+There is also built-in support in \code{Ruffus.cmdline}, so if you use this module, you can simply add the following to your command line:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{c}{\PYGZsh{} use a custom checkpoint file}
+myscript \PYGZhy{}\PYGZhy{}checksum\PYGZus{}file\PYGZus{}name .myscript.ruffus\PYGZus{}history.sqlite
+\end{Verbatim}
+\end{quote}
+
+This takes precedence over everything else.
+\end{quote}
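+
+For reference, here is a rough sketch of the \code{Ruffus.cmdline} boilerplate that lets options such as \code{-{}-checksum\_file\_name} take effect (the tiny pipeline itself is a made-up placeholder):
+
+\begin{Verbatim}
+from ruffus import *
+import ruffus.cmdline as cmdline
+
+parser = cmdline.get_argparse(description = "Example pipeline")
+options = parser.parse_args()
+
+@originate(["a.start", "b.start"])
+def create_initial_files(output_file):
+    open(output_file, "w").close()
+
+# cmdline.run() forwards command line settings, including the checkpoint
+# file name, to pipeline_run()
+cmdline.run(options)
+\end{Verbatim}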
+
+
+\subsection{Useful checkpoint file name policies for \texttt{DEFAULT\_RUFFUS\_HISTORY\_FILE}}
+\label{tutorials/new_tutorial/checkpointing:useful-checkpoint-file-name-policies-default-ruffus-history-file}\begin{quote}
+
+If the pipeline script is called \code{test/bin/scripts/run.me.py}, then these are the resulting checkpoint file locations:
+\end{quote}
+
+
+\subsubsection{Example 1: same directory, different name}
+\label{tutorials/new_tutorial/checkpointing:example-1-same-directory-different-name}\begin{quote}
+
+If the environment variable is:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n+nb}{export }\PYG{n+nv}{DEFAULT\PYGZus{}RUFFUS\PYGZus{}HISTORY\PYGZus{}FILE}\PYG{o}{=}.\PYG{o}{\PYGZob{}}basename\PYG{o}{\PYGZcb{}}.ruffus\PYGZus{}history.sqlite
+\end{Verbatim}
+
+Then the job checkpoint database for \code{run.me.py} will be \code{.run.me.ruffus\_history.sqlite}, next to the script itself:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+/test/bin/scripts/run.me.py
+/test/bin/scripts/.run.me.ruffus\PYGZus{}history.sqlite
+\end{Verbatim}
+\end{quote}
+
+
+\subsubsection{Example 2: Different directory, same name}
+\label{tutorials/new_tutorial/checkpointing:example-2-different-directory-same-name}\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n+nb}{export }\PYG{n+nv}{DEFAULT\PYGZus{}RUFFUS\PYGZus{}HISTORY\PYGZus{}FILE}\PYG{o}{=}/common/path/for/job\PYGZus{}history/.\PYG{o}{\PYGZob{}}basename\PYG{o}{\PYGZcb{}}.ruffus\PYGZus{}history.sqlite
+\end{Verbatim}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+/common/path/for/job\PYGZus{}history/.run.me.ruffus\PYGZus{}history.sqlite
+\end{Verbatim}
+\end{quote}
+
+
+\subsubsection{Example 3: Different directory, same name but keep one level of subdirectory to disambiguate}
+\label{tutorials/new_tutorial/checkpointing:example-2-different-directory-same-name-but-keep-one-level-of-subdirectory-to-disambiguate}\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n+nb}{export }\PYG{n+nv}{DEFAULT\PYGZus{}RUFFUS\PYGZus{}HISTORY\PYGZus{}FILE}\PYG{o}{=}/common/path/for/job\PYGZus{}history/\PYG{o}{\PYGZob{}}subdir\PYG{o}{[}0\PYG{o}{]}\PYG{o}{\PYGZcb{}}/.\PYG{o}{\PYGZob{}}basename\PYG{o}{\PYGZcb{}}.ruffus\PYGZus{}history.sqlite
+\end{Verbatim}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+/common/path/for/job\PYGZus{}history/scripts/.run.me.ruffus\PYGZus{}history.sqlite
+\end{Verbatim}
+\end{quote}
+
+
+\subsubsection{Example 4: Nested in a common directory}
+\label{tutorials/new_tutorial/checkpointing:example-2-nested-in-common-directory}\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n+nb}{export }\PYG{n+nv}{DEFAULT\PYGZus{}RUFFUS\PYGZus{}HISTORY\PYGZus{}FILE}\PYG{o}{=}/common/path/for/job\PYGZus{}history/\PYG{o}{\PYGZob{}}path\PYG{o}{\PYGZcb{}}/.\PYG{o}{\PYGZob{}}basename\PYG{o}{\PYGZcb{}}.ruffus\PYGZus{}history.sqlite
+\end{Verbatim}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+/common/path/for/job\PYGZus{}history/test/bin/scripts/.run.me.ruffus\PYGZus{}history.sqlite
+\end{Verbatim}
+\end{quote}
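+
+To make the substitutions in these templates more concrete, the following purely illustrative snippet (not part of \emph{Ruffus} itself) shows roughly what the placeholders correspond to for \code{/test/bin/scripts/run.me.py}:
+
+\begin{Verbatim}
+import os.path
+
+script = "/test/bin/scripts/run.me.py"
+
+# {basename}  : script file name without directory or extension
+basename = os.path.splitext(os.path.basename(script))[0]    # "run.me"
+
+# {path}      : directory containing the script
+path = os.path.dirname(script)                              # "/test/bin/scripts"
+
+# {subdir[0]} : the innermost enclosing directory name
+subdir_0 = os.path.basename(path)                           # "scripts"
+
+print(".%s.ruffus_history.sqlite" % basename)               # .run.me.ruffus_history.sqlite
+\end{Verbatim}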
+
+\index{Tutorial!Regenerating the checkpoint file}\index{Regenerating the checkpoint file!Tutorial}
+
+\subsection{Regenerating the checkpoint file}
+\label{tutorials/new_tutorial/checkpointing:new-manual-regenerating-history-file}\label{tutorials/new_tutorial/checkpointing:regenerating-the-checkpoint-file}\label{tutorials/new_tutorial/checkpointing:index-2}\begin{quote}
+
+Occasionally you may need to re-generate the checkpoint file.
+
+This could be necessary:
+\begin{itemize}
+\item {}
+because you are upgrading from a previous version of Ruffus without checkpoint file support
+
+\item {}
+on the rare occasions when the SQLite file becomes corrupted and has to be deleted
+
+\item {}
+if you wish to circumvent the file checking of Ruffus after making some manual changes!
+
+\end{itemize}
+
+To do this, it is only necessary to call \code{pipeline\_run} appropriately:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n}{CHECKSUM\PYGZus{}REGENERATE} \PYG{o}{=} \PYG{l+m+mi}{2}
+\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{n}{touch\PYGZus{}files\PYGZus{}only} \PYG{o}{=} \PYG{n}{CHECKSUM\PYGZus{}REGENERATE}\PYG{p}{)}
+\end{Verbatim}
+\end{quote}
+
+Similarly, if you are using \code{Ruffus.cmdline}, you can call:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+myscript \PYGZhy{}\PYGZhy{}recreate\PYGZus{}database
+\end{Verbatim}
+\end{quote}
+
+Note that this regenerates the checkpoint file to reflect the existing \emph{Input} and \emph{Output} files on disk.
+In other words, the onus is on you to make sure there are no half-formed, corrupt files. On the other hand,
+the pipeline does not need to have been run successfully before for this to work. Essentially, \emph{Ruffus}
+pretends to run the pipeline, logging all the files with consistent file modification times, and stopping
+at the first task which appears out of date or incomplete.
+\end{quote}
+
+\index{rules!for rerunning jobs}\index{for rerunning jobs!rules}
+
+\subsection{Rules for determining if files are up to date}
+\label{tutorials/new_tutorial/checkpointing:new-manual-skip-up-to-date-rules}\label{tutorials/new_tutorial/checkpointing:rules-for-determining-if-files-are-up-to-date}\label{tutorials/new_tutorial/checkpointing:index-3}\begin{quote}
+
+The following simple rules are used by \emph{Ruffus}.
+\begin{enumerate}
+\item {}
+The pipeline stage will be rerun if:
+\begin{itemize}
+\item {}
+any of the \textbf{Input} files are new (i.e. newer than the \textbf{Output} files)
+
+\item {}
+any of the \textbf{Output} files are missing
+
+\end{itemize}
+
+\item {}
+In addition, it is possible to run jobs which create files from scratch.
+\begin{itemize}
+\item {}
+If no \textbf{Input} file names are supplied, the job will only run if any of the \textbf{Output} files are missing.
+
+\end{itemize}
+
+\item {}
+Finally, if no \textbf{Output} file names are supplied, the job will always run.
+
+\end{enumerate}
+\end{quote}
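+
+The following minimal sketch (task and file names invented for illustration) shows rule 1 in action: after the \textbf{Input} file is touched, \code{pipeline\_printout()} reports the downstream job as needing to be rerun:
+
+\begin{Verbatim}
+import os, sys, time
+from ruffus import *
+
+@originate(["numbers.txt"])
+def make_input(output_file):
+    open(output_file, "w").close()
+
+@transform(make_input, suffix(".txt"), ".summary")
+def summarise(input_file, output_file):
+    open(output_file, "w").close()
+
+pipeline_run([summarise])
+
+# Rule 1: making the Input newer than the Output marks the job for rerun
+time.sleep(1)
+os.utime("numbers.txt", None)
+pipeline_printout(sys.stdout, [summarise], verbose = 3)
+\end{Verbatim}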
+
+\index{Exception!Missing input files}\index{Missing input files!Exception}
+
+\subsection{Missing files generate exceptions}
+\label{tutorials/new_tutorial/checkpointing:index-4}\label{tutorials/new_tutorial/checkpointing:missing-files-generate-exceptions}\begin{quote}
+
+If the \emph{input} files for a job are missing, the task function will have no way
+to produce its \emph{output}. In this case, a \code{MissingInputFileError} exception will be raised
+automatically. For example:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+task.MissingInputFileError: No way to run job: Input file ['a.1'] does not exist
+for Job = ["a.1" -\textgreater{} "a.2", "A file"]
+\end{Verbatim}
+\end{quote}
+\end{quote}
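+
+As a minimal illustration (the file and task names are invented for this sketch), referring to an \emph{input} file that nothing creates is enough to trigger the error:
+
+\begin{Verbatim}
+from ruffus import *
+
+# "missing.input" is never created by any upstream task
+@transform(["missing.input"], suffix(".input"), ".output")
+def needs_missing_file(input_file, output_file):
+    open(output_file, "w").close()
+
+# raises an error naming the missing input file
+# (depending on the Ruffus version it may arrive wrapped in a job error)
+pipeline_run([needs_missing_file])
+\end{Verbatim}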
+
+\index{Manual!Timestamp resolution}\index{Timestamp resolution!Manual}
+
+\subsection{Caveats: Coarse Timestamp resolution}
+\label{tutorials/new_tutorial/checkpointing:index-5}\label{tutorials/new_tutorial/checkpointing:caveats-coarse-timestamp-resolution}\begin{quote}
+
+Note that modification times have a precision of only one second under some older file systems
+(ext2/ext3?). This may also be true for networked file systems.
+
+\emph{Ruffus} supplements the file system time resolution by independently recording the timestamp at
+full OS resolution (usually to at least the millisecond) at job completion, when presumably the \textbf{Output}
+files will have been created.
+
+However, \emph{Ruffus} only does this if the discrepancy between file time and system time is less than a second
+(i.e. attributable to poor file system timestamp resolution). If there are large mismatches between the two, due, for example,
+to network time slippage or misconfiguration, \emph{Ruffus} reverts to using the file system time and adds a one-second
+delay between jobs (via \code{time.sleep()}) to make sure input and output file timestamps differ.
+
+If you know that your file system has coarse-grained timestamp resolution, you can always revert to this very conservative behaviour,
+at the price of some annoying one-second pauses, by setting {\hyperref[pipeline_functions:pipeline-functions-pipeline-run]{\emph{pipeline\_run(one\_second\_per\_job = True)}}}.
+\end{quote}
+
+\index{Manual!flag files}\index{flag files!Manual}
+
+\subsection{Flag files: Checkpointing for the paranoid}
+\label{tutorials/new_tutorial/checkpointing:index-6}\label{tutorials/new_tutorial/checkpointing:flag-files-checkpointing-for-the-paranoid}\begin{quote}
+
+One other way of checkpointing your pipelines is to create an extra ``flag'' file as an additional
+\textbf{Output} file name. The flag file is only created or updated when everything else in the
+job has completed successfully and been written to disk. A missing or out-of-date flag file is then
+a sign to \emph{Ruffus} that the task never completed properly in the first place.
+
+This used to be by far the best way of performing checkpointing in \emph{Ruffus} and is still
+the most bulletproof way of proceeding: even the loss or corruption
+of the checkpoint file would not affect things greatly.
+
+Nevertheless, flag files are largely superfluous in modern \emph{Ruffus}.
+\end{quote}
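+
+A minimal sketch of the flag-file idiom (the task names and suffixes are invented, and the sketch assumes that a list of \textbf{Output} file names can be supplied to \code{@transform}):
+
+\begin{Verbatim}
+from ruffus import *
+
+@originate(["sample1.data", "sample2.data"])
+def make_data(output_file):
+    open(output_file, "w").close()
+
+# each job names two Outputs: the real result and a ".done" flag file
+@transform(make_data, suffix(".data"), [".result", ".result.done"])
+def analyse(input_file, output_files):
+    result_file, flag_file = output_files
+    with open(result_file, "w") as oo:
+        oo.write("real results\n")
+    # the flag is written last, only after the real Output is safely on disk
+    open(flag_file, "w").close()
+
+pipeline_run([analyse])
+\end{Verbatim}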
+
+\index{decorators\_compendium!Tutorial}\index{Tutorial!decorators\_compendium}
+
+\section{\textbf{Chapter 11}: Pipeline topologies and a compendium of \emph{Ruffus} decorators}
+\label{tutorials/new_tutorial/decorators_compendium:index-0}\label{tutorials/new_tutorial/decorators_compendium:new-manual-decorators-compendium}\label{tutorials/new_tutorial/decorators_compendium:new-manual-decorators-compendium-chapter-num-pipeline-topologies-and-a-compendium-of-ruffus-decorators}\label{tutorials/new_tutorial/decorators_compendium::doc}
+
+\strong{See also:}
+
+\begin{itemize}
+\item {}
+{\hyperref[tutorials/new_tutorial/manual_contents:new-manual-table-of-contents]{\emph{Manual Table of Contents}}}
+
+\item {}
+{\hyperref[decorators/decorators:decorators]{\emph{decorators}}}
+
+\end{itemize}
+
+
+
+
+\subsection{Overview}
+\label{tutorials/new_tutorial/decorators_compendium:overview}\begin{quote}
+
+Computational pipelines transform your data in stages until the final result is produced.
+
+You can visualise your pipeline data flowing like water down a system of pipes.
+\emph{Ruffus} has many ways of joining up your pipes to create different topologies.
+
+\begin{notice}{note}{Note:}
+\textbf{The best way to design a pipeline is to:}
+\begin{itemize}
+\item {}
+\textbf{Write down the file names of the data as it flows across your pipeline.}
+
+\item {}
+\textbf{Draw lines between the file names to show how they should be connected together.}
+
+\end{itemize}
+\end{notice}
+\end{quote}
+
+
+\subsection{\emph{@transform}}
+\label{tutorials/new_tutorial/decorators_compendium:transform}\begin{quote}
+
+So far, our data files have been flowing through our pipelines independently in lockstep.
+
+\scalebox{0.500000}{\includegraphics{bestiary_transform.png}}
+
+If we drew a graph of the data files moving through the pipeline, all of our flowcharts would look something like this.
+
+The {\hyperref[decorators/transform:decorators-transform]{\emph{@transform}}} decorator connects up your data files in 1 to 1 operations, ensuring that for every \textbf{Input}, a corresponding \textbf{Output} is
+generated, ready to go into the next pipeline stage. If we start with three sets of starting data, we would end up with three final sets of results.
+\end{quote}
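+
+A minimal sketch of such a 1 to 1 step (illustrative names only):
+
+\begin{Verbatim}
+from ruffus import *
+
+@originate(["a.start", "b.start", "c.start"])
+def create_initial_files(output_file):
+    open(output_file, "w").close()
+
+# one job per Input file: a.start -> a.step1, b.start -> b.step1, ...
+@transform(create_initial_files, suffix(".start"), ".step1")
+def first_step(input_file, output_file):
+    open(output_file, "w").close()
+
+pipeline_run([first_step])
+\end{Verbatim}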
+
+
+\subsection{A bestiary of \emph{Ruffus} decorators}
+\label{tutorials/new_tutorial/decorators_compendium:a-bestiary-of-ruffus-decorators}\begin{quote}
+
+Very often, we would like to transform our data in more complex ways; this is where the other \emph{Ruffus} decorators come in.
+
+\scalebox{0.500000}{\includegraphics{bestiary_decorators.png}}
+\end{quote}
+
+
+\subsection{\emph{@originate}}
+\label{tutorials/new_tutorial/decorators_compendium:originate}\begin{itemize}
+\item {}
+Introduced in \textbf{Chapter 3} {\hyperref[tutorials/new_tutorial/transform_in_parallel:new-manual-transform-in-parallel]{\emph{More on @transform-ing data and @originate}}},
+{\hyperref[decorators/originate:decorators-originate]{\emph{@originate}}} generates \textbf{Output} files from scratch without the benefit of any \textbf{Input} files (see the sketch below).
+
+\end{itemize}
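+
+A minimal sketch (illustrative names only):
+
+\begin{Verbatim}
+from ruffus import *
+
+# no Input: the Output files are created from scratch
+@originate(["first.start", "second.start"])
+def create_initial_files(output_file):
+    open(output_file, "w").close()
+
+pipeline_run([create_initial_files])
+\end{Verbatim}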
+
+
+\subsection{\emph{@merge}}
+\label{tutorials/new_tutorial/decorators_compendium:merge}\begin{itemize}
+\item {}
+A \textbf{many to one} operator.
+
+\item {}
+The last decorator at the far right of the figure, {\hyperref[decorators/merge:decorators-merge]{\emph{@merge}}} merges multiple \textbf{Input} into a single \textbf{Output} (see the sketch below).
+
+\end{itemize}
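+
+A minimal sketch (the upstream task and file names are invented for illustration):
+
+\begin{Verbatim}
+from ruffus import *
+
+@originate(["part1.counts", "part2.counts", "part3.counts"])
+def count_parts(output_file):
+    with open(output_file, "w") as oo:
+        oo.write("1\n")
+
+# all of the .counts files feed one job which produces a single summary
+@merge(count_parts, "all.summary")
+def summarise_counts(input_files, output_file):
+    with open(output_file, "w") as oo:
+        oo.write("%d input files\n" % len(input_files))
+
+pipeline_run([summarise_counts])
+\end{Verbatim}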
+
+
+\subsection{\emph{@split}}
+\label{tutorials/new_tutorial/decorators_compendium:split}\begin{itemize}
+\item {}
+A \textbf{one to many} operator,
+
+\item {}
+{\hyperref[decorators/split:decorators-split]{\emph{@split}}} is the evil twin of {\hyperref[decorators/merge:decorators-merge]{\emph{@merge}}}. It takes a single set of \textbf{Input} and splits it into multiple smaller pieces (a minimal sketch follows this list).
+
+\item {}
+The best part of {\hyperref[decorators/split:decorators-split]{\emph{@split}}} is that we don't necessarily have to decide ahead of time \emph{how many} smaller pieces it should produce. If we encounter a larger file,
+we might need to split it up into more fragments for greater parallelism.
+
+\item {}
+Since {\hyperref[decorators/split:decorators-split]{\emph{@split}}} is a \textbf{one to many} operator, if you pass it \textbf{many} inputs (e.g. via {\hyperref[decorators/transform:decorators-transform]{\emph{@transform}}}), it performs an implicit {\hyperref[decorators/merge:decorators-merge]{\emph{@merge}}} step to make one
+set of \textbf{Input} that you can redistribute into a different number of pieces. If you are looking to split \emph{each} \textbf{Input} into further smaller fragments, then you
+need {\hyperref[decorators/subdivide:decorators-subdivide]{\emph{@subdivide}}}.
+
+\end{itemize}
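+
+A minimal sketch with invented names, assuming (as in the \textbf{Chapter 12} example) that the \textbf{Input} arrives as a list of file names:
+
+\begin{Verbatim}
+from ruffus import *
+import os
+
+@originate(["all.data"])
+def make_data(output_file):
+    with open(output_file, "w") as oo:
+        for ii in range(100):
+            oo.write("%d\n" % ii)
+
+# the number of ".chunk" files is only known at run time
+@split(make_data, "*.chunk")
+def split_data(input_files, output_files):
+    # clean up chunks left over from previous runs
+    for oo in output_files:
+        os.unlink(oo)
+    lines = open(input_files[0]).readlines()
+    # 30 lines per chunk
+    for ii in range(0, len(lines), 30):
+        with open("%d.chunk" % (ii // 30), "w") as oo:
+            oo.writelines(lines[ii:ii + 30])
+
+pipeline_run([split_data])
+\end{Verbatim}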
+
+
+\subsection{\emph{@subdivide}}
+\label{tutorials/new_tutorial/decorators_compendium:subdivide}\begin{itemize}
+\item {}
+A \textbf{many to even more} operator.
+
+\item {}
+It takes each of multiple \textbf{Input}, and further subdivides them.
+
+\item {}
+Uses {\hyperref[decorators/indicator_objects:decorators-suffix]{\emph{suffix()}}}, {\hyperref[decorators/indicator_objects:decorators-formatter]{\emph{formatter()}}} or {\hyperref[decorators/indicator_objects:decorators-regex]{\emph{regex()}}} to generate \textbf{Output} names from its \textbf{Input} files but, like {\hyperref[decorators/split:decorators-split]{\emph{@split}}}, we don't have to decide ahead of time
+\emph{how many} smaller pieces each \textbf{Input} should be further divided into. For example, one large \textbf{Input} file might be subdivided into 7 pieces while the next job might
+split its \textbf{Input} into just 4 pieces (see the sketch below).
+
+\end{itemize}
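+
+A rough sketch with invented names; the \code{formatter()} arguments and the extra ``stem'' parameter below are assumptions, so check the {\hyperref[decorators/subdivide:decorators-subdivide]{\emph{@subdivide}}} syntax page for the authoritative form:
+
+\begin{Verbatim}
+from ruffus import *
+import os
+
+@originate(["x.data", "y.data"])
+def make_data(output_file):
+    with open(output_file, "w") as oo:
+        oo.write("some data\n" * 10)
+
+# each .data file is cut into an unknown number of .piece files
+@subdivide(make_data,
+           formatter(),
+           "{basename[0]}.*.piece",    # Output: a glob of pieces per Input
+           "{basename[0]}")            # extra parameter: stem used below
+def subdivide_data(input_file, output_files, stem):
+    # clean up pieces from previous runs
+    for oo in output_files:
+        os.unlink(oo)
+    lines = open(input_file).readlines()
+    # 4 lines per piece
+    for ii in range(0, len(lines), 4):
+        with open("%s.%d.piece" % (stem, ii // 4), "w") as oo:
+            oo.writelines(lines[ii:ii + 4])
+
+pipeline_run([subdivide_data])
+\end{Verbatim}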
+
+
+\subsection{\emph{@collate}}
+\label{tutorials/new_tutorial/decorators_compendium:collate}\begin{itemize}
+\item {}
+A \textbf{many to fewer} operator.
+
+\item {}
+{\hyperref[decorators/collate:decorators-collate]{\emph{@collate}}} is the opposite twin of \code{subdivide}: it takes multiple \textbf{Input} and groups or collates them into fewer bundles of \textbf{Output} (see the sketch below).
+
+\item {}
+{\hyperref[decorators/collate:decorators-collate]{\emph{@collate}}} uses {\hyperref[decorators/indicator_objects:decorators-formatter]{\emph{formatter()}}} or {\hyperref[decorators/indicator_objects:decorators-regex]{\emph{regex()}}} to generate \textbf{Output} names.
+
+\item {}
+All \textbf{Input} files which map to the same \textbf{Output} are grouped together into one job (one task function call) which
+produces one \textbf{Output}.
+
+\end{itemize}
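+
+A rough sketch (the file naming scheme is invented): files sharing the same prefix before the numeric index are grouped into a single job:
+
+\begin{Verbatim}
+from ruffus import *
+
+@originate(["setA.1.dat", "setA.2.dat", "setB.1.dat"])
+def make_files(output_file):
+    open(output_file, "w").close()
+
+# setA.1.dat + setA.2.dat -> setA.grouped   and   setB.1.dat -> setB.grouped
+@collate(make_files, regex(r"(.+)\.\d+\.dat$"), r"\1.grouped")
+def group_files(input_files, output_file):
+    with open(output_file, "w") as oo:
+        oo.write("\n".join(input_files) + "\n")
+
+pipeline_run([group_files])
+\end{Verbatim}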
+
+
+\subsection{Combinatorics}
+\label{tutorials/new_tutorial/decorators_compendium:combinatorics}\begin{quote}
+
+More rarely, we need to generate a set of \textbf{Output} based on a combination or permutation or product of the \textbf{Input}.
+
+For example, in bioinformatics, we might need to look for all instances of a set of genes in the genomes of a number of different species.
+In other words, we need to find the {\hyperref[decorators/product:decorators-product]{\emph{@product}}} of XXX genes x YYY species.
+
+\emph{Ruffus} provides decorators modelled on the ``Combinatoric generators'' in the Standard Python \href{http://docs.python.org/2/library/itertools.html}{itertools} library.
+
+To use combinatoric decorators, you need to explicitly include them from \emph{Ruffus}:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{k+kn}{import} \PYG{n+nn}{ruffus}
+\PYG{k+kn}{from} \PYG{n+nn}{ruffus} \PYG{k+kn}{import} \PYG{o}{*}
+\PYG{k+kn}{from} \PYG{n+nn}{ruffus.combinatorics} \PYG{k+kn}{import} \PYG{o}{*}
+\end{Verbatim}
+
+\scalebox{0.500000}{\includegraphics{bestiary_combinatorics.png}}
+\end{quote}
+
+
+\subsection{\emph{@product}}
+\label{tutorials/new_tutorial/decorators_compendium:product}\begin{itemize}
+\item {}
+Given several sets of \textbf{Input}, it generates all versus all \textbf{Output}. For example, if there are four sets of \textbf{Input} files, {\hyperref[decorators/product:decorators-product]{\emph{@product}}} will generate \code{WWW x XXX x YYY x ZZZ} \textbf{Output}.
+
+\item {}
+Uses {\hyperref[decorators/transform:decorators-transform]{\emph{formatter}}} to generate unique \textbf{Output} names from components parsed from \emph{any} parts of \emph{any} specified files in
+all \textbf{Input} sets. In the above example, this allows the generation of \code{WWW x XXX x YYY x ZZZ} unique names.
+
+\end{itemize}
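+
+A rough sketch with invented gene / species file names; the nested formatter indices in the \textbf{Output} template (first index = \textbf{Input} set, second index = file within the set) are an assumption, so check the {\hyperref[decorators/product:decorators-product]{\emph{@product}}} syntax page for the authoritative form:
+
+\begin{Verbatim}
+from ruffus import *
+from ruffus.combinatorics import *
+
+@originate(["geneA.gene", "geneB.gene"])
+def make_genes(output_file):
+    open(output_file, "w").close()
+
+@originate(["cow.species", "fish.species"])
+def make_species(output_file):
+    open(output_file, "w").close()
+
+# 2 genes x 2 species = 4 jobs:
+#   geneA.cow.search, geneA.fish.search, geneB.cow.search, geneB.fish.search
+@product(make_genes,   formatter(),
+         make_species, formatter(),
+         "{basename[0][0]}.{basename[1][0]}.search")
+def search_gene_in_species(input_files, output_file):
+    open(output_file, "w").close()
+
+pipeline_run([search_gene_in_species])
+\end{Verbatim}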
+
+
+\subsection{\emph{@combinations}}
+\label{tutorials/new_tutorial/decorators_compendium:combinations}\begin{itemize}
+\item {}
+Given one set of \textbf{Input}, it generates the combinations of r-length tuples among them.
+
+\item {}
+Uses {\hyperref[decorators/transform:decorators-transform]{\emph{formatter}}} to generate unique \textbf{Output} names from components parsed from \emph{any} parts of \emph{any} specified files in all \textbf{Input} sets.
+
+\item {}
+For example, given \textbf{Input} called \code{A}, \code{B} and \code{C}, it will generate: \code{A-B}, \code{A-C}, \code{B-C}
+
+\item {}
+The order of \textbf{Input} items is ignored so either \code{A-B} or \code{B-A} will be included, not both
+
+\item {}
+Self-vs-self combinations (\code{A-A}) are excluded.
+
+\end{itemize}
+
+
+\subsection{\emph{@combinations\_with\_replacement}}
+\label{tutorials/new_tutorial/decorators_compendium:combinations-with-replacement}\begin{itemize}
+\item {}
+Given one set of \textbf{Input}, it generates the combinations of r-length tuples among them but includes self-vs-self combinations.
+
+\item {}
+Uses {\hyperref[decorators/transform:decorators-transform]{\emph{formatter}}} to generate unique \textbf{Output} names from components parsed from \emph{any} parts of \emph{any} specified files in all \textbf{Input} sets.
+
+\item {}
+For example, given \textbf{Input} called \code{A}, \code{B} and \code{C}, it will generate: \code{A-A}, \code{A-B}, \code{A-C}, \code{B-B}, \code{B-C}, \code{C-C}
+
+\end{itemize}
+
+
+\subsection{\emph{@permutations}}
+\label{tutorials/new_tutorial/decorators_compendium:permutations}\begin{itemize}
+\item {}
+Given one set of \textbf{Input}, it generates the permutations of r-length tuples among them. This excludes self-vs-self combinations but includes all orderings (\code{A-B} and \code{B-A}).
+
+\item {}
+Uses {\hyperref[decorators/transform:decorators-transform]{\emph{formatter}}} to generate unique \textbf{Output} names from components parsed from \emph{any} parts of \emph{any} specified files in all \textbf{Input} sets.
+
+\item {}
+For example, given \textbf{Input} called \code{A}, \code{B} and \code{C}, it will generate: \code{A-B}, \code{A-C}, \code{B-A}, \code{B-C}, \code{C-A}, \code{C-B}
+
+\end{itemize}
+
+\index{split!Tutorial}\index{Tutorial!split}
+
+\section{\textbf{Chapter 12}: Splitting up large tasks / files with \textbf{@split}}
+\label{tutorials/new_tutorial/split:index-0}\label{tutorials/new_tutorial/split:new-manual-split}\label{tutorials/new_tutorial/split::doc}\label{tutorials/new_tutorial/split:new-manual-split-chapter-num-splitting-up-large-tasks-files-with-split}
+
+\strong{See also:}
+
+\begin{itemize}
+\item {}
+{\hyperref[tutorials/new_tutorial/manual_contents:new-manual-table-of-contents]{\emph{Manual Table of Contents}}}
+
+\item {}
+{\hyperref[decorators/split:decorators-split]{\emph{@split}}} syntax
+
+\item {}
+{\hyperref[tutorials/new_tutorial/split_code:new-manual-split-code]{\emph{Example code for this chapter}}}
+
+\end{itemize}
+
+
+
+
+\subsection{Overview}
+\label{tutorials/new_tutorial/split:overview}\begin{quote}
+
+A common requirement in computational pipelines is to split up a large task into
+small jobs which can be run on different processors (or sent to a computational
+cluster). Very often, the number of jobs depends dynamically on the size of the
+task and cannot be known beforehand.
+
+\emph{Ruffus} uses the {\hyperref[decorators/split:decorators-split]{\emph{@split}}} decorator to indicate that
+the {\hyperref[glossary:term-task]{\emph{task}}} function will produce an indeterminate number of independent \emph{Outputs} from a single \emph{Input}.
+\end{quote}
+
+
+\subsection{Example: Calculate variance for a large list of numbers in parallel}
+\label{tutorials/new_tutorial/split:example-calculate-variance-for-a-large-list-of-numbers-in-parallel}\begin{quote}
+
+Suppose we wanted to calculate the \href{http://en.wikipedia.org/wiki/Variance}{variance} for
+100,000 numbers, how can we parallelise the calculation so that we can get an answer as
+speedily as possible?
+
+We need to
+\begin{itemize}
+\item {}
+break down the problem into manageable chunks
+
+\item {}
+solve these in parallel, possibly on a computational cluster and then
+
+\item {}
+merge the partial solutions back together for a final result.
+
+\end{itemize}
+
+To complicate things, we usually do not want to hard-code the number of parallel chunks beforehand.
+The degree of parallelism is often only apparent as we process our data.
+
+\textbf{Ruffus} was designed to solve such problems which are common, for example, in bioinformatics and genomics.
+
+A flowchart for our variance problem might look like this:
+
+\scalebox{0.300000}{\includegraphics{manual_split_merge_example.jpg}}
+
+(In this toy example, we create our own starting data in \code{create\_random\_numbers()}.)
+\end{quote}
+
+
+\subsection{Output files for \emph{@split}}
+\label{tutorials/new_tutorial/split:output-files-for-split}\begin{quote}
+
+The \emph{Ruffus} decorator {\hyperref[decorators/split:decorators-split]{\emph{@split}}} is designed specifically with this run-time flexibility in mind:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n+nd}{@split}\PYG{p}{(}\PYG{n}{create\PYGZus{}random\PYGZus{}numbers}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{*.chunks}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{split\PYGZus{}problem} \PYG{p}{(}\PYG{n}{input\PYGZus{}file\PYGZus{}names}\PYG{p}{,} \PYG{n}{output\PYGZus{}files}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{pass}
+\end{Verbatim}
+
+This will split the incoming \code{input\_file\_names} into \code{NNN} \emph{outputs}, where \code{NNN} is not predetermined:
+
+The \emph{output} (second) parameter of {\hyperref[decorators/split:decorators-split]{\emph{@split}}} often contains a \href{http://docs.python.org/library/glob.html}{\emph{glob}} pattern like the \code{*.chunks} above.
+
+Only \textbf{after} the task function has completed will \emph{Ruffus} match the \textbf{Output} parameter (\code{*.chunks})
+against the files which have been created by \code{split\_problem()} (e.g. \code{1.chunks}, \code{2.chunks}, \code{3.chunks}).
+\end{quote}
+
+
+\subsection{Be careful in specifying \textbf{Output} globs}
+\label{tutorials/new_tutorial/split:be-careful-in-specifying-output-globs}\begin{quote}
+
+Note that it is your responsibility to keep the \textbf{Output} specification tight enough so that Ruffus does not
+pick up extraneous files.
+
+You can specify multiple \href{http://docs.python.org/library/glob.html}{\emph{glob}} patterns to match \emph{all} the files which are the
+result of the splitting task function. These can even cover different directories,
+or groups of file names. This is a more extreme example:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n+nd}{@split}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{input.file}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{a*.bits}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{b*.pieces}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{somewhere\PYGZus{}else/c*.stuff}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{split\PYGZus{}function} \PYG{p}{(}\PYG{n}{input\PYGZus{}filename}\PYG{p}{,} \PYG{n}{output\PYGZus{}files}\PYG{p}{)}\PYG{p}{:}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{Code to split up }\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{input.file}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{\PYGZdq{}}
+\end{Verbatim}
+\end{quote}
+\end{quote}
+
+
+\subsection{Clean up previous pipeline runs}
+\label{tutorials/new_tutorial/split:clean-up-previous-pipeline-runs}\begin{quote}
+
+Problems arise when the current directory contains the results of previous pipeline runs.
+\begin{itemize}
+\item {}
+For example, if the previous analysis involved a large data set, there might be 3 chunks: \code{1.chunks}, \code{2.chunks}, \code{3.chunks}.
+
+\item {}
+In the current analysis, there might be a smaller data set which divides into only 2 chunks, \code{1.chunks} and \code{2.chunks}.
+
+\item {}
+Unfortunately, \code{3.chunks} from the previous run is still hanging around and will be included erroneously by the glob \code{*.chunks}.
+
+\end{itemize}
+
+\begin{notice}{warning}{Warning:}
+\textbf{Your first duty in} {\hyperref[decorators/split:decorators-split]{\emph{@split}}} \textbf{tasks functions should be to clean up}
+\end{notice}
+
+To help you clean up thoroughly, \emph{Ruffus} initialises the \textbf{output} parameter to all files which match the specification.
+
+The first order of business is thus invariably to clean up (i.e. delete with \code{os.unlink}) all files in \textbf{Output}.
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{c}{\PYGZsh{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy [...]
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} split initial file}
+\PYG{c}{\PYGZsh{}}
+\PYG{n+nd}{@split}\PYG{p}{(}\PYG{n}{create\PYGZus{}random\PYGZus{}numbers}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{*.chunks}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{split\PYGZus{}problem} \PYG{p}{(}\PYG{n}{input\PYGZus{}file\PYGZus{}names}\PYG{p}{,} \PYG{n}{output\PYGZus{}files}\PYG{p}{)}\PYG{p}{:}
+ \PYG{l+s+sd}{\PYGZdq{}\PYGZdq{}\PYGZdq{}}
+\PYG{l+s+sd}{ splits random numbers file into xxx files of chunk\PYGZus{}size each}
+\PYG{l+s+sd}{ \PYGZdq{}\PYGZdq{}\PYGZdq{}}
+ \PYG{c}{\PYGZsh{}}
+ \PYG{c}{\PYGZsh{} clean up any files from previous runs}
+ \PYG{c}{\PYGZsh{}}
+ \PYG{c}{\PYGZsh{}for ff in glob.glob(\PYGZdq{}*.chunks\PYGZdq{}):}
+ \PYG{k}{for} \PYG{n}{ff} \PYG{o+ow}{in} \PYG{n}{output\PYGZus{}files}\PYG{p}{:}
+ \PYG{n}{os}\PYG{o}{.}\PYG{n}{unlink}\PYG{p}{(}\PYG{n}{ff}\PYG{p}{)}
+\end{Verbatim}
+
+(The first time you run the example code, \code{*.chunks} will match nothing and \code{output\_files} will be initialised to an empty list.)
+\end{quote}
+
+
+\subsection{1 to many}
+\label{tutorials/new_tutorial/split:to-many}\label{tutorials/new_tutorial/split:new-manual-split-one-to-many}\begin{quote}
+
+{\hyperref[decorators/split:decorators-split]{\emph{@split}}} is a one to many operator because its
+outputs are a list of \emph{independent} items.
+
+If {\hyperref[decorators/split:decorators-split]{\emph{@split}}} generates 5 files, then this will lead to 5 jobs downstream.
+
+This means we can just connect our old friend {\hyperref[decorators/transform:decorators-transform]{\emph{@transform}}} to our pipeline
+and the results of {\hyperref[decorators/split:decorators-split]{\emph{@split}}} will be analysed in parallel. This code should look
+familiar:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{c}{\PYGZsh{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy [...]
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} Calculate sum and sum of squares for each chunk file}
+\PYG{c}{\PYGZsh{}}
+\PYG{n+nd}{@transform}\PYG{p}{(}\PYG{n}{split\PYGZus{}problem}\PYG{p}{,} \PYG{n}{suffix}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.chunks}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.sums}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{sum\PYGZus{}of\PYGZus{}squares} \PYG{p}{(}\PYG{n}{input\PYGZus{}file\PYGZus{}name}\PYG{p}{,} \PYG{n}{output\PYGZus{}file\PYGZus{}name}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{pass}
+\end{Verbatim}
+\end{quote}
+
+Which results in output like this:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{p}{)}
+\PYG{g+go}{ Job = [[random\PYGZus{}numbers.list] \PYGZhy{}\PYGZgt{} *.chunks] completed}
+\PYG{g+go}{Completed Task = split\PYGZus{}problem}
+\PYG{g+go}{ Job = [1.chunks \PYGZhy{}\PYGZgt{} 1.sums] completed}
+\PYG{g+go}{ Job = [10.chunks \PYGZhy{}\PYGZgt{} 10.sums] completed}
+\PYG{g+go}{ Job = [2.chunks \PYGZhy{}\PYGZgt{} 2.sums] completed}
+\PYG{g+go}{ Job = [3.chunks \PYGZhy{}\PYGZgt{} 3.sums] completed}
+\PYG{g+go}{ Job = [4.chunks \PYGZhy{}\PYGZgt{} 4.sums] completed}
+\PYG{g+go}{ Job = [5.chunks \PYGZhy{}\PYGZgt{} 5.sums] completed}
+\PYG{g+go}{ Job = [6.chunks \PYGZhy{}\PYGZgt{} 6.sums] completed}
+\PYG{g+go}{ Job = [7.chunks \PYGZhy{}\PYGZgt{} 7.sums] completed}
+\PYG{g+go}{ Job = [8.chunks \PYGZhy{}\PYGZgt{} 8.sums] completed}
+\PYG{g+go}{ Job = [9.chunks \PYGZhy{}\PYGZgt{} 9.sums] completed}
+\PYG{g+go}{Completed Task = sum\PYGZus{}of\PYGZus{}squares}
+\end{Verbatim}
+\end{quote}
+
+Have a look at the {\hyperref[tutorials/new_tutorial/split_code:new-manual-split-code]{\emph{Example code for this chapter}}}
+\end{quote}
+
+
+\subsection{Nothing to many}
+\label{tutorials/new_tutorial/split:new-manual-split-nothing-to-many}\label{tutorials/new_tutorial/split:nothing-to-many}\begin{quote}
+
+Normally we would use {\hyperref[tutorials/new_tutorial/originate:new-manual-originate]{\emph{@originate}}} to create files from
+scratch, for example at the beginning of the pipeline.
+
+However, sometimes it is not possible to determine ahead of time how many files you
+will be creating from scratch. {\hyperref[decorators/split:decorators-split]{\emph{@split}}} can be useful even in such cases:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{k+kn}{from} \PYG{n+nn}{random} \PYG{k+kn}{import} \PYG{n}{randint}
+\PYG{k+kn}{from} \PYG{n+nn}{ruffus} \PYG{k+kn}{import} \PYG{o}{*}
+\PYG{k+kn}{import} \PYG{n+nn}{os}
+
+\PYG{c}{\PYGZsh{} Create between 2 and 5 files}
+\PYG{n+nd}{@split}\PYG{p}{(}\PYG{n+nb+bp}{None}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{*.start}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{create\PYGZus{}initial\PYGZus{}files}\PYG{p}{(}\PYG{n}{no\PYGZus{}input\PYGZus{}file}\PYG{p}{,} \PYG{n}{output\PYGZus{}files}\PYG{p}{)}\PYG{p}{:}
+ \PYG{c}{\PYGZsh{} cleanup first}
+ \PYG{k}{for} \PYG{n}{oo} \PYG{o+ow}{in} \PYG{n}{output\PYGZus{}files}\PYG{p}{:}
+ \PYG{n}{os}\PYG{o}{.}\PYG{n}{unlink}\PYG{p}{(}\PYG{n}{oo}\PYG{p}{)}
+ \PYG{c}{\PYGZsh{} make new files}
+ \PYG{k}{for} \PYG{n}{ii} \PYG{o+ow}{in} \PYG{n+nb}{range}\PYG{p}{(}\PYG{n}{randint}\PYG{p}{(}\PYG{l+m+mi}{2}\PYG{p}{,}\PYG{l+m+mi}{5}\PYG{p}{)}\PYG{p}{)}\PYG{p}{:}
+ \PYG{n+nb}{open}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s+si}{\PYGZpc{}d}\PYG{l+s}{.start}\PYG{l+s}{\PYGZdq{}} \PYG{o}{\PYGZpc{}} \PYG{n}{ii}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+
+\PYG{n+nd}{@transform}\PYG{p}{(}\PYG{n}{create\PYGZus{}initial\PYGZus{}files}\PYG{p}{,} \PYG{n}{suffix}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.start}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.processed}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{process\PYGZus{}files}\PYG{p}{(}\PYG{n}{input\PYGZus{}file}\PYG{p}{,} \PYG{n}{output\PYGZus{}file}\PYG{p}{)}\PYG{p}{:}
+ \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+
+\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{p}{)}
+\end{Verbatim}
+
+Giving:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{p}{)}
+\PYG{g+go}{ Job = [None \PYGZhy{}\PYGZgt{} *.start] completed}
+\PYG{g+go}{Completed Task = create\PYGZus{}initial\PYGZus{}files}
+\PYG{g+go}{ Job = [0.start \PYGZhy{}\PYGZgt{} 0.processed] completed}
+\PYG{g+go}{ Job = [1.start \PYGZhy{}\PYGZgt{} 1.processed] completed}
+\PYG{g+go}{Completed Task = process\PYGZus{}files}
+\end{Verbatim}
+\end{quote}
+\end{quote}
+
+\index{merge!Tutorial}\index{Tutorial!merge}
+
+\section{\textbf{Chapter 13}: \texttt{@merge} multiple input into a single result}
+\label{tutorials/new_tutorial/merge:index-0}\label{tutorials/new_tutorial/merge:new-manual-merge}\label{tutorials/new_tutorial/merge::doc}\label{tutorials/new_tutorial/merge:new-manual-merge-chapter-num-merge-multiple-input-into-a-single-result}
+
+\strong{See also:}
+
+\begin{itemize}
+\item {}
+{\hyperref[tutorials/new_tutorial/manual_contents:new-manual-table-of-contents]{\emph{Manual Table of Contents}}}
+
+\item {}
+{\hyperref[decorators/merge:decorators-merge]{\emph{@merge}}} syntax
+
+\item {}
+{\hyperref[tutorials/new_tutorial/merge_code:new-manual-merge-code]{\emph{Example code for this chapter}}}
+
+\end{itemize}
+
+
+
+
+\subsection{Overview of \emph{@merge}}
+\label{tutorials/new_tutorial/merge:overview-of-merge}\begin{quote}
+
+The {\hyperref[tutorials/new_tutorial/split:new-manual-split]{\emph{previous chapter}}} explained how \textbf{Ruffus} allows large
+jobs to be split into small pieces with {\hyperref[decorators/split:decorators-split]{\emph{@split}}} and analysed
+in parallel using, for example, our old friend {\hyperref[decorators/transform:decorators-transform]{\emph{@transform}}}.
+
+Having done this, our next task is to recombine the fragments into a seamless whole.
+
+This is the role of the {\hyperref[decorators/merge:decorators-merge]{\emph{@merge}}} decorator.
+\end{quote}
+
+
+\subsection{\emph{@merge} is a many to one operator}
+\label{tutorials/new_tutorial/merge:merge-is-a-many-to-one-operator}\begin{quote}
+
+{\hyperref[decorators/merge:decorators-merge]{\emph{@merge}}} takes multiple \emph{inputs} and produces a single \emph{output}. \textbf{Ruffus}
+is again agnostic as to the sort of data contained within this single \emph{output}. It can be a single
+(string) file name, an arbitrarily complicated nested structure with numbers, objects etc.,
+or even a list.
+
+The main thing is that downstream tasks will interpret this output as a single entity leading to a single
+job.
+
+{\hyperref[decorators/split:decorators-split]{\emph{@split}}} and {\hyperref[decorators/merge:decorators-merge]{\emph{@merge}}} are, in other words, about network topology.
+
+Because of this, {\hyperref[decorators/merge:decorators-merge]{\emph{@merge}}} is also very useful for summarising the progress
+of our pipeline. At key selected points, we can gather the multitude of disparate \emph{inputs}
+and {\hyperref[decorators/merge:decorators-merge]{\emph{@merge}}} them into a single set of summaries.
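+As a minimal sketch (with hypothetical task and file names, not the worked example below), the single
+\emph{output} of an \code{@merge} task gives rise to a single downstream job:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+@merge(previous_task, "all.summary")
+def summarise(input_file_names, output_file):
+    pass
+
+# the single output of summarise() is treated as one entity: one downstream job
+@transform(summarise, suffix(".summary"), ".report")
+def report_summary(input_file, output_file):
+    pass
+\end{Verbatim}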
+\end{quote}
+
+
+\subsection{Example: Combining partial solutions: Calculating variances}
+\label{tutorials/new_tutorial/merge:example-combining-partial-solutions-calculating-variances}\begin{quote}
+
+In the {\hyperref[tutorials/new_tutorial/split:new-manual-split]{\emph{previous chapter}}}, we had almost completed all the pieces of our flowchart:
+
+\scalebox{0.300000}{\includegraphics{manual_split_merge_example.jpg}}
+
+What remains is to take the partial solutions from the different \code{.sums} files
+and turn these into the variance as follows:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n}{variance} \PYG{o}{=} \PYG{p}{(}\PYG{n}{sum\PYGZus{}squared} \PYG{o}{\PYGZhy{}} \PYG{n+nb}{sum} \PYG{o}{*} \PYG{n+nb}{sum} \PYG{o}{/} \PYG{n}{N}\PYG{p}{)}\PYG{o}{/}\PYG{n}{N}
+\end{Verbatim}
+
+where \code{N} is the number of values
+
+See the \href{http://en.wikipedia.org/wiki/Algorithms\_for\_calculating\_variance}{wikipedia} entry for a discussion of
+why this is a very naive approach.
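+As a quick worked check (these numbers are illustrative, not from the manual), for the three values 1, 2 and 3:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+sum         = 1 + 2 + 3            = 6
+sum_squared = 1 + 4 + 9            = 14
+N           = 3
+variance    = (14 - 6 * 6 / 3) / 3 = 2 / 3
+\end{Verbatim}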
+\end{quote}
+
+To do this, all we have to do is iterate through all the values in \code{*.sums},
+add up the \code{sums} and \code{sum\_squared}, and apply the above (naive) formula.
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} @merge files together}
+\PYG{c}{\PYGZsh{}}
+\PYG{n+nd}{@merge}\PYG{p}{(}\PYG{n}{sum\PYGZus{}of\PYGZus{}squares}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{variance.result}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{calculate\PYGZus{}variance} \PYG{p}{(}\PYG{n}{input\PYGZus{}file\PYGZus{}names}\PYG{p}{,} \PYG{n}{output\PYGZus{}file\PYGZus{}name}\PYG{p}{)}\PYG{p}{:}
+ \PYG{l+s+sd}{\PYGZdq{}\PYGZdq{}\PYGZdq{}}
+\PYG{l+s+sd}{ Calculate variance naively}
+\PYG{l+s+sd}{ \PYGZdq{}\PYGZdq{}\PYGZdq{}}
+ \PYG{c}{\PYGZsh{}}
+ \PYG{c}{\PYGZsh{} initialise variables}
+ \PYG{c}{\PYGZsh{}}
+ \PYG{n}{all\PYGZus{}sum\PYGZus{}squared} \PYG{o}{=} \PYG{l+m+mf}{0.0}
+ \PYG{n}{all\PYGZus{}sum} \PYG{o}{=} \PYG{l+m+mf}{0.0}
+ \PYG{n}{all\PYGZus{}cnt\PYGZus{}values} \PYG{o}{=} \PYG{l+m+mf}{0.0}
+ \PYG{c}{\PYGZsh{}}
+ \PYG{c}{\PYGZsh{} added up all the sum\PYGZus{}squared, and sum and cnt\PYGZus{}values from all the chunks}
+ \PYG{c}{\PYGZsh{}}
+ \PYG{k}{for} \PYG{n}{input\PYGZus{}file\PYGZus{}name} \PYG{o+ow}{in} \PYG{n}{input\PYGZus{}file\PYGZus{}names}\PYG{p}{:}
+ \PYG{n}{sum\PYGZus{}squared}\PYG{p}{,} \PYG{n+nb}{sum}\PYG{p}{,} \PYG{n}{cnt\PYGZus{}values} \PYG{o}{=} \PYG{n+nb}{map}\PYG{p}{(}\PYG{n+nb}{float}\PYG{p}{,} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{input\PYGZus{}file\PYGZus{}name}\PYG{p}{)}\PYG{o}{.}\PYG{n}{readlines}\PYG{p}{(}\PYG{p}{)}\PYG{p}{)}
+ \PYG{n}{all\PYGZus{}sum\PYGZus{}squared} \PYG{o}{+}\PYG{o}{=} \PYG{n}{sum\PYGZus{}squared}
+ \PYG{n}{all\PYGZus{}sum} \PYG{o}{+}\PYG{o}{=} \PYG{n+nb}{sum}
+ \PYG{n}{all\PYGZus{}cnt\PYGZus{}values} \PYG{o}{+}\PYG{o}{=} \PYG{n}{cnt\PYGZus{}values}
+ \PYG{n}{all\PYGZus{}mean} \PYG{o}{=} \PYG{n}{all\PYGZus{}sum} \PYG{o}{/} \PYG{n}{all\PYGZus{}cnt\PYGZus{}values}
+ \PYG{n}{variance} \PYG{o}{=} \PYG{p}{(}\PYG{n}{all\PYGZus{}sum\PYGZus{}squared} \PYG{o}{\PYGZhy{}} \PYG{n}{all\PYGZus{}sum} \PYG{o}{*} \PYG{n}{all\PYGZus{}mean}\PYG{p}{)}\PYG{o}{/}\PYG{p}{(}\PYG{n}{all\PYGZus{}cnt\PYGZus{}values}\PYG{p}{)}
+ \PYG{c}{\PYGZsh{}}
+ \PYG{c}{\PYGZsh{} print output}
+ \PYG{c}{\PYGZsh{}}
+ \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file\PYGZus{}name}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{o}{.}\PYG{n}{write}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s+si}{\PYGZpc{}s}\PYG{l+s+se}{\PYGZbs{}n}\PYG{l+s}{\PYGZdq{}} \PYG{o}{\PYGZpc{}} \PYG{n}{variance}\PYG{p}{)}
+\end{Verbatim}
+
+This results in the following equivalent function call:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n}{calculate\PYGZus{}variance} \PYG{p}{(}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{1.sums}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{2.sums}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{3.sums}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+                     \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{4.sums}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{5.sums}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{6.sums}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+                     \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{7.sums}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{8.sums}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{9.sums}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{10.sums}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{variance.result}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\end{Verbatim}
+\end{quote}
+
+and the following display:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{p}{)}
+\PYG{g+go}{ Job = [[1.sums, 10.sums, 2.sums, 3.sums, 4.sums, 5.sums, 6.sums, 7.sums, 8.sums, 9.sums] \PYGZhy{}\PYGZgt{} variance.result] completed}
+\PYG{g+go}{Completed Task = calculate\PYGZus{}variance}
+\end{Verbatim}
+\end{quote}
+
+The final result is in \code{variance.result}
+
+Have a look at the {\hyperref[tutorials/new_tutorial/merge_code:new-manual-merge-code]{\emph{complete example code for this chapter}}}.
+\end{quote}
+
+\index{multiprocessing!Tutorial}\index{Tutorial!multiprocessing}
+
+\section{\textbf{Chapter 14}: Multiprocessing, \texttt{drmaa} and Computation Clusters}
+\label{tutorials/new_tutorial/multiprocessing:index-0}\label{tutorials/new_tutorial/multiprocessing::doc}\label{tutorials/new_tutorial/multiprocessing:new-manual-multiprocessing}\label{tutorials/new_tutorial/multiprocessing:new-manual-multiprocessing-chapter-num-multiprocessing-drmaa-and-computation-clusters}
+
+\strong{See also:}
+
+\begin{itemize}
+\item {}
+{\hyperref[tutorials/new_tutorial/manual_contents:new-manual-table-of-contents]{\emph{Manual Table of Contents}}}
+
+\item {}
+{\hyperref[decorators/jobs_limit:decorators-jobs-limit]{\emph{@jobs\_limit}}} syntax
+
+\item {}
+{\hyperref[pipeline_functions:pipeline-functions-pipeline-run]{\emph{pipeline\_run()}}} syntax
+
+\item {}
+{\hyperref[drmaa_wrapper_functions:drmaa-wrapper-run-job]{\emph{drmaa\_wrapper.run\_job()}}} syntax
+
+\end{itemize}
+
+
+
+\begin{notice}{note}{Note:}
+Remember to look at the example code:
+\begin{itemize}
+\item {}
+{\hyperref[tutorials/new_tutorial/multiprocessing_code:new-manual-multiprocessing-code]{\emph{Chapter 14: Python Code for Multiprocessing, drmaa and Computation Clusters}}}
+
+\end{itemize}
+\end{notice}
+
+
+\subsection{Overview}
+\label{tutorials/new_tutorial/multiprocessing:overview}
+\index{pipeline\_run(multiprocess)!Tutorial}\index{Tutorial!pipeline\_run(multiprocess)}
+
+\subsubsection{Multi Processing}
+\label{tutorials/new_tutorial/multiprocessing:multi-processing}\label{tutorials/new_tutorial/multiprocessing:index-1}\begin{quote}
+
+\emph{Ruffus} uses python \href{http://docs.python.org/library/multiprocessing.html}{multiprocessing} to run
+each job in a separate process.
+
+This means that jobs do \emph{not} necessarily complete in the order of the defined parameters.
+Task hierarchies are, of course, inviolate: upstream tasks run before downstream, dependent tasks.
+
+Tasks that are independent (i.e. do not precede each other) may be run in parallel as well.
+
+The number of concurrent jobs can be set in {\hyperref[pipeline_functions:pipeline-functions-pipeline-run]{\emph{pipeline\_run}}}:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{p}{[}\PYG{n}{parallel\PYGZus{}task}\PYG{p}{]}\PYG{p}{,} \PYG{n}{multiprocess} \PYG{o}{=} \PYG{l+m+mi}{5}\PYG{p}{)}
+\end{Verbatim}
+\end{quote}
+
+If \code{multiprocess} is set to 1, jobs will be run serially in a single process.
+\end{quote}
+
+\index{data sharing across processes!Tutorial}\index{Tutorial!data sharing across processes}
+
+\subsubsection{Data sharing}
+\label{tutorials/new_tutorial/multiprocessing:index-2}\label{tutorials/new_tutorial/multiprocessing:data-sharing}\begin{quote}
+
+Running jobs in separate processes allows \emph{Ruffus} to make full use of the multiple
+processors in modern computers. However, some \href{http://docs.python.org/library/multiprocessing.html\#multiprocessing-programming}{multiprocessing guidelines}
+should be borne in mind when writing \emph{Ruffus} pipelines. In particular:
+\begin{itemize}
+\item {}
+Try not to pass large amounts of data between jobs, or at least be aware that it has to be marshalled
+across process boundaries (see the sketch after this list).
+
+\item {}
+Only data which can be \href{http://docs.python.org/library/pickle.html}{pickled} can be passed as
+parameters to \emph{Ruffus} task functions. Happily, that applies to almost any native Python data type.
+The use of the rare, unpicklable object will cause python to complain (fail) loudly when \emph{Ruffus} pipelines
+are run.
+
+\end{itemize}
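+A minimal sketch of the usual pattern (the task names are hypothetical): pass small, picklable
+parameters such as file name strings between jobs, and let each job read its bulk data from disk:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+@transform(previous_task, suffix(".input"), ".output")
+def analyse(input_file_name, output_file_name):
+    # only the (picklable) file name strings cross the process boundary;
+    # the bulk data is read and written inside the job itself
+    with open(input_file_name) as ii, open(output_file_name, "w") as oo:
+        oo.write(ii.read().upper())
+\end{Verbatim}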
+\end{quote}
+
+\index{@jobs\_limit!Tutorial}\index{Tutorial!@jobs\_limit}
+
+\subsection{Restricting parallelism with \emph{@jobs\_limit}}
+\label{tutorials/new_tutorial/multiprocessing:new-manual-jobs-limit}\label{tutorials/new_tutorial/multiprocessing:restricting-parallelism-with-jobs-limit}\label{tutorials/new_tutorial/multiprocessing:index-3}\begin{quote}
+
+Calling {\hyperref[pipeline_functions:pipeline-functions-pipeline-run]{\emph{pipeline\_run(multiprocess = NNN)}}} allows
+multiple jobs (from multiple independent tasks) to be run in parallel. However, there
+are some operations that consume so many resources that we might want them to run
+with little or no concurrency.
+
+For example, we might want to download some files via FTP, but the server limits the
+number of simultaneous requests from each IP address. Even if the rest of the pipeline is
+running 100 jobs in parallel, the FTP downloads must be restricted to 2 files at a time.
+We would really like to keep the rest of the pipeline running as is, but let this one
+operation run either serially or with little concurrency.
+\begin{itemize}
+\item {}
+{\hyperref[pipeline_functions:pipeline-functions-pipeline-run]{\emph{pipeline\_run(multiprocess = NNN)}}} sets the pipeline-wide concurrency but
+
+\item {}
+{\hyperref[decorators/jobs_limit:decorators-jobs-limit]{\emph{@jobs\_limit(MMM)}}} sets concurrency at \code{MMM} only for jobs in the decorated task.
+
+\end{itemize}
+
+The optional name (e.g. \code{@jobs\_limit(3, "ftp\_download\_limit")}) allows the same limit to
+be shared across multiple tasks. To be pedantic: a limit of \code{3} jobs at a time would be applied
+across all tasks which have a \code{@jobs\_limit} named \code{"ftp\_download\_limit"}.
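+A minimal sketch of a named, shared limit (the task and file names below are hypothetical, not taken
+from the example code):
+
+\begin{Verbatim}[commandchars=\\\{\}]
+# at most 2 concurrent jobs across every task sharing this named limit
+@jobs_limit(2, "ftp_download_limit")
+@transform(["1.url", "2.url", "3.url"], suffix(".url"), ".downloaded")
+def download_file(input_file, output_file):
+    pass
+
+# the rest of the pipeline may still run up to 10 jobs in parallel
+pipeline_run(multiprocess = 10)
+\end{Verbatim}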
+
+The {\hyperref[tutorials/new_tutorial/multiprocessing_code:new-manual-multiprocessing-code]{\emph{example code}}} uses up to 10 processes across the
+pipeline, but runs the \code{stage1\_big} and \code{stage1\_small} tasks 3 at a time (shared across
+both tasks). \code{stage2} jobs run 5 at a time.
+\end{quote}
+
+
+\subsection{Using \texttt{drmaa} to dispatch work to Computational Clusters or Grid engines from Ruffus jobs}
+\label{tutorials/new_tutorial/multiprocessing:using-drmaa-to-dispatch-work-to-computational-clusters-or-grid-engines-from-ruffus-jobs}\label{tutorials/new_tutorial/multiprocessing:new-manual-ruffus-drmaa-wrapper-run-job}\begin{quote}
+
+Ruffus has been widely used to manage work on computational clusters or grid engines. Though Ruffus
+task functions cannot (yet!) run natively and transparently on remote cluster nodes, it is trivial
+to dispatch work across the cluster.
+
+From version 2.4 onwards, Ruffus includes an optional helper module which interacts with
+\href{https://github.com/drmaa-python/drmaa-python}{python bindings} for the widely used \href{http://en.wikipedia.org/wiki/DRMAA}{drmaa}
+Open Grid Forum API specification. This allows jobs to dispatch work to a computational cluster and wait until it completes.
+
+Here are the necessary steps:
+\end{quote}
+
+
+\subsubsection{1) Use a shared drmaa session:}
+\label{tutorials/new_tutorial/multiprocessing:use-a-shared-drmaa-session}\begin{quote}
+
+Before your pipeline runs:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} start shared drmaa session for all jobs / tasks in pipeline}
+\PYG{c}{\PYGZsh{}}
+\PYG{k+kn}{import} \PYG{n+nn}{drmaa}
+\PYG{n}{drmaa\PYGZus{}session} \PYG{o}{=} \PYG{n}{drmaa}\PYG{o}{.}\PYG{n}{Session}\PYG{p}{(}\PYG{p}{)}
+\PYG{n}{drmaa\PYGZus{}session}\PYG{o}{.}\PYG{n}{initialize}\PYG{p}{(}\PYG{p}{)}
+\end{Verbatim}
+
+Cleanup after your pipeline completes:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} pipeline functions go here}
+\PYG{c}{\PYGZsh{}}
+\PYG{k}{if} \PYG{n}{\PYGZus{}\PYGZus{}name\PYGZus{}\PYGZus{}} \PYG{o}{==} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{\PYGZus{}\PYGZus{}main\PYGZus{}\PYGZus{}}\PYG{l+s}{\PYGZsq{}}\PYG{p}{:}
+ \PYG{n}{drmaa\PYGZus{}session}\PYG{o}{.}\PYG{n}{exit}\PYG{p}{(}\PYG{p}{)}
+\end{Verbatim}
+\end{quote}
+
+
+\subsubsection{2) import \texttt{ruffus.drmaa\_wrapper}}
+\label{tutorials/new_tutorial/multiprocessing:import-ruffus-drmaa-wrapper}\begin{quote}
+\begin{itemize}
+\item {}
+The optional \code{ruffus.drmaa\_wrapper} module needs to be imported explicitly:
+
+\end{itemize}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{c}{\PYGZsh{} imported ruffus.drmaa\PYGZus{}wrapper explicitly}
+\PYG{k+kn}{from} \PYG{n+nn}{ruffus.drmaa\PYGZus{}wrapper} \PYG{k+kn}{import} \PYG{n}{run\PYGZus{}job}\PYG{p}{,} \PYG{n}{error\PYGZus{}drmaa\PYGZus{}job}
+\end{Verbatim}
+\end{quote}
+
+
+\subsubsection{3) call \emph{drmaa\_wrapper.run\_job()}}
+\label{tutorials/new_tutorial/multiprocessing:call-drmaa-wrapper-run-job}\begin{quote}
+
+{\hyperref[drmaa_wrapper_functions:drmaa-wrapper-run-job]{\emph{drmaa\_wrapper.run\_job()}}} dispatches the work to a cluster node within a normal Ruffus job and waits for completion.
+
+This is the equivalent of \href{http://docs.python.org/2/library/os.html\#os.system}{os.system} or
+\href{http://docs.python.org/2/library/subprocess.html\#subprocess.check\_output}{subprocess.check\_output}, but the code will run remotely as specified:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+ \PYG{c}{\PYGZsh{} ruffus.drmaa\PYGZus{}wrapper.run\PYGZus{}job}
+ \PYG{n}{stdout\PYGZus{}res}\PYG{p}{,} \PYG{n}{stderr\PYGZus{}res} \PYG{o}{=} \PYG{n}{run\PYGZus{}job}\PYG{p}{(}\PYG{n}{cmd\PYGZus{}str} \PYG{o}{=} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{touch }\PYG{l+s}{\PYGZdq{}} \PYG{o}{+} \PYG{n}{output\PYGZus{}file}\PYG{p}{,}
+ \PYG{n}{job\PYGZus{}name} \PYG{o}{=} \PYG{n}{job\PYGZus{}name}\PYG{p}{,}
+ \PYG{n}{logger} \PYG{o}{=} \PYG{n}{logger}\PYG{p}{,}
+ \PYG{n}{drmaa\PYGZus{}session} \PYG{o}{=} \PYG{n}{drmaa\PYGZus{}session}\PYG{p}{,}
+ \PYG{n}{run\PYGZus{}locally} \PYG{o}{=} \PYG{n}{options}\PYG{o}{.}\PYG{n}{local\PYGZus{}run}\PYG{p}{,}
+ \PYG{n}{job\PYGZus{}other\PYGZus{}options} \PYG{o}{=} \PYG{n}{job\PYGZus{}other\PYGZus{}options}\PYG{p}{)}
+\end{Verbatim}
+
+The complete code is available {\hyperref[tutorials/new_tutorial/multiprocessing_code:using-ruffus-drmaa-wrapper]{\emph{here}}}.
+\end{quote}
+\begin{itemize}
+\item {}
+{\hyperref[drmaa_wrapper_functions:drmaa-wrapper-run-job]{\emph{drmaa\_wrapper.run\_job()}}} is a convenience wrapper around the \href{https://github.com/drmaa-python/drmaa-python}{python drmaa bindings}
+\href{http://drmaa-python.readthedocs.org/en/latest/tutorials.html\#waiting-for-a-job}{RunJob} function.
+It takes care of writing drmaa \emph{job templates} for you.
+
+\item {}
+Each call creates a separate drmaa \emph{job template}.
+
+\end{itemize}
+\end{quote}
+
+
+\subsubsection{4) Use multithread: \emph{pipeline\_run(multithread = NNN)}}
+\label{tutorials/new_tutorial/multiprocessing:use-multithread-pipeline-run-multithread-nnn}\begin{quote}
+
+\begin{notice}{warning}{Warning:}
+{\hyperref[drmaa_wrapper_functions:drmaa-wrapper-run-job]{\emph{drmaa\_wrapper.run\_job()}}}
+\begin{quote}
+
+\textbf{requires} \code{pipeline\_run} {\hyperref[pipeline_functions:pipeline-functions-pipeline-run]{\emph{(multithread = NNN)}}}
+
+\textbf{and will not work with} \code{pipeline\_run} {\hyperref[pipeline_functions:pipeline-functions-pipeline-run]{\emph{(multiprocess = NNN)}}}
+\end{quote}
+\end{notice}
+\begin{description}
+\item[{Using multithreading rather than multiprocessing}] \leavevmode\begin{itemize}
+\item {}
+allows the drmaa session to be shared
+
+\item {}
+prevents ``processing storms'' which lock up the queue submission node when hundreds or thousands of grid engine / cluster commands complete at the same time.
+
+\end{itemize}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n}{pipeline\PYGZus{}run} \PYG{p}{(}\PYG{o}{.}\PYG{o}{.}\PYG{o}{.}\PYG{p}{,} \PYG{n}{multithread} \PYG{o}{=} \PYG{n}{NNN}\PYG{p}{,} \PYG{o}{.}\PYG{o}{.}\PYG{o}{.}\PYG{p}{)}
+\end{Verbatim}
+
+or if you are using ruffus.cmdline:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n}{cmdline}\PYG{o}{.}\PYG{n}{run} \PYG{p}{(}\PYG{n}{options}\PYG{p}{,} \PYG{n}{multithread} \PYG{o}{=} \PYG{n}{options}\PYG{o}{.}\PYG{n}{jobs}\PYG{p}{)}
+\end{Verbatim}
+
+\end{description}
+
+Normally, multithreading reduces the amount of parallelism in python due to the python \href{http://en.wikipedia.org/wiki/Global\_Interpreter\_Lock}{Global Interpreter Lock (GIL)}.
+However, as the workload runs almost entirely on another computer (i.e. a cluster / grid engine node) with a separate python interpreter, any cost-benefit calculations of this sort are moot.
+\end{quote}
+
+
+\subsubsection{5) Develop locally}
+\label{tutorials/new_tutorial/multiprocessing:develop-locally}\begin{quote}
+
+{\hyperref[drmaa_wrapper_functions:drmaa-wrapper-run-job]{\emph{drmaa\_wrapper.run\_job()}}} provides two convenience parameters for developing grid engine pipelines:
+\begin{itemize}
+\item {}
+commands can run locally, i.e. on the local machine rather than on cluster nodes:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n}{run\PYGZus{}job}\PYG{p}{(}\PYG{n}{cmd\PYGZus{}str}\PYG{p}{,} \PYG{n}{run\PYGZus{}locally} \PYG{o}{=} \PYG{n+nb+bp}{True}\PYG{p}{)}
+\end{Verbatim}
+\end{quote}
+
+\item {}
+Output files can be \href{http://en.wikipedia.org/wiki/Touch\_(Unix)}{touch}ed, i.e. given the appearance of the work having been done without actually running the commands
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n}{run\PYGZus{}job}\PYG{p}{(}\PYG{n}{cmd\PYGZus{}str}\PYG{p}{,} \PYG{n}{touch\PYGZus{}only} \PYG{o}{=} \PYG{n+nb+bp}{True}\PYG{p}{)}
+\end{Verbatim}
+\end{quote}
+
+\end{itemize}
+\end{quote}
+
+\index{pipeline\_run touch mode!Tutorial}\index{Tutorial!pipeline\_run touch mode}\index{touch mode pipeline\_run!Tutorial}\index{Tutorial!touch mode pipeline\_run}
+
+\subsection{Forcing a pipeline to appear up to date}
+\label{tutorials/new_tutorial/multiprocessing:index-4}\label{tutorials/new_tutorial/multiprocessing:forcing-a-pipeline-to-appear-up-to-date}\label{tutorials/new_tutorial/multiprocessing:new-manual-pipeline-run-touch}\begin{quote}
+
+Sometimes, we \emph{know} that a pipeline has run to completion and that everything is up-to-date. However, on the basis
+of file modification times, Ruffus still insists that parts of it need to be rerun.
+
+For example, sometimes a trivial accounting modification needs to be made to a data file.
+Even though you know that this changes nothing in practice, Ruffus will detect the modification and
+ask to rerun everything from that point forwards.
+
+One way to convince Ruffus that everything is fine is to manually \href{http://en.wikipedia.org/wiki/Touch\_(Unix)}{touch}
+all subsequent data files one by one in sequence so that the file timestamps follow the appropriate progression.
+
+You can also ask \emph{Ruffus} to do this automatically for you by running the pipeline in \href{http://en.wikipedia.org/wiki/Touch\_(Unix)}{touch}
+mode:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(} \PYG{n}{touch\PYGZus{}files\PYGZus{}only} \PYG{o}{=} \PYG{n+nb+bp}{True}\PYG{p}{)}
+\end{Verbatim}
+\end{quote}
+
+{\hyperref[pipeline_functions:pipeline-functions-pipeline-run]{\emph{pipeline\_run}}} will run your pipeline script normally, working backwards from any specified final target (or else the
+last task in the pipeline) to work out where it should begin running, i.e. at the first out-of-date data files.
+After that point, instead of calling your pipeline task functions, each missing or out-of-date file is
+\href{http://en.wikipedia.org/wiki/Touch\_(Unix)}{touch-ed} in turn so that the file modification dates
+follow on successively.
+
+This turns out to be a useful way to check that your pipeline runs correctly, by creating a series of dummy (empty) files.
+However, \emph{Ruffus} cannot read your mind to know which files to create from {\hyperref[decorators/split:decorators-split]{\emph{@split}}} or
+{\hyperref[decorators/subdivide:decorators-subdivide]{\emph{@subdivide}}} tasks.
+
+Using {\hyperref[tutorials/new_tutorial/command_line:new-manual-cmdline]{\emph{ruffus.cmdline}}} from version 2.4, you can just specify:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+your script \PYGZhy{}\PYGZhy{}touch\PYGZus{}files\PYGZus{}only \PYG{o}{[}\PYGZhy{}\PYGZhy{}other\PYGZus{}options\PYGZus{}of\PYGZus{}your\PYGZus{}own\PYGZus{}etc\PYG{o}{]}
+\end{Verbatim}
+\end{quote}
+\end{quote}
+
+\index{logging!Tutorial}\index{Tutorial!logging}
+
+\section{\textbf{Chapter 15}: Logging progress through a pipeline}
+\label{tutorials/new_tutorial/logging:new-manual-logging-chapter-num-logging-progress-through-a-pipeline}\label{tutorials/new_tutorial/logging:index-0}\label{tutorials/new_tutorial/logging::doc}\label{tutorials/new_tutorial/logging:new-manual-logging}
+
+\strong{See also:}
+
+\begin{itemize}
+\item {}
+{\hyperref[tutorials/new_tutorial/manual_contents:new-manual-table-of-contents]{\emph{Manual Table of Contents}}}
+
+\end{itemize}
+
+
+
+\begin{notice}{note}{Note:}
+Remember to look at the {\hyperref[tutorials/new_tutorial/logging_code:new-manual-logging-code]{\emph{example code}}}
+\end{notice}
+
+
+\subsection{Overview}
+\label{tutorials/new_tutorial/logging:overview}\begin{quote}
+
+There are two parts to logging with \textbf{Ruffus}:
+\begin{itemize}
+\item {}
+Logging progress through the pipeline
+\begin{quote}
+
+This produces the sort of output displayed in this manual:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{p}{[}\PYG{n}{parallel\PYGZus{}io\PYGZus{}task}\PYG{p}{]}\PYG{p}{)}
+\PYG{g+go}{Task = parallel\PYGZus{}io\PYGZus{}task}
+\PYG{g+go}{ Job = [\PYGZdq{}a.1\PYGZdq{} \PYGZhy{}\PYGZgt{} \PYGZdq{}a.2\PYGZdq{}, \PYGZdq{}A file\PYGZdq{}] completed}
+\PYG{g+go}{ Job = [\PYGZdq{}b.1\PYGZdq{} \PYGZhy{}\PYGZgt{} \PYGZdq{}b.2\PYGZdq{}, \PYGZdq{}B file\PYGZdq{}] unnecessary: already up to date}
+\PYG{g+go}{Completed Task = parallel\PYGZus{}io\PYGZus{}task}
+\end{Verbatim}
+\end{quote}
+
+\item {}
+Logging your own messages from within your pipelined functions.
+\begin{quote}
+
+Because \textbf{Ruffus} may run each task function in a separate process on a separate
+CPU (multiprocessing), some attention has to be paid to how to send and
+synchronise your log messages across process boundaries.
+\end{quote}
+
+\end{itemize}
+
+We shall deal with these in turn.
+\end{quote}
+
+
+\subsection{Logging task/job completion}
+\label{tutorials/new_tutorial/logging:new-manual-logging-pipeline}\label{tutorials/new_tutorial/logging:logging-task-job-completion}\begin{quote}
+
+By default, \emph{Ruffus} logs each task and each job as it is completed to
+\href{http://docs.python.org/2/library/sys.html\#sys.stderr}{sys.stderr}, i.e.
+{\hyperref[pipeline_functions:pipeline-functions-pipeline-run]{\emph{pipeline\_run(logger = stderr\_logger)}}}.
+
+If you want to turn off all tracking messages as the pipeline runs, apart from setting \code{verbose = 0}, you
+can also use the aptly named Ruffus \code{black\_hole\_logger}:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{n}{logger} \PYG{o}{=} \PYG{n}{black\PYGZus{}hole\PYGZus{}logger}\PYG{p}{)}
+\end{Verbatim}
+\end{quote}
+\end{quote}
+
+\index{pipeline\_run verbosity!Tutorial}\index{Tutorial!pipeline\_run verbosity}
+
+\subsubsection{Controlling logging verbosity}
+\label{tutorials/new_tutorial/logging:controlling-logging-verbosity}\label{tutorials/new_tutorial/logging:index-1}\begin{quote}
+
+{\hyperref[pipeline_functions:pipeline-functions-pipeline-run]{\emph{pipeline\_run()}}} currently has five levels of verbosity, set by the optional \code{verbose}
+parameter which defaults to 1:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+verbose = 0: nothing
+verbose = 1: logs completed jobs/tasks;
+verbose = 2: logs up to date jobs in incomplete tasks
+verbose = 3: logs reason for running job
+verbose = 4: logs messages useful only for debugging ruffus pipeline code
+\end{Verbatim}
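+For example, to log the reason why each job is being run:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+pipeline_run(verbose = 3)
+\end{Verbatim}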
+
+Values of \code{verbose} greater than \code{5} are intended for debugging \textbf{Ruffus} by the developers, and the details
+are liable to change from release to release.
+\end{quote}
+\end{quote}
+
+\index{logging with ruffus.cmdline!Tutorial}\index{Tutorial!logging with ruffus.cmdline}
+
+\subsection{Use \emph{ruffus.cmdline}}
+\label{tutorials/new_tutorial/logging:use-ruffus-cmdline}\label{tutorials/new_tutorial/logging:index-2}\begin{quote}
+
+As always, it is easiest to use {\hyperref[tutorials/new_tutorial/command_line:new-manual-cmdline]{\emph{ruffus.cmdline}}}.
+
+Set your script to
+\begin{quote}
+\begin{itemize}
+\item {}
+write messages to \code{STDERR} with the \code{-{-}verbose} option and
+
+\item {}
+to a log file with the \code{-{-}log\_file} option.
+
+\end{itemize}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+ \PYG{k+kn}{from} \PYG{n+nn}{ruffus} \PYG{k+kn}{import} \PYG{o}{*}
+
+ \PYG{c}{\PYGZsh{} Python logger which can be synchronised across concurrent Ruffus tasks}
+ \PYG{n}{logger}\PYG{p}{,} \PYG{n}{logger\PYGZus{}mutex} \PYG{o}{=} \PYG{n}{cmdline}\PYG{o}{.}\PYG{n}{setup\PYGZus{}logging} \PYG{p}{(}\PYG{n}{\PYGZus{}\PYGZus{}name\PYGZus{}\PYGZus{}}\PYG{p}{,} \PYG{n}{options}\PYG{o}{.}\PYG{n}{log\PYGZus{}file}\PYG{p}{,} \PYG{n}{options}\PYG{o}{.}\PYG{n}{verbose}\PYG{p}{)}
+
+    \PYG{n+nd}{@transform}\PYG{p}{(} \PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{job1.input}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{,} \PYG{n}{suffix}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.input}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.output1}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+    \PYG{k}{def} \PYG{n+nf}{first\PYGZus{}task}\PYG{p}{(}\PYG{n}{input\PYGZus{}file}\PYG{p}{,} \PYG{n}{output\PYGZus{}file}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{pass}
+
+ \PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{n}{logger}\PYG{o}{=}\PYG{n}{logger}\PYG{p}{)}
+\end{Verbatim}
+\end{quote}
+\end{quote}
+
+\index{logging customising!Tutorial}\index{Tutorial!logging customising}
+
+\subsection{Customising logging}
+\label{tutorials/new_tutorial/logging:customising-logging}\label{tutorials/new_tutorial/logging:index-3}\begin{quote}
+
+You can also specify exactly how logging works by providing a \href{http://docs.python.org/library/logging.html}{logging} object
+to {\hyperref[pipeline_functions:pipeline-functions-pipeline-run]{\emph{pipeline\_run()}}}.
+This log object should have \code{debug()} and \code{info()} methods.
+
+Instead of writing your own, it is usually more convenient to use the python
+\href{http://docs.python.org/library/logging.html}{logging}
+module which provides logging classes with rich functionality.
+
+The {\hyperref[tutorials/new_tutorial/logging_code:new-manual-logging-code]{\emph{example code}}} sets up a logger that writes to a rotating set of files.
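+A minimal sketch of a hand-rolled logger (not the rotating-file logger of the example code), using the
+standard python \href{http://docs.python.org/library/logging.html}{logging} module:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+import logging
+
+manual_logger = logging.getLogger("my_pipeline")
+manual_logger.setLevel(logging.DEBUG)
+manual_logger.addHandler(logging.StreamHandler())
+
+# any object with debug() and info() methods will do
+pipeline_run(logger = manual_logger)
+\end{Verbatim}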
+\end{quote}
+
+\index{logging your own message!Tutorial}\index{Tutorial!logging your own message}
+
+\subsection{Log your own messages}
+\label{tutorials/new_tutorial/logging:log-your-own-messages}\label{tutorials/new_tutorial/logging:index-4}\label{tutorials/new_tutorial/logging:new-manual-logging-per-job}\begin{quote}
+
+You need to take a little care when logging your custom messages \emph{within} your pipeline.
+\begin{itemize}
+\item {}
+If your Ruffus pipeline may run in parallel, make sure that logging is synchronised.
+
+\item {}
+If your Ruffus pipeline may run across separate processes, send your logging object across process boundaries.
+
+\end{itemize}
+
+\href{http://docs.python.org/library/logging.html}{logging} objects can not be
+\href{http://docs.python.org/library/pickle.html}{pickled} and shared naively across
+processes. Instead, we need to create proxies which forward the logging to a single
+shared log.
+
+The {\hyperref[proxy_logger:proxy-logger]{\emph{ruffus.proxy\_logger}}} module provides an easy way to share
+\href{http://docs.python.org/library/logging.html}{logging} objects among
+jobs. This requires just two simple steps:
+\end{quote}
+
+\begin{notice}{note}{Note:}\begin{itemize}
+\item {}
+This is a good template for sharing \href{http://docs.python.org/2/library/pickle.html\#what-can-be-pickled-and-unpickled}{non-picklable objects}
+across processes.
+
+\end{itemize}
+\end{notice}
+
+
+\subsubsection{1. Set up logging}
+\label{tutorials/new_tutorial/logging:new-manual-sharing-proxy-object}\label{tutorials/new_tutorial/logging:set-up-logging}\begin{quote}
+
+Things are easiest if you are using \code{ruffus.cmdline}:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{c}{\PYGZsh{} standard python logger which can be synchronised across concurrent Ruffus tasks}
+\PYG{n}{logger}\PYG{p}{,} \PYG{n}{logger\PYGZus{}mutex} \PYG{o}{=} \PYG{n}{cmdline}\PYG{o}{.}\PYG{n}{setup\PYGZus{}logging} \PYG{p}{(}\PYG{n}{\PYGZus{}\PYGZus{}name\PYGZus{}\PYGZus{}}\PYG{p}{,} \PYG{n}{options}\PYG{o}{.}\PYG{n}{log\PYGZus{}file}\PYG{p}{,} \PYG{n}{options}\PYG{o}{.}\PYG{n}{verbose}\PYG{p}{)}
+\end{Verbatim}
+
+Otherwise, manually:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{k+kn}{from} \PYG{n+nn}{ruffus.proxy\PYGZus{}logger} \PYG{k+kn}{import} \PYG{o}{*}
+\PYG{p}{(}\PYG{n}{logger}\PYG{p}{,}
+ \PYG{n}{logging\PYGZus{}mutex}\PYG{p}{)} \PYG{o}{=} \PYG{n}{make\PYGZus{}shared\PYGZus{}logger\PYGZus{}and\PYGZus{}proxy} \PYG{p}{(}\PYG{n}{setup\PYGZus{}std\PYGZus{}shared\PYGZus{}logger}\PYG{p}{,}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{my\PYGZus{}logger}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{p}{\PYGZob{}}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{file\PYGZus{}name}\PYG{l+s}{\PYGZdq{}} \PYG{p}{:}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{/my/lg.log}\PYG{l+s}{\PYGZdq{}}\PYG{p}{\PYGZcb{}}\PYG{p}{)}
+\end{Verbatim}
+\end{quote}
+
+
+\subsubsection{2. Share the proxy}
+\label{tutorials/new_tutorial/logging:share-the-proxy}\begin{quote}
+
+Now, pass:
+\begin{itemize}
+\item {}
+\code{logger} (which forwards logging calls across jobs) and
+
+\item {}
+\code{logging\_mutex} (which prevents the messages of different jobs logging simultaneously
+from being jumbled up)
+
+\end{itemize}
+
+to each job:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n+nd}{@transform}\PYG{p}{(} \PYG{n}{initial\PYGZus{}file}\PYG{p}{,}
+ \PYG{n}{suffix}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.input}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.output1}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+            \PYG{n}{logger}\PYG{p}{,} \PYG{n}{logging\PYGZus{}mutex}\PYG{p}{)} \PYG{c}{\PYGZsh{} pass log and synchronisation as parameters}
+\PYG{k}{def} \PYG{n+nf}{first\PYGZus{}task}\PYG{p}{(}\PYG{n}{input\PYGZus{}file}\PYG{p}{,} \PYG{n}{output\PYGZus{}file}\PYG{p}{,}
+ \PYG{n}{logger}\PYG{p}{,} \PYG{n}{logging\PYGZus{}mutex}\PYG{p}{)}\PYG{p}{:} \PYG{c}{\PYGZsh{} pass log and synchronisation as parameters}
+ \PYG{k}{pass}
+
+ \PYG{c}{\PYGZsh{} synchronise logging}
+ \PYG{k}{with} \PYG{n}{logging\PYGZus{}mutex}\PYG{p}{:}
+ \PYG{n}{logger}\PYG{o}{.}\PYG{n}{info}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{Here we go logging...}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\end{Verbatim}
+\end{quote}
+
+\index{@subdivide!Tutorial}\index{Tutorial!@subdivide}\index{@collate!Tutorial}\index{Tutorial!@collate}
+
+\section{\textbf{Chapter 16}: \emph{@subdivide} tasks to run efficiently and regroup with \emph{@collate}}
+\label{tutorials/new_tutorial/subdivide_collate:new-manual-subdivide-collate-chapter-num-subdivide-tasks-to-run-efficiently-and-regroup-with-collate}\label{tutorials/new_tutorial/subdivide_collate:index-0}\label{tutorials/new_tutorial/subdivide_collate::doc}\label{tutorials/new_tutorial/subdivide_collate:new-manual-subdivide-collate}
+
+\strong{See also:}
+
+\begin{itemize}
+\item {}
+{\hyperref[tutorials/new_tutorial/manual_contents:new-manual-table-of-contents]{\emph{Manual Table of Contents}}}
+
+\item {}
+{\hyperref[decorators/subdivide:decorators-subdivide]{\emph{@subdivide}}} syntax
+
+\item {}
+{\hyperref[decorators/collate:decorators-collate]{\emph{@collate}}} syntax
+
+\end{itemize}
+
+
+
+
+\subsection{Overview}
+\label{tutorials/new_tutorial/subdivide_collate:overview}\begin{quote}
+
+In \textbf{Chapter 12} and \textbf{Chapter 13}, we saw how a large
+task can be {\hyperref[tutorials/new_tutorial/split:new-manual-split]{\emph{@split}}} into small jobs to be analysed efficiently
+in parallel. Ruffus can then {\hyperref[tutorials/new_tutorial/merge:new-manual-merge]{\emph{@merge}}} these back together
+to give a single, unified result.
+
+This assumes that your pipeline is processing one item at a time. Usually, however, we
+will have, for example, 10 large pieces of data in play, each of which has to be
+subdivided into smaller pieces for analysis before being put back together.
+
+This is the role of {\hyperref[decorators/subdivide:decorators-subdivide]{\emph{@subdivide}}} and {\hyperref[decorators/collate:decorators-collate]{\emph{@collate}}}.
+
+Like {\hyperref[decorators/split:decorators-split]{\emph{@split}}}, the number of output files
+{\hyperref[decorators/subdivide:decorators-subdivide]{\emph{@subdivide}}} produces for \emph{each} \textbf{Input} is not predetermined.
+
+On the other hand, these output files should be named in such a way that they can
+later be grouped back together using {\hyperref[decorators/collate:decorators-collate]{\emph{@collate}}}.
+
+This will be clearer with some worked examples.
+\end{quote}
+
+
+\subsection{\emph{@subdivide} in parallel}
+\label{tutorials/new_tutorial/subdivide_collate:new-manual-subdivide}\label{tutorials/new_tutorial/subdivide_collate:subdivide-in-parallel}\begin{quote}
+
+Let us start from 3 files with varying numbers of lines. We wish to process them two
+lines at a time, but we do not know ahead of time how long each file is:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{k+kn}{from} \PYG{n+nn}{ruffus} \PYG{k+kn}{import} \PYG{o}{*}
+\PYG{k+kn}{import} \PYG{n+nn}{os}\PYG{o}{,} \PYG{n+nn}{random}\PYG{o}{,} \PYG{n+nn}{sys}
+
+\PYG{c}{\PYGZsh{} Create files with a random number of lines}
+\PYG{n+nd}{@originate}\PYG{p}{(}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{a.start}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{b.start}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{c.start}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{create\PYGZus{}test\PYGZus{}files}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{)}\PYG{p}{:}
+ \PYG{n}{cnt\PYGZus{}lines} \PYG{o}{=} \PYG{n}{random}\PYG{o}{.}\PYG{n}{randint}\PYG{p}{(}\PYG{l+m+mi}{1}\PYG{p}{,}\PYG{l+m+mi}{3}\PYG{p}{)} \PYG{o}{*} \PYG{l+m+mi}{2}
+ \PYG{k}{with} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)} \PYG{k}{as} \PYG{n}{oo}\PYG{p}{:}
+ \PYG{k}{for} \PYG{n}{ii} \PYG{o+ow}{in} \PYG{n+nb}{range}\PYG{p}{(}\PYG{n}{cnt\PYGZus{}lines}\PYG{p}{)}\PYG{p}{:}
+ \PYG{n}{oo}\PYG{o}{.}\PYG{n}{write}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{data item = }\PYG{l+s+si}{\PYGZpc{}d}\PYG{l+s+se}{\PYGZbs{}n}\PYG{l+s}{\PYGZdq{}} \PYG{o}{\PYGZpc{}} \PYG{n}{ii}\PYG{p}{)}
+ \PYG{k}{print} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{ }\PYG{l+s+si}{\PYGZpc{}s}\PYG{l+s}{ has }\PYG{l+s+si}{\PYGZpc{}d}\PYG{l+s}{ lines}\PYG{l+s}{\PYGZdq{}} \PYG{o}{\PYGZpc{}} \PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{n}{cnt\PYGZus{}lines}\PYG{p}{)}
+
+
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} subdivide the input files into NNN fragment files of 2 lines each}
+\PYG{c}{\PYGZsh{}}
+\PYG{n+nd}{@subdivide}\PYG{p}{(} \PYG{n}{create\PYGZus{}test\PYGZus{}files}\PYG{p}{,}
+ \PYG{n}{formatter}\PYG{p}{(}\PYG{p}{)}\PYG{p}{,}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}path[0]\PYGZcb{}/\PYGZob{}basename[0]\PYGZcb{}.*.fragment}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}path[0]\PYGZcb{}/\PYGZob{}basename[0]\PYGZcb{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{subdivide\PYGZus{}files}\PYG{p}{(}\PYG{n}{input\PYGZus{}file}\PYG{p}{,} \PYG{n}{output\PYGZus{}files}\PYG{p}{,} \PYG{n}{output\PYGZus{}file\PYGZus{}name\PYGZus{}stem}\PYG{p}{)}\PYG{p}{:}
+ \PYG{c}{\PYGZsh{}}
+ \PYG{c}{\PYGZsh{} cleanup any previous results}
+ \PYG{c}{\PYGZsh{}}
+ \PYG{k}{for} \PYG{n}{oo} \PYG{o+ow}{in} \PYG{n}{output\PYGZus{}files}\PYG{p}{:}
+ \PYG{n}{os}\PYG{o}{.}\PYG{n}{unlink}\PYG{p}{(}\PYG{n}{oo}\PYG{p}{)}
+ \PYG{c}{\PYGZsh{}}
+ \PYG{c}{\PYGZsh{} Output files contain two lines each}
+ \PYG{c}{\PYGZsh{} (new output files every even line)}
+ \PYG{c}{\PYGZsh{}}
+ \PYG{n}{cnt\PYGZus{}output\PYGZus{}files} \PYG{o}{=} \PYG{l+m+mi}{0}
+ \PYG{k}{for} \PYG{n}{ii}\PYG{p}{,} \PYG{n}{line} \PYG{o+ow}{in} \PYG{n+nb}{enumerate}\PYG{p}{(}\PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{input\PYGZus{}file}\PYG{p}{)}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{if} \PYG{n}{ii} \PYG{o}{\PYGZpc{}} \PYG{l+m+mi}{2} \PYG{o}{==} \PYG{l+m+mi}{0}\PYG{p}{:}
+ \PYG{n}{cnt\PYGZus{}output\PYGZus{}files} \PYG{o}{+}\PYG{o}{=} \PYG{l+m+mi}{1}
+ \PYG{n}{output\PYGZus{}file\PYGZus{}name} \PYG{o}{=} \PYG{l+s}{\PYGZdq{}}\PYG{l+s+si}{\PYGZpc{}s}\PYG{l+s}{.}\PYG{l+s+si}{\PYGZpc{}d}\PYG{l+s}{.fragment}\PYG{l+s}{\PYGZdq{}} \PYG{o}{\PYGZpc{}} \PYG{p}{(}\PYG{n}{output\PYGZus{}file\PYGZus{}name\PYGZus{}stem}\PYG{p}{,} \PYG{n}{cnt\PYGZus{}output\PYGZus{}files}\PYG{p}{)}
+ \PYG{n}{output\PYGZus{}file} \PYG{o}{=} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file\PYGZus{}name}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+ \PYG{k}{print} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{ Subdivide }\PYG{l+s+si}{\PYGZpc{}s}\PYG{l+s}{ \PYGZhy{}\PYGZgt{} }\PYG{l+s+si}{\PYGZpc{}s}\PYG{l+s}{\PYGZdq{}} \PYG{o}{\PYGZpc{}} \PYG{p}{(}\PYG{n}{input\PYGZus{}file}\PYG{p}{,} \PYG{n}{output\PYGZus{}file\PYGZus{}name}\PYG{p}{)}
+ \PYG{n}{output\PYGZus{}file}\PYG{o}{.}\PYG{n}{write}\PYG{p}{(}\PYG{n}{line}\PYG{p}{)}
+
+
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} Analyse each fragment independently}
+\PYG{c}{\PYGZsh{}}
+\PYG{n+nd}{@transform}\PYG{p}{(}\PYG{n}{subdivide\PYGZus{}files}\PYG{p}{,} \PYG{n}{suffix}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.fragment}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.analysed}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{analyse\PYGZus{}fragments}\PYG{p}{(}\PYG{n}{input\PYGZus{}file}\PYG{p}{,} \PYG{n}{output\PYGZus{}file}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{print} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{ Analysing }\PYG{l+s+si}{\PYGZpc{}s}\PYG{l+s}{ \PYGZhy{}\PYGZgt{} }\PYG{l+s+si}{\PYGZpc{}s}\PYG{l+s}{\PYGZdq{}} \PYG{o}{\PYGZpc{}} \PYG{p}{(}\PYG{n}{input\PYGZus{}file}\PYG{p}{,} \PYG{n}{output\PYGZus{}file}\PYG{p}{)}
+ \PYG{k}{with} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)} \PYG{k}{as} \PYG{n}{oo}\PYG{p}{:}
+ \PYG{k}{for} \PYG{n}{line} \PYG{o+ow}{in} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{input\PYGZus{}file}\PYG{p}{)}\PYG{p}{:}
+ \PYG{n}{oo}\PYG{o}{.}\PYG{n}{write}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{analysed }\PYG{l+s}{\PYGZdq{}} \PYG{o}{+} \PYG{n}{line}\PYG{p}{)}
+\end{Verbatim}
+
+This produces the following output:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{n}{verbose} \PYG{o}{=} \PYG{l+m+mi}{1}\PYG{p}{)}
+\PYG{g+go}{ a.start has 2 lines}
+\PYG{g+go}{ Job = [None \PYGZhy{}\PYGZgt{} a.start] completed}
+\PYG{g+go}{ b.start has 6 lines}
+\PYG{g+go}{ Job = [None \PYGZhy{}\PYGZgt{} b.start] completed}
+\PYG{g+go}{ c.start has 6 lines}
+\PYG{g+go}{ Job = [None \PYGZhy{}\PYGZgt{} c.start] completed}
+\PYG{g+go}{Completed Task = create\PYGZus{}test\PYGZus{}files}
+
+\PYG{g+go}{ Subdivide a.start \PYGZhy{}\PYGZgt{} /home/lg/temp/a.1.fragment}
+\PYG{g+go}{ Job = [a.start \PYGZhy{}\PYGZgt{} a.*.fragment, a] completed}
+
+\PYG{g+go}{ Subdivide b.start \PYGZhy{}\PYGZgt{} /home/lg/temp/b.1.fragment}
+\PYG{g+go}{ Subdivide b.start \PYGZhy{}\PYGZgt{} /home/lg/temp/b.2.fragment}
+\PYG{g+go}{ Subdivide b.start \PYGZhy{}\PYGZgt{} /home/lg/temp/b.3.fragment}
+\PYG{g+go}{ Job = [b.start \PYGZhy{}\PYGZgt{} b.*.fragment, b] completed}
+
+\PYG{g+go}{ Subdivide c.start \PYGZhy{}\PYGZgt{} /home/lg/temp/c.1.fragment}
+\PYG{g+go}{ Subdivide c.start \PYGZhy{}\PYGZgt{} /home/lg/temp/c.2.fragment}
+\PYG{g+go}{ Subdivide c.start \PYGZhy{}\PYGZgt{} /home/lg/temp/c.3.fragment}
+\PYG{g+go}{ Job = [c.start \PYGZhy{}\PYGZgt{} c.*.fragment, c] completed}
+
+\PYG{g+go}{Completed Task = subdivide\PYGZus{}files}
+
+\PYG{g+go}{ Analysing /home/lg/temp/a.1.fragment \PYGZhy{}\PYGZgt{} /home/lg/temp/a.1.analysed}
+\PYG{g+go}{ Job = [a.1.fragment \PYGZhy{}\PYGZgt{} a.1.analysed] completed}
+\PYG{g+go}{ Analysing /home/lg/temp/b.1.fragment \PYGZhy{}\PYGZgt{} /home/lg/temp/b.1.analysed}
+\PYG{g+go}{ Job = [b.1.fragment \PYGZhy{}\PYGZgt{} b.1.analysed] completed}
+
+\PYG{g+go}{ [ ...SEE EXAMPLE CODE FOR MORE LINES ...]}
+
+\PYG{g+go}{Completed Task = analyse\PYGZus{}fragments}
+\end{Verbatim}
+
+\code{a.start} has two lines and results in a single \code{.fragment} file,
+while \code{b.start} has 6 lines and so produces 3 \code{b.*.fragment} files.
+Whatever their origin, all of the different fragment files are treated equally
+in \code{analyse\_fragments()} and processed (in parallel) in the same way.
+\end{quote}
+
+
+\subsection{Grouping using \emph{@collate}}
+\label{tutorials/new_tutorial/subdivide_collate:new-manual-collate}\label{tutorials/new_tutorial/subdivide_collate:grouping-using-collate}\begin{quote}
+
+All that is left in our example is to reassemble the analysed fragments back together into
+3 sets of results corresponding to the original 3 pieces of starting data.
+
+This is straightforward by eye: the file names all have the same pattern: \code{{[}abc{]}.*.analysed}:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+a.1.analysed -\textgreater{} a.final\_result
+b.1.analysed -\textgreater{} b.final\_result
+b.2.analysed -\textgreater{} ..
+b.3.analysed -\textgreater{} ..
+c.1.analysed -\textgreater{} c.final\_result
+c.2.analysed -\textgreater{} ..
+\end{Verbatim}
+\end{quote}
+
+{\hyperref[decorators/collate:decorators-collate]{\emph{@collate}}} does something similar:
+\begin{quote}
+\begin{enumerate}
+\item {}
+Specify a string substitution e.g. \code{c.??.analysed -\textgreater{} c.final\_result} and
+
+\item {}
+Ask \emph{ruffus} to group together any \textbf{Input} (e.g. \code{c.1.analysed}, \code{c.2.analysed})
+that will result in the same \textbf{Output} (e.g. \code{c.final\_result})
+
+\end{enumerate}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} {}`{}`XXX.??.analysed \PYGZhy{}\PYGZgt{} XXX.final\PYGZus{}result{}`{}`}
+\PYG{c}{\PYGZsh{} Group results using original names}
+\PYG{c}{\PYGZsh{}}
+\PYG{n+nd}{@collate}\PYG{p}{(} \PYG{n}{analyse\PYGZus{}fragments}\PYG{p}{,}
+
+ \PYG{c}{\PYGZsh{} split file name into [abc].NUMBER.analysed}
+ \PYG{n}{formatter}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{/(?P\PYGZlt{}NAME\PYGZgt{}[abc]+)}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{.}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{d+}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{.analysed\PYGZdl{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,}
+
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}path[0]\PYGZcb{}/\PYGZob{}NAME[0]\PYGZcb{}.final\PYGZus{}result}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{recombine\PYGZus{}analyses}\PYG{p}{(}\PYG{n}{input\PYGZus{}file\PYGZus{}names}\PYG{p}{,} \PYG{n}{output\PYGZus{}file}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{with} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)} \PYG{k}{as} \PYG{n}{oo}\PYG{p}{:}
+ \PYG{k}{for} \PYG{n}{input\PYGZus{}file} \PYG{o+ow}{in} \PYG{n}{input\PYGZus{}file\PYGZus{}names}\PYG{p}{:}
+ \PYG{k}{print} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{ Recombine }\PYG{l+s+si}{\PYGZpc{}s}\PYG{l+s}{ \PYGZhy{}\PYGZgt{} }\PYG{l+s+si}{\PYGZpc{}s}\PYG{l+s}{\PYGZdq{}} \PYG{o}{\PYGZpc{}} \PYG{p}{(}\PYG{n}{input\PYGZus{}file}\PYG{p}{,} \PYG{n}{output\PYGZus{}file}\PYG{p}{)}
+ \PYG{k}{for} \PYG{n}{line} \PYG{o+ow}{in} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{input\PYGZus{}file}\PYG{p}{)}\PYG{p}{:}
+ \PYG{n}{oo}\PYG{o}{.}\PYG{n}{write}\PYG{p}{(}\PYG{n}{line}\PYG{p}{)}
+\end{Verbatim}
+
+This produces the following output:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{g+go}{ Recombine /home/lg/temp/a.1.analysed \PYGZhy{}\PYGZgt{} /home/lg/temp/a.final\PYGZus{}result}
+\PYG{g+go}{ Job = [[a.1.analysed] \PYGZhy{}\PYGZgt{} a.final\PYGZus{}result] completed}
+\PYG{g+go}{ Recombine /home/lg/temp/b.1.analysed \PYGZhy{}\PYGZgt{} /home/lg/temp/b.final\PYGZus{}result}
+\PYG{g+go}{ Recombine /home/lg/temp/b.2.analysed \PYGZhy{}\PYGZgt{} /home/lg/temp/b.final\PYGZus{}result}
+\PYG{g+go}{ Recombine /home/lg/temp/b.3.analysed \PYGZhy{}\PYGZgt{} /home/lg/temp/b.final\PYGZus{}result}
+\PYG{g+go}{ Job = [[b.1.analysed, b.2.analysed, b.3.analysed] \PYGZhy{}\PYGZgt{} b.final\PYGZus{}result] completed}
+\PYG{g+go}{ Recombine /home/lg/temp/c.1.analysed \PYGZhy{}\PYGZgt{} /home/lg/temp/c.final\PYGZus{}result}
+\PYG{g+go}{ Recombine /home/lg/temp/c.2.analysed \PYGZhy{}\PYGZgt{} /home/lg/temp/c.final\PYGZus{}result}
+\PYG{g+go}{ Recombine /home/lg/temp/c.3.analysed \PYGZhy{}\PYGZgt{} /home/lg/temp/c.final\PYGZus{}result}
+\PYG{g+go}{ Job = [[c.1.analysed, c.2.analysed, c.3.analysed] \PYGZhy{}\PYGZgt{} c.final\PYGZus{}result] completed}
+\PYG{g+go}{Completed Task = recombine\PYGZus{}analyses}
+\end{Verbatim}
+\end{quote}
+
+\begin{notice}{warning}{Warning:}\begin{itemize}
+\item {}
+\textbf{Input} file names are grouped together in no guaranteed order.
+\begin{quote}
+
+For example, the fragment files may not be sent to \code{recombine\_analyses(input\_file\_names, ...)}
+in alphabetical or any other useful order.
+
+You may want to sort \textbf{Input} before concatenation, as sketched after this notice.
+\end{quote}
+
+\item {}
+All \textbf{Input} are grouped together if they have both the same \textbf{Output} \emph{and} \textbf{Extra}
+parameters. If any string substitutions are specified in any of the other \textbf{Extra} parameters
+to {\hyperref[decorators/collate:decorators-collate]{\emph{@collate}}}, they must give the same answers for all \textbf{Input}
+in the same group.
+
+\end{itemize}
+\end{notice}
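+For example, the body of \code{recombine\_analyses()} above could sort its grouped \textbf{Input} before
+concatenating (a minor variation on the example above, not the example code itself):
+
+\begin{Verbatim}[commandchars=\\\{\}]
+def recombine_analyses(input_file_names, output_file):
+    with open(output_file, "w") as oo:
+        # sort so that the concatenation order is deterministic
+        for input_file in sorted(input_file_names):
+            for line in open(input_file):
+                oo.write(line)
+\end{Verbatim}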
+\end{quote}
+
+\index{combinatorics!Tutorial}\index{Tutorial!combinatorics}
+
+\section{\textbf{Chapter 17}: \emph{@combinations}, \emph{@permutations} and all versus all \emph{@product}}
+\label{tutorials/new_tutorial/combinatorics:index-0}\label{tutorials/new_tutorial/combinatorics:new-manual-combinatorics}\label{tutorials/new_tutorial/combinatorics::doc}\label{tutorials/new_tutorial/combinatorics:new-manual-combinatorics-chapter-num-combinations-permutations-and-all-versus-all-product}
+
+\strong{See also:}
+
+\begin{itemize}
+\item {}
+{\hyperref[tutorials/new_tutorial/manual_contents:new-manual-table-of-contents]{\emph{Manual Table of Contents}}}
+
+\item {}
+{\hyperref[decorators/combinations_with_replacement:decorators-combinations-with-replacement]{\emph{@combinations\_with\_replacement}}}
+
+\item {}
+{\hyperref[decorators/combinations:decorators-combinations]{\emph{@combinations}}}
+
+\item {}
+{\hyperref[decorators/permutations:decorators-permutations]{\emph{@permutations}}}
+
+\item {}
+{\hyperref[decorators/product:decorators-product]{\emph{@product}}}
+
+\item {}
+{\hyperref[decorators/indicator_objects:decorators-formatter]{\emph{formatter()}}}
+
+\end{itemize}
+
+
+
+\begin{notice}{note}{Note:}
+Remember to look at the example code:
+\begin{itemize}
+\item {}
+{\hyperref[tutorials/new_tutorial/combinatorics_code:new-manual-combinatorics-code]{\emph{Chapter 17: Python Code for @combinations, @permutations and all versus all @product}}}
+
+\end{itemize}
+\end{notice}
+
+
+\subsection{Overview}
+\label{tutorials/new_tutorial/combinatorics:overview}\begin{quote}
+
+A surprising number of computational problems involve some sort of all versus all calculation.
+Previously, this would have required all the parameters to be generated on the fly by a custom function
+supplied to {\hyperref[decorators/files_ex:decorators-files-on-the-fly]{\emph{@files}}}.
+
+From version 2.4, \emph{Ruffus} supports {\hyperref[decorators/combinations_with_replacement:decorators-combinations-with-replacement]{\emph{@combinations\_with\_replacement}}},
+{\hyperref[decorators/combinations:decorators-combinations]{\emph{@combinations}}}, {\hyperref[decorators/permutations:decorators-permutations]{\emph{@permutations}}},
+{\hyperref[decorators/product:decorators-product]{\emph{@product}}}.
+
+These provide, as far as possible, all the functionality of the four combinatorics iterators
+of the same name from the standard python \href{http://docs.python.org/2/library/itertools.html}{itertools}
+module.
+\end{quote}
+
+
+\subsection{Generating output with \emph{formatter()}}
+\label{tutorials/new_tutorial/combinatorics:generating-output-with-formatter}\begin{quote}
+
+String replacement always takes place via {\hyperref[decorators/indicator_objects:decorators-formatter]{\emph{formatter()}}}. Unfortunately,
+the other \emph{Ruffus} workhorses of {\hyperref[decorators/indicator_objects:decorators-regex]{\emph{regex()}}} and {\hyperref[decorators/indicator_objects:decorators-suffix]{\emph{suffix()}}}
+do not have sufficient syntactic flexibility.
+
+Each combinatorics decorator deals with multiple sets of inputs, whether these are:
+\begin{itemize}
+\item {}
+a self-self comparison (such as {\hyperref[decorators/combinations_with_replacement:decorators-combinations-with-replacement]{\emph{@combinations\_with\_replacement}}},
+{\hyperref[decorators/combinations:decorators-combinations]{\emph{@combinations}}}, {\hyperref[decorators/permutations:decorators-permutations]{\emph{@permutations}}}) or,
+
+\item {}
+a self-other comparison ({\hyperref[decorators/product:decorators-product]{\emph{@product}}})
+
+\end{itemize}
+
+The replacement strings thus require an extra level of indirection to refer to
+parsed components.
+\begin{enumerate}
+\item {}
+The first level refers to which \emph{set} of inputs.
+
+\item {}
+The second level refers to which input file in any particular \emph{set} of inputs.
+
+\end{enumerate}
+
+For example, if the \emph{inputs} are \textbf{{[}A1,A2{]},{[}B1,B2{]},{[}C1,C2{]} vs {[}P1,P2{]},{[}Q1,Q2{]},{[}R1,R2{]} vs {[}X1,X2{]},{[}Y1,Y2{]},{[}Z1,Z2{]}},
+then \code{'\{basename{[}2{]}{[}0{]}\}'} is the \href{http://docs.python.org/2/library/os.path.html\#os.path.basename}{basename} for
+\begin{itemize}
+\item {}
+the third set of inputs (\textbf{X,Y,Z}) and
+
+\item {}
+the first file name string in each \textbf{Input} of that set (\textbf{X1, Y1, Z1})
+
+\end{itemize}
+\end{quote}
+
+
+\subsection{All vs all comparisons with \emph{@product}}
+\label{tutorials/new_tutorial/combinatorics:all-vs-all-comparisons-with-product}\label{tutorials/new_tutorial/combinatorics:new-manual-product}\begin{quote}
+
+{\hyperref[decorators/product:decorators-product]{\emph{@product}}} generates the Cartesian \textbf{product} between sets of input files,
+i.e. all vs all comparisons.
+
+The effect is analogous to a nested for loop.
+
+{\hyperref[decorators/product:decorators-product]{\emph{@product}}} can be useful, for example, in bioinformatics for finding
+the corresponding genes (orthologues) for a set of proteins in multiple species.
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{k+kn}{from} \PYG{n+nn}{itertools} \PYG{k+kn}{import} \PYG{n}{product}
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{c}{\PYGZsh{} product(\PYGZsq{}ABC\PYGZsq{}, \PYGZsq{}XYZ\PYGZsq{}) \PYGZhy{}\PYGZhy{}\PYGZgt{} AX AY AZ BX BY BZ CX CY CZ}
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{p}{[} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZdq{}}\PYG{o}{.}\PYG{n}{join}\PYG{p}{(}\PYG{n}{a}\PYG{p}{)} \PYG{k}{for} \PYG{n}{a} \PYG{o+ow}{in} \PYG{n}{product}\PYG{p}{(}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{ABC}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{XYZ}\PYG{l+s}{\PYGZsq{}}\PYG{p}{)}\PYG{p}{]}
+\PYG{g+go}{[\PYGZsq{}AX\PYGZsq{}, \PYGZsq{}AY\PYGZsq{}, \PYGZsq{}AZ\PYGZsq{}, \PYGZsq{}BX\PYGZsq{}, \PYGZsq{}BY\PYGZsq{}, \PYGZsq{}BZ\PYGZsq{}, \PYGZsq{}CX\PYGZsq{}, \PYGZsq{}CY\PYGZsq{}, \PYGZsq{}CZ\PYGZsq{}]}
+\end{Verbatim}
+
+This example calculates the \textbf{@product} of the \textbf{A,B}, \textbf{P,Q} and \textbf{X,Y} files:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{k+kn}{from} \PYG{n+nn}{ruffus} \PYG{k+kn}{import} \PYG{o}{*}
+\PYG{k+kn}{from} \PYG{n+nn}{ruffus.combinatorics} \PYG{k+kn}{import} \PYG{o}{*}
+
+\PYG{c}{\PYGZsh{} Three sets of initial files}
+\PYG{n+nd}{@originate}\PYG{p}{(}\PYG{p}{[} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{a.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{b.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{create\PYGZus{}initial\PYGZus{}files\PYGZus{}ab}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{with} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)} \PYG{k}{as} \PYG{n}{oo}\PYG{p}{:} \PYG{k}{pass}
+
+\PYG{n+nd}{@originate}\PYG{p}{(}\PYG{p}{[} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{p.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{q.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{create\PYGZus{}initial\PYGZus{}files\PYGZus{}pq}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{with} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)} \PYG{k}{as} \PYG{n}{oo}\PYG{p}{:} \PYG{k}{pass}
+
+\PYG{n+nd}{@originate}\PYG{p}{(}\PYG{p}{[} \PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{x.1\PYGZus{}start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{x.2\PYGZus{}start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]}\PYG{p}{,}
+ \PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{y.1\PYGZus{}start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{y.2\PYGZus{}start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]} \PYG{p}{]}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{create\PYGZus{}initial\PYGZus{}files\PYGZus{}xy}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{with} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)} \PYG{k}{as} \PYG{n}{oo}\PYG{p}{:} \PYG{k}{pass}
+
+\PYG{c}{\PYGZsh{} @product}
+\PYG{n+nd}{@product}\PYG{p}{(} \PYG{n}{create\PYGZus{}initial\PYGZus{}files\PYGZus{}ab}\PYG{p}{,} \PYG{c}{\PYGZsh{} Input}
+ \PYG{n}{formatter}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{(.start)\PYGZdl{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{c}{\PYGZsh{} match input file set \PYGZsh{} 1}
+
+ \PYG{n}{create\PYGZus{}initial\PYGZus{}files\PYGZus{}pq}\PYG{p}{,} \PYG{c}{\PYGZsh{} Input}
+ \PYG{n}{formatter}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{(.start)\PYGZdl{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{c}{\PYGZsh{} match input file set \PYGZsh{} 2}
+
+ \PYG{n}{create\PYGZus{}initial\PYGZus{}files\PYGZus{}xy}\PYG{p}{,} \PYG{c}{\PYGZsh{} Input}
+ \PYG{n}{formatter}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{(.start)\PYGZdl{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{c}{\PYGZsh{} match input file set \PYGZsh{} 3}
+
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}path[0][0]\PYGZcb{}/}\PYG{l+s}{\PYGZdq{}} \PYG{c}{\PYGZsh{} Output Replacement string}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}basename[0][0]\PYGZcb{}\PYGZus{}vs\PYGZus{}}\PYG{l+s}{\PYGZdq{}} \PYG{c}{\PYGZsh{}}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}basename[1][0]\PYGZcb{}\PYGZus{}vs\PYGZus{}}\PYG{l+s}{\PYGZdq{}} \PYG{c}{\PYGZsh{}}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}basename[2][0]\PYGZcb{}.product}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{c}{\PYGZsh{}}
+
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}path[0][0]\PYGZcb{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{c}{\PYGZsh{} Extra parameter: path for 1st set of files, 1st file name}
+
+ \PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}basename[0][0]\PYGZcb{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{c}{\PYGZsh{} Extra parameter: basename for 1st set of files, 1st file name}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}basename[1][0]\PYGZcb{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{c}{\PYGZsh{} 2nd}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}basename[2][0]\PYGZcb{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{c}{\PYGZsh{} 3rd}
+ \PYG{p}{]}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{product\PYGZus{}task}\PYG{p}{(}\PYG{n}{input\PYGZus{}file}\PYG{p}{,} \PYG{n}{output\PYGZus{}parameter}\PYG{p}{,} \PYG{n}{shared\PYGZus{}path}\PYG{p}{,} \PYG{n}{basenames}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{print} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZsh{} basenames = }\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{ }\PYG{l+s}{\PYGZdq{}}\PYG{o}{.}\PYG{n}{join}\PYG{p}{(}\PYG{n}{basenames}\PYG{p}{)}
+ \PYG{k}{print} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{input\PYGZus{}parameter = }\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{n}{input\PYGZus{}file}
+ \PYG{k}{print} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{output\PYGZus{}parameter = }\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{n}{output\PYGZus{}parameter}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s+se}{\PYGZbs{}n}\PYG{l+s}{\PYGZdq{}}
+
+
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} Run}
+\PYG{c}{\PYGZsh{}}
+\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{n}{verbose}\PYG{o}{=}\PYG{l+m+mi}{0}\PYG{p}{)}
+\end{Verbatim}
+\end{quote}
+
+This results in:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{n}{verbose}\PYG{o}{=}\PYG{l+m+mi}{0}\PYG{p}{)}
+
+\PYG{g+go}{\PYGZsh{} basenames = a p x}
+\PYG{g+go}{input\PYGZus{}parameter = (\PYGZsq{}a.start\PYGZsq{}, \PYGZsq{}p.start\PYGZsq{}, \PYGZsq{}x.start\PYGZsq{})}
+\PYG{g+go}{output\PYGZus{}parameter = /home/lg/temp/a\PYGZus{}vs\PYGZus{}p\PYGZus{}vs\PYGZus{}x.product}
+
+\PYG{g+go}{\PYGZsh{} basenames = a p y}
+\PYG{g+go}{input\PYGZus{}parameter = (\PYGZsq{}a.start\PYGZsq{}, \PYGZsq{}p.start\PYGZsq{}, \PYGZsq{}y.start\PYGZsq{})}
+\PYG{g+go}{output\PYGZus{}parameter = /home/lg/temp/a\PYGZus{}vs\PYGZus{}p\PYGZus{}vs\PYGZus{}y.product}
+
+\PYG{g+go}{\PYGZsh{} basenames = a q x}
+\PYG{g+go}{input\PYGZus{}parameter = (\PYGZsq{}a.start\PYGZsq{}, \PYGZsq{}q.start\PYGZsq{}, \PYGZsq{}x.start\PYGZsq{})}
+\PYG{g+go}{output\PYGZus{}parameter = /home/lg/temp/a\PYGZus{}vs\PYGZus{}q\PYGZus{}vs\PYGZus{}x.product}
+
+\PYG{g+go}{\PYGZsh{} basenames = a q y}
+\PYG{g+go}{input\PYGZus{}parameter = (\PYGZsq{}a.start\PYGZsq{}, \PYGZsq{}q.start\PYGZsq{}, \PYGZsq{}y.start\PYGZsq{})}
+\PYG{g+go}{output\PYGZus{}parameter = /home/lg/temp/a\PYGZus{}vs\PYGZus{}q\PYGZus{}vs\PYGZus{}y.product}
+
+\PYG{g+go}{\PYGZsh{} basenames = b p x}
+\PYG{g+go}{input\PYGZus{}parameter = (\PYGZsq{}b.start\PYGZsq{}, \PYGZsq{}p.start\PYGZsq{}, \PYGZsq{}x.start\PYGZsq{})}
+\PYG{g+go}{output\PYGZus{}parameter = /home/lg/temp/b\PYGZus{}vs\PYGZus{}p\PYGZus{}vs\PYGZus{}x.product}
+
+\PYG{g+go}{\PYGZsh{} basenames = b p y}
+\PYG{g+go}{input\PYGZus{}parameter = (\PYGZsq{}b.start\PYGZsq{}, \PYGZsq{}p.start\PYGZsq{}, \PYGZsq{}y.start\PYGZsq{})}
+\PYG{g+go}{output\PYGZus{}parameter = /home/lg/temp/b\PYGZus{}vs\PYGZus{}p\PYGZus{}vs\PYGZus{}y.product}
+
+\PYG{g+go}{\PYGZsh{} basenames = b q x}
+\PYG{g+go}{input\PYGZus{}parameter = (\PYGZsq{}b.start\PYGZsq{}, \PYGZsq{}q.start\PYGZsq{}, \PYGZsq{}x.start\PYGZsq{})}
+\PYG{g+go}{output\PYGZus{}parameter = /home/lg/temp/b\PYGZus{}vs\PYGZus{}q\PYGZus{}vs\PYGZus{}x.product}
+
+\PYG{g+go}{\PYGZsh{} basenames = b q y}
+\PYG{g+go}{input\PYGZus{}parameter = (\PYGZsq{}b.start\PYGZsq{}, \PYGZsq{}q.start\PYGZsq{}, \PYGZsq{}y.start\PYGZsq{})}
+\PYG{g+go}{output\PYGZus{}parameter = /home/lg/temp/b\PYGZus{}vs\PYGZus{}q\PYGZus{}vs\PYGZus{}y.product}
+\end{Verbatim}
+\end{quote}
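+
+The eight jobs above are exactly what nested loops over the three \textbf{Input} sets would
+produce ($2 \times 2 \times 2 = 8$). A minimal plain-Python sketch of this equivalence, using one
+representative file name per set as in the output above (not \emph{Ruffus} code):
+
+\begin{Verbatim}[commandchars=\\\{\}]
+from itertools import product
+
+# one representative name per Input set, following the output above
+set_ab = ['a.start', 'b.start']
+set_pq = ['p.start', 'q.start']
+set_xy = ['x.start', 'y.start']
+
+for input_parameter in product(set_ab, set_pq, set_xy):
+    print(input_parameter)          # 2 x 2 x 2 = 8 job input tuples
+\end{Verbatim}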
+\end{quote}
+
+
+\subsection{Permute all k-tuple orderings of inputs without repeats using \emph{@permutations}}
+\label{tutorials/new_tutorial/combinatorics:permute-all-k-tuple-orderings-of-inputs-without-repeats-using-permutations}\label{tutorials/new_tutorial/combinatorics:new-manual-permutations}\begin{quote}
+\begin{description}
+\item[{Generates the \textbf{permutations} for all the elements of a set of \textbf{Input} (e.g. \textbf{A B C D}),}] \leavevmode\begin{itemize}
+\item {}
+r-length tuples of \emph{input} elements
+
+\item {}
+excluding repeated elements (\textbf{A A})
+
+\item {}
+and the order of elements within each tuple is significant (both \textbf{A B} and \textbf{B A} are generated).
+
+\end{itemize}
+
+\end{description}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{k+kn}{from} \PYG{n+nn}{itertools} \PYG{k+kn}{import} \PYG{n}{permutations}
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{c}{\PYGZsh{} permutations(\PYGZsq{}ABCD\PYGZsq{}, 2) \PYGZhy{}\PYGZhy{}\PYGZgt{} AB AC AD BA BC BD CA CB CD DA DB DC}
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{p}{[} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZdq{}}\PYG{o}{.}\PYG{n}{join}\PYG{p}{(}\PYG{n}{a}\PYG{p}{)} \PYG{k}{for} \PYG{n}{a} \PYG{o+ow}{in} \PYG{n}{permutations}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{ABCD}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+m+mi}{2}\PYG{p}{)}\PYG{p}{]}
+\PYG{g+go}{[\PYGZsq{}AB\PYGZsq{}, \PYGZsq{}AC\PYGZsq{}, \PYGZsq{}AD\PYGZsq{}, \PYGZsq{}BA\PYGZsq{}, \PYGZsq{}BC\PYGZsq{}, \PYGZsq{}BD\PYGZsq{}, \PYGZsq{}CA\PYGZsq{}, \PYGZsq{}CB\PYGZsq{}, \PYGZsq{}CD\PYGZsq{}, \PYGZsq{}DA\PYGZsq{}, \PYGZsq{}DB\PYGZsq{}, \PYGZsq{}DC\PYGZsq{}]}
+\end{Verbatim}
+
+The following example calculates the \textbf{@permutations} of the \textbf{A,B,C,D} files
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{k+kn}{from} \PYG{n+nn}{ruffus} \PYG{k+kn}{import} \PYG{o}{*}
+\PYG{k+kn}{from} \PYG{n+nn}{ruffus.combinatorics} \PYG{k+kn}{import} \PYG{o}{*}
+
+\PYG{c}{\PYGZsh{} initial file pairs}
+\PYG{n+nd}{@originate}\PYG{p}{(}\PYG{p}{[} \PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{A.1\PYGZus{}start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{A.2\PYGZus{}start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]}\PYG{p}{,}
+ \PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{B.1\PYGZus{}start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{B.2\PYGZus{}start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]}\PYG{p}{,}
+ \PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{C.1\PYGZus{}start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{C.2\PYGZus{}start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]}\PYG{p}{,}
+ \PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{D.1\PYGZus{}start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{D.2\PYGZus{}start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]}\PYG{p}{]}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{create\PYGZus{}initial\PYGZus{}files\PYGZus{}ABCD}\PYG{p}{(}\PYG{n}{output\PYGZus{}files}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{for} \PYG{n}{output\PYGZus{}file} \PYG{o+ow}{in} \PYG{n}{output\PYGZus{}files}\PYG{p}{:}
+ \PYG{k}{with} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)} \PYG{k}{as} \PYG{n}{oo}\PYG{p}{:} \PYG{k}{pass}
+
+\PYG{c}{\PYGZsh{} @permutations}
+\PYG{n+nd}{@permutations}\PYG{p}{(}\PYG{n}{create\PYGZus{}initial\PYGZus{}files\PYGZus{}ABCD}\PYG{p}{,} \PYG{c}{\PYGZsh{} Input}
+ \PYG{n}{formatter}\PYG{p}{(}\PYG{p}{)}\PYG{p}{,} \PYG{c}{\PYGZsh{} match input files}
+
+ \PYG{c}{\PYGZsh{} tuple of 2 at a time}
+ \PYG{l+m+mi}{2}\PYG{p}{,}
+
+ \PYG{c}{\PYGZsh{} Output Replacement string}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}path[0][0]\PYGZcb{}/}\PYG{l+s}{\PYGZdq{}}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}basename[0][1]\PYGZcb{}\PYGZus{}vs\PYGZus{}}\PYG{l+s}{\PYGZdq{}}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}basename[1][1]\PYGZcb{}.permutations}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+
+ \PYG{c}{\PYGZsh{} Extra parameter: path for 1st set of files, 1st file name}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}path[0][0]\PYGZcb{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+
+ \PYG{c}{\PYGZsh{} Extra parameter}
+ \PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}basename[0][0]\PYGZcb{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{c}{\PYGZsh{} basename for 1st set of files, 1st file name}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}basename[1][0]\PYGZcb{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{c}{\PYGZsh{} 2nd}
+ \PYG{p}{]}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{permutations\PYGZus{}task}\PYG{p}{(}\PYG{n}{input\PYGZus{}file}\PYG{p}{,} \PYG{n}{output\PYGZus{}parameter}\PYG{p}{,} \PYG{n}{shared\PYGZus{}path}\PYG{p}{,} \PYG{n}{basenames}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{print} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{ \PYGZhy{} }\PYG{l+s}{\PYGZdq{}}\PYG{o}{.}\PYG{n}{join}\PYG{p}{(}\PYG{n}{basenames}\PYG{p}{)}
+
+
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} Run}
+\PYG{c}{\PYGZsh{}}
+\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{n}{verbose}\PYG{o}{=}\PYG{l+m+mi}{0}\PYG{p}{)}
+\end{Verbatim}
+\end{quote}
+
+This results in:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{n}{verbose}\PYG{o}{=}\PYG{l+m+mi}{0}\PYG{p}{)}
+
+\PYG{g+go}{A \PYGZhy{} B}
+\PYG{g+go}{A \PYGZhy{} C}
+\PYG{g+go}{A \PYGZhy{} D}
+\PYG{g+go}{B \PYGZhy{} A}
+\PYG{g+go}{B \PYGZhy{} C}
+\PYG{g+go}{B \PYGZhy{} D}
+\PYG{g+go}{C \PYGZhy{} A}
+\PYG{g+go}{C \PYGZhy{} B}
+\PYG{g+go}{C \PYGZhy{} D}
+\PYG{g+go}{D \PYGZhy{} A}
+\PYG{g+go}{D \PYGZhy{} B}
+\PYG{g+go}{D \PYGZhy{} C}
+\end{Verbatim}
+\end{quote}
+\end{quote}
+
+
+\subsection{Select unordered k-tuples within inputs excluding repeated elements using \emph{@combinations}}
+\label{tutorials/new_tutorial/combinatorics:new-manual-combinations}\label{tutorials/new_tutorial/combinatorics:select-unordered-k-tuples-within-inputs-excluding-repeated-elements-using-combinations}\begin{quote}
+\begin{description}
+\item[{Generates the \textbf{combinations} for all the elements of a set of \textbf{Input} (e.g. \textbf{A B C D}),}] \leavevmode\begin{itemize}
+\item {}
+r-length tuples of \emph{input} elements
+
+\item {}
+without repeated elements (\textbf{A A})
+
+\item {}
+where the order of elements within each tuple is irrelevant (either \textbf{A B} or \textbf{B A}, not both).
+
+\end{itemize}
+
+\end{description}
+
+{\hyperref[decorators/combinations:decorators-combinations]{\emph{@combinations}}} can be useful, for example, in calculating a transition probability matrix
+for a set of states. The diagonals are meaningless ``self-self'' transitions which are excluded.
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{k+kn}{from} \PYG{n+nn}{itertools} \PYG{k+kn}{import} \PYG{n}{combinations}
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{c}{\PYGZsh{} combinations(\PYGZsq{}ABCD\PYGZsq{}, 3) \PYGZhy{}\PYGZhy{}\PYGZgt{} ABC ABD ACD BCD}
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{p}{[} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZdq{}}\PYG{o}{.}\PYG{n}{join}\PYG{p}{(}\PYG{n}{a}\PYG{p}{)} \PYG{k}{for} \PYG{n}{a} \PYG{o+ow}{in} \PYG{n}{combinations}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{ABCD}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+m+mi}{3}\PYG{p}{)}\PYG{p}{]}
+\PYG{g+go}{[\PYGZsq{}ABC\PYGZsq{}, \PYGZsq{}ABD\PYGZsq{}, \PYGZsq{}ACD\PYGZsq{}, \PYGZsq{}BCD\PYGZsq{}]}
+\end{Verbatim}
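+
+As a rough illustration of the transition-matrix use case above, the unordered pairs
+returned by \code{combinations} correspond to the off-diagonal cells of a symmetric
+matrix. A plain-Python sketch with hypothetical state names (not \emph{Ruffus} code):
+
+\begin{Verbatim}[commandchars=\\\{\}]
+from itertools import combinations
+
+states = ['win', 'draw', 'lose']            # hypothetical states
+transition_counts = dict()                  # symmetric: one cell per unordered pair
+for state1, state2 in combinations(states, 2):
+    transition_counts[(state1, state2)] = 0
+# no self-self pairs such as ('win', 'win') are generated
+\end{Verbatim}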
+
+This example calculates the \textbf{@combinations} of \textbf{A,B,C,D} files
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{k+kn}{from} \PYG{n+nn}{ruffus} \PYG{k+kn}{import} \PYG{o}{*}
+\PYG{k+kn}{from} \PYG{n+nn}{ruffus.combinatorics} \PYG{k+kn}{import} \PYG{o}{*}
+
+\PYG{c}{\PYGZsh{} initial file pairs}
+\PYG{n+nd}{@originate}\PYG{p}{(}\PYG{p}{[} \PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{A.1\PYGZus{}start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{A.2\PYGZus{}start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]}\PYG{p}{,}
+ \PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{B.1\PYGZus{}start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{B.2\PYGZus{}start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]}\PYG{p}{,}
+ \PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{C.1\PYGZus{}start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{C.2\PYGZus{}start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]}\PYG{p}{,}
+ \PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{D.1\PYGZus{}start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{D.2\PYGZus{}start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]}\PYG{p}{]}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{create\PYGZus{}initial\PYGZus{}files\PYGZus{}ABCD}\PYG{p}{(}\PYG{n}{output\PYGZus{}files}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{for} \PYG{n}{output\PYGZus{}file} \PYG{o+ow}{in} \PYG{n}{output\PYGZus{}files}\PYG{p}{:}
+ \PYG{k}{with} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)} \PYG{k}{as} \PYG{n}{oo}\PYG{p}{:} \PYG{k}{pass}
+
+\PYG{c}{\PYGZsh{} @combinations}
+\PYG{n+nd}{@combinations}\PYG{p}{(}\PYG{n}{create\PYGZus{}initial\PYGZus{}files\PYGZus{}ABCD}\PYG{p}{,} \PYG{c}{\PYGZsh{} Input}
+ \PYG{n}{formatter}\PYG{p}{(}\PYG{p}{)}\PYG{p}{,} \PYG{c}{\PYGZsh{} match input files}
+
+ \PYG{c}{\PYGZsh{} tuple of 3 at a time}
+ \PYG{l+m+mi}{3}\PYG{p}{,}
+
+ \PYG{c}{\PYGZsh{} Output Replacement string}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}path[0][0]\PYGZcb{}/}\PYG{l+s}{\PYGZdq{}}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}basename[0][1]\PYGZcb{}\PYGZus{}vs\PYGZus{}}\PYG{l+s}{\PYGZdq{}}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}basename[1][1]\PYGZcb{}\PYGZus{}vs\PYGZus{}}\PYG{l+s}{\PYGZdq{}}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}basename[2][1]\PYGZcb{}.combinations}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+
+ \PYG{c}{\PYGZsh{} Extra parameter: path for 1st set of files, 1st file name}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}path[0][0]\PYGZcb{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+
+ \PYG{c}{\PYGZsh{} Extra parameter}
+ \PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}basename[0][0]\PYGZcb{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{c}{\PYGZsh{} basename for 1st set of files, 1st file name}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}basename[1][0]\PYGZcb{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{c}{\PYGZsh{} 2nd}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}basename[2][0]\PYGZcb{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{c}{\PYGZsh{} 3rd}
+ \PYG{p}{]}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{combinations\PYGZus{}task}\PYG{p}{(}\PYG{n}{input\PYGZus{}file}\PYG{p}{,} \PYG{n}{output\PYGZus{}parameter}\PYG{p}{,} \PYG{n}{shared\PYGZus{}path}\PYG{p}{,} \PYG{n}{basenames}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{print} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{ \PYGZhy{} }\PYG{l+s}{\PYGZdq{}}\PYG{o}{.}\PYG{n}{join}\PYG{p}{(}\PYG{n}{basenames}\PYG{p}{)}
+
+
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} Run}
+\PYG{c}{\PYGZsh{}}
+\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{n}{verbose}\PYG{o}{=}\PYG{l+m+mi}{0}\PYG{p}{)}
+\end{Verbatim}
+\end{quote}
+
+This results in:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{n}{verbose}\PYG{o}{=}\PYG{l+m+mi}{0}\PYG{p}{)}
+\PYG{g+go}{A \PYGZhy{} B \PYGZhy{} C}
+\PYG{g+go}{A \PYGZhy{} B \PYGZhy{} D}
+\PYG{g+go}{A \PYGZhy{} C \PYGZhy{} D}
+\PYG{g+go}{B \PYGZhy{} C \PYGZhy{} D}
+\end{Verbatim}
+\end{quote}
+\end{quote}
+
+
+\subsection{Select unordered k-tuples within inputs \emph{including} repeated elements with \emph{@combinations\_with\_replacement}}
+\label{tutorials/new_tutorial/combinatorics:select-unordered-k-tuples-within-inputs-including-repeated-elements-with-combinations-with-replacement}\label{tutorials/new_tutorial/combinatorics:new-manual-combinations-with-replacement}\begin{quote}
+\begin{description}
+\item[{Generates the \textbf{combinations\_with\_replacement} for all the elements of a set of \textbf{Input} (e.g. \textbf{A B C D}),}] \leavevmode\begin{itemize}
+\item {}
+r-length tuples of \emph{input} elements
+
+\item {}
+including repeated elements (\textbf{A A})
+
+\item {}
+where the order of elements within each tuple is irrelevant (either \textbf{A B} or \textbf{B A}, not both).
+
+\end{itemize}
+
+\end{description}
+
+{\hyperref[decorators/combinations_with_replacement:decorators-combinations-with-replacement]{\emph{@combinations\_with\_replacement}}} can be useful,
+for example, in bioinformatics for finding evolutionary relationships between genetic elements such as proteins
+and genes. Self-self comparisons can be used as a baseline for scaling similarity scores.
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{k+kn}{from} \PYG{n+nn}{itertools} \PYG{k+kn}{import} \PYG{n}{combinations\PYGZus{}with\PYGZus{}replacement}
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{c}{\PYGZsh{} combinations\PYGZus{}with\PYGZus{}replacement(\PYGZsq{}ABCD\PYGZsq{}, 2) \PYGZhy{}\PYGZhy{}\PYGZgt{} AA AB AC AD BB BC BD CC CD DD}
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{p}{[} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZdq{}}\PYG{o}{.}\PYG{n}{join}\PYG{p}{(}\PYG{n}{a}\PYG{p}{)} \PYG{k}{for} \PYG{n}{a} \PYG{o+ow}{in} \PYG{n}{combinations\PYGZus{}with\PYGZus{}replacement}\PYG{p}{(}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{ABCD}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+m+mi}{2}\PYG{p}{)}\PYG{p}{]}
+\PYG{g+go}{[\PYGZsq{}AA\PYGZsq{}, \PYGZsq{}AB\PYGZsq{}, \PYGZsq{}AC\PYGZsq{}, \PYGZsq{}AD\PYGZsq{}, \PYGZsq{}BB\PYGZsq{}, \PYGZsq{}BC\PYGZsq{}, \PYGZsq{}BD\PYGZsq{}, \PYGZsq{}CC\PYGZsq{}, \PYGZsq{}CD\PYGZsq{}, \PYGZsq{}DD\PYGZsq{}]}
+\end{Verbatim}
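+
+In the same spirit, the extra self-self pairs generated here are the entries that could
+serve as the scaling baseline mentioned above. A small plain-Python sketch with a
+hypothetical score table (not \emph{Ruffus} code):
+
+\begin{Verbatim}[commandchars=\\\{\}]
+from itertools import combinations_with_replacement
+
+scores = dict()                              # hypothetical similarity scores
+for a, b in combinations_with_replacement('ABCD', 2):
+    scores[(a, b)] = 0.0                     # includes ('A', 'A') ... ('D', 'D')
+# the self-self entries, e.g. scores[('A', 'A')], provide the baseline
+\end{Verbatim}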
+
+This example calculates the \textbf{@combinations\_with\_replacement} of \textbf{A,B,C,D} files
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{k+kn}{from} \PYG{n+nn}{ruffus} \PYG{k+kn}{import} \PYG{o}{*}
+\PYG{k+kn}{from} \PYG{n+nn}{ruffus.combinatorics} \PYG{k+kn}{import} \PYG{o}{*}
+
+\PYG{c}{\PYGZsh{} initial file pairs}
+\PYG{n+nd}{@originate}\PYG{p}{(}\PYG{p}{[} \PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{A.1\PYGZus{}start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{A.2\PYGZus{}start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]}\PYG{p}{,}
+ \PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{B.1\PYGZus{}start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{B.2\PYGZus{}start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]}\PYG{p}{,}
+ \PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{C.1\PYGZus{}start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{C.2\PYGZus{}start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]}\PYG{p}{,}
+ \PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{D.1\PYGZus{}start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{D.2\PYGZus{}start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]}\PYG{p}{]}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{create\PYGZus{}initial\PYGZus{}files\PYGZus{}ABCD}\PYG{p}{(}\PYG{n}{output\PYGZus{}files}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{for} \PYG{n}{output\PYGZus{}file} \PYG{o+ow}{in} \PYG{n}{output\PYGZus{}files}\PYG{p}{:}
+ \PYG{k}{with} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)} \PYG{k}{as} \PYG{n}{oo}\PYG{p}{:} \PYG{k}{pass}
+
+\PYG{c}{\PYGZsh{} @combinations\PYGZus{}with\PYGZus{}replacement}
+\PYG{n+nd}{@combinations\PYGZus{}with\PYGZus{}replacement}\PYG{p}{(}\PYG{n}{create\PYGZus{}initial\PYGZus{}files\PYGZus{}ABCD}\PYG{p}{,} \PYG{c}{\PYGZsh{} Input}
+ \PYG{n}{formatter}\PYG{p}{(}\PYG{p}{)}\PYG{p}{,} \PYG{c}{\PYGZsh{} match input files}
+
+ \PYG{c}{\PYGZsh{} tuple of 2 at a time}
+ \PYG{l+m+mi}{2}\PYG{p}{,}
+
+ \PYG{c}{\PYGZsh{} Output Replacement string}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}path[0][0]\PYGZcb{}/}\PYG{l+s}{\PYGZdq{}}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}basename[0][1]\PYGZcb{}\PYGZus{}vs\PYGZus{}}\PYG{l+s}{\PYGZdq{}}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}basename[1][1]\PYGZcb{}.combinations\PYGZus{}with\PYGZus{}replacement}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+
+ \PYG{c}{\PYGZsh{} Extra parameter: path for 1st set of files, 1st file name}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}path[0][0]\PYGZcb{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+
+ \PYG{c}{\PYGZsh{} Extra parameter}
+ \PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}basename[0][0]\PYGZcb{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{c}{\PYGZsh{} basename for 1st set of files, 1st file name}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}basename[1][0]\PYGZcb{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{c}{\PYGZsh{} 2rd}
+ \PYG{p}{]}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{combinations\PYGZus{}with\PYGZus{}replacement\PYGZus{}task}\PYG{p}{(}\PYG{n}{input\PYGZus{}file}\PYG{p}{,} \PYG{n}{output\PYGZus{}parameter}\PYG{p}{,} \PYG{n}{shared\PYGZus{}path}\PYG{p}{,} \PYG{n}{basenames}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{print} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{ \PYGZhy{} }\PYG{l+s}{\PYGZdq{}}\PYG{o}{.}\PYG{n}{join}\PYG{p}{(}\PYG{n}{basenames}\PYG{p}{)}
+
+
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} Run}
+\PYG{c}{\PYGZsh{}}
+\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{n}{verbose}\PYG{o}{=}\PYG{l+m+mi}{0}\PYG{p}{)}
+\end{Verbatim}
+\end{quote}
+
+This results in:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{n}{verbose}\PYG{o}{=}\PYG{l+m+mi}{0}\PYG{p}{)}
+\PYG{g+go}{A \PYGZhy{} A}
+\PYG{g+go}{A \PYGZhy{} B}
+\PYG{g+go}{A \PYGZhy{} C}
+\PYG{g+go}{A \PYGZhy{} D}
+\PYG{g+go}{B \PYGZhy{} B}
+\PYG{g+go}{B \PYGZhy{} C}
+\PYG{g+go}{B \PYGZhy{} D}
+\PYG{g+go}{C \PYGZhy{} C}
+\PYG{g+go}{C \PYGZhy{} D}
+\PYG{g+go}{D \PYGZhy{} D}
+\end{Verbatim}
+\end{quote}
+\end{quote}
+
+\index{@active\_if!Tutorial}\index{Tutorial!@active\_if}
+
+\section{\textbf{Chapter 18}: Turning parts of the pipeline on and off at runtime with \emph{@active\_if}}
+\label{tutorials/new_tutorial/active_if:new-manual-active-if}\label{tutorials/new_tutorial/active_if:index-0}\label{tutorials/new_tutorial/active_if::doc}\label{tutorials/new_tutorial/active_if:new-manual-active-if-chapter-num-turning-parts-of-the-pipeline-on-and-off-at-runtime-with-active-if}
+
+\strong{See also:}
+
+\begin{itemize}
+\item {}
+{\hyperref[tutorials/new_tutorial/manual_contents:new-manual-table-of-contents]{\emph{Manual Table of Contents}}}
+
+\item {}
+{\hyperref[decorators/active_if:decorators-active-if]{\emph{@active\_if syntax in detail}}}
+
+\end{itemize}
+
+
+
+
+\subsection{Overview}
+\label{tutorials/new_tutorial/active_if:overview}\begin{quote}
+
+It is sometimes useful to be able to switch on and off parts of a pipeline. For example, a pipeline
+might have two different code paths depending on the type of data it is being asked to analyse.
+
+One surprisingly easy way to do this is to use a python \code{if} statement around particular task functions:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+ \PYG{k+kn}{from} \PYG{n+nn}{ruffus} \PYG{k+kn}{import} \PYG{o}{*}
+
+ \PYG{n}{run\PYGZus{}task1} \PYG{o}{=} \PYG{n+nb+bp}{True}
+
+ \PYG{n+nd}{@originate}\PYG{p}{(}\PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{a.foo}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{b.foo}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]}\PYG{p}{)}
+ \PYG{k}{def} \PYG{n+nf}{create\PYGZus{}files}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{)}\PYG{p}{:}
+ \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+
+
+ \PYG{k}{if} \PYG{n}{run\PYGZus{}task1}\PYG{p}{:}
+ \PYG{c}{\PYGZsh{} might not run}
+ \PYG{n+nd}{@transform}\PYG{p}{(}\PYG{n}{create\PYGZus{}files}\PYG{p}{,} \PYG{n}{suffix}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.foo}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.bar}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+ \PYG{k}{def} \PYG{n+nf}{foobar}\PYG{p}{(}\PYG{n}{input\PYGZus{}file}\PYG{p}{,} \PYG{n}{output\PYGZus{}file}\PYG{p}{)}\PYG{p}{:}
+ \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+
+
+ \PYG{n+nd}{@transform}\PYG{p}{(}\PYG{n}{foobar}\PYG{p}{,} \PYG{n}{suffix}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.bar}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.result}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+ \PYG{k}{def} \PYG{n+nf}{wrap\PYGZus{}up}\PYG{p}{(}\PYG{n}{input\PYGZus{}file}\PYG{p}{,} \PYG{n}{output\PYGZus{}file}\PYG{p}{)}\PYG{p}{:}
+ \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+
+
+ \PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{p}{)}
+\end{Verbatim}
+\end{quote}
+\begin{description}
+\item[{This simple solution has a number of drawbacks:}] \leavevmode\begin{enumerate}
+\item {}
+The on/off decision is a one-off event that happens when the script is loaded. Ideally, we
+would like more flexibility and to postpone the decision until \code{pipeline\_run()} is invoked.
+
+\item {}
+When the \code{if} condition is false, the entire task function becomes invisible, and if there are any
+downstream tasks, as in the above example, \emph{Ruffus} will complain loudly about
+missing dependencies.
+
+\end{enumerate}
+
+\end{description}
+\end{quote}
+
+
+\subsection{\emph{@active\_if} controls the state of tasks}
+\label{tutorials/new_tutorial/active_if:active-if-controls-the-state-of-tasks}\begin{quote}
+\begin{itemize}
+\item {}
+Switches tasks on and off at run time depending on its parameters
+
+\item {}
+Evaluated each time \code{pipeline\_run}, \code{pipeline\_printout} or \code{pipeline\_printout\_graph} is called.
+
+\item {}
+Dormant tasks behave as if they are up to date and have no output.
+
+\end{itemize}
+
+The design and initial implementation were contributed by Jacob Biesinger.
+
+The following example shows its flexibility and syntax:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{k+kn}{from} \PYG{n+nn}{ruffus} \PYG{k+kn}{import} \PYG{o}{*}
+\PYG{n}{run\PYGZus{}if\PYGZus{}true\PYGZus{}1} \PYG{o}{=} \PYG{n+nb+bp}{True}
+\PYG{n}{run\PYGZus{}if\PYGZus{}true\PYGZus{}2} \PYG{o}{=} \PYG{n+nb+bp}{False}
+\PYG{n}{run\PYGZus{}if\PYGZus{}true\PYGZus{}3} \PYG{o}{=} \PYG{n+nb+bp}{True}
+
+
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} task1}
+\PYG{c}{\PYGZsh{}}
+\PYG{n+nd}{@originate}\PYG{p}{(}\PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{a.foo}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{b.foo}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{create\PYGZus{}files}\PYG{p}{(}\PYG{n}{outfile}\PYG{p}{)}\PYG{p}{:}
+ \PYG{l+s+sd}{\PYGZdq{}\PYGZdq{}\PYGZdq{}}
+\PYG{l+s+sd}{ create\PYGZus{}files}
+\PYG{l+s+sd}{ \PYGZdq{}\PYGZdq{}\PYGZdq{}}
+ \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{outfile}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{o}{.}\PYG{n}{write}\PYG{p}{(}\PYG{n}{outfile} \PYG{o}{+} \PYG{l+s}{\PYGZdq{}}\PYG{l+s+se}{\PYGZbs{}n}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} Only runs if all three run\PYGZus{}if\PYGZus{}true conditions are met}
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} @active\PYGZus{}if determines if task is active}
+\PYG{n+nd}{@active\PYGZus{}if}\PYG{p}{(}\PYG{n}{run\PYGZus{}if\PYGZus{}true\PYGZus{}1}\PYG{p}{,} \PYG{k}{lambda}\PYG{p}{:} \PYG{n}{run\PYGZus{}if\PYGZus{}true\PYGZus{}2}\PYG{p}{)}
+\PYG{n+nd}{@active\PYGZus{}if}\PYG{p}{(}\PYG{n}{run\PYGZus{}if\PYGZus{}true\PYGZus{}3}\PYG{p}{)}
+\PYG{n+nd}{@transform}\PYG{p}{(}\PYG{n}{create\PYGZus{}files}\PYG{p}{,} \PYG{n}{suffix}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.foo}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.bar}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{this\PYGZus{}task\PYGZus{}might\PYGZus{}be\PYGZus{}inactive}\PYG{p}{(}\PYG{n}{infile}\PYG{p}{,} \PYG{n}{outfile}\PYG{p}{)}\PYG{p}{:}
+ \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{outfile}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{o}{.}\PYG{n}{write}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s+si}{\PYGZpc{}s}\PYG{l+s}{ \PYGZhy{}\PYGZgt{} }\PYG{l+s+si}{\PYGZpc{}s}\PYG{l+s+se}{\PYGZbs{}n}\PYG{l+s}{\PYGZdq{}} \PYG{o}{\PYGZpc{}} \PYG{p}{(}\PYG{n}{infile}\PYG{p}{,} \PYG{n}{outfile}\PYG{p}{)}\PYG{p}{)}
+
+
+\PYG{c}{\PYGZsh{} @active\PYGZus{}if switches off task because run\PYGZus{}if\PYGZus{}true\PYGZus{}2 == False}
+\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{n}{verbose} \PYG{o}{=} \PYG{l+m+mi}{3}\PYG{p}{)}
+
+\PYG{c}{\PYGZsh{} @active\PYGZus{}if switches on task because all run\PYGZus{}if\PYGZus{}true conditions are met}
+\PYG{n}{run\PYGZus{}if\PYGZus{}true\PYGZus{}2} \PYG{o}{=} \PYG{n+nb+bp}{True}
+\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{n}{verbose} \PYG{o}{=} \PYG{l+m+mi}{3}\PYG{p}{)}
+\end{Verbatim}
+\end{quote}
+
+The task starts off inactive:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{c}{\PYGZsh{} @active\PYGZus{}if switches off task \PYGZdq{}this\PYGZus{}task\PYGZus{}might\PYGZus{}be\PYGZus{}inactive\PYGZdq{} because run\PYGZus{}if\PYGZus{}true\PYGZus{}2 == False}
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{n}{verbose} \PYG{o}{=} \PYG{l+m+mi}{3}\PYG{p}{)}
+
+\PYG{g+go}{Task enters queue = create\PYGZus{}files}
+\PYG{g+go}{create\PYGZus{}files}
+\PYG{g+go}{ Job = [None \PYGZhy{}\PYGZgt{} a.foo] Missing file [a.foo]}
+\PYG{g+go}{ Job = [None \PYGZhy{}\PYGZgt{} b.foo] Missing file [b.foo]}
+\PYG{g+go}{ Job = [None \PYGZhy{}\PYGZgt{} a.foo] completed}
+\PYG{g+go}{ Job = [None \PYGZhy{}\PYGZgt{} b.foo] completed}
+\PYG{g+go}{Completed Task = create\PYGZus{}files}
+\PYG{g+go}{Inactive Task = this\PYGZus{}task\PYGZus{}might\PYGZus{}be\PYGZus{}inactive}
+\end{Verbatim}
+\end{quote}
+
+Now turn on the task:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{c}{\PYGZsh{} @active\PYGZus{}if switches on task \PYGZdq{}this\PYGZus{}task\PYGZus{}might\PYGZus{}be\PYGZus{}inactive\PYGZdq{} because all run\PYGZus{}if\PYGZus{}true conditions are met}
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{n}{run\PYGZus{}if\PYGZus{}true\PYGZus{}2} \PYG{o}{=} \PYG{n+nb+bp}{True}
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{n}{verbose} \PYG{o}{=} \PYG{l+m+mi}{3}\PYG{p}{)}
+
+\PYG{g+go}{Task enters queue = this\PYGZus{}task\PYGZus{}might\PYGZus{}be\PYGZus{}inactive}
+
+\PYG{g+go}{ Job = [a.foo \PYGZhy{}\PYGZgt{} a.bar] Missing file [a.bar]}
+\PYG{g+go}{ Job = [b.foo \PYGZhy{}\PYGZgt{} b.bar] Missing file [b.bar]}
+\PYG{g+go}{ Job = [a.foo \PYGZhy{}\PYGZgt{} a.bar] completed}
+\PYG{g+go}{ Job = [b.foo \PYGZhy{}\PYGZgt{} b.bar] completed}
+\PYG{g+go}{Completed Task = this\PYGZus{}task\PYGZus{}might\PYGZus{}be\PYGZus{}inactive}
+\end{Verbatim}
+\end{quote}
+\end{quote}
+
+\index{posttask!Tutorial}\index{Tutorial!posttask}
+
+\section{\textbf{Chapter 19}: Signal the completion of each stage of our pipeline with \emph{@posttask}}
+\label{tutorials/new_tutorial/posttask:new-manual-posttask}\label{tutorials/new_tutorial/posttask:index-0}\label{tutorials/new_tutorial/posttask::doc}\label{tutorials/new_tutorial/posttask:new-manual-posttask-chapter-num-signal-the-completion-of-each-stage-of-our-pipeline-with-posttask}
+
+\strong{See also:}
+
+\begin{itemize}
+\item {}
+{\hyperref[tutorials/new_tutorial/manual_contents:new-manual-table-of-contents]{\emph{Manual Table of Contents}}}
+
+\item {}
+{\hyperref[decorators/posttask:decorators-posttask]{\emph{@posttask}}} syntax
+
+\end{itemize}
+
+
+
+
+\subsection{Overview}
+\label{tutorials/new_tutorial/posttask:overview}\begin{quote}
+
+It is often useful to signal the completion of each task by specifying a specific
+action to be taken or function to be called. This can range from
+printing out some message, or \href{http://en.wikipedia.org/wiki/Touch\_(Unix)}{touching} some sentinel file,
+to emailing the author. This is particularly useful if the {\hyperref[glossary:term-task]{\emph{task}}} is a recipe applied to an unspecified number
+of parameters in parallel in different {\hyperref[glossary:term-job]{\emph{job}}}s. If the task is never run, or if it
+fails, then, needless to say, no task completion action will happen.
+
+\emph{Ruffus} uses the {\hyperref[decorators/posttask:decorators-posttask]{\emph{@posttask}}} decorator for this purpose.
+\end{quote}
+
+
+\subsubsection{\textbf{@posttask}}
+\label{tutorials/new_tutorial/posttask:posttask}\begin{quote}
+
+We can signal the completion of each task by specifying
+one or more function(s) using {\hyperref[decorators/posttask:decorators-posttask]{\emph{@posttask}}}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{k+kn}{from} \PYG{n+nn}{ruffus} \PYG{k+kn}{import} \PYG{o}{*}
+
+\PYG{k}{def} \PYG{n+nf}{task\PYGZus{}finished}\PYG{p}{(}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{print} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{hooray}\PYG{l+s}{\PYGZdq{}}
+
+\PYG{n+nd}{@posttask}\PYG{p}{(}\PYG{n}{task\PYGZus{}finished}\PYG{p}{)}
+\PYG{n+nd}{@originate}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{a.1}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{create\PYGZus{}if\PYGZus{}necessary}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{)}\PYG{p}{:}
+ \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+
+\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{p}{[}\PYG{n}{create\PYGZus{}if\PYGZus{}necessary}\PYG{p}{]}\PYG{p}{)}
+\end{Verbatim}
+
+This is such a short function that we might as well write it in-line (remember that \code{sys} must be imported):
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n+nd}{@posttask}\PYG{p}{(}\PYG{k}{lambda}\PYG{p}{:} \PYG{n}{sys}\PYG{o}{.}\PYG{n}{stdout}\PYG{o}{.}\PYG{n}{write}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{hooray}\PYG{l+s+se}{\PYGZbs{}n}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{)}
+\PYG{n+nd}{@originate}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{a.1}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{create\PYGZus{}if\PYGZus{}necessary}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{)}\PYG{p}{:}
+ \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\end{Verbatim}
+\end{quote}
+\end{quote}
+
+\begin{notice}{note}{Note:}
+The function(s) provided to {\hyperref[decorators/posttask:decorators-posttask]{\emph{@posttask}}} will be called if the pipeline passes
+through a task, even if none of its jobs are run because they are up-to-date.
+This happens when an upstream task is out-of-date, and the execution passes through
+this point in the pipeline. See the example in {\hyperref[tutorials/new_tutorial/dependencies:new-manual-dependencies]{\emph{Appendix 2: How dependency is checked}}}
+of this manual.
+\end{notice}
+
+\index{@posttask!touchfile (Manual)}\index{touchfile !@posttask (Manual)}
+
+\subsubsection{\emph{touch\_file}}
+\label{tutorials/new_tutorial/posttask:touch-file}\label{tutorials/new_tutorial/posttask:index-1}\label{tutorials/new_tutorial/posttask:new-manual-posttask-touch-file}\begin{quote}
+
+One way to note the completion of a task is to create some sort of
+``flag'' file. Each stage in a traditional \code{make} pipeline would end with a
+\code{touch completed.flag} command.
+
+This is such a useful idiom that \emph{Ruffus} provides the shorthand {\hyperref[decorators/indicator_objects:decorators-touch-file]{\emph{touch\_file}}}:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{k+kn}{from} \PYG{n+nn}{ruffus} \PYG{k+kn}{import} \PYG{o}{*}
+
+\PYG{n+nd}{@posttask}\PYG{p}{(}\PYG{n}{touch\PYGZus{}file}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{task\PYGZus{}completed.flag}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{)}
+\PYG{n+nd}{@files}\PYG{p}{(}\PYG{n+nb+bp}{None}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{a.1}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{create\PYGZus{}if\PYGZus{}necessary}\PYG{p}{(}\PYG{n}{input\PYGZus{}file}\PYG{p}{,} \PYG{n}{output\PYGZus{}file}\PYG{p}{)}\PYG{p}{:}
+ \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+
+\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{p}{)}
+\end{Verbatim}
+\end{quote}
+\end{quote}
+
+
+\subsubsection{Adding several post task actions}
+\label{tutorials/new_tutorial/posttask:adding-several-post-task-actions}\begin{quote}
+
+You can, of course, add more than one action to be taken on completion of the
+task, either by stacking up as many {\hyperref[decorators/posttask:decorators-posttask]{\emph{@posttask}}} decorators
+as necessary, or by including several functions in the same {\hyperref[decorators/posttask:decorators-posttask]{\emph{@posttask}}}:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{k+kn}{from} \PYG{n+nn}{ruffus} \PYG{k+kn}{import} \PYG{o}{*}
+
+\PYG{n+nd}{@posttask}\PYG{p}{(}\PYG{n}{print\PYGZus{}hooray}\PYG{p}{,} \PYG{n}{print\PYGZus{}whoppee}\PYG{p}{)}
+\PYG{n+nd}{@posttask}\PYG{p}{(}\PYG{n}{print\PYGZus{}hip\PYGZus{}hip}\PYG{p}{,} \PYG{n}{touch\PYGZus{}file}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{sentinel\PYGZus{}flag}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{)}
+\PYG{n+nd}{@originate}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{a.1}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{create\PYGZus{}if\PYGZus{}necessary}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{)}\PYG{p}{:}
+ \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+
+\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{p}{)}
+\end{Verbatim}
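+
+The example above assumes that the celebratory helper functions already exist. A minimal
+sketch of hypothetical definitions (any callables taking no arguments will do) that makes
+the snippet runnable:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+# hypothetical helpers assumed by the example above
+def print_hooray():
+    print "hooray"
+
+def print_whoppee():
+    print "whoppee"
+
+def print_hip_hip():
+    print "hip hip"
+\end{Verbatim}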
+\end{quote}
+\end{quote}
+
+\index{inputs!Tutorial}\index{Tutorial!inputs}\index{add\_inputs!Tutorial}\index{Tutorial!add\_inputs}\index{string substiution for inputs!Tutorial}\index{Tutorial!string substiution for inputs}
+
+\section{\textbf{Chapter 20}: Manipulating task inputs via string substitution using \emph{inputs()} and \emph{add\_inputs()}}
+\label{tutorials/new_tutorial/inputs:new-manual-inputs}\label{tutorials/new_tutorial/inputs:index-0}\label{tutorials/new_tutorial/inputs:new-manual-inputs-chapter-num-manipulating-task-inputs-via-string-substitution-using-inputs-and-add-inputs}\label{tutorials/new_tutorial/inputs::doc}
+
+\strong{See also:}
+
+\begin{itemize}
+\item {}
+{\hyperref[tutorials/new_tutorial/manual_contents:new-manual-table-of-contents]{\emph{Manual Table of Contents}}}
+
+\item {}
+{\hyperref[decorators/indicator_objects:decorators-inputs]{\emph{inputs()}}} syntax
+
+\item {}
+{\hyperref[decorators/indicator_objects:decorators-add-inputs]{\emph{add\_inputs()}}} syntax
+
+\end{itemize}
+
+
+
+\begin{notice}{note}{Note:}
+Remember to look at the example code:
+\begin{itemize}
+\item {}
+{\hyperref[tutorials/new_tutorial/inputs_code:new-manual-inputs-code]{\emph{Chapter 20: Python Code for Manipulating task inputs via string substitution using inputs() and add\_inputs()}}}
+
+\end{itemize}
+\end{notice}
+
+
+\subsection{Overview}
+\label{tutorials/new_tutorial/inputs:overview}\begin{quote}
+
+The previous chapters have described how \emph{Ruffus} allows the \textbf{Output} names for each job
+to be generated from the \emph{Input} names via string substitution. This is how \emph{Ruffus} can
+automatically chain multiple tasks in a pipeline together seamlessly.
+
+Sometimes it is useful to be able to modify the \textbf{Input} by string substitution
+as well. There are two situations where this additional flexibility is needed:
+\begin{enumerate}
+\item {}
+You need to add additional prerequisites or filenames to the \textbf{Input} of every single job
+
+\item {}
+You need to add additional \textbf{Input} file names which are some variant of the existing ones.
+
+\end{enumerate}
+
+Both will be much clearer with some examples.
+\end{quote}
+
+
+\subsection{Adding additional \emph{input} prerequisites per job with \emph{add\_inputs()}}
+\label{tutorials/new_tutorial/inputs:adding-additional-input-prerequisites-per-job-with-add-inputs}
+
+\subsubsection{1. Example: compiling c++ code}
+\label{tutorials/new_tutorial/inputs:example-compiling-c-code}\begin{quote}
+
+Let us first compile some c++ (\code{"*.cpp"}) files using plain {\hyperref[decorators/transform:decorators-transform]{\emph{@transform}}} syntax:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{c}{\PYGZsh{} source files exist before our pipeline}
+\PYG{n}{source\PYGZus{}files} \PYG{o}{=} \PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{hasty.cpp}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{tasty.cpp}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{messy.cpp}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}
+\PYG{k}{for} \PYG{n}{source\PYGZus{}file} \PYG{o+ow}{in} \PYG{n}{source\PYGZus{}files}\PYG{p}{:}
+ \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{source\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+
+\PYG{k+kn}{from} \PYG{n+nn}{ruffus} \PYG{k+kn}{import} \PYG{o}{*}
+
+\PYG{n+nd}{@transform}\PYG{p}{(}\PYG{n}{source\PYGZus{}files}\PYG{p}{,} \PYG{n}{suffix}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.cpp}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.o}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{compile}\PYG{p}{(}\PYG{n}{input\PYGZus{}filename}\PYG{p}{,} \PYG{n}{output\PYGZus{}file}\PYG{p}{)}\PYG{p}{:}
+ \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+
+\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{p}{)}
+\end{Verbatim}
+\end{quote}
+
+
+\subsubsection{2. Example: Adding a common header file with \emph{add\_inputs()}}
+\label{tutorials/new_tutorial/inputs:example-adding-a-common-header-file-with-add-inputs}\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{c}{\PYGZsh{} source files exist before our pipeline}
+\PYG{n}{source\PYGZus{}files} \PYG{o}{=} \PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{hasty.cpp}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{tasty.cpp}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{messy.cpp}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}
+\PYG{k}{for} \PYG{n}{source\PYGZus{}file} \PYG{o+ow}{in} \PYG{n}{source\PYGZus{}files}\PYG{p}{:}
+ \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{source\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+
+\PYG{c}{\PYGZsh{} common (universal) header exists before our pipeline}
+\PYG{n+nb}{open}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{universal.h}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+
+\PYG{k+kn}{from} \PYG{n+nn}{ruffus} \PYG{k+kn}{import} \PYG{o}{*}
+
+\PYG{c}{\PYGZsh{} make header files}
+\PYG{n+nd}{@transform}\PYG{p}{(}\PYG{n}{source\PYGZus{}files}\PYG{p}{,} \PYG{n}{suffix}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.cpp}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.h}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{create\PYGZus{}matching\PYGZus{}headers}\PYG{p}{(}\PYG{n}{input\PYGZus{}file}\PYG{p}{,} \PYG{n}{output\PYGZus{}file}\PYG{p}{)}\PYG{p}{:}
+ \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+
+\PYG{n+nd}{@transform}\PYG{p}{(}\PYG{n}{source\PYGZus{}files}\PYG{p}{,} \PYG{n}{suffix}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.cpp}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,}
+ \PYG{c}{\PYGZsh{} add header to the input of every job}
+ \PYG{n}{add\PYGZus{}inputs}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{universal.h}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{c}{\PYGZsh{} add result of task create\PYGZus{}matching\PYGZus{}headers to the input of every job}
+ \PYG{n}{create\PYGZus{}matching\PYGZus{}headers}\PYG{p}{)}\PYG{p}{,}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.o}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{compile}\PYG{p}{(}\PYG{n}{input\PYGZus{}filename}\PYG{p}{,} \PYG{n}{output\PYGZus{}file}\PYG{p}{)}\PYG{p}{:}
+ \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+
+\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{p}{)}
+\end{Verbatim}
+
+This results in:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+>>> pipeline_run()
+ Job = [hasty.cpp -> hasty.h] completed
+ Job = [messy.cpp -> messy.h] completed
+ Job = [tasty.cpp -> tasty.h] completed
+Completed Task = create_matching_headers
+ Job = [[hasty.cpp, universal.h, hasty.h, messy.h, tasty.h] -> hasty.o] completed
+ Job = [[messy.cpp, universal.h, hasty.h, messy.h, tasty.h] -> messy.o] completed
+ Job = [[tasty.cpp, universal.h, hasty.h, messy.h, tasty.h] -> tasty.o] completed
+Completed Task = compile
+\end{Verbatim}
+\end{quote}
+\end{quote}
+
+
+\subsubsection{3. Example: Additional \emph{Input} can be tasks}
+\label{tutorials/new_tutorial/inputs:example-additional-input-can-be-tasks}\begin{quote}
+
+We can also add a task name to {\hyperref[decorators/indicator_objects:decorators-add-inputs]{\emph{add\_inputs()}}}.
+This chains the \textbf{Output}, i.e. run time results, of any previous task as
+an additional \textbf{Input} to every single job in the task.
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{c}{\PYGZsh{} make header files}
+\PYG{n+nd}{@transform}\PYG{p}{(}\PYG{n}{source\PYGZus{}files}\PYG{p}{,} \PYG{n}{suffix}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.cpp}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.h}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{create\PYGZus{}matching\PYGZus{}headers}\PYG{p}{(}\PYG{n}{input\PYGZus{}file}\PYG{p}{,} \PYG{n}{output\PYGZus{}file}\PYG{p}{)}\PYG{p}{:}
+ \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+
+\PYG{n+nd}{@transform}\PYG{p}{(}\PYG{n}{source\PYGZus{}files}\PYG{p}{,} \PYG{n}{suffix}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.cpp}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,}
+ \PYG{c}{\PYGZsh{} add header to the input of every job}
+ \PYG{n}{add\PYGZus{}inputs}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{universal.h}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{c}{\PYGZsh{} add result of task create\PYGZus{}matching\PYGZus{}headers to the input of every job}
+ \PYG{n}{create\PYGZus{}matching\PYGZus{}headers}\PYG{p}{)}\PYG{p}{,}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.o}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{compile}\PYG{p}{(}\PYG{n}{input\PYGZus{}filenames}\PYG{p}{,} \PYG{n}{output\PYGZus{}file}\PYG{p}{)}\PYG{p}{:}
+ \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+
+\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{p}{)}
+\end{Verbatim}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{p}{)}
+\PYG{g+go}{ Job = [[hasty.cpp, universal.h, hasty.h, messy.h, tasty.h] \PYGZhy{}\PYGZgt{} hasty.o] completed}
+\PYG{g+go}{ Job = [[messy.cpp, universal.h, hasty.h, messy.h, tasty.h] \PYGZhy{}\PYGZgt{} messy.o] completed}
+\PYG{g+go}{ Job = [[tasty.cpp, universal.h, hasty.h, messy.h, tasty.h] \PYGZhy{}\PYGZgt{} tasty.o] completed}
+\PYG{g+go}{Completed Task = compile}
+\end{Verbatim}
+\end{quote}
+\end{quote}
+
+
+\subsubsection{4. Example: Add corresponding files using \emph{add\_inputs()} with \emph{formatter} or \emph{regex}}
+\label{tutorials/new_tutorial/inputs:example-add-corresponding-files-using-add-inputs-with-formatter-or-regex}\begin{quote}
+
+The previous example created headers corresponding to our source files and added all of them
+as \textbf{Input} to every compilation job. That is generally not what you want. Instead,
+what is usually needed is a way to
+\begin{enumerate}
+\item {}
+Look up the exact corresponding header for the \emph{specific} job, and not add all
+possible files to all jobs in a task. When compiling \code{hasty.cpp}, we just need
+to add \code{hasty.h} (and \code{universal.h}).
+
+\item {}
+Add a pre-existing file name (\code{hasty.h} already exists; there is no need to create it via
+another task).
+
+\end{enumerate}
+
+This is a surprisingly common requirement: in bioinformatics, DNA or RNA
+sequence files sometimes come singly in \href{http://en.wikipedia.org/wiki/FASTQ\_format}{*.fastq}
+and sometimes in \href{http://en.wikipedia.org/wiki/DNA\_sequencing\_theory\#Pairwise\_end-sequencing}{matching pairs}:
+\code{*1.fastq, *2.fastq} etc. In the latter case, we often need to make sure that both
+sequence files are processed in tandem. One way is to take one file name (\code{*1.fastq})
+and look up the other (see the sketch at the end of this example).
+\begin{quote}
+
+{\hyperref[decorators/indicator_objects:decorators-add-inputs]{\emph{add\_inputs()}}} uses standard \emph{Ruffus} string substitution
+via {\hyperref[decorators/indicator_objects:decorators-formatter]{\emph{formatter}}} and {\hyperref[decorators/indicator_objects:decorators-regex]{\emph{regex}}} to look up (generate) \textbf{Input} file names.
+(As a rule {\hyperref[decorators/indicator_objects:decorators-suffix]{\emph{suffix}}} only substitutes \textbf{Output} file names.)
+\end{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n+nd}{@transform}\PYG{p}{(} \PYG{n}{source\PYGZus{}files}\PYG{p}{,}
+ \PYG{n}{formatter}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.cpp\PYGZdl{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,}
+ \PYG{c}{\PYGZsh{} corresponding header for each source file}
+ \PYG{n}{add\PYGZus{}inputs}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}basename[0]\PYGZcb{}.h}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{c}{\PYGZsh{} add header to the input of every job}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{universal.h}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}basename[0]\PYGZcb{}.o}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{compile}\PYG{p}{(}\PYG{n}{input\PYGZus{}filenames}\PYG{p}{,} \PYG{n}{output\PYGZus{}file}\PYG{p}{)}\PYG{p}{:}
+ \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\end{Verbatim}
+
+This script gives the following output:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{p}{)}
+\PYG{g+go}{ Job = [[hasty.cpp, hasty.h, universal.h] \PYGZhy{}\PYGZgt{} hasty.o] completed}
+\PYG{g+go}{ Job = [[messy.cpp, messy.h, universal.h] \PYGZhy{}\PYGZgt{} messy.o] completed}
+\PYG{g+go}{ Job = [[tasty.cpp, tasty.h, universal.h] \PYGZhy{}\PYGZgt{} tasty.o] completed}
+\PYG{g+go}{Completed Task = compile}
+\end{Verbatim}
+\end{quote}
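+
+The paired-FASTQ case mentioned above can be handled with exactly the same pattern.
+This is only a sketch, assuming hypothetical read files named \code{sampleA\_1.fastq} /
+\code{sampleA\_2.fastq} and so on, and a made-up \code{map\_reads} task; a named
+regular-expression group is used to look up the mate of each first read:
+
+\begin{Verbatim}
+from ruffus import *
+
+# hypothetical paired-end read files: each *_1.fastq has a matching *_2.fastq
+first_reads = ["sampleA_1.fastq", "sampleB_1.fastq"]
+
+@transform(first_reads,
+           # capture everything before "_1.fastq" as PAIR
+           formatter("(?P<PAIR>[^/]+)_1\.fastq$"),
+           # look up the matching second read for this specific job
+           add_inputs("{path[0]}/{PAIR[0]}_2.fastq"),
+           "{path[0]}/{PAIR[0]}.sam")
+def map_reads(input_files, output_file):
+    # input_files == (first read, matching second read)
+    open(output_file, "w")
+\end{Verbatim}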
+\end{quote}
+
+
+\subsection{Replacing all input parameters with \emph{inputs()}}
+\label{tutorials/new_tutorial/inputs:replacing-all-input-parameters-with-inputs}\begin{quote}
+
+The previous examples all \emph{added} to the set of \textbf{Input} file names.
+Sometimes it is necessary to replace all the \textbf{Input} parameters altogether.
+\end{quote}
+
+
+\subsubsection{5. Example: Running matching python scripts using \emph{inputs()}}
+\label{tutorials/new_tutorial/inputs:example-running-matching-python-scripts-using-inputs}\begin{quote}
+
+Here is a contrived example: we wish to find all cython/python files which have been
+compiled into corresponding c++ source files.
+Instead of compiling the c++, we shall invoke the corresponding python scripts.
+
+Given three c++ files and their corresponding python scripts:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n+nd}{@transform}\PYG{p}{(} \PYG{n}{source\PYGZus{}files}\PYG{p}{,}
+ \PYG{n}{formatter}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.cpp\PYGZdl{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,}
+
+ \PYG{c}{\PYGZsh{} corresponding python file for each source file}
+ \PYG{n}{inputs}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}basename[0]\PYGZcb{}.py}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,}
+
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}basename[0]\PYGZcb{}.results}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{run\PYGZus{}corresponding\PYGZus{}python}\PYG{p}{(}\PYG{n}{input\PYGZus{}filenames}\PYG{p}{,} \PYG{n}{output\PYGZus{}file}\PYG{p}{)}\PYG{p}{:}
+ \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\end{Verbatim}
+\end{quote}
+
+The \emph{Ruffus} code will call each python script corresponding to its c++ counterpart:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{p}{)}
+\PYG{g+go}{ Job = [hasty.py \PYGZhy{}\PYGZgt{} hasty.results] completed}
+\PYG{g+go}{ Job = [messy.py \PYGZhy{}\PYGZgt{} messy.results] completed}
+\PYG{g+go}{ Job = [tasty.py \PYGZhy{}\PYGZgt{} tasty.results] completed}
+\PYG{g+go}{Completed Task = run\PYGZus{}corresponding\PYGZus{}python}
+\end{Verbatim}
+\end{quote}
+\end{quote}
+
+\index{on\_the\_fly!Tutorial}\index{Tutorial!on\_the\_fly}
+
+\section{\textbf{Chapter 21}: Esoteric: Generating parameters on the fly with \emph{@files}}
+\label{tutorials/new_tutorial/onthefly:index-0}\label{tutorials/new_tutorial/onthefly::doc}\label{tutorials/new_tutorial/onthefly:new-manual-on-the-fly-chapter-num-esoteric-generating-parameters-on-the-fly-with-files}\label{tutorials/new_tutorial/onthefly:new-manual-on-the-fly}
+
+\strong{See also:}
+
+\begin{itemize}
+\item {}
+{\hyperref[tutorials/new_tutorial/manual_contents:new-manual-table-of-contents]{\emph{Manual Table of Contents}}}
+
+\item {}
+{\hyperref[decorators/files_ex:decorators-files-on-the-fly]{\emph{@files on-the-fly syntax in detail}}}
+
+\end{itemize}
+
+
+
+\begin{notice}{note}{Note:}
+Remember to look at the example code:
+\begin{itemize}
+\item {}
+{\hyperref[tutorials/new_tutorial/onthefly_code:new-manual-on-the-fly-code]{\emph{Chapter 21: Esoteric: Python Code for Generating parameters on the fly with @files}}}
+
+\end{itemize}
+\end{notice}
+
+
+\subsection{Overview}
+\label{tutorials/new_tutorial/onthefly:overview}\begin{quote}
+
+The different \emph{Ruffus} {\hyperref[decorators/decorators:decorators]{\emph{decorators}}} connect up different tasks and
+generate \emph{Output} (file names) from your \emph{Input} in all sorts of different ways.
+
+However, sometimes none of them does \emph{quite} what you need, and it becomes
+necessary to generate your own \emph{Input} and \emph{Output} parameters on the fly.
+
+Although this additional flexibility comes at the cost of some extra, less convenient
+code, you can continue to leverage the rest of \emph{Ruffus} functionality, such as
+checking whether files are up to date or not.
+\end{quote}
+
+\index{@files!Tutorial on-the-fly parameter generation}\index{Tutorial on-the-fly parameter generation!@files}
+
+\subsection{\emph{@files} syntax}
+\label{tutorials/new_tutorial/onthefly:index-1}\label{tutorials/new_tutorial/onthefly:files-syntax}\begin{quote}
+
+To generate parameters on the fly, use the {\hyperref[decorators/files_ex:decorators-files-on-the-fly]{\emph{@files}}} decorator
+with a {\hyperref[glossary:term-generator]{\emph{generator}}} function which yields one list / tuple of parameters per job.
+
+For example:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{k+kn}{from} \PYG{n+nn}{ruffus} \PYG{k+kn}{import} \PYG{o}{*}
+
+\PYG{c}{\PYGZsh{} generator function}
+\PYG{k}{def} \PYG{n+nf}{generate\PYGZus{}parameters\PYGZus{}on\PYGZus{}the\PYGZus{}fly}\PYG{p}{(}\PYG{p}{)}\PYG{p}{:}
+ \PYG{l+s+sd}{\PYGZdq{}\PYGZdq{}\PYGZdq{}}
+\PYG{l+s+sd}{ returns one list of parameters per job}
+\PYG{l+s+sd}{ \PYGZdq{}\PYGZdq{}\PYGZdq{}}
+ \PYG{n}{parameters} \PYG{o}{=} \PYG{p}{[}
+ \PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{A.input}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{A.output}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{p}{(}\PYG{l+m+mi}{1}\PYG{p}{,} \PYG{l+m+mi}{2}\PYG{p}{)}\PYG{p}{]}\PYG{p}{,} \PYG{c}{\PYGZsh{} 1st job}
+ \PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{B.input}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{B.output}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{p}{(}\PYG{l+m+mi}{3}\PYG{p}{,} \PYG{l+m+mi}{4}\PYG{p}{)}\PYG{p}{]}\PYG{p}{,} \PYG{c}{\PYGZsh{} 2nd job}
+ \PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{C.input}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{C.output}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{p}{(}\PYG{l+m+mi}{5}\PYG{p}{,} \PYG{l+m+mi}{6}\PYG{p}{)}\PYG{p}{]}\PYG{p}{,} \PYG{c}{\PYGZsh{} 3rd job}
+ \PYG{p}{]}
+ \PYG{k}{for} \PYG{n}{job\PYGZus{}parameters} \PYG{o+ow}{in} \PYG{n}{parameters}\PYG{p}{:}
+ \PYG{k}{yield} \PYG{n}{job\PYGZus{}parameters}
+
+\PYG{c}{\PYGZsh{} tell ruffus that parameters should be generated on the fly}
+\PYG{n+nd}{@files}\PYG{p}{(}\PYG{n}{generate\PYGZus{}parameters\PYGZus{}on\PYGZus{}the\PYGZus{}fly}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{pipeline\PYGZus{}task}\PYG{p}{(}\PYG{n+nb}{input}\PYG{p}{,} \PYG{n}{output}\PYG{p}{,} \PYG{n}{extra}\PYG{p}{)}\PYG{p}{:}
+ \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{o}{.}\PYG{n}{write}\PYG{p}{(}\PYG{n+nb}{open}\PYG{p}{(}\PYG{n+nb}{input}\PYG{p}{)}\PYG{o}{.}\PYG{n}{read}\PYG{p}{(}\PYG{p}{)}\PYG{p}{)}
+ \PYG{n}{sys}\PYG{o}{.}\PYG{n}{stderr}\PYG{o}{.}\PYG{n}{write}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s+si}{\PYGZpc{}d}\PYG{l+s}{ + }\PYG{l+s+si}{\PYGZpc{}d}\PYG{l+s}{ =\PYGZgt{} }\PYG{l+s+si}{\PYGZpc{}d}\PYG{l+s+se}{\PYGZbs{}n}\PYG{l+s}{\PYGZdq{}} \PYG{o}{\PYGZpc{}} \PYG{p}{(}\PYG{n}{extra}\PYG{p}{[}\PYG{l+m+mi}{0}\PYG{p}{]} \PYG{p}{,} \PYG{n}{extra}\PYG{p}{[}\PYG{l+m+mi}{1}\PYG{p}{]}\PYG{p}{,} \PYG{n}{extra}\PYG{p}{[}\PYG{l+m+mi}{0}\PYG{p}{]} \PYG{o}{+} \PYG{n}{extra}\PYG{p}{[}\PYG{l+m [...]
+
+\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{p}{)}
+\end{Verbatim}
+
+Produces:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+Task = pipeline\_task
+    1 + 2 => 3
+    Job = [A.input -> A.output, (1, 2)] completed
+    3 + 4 => 7
+    Job = [B.input -> B.output, (3, 4)] completed
+    5 + 6 => 11
+    Job = [C.input -> C.output, (5, 6)] completed
+\end{Verbatim}
+\end{quote}
+
+\begin{notice}{note}{Note:}
+Be aware that the parameter generating function may be invoked
+{\hyperref[tutorials/new_tutorial/dependencies:new-manual-dependencies-checking-multiple-times]{\emph{more than once}}}:
+\begin{itemize}
+\item {}
+the first time, to check if this part of the pipeline is up-to-date;
+
+\item {}
+the second time, when the pipeline task function is run.
+
+\end{itemize}
+\end{notice}
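+
+Because of this, it is safest for the generator to rebuild its parameters afresh on every call,
+rather than consuming a one-shot iterator created outside the function. A minimal sketch
+(the file names here are invented purely for illustration):
+\begin{quote}
+
+\begin{Verbatim}
+def generate_parameters_on_the_fly():
+    # recompute the parameters each time: Ruffus may call this once for
+    # up-to-date checking and again just before running the task
+    for prefix in ("A", "B", "C"):
+        yield [prefix + ".input", prefix + ".output"]
+\end{Verbatim}
+\end{quote}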
+
+The resulting custom \emph{inputs} and \emph{outputs} parameters for each job are
+treated normally for the purposes of checking whether jobs are up-to-date and
+need to be re-run.
+\end{quote}
+
+
+\subsection{A Cartesian Product, all vs all example}
+\label{tutorials/new_tutorial/onthefly:a-cartesian-product-all-vs-all-example}\begin{quote}
+
+The {\hyperref[tutorials/new_tutorial/onthefly_code:new-manual-on-the-fly-code]{\emph{accompanying example}}} provides a more realistic reason why
+you would want to generate parameters on the fly. It is a fun piece of code, which generates
+N x M combinations from two sets of files as the \emph{inputs} of a pipeline stage.
+
+The \emph{inputs} / \emph{outputs} filenames are generated by a pair of nested for-loops to produce
+the N (outer loop) x M (inner loop) combinations, with the appropriate parameters
+for each job \code{yield}ed per iteration of the inner loop. The gist of this is:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{c}{\PYGZsh{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus [...]
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} Generator function}
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} N x M jobs}
+\PYG{c}{\PYGZsh{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus [...]
+\PYG{k}{def} \PYG{n+nf}{generate\PYGZus{}simulation\PYGZus{}params} \PYG{p}{(}\PYG{p}{)}\PYG{p}{:}
+ \PYG{l+s+sd}{\PYGZdq{}\PYGZdq{}\PYGZdq{}}
+\PYG{l+s+sd}{ Custom function to generate}
+\PYG{l+s+sd}{ file names for gene/gwas simulation study}
+\PYG{l+s+sd}{ \PYGZdq{}\PYGZdq{}\PYGZdq{}}
+ \PYG{k}{for} \PYG{n}{sim\PYGZus{}file} \PYG{o+ow}{in} \PYG{n}{get\PYGZus{}simulation\PYGZus{}files}\PYG{p}{(}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{for} \PYG{p}{(}\PYG{n}{gene}\PYG{p}{,} \PYG{n}{gwas}\PYG{p}{)} \PYG{o+ow}{in} \PYG{n}{get\PYGZus{}gene\PYGZus{}gwas\PYGZus{}file\PYGZus{}pairs}\PYG{p}{(}\PYG{p}{)}\PYG{p}{:}
+ \PYG{n}{result\PYGZus{}file} \PYG{o}{=} \PYG{l+s}{\PYGZdq{}}\PYG{l+s+si}{\PYGZpc{}s}\PYG{l+s}{.}\PYG{l+s+si}{\PYGZpc{}s}\PYG{l+s}{.results}\PYG{l+s}{\PYGZdq{}} \PYG{o}{\PYGZpc{}} \PYG{p}{(}\PYG{n}{gene}\PYG{p}{,} \PYG{n}{sim\PYGZus{}file}\PYG{p}{)}
+ \PYG{k}{yield} \PYG{p}{(}\PYG{n}{gene}\PYG{p}{,} \PYG{n}{gwas}\PYG{p}{,} \PYG{n}{sim\PYGZus{}file}\PYG{p}{)}\PYG{p}{,} \PYG{n}{result\PYGZus{}file}
+
+
+
+\PYG{n+nd}{@files}\PYG{p}{(}\PYG{n}{generate\PYGZus{}simulation\PYGZus{}params}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{gwas\PYGZus{}simulation}\PYG{p}{(}\PYG{n}{input\PYGZus{}files}\PYG{p}{,} \PYG{n}{output\PYGZus{}file}\PYG{p}{)}\PYG{p}{:}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{...}\PYG{l+s}{\PYGZdq{}}
+\end{Verbatim}
+\begin{description}
+\item[{If \code{get\_simulation\_files()} produces:}] \leavevmode
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{a.sim}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{b.sim}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{c.sim}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]}
+\end{Verbatim}
+
+\item[{and \code{get\_gene\_gwas\_file\_pairs()} produces:}] \leavevmode
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{p}{[}\PYG{p}{(}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{1.gene}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{1.gwas}\PYG{l+s}{\PYGZsq{}}\PYG{p}{)}\PYG{p}{,} \PYG{p}{(}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{2.gene}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{2.gwas}\PYG{l+s}{\PYGZsq{}}\PYG{p}{)}\PYG{p}{]}
+\end{Verbatim}
+
+\end{description}
+
+then we would end up with \code{3} x \code{2} = \code{6} jobs and the following equivalent function calls:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n}{gwas\PYGZus{}simulation}\PYG{p}{(}\PYG{p}{(}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{1.gene}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{1.gwas}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{a.sim}\PYG{l+s}{\PYGZsq{}}\PYG{p}{)}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{1.gene.a.sim.results}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{n}{gwas\PYGZus{}simulation}\PYG{p}{(}\PYG{p}{(}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{2.gene}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{2.gwas}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{a.sim}\PYG{l+s}{\PYGZsq{}}\PYG{p}{)}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{2.gene.a.sim.results}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{n}{gwas\PYGZus{}simulation}\PYG{p}{(}\PYG{p}{(}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{1.gene}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{1.gwas}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{b.sim}\PYG{l+s}{\PYGZsq{}}\PYG{p}{)}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{1.gene.b.sim.results}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{n}{gwas\PYGZus{}simulation}\PYG{p}{(}\PYG{p}{(}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{2.gene}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{2.gwas}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{b.sim}\PYG{l+s}{\PYGZsq{}}\PYG{p}{)}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{2.gene.b.sim.results}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{n}{gwas\PYGZus{}simulation}\PYG{p}{(}\PYG{p}{(}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{1.gene}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{1.gwas}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{c.sim}\PYG{l+s}{\PYGZsq{}}\PYG{p}{)}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{1.gene.c.sim.results}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{n}{gwas\PYGZus{}simulation}\PYG{p}{(}\PYG{p}{(}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{2.gene}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{2.gwas}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{c.sim}\PYG{l+s}{\PYGZsq{}}\PYG{p}{)}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{2.gene.c.sim.results}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\end{Verbatim}
+\end{quote}
+\end{quote}
+
+The {\hyperref[tutorials/new_tutorial/onthefly_code:new-manual-on-the-fly-code]{\emph{accompanying code}}} looks slightly more complicated because
+of some extra bookkeeping.
+
+You can compare this approach with the alternative of using {\hyperref[decorators/product:decorators-product]{\emph{@product}}}:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{c}{\PYGZsh{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus [...]
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} N x M jobs}
+\PYG{c}{\PYGZsh{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus [...]
+\PYG{n+nd}{@product}\PYG{p}{(} \PYG{n}{os}\PYG{o}{.}\PYG{n}{path}\PYG{o}{.}\PYG{n}{join}\PYG{p}{(}\PYG{n}{simulation\PYGZus{}data\PYGZus{}dir}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{*.simulation}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,}
+ \PYG{n}{formatter}\PYG{p}{(}\PYG{p}{)}\PYG{p}{,}
+
+ \PYG{n}{os}\PYG{o}{.}\PYG{n}{path}\PYG{o}{.}\PYG{n}{join}\PYG{p}{(}\PYG{n}{gene\PYGZus{}data\PYGZus{}dir}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{*.gene}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,}
+ \PYG{n}{formatter}\PYG{p}{(}\PYG{p}{)}\PYG{p}{,}
+
+ \PYG{c}{\PYGZsh{} add gwas as an input: looks like *.gene but with a different extension}
+ \PYG{n}{add\PYGZus{}inputs}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}path[1][0]/\PYGZob{}basename[1][0]\PYGZcb{}.gwas}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}basename[0][0]\PYGZcb{}.\PYGZob{}basename[1][0]\PYGZcb{}.results}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)} \PYG{c}{\PYGZsh{} output file}
+\PYG{k}{def} \PYG{n+nf}{gwas\PYGZus{}simulation}\PYG{p}{(}\PYG{n}{input\PYGZus{}files}\PYG{p}{,} \PYG{n}{output\PYGZus{}file}\PYG{p}{)}\PYG{p}{:}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{...}\PYG{l+s}{\PYGZdq{}}
+\end{Verbatim}
+\end{quote}
+\end{quote}
+
+\index{@parallel!Tutorial}\index{Tutorial!@parallel}
+
+\section{\textbf{Chapter 22}: Esoteric: Running jobs in parallel without files using \emph{@parallel}}
+\label{tutorials/new_tutorial/parallel:new-manual-parallel-chapter-num-esoteric-running-jobs-in-parallel-without-files-using-parallel}\label{tutorials/new_tutorial/parallel:index-0}\label{tutorials/new_tutorial/parallel::doc}\label{tutorials/new_tutorial/parallel:new-manual-deprecated-parallel}
+
+\strong{See also:}
+
+\begin{itemize}
+\item {}
+{\hyperref[tutorials/new_tutorial/manual_contents:new-manual-table-of-contents]{\emph{Manual Table of Contents}}}
+
+\item {}
+{\hyperref[decorators/parallel:decorators-parallel]{\emph{@parallel}}} syntax in detail
+
+\end{itemize}
+
+
+
+
+\subsection{\textbf{@parallel}}
+\label{tutorials/new_tutorial/parallel:parallel}\begin{quote}
+
+\textbf{@parallel} supplies parameters for multiple \textbf{jobs} exactly like {\hyperref[tutorials/new_tutorial/deprecated_files:new-manual-deprecated-files]{\emph{@files}}} except that:
+\begin{enumerate}
+\item {}
+The first two parameters are not treated like \emph{inputs} and \emph{outputs} parameters,
+and strings are not assumed to be file names
+
+\item {}
+Thus no checking of whether each job is up-to-date is made using \emph{inputs} and \emph{outputs} files
+
+\item {}
+No expansion of \href{http://docs.python.org/library/glob.html}{\emph{glob}} patterns or \emph{output} from previous tasks is carried out.
+
+\end{enumerate}
+
+This syntax is most useful when a pipeline stage does not involve creating or consuming any files, and
+you wish to forego the conveniences of {\hyperref[tutorials/new_tutorial/deprecated_files:new-manual-deprecated-files]{\emph{@files}}}, {\hyperref[tutorials/new_tutorial/transform:new-manual-transform]{\emph{@transform}}} etc.
+
+The following code performs some arithmetic in parallel:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{k+kn}{import} \PYG{n+nn}{sys}
+\PYG{k+kn}{from} \PYG{n+nn}{ruffus} \PYG{k+kn}{import} \PYG{o}{*}
+\PYG{n}{parameters} \PYG{o}{=} \PYG{p}{[}
+ \PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{A}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+m+mi}{1}\PYG{p}{,} \PYG{l+m+mi}{2}\PYG{p}{]}\PYG{p}{,} \PYG{c}{\PYGZsh{} 1st job}
+ \PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{B}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+m+mi}{3}\PYG{p}{,} \PYG{l+m+mi}{4}\PYG{p}{]}\PYG{p}{,} \PYG{c}{\PYGZsh{} 2nd job}
+ \PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{C}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+m+mi}{5}\PYG{p}{,} \PYG{l+m+mi}{6}\PYG{p}{]}\PYG{p}{,} \PYG{c}{\PYGZsh{} 3rd job}
+ \PYG{p}{]}
+\PYG{n+nd}{@parallel}\PYG{p}{(}\PYG{n}{parameters}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{parallel\PYGZus{}task}\PYG{p}{(}\PYG{n}{name}\PYG{p}{,} \PYG{n}{param1}\PYG{p}{,} \PYG{n}{param2}\PYG{p}{)}\PYG{p}{:}
+ \PYG{n}{sys}\PYG{o}{.}\PYG{n}{stderr}\PYG{o}{.}\PYG{n}{write}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{ Parallel task }\PYG{l+s+si}{\PYGZpc{}s}\PYG{l+s}{: }\PYG{l+s}{\PYGZdq{}} \PYG{o}{\PYGZpc{}} \PYG{n}{name}\PYG{p}{)}
+ \PYG{n}{sys}\PYG{o}{.}\PYG{n}{stderr}\PYG{o}{.}\PYG{n}{write}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s+si}{\PYGZpc{}d}\PYG{l+s}{ + }\PYG{l+s+si}{\PYGZpc{}d}\PYG{l+s}{ = }\PYG{l+s+si}{\PYGZpc{}d}\PYG{l+s+se}{\PYGZbs{}n}\PYG{l+s}{\PYGZdq{}} \PYG{o}{\PYGZpc{}} \PYG{p}{(}\PYG{n}{param1}\PYG{p}{,} \PYG{n}{param2}\PYG{p}{,} \PYG{n}{param1} \PYG{o}{+} \PYG{n}{param2}\PYG{p}{)}\PYG{p}{)}
+
+\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{p}{[}\PYG{n}{parallel\PYGZus{}task}\PYG{p}{]}\PYG{p}{)}
+\end{Verbatim}
+
+produces the following:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+Task = parallel\_task
+ Parallel task A: 1 + 2 = 3
+ Job = ["A", 1, 2] completed
+ Parallel task B: 3 + 4 = 7
+ Job = ["B", 3, 4] completed
+ Parallel task C: 5 + 6 = 11
+ Job = ["C", 5, 6] completed
+\end{Verbatim}
+\end{quote}
+\end{quote}
+
+\index{check\_if\_uptodate!Tutorial}\index{Tutorial!check\_if\_uptodate}
+
+\section{\textbf{Chapter 23}: Esoteric: Writing custom functions to decide which jobs are up to date with \emph{@check\_if\_uptodate}}
+\label{tutorials/new_tutorial/check_if_uptodate:index-0}\label{tutorials/new_tutorial/check_if_uptodate:new-manual-check-if-uptodate}\label{tutorials/new_tutorial/check_if_uptodate::doc}\label{tutorials/new_tutorial/check_if_uptodate:new-manual-check-if-uptodate-chapter-num-esoteric-writing-custom-functions-to-decide-which-jobs-are-up-to-date-with-check-if-uptodate}
+
+\strong{See also:}
+
+\begin{itemize}
+\item {}
+{\hyperref[tutorials/new_tutorial/manual_contents:new-manual-table-of-contents]{\emph{Manual Table of Contents}}}
+
+\item {}
+{\hyperref[decorators/check_if_uptodate:decorators-check-if-uptodate]{\emph{@check\_if\_uptodate syntax in detail}}}
+
+\end{itemize}
+
+
+
+
+\subsection{\textbf{@check\_if\_uptodate} : Manual dependency checking}
+\label{tutorials/new_tutorial/check_if_uptodate:check-if-uptodate-manual-dependency-checking}\begin{quote}
+\begin{description}
+\item[{Tasks specified with most decorators, such as}] \leavevmode\begin{itemize}
+\item {}
+{\hyperref[decorators/split:decorators-split]{\emph{@split}}}
+
+\item {}
+{\hyperref[decorators/transform:decorators-transform]{\emph{@transform}}}
+
+\item {}
+{\hyperref[decorators/merge:decorators-merge]{\emph{@merge}}}
+
+\item {}
+{\hyperref[decorators/collate:decorators-collate]{\emph{@collate}}}
+
+\item {}
+{\hyperref[decorators/subdivide:decorators-subdivide]{\emph{@subdivide}}}
+
+\end{itemize}
+
+\end{description}
+
+have automatic dependency checking based on file modification times.
+
+Sometimes, you might want to have more control over whether to run jobs, especially
+if a task does not rely on or produce files (e.g. with {\hyperref[decorators/parallel:decorators-parallel]{\emph{@parallel}}}).
+
+You can write your own custom function to decide whether to run a job.
+This function takes the same parameters as your task function, and needs to return a
+tuple indicating whether an update is required, and why (i.e. \code{tuple(bool, str)}).
+
+This simple example, which creates the file \code{"a.1"} if it does not exist:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{k+kn}{from} \PYG{n+nn}{ruffus} \PYG{k+kn}{import} \PYG{o}{*}
+\PYG{n+nd}{@originate}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{a.1}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{create\PYGZus{}if\PYGZus{}necessary}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{)}\PYG{p}{:}
+ \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+
+\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{p}{[}\PYG{p}{]}\PYG{p}{)}
+\end{Verbatim}
+\end{quote}
+
+could be rewritten more laboriously as:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{k+kn}{from} \PYG{n+nn}{ruffus} \PYG{k+kn}{import} \PYG{o}{*}
+\PYG{k+kn}{import} \PYG{n+nn}{os}
+\PYG{k}{def} \PYG{n+nf}{check\PYGZus{}file\PYGZus{}exists}\PYG{p}{(}\PYG{n}{input\PYGZus{}file}\PYG{p}{,} \PYG{n}{output\PYGZus{}file}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{if} \PYG{n}{os}\PYG{o}{.}\PYG{n}{path}\PYG{o}{.}\PYG{n}{exists}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{return} \PYG{n+nb+bp}{False}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{File already exists}\PYG{l+s}{\PYGZdq{}}
+ \PYG{k}{return} \PYG{n+nb+bp}{True}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s+si}{\PYGZpc{}s}\PYG{l+s}{ is missing}\PYG{l+s}{\PYGZdq{}} \PYG{o}{\PYGZpc{}} \PYG{n}{output\PYGZus{}file}
+
+\PYG{n+nd}{@parallel}\PYG{p}{(}\PYG{p}{[}\PYG{p}{[}\PYG{n+nb+bp}{None}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{a.1}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{]}\PYG{p}{)}
+\PYG{n+nd}{@check\PYGZus{}if\PYGZus{}uptodate}\PYG{p}{(}\PYG{n}{check\PYGZus{}file\PYGZus{}exists}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{create\PYGZus{}if\PYGZus{}necessary}\PYG{p}{(}\PYG{n}{input\PYGZus{}file}\PYG{p}{,} \PYG{n}{output\PYGZus{}file}\PYG{p}{)}\PYG{p}{:}
+ \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+
+\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{p}{[}\PYG{n}{create\PYGZus{}if\PYGZus{}necessary}\PYG{p}{]}\PYG{p}{)}
+\end{Verbatim}
+\end{quote}
+\begin{description}
+\item[{Both produce the same output:}] \leavevmode
+\begin{Verbatim}[commandchars=\\\{\}]
+Task = create\_if\_necessary
+ Job = [null, "a.1"] completed
+\end{Verbatim}
+
+\end{description}
+\end{quote}
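+
+For comparison, here is a hedged sketch (not from the original manual) of a checker that looks
+at modification times instead, much as the built-in dependency checking does. It assumes the
+input parameter is a real file name:
+\begin{quote}
+
+\begin{Verbatim}
+import os
+
+def out_of_date_by_timestamp(input_file, output_file):
+    # the job must run if the output is missing or older than the input
+    if not os.path.exists(output_file):
+        return True, "%s is missing" % output_file
+    if os.path.getmtime(output_file) < os.path.getmtime(input_file):
+        return True, "%s is older than %s" % (output_file, input_file)
+    return False, "%s is up to date" % output_file
+\end{Verbatim}
+\end{quote}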
+
+\begin{notice}{note}{Note:}
+The function specified by {\hyperref[decorators/check_if_uptodate:decorators-check-if-uptodate]{\emph{@check\_if\_uptodate}}} can be called
+more than once for each job.
+
+See the {\hyperref[tutorials/new_tutorial/dependencies:new-manual-dependencies]{\emph{description here}}} of how \emph{Ruffus} decides which tasks to run.
+\end{notice}
+
+\index{flowchart colours!Tutorial}\index{Tutorial!flowchart colours}
+
+\section{\textbf{Appendix 1}: Flow Chart Colours with \emph{pipeline\_printout\_graph(...)}}
+\label{tutorials/new_tutorial/flowchart_colours:index-0}\label{tutorials/new_tutorial/flowchart_colours:new-manual-flowchart-colours-chapter-num-flow-chart-colours-with-pipeline-printout-graph}\label{tutorials/new_tutorial/flowchart_colours::doc}\label{tutorials/new_tutorial/flowchart_colours:new-manual-flowchart-colours}
+
+\strong{See also:}
+
+\begin{itemize}
+\item {}
+{\hyperref[tutorials/new_tutorial/manual_contents:new-manual-table-of-contents]{\emph{Manual Table of Contents}}}
+
+\item {}
+{\hyperref[pipeline_functions:pipeline-functions-pipeline-printout-graph]{\emph{pipeline\_printout\_graph(...)}}}
+
+\item {}
+\code{Download code}
+
+\item {}
+{\hyperref[tutorials/new_tutorial/flowchart_colours_code:new-manual-flowchart-colours-code]{\emph{Code}}} for experimenting with colours
+
+\end{itemize}
+
+
+
+
+\subsection{Flowchart colours}
+\label{tutorials/new_tutorial/flowchart_colours:flowchart-colours}
+The appearance of \emph{Ruffus} flowcharts produced by {\hyperref[pipeline_functions:pipeline-functions-pipeline-printout-graph]{\emph{pipeline\_printout\_graph}}}
+can be extensively customised.
+
+This is mainly controlled by the {\hyperref[pipeline_functions:pipeline-functions-pipeline-printout-graph-user-colour-scheme]{\emph{user\_colour\_scheme}}} (note UK spelling of ``colour'') parameter.
+
+Example:
+\begin{quote}
+
+Use colour scheme index = 1
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n}{pipeline\PYGZus{}printout\PYGZus{}graph} \PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{flowchart.svg}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{svg}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{p}{[}\PYG{n}{final\PYGZus{}task}\PYG{p}{]}\PYG{p}{,}
+ \PYG{n}{user\PYGZus{}colour\PYGZus{}scheme} \PYG{o}{=} \PYG{p}{\PYGZob{}}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{colour\PYGZus{}scheme\PYGZus{}index}\PYG{l+s}{\PYGZdq{}} \PYG{p}{:}\PYG{l+m+mi}{1}\PYG{p}{,}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{Pipeline}\PYG{l+s}{\PYGZdq{}} \PYG{p}{:}\PYG{p}{\PYGZob{}}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{fontcolor}\PYG{l+s}{\PYGZdq{}} \PYG{p}{:} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZsh{}FF3232}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZsq{}} \PYG{p}{\PYGZcb{}}\PYG{p}{,}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{Key}\PYG{l+s}{\PYGZdq{}} \PYG{p}{:}\PYG{p}{\PYGZob{}}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{fontcolor}\PYG{l+s}{\PYGZdq{}} \PYG{p}{:} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{Red}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{fillcolor}\PYG{l+s}{\PYGZdq{}} \PYG{p}{:} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZsh{}F6F4F4}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZsq{}} \PYG{p}{\PYGZcb{}}\PYG{p}{,}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{Task to run}\PYG{l+s}{\PYGZdq{}} \PYG{p}{:}\PYG{p}{\PYGZob{}}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{linecolor}\PYG{l+s}{\PYGZdq{}} \PYG{p}{:} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZsh{}0044A0}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZsq{}} \PYG{p}{\PYGZcb{}}\PYG{p}{,}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{Final target}\PYG{l+s}{\PYGZdq{}} \PYG{p}{:}\PYG{p}{\PYGZob{}}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{fillcolor}\PYG{l+s}{\PYGZdq{}} \PYG{p}{:} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZsh{}EFA03B}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{fontcolor}\PYG{l+s}{\PYGZdq{}} \PYG{p}{:} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{black}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{dashed}\PYG{l+s}{\PYGZdq{}} \PYG{p}{:} \PYG{l+m+mi}{0} \PYG{p}{\PYGZcb{}}
+ \PYG{p}{\PYGZcb{}}\PYG{p}{)}
+\end{Verbatim}
+\end{quote}
+\begin{description}
+\item[{There are 8 colour schemes, which can be selected by setting \code{"colour\_scheme\_index"}:}] \leavevmode
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n}{pipeline\PYGZus{}printout\PYGZus{}graph} \PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{flowchart.svg}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{svg}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{p}{[}\PYG{n}{final\PYGZus{}task}\PYG{p}{]}\PYG{p}{,}
+ \PYG{n}{user\PYGZus{}colour\PYGZus{}scheme} \PYG{o}{=} \PYG{p}{\PYGZob{}}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{colour\PYGZus{}scheme\PYGZus{}index}\PYG{l+s}{\PYGZdq{}} \PYG{p}{:}\PYG{l+m+mi}{6}\PYG{p}{\PYGZcb{}}\PYG{p}{)}
+\end{Verbatim}
+
+\end{description}
+
+These colours were chosen after many fierce arguments between the authors and friends, and much
+inspiration from \href{http://kuler.adobe.com/\#create/fromacolor}{http://kuler.adobe.com/\#create/fromacolor}. Please
+feel free to submit any additional sets of colours for our consideration.
+
+
+\includegraphics{flowchart_colour_schemes.png}
+
+\index{Checking dependencies!Tutorial}\index{Tutorial!Checking dependencies}
+
+\section{\textbf{Appendix 2}: How dependency is checked}
+\label{tutorials/new_tutorial/dependencies:new-manual-dependencies-chapter-num-how-dependency-is-checked}\label{tutorials/new_tutorial/dependencies:index-0}\label{tutorials/new_tutorial/dependencies::doc}\label{tutorials/new_tutorial/dependencies:new-manual-dependencies}
+
+\strong{See also:}
+
+\begin{itemize}
+\item {}
+{\hyperref[tutorials/new_tutorial/manual_contents:new-manual-table-of-contents]{\emph{Manual Table of Contents}}}
+
+\end{itemize}
+
+
+
+
+\subsection{Overview}
+\label{tutorials/new_tutorial/dependencies:overview}\begin{quote}
+
+How does \emph{Ruffus} decide how to run your pipeline?
+\begin{itemize}
+\item {}
+In which order should pipelined functions be called?
+
+\item {}
+Which parts of the pipeline are up-to-date and do not need to be rerun?
+
+\end{itemize}
+\end{quote}
+
+
+\subsubsection{Running all out-of-date tasks and dependents}
+\label{tutorials/new_tutorial/dependencies:running-all-out-of-date-tasks-and-dependents}\begin{quote}
+
+\scalebox{0.500000}{\includegraphics{manual_dependencies_flowchart_intro.png}}
+
+By default, \emph{Ruffus} will
+\begin{itemize}
+\item {}
+build a flow chart (dependency tree) of pipelined tasks (functions)
+
+\item {}
+start from the most ancestral tasks with the fewest dependencies (\code{task1} and \code{task4} in the flowchart above).
+
+\item {}
+walk up the tree to find the first incomplete / out-of-date tasks (i.e. \code{task3} and \code{task5}).
+
+\item {}
+start running from there
+
+\end{itemize}
+All down-stream (dependent) tasks will be re-run anyway, so we don't have to test
+whether they are up-to-date or not.
+
+\phantomsection\label{tutorials/new_tutorial/dependencies:new-manual-dependencies-checking-multiple-times}
+\begin{notice}{note}{Note:}
+This means that \emph{Ruffus} \emph{may} ask any task whether its jobs are out of date more than once:
+\begin{itemize}
+\item {}
+once when deciding which parts of the pipeline have to be run
+
+\item {}
+once just before executing the task.
+
+\end{itemize}
+\end{notice}
+
+\emph{Ruffus} tries to be clever / efficient, and does the minimal amount of querying.
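+
+To see what \emph{Ruffus} has decided before committing to a run, you can print the dependency
+analysis first. A minimal sketch (\code{final\_task} stands for whichever terminal task your
+pipeline defines):
+\begin{quote}
+
+\begin{Verbatim}
+import sys
+from ruffus import pipeline_printout
+
+# higher verbose levels show which tasks and jobs are out of date, and why
+pipeline_printout(sys.stdout, [final_task], verbose = 3)
+\end{Verbatim}
+\end{quote}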
+\end{quote}
+
+
+\subsubsection{Forced Reruns}
+\label{tutorials/new_tutorial/dependencies:forced-reruns}\label{tutorials/new_tutorial/dependencies:new-manual-dependencies-forced-reruns}\begin{quote}
+
+Even if a pipeline stage appears to be up to date,
+you can always force the pipeline to rerun from one or more task functions.
+
+This is particularly useful, for example, if the pipeline data hasn't changed but
+the analysis or computational code has.
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{n}{forcedtorun\PYGZus{}tasks} \PYG{o}{=} \PYG{p}{[}\PYG{n}{up\PYGZus{}to\PYGZus{}date\PYGZus{}task1}\PYG{p}{]}\PYG{p}{)}
+\end{Verbatim}
+
+will run all tasks from \code{up\_to\_date\_task1} to \code{final\_task}.
+\end{quote}
+
+Both the ``target'' and the ``forced'' lists can include as many tasks as you wish. All dependencies
+are still carried out and out-of-date jobs rerun.
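+
+For example, a minimal sketch combining both lists (\code{up\_to\_date\_task1} and
+\code{final\_task} are assumed to be tasks defined in your own pipeline, as above):
+\begin{quote}
+
+\begin{Verbatim}
+# rerun from up_to_date_task1 even though it looks up to date,
+# and stop once final_task (and everything it needs) is complete
+pipeline_run(target_tasks      = [final_task],
+             forcedtorun_tasks = [up_to_date_task1])
+\end{Verbatim}
+\end{quote}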
+\end{quote}
+
+
+\subsubsection{Esoteric option: Minimal Reruns}
+\label{tutorials/new_tutorial/dependencies:new-manual-dependencies-minimal-reruns}\label{tutorials/new_tutorial/dependencies:esoteric-option-minimal-reruns}\begin{quote}
+
+In the above example, if we were to delete the results of \code{up\_to\_date\_task1}, \emph{Ruffus}
+would rerun \code{up\_to\_date\_task1}, \code{up\_to\_date\_task2} and \code{task3}.
+
+However, you might argue that so long as \code{up\_to\_date\_task2} is up-to-date, and it
+is the only necessary prerequisite for \code{task3}, we should not be concerned about
+\code{up\_to\_date\_task1}.
+
+This is enabled with:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{p}{[}\PYG{n}{task6}\PYG{p}{]}\PYG{p}{,} \PYG{n}{gnu\PYGZus{}make\PYGZus{}maximal\PYGZus{}rebuild\PYGZus{}mode} \PYG{o}{=} \PYG{n+nb+bp}{False}\PYG{p}{)}
+\end{Verbatim}
+\end{quote}
+
+This option walks down the dependency tree and proceeds no further when it encounters
+an up-to-date task (\code{up\_to\_date\_task2}), regardless of the state of anything beyond it.
+
+This rather dangerous option is useful if you don't want to keep all the intermediate
+files/results from upstream tasks. The pipeline will not run any incomplete
+tasks which precede an up-to-date result.
+
+This is seldom what you intend, and you should always check that the appropriate stages
+of the pipeline are executed in the flowchart output.
+\end{quote}
+
+\index{exceptions!Tutorial}\index{Tutorial!exceptions}
+
+\section{\textbf{Appendix 3}: Exceptions thrown inside pipelines}
+\label{tutorials/new_tutorial/exceptions:new-manual-exceptions-chapter-num-exceptions-thrown-inside-pipelines}\label{tutorials/new_tutorial/exceptions:new-manual-exceptions}\label{tutorials/new_tutorial/exceptions:index-0}\label{tutorials/new_tutorial/exceptions::doc}
+
+\subsection{Overview}
+\label{tutorials/new_tutorial/exceptions:overview}\begin{quote}
+
+The goal for \emph{Ruffus} is that exceptions should just work \emph{out-of-the-box} without any fuss.
+This is especially important for exceptions that come from your code which may be raised
+in a different process. Often multiple parallel operations (jobs or tasks) fail at the
+same time. \emph{Ruffus} will forward each of these exceptions with the tracebacks so you
+can jump straight to the offending line.
+
+This example shows separate exceptions from two jobs running in parallel:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{k+kn}{from} \PYG{n+nn}{ruffus} \PYG{k+kn}{import} \PYG{o}{*}
+
+\PYG{n+nd}{@originate}\PYG{p}{(}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{a.start}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{b.start}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{c.start}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{d.start}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{e.start}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{throw\PYGZus{}exceptions\PYGZus{}here}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{raise} \PYG{n+ne}{Exception}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{OOPS}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+
+\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{n}{multiprocess} \PYG{o}{=} \PYG{l+m+mi}{2}\PYG{p}{)}
+\end{Verbatim}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{g+go}{ \PYGZgt{}\PYGZgt{}\PYGZgt{} pipeline\PYGZus{}run(multiprocess = 2)}
+
+\PYG{g+go}{ ruffus.ruffus\PYGZus{}exceptions.RethrownJobError:}
+
+\PYG{g+go}{ Original exceptions:}
+
+\PYG{g+go}{ Exception \PYGZsh{}1}
+\PYG{g+go}{ \PYGZsq{}exceptions.Exception(OOPS)\PYGZsq{} raised in ...}
+\PYG{g+go}{ Task = def throw\PYGZus{}exceptions\PYGZus{}here(...):}
+\PYG{g+go}{ Job = [None \PYGZhy{}\PYGZgt{} b.start]}
+
+\PYG{g+go}{ Traceback (most recent call last):}
+\PYG{g+go}{ File \PYGZdq{}/usr/local/lib/python2.7/dist\PYGZhy{}packages/ruffus/task.py\PYGZdq{}, line 685, in run\PYGZus{}pooled\PYGZus{}job\PYGZus{}without\PYGZus{}exceptions}
+\PYG{g+go}{ return\PYGZus{}value = job\PYGZus{}wrapper(param, user\PYGZus{}defined\PYGZus{}work\PYGZus{}func, register\PYGZus{}cleanup, touch\PYGZus{}files\PYGZus{}only)}
+\PYG{g+go}{ File \PYGZdq{}/usr/local/lib/python2.7/dist\PYGZhy{}packages/ruffus/task.py\PYGZdq{}, line 549, in job\PYGZus{}wrapper\PYGZus{}output\PYGZus{}files}
+\PYG{g+go}{ job\PYGZus{}wrapper\PYGZus{}io\PYGZus{}files(param, user\PYGZus{}defined\PYGZus{}work\PYGZus{}func, register\PYGZus{}cleanup, touch\PYGZus{}files\PYGZus{}only, output\PYGZus{}files\PYGZus{}only = True)}
+\PYG{g+go}{ File \PYGZdq{}/usr/local/lib/python2.7/dist\PYGZhy{}packages/ruffus/task.py\PYGZdq{}, line 504, in job\PYGZus{}wrapper\PYGZus{}io\PYGZus{}files}
+\PYG{g+go}{ ret\PYGZus{}val = user\PYGZus{}defined\PYGZus{}work\PYGZus{}func(*(param[1:]))}
+\PYG{g+go}{ File \PYGZdq{}\PYGZlt{}stdin\PYGZgt{}\PYGZdq{}, line 3, in throw\PYGZus{}exceptions\PYGZus{}here}
+\PYG{g+go}{ Exception: OOPS}
+
+
+\PYG{g+go}{ Exception \PYGZsh{}2}
+\PYG{g+go}{ \PYGZsq{}exceptions.Exception(OOPS)\PYGZsq{} raised in ...}
+\PYG{g+go}{ Task = def throw\PYGZus{}exceptions\PYGZus{}here(...):}
+\PYG{g+go}{ Job = [None \PYGZhy{}\PYGZgt{} a.start]}
+
+\PYG{g+go}{ Traceback (most recent call last):}
+\PYG{g+go}{ File \PYGZdq{}/usr/local/lib/python2.7/dist\PYGZhy{}packages/ruffus/task.py\PYGZdq{}, line 685, in run\PYGZus{}pooled\PYGZus{}job\PYGZus{}without\PYGZus{}exceptions}
+\PYG{g+go}{ return\PYGZus{}value = job\PYGZus{}wrapper(param, user\PYGZus{}defined\PYGZus{}work\PYGZus{}func, register\PYGZus{}cleanup, touch\PYGZus{}files\PYGZus{}only)}
+\PYG{g+go}{ File \PYGZdq{}/usr/local/lib/python2.7/dist\PYGZhy{}packages/ruffus/task.py\PYGZdq{}, line 549, in job\PYGZus{}wrapper\PYGZus{}output\PYGZus{}files}
+\PYG{g+go}{ job\PYGZus{}wrapper\PYGZus{}io\PYGZus{}files(param, user\PYGZus{}defined\PYGZus{}work\PYGZus{}func, register\PYGZus{}cleanup, touch\PYGZus{}files\PYGZus{}only, output\PYGZus{}files\PYGZus{}only = True)}
+\PYG{g+go}{ File \PYGZdq{}/usr/local/lib/python2.7/dist\PYGZhy{}packages/ruffus/task.py\PYGZdq{}, line 504, in job\PYGZus{}wrapper\PYGZus{}io\PYGZus{}files}
+\PYG{g+go}{ ret\PYGZus{}val = user\PYGZus{}defined\PYGZus{}work\PYGZus{}func(*(param[1:]))}
+\PYG{g+go}{ File \PYGZdq{}\PYGZlt{}stdin\PYGZgt{}\PYGZdq{}, line 3, in throw\PYGZus{}exceptions\PYGZus{}here}
+\PYG{g+go}{ Exception: OOPS}
+
+
+\end{Verbatim}
+\end{quote}
+\phantomsection\label{tutorials/new_tutorial/exceptions:new-manual-exceptions-multiple-errors}
+\index{signalling}\index{interrupts}\index{break}\index{errors}\index{exceptions}\index{multiple errors}
+
+\subsection{Pipelines running in parallel accumulate Exceptions}
+\label{tutorials/new_tutorial/exceptions:index-1}\label{tutorials/new_tutorial/exceptions:pipelines-running-in-parallel-accumulate-exceptions}\begin{quote}
+
+As shown above, by default \emph{Ruffus} accumulates \code{NN} exceptions before interrupting the pipeline prematurely, where
+\code{NN} is the specified parallelism for {\hyperref[pipeline_functions:pipeline-functions-pipeline-run]{\emph{pipeline\_run(multiprocess = NN)}}}.
+
+This seems a fair tradeoff between being able to gather detailed error information for
+running jobs, and not wasting too much time for a task that is going to fail anyway.
+\end{quote}
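+
+If the calling script needs to handle this combined error itself rather than letting it
+propagate, a minimal sketch (assuming the pipeline and \code{pipeline\_run} are already set up
+as in the examples above):
+\begin{quote}
+
+\begin{Verbatim}
+import sys
+import ruffus.ruffus_exceptions
+
+try:
+    pipeline_run(multiprocess = 2)
+except ruffus.ruffus_exceptions.RethrownJobError as err:
+    # err collects the exceptions from the parallel jobs; its string
+    # representation includes the original tracebacks shown above
+    sys.stderr.write("Pipeline failed:\n%s\n" % err)
+\end{Verbatim}
+\end{quote}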
+
+
+\subsection{Terminate pipeline immediately upon Exceptions}
+\label{tutorials/new_tutorial/exceptions:terminate-pipeline-immediately-upon-exceptions}
+
+\subsubsection{Set \emph{pipeline\_run(exceptions\_terminate\_immediately = True)}}
+\label{tutorials/new_tutorial/exceptions:set-pipeline-run-exceptions-terminate-immediately-true}\begin{quote}
+
+To have all exceptions interrupt the pipeline immediately, invoke:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{n}{exceptions\PYGZus{}terminate\PYGZus{}immediately} \PYG{o}{=} \PYG{n+nb+bp}{True}\PYG{p}{)}
+\end{Verbatim}
+\end{quote}
+
+For example, with this change, only a single exception will be thrown before the pipeline is interrupted:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{k+kn}{from} \PYG{n+nn}{ruffus} \PYG{k+kn}{import} \PYG{o}{*}
+
+\PYG{n+nd}{@originate}\PYG{p}{(}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{a.start}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{b.start}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{c.start}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{d.start}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{e.start}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{throw\PYGZus{}exceptions\PYGZus{}here}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{raise} \PYG{n+ne}{Exception}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{OOPS}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+
+\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{n}{multiprocess} \PYG{o}{=} \PYG{l+m+mi}{2}\PYG{p}{,} \PYG{n}{exceptions\PYGZus{}terminate\PYGZus{}immediately} \PYG{o}{=} \PYG{n+nb+bp}{True}\PYG{p}{)}
+\end{Verbatim}
+\end{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{g+go}{ \PYGZgt{}\PYGZgt{}\PYGZgt{} pipeline\PYGZus{}run(multiprocess = 2)}
+
+\PYG{g+go}{ ruffus.ruffus\PYGZus{}exceptions.RethrownJobError:}
+
+\PYG{g+go}{ Original exception:}
+
+\PYG{g+go}{ Exception \PYGZsh{}1}
+\PYG{g+go}{ \PYGZsq{}exceptions.Exception(OOPS)\PYGZsq{} raised in ...}
+\PYG{g+go}{ Task = def throw\PYGZus{}exceptions\PYGZus{}here(...):}
+\PYG{g+go}{ Job = [None \PYGZhy{}\PYGZgt{} a.start]}
+
+\PYG{g+go}{ Traceback (most recent call last):}
+\PYG{g+go}{ [Tedious traceback snipped out!!!....]}
+\PYG{g+go}{ Exception: OOPS}
+\end{Verbatim}
+\end{quote}
+
+
+\subsubsection{raise \texttt{Ruffus.JobSignalledBreak}}
+\label{tutorials/new_tutorial/exceptions:raise-ruffus-jobsignalledbreak}\begin{quote}
+
+The same can be accomplished on a finer scale by throwing the \code{Ruffus.JobSignalledBreak} Exception. Unlike
+other exceptions, this causes an immediate halt in pipeline execution. If there are other exceptions in play at that
+point, they will be rethrown in the main process but no new exceptions will be added.
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{k+kn}{from} \PYG{n+nn}{ruffus} \PYG{k+kn}{import} \PYG{o}{*}
+
+\PYG{n+nd}{@originate}\PYG{p}{(}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{a.start}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{b.start}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{c.start}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{d.start}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{e.start}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{throw\PYGZus{}exceptions\PYGZus{}here}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{raise} \PYG{n}{JobSignalledBreak}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{OOPS}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+
+\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{n}{multiprocess} \PYG{o}{=} \PYG{l+m+mi}{2}\PYG{p}{)}
+\end{Verbatim}
+\end{quote}
+\end{quote}
+
+
+\subsection{Display exceptions as they occur}
+\label{tutorials/new_tutorial/exceptions:display-exceptions-as-they-occur}\begin{quote}
+
+In the following example, the jobs throw exceptions
+at staggered two-second intervals. With \code{log\_exceptions = True}, the
+exceptions are displayed as they occur even though the pipeline continues running.
+
+\code{logger.error(...)} will be invoked with the string representation of each exception and its associated stack trace.
+
+The default logger prints to sys.stderr but, as usual, it can be changed to any logger from the logging module (or compatible object) via
+{\hyperref[pipeline_functions:pipeline-functions-pipeline-run]{\emph{pipeline\_run(logger = XXX)}}}.
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{k+kn}{from} \PYG{n+nn}{ruffus} \PYG{k+kn}{import} \PYG{o}{*}
+\PYG{k+kn}{import} \PYG{n+nn}{time}\PYG{o}{,} \PYG{n+nn}{os}
+
+\PYG{n+nd}{@originate}\PYG{p}{(}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{1.start}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{2.start}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{3.start}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{4.start}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{5.start}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{throw\PYGZus{}exceptions\PYGZus{}here}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{)}\PYG{p}{:}
+ \PYG{n}{delay} \PYG{o}{=} \PYG{n+nb}{int}\PYG{p}{(}\PYG{n}{os}\PYG{o}{.}\PYG{n}{path}\PYG{o}{.}\PYG{n}{splitext}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{)}\PYG{p}{[}\PYG{l+m+mi}{0}\PYG{p}{]}\PYG{p}{)}
+ \PYG{n}{time}\PYG{o}{.}\PYG{n}{sleep}\PYG{p}{(}\PYG{n}{delay} \PYG{o}{*} \PYG{l+m+mi}{2}\PYG{p}{)}
+ \PYG{k}{raise} \PYG{n}{JobSignalledBreak}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{OOPS}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+
+\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{n}{log\PYGZus{}exceptions} \PYG{o}{=} \PYG{n+nb+bp}{True}\PYG{p}{,} \PYG{n}{multiprocess} \PYG{o}{=} \PYG{l+m+mi}{5}\PYG{p}{)}
+\end{Verbatim}
+\end{quote}
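+
+If you prefer a logger from the standard \code{logging} module over the default stderr logger,
+a minimal sketch (the logger name is arbitrary, and the tasks are assumed to be defined as above):
+\begin{quote}
+
+\begin{Verbatim}
+import logging
+
+logging.basicConfig(level = logging.DEBUG)
+my_logger = logging.getLogger("my_pipeline")
+
+# exceptions (and normal progress messages) are now routed through my_logger
+pipeline_run(log_exceptions = True, logger = my_logger, multiprocess = 5)
+\end{Verbatim}
+\end{quote}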
+\end{quote}
+
+\index{Ruffus names list!Tutorial}\index{Tutorial!Ruffus names list}
+
+\section{\textbf{Appendix 4}: Names exported from Ruffus}
+\label{tutorials/new_tutorial/list_of_ruffus_names:new-manual-ruffus-names-chapter-num-names-exported-from-ruffus}\label{tutorials/new_tutorial/list_of_ruffus_names:index-0}\label{tutorials/new_tutorial/list_of_ruffus_names::doc}\label{tutorials/new_tutorial/list_of_ruffus_names:new-manual-ruffus-names}
+
+\strong{See also:}
+
+\begin{itemize}
+\item {}
+{\hyperref[tutorials/new_tutorial/manual_contents:new-manual-table-of-contents]{\emph{Manual Table of Contents}}}
+
+\end{itemize}
+
+
+
+
+\subsection{Ruffus Names}
+\label{tutorials/new_tutorial/list_of_ruffus_names:ruffus-names}\begin{quote}
+
+This is a list of all the names \emph{Ruffus} makes available:
+
+\begin{tabular}{|p{0.475\linewidth}|p{0.475\linewidth}|}
+\hline
+\textbf{\relax
+Category
+} & \textbf{\relax
+Names
+}\\\hline
+
+\textbf{Pipeline functions}
+ &
+\begin{DUlineblock}{0em}
+\item[] {\hyperref[pipeline_functions:pipeline-functions-pipeline-printout]{\emph{pipeline\_printout()}}} ({\hyperref[tutorials/new_tutorial/pipeline_printout:new-manual-pipeline-printout]{\emph{Manual}}})
+\item[] {\hyperref[pipeline_functions:pipeline-functions-pipeline-printout-graph]{\emph{pipeline\_printout\_graph()}}} ({\hyperref[tutorials/new_tutorial/pipeline_printout_graph:new-manual-pipeline-printout-graph]{\emph{Manual}}})
+\item[] {\hyperref[pipeline_functions:pipeline-functions-pipeline-run]{\emph{pipeline\_run()}}} ({\hyperref[tutorials/new_tutorial/introduction:new-manual-pipeline-run]{\emph{Manual}}})
+\end{DUlineblock}
+\\\hline
+
+\textbf{Decorators}
+ &
+\begin{DUlineblock}{0em}
+\item[] {\hyperref[decorators/active_if:decorators-active-if]{\emph{@active\_if}}} ({\hyperref[tutorials/new_tutorial/active_if:new-manual-active-if]{\emph{Manual}}})
+\item[] {\hyperref[decorators/check_if_uptodate:decorators-check-if-uptodate]{\emph{@check\_if\_uptodate}}} ({\hyperref[tutorials/new_tutorial/check_if_uptodate:new-manual-check-if-uptodate]{\emph{Manual}}})
+\item[] {\hyperref[decorators/collate:decorators-collate]{\emph{@collate}}} ({\hyperref[tutorials/new_tutorial/subdivide_collate:new-manual-collate]{\emph{Manual}}})
+\item[] {\hyperref[decorators/files:decorators-files]{\emph{@files}}} ({\hyperref[tutorials/new_tutorial/deprecated_files:new-manual-deprecated-files]{\emph{Manual}}})
+\item[] {\hyperref[decorators/follows:decorators-follows]{\emph{@follows}}} ({\hyperref[tutorials/new_tutorial/transform_in_parallel:new-manual-follows]{\emph{Manual}}})
+\item[] {\hyperref[decorators/jobs_limit:decorators-jobs-limit]{\emph{@jobs\_limit}}} ({\hyperref[tutorials/new_tutorial/multiprocessing:new-manual-jobs-limit]{\emph{Manual}}})
+\item[] {\hyperref[decorators/merge:decorators-merge]{\emph{@merge}}} ({\hyperref[tutorials/new_tutorial/merge:new-manual-merge]{\emph{Manual}}})
+\item[] {\hyperref[decorators/mkdir:decorators-mkdir]{\emph{@mkdir}}} ({\hyperref[tutorials/new_tutorial/mkdir:new-manual-mkdir]{\emph{Manual}}})
+\item[] {\hyperref[decorators/originate:decorators-originate]{\emph{@originate}}} ({\hyperref[tutorials/new_tutorial/originate:new-manual-originate]{\emph{Manual}}})
+\item[] {\hyperref[decorators/parallel:decorators-parallel]{\emph{@parallel}}} ({\hyperref[tutorials/new_tutorial/parallel:new-manual-deprecated-parallel]{\emph{Manual}}})
+\item[] {\hyperref[decorators/posttask:decorators-posttask]{\emph{@posttask}}} ({\hyperref[tutorials/new_tutorial/posttask:new-manual-posttask]{\emph{Manual}}})
+\item[] {\hyperref[decorators/split:decorators-split]{\emph{@split}}} ({\hyperref[tutorials/new_tutorial/split:new-manual-split]{\emph{Manual}}})
+\item[] {\hyperref[decorators/subdivide:decorators-subdivide]{\emph{@subdivide}}} ({\hyperref[tutorials/new_tutorial/subdivide_collate:new-manual-subdivide]{\emph{Manual}}})
+\item[] {\hyperref[decorators/transform:decorators-transform]{\emph{@transform}}} ({\hyperref[tutorials/new_tutorial/transform:new-manual-transform]{\emph{Manual}}})
+\item[] {\hyperref[decorators/files_re:decorators-files-re]{\emph{@files\_re}}} ({\hyperref[tutorials/new_tutorial/deprecated_files_re:new-manual-deprecated-files-re]{\emph{Manual}}})
+\end{DUlineblock}
+\\\hline
+
+\textbf{Loggers}
+ &
+\begin{DUlineblock}{0em}
+\item[] stderr\_logger
+\item[] black\_hole\_logger
+\end{DUlineblock}
+\\\hline
+
+\textbf{Parameter disambiguating Indicators}
+ &
+\begin{DUlineblock}{0em}
+\item[] {\hyperref[decorators/indicator_objects:decorators-suffix]{\emph{suffix}}} ({\hyperref[tutorials/new_tutorial/output_file_names:new-manual-suffix]{\emph{Manual}}})
+\item[] {\hyperref[decorators/indicator_objects:decorators-regex]{\emph{regex}}} ({\hyperref[tutorials/new_tutorial/output_file_names:new-manual-regex]{\emph{Manual}}})
+\item[] {\hyperref[decorators/indicator_objects:decorators-formatter]{\emph{formatter}}} ({\hyperref[tutorials/new_tutorial/output_file_names:new-manual-formatter]{\emph{Manual}}})
+\item[] {\hyperref[decorators/indicator_objects:decorators-inputs]{\emph{inputs}}} ({\hyperref[tutorials/new_tutorial/inputs:new-manual-inputs]{\emph{Manual}}})
+\item[] {\hyperref[decorators/indicator_objects:decorators-add-inputs]{\emph{add\_inputs}}} ({\hyperref[tutorials/new_tutorial/inputs:new-manual-inputs]{\emph{Manual}}})
+\item[] {\hyperref[decorators/indicator_objects:decorators-touch-file]{\emph{touch\_file}}} ({\hyperref[tutorials/new_tutorial/posttask:new-manual-posttask-touch-file]{\emph{Manual}}})
+\item[] {\hyperref[decorators/indicator_objects:decorators-combine]{\emph{combine}}}
+\item[] {\hyperref[decorators/follows:decorators-follows-mkdir]{\emph{mkdir}}} ({\hyperref[tutorials/new_tutorial/transform_in_parallel:new-manual-follows-mkdir]{\emph{Manual}}})
+\item[] {\hyperref[decorators/indicator_objects:decorators-output-from]{\emph{output\_from}}} ({\hyperref[tutorials/new_tutorial/transform_in_parallel:new-manual-output-from]{\emph{Manual}}})
+\end{DUlineblock}
+\\\hline
+
+\textbf{Decorators in ruffus.combinatorics}
+ &
+\begin{DUlineblock}{0em}
+\item[] {\hyperref[decorators/combinations:decorators-combinations]{\emph{@combinations}}} ({\hyperref[tutorials/new_tutorial/combinatorics:new-manual-combinations]{\emph{Manual}}})
+\item[] {\hyperref[decorators/combinations_with_replacement:decorators-combinations-with-replacement]{\emph{@combinations\_with\_replacement}}} ({\hyperref[tutorials/new_tutorial/combinatorics:new-manual-combinations-with-replacement]{\emph{Manual}}})
+\item[] {\hyperref[decorators/permutations:decorators-permutations]{\emph{@permutations}}} ({\hyperref[tutorials/new_tutorial/combinatorics:new-manual-permutations]{\emph{Manual}}})
+\item[] {\hyperref[decorators/product:decorators-product]{\emph{@product}}} ({\hyperref[tutorials/new_tutorial/combinatorics:new-manual-product]{\emph{Manual}}})
+\end{DUlineblock}
+\\\hline
+
+\textbf{Functions in ruffus.cmdline}
+ &
+\begin{DUlineblock}{0em}
+\item[] {\hyperref[tutorials/new_tutorial/command_line:new-manual-cmdline-get-argparse]{\emph{get\_argparse}}}
+\item[] {\hyperref[tutorials/new_tutorial/command_line:new-manual-cmdline-setup-logging]{\emph{setup\_logging}}}
+\item[] {\hyperref[tutorials/new_tutorial/command_line:new-manual-cmdline-run]{\emph{run}}}
+\item[] {\hyperref[tutorials/new_tutorial/command_line:new-manual-cmdline-message]{\emph{MESSAGE}}}
+\end{DUlineblock}
+\\\hline
+\end{tabular}
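+
+Most examples in this manual simply use \code{from ruffus import *}. If you prefer explicit
+imports, a minimal sketch pulling in a few of the names listed above (the particular selection
+is arbitrary):
+
+\begin{Verbatim}
+# core pipeline functions, decorators and indicator objects
+from ruffus import pipeline_run, pipeline_printout, originate, transform, suffix, formatter
+
+# combinatoric decorators live in their own sub-module
+from ruffus.combinatorics import product
+
+# command-line helpers
+from ruffus import cmdline
+\end{Verbatim}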
+
+\end{quote}
+
+\index{deprecated @files!Tutorial}\index{Tutorial!deprecated @files}
+
+\section{\textbf{Appendix 5}: \textbf{@files}: Deprecated syntax}
+\label{tutorials/new_tutorial/deprecated_files:new-manual-deprecated-files}\label{tutorials/new_tutorial/deprecated_files:index-0}\label{tutorials/new_tutorial/deprecated_files::doc}\label{tutorials/new_tutorial/deprecated_files:new-manual-deprecated-files-chapter-num-files-deprecated-syntax}
+\begin{notice}{warning}{Warning:}\begin{itemize}
+\item {}
+\textbf{This is deprecated syntax}
+
+\textbf{which is no longer supported and}
+
+\textbf{should NOT be used in new code.}
+
+\end{itemize}
+\end{notice}
+
+
+\strong{See also:}
+
+\begin{itemize}
+\item {}
+{\hyperref[tutorials/new_tutorial/manual_contents:new-manual-table-of-contents]{\emph{Manual Table of Contents}}}
+
+\item {}
+{\hyperref[decorators/decorators:decorators]{\emph{decorators}}}
+
+\item {}
+{\hyperref[decorators/files:decorators-files]{\emph{@files}}} syntax in detail
+
+\end{itemize}
+
+
+
+
+\subsection{Overview}
+\label{tutorials/new_tutorial/deprecated_files:overview}\begin{quote}
+
+\begin{DUlineblock}{0em}
+\item[] The python functions which do the actual work of each stage or
+{\hyperref[glossary:term-task]{\emph{task}}} of a \emph{Ruffus} pipeline are written by you.
+\item[] The role of \emph{Ruffus} is to make sure these functions are called in the right order,
+with the right parameters, running in parallel using multiprocessing if desired.
+\end{DUlineblock}
+
+The easiest way to specify parameters to \emph{Ruffus} {\hyperref[glossary:term-task]{\emph{task}}} functions is to use
+the {\hyperref[decorators/files:decorators-files]{\emph{@files}}} decorator.
+
+\index{@files!Manual}\index{Manual!@files}\end{quote}
+
+
+\subsection{\textbf{@files}}
+\label{tutorials/new_tutorial/deprecated_files:files}\label{tutorials/new_tutorial/deprecated_files:index-1}\begin{quote}
+
+Running this code:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{k+kn}{from} \PYG{n+nn}{ruffus} \PYG{k+kn}{import} \PYG{o}{*}
+
+\PYG{n+nd}{@files}\PYG{p}{(}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{a.1}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{a.2}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{b.2}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{A file}\PYG{l+s}{\PYGZsq{}}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{single\PYGZus{}job\PYGZus{}io\PYGZus{}task}\PYG{p}{(}\PYG{n}{infile}\PYG{p}{,} \PYG{n}{outfiles}\PYG{p}{,} \PYG{n}{text}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{for} \PYG{n}{o} \PYG{o+ow}{in} \PYG{n}{outfiles}\PYG{p}{:} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{o}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+
+\PYG{c}{\PYGZsh{} prepare input file}
+\PYG{n+nb}{open}\PYG{p}{(}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{a.1}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+
+\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{p}{)}
+\end{Verbatim}
+\begin{description}
+\item[{Is equivalent to calling:}] \leavevmode
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n}{single\PYGZus{}job\PYGZus{}io\PYGZus{}task}\PYG{p}{(}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{a.1}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{a.2}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{b.2}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{A file}\PYG{l+s}{\PYGZsq{}}\PYG{p}{)}
+\end{Verbatim}
+
+\item[{And produces:}] \leavevmode
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{p}{)}
+\PYG{g+go}{ Job = [a.1 \PYGZhy{}\PYGZgt{} [a.2, b.2], A file] completed}
+\PYG{g+go}{Completed Task = single\PYGZus{}job\PYGZus{}io\PYGZus{}task}
+\end{Verbatim}
+
+\end{description}
+\end{quote}
+
+\emph{Ruffus} will automatically check if your task is up to date. The second time {\hyperref[pipeline_functions:pipeline-functions-pipeline-run]{\emph{pipeline\_run()}}}
+is called, nothing will happen. But if you update \code{a.1}, the task will rerun:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{n+nb}{open}\PYG{p}{(}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{a.1}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{p}{)}
+\PYG{g+go}{ Job = [a.1 \PYGZhy{}\PYGZgt{} [a.2, b.2], A file] completed}
+\PYG{g+go}{Completed Task = single\PYGZus{}job\PYGZus{}io\PYGZus{}task}
+\end{Verbatim}
+\end{quote}
+
+See {\hyperref[tutorials/new_tutorial/checkpointing:new-manual-skip-up-to-date-rules]{\emph{chapter 2}}} for a more in-depth discussion of how \emph{Ruffus}
+decides which parts of the pipeline are complete and up-to-date.
+\end{quote}
+
+\index{@files!in parallel}\index{in parallel!@files}
+
+\subsection{Running the same code on different parameters in parallel}
+\label{tutorials/new_tutorial/deprecated_files:new-manual-files-parallel}\label{tutorials/new_tutorial/deprecated_files:index-2}\label{tutorials/new_tutorial/deprecated_files:running-the-same-code-on-different-parameters-in-parallel}\begin{quote}
+
+Your pipeline may require the same function to be called multiple times on independent parameters.
+In that case, you can supply all the parameters to @files; each set will be sent to a separate job
+that may run in parallel if necessary. \emph{Ruffus} will check if each separate {\hyperref[glossary:term-job]{\emph{job}}} is up-to-date using
+the \emph{inputs} and \emph{outputs} (first two) parameters (see {\hyperref[tutorials/new_tutorial/transform_in_parallel:new-manual-only-rerun-out-of-date]{\emph{Up-to-date jobs are not re-run unnecessarily}}}).
+
+For example, if a sequence
+(e.g. a list or tuple) of 5 sets of parameters is passed to \textbf{@files}, that indicates
+there will also be 5 separate jobs:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{k+kn}{from} \PYG{n+nn}{ruffus} \PYG{k+kn}{import} \PYG{o}{*}
+\PYG{n}{parameters} \PYG{o}{=} \PYG{p}{[}
+ \PYG{p}{[} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job1.file}\PYG{l+s}{\PYGZsq{}} \PYG{p}{]}\PYG{p}{,} \PYG{c}{\PYGZsh{} 1st job}
+ \PYG{p}{[} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job2.file}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+m+mi}{4} \PYG{p}{]}\PYG{p}{,} \PYG{c}{\PYGZsh{} 2nd job}
+ \PYG{p}{[} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job3.file}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{p}{[}\PYG{l+m+mi}{3}\PYG{p}{,} \PYG{l+m+mi}{2}\PYG{p}{]} \PYG{p}{]}\PYG{p}{,} \PYG{c}{\PYGZsh{} 3rd job}
+ \PYG{p}{[} \PYG{l+m+mi}{67}\PYG{p}{,} \PYG{p}{[}\PYG{l+m+mi}{13}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job4.file}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]} \PYG{p}{]}\PYG{p}{,} \PYG{c}{\PYGZsh{} 4th job}
+ \PYG{p}{[} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job5.file}\PYG{l+s}{\PYGZsq{}} \PYG{p}{]}\PYG{p}{,} \PYG{c}{\PYGZsh{} 5th job}
+ \PYG{p}{]}
+\PYG{n+nd}{@files}\PYG{p}{(}\PYG{n}{parameters}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{task\PYGZus{}file}\PYG{p}{(}\PYG{o}{*}\PYG{n}{params}\PYG{p}{)}\PYG{p}{:}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZdq{}}
+\end{Verbatim}
+\end{quote}
+
+\begin{DUlineblock}{0em}
+\item[] \emph{Ruffus} creates as many jobs as there are elements in \code{parameters}.
+\item[] In turn, each of these elements consists of a series of parameters which will be
+passed to its own separate job.
+\end{DUlineblock}
+
+Thus the above code is equivalent to calling:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n}{task\PYGZus{}file}\PYG{p}{(}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job1.file}\PYG{l+s}{\PYGZsq{}}\PYG{p}{)}
+\PYG{n}{task\PYGZus{}file}\PYG{p}{(}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job2.file}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+m+mi}{4}\PYG{p}{)}
+\PYG{n}{task\PYGZus{}file}\PYG{p}{(}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job3.file}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{p}{[}\PYG{l+m+mi}{3}\PYG{p}{,} \PYG{l+m+mi}{2}\PYG{p}{]}\PYG{p}{)}
+\PYG{n}{task\PYGZus{}file}\PYG{p}{(}\PYG{l+m+mi}{67}\PYG{p}{,} \PYG{p}{[}\PYG{l+m+mi}{13}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job4.file}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]}\PYG{p}{)}
+\PYG{n}{task\PYGZus{}file}\PYG{p}{(}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job5.file}\PYG{l+s}{\PYGZsq{}}\PYG{p}{)}
+\end{Verbatim}
+\end{quote}
+
+What \code{task\_file()} does with these parameters is up to you!
+
+The only constraint on the parameters is that \emph{Ruffus} will treat any first
+parameter of each job as the \emph{inputs} and any second as the \emph{output}. Any
+strings in the \emph{inputs} or \emph{output} parameters (including those nested in sequences)
+will be treated as file names.
+
+Thus, to pick the parameters out of one of the above jobs:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n}{task\PYGZus{}file}\PYG{p}{(}\PYG{l+m+mi}{67}\PYG{p}{,} \PYG{p}{[}\PYG{l+m+mi}{13}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job4.file}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]}\PYG{p}{)}
+\end{Verbatim}
+
+\begin{DUlineblock}{0em}
+\item[] \emph{inputs} == \code{67}
+\item[] \emph{outputs} == \code{{[}13, 'job4.file'{]}}
+\item[]
+\item[]
+\begin{DUlineblock}{\DUlineblockindent}
+\item[] The solitary output filename is \code{job4.file}
+\end{DUlineblock}
+\end{DUlineblock}
+\end{quote}
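+
+As a concrete, purely illustrative sketch, \code{task\_file()} could treat its second
+parameter, when present, as the \emph{output} specification and create any file names found
+there, so that \emph{Ruffus} has files to check on the next run. Nothing in this body comes
+from the original example:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+def task\_file(*params):
+    \# illustrative body only: treat the 2nd parameter (if any) as the output
+    outputs = params[1] if len(params) \PYGZgt{} 1 else []
+    if not isinstance(outputs, (list, tuple)):
+        outputs = [outputs]
+    for name in outputs:
+        \# only strings are file names; other values are ordinary parameters
+        if isinstance(name, str):
+            open(name, "w").close()
+\end{Verbatim}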
+\end{quote}
+
+\index{@files!check if up to date}\index{check if up to date!@files}\phantomsection\label{tutorials/new_tutorial/deprecated_files:new-manual-files-is-uptodate}
+
+\subsubsection{Checking if jobs are up to date}
+\label{tutorials/new_tutorial/deprecated_files:new-manual-files-is-uptodate}\label{tutorials/new_tutorial/deprecated_files:checking-if-jobs-are-up-to-date}\label{tutorials/new_tutorial/deprecated_files:new-manual-files-example}\label{tutorials/new_tutorial/deprecated_files:index-3}\begin{quote}
+
+\begin{DUlineblock}{0em}
+\item[] Usually we do not want to re-run all the stages in a pipeline, only those whose
+input data have changed or whose results are no longer up to date.
+\item[] One easy way to do this is to check the modification times for files produced
+at each stage of the pipeline.
+\end{DUlineblock}
+
+\begin{DUlineblock}{0em}
+\item[] Let us first create our starting files \code{a.1} and \code{b.1}
+\item[] We can then run the following pipeline function to create
+\end{DUlineblock}
+\begin{quote}
+\begin{itemize}
+\item {}
+\code{a.2} from \code{a.1} and
+
+\item {}
+\code{b.2} from \code{b.1}
+
+\end{itemize}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{c}{\PYGZsh{} create starting files}
+\PYG{n+nb}{open}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{a.1}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{n+nb}{open}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{b.1}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+
+
+\PYG{k+kn}{from} \PYG{n+nn}{ruffus} \PYG{k+kn}{import} \PYG{o}{*}
+\PYG{n}{parameters} \PYG{o}{=} \PYG{p}{[}
+ \PYG{p}{[} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{a.1}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{a.2}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{A file}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]}\PYG{p}{,} \PYG{c}{\PYGZsh{} 1st job}
+ \PYG{p}{[} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{b.1}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{b.2}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{B file}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]}\PYG{p}{,} \PYG{c}{\PYGZsh{} 2nd job}
+ \PYG{p}{]}
+
+\PYG{n+nd}{@files}\PYG{p}{(}\PYG{n}{parameters}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{parallel\PYGZus{}io\PYGZus{}task}\PYG{p}{(}\PYG{n}{infile}\PYG{p}{,} \PYG{n}{outfile}\PYG{p}{,} \PYG{n}{text}\PYG{p}{)}\PYG{p}{:}
+ \PYG{c}{\PYGZsh{} copy infile contents to outfile}
+ \PYG{n}{infile\PYGZus{}text} \PYG{o}{=} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{infile}\PYG{p}{)}\PYG{o}{.}\PYG{n}{read}\PYG{p}{(}\PYG{p}{)}
+ \PYG{n}{f} \PYG{o}{=} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{outfile}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{o}{.}\PYG{n}{write}\PYG{p}{(}\PYG{n}{infile\PYGZus{}text} \PYG{o}{+} \PYG{l+s}{\PYGZdq{}}\PYG{l+s+se}{\PYGZbs{}n}\PYG{l+s}{\PYGZdq{}} \PYG{o}{+} \PYG{n}{text}\PYG{p}{)}
+
+\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{p}{)}
+\end{Verbatim}
+\end{quote}
+\begin{description}
+\item[{This produces the following output:}] \leavevmode
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{p}{)}
+\PYG{g+go}{ Job = [a.1 \PYGZhy{}\PYGZgt{} a.2, A file] completed}
+\PYG{g+go}{ Job = [b.1 \PYGZhy{}\PYGZgt{} b.2, B file] completed}
+\PYG{g+go}{Completed Task = parallel\PYGZus{}io\PYGZus{}task}
+\end{Verbatim}
+
+\end{description}
+
+\begin{DUlineblock}{0em}
+\item[] If you called {\hyperref[pipeline_functions:pipeline-functions-pipeline-run]{\emph{pipeline\_run()}}} again, nothing would happen because the files are up to date:
+\item[] \code{a.2} is more recent than \code{a.1} and
+\item[] \code{b.2} is more recent than \code{b.1}
+\end{DUlineblock}
+\begin{description}
+\item[{However, if you subsequently modified \code{a.1} again:}] \leavevmode
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n+nb}{open}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{a.1}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{n}{verbose} \PYG{o}{=} \PYG{l+m+mi}{1}\PYG{p}{)}
+\end{Verbatim}
+
+\end{description}
+
+you would see the following:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{p}{[}\PYG{n}{parallel\PYGZus{}io\PYGZus{}task}\PYG{p}{]}\PYG{p}{)}
+\PYG{g+go}{Task = parallel\PYGZus{}io\PYGZus{}task}
+\PYG{g+go}{ Job = [\PYGZdq{}a.1\PYGZdq{} \PYGZhy{}\PYGZgt{} \PYGZdq{}a.2\PYGZdq{}, \PYGZdq{}A file\PYGZdq{}] completed}
+\PYG{g+go}{ Job = [\PYGZdq{}b.1\PYGZdq{} \PYGZhy{}\PYGZgt{} \PYGZdq{}b.2\PYGZdq{}, \PYGZdq{}B file\PYGZdq{}] unnecessary: already up to date}
+\PYG{g+go}{Completed Task = parallel\PYGZus{}io\PYGZus{}task}
+\end{Verbatim}
+
+The 2nd job is up to date and will be skipped.
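+
+In outline, this per-job check compares file modification times. The following simplified,
+non-\emph{Ruffus} sketch (the helper name is invented for this illustration) shows the idea;
+\emph{Ruffus} performs the equivalent bookkeeping for you:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+import os
+
+def out\_of\_date(input\_file, output\_file):
+    \# a missing output always needs (re)building
+    if not os.path.exists(output\_file):
+        return True
+    \# otherwise rebuild only if the input is newer than the output
+    return os.path.getmtime(input\_file) \PYGZgt{} os.path.getmtime(output\_file)
+
+\# e.g. after a.1 has been touched, out\_of\_date("a.1", "a.2") returns True
+\end{Verbatim}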
+\end{quote}
+
+\index{deprecated @files\_re!Tutorial}\index{Tutorial!deprecated @files\_re}
+
+\section{\textbf{Appendix 6}: \textbf{@files\_re}: Deprecated \emph{syntax using regular expressions}}
+\label{tutorials/new_tutorial/deprecated_files_re:new-manual-deprecated-files-re}\label{tutorials/new_tutorial/deprecated_files_re:index-0}\label{tutorials/new_tutorial/deprecated_files_re::doc}\label{tutorials/new_tutorial/deprecated_files_re:new-manual-deprecated-files-re-chapter-num-files-re-deprecated-syntax-using-regular-expressions}
+\begin{notice}{warning}{Warning:}\begin{itemize}
+\item {}
+\textbf{This is deprecated syntax}
+
+\textbf{which is no longer supported and}
+
+\textbf{should NOT be used in new code.}
+
+\end{itemize}
+\end{notice}
+
+
+\strong{See also:}
+
+\begin{itemize}
+\item {}
+{\hyperref[tutorials/new_tutorial/manual_contents:new-manual-table-of-contents]{\emph{Manual Table of Contents}}}
+
+\item {}
+{\hyperref[decorators/decorators:decorators]{\emph{decorators}}}
+
+\item {}
+{\hyperref[decorators/files_re:decorators-files-re]{\emph{@files\_re}}} syntax in detail
+
+\end{itemize}
+
+
+
+
+\subsection{Overview}
+\label{tutorials/new_tutorial/deprecated_files_re:overview}\begin{quote}
+
+\textbf{@files\_re} combines the functionality of @transform, @collate and @merge in
+one overloaded decorator.
+
+This is why its use is discouraged: the \textbf{@files\_re} syntax is far too overloaded
+and context-dependent to support its myriad of different functions cleanly.
+
+The following documentation is provided to help maintain historical \emph{Ruffus} usage.
+\end{quote}
+
+
+\subsubsection{Transforming input and output filenames}
+\label{tutorials/new_tutorial/deprecated_files_re:transforming-input-and-output-filenames}\begin{quote}
+
+For example, the following code takes files from
+the previous pipeline task, and makes new output parameters with the \code{.sums} suffix
+in place of the \code{.chunks} suffix:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n+nd}{@transform}\PYG{p}{(}\PYG{n}{step\PYGZus{}4\PYGZus{}split\PYGZus{}numbers\PYGZus{}into\PYGZus{}chunks}\PYG{p}{,} \PYG{n}{suffix}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.chunks}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.sums}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{step\PYGZus{}5\PYGZus{}calculate\PYGZus{}sum\PYGZus{}of\PYGZus{}squares} \PYG{p}{(}\PYG{n}{input\PYGZus{}file\PYGZus{}name}\PYG{p}{,} \PYG{n}{output\PYGZus{}file\PYGZus{}name}\PYG{p}{)}\PYG{p}{:}
+ \PYG{c}{\PYGZsh{}}
+ \PYG{c}{\PYGZsh{} calculate sums and sums of squares for all values in the input\PYGZus{}file\PYGZus{}name}
+ \PYG{c}{\PYGZsh{} writing to output\PYGZus{}file\PYGZus{}name}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZdq{}}
+\end{Verbatim}
+\end{quote}
+
+This can be written equivalently using @files\_re:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+@files\_re(step\_4\_split\_numbers\_into\_chunks, r".chunks", r".sums")
+def step\_5\_calculate\_sum\_of\_squares (input\_file\_name, output\_file\_name):
+    ""
+\end{Verbatim}
+\end{quote}
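+
+The file-name substitution performed by \textbf{@files\_re} (like \code{regex()}) can be
+pictured with a small standalone sketch using Python's \code{re.sub()}; the capture-group
+pattern and file names here are illustrative only and are not taken from the pipeline above:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+import re
+
+\# swap the .chunks suffix for .sums, keeping the captured stem
+for input\_name in ["xx.chunks", "yy.chunks"]:
+    output\_name = re.sub(r"(.*)\PYGZbs{}.chunks", r"\PYGZbs{}1.sums", input\_name)
+    print(output\_name)    \# xx.sums, then yy.sums
+\end{Verbatim}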
+\end{quote}
+\phantomsection\label{tutorials/new_tutorial/deprecated_files_re:new-manual-files-re-combine}
+\index{combine!Manual}\index{Manual!combine}
+
+\subsubsection{Collating many \emph{inputs} into a single \emph{output}}
+\label{tutorials/new_tutorial/deprecated_files_re:collating-many-inputs-into-a-single-output}\label{tutorials/new_tutorial/deprecated_files_re:index-1}\begin{quote}
+
+Similarly, the following code collects \textbf{inputs}
+from the same species in the same directory:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n+nd}{@collate}\PYG{p}{(}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{*.animals}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{c}{\PYGZsh{} inputs = all *.animal files}
+ \PYG{n}{regex}\PYG{p}{(}\PYG{l+s}{r\PYGZsq{}}\PYG{l+s}{mammals.([\PYGZca{}.]+)}\PYG{l+s}{\PYGZsq{}}\PYG{p}{)}\PYG{p}{,} \PYG{c}{\PYGZsh{} regular expression}
+ \PYG{l+s}{r\PYGZsq{}}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{1/animals.in\PYGZus{}my\PYGZus{}zoo}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{c}{\PYGZsh{} single output file per species}
+ \PYG{l+s}{r\PYGZsq{}}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{1}\PYG{l+s}{\PYGZsq{}} \PYG{p}{)} \PYG{c}{\PYGZsh{} species name}
+\PYG{k}{def} \PYG{n+nf}{capture\PYGZus{}mammals}\PYG{p}{(}\PYG{n}{infiles}\PYG{p}{,} \PYG{n}{outfile}\PYG{p}{,} \PYG{n}{species}\PYG{p}{)}\PYG{p}{:}
+ \PYG{c}{\PYGZsh{} summarise all animals of this species}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZdq{}}
+\end{Verbatim}
+\end{quote}
+
+This can be written equivalently using @files\_re with the {\hyperref[decorators/indicator_objects:decorators-combine]{\emph{combine}}} indicator:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n+nd}{@files\PYGZus{}re}\PYG{p}{(}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{*.animals}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{c}{\PYGZsh{} inputs = all *.animal files}
+ \PYG{l+s}{r\PYGZsq{}}\PYG{l+s}{mammals.([\PYGZca{}.]+)}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{c}{\PYGZsh{} regular expression}
+ \PYG{n}{combine}\PYG{p}{(}\PYG{l+s}{r\PYGZsq{}}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{1/animals.in\PYGZus{}my\PYGZus{}zoo}\PYG{l+s}{\PYGZsq{}}\PYG{p}{)}\PYG{p}{,} \PYG{c}{\PYGZsh{} single output file per species}
+ \PYG{l+s}{r\PYGZsq{}}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{1}\PYG{l+s}{\PYGZsq{}} \PYG{p}{)} \PYG{c}{\PYGZsh{} species name}
+\PYG{k}{def} \PYG{n+nf}{capture\PYGZus{}mammals}\PYG{p}{(}\PYG{n}{infiles}\PYG{p}{,} \PYG{n}{outfile}\PYG{p}{,} \PYG{n}{species}\PYG{p}{)}\PYG{p}{:}
+ \PYG{c}{\PYGZsh{} summarise all animals of this species}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZdq{}}
+\end{Verbatim}
+\end{quote}
+\end{quote}
+
+
+\subsubsection{Generating \emph{input} and \emph{output} parameters using regular expressions}
+\label{tutorials/new_tutorial/deprecated_files_re:generating-input-and-output-parameter-using-regular-expresssions}\begin{quote}
+
+The following code generates additional
+\emph{input} prerequisite file names which match the original \emph{input} files.
+
+We want each job of our \code{analyse()} function to get corresponding pairs
+of \code{xx.chunks} and \code{xx.red\_indian} files, where
+\begin{quote}
+
+\code{*.chunks} are generated by the task function \code{split\_up\_problem()} and
+\code{*.red\_indian} are generated by the task function \code{make\_red\_indians()}:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n+nd}{@follows}\PYG{p}{(}\PYG{n}{make\PYGZus{}red\PYGZus{}indians}\PYG{p}{)}
+\PYG{n+nd}{@transform}\PYG{p}{(}\PYG{n}{split\PYGZus{}up\PYGZus{}problem}\PYG{p}{,} \PYG{c}{\PYGZsh{} starting set of *inputs*}
+ \PYG{n}{regex}\PYG{p}{(}\PYG{l+s}{r\PYGZdq{}}\PYG{l+s}{(.*).chunks}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{c}{\PYGZsh{} regular expression}
+ \PYG{n}{inputs}\PYG{p}{(}\PYG{p}{[}\PYG{l+s}{r\PYGZdq{}}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{g\PYGZlt{}0\PYGZgt{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{c}{\PYGZsh{} xx.chunks}
+ \PYG{l+s}{r\PYGZdq{}}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{1.red\PYGZus{}indian}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{)}\PYG{p}{,} \PYG{c}{\PYGZsh{} important.file}
+ \PYG{l+s}{r\PYGZdq{}}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{1.results}\PYG{l+s}{\PYGZdq{}} \PYG{c}{\PYGZsh{} xx.results}
+ \PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{analyse}\PYG{p}{(}\PYG{n}{input\PYGZus{}filenames}\PYG{p}{,} \PYG{n}{output\PYGZus{}file\PYGZus{}name}\PYG{p}{)}\PYG{p}{:}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{Do analysis here}\PYG{l+s}{\PYGZdq{}}
+\end{Verbatim}
+\end{quote}
+
+The equivalent code using @files\_re looks very similar:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+@follows(make\_red\_indians)
+@files\_re( split\_up\_problem, \# starting set of *inputs*
+ r"(.*).chunks", \# regular expression
+ [r"\PYGZbs{}g\textless{}0\textgreater{}", \# xx.chunks
+ r"\PYGZbs{}1.red\_indian"], \# important.file
+ r"\PYGZbs{}1.results") \# xx.results
+def analyse(input\_filenames, output\_file\_name):
+ "Do analysis here"
+\end{Verbatim}
+\end{quote}
+\end{quote}
+
+Example code for:
+
+
+\section{\textbf{Chapter 1}: Python Code for An introduction to basic Ruffus syntax}
+\label{tutorials/new_tutorial/introduction_code:new-manual-introduction-code}\label{tutorials/new_tutorial/introduction_code:new-manual-introduction-chapter-num-python-code-for-an-introduction-to-basic-ruffus-syntax}\label{tutorials/new_tutorial/introduction_code::doc}
+
+\strong{See also:}
+
+\begin{itemize}
+\item {}
+{\hyperref[tutorials/new_tutorial/manual_contents:new-manual-table-of-contents]{\emph{Manual Table of Contents}}}
+
+\item {}
+{\hyperref[decorators/transform:decorators-transform]{\emph{@transform syntax in detail}}}
+
+\item {}
+Back to \textbf{Chapter 1}: {\hyperref[tutorials/new_tutorial/introduction:new-manual-introduction]{\emph{An introduction to basic Ruffus syntax}}}
+
+\end{itemize}
+
+
+
+
+\subsection{Your first Ruffus script}
+\label{tutorials/new_tutorial/introduction_code:your-first-ruffus-script}\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} The starting data files would normally exist beforehand!}
+\PYG{c}{\PYGZsh{} We create some empty files for this example}
+\PYG{c}{\PYGZsh{}}
+\PYG{n}{starting\PYGZus{}files} \PYG{o}{=} \PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{a.fasta}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{b.fasta}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{c.fasta}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}
+
+\PYG{k}{for} \PYG{n}{ff} \PYG{o+ow}{in} \PYG{n}{starting\PYGZus{}files}\PYG{p}{:}
+ \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{ff}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+
+
+\PYG{k+kn}{from} \PYG{n+nn}{ruffus} \PYG{k+kn}{import} \PYG{o}{*}
+
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} STAGE 1 fasta\PYGZhy{}\PYGZgt{}sam}
+\PYG{c}{\PYGZsh{}}
+\PYG{n+nd}{@transform}\PYG{p}{(}\PYG{n}{starting\PYGZus{}files}\PYG{p}{,} \PYG{c}{\PYGZsh{} Input = starting files}
+ \PYG{n}{suffix}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.fasta}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{c}{\PYGZsh{} suffix = .fasta}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.sam}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)} \PYG{c}{\PYGZsh{} Output suffix = .sam}
+\PYG{k}{def} \PYG{n+nf}{map\PYGZus{}dna\PYGZus{}sequence}\PYG{p}{(}\PYG{n}{input\PYGZus{}file}\PYG{p}{,}
+ \PYG{n}{output\PYGZus{}file}\PYG{p}{)}\PYG{p}{:}
+ \PYG{n}{ii} \PYG{o}{=} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{input\PYGZus{}file}\PYG{p}{)}
+ \PYG{n}{oo} \PYG{o}{=} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} STAGE 2 sam\PYGZhy{}\PYGZgt{}bam}
+\PYG{c}{\PYGZsh{}}
+\PYG{n+nd}{@transform}\PYG{p}{(}\PYG{n}{map\PYGZus{}dna\PYGZus{}sequence}\PYG{p}{,} \PYG{c}{\PYGZsh{} Input = previous stage}
+ \PYG{n}{suffix}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.sam}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{c}{\PYGZsh{} suffix = .sam}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.bam}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)} \PYG{c}{\PYGZsh{} Output suffix = .bam}
+\PYG{k}{def} \PYG{n+nf}{compress\PYGZus{}sam\PYGZus{}file}\PYG{p}{(}\PYG{n}{input\PYGZus{}file}\PYG{p}{,}
+ \PYG{n}{output\PYGZus{}file}\PYG{p}{)}\PYG{p}{:}
+ \PYG{n}{ii} \PYG{o}{=} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{input\PYGZus{}file}\PYG{p}{)}
+ \PYG{n}{oo} \PYG{o}{=} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} STAGE 3 bam\PYGZhy{}\PYGZgt{}statistics}
+\PYG{c}{\PYGZsh{}}
+\PYG{n+nd}{@transform}\PYG{p}{(}\PYG{n}{compress\PYGZus{}sam\PYGZus{}file}\PYG{p}{,} \PYG{c}{\PYGZsh{} Input = previous stage}
+ \PYG{n}{suffix}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.bam}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{c}{\PYGZsh{} suffix = .bam}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.statistics}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{c}{\PYGZsh{} Output suffix = .statistics}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{use\PYGZus{}linear\PYGZus{}model}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)} \PYG{c}{\PYGZsh{} Extra statistics parameter}
+\PYG{k}{def} \PYG{n+nf}{summarise\PYGZus{}bam\PYGZus{}file}\PYG{p}{(}\PYG{n}{input\PYGZus{}file}\PYG{p}{,}
+ \PYG{n}{output\PYGZus{}file}\PYG{p}{,}
+ \PYG{n}{extra\PYGZus{}stats\PYGZus{}parameter}\PYG{p}{)}\PYG{p}{:}
+ \PYG{l+s+sd}{\PYGZdq{}\PYGZdq{}\PYGZdq{}}
+\PYG{l+s+sd}{ Sketch of real analysis function}
+\PYG{l+s+sd}{ \PYGZdq{}\PYGZdq{}\PYGZdq{}}
+ \PYG{n}{ii} \PYG{o}{=} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{input\PYGZus{}file}\PYG{p}{)}
+ \PYG{n}{oo} \PYG{o}{=} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+
+\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{p}{)}
+\end{Verbatim}
+\end{quote}
+
+
+\subsection{Resulting Output}
+\label{tutorials/new_tutorial/introduction_code:resulting-output}\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{p}{)}
+\PYG{g+go}{ Job = [a.fasta \PYGZhy{}\PYGZgt{} a.sam] completed}
+\PYG{g+go}{ Job = [b.fasta \PYGZhy{}\PYGZgt{} b.sam] completed}
+\PYG{g+go}{ Job = [c.fasta \PYGZhy{}\PYGZgt{} c.sam] completed}
+\PYG{g+go}{Completed Task = map\PYGZus{}dna\PYGZus{}sequence}
+\PYG{g+go}{ Job = [a.sam \PYGZhy{}\PYGZgt{} a.bam] completed}
+\PYG{g+go}{ Job = [b.sam \PYGZhy{}\PYGZgt{} b.bam] completed}
+\PYG{g+go}{ Job = [c.sam \PYGZhy{}\PYGZgt{} c.bam] completed}
+\PYG{g+go}{Completed Task = compress\PYGZus{}sam\PYGZus{}file}
+\PYG{g+go}{ Job = [a.bam \PYGZhy{}\PYGZgt{} a.statistics, use\PYGZus{}linear\PYGZus{}model] completed}
+\PYG{g+go}{ Job = [b.bam \PYGZhy{}\PYGZgt{} b.statistics, use\PYGZus{}linear\PYGZus{}model] completed}
+\PYG{g+go}{ Job = [c.bam \PYGZhy{}\PYGZgt{} c.statistics, use\PYGZus{}linear\PYGZus{}model] completed}
+\PYG{g+go}{Completed Task = summarise\PYGZus{}bam\PYGZus{}file}
+\end{Verbatim}
+\end{quote}
+
+
+\section{\textbf{Chapter 2}: Python Code for Transforming data in a pipeline with \texttt{@transform}}
+\label{tutorials/new_tutorial/transform_code:new-manual-transform-code}\label{tutorials/new_tutorial/transform_code::doc}\label{tutorials/new_tutorial/transform_code:new-manual-introduction-chapter-num-python-code-for-transforming-data-in-a-pipeline-with-transform}
+
+\strong{See also:}
+
+\begin{itemize}
+\item {}
+{\hyperref[tutorials/new_tutorial/manual_contents:new-manual-table-of-contents]{\emph{Manual Table of Contents}}}
+
+\item {}
+{\hyperref[decorators/transform:decorators-transform]{\emph{@transform syntax in detail}}}
+
+\item {}
+Back to \textbf{Chapter 2}: {\hyperref[tutorials/new_tutorial/transform:new-manual-transform]{\emph{Transforming data in a pipeline with @transform}}}
+
+\end{itemize}
+
+
+
+
+\subsection{Your first Ruffus script}
+\label{tutorials/new_tutorial/transform_code:your-first-ruffus-script}\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} The starting data files would normally exist beforehand!}
+\PYG{c}{\PYGZsh{} We create some empty files for this example}
+\PYG{c}{\PYGZsh{}}
+\PYG{n}{starting\PYGZus{}files} \PYG{o}{=} \PYG{p}{[}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{a.1.fastq}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{a.2.fastq}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,}
+ \PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{b.1.fastq}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{b.2.fastq}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,}
+ \PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{c.1.fastq}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{c.2.fastq}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{]}
+
+
+\PYG{k}{for} \PYG{n}{ff\PYGZus{}pair} \PYG{o+ow}{in} \PYG{n}{starting\PYGZus{}files}\PYG{p}{:}
+ \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{ff\PYGZus{}pair}\PYG{p}{[}\PYG{l+m+mi}{0}\PYG{p}{]}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+ \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{ff\PYGZus{}pair}\PYG{p}{[}\PYG{l+m+mi}{1}\PYG{p}{]}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+
+
+\PYG{k+kn}{from} \PYG{n+nn}{ruffus} \PYG{k+kn}{import} \PYG{o}{*}
+
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} STAGE 1 fasta\PYGZhy{}\PYGZgt{}sam}
+\PYG{c}{\PYGZsh{}}
+\PYG{n+nd}{@transform}\PYG{p}{(}\PYG{n}{starting\PYGZus{}files}\PYG{p}{,} \PYG{c}{\PYGZsh{} Input = starting files}
+ \PYG{n}{suffix}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.1.fastq}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{c}{\PYGZsh{} suffix = .1.fastq}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.sam}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)} \PYG{c}{\PYGZsh{} Output suffix = .sam}
+\PYG{k}{def} \PYG{n+nf}{map\PYGZus{}dna\PYGZus{}sequence}\PYG{p}{(}\PYG{n}{input\PYGZus{}files}\PYG{p}{,}
+ \PYG{n}{output\PYGZus{}file}\PYG{p}{)}\PYG{p}{:}
+ \PYG{c}{\PYGZsh{} remember there are two input files now}
+ \PYG{n}{ii1} \PYG{o}{=} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{input\PYGZus{}files}\PYG{p}{[}\PYG{l+m+mi}{0}\PYG{p}{]}\PYG{p}{)}
+ \PYG{n}{ii2} \PYG{o}{=} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{input\PYGZus{}files}\PYG{p}{[}\PYG{l+m+mi}{1}\PYG{p}{]}\PYG{p}{)}
+ \PYG{n}{oo} \PYG{o}{=} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} STAGE 2 sam\PYGZhy{}\PYGZgt{}bam}
+\PYG{c}{\PYGZsh{}}
+\PYG{n+nd}{@transform}\PYG{p}{(}\PYG{n}{map\PYGZus{}dna\PYGZus{}sequence}\PYG{p}{,} \PYG{c}{\PYGZsh{} Input = previous stage}
+ \PYG{n}{suffix}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.sam}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{c}{\PYGZsh{} suffix = .sam}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.bam}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)} \PYG{c}{\PYGZsh{} Output suffix = .bam}
+\PYG{k}{def} \PYG{n+nf}{compress\PYGZus{}sam\PYGZus{}file}\PYG{p}{(}\PYG{n}{input\PYGZus{}file}\PYG{p}{,}
+ \PYG{n}{output\PYGZus{}file}\PYG{p}{)}\PYG{p}{:}
+ \PYG{n}{ii} \PYG{o}{=} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{input\PYGZus{}file}\PYG{p}{)}
+ \PYG{n}{oo} \PYG{o}{=} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} STAGE 3 bam\PYGZhy{}\PYGZgt{}statistics}
+\PYG{c}{\PYGZsh{}}
+\PYG{n+nd}{@transform}\PYG{p}{(}\PYG{n}{compress\PYGZus{}sam\PYGZus{}file}\PYG{p}{,} \PYG{c}{\PYGZsh{} Input = previous stage}
+ \PYG{n}{suffix}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.bam}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{c}{\PYGZsh{} suffix = .bam}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.statistics}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{c}{\PYGZsh{} Output suffix = .statistics}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{use\PYGZus{}linear\PYGZus{}model}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)} \PYG{c}{\PYGZsh{} Extra statistics parameter}
+\PYG{k}{def} \PYG{n+nf}{summarise\PYGZus{}bam\PYGZus{}file}\PYG{p}{(}\PYG{n}{input\PYGZus{}file}\PYG{p}{,}
+ \PYG{n}{output\PYGZus{}file}\PYG{p}{,}
+ \PYG{n}{extra\PYGZus{}stats\PYGZus{}parameter}\PYG{p}{)}\PYG{p}{:}
+ \PYG{l+s+sd}{\PYGZdq{}\PYGZdq{}\PYGZdq{}}
+\PYG{l+s+sd}{ Sketch of real analysis function}
+\PYG{l+s+sd}{ \PYGZdq{}\PYGZdq{}\PYGZdq{}}
+ \PYG{n}{ii} \PYG{o}{=} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{input\PYGZus{}file}\PYG{p}{)}
+ \PYG{n}{oo} \PYG{o}{=} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+
+\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{p}{)}
+\end{Verbatim}
+\end{quote}
+
+
+\subsection{Resulting Output}
+\label{tutorials/new_tutorial/transform_code:resulting-output}\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{p}{)}
+\PYG{g+go}{ Job = [[a.1.fastq, a.2.fastq] \PYGZhy{}\PYGZgt{} a.sam] completed}
+\PYG{g+go}{ Job = [[b.1.fastq, b.2.fastq] \PYGZhy{}\PYGZgt{} b.sam] completed}
+\PYG{g+go}{ Job = [[c.1.fastq, c.2.fastq] \PYGZhy{}\PYGZgt{} c.sam] completed}
+\PYG{g+go}{Completed Task = map\PYGZus{}dna\PYGZus{}sequence}
+\PYG{g+go}{ Job = [a.sam \PYGZhy{}\PYGZgt{} a.bam] completed}
+\PYG{g+go}{ Job = [b.sam \PYGZhy{}\PYGZgt{} b.bam] completed}
+\PYG{g+go}{ Job = [c.sam \PYGZhy{}\PYGZgt{} c.bam] completed}
+\PYG{g+go}{Completed Task = compress\PYGZus{}sam\PYGZus{}file}
+\PYG{g+go}{ Job = [a.bam \PYGZhy{}\PYGZgt{} a.statistics, use\PYGZus{}linear\PYGZus{}model] completed}
+\PYG{g+go}{ Job = [b.bam \PYGZhy{}\PYGZgt{} b.statistics, use\PYGZus{}linear\PYGZus{}model] completed}
+\PYG{g+go}{ Job = [c.bam \PYGZhy{}\PYGZgt{} c.statistics, use\PYGZus{}linear\PYGZus{}model] completed}
+\PYG{g+go}{Completed Task = summarise\PYGZus{}bam\PYGZus{}file}
+\end{Verbatim}
+\end{quote}
+
+
+\section{\textbf{Chapter 3}: Python Code for More on \texttt{@transform}-ing data}
+\label{tutorials/new_tutorial/transform_in_parallel_code:new-manual-transform-in-parallel-code}\label{tutorials/new_tutorial/transform_in_parallel_code::doc}\label{tutorials/new_tutorial/transform_in_parallel_code:new-manual-transform-in-parallel-chapter-num-python-code-for-more-on-transform-ing-data}
+
+\strong{See also:}
+
+\begin{itemize}
+\item {}
+{\hyperref[tutorials/new_tutorial/manual_contents:new-manual-table-of-contents]{\emph{Manual Table of Contents}}}
+
+\item {}
+{\hyperref[decorators/transform:decorators-transform]{\emph{@transform syntax in detail}}}
+
+\item {}
+Back to \textbf{Chapter 3}: {\hyperref[tutorials/new_tutorial/transform_in_parallel:new-manual-transform-in-parallel]{\emph{More on @transform-ing data and @originate}}}
+
+\end{itemize}
+
+
+
+
+\subsection{Producing several items / files per job}
+\label{tutorials/new_tutorial/transform_in_parallel_code:producing-several-items-files-per-job}\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{k+kn}{from} \PYG{n+nn}{ruffus} \PYG{k+kn}{import} \PYG{o}{*}
+
+\PYG{c}{\PYGZsh{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy [...]
+\PYG{c}{\PYGZsh{} Create pairs of input files}
+\PYG{c}{\PYGZsh{}}
+\PYG{n}{first\PYGZus{}task\PYGZus{}params} \PYG{o}{=} \PYG{p}{[}
+ \PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job1.a.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job1.b.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]}\PYG{p}{,}
+ \PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job2.a.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job2.b.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]}\PYG{p}{,}
+ \PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job3.a.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job3.b.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]}\PYG{p}{,}
+ \PYG{p}{]}
+
+\PYG{k}{for} \PYG{n}{input\PYGZus{}file\PYGZus{}pairs} \PYG{o+ow}{in} \PYG{n}{first\PYGZus{}task\PYGZus{}params}\PYG{p}{:}
+ \PYG{k}{for} \PYG{n}{input\PYGZus{}file} \PYG{o+ow}{in} \PYG{n}{input\PYGZus{}file\PYGZus{}pairs}\PYG{p}{:}
+ \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{input\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+
+
+\PYG{c}{\PYGZsh{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy [...]
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} first task}
+\PYG{c}{\PYGZsh{}}
+\PYG{n+nd}{@transform}\PYG{p}{(}\PYG{n}{first\PYGZus{}task\PYGZus{}params}\PYG{p}{,} \PYG{n}{suffix}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.start}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,}
+ \PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.output.1}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.output.extra.1}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{,}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{some\PYGZus{}extra.string.for\PYGZus{}example}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+m+mi}{14}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{first\PYGZus{}task}\PYG{p}{(}\PYG{n}{input\PYGZus{}files}\PYG{p}{,} \PYG{n}{output\PYGZus{}file\PYGZus{}pair}\PYG{p}{,}
+ \PYG{n}{extra\PYGZus{}parameter\PYGZus{}str}\PYG{p}{,} \PYG{n}{extra\PYGZus{}parameter\PYGZus{}num}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{for} \PYG{n}{output\PYGZus{}file} \PYG{o+ow}{in} \PYG{n}{output\PYGZus{}file\PYGZus{}pair}\PYG{p}{:}
+ \PYG{k}{with} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{pass}
+
+
+\PYG{c}{\PYGZsh{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy [...]
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} second task}
+\PYG{c}{\PYGZsh{}}
+\PYG{n+nd}{@transform}\PYG{p}{(}\PYG{n}{first\PYGZus{}task}\PYG{p}{,} \PYG{n}{suffix}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.output.1}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.output2}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{second\PYGZus{}task}\PYG{p}{(}\PYG{n}{input\PYGZus{}files}\PYG{p}{,} \PYG{n}{output\PYGZus{}file}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{with} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{:} \PYG{k}{pass}
+
+
+\PYG{c}{\PYGZsh{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy [...]
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} Run}
+\PYG{c}{\PYGZsh{}}
+\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{p}{[}\PYG{n}{second\PYGZus{}task}\PYG{p}{]}\PYG{p}{)}
+\end{Verbatim}
+\end{quote}
+
+
+\subsubsection{Resulting Output}
+\label{tutorials/new_tutorial/transform_in_parallel_code:resulting-output}\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{p}{[}\PYG{n}{second\PYGZus{}task}\PYG{p}{]}\PYG{p}{)}
+\PYG{g+go}{ Job = [[job1.a.start, job1.b.start] \PYGZhy{}\PYGZgt{} [job1.a.output.1, job1.a.output.extra.1], some\PYGZus{}extra.string.for\PYGZus{}example, 14] completed}
+\PYG{g+go}{ Job = [[job2.a.start, job2.b.start] \PYGZhy{}\PYGZgt{} [job2.a.output.1, job2.a.output.extra.1], some\PYGZus{}extra.string.for\PYGZus{}example, 14] completed}
+\PYG{g+go}{ Job = [[job3.a.start, job3.b.start] \PYGZhy{}\PYGZgt{} [job3.a.output.1, job3.a.output.extra.1], some\PYGZus{}extra.string.for\PYGZus{}example, 14] completed}
+\PYG{g+go}{Completed Task = first\PYGZus{}task}
+\PYG{g+go}{ Job = [[job1.a.output.1, job1.a.output.extra.1] \PYGZhy{}\PYGZgt{} job1.a.output2] completed}
+\PYG{g+go}{ Job = [[job2.a.output.1, job2.a.output.extra.1] \PYGZhy{}\PYGZgt{} job2.a.output2] completed}
+\PYG{g+go}{ Job = [[job3.a.output.1, job3.a.output.extra.1] \PYGZhy{}\PYGZgt{} job3.a.output2] completed}
+\PYG{g+go}{Completed Task = second\PYGZus{}task}
+\end{Verbatim}
+\end{quote}
+
+
+\subsection{Defining task functions out of order}
+\label{tutorials/new_tutorial/transform_in_parallel_code:defining-tasks-function-out-of-order}\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+ \PYG{k+kn}{from} \PYG{n+nn}{ruffus} \PYG{k+kn}{import} \PYG{o}{*}
+
+ \PYG{c}{\PYGZsh{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZh [...]
+ \PYG{c}{\PYGZsh{} Create pairs of input files}
+ \PYG{c}{\PYGZsh{}}
+ \PYG{n}{first\PYGZus{}task\PYGZus{}params} \PYG{o}{=} \PYG{p}{[}
+ \PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job1.a.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job1.b.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]}\PYG{p}{,}
+ \PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job2.a.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job2.b.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]}\PYG{p}{,}
+ \PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job3.a.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job3.b.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]}\PYG{p}{,}
+ \PYG{p}{]}
+
+ \PYG{k}{for} \PYG{n}{input\PYGZus{}file\PYGZus{}pairs} \PYG{o+ow}{in} \PYG{n}{first\PYGZus{}task\PYGZus{}params}\PYG{p}{:}
+ \PYG{k}{for} \PYG{n}{input\PYGZus{}file} \PYG{o+ow}{in} \PYG{n}{input\PYGZus{}file\PYGZus{}pairs}\PYG{p}{:}
+ \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{input\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+
+
+
+ \PYG{c}{\PYGZsh{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZh [...]
+ \PYG{c}{\PYGZsh{}}
+ \PYG{c}{\PYGZsh{} second task defined first}
+ \PYG{c}{\PYGZsh{}}
+ \PYG{c}{\PYGZsh{} task name string wrapped in output\PYGZus{}from(...)}
+ \PYG{n+nd}{@transform}\PYG{p}{(}\PYG{n}{output\PYGZus{}from}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{first\PYGZus{}task}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{n}{suffix}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.output.1}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.output2}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+ \PYG{k}{def} \PYG{n+nf}{second\PYGZus{}task}\PYG{p}{(}\PYG{n}{input\PYGZus{}files}\PYG{p}{,} \PYG{n}{output\PYGZus{}file}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{with} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{:} \PYG{k}{pass}
+
+
+ \PYG{c}{\PYGZsh{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZh [...]
+ \PYG{c}{\PYGZsh{}}
+ \PYG{c}{\PYGZsh{} first task}
+ \PYG{c}{\PYGZsh{}}
+ \PYG{n+nd}{@transform}\PYG{p}{(}\PYG{n}{first\PYGZus{}task\PYGZus{}params}\PYG{p}{,} \PYG{n}{suffix}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.start}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,}
+ \PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.output.1}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.output.extra.1}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{,}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{some\PYGZus{}extra.string.for\PYGZus{}example}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+m+mi}{14}\PYG{p}{)}
+ \PYG{k}{def} \PYG{n+nf}{first\PYGZus{}task}\PYG{p}{(}\PYG{n}{input\PYGZus{}files}\PYG{p}{,} \PYG{n}{output\PYGZus{}file\PYGZus{}pair}\PYG{p}{,}
+ \PYG{n}{extra\PYGZus{}parameter\PYGZus{}str}\PYG{p}{,} \PYG{n}{extra\PYGZus{}parameter\PYGZus{}num}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{for} \PYG{n}{output\PYGZus{}file} \PYG{o+ow}{in} \PYG{n}{output\PYGZus{}file\PYGZus{}pair}\PYG{p}{:}
+ \PYG{k}{with} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{pass}
+
+
+ \PYG{c}{\PYGZsh{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZh [...]
+ \PYG{c}{\PYGZsh{}}
+ \PYG{c}{\PYGZsh{} Run}
+ \PYG{c}{\PYGZsh{}}
+ \PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{p}{[}\PYG{n}{second\PYGZus{}task}\PYG{p}{]}\PYG{p}{)}
+\end{Verbatim}
+\end{quote}
+
+
+\subsubsection{Resulting Output}
+\label{tutorials/new_tutorial/transform_in_parallel_code:id1}\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{p}{[}\PYG{n}{second\PYGZus{}task}\PYG{p}{]}\PYG{p}{)}
+\PYG{g+go}{ Job = [[job1.a.start, job1.b.start] \PYGZhy{}\PYGZgt{} [job1.a.output.1, job1.a.output.extra.1], some\PYGZus{}extra.string.for\PYGZus{}example, 14] completed}
+\PYG{g+go}{ Job = [[job2.a.start, job2.b.start] \PYGZhy{}\PYGZgt{} [job2.a.output.1, job2.a.output.extra.1], some\PYGZus{}extra.string.for\PYGZus{}example, 14] completed}
+\PYG{g+go}{ Job = [[job3.a.start, job3.b.start] \PYGZhy{}\PYGZgt{} [job3.a.output.1, job3.a.output.extra.1], some\PYGZus{}extra.string.for\PYGZus{}example, 14] completed}
+\PYG{g+go}{Completed Task = first\PYGZus{}task}
+\PYG{g+go}{ Job = [[job1.a.output.1, job1.a.output.extra.1] \PYGZhy{}\PYGZgt{} job1.a.output2] completed}
+\PYG{g+go}{ Job = [[job2.a.output.1, job2.a.output.extra.1] \PYGZhy{}\PYGZgt{} job2.a.output2] completed}
+\PYG{g+go}{ Job = [[job3.a.output.1, job3.a.output.extra.1] \PYGZhy{}\PYGZgt{} job3.a.output2] completed}
+\PYG{g+go}{Completed Task = second\PYGZus{}task}
+\end{Verbatim}
+\end{quote}
+
+
+\subsection{Multiple dependencies}
+\label{tutorials/new_tutorial/transform_in_parallel_code:new-manual-transform-multiple-dependencies-code}\label{tutorials/new_tutorial/transform_in_parallel_code:multiple-dependencies}\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+ \PYG{k+kn}{from} \PYG{n+nn}{ruffus} \PYG{k+kn}{import} \PYG{o}{*}
+ \PYG{k+kn}{import} \PYG{n+nn}{time}
+ \PYG{k+kn}{import} \PYG{n+nn}{random}
+
+ \PYG{c}{\PYGZsh{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZh [...]
+ \PYG{c}{\PYGZsh{} Create pairs of input files}
+ \PYG{c}{\PYGZsh{}}
+ \PYG{n}{first\PYGZus{}task\PYGZus{}params} \PYG{o}{=} \PYG{p}{[}
+ \PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job1.a.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job1.b.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]}\PYG{p}{,}
+ \PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job2.a.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job2.b.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]}\PYG{p}{,}
+ \PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job3.a.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job3.b.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]}\PYG{p}{,}
+ \PYG{p}{]}
+ \PYG{n}{second\PYGZus{}task\PYGZus{}params} \PYG{o}{=} \PYG{p}{[}
+ \PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job4.a.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job4.b.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]}\PYG{p}{,}
+ \PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job5.a.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job5.b.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]}\PYG{p}{,}
+ \PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job6.a.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job6.b.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]}\PYG{p}{,}
+ \PYG{p}{]}
+
+ \PYG{k}{for} \PYG{n}{input\PYGZus{}file\PYGZus{}pairs} \PYG{o+ow}{in} \PYG{n}{first\PYGZus{}task\PYGZus{}params} \PYG{o}{+} \PYG{n}{second\PYGZus{}task\PYGZus{}params}\PYG{p}{:}
+ \PYG{k}{for} \PYG{n}{input\PYGZus{}file} \PYG{o+ow}{in} \PYG{n}{input\PYGZus{}file\PYGZus{}pairs}\PYG{p}{:}
+ \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{input\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+
+
+
+ \PYG{c}{\PYGZsh{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZh [...]
+ \PYG{c}{\PYGZsh{}}
+ \PYG{c}{\PYGZsh{} first task}
+ \PYG{c}{\PYGZsh{}}
+ \PYG{n+nd}{@transform}\PYG{p}{(}\PYG{n}{first\PYGZus{}task\PYGZus{}params}\PYG{p}{,} \PYG{n}{suffix}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.start}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,}
+ \PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.output.1}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.output.extra.1}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{,}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{some\PYGZus{}extra.string.for\PYGZus{}example}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+m+mi}{14}\PYG{p}{)}
+ \PYG{k}{def} \PYG{n+nf}{first\PYGZus{}task}\PYG{p}{(}\PYG{n}{input\PYGZus{}files}\PYG{p}{,} \PYG{n}{output\PYGZus{}file\PYGZus{}pair}\PYG{p}{,}
+ \PYG{n}{extra\PYGZus{}parameter\PYGZus{}str}\PYG{p}{,} \PYG{n}{extra\PYGZus{}parameter\PYGZus{}num}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{for} \PYG{n}{output\PYGZus{}file} \PYG{o+ow}{in} \PYG{n}{output\PYGZus{}file\PYGZus{}pair}\PYG{p}{:}
+ \PYG{k}{with} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{pass}
+ \PYG{n}{time}\PYG{o}{.}\PYG{n}{sleep}\PYG{p}{(}\PYG{n}{random}\PYG{o}{.}\PYG{n}{random}\PYG{p}{(}\PYG{p}{)}\PYG{p}{)}
+
+
+
+ \PYG{c}{\PYGZsh{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZh [...]
+ \PYG{c}{\PYGZsh{}}
+ \PYG{c}{\PYGZsh{} second task}
+ \PYG{c}{\PYGZsh{}}
+ \PYG{n+nd}{@transform}\PYG{p}{(}\PYG{n}{second\PYGZus{}task\PYGZus{}params}\PYG{p}{,} \PYG{n}{suffix}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.start}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,}
+ \PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.output.1}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.output.extra.1}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{,}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{some\PYGZus{}extra.string.for\PYGZus{}example}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+m+mi}{14}\PYG{p}{)}
+ \PYG{k}{def} \PYG{n+nf}{second\PYGZus{}task}\PYG{p}{(}\PYG{n}{input\PYGZus{}files}\PYG{p}{,} \PYG{n}{output\PYGZus{}file\PYGZus{}pair}\PYG{p}{,}
+ \PYG{n}{extra\PYGZus{}parameter\PYGZus{}str}\PYG{p}{,} \PYG{n}{extra\PYGZus{}parameter\PYGZus{}num}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{for} \PYG{n}{output\PYGZus{}file} \PYG{o+ow}{in} \PYG{n}{output\PYGZus{}file\PYGZus{}pair}\PYG{p}{:}
+ \PYG{k}{with} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{pass}
+ \PYG{n}{time}\PYG{o}{.}\PYG{n}{sleep}\PYG{p}{(}\PYG{n}{random}\PYG{o}{.}\PYG{n}{random}\PYG{p}{(}\PYG{p}{)}\PYG{p}{)}
+
+
+ \PYG{c}{\PYGZsh{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZh [...]
+ \PYG{c}{\PYGZsh{}}
+ \PYG{c}{\PYGZsh{} third task}
+ \PYG{c}{\PYGZsh{}}
+ \PYG{c}{\PYGZsh{} depends on both first\PYGZus{}task() and second\PYGZus{}task()}
+ \PYG{n+nd}{@transform}\PYG{p}{(}\PYG{p}{[}\PYG{n}{first\PYGZus{}task}\PYG{p}{,} \PYG{n}{second\PYGZus{}task}\PYG{p}{]}\PYG{p}{,} \PYG{n}{suffix}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.output.1}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.output2}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+ \PYG{k}{def} \PYG{n+nf}{third\PYGZus{}task}\PYG{p}{(}\PYG{n}{input\PYGZus{}files}\PYG{p}{,} \PYG{n}{output\PYGZus{}file}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{with} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{:} \PYG{k}{pass}
+
+
+ \PYG{c}{\PYGZsh{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZh [...]
+ \PYG{c}{\PYGZsh{}}
+ \PYG{c}{\PYGZsh{} Run}
+ \PYG{c}{\PYGZsh{}}
+ \PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{p}{[}\PYG{n}{third\PYGZus{}task}\PYG{p}{]}\PYG{p}{,} \PYG{n}{multiprocess} \PYG{o}{=} \PYG{l+m+mi}{6}\PYG{p}{)}
+\end{Verbatim}
+\end{quote}
+
+
+\subsubsection{Resulting Output}
+\label{tutorials/new_tutorial/transform_in_parallel_code:id2}\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{p}{[}\PYG{n}{third\PYGZus{}task}\PYG{p}{]}\PYG{p}{,} \PYG{n}{multiprocess} \PYG{o}{=} \PYG{l+m+mi}{6}\PYG{p}{)}
+\PYG{g+go}{ Job = [[job3.a.start, job3.b.start] \PYGZhy{}\PYGZgt{} [job3.a.output.1, job3.a.output.extra.1], some\PYGZus{}extra.string.for\PYGZus{}example, 14] completed}
+\PYG{g+go}{ Job = [[job6.a.start, job6.b.start] \PYGZhy{}\PYGZgt{} [job6.a.output.1, job6.a.output.extra.1], some\PYGZus{}extra.string.for\PYGZus{}example, 14] completed}
+\PYG{g+go}{ Job = [[job1.a.start, job1.b.start] \PYGZhy{}\PYGZgt{} [job1.a.output.1, job1.a.output.extra.1], some\PYGZus{}extra.string.for\PYGZus{}example, 14] completed}
+\PYG{g+go}{ Job = [[job4.a.start, job4.b.start] \PYGZhy{}\PYGZgt{} [job4.a.output.1, job4.a.output.extra.1], some\PYGZus{}extra.string.for\PYGZus{}example, 14] completed}
+\PYG{g+go}{ Job = [[job5.a.start, job5.b.start] \PYGZhy{}\PYGZgt{} [job5.a.output.1, job5.a.output.extra.1], some\PYGZus{}extra.string.for\PYGZus{}example, 14] completed}
+\PYG{g+go}{Completed Task = second\PYGZus{}task}
+\PYG{g+go}{ Job = [[job2.a.start, job2.b.start] \PYGZhy{}\PYGZgt{} [job2.a.output.1, job2.a.output.extra.1], some\PYGZus{}extra.string.for\PYGZus{}example, 14] completed}
+\PYG{g+go}{Completed Task = first\PYGZus{}task}
+\PYG{g+go}{ Job = [[job1.a.output.1, job1.a.output.extra.1] \PYGZhy{}\PYGZgt{} job1.a.output2] completed}
+\PYG{g+go}{ Job = [[job2.a.output.1, job2.a.output.extra.1] \PYGZhy{}\PYGZgt{} job2.a.output2] completed}
+\PYG{g+go}{ Job = [[job3.a.output.1, job3.a.output.extra.1] \PYGZhy{}\PYGZgt{} job3.a.output2] completed}
+\PYG{g+go}{ Job = [[job4.a.output.1, job4.a.output.extra.1] \PYGZhy{}\PYGZgt{} job4.a.output2] completed}
+\PYG{g+go}{ Job = [[job5.a.output.1, job5.a.output.extra.1] \PYGZhy{}\PYGZgt{} job5.a.output2] completed}
+\PYG{g+go}{ Job = [[job6.a.output.1, job6.a.output.extra.1] \PYGZhy{}\PYGZgt{} job6.a.output2] completed}
+\PYG{g+go}{Completed Task = third\PYGZus{}task}
+\end{Verbatim}
+\end{quote}
+
+
+\subsection{Multiple dependencies after @follows}
+\label{tutorials/new_tutorial/transform_in_parallel_code:multiple-dependencies-after-follows}\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{k+kn}{from} \PYG{n+nn}{ruffus} \PYG{k+kn}{import} \PYG{o}{*}
+\PYG{k+kn}{import} \PYG{n+nn}{time}
+\PYG{k+kn}{import} \PYG{n+nn}{random}
+
+\PYG{c}{\PYGZsh{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy [...]
+\PYG{c}{\PYGZsh{} Create pairs of input files}
+\PYG{c}{\PYGZsh{}}
+\PYG{n}{first\PYGZus{}task\PYGZus{}params} \PYG{o}{=} \PYG{p}{[}
+ \PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job1.a.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job1.b.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]}\PYG{p}{,}
+ \PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job2.a.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job2.b.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]}\PYG{p}{,}
+ \PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job3.a.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job3.b.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]}\PYG{p}{,}
+ \PYG{p}{]}
+\PYG{n}{second\PYGZus{}task\PYGZus{}params} \PYG{o}{=} \PYG{p}{[}
+ \PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job4.a.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job4.b.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]}\PYG{p}{,}
+ \PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job5.a.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job5.b.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]}\PYG{p}{,}
+ \PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job6.a.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job6.b.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]}\PYG{p}{,}
+ \PYG{p}{]}
+
+\PYG{k}{for} \PYG{n}{input\PYGZus{}file\PYGZus{}pairs} \PYG{o+ow}{in} \PYG{n}{first\PYGZus{}task\PYGZus{}params} \PYG{o}{+} \PYG{n}{second\PYGZus{}task\PYGZus{}params}\PYG{p}{:}
+ \PYG{k}{for} \PYG{n}{input\PYGZus{}file} \PYG{o+ow}{in} \PYG{n}{input\PYGZus{}file\PYGZus{}pairs}\PYG{p}{:}
+ \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{input\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+
+
+
+\PYG{c}{\PYGZsh{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy [...]
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} first task}
+\PYG{c}{\PYGZsh{}}
+\PYG{n+nd}{@transform}\PYG{p}{(}\PYG{n}{first\PYGZus{}task\PYGZus{}params}\PYG{p}{,} \PYG{n}{suffix}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.start}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,}
+ \PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.output.1}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.output.extra.1}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{,}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{some\PYGZus{}extra.string.for\PYGZus{}example}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+m+mi}{14}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{first\PYGZus{}task}\PYG{p}{(}\PYG{n}{input\PYGZus{}files}\PYG{p}{,} \PYG{n}{output\PYGZus{}file\PYGZus{}pair}\PYG{p}{,}
+ \PYG{n}{extra\PYGZus{}parameter\PYGZus{}str}\PYG{p}{,} \PYG{n}{extra\PYGZus{}parameter\PYGZus{}num}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{for} \PYG{n}{output\PYGZus{}file} \PYG{o+ow}{in} \PYG{n}{output\PYGZus{}file\PYGZus{}pair}\PYG{p}{:}
+ \PYG{k}{with} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{pass}
+ \PYG{n}{time}\PYG{o}{.}\PYG{n}{sleep}\PYG{p}{(}\PYG{n}{random}\PYG{o}{.}\PYG{n}{random}\PYG{p}{(}\PYG{p}{)}\PYG{p}{)}
+
+
+
+\PYG{c}{\PYGZsh{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy [...]
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} second task}
+\PYG{c}{\PYGZsh{}}
+\PYG{n+nd}{@follows}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{first\PYGZus{}task}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{n+nd}{@transform}\PYG{p}{(}\PYG{n}{second\PYGZus{}task\PYGZus{}params}\PYG{p}{,} \PYG{n}{suffix}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.start}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,}
+ \PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.output.1}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.output.extra.1}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{,}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{some\PYGZus{}extra.string.for\PYGZus{}example}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+m+mi}{14}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{second\PYGZus{}task}\PYG{p}{(}\PYG{n}{input\PYGZus{}files}\PYG{p}{,} \PYG{n}{output\PYGZus{}file\PYGZus{}pair}\PYG{p}{,}
+ \PYG{n}{extra\PYGZus{}parameter\PYGZus{}str}\PYG{p}{,} \PYG{n}{extra\PYGZus{}parameter\PYGZus{}num}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{for} \PYG{n}{output\PYGZus{}file} \PYG{o+ow}{in} \PYG{n}{output\PYGZus{}file\PYGZus{}pair}\PYG{p}{:}
+ \PYG{k}{with} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{pass}
+ \PYG{n}{time}\PYG{o}{.}\PYG{n}{sleep}\PYG{p}{(}\PYG{n}{random}\PYG{o}{.}\PYG{n}{random}\PYG{p}{(}\PYG{p}{)}\PYG{p}{)}
+
+
+\PYG{c}{\PYGZsh{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy [...]
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} third task}
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} depends on both first\PYGZus{}task() and second\PYGZus{}task()}
+\PYG{n+nd}{@transform}\PYG{p}{(}\PYG{p}{[}\PYG{n}{first\PYGZus{}task}\PYG{p}{,} \PYG{n}{second\PYGZus{}task}\PYG{p}{]}\PYG{p}{,} \PYG{n}{suffix}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.output.1}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.output2}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{third\PYGZus{}task}\PYG{p}{(}\PYG{n}{input\PYGZus{}files}\PYG{p}{,} \PYG{n}{output\PYGZus{}file}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{with} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{:} \PYG{k}{pass}
+
+
+\PYG{c}{\PYGZsh{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy [...]
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} Run}
+\PYG{c}{\PYGZsh{}}
+\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{p}{[}\PYG{n}{third\PYGZus{}task}\PYG{p}{]}\PYG{p}{,} \PYG{n}{multiprocess} \PYG{o}{=} \PYG{l+m+mi}{6}\PYG{p}{)}
+\end{Verbatim}
+\end{quote}
+
+
+\subsubsection{Resulting Output: \texttt{first\_task} completes before \texttt{second\_task}}
+\label{tutorials/new_tutorial/transform_in_parallel_code:resulting-output-first-task-completes-before-second-task}\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{p}{[}\PYG{n}{third\PYGZus{}task}\PYG{p}{]}\PYG{p}{,} \PYG{n}{multiprocess} \PYG{o}{=} \PYG{l+m+mi}{6}\PYG{p}{)}
+\PYG{g+go}{ Job = [[job2.a.start, job2.b.start] \PYGZhy{}\PYGZgt{} [job2.a.output.1, job2.a.output.extra.1], some\PYGZus{}extra.string.for\PYGZus{}example, 14] completed}
+\PYG{g+go}{ Job = [[job3.a.start, job3.b.start] \PYGZhy{}\PYGZgt{} [job3.a.output.1, job3.a.output.extra.1], some\PYGZus{}extra.string.for\PYGZus{}example, 14] completed}
+\PYG{g+go}{ Job = [[job1.a.start, job1.b.start] \PYGZhy{}\PYGZgt{} [job1.a.output.1, job1.a.output.extra.1], some\PYGZus{}extra.string.for\PYGZus{}example, 14] completed}
+\PYG{g+go}{Completed Task = first\PYGZus{}task}
+\PYG{g+go}{ Job = [[job4.a.start, job4.b.start] \PYGZhy{}\PYGZgt{} [job4.a.output.1, job4.a.output.extra.1], some\PYGZus{}extra.string.for\PYGZus{}example, 14] completed}
+\PYG{g+go}{ Job = [[job6.a.start, job6.b.start] \PYGZhy{}\PYGZgt{} [job6.a.output.1, job6.a.output.extra.1], some\PYGZus{}extra.string.for\PYGZus{}example, 14] completed}
+\PYG{g+go}{ Job = [[job5.a.start, job5.b.start] \PYGZhy{}\PYGZgt{} [job5.a.output.1, job5.a.output.extra.1], some\PYGZus{}extra.string.for\PYGZus{}example, 14] completed}
+\PYG{g+go}{Completed Task = second\PYGZus{}task}
+\PYG{g+go}{ Job = [[job1.a.output.1, job1.a.output.extra.1] \PYGZhy{}\PYGZgt{} job1.a.output2] completed}
+\PYG{g+go}{ Job = [[job2.a.output.1, job2.a.output.extra.1] \PYGZhy{}\PYGZgt{} job2.a.output2] completed}
+\PYG{g+go}{ Job = [[job3.a.output.1, job3.a.output.extra.1] \PYGZhy{}\PYGZgt{} job3.a.output2] completed}
+\PYG{g+go}{ Job = [[job4.a.output.1, job4.a.output.extra.1] \PYGZhy{}\PYGZgt{} job4.a.output2] completed}
+\PYG{g+go}{ Job = [[job5.a.output.1, job5.a.output.extra.1] \PYGZhy{}\PYGZgt{} job5.a.output2] completed}
+\PYG{g+go}{ Job = [[job6.a.output.1, job6.a.output.extra.1] \PYGZhy{}\PYGZgt{} job6.a.output2] completed}
+\end{Verbatim}
+\end{quote}
+
+
+\section{\textbf{Chapter 4}: Python Code for Creating files with \texttt{@originate}}
+\label{tutorials/new_tutorial/originate_code:new-manual-originate-code}\label{tutorials/new_tutorial/originate_code::doc}\label{tutorials/new_tutorial/originate_code:new-manual-originate-chapter-num-python-code-for-creating-files-with-originate}
+
+\strong{See also:}
+
+\begin{itemize}
+\item {}
+{\hyperref[tutorials/new_tutorial/manual_contents:new-manual-table-of-contents]{\emph{Manual Table of Contents}}}
+
+\item {}
+{\hyperref[decorators/transform:decorators-transform]{\emph{@transform syntax in detail}}}
+
+\item {}
+Back to \textbf{Chapter 4}: {\hyperref[tutorials/new_tutorial/originate:new-manual-originate]{\emph{@originate}}}
+
+\end{itemize}
+
+
+
+
+\subsection{Using \texttt{@originate}}
+\label{tutorials/new_tutorial/originate_code:using-originate}\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{k+kn}{from} \PYG{n+nn}{ruffus} \PYG{k+kn}{import} \PYG{o}{*}
+
+\PYG{c}{\PYGZsh{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy [...]
+\PYG{c}{\PYGZsh{} create initial files}
+\PYG{c}{\PYGZsh{}}
+\PYG{n+nd}{@originate}\PYG{p}{(}\PYG{p}{[} \PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job1.a.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job1.b.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]}\PYG{p}{,}
+ \PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job2.a.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job2.b.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]}\PYG{p}{,}
+ \PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job3.a.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job3.b.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]} \PYG{p}{]}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{create\PYGZus{}initial\PYGZus{}file\PYGZus{}pairs}\PYG{p}{(}\PYG{n}{output\PYGZus{}files}\PYG{p}{)}\PYG{p}{:}
+ \PYG{c}{\PYGZsh{} create both files as necessary}
+ \PYG{k}{for} \PYG{n}{output\PYGZus{}file} \PYG{o+ow}{in} \PYG{n}{output\PYGZus{}files}\PYG{p}{:}
+ \PYG{k}{with} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)} \PYG{k}{as} \PYG{n}{oo}\PYG{p}{:} \PYG{k}{pass}
+
+\PYG{c}{\PYGZsh{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy [...]
+\PYG{c}{\PYGZsh{} first task}
+\PYG{n+nd}{@transform}\PYG{p}{(}\PYG{n}{create\PYGZus{}initial\PYGZus{}file\PYGZus{}pairs}\PYG{p}{,} \PYG{n}{suffix}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.start}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.output.1}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{first\PYGZus{}task}\PYG{p}{(}\PYG{n}{input\PYGZus{}files}\PYG{p}{,} \PYG{n}{output\PYGZus{}file}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{with} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{:} \PYG{k}{pass}
+
+
+\PYG{c}{\PYGZsh{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy [...]
+\PYG{c}{\PYGZsh{} second task}
+\PYG{n+nd}{@transform}\PYG{p}{(}\PYG{n}{first\PYGZus{}task}\PYG{p}{,} \PYG{n}{suffix}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.output.1}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.output.2}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{second\PYGZus{}task}\PYG{p}{(}\PYG{n}{input\PYGZus{}files}\PYG{p}{,} \PYG{n}{output\PYGZus{}file}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{with} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{:} \PYG{k}{pass}
+
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} Run}
+\PYG{c}{\PYGZsh{}}
+\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{p}{[}\PYG{n}{second\PYGZus{}task}\PYG{p}{]}\PYG{p}{)}
+\end{Verbatim}
+\end{quote}
+
+
+\subsection{Resulting Output}
+\label{tutorials/new_tutorial/originate_code:resulting-output}\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+ Job = [None -\textgreater{} [job1.a.start, job1.b.start]] completed
+ Job = [None -\textgreater{} [job2.a.start, job2.b.start]] completed
+ Job = [None -\textgreater{} [job3.a.start, job3.b.start]] completed
+Completed Task = create\_initial\_file\_pairs
+ Job = [[job1.a.start, job1.b.start] -\textgreater{} job1.a.output.1] completed
+ Job = [[job2.a.start, job2.b.start] -\textgreater{} job2.a.output.1] completed
+ Job = [[job3.a.start, job3.b.start] -\textgreater{} job3.a.output.1] completed
+Completed Task = first\_task
+ Job = [job1.a.output.1 -\textgreater{} job1.a.output.2] completed
+ Job = [job2.a.output.1 -\textgreater{} job2.a.output.2] completed
+ Job = [job3.a.output.1 -\textgreater{} job3.a.output.2] completed
+Completed Task = second\_task
+\end{Verbatim}
+\end{quote}
+
+
+\section{\textbf{Chapter 5}: Python Code for Understanding how your pipeline works with \emph{pipeline\_printout(...)}}
+\label{tutorials/new_tutorial/pipeline_printout_code::doc}\label{tutorials/new_tutorial/pipeline_printout_code:new-manual-pipeline-printout-code}\label{tutorials/new_tutorial/pipeline_printout_code:new-manual-pipeline-printout-chapter-num-python-code-for-understanding-how-your-pipeline-works-with-pipeline-printout}
+
+\strong{See also:}
+
+\begin{itemize}
+\item {}
+{\hyperref[tutorials/new_tutorial/manual_contents:new-manual-table-of-contents]{\emph{Manual Table of Contents}}}
+
+\item {}
+{\hyperref[pipeline_functions:pipeline-functions-pipeline-printout]{\emph{pipeline\_printout(...)}}} syntax
+
+\item {}
+Back to \textbf{Chapter 5}: {\hyperref[tutorials/new_tutorial/pipeline_printout:new-manual-pipeline-printout]{\emph{Understanding how your pipeline works}}}
+
+\end{itemize}
+
+
+
+
+\subsection{Display the initial state of the pipeline}
+\label{tutorials/new_tutorial/pipeline_printout_code:display-the-initial-state-of-the-pipeline}\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{k+kn}{from} \PYG{n+nn}{ruffus} \PYG{k+kn}{import} \PYG{o}{*}
+\PYG{k+kn}{import} \PYG{n+nn}{sys}
+
+\PYG{c}{\PYGZsh{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy [...]
+\PYG{c}{\PYGZsh{} create initial files}
+\PYG{c}{\PYGZsh{}}
+\PYG{n+nd}{@originate}\PYG{p}{(}\PYG{p}{[} \PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job1.a.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job1.b.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]}\PYG{p}{,}
+ \PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job2.a.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job2.b.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]}\PYG{p}{,}
+ \PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job3.a.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job3.b.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]} \PYG{p}{]}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{create\PYGZus{}initial\PYGZus{}file\PYGZus{}pairs}\PYG{p}{(}\PYG{n}{output\PYGZus{}files}\PYG{p}{)}\PYG{p}{:}
+ \PYG{c}{\PYGZsh{} create both files as necessary}
+ \PYG{k}{for} \PYG{n}{output\PYGZus{}file} \PYG{o+ow}{in} \PYG{n}{output\PYGZus{}files}\PYG{p}{:}
+ \PYG{k}{with} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)} \PYG{k}{as} \PYG{n}{oo}\PYG{p}{:} \PYG{k}{pass}
+
+\PYG{c}{\PYGZsh{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy [...]
+\PYG{c}{\PYGZsh{} first task}
+\PYG{n+nd}{@transform}\PYG{p}{(}\PYG{n}{create\PYGZus{}initial\PYGZus{}file\PYGZus{}pairs}\PYG{p}{,} \PYG{n}{suffix}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.start}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.output.1}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{first\PYGZus{}task}\PYG{p}{(}\PYG{n}{input\PYGZus{}files}\PYG{p}{,} \PYG{n}{output\PYGZus{}file}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{with} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{:} \PYG{k}{pass}
+
+
+\PYG{c}{\PYGZsh{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy [...]
+\PYG{c}{\PYGZsh{} second task}
+\PYG{n+nd}{@transform}\PYG{p}{(}\PYG{n}{first\PYGZus{}task}\PYG{p}{,} \PYG{n}{suffix}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.output.1}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.output.2}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{second\PYGZus{}task}\PYG{p}{(}\PYG{n}{input\PYGZus{}files}\PYG{p}{,} \PYG{n}{output\PYGZus{}file}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{with} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{:} \PYG{k}{pass}
+
+\PYG{n}{pipeline\PYGZus{}printout}\PYG{p}{(}\PYG{n}{sys}\PYG{o}{.}\PYG{n}{stdout}\PYG{p}{,} \PYG{p}{[}\PYG{n}{second\PYGZus{}task}\PYG{p}{]}\PYG{p}{,} \PYG{n}{verbose} \PYG{o}{=} \PYG{l+m+mi}{1}\PYG{p}{)}
+\PYG{n}{pipeline\PYGZus{}printout}\PYG{p}{(}\PYG{n}{sys}\PYG{o}{.}\PYG{n}{stdout}\PYG{p}{,} \PYG{p}{[}\PYG{n}{second\PYGZus{}task}\PYG{p}{]}\PYG{p}{,} \PYG{n}{verbose} \PYG{o}{=} \PYG{l+m+mi}{4}\PYG{p}{)}
+\end{Verbatim}
+\end{quote}
+
+
+\subsection{Normal Output}
+\label{tutorials/new_tutorial/pipeline_printout_code:normal-output}\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{n}{pipeline\PYGZus{}printout}\PYG{p}{(}\PYG{n}{sys}\PYG{o}{.}\PYG{n}{stdout}\PYG{p}{,} \PYG{p}{[}\PYG{n}{second\PYGZus{}task}\PYG{p}{]}\PYG{p}{,} \PYG{n}{verbose} \PYG{o}{=} \PYG{l+m+mi}{1}\PYG{p}{)}
+
+\PYG{g+go}{\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}}
+\PYG{g+go}{Tasks which will be run:}
+
+\PYG{g+go}{Task = create\PYGZus{}initial\PYGZus{}file\PYGZus{}pairs}
+\PYG{g+go}{Task = first\PYGZus{}task}
+\PYG{g+go}{Task = second\PYGZus{}task}
+\end{Verbatim}
+\end{quote}
+
+
+\subsection{High Verbosity Output}
+\label{tutorials/new_tutorial/pipeline_printout_code:high-verbosity-output}\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{n}{pipeline\PYGZus{}printout}\PYG{p}{(}\PYG{n}{sys}\PYG{o}{.}\PYG{n}{stdout}\PYG{p}{,} \PYG{p}{[}\PYG{n}{second\PYGZus{}task}\PYG{p}{]}\PYG{p}{,} \PYG{n}{verbose} \PYG{o}{=} \PYG{l+m+mi}{4}\PYG{p}{)}
+
+\PYG{g+go}{\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}}
+\PYG{g+go}{Tasks which will be run:}
+
+\PYG{g+go}{Task = create\PYGZus{}initial\PYGZus{}file\PYGZus{}pairs}
+\PYG{g+go}{ Job = [None}
+\PYG{g+go}{ \PYGZhy{}\PYGZgt{} job1.a.start}
+\PYG{g+go}{ \PYGZhy{}\PYGZgt{} job1.b.start]}
+\PYG{g+go}{ Job needs update: Missing files [job1.a.start, job1.b.start]}
+\PYG{g+go}{ Job = [None}
+\PYG{g+go}{ \PYGZhy{}\PYGZgt{} job2.a.start}
+\PYG{g+go}{ \PYGZhy{}\PYGZgt{} job2.b.start]}
+\PYG{g+go}{ Job needs update: Missing files [job2.a.start, job2.b.start]}
+\PYG{g+go}{ Job = [None}
+\PYG{g+go}{ \PYGZhy{}\PYGZgt{} job3.a.start}
+\PYG{g+go}{ \PYGZhy{}\PYGZgt{} job3.b.start]}
+\PYG{g+go}{ Job needs update: Missing files [job3.a.start, job3.b.start]}
+
+\PYG{g+go}{Task = first\PYGZus{}task}
+\PYG{g+go}{ Job = [[job1.a.start, job1.b.start]}
+\PYG{g+go}{ \PYGZhy{}\PYGZgt{} job1.a.output.1]}
+\PYG{g+go}{ Job needs update: Missing files [job1.a.start, job1.b.start, job1.a.output.1]}
+\PYG{g+go}{ Job = [[job2.a.start, job2.b.start]}
+\PYG{g+go}{ \PYGZhy{}\PYGZgt{} job2.a.output.1]}
+\PYG{g+go}{ Job needs update: Missing files [job2.a.start, job2.b.start, job2.a.output.1]}
+\PYG{g+go}{ Job = [[job3.a.start, job3.b.start]}
+\PYG{g+go}{ \PYGZhy{}\PYGZgt{} job3.a.output.1]}
+\PYG{g+go}{ Job needs update: Missing files [job3.a.start, job3.b.start, job3.a.output.1]}
+
+\PYG{g+go}{Task = second\PYGZus{}task}
+\PYG{g+go}{ Job = [job1.a.output.1}
+\PYG{g+go}{ \PYGZhy{}\PYGZgt{} job1.a.output.2]}
+\PYG{g+go}{ Job needs update: Missing files [job1.a.output.1, job1.a.output.2]}
+\PYG{g+go}{ Job = [job2.a.output.1}
+\PYG{g+go}{ \PYGZhy{}\PYGZgt{} job2.a.output.2]}
+\PYG{g+go}{ Job needs update: Missing files [job2.a.output.1, job2.a.output.2]}
+\PYG{g+go}{ Job = [job3.a.output.1}
+\PYG{g+go}{ \PYGZhy{}\PYGZgt{} job3.a.output.2]}
+\PYG{g+go}{ Job needs update: Missing files [job3.a.output.1, job3.a.output.2]}
+
+\PYG{g+go}{\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}}
+\end{Verbatim}
+\end{quote}
+
+
+\subsection{Display the partially up-to-date pipeline}
+\label{tutorials/new_tutorial/pipeline_printout_code:display-the-partially-up-to-date-pipeline}\begin{quote}
+
+Run the pipeline, modify \code{job1.a.output.1} so that the second task is no longer up-to-date,
+and print out the pipeline state again:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{p}{[}\PYG{n}{second\PYGZus{}task}\PYG{p}{]}\PYG{p}{,} \PYG{n}{verbose}\PYG{o}{=}\PYG{l+m+mi}{3}\PYG{p}{)}
+\PYG{g+go}{Task enters queue = create\PYGZus{}initial\PYGZus{}file\PYGZus{}pairs}
+\PYG{g+go}{ Job = [None \PYGZhy{}\PYGZgt{} [job1.a.start, job1.b.start]]}
+\PYG{g+go}{ Job = [None \PYGZhy{}\PYGZgt{} [job2.a.start, job2.b.start]]}
+\PYG{g+go}{ Job = [None \PYGZhy{}\PYGZgt{} [job3.a.start, job3.b.start]]}
+\PYG{g+go}{ Job = [None \PYGZhy{}\PYGZgt{} [job1.a.start, job1.b.start]] completed}
+\PYG{g+go}{ Job = [None \PYGZhy{}\PYGZgt{} [job2.a.start, job2.b.start]] completed}
+\PYG{g+go}{ Job = [None \PYGZhy{}\PYGZgt{} [job3.a.start, job3.b.start]] completed}
+\PYG{g+go}{Completed Task = create\PYGZus{}initial\PYGZus{}file\PYGZus{}pairs}
+\PYG{g+go}{Task enters queue = first\PYGZus{}task}
+\PYG{g+go}{ Job = [[job1.a.start, job1.b.start] \PYGZhy{}\PYGZgt{} job1.a.output.1]}
+\PYG{g+go}{ Job = [[job2.a.start, job2.b.start] \PYGZhy{}\PYGZgt{} job2.a.output.1]}
+\PYG{g+go}{ Job = [[job3.a.start, job3.b.start] \PYGZhy{}\PYGZgt{} job3.a.output.1]}
+\PYG{g+go}{ Job = [[job1.a.start, job1.b.start] \PYGZhy{}\PYGZgt{} job1.a.output.1] completed}
+\PYG{g+go}{ Job = [[job2.a.start, job2.b.start] \PYGZhy{}\PYGZgt{} job2.a.output.1] completed}
+\PYG{g+go}{ Job = [[job3.a.start, job3.b.start] \PYGZhy{}\PYGZgt{} job3.a.output.1] completed}
+\PYG{g+go}{Completed Task = first\PYGZus{}task}
+\PYG{g+go}{Task enters queue = second\PYGZus{}task}
+\PYG{g+go}{ Job = [job1.a.output.1 \PYGZhy{}\PYGZgt{} job1.a.output.2]}
+\PYG{g+go}{ Job = [job2.a.output.1 \PYGZhy{}\PYGZgt{} job2.a.output.2]}
+\PYG{g+go}{ Job = [job3.a.output.1 \PYGZhy{}\PYGZgt{} job3.a.output.2]}
+\PYG{g+go}{ Job = [job1.a.output.1 \PYGZhy{}\PYGZgt{} job1.a.output.2] completed}
+\PYG{g+go}{ Job = [job2.a.output.1 \PYGZhy{}\PYGZgt{} job2.a.output.2] completed}
+\PYG{g+go}{ Job = [job3.a.output.1 \PYGZhy{}\PYGZgt{} job3.a.output.2] completed}
+\PYG{g+go}{Completed Task = second\PYGZus{}task}
+
+
+\PYG{g+go}{\PYGZsh{} modify job1.a.output.1}
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{n+nb}{open}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{job1.a.output.1}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{o}{.}\PYG{n}{close}\PYG{p}{(}\PYG{p}{)}
+\end{Verbatim}
+
+At a verbosity of 6, even jobs which are up-to-date will be displayed:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{n}{pipeline\PYGZus{}printout}\PYG{p}{(}\PYG{n}{sys}\PYG{o}{.}\PYG{n}{stdout}\PYG{p}{,} \PYG{p}{[}\PYG{n}{second\PYGZus{}task}\PYG{p}{]}\PYG{p}{,} \PYG{n}{verbose} \PYG{o}{=} \PYG{l+m+mi}{6}\PYG{p}{)}
+
+\PYG{g+go}{\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}}
+\PYG{g+go}{Tasks which are up\PYGZhy{}to\PYGZhy{}date:}
+
+\PYG{g+go}{Task = create\PYGZus{}initial\PYGZus{}file\PYGZus{}pairs}
+\PYG{g+go}{ Job = [None}
+\PYG{g+go}{ \PYGZhy{}\PYGZgt{} job1.a.start}
+\PYG{g+go}{ \PYGZhy{}\PYGZgt{} job1.b.start]}
+\PYG{g+go}{ Job = [None}
+\PYG{g+go}{ \PYGZhy{}\PYGZgt{} job2.a.start}
+\PYG{g+go}{ \PYGZhy{}\PYGZgt{} job2.b.start]}
+\PYG{g+go}{ Job = [None}
+\PYG{g+go}{ \PYGZhy{}\PYGZgt{} job3.a.start}
+\PYG{g+go}{ \PYGZhy{}\PYGZgt{} job3.b.start]}
+
+\PYG{g+go}{Task = first\PYGZus{}task}
+\PYG{g+go}{ Job = [[job1.a.start, job1.b.start]}
+\PYG{g+go}{ \PYGZhy{}\PYGZgt{} job1.a.output.1]}
+\PYG{g+go}{ Job = [[job2.a.start, job2.b.start]}
+\PYG{g+go}{ \PYGZhy{}\PYGZgt{} job2.a.output.1]}
+\PYG{g+go}{ Job = [[job3.a.start, job3.b.start]}
+\PYG{g+go}{ \PYGZhy{}\PYGZgt{} job3.a.output.1]}
+
+\PYG{g+go}{\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}}
+
+
+
+\PYG{g+go}{\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}}
+\PYG{g+go}{Tasks which will be run:}
+
+\PYG{g+go}{Task = second\PYGZus{}task}
+\PYG{g+go}{ Job = [job1.a.output.1}
+\PYG{g+go}{ \PYGZhy{}\PYGZgt{} job1.a.output.2]}
+\PYG{g+go}{ Job needs update:}
+\PYG{g+go}{ Input files:}
+\PYG{g+go}{ * 22 Jul 2014 15:29:19.33: job1.a.output.1}
+\PYG{g+go}{ Output files:}
+\PYG{g+go}{ * 22 Jul 2014 15:29:07.53: job1.a.output.2}
+
+\PYG{g+go}{ Job = [job2.a.output.1}
+\PYG{g+go}{ \PYGZhy{}\PYGZgt{} job2.a.output.2]}
+\PYG{g+go}{ Job = [job3.a.output.1}
+\PYG{g+go}{ \PYGZhy{}\PYGZgt{} job3.a.output.2]}
+
+\PYG{g+go}{\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}}
+\end{Verbatim}
+
+We can now see that there is only one job in ``second\_task'' which needs to be re-run,
+because \code{job1.a.output.1} has been modified after \code{job1.a.output.2}. A minimal
+sketch of this timestamp check is shown below.
+\end{quote}
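+
+Ruffus decides whether a job is out of date by comparing the timestamps of its input and
+output files. The following is only a minimal, illustrative sketch of that check, not
+Ruffus's actual implementation; the helper name \code{needs\_update} and the hard-coded
+file names are assumptions made for the example:
+
+\begin{Verbatim}
+import os
+
+def needs_update(input_file, output_file):
+    # A job is stale if its output is missing or older than its input
+    if not os.path.exists(output_file):
+        return True
+    return os.path.getmtime(output_file) < os.path.getmtime(input_file)
+
+# After touching job1.a.output.1, only the first job of second_task is stale
+for n in (1, 2, 3):
+    print n, needs_update("job%d.a.output.1" % n, "job%d.a.output.2" % n)
+\end{Verbatim}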
+
+
+\section{\textbf{Chapter 7}: Python Code for Displaying the pipeline visually with \emph{pipeline\_printout\_graph(...)}}
+\label{tutorials/new_tutorial/pipeline_printout_graph_code:new-manual-pipeline-printout-graph-chapter-num-python-code-for-displaying-the-pipeline-visually-with-pipeline-printout-graph}\label{tutorials/new_tutorial/pipeline_printout_graph_code::doc}\label{tutorials/new_tutorial/pipeline_printout_graph_code:new-manual-pipeline-printout-graph-code}
+
+\strong{See also:}
+
+\begin{itemize}
+\item {}
+{\hyperref[tutorials/new_tutorial/manual_contents:new-manual-table-of-contents]{\emph{Manual Table of Contents}}}
+
+\item {}
+{\hyperref[pipeline_functions:pipeline-functions-pipeline-printout-graph]{\emph{pipeline\_printout\_graph(...)}}} syntax
+
+\item {}
+Back to \textbf{Chapter 7}: {\hyperref[tutorials/new_tutorial/pipeline_printout_graph:new-manual-pipeline-printout-graph]{\emph{Displaying the pipeline visually}}}
+
+\end{itemize}
+
+
+
+
+\subsection{Code}
+\label{tutorials/new_tutorial/pipeline_printout_graph_code:code}\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\},numbers=left,firstnumber=1,stepnumber=1]
+\PYG{k+kn}{from} \PYG{n+nn}{ruffus} \PYG{k+kn}{import} \PYG{o}{*}
+\PYG{k+kn}{import} \PYG{n+nn}{sys}
+
+\PYG{c}{\PYGZsh{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy [...]
+\PYG{c}{\PYGZsh{} create initial files}
+\PYG{c}{\PYGZsh{}}
+\PYG{n+nd}{@originate}\PYG{p}{(}\PYG{p}{[} \PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job1.a.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job1.b.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]}\PYG{p}{,}
+ \PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job2.a.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job2.b.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]}\PYG{p}{,}
+ \PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job3.a.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job3.b.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]} \PYG{p}{]}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{create\PYGZus{}initial\PYGZus{}file\PYGZus{}pairs}\PYG{p}{(}\PYG{n}{output\PYGZus{}files}\PYG{p}{)}\PYG{p}{:}
+ \PYG{c}{\PYGZsh{} create both files as necessary}
+ \PYG{k}{for} \PYG{n}{output\PYGZus{}file} \PYG{o+ow}{in} \PYG{n}{output\PYGZus{}files}\PYG{p}{:}
+ \PYG{k}{with} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)} \PYG{k}{as} \PYG{n}{oo}\PYG{p}{:} \PYG{k}{pass}
+
+\PYG{c}{\PYGZsh{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy [...]
+\PYG{c}{\PYGZsh{} first task}
+\PYG{n+nd}{@transform}\PYG{p}{(}\PYG{n}{create\PYGZus{}initial\PYGZus{}file\PYGZus{}pairs}\PYG{p}{,} \PYG{n}{suffix}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.start}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.output.1}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{first\PYGZus{}task}\PYG{p}{(}\PYG{n}{input\PYGZus{}files}\PYG{p}{,} \PYG{n}{output\PYGZus{}file}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{with} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{:} \PYG{k}{pass}
+
+
+\PYG{c}{\PYGZsh{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy [...]
+\PYG{c}{\PYGZsh{} second task}
+\PYG{n+nd}{@transform}\PYG{p}{(}\PYG{n}{first\PYGZus{}task}\PYG{p}{,} \PYG{n}{suffix}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.output.1}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.output.2}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{second\PYGZus{}task}\PYG{p}{(}\PYG{n}{input\PYGZus{}files}\PYG{p}{,} \PYG{n}{output\PYGZus{}file}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{with} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{:} \PYG{k}{pass}
+
+\PYG{c}{\PYGZsh{} Print graph before running pipeline}
+
+\PYG{c}{\PYGZsh{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy [...]
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} Show flow chart and tasks before running the pipeline}
+\PYG{c}{\PYGZsh{}}
+\PYG{k}{print} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{Show flow chart and tasks before running the pipeline}\PYG{l+s}{\PYGZdq{}}
+\PYG{n}{pipeline\PYGZus{}printout\PYGZus{}graph} \PYG{p}{(} \PYG{n+nb}{open}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{simple\PYGZus{}tutorial\PYGZus{}stage5\PYGZus{}before.png}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{png}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{p}{[}\PYG{n}{second\PYGZus{}task}\PYG{p}{]}\PYG{p}{,}
+ \PYG{n}{minimal\PYGZus{}key\PYGZus{}legend}\PYG{o}{=}\PYG{n+nb+bp}{True}\PYG{p}{)}
+
+\PYG{c}{\PYGZsh{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy [...]
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} Run}
+\PYG{c}{\PYGZsh{}}
+\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{p}{[}\PYG{n}{second\PYGZus{}task}\PYG{p}{]}\PYG{p}{)}
+
+
+\PYG{c}{\PYGZsh{} modify job1.a.output.1}
+\PYG{n+nb}{open}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{job1.a.output.1}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{o}{.}\PYG{n}{close}\PYG{p}{(}\PYG{p}{)}
+
+
+\PYG{c}{\PYGZsh{} Print graph after {}`{}`job1.a.output.1{}`{}` has been modified}
+
+\PYG{c}{\PYGZsh{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy [...]
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} Show flow chart and tasks after running the pipeline}
+\PYG{c}{\PYGZsh{}}
+\PYG{k}{print} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{Show flow chart and tasks after running the pipeline}\PYG{l+s}{\PYGZdq{}}
+\PYG{n}{pipeline\PYGZus{}printout\PYGZus{}graph} \PYG{p}{(} \PYG{n+nb}{open}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{simple\PYGZus{}tutorial\PYGZus{}stage5\PYGZus{}after.png}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{png}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{p}{[}\PYG{n}{second\PYGZus{}task}\PYG{p}{]}\PYG{p}{,}
+ \PYG{n}{no\PYGZus{}key\PYGZus{}legend}\PYG{o}{=}\PYG{n+nb+bp}{True}\PYG{p}{)}
+\end{Verbatim}
+\end{quote}
+
+
+\subsection{Resulting Flowcharts}
+\label{tutorials/new_tutorial/pipeline_printout_graph_code:resulting-flowcharts}\begin{quote}
+
+\begin{tabular}{|p{0.475\linewidth}|p{0.475\linewidth}|}
+\hline
+
+{\hfill\scalebox{0.950000}{\includegraphics{simple_tutorial_stage5_before.png}}\hfill}
+
+\begin{center}Before
+\end{center} &
+{\hfill\scalebox{0.950000}{\includegraphics{simple_tutorial_stage5_after.png}}\hfill}
+
+\begin{center}After
+\end{center}\\\hline
+\end{tabular}
+
+
+\begin{tabular}{|p{0.950\linewidth}|}
+\hline
+
+{\hfill\scalebox{0.500000}{\includegraphics{tutorial_key.png}}\hfill}
+
+\begin{center}Legend
+\end{center}\\\hline
+\end{tabular}
+
+\end{quote}
+
+
+\section{\textbf{Chapter 8}: Python Code for Specifying output file names with \emph{formatter()} and \emph{regex()}}
+\label{tutorials/new_tutorial/output_file_names_code::doc}\label{tutorials/new_tutorial/output_file_names_code:new-manual-output-file-names-chapter-num-python-code-for-specifying-output-file-names-with-formatter-and-regex}\label{tutorials/new_tutorial/output_file_names_code:new-manual-output-file-names-code}
+
+\strong{See also:}
+
+\begin{itemize}
+\item {}
+{\hyperref[tutorials/new_tutorial/manual_contents:new-manual-table-of-contents]{\emph{Manual Table of Contents}}}
+
+\item {}
+{\hyperref[decorators/indicator_objects:decorators-suffix]{\emph{suffix()}}} syntax
+
+\item {}
+{\hyperref[decorators/indicator_objects:decorators-formatter]{\emph{formatter()}}} syntax
+
+\item {}
+{\hyperref[decorators/indicator_objects:decorators-regex]{\emph{regex()}}} syntax
+
+\item {}
+Back to \textbf{Chapter 8}: {\hyperref[tutorials/new_tutorial/output_file_names:new-manual-output-file-names]{\emph{Specifying output file names}}}
+
+\end{itemize}
+
+
+
+
+\subsection{Example Code for \emph{suffix()}}
+\label{tutorials/new_tutorial/output_file_names_code:example-code-for-suffix}\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{k+kn}{from} \PYG{n+nn}{ruffus} \PYG{k+kn}{import} \PYG{o}{*}
+
+\PYG{c}{\PYGZsh{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy [...]
+\PYG{c}{\PYGZsh{} create initial files}
+\PYG{c}{\PYGZsh{}}
+\PYG{n+nd}{@originate}\PYG{p}{(}\PYG{p}{[} \PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job1.a.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job1.b.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]}\PYG{p}{,}
+ \PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job2.a.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job2.b.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]}\PYG{p}{,}
+ \PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job3.a.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job3.b.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]} \PYG{p}{]}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{create\PYGZus{}initial\PYGZus{}file\PYGZus{}pairs}\PYG{p}{(}\PYG{n}{output\PYGZus{}files}\PYG{p}{)}\PYG{p}{:}
+ \PYG{c}{\PYGZsh{} create both files as necessary}
+ \PYG{k}{for} \PYG{n}{output\PYGZus{}file} \PYG{o+ow}{in} \PYG{n}{output\PYGZus{}files}\PYG{p}{:}
+ \PYG{k}{with} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)} \PYG{k}{as} \PYG{n}{oo}\PYG{p}{:} \PYG{k}{pass}
+
+\PYG{c}{\PYGZsh{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy [...]
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} suffix}
+\PYG{c}{\PYGZsh{}}
+\PYG{n+nd}{@transform}\PYG{p}{(}\PYG{n}{create\PYGZus{}initial\PYGZus{}file\PYGZus{}pairs}\PYG{p}{,} \PYG{c}{\PYGZsh{} name of previous task(s) (or list of files, or a glob)}
+ \PYG{n}{suffix}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.start}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{c}{\PYGZsh{} matching suffix of the \PYGZdq{}input file\PYGZdq{}}
+ \PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.output.a.1}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+m+mi}{45}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.output.b.1}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{)} \PYG{c}{\PYGZsh{} resulting suffix}
+\PYG{k}{def} \PYG{n+nf}{first\PYGZus{}task}\PYG{p}{(}\PYG{n}{input\PYGZus{}files}\PYG{p}{,} \PYG{n}{output\PYGZus{}parameters}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{print} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{ input\PYGZus{}parameters = }\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{n}{input\PYGZus{}files}
+ \PYG{k}{print} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{ output\PYGZus{}parameters = }\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{n}{output\PYGZus{}parameters}
+
+
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} Run}
+\PYG{c}{\PYGZsh{}}
+\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{p}{[}\PYG{n}{first\PYGZus{}task}\PYG{p}{]}\PYG{p}{)}
+\end{Verbatim}
+\end{quote}
+
+
+\subsection{Example Code for \emph{formatter()}}
+\label{tutorials/new_tutorial/output_file_names_code:example-code-for-formatter}\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{k+kn}{from} \PYG{n+nn}{ruffus} \PYG{k+kn}{import} \PYG{o}{*}
+
+\PYG{c}{\PYGZsh{} create initial files}
+\PYG{n+nd}{@originate}\PYG{p}{(}\PYG{p}{[} \PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job1.a.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job1.b.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]}\PYG{p}{,}
+ \PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job2.a.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job2.b.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]}\PYG{p}{,}
+ \PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job3.a.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job3.c.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]} \PYG{p}{]}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{create\PYGZus{}initial\PYGZus{}file\PYGZus{}pairs}\PYG{p}{(}\PYG{n}{output\PYGZus{}files}\PYG{p}{)}\PYG{p}{:}
+ \PYG{c}{\PYGZsh{} create both files as necessary}
+ \PYG{k}{for} \PYG{n}{output\PYGZus{}file} \PYG{o+ow}{in} \PYG{n}{output\PYGZus{}files}\PYG{p}{:}
+ \PYG{k}{with} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)} \PYG{k}{as} \PYG{n}{oo}\PYG{p}{:} \PYG{k}{pass}
+
+
+\PYG{c}{\PYGZsh{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy [...]
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} formatter}
+\PYG{c}{\PYGZsh{}}
+
+\PYG{c}{\PYGZsh{} first task}
+\PYG{n+nd}{@transform}\PYG{p}{(}\PYG{n}{create\PYGZus{}initial\PYGZus{}file\PYGZus{}pairs}\PYG{p}{,} \PYG{c}{\PYGZsh{} Input}
+
+ \PYG{n}{formatter}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.+/job(?P\PYGZlt{}JOBNUMBER\PYGZgt{}}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{d+).a.start}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{c}{\PYGZsh{} Extract job number}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.+/job[123].b.start}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{c}{\PYGZsh{} Match only \PYGZdq{}b\PYGZdq{} files}
+
+ \PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}path[0]\PYGZcb{}/jobs\PYGZob{}JOBNUMBER[0]\PYGZcb{}.output.a.1}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{c}{\PYGZsh{} Replacement list}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}path[1]\PYGZcb{}/jobs\PYGZob{}JOBNUMBER[0]\PYGZcb{}.output.b.1}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+m+mi}{45}\PYG{p}{]}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{first\PYGZus{}task}\PYG{p}{(}\PYG{n}{input\PYGZus{}files}\PYG{p}{,} \PYG{n}{output\PYGZus{}parameters}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{print} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{input\PYGZus{}parameters = }\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{n}{input\PYGZus{}files}
+ \PYG{k}{print} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{output\PYGZus{}parameters = }\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{n}{output\PYGZus{}parameters}
+
+
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} Run}
+\PYG{c}{\PYGZsh{}}
+\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{n}{verbose}\PYG{o}{=}\PYG{l+m+mi}{0}\PYG{p}{)}
+\end{Verbatim}
+\end{quote}
+
+
+\subsection{Example Code for \emph{formatter()} with replacements in \emph{extra} arguments}
+\label{tutorials/new_tutorial/output_file_names_code:example-code-for-formatter-with-replacements-in-extra-arguments}\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{k+kn}{from} \PYG{n+nn}{ruffus} \PYG{k+kn}{import} \PYG{o}{*}
+
+\PYG{c}{\PYGZsh{} create initial files}
+\PYG{n+nd}{@originate}\PYG{p}{(}\PYG{p}{[} \PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job1.a.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job1.b.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]}\PYG{p}{,}
+ \PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job2.a.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job2.b.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]}\PYG{p}{,}
+ \PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job3.a.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job3.c.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]} \PYG{p}{]}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{create\PYGZus{}initial\PYGZus{}file\PYGZus{}pairs}\PYG{p}{(}\PYG{n}{output\PYGZus{}files}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{for} \PYG{n}{output\PYGZus{}file} \PYG{o+ow}{in} \PYG{n}{output\PYGZus{}files}\PYG{p}{:}
+ \PYG{k}{with} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)} \PYG{k}{as} \PYG{n}{oo}\PYG{p}{:} \PYG{k}{pass}
+
+
+\PYG{c}{\PYGZsh{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy [...]
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} print job number as an extra argument}
+\PYG{c}{\PYGZsh{}}
+
+\PYG{c}{\PYGZsh{} first task}
+\PYG{n+nd}{@transform}\PYG{p}{(}\PYG{n}{create\PYGZus{}initial\PYGZus{}file\PYGZus{}pairs}\PYG{p}{,} \PYG{c}{\PYGZsh{} Input}
+
+ \PYG{n}{formatter}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.+/job(?P\PYGZlt{}JOBNUMBER\PYGZgt{}}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{d+).a.start}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{c}{\PYGZsh{} Extract job number}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.+/job[123].b.start}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{c}{\PYGZsh{} Match only \PYGZdq{}b\PYGZdq{} files}
+
+ \PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}path[0]\PYGZcb{}/jobs\PYGZob{}JOBNUMBER[0]\PYGZcb{}.output.a.1}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{c}{\PYGZsh{} Replacement list}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}path[1]\PYGZcb{}/jobs\PYGZob{}JOBNUMBER[0]\PYGZcb{}.output.b.1}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{,}
+
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}JOBNUMBER[0]\PYGZcb{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{first\PYGZus{}task}\PYG{p}{(}\PYG{n}{input\PYGZus{}files}\PYG{p}{,} \PYG{n}{output\PYGZus{}parameters}\PYG{p}{,} \PYG{n}{job\PYGZus{}number}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{print} \PYG{n}{job\PYGZus{}number}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{:}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{n}{input\PYGZus{}files}
+
+
+\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{n}{verbose}\PYG{o}{=}\PYG{l+m+mi}{0}\PYG{p}{)}
+\end{Verbatim}
+\end{quote}
+
+
+\subsection{Example Code for \emph{formatter()} in Zoos}
+\label{tutorials/new_tutorial/output_file_names_code:example-code-for-formatter-in-zoos}\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{k+kn}{from} \PYG{n+nn}{ruffus} \PYG{k+kn}{import} \PYG{o}{*}
+
+\PYG{c}{\PYGZsh{} Make directories}
+\PYG{n+nd}{@mkdir}\PYG{p}{(}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{tiger}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{lion}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{dog}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{crocodile}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{rose}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{)}
+
+\PYG{n+nd}{@originate}\PYG{p}{(}
+ \PYG{c}{\PYGZsh{} List of animals and plants}
+ \PYG{p}{[} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{tiger/mammals.wild.animals}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{lion/mammals.wild.animals}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{lion/mammals.handreared.animals}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{dog/mammals.tame.animals}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{dog/mammals.wild.animals}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{crocodile/reptiles.wild.animals}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{rose/flowering.handreared.plants}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{create\PYGZus{}initial\PYGZus{}files}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{with} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)} \PYG{k}{as} \PYG{n}{oo}\PYG{p}{:} \PYG{k}{pass}
+
+
+\PYG{c}{\PYGZsh{} Put different animals in different directories depending on their clade}
+\PYG{n+nd}{@transform}\PYG{p}{(}\PYG{n}{create\PYGZus{}initial\PYGZus{}files}\PYG{p}{,} \PYG{c}{\PYGZsh{} Input}
+
+ \PYG{n}{formatter}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.+/(?P\PYGZlt{}clade\PYGZgt{}}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{w+).(?P\PYGZlt{}tame\PYGZgt{}}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{w+).animals}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{c}{\PYGZsh{} Only animals: ignore plants!}
+
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}subpath[0][1]\PYGZcb{}/\PYGZob{}clade[0]\PYGZcb{}/\PYGZob{}tame[0]\PYGZcb{}.\PYGZob{}subdir[0][0]\PYGZcb{}.food}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{c}{\PYGZsh{} Replacement}
+
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}subpath[0][1]\PYGZcb{}/\PYGZob{}clade[0]\PYGZcb{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{c}{\PYGZsh{} new\PYGZus{}directory}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}subdir[0][0]\PYGZcb{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{c}{\PYGZsh{} animal\PYGZus{}name}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}tame[0]\PYGZcb{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)} \PYG{c}{\PYGZsh{} tameness}
+\PYG{k}{def} \PYG{n+nf}{feed}\PYG{p}{(}\PYG{n}{input\PYGZus{}file}\PYG{p}{,} \PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{n}{new\PYGZus{}directory}\PYG{p}{,} \PYG{n}{animal\PYGZus{}name}\PYG{p}{,} \PYG{n}{tameness}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{print} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{Food for the \PYGZob{}tameness:11s\PYGZcb{} \PYGZob{}animal\PYGZus{}name:9s\PYGZcb{} = \PYGZob{}output\PYGZus{}file:90s\PYGZcb{} will be placed in \PYGZob{}new\PYGZus{}directory\PYGZcb{}}\PYG{l+s}{\PYGZdq{}}\PYG{o}{.}\PYG{n}{format}\PYG{p}{(}\PYG{o}{*}\PYG{o}{*}\PYG{n+nb}{locals}\PYG{p}{(}\PYG{p}{)}\PYG{p}{)}
+
+
+\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{n}{verbose}\PYG{o}{=}\PYG{l+m+mi}{0}\PYG{p}{)}
+
+
+\PYG{n}{Results} \PYG{o+ow}{in}\PYG{p}{:}
+
+\PYG{p}{:}\PYG{p}{:}
+
+ \PYG{o}{\PYGZgt{}\PYGZgt{}}\PYG{o}{\PYGZgt{}} \PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{n}{verbose}\PYG{o}{=}\PYG{l+m+mi}{0}\PYG{p}{)}
+ \PYG{n}{Food} \PYG{k}{for} \PYG{n}{the} \PYG{n}{wild} \PYG{n}{crocodile} \PYG{o}{=} \PYG{o}{.}\PYG{o}{/}\PYG{n}{reptiles}\PYG{o}{/}\PYG{n}{wild}\PYG{o}{.}\PYG{n}{crocodile}\PYG{o}{.}\PYG{n}{food} \PYG{n}{will} \PYG{n}{be} \PYG{n}{placed} \PYG{o+ow}{in} \PYG{o}{.}\PYG{o}{/}\PYG{n}{reptiles}
+ \PYG{n}{Food} \PYG{k}{for} \PYG{n}{the} \PYG{n}{tame} \PYG{n}{dog} \PYG{o}{=} \PYG{o}{.}\PYG{o}{/}\PYG{n}{mammals}\PYG{o}{/}\PYG{n}{tame}\PYG{o}{.}\PYG{n}{dog}\PYG{o}{.}\PYG{n}{food} \PYG{n}{will} \PYG{n}{be} \PYG{n}{placed} \PYG{o+ow}{in} \PYG{o}{.}\PYG{o}{/}\PYG{n}{mammals}
+ \PYG{n}{Food} \PYG{k}{for} \PYG{n}{the} \PYG{n}{wild} \PYG{n}{dog} \PYG{o}{=} \PYG{o}{.}\PYG{o}{/}\PYG{n}{mammals}\PYG{o}{/}\PYG{n}{wild}\PYG{o}{.}\PYG{n}{dog}\PYG{o}{.}\PYG{n}{food} \PYG{n}{will} \PYG{n}{be} \PYG{n}{placed} \PYG{o+ow}{in} \PYG{o}{.}\PYG{o}{/}\PYG{n}{mammals}
+ \PYG{n}{Food} \PYG{k}{for} \PYG{n}{the} \PYG{n}{handreared} \PYG{n}{lion} \PYG{o}{=} \PYG{o}{.}\PYG{o}{/}\PYG{n}{mammals}\PYG{o}{/}\PYG{n}{handreared}\PYG{o}{.}\PYG{n}{lion}\PYG{o}{.}\PYG{n}{food} \PYG{n}{will} \PYG{n}{be} \PYG{n}{placed} \PYG{o+ow}{in} \PYG{o}{.}\PYG{o}{/}\PYG{n}{mammals}
+ \PYG{n}{Food} \PYG{k}{for} \PYG{n}{the} \PYG{n}{wild} \PYG{n}{lion} \PYG{o}{=} \PYG{o}{.}\PYG{o}{/}\PYG{n}{mammals}\PYG{o}{/}\PYG{n}{wild}\PYG{o}{.}\PYG{n}{lion}\PYG{o}{.}\PYG{n}{food} \PYG{n}{will} \PYG{n}{be} \PYG{n}{placed} \PYG{o+ow}{in} \PYG{o}{.}\PYG{o}{/}\PYG{n}{mammals}
+ \PYG{n}{Food} \PYG{k}{for} \PYG{n}{the} \PYG{n}{wild} \PYG{n}{tiger} \PYG{o}{=} \PYG{o}{.}\PYG{o}{/}\PYG{n}{mammals}\PYG{o}{/}\PYG{n}{wild}\PYG{o}{.}\PYG{n}{tiger}\PYG{o}{.}\PYG{n}{food} \PYG{n}{will} \PYG{n}{be} \PYG{n}{placed} \PYG{o+ow}{in} \PYG{o}{.}\PYG{o}{/}\PYG{n}{mammals}
+\end{Verbatim}
+\end{quote}
+
+
+\subsection{Example Code for \emph{regex()} in zoos}
+\label{tutorials/new_tutorial/output_file_names_code:example-code-for-regex-in-zoos}\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{k+kn}{from} \PYG{n+nn}{ruffus} \PYG{k+kn}{import} \PYG{o}{*}
+
+\PYG{c}{\PYGZsh{} Make directories}
+\PYG{n+nd}{@mkdir}\PYG{p}{(}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{tiger}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{lion}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{dog}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{crocodile}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{rose}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{)}
+
+\PYG{n+nd}{@originate}\PYG{p}{(}
+ \PYG{c}{\PYGZsh{} List of animals and plants}
+ \PYG{p}{[} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{tiger/mammals.wild.animals}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{lion/mammals.wild.animals}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{lion/mammals.handreared.animals}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{dog/mammals.tame.animals}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{dog/mammals.wild.animals}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{crocodile/reptiles.wild.animals}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{rose/flowering.handreared.plants}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{create\PYGZus{}initial\PYGZus{}files}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{with} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)} \PYG{k}{as} \PYG{n}{oo}\PYG{p}{:} \PYG{k}{pass}
+
+
+
+\PYG{c}{\PYGZsh{} Put different animals in different directories depending on their clade}
+\PYG{n+nd}{@transform}\PYG{p}{(}\PYG{n}{create\PYGZus{}initial\PYGZus{}files}\PYG{p}{,} \PYG{c}{\PYGZsh{} Input}
+
+ \PYG{n}{regex}\PYG{p}{(}\PYG{l+s}{r\PYGZdq{}}\PYG{l+s}{(.*?/?)(}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{w+)/(?P\PYGZlt{}clade\PYGZgt{}}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{w+).(?P\PYGZlt{}tame\PYGZgt{}}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{w+).animals}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{c}{\PYGZsh{} Only animals: ignore plants!}
+
+ \PYG{l+s}{r\PYGZdq{}}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{1/}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{g\PYGZlt{}clade\PYGZgt{}/}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{g\PYGZlt{}tame\PYGZgt{}.}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{2.food}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{c}{\PYGZsh{} Replacement}
+
+ \PYG{l+s}{r\PYGZdq{}}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{1/}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{g\PYGZlt{}clade\PYGZgt{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{c}{\PYGZsh{} new\PYGZus{}directory}
+ \PYG{l+s}{r\PYGZdq{}}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{2}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{c}{\PYGZsh{} animal\PYGZus{}name}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{g\PYGZlt{}tame\PYGZgt{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)} \PYG{c}{\PYGZsh{} tameness}
+\PYG{k}{def} \PYG{n+nf}{feed}\PYG{p}{(}\PYG{n}{input\PYGZus{}file}\PYG{p}{,} \PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{n}{new\PYGZus{}directory}\PYG{p}{,} \PYG{n}{animal\PYGZus{}name}\PYG{p}{,} \PYG{n}{tameness}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{print} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{Food for the \PYGZob{}tameness:11s\PYGZcb{} \PYGZob{}animal\PYGZus{}name:9s\PYGZcb{} = \PYGZob{}output\PYGZus{}file:90s\PYGZcb{} will be placed in \PYGZob{}new\PYGZus{}directory\PYGZcb{}}\PYG{l+s}{\PYGZdq{}}\PYG{o}{.}\PYG{n}{format}\PYG{p}{(}\PYG{o}{*}\PYG{o}{*}\PYG{n+nb}{locals}\PYG{p}{(}\PYG{p}{)}\PYG{p}{)}
+
+
+\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{n}{verbose}\PYG{o}{=}\PYG{l+m+mi}{0}\PYG{p}{)}
+\end{Verbatim}
+
+Results in:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{n}{verbose}\PYG{o}{=}\PYG{l+m+mi}{0}\PYG{p}{)}
+ \PYG{n}{Food} \PYG{k}{for} \PYG{n}{the} \PYG{n}{wild} \PYG{n}{crocodile} \PYG{o}{=} \PYG{n}{reptiles}\PYG{o}{/}\PYG{n}{wild}\PYG{o}{.}\PYG{n}{crocodile}\PYG{o}{.}\PYG{n}{food} \PYG{n}{will} \PYG{n}{be} \PYG{n}{placed} \PYG{o+ow}{in} \PYG{n}{reptiles}
+ \PYG{n}{Food} \PYG{k}{for} \PYG{n}{the} \PYG{n}{tame} \PYG{n}{dog} \PYG{o}{=} \PYG{n}{mammals}\PYG{o}{/}\PYG{n}{tame}\PYG{o}{.}\PYG{n}{dog}\PYG{o}{.}\PYG{n}{food} \PYG{n}{will} \PYG{n}{be} \PYG{n}{placed} \PYG{o+ow}{in} \PYG{n}{mammals}
+ \PYG{n}{Food} \PYG{k}{for} \PYG{n}{the} \PYG{n}{wild} \PYG{n}{dog} \PYG{o}{=} \PYG{n}{mammals}\PYG{o}{/}\PYG{n}{wild}\PYG{o}{.}\PYG{n}{dog}\PYG{o}{.}\PYG{n}{food} \PYG{n}{will} \PYG{n}{be} \PYG{n}{placed} \PYG{o+ow}{in} \PYG{n}{mammals}
+ \PYG{n}{Food} \PYG{k}{for} \PYG{n}{the} \PYG{n}{handreared} \PYG{n}{lion} \PYG{o}{=} \PYG{n}{mammals}\PYG{o}{/}\PYG{n}{handreared}\PYG{o}{.}\PYG{n}{lion}\PYG{o}{.}\PYG{n}{food} \PYG{n}{will} \PYG{n}{be} \PYG{n}{placed} \PYG{o+ow}{in} \PYG{n}{mammals}
+ \PYG{n}{Food} \PYG{k}{for} \PYG{n}{the} \PYG{n}{wild} \PYG{n}{lion} \PYG{o}{=} \PYG{n}{mammals}\PYG{o}{/}\PYG{n}{wild}\PYG{o}{.}\PYG{n}{lion}\PYG{o}{.}\PYG{n}{food} \PYG{n}{will} \PYG{n}{be} \PYG{n}{placed} \PYG{o+ow}{in} \PYG{n}{mammals}
+ \PYG{n}{Food} \PYG{k}{for} \PYG{n}{the} \PYG{n}{wild} \PYG{n}{tiger} \PYG{o}{=} \PYG{n}{mammals}\PYG{o}{/}\PYG{n}{wild}\PYG{o}{.}\PYG{n}{tiger}\PYG{o}{.}\PYG{n}{food} \PYG{n}{will} \PYG{n}{be} \PYG{n}{placed} \PYG{o+ow}{in} \PYG{n}{mammals}
+\end{Verbatim}
+\end{quote}
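+
+For reference, the regular expression above is an ordinary Python regular expression:
+ruffus matches it against each input file name and substitutes the captured groups into
+the replacement strings to build the output file name and the three extra parameters.
+The snippet below is only an illustration with the standard \texttt{re} module, not part
+of the pipeline.
+
+\begin{Verbatim}
+import re
+
+# The same pattern as in the regex() call above, matched by hand against one input
+pattern    = r"(.*?/?)(\w+)/(?P<clade>\w+).(?P<tame>\w+).animals"
+input_file = "lion/mammals.handreared.animals"
+
+m = re.match(pattern, input_file)
+if m:                          # "rose/flowering.handreared.plants" would not match
+    print(m.group(2))          # -> lion        (referenced as \2 for animal_name)
+    print(m.group("clade"))    # -> mammals     (referenced as \g<clade>)
+    print(m.group("tame"))     # -> handreared  (referenced as \g<tame> for tameness)
+\end{Verbatim}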
+
+
+\section{\textbf{Chapter 9}: Python Code for Preparing directories for output with \emph{@mkdir()}}
+\label{tutorials/new_tutorial/mkdir_code:new-manual-mkdir-code}\label{tutorials/new_tutorial/mkdir_code::doc}\label{tutorials/new_tutorial/mkdir_code:new-manual-mkdir-chapter-num-python-code-for-preparing-directories-for-output-with-mkdir}
+
+\strong{See also:}
+
+\begin{itemize}
+\item {}
+{\hyperref[tutorials/new_tutorial/manual_contents:new-manual-table-of-contents]{\emph{Manual Table of Contents}}}
+
+\item {}
+{\hyperref[decorators/mkdir:decorators-mkdir]{\emph{mkdir()}}} syntax
+
+\item {}
+{\hyperref[decorators/indicator_objects:decorators-formatter]{\emph{formatter()}}} syntax
+
+\item {}
+{\hyperref[decorators/indicator_objects:decorators-regex]{\emph{regex()}}} syntax
+
+\item {}
+Back to \textbf{Chapter 9}: {\hyperref[tutorials/new_tutorial/mkdir:new-manual-mkdir]{\emph{Preparing directories for output with @mkdir()}}}
+
+\end{itemize}
+
+
+
+
+\subsection{Code for \emph{formatter()} Zoo example}
+\label{tutorials/new_tutorial/mkdir_code:code-for-formatter-zoo-example}\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{k+kn}{from} \PYG{n+nn}{ruffus} \PYG{k+kn}{import} \PYG{o}{*}
+
+\PYG{c}{\PYGZsh{} Make directories}
+\PYG{n+nd}{@mkdir}\PYG{p}{(}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{tiger}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{lion}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{dog}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{crocodile}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{rose}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{)}
+\PYG{n+nd}{@originate}\PYG{p}{(}
+ \PYG{c}{\PYGZsh{} List of animals and plants}
+ \PYG{p}{[} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{tiger/mammals.wild.animals}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{lion/mammals.wild.animals}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{lion/mammals.handreared.animals}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{dog/mammals.tame.animals}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{dog/mammals.wild.animals}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{crocodile/reptiles.wild.animals}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{rose/flowering.handreared.plants}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{create\PYGZus{}initial\PYGZus{}files}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{with} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)} \PYG{k}{as} \PYG{n}{oo}\PYG{p}{:} \PYG{k}{pass}
+
+
+\PYG{c}{\PYGZsh{} create directories for each clade}
+\PYG{n+nd}{@mkdir}\PYG{p}{(} \PYG{n}{create\PYGZus{}initial\PYGZus{}files}\PYG{p}{,} \PYG{c}{\PYGZsh{} Input}
+
+ \PYG{n}{formatter}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.+/(?P\PYGZlt{}clade\PYGZgt{}}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{w+).(?P\PYGZlt{}tame\PYGZgt{}}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{w+).animals}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{c}{\PYGZsh{} Only animals: ignore plants!}
+
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}subpath[0][1]\PYGZcb{}/\PYGZob{}clade[0]\PYGZcb{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)} \PYG{c}{\PYGZsh{} new\PYGZus{}directory}
+\PYG{c}{\PYGZsh{} Put different animals in different directories depending on their clade}
+\PYG{n+nd}{@transform}\PYG{p}{(}\PYG{n}{create\PYGZus{}initial\PYGZus{}files}\PYG{p}{,} \PYG{c}{\PYGZsh{} Input}
+
+ \PYG{n}{formatter}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.+/(?P\PYGZlt{}clade\PYGZgt{}}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{w+).(?P\PYGZlt{}tame\PYGZgt{}}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{w+).animals}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{c}{\PYGZsh{} Only animals: ignore plants!}
+
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}subpath[0][1]\PYGZcb{}/\PYGZob{}clade[0]\PYGZcb{}/\PYGZob{}tame[0]\PYGZcb{}.\PYGZob{}subdir[0][0]\PYGZcb{}.food}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{c}{\PYGZsh{} Replacement}
+
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}subpath[0][1]\PYGZcb{}/\PYGZob{}clade[0]\PYGZcb{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{c}{\PYGZsh{} new\PYGZus{}directory}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}subdir[0][0]\PYGZcb{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{c}{\PYGZsh{} animal\PYGZus{}name}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}tame[0]\PYGZcb{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)} \PYG{c}{\PYGZsh{} tameness}
+\PYG{k}{def} \PYG{n+nf}{feed}\PYG{p}{(}\PYG{n}{input\PYGZus{}file}\PYG{p}{,} \PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{n}{new\PYGZus{}directory}\PYG{p}{,} \PYG{n}{animal\PYGZus{}name}\PYG{p}{,} \PYG{n}{tameness}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{print} \PYG{l+s}{\PYGZdq{}}\PYG{l+s+si}{\PYGZpc{}40s}\PYG{l+s}{ \PYGZhy{}\PYGZgt{} }\PYG{l+s+si}{\PYGZpc{}90s}\PYG{l+s}{\PYGZdq{}} \PYG{o}{\PYGZpc{}} \PYG{p}{(}\PYG{n}{input\PYGZus{}file}\PYG{p}{,} \PYG{n}{output\PYGZus{}file}\PYG{p}{)}
+ \PYG{c}{\PYGZsh{} this works now}
+ \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+
+
+\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{n}{verbose}\PYG{o}{=}\PYG{l+m+mi}{0}\PYG{p}{)}
+\end{Verbatim}
+\end{quote}
+
+
+\subsection{Code for \emph{regex()} Zoo example}
+\label{tutorials/new_tutorial/mkdir_code:code-for-regex-zoo-example}\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{k+kn}{from} \PYG{n+nn}{ruffus} \PYG{k+kn}{import} \PYG{o}{*}
+
+\PYG{c}{\PYGZsh{} Make directories}
+\PYG{n+nd}{@mkdir}\PYG{p}{(}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{tiger}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{lion}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{dog}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{crocodile}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{rose}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{)}
+\PYG{n+nd}{@originate}\PYG{p}{(}
+ \PYG{c}{\PYGZsh{} List of animals and plants}
+ \PYG{p}{[} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{tiger/mammals.wild.animals}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{lion/mammals.wild.animals}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{lion/mammals.handreared.animals}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{dog/mammals.tame.animals}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{dog/mammals.wild.animals}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{crocodile/reptiles.wild.animals}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{rose/flowering.handreared.plants}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{create\PYGZus{}initial\PYGZus{}files}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{with} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)} \PYG{k}{as} \PYG{n}{oo}\PYG{p}{:} \PYG{k}{pass}
+
+
+\PYG{c}{\PYGZsh{} create directories for each clade}
+\PYG{n+nd}{@mkdir}\PYG{p}{(} \PYG{n}{create\PYGZus{}initial\PYGZus{}files}\PYG{p}{,} \PYG{c}{\PYGZsh{} Input}
+
+ \PYG{n}{regex}\PYG{p}{(}\PYG{l+s}{r\PYGZdq{}}\PYG{l+s}{(.*?/?)(}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{w+)/(?P\PYGZlt{}clade\PYGZgt{}}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{w+).(?P\PYGZlt{}tame\PYGZgt{}}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{w+).animals}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{c}{\PYGZsh{} Only animals: ignore plants!}
+ \PYG{l+s}{r\PYGZdq{}}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{g\PYGZlt{}clade\PYGZgt{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)} \PYG{c}{\PYGZsh{} new\PYGZus{}directory}
+\PYG{c}{\PYGZsh{} Put different animals in different directories depending on their clade}
+\PYG{n+nd}{@transform}\PYG{p}{(}\PYG{n}{create\PYGZus{}initial\PYGZus{}files}\PYG{p}{,} \PYG{c}{\PYGZsh{} Input}
+
+ \PYG{n}{regex}\PYG{p}{(}\PYG{l+s}{r\PYGZdq{}}\PYG{l+s}{(.*?/?)(}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{w+)/(?P\PYGZlt{}clade\PYGZgt{}}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{w+).(?P\PYGZlt{}tame\PYGZgt{}}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{w+).animals}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{c}{\PYGZsh{} Only animals: ignore plants!}
+
+ \PYG{l+s}{r\PYGZdq{}}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{1}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{g\PYGZlt{}clade\PYGZgt{}/}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{g\PYGZlt{}tame\PYGZgt{}.}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{2.food}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{c}{\PYGZsh{} Replacement}
+
+ \PYG{l+s}{r\PYGZdq{}}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{1}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{g\PYGZlt{}clade\PYGZgt{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{c}{\PYGZsh{} new\PYGZus{}directory}
+ \PYG{l+s}{r\PYGZdq{}}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{2}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{c}{\PYGZsh{} animal\PYGZus{}name}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{g\PYGZlt{}tame\PYGZgt{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)} \PYG{c}{\PYGZsh{} tameness}
+\PYG{k}{def} \PYG{n+nf}{feed}\PYG{p}{(}\PYG{n}{input\PYGZus{}file}\PYG{p}{,} \PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{n}{new\PYGZus{}directory}\PYG{p}{,} \PYG{n}{animal\PYGZus{}name}\PYG{p}{,} \PYG{n}{tameness}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{print} \PYG{l+s}{\PYGZdq{}}\PYG{l+s+si}{\PYGZpc{}40s}\PYG{l+s}{ \PYGZhy{}\PYGZgt{} }\PYG{l+s+si}{\PYGZpc{}90s}\PYG{l+s}{\PYGZdq{}} \PYG{o}{\PYGZpc{}} \PYG{p}{(}\PYG{n}{input\PYGZus{}file}\PYG{p}{,} \PYG{n}{output\PYGZus{}file}\PYG{p}{)}
+ \PYG{c}{\PYGZsh{} this works now}
+ \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+
+
+\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{n}{verbose}\PYG{o}{=}\PYG{l+m+mi}{0}\PYG{p}{)}
+\end{Verbatim}
+\end{quote}
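+
+Roughly speaking, the \emph{@mkdir()} step above creates one directory per clade before
+any \texttt{feed()} job runs. The following plain Python sketch (not ruffus internals)
+does the same thing by hand, using the regular expression and replacement string from
+the decorator above.
+
+\begin{Verbatim}
+import os, re
+
+# Create one directory per clade for every matching animal file
+pattern = r"(.*?/?)(\w+)/(?P<clade>\w+).(?P<tame>\w+).animals"
+for input_file in ["tiger/mammals.wild.animals",
+                   "crocodile/reptiles.wild.animals",
+                   "rose/flowering.handreared.plants"]:
+    m = re.match(pattern, input_file)
+    if not m:                      # the rose is a plant: no directory is made
+        continue
+    new_directory = m.expand(r"\g<clade>")
+    if not os.path.exists(new_directory):
+        os.makedirs(new_directory)
+\end{Verbatim}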
+
+
+\section{\textbf{Chapter 10}: Python Code for Checkpointing: Interrupted Pipelines and Exceptions}
+\label{tutorials/new_tutorial/checkpointing_code:new-manual-checkpointing-chapter-num-python-code-for-checkpointing-interrupted-pipelines-and-exceptions}\label{tutorials/new_tutorial/checkpointing_code::doc}\label{tutorials/new_tutorial/checkpointing_code:new-manual-checkpointing-code}
+
+\strong{See also:}
+
+\begin{itemize}
+\item {}
+{\hyperref[tutorials/new_tutorial/manual_contents:new-manual-table-of-contents]{\emph{Manual Table of Contents}}}
+
+\item {}
+{\hyperref[tutorials/new_tutorial/checkpointing:new-manual-checkpointing]{\emph{Back to \textbar{}new\_manual.checkpointing.chapter\_num\textbar{}: Interrupted Pipelines and Exceptions}}}
+
+\end{itemize}
+
+
+
+
+\subsection{Code for \emph{suffix()} example}
+\label{tutorials/new_tutorial/checkpointing_code:code-for-ref-suffix-decorators-suffix-example}\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{k+kn}{from} \PYG{n+nn}{ruffus} \PYG{k+kn}{import} \PYG{o}{*}
+\end{Verbatim}
+\end{quote}
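+
+The listing above contains only the initial import. The following is a minimal sketch of
+the kind of pipeline the chapter discusses, assuming the \texttt{checksum\_level}
+constants documented for \texttt{pipeline\_run()}: job completion is recorded in the
+ruffus job history (checkpoint) file, so an interrupted run only redoes incomplete jobs
+when it is restarted.
+
+\begin{Verbatim}
+from ruffus import *
+
+@originate(["job1.start", "job2.start"])
+def create_initial_files(output_file):
+    open(output_file, "w").close()
+
+@transform(create_initial_files, suffix(".start"), ".finished")
+def run_jobs(input_file, output_file):
+    # if the pipeline is interrupted here, only unfinished jobs rerun next time
+    open(output_file, "w").close()
+
+# checkpoint completed jobs in the history database rather than relying on
+# file modification times alone
+pipeline_run(checksum_level = CHECKSUM_HISTORY_TIMESTAMPS)
+\end{Verbatim}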
+
+
+\section{\textbf{Chapter 12}: Python Code for Splitting up large tasks / files with \textbf{@split}}
+\label{tutorials/new_tutorial/split_code:new-manual-split-chapter-num-python-code-for-splitting-up-large-tasks-files-with-split}\label{tutorials/new_tutorial/split_code::doc}\label{tutorials/new_tutorial/split_code:new-manual-split-code}
+
+\strong{See also:}
+
+\begin{itemize}
+\item {}
+{\hyperref[tutorials/new_tutorial/manual_contents:new-manual-table-of-contents]{\emph{Manual Table of Contents}}}
+
+\item {}
+{\hyperref[decorators/split:decorators-split]{\emph{@split syntax in detail}}}
+
+\item {}
+Back to \textbf{Chapter 12}: {\hyperref[tutorials/new_tutorial/split:new-manual-split]{\emph{Splitting up large tasks / files with @split}}}
+
+\end{itemize}
+
+
+
+
+\subsection{Splitting large jobs}
+\label{tutorials/new_tutorial/split_code:splitting-large-jobs}\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{k+kn}{from} \PYG{n+nn}{ruffus} \PYG{k+kn}{import} \PYG{o}{*}
+
+\PYG{n}{NUMBER\PYGZus{}OF\PYGZus{}RANDOMS} \PYG{o}{=} \PYG{l+m+mi}{10000}
+\PYG{n}{CHUNK\PYGZus{}SIZE} \PYG{o}{=} \PYG{l+m+mi}{1000}
+
+
+\PYG{k+kn}{import} \PYG{n+nn}{random}\PYG{o}{,} \PYG{n+nn}{os}\PYG{o}{,} \PYG{n+nn}{glob}
+
+\PYG{c}{\PYGZsh{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy [...]
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} Create random numbers}
+\PYG{c}{\PYGZsh{}}
+\PYG{n+nd}{@originate}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{random\PYGZus{}numbers.list}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{create\PYGZus{}random\PYGZus{}numbers}\PYG{p}{(}\PYG{n}{output\PYGZus{}file\PYGZus{}name}\PYG{p}{)}\PYG{p}{:}
+ \PYG{n}{f} \PYG{o}{=} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file\PYGZus{}name}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+ \PYG{k}{for} \PYG{n}{i} \PYG{o+ow}{in} \PYG{n+nb}{range}\PYG{p}{(}\PYG{n}{NUMBER\PYGZus{}OF\PYGZus{}RANDOMS}\PYG{p}{)}\PYG{p}{:}
+ \PYG{n}{f}\PYG{o}{.}\PYG{n}{write}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s+si}{\PYGZpc{}g}\PYG{l+s+se}{\PYGZbs{}n}\PYG{l+s}{\PYGZdq{}} \PYG{o}{\PYGZpc{}} \PYG{p}{(}\PYG{n}{random}\PYG{o}{.}\PYG{n}{random}\PYG{p}{(}\PYG{p}{)} \PYG{o}{*} \PYG{l+m+mf}{100.0}\PYG{p}{)}\PYG{p}{)}
+
+\PYG{c}{\PYGZsh{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy [...]
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} split initial file}
+\PYG{c}{\PYGZsh{}}
+\PYG{n+nd}{@split}\PYG{p}{(}\PYG{n}{create\PYGZus{}random\PYGZus{}numbers}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{*.chunks}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{split\PYGZus{}problem} \PYG{p}{(}\PYG{n}{input\PYGZus{}file\PYGZus{}names}\PYG{p}{,} \PYG{n}{output\PYGZus{}files}\PYG{p}{)}\PYG{p}{:}
+ \PYG{l+s+sd}{\PYGZdq{}\PYGZdq{}\PYGZdq{}}
+\PYG{l+s+sd}{ splits the random numbers file into chunks of CHUNK\PYGZus{}SIZE lines each}
+\PYG{l+s+sd}{ \PYGZdq{}\PYGZdq{}\PYGZdq{}}
+ \PYG{c}{\PYGZsh{}}
+ \PYG{c}{\PYGZsh{} clean up any files from previous runs}
+ \PYG{c}{\PYGZsh{}}
+ \PYG{c}{\PYGZsh{}for ff in glob.glob(\PYGZdq{}*.chunks\PYGZdq{}):}
+ \PYG{k}{for} \PYG{n}{ff} \PYG{o+ow}{in} \PYG{n}{output\PYGZus{}files}\PYG{p}{:}
+ \PYG{n}{os}\PYG{o}{.}\PYG{n}{unlink}\PYG{p}{(}\PYG{n}{ff}\PYG{p}{)}
+ \PYG{c}{\PYGZsh{}}
+ \PYG{c}{\PYGZsh{}}
+ \PYG{c}{\PYGZsh{} create new file every chunk\PYGZus{}size lines and}
+ \PYG{c}{\PYGZsh{} copy each line into current file}
+ \PYG{c}{\PYGZsh{}}
+ \PYG{n}{output\PYGZus{}file} \PYG{o}{=} \PYG{n+nb+bp}{None}
+ \PYG{n}{cnt\PYGZus{}files} \PYG{o}{=} \PYG{l+m+mi}{0}
+ \PYG{k}{for} \PYG{n}{input\PYGZus{}file\PYGZus{}name} \PYG{o+ow}{in} \PYG{n}{input\PYGZus{}file\PYGZus{}names}\PYG{p}{:}
+ \PYG{k}{for} \PYG{n}{i}\PYG{p}{,} \PYG{n}{line} \PYG{o+ow}{in} \PYG{n+nb}{enumerate}\PYG{p}{(}\PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{input\PYGZus{}file\PYGZus{}name}\PYG{p}{)}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{if} \PYG{n}{i} \PYG{o}{\PYGZpc{}} \PYG{n}{CHUNK\PYGZus{}SIZE} \PYG{o}{==} \PYG{l+m+mi}{0}\PYG{p}{:}
+ \PYG{n}{cnt\PYGZus{}files} \PYG{o}{+}\PYG{o}{=} \PYG{l+m+mi}{1}
+ \PYG{n}{output\PYGZus{}file} \PYG{o}{=} \PYG{n+nb}{open}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s+si}{\PYGZpc{}d}\PYG{l+s}{.chunks}\PYG{l+s}{\PYGZdq{}} \PYG{o}{\PYGZpc{}} \PYG{n}{cnt\PYGZus{}files}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+ \PYG{n}{output\PYGZus{}file}\PYG{o}{.}\PYG{n}{write}\PYG{p}{(}\PYG{n}{line}\PYG{p}{)}
+
+\PYG{c}{\PYGZsh{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy [...]
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} Calculate sum and sum of squares for each chunk file}
+\PYG{c}{\PYGZsh{}}
+\PYG{n+nd}{@transform}\PYG{p}{(}\PYG{n}{split\PYGZus{}problem}\PYG{p}{,} \PYG{n}{suffix}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.chunks}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.sums}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{sum\PYGZus{}of\PYGZus{}squares} \PYG{p}{(}\PYG{n}{input\PYGZus{}file\PYGZus{}name}\PYG{p}{,} \PYG{n}{output\PYGZus{}file\PYGZus{}name}\PYG{p}{)}\PYG{p}{:}
+ \PYG{n}{output} \PYG{o}{=} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file\PYGZus{}name}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+ \PYG{n}{sum\PYGZus{}squared}\PYG{p}{,} \PYG{n+nb}{sum} \PYG{o}{=} \PYG{p}{[}\PYG{l+m+mf}{0.0}\PYG{p}{,} \PYG{l+m+mf}{0.0}\PYG{p}{]}
+ \PYG{n}{cnt\PYGZus{}values} \PYG{o}{=} \PYG{l+m+mi}{0}
+ \PYG{k}{for} \PYG{n}{line} \PYG{o+ow}{in} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{input\PYGZus{}file\PYGZus{}name}\PYG{p}{)}\PYG{p}{:}
+ \PYG{n}{cnt\PYGZus{}values} \PYG{o}{+}\PYG{o}{=} \PYG{l+m+mi}{1}
+ \PYG{n}{val} \PYG{o}{=} \PYG{n+nb}{float}\PYG{p}{(}\PYG{n}{line}\PYG{o}{.}\PYG{n}{rstrip}\PYG{p}{(}\PYG{p}{)}\PYG{p}{)}
+ \PYG{n}{sum\PYGZus{}squared} \PYG{o}{+}\PYG{o}{=} \PYG{n}{val} \PYG{o}{*} \PYG{n}{val}
+ \PYG{n+nb}{sum} \PYG{o}{+}\PYG{o}{=} \PYG{n}{val}
+ \PYG{n}{output}\PYG{o}{.}\PYG{n}{write}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s+si}{\PYGZpc{}s}\PYG{l+s+se}{\PYGZbs{}n}\PYG{l+s+si}{\PYGZpc{}s}\PYG{l+s+se}{\PYGZbs{}n}\PYG{l+s+si}{\PYGZpc{}d}\PYG{l+s+se}{\PYGZbs{}n}\PYG{l+s}{\PYGZdq{}} \PYG{o}{\PYGZpc{}} \PYG{p}{(}\PYG{n+nb}{repr}\PYG{p}{(}\PYG{n}{sum\PYGZus{}squared}\PYG{p}{)}\PYG{p}{,} \PYG{n+nb}{repr}\PYG{p}{(}\PYG{n+nb}{sum}\PYG{p}{)}\PYG{p}{,} \PYG{n}{cnt\PYGZus{}values}\PYG{p}{)}\PYG{p}{)}
+
+\PYG{c}{\PYGZsh{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy [...]
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} Run}
+\PYG{c}{\PYGZsh{}}
+\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{p}{)}
+\end{Verbatim}
+\end{quote}
+
+
+\subsection{Resulting Output}
+\label{tutorials/new_tutorial/split_code:resulting-output}\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{p}{)}
+\PYG{g+go}{ Job = [None \PYGZhy{}\PYGZgt{} random\PYGZus{}numbers.list] completed}
+\PYG{g+go}{Completed Task = create\PYGZus{}random\PYGZus{}numbers}
+\PYG{g+go}{ Job = [[random\PYGZus{}numbers.list] \PYGZhy{}\PYGZgt{} *.chunks] completed}
+\PYG{g+go}{Completed Task = split\PYGZus{}problem}
+\PYG{g+go}{ Job = [1.chunks \PYGZhy{}\PYGZgt{} 1.sums] completed}
+\PYG{g+go}{ Job = [10.chunks \PYGZhy{}\PYGZgt{} 10.sums] completed}
+\PYG{g+go}{ Job = [2.chunks \PYGZhy{}\PYGZgt{} 2.sums] completed}
+\PYG{g+go}{ Job = [3.chunks \PYGZhy{}\PYGZgt{} 3.sums] completed}
+\PYG{g+go}{ Job = [4.chunks \PYGZhy{}\PYGZgt{} 4.sums] completed}
+\PYG{g+go}{ Job = [5.chunks \PYGZhy{}\PYGZgt{} 5.sums] completed}
+\PYG{g+go}{ Job = [6.chunks \PYGZhy{}\PYGZgt{} 6.sums] completed}
+\PYG{g+go}{ Job = [7.chunks \PYGZhy{}\PYGZgt{} 7.sums] completed}
+\PYG{g+go}{ Job = [8.chunks \PYGZhy{}\PYGZgt{} 8.sums] completed}
+\PYG{g+go}{ Job = [9.chunks \PYGZhy{}\PYGZgt{} 9.sums] completed}
+\PYG{g+go}{Completed Task = sum\PYGZus{}of\PYGZus{}squares}
+\end{Verbatim}
+\end{quote}
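+
+To preview which of the jobs above are out of date, and why, without running anything,
+\texttt{pipeline\_printout()} can be called instead of \texttt{pipeline\_run()}. A short
+usage sketch, appended to the script above:
+
+\begin{Verbatim}
+import sys
+from ruffus import pipeline_printout
+
+# Show what would be (re)run and why; higher verbose values give more detail
+pipeline_printout(sys.stdout, [sum_of_squares], verbose = 3)
+\end{Verbatim}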
+
+
+\section{\textbf{Chapter 13}: Python Code for \texttt{@merge} multiple input into a single result}
+\label{tutorials/new_tutorial/merge_code::doc}\label{tutorials/new_tutorial/merge_code:new-manual-merge-code}\label{tutorials/new_tutorial/merge_code:new-manual-merge-chapter-num-python-code-for-merge-multiple-input-into-a-single-result}
+
+\strong{See also:}
+
+\begin{itemize}
+\item {}
+{\hyperref[tutorials/new_tutorial/manual_contents:new-manual-table-of-contents]{\emph{Manual Table of Contents}}}
+
+\item {}
+{\hyperref[decorators/merge:decorators-merge]{\emph{@merge syntax in detail}}}
+
+\item {}
+Back to \textbf{Chapter 13}: {\hyperref[tutorials/new_tutorial/merge:new-manual-merge]{\emph{@merge multiple input into a single result}}}
+
+\end{itemize}
+
+
+
+
+\subsection{Splitting large jobs and recombining with \emph{@merge}}
+\label{tutorials/new_tutorial/merge_code:splitting-large-jobs}\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{k+kn}{from} \PYG{n+nn}{ruffus} \PYG{k+kn}{import} \PYG{o}{*}
+
+\PYG{n}{NUMBER\PYGZus{}OF\PYGZus{}RANDOMS} \PYG{o}{=} \PYG{l+m+mi}{10000}
+\PYG{n}{CHUNK\PYGZus{}SIZE} \PYG{o}{=} \PYG{l+m+mi}{1000}
+
+
+\PYG{k+kn}{import} \PYG{n+nn}{random}\PYG{o}{,} \PYG{n+nn}{os}\PYG{o}{,} \PYG{n+nn}{glob}
+
+\PYG{c}{\PYGZsh{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy [...]
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} Create random numbers}
+\PYG{c}{\PYGZsh{}}
+\PYG{n+nd}{@originate}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{random\PYGZus{}numbers.list}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{create\PYGZus{}random\PYGZus{}numbers}\PYG{p}{(}\PYG{n}{output\PYGZus{}file\PYGZus{}name}\PYG{p}{)}\PYG{p}{:}
+ \PYG{n}{f} \PYG{o}{=} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file\PYGZus{}name}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+ \PYG{k}{for} \PYG{n}{i} \PYG{o+ow}{in} \PYG{n+nb}{range}\PYG{p}{(}\PYG{n}{NUMBER\PYGZus{}OF\PYGZus{}RANDOMS}\PYG{p}{)}\PYG{p}{:}
+ \PYG{n}{f}\PYG{o}{.}\PYG{n}{write}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s+si}{\PYGZpc{}g}\PYG{l+s+se}{\PYGZbs{}n}\PYG{l+s}{\PYGZdq{}} \PYG{o}{\PYGZpc{}} \PYG{p}{(}\PYG{n}{random}\PYG{o}{.}\PYG{n}{random}\PYG{p}{(}\PYG{p}{)} \PYG{o}{*} \PYG{l+m+mf}{100.0}\PYG{p}{)}\PYG{p}{)}
+
+\PYG{c}{\PYGZsh{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy [...]
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} split initial file}
+\PYG{c}{\PYGZsh{}}
+\PYG{n+nd}{@split}\PYG{p}{(}\PYG{n}{create\PYGZus{}random\PYGZus{}numbers}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{*.chunks}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{split\PYGZus{}problem} \PYG{p}{(}\PYG{n}{input\PYGZus{}file\PYGZus{}names}\PYG{p}{,} \PYG{n}{output\PYGZus{}files}\PYG{p}{)}\PYG{p}{:}
+ \PYG{l+s+sd}{\PYGZdq{}\PYGZdq{}\PYGZdq{}}
+\PYG{l+s+sd}{ splits the random numbers file into chunks of CHUNK\PYGZus{}SIZE lines each}
+\PYG{l+s+sd}{ \PYGZdq{}\PYGZdq{}\PYGZdq{}}
+ \PYG{c}{\PYGZsh{}}
+ \PYG{c}{\PYGZsh{} clean up any files from previous runs}
+ \PYG{c}{\PYGZsh{}}
+ \PYG{c}{\PYGZsh{}for ff in glob.glob(\PYGZdq{}*.chunks\PYGZdq{}):}
+ \PYG{k}{for} \PYG{n}{ff} \PYG{o+ow}{in} \PYG{n}{output\PYGZus{}files}\PYG{p}{:}
+ \PYG{n}{os}\PYG{o}{.}\PYG{n}{unlink}\PYG{p}{(}\PYG{n}{ff}\PYG{p}{)}
+ \PYG{c}{\PYGZsh{}}
+ \PYG{c}{\PYGZsh{}}
+ \PYG{c}{\PYGZsh{} create new file every chunk\PYGZus{}size lines and}
+ \PYG{c}{\PYGZsh{} copy each line into current file}
+ \PYG{c}{\PYGZsh{}}
+ \PYG{n}{output\PYGZus{}file} \PYG{o}{=} \PYG{n+nb+bp}{None}
+ \PYG{n}{cnt\PYGZus{}files} \PYG{o}{=} \PYG{l+m+mi}{0}
+ \PYG{k}{for} \PYG{n}{input\PYGZus{}file\PYGZus{}name} \PYG{o+ow}{in} \PYG{n}{input\PYGZus{}file\PYGZus{}names}\PYG{p}{:}
+ \PYG{k}{for} \PYG{n}{i}\PYG{p}{,} \PYG{n}{line} \PYG{o+ow}{in} \PYG{n+nb}{enumerate}\PYG{p}{(}\PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{input\PYGZus{}file\PYGZus{}name}\PYG{p}{)}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{if} \PYG{n}{i} \PYG{o}{\PYGZpc{}} \PYG{n}{CHUNK\PYGZus{}SIZE} \PYG{o}{==} \PYG{l+m+mi}{0}\PYG{p}{:}
+ \PYG{n}{cnt\PYGZus{}files} \PYG{o}{+}\PYG{o}{=} \PYG{l+m+mi}{1}
+ \PYG{n}{output\PYGZus{}file} \PYG{o}{=} \PYG{n+nb}{open}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s+si}{\PYGZpc{}d}\PYG{l+s}{.chunks}\PYG{l+s}{\PYGZdq{}} \PYG{o}{\PYGZpc{}} \PYG{n}{cnt\PYGZus{}files}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+ \PYG{n}{output\PYGZus{}file}\PYG{o}{.}\PYG{n}{write}\PYG{p}{(}\PYG{n}{line}\PYG{p}{)}
+
+\PYG{c}{\PYGZsh{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy [...]
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} Calculate sum and sum of squares for each chunk file}
+\PYG{c}{\PYGZsh{}}
+\PYG{n+nd}{@transform}\PYG{p}{(}\PYG{n}{split\PYGZus{}problem}\PYG{p}{,} \PYG{n}{suffix}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.chunks}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.sums}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{sum\PYGZus{}of\PYGZus{}squares} \PYG{p}{(}\PYG{n}{input\PYGZus{}file\PYGZus{}name}\PYG{p}{,} \PYG{n}{output\PYGZus{}file\PYGZus{}name}\PYG{p}{)}\PYG{p}{:}
+ \PYG{n}{output} \PYG{o}{=} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file\PYGZus{}name}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+ \PYG{n}{sum\PYGZus{}squared}\PYG{p}{,} \PYG{n+nb}{sum} \PYG{o}{=} \PYG{p}{[}\PYG{l+m+mf}{0.0}\PYG{p}{,} \PYG{l+m+mf}{0.0}\PYG{p}{]}
+ \PYG{n}{cnt\PYGZus{}values} \PYG{o}{=} \PYG{l+m+mi}{0}
+ \PYG{k}{for} \PYG{n}{line} \PYG{o+ow}{in} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{input\PYGZus{}file\PYGZus{}name}\PYG{p}{)}\PYG{p}{:}
+ \PYG{n}{cnt\PYGZus{}values} \PYG{o}{+}\PYG{o}{=} \PYG{l+m+mi}{1}
+ \PYG{n}{val} \PYG{o}{=} \PYG{n+nb}{float}\PYG{p}{(}\PYG{n}{line}\PYG{o}{.}\PYG{n}{rstrip}\PYG{p}{(}\PYG{p}{)}\PYG{p}{)}
+ \PYG{n}{sum\PYGZus{}squared} \PYG{o}{+}\PYG{o}{=} \PYG{n}{val} \PYG{o}{*} \PYG{n}{val}
+ \PYG{n+nb}{sum} \PYG{o}{+}\PYG{o}{=} \PYG{n}{val}
+ \PYG{n}{output}\PYG{o}{.}\PYG{n}{write}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s+si}{\PYGZpc{}s}\PYG{l+s+se}{\PYGZbs{}n}\PYG{l+s+si}{\PYGZpc{}s}\PYG{l+s+se}{\PYGZbs{}n}\PYG{l+s+si}{\PYGZpc{}d}\PYG{l+s+se}{\PYGZbs{}n}\PYG{l+s}{\PYGZdq{}} \PYG{o}{\PYGZpc{}} \PYG{p}{(}\PYG{n+nb}{repr}\PYG{p}{(}\PYG{n}{sum\PYGZus{}squared}\PYG{p}{)}\PYG{p}{,} \PYG{n+nb}{repr}\PYG{p}{(}\PYG{n+nb}{sum}\PYG{p}{)}\PYG{p}{,} \PYG{n}{cnt\PYGZus{}values}\PYG{p}{)}\PYG{p}{)}
+
+\PYG{c}{\PYGZsh{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy [...]
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} Calculate variance from sums}
+\PYG{c}{\PYGZsh{}}
+\PYG{n+nd}{@merge}\PYG{p}{(}\PYG{n}{sum\PYGZus{}of\PYGZus{}squares}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{variance.result}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{calculate\PYGZus{}variance} \PYG{p}{(}\PYG{n}{input\PYGZus{}file\PYGZus{}names}\PYG{p}{,} \PYG{n}{output\PYGZus{}file\PYGZus{}name}\PYG{p}{)}\PYG{p}{:}
+ \PYG{l+s+sd}{\PYGZdq{}\PYGZdq{}\PYGZdq{}}
+\PYG{l+s+sd}{ Calculate variance naively}
+\PYG{l+s+sd}{ \PYGZdq{}\PYGZdq{}\PYGZdq{}}
+ \PYG{c}{\PYGZsh{}}
+ \PYG{c}{\PYGZsh{} initialise variables}
+ \PYG{c}{\PYGZsh{}}
+ \PYG{n}{all\PYGZus{}sum\PYGZus{}squared} \PYG{o}{=} \PYG{l+m+mf}{0.0}
+ \PYG{n}{all\PYGZus{}sum} \PYG{o}{=} \PYG{l+m+mf}{0.0}
+ \PYG{n}{all\PYGZus{}cnt\PYGZus{}values} \PYG{o}{=} \PYG{l+m+mf}{0.0}
+ \PYG{c}{\PYGZsh{}}
+ \PYG{c}{\PYGZsh{} add up the sum\PYGZus{}squared, sum and cnt\PYGZus{}values from all the chunks}
+ \PYG{c}{\PYGZsh{}}
+ \PYG{k}{for} \PYG{n}{input\PYGZus{}file\PYGZus{}name} \PYG{o+ow}{in} \PYG{n}{input\PYGZus{}file\PYGZus{}names}\PYG{p}{:}
+ \PYG{n}{sum\PYGZus{}squared}\PYG{p}{,} \PYG{n+nb}{sum}\PYG{p}{,} \PYG{n}{cnt\PYGZus{}values} \PYG{o}{=} \PYG{n+nb}{map}\PYG{p}{(}\PYG{n+nb}{float}\PYG{p}{,} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{input\PYGZus{}file\PYGZus{}name}\PYG{p}{)}\PYG{o}{.}\PYG{n}{readlines}\PYG{p}{(}\PYG{p}{)}\PYG{p}{)}
+ \PYG{n}{all\PYGZus{}sum\PYGZus{}squared} \PYG{o}{+}\PYG{o}{=} \PYG{n}{sum\PYGZus{}squared}
+ \PYG{n}{all\PYGZus{}sum} \PYG{o}{+}\PYG{o}{=} \PYG{n+nb}{sum}
+ \PYG{n}{all\PYGZus{}cnt\PYGZus{}values} \PYG{o}{+}\PYG{o}{=} \PYG{n}{cnt\PYGZus{}values}
+ \PYG{n}{all\PYGZus{}mean} \PYG{o}{=} \PYG{n}{all\PYGZus{}sum} \PYG{o}{/} \PYG{n}{all\PYGZus{}cnt\PYGZus{}values}
+ \PYG{n}{variance} \PYG{o}{=} \PYG{p}{(}\PYG{n}{all\PYGZus{}sum\PYGZus{}squared} \PYG{o}{\PYGZhy{}} \PYG{n}{all\PYGZus{}sum} \PYG{o}{*} \PYG{n}{all\PYGZus{}mean}\PYG{p}{)}\PYG{o}{/}\PYG{p}{(}\PYG{n}{all\PYGZus{}cnt\PYGZus{}values}\PYG{p}{)}
+ \PYG{c}{\PYGZsh{}}
+ \PYG{c}{\PYGZsh{} print output}
+ \PYG{c}{\PYGZsh{}}
+ \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file\PYGZus{}name}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{o}{.}\PYG{n}{write}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s+si}{\PYGZpc{}s}\PYG{l+s+se}{\PYGZbs{}n}\PYG{l+s}{\PYGZdq{}} \PYG{o}{\PYGZpc{}} \PYG{n}{variance}\PYG{p}{)}
+
+\PYG{c}{\PYGZsh{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy [...]
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} Run}
+\PYG{c}{\PYGZsh{}}
+\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{p}{)}
+\end{Verbatim}
+\end{quote}
+
+
+\subsection{Resulting Output}
+\label{tutorials/new_tutorial/merge_code:resulting-output}\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{p}{)}
+\PYG{g+go}{ Job = [None \PYGZhy{}\PYGZgt{} random\PYGZus{}numbers.list] completed}
+\PYG{g+go}{Completed Task = create\PYGZus{}random\PYGZus{}numbers}
+\PYG{g+go}{ Job = [[random\PYGZus{}numbers.list] \PYGZhy{}\PYGZgt{} *.chunks] completed}
+\PYG{g+go}{Completed Task = split\PYGZus{}problem}
+\PYG{g+go}{ Job = [1.chunks \PYGZhy{}\PYGZgt{} 1.sums] completed}
+\PYG{g+go}{ Job = [10.chunks \PYGZhy{}\PYGZgt{} 10.sums] completed}
+\PYG{g+go}{ Job = [2.chunks \PYGZhy{}\PYGZgt{} 2.sums] completed}
+\PYG{g+go}{ Job = [3.chunks \PYGZhy{}\PYGZgt{} 3.sums] completed}
+\PYG{g+go}{ Job = [4.chunks \PYGZhy{}\PYGZgt{} 4.sums] completed}
+\PYG{g+go}{ Job = [5.chunks \PYGZhy{}\PYGZgt{} 5.sums] completed}
+\PYG{g+go}{ Job = [6.chunks \PYGZhy{}\PYGZgt{} 6.sums] completed}
+\PYG{g+go}{ Job = [7.chunks \PYGZhy{}\PYGZgt{} 7.sums] completed}
+\PYG{g+go}{ Job = [8.chunks \PYGZhy{}\PYGZgt{} 8.sums] completed}
+\PYG{g+go}{ Job = [9.chunks \PYGZhy{}\PYGZgt{} 9.sums] completed}
+\PYG{g+go}{Completed Task = sum\PYGZus{}of\PYGZus{}squares}
+\PYG{g+go}{ Job = [[1.sums, 10.sums, 2.sums, 3.sums, 4.sums, 5.sums, 6.sums, 7.sums, 8.sums, 9.sums] \PYGZhy{}\PYGZgt{} variance.result] completed}
+\PYG{g+go}{Completed Task = calculate\PYGZus{}variance}
+\end{Verbatim}
+\end{quote}
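+
+As a quick sanity check (not part of the pipeline), the merged figure can be compared
+with a direct computation over the original random numbers; the two values should agree
+up to floating point rounding. A sketch assuming the files left behind by the run above:
+
+\begin{Verbatim}
+# Recompute the variance directly from the unsplit input and compare it with
+# the figure that calculate_variance() wrote to variance.result
+values = [float(line) for line in open("random_numbers.list")]
+mean = sum(values) / len(values)
+direct_variance = sum((v - mean) ** 2 for v in values) / len(values)
+
+pipeline_variance = float(open("variance.result").read())
+print("direct   variance = %g" % direct_variance)
+print("pipeline variance = %g" % pipeline_variance)
+\end{Verbatim}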
+
+
+\section{\textbf{Chapter 14}: Python Code for Multiprocessing, \texttt{drmaa} and Computation Clusters}
+\label{tutorials/new_tutorial/multiprocessing_code:new-manual-multiprocessing-code}\label{tutorials/new_tutorial/multiprocessing_code:new-manual-multiprocessing-chapter-num-python-code-for-multiprocessing-drmaa-and-computation-clusters}\label{tutorials/new_tutorial/multiprocessing_code::doc}
+
+\strong{See also:}
+
+\begin{itemize}
+\item {}
+{\hyperref[tutorials/new_tutorial/manual_contents:new-manual-table-of-contents]{\emph{Manual Table of Contents}}}
+
+\item {}
+{\hyperref[decorators/jobs_limit:decorators-jobs-limit]{\emph{@jobs\_limit}}} syntax
+
+\item {}
+{\hyperref[pipeline_functions:pipeline-functions-pipeline-run]{\emph{pipeline\_run()}}} syntax
+
+\item {}
+{\hyperref[drmaa_wrapper_functions:drmaa-wrapper-run-job]{\emph{drmaa\_wrapper.run\_job()}}} syntax
+
+\item {}
+Back to \textbf{Chapter 14}: {\hyperref[tutorials/new_tutorial/multiprocessing:new-manual-multiprocessing]{\emph{Multiprocessing, drmaa and Computation Clusters}}}
+
+\end{itemize}
+
+
+
+
+\subsection{\emph{@jobs\_limit}}
+\label{tutorials/new_tutorial/multiprocessing_code:jobs-limit}\begin{quote}
+\begin{itemize}
+\item {}
+The first two tasks share a named \emph{@jobs\_limit} of 3, so at most 3 of their jobs run at any one time between them
+
+\item {}
+The final task is limited to 5 concurrent jobs
+
+\item {}
+The pipeline as a whole is allowed a (theoretical) maximum of 10 jobs at a time (\texttt{multiprocess = 10})
+
+\end{itemize}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{k+kn}{from} \PYG{n+nn}{ruffus} \PYG{k+kn}{import} \PYG{o}{*}
+\PYG{k+kn}{import} \PYG{n+nn}{time}
+
+\PYG{c}{\PYGZsh{} make list of 10 files}
+\PYG{n+nd}{@split}\PYG{p}{(}\PYG{n+nb+bp}{None}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{*stage1}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{make\PYGZus{}files}\PYG{p}{(}\PYG{n}{input\PYGZus{}files}\PYG{p}{,} \PYG{n}{output\PYGZus{}files}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{for} \PYG{n}{i} \PYG{o+ow}{in} \PYG{n+nb}{range}\PYG{p}{(}\PYG{l+m+mi}{10}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{if} \PYG{n}{i} \PYG{o}{\PYGZlt{}} \PYG{l+m+mi}{5}\PYG{p}{:}
+ \PYG{n+nb}{open}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s+si}{\PYGZpc{}d}\PYG{l+s}{.small\PYGZus{}stage1}\PYG{l+s}{\PYGZdq{}} \PYG{o}{\PYGZpc{}} \PYG{n}{i}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+ \PYG{k}{else}\PYG{p}{:}
+ \PYG{n+nb}{open}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s+si}{\PYGZpc{}d}\PYG{l+s}{.big\PYGZus{}stage1}\PYG{l+s}{\PYGZdq{}} \PYG{o}{\PYGZpc{}} \PYG{n}{i}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+
+\PYG{n+nd}{@jobs\PYGZus{}limit}\PYG{p}{(}\PYG{l+m+mi}{3}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{ftp\PYGZus{}download\PYGZus{}limit}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{n+nd}{@transform}\PYG{p}{(}\PYG{n}{make\PYGZus{}files}\PYG{p}{,} \PYG{n}{suffix}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.small\PYGZus{}stage1}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.stage2}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{stage1\PYGZus{}small}\PYG{p}{(}\PYG{n}{input\PYGZus{}file}\PYG{p}{,} \PYG{n}{output\PYGZus{}file}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{print} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{FTP downloading }\PYG{l+s+si}{\PYGZpc{}s}\PYG{l+s}{ \PYGZhy{}\PYGZgt{}Start}\PYG{l+s}{\PYGZdq{}} \PYG{o}{\PYGZpc{}} \PYG{n}{input\PYGZus{}file}
+ \PYG{n}{time}\PYG{o}{.}\PYG{n}{sleep}\PYG{p}{(}\PYG{l+m+mi}{2}\PYG{p}{)}
+ \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+ \PYG{k}{print} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{FTP downloading }\PYG{l+s+si}{\PYGZpc{}s}\PYG{l+s}{ \PYGZhy{}\PYGZgt{}Finished}\PYG{l+s}{\PYGZdq{}} \PYG{o}{\PYGZpc{}} \PYG{n}{input\PYGZus{}file}
+
+\PYG{n+nd}{@jobs\PYGZus{}limit}\PYG{p}{(}\PYG{l+m+mi}{3}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{ftp\PYGZus{}download\PYGZus{}limit}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{n+nd}{@transform}\PYG{p}{(}\PYG{n}{make\PYGZus{}files}\PYG{p}{,} \PYG{n}{suffix}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.big\PYGZus{}stage1}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.stage2}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{stage1\PYGZus{}big}\PYG{p}{(}\PYG{n}{input\PYGZus{}file}\PYG{p}{,} \PYG{n}{output\PYGZus{}file}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{print} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{FTP downloading }\PYG{l+s+si}{\PYGZpc{}s}\PYG{l+s}{ \PYGZhy{}\PYGZgt{}Start}\PYG{l+s}{\PYGZdq{}} \PYG{o}{\PYGZpc{}} \PYG{n}{input\PYGZus{}file}
+ \PYG{n}{time}\PYG{o}{.}\PYG{n}{sleep}\PYG{p}{(}\PYG{l+m+mi}{2}\PYG{p}{)}
+ \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+ \PYG{k}{print} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{FTP downloading }\PYG{l+s+si}{\PYGZpc{}s}\PYG{l+s}{ \PYGZhy{}\PYGZgt{}Finished}\PYG{l+s}{\PYGZdq{}} \PYG{o}{\PYGZpc{}} \PYG{n}{input\PYGZus{}file}
+
+\PYG{n+nd}{@jobs\PYGZus{}limit}\PYG{p}{(}\PYG{l+m+mi}{5}\PYG{p}{)}
+\PYG{n+nd}{@transform}\PYG{p}{(}\PYG{p}{[}\PYG{n}{stage1\PYGZus{}small}\PYG{p}{,} \PYG{n}{stage1\PYGZus{}big}\PYG{p}{]}\PYG{p}{,} \PYG{n}{suffix}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.stage2}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.stage3}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{stage2}\PYG{p}{(}\PYG{n}{input\PYGZus{}file}\PYG{p}{,} \PYG{n}{output\PYGZus{}file}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{print} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{Processing stage2 }\PYG{l+s+si}{\PYGZpc{}s}\PYG{l+s}{ \PYGZhy{}\PYGZgt{}Start}\PYG{l+s}{\PYGZdq{}} \PYG{o}{\PYGZpc{}} \PYG{n}{input\PYGZus{}file}
+ \PYG{n}{time}\PYG{o}{.}\PYG{n}{sleep}\PYG{p}{(}\PYG{l+m+mi}{2}\PYG{p}{)}
+ \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+ \PYG{k}{print} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{Processing stage2 }\PYG{l+s+si}{\PYGZpc{}s}\PYG{l+s}{ \PYGZhy{}\PYGZgt{}Finished}\PYG{l+s}{\PYGZdq{}} \PYG{o}{\PYGZpc{}} \PYG{n}{input\PYGZus{}file}
+
+\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{n}{multiprocess} \PYG{o}{=} \PYG{l+m+mi}{10}\PYG{p}{,} \PYG{n}{verbose} \PYG{o}{=} \PYG{l+m+mi}{0}\PYG{p}{)}
+\end{Verbatim}
+
+Giving:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{n}{multiprocess} \PYG{o}{=} \PYG{l+m+mi}{10}\PYG{p}{,} \PYG{n}{verbose} \PYG{o}{=} \PYG{l+m+mi}{0}\PYG{p}{)}
+
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{c}{\PYGZsh{} 3 jobs at a time, interleaved}
+\PYG{g+go}{FTP downloading 5.big\PYGZus{}stage1 \PYGZhy{}\PYGZgt{}Start}
+\PYG{g+go}{FTP downloading 6.big\PYGZus{}stage1 \PYGZhy{}\PYGZgt{}Start}
+\PYG{g+go}{FTP downloading 7.big\PYGZus{}stage1 \PYGZhy{}\PYGZgt{}Start}
+\PYG{g+go}{FTP downloading 5.big\PYGZus{}stage1 \PYGZhy{}\PYGZgt{}Finished}
+\PYG{g+go}{FTP downloading 8.big\PYGZus{}stage1 \PYGZhy{}\PYGZgt{}Start}
+\PYG{g+go}{FTP downloading 6.big\PYGZus{}stage1 \PYGZhy{}\PYGZgt{}Finished}
+\PYG{g+go}{FTP downloading 9.big\PYGZus{}stage1 \PYGZhy{}\PYGZgt{}Start}
+\PYG{g+go}{FTP downloading 7.big\PYGZus{}stage1 \PYGZhy{}\PYGZgt{}Finished}
+\PYG{g+go}{FTP downloading 0.small\PYGZus{}stage1 \PYGZhy{}\PYGZgt{}Start}
+\PYG{g+go}{FTP downloading 8.big\PYGZus{}stage1 \PYGZhy{}\PYGZgt{}Finished}
+\PYG{g+go}{FTP downloading 1.small\PYGZus{}stage1 \PYGZhy{}\PYGZgt{}Start}
+\PYG{g+go}{FTP downloading 9.big\PYGZus{}stage1 \PYGZhy{}\PYGZgt{}Finished}
+\PYG{g+go}{FTP downloading 2.small\PYGZus{}stage1 \PYGZhy{}\PYGZgt{}Start}
+\PYG{g+go}{FTP downloading 0.small\PYGZus{}stage1 \PYGZhy{}\PYGZgt{}Finished}
+\PYG{g+go}{FTP downloading 3.small\PYGZus{}stage1 \PYGZhy{}\PYGZgt{}Start}
+\PYG{g+go}{FTP downloading 1.small\PYGZus{}stage1 \PYGZhy{}\PYGZgt{}Finished}
+\PYG{g+go}{FTP downloading 4.small\PYGZus{}stage1 \PYGZhy{}\PYGZgt{}Start}
+\PYG{g+go}{FTP downloading 2.small\PYGZus{}stage1 \PYGZhy{}\PYGZgt{}Finished}
+\PYG{g+go}{FTP downloading 3.small\PYGZus{}stage1 \PYGZhy{}\PYGZgt{}Finished}
+\PYG{g+go}{FTP downloading 4.small\PYGZus{}stage1 \PYGZhy{}\PYGZgt{}Finished}
+
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{c}{\PYGZsh{} 5 jobs at a time, interleaved}
+\PYG{g+go}{Processing stage2 0.stage2 \PYGZhy{}\PYGZgt{}Start}
+\PYG{g+go}{Processing stage2 1.stage2 \PYGZhy{}\PYGZgt{}Start}
+\PYG{g+go}{Processing stage2 2.stage2 \PYGZhy{}\PYGZgt{}Start}
+\PYG{g+go}{Processing stage2 3.stage2 \PYGZhy{}\PYGZgt{}Start}
+\PYG{g+go}{Processing stage2 4.stage2 \PYGZhy{}\PYGZgt{}Start}
+\PYG{g+go}{Processing stage2 0.stage2 \PYGZhy{}\PYGZgt{}Finished}
+\PYG{g+go}{Processing stage2 5.stage2 \PYGZhy{}\PYGZgt{}Start}
+\PYG{g+go}{Processing stage2 1.stage2 \PYGZhy{}\PYGZgt{}Finished}
+\PYG{g+go}{Processing stage2 6.stage2 \PYGZhy{}\PYGZgt{}Start}
+\PYG{g+go}{Processing stage2 2.stage2 \PYGZhy{}\PYGZgt{}Finished}
+\PYG{g+go}{Processing stage2 4.stage2 \PYGZhy{}\PYGZgt{}Finished}
+\PYG{g+go}{Processing stage2 7.stage2 \PYGZhy{}\PYGZgt{}Start}
+\PYG{g+go}{Processing stage2 8.stage2 \PYGZhy{}\PYGZgt{}Start}
+\PYG{g+go}{Processing stage2 3.stage2 \PYGZhy{}\PYGZgt{}Finished}
+\PYG{g+go}{Processing stage2 9.stage2 \PYGZhy{}\PYGZgt{}Start}
+\PYG{g+go}{Processing stage2 5.stage2 \PYGZhy{}\PYGZgt{}Finished}
+\PYG{g+go}{Processing stage2 7.stage2 \PYGZhy{}\PYGZgt{}Finished}
+\PYG{g+go}{Processing stage2 6.stage2 \PYGZhy{}\PYGZgt{}Finished}
+\PYG{g+go}{Processing stage2 8.stage2 \PYGZhy{}\PYGZgt{}Finished}
+\PYG{g+go}{Processing stage2 9.stage2 \PYGZhy{}\PYGZgt{}Finished}
+\end{Verbatim}
+\end{quote}
+
+
+\subsection{Using \texttt{ruffus.drmaa\_wrapper}}
+\label{tutorials/new_tutorial/multiprocessing_code:using-ruffus-drmaa-wrapper}\label{tutorials/new_tutorial/multiprocessing_code:id1}\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{c}{\PYGZsh{}!/usr/bin/python}
+\PYG{n}{job\PYGZus{}queue\PYGZus{}name} \PYG{o}{=} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{YOUR\PYGZus{}QUEUE\PYGZus{}NAME\PYGZus{}GOES\PYGZus{}HERE}\PYG{l+s}{\PYGZdq{}}
+\PYG{n}{job\PYGZus{}other\PYGZus{}options} \PYG{o}{=} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZhy{}P YOUR\PYGZus{}PROJECT\PYGZus{}NAME\PYGZus{}GOES\PYGZus{}HERE}\PYG{l+s}{\PYGZdq{}}
+
+\PYG{k+kn}{from} \PYG{n+nn}{ruffus} \PYG{k+kn}{import} \PYG{o}{*}
+\PYG{k+kn}{from} \PYG{n+nn}{ruffus.drmaa\PYGZus{}wrapper} \PYG{k+kn}{import} \PYG{n}{run\PYGZus{}job}\PYG{p}{,} \PYG{n}{error\PYGZus{}drmaa\PYGZus{}job}
+
+\PYG{n}{parser} \PYG{o}{=} \PYG{n}{cmdline}\PYG{o}{.}\PYG{n}{get\PYGZus{}argparse}\PYG{p}{(}\PYG{n}{description}\PYG{o}{=}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{WHAT DOES THIS PIPELINE DO?}\PYG{l+s}{\PYGZsq{}}\PYG{p}{)}
+
+\PYG{n}{options} \PYG{o}{=} \PYG{n}{parser}\PYG{o}{.}\PYG{n}{parse\PYGZus{}args}\PYG{p}{(}\PYG{p}{)}
+
+\PYG{c}{\PYGZsh{} logger which can be passed to multiprocessing ruffus tasks}
+\PYG{n}{logger}\PYG{p}{,} \PYG{n}{logger\PYGZus{}mutex} \PYG{o}{=} \PYG{n}{cmdline}\PYG{o}{.}\PYG{n}{setup\PYGZus{}logging} \PYG{p}{(}\PYG{n}{\PYGZus{}\PYGZus{}name\PYGZus{}\PYGZus{}}\PYG{p}{,} \PYG{n}{options}\PYG{o}{.}\PYG{n}{log\PYGZus{}file}\PYG{p}{,} \PYG{n}{options}\PYG{o}{.}\PYG{n}{verbose}\PYG{p}{)}
+
+
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} start shared drmaa session for all jobs / tasks in pipeline}
+\PYG{c}{\PYGZsh{}}
+\PYG{k+kn}{import} \PYG{n+nn}{drmaa}
+\PYG{n}{drmaa\PYGZus{}session} \PYG{o}{=} \PYG{n}{drmaa}\PYG{o}{.}\PYG{n}{Session}\PYG{p}{(}\PYG{p}{)}
+\PYG{n}{drmaa\PYGZus{}session}\PYG{o}{.}\PYG{n}{initialize}\PYG{p}{(}\PYG{p}{)}
+
+\PYG{n+nd}{@originate}\PYG{p}{(}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{1.chromosome}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{X.chromosome}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{,}
+ \PYG{n}{logger}\PYG{p}{,} \PYG{n}{logger\PYGZus{}mutex}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{create\PYGZus{}test\PYGZus{}files}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{n}{logger}\PYG{p}{,} \PYG{n}{logger\PYGZus{}mutex}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{try}\PYG{p}{:}
+ \PYG{n}{stdout\PYGZus{}res}\PYG{p}{,} \PYG{n}{stderr\PYGZus{}res} \PYG{o}{=} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZdq{}}
+ \PYG{c}{\PYGZsh{} job\PYGZus{}queue\PYGZus{}name and job\PYGZus{}other\PYGZus{}options are defined at the top of this script}
+
+ \PYG{c}{\PYGZsh{}}
+ \PYG{c}{\PYGZsh{} ruffus.drmaa\PYGZus{}wrapper.run\PYGZus{}job}
+ \PYG{c}{\PYGZsh{}}
+ \PYG{n}{stdout\PYGZus{}res}\PYG{p}{,} \PYG{n}{stderr\PYGZus{}res} \PYG{o}{=} \PYG{n}{run\PYGZus{}job}\PYG{p}{(}\PYG{n}{cmd\PYGZus{}str} \PYG{o}{=} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{touch }\PYG{l+s}{\PYGZdq{}} \PYG{o}{+} \PYG{n}{output\PYGZus{}file}\PYG{p}{,}
+ \PYG{n}{job\PYGZus{}name} \PYG{o}{=} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{create\PYGZus{}test\PYGZus{}files}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{n}{logger} \PYG{o}{=} \PYG{n}{logger}\PYG{p}{,}
+ \PYG{n}{drmaa\PYGZus{}session} \PYG{o}{=} \PYG{n}{drmaa\PYGZus{}session}\PYG{p}{,}
+ \PYG{n}{run\PYGZus{}locally} \PYG{o}{=} \PYG{n}{options}\PYG{o}{.}\PYG{n}{local\PYGZus{}run}\PYG{p}{,}
+ \PYG{n}{job\PYGZus{}queue\PYGZus{}name} \PYG{o}{=} \PYG{n}{job\PYGZus{}queue\PYGZus{}name}\PYG{p}{,}
+ \PYG{n}{job\PYGZus{}other\PYGZus{}options} \PYG{o}{=} \PYG{n}{job\PYGZus{}other\PYGZus{}options}\PYG{p}{)}
+
+ \PYG{c}{\PYGZsh{} relay all the stdout, stderr, drmaa output to diagnose failures}
+ \PYG{k}{except} \PYG{n}{error\PYGZus{}drmaa\PYGZus{}job} \PYG{k}{as} \PYG{n}{err}\PYG{p}{:}
+ \PYG{k}{raise} \PYG{n+ne}{Exception}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s+se}{\PYGZbs{}n}\PYG{l+s}{\PYGZdq{}}\PYG{o}{.}\PYG{n}{join}\PYG{p}{(}\PYG{n+nb}{map}\PYG{p}{(}\PYG{n+nb}{str}\PYG{p}{,} \PYG{p}{[}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{Failed to run:}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{touch }\PYG{l+s}{\PYGZdq{}} \PYG{o}{+} \PYG{n}{output\PYGZus{}file}\PYG{p}{,}
+ \PYG{n}{err}\PYG{p}{,}
+ \PYG{n}{stdout\PYGZus{}res}\PYG{p}{,}
+ \PYG{n}{stderr\PYGZus{}res}\PYG{p}{]}\PYG{p}{)}\PYG{p}{)}\PYG{p}{)}
+
+
+\PYG{k}{if} \PYG{n}{\PYGZus{}\PYGZus{}name\PYGZus{}\PYGZus{}} \PYG{o}{==} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{\PYGZus{}\PYGZus{}main\PYGZus{}\PYGZus{}}\PYG{l+s}{\PYGZsq{}}\PYG{p}{:}
+ \PYG{n}{cmdline}\PYG{o}{.}\PYG{n}{run} \PYG{p}{(}\PYG{n}{options}\PYG{p}{,} \PYG{n}{multithread} \PYG{o}{=} \PYG{n}{options}\PYG{o}{.}\PYG{n}{jobs}\PYG{p}{)}
+ \PYG{c}{\PYGZsh{} cleanup drmaa}
+ \PYG{n}{drmaa\PYGZus{}session}\PYG{o}{.}\PYG{n}{exit}\PYG{p}{(}\PYG{p}{)}
+\end{Verbatim}
+\end{quote}
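+
+For trying the script out on a machine without a cluster or a working drmaa installation,
+\texttt{run\_job()} can execute the command locally instead of submitting it. A minimal
+sketch, using only parameters that already appear in the listing above:
+
+\begin{Verbatim}
+import logging
+from ruffus.drmaa_wrapper import run_job
+
+logger = logging.getLogger(__name__)
+
+# Run the command on the local machine: no drmaa_session or queue is needed
+# when run_locally is set (compare options.local_run in the pipeline above)
+stdout_res, stderr_res = run_job(cmd_str     = "touch 1.chromosome",
+                                 run_locally = True,
+                                 logger      = logger)
+\end{Verbatim}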
+
+
+\section{\textbf{Chapter 15}: Python Code for Logging progress through a pipeline}
+\label{tutorials/new_tutorial/logging_code:new-manual-logging-chapter-num-python-code-for-logging-progress-through-a-pipeline}\label{tutorials/new_tutorial/logging_code::doc}\label{tutorials/new_tutorial/logging_code:new-manual-logging-code}
+
+\strong{See also:}
+
+\begin{itemize}
+\item {}
+{\hyperref[tutorials/new_tutorial/manual_contents:new-manual-table-of-contents]{\emph{Manual Table of Contents}}}
+
+\item {}
+Back to \textbf{Chapter 15}: {\hyperref[tutorials/new_tutorial/logging:new-manual-logging]{\emph{Logging progress through a pipeline}}}
+
+\end{itemize}
+
+
+
+
+\subsection{Rotating set of file logs}
+\label{tutorials/new_tutorial/logging_code:rotating-set-of-file-logs}\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{k+kn}{import} \PYG{n+nn}{logging}
+\PYG{k+kn}{import} \PYG{n+nn}{logging.handlers}
+
+\PYG{n}{LOG\PYGZus{}FILENAME} \PYG{o}{=} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{/tmp/ruffus.log}\PYG{l+s}{\PYGZsq{}}
+
+\PYG{c}{\PYGZsh{} Set up a specific logger with our desired output level}
+\PYG{n}{logger} \PYG{o}{=} \PYG{n}{logging}\PYG{o}{.}\PYG{n}{getLogger}\PYG{p}{(}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{My\PYGZus{}Ruffus\PYGZus{}logger}\PYG{l+s}{\PYGZsq{}}\PYG{p}{)}
+\PYG{n}{logger}\PYG{o}{.}\PYG{n}{setLevel}\PYG{p}{(}\PYG{n}{logging}\PYG{o}{.}\PYG{n}{DEBUG}\PYG{p}{)}
+
+\PYG{c}{\PYGZsh{} Rotate a set of 5 log files every 2kb}
+\PYG{n}{handler} \PYG{o}{=} \PYG{n}{logging}\PYG{o}{.}\PYG{n}{handlers}\PYG{o}{.}\PYG{n}{RotatingFileHandler}\PYG{p}{(}
+ \PYG{n}{LOG\PYGZus{}FILENAME}\PYG{p}{,} \PYG{n}{maxBytes}\PYG{o}{=}\PYG{l+m+mi}{2000}\PYG{p}{,} \PYG{n}{backupCount}\PYG{o}{=}\PYG{l+m+mi}{5}\PYG{p}{)}
+
+\PYG{c}{\PYGZsh{} Add the log message handler to the logger}
+\PYG{n}{logger}\PYG{o}{.}\PYG{n}{addHandler}\PYG{p}{(}\PYG{n}{handler}\PYG{p}{)}
+
+\PYG{c}{\PYGZsh{} Ruffus pipeline}
+\PYG{k+kn}{from} \PYG{n+nn}{ruffus} \PYG{k+kn}{import} \PYG{o}{*}
+
+\PYG{c}{\PYGZsh{} Start with some initial data file of yours...}
+\PYG{n}{initial\PYGZus{}file} \PYG{o}{=} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{job1.input}\PYG{l+s}{\PYGZdq{}}
+\PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{initial\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+
+\PYG{n+nd}{@transform}\PYG{p}{(} \PYG{n}{initial\PYGZus{}file}\PYG{p}{,}
+ \PYG{n}{suffix}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.input}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.output1}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{first\PYGZus{}task}\PYG{p}{(}\PYG{n}{input\PYGZus{}file}\PYG{p}{,} \PYG{n}{output\PYGZus{}file}\PYG{p}{)}\PYG{p}{:}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{Some detailed description}\PYG{l+s}{\PYGZdq{}}
+ \PYG{k}{pass}
+
+\PYG{c}{\PYGZsh{} use our custom logging object}
+\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{n}{logger}\PYG{o}{=}\PYG{n}{logger}\PYG{p}{)}
+\PYG{k}{print} \PYG{n+nb}{open}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{/tmp/ruffus.log}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{o}{.}\PYG{n}{read}\PYG{p}{(}\PYG{p}{)}
+\end{Verbatim}
+\end{quote}
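+
+When jobs running in separate processes also need to write log messages, the proxy logger
+and mutex returned by \texttt{cmdline.setup\_logging()} (used in the drmaa example above)
+can be passed to each task. A brief sketch, assuming that helper:
+
+\begin{Verbatim}
+from ruffus import *
+
+parser  = cmdline.get_argparse(description="Logging from within jobs")
+options = parser.parse_args()
+
+# proxy logger + mutex that can safely be handed to multiprocessing jobs
+logger, logger_mutex = cmdline.setup_logging(__name__, options.log_file, options.verbose)
+
+@originate(["a.start", "b.start"], logger, logger_mutex)
+def create_files(output_file, logger, logger_mutex):
+    open(output_file, "w").close()
+    # serialise writes from concurrent jobs through the shared mutex
+    with logger_mutex:
+        logger.info("Created " + output_file)
+
+cmdline.run(options)
+\end{Verbatim}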
+
+
+\section{\textbf{Chapter 16}: Python Code for \emph{@subdivide} tasks to run efficiently and regroup with \emph{@collate}}
+\label{tutorials/new_tutorial/subdivide_collate_code:new-manual-subdivide-collate-code}\label{tutorials/new_tutorial/subdivide_collate_code::doc}\label{tutorials/new_tutorial/subdivide_collate_code:new-manual-subdivide-collate-chapter-num-python-code-for-subdivide-tasks-to-run-efficiently-and-regroup-with-collate}
+
+\strong{See also:}
+
+\begin{itemize}
+\item {}
+{\hyperref[tutorials/new_tutorial/manual_contents:new-manual-table-of-contents]{\emph{Manual Table of Contents}}}
+
+\item {}
+{\hyperref[decorators/jobs_limit:decorators-jobs-limit]{\emph{@jobs\_limit}}} syntax
+
+\item {}
+{\hyperref[pipeline_functions:pipeline-functions-pipeline-run]{\emph{pipeline\_run()}}} syntax
+
+\item {}
+{\hyperref[drmaa_wrapper_functions:drmaa-wrapper-run-job]{\emph{drmaa\_wrapper.run\_job()}}} syntax
+
+\item {}
+Back to \textbf{Chapter 16}: {\hyperref[tutorials/new_tutorial/subdivide_collate:new-manual-subdivide-collate]{\emph{@subdivide tasks to run efficiently and regroup with @collate}}}
+
+\end{itemize}
+
+
+
+
+\subsection{\emph{@subdivide} and regroup with \emph{@collate} example}
+\label{tutorials/new_tutorial/subdivide_collate_code:subdivide-and-regroup-with-collate-example}\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{k+kn}{from} \PYG{n+nn}{ruffus} \PYG{k+kn}{import} \PYG{o}{*}
+\PYG{k+kn}{import} \PYG{n+nn}{os}\PYG{o}{,} \PYG{n+nn}{random}\PYG{o}{,} \PYG{n+nn}{sys}
+
+\PYG{c}{\PYGZsh{} Create files with a random number of lines}
+\PYG{n+nd}{@originate}\PYG{p}{(}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{a.start}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{b.start}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{c.start}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{create\PYGZus{}test\PYGZus{}files}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{)}\PYG{p}{:}
+ \PYG{n}{cnt\PYGZus{}lines} \PYG{o}{=} \PYG{n}{random}\PYG{o}{.}\PYG{n}{randint}\PYG{p}{(}\PYG{l+m+mi}{1}\PYG{p}{,}\PYG{l+m+mi}{3}\PYG{p}{)} \PYG{o}{*} \PYG{l+m+mi}{2}
+ \PYG{k}{with} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)} \PYG{k}{as} \PYG{n}{oo}\PYG{p}{:}
+ \PYG{k}{for} \PYG{n}{ii} \PYG{o+ow}{in} \PYG{n+nb}{range}\PYG{p}{(}\PYG{n}{cnt\PYGZus{}lines}\PYG{p}{)}\PYG{p}{:}
+ \PYG{n}{oo}\PYG{o}{.}\PYG{n}{write}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{data item = }\PYG{l+s+si}{\PYGZpc{}d}\PYG{l+s+se}{\PYGZbs{}n}\PYG{l+s}{\PYGZdq{}} \PYG{o}{\PYGZpc{}} \PYG{n}{ii}\PYG{p}{)}
+ \PYG{k}{print} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{ }\PYG{l+s+si}{\PYGZpc{}s}\PYG{l+s}{ has }\PYG{l+s+si}{\PYGZpc{}d}\PYG{l+s}{ lines}\PYG{l+s}{\PYGZdq{}} \PYG{o}{\PYGZpc{}} \PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{n}{cnt\PYGZus{}lines}\PYG{p}{)}
+
+
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} subdivide the input files into NNN fragment files of 2 lines each}
+\PYG{c}{\PYGZsh{}}
+\PYG{n+nd}{@subdivide}\PYG{p}{(} \PYG{n}{create\PYGZus{}test\PYGZus{}files}\PYG{p}{,}
+ \PYG{n}{formatter}\PYG{p}{(}\PYG{p}{)}\PYG{p}{,}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}path[0]\PYGZcb{}/\PYGZob{}basename[0]\PYGZcb{}.*.fragment}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}path[0]\PYGZcb{}/\PYGZob{}basename[0]\PYGZcb{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
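+ \PYG{c}{\PYGZsh{} the *.fragment wildcard above declares which output files each job will create}
+ \PYG{c}{\PYGZsh{} the final argument is passed through unchanged as output\PYGZus{}file\PYGZus{}name\PYGZus{}stem}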
+\PYG{k}{def} \PYG{n+nf}{subdivide\PYGZus{}files}\PYG{p}{(}\PYG{n}{input\PYGZus{}file}\PYG{p}{,} \PYG{n}{output\PYGZus{}files}\PYG{p}{,} \PYG{n}{output\PYGZus{}file\PYGZus{}name\PYGZus{}stem}\PYG{p}{)}\PYG{p}{:}
+ \PYG{c}{\PYGZsh{}}
+ \PYG{c}{\PYGZsh{} cleanup any previous results}
+ \PYG{c}{\PYGZsh{}}
+ \PYG{k}{for} \PYG{n}{oo} \PYG{o+ow}{in} \PYG{n}{output\PYGZus{}files}\PYG{p}{:}
+ \PYG{n}{os}\PYG{o}{.}\PYG{n}{unlink}\PYG{p}{(}\PYG{n}{oo}\PYG{p}{)}
+ \PYG{c}{\PYGZsh{}}
+ \PYG{c}{\PYGZsh{} Output files contain two lines each}
+ \PYG{c}{\PYGZsh{} (new output files every even line)}
+ \PYG{c}{\PYGZsh{}}
+ \PYG{n}{cnt\PYGZus{}output\PYGZus{}files} \PYG{o}{=} \PYG{l+m+mi}{0}
+ \PYG{k}{for} \PYG{n}{ii}\PYG{p}{,} \PYG{n}{line} \PYG{o+ow}{in} \PYG{n+nb}{enumerate}\PYG{p}{(}\PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{input\PYGZus{}file}\PYG{p}{)}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{if} \PYG{n}{ii} \PYG{o}{\PYGZpc{}} \PYG{l+m+mi}{2} \PYG{o}{==} \PYG{l+m+mi}{0}\PYG{p}{:}
+ \PYG{n}{cnt\PYGZus{}output\PYGZus{}files} \PYG{o}{+}\PYG{o}{=} \PYG{l+m+mi}{1}
+ \PYG{n}{output\PYGZus{}file\PYGZus{}name} \PYG{o}{=} \PYG{l+s}{\PYGZdq{}}\PYG{l+s+si}{\PYGZpc{}s}\PYG{l+s}{.}\PYG{l+s+si}{\PYGZpc{}d}\PYG{l+s}{.fragment}\PYG{l+s}{\PYGZdq{}} \PYG{o}{\PYGZpc{}} \PYG{p}{(}\PYG{n}{output\PYGZus{}file\PYGZus{}name\PYGZus{}stem}\PYG{p}{,} \PYG{n}{cnt\PYGZus{}output\PYGZus{}files}\PYG{p}{)}
+ \PYG{n}{output\PYGZus{}file} \PYG{o}{=} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file\PYGZus{}name}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+ \PYG{k}{print} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{ Subdivide }\PYG{l+s+si}{\PYGZpc{}s}\PYG{l+s}{ \PYGZhy{}\PYGZgt{} }\PYG{l+s+si}{\PYGZpc{}s}\PYG{l+s}{\PYGZdq{}} \PYG{o}{\PYGZpc{}} \PYG{p}{(}\PYG{n}{input\PYGZus{}file}\PYG{p}{,} \PYG{n}{output\PYGZus{}file\PYGZus{}name}\PYG{p}{)}
+ \PYG{n}{output\PYGZus{}file}\PYG{o}{.}\PYG{n}{write}\PYG{p}{(}\PYG{n}{line}\PYG{p}{)}
+
+
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} Analyse each fragment independently}
+\PYG{c}{\PYGZsh{}}
+\PYG{n+nd}{@transform}\PYG{p}{(}\PYG{n}{subdivide\PYGZus{}files}\PYG{p}{,} \PYG{n}{suffix}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.fragment}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.analysed}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{analyse\PYGZus{}fragments}\PYG{p}{(}\PYG{n}{input\PYGZus{}file}\PYG{p}{,} \PYG{n}{output\PYGZus{}file}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{print} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{ Analysing }\PYG{l+s+si}{\PYGZpc{}s}\PYG{l+s}{ \PYGZhy{}\PYGZgt{} }\PYG{l+s+si}{\PYGZpc{}s}\PYG{l+s}{\PYGZdq{}} \PYG{o}{\PYGZpc{}} \PYG{p}{(}\PYG{n}{input\PYGZus{}file}\PYG{p}{,} \PYG{n}{output\PYGZus{}file}\PYG{p}{)}
+ \PYG{k}{with} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)} \PYG{k}{as} \PYG{n}{oo}\PYG{p}{:}
+ \PYG{k}{for} \PYG{n}{line} \PYG{o+ow}{in} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{input\PYGZus{}file}\PYG{p}{)}\PYG{p}{:}
+ \PYG{n}{oo}\PYG{o}{.}\PYG{n}{write}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{analysed }\PYG{l+s}{\PYGZdq{}} \PYG{o}{+} \PYG{n}{line}\PYG{p}{)}
+
+
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} Group results using original names}
+\PYG{c}{\PYGZsh{}}
+\PYG{n+nd}{@collate}\PYG{p}{(} \PYG{n}{analyse\PYGZus{}fragments}\PYG{p}{,}
+
+ \PYG{c}{\PYGZsh{} split file name into [abc].NUMBER.analysed}
+ \PYG{n}{formatter}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{/(?P\PYGZlt{}NAME\PYGZgt{}[abc]+)}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{.}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{d+}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{.analysed\PYGZdl{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,}
+
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}path[0]\PYGZcb{}/\PYGZob{}NAME[0]\PYGZcb{}.final\PYGZus{}result}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{recombine\PYGZus{}analyses}\PYG{p}{(}\PYG{n}{input\PYGZus{}file\PYGZus{}names}\PYG{p}{,} \PYG{n}{output\PYGZus{}file}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{with} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)} \PYG{k}{as} \PYG{n}{oo}\PYG{p}{:}
+ \PYG{k}{for} \PYG{n}{input\PYGZus{}file} \PYG{o+ow}{in} \PYG{n}{input\PYGZus{}file\PYGZus{}names}\PYG{p}{:}
+ \PYG{k}{print} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{ Recombine }\PYG{l+s+si}{\PYGZpc{}s}\PYG{l+s}{ \PYGZhy{}\PYGZgt{} }\PYG{l+s+si}{\PYGZpc{}s}\PYG{l+s}{\PYGZdq{}} \PYG{o}{\PYGZpc{}} \PYG{p}{(}\PYG{n}{input\PYGZus{}file}\PYG{p}{,} \PYG{n}{output\PYGZus{}file}\PYG{p}{)}
+ \PYG{k}{for} \PYG{n}{line} \PYG{o+ow}{in} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{input\PYGZus{}file}\PYG{p}{)}\PYG{p}{:}
+ \PYG{n}{oo}\PYG{o}{.}\PYG{n}{write}\PYG{p}{(}\PYG{n}{line}\PYG{p}{)}
+
+
+
+
+\PYG{c}{\PYGZsh{}pipeline\PYGZus{}printout(sys.stdout, verbose = 3)}
+
+
+\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{n}{verbose} \PYG{o}{=} \PYG{l+m+mi}{1}\PYG{p}{)}
+\end{Verbatim}
+
+This results in:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{n}{verbose} \PYG{o}{=} \PYG{l+m+mi}{1}\PYG{p}{)}
+
+\PYG{g+go}{ a.start has 2 lines}
+\PYG{g+go}{ Job = [None \PYGZhy{}\PYGZgt{} a.start] completed}
+\PYG{g+go}{ b.start has 6 lines}
+\PYG{g+go}{ Job = [None \PYGZhy{}\PYGZgt{} b.start] completed}
+\PYG{g+go}{ c.start has 6 lines}
+\PYG{g+go}{ Job = [None \PYGZhy{}\PYGZgt{} c.start] completed}
+\PYG{g+go}{Completed Task = create\PYGZus{}test\PYGZus{}files}
+
+\PYG{g+go}{ Subdivide a.start \PYGZhy{}\PYGZgt{} /home/lg/temp/a.1.fragment}
+\PYG{g+go}{ Job = [a.start \PYGZhy{}\PYGZgt{} a.*.fragment, a] completed}
+\PYG{g+go}{ Subdivide b.start \PYGZhy{}\PYGZgt{} /home/lg/temp/b.1.fragment}
+\PYG{g+go}{ Subdivide b.start \PYGZhy{}\PYGZgt{} /home/lg/temp/b.2.fragment}
+\PYG{g+go}{ Subdivide b.start \PYGZhy{}\PYGZgt{} /home/lg/temp/b.3.fragment}
+\PYG{g+go}{ Job = [b.start \PYGZhy{}\PYGZgt{} b.*.fragment, b] completed}
+\PYG{g+go}{ Subdivide c.start \PYGZhy{}\PYGZgt{} /home/lg/temp/c.1.fragment}
+\PYG{g+go}{ Subdivide c.start \PYGZhy{}\PYGZgt{} /home/lg/temp/c.2.fragment}
+\PYG{g+go}{ Subdivide c.start \PYGZhy{}\PYGZgt{} /home/lg/temp/c.3.fragment}
+\PYG{g+go}{ Job = [c.start \PYGZhy{}\PYGZgt{} c.*.fragment, c] completed}
+\PYG{g+go}{Completed Task = subdivide\PYGZus{}files}
+
+\PYG{g+go}{ Analysing /home/lg/temp/a.1.fragment \PYGZhy{}\PYGZgt{} /home/lg/temp/a.1.analysed}
+\PYG{g+go}{ Job = [a.1.fragment \PYGZhy{}\PYGZgt{} a.1.analysed] completed}
+\PYG{g+go}{ Analysing /home/lg/temp/b.1.fragment \PYGZhy{}\PYGZgt{} /home/lg/temp/b.1.analysed}
+\PYG{g+go}{ Job = [b.1.fragment \PYGZhy{}\PYGZgt{} b.1.analysed] completed}
+\PYG{g+go}{ Analysing /home/lg/temp/b.2.fragment \PYGZhy{}\PYGZgt{} /home/lg/temp/b.2.analysed}
+\PYG{g+go}{ Job = [b.2.fragment \PYGZhy{}\PYGZgt{} b.2.analysed] completed}
+\PYG{g+go}{ Analysing /home/lg/temp/b.3.fragment \PYGZhy{}\PYGZgt{} /home/lg/temp/b.3.analysed}
+\PYG{g+go}{ Job = [b.3.fragment \PYGZhy{}\PYGZgt{} b.3.analysed] completed}
+\PYG{g+go}{ Analysing /home/lg/temp/c.1.fragment \PYGZhy{}\PYGZgt{} /home/lg/temp/c.1.analysed}
+\PYG{g+go}{ Job = [c.1.fragment \PYGZhy{}\PYGZgt{} c.1.analysed] completed}
+\PYG{g+go}{ Analysing /home/lg/temp/c.2.fragment \PYGZhy{}\PYGZgt{} /home/lg/temp/c.2.analysed}
+\PYG{g+go}{ Job = [c.2.fragment \PYGZhy{}\PYGZgt{} c.2.analysed] completed}
+\PYG{g+go}{ Analysing /home/lg/temp/c.3.fragment \PYGZhy{}\PYGZgt{} /home/lg/temp/c.3.analysed}
+\PYG{g+go}{ Job = [c.3.fragment \PYGZhy{}\PYGZgt{} c.3.analysed] completed}
+\PYG{g+go}{Completed Task = analyse\PYGZus{}fragments}
+
+\PYG{g+go}{ Recombine /home/lg/temp/a.1.analysed \PYGZhy{}\PYGZgt{} /home/lg/temp/a.final\PYGZus{}result}
+\PYG{g+go}{ Job = [[a.1.analysed] \PYGZhy{}\PYGZgt{} a.final\PYGZus{}result] completed}
+\PYG{g+go}{ Recombine /home/lg/temp/b.1.analysed \PYGZhy{}\PYGZgt{} /home/lg/temp/b.final\PYGZus{}result}
+\PYG{g+go}{ Recombine /home/lg/temp/b.2.analysed \PYGZhy{}\PYGZgt{} /home/lg/temp/b.final\PYGZus{}result}
+\PYG{g+go}{ Recombine /home/lg/temp/b.3.analysed \PYGZhy{}\PYGZgt{} /home/lg/temp/b.final\PYGZus{}result}
+\PYG{g+go}{ Job = [[b.1.analysed, b.2.analysed, b.3.analysed] \PYGZhy{}\PYGZgt{} b.final\PYGZus{}result] completed}
+\PYG{g+go}{ Recombine /home/lg/temp/c.1.analysed \PYGZhy{}\PYGZgt{} /home/lg/temp/c.final\PYGZus{}result}
+\PYG{g+go}{ Recombine /home/lg/temp/c.2.analysed \PYGZhy{}\PYGZgt{} /home/lg/temp/c.final\PYGZus{}result}
+\PYG{g+go}{ Recombine /home/lg/temp/c.3.analysed \PYGZhy{}\PYGZgt{} /home/lg/temp/c.final\PYGZus{}result}
+\PYG{g+go}{ Job = [[c.1.analysed, c.2.analysed, c.3.analysed] \PYGZhy{}\PYGZgt{} c.final\PYGZus{}result] completed}
+\PYG{g+go}{Completed Task = recombine\PYGZus{}analyses}
+\end{Verbatim}
+\end{quote}
+
+
+\section{\textbf{Chapter 17}: Python Code for \emph{@combinations}, \emph{@permutations} and all versus all \emph{@product}}
+\label{tutorials/new_tutorial/combinatorics_code:new-manual-combinatorics-chapter-num-python-code-for-combinations-permutations-and-all-versus-all-product}\label{tutorials/new_tutorial/combinatorics_code::doc}\label{tutorials/new_tutorial/combinatorics_code:new-manual-combinatorics-code}
+
+\strong{See also:}
+
+\begin{itemize}
+\item {}
+{\hyperref[tutorials/new_tutorial/manual_contents:new-manual-table-of-contents]{\emph{Manual Table of Contents}}}
+
+\item {}
+{\hyperref[decorators/combinations_with_replacement:decorators-combinations-with-replacement]{\emph{@combinations\_with\_replacement}}}
+
+\item {}
+{\hyperref[decorators/combinations:decorators-combinations]{\emph{@combinations}}}
+
+\item {}
+{\hyperref[decorators/permutations:decorators-permutations]{\emph{@permutations}}}
+
+\item {}
+{\hyperref[decorators/product:decorators-product]{\emph{@product}}}
+
+\item {}
+Back to \textbf{Chapter 17}: {\hyperref[tutorials/new_tutorial/combinatorics:new-manual-combinatorics]{\emph{@combinations, @permutations and all versus all @product}}}
+
+\end{itemize}
+
+
+
+
+\subsection{Example code for \emph{@product}}
+\label{tutorials/new_tutorial/combinatorics_code:example-code-for-product}\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{k+kn}{from} \PYG{n+nn}{ruffus} \PYG{k+kn}{import} \PYG{o}{*}
+\PYG{k+kn}{from} \PYG{n+nn}{ruffus.combinatorics} \PYG{k+kn}{import} \PYG{o}{*}
+
+\PYG{c}{\PYGZsh{} Three sets of initial files}
+\PYG{n+nd}{@originate}\PYG{p}{(}\PYG{p}{[} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{a.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{b.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{create\PYGZus{}initial\PYGZus{}files\PYGZus{}ab}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{with} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)} \PYG{k}{as} \PYG{n}{oo}\PYG{p}{:} \PYG{k}{pass}
+
+\PYG{n+nd}{@originate}\PYG{p}{(}\PYG{p}{[} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{p.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{q.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{create\PYGZus{}initial\PYGZus{}files\PYGZus{}pq}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{with} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)} \PYG{k}{as} \PYG{n}{oo}\PYG{p}{:} \PYG{k}{pass}
+
+\PYG{n+nd}{@originate}\PYG{p}{(}\PYG{p}{[} \PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{x.1\PYGZus{}start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{x.2\PYGZus{}start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]}\PYG{p}{,}
+ \PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{y.1\PYGZus{}start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{y.2\PYGZus{}start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]} \PYG{p}{]}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{create\PYGZus{}initial\PYGZus{}files\PYGZus{}xy}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{with} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)} \PYG{k}{as} \PYG{n}{oo}\PYG{p}{:} \PYG{k}{pass}
+
+\PYG{c}{\PYGZsh{} @product}
+\PYG{n+nd}{@product}\PYG{p}{(} \PYG{n}{create\PYGZus{}initial\PYGZus{}files\PYGZus{}ab}\PYG{p}{,} \PYG{c}{\PYGZsh{} Input}
+ \PYG{n}{formatter}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{(.start)\PYGZdl{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{c}{\PYGZsh{} match input file set \PYGZsh{} 1}
+
+ \PYG{n}{create\PYGZus{}initial\PYGZus{}files\PYGZus{}pq}\PYG{p}{,} \PYG{c}{\PYGZsh{} Input}
+ \PYG{n}{formatter}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{(.start)\PYGZdl{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{c}{\PYGZsh{} match input file set \PYGZsh{} 2}
+
+ \PYG{n}{create\PYGZus{}initial\PYGZus{}files\PYGZus{}xy}\PYG{p}{,} \PYG{c}{\PYGZsh{} Input}
+ \PYG{n}{formatter}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{(.start)\PYGZdl{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{c}{\PYGZsh{} match input file set \PYGZsh{} 3}
+
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}path[0][0]\PYGZcb{}/}\PYG{l+s}{\PYGZdq{}} \PYG{c}{\PYGZsh{} Output Replacement string}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}basename[0][0]\PYGZcb{}\PYGZus{}vs\PYGZus{}}\PYG{l+s}{\PYGZdq{}} \PYG{c}{\PYGZsh{}}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}basename[1][0]\PYGZcb{}\PYGZus{}vs\PYGZus{}}\PYG{l+s}{\PYGZdq{}} \PYG{c}{\PYGZsh{}}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}basename[2][0]\PYGZcb{}.product}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{c}{\PYGZsh{}}
+
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}path[0][0]\PYGZcb{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{c}{\PYGZsh{} Extra parameter: path for 1st set of files, 1st file name}
+
+ \PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}basename[0][0]\PYGZcb{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{c}{\PYGZsh{} Extra parameter: basename for 1st set of files, 1st file name}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}basename[1][0]\PYGZcb{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{c}{\PYGZsh{} 2nd}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}basename[2][0]\PYGZcb{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{c}{\PYGZsh{} 3rd}
+ \PYG{p}{]}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{product\PYGZus{}task}\PYG{p}{(}\PYG{n}{input\PYGZus{}file}\PYG{p}{,} \PYG{n}{output\PYGZus{}parameter}\PYG{p}{,} \PYG{n}{shared\PYGZus{}path}\PYG{p}{,} \PYG{n}{basenames}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{print} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZsh{} basenames = }\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{ }\PYG{l+s}{\PYGZdq{}}\PYG{o}{.}\PYG{n}{join}\PYG{p}{(}\PYG{n}{basenames}\PYG{p}{)}
+ \PYG{k}{print} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{input\PYGZus{}parameter = }\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{n}{input\PYGZus{}file}
+ \PYG{k}{print} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{output\PYGZus{}parameter = }\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{n}{output\PYGZus{}parameter}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s+se}{\PYGZbs{}n}\PYG{l+s}{\PYGZdq{}}
+
+
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} Run}
+\PYG{c}{\PYGZsh{}}
+\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{n}{verbose}\PYG{o}{=}\PYG{l+m+mi}{0}\PYG{p}{)}
+\end{Verbatim}
+
+This results in:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{n}{verbose}\PYG{o}{=}\PYG{l+m+mi}{0}\PYG{p}{)}
+
+\PYG{g+go}{\PYGZsh{} basenames = a p x}
+\PYG{g+go}{input\PYGZus{}parameter = (\PYGZsq{}a.start\PYGZsq{}, \PYGZsq{}p.start\PYGZsq{}, \PYGZsq{}x.start\PYGZsq{})}
+\PYG{g+go}{output\PYGZus{}parameter = /home/lg/temp/a\PYGZus{}vs\PYGZus{}p\PYGZus{}vs\PYGZus{}x.product}
+
+\PYG{g+go}{\PYGZsh{} basenames = a p y}
+\PYG{g+go}{input\PYGZus{}parameter = (\PYGZsq{}a.start\PYGZsq{}, \PYGZsq{}p.start\PYGZsq{}, \PYGZsq{}y.start\PYGZsq{})}
+\PYG{g+go}{output\PYGZus{}parameter = /home/lg/temp/a\PYGZus{}vs\PYGZus{}p\PYGZus{}vs\PYGZus{}y.product}
+
+\PYG{g+go}{\PYGZsh{} basenames = a q x}
+\PYG{g+go}{input\PYGZus{}parameter = (\PYGZsq{}a.start\PYGZsq{}, \PYGZsq{}q.start\PYGZsq{}, \PYGZsq{}x.start\PYGZsq{})}
+\PYG{g+go}{output\PYGZus{}parameter = /home/lg/temp/a\PYGZus{}vs\PYGZus{}q\PYGZus{}vs\PYGZus{}x.product}
+
+\PYG{g+go}{\PYGZsh{} basenames = a q y}
+\PYG{g+go}{input\PYGZus{}parameter = (\PYGZsq{}a.start\PYGZsq{}, \PYGZsq{}q.start\PYGZsq{}, \PYGZsq{}y.start\PYGZsq{})}
+\PYG{g+go}{output\PYGZus{}parameter = /home/lg/temp/a\PYGZus{}vs\PYGZus{}q\PYGZus{}vs\PYGZus{}y.product}
+
+\PYG{g+go}{\PYGZsh{} basenames = b p x}
+\PYG{g+go}{input\PYGZus{}parameter = (\PYGZsq{}b.start\PYGZsq{}, \PYGZsq{}p.start\PYGZsq{}, \PYGZsq{}x.start\PYGZsq{})}
+\PYG{g+go}{output\PYGZus{}parameter = /home/lg/temp/b\PYGZus{}vs\PYGZus{}p\PYGZus{}vs\PYGZus{}x.product}
+
+\PYG{g+go}{\PYGZsh{} basenames = b p y}
+\PYG{g+go}{input\PYGZus{}parameter = (\PYGZsq{}b.start\PYGZsq{}, \PYGZsq{}p.start\PYGZsq{}, \PYGZsq{}y.start\PYGZsq{})}
+\PYG{g+go}{output\PYGZus{}parameter = /home/lg/temp/b\PYGZus{}vs\PYGZus{}p\PYGZus{}vs\PYGZus{}y.product}
+
+\PYG{g+go}{\PYGZsh{} basenames = b q x}
+\PYG{g+go}{input\PYGZus{}parameter = (\PYGZsq{}b.start\PYGZsq{}, \PYGZsq{}q.start\PYGZsq{}, \PYGZsq{}x.start\PYGZsq{})}
+\PYG{g+go}{output\PYGZus{}parameter = /home/lg/temp/b\PYGZus{}vs\PYGZus{}q\PYGZus{}vs\PYGZus{}x.product}
+
+\PYG{g+go}{\PYGZsh{} basenames = b q y}
+\PYG{g+go}{input\PYGZus{}parameter = (\PYGZsq{}b.start\PYGZsq{}, \PYGZsq{}q.start\PYGZsq{}, \PYGZsq{}y.start\PYGZsq{})}
+\PYG{g+go}{output\PYGZus{}parameter = /home/lg/temp/b\PYGZus{}vs\PYGZus{}q\PYGZus{}vs\PYGZus{}y.product}
+\end{Verbatim}
+\end{quote}
+\end{quote}
+
+
+\subsection{Example code for \emph{@permutations}}
+\label{tutorials/new_tutorial/combinatorics_code:example-code-for-permutations}\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{k+kn}{from} \PYG{n+nn}{ruffus} \PYG{k+kn}{import} \PYG{o}{*}
+\PYG{k+kn}{from} \PYG{n+nn}{ruffus.combinatorics} \PYG{k+kn}{import} \PYG{o}{*}
+
+\PYG{c}{\PYGZsh{} initial file pairs}
+\PYG{n+nd}{@originate}\PYG{p}{(}\PYG{p}{[} \PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{A.1\PYGZus{}start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{A.2\PYGZus{}start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]}\PYG{p}{,}
+ \PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{B.1\PYGZus{}start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{B.2\PYGZus{}start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]}\PYG{p}{,}
+ \PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{C.1\PYGZus{}start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{C.2\PYGZus{}start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]}\PYG{p}{,}
+ \PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{D.1\PYGZus{}start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{D.2\PYGZus{}start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]}\PYG{p}{]}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{create\PYGZus{}initial\PYGZus{}files\PYGZus{}ABCD}\PYG{p}{(}\PYG{n}{output\PYGZus{}files}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{for} \PYG{n}{output\PYGZus{}file} \PYG{o+ow}{in} \PYG{n}{output\PYGZus{}files}\PYG{p}{:}
+ \PYG{k}{with} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)} \PYG{k}{as} \PYG{n}{oo}\PYG{p}{:} \PYG{k}{pass}
+
+\PYG{c}{\PYGZsh{} @permutations}
+\PYG{n+nd}{@permutations}\PYG{p}{(}\PYG{n}{create\PYGZus{}initial\PYGZus{}files\PYGZus{}ABCD}\PYG{p}{,} \PYG{c}{\PYGZsh{} Input}
+ \PYG{n}{formatter}\PYG{p}{(}\PYG{p}{)}\PYG{p}{,} \PYG{c}{\PYGZsh{} match input files}
+
+ \PYG{c}{\PYGZsh{} tuple of 2 at a time}
+ \PYG{l+m+mi}{2}\PYG{p}{,}
+
+ \PYG{c}{\PYGZsh{} Output Replacement string}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}path[0][0]\PYGZcb{}/}\PYG{l+s}{\PYGZdq{}}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}basename[0][1]\PYGZcb{}\PYGZus{}vs\PYGZus{}}\PYG{l+s}{\PYGZdq{}}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}basename[1][1]\PYGZcb{}.permutations}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+
+ \PYG{c}{\PYGZsh{} Extra parameter: path for 1st set of files, 1st file name}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}path[0][0]\PYGZcb{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+
+ \PYG{c}{\PYGZsh{} Extra parameter}
+ \PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}basename[0][0]\PYGZcb{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{c}{\PYGZsh{} basename for 1st set of files, 1st file name}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}basename[1][0]\PYGZcb{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{c}{\PYGZsh{} 2nd}
+ \PYG{p}{]}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{permutations\PYGZus{}task}\PYG{p}{(}\PYG{n}{input\PYGZus{}file}\PYG{p}{,} \PYG{n}{output\PYGZus{}parameter}\PYG{p}{,} \PYG{n}{shared\PYGZus{}path}\PYG{p}{,} \PYG{n}{basenames}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{print} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{ \PYGZhy{} }\PYG{l+s}{\PYGZdq{}}\PYG{o}{.}\PYG{n}{join}\PYG{p}{(}\PYG{n}{basenames}\PYG{p}{)}
+
+
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} Run}
+\PYG{c}{\PYGZsh{}}
+\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{n}{verbose}\PYG{o}{=}\PYG{l+m+mi}{0}\PYG{p}{)}
+\end{Verbatim}
+
+This results in:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{n}{verbose}\PYG{o}{=}\PYG{l+m+mi}{0}\PYG{p}{)}
+
+\PYG{g+go}{A \PYGZhy{} B}
+\PYG{g+go}{A \PYGZhy{} C}
+\PYG{g+go}{A \PYGZhy{} D}
+\PYG{g+go}{B \PYGZhy{} A}
+\PYG{g+go}{B \PYGZhy{} C}
+\PYG{g+go}{B \PYGZhy{} D}
+\PYG{g+go}{C \PYGZhy{} A}
+\PYG{g+go}{C \PYGZhy{} B}
+\PYG{g+go}{C \PYGZhy{} D}
+\PYG{g+go}{D \PYGZhy{} A}
+\PYG{g+go}{D \PYGZhy{} B}
+\PYG{g+go}{D \PYGZhy{} C}
+\end{Verbatim}
+\end{quote}
+\end{quote}
+
+
+\subsection{Example code for \emph{@combinations}}
+\label{tutorials/new_tutorial/combinatorics_code:example-code-for-combinations}\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{k+kn}{from} \PYG{n+nn}{ruffus} \PYG{k+kn}{import} \PYG{o}{*}
+\PYG{k+kn}{from} \PYG{n+nn}{ruffus.combinatorics} \PYG{k+kn}{import} \PYG{o}{*}
+
+\PYG{c}{\PYGZsh{} initial file pairs}
+\PYG{n+nd}{@originate}\PYG{p}{(}\PYG{p}{[} \PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{A.1\PYGZus{}start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{A.2\PYGZus{}start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]}\PYG{p}{,}
+ \PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{B.1\PYGZus{}start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{B.2\PYGZus{}start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]}\PYG{p}{,}
+ \PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{C.1\PYGZus{}start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{C.2\PYGZus{}start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]}\PYG{p}{,}
+ \PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{D.1\PYGZus{}start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{D.2\PYGZus{}start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]}\PYG{p}{]}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{create\PYGZus{}initial\PYGZus{}files\PYGZus{}ABCD}\PYG{p}{(}\PYG{n}{output\PYGZus{}files}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{for} \PYG{n}{output\PYGZus{}file} \PYG{o+ow}{in} \PYG{n}{output\PYGZus{}files}\PYG{p}{:}
+ \PYG{k}{with} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)} \PYG{k}{as} \PYG{n}{oo}\PYG{p}{:} \PYG{k}{pass}
+
+\PYG{c}{\PYGZsh{} @combinations}
+\PYG{n+nd}{@combinations}\PYG{p}{(}\PYG{n}{create\PYGZus{}initial\PYGZus{}files\PYGZus{}ABCD}\PYG{p}{,} \PYG{c}{\PYGZsh{} Input}
+ \PYG{n}{formatter}\PYG{p}{(}\PYG{p}{)}\PYG{p}{,} \PYG{c}{\PYGZsh{} match input files}
+
+ \PYG{c}{\PYGZsh{} tuple of 3 at a time}
+ \PYG{l+m+mi}{3}\PYG{p}{,}
+
+ \PYG{c}{\PYGZsh{} Output Replacement string}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}path[0][0]\PYGZcb{}/}\PYG{l+s}{\PYGZdq{}}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}basename[0][1]\PYGZcb{}\PYGZus{}vs\PYGZus{}}\PYG{l+s}{\PYGZdq{}}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}basename[1][1]\PYGZcb{}\PYGZus{}vs\PYGZus{}}\PYG{l+s}{\PYGZdq{}}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}basename[2][1]\PYGZcb{}.combinations}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+
+ \PYG{c}{\PYGZsh{} Extra parameter: path for 1st set of files, 1st file name}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}path[0][0]\PYGZcb{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+
+ \PYG{c}{\PYGZsh{} Extra parameter}
+ \PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}basename[0][0]\PYGZcb{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{c}{\PYGZsh{} basename for 1st set of files, 1st file name}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}basename[1][0]\PYGZcb{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{c}{\PYGZsh{} 2nd}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}basename[2][0]\PYGZcb{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{c}{\PYGZsh{} 3rd}
+ \PYG{p}{]}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{combinations\PYGZus{}task}\PYG{p}{(}\PYG{n}{input\PYGZus{}file}\PYG{p}{,} \PYG{n}{output\PYGZus{}parameter}\PYG{p}{,} \PYG{n}{shared\PYGZus{}path}\PYG{p}{,} \PYG{n}{basenames}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{print} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{ \PYGZhy{} }\PYG{l+s}{\PYGZdq{}}\PYG{o}{.}\PYG{n}{join}\PYG{p}{(}\PYG{n}{basenames}\PYG{p}{)}
+
+
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} Run}
+\PYG{c}{\PYGZsh{}}
+\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{n}{verbose}\PYG{o}{=}\PYG{l+m+mi}{0}\PYG{p}{)}
+\end{Verbatim}
+
+This results in:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{n}{verbose}\PYG{o}{=}\PYG{l+m+mi}{0}\PYG{p}{)}
+\PYG{g+go}{A \PYGZhy{} B \PYGZhy{} C}
+\PYG{g+go}{A \PYGZhy{} B \PYGZhy{} D}
+\PYG{g+go}{A \PYGZhy{} C \PYGZhy{} D}
+\PYG{g+go}{B \PYGZhy{} C \PYGZhy{} D}
+\end{Verbatim}
+\end{quote}
+\end{quote}
+
+
+\subsection{Example code for \emph{@combinations\_with\_replacement}}
+\label{tutorials/new_tutorial/combinatorics_code:example-code-for-combinations-with-replacement}\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{k+kn}{from} \PYG{n+nn}{ruffus} \PYG{k+kn}{import} \PYG{o}{*}
+\PYG{k+kn}{from} \PYG{n+nn}{ruffus.combinatorics} \PYG{k+kn}{import} \PYG{o}{*}
+
+\PYG{c}{\PYGZsh{} initial file pairs}
+\PYG{n+nd}{@originate}\PYG{p}{(}\PYG{p}{[} \PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{A.1\PYGZus{}start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{A.2\PYGZus{}start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]}\PYG{p}{,}
+ \PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{B.1\PYGZus{}start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{B.2\PYGZus{}start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]}\PYG{p}{,}
+ \PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{C.1\PYGZus{}start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{C.2\PYGZus{}start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]}\PYG{p}{,}
+ \PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{D.1\PYGZus{}start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{D.2\PYGZus{}start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]}\PYG{p}{]}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{create\PYGZus{}initial\PYGZus{}files\PYGZus{}ABCD}\PYG{p}{(}\PYG{n}{output\PYGZus{}files}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{for} \PYG{n}{output\PYGZus{}file} \PYG{o+ow}{in} \PYG{n}{output\PYGZus{}files}\PYG{p}{:}
+ \PYG{k}{with} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)} \PYG{k}{as} \PYG{n}{oo}\PYG{p}{:} \PYG{k}{pass}
+
+\PYG{c}{\PYGZsh{} @combinations\PYGZus{}with\PYGZus{}replacement}
+\PYG{n+nd}{@combinations\PYGZus{}with\PYGZus{}replacement}\PYG{p}{(}\PYG{n}{create\PYGZus{}initial\PYGZus{}files\PYGZus{}ABCD}\PYG{p}{,} \PYG{c}{\PYGZsh{} Input}
+ \PYG{n}{formatter}\PYG{p}{(}\PYG{p}{)}\PYG{p}{,} \PYG{c}{\PYGZsh{} match input files}
+
+ \PYG{c}{\PYGZsh{} tuple of 2 at a time}
+ \PYG{l+m+mi}{2}\PYG{p}{,}
+
+ \PYG{c}{\PYGZsh{} Output Replacement string}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}path[0][0]\PYGZcb{}/}\PYG{l+s}{\PYGZdq{}}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}basename[0][1]\PYGZcb{}\PYGZus{}vs\PYGZus{}}\PYG{l+s}{\PYGZdq{}}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}basename[1][1]\PYGZcb{}.combinations\PYGZus{}with\PYGZus{}replacement}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+
+ \PYG{c}{\PYGZsh{} Extra parameter: path for 1st set of files, 1st file name}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}path[0][0]\PYGZcb{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+
+ \PYG{c}{\PYGZsh{} Extra parameter}
+ \PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}basename[0][0]\PYGZcb{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{c}{\PYGZsh{} basename for 1st set of files, 1st file name}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}basename[1][0]\PYGZcb{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{c}{\PYGZsh{} 2nd}
+ \PYG{p}{]}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{combinations\PYGZus{}with\PYGZus{}replacement\PYGZus{}task}\PYG{p}{(}\PYG{n}{input\PYGZus{}file}\PYG{p}{,} \PYG{n}{output\PYGZus{}parameter}\PYG{p}{,} \PYG{n}{shared\PYGZus{}path}\PYG{p}{,} \PYG{n}{basenames}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{print} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{ \PYGZhy{} }\PYG{l+s}{\PYGZdq{}}\PYG{o}{.}\PYG{n}{join}\PYG{p}{(}\PYG{n}{basenames}\PYG{p}{)}
+
+
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} Run}
+\PYG{c}{\PYGZsh{}}
+\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{n}{verbose}\PYG{o}{=}\PYG{l+m+mi}{0}\PYG{p}{)}
+\end{Verbatim}
+
+This results in:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{n}{verbose}\PYG{o}{=}\PYG{l+m+mi}{0}\PYG{p}{)}
+\PYG{g+go}{A \PYGZhy{} A}
+\PYG{g+go}{A \PYGZhy{} B}
+\PYG{g+go}{A \PYGZhy{} C}
+\PYG{g+go}{A \PYGZhy{} D}
+\PYG{g+go}{B \PYGZhy{} B}
+\PYG{g+go}{B \PYGZhy{} C}
+\PYG{g+go}{B \PYGZhy{} D}
+\PYG{g+go}{C \PYGZhy{} C}
+\PYG{g+go}{C \PYGZhy{} D}
+\PYG{g+go}{D \PYGZhy{} D}
+\end{Verbatim}
+\end{quote}
+\end{quote}
+
+
+\section{\textbf{Chapter 20}: Python Code for Manipulating task inputs via string substitution using \emph{inputs()} and \emph{add\_inputs()}}
+\label{tutorials/new_tutorial/inputs_code:new-manual-inputs-code}\label{tutorials/new_tutorial/inputs_code::doc}\label{tutorials/new_tutorial/inputs_code:new-manual-inputs-chapter-num-python-code-for-manipulating-task-inputs-via-string-substitution-using-inputs-and-add-inputs}
+
+\strong{See also:}
+
+\begin{itemize}
+\item {}
+{\hyperref[tutorials/new_tutorial/manual_contents:new-manual-table-of-contents]{\emph{Manual Table of Contents}}}
+
+\item {}
+{\hyperref[decorators/indicator_objects:decorators-inputs]{\emph{inputs()}}} syntax
+
+\item {}
+{\hyperref[decorators/indicator_objects:decorators-add-inputs]{\emph{add\_inputs()}}} syntax
+
+\item {}
+Back to \textbf{Chapter 20}: {\hyperref[tutorials/new_tutorial/inputs:new-manual-inputs]{\emph{Manipulating task inputs via string substitution}}}
+
+\end{itemize}
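+
+The worked examples below demonstrate each indicator object in turn. As a quick
+orientation, the following minimal sketch (using hypothetical file names such as
+demo.cpp, demo.h and demo.py, which do not belong to the examples) contrasts the
+two: \emph{add\_inputs()} \emph{appends} extra prerequisites to the \emph{Input} of
+every job, whereas \emph{inputs()} \emph{replaces} the matched \emph{Input}
+altogether.
+
+\begin{Verbatim}
+from ruffus import *
+
+# hypothetical pre-existing files so that the sketch can be run as-is
+for f in ["demo.cpp", "demo.h", "demo.py"]:
+    open(f, "w")
+
+# add_inputs() appends: each job receives demo.cpp *and* demo.h
+@transform(["demo.cpp"], suffix(".cpp"), add_inputs("demo.h"), ".o")
+def compile_with_header(input_files, output_file):
+    open(output_file, "w")
+
+# inputs() replaces: the matched demo.cpp is swapped for demo.py
+@transform(["demo.cpp"], formatter(".cpp$"),
+           inputs("{basename[0]}.py"), "{basename[0]}.results")
+def run_matching_python(input_file, output_file):
+    open(output_file, "w")
+
+pipeline_run()
+\end{Verbatim}
+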
+
+
+
+
+\subsection{Example code for adding additional \emph{input} prerequisites per job with \emph{add\_inputs()}}
+\label{tutorials/new_tutorial/inputs_code:example-code-for-adding-additional-input-prerequisites-per-job-with-add-inputs}
+
+\subsubsection{1. Example: compiling C++ code}
+\label{tutorials/new_tutorial/inputs_code:example-compiling-c-code}\label{tutorials/new_tutorial/inputs_code:new-manual-inputs-example1}\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{c}{\PYGZsh{} source files exist before our pipeline}
+\PYG{n}{source\PYGZus{}files} \PYG{o}{=} \PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{hasty.cpp}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{tasty.cpp}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{messy.cpp}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}
+\PYG{k}{for} \PYG{n}{source\PYGZus{}file} \PYG{o+ow}{in} \PYG{n}{source\PYGZus{}files}\PYG{p}{:}
+ \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{source\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+
+\PYG{k+kn}{from} \PYG{n+nn}{ruffus} \PYG{k+kn}{import} \PYG{o}{*}
+
+\PYG{n+nd}{@transform}\PYG{p}{(}\PYG{n}{source\PYGZus{}files}\PYG{p}{,} \PYG{n}{suffix}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.cpp}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.o}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{compile}\PYG{p}{(}\PYG{n}{input\PYGZus{}filename}\PYG{p}{,} \PYG{n}{output\PYGZus{}file}\PYG{p}{)}\PYG{p}{:}
+ \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+
+\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{p}{)}
+\end{Verbatim}
+
+Giving:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{p}{)}
+\PYG{g+go}{ Job = [hasty.cpp \PYGZhy{}\PYGZgt{} hasty.o] completed}
+\PYG{g+go}{ Job = [messy.cpp \PYGZhy{}\PYGZgt{} messy.o] completed}
+\PYG{g+go}{ Job = [tasty.cpp \PYGZhy{}\PYGZgt{} tasty.o] completed}
+\PYG{g+go}{Completed Task = compile}
+\end{Verbatim}
+\end{quote}
+\end{quote}
+
+
+\subsubsection{2. Example: Adding a common header file with \emph{add\_inputs()}}
+\label{tutorials/new_tutorial/inputs_code:new-manual-inputs-example2}\label{tutorials/new_tutorial/inputs_code:example-adding-a-common-header-file-with-add-inputs}\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{c}{\PYGZsh{} source files exist before our pipeline}
+\PYG{n}{source\PYGZus{}files} \PYG{o}{=} \PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{hasty.cpp}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{tasty.cpp}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{messy.cpp}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}
+\PYG{k}{for} \PYG{n}{source\PYGZus{}file} \PYG{o+ow}{in} \PYG{n}{source\PYGZus{}files}\PYG{p}{:}
+ \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{source\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+
+\PYG{c}{\PYGZsh{} common (universal) header exists before our pipeline}
+\PYG{n+nb}{open}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{universal.h}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+
+\PYG{k+kn}{from} \PYG{n+nn}{ruffus} \PYG{k+kn}{import} \PYG{o}{*}
+
+\PYG{n+nd}{@transform}\PYG{p}{(} \PYG{n}{source\PYGZus{}files}\PYG{p}{,} \PYG{n}{suffix}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.cpp}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,}
+ \PYG{c}{\PYGZsh{} add header to the input of every job}
+ \PYG{n}{add\PYGZus{}inputs}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{universal.h}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.o}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{compile}\PYG{p}{(}\PYG{n}{input\PYGZus{}filename}\PYG{p}{,} \PYG{n}{output\PYGZus{}file}\PYG{p}{)}\PYG{p}{:}
+ \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+
+\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{p}{)}
+\end{Verbatim}
+
+Giving:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{p}{)}
+\PYG{g+go}{ Job = [[hasty.cpp, universal.h] \PYGZhy{}\PYGZgt{} hasty.o] completed}
+\PYG{g+go}{ Job = [[messy.cpp, universal.h] \PYGZhy{}\PYGZgt{} messy.o] completed}
+\PYG{g+go}{ Job = [[tasty.cpp, universal.h] \PYGZhy{}\PYGZgt{} tasty.o] completed}
+\PYG{g+go}{Completed Task = compile}
+\end{Verbatim}
+\end{quote}
+\end{quote}
+
+
+\subsubsection{3. Example: Additional \emph{Input} can be tasks}
+\label{tutorials/new_tutorial/inputs_code:example-additional-input-can-be-tasks}\label{tutorials/new_tutorial/inputs_code:new-manual-inputs-example3}\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{c}{\PYGZsh{} source files exist before our pipeline}
+\PYG{n}{source\PYGZus{}files} \PYG{o}{=} \PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{hasty.cpp}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{tasty.cpp}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{messy.cpp}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}
+\PYG{k}{for} \PYG{n}{source\PYGZus{}file} \PYG{o+ow}{in} \PYG{n}{source\PYGZus{}files}\PYG{p}{:}
+ \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{source\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+
+\PYG{c}{\PYGZsh{} common (universal) header exists before our pipeline}
+\PYG{n+nb}{open}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{universal.h}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+
+\PYG{k+kn}{from} \PYG{n+nn}{ruffus} \PYG{k+kn}{import} \PYG{o}{*}
+
+\PYG{c}{\PYGZsh{} make header files}
+\PYG{n+nd}{@transform}\PYG{p}{(}\PYG{n}{source\PYGZus{}files}\PYG{p}{,} \PYG{n}{suffix}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.cpp}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.h}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{create\PYGZus{}matching\PYGZus{}headers}\PYG{p}{(}\PYG{n}{input\PYGZus{}file}\PYG{p}{,} \PYG{n}{output\PYGZus{}file}\PYG{p}{)}\PYG{p}{:}
+ \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+
+\PYG{n+nd}{@transform}\PYG{p}{(}\PYG{n}{source\PYGZus{}files}\PYG{p}{,} \PYG{n}{suffix}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.cpp}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,}
+ \PYG{c}{\PYGZsh{} add header to the input of every job}
+ \PYG{n}{add\PYGZus{}inputs}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{universal.h}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{c}{\PYGZsh{} add result of task create\PYGZus{}matching\PYGZus{}headers to the input of every job}
+ \PYG{n}{create\PYGZus{}matching\PYGZus{}headers}\PYG{p}{)}\PYG{p}{,}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.o}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{compile}\PYG{p}{(}\PYG{n}{input\PYGZus{}filename}\PYG{p}{,} \PYG{n}{output\PYGZus{}file}\PYG{p}{)}\PYG{p}{:}
+ \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+
+\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{p}{)}
+\end{Verbatim}
+
+Giving:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{p}{)}
+\PYG{g+go}{ Job = [hasty.cpp \PYGZhy{}\PYGZgt{} hasty.h] completed}
+\PYG{g+go}{ Job = [messy.cpp \PYGZhy{}\PYGZgt{} messy.h] completed}
+\PYG{g+go}{ Job = [tasty.cpp \PYGZhy{}\PYGZgt{} tasty.h] completed}
+\PYG{g+go}{Completed Task = create\PYGZus{}matching\PYGZus{}headers}
+\PYG{g+go}{ Job = [[hasty.cpp, universal.h, hasty.h, messy.h, tasty.h] \PYGZhy{}\PYGZgt{} hasty.o] completed}
+\PYG{g+go}{ Job = [[messy.cpp, universal.h, hasty.h, messy.h, tasty.h] \PYGZhy{}\PYGZgt{} messy.o] completed}
+\PYG{g+go}{ Job = [[tasty.cpp, universal.h, hasty.h, messy.h, tasty.h] \PYGZhy{}\PYGZgt{} tasty.o] completed}
+\PYG{g+go}{Completed Task = compile}
+\end{Verbatim}
+\end{quote}
+\end{quote}
+
+
+\subsubsection{4. Example: Add corresponding files using \emph{add\_inputs()} with \emph{formatter} or \emph{regex}}
+\label{tutorials/new_tutorial/inputs_code:new-manual-inputs-example4}\label{tutorials/new_tutorial/inputs_code:example-add-corresponding-files-using-add-inputs-with-formatter-or-regex}\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{c}{\PYGZsh{} source files exist before our pipeline}
+\PYG{n}{source\PYGZus{}files} \PYG{o}{=} \PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{hasty.cpp}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{tasty.cpp}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{messy.cpp}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}
+\PYG{n}{header\PYGZus{}files} \PYG{o}{=} \PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{hasty.h}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{tasty.h}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{messy.h}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}
+\PYG{k}{for} \PYG{n}{source\PYGZus{}file} \PYG{o+ow}{in} \PYG{n}{source\PYGZus{}files} \PYG{o}{+} \PYG{n}{header\PYGZus{}files}\PYG{p}{:}
+ \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{source\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+
+\PYG{c}{\PYGZsh{} common (universal) header exists before our pipeline}
+\PYG{n+nb}{open}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{universal.h}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+
+\PYG{k+kn}{from} \PYG{n+nn}{ruffus} \PYG{k+kn}{import} \PYG{o}{*}
+
+\PYG{n+nd}{@transform}\PYG{p}{(} \PYG{n}{source\PYGZus{}files}\PYG{p}{,}
+ \PYG{n}{formatter}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.cpp\PYGZdl{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,}
+ \PYG{c}{\PYGZsh{} corresponding header for each source file}
+ \PYG{n}{add\PYGZus{}inputs}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}basename[0]\PYGZcb{}.h}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{c}{\PYGZsh{} add header to the input of every job}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{universal.h}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}basename[0]\PYGZcb{}.o}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{compile}\PYG{p}{(}\PYG{n}{input\PYGZus{}filename}\PYG{p}{,} \PYG{n}{output\PYGZus{}file}\PYG{p}{)}\PYG{p}{:}
+ \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+
+\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{p}{)}
+\end{Verbatim}
+
+Giving:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{p}{)}
+\PYG{g+go}{ Job = [[hasty.cpp, hasty.h, universal.h] \PYGZhy{}\PYGZgt{} hasty.o] completed}
+\PYG{g+go}{ Job = [[messy.cpp, messy.h, universal.h] \PYGZhy{}\PYGZgt{} messy.o] completed}
+\PYG{g+go}{ Job = [[tasty.cpp, tasty.h, universal.h] \PYGZhy{}\PYGZgt{} tasty.o] completed}
+\PYG{g+go}{Completed Task = compile}
+\end{Verbatim}
+\end{quote}
+\end{quote}
+
+
+\subsection{Example code for replacing all input parameters with \emph{inputs()}}
+\label{tutorials/new_tutorial/inputs_code:example-code-for-replacing-all-input-parameters-with-inputs}
+
+\subsubsection{5. Example: Running matching Python scripts using \emph{inputs()}}
+\label{tutorials/new_tutorial/inputs_code:example-running-matching-python-scripts-using-inputs}\label{tutorials/new_tutorial/inputs_code:new-manual-inputs-example5}\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{c}{\PYGZsh{} source files exist before our pipeline}
+\PYG{n}{source\PYGZus{}files} \PYG{o}{=} \PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{hasty.cpp}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{tasty.cpp}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{messy.cpp}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}
+\PYG{n}{python\PYGZus{}files} \PYG{o}{=} \PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{hasty.py}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{tasty.py}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{messy.py}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}
+\PYG{k}{for} \PYG{n}{source\PYGZus{}file} \PYG{o+ow}{in} \PYG{n}{source\PYGZus{}files} \PYG{o}{+} \PYG{n}{python\PYGZus{}files}\PYG{p}{:}
+ \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{source\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+
+\PYG{c}{\PYGZsh{} common (universal) header exists before our pipeline}
+\PYG{n+nb}{open}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{universal.h}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+
+\PYG{k+kn}{from} \PYG{n+nn}{ruffus} \PYG{k+kn}{import} \PYG{o}{*}
+
+\PYG{n+nd}{@transform}\PYG{p}{(} \PYG{n}{source\PYGZus{}files}\PYG{p}{,}
+ \PYG{n}{formatter}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.cpp\PYGZdl{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,}
+ \PYG{c}{\PYGZsh{} corresponding python file for each source file}
+ \PYG{n}{inputs}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}basename[0]\PYGZcb{}.py}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,}
+
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}basename[0]\PYGZcb{}.results}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{run\PYGZus{}corresponding\PYGZus{}python}\PYG{p}{(}\PYG{n}{input\PYGZus{}filenames}\PYG{p}{,} \PYG{n}{output\PYGZus{}file}\PYG{p}{)}\PYG{p}{:}
+ \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+
+
+\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{p}{)}
+\end{Verbatim}
+
+Giving:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{p}{)}
+\PYG{g+go}{ Job = [hasty.py \PYGZhy{}\PYGZgt{} hasty.results] completed}
+\PYG{g+go}{ Job = [messy.py \PYGZhy{}\PYGZgt{} messy.results] completed}
+\PYG{g+go}{ Job = [tasty.py \PYGZhy{}\PYGZgt{} tasty.results] completed}
+\PYG{g+go}{Completed Task = run\PYGZus{}corresponding\PYGZus{}python}
+\end{Verbatim}
+\end{quote}
+\end{quote}
+
+
+\section{\textbf{Chapter 21}: Esoteric: Python Code for Generating parameters on the fly with \emph{@files}}
+\label{tutorials/new_tutorial/onthefly_code:new-manual-on-the-fly-code}\label{tutorials/new_tutorial/onthefly_code::doc}\label{tutorials/new_tutorial/onthefly_code:new-manual-on-the-fly-chapter-num-esoteric-python-code-for-generating-parameters-on-the-fly-with-files}
+
+\strong{See also:}
+
+\begin{itemize}
+\item {}
+{\hyperref[tutorials/new_tutorial/manual_contents:new-manual-table-of-contents]{\emph{Manual Table of Contents}}}
+
+\item {}
+{\hyperref[decorators/files_ex:decorators-files-on-the-fly]{\emph{@files on-the-fly syntax in detail}}}
+
+\item {}
+Back to \textbf{Chapter 21}: {\hyperref[tutorials/new_tutorial/onthefly:new-manual-on-the-fly]{\emph{Generating parameters on the fly}}}
+
+\end{itemize}
+
+
+
+
+\subsection{Introduction}
+\label{tutorials/new_tutorial/onthefly_code:introduction}\begin{quote}
+
+\begin{DUlineblock}{0em}
+\item[] This script takes N pairs of input files (with the suffixes .gene and .gwas)
+\item[] and runs them against M sets of simulation data (with the suffix .simulation)
+\item[] A summary per input file pair is then produced
+\end{DUlineblock}
+
+In pseudo-code:
+\begin{quote}
+
+STEP\_1:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+for n\_file in NNN\_pairs\_of\_input\_files:
+ for m\_file in MMM\_simulation\_data:
+
+ [n\_file.gene,
+ n\_file.gwas,
+ m\_file.simulation] -\textgreater{} n\_file.m\_file.simulation\_res
+\end{Verbatim}
+
+STEP\_2:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+for n\_file in NNN\_pairs\_of\_input\_files:
+
+ n\_file.*.simulation\_res -\textgreater{} n\_file.mean
+\end{Verbatim}
+
+\begin{DUlineblock}{0em}
+\item[] n = CNT\_GENE\_GWAS\_FILES
+\item[] m = CNT\_SIMULATION\_FILES
+\end{DUlineblock}
+\end{quote}
+\end{quote}
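+
+In Ruffus, the pseudo-code above corresponds to handing \emph{@files} a generator
+function that yields one parameter list per job (the on-the-fly syntax this chapter
+covers). The full pipeline is listed in the next section; as a preview, here is a
+minimal sketch of STEP\_1 only, using hypothetical in-line file lists in place of
+the helper functions (get\_gene\_gwas\_file\_pairs() and get\_simulation\_files())
+defined below.
+
+\begin{Verbatim}
+from ruffus import *
+
+# hypothetical stand-ins for the helper functions of the full script
+gene_gwas_pairs  = [["000.gene", "000.gwas"], ["001.gene", "001.gwas"]]
+simulation_files = ["000.simulation", "001.simulation", "002.simulation"]
+
+# create empty input files so that the sketch can be run as-is
+for f in [name for pair in gene_gwas_pairs for name in pair] + simulation_files:
+    open(f, "w")
+
+def generate_simulation_params():
+    # STEP_1: one job per (gene/gwas pair, simulation file) combination
+    for gene, gwas in gene_gwas_pairs:
+        gene_root = gene[:-len(".gene")]
+        for sim in simulation_files:
+            sim_root = sim[:-len(".simulation")]
+            yield [gene, gwas, sim], "%s.%s.simulation_res" % (gene_root, sim_root)
+
+@files(generate_simulation_params)
+def gwas_simulation(input_files, output_file):
+    open(output_file, "w")
+
+pipeline_run()
+\end{Verbatim}
+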
+
+
+\subsection{Code}
+\label{tutorials/new_tutorial/onthefly_code:code}\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{k+kn}{from} \PYG{n+nn}{ruffus} \PYG{k+kn}{import} \PYG{o}{*}
+\PYG{k+kn}{import} \PYG{n+nn}{os}
+
+\PYG{c}{\PYGZsh{}88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888}
+
+\PYG{c}{\PYGZsh{} constants}
+
+\PYG{c}{\PYGZsh{}88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888}
+\PYG{n}{working\PYGZus{}dir} \PYG{o}{=} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{temp\PYGZus{}NxM}\PYG{l+s}{\PYGZdq{}}
+\PYG{n}{simulation\PYGZus{}data\PYGZus{}dir} \PYG{o}{=} \PYG{n}{os}\PYG{o}{.}\PYG{n}{path}\PYG{o}{.}\PYG{n}{join}\PYG{p}{(}\PYG{n}{working\PYGZus{}dir}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{simulation}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{n}{gene\PYGZus{}data\PYGZus{}dir} \PYG{o}{=} \PYG{n}{os}\PYG{o}{.}\PYG{n}{path}\PYG{o}{.}\PYG{n}{join}\PYG{p}{(}\PYG{n}{working\PYGZus{}dir}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{gene}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{n}{CNT\PYGZus{}GENE\PYGZus{}GWAS\PYGZus{}FILES} \PYG{o}{=} \PYG{l+m+mi}{2}
+\PYG{n}{CNT\PYGZus{}SIMULATION\PYGZus{}FILES} \PYG{o}{=} \PYG{l+m+mi}{3}
+
+
+
+\PYG{c}{\PYGZsh{}88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888}
+
+\PYG{c}{\PYGZsh{} imports}
+
+\PYG{c}{\PYGZsh{}88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888}
+\PYG{k+kn}{import} \PYG{n+nn}{os}\PYG{o}{,} \PYG{n+nn}{sys}
+\PYG{k+kn}{from} \PYG{n+nn}{itertools} \PYG{k+kn}{import} \PYG{n}{izip}
+\PYG{k+kn}{import} \PYG{n+nn}{glob}
+\PYG{c}{\PYGZsh{}88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888}
+
+\PYG{c}{\PYGZsh{} Functions}
+
+
+\PYG{c}{\PYGZsh{}88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888}
+
+\PYG{c}{\PYGZsh{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}}
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} get gene gwas file pairs}
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}}
+\PYG{k}{def} \PYG{n+nf}{get\PYGZus{}gene\PYGZus{}gwas\PYGZus{}file\PYGZus{}pairs}\PYG{p}{(} \PYG{p}{)}\PYG{p}{:}
+ \PYG{l+s+sd}{\PYGZdq{}\PYGZdq{}\PYGZdq{}}
+\PYG{l+s+sd}{ Helper function to get all *.gene, *.gwas files from the directory specified}
+\PYG{l+s+sd}{ in gene\PYGZus{}data\PYGZus{}dir}
+
+\PYG{l+s+sd}{ Returns}
+\PYG{l+s+sd}{ file pairs with both .gene and .gwas extensions,}
+\PYG{l+s+sd}{ corresponding roots (no extension) of each file}
+\PYG{l+s+sd}{ \PYGZdq{}\PYGZdq{}\PYGZdq{}}
+ \PYG{n}{gene\PYGZus{}files} \PYG{o}{=} \PYG{n}{glob}\PYG{o}{.}\PYG{n}{glob}\PYG{p}{(}\PYG{n}{os}\PYG{o}{.}\PYG{n}{path}\PYG{o}{.}\PYG{n}{join}\PYG{p}{(}\PYG{n}{gene\PYGZus{}data\PYGZus{}dir}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{*.gene}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{)}
+ \PYG{n}{gwas\PYGZus{}files} \PYG{o}{=} \PYG{n}{glob}\PYG{o}{.}\PYG{n}{glob}\PYG{p}{(}\PYG{n}{os}\PYG{o}{.}\PYG{n}{path}\PYG{o}{.}\PYG{n}{join}\PYG{p}{(}\PYG{n}{gene\PYGZus{}data\PYGZus{}dir}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{*.gwas}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{)}
+ \PYG{c}{\PYGZsh{}}
+ \PYG{n}{common\PYGZus{}roots} \PYG{o}{=} \PYG{n+nb}{set}\PYG{p}{(}\PYG{n+nb}{map}\PYG{p}{(}\PYG{k}{lambda} \PYG{n}{x}\PYG{p}{:} \PYG{n}{os}\PYG{o}{.}\PYG{n}{path}\PYG{o}{.}\PYG{n}{splitext}\PYG{p}{(}\PYG{n}{os}\PYG{o}{.}\PYG{n}{path}\PYG{o}{.}\PYG{n}{split}\PYG{p}{(}\PYG{n}{x}\PYG{p}{)}\PYG{p}{[}\PYG{l+m+mi}{1}\PYG{p}{]}\PYG{p}{)}\PYG{p}{[}\PYG{l+m+mi}{0}\PYG{p}{]}\PYG{p}{,} \PYG{n}{gene\PYGZus{}files}\PYG{p}{)}\PYG{p}{)}
+ \PYG{n}{common\PYGZus{}roots} \PYG{o}{\PYGZam{}}\PYG{o}{=}\PYG{n+nb}{set}\PYG{p}{(}\PYG{n+nb}{map}\PYG{p}{(}\PYG{k}{lambda} \PYG{n}{x}\PYG{p}{:} \PYG{n}{os}\PYG{o}{.}\PYG{n}{path}\PYG{o}{.}\PYG{n}{splitext}\PYG{p}{(}\PYG{n}{os}\PYG{o}{.}\PYG{n}{path}\PYG{o}{.}\PYG{n}{split}\PYG{p}{(}\PYG{n}{x}\PYG{p}{)}\PYG{p}{[}\PYG{l+m+mi}{1}\PYG{p}{]}\PYG{p}{)}\PYG{p}{[}\PYG{l+m+mi}{0}\PYG{p}{]}\PYG{p}{,} \PYG{n}{gwas\PYGZus{}files}\PYG{p}{)}\PYG{p}{)}
+ \PYG{n}{common\PYGZus{}roots} \PYG{o}{=} \PYG{n+nb}{list}\PYG{p}{(}\PYG{n}{common\PYGZus{}roots}\PYG{p}{)}
+ \PYG{c}{\PYGZsh{}}
+ \PYG{n}{p} \PYG{o}{=} \PYG{n}{os}\PYG{o}{.}\PYG{n}{path}\PYG{p}{;} \PYG{n}{g\PYGZus{}dir} \PYG{o}{=} \PYG{n}{gene\PYGZus{}data\PYGZus{}dir}
+ \PYG{n}{file\PYGZus{}pairs} \PYG{o}{=} \PYG{p}{[}\PYG{p}{[}\PYG{n}{p}\PYG{o}{.}\PYG{n}{join}\PYG{p}{(}\PYG{n}{g\PYGZus{}dir}\PYG{p}{,} \PYG{n}{x} \PYG{o}{+} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.gene}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{n}{p}\PYG{o}{.}\PYG{n}{join}\PYG{p}{(}\PYG{n}{g\PYGZus{}dir}\PYG{p}{,} \PYG{n}{x} \PYG{o}{+} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.gwas}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{]} \PYG{k}{for} \PYG{n}{x} \PYG{o+ow}{in} \PYG{n}{common\PYGZus{}roots}\PYG{p}{]}
+ \PYG{k}{return} \PYG{n}{file\PYGZus{}pairs}\PYG{p}{,} \PYG{n}{common\PYGZus{}roots}
+
+\PYG{c}{\PYGZsh{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}}
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} get simulation files}
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}}
+\PYG{k}{def} \PYG{n+nf}{get\PYGZus{}simulation\PYGZus{}files}\PYG{p}{(} \PYG{p}{)}\PYG{p}{:}
+ \PYG{l+s+sd}{\PYGZdq{}\PYGZdq{}\PYGZdq{}}
+\PYG{l+s+sd}{ Helper function to get all *.simulation files from the directory specified}
+\PYG{l+s+sd}{ in simulation\PYGZus{}data\PYGZus{}dir}
+\PYG{l+s+sd}{ Returns}
+\PYG{l+s+sd}{ file with .simulation extensions,}
+\PYG{l+s+sd}{ corresponding roots (no extension) of each file}
+\PYG{l+s+sd}{ \PYGZdq{}\PYGZdq{}\PYGZdq{}}
+ \PYG{n}{simulation\PYGZus{}files} \PYG{o}{=} \PYG{n}{glob}\PYG{o}{.}\PYG{n}{glob}\PYG{p}{(}\PYG{n}{os}\PYG{o}{.}\PYG{n}{path}\PYG{o}{.}\PYG{n}{join}\PYG{p}{(}\PYG{n}{simulation\PYGZus{}data\PYGZus{}dir}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{*.simulation}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{)}
+ \PYG{n}{simulation\PYGZus{}roots} \PYG{o}{=}\PYG{n+nb}{map}\PYG{p}{(}\PYG{k}{lambda} \PYG{n}{x}\PYG{p}{:} \PYG{n}{os}\PYG{o}{.}\PYG{n}{path}\PYG{o}{.}\PYG{n}{splitext}\PYG{p}{(}\PYG{n}{os}\PYG{o}{.}\PYG{n}{path}\PYG{o}{.}\PYG{n}{split}\PYG{p}{(}\PYG{n}{x}\PYG{p}{)}\PYG{p}{[}\PYG{l+m+mi}{1}\PYG{p}{]}\PYG{p}{)}\PYG{p}{[}\PYG{l+m+mi}{0}\PYG{p}{]}\PYG{p}{,} \PYG{n}{simulation\PYGZus{}files}\PYG{p}{)}
+ \PYG{k}{return} \PYG{n}{simulation\PYGZus{}files}\PYG{p}{,} \PYG{n}{simulation\PYGZus{}roots}
+
+
+
+\PYG{c}{\PYGZsh{}88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888}
+
+\PYG{c}{\PYGZsh{} Main logic}
+
+
+\PYG{c}{\PYGZsh{}88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888}
+
+
+
+
+
+
+
+\PYG{c}{\PYGZsh{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}}
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} setup\PYGZus{}simulation\PYGZus{}data}
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}}
+
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} mkdir: makes sure output directories exist before task}
+\PYG{c}{\PYGZsh{}}
+\PYG{n+nd}{@follows}\PYG{p}{(}\PYG{n}{mkdir}\PYG{p}{(}\PYG{n}{gene\PYGZus{}data\PYGZus{}dir}\PYG{p}{,} \PYG{n}{simulation\PYGZus{}data\PYGZus{}dir}\PYG{p}{)}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{setup\PYGZus{}simulation\PYGZus{}data} \PYG{p}{(}\PYG{p}{)}\PYG{p}{:}
+ \PYG{l+s+sd}{\PYGZdq{}\PYGZdq{}\PYGZdq{}}
+\PYG{l+s+sd}{ create simulation files}
+\PYG{l+s+sd}{ \PYGZdq{}\PYGZdq{}\PYGZdq{}}
+ \PYG{k}{for} \PYG{n}{i} \PYG{o+ow}{in} \PYG{n+nb}{range}\PYG{p}{(}\PYG{n}{CNT\PYGZus{}GENE\PYGZus{}GWAS\PYGZus{}FILES}\PYG{p}{)}\PYG{p}{:}
+ \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{os}\PYG{o}{.}\PYG{n}{path}\PYG{o}{.}\PYG{n}{join}\PYG{p}{(}\PYG{n}{gene\PYGZus{}data\PYGZus{}dir}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s+si}{\PYGZpc{}03d}\PYG{l+s}{.gene}\PYG{l+s}{\PYGZdq{}} \PYG{o}{\PYGZpc{}} \PYG{n}{i}\PYG{p}{)}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+ \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{os}\PYG{o}{.}\PYG{n}{path}\PYG{o}{.}\PYG{n}{join}\PYG{p}{(}\PYG{n}{gene\PYGZus{}data\PYGZus{}dir}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s+si}{\PYGZpc{}03d}\PYG{l+s}{.gwas}\PYG{l+s}{\PYGZdq{}} \PYG{o}{\PYGZpc{}} \PYG{n}{i}\PYG{p}{)}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+ \PYG{c}{\PYGZsh{}}
+ \PYG{c}{\PYGZsh{} gene files without corresponding gwas and vice versa}
+ \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{os}\PYG{o}{.}\PYG{n}{path}\PYG{o}{.}\PYG{n}{join}\PYG{p}{(}\PYG{n}{gene\PYGZus{}data\PYGZus{}dir}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{orphan1.gene}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+ \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{os}\PYG{o}{.}\PYG{n}{path}\PYG{o}{.}\PYG{n}{join}\PYG{p}{(}\PYG{n}{gene\PYGZus{}data\PYGZus{}dir}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{orphan2.gwas}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+ \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{os}\PYG{o}{.}\PYG{n}{path}\PYG{o}{.}\PYG{n}{join}\PYG{p}{(}\PYG{n}{gene\PYGZus{}data\PYGZus{}dir}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{orphan3.gwas}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+ \PYG{c}{\PYGZsh{}}
+ \PYG{k}{for} \PYG{n}{i} \PYG{o+ow}{in} \PYG{n+nb}{range}\PYG{p}{(}\PYG{n}{CNT\PYGZus{}SIMULATION\PYGZus{}FILES}\PYG{p}{)}\PYG{p}{:}
+ \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{os}\PYG{o}{.}\PYG{n}{path}\PYG{o}{.}\PYG{n}{join}\PYG{p}{(}\PYG{n}{simulation\PYGZus{}data\PYGZus{}dir}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s+si}{\PYGZpc{}03d}\PYG{l+s}{.simulation}\PYG{l+s}{\PYGZdq{}} \PYG{o}{\PYGZpc{}} \PYG{n}{i}\PYG{p}{)}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+
+
+
+
+\PYG{c}{\PYGZsh{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}}
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} cleanup\PYGZus{}simulation\PYGZus{}data}
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus [...]
+\PYG{k}{def} \PYG{n+nf}{try\PYGZus{}rmdir} \PYG{p}{(}\PYG{n}{d}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{if} \PYG{n}{os}\PYG{o}{.}\PYG{n}{path}\PYG{o}{.}\PYG{n}{exists}\PYG{p}{(}\PYG{n}{d}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{try}\PYG{p}{:}
+ \PYG{n}{os}\PYG{o}{.}\PYG{n}{rmdir}\PYG{p}{(}\PYG{n}{d}\PYG{p}{)}
+ \PYG{k}{except} \PYG{n+ne}{OSError}\PYG{p}{:}
+ \PYG{n}{sys}\PYG{o}{.}\PYG{n}{stderr}\PYG{o}{.}\PYG{n}{write}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{Warning:}\PYG{l+s+se}{\PYGZbs{}t}\PYG{l+s+si}{\PYGZpc{}s}\PYG{l+s}{ is not empty and will not be removed.}\PYG{l+s+se}{\PYGZbs{}n}\PYG{l+s}{\PYGZdq{}} \PYG{o}{\PYGZpc{}} \PYG{n}{d}\PYG{p}{)}
+
+
+
+\PYG{k}{def} \PYG{n+nf}{cleanup\PYGZus{}simulation\PYGZus{}data} \PYG{p}{(}\PYG{p}{)}\PYG{p}{:}
+ \PYG{l+s+sd}{\PYGZdq{}\PYGZdq{}\PYGZdq{}}
+\PYG{l+s+sd}{ cleanup files}
+\PYG{l+s+sd}{ \PYGZdq{}\PYGZdq{}\PYGZdq{}}
+ \PYG{n}{sys}\PYG{o}{.}\PYG{n}{stderr}\PYG{o}{.}\PYG{n}{write}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{Cleanup working directory and simulation files.}\PYG{l+s+se}{\PYGZbs{}n}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+ \PYG{c}{\PYGZsh{}}
+ \PYG{c}{\PYGZsh{} cleanup gene and gwas files}
+ \PYG{c}{\PYGZsh{}}
+ \PYG{k}{for} \PYG{n}{f} \PYG{o+ow}{in} \PYG{n}{glob}\PYG{o}{.}\PYG{n}{glob}\PYG{p}{(}\PYG{n}{os}\PYG{o}{.}\PYG{n}{path}\PYG{o}{.}\PYG{n}{join}\PYG{p}{(}\PYG{n}{gene\PYGZus{}data\PYGZus{}dir}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{*.gene}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{)}\PYG{p}{:}
+ \PYG{n}{os}\PYG{o}{.}\PYG{n}{unlink}\PYG{p}{(}\PYG{n}{f}\PYG{p}{)}
+ \PYG{k}{for} \PYG{n}{f} \PYG{o+ow}{in} \PYG{n}{glob}\PYG{o}{.}\PYG{n}{glob}\PYG{p}{(}\PYG{n}{os}\PYG{o}{.}\PYG{n}{path}\PYG{o}{.}\PYG{n}{join}\PYG{p}{(}\PYG{n}{gene\PYGZus{}data\PYGZus{}dir}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{*.gwas}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{)}\PYG{p}{:}
+ \PYG{n}{os}\PYG{o}{.}\PYG{n}{unlink}\PYG{p}{(}\PYG{n}{f}\PYG{p}{)}
+ \PYG{n}{try\PYGZus{}rmdir}\PYG{p}{(}\PYG{n}{gene\PYGZus{}data\PYGZus{}dir}\PYG{p}{)}
+ \PYG{c}{\PYGZsh{}}
+ \PYG{c}{\PYGZsh{} cleanup simulation}
+ \PYG{c}{\PYGZsh{}}
+ \PYG{k}{for} \PYG{n}{f} \PYG{o+ow}{in} \PYG{n}{glob}\PYG{o}{.}\PYG{n}{glob}\PYG{p}{(}\PYG{n}{os}\PYG{o}{.}\PYG{n}{path}\PYG{o}{.}\PYG{n}{join}\PYG{p}{(}\PYG{n}{simulation\PYGZus{}data\PYGZus{}dir}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{*.simulation}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{)}\PYG{p}{:}
+ \PYG{n}{os}\PYG{o}{.}\PYG{n}{unlink}\PYG{p}{(}\PYG{n}{f}\PYG{p}{)}
+ \PYG{n}{try\PYGZus{}rmdir}\PYG{p}{(}\PYG{n}{simulation\PYGZus{}data\PYGZus{}dir}\PYG{p}{)}
+ \PYG{c}{\PYGZsh{}}
+ \PYG{c}{\PYGZsh{} cleanup working\PYGZus{}dir}
+ \PYG{c}{\PYGZsh{}}
+ \PYG{k}{for} \PYG{n}{f} \PYG{o+ow}{in} \PYG{n}{glob}\PYG{o}{.}\PYG{n}{glob}\PYG{p}{(}\PYG{n}{os}\PYG{o}{.}\PYG{n}{path}\PYG{o}{.}\PYG{n}{join}\PYG{p}{(}\PYG{n}{working\PYGZus{}dir}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{simulation\PYGZus{}results}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{*.simulation\PYGZus{}res}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{)}\PYG{p}{:}
+ \PYG{n}{os}\PYG{o}{.}\PYG{n}{unlink}\PYG{p}{(}\PYG{n}{f}\PYG{p}{)}
+ \PYG{n}{try\PYGZus{}rmdir}\PYG{p}{(}\PYG{n}{os}\PYG{o}{.}\PYG{n}{path}\PYG{o}{.}\PYG{n}{join}\PYG{p}{(}\PYG{n}{working\PYGZus{}dir}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{simulation\PYGZus{}results}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{)}
+ \PYG{c}{\PYGZsh{}}
+ \PYG{k}{for} \PYG{n}{f} \PYG{o+ow}{in} \PYG{n}{glob}\PYG{o}{.}\PYG{n}{glob}\PYG{p}{(}\PYG{n}{os}\PYG{o}{.}\PYG{n}{path}\PYG{o}{.}\PYG{n}{join}\PYG{p}{(}\PYG{n}{working\PYGZus{}dir}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{*.mean}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{)}\PYG{p}{:}
+ \PYG{n}{os}\PYG{o}{.}\PYG{n}{unlink}\PYG{p}{(}\PYG{n}{f}\PYG{p}{)}
+ \PYG{n}{try\PYGZus{}rmdir}\PYG{p}{(}\PYG{n}{working\PYGZus{}dir}\PYG{p}{)}
+
+
+\PYG{c}{\PYGZsh{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus [...]
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} Step 1:}
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} for n\PYGZus{}file in NNN\PYGZus{}pairs\PYGZus{}of\PYGZus{}input\PYGZus{}files:}
+\PYG{c}{\PYGZsh{} for m\PYGZus{}file in MMM\PYGZus{}simulation\PYGZus{}data:}
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} [n\PYGZus{}file.gene,}
+\PYG{c}{\PYGZsh{} n\PYGZus{}file.gwas,}
+\PYG{c}{\PYGZsh{} m\PYGZus{}file.simulation] \PYGZhy{}\PYGZgt{} working\PYGZus{}dir/n\PYGZus{}file.m\PYGZus{}file.simulation\PYGZus{}res}
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus [...]
+\PYG{k}{def} \PYG{n+nf}{generate\PYGZus{}simulation\PYGZus{}params} \PYG{p}{(}\PYG{p}{)}\PYG{p}{:}
+ \PYG{l+s+sd}{\PYGZdq{}\PYGZdq{}\PYGZdq{}}
+\PYG{l+s+sd}{ Custom function to generate}
+\PYG{l+s+sd}{ file names for gene/gwas simulation study}
+\PYG{l+s+sd}{ \PYGZdq{}\PYGZdq{}\PYGZdq{}}
+ \PYG{n}{simulation\PYGZus{}files}\PYG{p}{,} \PYG{n}{simulation\PYGZus{}file\PYGZus{}roots} \PYG{o}{=} \PYG{n}{get\PYGZus{}simulation\PYGZus{}files}\PYG{p}{(}\PYG{p}{)}
+ \PYG{n}{gene\PYGZus{}gwas\PYGZus{}file\PYGZus{}pairs}\PYG{p}{,} \PYG{n}{gene\PYGZus{}gwas\PYGZus{}file\PYGZus{}roots} \PYG{o}{=} \PYG{n}{get\PYGZus{}gene\PYGZus{}gwas\PYGZus{}file\PYGZus{}pairs}\PYG{p}{(}\PYG{p}{)}
+ \PYG{c}{\PYGZsh{}}
+ \PYG{k}{for} \PYG{n}{sim\PYGZus{}file}\PYG{p}{,} \PYG{n}{sim\PYGZus{}file\PYGZus{}root} \PYG{o+ow}{in} \PYG{n}{izip}\PYG{p}{(}\PYG{n}{simulation\PYGZus{}files}\PYG{p}{,} \PYG{n}{simulation\PYGZus{}file\PYGZus{}roots}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{for} \PYG{p}{(}\PYG{n}{gene}\PYG{p}{,} \PYG{n}{gwas}\PYG{p}{)}\PYG{p}{,} \PYG{n}{gene\PYGZus{}file\PYGZus{}root} \PYG{o+ow}{in} \PYG{n}{izip}\PYG{p}{(}\PYG{n}{gene\PYGZus{}gwas\PYGZus{}file\PYGZus{}pairs}\PYG{p}{,} \PYG{n}{gene\PYGZus{}gwas\PYGZus{}file\PYGZus{}roots}\PYG{p}{)}\PYG{p}{:}
+ \PYG{c}{\PYGZsh{}}
+ \PYG{n}{result\PYGZus{}file} \PYG{o}{=} \PYG{l+s}{\PYGZdq{}}\PYG{l+s+si}{\PYGZpc{}s}\PYG{l+s}{.}\PYG{l+s+si}{\PYGZpc{}s}\PYG{l+s}{.simulation\PYGZus{}res}\PYG{l+s}{\PYGZdq{}} \PYG{o}{\PYGZpc{}} \PYG{p}{(}\PYG{n}{gene\PYGZus{}file\PYGZus{}root}\PYG{p}{,} \PYG{n}{sim\PYGZus{}file\PYGZus{}root}\PYG{p}{)}
+ \PYG{n}{result\PYGZus{}file\PYGZus{}path} \PYG{o}{=} \PYG{n}{os}\PYG{o}{.}\PYG{n}{path}\PYG{o}{.}\PYG{n}{join}\PYG{p}{(}\PYG{n}{working\PYGZus{}dir}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{simulation\PYGZus{}results}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{n}{result\PYGZus{}file}\PYG{p}{)}
+ \PYG{c}{\PYGZsh{}}
+ \PYG{k}{yield} \PYG{p}{[}\PYG{n}{gene}\PYG{p}{,} \PYG{n}{gwas}\PYG{p}{,} \PYG{n}{sim\PYGZus{}file}\PYG{p}{]}\PYG{p}{,} \PYG{n}{result\PYGZus{}file\PYGZus{}path}\PYG{p}{,} \PYG{n}{gene\PYGZus{}file\PYGZus{}root}\PYG{p}{,} \PYG{n}{sim\PYGZus{}file\PYGZus{}root}\PYG{p}{,} \PYG{n}{result\PYGZus{}file}
+
+
+
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} mkdir: makes sure output directories exist before task}
+\PYG{c}{\PYGZsh{}}
+\PYG{n+nd}{@follows}\PYG{p}{(}\PYG{n}{mkdir}\PYG{p}{(}\PYG{n}{working\PYGZus{}dir}\PYG{p}{,} \PYG{n}{os}\PYG{o}{.}\PYG{n}{path}\PYG{o}{.}\PYG{n}{join}\PYG{p}{(}\PYG{n}{working\PYGZus{}dir}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{simulation\PYGZus{}results}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{)}\PYG{p}{)}
+\PYG{n+nd}{@files}\PYG{p}{(}\PYG{n}{generate\PYGZus{}simulation\PYGZus{}params}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{gwas\PYGZus{}simulation}\PYG{p}{(}\PYG{n}{input\PYGZus{}files}\PYG{p}{,} \PYG{n}{result\PYGZus{}file\PYGZus{}path}\PYG{p}{,} \PYG{n}{gene\PYGZus{}file\PYGZus{}root}\PYG{p}{,} \PYG{n}{sim\PYGZus{}file\PYGZus{}root}\PYG{p}{,} \PYG{n}{result\PYGZus{}file}\PYG{p}{)}\PYG{p}{:}
+ \PYG{l+s+sd}{\PYGZdq{}\PYGZdq{}\PYGZdq{}}
+\PYG{l+s+sd}{ Dummy calculation of gene gwas vs simulation data}
+\PYG{l+s+sd}{ Normally runs in parallel on a computational cluster}
+\PYG{l+s+sd}{ \PYGZdq{}\PYGZdq{}\PYGZdq{}}
+ \PYG{p}{(}\PYG{n}{gene\PYGZus{}file}\PYG{p}{,}
+ \PYG{n}{gwas\PYGZus{}file}\PYG{p}{,}
+ \PYG{n}{simulation\PYGZus{}data\PYGZus{}file}\PYG{p}{)} \PYG{o}{=} \PYG{n}{input\PYGZus{}files}
+ \PYG{c}{\PYGZsh{}}
+ \PYG{n}{simulation\PYGZus{}res\PYGZus{}file} \PYG{o}{=} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{result\PYGZus{}file\PYGZus{}path}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+ \PYG{n}{simulation\PYGZus{}res\PYGZus{}file}\PYG{o}{.}\PYG{n}{write}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s+si}{\PYGZpc{}s}\PYG{l+s}{ + }\PYG{l+s+si}{\PYGZpc{}s}\PYG{l+s}{ \PYGZhy{}\PYGZgt{} }\PYG{l+s+si}{\PYGZpc{}s}\PYG{l+s+se}{\PYGZbs{}n}\PYG{l+s}{\PYGZdq{}} \PYG{o}{\PYGZpc{}} \PYG{p}{(}\PYG{n}{gene\PYGZus{}file\PYGZus{}root}\PYG{p}{,} \PYG{n}{sim\PYGZus{}file\PYGZus{}root}\PYG{p}{,} \PYG{n}{result\PYGZus{}file}\PYG{p}{)}\PYG{p}{)}
+
+
+\PYG{c}{\PYGZsh{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus [...]
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} Step 2:}
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} Statistical summary per gene/gwas file pair}
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} for n\PYGZus{}file in NNN\PYGZus{}pairs\PYGZus{}of\PYGZus{}input\PYGZus{}files:}
+\PYG{c}{\PYGZsh{} working\PYGZus{}dir/simulation\PYGZus{}results/n.*.simulation\PYGZus{}res}
+\PYG{c}{\PYGZsh{} \PYGZhy{}\PYGZgt{} working\PYGZus{}dir/n.mean}
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus [...]
+
+
+\PYG{n+nd}{@collate}\PYG{p}{(}\PYG{n}{gwas\PYGZus{}simulation}\PYG{p}{,} \PYG{n}{regex}\PYG{p}{(}\PYG{l+s}{r\PYGZdq{}}\PYG{l+s}{simulation\PYGZus{}results/(}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{d+).}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{d+.simulation\PYGZus{}res}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{l+s}{r\PYGZdq{}}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{1.mean}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{n+nd}{@posttask}\PYG{p}{(}\PYG{k}{lambda} \PYG{p}{:} \PYG{n}{sys}\PYG{o}{.}\PYG{n}{stdout}\PYG{o}{.}\PYG{n}{write}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s+se}{\PYGZbs{}n}\PYG{l+s}{OK}\PYG{l+s+se}{\PYGZbs{}n}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{statistical\PYGZus{}summary} \PYG{p}{(}\PYG{n}{result\PYGZus{}files}\PYG{p}{,} \PYG{n}{summary\PYGZus{}file}\PYG{p}{)}\PYG{p}{:}
+ \PYG{l+s+sd}{\PYGZdq{}\PYGZdq{}\PYGZdq{}}
+\PYG{l+s+sd}{ Simulate statistical summary}
+\PYG{l+s+sd}{ \PYGZdq{}\PYGZdq{}\PYGZdq{}}
+ \PYG{n}{summary\PYGZus{}file} \PYG{o}{=} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{summary\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+ \PYG{k}{for} \PYG{n}{f} \PYG{o+ow}{in} \PYG{n}{result\PYGZus{}files}\PYG{p}{:}
+ \PYG{n}{summary\PYGZus{}file}\PYG{o}{.}\PYG{n}{write}\PYG{p}{(}\PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{f}\PYG{p}{)}\PYG{o}{.}\PYG{n}{read}\PYG{p}{(}\PYG{p}{)}\PYG{p}{)}
+
+
+
+\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{p}{[}\PYG{n}{setup\PYGZus{}simulation\PYGZus{}data}\PYG{p}{]}\PYG{p}{,} \PYG{n}{multiprocess} \PYG{o}{=} \PYG{l+m+mi}{5}\PYG{p}{,} \PYG{n}{verbose} \PYG{o}{=} \PYG{l+m+mi}{2}\PYG{p}{)}
+\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{p}{[}\PYG{n}{statistical\PYGZus{}summary}\PYG{p}{]}\PYG{p}{,} \PYG{n}{multiprocess} \PYG{o}{=} \PYG{l+m+mi}{5}\PYG{p}{,} \PYG{n}{verbose} \PYG{o}{=} \PYG{l+m+mi}{2}\PYG{p}{)}
+
+\PYG{c}{\PYGZsh{} uncomment to print out the flowchart}
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} pipeline\PYGZus{}printout(sys.stdout, [statistical\PYGZus{}summary], verbose=2)}
+\PYG{c}{\PYGZsh{} graph\PYGZus{}printout (\PYGZdq{}flowchart.jpg\PYGZdq{}, \PYGZdq{}jpg\PYGZdq{}, [statistical\PYGZus{}summary])}
+\PYG{c}{\PYGZsh{}}
+
+\PYG{n}{cleanup\PYGZus{}simulation\PYGZus{}data} \PYG{p}{(}\PYG{p}{)}
+\end{Verbatim}
+\end{quote}
+
+
+\subsection{Resulting Output}
+\label{tutorials/new_tutorial/onthefly_code:resulting-output}\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{p}{[}\PYG{n}{setup\PYGZus{}simulation\PYGZus{}data}\PYG{p}{]}\PYG{p}{,} \PYG{n}{multiprocess} \PYG{o}{=} \PYG{l+m+mi}{5}\PYG{p}{,} \PYG{n}{verbose} \PYG{o}{=} \PYG{l+m+mi}{2}\PYG{p}{)}
+\PYG{g+go}{ Make directories [temp\PYGZus{}NxM/gene, temp\PYGZus{}NxM/simulation] completed}
+\PYG{g+go}{Completed Task = setup\PYGZus{}simulation\PYGZus{}data\PYGZus{}mkdir\PYGZus{}1}
+\PYG{g+go}{ Job completed}
+\PYG{g+go}{Completed Task = setup\PYGZus{}simulation\PYGZus{}data}
+
+
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{p}{[}\PYG{n}{statistical\PYGZus{}summary}\PYG{p}{]}\PYG{p}{,} \PYG{n}{multiprocess} \PYG{o}{=} \PYG{l+m+mi}{5}\PYG{p}{,} \PYG{n}{verbose} \PYG{o}{=} \PYG{l+m+mi}{2}\PYG{p}{)}
+\PYG{g+go}{ Make directories [temp\PYGZus{}NxM, temp\PYGZus{}NxM/simulation\PYGZus{}results] completed}
+\PYG{g+go}{Completed Task = gwas\PYGZus{}simulation\PYGZus{}mkdir\PYGZus{}1}
+\PYG{g+go}{ Job = [[temp\PYGZus{}NxM/gene/001.gene, temp\PYGZus{}NxM/gene/001.gwas, temp\PYGZus{}NxM/simulation/000.simulation] \PYGZhy{}\PYGZgt{} temp\PYGZus{}NxM/simulation\PYGZus{}results/001.000.simulation\PYGZus{}res, 001, 000, 001.000.simulation\PYGZus{}res] completed}
+\PYG{g+go}{ Job = [[temp\PYGZus{}NxM/gene/000.gene, temp\PYGZus{}NxM/gene/000.gwas, temp\PYGZus{}NxM/simulation/000.simulation] \PYGZhy{}\PYGZgt{} temp\PYGZus{}NxM/simulation\PYGZus{}results/000.000.simulation\PYGZus{}res, 000, 000, 000.000.simulation\PYGZus{}res] completed}
+\PYG{g+go}{ Job = [[temp\PYGZus{}NxM/gene/001.gene, temp\PYGZus{}NxM/gene/001.gwas, temp\PYGZus{}NxM/simulation/001.simulation] \PYGZhy{}\PYGZgt{} temp\PYGZus{}NxM/simulation\PYGZus{}results/001.001.simulation\PYGZus{}res, 001, 001, 001.001.simulation\PYGZus{}res] completed}
+\PYG{g+go}{ Job = [[temp\PYGZus{}NxM/gene/000.gene, temp\PYGZus{}NxM/gene/000.gwas, temp\PYGZus{}NxM/simulation/001.simulation] \PYGZhy{}\PYGZgt{} temp\PYGZus{}NxM/simulation\PYGZus{}results/000.001.simulation\PYGZus{}res, 000, 001, 000.001.simulation\PYGZus{}res] completed}
+\PYG{g+go}{ Job = [[temp\PYGZus{}NxM/gene/000.gene, temp\PYGZus{}NxM/gene/000.gwas, temp\PYGZus{}NxM/simulation/002.simulation] \PYGZhy{}\PYGZgt{} temp\PYGZus{}NxM/simulation\PYGZus{}results/000.002.simulation\PYGZus{}res, 000, 002, 000.002.simulation\PYGZus{}res] completed}
+\PYG{g+go}{ Job = [[temp\PYGZus{}NxM/gene/001.gene, temp\PYGZus{}NxM/gene/001.gwas, temp\PYGZus{}NxM/simulation/002.simulation] \PYGZhy{}\PYGZgt{} temp\PYGZus{}NxM/simulation\PYGZus{}results/001.002.simulation\PYGZus{}res, 001, 002, 001.002.simulation\PYGZus{}res] completed}
+\PYG{g+go}{Completed Task = gwas\PYGZus{}simulation}
+\PYG{g+go}{ Job = [[temp\PYGZus{}NxM/simulation\PYGZus{}results/000.000.simulation\PYGZus{}res, temp\PYGZus{}NxM/simulation\PYGZus{}results/000.001.simulation\PYGZus{}res, temp\PYGZus{}NxM/simulation\PYGZus{}results/000.002.simulation\PYGZus{}res] \PYGZhy{}\PYGZgt{} temp\PYGZus{}NxM/000.mean] completed}
+\PYG{g+go}{ Job = [[temp\PYGZus{}NxM/simulation\PYGZus{}results/001.000.simulation\PYGZus{}res, temp\PYGZus{}NxM/simulation\PYGZus{}results/001.001.simulation\PYGZus{}res, temp\PYGZus{}NxM/simulation\PYGZus{}results/001.002.simulation\PYGZus{}res] \PYGZhy{}\PYGZgt{} temp\PYGZus{}NxM/001.mean] completed}
+\end{Verbatim}
+\end{quote}
+
+\index{flowchart colours!Tutorial}\index{Tutorial!flowchart colours}
+
+\section{\textbf{Appendix 1}: Python code for Flow Chart Colours with \emph{pipeline\_printout\_graph(...)}}
+\label{tutorials/new_tutorial/flowchart_colours_code:index-0}\label{tutorials/new_tutorial/flowchart_colours_code:new-manual-flowchart-colours-code}\label{tutorials/new_tutorial/flowchart_colours_code:new-manual-flowchart-colours-chapter-num-python-code-for-flow-chart-colours-with-pipeline-printout-graph}\label{tutorials/new_tutorial/flowchart_colours_code::doc}
+
+\strong{See also:}
+
+\begin{itemize}
+\item {}
+{\hyperref[tutorials/new_tutorial/manual_contents:new-manual-table-of-contents]{\emph{Manual Table of Contents}}}
+
+\item {}
+{\hyperref[pipeline_functions:pipeline-functions-pipeline-printout-graph]{\emph{pipeline\_printout\_graph(...)}}}
+
+\item {}
+\code{Download code}
+
+\item {}
+Back to {\hyperref[tutorials/new_tutorial/flowchart_colours:new-manual-flowchart-colours]{\emph{Flowchart colours}}}
+
+\end{itemize}
+
+This example shows how flowchart colours can be customised.
+
+
+
+
+\subsection{Code}
+\label{tutorials/new_tutorial/flowchart_colours_code:code}\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{c}{\PYGZsh{}!/usr/bin/env python}
+\PYG{l+s+sd}{\PYGZdq{}\PYGZdq{}\PYGZdq{}}
+
+\PYG{l+s+sd}{ play\PYGZus{}with\PYGZus{}colours.py}
+\PYG{l+s+sd}{ [\PYGZhy{}\PYGZhy{}log\PYGZus{}file PATH]}
+\PYG{l+s+sd}{ [\PYGZhy{}\PYGZhy{}verbose]}
+
+\PYG{l+s+sd}{\PYGZdq{}\PYGZdq{}\PYGZdq{}}
+
+\PYG{c}{\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh [...]
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} play\PYGZus{}with\PYGZus{}colours.py}
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} Copyright (c) 7/13/2010 Leo Goodstadt}
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} Permission is hereby granted, free of charge, to any person obtaining a copy}
+\PYG{c}{\PYGZsh{} of this software and associated documentation files (the \PYGZdq{}Software\PYGZdq{}), to deal}
+\PYG{c}{\PYGZsh{} in the Software without restriction, including without limitation the rights}
+\PYG{c}{\PYGZsh{} to use, copy, modify, merge, publish, distribute, sublicense, and/or sell}
+\PYG{c}{\PYGZsh{} copies of the Software, and to permit persons to whom the Software is}
+\PYG{c}{\PYGZsh{} furnished to do so, subject to the following conditions:}
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} The above copyright notice and this permission notice shall be included in}
+\PYG{c}{\PYGZsh{} all copies or substantial portions of the Software.}
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} THE SOFTWARE IS PROVIDED \PYGZdq{}AS IS\PYGZdq{}, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR}
+\PYG{c}{\PYGZsh{} IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,}
+\PYG{c}{\PYGZsh{} FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE}
+\PYG{c}{\PYGZsh{} AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER}
+\PYG{c}{\PYGZsh{} LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,}
+\PYG{c}{\PYGZsh{} OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN}
+\PYG{c}{\PYGZsh{} THE SOFTWARE.}
+\PYG{c}{\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh [...]
+
+\PYG{k+kn}{import} \PYG{n+nn}{sys}\PYG{o}{,} \PYG{n+nn}{os}
+
+
+\PYG{c}{\PYGZsh{}88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888}
+
+\PYG{c}{\PYGZsh{} options}
+
+
+\PYG{c}{\PYGZsh{}88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888}
+
+
+\PYG{k+kn}{from} \PYG{n+nn}{optparse} \PYG{k+kn}{import} \PYG{n}{OptionParser}
+\PYG{k+kn}{import} \PYG{n+nn}{StringIO}
+
+\PYG{n}{parser} \PYG{o}{=} \PYG{n}{OptionParser}\PYG{p}{(}\PYG{n}{version}\PYG{o}{=}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZpc{}}\PYG{l+s}{play\PYGZus{}with\PYGZus{}colours 1.0}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{n}{usage} \PYG{o}{=} \PYG{l+s}{\PYGZdq{}}\PYG{l+s+se}{\PYGZbs{}n}\PYG{l+s+se}{\PYGZbs{}n}\PYG{l+s}{ play\PYGZus{}with\PYGZus{}colours }\PYG{l+s}{\PYGZdq{}}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZhy{}\PYGZhy{}flowchart FILE [options] }\PYG{l+s}{\PYGZdq{}}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{[\PYGZhy{}\PYGZhy{}colour\PYGZus{}scheme\PYGZus{}index INT ] }\PYG{l+s}{\PYGZdq{}}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{[\PYGZhy{}\PYGZhy{}key\PYGZus{}legend\PYGZus{}in\PYGZus{}graph]}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} pipeline}
+\PYG{c}{\PYGZsh{}}
+\PYG{n}{parser}\PYG{o}{.}\PYG{n}{add\PYGZus{}option}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZhy{}\PYGZhy{}flowchart}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{n}{dest}\PYG{o}{=}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{flowchart}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{n}{metavar}\PYG{o}{=}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{FILE}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{n+nb}{type}\PYG{o}{=}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{string}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{n}{help}\PYG{o}{=}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{Don}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{t actually run any commands; just print the pipeline }\PYG{l+s}{\PYGZdq{}}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{as a flowchart.}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{n}{parser}\PYG{o}{.}\PYG{n}{add\PYGZus{}option}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZhy{}\PYGZhy{}colour\PYGZus{}scheme\PYGZus{}index}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{n}{dest}\PYG{o}{=}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{colour\PYGZus{}scheme\PYGZus{}index}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{n}{metavar}\PYG{o}{=}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{INTEGER}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{n+nb}{type}\PYG{o}{=}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{int}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{n}{help}\PYG{o}{=}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{Index of colour scheme for flow chart.}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{n}{parser}\PYG{o}{.}\PYG{n}{add\PYGZus{}option}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZhy{}\PYGZhy{}key\PYGZus{}legend\PYGZus{}in\PYGZus{}graph}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{n}{dest}\PYG{o}{=}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{key\PYGZus{}legend\PYGZus{}in\PYGZus{}graph}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{n}{action}\PYG{o}{=}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{store\PYGZus{}true}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{n}{default}\PYG{o}{=}\PYG{n+nb+bp}{False}\PYG{p}{,}
+ \PYG{n}{help}\PYG{o}{=}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{Print out legend and key for dependency graph.}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+
+\PYG{p}{(}\PYG{n}{options}\PYG{p}{,} \PYG{n}{remaining\PYGZus{}args}\PYG{p}{)} \PYG{o}{=} \PYG{n}{parser}\PYG{o}{.}\PYG{n}{parse\PYGZus{}args}\PYG{p}{(}\PYG{p}{)}
+\PYG{k}{if} \PYG{o+ow}{not} \PYG{n}{options}\PYG{o}{.}\PYG{n}{flowchart}\PYG{p}{:}
+ \PYG{k}{raise} \PYG{n+ne}{Exception}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{Missing mandatory parameter: \PYGZhy{}\PYGZhy{}flowchart.}\PYG{l+s+se}{\PYGZbs{}n}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+
+
+
+
+\PYG{c}{\PYGZsh{}88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888}
+
+\PYG{c}{\PYGZsh{} imports}
+
+
+\PYG{c}{\PYGZsh{}88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888}
+
+\PYG{k+kn}{from} \PYG{n+nn}{ruffus} \PYG{k+kn}{import} \PYG{o}{*}
+\PYG{k+kn}{from} \PYG{n+nn}{ruffus.ruffus\PYGZus{}exceptions} \PYG{k+kn}{import} \PYG{n}{JobSignalledBreak}
+
+
+\PYG{c}{\PYGZsh{}88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888}
+
+\PYG{c}{\PYGZsh{} Pipeline}
+
+
+\PYG{c}{\PYGZsh{}88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888}
+
+
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} up to date tasks}
+\PYG{c}{\PYGZsh{}}
+\PYG{n+nd}{@check\PYGZus{}if\PYGZus{}uptodate} \PYG{p}{(}\PYG{k}{lambda} \PYG{p}{:} \PYG{p}{(}\PYG{n+nb+bp}{False}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{Up\PYGZus{}to\PYGZus{}date\PYGZus{}task1}\PYG{p}{(}\PYG{n}{infile}\PYG{p}{,} \PYG{n}{outfile}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{pass}
+
+\PYG{n+nd}{@check\PYGZus{}if\PYGZus{}uptodate} \PYG{p}{(}\PYG{k}{lambda} \PYG{p}{:} \PYG{p}{(}\PYG{n+nb+bp}{False}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{)}
+\PYG{n+nd}{@follows}\PYG{p}{(}\PYG{n}{Up\PYGZus{}to\PYGZus{}date\PYGZus{}task1}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{Up\PYGZus{}to\PYGZus{}date\PYGZus{}task2}\PYG{p}{(}\PYG{n}{infile}\PYG{p}{,} \PYG{n}{outfile}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{pass}
+
+\PYG{n+nd}{@check\PYGZus{}if\PYGZus{}uptodate} \PYG{p}{(}\PYG{k}{lambda} \PYG{p}{:} \PYG{p}{(}\PYG{n+nb+bp}{False}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{)}
+\PYG{n+nd}{@follows}\PYG{p}{(}\PYG{n}{Up\PYGZus{}to\PYGZus{}date\PYGZus{}task2}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{Up\PYGZus{}to\PYGZus{}date\PYGZus{}task3}\PYG{p}{(}\PYG{n}{infile}\PYG{p}{,} \PYG{n}{outfile}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{pass}
+
+
+\PYG{n+nd}{@check\PYGZus{}if\PYGZus{}uptodate} \PYG{p}{(}\PYG{k}{lambda} \PYG{p}{:} \PYG{p}{(}\PYG{n+nb+bp}{False}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{)}
+\PYG{n+nd}{@follows}\PYG{p}{(}\PYG{n}{Up\PYGZus{}to\PYGZus{}date\PYGZus{}task3}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{Up\PYGZus{}to\PYGZus{}date\PYGZus{}final\PYGZus{}target}\PYG{p}{(}\PYG{n}{infile}\PYG{p}{,} \PYG{n}{outfile}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{pass}
+
+
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} Explicitly specified}
+\PYG{c}{\PYGZsh{}}
+\PYG{n+nd}{@check\PYGZus{}if\PYGZus{}uptodate} \PYG{p}{(}\PYG{k}{lambda} \PYG{p}{:} \PYG{p}{(}\PYG{n+nb+bp}{False}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{)}
+\PYG{n+nd}{@follows}\PYG{p}{(}\PYG{n}{Up\PYGZus{}to\PYGZus{}date\PYGZus{}task1}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{Explicitly\PYGZus{}specified\PYGZus{}task}\PYG{p}{(}\PYG{n}{infile}\PYG{p}{,} \PYG{n}{outfile}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{pass}
+
+
+
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} Tasks to run}
+\PYG{c}{\PYGZsh{}}
+\PYG{n+nd}{@follows}\PYG{p}{(}\PYG{n}{Explicitly\PYGZus{}specified\PYGZus{}task}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{Task\PYGZus{}to\PYGZus{}run1}\PYG{p}{(}\PYG{n}{infile}\PYG{p}{,} \PYG{n}{outfile}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{pass}
+
+
+\PYG{n+nd}{@follows}\PYG{p}{(}\PYG{n}{Task\PYGZus{}to\PYGZus{}run1}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{Task\PYGZus{}to\PYGZus{}run2}\PYG{p}{(}\PYG{n}{infile}\PYG{p}{,} \PYG{n}{outfile}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{pass}
+
+\PYG{n+nd}{@follows}\PYG{p}{(}\PYG{n}{Task\PYGZus{}to\PYGZus{}run2}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{Task\PYGZus{}to\PYGZus{}run3}\PYG{p}{(}\PYG{n}{infile}\PYG{p}{,} \PYG{n}{outfile}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{pass}
+
+\PYG{n+nd}{@check\PYGZus{}if\PYGZus{}uptodate} \PYG{p}{(}\PYG{k}{lambda} \PYG{p}{:} \PYG{p}{(}\PYG{n+nb+bp}{False}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{)}
+\PYG{n+nd}{@follows}\PYG{p}{(}\PYG{n}{Task\PYGZus{}to\PYGZus{}run2}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{Up\PYGZus{}to\PYGZus{}date\PYGZus{}task\PYGZus{}forced\PYGZus{}to\PYGZus{}rerun}\PYG{p}{(}\PYG{n}{infile}\PYG{p}{,} \PYG{n}{outfile}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{pass}
+
+
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} Final target}
+\PYG{c}{\PYGZsh{}}
+\PYG{n+nd}{@follows}\PYG{p}{(}\PYG{n}{Up\PYGZus{}to\PYGZus{}date\PYGZus{}task\PYGZus{}forced\PYGZus{}to\PYGZus{}rerun}\PYG{p}{,} \PYG{n}{Task\PYGZus{}to\PYGZus{}run3}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{Final\PYGZus{}target}\PYG{p}{(}\PYG{n}{infile}\PYG{p}{,} \PYG{n}{outfile}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{pass}
+
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} Ignored downstream}
+\PYG{c}{\PYGZsh{}}
+\PYG{n+nd}{@follows}\PYG{p}{(}\PYG{n}{Final\PYGZus{}target}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{Downstream\PYGZus{}task1\PYGZus{}ignored}\PYG{p}{(}\PYG{n}{infile}\PYG{p}{,} \PYG{n}{outfile}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{pass}
+
+\PYG{n+nd}{@follows}\PYG{p}{(}\PYG{n}{Final\PYGZus{}target}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{Downstream\PYGZus{}task2\PYGZus{}ignored}\PYG{p}{(}\PYG{n}{infile}\PYG{p}{,} \PYG{n}{outfile}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{pass}
+
+
+
+
+
+
+
+
+
+
+
+\PYG{c}{\PYGZsh{}88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888}
+
+\PYG{c}{\PYGZsh{} Main logic}
+
+
+\PYG{c}{\PYGZsh{}88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888}
+\PYG{k+kn}{from} \PYG{n+nn}{collections} \PYG{k+kn}{import} \PYG{n}{defaultdict}
+\PYG{n}{custom\PYGZus{}flow\PYGZus{}chart\PYGZus{}colour\PYGZus{}scheme} \PYG{o}{=} \PYG{n}{defaultdict}\PYG{p}{(}\PYG{n+nb}{dict}\PYG{p}{)}
+
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} Base chart on this overall colour scheme index}
+\PYG{c}{\PYGZsh{}}
+\PYG{n}{custom\PYGZus{}flow\PYGZus{}chart\PYGZus{}colour\PYGZus{}scheme}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{colour\PYGZus{}scheme\PYGZus{}index}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]} \PYG{o}{=} \PYG{n}{options}\PYG{o}{.}\PYG{n}{colour\PYGZus{}scheme\PYGZus{}index}
+
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} Overriding colours}
+\PYG{c}{\PYGZsh{}}
+\PYG{k}{if} \PYG{n}{options}\PYG{o}{.}\PYG{n}{colour\PYGZus{}scheme\PYGZus{}index} \PYG{o}{==} \PYG{n+nb+bp}{None}\PYG{p}{:}
+ \PYG{n}{custom\PYGZus{}flow\PYGZus{}chart\PYGZus{}colour\PYGZus{}scheme}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{Vicious cycle}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{linecolor}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]} \PYG{o}{=} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZsh{}FF3232}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZsq{}}
+ \PYG{n}{custom\PYGZus{}flow\PYGZus{}chart\PYGZus{}colour\PYGZus{}scheme}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{Pipeline}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{fontcolor}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]} \PYG{o}{=} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZsh{}FF3232}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZsq{}}
+ \PYG{n}{custom\PYGZus{}flow\PYGZus{}chart\PYGZus{}colour\PYGZus{}scheme}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{Key}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{fontcolor}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]} \PYG{o}{=} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{black}\PYG{l+s}{\PYGZdq{}}
+ \PYG{n}{custom\PYGZus{}flow\PYGZus{}chart\PYGZus{}colour\PYGZus{}scheme}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{Key}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{fillcolor}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]} \PYG{o}{=} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZsh{}F6F4F4}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZsq{}}
+ \PYG{n}{custom\PYGZus{}flow\PYGZus{}chart\PYGZus{}colour\PYGZus{}scheme}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{Task to run}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{linecolor}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]} \PYG{o}{=} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZsh{}0044A0}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZsq{}}
+ \PYG{n}{custom\PYGZus{}flow\PYGZus{}chart\PYGZus{}colour\PYGZus{}scheme}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{Up\PYGZhy{}to\PYGZhy{}date}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{linecolor}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]} \PYG{o}{=} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{gray}\PYG{l+s}{\PYGZdq{}}
+ \PYG{n}{custom\PYGZus{}flow\PYGZus{}chart\PYGZus{}colour\PYGZus{}scheme}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{Final target}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{fillcolor}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]} \PYG{o}{=} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZsh{}EFA03B}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZsq{}}
+ \PYG{n}{custom\PYGZus{}flow\PYGZus{}chart\PYGZus{}colour\PYGZus{}scheme}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{Final target}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{fontcolor}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]} \PYG{o}{=} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{black}\PYG{l+s}{\PYGZdq{}}
+ \PYG{n}{custom\PYGZus{}flow\PYGZus{}chart\PYGZus{}colour\PYGZus{}scheme}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{Final target}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{color}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]} \PYG{o}{=} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{black}\PYG{l+s}{\PYGZdq{}}
+ \PYG{n}{custom\PYGZus{}flow\PYGZus{}chart\PYGZus{}colour\PYGZus{}scheme}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{Final target}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{dashed}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]} \PYG{o}{=} \PYG{l+m+mi}{0}
+ \PYG{n}{custom\PYGZus{}flow\PYGZus{}chart\PYGZus{}colour\PYGZus{}scheme}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{Vicious cycle}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{fillcolor}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]} \PYG{o}{=} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZsh{}FF3232}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZsq{}}
+ \PYG{n}{custom\PYGZus{}flow\PYGZus{}chart\PYGZus{}colour\PYGZus{}scheme}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{Vicious cycle}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{fontcolor}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]} \PYG{o}{=} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{white}\PYG{l+s}{\PYGZsq{}}
+ \PYG{n}{custom\PYGZus{}flow\PYGZus{}chart\PYGZus{}colour\PYGZus{}scheme}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{Vicious cycle}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{color}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]} \PYG{o}{=} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{white}\PYG{l+s}{\PYGZdq{}}
+ \PYG{n}{custom\PYGZus{}flow\PYGZus{}chart\PYGZus{}colour\PYGZus{}scheme}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{Vicious cycle}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{dashed}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]} \PYG{o}{=} \PYG{l+m+mi}{0}
+ \PYG{n}{custom\PYGZus{}flow\PYGZus{}chart\PYGZus{}colour\PYGZus{}scheme}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{Up\PYGZhy{}to\PYGZhy{}date task}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{fillcolor}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]} \PYG{o}{=} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZsh{}B8CC6E}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZsq{}}
+ \PYG{n}{custom\PYGZus{}flow\PYGZus{}chart\PYGZus{}colour\PYGZus{}scheme}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{Up\PYGZhy{}to\PYGZhy{}date task}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{fontcolor}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]} \PYG{o}{=} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZsh{}006000}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZsq{}}
+ \PYG{n}{custom\PYGZus{}flow\PYGZus{}chart\PYGZus{}colour\PYGZus{}scheme}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{Up\PYGZhy{}to\PYGZhy{}date task}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{color}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]} \PYG{o}{=} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZsh{}006000}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZsq{}}
+ \PYG{n}{custom\PYGZus{}flow\PYGZus{}chart\PYGZus{}colour\PYGZus{}scheme}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{Up\PYGZhy{}to\PYGZhy{}date task}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{dashed}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]} \PYG{o}{=} \PYG{l+m+mi}{0}
+ \PYG{n}{custom\PYGZus{}flow\PYGZus{}chart\PYGZus{}colour\PYGZus{}scheme}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{Down stream}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{fillcolor}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]} \PYG{o}{=} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{white}\PYG{l+s}{\PYGZdq{}}
+ \PYG{n}{custom\PYGZus{}flow\PYGZus{}chart\PYGZus{}colour\PYGZus{}scheme}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{Down stream}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{fontcolor}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]} \PYG{o}{=} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{gray}\PYG{l+s}{\PYGZdq{}}
+ \PYG{n}{custom\PYGZus{}flow\PYGZus{}chart\PYGZus{}colour\PYGZus{}scheme}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{Down stream}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{color}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]} \PYG{o}{=} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{gray}\PYG{l+s}{\PYGZdq{}}
+ \PYG{n}{custom\PYGZus{}flow\PYGZus{}chart\PYGZus{}colour\PYGZus{}scheme}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{Down stream}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{dashed}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]} \PYG{o}{=} \PYG{l+m+mi}{0}
+ \PYG{n}{custom\PYGZus{}flow\PYGZus{}chart\PYGZus{}colour\PYGZus{}scheme}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{Explicitly specified task}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{fillcolor}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]} \PYG{o}{=} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{transparent}\PYG{l+s}{\PYGZdq{}}
+ \PYG{n}{custom\PYGZus{}flow\PYGZus{}chart\PYGZus{}colour\PYGZus{}scheme}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{Explicitly specified task}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{fontcolor}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]} \PYG{o}{=} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{black}\PYG{l+s}{\PYGZdq{}}
+ \PYG{n}{custom\PYGZus{}flow\PYGZus{}chart\PYGZus{}colour\PYGZus{}scheme}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{Explicitly specified task}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{color}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]} \PYG{o}{=} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{black}\PYG{l+s}{\PYGZdq{}}
+ \PYG{n}{custom\PYGZus{}flow\PYGZus{}chart\PYGZus{}colour\PYGZus{}scheme}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{Explicitly specified task}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{dashed}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]} \PYG{o}{=} \PYG{l+m+mi}{0}
+ \PYG{n}{custom\PYGZus{}flow\PYGZus{}chart\PYGZus{}colour\PYGZus{}scheme}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{Task to run}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{fillcolor}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]} \PYG{o}{=} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZsh{}EBF3FF}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZsq{}}
+ \PYG{n}{custom\PYGZus{}flow\PYGZus{}chart\PYGZus{}colour\PYGZus{}scheme}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{Task to run}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{fontcolor}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]} \PYG{o}{=} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZsh{}0044A0}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZsq{}}
+ \PYG{n}{custom\PYGZus{}flow\PYGZus{}chart\PYGZus{}colour\PYGZus{}scheme}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{Task to run}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{color}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]} \PYG{o}{=} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZsh{}0044A0}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZsq{}}
+ \PYG{n}{custom\PYGZus{}flow\PYGZus{}chart\PYGZus{}colour\PYGZus{}scheme}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{Task to run}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{dashed}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]} \PYG{o}{=} \PYG{l+m+mi}{0}
+ \PYG{n}{custom\PYGZus{}flow\PYGZus{}chart\PYGZus{}colour\PYGZus{}scheme}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{Up\PYGZhy{}to\PYGZhy{}date task forced to rerun}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{fillcolor}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]} \PYG{o}{=} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{transparent}\PYG{l+s}{\PYGZsq{}}
+ \PYG{n}{custom\PYGZus{}flow\PYGZus{}chart\PYGZus{}colour\PYGZus{}scheme}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{Up\PYGZhy{}to\PYGZhy{}date task forced to rerun}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{fontcolor}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]} \PYG{o}{=} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZsh{}0044A0}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZsq{}}
+ \PYG{n}{custom\PYGZus{}flow\PYGZus{}chart\PYGZus{}colour\PYGZus{}scheme}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{Up\PYGZhy{}to\PYGZhy{}date task forced to rerun}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{color}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]} \PYG{o}{=} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZsh{}0044A0}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZsq{}}
+ \PYG{n}{custom\PYGZus{}flow\PYGZus{}chart\PYGZus{}colour\PYGZus{}scheme}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{Up\PYGZhy{}to\PYGZhy{}date task forced to rerun}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{dashed}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]} \PYG{o}{=} \PYG{l+m+mi}{1}
+ \PYG{n}{custom\PYGZus{}flow\PYGZus{}chart\PYGZus{}colour\PYGZus{}scheme}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{Up\PYGZhy{}to\PYGZhy{}date Final target}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{fillcolor}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]} \PYG{o}{=} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZsh{}EFA03B}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZsq{}}
+ \PYG{n}{custom\PYGZus{}flow\PYGZus{}chart\PYGZus{}colour\PYGZus{}scheme}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{Up\PYGZhy{}to\PYGZhy{}date Final target}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{fontcolor}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]} \PYG{o}{=} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZsh{}006000}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZsq{}}
+ \PYG{n}{custom\PYGZus{}flow\PYGZus{}chart\PYGZus{}colour\PYGZus{}scheme}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{Up\PYGZhy{}to\PYGZhy{}date Final target}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{color}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]} \PYG{o}{=} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZsh{}006000}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZsq{}}
+ \PYG{n}{custom\PYGZus{}flow\PYGZus{}chart\PYGZus{}colour\PYGZus{}scheme}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{Up\PYGZhy{}to\PYGZhy{}date Final target}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{dashed}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]} \PYG{o}{=} \PYG{l+m+mi}{0}
+
+\PYG{k}{if} \PYG{n}{\PYGZus{}\PYGZus{}name\PYGZus{}\PYGZus{}} \PYG{o}{==} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{\PYGZus{}\PYGZus{}main\PYGZus{}\PYGZus{}}\PYG{l+s}{\PYGZsq{}}\PYG{p}{:}
+ \PYG{n}{pipeline\PYGZus{}printout\PYGZus{}graph} \PYG{p}{(}
+
+ \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{options}\PYG{o}{.}\PYG{n}{flowchart}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,}
+ \PYG{c}{\PYGZsh{} use flowchart file name extension to decide flowchart format}
+ \PYG{c}{\PYGZsh{} e.g. svg, jpg etc.}
+ \PYG{n}{os}\PYG{o}{.}\PYG{n}{path}\PYG{o}{.}\PYG{n}{splitext}\PYG{p}{(}\PYG{n}{options}\PYG{o}{.}\PYG{n}{flowchart}\PYG{p}{)}\PYG{p}{[}\PYG{l+m+mi}{1}\PYG{p}{]}\PYG{p}{[}\PYG{l+m+mi}{1}\PYG{p}{:}\PYG{p}{]}\PYG{p}{,}
+
+ \PYG{c}{\PYGZsh{} final targets}
+ \PYG{p}{[}\PYG{n}{Final\PYGZus{}target}\PYG{p}{,} \PYG{n}{Up\PYGZus{}to\PYGZus{}date\PYGZus{}final\PYGZus{}target}\PYG{p}{]}\PYG{p}{,}
+
+ \PYG{c}{\PYGZsh{} Explicitly specified tasks}
+ \PYG{p}{[}\PYG{n}{Explicitly\PYGZus{}specified\PYGZus{}task}\PYG{p}{]}\PYG{p}{,}
+
+ \PYG{c}{\PYGZsh{} Do we want key legend}
+ \PYG{n}{no\PYGZus{}key\PYGZus{}legend} \PYG{o}{=} \PYG{o+ow}{not} \PYG{n}{options}\PYG{o}{.}\PYG{n}{key\PYGZus{}legend\PYGZus{}in\PYGZus{}graph}\PYG{p}{,}
+
+ \PYG{c}{\PYGZsh{} Print all the task types whether used or not}
+ \PYG{n}{minimal\PYGZus{}key\PYGZus{}legend} \PYG{o}{=} \PYG{n+nb+bp}{False}\PYG{p}{,}
+
+ \PYG{n}{user\PYGZus{}colour\PYGZus{}scheme} \PYG{o}{=} \PYG{n}{custom\PYGZus{}flow\PYGZus{}chart\PYGZus{}colour\PYGZus{}scheme}\PYG{p}{,}
+ \PYG{n}{pipeline\PYGZus{}name} \PYG{o}{=} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{Colour schemes}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\end{Verbatim}
+\end{quote}
+
+
+\chapter{Overview:}
+\label{contents:overview}
+
+\section{Cheat Sheet}
+\label{cheatsheet:cheat-sheet}\label{cheatsheet::doc}\label{cheatsheet:id1}
+The \code{ruffus} module is a lightweight way to add support
+for running computational pipelines.
+
+\begin{DUlineblock}{0em}
+\item[] Each stage or \textbf{task} in a computational pipeline is represented by a python function
+\item[] Each python function can be called in parallel to run multiple \textbf{jobs}.
+\end{DUlineblock}
+
+
+\subsection{1. Annotate functions with \textbf{Ruffus} decorators}
+\label{cheatsheet:annotate-functions-with-ruffus-decorators}
+
+\subsubsection{Core}
+\label{cheatsheet:core}
+\begin{tabular}{|p{0.317\linewidth}|p{0.317\linewidth}|p{0.317\linewidth}|}
+\hline
+\textbf{\relax
+Decorator
+} & \textbf{\relax
+Syntax
+} & \textbf{\relax }\\\hline
+
+@originate ({\hyperref[tutorials/new_tutorial/originate:new-manual-originate]{\emph{Manual}}})
+ &
+{\hyperref[decorators/originate:decorators-originate]{\emph{@originate}}} ( \code{output\_files}, {[}\code{extra\_parameters},...{]} )
+ & \\\hline
+
+@split ({\hyperref[tutorials/new_tutorial/split:new-manual-split]{\emph{Manual}}})
+ &
+{\hyperref[decorators/split:decorators-split]{\emph{@split}}} ( \code{tasks\_or\_file\_names}, \code{output\_files}, {[}\code{extra\_parameters},...{]} )
+ & \\\hline
+
+@transform ({\hyperref[tutorials/new_tutorial/transform:new-manual-transform]{\emph{Manual}}})
+ &
+\begin{DUlineblock}{0em}
+\item[] {\hyperref[decorators/transform:decorators-transform]{\emph{@transform}}} ( \code{tasks\_or\_file\_names}, {\hyperref[decorators/transform_ex:decorators-transform-suffix-string]{\emph{suffix}}}\emph{(}\code{suffix\_string}\emph{)}, \code{output\_pattern}, {[}\code{extra\_parameters},...{]} )
+\item[] {\hyperref[decorators/transform:decorators-transform]{\emph{@transform}}} ( \code{tasks\_or\_file\_names}, {\hyperref[decorators/transform_ex:decorators-transform-matching-regex]{\emph{regex}}}\emph{(}\code{regex\_pattern}\emph{)}, \code{output\_pattern}, {[}\code{extra\_parameters},...{]} )
+\end{DUlineblock}
+ & \\\hline
+
+@merge ({\hyperref[tutorials/new_tutorial/merge:new-manual-merge]{\emph{Manual}}})
+ &
+{\hyperref[decorators/merge:decorators-merge]{\emph{@merge}}} (\code{tasks\_or\_file\_names}, \code{output}, {[}\code{extra\_parameters},...{]} )
+ & \\\hline
+
+@posttask ({\hyperref[tutorials/new_tutorial/posttask:new-manual-posttask]{\emph{Manual}}})
+ &
+\begin{DUlineblock}{0em}
+\item[] {\hyperref[decorators/posttask:decorators-posttask]{\emph{@posttask}}} ( \code{signal\_task\_completion\_function} )
+\item[] {\hyperref[decorators/posttask:decorators-posttask]{\emph{@posttask}}} ({\hyperref[decorators/indicator_objects:decorators-touch-file]{\emph{touch\_file}}}( \code{'task1.completed'} ))
+\end{DUlineblock}
+ & \\\hline
+\end{tabular}
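+
+As a minimal sketch of the decorator syntax summarised above (the task and file
+names here are purely illustrative and not taken from any tutorial example), a
+two-stage pipeline could be written as:
+
+\begin{Verbatim}
+from ruffus import *
+
+@originate(["a.start", "b.start"])
+def create_initial_files(output_file):
+    # one job per output file
+    open(output_file, "w").close()
+
+@transform(create_initial_files, suffix(".start"), ".finished")
+def run_task(input_file, output_file):
+    # one parallel job per matching input file
+    open(output_file, "w").write(open(input_file).read())
+
+pipeline_run([run_task])
+\end{Verbatim}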
+
+
+
+\subsubsection{See \emph{Decorators} for a complete list of decorators}
+\label{cheatsheet:see-decorators-for-a-complete-list-of-decorators}
+
+\subsection{2. Print dependency graph if necessary}
+\label{cheatsheet:print-dependency-graph-if-necessary}\begin{itemize}
+\item {}
+For a graphical flowchart in \code{jpg}, \code{svg}, \code{dot}, \code{png}, \code{ps}, \code{gif} formats:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n}{pipeline\PYGZus{}printout\PYGZus{}graph} \PYG{p}{(} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{flowchart.svg}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\end{Verbatim}
+
+\end{itemize}
+\begin{itemize}
+\item {}
+For a text printout of all jobs:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n}{pipeline\PYGZus{}printout}\PYG{p}{(}\PYG{p}{)}
+\end{Verbatim}
+
+\end{itemize}
+
+
+\subsection{3. Run the pipeline}
+\label{cheatsheet:run-the-pipeline}
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{n}{multiprocess} \PYG{o}{=} \PYG{n}{N\PYGZus{}PARALLEL\PYGZus{}JOBS}\PYG{p}{)}
+\end{Verbatim}
+\phantomsection\label{pipeline_functions:pipeline-functions}
+See {\hyperref[decorators/decorators:decorators]{\emph{Decorators}}} for more decorators
+\phantomsection\label{pipeline_functions:pipeline-functions-pipeline-run}\phantomsection\label{pipeline_functions:pipeline-run}\phantomsection\label{pipeline_functions:pipeline-functions-pipeline-printout}\phantomsection\label{pipeline_functions:pipeline-printout}\phantomsection\label{pipeline_functions:pipeline-functions-pipeline-printout-graph}\phantomsection\label{pipeline_functions:pipeline-printout-graph}\phantomsection\label{pipeline_functions:pipeline-functions-pipeline-get-task-n [...]
+
+\section{Pipeline functions}
+\label{pipeline_functions:ppg-history-file}\label{pipeline_functions:pipeline-functions-pipeline-printout-graph-history-file}\label{pipeline_functions::doc}\label{pipeline_functions:id1}\begin{quote}
+
+There are only four functions for \textbf{Ruffus} pipelines:
+\begin{itemize}
+\item {}
+{\hyperref[pipeline_functions:pipeline-functions-pipeline-run]{\emph{pipeline\_run}}} executes a pipeline
+
+\item {}
+{\hyperref[pipeline_functions:pipeline-functions-pipeline-printout]{\emph{pipeline\_printout}}} prints a list of tasks and jobs which will be run in a pipeline
+
+\item {}
+{\hyperref[pipeline_functions:pipeline-functions-pipeline-printout-graph]{\emph{pipeline\_printout\_graph}}} prints a schematic flowchart of pipeline tasks in various graphical formats
+
+\item {}
+{\hyperref[pipeline_functions:pipeline-functions-pipeline-get-task-names]{\emph{pipeline\_get\_task\_names}}} returns a list of all task names in the pipeline
+
+\end{itemize}
+\end{quote}
+\phantomsection\label{pipeline_functions:pipeline-functions-pipeline-run}
+\index{pipeline functions!pipeline\_run}\index{pipeline\_run!Run pipeline}\index{Run pipeline!pipeline\_run}
+
+\subsection{\emph{pipeline\_run}}
+\label{pipeline_functions:id2}\label{pipeline_functions:index-0}
+\textbf{pipeline\_run} ( {\hyperref[pipeline_functions:pipeline-functions-pipeline-run-target-tasks]{\emph{target\_tasks}}} = {[}{]}, {\hyperref[pipeline_functions:pipeline-functions-pipeline-run-forcedtorun-tasks]{\emph{forcedtorun\_tasks}}} = {[}{]}, {\hyperref[pipeline_functions:pipeline-functions-pipeline-run-multiprocess]{\emph{multiprocess}}} = 1, {\hyperref[pipeline_functions:pipeline-functions-pipeline-run-logger]{\emph{logger}}} = stderr\_logger, {\hyperref[pipeline_functions:p [...]
+\begin{quote}
+
+\textbf{Purpose:}
+\begin{quote}
+
+Runs all specified pipelined functions if they or any antecedent tasks are
+incomplete or out-of-date.
+\end{quote}
+
+\textbf{Example}:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} Run task2 whatever its state, and also task1 and antecedents if they are incomplete}
+\PYG{c}{\PYGZsh{} Do not log pipeline progress messages to stderr}
+\PYG{c}{\PYGZsh{}}
+\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{p}{[}\PYG{n}{task1}\PYG{p}{,} \PYG{n}{task2}\PYG{p}{]}\PYG{p}{,} \PYG{n}{forcedtorun\PYGZus{}tasks} \PYG{o}{=} \PYG{p}{[}\PYG{n}{task2}\PYG{p}{]}\PYG{p}{,} \PYG{n}{logger} \PYG{o}{=} \PYG{n}{blackhole\PYGZus{}logger}\PYG{p}{)}
+\end{Verbatim}
+\end{quote}
+
+\textbf{Parameters:}
+\end{quote}
+\phantomsection\label{pipeline_functions:pipeline-functions-pipeline-run-target-tasks}\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{target\_tasks}}] \leavevmode
+Pipeline functions and any necessary antecedents (specified implicitly or with {\hyperref[decorators/follows:decorators-follows]{\emph{@follows}}})
+which should be invoked with the appropriate parameters if they are incomplete or out-of-date.
+
+\end{description}
+
+\end{itemize}
+\phantomsection\label{pipeline_functions:pipeline-functions-pipeline-run-forcedtorun-tasks}\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{forcedtorun\_tasks}}] \leavevmode
+Optional. These pipeline functions will be invoked regardless of their state.
+Any antecedent tasks will also be executed if they are out-of-date or incomplete.
+
+\end{description}
+
+\end{itemize}
+\phantomsection\label{pipeline_functions:pipeline-functions-pipeline-run-multiprocess}\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{multiprocess}}] \leavevmode
+Optional. The number of processes which should be dedicated to running independent
+tasks and jobs within each task in parallel. If \code{multiprocess} is set to 1, the pipeline will
+execute in the main process.
+
+\end{description}
+
+\end{itemize}
+\phantomsection\label{pipeline_functions:pipeline-functions-pipeline-run-multithread}\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{multithread}}] \leavevmode
+Optional. The number of threads which should be dedicated to running independent
+tasks and jobs within each task in parallel. Should be used only with drmaa. Otherwise, the CPython \href{https://wiki.python.org/moin/GlobalInterpreterLock}{global interpreter lock (GIL)}
+will slow down your pipeline.
+
+\end{description}
+
+\end{itemize}
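+
+As a sketch of the difference between the two parameters (\code{final\_task} is a
+placeholder for the last task of your pipeline):
+
+\begin{Verbatim}
+# run up to 4 jobs at a time in separate processes
+pipeline_run([final_task], multiprocess = 4)
+
+# run up to 4 jobs at a time in threads; only advisable when jobs spend
+# their time waiting on drmaa-submitted cluster work rather than in python code
+pipeline_run([final_task], multithread = 4)
+\end{Verbatim}
+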
+\phantomsection\label{pipeline_functions:pipeline-functions-pipeline-run-logger}\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{logger}}] \leavevmode
+For logging messages indicating the progress of the pipeline in terms of tasks and jobs.
+Defaults to outputting to sys.stderr.
+Setting \code{logger=blackhole\_logger} will prevent any logging output.
+
+\end{description}
+
+\end{itemize}
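+
+A sketch using the standard \code{logging} module (the logger name and
+\code{final\_task} are illustrative; any object providing the usual
+\code{debug()} / \code{warning()} methods should work, and the proxy\_logger
+documentation covers logging from multiple processes):
+
+\begin{Verbatim}
+import logging
+logging.basicConfig(level=logging.DEBUG)
+my_logger = logging.getLogger("my_pipeline")
+
+# send Ruffus progress messages to the standard logging module
+pipeline_run([final_task], logger = my_logger)
+\end{Verbatim}
+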
+\phantomsection\label{pipeline_functions:pipeline-functions-pipeline-run-gnu-make}\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{gnu\_make\_maximal\_rebuild\_mode}}] \leavevmode
+\begin{notice}{warning}{Warning:}
+This is a dangerous option. Use rarely and with caution.
+\end{notice}
+
+Optional parameter governing how \textbf{Ruffus} determines which part of the pipeline is
+out of date and needs to be re-run. If set to \code{False}, \textbf{Ruffus} will work back
+from the \code{target\_tasks} and only execute the part of the pipeline downstream of the
+first up-to-date task that it encounters. For example, if there are five tasks:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} task1 \PYGZhy{}\PYGZgt{} task2 \PYGZhy{}\PYGZgt{} task3 \PYGZhy{}\PYGZgt{} task4 \PYGZhy{}\PYGZgt{} task5}
+\PYG{c}{\PYGZsh{}}
+\PYG{n}{target\PYGZus{}tasks} \PYG{o}{=} \PYG{p}{[}\PYG{n}{task5}\PYG{p}{]}
+\end{Verbatim}
+\end{quote}
+
+If \code{task3()} is up-to-date, then only \code{task4()} and \code{task5()} will be run.
+This will be the case even if \code{task2()} and \code{task1()} are incomplete.
+
+This allows you to remove all intermediate results produced by \code{task1 -\textgreater{} task3}.
+
+\end{description}
+
+\end{itemize}
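+
+For the five-task chain above, this behaviour would be requested as follows
+(a sketch; \code{task5} stands for the final pipeline function):
+
+\begin{Verbatim}
+# only run tasks downstream of the first up-to-date task found
+# when working back from the target
+pipeline_run([task5], gnu_make_maximal_rebuild_mode = False)
+\end{Verbatim}
+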
+\phantomsection\label{pipeline_functions:pipeline-functions-pipeline-run-verbose}\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{verbose}}] \leavevmode
+Optional parameter indicating the verbosity of the messages sent to \code{logger}
+(defaults to level 1 if unspecified):
+\begin{itemize}
+\item {}
+level \textbf{0} : \emph{nothing}
+
+\item {}
+level \textbf{1} : \emph{Out-of-date Task names}
+
+\item {}
+level \textbf{2} : \emph{All Tasks (including any task function docstrings)}
+
+\item {}
+level \textbf{3} : \emph{Out-of-date Jobs in Out-of-date Tasks, no explanation}
+
+\item {}
+level \textbf{4} : \emph{Out-of-date Jobs in Out-of-date Tasks, with explanations and warnings}
+
+\item {}
+level \textbf{5} : \emph{All Jobs in Out-of-date Tasks (only a list of up-to-date Tasks is included)}
+
+\item {}
+level \textbf{6} : \emph{All jobs in All Tasks whether out of date or not}
+
+\item {}
+level \textbf{10}: \emph{logs messages useful only for debugging ruffus pipeline code}
+
+\end{itemize}
+
+\code{verbose \textgreater{}= 10} is intended for debugging \textbf{Ruffus} by the developers, and the details
+are liable to change from release to release.
+
+\end{description}
+
+\end{itemize}
+\phantomsection\label{pipeline_functions:pipeline-functions-pipeline-run-runtime-data}\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{runtime\_data}}] \leavevmode
+Experimental feature for passing data to tasks at run time
+
+\end{description}
+
+\end{itemize}
+\phantomsection\label{pipeline_functions:pipeline-functions-pipeline-run-one-second-per-job}\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{one\_second\_per\_job}}] \leavevmode
+To work around poor file timestamp resolution on some file systems.
+Defaults to \code{True} if \code{checksum\_level} is 0, forcing tasks to take a minimum of 1 second to complete.
+If your file system has coarse-grained time stamps, you can turn on this delay
+by setting \emph{one\_second\_per\_job} to \code{True}.
+
+\end{description}
+
+\end{itemize}
+\phantomsection\label{pipeline_functions:pipeline-functions-pipeline-run-touch-files-only}\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{touch\_files\_only}}] \leavevmode
+Create or update output files only, to simulate the running of the pipeline.
+Does not invoke real task functions to run jobs. This is most useful to force a
+pipeline to acknowledge that a particular part is now up-to-date (see the call sketch below).
+
+This will not work properly if the identities of some files are not known beforehand,
+and are only determined at run time. In other words, it is not recommended if \code{@split} or custom parameter generators are being used.
+
+\end{description}
+
+\end{itemize}
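+
+For illustration, a minimal call sketch (\code{final\_task} is a hypothetical placeholder task):
+
+\begin{Verbatim}[commandchars=\\\{\}]
+# touch or create the output files of any out-of-date jobs,
+# without actually running the task functions
+pipeline_run([final_task], touch_files_only = True)
+\end{Verbatim}
+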
+\phantomsection\label{pipeline_functions:pipeline-functions-pipeline-run-exceptions-terminate-immediately}\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{exceptions\_terminate\_immediately}}] \leavevmode
+Exceptions cause immediate termination of the pipeline.
+
+\end{description}
+
+\end{itemize}
+\phantomsection\label{pipeline_functions:pipeline-functions-pipeline-run-log-exceptions}\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{log\_exceptions}}] \leavevmode
+Print exceptions to the logger as soon as they occur.
+
+\end{description}
+
+\end{itemize}
+\phantomsection\label{pipeline_functions:pipeline-functions-pipeline-run-history-file}\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{history\_file}}] \leavevmode
+The database file which stores checksums and file timestamps for input/output files.
+Defaults to \code{.ruffus\_history.sqlite} if unspecified (see the example below).
+
+\end{description}
+
+\end{itemize}
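+
+For illustration, a hedged sketch using a per-project history file (the file name is arbitrary):
+
+\begin{Verbatim}[commandchars=\\\{\}]
+# keep the checksum / timestamp database alongside the project data
+pipeline_run([final_task], history_file = "my_project.ruffus_history.sqlite")
+\end{Verbatim}
+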
+\phantomsection\label{pipeline_functions:pipeline-functions-pipeline-run-checksum-level}\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{checksum\_level}}] \leavevmode
+Several options are available for checking whether tasks are up to date (the default is level 1; see the sketch below):
+\begin{itemize}
+\item {}
+level 0 : Use only file timestamps
+
+\item {}
+level 1 : above, plus timestamp of successful job completion
+
+\item {}
+level 2 : above, plus a checksum of the pipeline function body
+
+\item {}
+level 3 : above, plus a checksum of the pipeline function default arguments and the additional arguments passed in by task decorators
+
+\end{itemize}
+
+\end{description}
+
+\end{itemize}
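+
+For illustration, a minimal call sketch:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+# rely on file timestamps alone, as in a traditional make-style build
+pipeline_run([final_task], checksum_level = 0)
+
+# additionally re-run jobs whose task function code has changed
+pipeline_run([final_task], checksum_level = 2)
+\end{Verbatim}
+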
+\phantomsection\label{pipeline_functions:pipeline-functions-pipeline-run-verbose-abbreviated-path}\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{verbose\_abbreviated\_path}}] \leavevmode
+Whether and how input and output paths are abbreviated. Defaults to 2 if unspecified (see the sketch below):
+\begin{itemize}
+\item {}
+level 0: The full (expanded, abspath) input or output path
+
+\item {}
+level \textgreater{} 1: The number of subdirectories to include. Abbreviated paths are prefixed with \code{{[},,,{]}/}
+
+\item {}
+level \textless{} 0: Input / Output parameters are truncated to \code{MMM} letters where \code{verbose\_abbreviated\_path == -MMM}. Subdirectories are first removed to see if this allows the paths to fit in the specified limit. Otherwise abbreviated paths are prefixed by \code{\textless{}???\textgreater{}}
+
+\end{itemize}
+
+\end{description}
+
+\end{itemize}
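+
+For illustration, a hedged call sketch; the exact abbreviated output depends on your file paths:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+# show two levels of enclosing subdirectories for each input / output file
+pipeline_run([final_task], verbose = 3, verbose_abbreviated_path = 2)
+
+# truncate each input / output parameter to at most 60 characters
+pipeline_run([final_task], verbose = 3, verbose_abbreviated_path = -60)
+\end{Verbatim}
+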
+\phantomsection\label{pipeline_functions:pipeline-functions-pipeline-printout}
+\index{pipeline functions!pipeline\_run}\index{pipeline\_printout!Printout simulated run of the pipeline}\index{Printout simulated run of the pipeline!pipeline\_printout}
+
+\subsection{\emph{pipeline\_printout}}
+\label{pipeline_functions:index-1}\label{pipeline_functions:id3}
+\textbf{pipeline\_printout} ({\hyperref[pipeline_functions:pipeline-functions-pipeline-printout-output-stream]{\emph{output\_stream}}} = sys.stdout, {\hyperref[pipeline_functions:pipeline-functions-pipeline-printout-target-tasks]{\emph{target\_tasks}}} = {[}{]}, {\hyperref[pipeline_functions:pipeline-functions-pipeline-printout-forcedtorun-tasks]{\emph{forcedtorun\_tasks}}} = {[}{]}, {\hyperref[pipeline_functions:pipeline-functions-pipeline-printout-verbose]{\emph{verbose}}} = 1, {\hyper [...]
+\begin{quote}
+
+\textbf{Purpose:}
+\begin{quote}
+
+Prints out all the pipelined functions which will be invoked given the specified \code{target\_tasks},
+without actually running the pipeline. Because this is a simulation, some of the job
+parameters may be incorrect. For example, the results of a {\hyperref[tutorials/new_tutorial/split:new-manual-split]{\emph{@split}}}
+operation are not predetermined and will only be known after the pipelined function
+splits up the original data. The parameters of all downstream pipelined functions will
+change depending on this initial operation.
+\end{quote}
+\begin{description}
+\item[{\textbf{Example}:}] \leavevmode
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} Simulate running task2 whatever its state, and also task1 and antecedents}
+\PYG{c}{\PYGZsh{} if they are incomplete}
+\PYG{c}{\PYGZsh{} Print out results to STDOUT}
+\PYG{c}{\PYGZsh{}}
+\PYG{n}{pipeline\PYGZus{}printout}\PYG{p}{(}\PYG{n}{sys}\PYG{o}{.}\PYG{n}{stdout}\PYG{p}{,} \PYG{p}{[}\PYG{n}{task1}\PYG{p}{,} \PYG{n}{task2}\PYG{p}{]}\PYG{p}{,} \PYG{n}{forcedtorun\PYGZus{}tasks} \PYG{o}{=} \PYG{p}{[}\PYG{n}{task2}\PYG{p}{]}\PYG{p}{,} \PYG{n}{verbose} \PYG{o}{=} \PYG{l+m+mi}{1}\PYG{p}{)}
+\end{Verbatim}
+
+\end{description}
+
+\textbf{Parameters:}
+\end{quote}
+\phantomsection\label{pipeline_functions:pipeline-functions-pipeline-printout-output-stream}\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{output\_stream}}] \leavevmode
+Where to print out the results of simulating the running of the pipeline.
+
+\end{description}
+
+\end{itemize}
+\phantomsection\label{pipeline_functions:pipeline-functions-pipeline-printout-target-tasks}\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{target\_tasks}}] \leavevmode
+As in {\hyperref[pipeline_functions:pipeline-functions-pipeline-run]{\emph{pipeline\_run}}}: Pipeline functions and any necessary antecedents (specified implicitly or with {\hyperref[decorators/follows:decorators-follows]{\emph{@follows}}})
+which should be invoked with the appropriate parameters if they are incomplete or out-of-date.
+
+\end{description}
+
+\end{itemize}
+\phantomsection\label{pipeline_functions:pipeline-functions-pipeline-printout-forcedtorun-tasks}\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{forcedtorun\_tasks}}] \leavevmode
+As in {\hyperref[pipeline_functions:pipeline-functions-pipeline-run]{\emph{pipeline\_run}}}: these pipeline functions will be invoked regardless of their state.
+Any antecedent tasks will also be executed if they are out-of-date or incomplete.
+
+\end{description}
+
+\end{itemize}
+\phantomsection\label{pipeline_functions:pipeline-functions-pipeline-printout-verbose}\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{verbose}}] \leavevmode
+Optional parameter indicating the verbosity of the messages sent to \code{output\_stream}
+(defaults to level 4 if unspecified):
+\begin{itemize}
+\item {}
+level \textbf{0} : \emph{nothing}
+
+\item {}
+level \textbf{1} : \emph{Out-of-date Task names}
+
+\item {}
+level \textbf{2} : \emph{All Tasks (including any task function docstrings)}
+
+\item {}
+level \textbf{3} : \emph{Out-of-date Jobs in Out-of-date Tasks, no explanation}
+
+\item {}
+level \textbf{4} : \emph{Out-of-date Jobs in Out-of-date Tasks, with explanations and warnings}
+
+\item {}
+level \textbf{5} : \emph{All Jobs in Out-of-date Tasks, (include only list of up-to-date tasks)}
+
+\item {}
+level \textbf{6} : \emph{All jobs in All Tasks whether out of date or not}
+
+\item {}
+level \textbf{10}: \emph{logs messages useful only for debugging ruffus pipeline code}
+
+\end{itemize}
+
+Levels of \code{verbose \textgreater{}= 10} are intended for debugging \textbf{Ruffus} by the developers, and the details
+are liable to change from release to release.
+
+\end{description}
+
+\end{itemize}
+\phantomsection\label{pipeline_functions:pipeline-functions-pipeline-printout-indent}\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{indent}}] \leavevmode
+Optional parameter governing the indentation when printing out the component job
+parameters of each task function.
+
+\end{description}
+
+\end{itemize}
+\phantomsection\label{pipeline_functions:pipeline-functions-pipeline-printout-gnu-make}\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{gnu\_make\_maximal\_rebuild\_mode}}] \leavevmode
+\begin{notice}{warning}{Warning:}
+This is a dangerous option. Use rarely and with caution
+\end{notice}
+
+See explanation in {\hyperref[pipeline_functions:pipeline-functions-pipeline-run-gnu-make]{\emph{pipeline\_run}}}.
+
+\end{description}
+
+\end{itemize}
+\phantomsection\label{pipeline_functions:pipeline-functions-pipeline-printout-wrap-width}\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{wrap\_width}}] \leavevmode
+Optional parameter governing the length of each line before it starts wrapping
+around.
+
+\end{description}
+
+\end{itemize}
+\phantomsection\label{pipeline_functions:pipeline-functions-pipeline-printout-runtime-data}\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{runtime\_data}}] \leavevmode
+Experimental feature for passing data to tasks at run time
+
+\end{description}
+
+\end{itemize}
+\phantomsection\label{pipeline_functions:pipeline-functions-pipeline-printout-history-file}\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{history\_file}}] \leavevmode
+The database file which stores checksums and file timestamps for input/output files.
+Defaults to \code{.ruffus\_history.sqlite} if unspecified
+
+\end{description}
+
+\end{itemize}
+\phantomsection\label{pipeline_functions:pipeline-functions-pipeline-printout-checksum-level}\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{checksum\_level}}] \leavevmode
+Several options are available for checking whether tasks are up to date (the default is level 1):
+\begin{itemize}
+\item {}
+level 0 : Use only file timestamps
+
+\item {}
+level 1 : above, plus timestamp of successful job completion
+
+\item {}
+level 2 : above, plus a checksum of the pipeline function body
+
+\item {}
+level 3 : above, plus a checksum of the pipeline function default arguments and the additional arguments passed in by task decorators
+
+\end{itemize}
+
+\end{description}
+
+\end{itemize}
+\phantomsection\label{pipeline_functions:pipeline-functions-pipeline-printout-verbose-abbreviated-path}\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{verbose\_abbreviated\_path}}] \leavevmode
+Whether and how input and output paths are abbreviated. Defaults to 2 if unspecified:
+\begin{itemize}
+\item {}
+level 0: The full (expanded, abspath) input or output path
+
+\item {}
+level \textgreater{} 1: The number of subdirectories to include. Abbreviated paths are prefixed with \code{{[},,,{]}/}
+
+\item {}
+level \textless{} 0: Input / Output parameters are truncated to \code{MMM} letters where \code{verbose\_abbreviated\_path == -MMM}. Subdirectories are first removed to see if this allows the paths to fit in the specified limit. Otherwise abbreviated paths are prefixed by \code{\textless{}???\textgreater{}}
+
+\end{itemize}
+
+\end{description}
+
+\end{itemize}
+\phantomsection\label{pipeline_functions:pipeline-functions-pipeline-printout-graph}
+\index{pipeline functions!pipeline\_printout\_graph}\index{pipeline\_printout\_graph!print flowchart representation of pipeline functions}\index{print flowchart representation of pipeline functions!pipeline\_printout\_graph}
+
+\subsection{\emph{pipeline\_printout\_graph}}
+\label{pipeline_functions:id4}\label{pipeline_functions:index-2}
+\textbf{pipeline\_printout\_graph} ({\hyperref[pipeline_functions:pipeline-functions-pipeline-printout-graph-stream]{\emph{stream}}}, {\hyperref[pipeline_functions:pipeline-functions-pipeline-printout-graph-output-format]{\emph{output\_format}}} = None, {\hyperref[pipeline_functions:pipeline-functions-pipeline-printout-graph-target-tasks]{\emph{target\_tasks}}} = {[}{]}, {\hyperref[pipeline_functions:pipeline-functions-pipeline-printout-graph-forcedtorun-tasks]{\emph{forcedtorun\_tasks}} [...]
+\begin{quote}
+
+\textbf{Purpose:}
+\begin{quote}
+
+Prints out a flowchart of all the pipelined functions which will be invoked given the specified \code{target\_tasks},
+without actually running the pipeline.
+
+See {\hyperref[tutorials/new_tutorial/flowchart_colours:new-manual-flowchart-colours]{\emph{Flowchart colours}}}
+\end{quote}
+\begin{description}
+\item[{\textbf{Example}:}] \leavevmode
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n}{pipeline\PYGZus{}printout\PYGZus{}graph}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{flowchart.jpg}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{jpg}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{p}{[}\PYG{n}{task1}\PYG{p}{,} \PYG{n}{task16}\PYG{p}{]}\PYG{p}{,}
+ \PYG{n}{forcedtorun\PYGZus{}tasks} \PYG{o}{=} \PYG{p}{[}\PYG{n}{task2}\PYG{p}{]}\PYG{p}{,}
+ \PYG{n}{no\PYGZus{}key\PYGZus{}legend} \PYG{o}{=} \PYG{n+nb+bp}{True}\PYG{p}{)}
+\end{Verbatim}
+
+\end{description}
+
+\textbf{Customising appearance:}
+\begin{quote}
+
+The {\hyperref[pipeline_functions:pipeline-functions-pipeline-printout-graph-user-colour-scheme]{\emph{user\_colour\_scheme}}} parameter can be used to change
+flowchart colours. This allows the default {\hyperref[tutorials/new_tutorial/flowchart_colours:new-manual-flowchart-colours]{\emph{Colour Schemes}}}
+to be set. An example of customising flowchart appearance is available {\hyperref[tutorials/new_tutorial/flowchart_colours_code:new-manual-flowchart-colours-code]{\emph{(see code)}}}.
+\end{quote}
+
+\textbf{Parameters:}
+\end{quote}
+\phantomsection\label{pipeline_functions:pipeline-functions-pipeline-printout-graph-stream}\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{stream}}] \leavevmode
+The file or file-like object to which the flowchart should be printed.
+If a string is provided, it is assumed that this is the name of the output file
+which will be opened automatically.
+
+\end{description}
+
+\end{itemize}
+\phantomsection\label{pipeline_functions:pipeline-functions-pipeline-printout-graph-output-format}\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{output\_format}}] \leavevmode
+If missing, defaults to the extension of the \emph{stream} file name (e.g. \code{jpg} for \code{a.jpg})
+
+\begin{DUlineblock}{0em}
+\item[] If the programme \code{dot} can be found on the execution path, this
+can be any number of \href{http://www.graphviz.org/doc/info/output.html}{formats}
+supported by \href{http://www.graphviz.org/}{Graphviz}, including, for example,
+\code{jpg}, \code{png}, \code{pdf}, \code{svg} etc.
+\item[] Otherwise, \textbf{ruffus} can only produce output without error in the \href{http://en.wikipedia.org/wiki/DOT\_language}{dot} format, which
+is a plain-text graph description language (see the sketch below).
+\end{DUlineblock}
+
+\end{description}
+
+\end{itemize}
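+
+For illustration, a minimal sketch (\code{final\_task} is a hypothetical placeholder task). When Graphviz is not installed, the \code{dot} format can still be written and rendered later:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+# write the flowchart as a plain-text dot file; it can be rendered later
+# with, for example:  dot -Tpng flowchart.dot -o flowchart.png
+pipeline_printout_graph("flowchart.dot", "dot", [final_task])
+
+# with Graphviz installed, render directly to svg
+pipeline_printout_graph("flowchart.svg", "svg", [final_task])
+\end{Verbatim}
+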
+\phantomsection\label{pipeline_functions:pipeline-functions-pipeline-printout-graph-target-tasks}\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{target\_tasks}}] \leavevmode
+As in {\hyperref[pipeline_functions:pipeline-functions-pipeline-run]{\emph{pipeline\_run}}}: Pipeline functions and any necessary antecedents (specified implicitly or with {\hyperref[decorators/follows:decorators-follows]{\emph{@follows}}})
+which should be invoked with the appropriate parameters if they are incomplete or out-of-date.
+
+\end{description}
+
+\end{itemize}
+\phantomsection\label{pipeline_functions:pipeline-functions-pipeline-printout-graph-forcedtorun-tasks}\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{forcedtorun\_tasks}}] \leavevmode
+As in {\hyperref[pipeline_functions:pipeline-functions-pipeline-run]{\emph{pipeline\_run}}}: these pipeline functions will be invoked regardless of their state.
+Any antecedent tasks will also be executed if they are out-of-date or incomplete.
+
+\end{description}
+
+\end{itemize}
+\phantomsection\label{pipeline_functions:pipeline-functions-pipeline-printout-graph-draw-vertically}\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{draw\_vertically}}] \leavevmode
+Draw flowchart in vertical orientation
+
+\end{description}
+
+\end{itemize}
+\phantomsection\label{pipeline_functions:pipeline-functions-pipeline-printout-graph-ignore-upstream-of-target}\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{ignore\_upstream\_of\_target}}] \leavevmode
+Start drawing the flowchart from the specified target tasks. Do not draw tasks which are
+downstream of (subsequent to) the targets.
+
+\end{description}
+
+\end{itemize}
+\phantomsection\label{pipeline_functions:pipeline-functions-pipeline-printout-graph-skip-uptodate-tasks}\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{skip\_uptodate\_tasks}}] \leavevmode
+Do not draw up-to-date / completed tasks in the flowchart unless they
+lie on the execution path of the pipeline.
+
+\end{description}
+
+\end{itemize}
+\phantomsection\label{pipeline_functions:pipeline-functions-pipeline-printout-graph-gnu-make}\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{gnu\_make\_maximal\_rebuild\_mode}}] \leavevmode
+\begin{notice}{warning}{Warning:}
+This is a dangerous option. Use rarely and with caution
+\end{notice}
+
+See explanation in {\hyperref[pipeline_functions:pipeline-functions-pipeline-run-gnu-make]{\emph{pipeline\_run}}}.
+
+\end{description}
+
+\end{itemize}
+\phantomsection\label{pipeline_functions:pipeline-functions-pipeline-printout-graph-test-all-task-for-update}\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{test\_all\_task\_for\_update}}] \leavevmode
+\begin{DUlineblock}{0em}
+\item[] Indicates whether intermediate tasks are out of date or not. Normally \textbf{Ruffus} will
+stop checking dependent tasks for completion or whether they are out-of-date once it has
+discovered the maximal extent of the pipeline which has to be run.
+\item[] For displaying the flow of the pipeline, this is hardly very informative.
+\end{DUlineblock}
+
+\end{description}
+
+\end{itemize}
+\phantomsection\label{pipeline_functions:pipeline-functions-pipeline-printout-graph-no-key-legend}\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{no\_key\_legend}}] \leavevmode
+Do not include key legend explaining the colour scheme of the flowchart.
+
+\end{description}
+
+\end{itemize}
+\phantomsection\label{pipeline_functions:pipeline-functions-pipeline-printout-graph-minimal-key-legend}\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{minimal\_key\_legend}}] \leavevmode
+Do not include unused task types in key legend.
+
+\end{description}
+
+\end{itemize}
+\phantomsection\label{pipeline_functions:pipeline-functions-pipeline-printout-graph-user-colour-scheme}\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{user\_colour\_scheme}}] \leavevmode
+Dictionary specifying colour scheme for flowchart
+
+See complete {\hyperref[tutorials/new_tutorial/flowchart_colours:new-manual-flowchart-colours]{\emph{list of Colour Schemes}}}.
+
+\begin{DUlineblock}{0em}
+\item[] Colours can be names e.g. \code{"black"} or quoted hex e.g. \code{'"\#F6F4F4"'} (note extra quotes)
+\item[] Default values will be used unless specified
+\end{DUlineblock}
+
+\begin{tabular}{|p{0.317\linewidth}|p{0.317\linewidth}|p{0.317\linewidth}|}
+\hline
+\textbf{\relax
+key
+} & \textbf{\relax
+Subkey
+} & \textbf{\relax }\\\hline
+\begin{itemize}
+\item {}
+\code{'colour\_scheme\_index'}
+
+\end{itemize}
+ &
+\begin{DUlineblock}{0em}
+\item[] index of default colour scheme,
+\item[] 0-7, defaults to 0 unless specified
+\end{DUlineblock}
+ & \\\hline
+\begin{itemize}
+\item {}
+\code{'Final target'}
+
+\item {}
+\code{'Explicitly specified task'}
+
+\item {}
+\code{'Task to run'}
+
+\item {}
+\code{'Down stream'}
+
+\item {}
+\code{'Up-to-date Final target'}
+
+\item {}
+\code{'Up-to-date task forced to rerun'}
+
+\item {}
+\code{'Up-to-date task'}
+
+\item {}
+\code{'Vicious cycle'}
+
+\end{itemize}
+ & \begin{itemize}
+\item {}
+\code{'fillcolor'}
+
+\item {}
+\code{'fontcolor'}
+
+\item {}
+\code{'color'}
+
+\item {}
+\code{'dashed'} = \code{0/1}
+
+\end{itemize}
+ &
+Colours / attributes for each task type
+\\\hline
+\begin{itemize}
+\item {}
+\code{'Vicious cycle'}
+
+\item {}
+\code{'Task to run'}
+
+\item {}
+\code{'Up-to-date'}
+
+\end{itemize}
+ & \begin{itemize}
+\item {}
+\code{'linecolor'}
+
+\end{itemize}
+ &
+Colours for arrows between tasks
+\\\hline
+\begin{itemize}
+\item {}
+\code{'Pipeline'}
+
+\end{itemize}
+ & \begin{itemize}
+\item {}
+\code{'fontcolor'}
+
+\end{itemize}
+ &
+Flowchart title colour
+\\\hline
+\begin{itemize}
+\item {}
+\code{'Key'}
+
+\end{itemize}
+ & \begin{itemize}
+\item {}
+\code{'fontcolor'}
+
+\item {}
+\code{'fillcolor'}
+
+\end{itemize}
+ &
+Legend colours
+\\\hline
+\end{tabular}
+
+
+Example:
+\begin{quote}
+
+Use colour scheme index = 1
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n}{pipeline\PYGZus{}printout\PYGZus{}graph} \PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{flowchart.svg}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{svg}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{p}{[}\PYG{n}{final\PYGZus{}task}\PYG{p}{]}\PYG{p}{,}
+ \PYG{n}{user\PYGZus{}colour\PYGZus{}scheme} \PYG{o}{=} \PYG{p}{\PYGZob{}}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{colour\PYGZus{}scheme\PYGZus{}index}\PYG{l+s}{\PYGZdq{}} \PYG{p}{:}\PYG{l+m+mi}{1}\PYG{p}{,}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{Pipeline}\PYG{l+s}{\PYGZdq{}} \PYG{p}{:}\PYG{p}{\PYGZob{}}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{fontcolor}\PYG{l+s}{\PYGZdq{}} \PYG{p}{:} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZsh{}FF3232}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZsq{}} \PYG{p}{\PYGZcb{}}\PYG{p}{,}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{Key}\PYG{l+s}{\PYGZdq{}} \PYG{p}{:}\PYG{p}{\PYGZob{}}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{fontcolor}\PYG{l+s}{\PYGZdq{}} \PYG{p}{:} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{Red}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{fillcolor}\PYG{l+s}{\PYGZdq{}} \PYG{p}{:} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZsh{}F6F4F4}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZsq{}} \PYG{p}{\PYGZcb{}}\PYG{p}{,}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{Task to run}\PYG{l+s}{\PYGZdq{}} \PYG{p}{:}\PYG{p}{\PYGZob{}}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{linecolor}\PYG{l+s}{\PYGZdq{}} \PYG{p}{:} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZsh{}0044A0}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZsq{}} \PYG{p}{\PYGZcb{}}\PYG{p}{,}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{Final target}\PYG{l+s}{\PYGZdq{}} \PYG{p}{:}\PYG{p}{\PYGZob{}}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{fillcolor}\PYG{l+s}{\PYGZdq{}} \PYG{p}{:} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZsh{}EFA03B}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{fontcolor}\PYG{l+s}{\PYGZdq{}} \PYG{p}{:} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{black}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{dashed}\PYG{l+s}{\PYGZdq{}} \PYG{p}{:} \PYG{l+m+mi}{0} \PYG{p}{\PYGZcb{}}
+ \PYG{p}{\PYGZcb{}}\PYG{p}{)}
+\end{Verbatim}
+\end{quote}
+
+\end{description}
+
+\end{itemize}
+\phantomsection\label{pipeline_functions:pipeline-functions-pipeline-printout-graph-pipeline-name}\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{pipeline\_name}}] \leavevmode
+Specify title for flowchart
+
+\end{description}
+
+\end{itemize}
+\phantomsection\label{pipeline_functions:pipeline-functions-pipeline-printout-graph-size}\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{size}}] \leavevmode
+Size in inches for flowchart
+
+\end{description}
+
+\end{itemize}
+\phantomsection\label{pipeline_functions:pipeline-functions-pipeline-printout-graph-dpi}\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{dpi}}] \leavevmode
+Resolution in dots per inch. Ignored for svg output
+
+\end{description}
+
+\end{itemize}
+\phantomsection\label{pipeline_functions:pipeline-functions-pipeline-printout-graph-runtime-data}\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{runtime\_data}}] \leavevmode
+Experimental feature for passing data to tasks at run time
+
+\end{description}
+
+\end{itemize}
+\phantomsection\label{pipeline_functions:pipeline-functions-pipeline-printout-graph-history-file}\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{history\_file}}] \leavevmode
+The database file which stores checksums and file timestamps for input/output files.
+Defaults to \code{.ruffus\_history.sqlite} if unspecified
+
+\end{description}
+
+\end{itemize}
+\phantomsection\label{pipeline_functions:pipeline-functions-pipeline-printout-graph-checksum-level}\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{checksum\_level}}] \leavevmode
+Several options for checking up-to-dateness are available: Default is level 1.
+\begin{itemize}
+\item {}
+level 0 : Use only file timestamps
+
+\item {}
+level 1 : above, plus timestamp of successful job completion
+
+\item {}
+level 2 : above, plus a checksum of the pipeline function body
+
+\item {}
+level 3 : above, plus a checksum of the pipeline function default arguments and the additional arguments passed in by task decorators
+
+\end{itemize}
+
+\end{description}
+
+\end{itemize}
+\phantomsection\label{pipeline_functions:pipeline-functions-pipeline-get-task-names}
+\index{pipeline functions!pipeline\_get\_task\_names}\index{pipeline\_get\_task\_names!print list of task names without running the pipeline}\index{print list of task names without running the pipeline!pipeline\_get\_task\_names}
+
+\subsection{\emph{pipeline\_get\_task\_names}}
+\label{pipeline_functions:id5}\label{pipeline_functions:index-3}
+\textbf{pipeline\_get\_task\_names} ()
+\begin{quote}
+
+\textbf{Purpose:}
+\begin{quote}
+
+Returns a list of all task names in the pipeline without running the pipeline or checking to see if the tasks are connected correctly
+\end{quote}
+
+\textbf{Example}:
+\begin{quote}
+
+Given:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{k+kn}{from} \PYG{n+nn}{ruffus} \PYG{k+kn}{import} \PYG{o}{*}
+
+\PYG{n+nd}{@originate}\PYG{p}{(}\PYG{p}{[}\PYG{p}{]}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{create\PYGZus{}data}\PYG{p}{(}\PYG{n}{output\PYGZus{}files}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{pass}
+
+\PYG{n+nd}{@transform}\PYG{p}{(}\PYG{n}{create\PYGZus{}data}\PYG{p}{,} \PYG{n}{suffix}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.txt}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.task1}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{task1}\PYG{p}{(}\PYG{n}{input\PYGZus{}files}\PYG{p}{,} \PYG{n}{output\PYGZus{}files}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{pass}
+
+\PYG{n+nd}{@transform}\PYG{p}{(}\PYG{n}{task1}\PYG{p}{,} \PYG{n}{suffix}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.task1}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.task2}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{task2}\PYG{p}{(}\PYG{n}{input\PYGZus{}files}\PYG{p}{,} \PYG{n}{output\PYGZus{}files}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{pass}
+\end{Verbatim}
+
+Produces a list of three task names:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{n}{pipeline\PYGZus{}get\PYGZus{}task\PYGZus{}names} \PYG{p}{(}\PYG{p}{)}
+\PYG{g+go}{[\PYGZsq{}create\PYGZus{}data\PYGZsq{}, \PYGZsq{}task1\PYGZsq{}, \PYGZsq{}task2\PYGZsq{}]}
+\end{Verbatim}
+\end{quote}
+\end{quote}
+\phantomsection\label{drmaa_wrapper_functions:drmaa-functions}\phantomsection\label{drmaa_wrapper_functions:drmaa-wrapper-run-job}\phantomsection\label{drmaa_wrapper_functions:run-job}\phantomsection\label{drmaa_wrapper_functions:drmaa-wrapper-run-job-cmd-str}\phantomsection\label{drmaa_wrapper_functions:dw-cmd-str}\phantomsection\label{drmaa_wrapper_functions:drmaa-wrapper-run-job-job-script-directory}\phantomsection\label{drmaa_wrapper_functions:dw-job-script-directory}\phantomsection\ [...]
+
+\section{drmaa functions}
+\label{drmaa_wrapper_functions:dw-touch-only}\label{drmaa_wrapper_functions:drmaa-wrapper-run-job-touch-only}\label{drmaa_wrapper_functions::doc}\label{drmaa_wrapper_functions:id1}\begin{quote}
+
+\code{drmaa\_wrapper} is not exported automatically by ruffus and must be specified explicitly:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{c}{\PYGZsh{} imported ruffus.drmaa\PYGZus{}wrapper explicitly}
+\PYG{k+kn}{from} \PYG{n+nn}{ruffus.drmaa\PYGZus{}wrapper} \PYG{k+kn}{import} \PYG{n}{run\PYGZus{}job}\PYG{p}{,} \PYG{n}{error\PYGZus{}drmaa\PYGZus{}job}
+\end{Verbatim}
+\end{quote}
+\end{quote}
+\phantomsection\label{drmaa_wrapper_functions:drmaa-wrapper-run-job}
+\index{drmaa !run\_job}\index{run\_job!Run drmaa}\index{Run drmaa!run\_job}
+
+\subsection{\emph{run\_job}}
+\label{drmaa_wrapper_functions:id2}\label{drmaa_wrapper_functions:index-0}
+\textbf{run\_job} ({\hyperref[drmaa_wrapper_functions:drmaa-wrapper-run-job-cmd-str]{\emph{cmd\_str}}}, {\hyperref[drmaa_wrapper_functions:drmaa-wrapper-run-job-job-name]{\emph{job\_name}}} = None, {\hyperref[drmaa_wrapper_functions:drmaa-wrapper-run-job-job-other-options]{\emph{job\_other\_options}}} = None, {\hyperref[drmaa_wrapper_functions:drmaa-wrapper-run-job-job-script-directory]{\emph{job\_script\_directory}}} = None, {\hyperref[drmaa_wrapper_functions:drmaa-wrapper-run-job-job-e [...]
+\begin{quote}
+
+\textbf{Purpose:}
+\begin{quote}
+
+\code{ruffus.drmaa\_wrapper.run\_job} dispatches a command with arguments to a cluster or Grid Engine node and waits for the command to complete.
+
+It is the semantic equivalent of calling \href{http://docs.python.org/2/library/os.html\#os.system}{os.system} or
+\href{http://docs.python.org/2/library/subprocess.html\#subprocess.check\_output}{subprocess.check\_output}.
+\end{quote}
+
+\textbf{Example}:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{k+kn}{from} \PYG{n+nn}{ruffus.drmaa\PYGZus{}wrapper} \PYG{k+kn}{import} \PYG{n}{run\PYGZus{}job}\PYG{p}{,} \PYG{n}{error\PYGZus{}drmaa\PYGZus{}job}
+\PYG{k+kn}{import} \PYG{n+nn}{drmaa}
+\PYG{n}{my\PYGZus{}drmaa\PYGZus{}session} \PYG{o}{=} \PYG{n}{drmaa}\PYG{o}{.}\PYG{n}{Session}\PYG{p}{(}\PYG{p}{)}
+\PYG{n}{my\PYGZus{}drmaa\PYGZus{}session}\PYG{o}{.}\PYG{n}{initialize}\PYG{p}{(}\PYG{p}{)}
+
+\PYG{n}{run\PYGZus{}job}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{ls}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{n}{job\PYGZus{}name} \PYG{o}{=} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{test}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{n}{job\PYGZus{}other\PYGZus{}options}\PYG{o}{=}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZhy{}P mott\PYGZhy{}flint.prja \PYGZhy{}q short.qa}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{n}{job\PYGZus{}script\PYGZus{}directory} \PYG{o}{=} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{test\PYGZus{}dir}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{n}{job\PYGZus{}environment}\PYG{o}{=}\PYG{p}{\PYGZob{}} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{BASH\PYGZus{}ENV}\PYG{l+s}{\PYGZsq{}} \PYG{p}{:} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{\PYGZti{}/.bashrc}\PYG{l+s}{\PYGZsq{}} \PYG{p}{\PYGZcb{}}\PYG{p}{,}
+ \PYG{n}{retain\PYGZus{}job\PYGZus{}scripts} \PYG{o}{=} \PYG{n+nb+bp}{True}\PYG{p}{,} \PYG{n}{drmaa\PYGZus{}session}\PYG{o}{=}\PYG{n}{my\PYGZus{}drmaa\PYGZus{}session}\PYG{p}{)}
+\PYG{n}{run\PYGZus{}job}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{ls}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{n}{job\PYGZus{}name} \PYG{o}{=} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{test}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{n}{job\PYGZus{}other\PYGZus{}options}\PYG{o}{=}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZhy{}P mott\PYGZhy{}flint.prja \PYGZhy{}q short.qa}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{n}{job\PYGZus{}script\PYGZus{}directory} \PYG{o}{=} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{test\PYGZus{}dir}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{n}{job\PYGZus{}environment}\PYG{o}{=}\PYG{p}{\PYGZob{}} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{BASH\PYGZus{}ENV}\PYG{l+s}{\PYGZsq{}} \PYG{p}{:} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{\PYGZti{}/.bashrc}\PYG{l+s}{\PYGZsq{}} \PYG{p}{\PYGZcb{}}\PYG{p}{,}
+ \PYG{n}{retain\PYGZus{}job\PYGZus{}scripts} \PYG{o}{=} \PYG{n+nb+bp}{True}\PYG{p}{,}
+ \PYG{n}{drmaa\PYGZus{}session}\PYG{o}{=}\PYG{n}{my\PYGZus{}drmaa\PYGZus{}session}\PYG{p}{,}
+ \PYG{n}{working\PYGZus{}directory} \PYG{o}{=} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{/gpfs1/well/mott\PYGZhy{}flint/lg/src/oss/ruffus/doc}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} catch exceptions}
+\PYG{c}{\PYGZsh{}}
+\PYG{k}{try}\PYG{p}{:}
+ \PYG{n}{stdout\PYGZus{}res}\PYG{p}{,} \PYG{n}{stderr\PYGZus{}res} \PYG{o}{=} \PYG{n}{run\PYGZus{}job}\PYG{p}{(}\PYG{n}{cmd}\PYG{p}{,}
+ \PYG{n}{job\PYGZus{}name} \PYG{o}{=} \PYG{n}{job\PYGZus{}name}\PYG{p}{,}
+ \PYG{n}{logger} \PYG{o}{=} \PYG{n}{logger}\PYG{p}{,}
+ \PYG{n}{drmaa\PYGZus{}session} \PYG{o}{=} \PYG{n}{drmaa\PYGZus{}session}\PYG{p}{,}
+ \PYG{n}{run\PYGZus{}locally} \PYG{o}{=} \PYG{n}{options}\PYG{o}{.}\PYG{n}{local\PYGZus{}run}\PYG{p}{,}
+ \PYG{n}{job\PYGZus{}other\PYGZus{}options} \PYG{o}{=} \PYG{n}{get\PYGZus{}queue\PYGZus{}name}\PYG{p}{(}\PYG{p}{)}\PYG{p}{)}
+
+\PYG{c}{\PYGZsh{} relay all the stdout, stderr, drmaa output to diagnose failures}
+\PYG{k}{except} \PYG{n}{error\PYGZus{}drmaa\PYGZus{}job} \PYG{k}{as} \PYG{n}{err}\PYG{p}{:}
+ \PYG{k}{raise} \PYG{n+ne}{Exception}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s+se}{\PYGZbs{}n}\PYG{l+s}{\PYGZdq{}}\PYG{o}{.}\PYG{n}{join}\PYG{p}{(}\PYG{n+nb}{map}\PYG{p}{(}\PYG{n+nb}{str}\PYG{p}{,}
+ \PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{Failed to run:}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{n}{cmd}\PYG{p}{,}
+ \PYG{n}{err}\PYG{p}{,}
+ \PYG{n}{stdout\PYGZus{}res}\PYG{p}{,}
+ \PYG{n}{stderr\PYGZus{}res}\PYG{p}{]}\PYG{p}{)}\PYG{p}{)}\PYG{p}{)}
+
+\PYG{n}{my\PYGZus{}drmaa\PYGZus{}session}\PYG{o}{.}\PYG{n}{exit}\PYG{p}{(}\PYG{p}{)}
+\end{Verbatim}
+\end{quote}
+
+\textbf{Parameters:}
+\end{quote}
+\phantomsection\label{drmaa_wrapper_functions:drmaa-wrapper-run-job-cmd-str}\begin{itemize}
+\item {}
+\emph{cmd\_str}
+\begin{quote}
+
+The command which will be run remotely including all parameters
+\end{quote}
+
+\end{itemize}
+\phantomsection\label{drmaa_wrapper_functions:drmaa-wrapper-run-job-job-name}\begin{itemize}
+\item {}
+\emph{job\_name}
+\begin{quote}
+
+A descriptive name for the command. This will be displayed by \href{http://gridscheduler.sourceforge.net/htmlman/htmlman1/qstat.html}{SGE qstat}, for example.
+Defaults to ``ruffus\_job''
+\end{quote}
+
+\end{itemize}
+\phantomsection\label{drmaa_wrapper_functions:drmaa-wrapper-run-job-job-other-options}\begin{itemize}
+\item {}
+\emph{job\_other\_options}
+\begin{quote}
+
+Other drmaa parameters can be passed verbatim as a string.
+
+Examples for SGE include project name (\code{-P project\_name}), parallel environment (\code{-pe parallel\_environ}), account (\code{-A account\_string}), resource (\code{-l resource=expression}),
+queue name (\code{-q a\_queue\_name}), queue priority (\code{-p 15}).
+
+These are parameters which you normally need to include when submitting jobs interactively, for example via
+\href{http://gridscheduler.sourceforge.net/htmlman/htmlman1/qsub.html}{SGE qsub}
+or \href{http://apps.man.poznan.pl/trac/slurm-drmaa/wiki/WikiStart\#Nativespecification}{SLURM} (\href{https://computing.llnl.gov/linux/slurm/srun.html}{srun})
+\end{quote}
+
+\end{itemize}
+\phantomsection\label{drmaa_wrapper_functions:drmaa-wrapper-run-job-job-script-directory}\begin{itemize}
+\item {}
+\emph{job\_script\_directory}
+\begin{quote}
+
+The directory where drmaa temporary script files will be found. Defaults to the current working directory.
+\end{quote}
+
+\end{itemize}
+\phantomsection\label{drmaa_wrapper_functions:drmaa-wrapper-run-job-job-environment}\begin{itemize}
+\item {}
+\emph{job\_environment}
+\begin{quote}
+
+A dictionary of key / values with environment variables. E.g. \code{"\{'BASH\_ENV': '\textasciitilde{}/.bashrc'\}"}
+\end{quote}
+
+\end{itemize}
+\phantomsection\label{drmaa_wrapper_functions:drmaa-wrapper-run-job-working-directory}\begin{itemize}
+\item {}
+\emph{working\_directory}
+\begin{itemize}
+\item {}
+Sets the working directory.
+
+\item {}
+Should be a fully qualified path.
+
+\item {}
+Defaults to the current working directory.
+
+\end{itemize}
+
+\end{itemize}
+\phantomsection\label{drmaa_wrapper_functions:drmaa-wrapper-run-job-retain-job-scripts}\begin{itemize}
+\item {}
+\emph{retain\_job\_scripts}
+\begin{quote}
+
+Do not delete the temporary script files containing the drmaa commands. Useful for
+debugging and for running the commands directly on the command line, and provides a useful record of what was run.
+\end{quote}
+
+\end{itemize}
+\phantomsection\label{drmaa_wrapper_functions:drmaa-wrapper-run-job-logger}\begin{itemize}
+\item {}
+\emph{logger}
+\begin{quote}
+
+For logging messages indicating the progress of the pipeline in terms of tasks and jobs. Takes objects with the standard python
+\href{https://docs.python.org/2/library/logging.html}{logging} module interface.
+\end{quote}
+
+\end{itemize}
+\phantomsection\label{drmaa_wrapper_functions:drmaa-wrapper-run-job-drmaa-session}\begin{itemize}
+\item {}
+\emph{drmaa\_session}
+\begin{quote}
+
+A shared drmaa session created and managed separately.
+
+In the main part of your \textbf{Ruffus} pipeline script somewhere there should be code looking like this:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} start shared drmaa session for all jobs / tasks in pipeline}
+\PYG{c}{\PYGZsh{}}
+\PYG{k+kn}{import} \PYG{n+nn}{drmaa}
+\PYG{n}{drmaa\PYGZus{}session} \PYG{o}{=} \PYG{n}{drmaa}\PYG{o}{.}\PYG{n}{Session}\PYG{p}{(}\PYG{p}{)}
+\PYG{n}{drmaa\PYGZus{}session}\PYG{o}{.}\PYG{n}{initialize}\PYG{p}{(}\PYG{p}{)}
+
+
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} pipeline functions}
+\PYG{c}{\PYGZsh{}}
+
+\PYG{k}{if} \PYG{n}{\PYGZus{}\PYGZus{}name\PYGZus{}\PYGZus{}} \PYG{o}{==} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{\PYGZus{}\PYGZus{}main\PYGZus{}\PYGZus{}}\PYG{l+s}{\PYGZsq{}}\PYG{p}{:}
+ \PYG{n}{cmdline}\PYG{o}{.}\PYG{n}{run} \PYG{p}{(}\PYG{n}{options}\PYG{p}{,} \PYG{n}{multithread} \PYG{o}{=} \PYG{n}{options}\PYG{o}{.}\PYG{n}{jobs}\PYG{p}{)}
+ \PYG{n}{drmaa\PYGZus{}session}\PYG{o}{.}\PYG{n}{exit}\PYG{p}{(}\PYG{p}{)}
+\end{Verbatim}
+\end{quote}
+
+\end{itemize}
+\phantomsection\label{drmaa_wrapper_functions:drmaa-wrapper-run-job-run-locally}\begin{itemize}
+\item {}
+\emph{run\_locally}
+\begin{quote}
+
+Runs commands locally using the standard python \href{https://docs.python.org/2/library/subprocess.html}{subprocess} module
+rather than dispatching remotely. This allows scripts to be debugged easily
+\end{quote}
+
+\end{itemize}
+\phantomsection\label{drmaa_wrapper_functions:drmaa-wrapper-run-job-touch-only}\begin{itemize}
+\item {}
+\emph{touch\_only}
+\begin{quote}
+
+Create or update {\hyperref[drmaa_wrapper_functions:drmaa-wrapper-run-job-output-files]{\emph{Output files}}}
+only to simulate the running of the pipeline.
+Does not dispatch commands remotely or locally. This is most useful to force a
+pipeline to acknowledge that a particular part is now up-to-date.
+
+See also: {\hyperref[pipeline_functions:pipeline-functions-pipeline-run-touch-files-only]{\emph{pipeline\_run(touch\_files\_only=True)}}}
+\end{quote}
+
+\end{itemize}
+\phantomsection\label{drmaa_wrapper_functions:drmaa-wrapper-run-job-output-files}\begin{itemize}
+\item {}
+\emph{output\_files}
+\begin{quote}
+
+Output files which will be created or updated if {\hyperref[drmaa_wrapper_functions:drmaa-wrapper-run-job-touch-only]{\emph{touch\_only}}} \code{= True} (see the sketch below)
+\end{quote}
+
+\end{itemize}
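+
+For illustration, a hedged sketch (the command and file names are hypothetical, and no cluster session should be needed since nothing is dispatched):
+
+\begin{Verbatim}[commandchars=\\\{\}]
+# pretend the job has run: no command is dispatched, but the listed
+# output files are created or have their timestamps updated
+run_job("count_reads input.bam > counts.txt",
+        touch_only = True,
+        output_files = ["counts.txt"])
+\end{Verbatim}
+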
+
+
+\section{Installation}
+\label{installation:installation}\label{installation::doc}\label{installation:id1}
+\code{Ruffus} is a lightweight python module for building computational pipelines.
+
+
+\subsection{The easy way}
+\label{installation:the-easy-way}\begin{quote}
+
+\emph{Ruffus} is available as an
+\href{http://peak.telecommunity.com/DevCenter/EasyInstall}{easy-install}-able package
+on the \href{http://pypi.python.org/pypi/ruffus}{Python Package Index}.
+
+\begin{Verbatim}[commandchars=\\\{\}]
+sudo pip install ruffus --upgrade
+\end{Verbatim}
+
+The following may also work for older installations:
+\begin{enumerate}
+\item {}
+Install setuptools:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+wget peak.telecommunity.com/dist/ez\_setup.py
+sudo python ez\_setup.py
+\end{Verbatim}
+
+\item {}
+Install \emph{Ruffus} automatically:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+easy\_install -U ruffus
+\end{Verbatim}
+
+\end{enumerate}
+\end{quote}
+
+
+\subsection{The most up-to-date code:}
+\label{installation:the-most-up-to-date-code}\begin{itemize}
+\item {}
+\href{https://pypi.python.org/pypi/ruffus}{Download the latest sources} or
+
+\item {}
+Check out the latest code from Google using git:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+git clone https://bunbun68@code.google.com/p/ruffus/ .
+\end{Verbatim}
+
+\item {}
+Bleeding edge Ruffus development takes place on github:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+git clone git at github.com:bunbun/ruffus.git .
+\end{Verbatim}
+
+\item {}
+To install after downloading, change to the download directory and type:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+python ./setup.py install
+\end{Verbatim}
+
+\end{itemize}
+
+
+\subsubsection{Graphical flowcharts}
+\label{installation:graphical-flowcharts}\begin{quote}
+
+\textbf{Ruffus} relies on the \code{dot} programme from \href{http://www.graphviz.org/}{Graphviz}
+(``Graph visualisation'') to make pretty flowchart representations of your pipelines in multiple
+graphical formats (e.g. \code{png}, \code{jpg}). The cross-platform Graphviz package can be
+\href{http://www.graphviz.org/Download.php}{downloaded here} for Windows,
+Linux, Macs and Solaris. Some Linux
+distributions may include prebuilt packages.
+\begin{description}
+\item[{For Fedora, try}] \leavevmode
+\begin{Verbatim}[commandchars=\\\{\}]
+yum list 'graphviz*'
+\end{Verbatim}
+
+\item[{For ubuntu / Debian, try}] \leavevmode
+\begin{Verbatim}[commandchars=\\\{\}]
+sudo apt-get install graphviz
+\end{Verbatim}
+
+\end{description}
+\end{quote}
+
+\index{Design!Ruffus}\index{Ruffus!Design}
+
+\section{Design \& Architecture}
+\label{design:index-0}\label{design::doc}\label{design:design-architecture}\begin{quote}
+
+The \emph{ruffus} module has the following design goals:
+\begin{itemize}
+\item {}
+Simple
+
+\item {}
+Intuitive
+
+\item {}
+Lightweight
+
+\item {}
+Unintrusive
+
+\item {}
+Flexible and powerful
+
+\end{itemize}
+
+Computational pipelines, especially in science, are best thought of in terms of data
+flowing through successive, dependent stages (\textbf{ruffus} calls these {\hyperref[glossary:term-task]{\emph{task}}}s).
+Traditionally, files have been used to
+link pipelined stages together. This means that computational pipelines can be managed
+using traditional software construction (\emph{build}) systems.
+\end{quote}
+
+
+\subsection{\emph{GNU Make}}
+\label{design:gnu-make}\begin{quote}
+
+The grand-daddy of these is UNIX \href{http://en.wikipedia.org/wiki/Make\_(software)}{make}.
+\href{http://www.gnu.org/software/make/}{GNU make} is ubiquitous in the linux world for
+installing and compiling software.
+It has been widely used to build computational pipelines because it supports:
+\begin{itemize}
+\item {}
+Stopping and restarting computational processes
+
+\item {}
+Running multiple, even thousands of jobs in parallel
+
+\end{itemize}
+\end{quote}
+
+
+\subsubsection{Deficiencies of \emph{make} / \emph{gmake}}
+\label{design:design-make-syntax-ugly}\label{design:deficiencies-of-make-gmake}\begin{quote}
+
+However, make and \href{http://www.gnu.org/software/make/}{GNU make} use a specialised (domain-specific)
+language, which has been much criticised for its poor support for modern
+programming language features such as variable scoping, pattern matching and debugging.
+Make scripts require large amounts of often obscure shell scripting,
+and makefiles can quickly become unmaintainable.
+\end{quote}
+
+
+\subsection{\emph{Scons}, \emph{Rake} and other \emph{Make} alternatives}
+\label{design:design-scons-and-rake}\label{design:scons-rake-and-other-make-alternatives}\begin{quote}
+
+Many attempts have been made to produce a more modern version of make, with less of its
+historical baggage. These include the Java-based \href{http://ant.apache.org/}{Apache ant}, which is specified in XML.
+
+More interesting is a new breed of build systems whose scripts are written in modern programming
+languages, rather than in a specially invented ``build'' specification syntax.
+These include the Python \href{http://www.scons.org/}{scons}, Ruby \href{http://rake.rubyforge.org/}{rake} and
+its python port \href{http://packages.python.org/Smithy/}{Smithy}.
+
+The great advantage is that computational pipelines do not need to be artificially parcelled out
+between the (often second-class) workflow management code and the logic which does the real computation
+in the pipeline. It also means that workflow management can use all the standard language and library
+features, for example, to read in directories, match file names using regular expressions and so on.
+
+\textbf{Ruffus} is much like scons in that the modern dynamic programming language python is used seamlessly
+throughout its pipeline scripts.
+\end{quote}
+
+
+\subsubsection{Implicit dependencies: disadvantages of \emph{make} / \emph{scons} / \emph{rake}}
+\label{design:design-implicit-dependencies}\label{design:implicit-dependencies-disadvantages-of-make-scons-rake}\begin{quote}
+
+Although Python \href{http://www.scons.org/}{scons} and Ruby \href{http://rake.rubyforge.org/}{rake}
+are in many ways more powerful and easier to use for building software, they are still an
+imperfect fit to the world of computational pipelines.
+
+This is a result of the way dependencies are specified, an essential part of their design inherited
+from \href{http://www.gnu.org/software/make/}{GNU make}.
+
+The order of operations in all of these tools is specified in a \emph{declarative} rather than
+\emph{imperative} manner. This means that the sequence of steps that a build should take is
+not spelled out explicitly and directly. Instead, recipes are provided for turning input files
+of one type into another.
+
+So, for example, knowing that \code{a-\textgreater{}b}, \code{b-\textgreater{}c}, \code{c-\textgreater{}d}, the build
+system can infer how to get from \code{a} to \code{d} by performing the necessary operations in the correct order.
+\begin{description}
+\item[{This is immensely powerful for three reasons:}] \leavevmode\begin{enumerate}
+\item {}
+The plumbing, such as dependency checking and passing output
+from one stage to another, is handled automatically by the build system. (This is the whole point!)
+
+\item {}
+The same \emph{recipe} can be re-used at different points in the build.
+
+\item {}
+\begin{DUlineblock}{0em}
+\item[] Intermediate files do not need to be retained.
+\item[] Given the automatic inference that \code{a-\textgreater{}b-\textgreater{}c-\textgreater{}d},
+we don't need to keep \code{b} and \code{c} files around once \code{d} has been produced.
+\item[]
+\end{DUlineblock}
+
+\end{enumerate}
+
+\end{description}
+
+The disadvantage is that because stages are specified only indirectly, in terms of
+file name matches, the flow through a complex build or a pipeline can be difficult to trace, and nigh
+impossible to debug when there are problems.
+\end{quote}
+
+
+\subsubsection{Explicit dependencies in \emph{Ruffus}}
+\label{design:explicit-dependencies-in-ruffus}\label{design:design-explicit-dependencies-in-ruffus}\begin{quote}
+
+\textbf{Ruffus} takes a different approach. The order of operations is specified explicitly rather than inferred
+indirectly from the input and output types. So, for example, we would explicitly specify three successive and
+linked operations \code{a-\textgreater{}b}, \code{b-\textgreater{}c}, \code{c-\textgreater{}d}. The build system knows that the operations always proceed in
+this order.
+
+Looking at a \textbf{Ruffus} script, it is always immediately clear what succession of computational steps
+will be taken (see the sketch below).
+
+\textbf{Ruffus} values clarity over syntactic cleverness.
+\end{quote}
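+
+As a minimal, hypothetical sketch of this explicit style (task names and file suffixes are invented for illustration), each stage names the previous stage directly, so the order of operations is visible at a glance:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+from ruffus import *
+
+@originate(["start.a"])
+def make_a(output_file):
+    open(output_file, "w").close()
+
+@transform(make_a, suffix(".a"), ".b")      # a -> b
+def a_to_b(input_file, output_file):
+    open(output_file, "w").close()
+
+@transform(a_to_b, suffix(".b"), ".c")      # b -> c
+def b_to_c(input_file, output_file):
+    open(output_file, "w").close()
+
+@transform(b_to_c, suffix(".c"), ".d")      # c -> d
+def c_to_d(input_file, output_file):
+    open(output_file, "w").close()
+
+pipeline_run([c_to_d])
+\end{Verbatim}
+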
+
+
+\subsubsection{Static dependencies: What \emph{make} / \emph{scons} / \emph{rake} can't do (easily)}
+\label{design:static-dependencies-what-make-scons-rake-can-t-do-easily}\label{design:design-static-dependencies}\begin{quote}
+
+\href{http://www.gnu.org/software/make/}{GNU make}, \href{http://www.scons.org/}{scons} and \href{http://rake.rubyforge.org/}{rake}
+work by inferring a static dependency (directed acyclic) graph between all the files which
+are used by a computational pipeline. These tools locate the target that they are supposed
+to build and work backward through the dependency graph from that target,
+rebuilding anything that is out of date. This is perfect for building software,
+where the list of data files can be computed \textbf{statically} at the beginning of the build.
+
+This is not an ideal match for scientific computational pipelines because:
+\begin{itemize}
+\item {}
+\begin{DUlineblock}{0em}
+\item[] Though the \emph{stages} of a pipeline (e.g. \emph{compile} or \emph{DNA alignment}) are
+invariably well-specified in advance, the number of
+operations (\emph{job}s) involved at each stage may not be.
+\item[]
+\end{DUlineblock}
+
+\item {}
+\begin{DUlineblock}{0em}
+\item[] A common approach is to break up large data sets into manageable chunks which
+can be operated on in parallel in computational clusters or farms
+(see \href{http://en.wikipedia.org/wiki/Embarrassingly\_parallel}{embarrassingly parallel problems}).
+\item[] This means that the number of parallel operations or jobs varies with the data (the number of manageable chunks),
+and dependency trees cannot be calculated statically beforehand.
+\item[]
+\end{DUlineblock}
+
+\end{itemize}
+
+Computational pipelines require \textbf{dynamic} dependencies which are not calculated up-front, but
+at each stage of the pipeline.
+
+This is a \emph{known} issue with traditional build systems each of which has partial strategies to work around
+this problem:
+\begin{itemize}
+\item {}
+gmake always builds the dependencies when first invoked, so dynamic dependencies require (complex!) recursive calls to gmake
+
+\item {}
+\href{http://objectmix.com/ruby/759716-rake-dependencies-unknown-prior-running-tasks-2.html}{Rake dependencies unknown prior to running tasks}.
+
+\item {}
+\href{http://www.scons.org/wiki/DynamicSourceGenerator}{Scons: Using a Source Generator to Add Targets Dynamically}
+
+\end{itemize}
+
+\textbf{Ruffus} explicitly and straightforwardly handles tasks which produce an indeterminate (i.e. runtime-dependent)
+number of outputs, using its \textbf{@split}, \textbf{@transform} and \textbf{@merge} function annotations (see the sketch below).
+\end{quote}
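+
+As a minimal, hypothetical sketch (file names are invented for illustration, and an input file \code{big\_input.txt} is assumed to exist), the number of chunks, and therefore the number of downstream jobs, is only known when \code{@split} runs:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+from ruffus import *
+
+@split("big_input.txt", "chunk_*.txt")
+def split_input(input_file, output_files):
+    # the number of chunk files produced depends on the data at run time
+    with open(input_file) as infile:
+        for count, line in enumerate(infile):
+            with open("chunk_%d.txt" % count, "w") as outfile:
+                outfile.write(line)
+
+@transform(split_input, suffix(".txt"), ".processed")
+def process_chunk(input_file, output_file):
+    open(output_file, "w").close()
+
+@merge(process_chunk, "final_result.txt")
+def merge_results(input_files, output_file):
+    open(output_file, "w").close()
+
+pipeline_run([merge_results])
+\end{Verbatim}
+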
+
+
+\subsection{Managing pipelines stage-by-stage using \textbf{Ruffus}}
+\label{design:managing-pipelines-stage-by-stage-using-ruffus}\begin{quote}
+
+\textbf{Ruffus} manages pipeline stages directly.
+\begin{enumerate}
+\item {}
+\begin{DUlineblock}{0em}
+\item[] The computational operations for each stage of the pipeline are written by you, in
+separate python functions.
+\item[] (These correspond to \href{http://www.gnu.org/software/make/manual/make.html\#Pattern-Rules}{gmake pattern rules})
+\item[]
+\end{DUlineblock}
+
+\item {}
+\begin{DUlineblock}{0em}
+\item[] The dependencies between pipeline stages (python functions) are specified up-front.
+\item[] These can be displayed as a flow chart.
+\end{DUlineblock}
+
+\includegraphics{front_page_flowchart.png}
+
+\item {}
+\textbf{Ruffus} makes sure pipeline stage functions are called in the right order,
+with the right parameters, running in parallel using multiprocessing if necessary.
+
+\item {}
+Data file timestamps can be used to automatically determine if all or any parts
+of the pipeline are out-of-date and need to be rerun.
+
+\item {}
+Separate pipeline stages, and operations within each pipeline stage,
+can be run in parallel provided they are not inter-dependent.
+
+\end{enumerate}
+
+Another way of looking at this is that \textbf{ruffus} reconstructs data file dependencies dynamically,
+on the fly, when it gets to each stage of the pipeline, giving much more flexibility.
+\end{quote}
+
+
+\subsubsection{Disadvantages of the Ruffus design}
+\label{design:disadvantages-of-the-ruffus-design}\begin{quote}
+
+Are there any disadvantages to this trade-off for additional clarity?
+\begin{enumerate}
+\item {}
+Each pipeline stage needs to take the right input and output. For example if we specified the
+steps in the wrong order: \code{a-\textgreater{}b}, \code{c-\textgreater{}d}, \code{b-\textgreater{}c}, then no useful output would be produced.
+
+\item {}
+We cannot re-use the same recipes in different parts of the pipeline
+
+\item {}
+Intermediate files need to be retained.
+
+\end{enumerate}
+
+In our experience, it is always obvious when pipeline operations are in the wrong order, precisely because the
+order of computation is the very essence of the design of each pipeline. Ruffus produces extra diagnostics when
+no output is created in a pipeline stage (this usually happens when regular expressions are incorrectly specified).
+
+Re-use of recipes is as simple as an extra call to common function code.
+
+Finally, some users have proposed future enhancements to \textbf{Ruffus} to handle unnecessary temporary / intermediate files.
+\end{quote}
+
+\index{Design!Comparison of Ruffus with alternatives}\index{Comparison of Ruffus with alternatives!Design}
+
+\subsection{Alternatives to \textbf{Ruffus}}
+\label{design:index-1}\label{design:alternatives-to-ruffus}\begin{quote}
+
+A comparison of more make-like tools is available from \href{http://biowiki.org/MakeComparison}{Ian Holmes' group}.
+
+Build systems include:
+\begin{itemize}
+\item {}
+\href{http://www.gnu.org/software/make/}{GNU make}
+
+\item {}
+\href{http://www.scons.org/}{scons}
+
+\item {}
+\href{http://ant.apache.org/}{ant}
+
+\item {}
+\href{http://rake.rubyforge.org/}{rake}
+
+\end{itemize}
+
+There are also complete workload management systems such as Condor.
+Various bioinformatics pipelines are also available, including the one used by the
+leading genome annotation website Ensembl, as well as Pegasys, GPIPE, Taverna, Wildfire, MOWserv,
+Triana, Cyrille2, etc. These are all either hardwired to specific databases and tasks,
+or have steep learning curves for both the scientist/developer and the IT system
+administrators.
+
+\textbf{Ruffus} is designed to be lightweight and unintrusive enough to use for writing pipelines
+with just 10 lines of code.
+\end{quote}
+
+
+\strong{See also:}
+
+
+\textbf{Bioinformatics workload management systems}
+\begin{quote}
+\begin{description}
+\item[{Condor:}] \leavevmode
+\href{http://www.cs.wisc.edu/condor/description.html}{http://www.cs.wisc.edu/condor/description.html}
+
+\item[{Ensembl Analysis pipeline:}] \leavevmode
+\href{http://www.ncbi.nlm.nih.gov/pubmed/15123589}{http://www.ncbi.nlm.nih.gov/pubmed/15123589}
+
+\item[{Pegasys:}] \leavevmode
+\href{http://www.ncbi.nlm.nih.gov/pubmed/15096276}{http://www.ncbi.nlm.nih.gov/pubmed/15096276}
+
+\item[{GPIPE:}] \leavevmode
+\href{http://www.biomedcentral.com/pubmed/15096276}{http://www.biomedcentral.com/pubmed/15096276}
+
+\item[{Taverna:}] \leavevmode
+\href{http://www.ncbi.nlm.nih.gov/pubmed/15201187}{http://www.ncbi.nlm.nih.gov/pubmed/15201187}
+
+\item[{Wildfire:}] \leavevmode
+\href{http://www.biomedcentral.com/pubmed/15788106}{http://www.biomedcentral.com/pubmed/15788106}
+
+\item[{MOWserv:}] \leavevmode
+\href{http://www.biomedcentral.com/pubmed/16257987}{http://www.biomedcentral.com/pubmed/16257987}
+
+\item[{Triana:}] \leavevmode
+\href{http://dx.doi.org/10.1007/s10723-005-9007-3}{http://dx.doi.org/10.1007/s10723-005-9007-3}
+
+\item[{Cyrille2:}] \leavevmode
+\href{http://www.biomedcentral.com/1471-2105/9/96}{http://www.biomedcentral.com/1471-2105/9/96}
+
+\end{description}
+\end{quote}
+
+
+
+\index{Acknowledgements}
+
+\subsubsection{Acknowledgements}
+\label{design:acknowledgements}\label{design:index-2}\begin{itemize}
+\item {}
+Bruce Eckel's insightful article on
+\href{http://www.artima.com/weblogs/viewpost.jsp?thread=241209}{A Decorator Based Build System}
+was the obvious inspiration for the use of decorators in \emph{Ruffus}.
+\begin{description}
+\item[{The rest of \emph{Ruffus} uses a different approach. In particular:}] \leavevmode\begin{enumerate}
+\item {}
+\emph{Ruffus} uses task-based not file-based dependencies
+
+\item {}
+\emph{Ruffus} tries to have minimal impact on the functions it decorates.
+
+Bruce Eckel's design wraps functions in ``rule'' objects.
+
+\emph{Ruffus} tasks are added as attributes of the functions, which can still be
+called normally. This is how \emph{Ruffus} decorators can be layered in any order
+onto the same task.
+
+\end{enumerate}
+
+\end{description}
+
+\item {}
+Languages like C++ and Java would probably use a ``mixin'' approach.
+Python's easy support for reflection and function references,
+as well as the necessity of marshalling over process boundaries, dictated the
+internal architecture of \emph{Ruffus}.
+
+\item {}
+The \href{http://www.boost.org}{Boost Graph library} for textbook implementations of directed
+graph traversals.
+
+\item {}
+\href{http://www.graphviz.org/}{Graphviz}. Just works. Wonderful.
+
+\item {}
+Andreas Heger, Christoffer Nellåker and Grant Belgard for driving Ruffus towards
+ever simpler syntax.
+
+\end{itemize}
+
+
+\section{Major Features added to Ruffus}
+\label{history:glob}\label{history::doc}\label{history:major-features-added-to-ruffus}
+\begin{notice}{note}{Note:}
+See {\hyperref[todo:todo]{\emph{To do list}}} for future enhancements to Ruffus
+\end{notice}
+
+
+\subsection{version 2.5RC}
+\label{history:version-2-5rc}\begin{quote}
+
+31st July 2014: Release Candidate
+
+5th August 2014: Release
+\end{quote}
+
+
+\subsubsection{1) Python3 compatibility (but at least python 2.6 is now required)}
+\label{history:python3-compatability-but-at-least-python-2-6-is-now-required}\begin{quote}
+
+Ruffus v2.5 is now python3 compatible. This has required surprisingly many changes to the codebase. Please report any bugs to me.
+
+\begin{notice}{note}{Note:}
+\textbf{Ruffus now requires at least python 2.6}
+
+It proved to be impossible to support python 2.5 and python 3.x at the same time.
+\end{notice}
+\end{quote}
+
+
+\subsubsection{2) Ctrl-C interrupts}
+\label{history:ctrl-c-interrupts}\begin{quote}
+
+Ruffus now mostly(!) terminates gracefully when interrupted by Ctrl-C.
+
+Please send me bug reports for when this doesn't work with a minimally reproducible case.
+
+This means that if, for example, an \code{Exception} is thrown during your pipeline and you do not want to wait for the remaining jobs to complete, you can press Ctrl-C at any point.
+Note that you may still need to clean up spawned processes, for example using \code{qdel} if you are using \code{Ruffus.drmaa\_wrapper}.
+\end{quote}
+
+
+\subsubsection{3) Customising flowcharts in pipeline\_printout\_graph() with \texttt{@graphviz}}
+\label{history:customising-flowcharts-in-pipeline-printout-graph-with-graphviz}\begin{quote}
+
+\emph{Contributed by Sean Davis, with improved syntax via Jake Biesinger}
+
+The graphic for each task can have its own attributes (URL, shape, colour, etc.) by adding
+\href{http://www.graphviz.org/doc/info/attrs.html}{graphviz attributes}
+using the \code{@graphviz} decorator.
+\begin{itemize}
+\item {}
+This allows HTML formatting in the task names (using the \code{label} parameter as in the following example).
+HTML labels \textbf{must} be enclosed in \code{\textless{}} and \code{\textgreater{}}. E.g.
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n}{label} \PYG{o}{=} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZlt{}Line \PYGZlt{}BR/\PYGZgt{} wrapped task\PYGZus{}name()\PYGZgt{}}\PYG{l+s}{\PYGZdq{}}
+\end{Verbatim}
+
+\item {}
+You can also opt to keep the task name and wrap it with a prefix and suffix:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n}{label\PYGZus{}suffix} \PYG{o}{=} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{??? }\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{n}{label\PYGZus{}prefix} \PYG{o}{=} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{: What is this?}\PYG{l+s}{\PYGZdq{}}
+\end{Verbatim}
+
+\item {}
+The \code{URL} attribute allows the generation of clickable svg, and also client / server
+side image maps usable in web pages.
+See \href{http://www.graphviz.org/content/output-formats\#dimap}{Graphviz documentation}
+
+\end{itemize}
+
+Example:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n+nd}{@graphviz}\PYG{p}{(}\PYG{n}{URL}\PYG{o}{=}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{http://cnn.com}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{n}{fillcolor} \PYG{o}{=} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZsh{}FFCCCC}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,}
+ \PYG{n}{color} \PYG{o}{=} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZsh{}FF0000}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{n}{pencolor}\PYG{o}{=}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZsh{}FF0000}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{n}{fontcolor}\PYG{o}{=}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZsh{}4B6000}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,}
+ \PYG{n}{label\PYGZus{}suffix} \PYG{o}{=} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{???}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{n}{label\PYGZus{}prefix} \PYG{o}{=} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{What is this?\PYGZlt{}BR/\PYGZgt{} }\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{n}{label} \PYG{o}{=} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZlt{}What \PYGZlt{}FONT COLOR=}\PYG{l+s+se}{\PYGZbs{}\PYGZdq{}}\PYG{l+s}{red}\PYG{l+s+se}{\PYGZbs{}\PYGZdq{}}\PYG{l+s}{\PYGZgt{}is\PYGZlt{}/FONT\PYGZgt{}this\PYGZgt{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{n}{shape}\PYG{o}{=} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{component}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{n}{height} \PYG{o}{=} \PYG{l+m+mf}{1.5}\PYG{p}{,} \PYG{n}{peripheries} \PYG{o}{=} \PYG{l+m+mi}{5}\PYG{p}{,}
+ \PYG{n}{style}\PYG{o}{=}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{dashed}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{Up\PYGZus{}to\PYGZus{}date\PYGZus{}task2}\PYG{p}{(}\PYG{n}{infile}\PYG{p}{,} \PYG{n}{outfile}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{pass}
+
+\PYG{c}{\PYGZsh{} Can use dictionary if you wish...}
+\PYG{n}{graphviz\PYGZus{}params} \PYG{o}{=} \PYG{p}{\PYGZob{}}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{URL}\PYG{l+s}{\PYGZdq{}}\PYG{p}{:}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{http://cnn.com}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{fontcolor}\PYG{l+s}{\PYGZdq{}}\PYG{p}{:} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZsh{}FF00FF}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZsq{}}\PYG{p}{\PYGZcb{}}
+\PYG{n+nd}{@graphviz}\PYG{p}{(}\PYG{o}{*}\PYG{o}{*}\PYG{n}{graphviz\PYGZus{}params}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{myTask}\PYG{p}{(}\PYG{n+nb}{input}\PYG{p}{,}\PYG{n}{output}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{pass}
+\end{Verbatim}
+
+\scalebox{0.300000}{\includegraphics{history_html_flowchart.png}}
+\end{quote}
+\end{quote}
+
+
+\subsubsection{4) Consistent verbosity levels}
+\label{history:consistent-verbosity-levels}\begin{quote}
+
+The verbosity levels are now more fine-grained and consistent between pipeline\_printout and pipeline\_run.
+Note that at verbosity \textgreater{} 2, \code{pipeline\_run} outputs lists of up-to-date tasks before running the pipeline.
+Many users who defaulted to using a verbosity of 3 may want to move up to \code{verbose = 4} (see the sketch after the lists below).
+\begin{itemize}
+\item {}
+\textbf{level 0} : \emph{Nothing}
+
+\item {}
+\textbf{level 1} : \emph{Out-of-date Task names}
+
+\item {}
+\textbf{level 2} : \emph{All Tasks (including any task function docstrings)}
+
+\item {}
+\textbf{level 3} : \emph{Out-of-date Jobs in Out-of-date Tasks, no explanation}
+
+\item {}
+\textbf{level 4} : \emph{Out-of-date Jobs in Out-of-date Tasks, with explanations and warnings}
+
+\item {}
+\textbf{level 5} : \emph{All Jobs in Out-of-date Tasks, (include only list of up-to-date tasks)}
+
+\item {}
+\textbf{level 6} : \emph{All jobs in All Tasks whether out of date or not}
+
+\item {}
+\textbf{level 10}: \emph{Logs messages useful only for debugging ruffus pipeline code}
+
+\end{itemize}
+\begin{itemize}
+\item {}
+Defaults to \textbf{level 4} for pipeline\_printout: \emph{Out of date jobs, with explanations and warnings}
+
+\item {}
+Defaults to \textbf{level 1} for pipeline\_run: \emph{Out-of-date Task names}
+
+\end{itemize}
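+
+As a minimal sketch (the output stream and \code{multiprocess} value below are illustrative rather than
+prescribed), the verbosity can be passed directly to the pipeline functions:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+import sys
+
+\# preview out-of-date jobs with explanations and warnings (level 4)
+pipeline\_printout(sys.stdout, verbose = 4)
+
+\# then run, listing only the names of out-of-date tasks (level 1)
+pipeline\_run(verbose = 1, multiprocess = 4)
+\end{Verbatim}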
+\end{quote}
+
+
+\subsubsection{5) Allow abbreviated paths from \texttt{pipeline\_run} or \texttt{pipeline\_printout}}
+\label{history:allow-abbreviated-paths-from-pipeline-run-or-pipeline-printout}\begin{quote}
+
+\begin{notice}{note}{Note:}
+Please contact me with suggestions if you find the abbreviations useful but ``aesthetically challenged''!
+\end{notice}
+
+Some pipelines produce interminable lists of long filenames. It would be nice to be able to abbreviate this
+to just enough information to follow the progress.
+\begin{description}
+\item[{Ruffus now allows either}] \leavevmode\begin{enumerate}
+\item {}
+Only the nth top level sub-directories to be included
+
+\item {}
+The message to be truncated to a specified number of characters (to fit on a line, for example)
+
+Note that the specified number of characters applies separately to the input and to the output parameters,
+not to the entire message. You may need to specify a smaller limit than you expect (e.g. \code{60} rather than \code{80}).
+
+\end{enumerate}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n}{pipeline\PYGZus{}printout}\PYG{p}{(}\PYG{n}{verbose\PYGZus{}abbreviated\PYGZus{}path} \PYG{o}{=} \PYG{n}{NNN}\PYG{p}{)}
+\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{n}{verbose\PYGZus{}abbreviated\PYGZus{}path} \PYG{o}{=} \PYG{o}{\PYGZhy{}}\PYG{n}{MMM}\PYG{p}{)}
+\end{Verbatim}
+
+\end{description}
+
+The \code{verbose\_abbreviated\_path} parameter restricts the length of input / output file paths to either
+\begin{quote}
+\begin{itemize}
+\item {}
+NNN levels of nested paths
+
+\item {}
+A total of MMM characters, where MMM is specified by setting \code{verbose\_abbreviated\_path} to -MMM (i.e. negative values)
+
+\end{itemize}
+
+\code{verbose\_abbreviated\_path} defaults to \code{2}
+\end{quote}
+
+For example:
+\begin{quote}
+
+Given \code{{[}"aa/bb/cc/dddd.txt", "aaa/bbbb/cccc/eeed/eeee/ffff/gggg.txt"{]}}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+ \PYG{c}{\PYGZsh{} Original relative paths}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{[aa/bb/cc/dddd.txt, aaa/bbbb/cccc/eeed/eeee/ffff/gggg.txt]}\PYG{l+s}{\PYGZdq{}}
+
+ \PYG{c}{\PYGZsh{} Full abspath}
+ \PYG{n}{verbose\PYGZus{}abbreviated\PYGZus{}path} \PYG{o}{=} \PYG{l+m+mi}{0}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{[/test/ruffus/src/aa/bb/cc/dddd.txt, /test/ruffus/src/aaa/bbbb/cccc/eeed/eeee/ffff/gggg.txt]}\PYG{l+s}{\PYGZdq{}}
+
+ \PYG{c}{\PYGZsh{} Specified level of nested directories}
+ \PYG{n}{verbose\PYGZus{}abbreviated\PYGZus{}path} \PYG{o}{=} \PYG{l+m+mi}{1}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{[.../dddd.txt, .../gggg.txt]}\PYG{l+s}{\PYGZdq{}}
+
+ \PYG{n}{verbose\PYGZus{}abbreviated\PYGZus{}path} \PYG{o}{=} \PYG{l+m+mi}{2}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{[.../cc/dddd.txt, .../ffff/gggg.txt]}\PYG{l+s}{\PYGZdq{}}
+
+ \PYG{n}{verbose\PYGZus{}abbreviated\PYGZus{}path} \PYG{o}{=} \PYG{l+m+mi}{3}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{[.../bb/cc/dddd.txt, .../eeee/ffff/gggg.txt]}\PYG{l+s}{\PYGZdq{}}
+
+
+ \PYG{c}{\PYGZsh{} Truncated to MMM characters}
+ \PYG{n}{verbose\PYGZus{}abbreviated\PYGZus{}path} \PYG{o}{=} \PYG{o}{\PYGZhy{}}\PYG{l+m+mi}{60}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZlt{}???\PYGZgt{} /bb/cc/dddd.txt, aaa/bbbb/cccc/eeed/eeee/ffff/gggg.txt]}\PYG{l+s}{\PYGZdq{}}
+\end{Verbatim}
+\end{quote}
+
+If you are using \code{ruffus.cmdline}, the abbreviated path lengths can be specified on
+the command line as an extension to the verbosity:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+ \PYG{c}{\PYGZsh{} verbosity of 4}
+ yourscript.py \PYGZhy{}\PYGZhy{}verbose 4
+
+ \PYG{c}{\PYGZsh{} display three levels of nested directories}
+ yourscript.py \PYGZhy{}\PYGZhy{}verbose 4:3
+
+ \PYG{c}{\PYGZsh{} restrict input and output parameters to 60 letters}
+ yourscript.py \PYGZhy{}\PYGZhy{}verbose 4:\PYGZhy{}60
+\end{Verbatim}
+
+The number after the colon is the abbreviated path length.
+\end{quote}
+\end{quote}
+
+
+\subsubsection{Other changes}
+\label{history:other-changes}\begin{itemize}
+\item {}
+BUG FIX: Output specified via wild cards was not saved in the checksum files!
+
+\item {}
+BUG FIX: @mkdir bug under Windows. Thanks to Sean Turley. (Aargh! Different exceptions are thrown in Windows vs. Linux for the same condition!)
+
+\item {}
+Added {\hyperref[pipeline_functions:pipeline-functions-pipeline-get-task-names]{\emph{pipeline\_get\_task\_names(...)}}} which returns all task names as a list of strings. Thanks to Clare Sloggett.
+
+\end{itemize}
+
+
+\subsection{version 2.4.1}
+\label{history:version-2-4-1}\begin{quote}
+
+26th April 2014
+\begin{itemize}
+\item {}
+Breaking changes to drmaa API suggested by Bernie Pope to ensure portability across different drmaa implementations (SGE, SLURM etc.)
+
+\end{itemize}
+\end{quote}
+
+
+\subsection{version 2.4}
+\label{history:version-2-4}\begin{quote}
+
+4th April 2014
+\end{quote}
+
+
+\subsubsection{Additions to \texttt{ruffus} namespace}
+\label{history:additions-to-ruffus-namespace}\begin{itemize}
+\item {}
+{\hyperref[tutorials/new_tutorial/output_file_names:new-manual-formatter]{\emph{formatter()}}} ({\hyperref[decorators/indicator_objects:decorators-formatter]{\emph{syntax}}})
+
+\item {}
+{\hyperref[tutorials/new_tutorial/originate:new-manual-originate]{\emph{originate()}}} ({\hyperref[decorators/originate:decorators-originate]{\emph{syntax}}})
+
+\item {}
+{\hyperref[tutorials/new_tutorial/subdivide_collate:new-manual-subdivide]{\emph{subdivide()}}} ({\hyperref[decorators/subdivide:decorators-subdivide]{\emph{syntax}}})
+
+\end{itemize}
+
+
+\subsubsection{Installation: use pip}
+\label{history:installation-use-pip}\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+sudo pip install ruffus --upgrade
+\end{Verbatim}
+\end{quote}
+
+
+\subsubsection{1) Command Line support}
+\label{history:command-line-support}\begin{quote}
+
+The optional \code{Ruffus.cmdline} module provides support for a set of common command
+line arguments which make writing \emph{Ruffus} pipelines much more pleasant.
+See {\hyperref[tutorials/new_tutorial/command_line:new-manual-cmdline]{\emph{manual}}}
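+
+A minimal sketch of the usual boilerplate (the description string is illustrative):
+
+\begin{Verbatim}[commandchars=\\\{\}]
+from ruffus import *
+import ruffus.cmdline as cmdline
+
+parser = cmdline.get\_argparse(description = "My pipeline")
+options = parser.parse\_args()
+
+\# standard python logger compatible with ruffus
+logger, logger\_mutex = cmdline.setup\_logging(\_\_name\_\_, options.log\_file, options.verbose)
+
+\# run the pipeline (or print it out) according to the command line options
+cmdline.run(options)
+\end{Verbatim}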
+\end{quote}
+
+
+\subsubsection{2) Checkpointing}
+\label{history:check-pointing}\begin{itemize}
+\item {}
+Contributed by \textbf{Jake Biesinger}
+
+\item {}
+See {\hyperref[tutorials/new_tutorial/checkpointing:new-manual-checkpointing]{\emph{Manual}}}
+
+\item {}
+Uses a fault-resistant sqlite database file to log input / output files and additional checksums
+
+\item {}
+defaults to checking file timestamps, with the history file stored in the current directory (\code{ruffus\_utility.RUFFUS\_HISTORY\_FILE = '.ruffus\_history.sqlite'})
+
+\item {}
+{\hyperref[pipeline_functions:pipeline-functions-pipeline-run]{\emph{pipeline\_run(..., checksum\_level = N, ...)}}} (see the sketch after this list)
+\begin{itemize}
+\item {}
+level 0 = CHECKSUM\_FILE\_TIMESTAMPS : Classic mode. Use only file timestamps (no checksum file will be created)
+
+\item {}
+level 1 = CHECKSUM\_HISTORY\_TIMESTAMPS : Also store timestamps in a database after successful job completion
+
+\item {}
+level 2 = CHECKSUM\_FUNCTIONS : As above, plus a checksum of the pipeline function body
+
+\item {}
+level 3 = CHECKSUM\_FUNCTIONS\_AND\_PARAMS : As above, plus a checksum of the pipeline function default arguments and the additional arguments passed in by task decorators
+
+\item {}
+defaults to level 1
+
+\end{itemize}
+
+\item {}
+Can speed up trivial tasks: Previously Ruffus always added an extra 1 second pause between tasks
+to guard against file systems (Ext3, FAT, some NFS) with low timestamp granularity.
+
+\end{itemize}
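+
+A minimal sketch of choosing a checksum level at run time (the numeric levels are those listed above;
+everything else about the calls is illustrative):
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\# level 1 (the default): also record timestamps in .ruffus\_history.sqlite
+pipeline\_run(checksum\_level = 1)
+
+\# level 0: classic mode, rely on file timestamps only (no history file is written)
+pipeline\_run(checksum\_level = 0)
+\end{Verbatim}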
+
+
+\subsubsection{3) \emph{subdivide()} (\emph{syntax})}
+\label{history:subdivide-syntax}\begin{itemize}
+\item {}
+Takes a list of input jobs (like {\hyperref[decorators/transform:decorators-transform]{\emph{@transform}}}) but further splits each into multiple jobs, i.e. it is a \textbf{many-\textgreater{}even more} relationship (see the sketch after this list)
+
+\item {}
+synonym for the deprecated \code{@split(..., regex(), ...)}
+
+\end{itemize}
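+
+A minimal sketch, assuming each \code{*.input} file is cut into an unknown number of \code{*.chunk}
+pieces (\code{previous\_task} and all file names are illustrative):
+
+\begin{Verbatim}[commandchars=\\\{\}]
+@subdivide(previous\_task,
+           regex(r"(.+)\PYGZbs{}.input"),
+           r"\PYGZbs{}1.*.chunk",    \# glob matching however many pieces are made
+           r"\PYGZbs{}1")            \# extra parameter: the file name root
+def cut\_into\_chunks(input\_file, output\_files, file\_root):
+    \# write one or more file\_root.NNN.chunk files here
+    pass
+\end{Verbatim}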
+
+
+\subsubsection{4) \emph{mkdir()} (\emph{syntax}) with \emph{formatter()}, \emph{suffix()} and \emph{regex()}}
+\label{history:mkdir-syntax-with-formatter-suffix-and-regex}\begin{itemize}
+\item {}
+allows directories to be created depending on runtime parameters or the output of previous tasks (see the sketch after this list)
+
+\item {}
+behaves just like {\hyperref[decorators/transform:decorators-transform]{\emph{@transform}}} but with its own (internal) function which does the actual work of making a directory
+
+\item {}
+Previous behavior is retained: \code{mkdir} continues to work seamlessly inside {\hyperref[decorators/follows:decorators-follows]{\emph{@follows}}}
+
+\end{itemize}
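+
+A minimal sketch (task and file names are illustrative): create one results directory per input file
+before the task that writes into it runs:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\# make "a.results/" for "a.input", then write "a.results/a.out" inside it
+@mkdir(starting\_files, formatter(), "\PYGZob{}path[0]\PYGZcb{}/\PYGZob{}basename[0]\PYGZcb{}.results")
+@transform(starting\_files, formatter(),
+           "\PYGZob{}path[0]\PYGZcb{}/\PYGZob{}basename[0]\PYGZcb{}.results/\PYGZob{}basename[0]\PYGZcb{}.out")
+def process(input\_file, output\_file):
+    pass
+\end{Verbatim}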
+
+
+\subsubsection{5) \emph{originate()} (\emph{syntax})}
+\label{history:originate-syntax}\begin{itemize}
+\item {}
+Generates output files without dependencies from scratch (\emph{ex nihilo}!)
+
+\item {}
+For the first step in a pipeline (see the sketch after this list)
+
+\item {}
+Task function obviously only takes output and not input parameters. (There \emph{are} no inputs!)
+
+\item {}
+synonym for {\hyperref[decorators/split:decorators-split]{\emph{@split(None,...)}}}
+
+\item {}
+See {\hyperref[decorators/originate:decorators-originate]{\emph{Summary}}} / {\hyperref[tutorials/new_tutorial/originate:new-manual-originate]{\emph{Manual}}}
+
+\end{itemize}
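+
+A minimal sketch (file names are illustrative); each job receives only an output parameter:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+@originate(["a.start", "b.start", "c.start"])
+def create\_initial\_files(output\_file):
+    \# make each starting file from scratch
+    with open(output\_file, "w") as oo:
+        oo.write("starting data")
+\end{Verbatim}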
+
+
+\subsubsection{6) New flexible \emph{formatter()} (\emph{syntax}) alternative to \emph{regex()} \& \emph{suffix()}}
+\label{history:new-flexible-formatter-syntax-alternative-to-regex-suffix}\begin{itemize}
+\item {}
+Easy manipulation of path subcomponents in the style of \href{http://docs.python.org/2/library/os.path.html\#os.path.split}{os.path.split()} (see the sketch after this list)
+
+\item {}
+Regular expressions are no longer necessary for path manipulation
+
+\item {}
+Familiar python syntax
+
+\item {}
+Optional regular expression matches
+
+\item {}
+Can refer to any file in the list of N input files (not only the first file, as with \code{regex(...)})
+
+\item {}
+Can even refer to individual letters within a match
+
+\end{itemize}
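+
+A minimal sketch (the regular expression, named group and file names are illustrative): the optional
+regular expression adds named groups, while path components such as \code{path} and \code{basename}
+can be used directly in the output pattern:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\# "/data/run1/sample7.fastq" -> "/data/run1/sample7.filtered.fastq"
+@transform(starting\_files,
+           formatter(".+/(?P<SAMPLE>[a-zA-Z0-9]+)\PYGZbs{}.fastq"),
+           "\PYGZob{}path[0]\PYGZcb{}/\PYGZob{}SAMPLE[0]\PYGZcb{}.filtered.fastq")
+def filter\_reads(input\_file, output\_file):
+    pass
+\end{Verbatim}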
+
+
+\subsubsection{7) Combinatorics (all vs. all decorators)}
+\label{history:combinatorics-all-vs-all-decorators}\begin{itemize}
+\item {}
+{\hyperref[tutorials/new_tutorial/combinatorics:new-manual-product]{\emph{@product}}} (See \href{http://docs.python.org/2/library/itertools.html\#itertools.product}{itertools.product})
+
+\item {}
+{\hyperref[tutorials/new_tutorial/combinatorics:new-manual-permutations]{\emph{@permutations}}} (See \href{http://docs.python.org/2/library/itertools.html\#itertools.permutations}{itertools.permutations})
+
+\item {}
+{\hyperref[tutorials/new_tutorial/combinatorics:new-manual-combinations]{\emph{@combinations}}} (See \href{http://docs.python.org/2/library/itertools.html\#itertools.combinations}{itertools.combinations})
+
+\item {}
+{\hyperref[tutorials/new_tutorial/combinatorics:new-manual-combinations-with-replacement]{\emph{@combinations\_with\_replacement}}} (See \href{http://docs.python.org/2/library/itertools.html\#itertools.combinations\_with\_replacement}{itertools.combinations\_with\_replacement})
+
+\item {}
+In the optional {\hyperref[tutorials/new_tutorial/combinatorics:new-manual-combinatorics]{\emph{combinatorics}}} module (see the sketch after this list)
+
+\item {}
+Only {\hyperref[tutorials/new_tutorial/output_file_names:new-manual-formatter]{\emph{formatter()}}} provides the necessary flexibility to construct the output. ({\hyperref[decorators/indicator_objects:decorators-suffix]{\emph{suffix()}}} and {\hyperref[decorators/indicator_objects:decorators-regex]{\emph{regex()}}} are not supported.)
+
+\item {}
+See {\hyperref[decorators/decorators:decorators-combinatorics]{\emph{Summary}}} / {\hyperref[tutorials/new_tutorial/combinatorics:new-manual-combinatorics]{\emph{Manual}}}
+
+\end{itemize}
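+
+A minimal sketch of an all vs. all \code{@product} (task and file names, and the \code{ID} group, are
+illustrative; the usual \code{from ruffus import *} is assumed):
+
+\begin{Verbatim}[commandchars=\\\{\}]
+from ruffus.combinatorics import *
+
+\# every *.A file against every *.B file
+@product(list\_of\_A\_files, formatter(".+/(?P<ID>[^/]+)\PYGZbs{}.A"),
+         list\_of\_B\_files, formatter(".+/(?P<ID>[^/]+)\PYGZbs{}.B"),
+         "\PYGZob{}path[0][0]\PYGZcb{}/\PYGZob{}ID[0][0]\PYGZcb{}\_vs\_\PYGZob{}ID[1][0]\PYGZcb{}.result")
+def compare\_A\_vs\_B(input\_files, output\_file):
+    pass
+\end{Verbatim}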
+
+
+\subsubsection{8) drmaa support and multithreading:}
+\label{history:drmaa-support-and-multithreading}\begin{itemize}
+\item {}
+{\hyperref[tutorials/new_tutorial/multiprocessing:new-manual-ruffus-drmaa-wrapper-run-job]{\emph{ruffus.drmaa\_wrapper.run\_job()}}} ({\hyperref[drmaa_wrapper_functions:drmaa-wrapper-run-job]{\emph{syntax}}})
+
+\item {}
+Optional helper module allows jobs to dispatch work to a computational cluster and wait until it completes (see the sketch after this list).
+
+\item {}
+Requires \code{multithread} rather than \code{multiprocess}
+
+\end{itemize}
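+
+A minimal sketch, assuming a working DRMAA installation (the command string, job name and helper
+function are illustrative):
+
+\begin{Verbatim}[commandchars=\\\{\}]
+import drmaa
+from ruffus.drmaa\_wrapper import run\_job
+
+drmaa\_session = drmaa.Session()
+drmaa\_session.initialize()
+
+def run\_cmd\_on\_cluster(cmd\_str):
+    \# submit one shell command to the cluster and wait for it to finish
+    stdout\_res, stderr\_res = run\_job(cmd\_str = cmd\_str,
+                                     job\_name = "my\_job",
+                                     drmaa\_session = drmaa\_session,
+                                     run\_locally = False)
+    return stdout\_res
+
+\# remember: pipeline\_run(..., multithread = NNN) rather than multiprocess
+\end{Verbatim}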
+
+
+\subsubsection{9) \texttt{pipeline\_run(...)} and exceptions}
+\label{history:pipeline-run-and-exceptions}\begin{quote}
+
+See {\hyperref[tutorials/new_tutorial/exceptions:new-manual-exceptions]{\emph{Manual}}}
+\begin{itemize}
+\item {}
+Optionally terminate pipeline after first exception
+
+\item {}
+Display exceptions without delay (both options are shown in the sketch after this list)
+
+\end{itemize}
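+
+Both options are plain \code{pipeline\_run} parameters; a minimal sketch:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\# stop after the first exception and log each exception as it happens
+pipeline\_run(exceptions\_terminate\_immediately = True,
+             log\_exceptions = True)
+\end{Verbatim}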
+\end{quote}
+
+
+\subsubsection{10) Miscellaneous}
+\label{history:miscellaneous}\begin{quote}
+\begin{description}
+\item[{Better error messages for \code{formatter()}, \code{suffix()} and \code{regex()} for \code{pipeline\_printout(..., verbose \textgreater{}= 3, ...)}}] \leavevmode\begin{itemize}
+\item {}
+Error messages now show the mismatching regular expression and the offending file name
+
+\item {}
+Wrong capture group names or out-of-range indices will raise an informative Exception
+
+\end{itemize}
+
+\end{description}
+\end{quote}
+
+
+\subsection{version 2.3}
+\label{history:version-2-3}\begin{quote}
+
+1st September, 2013
+\begin{itemize}
+\item {} \begin{description}
+\item[{\code{@active\_if} turns off tasks at runtime}] \leavevmode
+The design and initial implementation were contributed by Jacob Biesinger.
+
+Takes one or more parameters, each of which can be a boolean, a function or a callable object returning True / False:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n}{run\PYGZus{}if\PYGZus{}true\PYGZus{}1} \PYG{o}{=} \PYG{n+nb+bp}{True}
+\PYG{n}{run\PYGZus{}if\PYGZus{}true\PYGZus{}2} \PYG{o}{=} \PYG{n+nb+bp}{False}
+
+\PYG{n+nd}{@active\PYGZus{}if}\PYG{p}{(}\PYG{n}{run\PYGZus{}if\PYGZus{}true\PYGZus{}1}\PYG{p}{,} \PYG{k}{lambda}\PYG{p}{:} \PYG{n}{run\PYGZus{}if\PYGZus{}true\PYGZus{}2}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{this\PYGZus{}task\PYGZus{}might\PYGZus{}be\PYGZus{}inactive}\PYG{p}{(}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{pass}
+\end{Verbatim}
+
+The expressions inside @active\_if are evaluated each time
+\code{pipeline\_run}, \code{pipeline\_printout} or \code{pipeline\_printout\_graph} is called.
+
+Dormant tasks behave as if they are up to date and have no output.
+
+\end{description}
+
+\item {} \begin{description}
+\item[{Command line parsing}] \leavevmode\begin{itemize}
+\item {}
+Supports both argparse (python 2.7) and optparse (python 2.6):
+
+\item {}
+\code{Ruffus.cmdline} module is optional.
+
+\item {}
+See {\hyperref[tutorials/new_tutorial/command_line:new-manual-cmdline]{\emph{manual}}}
+
+\end{itemize}
+
+\end{description}
+
+\item {} \begin{description}
+\item[{Optionally terminate pipeline after first exception}] \leavevmode
+To have all exceptions interrupt immediately:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{o}{.}\PYG{o}{.}\PYG{o}{.}\PYG{p}{,} \PYG{n}{exceptions\PYGZus{}terminate\PYGZus{}immediately} \PYG{o}{=} \PYG{n+nb+bp}{True}\PYG{p}{)}
+\end{Verbatim}
+
+By default ruffus accumulates \code{NN} errors before interrupting the pipeline prematurely. \code{NN} is the specified parallelism for \code{pipeline\_run(..., multiprocess = NN)}.
+
+Otherwise, a pipeline will only be interrupted immediately if exceptions of type \code{ruffus.JobSignalledBreak} are thrown.
+
+\end{description}
+
+\item {}
+Display exceptions without delay
+\begin{quote}
+
+By default, Ruffus re-throws exceptions in ensemble after pipeline termination.
+
+To see exceptions as they occur:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{o}{.}\PYG{o}{.}\PYG{o}{.}\PYG{p}{,} \PYG{n}{log\PYGZus{}exceptions} \PYG{o}{=} \PYG{n+nb+bp}{True}\PYG{p}{)}
+\end{Verbatim}
+
+\code{logger.error(...)} will be invoked with the string representation of each exception and its associated stack trace.
+
+The default logger prints to sys.stderr, but this can be changed to any class from the logging module or a compatible object via \code{pipeline\_run(..., logger = ???)}
+\end{quote}
+
+\item {}
+Improved \code{pipeline\_printout()}
+\begin{itemize}
+\item {}
+\emph{@split} operations now show the 1-\textgreater{}many output in pipeline\_printout
+\begin{quote}
+
+This makes it clearer that \code{@split} is creating multiple output parameters (rather than a single output parameter consisting of a list):
+
+\begin{Verbatim}[commandchars=\\\{\}]
+Task = split\_animals
+ Job = [None
+ -\textgreater{} cows
+ -\textgreater{} horses
+ -\textgreater{} pigs
+ , any\_extra\_parameters]
+\end{Verbatim}
+\end{quote}
+
+\item {}
+File dates and times are displayed in human-readable form, and out-of-date files are flagged with asterisks.
+
+\end{itemize}
+
+\end{itemize}
+\end{quote}
+
+
+\subsection{version 2.2}
+\label{history:version-2-2}\begin{quote}
+
+22nd July, 2010
+\begin{itemize}
+\item {}
+Simplifying \textbf{@transform} syntax with \textbf{suffix(...)}
+\begin{quote}
+
+Regular expressions within ruffus are very powerful, and can allow files to be moved
+from one directory to another and renamed at will.
+
+However, using consistent file extensions and
+\code{@transform(..., suffix(...))} makes the code much simpler and easier to read.
+
+Previously, \code{suffix(...)} did not cooperate well with \code{inputs(...)}.
+For example, finding the corresponding header file (``.h'') for the matching input
+required a complicated \code{regex(...)} regular expression and \code{inputs(...)}. This simple case,
+e.g. matching ``something.c'' with ``something.h'', is now much easier in Ruffus.
+\begin{description}
+\item[{For example:}] \leavevmode
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n}{source\PYGZus{}files} \PYG{o}{=} \PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{something.c}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{more\PYGZus{}code.c}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}
+\PYG{n+nd}{@transform}\PYG{p}{(}\PYG{n}{source\PYGZus{}files}\PYG{p}{,} \PYG{n}{suffix}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.c}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{n}{add\PYGZus{}inputs}\PYG{p}{(}\PYG{l+s}{r\PYGZdq{}}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{1.h}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{common.h}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.o}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{compile}\PYG{p}{(}\PYG{n}{input\PYGZus{}files}\PYG{p}{,} \PYG{n}{output\PYGZus{}file}\PYG{p}{)}\PYG{p}{:}
+ \PYG{p}{(} \PYG{n}{source\PYGZus{}file}\PYG{p}{,}
+ \PYG{n}{header\PYGZus{}file}\PYG{p}{,}
+ \PYG{n}{common\PYGZus{}header}\PYG{p}{)} \PYG{o}{=} \PYG{n}{input\PYGZus{}files}
+ \PYG{c}{\PYGZsh{} call compiler to make object file}
+\end{Verbatim}
+
+This is equivalent to calling:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n+nb}{compile}\PYG{p}{(}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{something.c}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{something.h}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{common.h}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{something.o}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{n+nb}{compile}\PYG{p}{(}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{more\PYGZus{}code.c}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{more\PYGZus{}code.h}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{common.h}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{more\PYGZus{}code.o}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\end{Verbatim}
+\end{quote}
+
+\end{description}
+
+The \code{\textbackslash{}1} matches everything \emph{but} the suffix and will be applied to both \code{glob}s and file names.
+\end{quote}
+
+\end{itemize}
+
+For simplicity and compatibility with previous versions, there is always an implied \code{r"\textbackslash{}1"} before
+the output parameters. I.e. output parameter strings are \emph{always} substituted.
+\begin{itemize}
+\item {}
+Tasks and glob in \textbf{inputs(...)} and \textbf{add\_inputs(...)}
+\begin{quote}
+
+\code{glob}s and tasks can be added as the prerequisites / input files using
+\code{inputs(...)} and \code{add\_inputs(...)}. \code{glob} expansions will take place when the task
+is run.
+\end{quote}
+
+\item {}
+Advanced form of \textbf{@split} with \textbf{regex}:
+\begin{quote}
+
+The standard \code{@split} divided one set of inputs into multiple outputs (the number of which
+can be determined at runtime).
+
+This is a \code{one-\textgreater{}many} operation.
+
+An advanced form of \code{@split} has been added which can split each of several files further.
+
+In other words, this is a \code{many-\textgreater{}"many more"} operation.
+\begin{description}
+\item[{For example, given three starting files:}] \leavevmode
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n}{original\PYGZus{}files} \PYG{o}{=} \PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{original\PYGZus{}0.file}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{original\PYGZus{}1.file}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{original\PYGZus{}2.file}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}
+\end{Verbatim}
+
+\item[{We can split each into its own set of sub-sections:}] \leavevmode
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n+nd}{@split}\PYG{p}{(}\PYG{n}{original\PYGZus{}files}\PYG{p}{,}
+ \PYG{n}{regex}\PYG{p}{(}\PYG{l+s}{r\PYGZdq{}}\PYG{l+s}{starting\PYGZus{}(}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{d+).fa}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{c}{\PYGZsh{} match starting files}
+ \PYG{l+s}{r\PYGZdq{}}\PYG{l+s}{files.split.}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{1.*.fa}\PYG{l+s}{\PYGZdq{}} \PYG{c}{\PYGZsh{} glob pattern}
+ \PYG{l+s}{r\PYGZdq{}}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{1}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)} \PYG{c}{\PYGZsh{} index of original file}
+\PYG{k}{def} \PYG{n+nf}{split\PYGZus{}files}\PYG{p}{(}\PYG{n}{input\PYGZus{}file}\PYG{p}{,} \PYG{n}{output\PYGZus{}files}\PYG{p}{,} \PYG{n}{original\PYGZus{}index}\PYG{p}{)}\PYG{p}{:}
+ \PYG{l+s+sd}{\PYGZdq{}\PYGZdq{}\PYGZdq{}}
+\PYG{l+s+sd}{ Code to split each input\PYGZus{}file}
+\PYG{l+s+sd}{ \PYGZdq{}original\PYGZus{}0.file\PYGZdq{} \PYGZhy{}\PYGZgt{} \PYGZdq{}files.split.0.*.fa\PYGZdq{}}
+\PYG{l+s+sd}{ \PYGZdq{}original\PYGZus{}1.file\PYGZdq{} \PYGZhy{}\PYGZgt{} \PYGZdq{}files.split.1.*.fa\PYGZdq{}}
+\PYG{l+s+sd}{ \PYGZdq{}original\PYGZus{}2.file\PYGZdq{} \PYGZhy{}\PYGZgt{} \PYGZdq{}files.split.2.*.fa\PYGZdq{}}
+\PYG{l+s+sd}{ \PYGZdq{}\PYGZdq{}\PYGZdq{}}
+\end{Verbatim}
+
+\end{description}
+
+This is, conceptually, the reverse of the \code{@collate(...)} decorator.
+\end{quote}
+
+\item {}
+Ruffus will complain about unescaped regular expression special characters:
+\begin{quote}
+
+Ruffus uses ``\textbackslash{}1'' and ``\textbackslash{}2'' in regular expression substitutions. Even seasoned python
+users may not remember that these have to be `escaped' in strings. The best option is
+to use `raw' python strings e.g.
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{l+s}{r\PYGZdq{}}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{1\PYGZus{}substitutes}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{2correctly}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{3four}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{4times}\PYG{l+s}{\PYGZdq{}}
+\end{Verbatim}
+\end{quote}
+
+Ruffus will throw an exception if it sees an unescaped ``\textbackslash{}1'' or ``\textbackslash{}2'' in a file name,
+which should catch most of these bugs.
+\end{quote}
+
+\item {}
+Prettier output from \emph{pipeline\_printout\_graph}
+\begin{quote}
+
+Changed to nicer colours, symbols etc. for a more professional look.
+\code{@split} and \code{@merge} tasks now look different from \code{@transform}.
+Colours, size and resolution are now fully customisable:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+pipeline\_printout\_graph( \#...
+                         user\_colour\_scheme = \PYGZob{}
+                                                "colour\_scheme\_index" : 1,
+                                                "Task to run"         : \PYGZob{}"fillcolor" : "blue"\PYGZcb{},
+                                                \PYGZcb{},
+                         pipeline\_name = "My flowchart",
+                         size          = (11, 8),
+                         dpi           = 120)
+\end{Verbatim}
+
+An SVG bug in Firefox has been worked around so that font sizes are displayed correctly.
+\end{quote}
+
+\end{itemize}
+\end{quote}
+
+
+\subsection{version 2.1.1}
+\label{history:version-2-1-1}\begin{itemize}
+\item {} \begin{description}
+\item[{\textbf{@transform(.., add\_inputs(...))}}] \leavevmode
+\code{add\_inputs(...)} allows the addition of extra input dependencies / parameters for each job.
+\begin{description}
+\item[{Unlike \code{inputs(...)}, the original input parameter is retained:}] \leavevmode
+\begin{Verbatim}[commandchars=\\\{\}]
+from ruffus import *
+
+@transform(["a.input", "b.input"], suffix(".input"),
+           add\_inputs("just.1.more", "just.2.more"), ".output")
+def task(i, o):
+    ""
+\end{Verbatim}
+
+\item[{Produces:}] \leavevmode
+\begin{Verbatim}[commandchars=\\\{\}]
+Job = [[a.input, just.1.more, just.2.more] -\textgreater{}a.output]
+Job = [[b.input, just.1.more, just.2.more] -\textgreater{}b.output]
+\end{Verbatim}
+
+\end{description}
+
+Like \code{inputs}, \code{add\_inputs} accepts strings, tasks and \code{glob}s.
+This minor syntactic change promises to add much clarity to Ruffus code.
+\code{add\_inputs()} is available for \code{@transform}, \code{@collate} and \code{@split}.
+
+\end{description}
+
+\end{itemize}
+
+
+\subsection{version 2.1.0}
+\label{history:version-2-1-0}\begin{itemize}
+\item {}
+\textbf{@jobs\_limit}
+Some tasks are resource-intensive, and too many of their jobs should not be run at the
+same time. Examples include disk-intensive operations such as unzipping, or
+downloading from FTP sites.
+
+Adding:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n+nd}{@jobs\PYGZus{}limit}\PYG{p}{(}\PYG{l+m+mi}{4}\PYG{p}{)}
+\PYG{n+nd}{@transform}\PYG{p}{(}\PYG{n}{new\PYGZus{}data\PYGZus{}list}\PYG{p}{,} \PYG{n}{suffix}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.big\PYGZus{}data.gz}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.big\PYGZus{}data}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{unzip}\PYG{p}{(}\PYG{n}{i}\PYG{p}{,} \PYG{n}{o}\PYG{p}{)}\PYG{p}{:}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{unzip code goes here}\PYG{l+s}{\PYGZdq{}}
+\end{Verbatim}
+
+would limit the unzip operation to 4 jobs at a time, even if the rest of the
+pipeline runs highly in parallel.
+
+(Thanks to Rob Young for suggesting this.)
+
+\end{itemize}
+
+
+\subsection{version 2.0.10}
+\label{history:version-2-0-10}\begin{itemize}
+\item {}
+\textbf{touch\_files\_only} option for \textbf{pipeline\_run}
+
+When the pipeline runs, task functions will not be run. Instead, the output files for
+each job (in each task) will be \code{touch}-ed if necessary.
+This can be useful for simulating a pipeline run so that all files look as
+if they are up-to-date (see the sketch after the caveats below).
+
+Caveats:
+\begin{itemize}
+\item {}
+This may not work correctly where output files are only determined at runtime, e.g. with \textbf{@split}
+
+\item {}
+Only the output from pipelined jobs which are currently out-of-date will be \code{touch}-ed.
+In other words, the pipeline runs \emph{as normal}, the only difference is that the
+output files are \code{touch}-ed instead of being created by the python task functions
+which would otherwise have been called.
+
+\end{itemize}
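+
+A minimal sketch:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\# pretend to run the pipeline: touch missing or out-of-date output files instead
+pipeline\_run(touch\_files\_only = True)
+\end{Verbatim}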
+
+\item {}
+Parameter substitution for \textbf{inputs(...)}
+
+The \textbf{inputs(...)} parameter in \textbf{@transform}, \textbf{@collate} can now take tasks and \code{glob} s,
+and these will be expanded appropriately (after regular expression replacement).
+
+For example:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n+nd}{@transform}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{dir/a.input}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{n}{regex}\PYG{p}{(}\PYG{l+s}{r\PYGZdq{}}\PYG{l+s}{(.*)}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{/(.+).input}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,}
+ \PYG{n}{inputs}\PYG{p}{(}\PYG{p}{(}\PYG{l+s}{r\PYGZdq{}}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{1/}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{2.other}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{r\PYGZdq{}}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{1/*.more}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{)}\PYG{p}{,} \PYG{l+s}{r\PYGZdq{}}\PYG{l+s}{elsewhere/}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{2.output}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{task1}\PYG{p}{(}\PYG{n}{i}\PYG{p}{,} \PYG{n}{o}\PYG{p}{)}\PYG{p}{:}
+ \PYG{l+s+sd}{\PYGZdq{}\PYGZdq{}\PYGZdq{}}
+\PYG{l+s+sd}{ Some pipeline task}
+\PYG{l+s+sd}{ \PYGZdq{}\PYGZdq{}\PYGZdq{}}
+\end{Verbatim}
+
+Is equivalent to calling:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n}{task1}\PYG{p}{(}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{dir/a.other}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{dir/1.more}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{dir/2.more}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{elsewhere/a.output}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\end{Verbatim}
+
+
+\begin{quote}
+
+Here:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{l+s}{r\PYGZdq{}}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{1/*.more}\PYG{l+s}{\PYGZdq{}}
+\end{Verbatim}
+
+is first converted to:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{l+s}{r\PYGZdq{}}\PYG{l+s}{dir/*.more}\PYG{l+s}{\PYGZdq{}}
+\end{Verbatim}
+
+which matches:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{dir/1.more}\PYG{l+s}{\PYGZdq{}}
+\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{dir/2.more}\PYG{l+s}{\PYGZdq{}}
+\end{Verbatim}
+\end{quote}
+
+\end{itemize}
+
+
+\subsection{version 2.0.9}
+\label{history:version-2-0-9}\begin{itemize}
+\item {}
+Better display of logging output
+
+\item {}
+Advanced form of \textbf{@split}
+This is an experimental feature.
+
+Hitherto, \textbf{@split} only took one set of inputs (tasks/files/\code{glob}s) and split these
+into an indeterminate number of outputs.
+\begin{quote}
+
+This is a one-\textgreater{}many operation.
+\end{quote}
+
+Sometimes it is desirable to take multiple input files, and split each of them further.
+\begin{quote}
+
+This is a many-\textgreater{}many (more) operation.
+\end{quote}
+
+It is possible to hack something together using \textbf{@transform}, but downstream tasks would not
+be aware that each job in \textbf{@transform} produces multiple outputs (rather than one input and
+one output per job).
+
+The syntax looks like:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n+nd}{@split}\PYG{p}{(}\PYG{n}{get\PYGZus{}files}\PYG{p}{,} \PYG{n}{regex}\PYG{p}{(}\PYG{l+s}{r\PYGZdq{}}\PYG{l+s}{(.+).original}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{l+s}{r\PYGZdq{}}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{1.*.split}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{split\PYGZus{}files}\PYG{p}{(}\PYG{n}{i}\PYG{p}{,} \PYG{n}{o}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{pass}
+\end{Verbatim}
+
+If \code{get\_files()} returned \code{A.original}, \code{B.original} and \code{C.original},
+\code{split\_files()} might lead to the following operations:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+A.original
+ -\textgreater{} A.1.split
+ -\textgreater{} A.2.split
+ -\textgreater{} A.3.split
+B.original
+ -\textgreater{} B.1.split
+ -\textgreater{} B.2.split
+C.original
+ -\textgreater{} C.1.split
+ -\textgreater{} C.2.split
+ -\textgreater{} C.3.split
+ -\textgreater{} C.4.split
+ -\textgreater{} C.5.split
+\end{Verbatim}
+
+Note that each input (\code{A/B/C.original}) can produce a number of outputs, the exact
+number of which does not have to be pre-determined.
+This is similar to \textbf{@split}.
+
+Tasks following \code{split\_files} will have ten inputs, corresponding to each of the
+outputs from \code{split\_files}.
+
+If \textbf{@transform} was used instead of \textbf{@split}, then tasks following \code{split\_files}
+would only have 3 inputs.
+
+\end{itemize}
+
+
+\subsection{version 2.0.8}
+\label{history:version-2-0-8}\begin{itemize}
+\item {}
+File names can be in unicode
+
+\item {}
+File systems with 1 second timestamp granularity no longer cause problems.
+
+\end{itemize}
+
+
+\subsection{version 2.0.2}
+\label{history:version-2-0-2}\begin{itemize}
+\item {}
+Much prettier / more useful output from {\hyperref[pipeline_functions:pipeline-functions-pipeline-printout]{\emph{pipeline\_printout}}}
+
+\item {}
+New tutorial / manual
+
+\end{itemize}
+
+
+\subsection{version 2.0}
+\label{history:version-2-0}\begin{itemize}
+\item {}
+Revamped documentation:
+\begin{itemize}
+\item {}
+Rewritten tutorial
+
+\item {}
+Comprehensive manual
+
+\item {}
+New syntax help
+
+\end{itemize}
+
+\item {}
+Major redesign. New decorators include
+\begin{itemize}
+\item {}
+{\hyperref[tutorials/new_tutorial/split:new-manual-split]{\emph{@split}}}
+
+\item {}
+{\hyperref[tutorials/new_tutorial/transform:new-manual-transform]{\emph{@transform}}}
+
+\item {}
+{\hyperref[tutorials/new_tutorial/merge:new-manual-merge]{\emph{@merge}}}
+
+\item {}
+{\hyperref[tutorials/new_tutorial/subdivide_collate:new-manual-collate]{\emph{@collate}}}
+
+\end{itemize}
+
+\item {}
+Major redesign. Decorator \emph{inputs} can mix
+\begin{itemize}
+\item {}
+Output from previous tasks
+
+\item {}
+\href{http://docs.python.org/library/glob.html}{\emph{glob}} patterns e.g. \code{*.txt}
+
+\item {}
+File names
+
+\item {}
+Any other data type
+
+\end{itemize}
+
+\end{itemize}
+
+
+\subsection{version 1.1.4}
+\label{history:version-1-1-4}\begin{quote}
+
+Tasks can get their input by automatically chaining to the output from one or more parent tasks using {\hyperref[decorators/files_re:decorators-files-re]{\emph{@files\_re}}}
+\end{quote}
+
+
+\subsection{version 1.0.7}
+\label{history:version-1-0-7}\begin{quote}
+
+Added \emph{proxy\_logger} module for accessing a shared log across multiple jobs in different processes.
+\end{quote}
+
+
+\subsection{version 1.0}
+\label{history:version-1-0}\begin{quote}
+
+Initial Release in Oxford
+\end{quote}
+
+
+\section{Fixed Bugs}
+\label{history:fixed-bugs}\begin{quote}
+
+Full list at \href{http://code.google.com/p/ruffus/wiki/LatestChanges}{``Latest Changes wiki entry''}
+\end{quote}
+
+
+\section{Future Changes to Ruffus}
+\label{todo:todo}\label{todo::doc}\label{todo:future-changes-to-ruffus}\begin{quote}
+
+I would appreciate feedback and help on all these issues, and on where next to take \emph{ruffus}.
+
+\textbf{Future Changes} are features where we more or less know where we are going and how to get there.
+
+\textbf{Planned Improvements} describes features we would like in Ruffus but where the implementation
+or syntax has not yet been (fully) worked out.
+
+If you have suggestions or contributions, please either write to me ( ruffus\_lib at llew.org.uk) or
+send a pull request via the \href{https://github.com/bunbun/ruffus}{git site}.
+\end{quote}
+
+
+\subsection{Todo: pipeline\_printout\_graph should print inactive tasks}
+\label{todo:todo-pipeline-printout-graph-should-print-inactive-tasks}\label{todo:todo-inactive-tasks-in-pipeline-printout-graph}
+
+\subsection{Todo: Mark input strings as non-file names, and add support for dynamically returned parameters}
+\label{todo:todo-dynamic-strings}\label{todo:todo-mark-input-strings-as-non-file-names-and-add-support-for-dynamically-returned-parameters}\begin{quote}
+\begin{enumerate}
+\item {}
+Use indicator object.
+
+\item {}
+What is a good name? \code{"output\_from()"}, \code{"NOT\_FILE\_NAME"} :-)
+
+\item {}
+They will still participate in suffix, formatter and regex replacement
+
+\end{enumerate}
+
+Bernie Pope suggests that we should generalise this:
+
+If any object in the input parameters is a (non-list/tuple) class instance, check (via getattr) whether it has a \code{ruffus\_params()} function.
+If it does, call it to obtain a list which is substituted in place.
+If there are strings nested within, these will also take part in Ruffus string substitution.
+Objects with \code{ruffus\_params()} always ``decay'' to the results of the function call.
+
+\code{output\_from} would be a simple wrapper which returns the internal string via \code{ruffus\_params()}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{k}{class} \PYG{n+nc}{output\PYGZus{}from} \PYG{p}{(}\PYG{n+nb}{object}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{def} \PYG{n+nf}{\PYGZus{}\PYGZus{}init\PYGZus{}\PYGZus{}}\PYG{p}{(}\PYG{n+nb+bp}{self}\PYG{p}{,} \PYG{n+nb}{str}\PYG{p}{)}\PYG{p}{:}
+ \PYG{n+nb+bp}{self}\PYG{o}{.}\PYG{n}{str} \PYG{o}{=} \PYG{n+nb}{str}
+ \PYG{k}{def} \PYG{n+nf}{ruffus\PYGZus{}params}\PYG{p}{(}\PYG{n+nb+bp}{self}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{return} \PYG{p}{[}\PYG{n+nb+bp}{self}\PYG{o}{.}\PYG{n}{str}\PYG{p}{]}
+\end{Verbatim}
+
+Returning a list should be like wildcards and should not introduce an unnecessary level of indirection for output parameters, i.e. suffix(``.txt'') or formatter() / ``\{basename{[}0{]}\}'' should work.
+
+Check!
+\end{quote}
+
+
+\subsection{Todo: Allow ``extra'' parameters to be used in output substitution}
+\label{todo:todo-allow-extra-parameters-to-be-used-in-output-substitution}\label{todo:todo-extra-parameters}\begin{quote}
+
+Formatter substitution can refer to the original elements in the input and extra parameters (without converting them to strings). This refers to the original (nested) data structure.
+
+This will allow normal python datatypes to be handed down and slipstreamed into a pipeline more easily.
+
+The syntax would use Ruffus (\textgreater{} version 2.4) formatter:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n+nd}{@transform}\PYG{p}{(} \PYG{o}{.}\PYG{o}{.}\PYG{o}{.}\PYG{p}{,} \PYG{n}{formatter}\PYG{p}{(}\PYG{p}{)}\PYG{p}{,} \PYG{p}{[}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}EXTRAS[0][1][3]\PYGZcb{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{c}{\PYGZsh{} EXTRAS}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{[INPUTS[1][2]]}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{,}\PYG{o}{.}\PYG{o}{.}\PYG{o}{.}\PYG{p}{)} \PYG{c}{\PYGZsh{} INPUTS}
+\PYG{k}{def} \PYG{n+nf}{taskfunc}\PYG{p}{(}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{pass}
+\end{Verbatim}
+
+\code{EXTRAS} and \code{INPUTS} indicate that we are referring to the extra and input parameters respectively.
+
+These are the full (nested) parameters in all their original form. In the case of the input parameters, this obviously depends on the decorator, so
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n+nd}{@transform}\PYG{p}{(}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{a.text}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{p}{[}\PYG{l+m+mi}{1}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{b.text}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{]}\PYG{p}{,} \PYG{n}{formatter}\PYG{p}{(}\PYG{p}{)}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}INPUTS[0][0]\PYGZcb{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{taskfunc}\PYG{p}{(}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{pass}
+\end{Verbatim}
+
+would give
+
+\begin{Verbatim}[commandchars=\\\{\}]
+job \#1
+ input == "a.text"
+ output == "a"
+
+job \#2
+ input == [1, "b.text"]
+ output == 1
+\end{Verbatim}
+
+The entire string must consist of \code{INPUTS} or \code{EXTRAS} followed by one or more levels of square brackets, i.e. they must match \code{"(INPUTS\textbar{}EXTRAS)(\textbackslash{}{[}\textbackslash{}d+\textbackslash{}{]})+"}
+
+No string conversion takes place.
+
+For \code{INPUTS} or \code{EXTRAS} which have objects with a \code{ruffus\_params()} function (see Todo item above),
+the original object rather than the result of \code{ruffus\_params()} is forwarded.
+\end{quote}
+
+
+\subsection{Todo: Extra signalling before and after each task and job}
+\label{todo:todo-extra-signalling-before-and-after-each-task-and-job}\label{todo:todo-pre-post-job}\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n+nd}{@prejob}\PYG{p}{(}\PYG{n}{custom\PYGZus{}func}\PYG{p}{)}
+\PYG{n+nd}{@postjob}\PYG{p}{(}\PYG{n}{custom\PYGZus{}func}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{task}\PYG{p}{(}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{pass}
+\end{Verbatim}
+
+\code{@prejob} / \code{@postjob} would be run in the child processes.
+\end{quote}
+
+
+\subsection{Todo: \texttt{@split} / \texttt{@subdivide} returns the actual output created}
+\label{todo:todo-new-decorators}\label{todo:todo-split-subdivide-returns-the-actual-output-created}\begin{itemize}
+\item {}
+\textbf{overrides} (not replaces) wild cards.
+
+\item {}
+Returns a list, each item containing output and extra parameters.
+
+\item {}
+Won't include extraneous files which were not created in the pipeline but which just happened to match the wild card
+
+\item {}
+We should have \code{ruffus\_output\_params}, \code{ruffus\_extra\_params} wrappers for clarity:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n+nd}{@split}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{a.file}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{*.txt}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{split\PYGZus{}into\PYGZus{}txt\PYGZus{}files}\PYG{p}{(}\PYG{n}{input\PYGZus{}file}\PYG{p}{,} \PYG{n}{output\PYGZus{}files}\PYG{p}{)}\PYG{p}{:}
+ \PYG{n}{output\PYGZus{}files} \PYG{o}{=} \PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{a.txt}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{b.txt}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{c.txt}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}
+ \PYG{k}{for} \PYG{n}{output\PYGZus{}file\PYGZus{}name} \PYG{o+ow}{in} \PYG{n}{output\PYGZus{}files}\PYG{p}{:}
+ \PYG{k}{with} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file\PYGZus{}name}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)} \PYG{k}{as} \PYG{n}{oo}\PYG{p}{:}
+ \PYG{k}{pass}
+ \PYG{k}{return} \PYG{p}{[}
+ \PYG{n}{ruffus\PYGZus{}output}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{a.file}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,}
+ \PYG{p}{[}\PYG{n}{ruffus\PYGZus{}output}\PYG{p}{(}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{b.file}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{c.file}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{)}\PYG{p}{,} \PYG{n}{ruffus\PYGZus{}extras}\PYG{p}{(}\PYG{l+m+mi}{13}\PYG{p}{,} \PYG{l+m+mi}{14}\PYG{p}{)}\PYG{p}{]}\PYG{p}{,}
+ \PYG{p}{]}
+\end{Verbatim}
+
+\item {}
+Consider yielding?
+
+\end{itemize}
+
+
+\subsubsection{Checkpointing}
+\label{todo:checkpointing}\begin{itemize}
+\item {}
+If a checkpoint file is used, the actual files are saved and checked the next time
+
+\item {}
+If no files are generated, no files are checked the next time...
+
+\item {}
+The output files do not have to match the wildcard though we can output a warning message if that happens...
+This is obviously dangerous because the behavior will change if the pipeline is rerun without using the checkpoint file
+
+\item {}
+What happens if the task function changes?
+
+\end{itemize}
+
+
+\subsection{Todo: New decorators}
+\label{todo:id1}
+
+\subsubsection{Todo: \texttt{@originate}}
+\label{todo:todo-originate}\begin{quote}
+
+Each (serial) invocation returns lists of output parameters until it returns
+None. (Empty list = \code{continue}, None = \code{break}).
+\end{quote}
+
+
+\subsubsection{Todo: \texttt{@recombine}}
+\label{todo:todo-recombine}\begin{quote}
+
+Like \code{@collate} but automatically regroups jobs which were a result of a previous \code{@subdivide} / \code{@split} (even after intervening \code{@transform} )
+
+This is the only way job trickling can work without stalling the pipeline: We would know
+how many jobs were pending for each \code{@recombine} job and which jobs go together.
+\end{quote}
+
+
+\subsection{Todo: Named parameters in decorators for clarity}
+\label{todo:todo-named-parameters-in-decorators-for-clarity}
+
+\subsection{Todo: Bioinformatics example to end all examples}
+\label{todo:todo-bioinformatics-example-to-end-all-examples}\label{todo:todo-bioinformatics-example}\begin{quote}
+\begin{description}
+\item[{Uses}] \leavevmode\begin{itemize}
+\item {}
+\code{@product}
+
+\item {}
+\code{@subdivide}
+
+\item {}
+\code{@transform}
+
+\item {}
+\code{@collate}
+
+\item {}
+\code{@merge}
+
+\end{itemize}
+
+\end{description}
+\end{quote}
+
+
+\subsection{Todo: Allow the next task to start before all jobs in the previous task have finished}
+\label{todo:todo-allow-the-next-task-to-start-before-all-jobs-in-the-previous-task-have-finished}\begin{quote}
+
+Jake (Biesinger) calls this \textbf{Job Trickling}!
+\begin{itemize}
+\item {}
+A single long-running job will no longer hold up the entire pipeline
+
+\item {}
+Calculates dependencies dynamically at the job level.
+
+\item {}
+The goal is to have a long-running (months) pipeline to which we can keep adding input...
+
+\item {}
+We can choose between prioritising completion of the entire pipeline for some jobs
+(depth first) or trying to complete as many tasks as possible (breadth first)
+
+\end{itemize}
+\end{quote}
+
+
+\subsubsection{Converting to per-job rather than per task dependencies}
+\label{todo:converting-to-per-job-rather-than-per-task-dependencies}\begin{quote}
+
+Some decorators prevent per-job (rather than per-task) dependency calculations, and
+will cause a pipeline stall until the dependent tasks are completed (the current situation):
+\begin{itemize}
+\item {} \begin{description}
+\item[{Some types of jobs unavoidably depend on an entire previous task completing:}] \leavevmode\begin{itemize}
+\item {}
+\code{add\_inputs()}, \code{inputs()}
+
+\item {}
+\code{@merge}
+
+\item {}
+\code{@split} (implicit \code{@merge})
+
+\end{itemize}
+
+\end{description}
+
+\item {} \begin{description}
+\item[{\code{@split}, \code{@originate} produce a variable amount of output at runtime and must be completed before the next task can be run.}] \leavevmode\begin{itemize}
+\item {}
+Should \code{yield} instead of return?
+
+\end{itemize}
+
+\end{description}
+
+\item {} \begin{description}
+\item[{\code{@collate} needs to pattern match all the inputs of a previous task}] \leavevmode\begin{itemize}
+\item {}
+Replace \code{@collate} with \code{@recombine} which ``remembers'' and reverses the results of a previous
+\code{@subdivide} or \code{@split}
+
+\item {}
+Jobs need unique job\_id tag
+
+\item {}
+Jobs are assigned (nested) grouping id which accompany them down the
+pipeline after \code{@subdivide} / \code{@split} and are removed after \code{@recombine}
+
+\item {}
+Should have a count of jobs so we always know \emph{when} an ``input slot'' is full
+
+\end{itemize}
+
+\end{description}
+
+\item {}
+The funny ``single file'' mode for \code{@transform} and \code{@files} needs to be
+regularised so that it is a syntactic (front-end) convenience (oddity!)
+and does not plague the innards of Ruffus.
+
+\end{itemize}
+
+Breaking change: to force the entirety of the previous task to complete before the next one, use \code{@follows}
+\end{quote}
+
+
+\subsubsection{Implementation}
+\label{todo:implementation}\begin{itemize}
+\item {}
+``Push'' model. Completing jobs ``check in'' their outputs to ``input slots'' for all the successor jobs.
+
+\item {}
+When ``input slots'' are full for any job, it is put on the dispatch queue to be run.
+
+\item {}
+The priority (depth first or breadth first) can be set here.
+
+\item {}
+\code{pipeline\_run} / \code{pipeline\_printout} create a task dependency tree structure (from decorator dependencies) (a runtime pipeline object)
+
+\item {}
+Each task in the pipeline object knows which other tasks wait on it.
+
+\item {}
+When output is created by a job, it sends messages (i.e. function calls) to all dependent tasks in the pipeline object with the new output
+
+\item {}
+Sets of output such as those from \code{@split}, \code{@subdivide} and \code{@originate} have a
+terminating condition and/or an associated count (\# of outputs)
+
+\item {}
+Tasks in the pipeline object forward incoming inputs to task input slots (for slots common to all jobs in a
+task: \code{@inputs}, \code{@add\_inputs}) or to slots in new jobs in the pipeline object
+
+\item {}
+When all slots are full in each job, this triggers putting the job parameters onto the job submission queue
+
+\item {}
+The pipeline object should allow Ruffus to be reentrant?
+
+\end{itemize}
+
+
+\section{Planned Improvements to Ruffus}
+\label{todo:planned-improvements-to-ruffus}\phantomsection\label{todo:todo-run-on-cluster}\begin{itemize}
+\item {}
+\code{@split} needs to be able to specify at run time the number of
+resulting jobs without using wild cards
+
+\item {}
+legacy support for wild cards and file names.
+
+\end{itemize}
+
+
+\subsection{Planned: Running python code (task functions) transparently on remote cluster nodes}
+\label{todo:planned-running-python-code-task-functions-transparently-on-remote-cluster-nodes}\begin{quote}
+
+Wait until the next release.
+
+Will bump Ruffus to v.3.0 if it can run python jobs transparently on a cluster!
+
+Abstract out \code{task.run\_pooled\_job\_without\_exceptions()} as a function which can be supplied to \code{pipeline\_run}.
+
+Common ``job'' interface:
+\begin{itemize}
+\item {}
+marshalled arguments
+
+\item {}
+marshalled function
+
+\item {}
+submission timestamp
+
+\end{itemize}
+\begin{description}
+\item[{Returns}] \leavevmode\begin{itemize}
+\item {}
+completion timestamp
+
+\item {}
+returned values
+
+\item {}
+exception
+
+\end{itemize}
+
+\end{description}
+\begin{enumerate}
+\item {}
+Full version: use libpythongrid?
+* Christian Widmer \textless{}\href{mailto:ckwidmer at gmail.com}{ckwidmer at gmail.com}\textgreater{}
+* Cheng Soon Ong \textless{}\href{mailto:chengsoon.ong at unimelb.edu.au}{chengsoon.ong at unimelb.edu.au}\textgreater{}
+* \href{https://code.google.com/p/pythongrid/source/browse/\#git\%2Fpythongrid}{https://code.google.com/p/pythongrid/source/browse/\#git\%2Fpythongrid}
+* Probably not a good idea to base Ruffus entirely on libpythongrid: we want to minimise dependencies, avoid its sophisticated configuration policies, etc.
+
+\item {}
+Start with light-weight file-based protocol
+* specify where the scripts should live
+* use drmaa to start jobs
+* have an executable ruffus module which knows how to load and deserialise (unmarshal) the function / parameters from disk. This would be what drmaa starts up, given the marshalled data as an argument (see the sketch after this list)
+* time stamp
+* ``heart beat'' to check that the job is still running
+
+\item {}
+Next step: socket-based protocol
+* use specified master port in ruffus script
+* start remote processes using drmaa
+* the child receives the marshalled data and the address::port of the ruffus script (head node) to initiate a handshake, or dies
+* process recycling: run successive jobs on the same remote process for reduced overhead, until it exceeds the maximum number of jobs or the minimum/maximum time allowed on the same process
+* resubmit if a job dies (don't do sophisticated stuff like libpythongrid).
+
+\end{enumerate}
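+
+A minimal sketch (hypothetical module and function names, not an existing Ruffus API) of the light-weight file-based protocol: the head node marshals the function and its parameters to disk, and a small runner, started via drmaa on a cluster node, unmarshals and runs them and writes back the completion timestamp, returned values and any exception.
+
+\begin{Verbatim}[commandchars=\\\{\}]
+import pickle, time
+
+def write_job_file(job_file_name, func, args):
+    # head node: marshal the function (by reference, so it must be
+    # importable on the remote side) and its parameters, together
+    # with a submission timestamp
+    with open(job_file_name, "wb") as f:
+        pickle.dump((func, args, time.time()), f, protocol=-1)
+    # drmaa (or qsub) would then start something like
+    #     python -m hypothetical_ruffus_runner job_file result_file
+    # on a remote node
+
+def run_job_file(job_file_name, result_file_name):
+    # remote node: unmarshal, run, and write back
+    # (completion timestamp, returned values, exception)
+    with open(job_file_name, "rb") as f:
+        func, args, submission_time = pickle.load(f)
+    try:
+        result = (time.time(), func(*args), None)
+    except Exception as exception:
+        result = (time.time(), None, exception)
+    with open(result_file_name, "wb") as f:
+        pickle.dump(result, f, protocol=-1)
+\end{Verbatim}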
+\end{quote}
+\phantomsection\label{todo:todo-job-trickling}
+
+\subsection{Planned: Custom parameter generator}
+\label{todo:todo-job-trickling}\label{todo:planned-custom-parameter-generator}\label{todo:todo-custom-parameters}\begin{quote}
+
+Request on mailing list
+\begin{quote}
+\begin{quote}
+
+I've often wished that I could use an arbitrary function to process the input filepath instead of just a regex.
+\end{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{k}{def} \PYG{n+nf}{f}\PYG{p}{(}\PYG{n}{inputs}\PYG{p}{,} \PYG{n}{outputs}\PYG{p}{,} \PYG{n}{extra\PYGZus{}param1}\PYG{p}{,} \PYG{n}{extra\PYGZus{}param2}\PYG{p}{)}\PYG{p}{:}
+ \PYG{c}{\PYGZsh{} do something to generate parameters}
+ \PYG{k}{return} \PYG{n}{new\PYGZus{}output\PYGZus{}param}\PYG{p}{,} \PYG{n}{new\PYGZus{}extra\PYGZus{}param1}\PYG{p}{,} \PYG{n}{new\PYGZus{}extra\PYGZus{}param2}
+\end{Verbatim}
+
+now f() can be used inside a Ruffus decorator to generate the outputs from inputs, instead of being forced to use a regex for the job.
+
+Cheers,
+Bernie.
+\end{quote}
+
+This would leverage built-in Ruffus functionality, so that users would not have to write
+the entire parameter generation from scratch (see the sketch after the following list):
+\begin{itemize}
+\item {}
+Gets passed an iterator over which you can loop to obtain the input parameters / a flattened list of files
+
+\item {}
+Other parameters are forwarded as is
+
+\item {}
+The duty of the function is to \code{yield} input, output, extra parameters
+
+\end{itemize}
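+
+Under this proposed design, a custom generator might look something like the following sketch (hypothetical; this is not a current Ruffus API):
+
+\begin{Verbatim}[commandchars=\\\{\}]
+import os
+
+def my_param_generator(input_params_iter, extra_param1, extra_param2):
+    # Ruffus would pass in an iterator over the input parameters
+    # (or a flattened list of file names); the remaining decorator
+    # arguments would be forwarded unchanged
+    for input_file in input_params_iter:
+        # arbitrary python logic instead of a regular expression
+        base, ext = os.path.splitext(input_file)
+        output_file = base + ".processed"
+        # the duty of the function: yield input, output, extra parameters
+        yield input_file, output_file, extra_param1, extra_param2
+\end{Verbatim}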
+
+Simple to do but how do we prevent this from being a job-trickling barrier?
+
+Postpone until we have an initial design for job-trickling: Ruffus v.4 ;-(
+\end{quote}
+
+
+\subsection{Planned: Ruffus GUI interface.}
+\label{todo:planned-ruffus-gui-interface}\label{todo:todo-gui}\begin{quote}
+
+Desktop (PyQT) or web-based solution? I'd love to see an SVG pipeline picture that I could actually interact with.
+\end{quote}
+
+
+\subsection{Planned: Non-decorator / Function interface to Ruffus}
+\label{todo:planned-non-decorator-function-interface-to-ruffus}
+
+\subsection{Planned: Remove intermediate files}
+\label{todo:planned-remove-intermediate-files}\label{todo:todo-intermediate-files}\begin{quote}
+
+Often, large intermediate files which could be removed are produced in the middle of a
+pipeline. However, their absence would cause the pipeline to appear out of date. What is
+the best way to solve this?
+
+In gmake, all intermediate files which are not marked \code{.PRECIOUS} are deleted.
+\begin{description}
+\item[{We do not want to manually mark intermediate files for several reasons:}] \leavevmode\begin{itemize}
+\item {}
+The syntax would be horrible and clunky
+
+\item {}
+The gmake distinction between \code{implicit} and \code{explicit} rules is not one we
+would like to impose on Ruffus
+
+\item {}
+Gmake uses statically determined (DAG) dependency trees so it is quite natural and
+easy to prune intermediate paths
+
+\end{itemize}
+
+\end{description}
+
+Our preferred solution should impose little to no semantic load on Ruffus, i.e. it should
+not make it more complex / difficult to use. There are several alternatives we are
+considering:
+\begin{enumerate}
+\item {}
+Have an \textbf{update} mode in which pipeline\_run would ignore missing files and only run tasks with existing, out-of-date files.
+
+\item {}
+Optionally ignore all out-of-date dependencies beyond a specified point in the pipeline
+
+\item {}
+Add a decorator to flag sections of the pipeline where intermediate files can be removed
+
+\end{enumerate}
+
+Option (1) is rather unnerving because it makes inadvertent errors difficult to detect.
+
+Option (2) involves relying on the user of a script to remember the correct chain of dependencies in
+often complicated pipelines. They would be advised to keep a flowchart to hand. Again,
+the chances of error are much greater.
+
+Option (3) springs from the observation by Andreas Heger that parts of a pipeline with
+disposable intermediate files can usually be encapsulated as an autonomous section.
+Within this subpipeline, all is well provided that the outputs of the last task are complete
+and up-to-date with reference to the inputs of the first task. Intermediate files
+could be removed with impunity.
+
+The suggestion is that these autonomous subpipelines could be marked out using the Ruffus
+decorator syntax:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} First task in autonomous subpipeline}
+\PYG{c}{\PYGZsh{}}
+\PYG{n+nd}{@files}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{who.isit}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{its.me}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{first\PYGZus{}task}\PYG{p}{(}\PYG{o}{*}\PYG{n}{args}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{pass}
+
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} Several intermediate tasks}
+\PYG{c}{\PYGZsh{}}
+\PYG{n+nd}{@transform}\PYG{p}{(}\PYG{n}{subpipeline\PYGZus{}task1}\PYG{p}{,} \PYG{n}{suffix}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.me}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.her}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{task2\PYGZus{}etc}\PYG{p}{(}\PYG{o}{*}\PYG{n}{args}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{pass}
+
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} Final task}
+\PYG{c}{\PYGZsh{}}
+\PYG{n+nd}{@sub\PYGZus{}pipeline}\PYG{p}{(}\PYG{n}{subpipeline\PYGZus{}task1}\PYG{p}{)}
+\PYG{n+nd}{@transform}\PYG{p}{(}\PYG{n}{subpipeline\PYGZus{}task1}\PYG{p}{,} \PYG{n}{suffix}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.her}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.you}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{final\PYGZus{}task}\PYG{p}{(}\PYG{o}{*}\PYG{n}{args}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{pass}
+\end{Verbatim}
+
+\textbf{@sub\_pipeline} marks out all tasks between \code{first\_task} and \code{final\_task} and
+intermediate files such as \code{"its.me"}, \code{"its.her"} can be deleted. The pipeline will
+only run if \code{"its.you"} is missing or out-of-date compared with \code{"who.isit"}.
+
+Over the next few Ruffus releases we will see if this is a good design, and whether
+a better keyword can be found than \textbf{@sub\_pipeline} (candidates include \textbf{@shortcut}
+and \textbf{@intermediate}).
+\end{quote}
+
+
+\subsection{Planned: @retry\_on\_error(NUM\_OF\_RETRIES)}
+\label{todo:planned-retry-on-error-num-of-retries}\label{todo:todo-retry}
+
+\subsection{Planned: Clean up}
+\label{todo:planned-clean-up}\label{todo:todo-cleanup}\begin{quote}
+
+The plan is to store the files and directories created, via
+a standard interface.
+
+The placeholder for this is a function call, \code{register\_cleanup}.
+
+Jobs can specify the files they created and which need to be
+deleted by returning a list of file names from the job function.
+
+So:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+raise Exception = Error
+
+return False = halt pipeline now
+
+return string / list of strings = cleanup files/directories later
+
+return anything else = ignored
+\end{Verbatim}
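+
+For example, under this proposed convention a job function might flag its temporary file for later cleanup simply by returning its name (a sketch of the proposed behaviour, not something Ruffus currently supports):
+
+\begin{Verbatim}[commandchars=\\\{\}]
+from ruffus import transform, suffix
+
+@transform(["sample1.data"], suffix(".data"), ".result")
+def analyse(input_file, output_file):
+    temp_file = output_file + ".tmp"
+    # ... write temp_file, then produce output_file from it ...
+    # proposed convention: returning a file name (or list of file names)
+    # registers it for cleanup later; returning anything else is ignored
+    return temp_file
+\end{Verbatim}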
+
+The cleanup file/directory store interface can be connected to
+a text file or a database.
+
+The cleanup function would look like this:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+pipeline\_cleanup(cleanup\_log("../cleanup.log"), [instance ="october19th" ])
+pipeline\_cleanup(cleanup\_msql\_db("user", "password", "hash\_record\_table"))
+\end{Verbatim}
+
+The parameters for where and how to store the list of created files could be
+similarly passed to pipeline\_run as an extra parameter:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+pipeline\_run(cleanup\_log("../cleanup.log"), [instance ="october19th" ])
+pipeline\_run(cleanup\_msql\_db("user", "password", "hash\_record\_table"))
+\end{Verbatim}
+
+where \emph{cleanup\_log} and \emph{cleanup\_msql\_db} are classes which have functions for
+\begin{enumerate}
+\item {}
+storing files
+
+\item {}
+retrieving files
+
+\item {}
+clearing entries
+
+\end{enumerate}
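+
+A minimal sketch of what such a store class might look like, backed by a flat text file (the method names \code{store\_file\_name}, \code{retrieve\_file\_names} and \code{clear\_entries} are hypothetical):
+
+\begin{Verbatim}[commandchars=\\\{\}]
+from __future__ import print_function
+
+class cleanup_log(object):
+    # sketch of a cleanup store backed by a flat text file; only the
+    # interface (store / retrieve / clear) matters here
+    def __init__(self, log_file_name, instance=""):
+        self.log_file_name = log_file_name
+        self.instance = instance
+
+    def store_file_name(self, file_name):
+        with open(self.log_file_name, "a") as log_file:
+            print(file_name, file=log_file)
+
+    def retrieve_file_names(self):
+        with open(self.log_file_name) as log_file:
+            return [line.strip() for line in log_file]
+
+    def clear_entries(self):
+        open(self.log_file_name, "w").close()
+\end{Verbatim}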
+\begin{itemize}
+\item {}
+Files would be deleted in reverse order, and directories after files.
+
+\item {}
+By default, only empty directories would be removed.
+
+But this could be changed with a \code{-{-}forced\_remove\_dir} option
+
+\item {}
+An \code{-{-}remove\_empty\_parent\_directories} option would be
+implemented using \href{http://docs.python.org/library/os.html\#os.removedirs}{os.removedirs(path)}.
+
+\end{itemize}
+\end{quote}
+
+
+\section{Implementation Tips}
+\label{implementation_notes:implementation-tips}\label{implementation_notes::doc}
+
+\subsection{Release}
+\label{implementation_notes:release}\begin{itemize}
+\item {}
+Change \code{ruffus\_version.py}
+
+\item {}
+Rebuild pdf and copy it to \code{doc/static\_data}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+cd doc
+make latexpdf
+cp _build/latex/ruffus.pdf static_data
+\end{Verbatim}
+
+\item {}
+Rebuild documentation:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+make htmlsync
+\end{Verbatim}
+
+\item {}
+Tag git with, for example:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+git tag -a v2.5RC -m "Version 2.5 Release Candidate"
+\end{Verbatim}
+
+\item {}
+Upload to pypi:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+python setup.py sdist --format=gztar upload
+\end{Verbatim}
+
+\end{itemize}
+
+
+\subsection{dbdict.py}
+\label{implementation_notes:dbdict-py}\begin{quote}
+
+This is an sqlite backed dictionary originally written by Jacob Sondergaard and
+contributed by Jake Biesinger who added automatic pickling of python objects.
+
+The pickling code was refactored out by Leo Goodstadt into separate functions as
+part of the preparation to make Ruffus python3 ready.
+
+Python originally saved (pickled) objects as 7-bit ASCII strings. Later formats
+(protocol = -1 is the latest) use 8-bit strings and are rather more efficient.
+
+These then need to be saved as BLOBs to sqlite3 rather than normal strings. We
+can signal this by wrapping the pickled string in an object providing a ``buffer interface''.
+This is \code{buffer} in python2.6/2.7 and \code{memoryview} in python3.
+
+\href{http://bugs.python.org/issue7723}{http://bugs.python.org/issue7723} suggests there is no portable python2/3 way to write
+blobs to Sqlite without these two incompatible wrappers.
+This would require conditional code:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{k}{if} \PYG{n}{sys}\PYG{o}{.}\PYG{n}{hexversion} \PYG{o}{\PYGZgt{}}\PYG{o}{=} \PYG{l+m+mh}{0x03000000}\PYG{p}{:}
+ \PYG{n}{value} \PYG{o}{=} \PYG{n}{memoryview}\PYG{p}{(}\PYG{n}{pickle}\PYG{o}{.}\PYG{n}{dumps}\PYG{p}{(}\PYG{n}{value}\PYG{p}{,} \PYG{n}{protocol} \PYG{o}{=} \PYG{o}{\PYGZhy{}}\PYG{l+m+mi}{1}\PYG{p}{)}\PYG{p}{)}
+\PYG{k}{else}\PYG{p}{:}
+ \PYG{n}{value} \PYG{o}{=} \PYG{n+nb}{buffer}\PYG{p}{(}\PYG{n}{pickle}\PYG{o}{.}\PYG{n}{dumps}\PYG{p}{(}\PYG{n}{value}\PYG{p}{,} \PYG{n}{protocol} \PYG{o}{=} \PYG{o}{\PYGZhy{}}\PYG{l+m+mi}{1}\PYG{p}{)}\PYG{p}{)}
+\end{Verbatim}
+
+Despite the discussion on the bug report, sqlite3.Binary seems to work.
+We shall see if this is portable to python3.
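+
+A minimal self-contained illustration (not the actual \code{dbdict} code) of storing a pickled object as a BLOB via \code{sqlite3.Binary}:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+import pickle, sqlite3
+
+con = sqlite3.connect(":memory:")
+con.execute("CREATE TABLE data (key PRIMARY KEY, value)")
+
+# sqlite3.Binary wraps the pickled bytes so they are stored as a BLOB
+blob = sqlite3.Binary(pickle.dumps(dict(a=1), protocol=-1))
+con.execute("INSERT INTO data (key, value) VALUES (?, ?)", ("some_key", blob))
+
+# round trip: read the BLOB back and unpickle it
+row = con.execute("SELECT value FROM data WHERE key = ?", ("some_key",)).fetchone()
+value = pickle.loads(bytes(row[0]))
+\end{Verbatim}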
+\end{quote}
+
+
+\subsection{how to write new decorators}
+\label{implementation_notes:how-to-write-new-decorators}\begin{quote}
+
+Add a new placeholder class, e.g. for \code{@new\_deco}:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{k}{class} \PYG{n+nc}{new\PYGZus{}deco}\PYG{p}{(}\PYG{n}{task\PYGZus{}decorator}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{pass}
+\end{Verbatim}
+
+Add to list of action names and ids:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n}{action\PYGZus{}names} \PYG{o}{=} \PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{unspecified}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{o}{.}\PYG{o}{.}\PYG{o}{.}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{task\PYGZus{}new\PYGZus{}deco}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+
+\PYG{n}{action\PYGZus{}task\PYGZus{}new\PYGZus{}deco} \PYG{o}{=} \PYG{l+m+mi}{15}
+\end{Verbatim}
+
+Add the corresponding handler function:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{k}{def} \PYG{n+nf}{task\PYGZus{}transform} \PYG{p}{(}\PYG{n+nb+bp}{self}\PYG{p}{,} \PYG{n}{orig\PYGZus{}args}\PYG{p}{)}\PYG{p}{:}
+\end{Verbatim}
+
+Add documentation to:
+\begin{itemize}
+\item {}
+decorators/NEW\_DECORATOR.rst
+
+\item {}
+decorators/decorators.rst
+
+\item {}
+\_templates/layout.html
+
+\item {}
+manual
+
+\end{itemize}
+\end{quote}
+
+
+\section{Implementation notes}
+\label{implementation_notes:implementation-notes}
+N.B. Remember to cite Jake Biesinger and see if he is interested to be a co-author if we ever resubmit the drastically changed version...
+He contributed checkpointing, travis and tox etc.
+
+
+\subsection{\texttt{Ctrl-C} handling}
+\label{implementation_notes:ctrl-c-handling}\label{implementation_notes:todo-misfeatures}\begin{quote}
+
+Pressing \code{Ctrl-C} left dangling processes in Ruffus 2.4 because \code{KeyboardInterrupt} does not play nice with python \code{multiprocessing.Pool}
+See \href{http://stackoverflow.com/questions/1408356/keyboard-interrupts-with-pythons-multiprocessing-pool/1408476\#1408476}{http://stackoverflow.com/questions/1408356/keyboard-interrupts-with-pythons-multiprocessing-pool/1408476\#1408476}
+
+\href{http://bryceboe.com/2012/02/14/python-multiprocessing-pool-and-keyboardinterrupt-revisited/}{http://bryceboe.com/2012/02/14/python-multiprocessing-pool-and-keyboardinterrupt-revisited/} provides a reimplementation of Pool which
+however only works when you have a fixed number of jobs which should then run in parallel to completion. Ruffus is considerably more
+complicated because we have a variable number of jobs completing and being submitted into the job queue at any one time. Think
+of tasks stalling while waiting for the tasks they depend on to complete, and then all the jobs of the stalled task being released onto the queue at once.
+
+The solution is
+\begin{enumerate}
+\item {}
+Use a \code{timeout} parameter when using \code{IMapIterator.next(timeout=None)} to iterate through \code{pool.imap\_unordered}, because only timed \code{condition}s can be interrupted by signals!
+
+\item {}
+This involves rewriting the \code{for} loop manually as a \code{while} loop (sketched after this list)
+
+\item {}
+We use a timeout of \code{99999999}, i.e. 3 years, which should be enough for any job to complete...
+
+\item {}
+Googling after the fact, it looks like the galaxy guys (cool dudes or what) have written similar \href{https://galaxy-dist.readthedocs.org/en/latest/\_modules/galaxy/objectstore/s3\_multipart\_upload.html}{code}
+
+\item {}
+\code{next()} for normal iterators does not take \code{timeout} as an extra parameter, so we have to wrap \code{next()} in a conditional :-(. The galaxy guys do a \href{http://en.wikipedia.org/wiki/Shim\_(computing)}{shim} around \code{next()} but that is as much obfuscation as a simple if...
+
+\item {}
+After jobs are interrupted by a signal, we rethrow with our own exception because we want something that inherits from \code{Exception} unlike \code{KeyboardInterrupt}
+
+\item {}
+When a signal happens, we need to immediately stop \code{feed\_job\_params\_to\_process\_pool()} from sending more parameters into the job queue (\code{parameter\_q})
+We use a proxy to a \code{multiprocessing.Event} (via \code{syncmanager.Event()}). When \code{death\_event} is set, all further processing stops...
+
+\item {}
+We also signal that all jobs should finish by putting \code{all\_tasks\_complete()} into \code{parameter\_q} but only \code{death\_event} prevents jobs already in the queue from going through
+
+\item {}
+After signalling, some of the child processes appear to be dead by the time we start cleaning up. \code{pool.terminate()} sometimes tries and fails to
+re-connect to the \code{death\_event} proxy via sockets and throws an exception. We should really figure out a better solution but in the meantime
+wrapping it in a \code{try / except} allows a clean exit.
+
+\item {}
+If a vanilla exception is raised without multiprocessing running, we still need to first save the exception in \code{job\_errors} (even if it is just one) before
+cleaning up, because the cleaning up process may lead to further (ignored) exceptions which would overwrite the current exception when we need to rethrow it
+
+\end{enumerate}
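+
+The while-loop rewrite described in points 1 to 3 above looks roughly like this (a simplified sketch, not the actual Ruffus code; \code{JobSignalledBreak} is a hypothetical exception class):
+
+\begin{Verbatim}[commandchars=\\\{\}]
+import multiprocessing
+
+class JobSignalledBreak(Exception):
+    # hypothetical: something that inherits from Exception,
+    # unlike KeyboardInterrupt
+    pass
+
+def run_jobs(worker, job_parameters, n_processes):
+    pool = multiprocessing.Pool(n_processes)
+    result_iterator = pool.imap_unordered(worker, job_parameters)
+    while True:
+        try:
+            # only a timed next() can be interrupted by signals;
+            # 99999999 seconds is "forever" for any real job
+            result = result_iterator.next(timeout=99999999)
+        except StopIteration:
+            break
+        except KeyboardInterrupt:
+            pool.terminate()
+            raise JobSignalledBreak("Ctrl-C pressed")
+        # ... process result here ...
+    pool.close()
+    pool.join()
+\end{Verbatim}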
+
+Exceptions thrown in the middle of a multiprocessing / multithreading job appear to be handled gracefully.
+
+For drmaa jobs, \code{qdel} may still be necessary.
+\end{quote}
+
+
+\subsection{Python3 compatibility}
+\label{implementation_notes:python3-compatability}\begin{quote}
+
+Required extensive changes especially in unit test code.
+
+Changes:
+\begin{enumerate}
+\item {}
+\code{sort} in python3 does not order mixed types, i.e. \code{int()}, \code{list()} and \code{str()} are incommensurate
+\begin{itemize}
+\item {}
+In \code{task.get\_output\_files (...)}, sort after conversion to string
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n+nb}{sorted}\PYG{p}{(}\PYG{n+nb+bp}{self}\PYG{o}{.}\PYG{n}{output\PYGZus{}filenames}\PYG{p}{,} \PYG{n}{key} \PYG{o}{=} \PYG{k}{lambda} \PYG{n}{x}\PYG{p}{:} \PYG{n+nb}{str}\PYG{p}{(}\PYG{n}{x}\PYG{p}{)}\PYG{p}{)}
+\end{Verbatim}
+
+\item {}
+In \code{file\_name\_parameters.py}: \code{collate\_param\_factory (...)}, \code{sort} after conversion to string, then \code{groupby} without string conversion. This is
+because we can't guarantee that two different objects do not have the same string representation. But \code{groupby} requires that similar things are adjacent...
+
+In other words, \code{groupby} is a refinement of \code{sorted}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{k}{for} \PYG{n}{output\PYGZus{}extra\PYGZus{}params}\PYG{p}{,} \PYG{n}{grouped\PYGZus{}params} \PYG{o+ow}{in} \PYG{n}{groupby}\PYG{p}{(}\PYG{n+nb}{sorted}\PYG{p}{(}\PYG{n}{io\PYGZus{}params\PYGZus{}iter}\PYG{p}{,} \PYG{n}{key} \PYG{o}{=} \PYG{n}{get\PYGZus{}output\PYGZus{}extras\PYGZus{}str}\PYG{p}{)}\PYG{p}{,} \PYG{n}{key} \PYG{o}{=} \PYG{n}{get\PYGZus{}output\PYGZus{}extras}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{pass}
+\end{Verbatim}
+
+\end{itemize}
+
+\item {}
+\code{print()} is a function
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{k+kn}{from} \PYG{n+nn}{\PYGZus{}\PYGZus{}future\PYGZus{}\PYGZus{}} \PYG{k+kn}{import} \PYG{n}{print\PYGZus{}function}
+\end{Verbatim}
+
+\item {}
+\code{items()} only returns a list in python2. Rewrite \code{dict.iteritems()} whenever this might cause a performance bottleneck
+
+\item {}
+\code{zip} and \code{map} return iterators. Conditionally import in python2
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{k+kn}{import} \PYG{n+nn}{sys}
+\PYG{k}{if} \PYG{n}{sys}\PYG{o}{.}\PYG{n}{hexversion} \PYG{o}{\PYGZlt{}} \PYG{l+m+mh}{0x03000000}\PYG{p}{:}
+ \PYG{k+kn}{from} \PYG{n+nn}{future\PYGZus{}builtins} \PYG{k+kn}{import} \PYG{n+nb}{zip}\PYG{p}{,} \PYG{n+nb}{map}
+\end{Verbatim}
+
+\item {}
+\code{cPickle-\textgreater{}pickle} \code{CStringIO-\textgreater{}io} need to be conditionally imported
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{k}{try}\PYG{p}{:}
+ \PYG{k+kn}{import} \PYG{n+nn}{StringIO} \PYG{k+kn}{as} \PYG{n+nn}{io}
+\PYG{k}{except}\PYG{p}{:}
+ \PYG{k+kn}{import} \PYG{n+nn}{io} \PYG{k+kn}{as} \PYG{n+nn}{io}
+\end{Verbatim}
+
+\item {}
+\code{map} code can be changed to list comprehensions. Use \code{2to3} to do heavy lifting
+
+\item {}
+All normal strings are unicode in python3. Have to use \code{bytes} to support 8-bit char arrays.
+Normally, this means that \code{str} ``just works''. However, to provide special handling of
+both 8-bit and unicode strings in python2, we often need to check for \code{isinstance(xxx, basestring)}.
+
+We need to conditionally define:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{k}{if} \PYG{n}{sys}\PYG{o}{.}\PYG{n}{hexversion} \PYG{o}{\PYGZgt{}}\PYG{o}{=} \PYG{l+m+mh}{0x03000000}\PYG{p}{:}
+ \PYG{c}{\PYGZsh{} everything is unicode in python3}
+ \PYG{n}{path\PYGZus{}str\PYGZus{}type} \PYG{o}{=} \PYG{n+nb}{str}
+\PYG{k}{else}\PYG{p}{:}
+ \PYG{n}{path\PYGZus{}str\PYGZus{}type} \PYG{o}{=} \PYG{n+nb}{basestring}
+
+\PYG{c}{\PYGZsh{} further down...}
+\PYG{k}{if} \PYG{n+nb}{isinstance}\PYG{p}{(}\PYG{n}{compiled\PYGZus{}regex}\PYG{p}{,} \PYG{n}{path\PYGZus{}str\PYGZus{}type}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{pass}
+\end{Verbatim}
+
+\end{enumerate}
+\end{quote}
+
+
+\subsection{Refactoring: parameter handling}
+\label{implementation_notes:refactoring-parameter-handling}\begin{quote}
+\begin{description}
+\item[{Though the code is still split in a not very sensible way between \code{ruffus\_utility.py}, \code{file\_name\_parameters.py} and \code{task.py},}] \leavevmode
+some rationalisation has taken place, and comments added so further refactoring can be made more easily.
+
+\end{description}
+
+Common code for:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n}{file\PYGZus{}name\PYGZus{}parameters}\PYG{o}{.}\PYG{n}{split\PYGZus{}ex\PYGZus{}param\PYGZus{}factory}\PYG{p}{(}\PYG{p}{)}
+\PYG{n}{file\PYGZus{}name\PYGZus{}parameters}\PYG{o}{.}\PYG{n}{transform\PYGZus{}param\PYGZus{}factory}\PYG{p}{(}\PYG{p}{)}
+\PYG{n}{file\PYGZus{}name\PYGZus{}parameters}\PYG{o}{.}\PYG{n}{collate\PYGZus{}param\PYGZus{}factory}\PYG{p}{(}\PYG{p}{)}
+\end{Verbatim}
+
+has been moved to \code{file\_name\_parameters.py.yield\_io\_params\_per\_job()}
+
+Unit tests have been added to \code{test\_file\_name\_parameters.py} and \code{test\_ruffus\_utility.py}.
+\end{quote}
+
+
+\subsection{\texttt{formatter}}
+\label{implementation_notes:formatter}\begin{quote}
+
+\code{get\_all\_paths\_components(paths, regex\_str)} in \code{ruffus\_utility.py}
+
+Input file names are first squished into a flat list of files.
+\code{get\_all\_paths\_components()} returns both the regular expression matches and the break down of the path.
+
+In case of name clashes, the categories with higher priority (listed first below) override the others:
+\begin{quote}
+\begin{enumerate}
+\item {}
+Captures by name
+
+\item {}
+Captures by index
+
+\item {} \begin{description}
+\item[{Path components:}] \leavevmode\begin{itemize}
+\item {}
+`ext' = extension, including the dot
+
+\item {}
+`basename' = file name without the extension
+
+\item {}
+`path' = path before the basename, not ending with a slash
+
+\item {}
+`subdir' = list of directories, starting with the most nested and ending with the root (if normalised)
+
+\item {}
+`subpath' = list of `path' with successive directories removed, starting with the most nested and ending with the root (if normalised)
+
+\end{itemize}
+
+\end{description}
+
+\end{enumerate}
+
+E.g. \code{name = '/a/b/c/sample1.bam'}, \code{formatter=r"(.*)(?P\textless{}id\textgreater{}\textbackslash{}d+)\textbackslash{}.(.+)")} returns:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{l+m+mi}{0}\PYG{p}{:} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{/a/b/c/sample1.bam}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{o}{/}\PYG{o}{/} \PYG{n}{Entire} \PYG{n}{match} \PYG{n}{captured} \PYG{n}{by} \PYG{n}{index}
+\PYG{l+m+mi}{1}\PYG{p}{:} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{/a/b/c/sample}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{o}{/}\PYG{o}{/} \PYG{n}{captured} \PYG{n}{by} \PYG{n}{index}
+\PYG{l+m+mi}{2}\PYG{p}{:} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{bam}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{o}{/}\PYG{o}{/} \PYG{n}{captured} \PYG{n}{by} \PYG{n}{index}
+\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{id}\PYG{l+s}{\PYGZsq{}}\PYG{p}{:} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{1}\PYG{l+s}{\PYGZsq{}} \PYG{o}{/}\PYG{o}{/} \PYG{n}{captured} \PYG{n}{by} \PYG{n}{name}
+\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{ext}\PYG{l+s}{\PYGZsq{}}\PYG{p}{:} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{.bam}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,}
+\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{subdir}\PYG{l+s}{\PYGZsq{}}\PYG{p}{:} \PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{c}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{b}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{a}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{/}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]}\PYG{p}{,}
+\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{subpath}\PYG{l+s}{\PYGZsq{}}\PYG{p}{:} \PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{/a/b/c}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{/a/b}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{/a}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{/}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]}\PYG{p}{,}
+\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{path}\PYG{l+s}{\PYGZsq{}}\PYG{p}{:} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{/a/b/c}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,}
+\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{basename}\PYG{l+s}{\PYGZsq{}}\PYG{p}{:} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{sample1}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,}
+\end{Verbatim}
+\end{quote}
+
+The code is in \code{ruffus\_utility.py}:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n}{results} \PYG{o}{=} \PYG{n}{get\PYGZus{}all\PYGZus{}paths\PYGZus{}components}\PYG{p}{(}\PYG{n}{paths}\PYG{p}{,} \PYG{n}{regex\PYGZus{}str}\PYG{p}{)}
+\PYG{n}{string}\PYG{o}{.}\PYG{n}{format}\PYG{p}{(}\PYG{n}{results}\PYG{p}{[}\PYG{l+m+mi}{2}\PYG{p}{]}\PYG{p}{)}
+\end{Verbatim}
+
+All the magic is hidden inside the black-box \code{filename\_transform} classes:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{k}{class} \PYG{n+nc}{t\PYGZus{}suffix\PYGZus{}filename\PYGZus{}transform}\PYG{p}{(}\PYG{n}{t\PYGZus{}filename\PYGZus{}transform}\PYG{p}{)}\PYG{p}{:}
+\PYG{k}{class} \PYG{n+nc}{t\PYGZus{}regex\PYGZus{}filename\PYGZus{}transform}\PYG{p}{(}\PYG{n}{t\PYGZus{}filename\PYGZus{}transform}\PYG{p}{)}\PYG{p}{:}
+\PYG{k}{class} \PYG{n+nc}{t\PYGZus{}format\PYGZus{}filename\PYGZus{}transform}\PYG{p}{(}\PYG{n}{t\PYGZus{}filename\PYGZus{}transform}\PYG{p}{)}\PYG{p}{:}
+\end{Verbatim}
+\end{quote}
+
+
+\subsubsection{\texttt{formatter()}: \texttt{regex()} and \texttt{suffix()}}
+\label{implementation_notes:formatter-regex-and-suffix}\begin{quote}
+
+The previous behaviour with \code{regex()}, where a failed match is an error even if no substitution is
+actually made, is retained by the use of \code{re.subn()}.
+This is a corner case but I didn't want user code to break.
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{c}{\PYGZsh{} filter on \PYGZdq{}.txt\PYGZdq{}}
+\PYG{n}{input\PYGZus{}filenames} \PYG{o}{=} \PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{a.wrong}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{b.txt}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}
+\PYG{n}{regex}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{(.txt)\PYGZdl{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+
+\PYG{c}{\PYGZsh{} fails, no substitution possible}
+\PYG{l+s}{r\PYGZdq{}}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{1}\PYG{l+s}{\PYGZdq{}}
+
+\PYG{c}{\PYGZsh{} fails anyway even through regular expression matches not referenced...}
+\PYG{l+s}{r\PYGZdq{}}\PYG{l+s}{output.filename}\PYG{l+s}{\PYGZdq{}}
+\end{Verbatim}
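+
+For reference, \code{re.subn()} returns the number of substitutions made, which is how a failed match can be detected even when the replacement string contains no back-references (a minimal illustration, not the Ruffus internals):
+
+\begin{Verbatim}[commandchars=\\\{\}]
+import re
+
+# re.subn() returns a (new_string, number_of_substitutions) pair
+new_name, count = re.subn("(.txt)$", "output.filename", "a.wrong")
+if count == 0:
+    # the filter did not match: treat this input as a mismatch even
+    # though the replacement references no capture groups
+    pass
+\end{Verbatim}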
+\end{quote}
+
+
+\subsection{@product()}
+\label{implementation_notes:product}\begin{quote}
+\begin{itemize}
+\item {}
+Use combinatoric generators from itertools and keep that naming scheme
+
+\item {}
+Put all new generators in a \code{combinatorics} submodule namespace to avoid breaking user code. (They can be imported if necessary.)
+
+\item {}
+test code in test/test\_combinatorics.py
+
+\item {}
+The \code{itertools.product(repeat)} parameter doesn't make sense for Ruffus and will not be used
+
+\item {}
+Flexible number of pairs of \code{task} / \code{glob} / file names + \code{formatter()}
+
+\item {}
+Only \code{formatter({[}OPTIONAL\_REGEX{]})} provides the necessary flexibility to construct the output, so we won't bother with \code{suffix} and \code{regex}
+
+\item {}
+Similar to \code{@transform} but with an extra level of nestedness
+
+\end{itemize}
+\begin{description}
+\item[{Retain the same code for \code{@product} and \code{@transform} by adding an additional level of indirection:}] \leavevmode\begin{itemize}
+\item {}
+a generator wrapped around \code{get\_strings\_in\_nested\_sequence} to convert nested input parameters either to a single flat list of file names or to nested lists of file names
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n}{file\PYGZus{}name\PYGZus{}parameters}\PYG{o}{.}\PYG{n}{input\PYGZus{}param\PYGZus{}to\PYGZus{}file\PYGZus{}name\PYGZus{}list} \PYG{p}{(}\PYG{n}{input\PYGZus{}params}\PYG{p}{)}
+\PYG{n}{file\PYGZus{}name\PYGZus{}parameters}\PYG{o}{.}\PYG{n}{list\PYGZus{}input\PYGZus{}param\PYGZus{}to\PYGZus{}file\PYGZus{}name\PYGZus{}list} \PYG{p}{(}\PYG{n}{input\PYGZus{}params}\PYG{p}{)}
+\end{Verbatim}
+
+\item {}
+\code{t\_file\_names\_transform} class which stores a list of regular expressions, one for each \code{formatter()} object corresponding to a single set of input parameters
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n}{t\PYGZus{}formatter\PYGZus{}file\PYGZus{}names\PYGZus{}transform}
+\PYG{n}{t\PYGZus{}nested\PYGZus{}formatter\PYGZus{}file\PYGZus{}names\PYGZus{}transform}
+\end{Verbatim}
+
+\item {}
+string substitution functions which will apply a list of \code{formatter} changes
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n}{ruffus}\PYG{o}{.}\PYG{n}{utility}\PYG{o}{.}\PYG{n}{t\PYGZus{}formatter\PYGZus{}replace}\PYG{p}{(}\PYG{p}{)}
+\PYG{n}{ruffus}\PYG{o}{.}\PYG{n}{utility}\PYG{o}{.}\PYG{n}{t\PYGZus{}nested\PYGZus{}formatter\PYGZus{}replace}\PYG{p}{(}\PYG{p}{)}
+\end{Verbatim}
+
+\item {}
+\code{ruffus\_utility.swap\_doubly\_nested\_order()} makes the syntax / implementation very orthogonal
+
+\end{itemize}
+
+\end{description}
+\end{quote}
+
+
+\subsection{\texttt{@permutations(...),} \texttt{@combinations(...),} \texttt{@combinations\_with\_replacement(...)}}
+\label{implementation_notes:permutations-combinations-combinations-with-replacement}\begin{quote}
+
+Similar to \code{@product}, but the extra level of nestedness is self versus self.
+\begin{description}
+\item[{Retain the same code as for \code{@product}}] \leavevmode\begin{itemize}
+\item {}
+forward to a single \code{file\_name\_parameters.combinatorics\_param\_factory()}
+
+\item {}
+use \code{combinatorics\_type} to dispatch to \code{combinatorics.permutations}, \code{combinatorics.combinations} and \code{combinatorics.combinations\_with\_replacement}
+
+\item {}
+use \code{list\_input\_param\_to\_file\_name\_list} from \code{file\_name\_parameters.product\_param\_factory()}
+
+\end{itemize}
+
+\end{description}
+\end{quote}
+
+
+\subsection{drmaa alternatives}
+\label{implementation_notes:drmaa-alternatives}\begin{quote}
+
+Alternative, non-drmaa polling code at
+
+\href{https://github.com/bjpop/rubra/blob/master/rubra/cluster\_job.py}{https://github.com/bjpop/rubra/blob/master/rubra/cluster\_job.py}
+\end{quote}
+
+
+\subsection{Task completion monitoring}
+\label{implementation_notes:task-completion-monitoring}
+
+\subsubsection{How easy is it to abstract out the database?}
+\label{implementation_notes:how-easy-is-it-to-abstract-out-the-database}\begin{quote}
+\begin{itemize}
+\item {} \begin{description}
+\item[{The database is Jacob Sondergaard's \code{dbdict} which is a nosql / key-value store wrapper around sqlite}] \leavevmode
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n}{job\PYGZus{}history} \PYG{o}{=} \PYG{n}{dbdict}\PYG{o}{.}\PYG{n}{open}\PYG{p}{(}\PYG{n}{RUFFUS\PYGZus{}HISTORY\PYGZus{}FILE}\PYG{p}{,} \PYG{n}{picklevalues}\PYG{o}{=}\PYG{n+nb+bp}{True}\PYG{p}{)}
+\end{Verbatim}
+
+\end{description}
+
+\item {}
+The key is the output file name, so it is important not to confuse Ruffus by having different tasks generate the same output file!
+
+\item {}
+Is it possible to abstract this so that \textbf{jobs} get timestamped as well?
+
+\item {}
+If we should ever want to abstract out \code{dbdict}, we need to have a similar key-value store class,
+and make sure that a single instance of \code{dbdict} is used through \code{pipeline\_run} which is passed up
+and down the function call chain. \code{dbdict} would then be drop-in replaceable by our custom (e.g. flat-file-based) dbdict alternative (see the sketch below).
+
+\end{itemize}
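+
+A minimal sketch of such a drop-in alternative to \code{dbdict}, backed by a single pickle file (the class name and file format are purely hypothetical):
+
+\begin{Verbatim}[commandchars=\\\{\}]
+import os, pickle
+
+class flat_file_dbdict(object):
+    # hypothetical drop-in alternative to dbdict: any class offering the
+    # same dict-like interface could be passed around inside pipeline_run
+    def __init__(self, file_name):
+        self.file_name = file_name
+        self.data = dict()
+        if os.path.exists(file_name):
+            with open(file_name, "rb") as f:
+                self.data = pickle.load(f)
+
+    def __setitem__(self, key, value):
+        self.data[key] = value
+        with open(self.file_name, "wb") as f:
+            pickle.dump(self.data, f, protocol=-1)
+
+    def __getitem__(self, key):
+        return self.data[key]
+
+    def __contains__(self, key):
+        return key in self.data
+
+    def __delitem__(self, key):
+        del self.data[key]
+\end{Verbatim}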
+
+To peek into the database:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n+nv}{\PYGZdl{} }sqlite3 .ruffus\PYGZus{}history.sqlite
+sqlite\PYGZgt{} .tables
+data
+sqlite\PYGZgt{} .schema data
+CREATE TABLE data \PYG{o}{(}key PRIMARY KEY,value\PYG{o}{)};
+sqlite\PYGZgt{} \PYG{k}{select }key from data order by key;
+\end{Verbatim}
+\end{quote}
+\end{quote}
+
+
+\subsubsection{Can we query the database, get Job history / stats?}
+\label{implementation_notes:can-we-query-the-database-get-job-history-stats}\begin{quote}
+
+Yes, if we write a function to read and dump the entire database, but this is only useful with timestamps and task names. See below.
+\end{quote}
+
+
+\subsubsection{What are the run time performance implications?}
+\label{implementation_notes:what-are-the-run-time-performance-implications}\begin{quote}
+
+Should be fast: a single db connection is created and used inside \code{pipeline\_run}, \code{pipeline\_printout}, \code{pipeline\_printout\_graph}
+\end{quote}
+
+
+\subsubsection{Avoid pauses between tasks}
+\label{implementation_notes:avoid-pauses-between-tasks}\begin{quote}
+
+Allows Ruffus to avoid adding an extra 1 second pause between tasks to guard against file systems with low timestamp granularity.
+\begin{itemize}
+\item {}
+If the local file time looks to be in sync with the underlying file system, the saved system time is used instead of file timestamps (illustrated below).
+
+\end{itemize}
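+
+A rough illustration of the kind of check involved (not the actual Ruffus implementation): write a scratch file and compare its modification time with the local clock; if they agree closely, recorded system times can safely stand in for file timestamps.
+
+\begin{Verbatim}[commandchars=\\\{\}]
+import os, tempfile, time
+
+def file_system_in_sync(directory, tolerance=1.0):
+    # create a scratch file and compare its modification time
+    # with the local clock
+    handle, scratch_name = tempfile.mkstemp(dir=directory)
+    try:
+        os.close(handle)
+        return abs(os.path.getmtime(scratch_name) - time.time()) < tolerance
+    finally:
+        os.unlink(scratch_name)
+\end{Verbatim}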
+\end{quote}
+
+
+\subsection{\texttt{@mkdir(...),}}
+\label{implementation_notes:mkdir}\begin{itemize}
+\item {}
+\code{mkdir} continues to work seamlessly inside \code{@follows} but also as its own decorator \code{@mkdir}, due to the original happy orthogonal design
+
+\item {}
+fixed bug in checking so that Ruffus doesn't blow up if non-strings (e.g. a number) are in the output
+
+\item {}
+note: adding the decorator to a previously undecorated function might have unintended consequences. The undecorated function turns into a zombie.
+
+\item {}
+fixed ugly bug in \code{pipeline\_printout} for printing single line output
+
+\item {}
+fixed description and printout indent
+
+\end{itemize}
+
+
+\section{FAQ}
+\label{faq:glob}\label{faq:faq}\label{faq::doc}
+
+\subsection{Citations}
+\label{faq:citations}
+
+\subsubsection{Q. How should \emph{Ruffus} be cited in academic publications?}
+\label{faq:q-how-should-ruffus-be-cited-in-academic-publications}\begin{quote}
+
+The official publication describing the original version of \emph{Ruffus} is:
+\begin{quote}
+
+\href{http://bioinformatics.oxfordjournals.org/content/early/2010/09/16/bioinformatics.btq524}{Leo Goodstadt (2010)} : \textbf{Ruffus: a lightweight Python library for computational pipelines.} \emph{Bioinformatics} 26(21): 2778-2779
+\end{quote}
+\end{quote}
+
+
+\subsection{Good practices}
+\label{faq:good-practices}
+
+\subsubsection{Q. What is the best way of keeping my data and workings separate?}
+\label{faq:q-what-is-the-best-way-of-keeping-my-data-and-workings-separate}\begin{quote}
+
+It is good practice to run your pipeline in a temporary, ``working'' directory away from your original data.
+
+The first step of your pipeline might be to make softlinks to your original data in your working directory.
+Here is some example (relatively paranoid) code to do just this:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+ \PYG{k}{def} \PYG{n+nf}{re\PYGZus{}symlink} \PYG{p}{(}\PYG{n}{input\PYGZus{}file}\PYG{p}{,} \PYG{n}{soft\PYGZus{}link\PYGZus{}name}\PYG{p}{,} \PYG{n}{logger}\PYG{p}{,} \PYG{n}{logging\PYGZus{}mutex}\PYG{p}{)}\PYG{p}{:}
+ \PYG{l+s+sd}{\PYGZdq{}\PYGZdq{}\PYGZdq{}}
+\PYG{l+s+sd}{ Helper function: relinks soft symbolic link if necessary}
+\PYG{l+s+sd}{ \PYGZdq{}\PYGZdq{}\PYGZdq{}}
+ \PYG{c}{\PYGZsh{} Guard agains soft linking to oneself: Disastrous consequences of deleting the original files!!}
+ \PYG{k}{if} \PYG{n}{input\PYGZus{}file} \PYG{o}{==} \PYG{n}{soft\PYGZus{}link\PYGZus{}name}\PYG{p}{:}
+ \PYG{n}{logger}\PYG{o}{.}\PYG{n}{debug}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{Warning: No symbolic link made. You are using the original data directory as the working directory.}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+ \PYG{k}{return}
+ \PYG{c}{\PYGZsh{} Soft link already exists: delete for relink?}
+ \PYG{k}{if} \PYG{n}{os}\PYG{o}{.}\PYG{n}{path}\PYG{o}{.}\PYG{n}{lexists}\PYG{p}{(}\PYG{n}{soft\PYGZus{}link\PYGZus{}name}\PYG{p}{)}\PYG{p}{:}
+ \PYG{c}{\PYGZsh{} do not delete or overwrite real (non\PYGZhy{}soft link) file}
+ \PYG{k}{if} \PYG{o+ow}{not} \PYG{n}{os}\PYG{o}{.}\PYG{n}{path}\PYG{o}{.}\PYG{n}{islink}\PYG{p}{(}\PYG{n}{soft\PYGZus{}link\PYGZus{}name}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{raise} \PYG{n+ne}{Exception}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s+si}{\PYGZpc{}s}\PYG{l+s}{ exists and is not a link}\PYG{l+s}{\PYGZdq{}} \PYG{o}{\PYGZpc{}} \PYG{n}{soft\PYGZus{}link\PYGZus{}name}\PYG{p}{)}
+ \PYG{k}{try}\PYG{p}{:}
+ \PYG{n}{os}\PYG{o}{.}\PYG{n}{unlink}\PYG{p}{(}\PYG{n}{soft\PYGZus{}link\PYGZus{}name}\PYG{p}{)}
+ \PYG{k}{except}\PYG{p}{:}
+ \PYG{k}{with} \PYG{n}{logging\PYGZus{}mutex}\PYG{p}{:}
+ \PYG{n}{logger}\PYG{o}{.}\PYG{n}{debug}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{Can}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{t unlink }\PYG{l+s+si}{\PYGZpc{}s}\PYG{l+s}{\PYGZdq{}} \PYG{o}{\PYGZpc{}} \PYG{p}{(}\PYG{n}{soft\PYGZus{}link\PYGZus{}name}\PYG{p}{)}\PYG{p}{)}
+ \PYG{k}{with} \PYG{n}{logging\PYGZus{}mutex}\PYG{p}{:}
+ \PYG{n}{logger}\PYG{o}{.}\PYG{n}{debug}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{os.symlink(}\PYG{l+s+si}{\PYGZpc{}s}\PYG{l+s}{, }\PYG{l+s+si}{\PYGZpc{}s}\PYG{l+s}{)}\PYG{l+s}{\PYGZdq{}} \PYG{o}{\PYGZpc{}} \PYG{p}{(}\PYG{n}{input\PYGZus{}file}\PYG{p}{,} \PYG{n}{soft\PYGZus{}link\PYGZus{}name}\PYG{p}{)}\PYG{p}{)}
+ \PYG{c}{\PYGZsh{}}
+ \PYG{c}{\PYGZsh{} symbolic link relative to original directory so that the entire path}
+ \PYG{c}{\PYGZsh{} can be moved around with breaking everything}
+ \PYG{c}{\PYGZsh{}}
+ \PYG{n}{os}\PYG{o}{.}\PYG{n}{symlink}\PYG{p}{(} \PYG{n}{os}\PYG{o}{.}\PYG{n}{path}\PYG{o}{.}\PYG{n}{relpath}\PYG{p}{(}\PYG{n}{os}\PYG{o}{.}\PYG{n}{path}\PYG{o}{.}\PYG{n}{abspath}\PYG{p}{(}\PYG{n}{input\PYGZus{}file}\PYG{p}{)}\PYG{p}{,}
+ \PYG{n}{os}\PYG{o}{.}\PYG{n}{path}\PYG{o}{.}\PYG{n}{abspath}\PYG{p}{(}\PYG{n}{os}\PYG{o}{.}\PYG{n}{path}\PYG{o}{.}\PYG{n}{dirname}\PYG{p}{(}\PYG{n}{soft\PYGZus{}link\PYGZus{}name}\PYG{p}{)}\PYG{p}{)}\PYG{p}{)}\PYG{p}{,} \PYG{n}{soft\PYGZus{}link\PYGZus{}name}\PYG{p}{)}
+
+ \PYG{c}{\PYGZsh{}}
+ \PYG{c}{\PYGZsh{} First task should soft link data to working directory}
+ \PYG{c}{\PYGZsh{}}
+ \PYG{n+nd}{@jobs\PYGZus{}limit}\PYG{p}{(}\PYG{l+m+mi}{1}\PYG{p}{)}
+ \PYG{n+nd}{@mkdir}\PYG{p}{(}\PYG{n}{options}\PYG{o}{.}\PYG{n}{working\PYGZus{}dir}\PYG{p}{)}
+ \PYG{n+nd}{@transform}\PYG{p}{(} \PYG{n}{input\PYGZus{}files}\PYG{p}{,}
+ \PYG{n}{formatter}\PYG{p}{(}\PYG{p}{)}\PYG{p}{,}
+ \PYG{c}{\PYGZsh{} move to working directory}
+ \PYG{n}{os}\PYG{o}{.}\PYG{n}{path}\PYG{o}{.}\PYG{n}{join}\PYG{p}{(}\PYG{n}{options}\PYG{o}{.}\PYG{n}{working\PYGZus{}dir}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}basename[0]\PYGZcb{}\PYGZob{}ext[0]\PYGZcb{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,}
+ \PYG{n}{logger}\PYG{p}{,} \PYG{n}{logging\PYGZus{}mutex}
+ \PYG{p}{)}
+ \PYG{k}{def} \PYG{n+nf}{soft\PYGZus{}link\PYGZus{}inputs\PYGZus{}to\PYGZus{}working\PYGZus{}directory} \PYG{p}{(}\PYG{n}{input\PYGZus{}file}\PYG{p}{,} \PYG{n}{soft\PYGZus{}link\PYGZus{}name}\PYG{p}{,} \PYG{n}{logger}\PYG{p}{,} \PYG{n}{logging\PYGZus{}mutex}\PYG{p}{)}\PYG{p}{:}
+ \PYG{l+s+sd}{\PYGZdq{}\PYGZdq{}\PYGZdq{}}
+\PYG{l+s+sd}{ Make soft link in working directory}
+\PYG{l+s+sd}{ \PYGZdq{}\PYGZdq{}\PYGZdq{}}
+ \PYG{k}{with} \PYG{n}{logging\PYGZus{}mutex}\PYG{p}{:}
+ \PYG{n}{logger}\PYG{o}{.}\PYG{n}{info}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{Linking files }\PYG{l+s+si}{\PYGZpc{}(input\PYGZus{}file)s}\PYG{l+s}{ \PYGZhy{}\PYGZgt{} }\PYG{l+s+si}{\PYGZpc{}(soft\PYGZus{}link\PYGZus{}name)s}\PYG{l+s+se}{\PYGZbs{}n}\PYG{l+s}{\PYGZdq{}} \PYG{o}{\PYGZpc{}} \PYG{n+nb}{locals}\PYG{p}{(}\PYG{p}{)}\PYG{p}{)}
+ \PYG{n}{re\PYGZus{}symlink}\PYG{p}{(}\PYG{n}{input\PYGZus{}file}\PYG{p}{,} \PYG{n}{soft\PYGZus{}link\PYGZus{}name}\PYG{p}{,} \PYG{n}{logger}\PYG{p}{,} \PYG{n}{logging\PYGZus{}mutex}\PYG{p}{)}
+\end{Verbatim}
+\end{quote}
+
+
+\subsubsection{Q. What is the best way of handling data in file pairs (or triplets etc.)}
+\label{faq:q-what-is-the-best-way-of-handling-data-in-file-pairs-or-triplets-etc}\label{faq:faq-paired-files}\begin{quote}
+
+In Bioinformatics, DNA data often consists of only the nucleotide sequence at the two ends of larger fragments.
+The \href{http://www.illumina.com/technology/next-generation-sequencing/paired-end-sequencing\_assay.ilmn}{paired\_end} or
+\href{http://en.wikipedia.org/wiki/Shotgun\_sequencing\#Whole\_genome\_shotgun\_sequencing}{mate pair} data frequently
+consists of file pairs with conveniently related names such as ``*.R1.fastq'' and ``*.R2.fastq''.
+
+At some point in the data pipeline, these file pairs or triplets must find each other and be analysed in the same job.
+
+Provided these file pairs or triplets are named consistently, the easiest way to regroup them is to use the
+Ruffus {\hyperref[tutorials/new_tutorial/subdivide_collate:new-manual-collate]{\emph{@collate}}} decorator. For example:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n+nd}{@collate}\PYG{p}{(}\PYG{n}{original\PYGZus{}data\PYGZus{}files}\PYG{p}{,}
+
+ \PYG{c}{\PYGZsh{} match file name up to the \PYGZdq{}R1.fastq.gz\PYGZdq{}}
+ \PYG{n}{formatter}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{([\PYGZca{}/]+)R[12].fastq.gz\PYGZdl{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,}
+
+ \PYG{c}{\PYGZsh{} Create output parameter supplied to next task}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}path[0]\PYGZcb{}/\PYGZob{}1[0]\PYGZcb{}.sam}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{n}{logger}\PYG{p}{,} \PYG{n}{logger\PYGZus{}mutex}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{handle\PYGZus{}paired\PYGZus{}end}\PYG{p}{(}\PYG{n}{input\PYGZus{}files}\PYG{p}{,} \PYG{n}{output\PYGZus{}paired\PYGZus{}files}\PYG{p}{,} \PYG{n}{logger}\PYG{p}{,} \PYG{n}{logger\PYGZus{}mutex}\PYG{p}{)}\PYG{p}{:}
+ \PYG{c}{\PYGZsh{} check that we really have a pair of two files not an orphaned singleton}
+ \PYG{k}{if} \PYG{n+nb}{len}\PYG{p}{(}\PYG{n}{input\PYGZus{}files}\PYG{p}{)} \PYG{o}{!=} \PYG{l+m+mi}{2}\PYG{p}{:}
+ \PYG{k}{raise} \PYG{n+ne}{Exception}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{One of read pairs }\PYG{l+s+si}{\PYGZpc{}s}\PYG{l+s}{ missing}\PYG{l+s}{\PYGZdq{}} \PYG{o}{\PYGZpc{}} \PYG{p}{(}\PYG{n}{input\PYGZus{}files}\PYG{p}{,}\PYG{p}{)}\PYG{p}{)}
+
+ \PYG{c}{\PYGZsh{} do stuff here}
+\end{Verbatim}
+
+This (incomplete, untested) {\hyperref[examples/paired_end_data.py:faq-paired-files-code]{\emph{example code}}} shows what this would look like \emph{in vivo}.
+\end{quote}
+
+
+\subsection{General}
+\label{faq:general}
+
+\subsubsection{Q. \emph{Ruffus} won't create dependency graphs}
+\label{faq:q-ruffus-won-t-create-dependency-graphs}\begin{quote}
+
+A. You need to have installed \code{dot} from \href{http://www.graphviz.org/}{Graphviz} to produce
+pretty flowcharts like this:
+\begin{quote}
+
+\includegraphics{pretty_flowchart.png}
+\end{quote}
+\end{quote}
+
+
+\subsubsection{Q. \emph{Ruffus} seems to be hanging in the same place}
+\label{faq:q-ruffus-seems-to-be-hanging-in-the-same-place}\begin{quote}
+
+A. If \emph{ruffus} is interrupted, for example, by a Ctrl-C,
+you will often find the following lines of code highlighted:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+File "build/bdist.linux-x86\_64/egg/ruffus/task.py", line 1904, in pipeline\_run
+File "build/bdist.linux-x86\_64/egg/ruffus/task.py", line 1380, in run\_all\_jobs\_in\_task
+File "/xxxx/python2.6/multiprocessing/pool.py", line 507, in next
+ self.\_cond.wait(timeout)
+File "/xxxxx/python2.6/threading.py", line 237, in wait
+ waiter.acquire()
+\end{Verbatim}
+
+This is \emph{not} where \emph{ruffus} is hanging but the boundary between the main programme process
+and the sub-processes which run \emph{ruffus} jobs in parallel.
+
+This is naturally where broken execution threads wash up.
+\end{quote}
+
+
+\subsubsection{Q. Regular expression substitutions don't work}
+\label{faq:q-regular-expression-substitutions-don-t-work}\begin{quote}
+
+A. If you are using the special regular expression forms \code{"\textbackslash{}1"}, \code{"\textbackslash{}2"} etc.
+to refer to matching groups, remember to `escape' the substitution pattern string.
+The best option is to use \href{http://docs.python.org/library/re.html}{`raw' python strings}.
+For example:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{l+s}{r\PYGZdq{}}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{1\PYGZus{}substitutes}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{2correctly}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{3four}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{4times}\PYG{l+s}{\PYGZdq{}}
+\end{Verbatim}
+\end{quote}
+
+Ruffus will throw an exception if it sees an unescaped \code{"\textbackslash{}1"} or \code{"\textbackslash{}2"} in a file name.
+\end{quote}
+
+
+\subsubsection{Q. How to force a pipeline to appear up to date?}
+\label{faq:q-how-to-force-a-pipeline-to-appear-up-to-date}\begin{quote}
+
+\emph{I have made a trivial modification to one of my data files and now Ruffus wants to rerun my month long pipeline. How can I convince Ruffus that everything is fine and to leave things as they are?}
+
+The standard way to do what you are trying to do is to touch all the files downstream...
+That way the modification times of your analysis files would postdate your existing files.
+You can do this manually but Ruffus also provides direct support:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n}{pipeline\PYGZus{}run} \PYG{p}{(}\PYG{n}{touch\PYGZus{}files\PYGZus{}only} \PYG{o}{=} \PYG{n+nb+bp}{True}\PYG{p}{)}
+\end{Verbatim}
+
+pipeline\_run will run your script normally, stepping over up-to-date tasks and starting
+with jobs which look out of date. However, none of your pipeline task functions
+will actually be called; instead, each out-of-date file is \href{https://en.wikipedia.org/wiki/Touch\_(Unix)}{touch}-ed in
+turn so that the file modification dates follow on successively.
+
+See the documentation for {\hyperref[pipeline_functions:pipeline-functions-pipeline-run]{\emph{pipeline\_run()}}}
+
+It is even simpler if you are using the new Ruffus.cmdline support from version 2.4. You can just type
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+your script \PYGZhy{}\PYGZhy{}touch\PYGZus{}files\PYGZus{}only \PYG{o}{[}\PYGZhy{}\PYGZhy{}other\PYGZus{}options\PYGZus{}of\PYGZus{}your\PYGZus{}own\PYGZus{}etc\PYG{o}{]}
+\end{Verbatim}
+\end{quote}
+
+See {\hyperref[tutorials/new_tutorial/command_line:new-manual-cmdline]{\emph{command line}}} documentation.
+\end{quote}
+
+
+\subsubsection{Q. How can I use my own decorators with Ruffus?}
+\label{faq:q-how-can-i-use-my-own-decorators-with-ruffus}
+(Thanks to Radhouane Aniba for contributing to this answer.)
+\begin{enumerate}
+\item {}
+With care! If the following two points are observed:
+
+\end{enumerate}
+
+
+\paragraph{1. Use @wraps from \texttt{functools} or Michele Simionato's decorator module}
+\label{faq:use-wraps-from-functools-or-michele-simionato-s-decorator-module}\begin{quote}
+
+These will automatically forward attributes from the task function correctly:
+\begin{itemize}
+\item {}
+\code{\_\_name\_\_} and \code{\_\_module\_\_} are used to identify functions uniquely in a Ruffus pipeline, and
+
+\item {}
+\code{pipeline\_task} is used to hold per task data
+
+\end{itemize}
+\end{quote}
+
+
+\paragraph{2. Always call Ruffus decorators first before your own decorators.}
+\label{faq:always-call-ruffus-decorators-first-before-your-own-decorators}\begin{quote}
+
+Otherwise, your decorator will be ignored.
+
+So this works:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n+nd}{@follows}\PYG{p}{(}\PYG{n}{prev\PYGZus{}task}\PYG{p}{)}
+\PYG{n+nd}{@custom\PYGZus{}decorator}\PYG{p}{(}\PYG{n}{something}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{test}\PYG{p}{(}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{pass}
+\end{Verbatim}
+
+This is a bit futile:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{c}{\PYGZsh{} ignore @custom\PYGZus{}decorator}
+\PYG{n+nd}{@custom\PYGZus{}decorator}\PYG{p}{(}\PYG{n}{something}\PYG{p}{)}
+\PYG{n+nd}{@follows}\PYG{p}{(}\PYG{n}{prev\PYGZus{}task}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{test}\PYG{p}{(}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{pass}
+\end{Verbatim}
+
+This order dependency is an unfortunate quirk of how python decorators work. The last (rather futile)
+piece of code is equivalent to:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n}{test} \PYG{o}{=} \PYG{n}{custom\PYGZus{}decorator}\PYG{p}{(}\PYG{n}{something}\PYG{p}{)}\PYG{p}{(}\PYG{n}{ruffus}\PYG{o}{.}\PYG{n}{follows}\PYG{p}{(}\PYG{n}{prev\PYGZus{}task}\PYG{p}{)}\PYG{p}{(}\PYG{n}{test}\PYG{p}{)}\PYG{p}{)}
+\end{Verbatim}
+
+Unfortunately, Ruffus has no idea that someone else (\code{custom\_decorator}) is also modifying the \code{test()} function
+after it (\code{ruffus.follows}) has had its go.
+\end{quote}
+
+
+\paragraph{Example decorator:}
+\label{faq:example-decorator}\begin{quote}
+
+Let us look at a decorator to time jobs:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{k+kn}{import} \PYG{n+nn}{sys}\PYG{o}{,} \PYG{n+nn}{time}
+\PYG{k}{def} \PYG{n+nf}{time\PYGZus{}func\PYGZus{}call}\PYG{p}{(}\PYG{n}{func}\PYG{p}{,} \PYG{n}{stream}\PYG{p}{,} \PYG{o}{*}\PYG{n}{args}\PYG{p}{,} \PYG{o}{*}\PYG{o}{*}\PYG{n}{kwargs}\PYG{p}{)}\PYG{p}{:}
+ \PYG{l+s+sd}{\PYGZdq{}\PYGZdq{}\PYGZdq{}prints elapsed time to standard out, or any other file\PYGZhy{}like object with a .write() method.}
+\PYG{l+s+sd}{ \PYGZdq{}\PYGZdq{}\PYGZdq{}}
+ \PYG{n}{start} \PYG{o}{=} \PYG{n}{time}\PYG{o}{.}\PYG{n}{time}\PYG{p}{(}\PYG{p}{)}
+ \PYG{c}{\PYGZsh{} Run the decorated function.}
+ \PYG{n}{ret} \PYG{o}{=} \PYG{n}{func}\PYG{p}{(}\PYG{o}{*}\PYG{n}{args}\PYG{p}{,} \PYG{o}{*}\PYG{o}{*}\PYG{n}{kwargs}\PYG{p}{)}
+ \PYG{c}{\PYGZsh{} Stop the timer.}
+ \PYG{n}{end} \PYG{o}{=} \PYG{n}{time}\PYG{o}{.}\PYG{n}{time}\PYG{p}{(}\PYG{p}{)}
+ \PYG{n}{elapsed} \PYG{o}{=} \PYG{n}{end} \PYG{o}{\PYGZhy{}} \PYG{n}{start}
+ \PYG{n}{stream}\PYG{o}{.}\PYG{n}{write}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}\PYGZcb{} took \PYGZob{}\PYGZcb{} seconds}\PYG{l+s+se}{\PYGZbs{}n}\PYG{l+s}{\PYGZdq{}}\PYG{o}{.}\PYG{n}{format}\PYG{p}{(}\PYG{n}{func}\PYG{o}{.}\PYG{n}{\PYGZus{}\PYGZus{}name\PYGZus{}\PYGZus{}}\PYG{p}{,} \PYG{n}{elapsed}\PYG{p}{)}\PYG{p}{)}
+ \PYG{k}{return} \PYG{n}{ret}
+
+
+\PYG{k+kn}{from} \PYG{n+nn}{ruffus} \PYG{k+kn}{import} \PYG{o}{*}
+\PYG{k+kn}{import} \PYG{n+nn}{sys}
+\PYG{k+kn}{import} \PYG{n+nn}{time}
+
+\PYG{n+nd}{@time\PYGZus{}job}\PYG{p}{(}\PYG{n}{sys}\PYG{o}{.}\PYG{n}{stderr}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{first\PYGZus{}task}\PYG{p}{(}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{print} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{First task}\PYG{l+s}{\PYGZdq{}}
+
+
+\PYG{n+nd}{@follows}\PYG{p}{(}\PYG{n}{first\PYGZus{}task}\PYG{p}{)}
+\PYG{n+nd}{@time\PYGZus{}job}\PYG{p}{(}\PYG{n}{sys}\PYG{o}{.}\PYG{n}{stderr}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{second\PYGZus{}task}\PYG{p}{(}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{print} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{Second task}\PYG{l+s}{\PYGZdq{}}
+
+
+\PYG{n+nd}{@follows}\PYG{p}{(}\PYG{n}{second\PYGZus{}task}\PYG{p}{)}
+\PYG{n+nd}{@time\PYGZus{}job}\PYG{p}{(}\PYG{n}{sys}\PYG{o}{.}\PYG{n}{stderr}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{final\PYGZus{}task}\PYG{p}{(}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{print} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{Final task}\PYG{l+s}{\PYGZdq{}}
+
+\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{p}{)}
+\end{Verbatim}
+
+What would \code{@time\_job} look like?
+\end{quote}
+
+
+\paragraph{1. Using functools @wraps}
+\label{faq:using-functools-wraps}\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{k+kn}{import} \PYG{n+nn}{functools}
+\PYG{k}{def} \PYG{n+nf}{time\PYGZus{}job}\PYG{p}{(}\PYG{n}{stream}\PYG{o}{=}\PYG{n}{sys}\PYG{o}{.}\PYG{n}{stdout}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{def} \PYG{n+nf}{actual\PYGZus{}time\PYGZus{}job}\PYG{p}{(}\PYG{n}{func}\PYG{p}{)}\PYG{p}{:}
+ \PYG{n+nd}{@functools.wraps}\PYG{p}{(}\PYG{n}{func}\PYG{p}{)}
+ \PYG{k}{def} \PYG{n+nf}{wrapper}\PYG{p}{(}\PYG{o}{*}\PYG{n}{args}\PYG{p}{,} \PYG{o}{*}\PYG{o}{*}\PYG{n}{kwargs}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{return} \PYG{n}{time\PYGZus{}func\PYGZus{}call}\PYG{p}{(}\PYG{n}{func}\PYG{p}{,} \PYG{n}{stream}\PYG{p}{,} \PYG{o}{*}\PYG{n}{args}\PYG{p}{,} \PYG{o}{*}\PYG{o}{*}\PYG{n}{kwargs}\PYG{p}{)}
+ \PYG{k}{return} \PYG{n}{wrapper}
+ \PYG{k}{return} \PYG{n}{actual\PYGZus{}time\PYGZus{}job}
+\end{Verbatim}
+\end{quote}
+
+
+\paragraph{2. Using Michele Simionato's decorator module}
+\label{faq:using-michele-simionato-s-decorator-module}\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{k+kn}{import} \PYG{n+nn}{decorator}
+\PYG{k}{def} \PYG{n+nf}{time\PYGZus{}job}\PYG{p}{(}\PYG{n}{stream}\PYG{o}{=}\PYG{n}{sys}\PYG{o}{.}\PYG{n}{stdout}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{def} \PYG{n+nf}{time\PYGZus{}job}\PYG{p}{(}\PYG{n}{func}\PYG{p}{,} \PYG{o}{*}\PYG{n}{args}\PYG{p}{,} \PYG{o}{*}\PYG{o}{*}\PYG{n}{kwargs}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{return} \PYG{n}{time\PYGZus{}func\PYGZus{}call}\PYG{p}{(}\PYG{n}{func}\PYG{p}{,} \PYG{n}{stream}\PYG{p}{,} \PYG{o}{*}\PYG{n}{args}\PYG{p}{,} \PYG{o}{*}\PYG{o}{*}\PYG{n}{kwargs}\PYG{p}{)}
+ \PYG{k}{return} \PYG{n}{decorator}\PYG{o}{.}\PYG{n}{decorator}\PYG{p}{(}\PYG{n}{time\PYGZus{}job}\PYG{p}{)}
+\end{Verbatim}
+\end{quote}
+
+
+\paragraph{3. By hand, using a callable object}
+\label{faq:by-hand-using-a-callable-object}\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{k}{class} \PYG{n+nc}{time\PYGZus{}job}\PYG{p}{(}\PYG{n+nb}{object}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{def} \PYG{n+nf}{\PYGZus{}\PYGZus{}init\PYGZus{}\PYGZus{}}\PYG{p}{(}\PYG{n+nb+bp}{self}\PYG{p}{,} \PYG{n}{stream}\PYG{o}{=}\PYG{n}{sys}\PYG{o}{.}\PYG{n}{stdout}\PYG{p}{)}\PYG{p}{:}
+ \PYG{n+nb+bp}{self}\PYG{o}{.}\PYG{n}{stream} \PYG{o}{=} \PYG{n}{stream}
+ \PYG{k}{def} \PYG{n+nf}{\PYGZus{}\PYGZus{}call\PYGZus{}\PYGZus{}}\PYG{p}{(}\PYG{n+nb+bp}{self}\PYG{p}{,} \PYG{n}{func}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{def} \PYG{n+nf}{inner}\PYG{p}{(}\PYG{o}{*}\PYG{n}{args}\PYG{p}{,} \PYG{o}{*}\PYG{o}{*}\PYG{n}{kwargs}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{return} \PYG{n}{time\PYGZus{}func\PYGZus{}call}\PYG{p}{(}\PYG{n}{func}\PYG{p}{,} \PYG{n+nb+bp}{self}\PYG{o}{.}\PYG{n}{stream}\PYG{p}{,} \PYG{o}{*}\PYG{n}{args}\PYG{p}{,} \PYG{o}{*}\PYG{o}{*}\PYG{n}{kwargs}\PYG{p}{)}
+ \PYG{c}{\PYGZsh{} remember to forward \PYGZus{}\PYGZus{}name\PYGZus{}\PYGZus{}}
+ \PYG{n}{inner}\PYG{o}{.}\PYG{n}{\PYGZus{}\PYGZus{}name\PYGZus{}\PYGZus{}} \PYG{o}{=} \PYG{n}{func}\PYG{o}{.}\PYG{n}{\PYGZus{}\PYGZus{}name\PYGZus{}\PYGZus{}}
+ \PYG{n}{inner}\PYG{o}{.}\PYG{n}{\PYGZus{}\PYGZus{}module\PYGZus{}\PYGZus{}} \PYG{o}{=} \PYG{n}{func}\PYG{o}{.}\PYG{n}{\PYGZus{}\PYGZus{}module\PYGZus{}\PYGZus{}}
+ \PYG{n}{inner}\PYG{o}{.}\PYG{n}{\PYGZus{}\PYGZus{}doc\PYGZus{}\PYGZus{}} \PYG{o}{=} \PYG{n}{func}\PYG{o}{.}\PYG{n}{\PYGZus{}\PYGZus{}doc\PYGZus{}\PYGZus{}}
+ \PYG{k}{if} \PYG{n+nb}{hasattr}\PYG{p}{(}\PYG{n}{func}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{pipeline\PYGZus{}task}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{:}
+ \PYG{n}{inner}\PYG{o}{.}\PYG{n}{pipeline\PYGZus{}task} \PYG{o}{=} \PYG{n}{func}\PYG{o}{.}\PYG{n}{pipeline\PYGZus{}task}
+ \PYG{k}{return} \PYG{n}{inner}
+\end{Verbatim}
+\end{quote}
+
+
+\subsubsection{Q. Can a task function in a \emph{Ruffus} pipeline be called normally outside of Ruffus?}
+\label{faq:q-can-a-task-function-in-a-ruffus-pipeline-be-called-normally-outside-of-ruffus}\begin{quote}
+
+A. Yes. Most python decorators wrap themselves around a function. However, \emph{Ruffus} leaves the
+original function untouched and unwrapped. Instead, \emph{Ruffus} adds a \code{pipeline\_task} attribute
+to the task function to signal that this is a pipelined function.
+
+This means the original task function can be called just like any other python function.
+\end{quote}
+
+
+\subsubsection{Q. My \emph{Ruffus} tasks create two files at a time. Why is the second one ignored in successive stages of my pipeline?}
+\label{faq:q-my-ruffus-tasks-create-two-files-at-a-time-why-is-the-second-one-ignored-in-successive-stages-of-my-pipeline}\begin{quote}
+
+\emph{This is my code:}
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{k+kn}{from} \PYG{n+nn}{ruffus} \PYG{k+kn}{import} \PYG{o}{*}
+\PYG{k+kn}{import} \PYG{n+nn}{sys}
+\PYG{n+nd}{@transform}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{start.input}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{n}{regex}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.+}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{first\PYGZus{}output.txt}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{second\PYGZus{}output.txt}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{task1}\PYG{p}{(}\PYG{n}{i}\PYG{p}{,}\PYG{n}{o}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{pass}
+
+\PYG{n+nd}{@transform}\PYG{p}{(}\PYG{n}{task1}\PYG{p}{,} \PYG{n}{suffix}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.txt}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.result}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{task2}\PYG{p}{(}\PYG{n}{i}\PYG{p}{,} \PYG{n}{o}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{pass}
+
+\PYG{n}{pipeline\PYGZus{}printout}\PYG{p}{(}\PYG{n}{sys}\PYG{o}{.}\PYG{n}{stdout}\PYG{p}{,} \PYG{p}{[}\PYG{n}{task2}\PYG{p}{]}\PYG{p}{,} \PYG{n}{verbose}\PYG{o}{=}\PYG{l+m+mi}{3}\PYG{p}{)}
+\end{Verbatim}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_
+Tasks which will be run:
+
+Task = task1
+ Job = [start.input
+ -\textgreater{}[first\_output.txt, second\_output.txt]]
+
+Task = task2
+ Job = [[first\_output.txt, second\_output.txt]
+ -\textgreater{}first\_output.result]
+
+\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_
+\end{Verbatim}
+\end{quote}
+
+A: This code produces a single output consisting of a tuple of 2 files. What you actually
+want is two outputs, each consisting of 1 file.
+
+You want a single job (single input) to produce multiple outputs (multiple jobs
+in downstream tasks). This is a one-to-many operation which calls for
+{\hyperref[decorators/split:decorators-split]{\emph{@split}}}:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{k+kn}{from} \PYG{n+nn}{ruffus} \PYG{k+kn}{import} \PYG{o}{*}
+\PYG{k+kn}{import} \PYG{n+nn}{sys}
+\PYG{n+nd}{@split}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{start.input}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{first\PYGZus{}output.txt}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{second\PYGZus{}output.txt}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{task1}\PYG{p}{(}\PYG{n}{i}\PYG{p}{,}\PYG{n}{o}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{pass}
+
+\PYG{n+nd}{@transform}\PYG{p}{(}\PYG{n}{task1}\PYG{p}{,} \PYG{n}{suffix}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.txt}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.result}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{task2}\PYG{p}{(}\PYG{n}{i}\PYG{p}{,} \PYG{n}{o}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{pass}
+
+\PYG{n}{pipeline\PYGZus{}printout}\PYG{p}{(}\PYG{n}{sys}\PYG{o}{.}\PYG{n}{stdout}\PYG{p}{,} \PYG{p}{[}\PYG{n}{task2}\PYG{p}{]}\PYG{p}{,} \PYG{n}{verbose}\PYG{o}{=}\PYG{l+m+mi}{3}\PYG{p}{)}
+\end{Verbatim}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_
+Tasks which will be run:
+
+Task = task1
+ Job = [start.input
+ -\textgreater{}[first\_output.txt, second\_output.txt]]
+
+Task = task2
+ Job = [first\_output.txt
+ -\textgreater{}first\_output.result]
+ Job = [second\_output.txt
+ -\textgreater{}second\_output.result]
+
+\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_
+\end{Verbatim}
+\end{quote}
+\end{quote}
+
+
+\subsubsection{Q. How can a \emph{Ruffus} task produce output which goes off in different directions?}
+\label{faq:q-how-can-a-ruffus-task-produce-output-which-goes-off-in-different-directions}\begin{quote}
+
+A. As above, whenever you need a one-to-many operation, you should reach
+for {\hyperref[decorators/subdivide:decorators-subdivide]{\emph{@subdivide}}}. The advanced form takes a regular expression, making
+it easier to produce multiple derivatives of each input file. The following example subdivides
+\emph{2} jobs each into \emph{3}, so that the subsequent task will run \emph{2} x \emph{3} = \emph{6} jobs.
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{k+kn}{from} \PYG{n+nn}{ruffus} \PYG{k+kn}{import} \PYG{o}{*}
+\PYG{k+kn}{import} \PYG{n+nn}{sys}
+\PYG{n+nd}{@subdivide}\PYG{p}{(}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{1.input\PYGZus{}file}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{2.input\PYGZus{}file}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{,}
+ \PYG{n}{regex}\PYG{p}{(}\PYG{l+s}{r\PYGZdq{}}\PYG{l+s}{(.+).input\PYGZus{}file}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{c}{\PYGZsh{} match file prefix}
+ \PYG{p}{[}\PYG{l+s}{r\PYGZdq{}}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{1.file\PYGZus{}type1}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{l+s}{r\PYGZdq{}}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{1.file\PYGZus{}type2}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{l+s}{r\PYGZdq{}}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{1.file\PYGZus{}type3}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{split\PYGZus{}task}\PYG{p}{(}\PYG{n+nb}{input}\PYG{p}{,} \PYG{n}{output}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{pass}
+
+
+\PYG{n+nd}{@transform}\PYG{p}{(}\PYG{n}{split\PYGZus{}task}\PYG{p}{,} \PYG{n}{regex}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{(.+)}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{l+s}{r\PYGZdq{}}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{1.test}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{test\PYGZus{}split\PYGZus{}output}\PYG{p}{(}\PYG{n}{i}\PYG{p}{,} \PYG{n}{o}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{pass}
+
+\PYG{n}{pipeline\PYGZus{}printout}\PYG{p}{(}\PYG{n}{sys}\PYG{o}{.}\PYG{n}{stdout}\PYG{p}{,} \PYG{p}{[}\PYG{n}{test\PYGZus{}split\PYGZus{}output}\PYG{p}{]}\PYG{p}{,} \PYG{n}{verbose} \PYG{o}{=} \PYG{l+m+mi}{3}\PYG{p}{)}
+\end{Verbatim}
+
+Each of the original 2 files has been split into three so that test\_split\_output will run
+6 jobs simultaneously.
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_
+Tasks which will be run:
+
+Task = split\_task
+ Job = [1.input\_file -\textgreater{}[1.file\_type1, 1.file\_type2, 1.file\_type3]]
+ Job = [2.input\_file -\textgreater{}[2.file\_type1, 2.file\_type2, 2.file\_type3]]
+
+Task = test\_split\_output
+ Job = [1.file\_type1 -\textgreater{}1.file\_type1.test]
+ Job = [1.file\_type2 -\textgreater{}1.file\_type2.test]
+ Job = [1.file\_type3 -\textgreater{}1.file\_type3.test]
+ Job = [2.file\_type1 -\textgreater{}2.file\_type1.test]
+ Job = [2.file\_type2 -\textgreater{}2.file\_type2.test]
+ Job = [2.file\_type3 -\textgreater{}2.file\_type3.test]
+\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_
+\end{Verbatim}
+\end{quote}
+\end{quote}
+\end{quote}
+
+
+\subsubsection{Q. Can I call extra code before each job?}
+\label{faq:q-can-i-call-extra-code-before-each-job}\begin{quote}
+
+A. This is easily accomplished by hijacking the process
+for checking if jobs are up to date or not ({\hyperref[decorators/check_if_uptodate:decorators-check-if-uptodate]{\emph{@check\_if\_uptodate}}}):
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{k+kn}{from} \PYG{n+nn}{ruffus} \PYG{k+kn}{import} \PYG{o}{*}
+\PYG{k+kn}{import} \PYG{n+nn}{sys}
+
+\PYG{k}{def} \PYG{n+nf}{run\PYGZus{}this\PYGZus{}before\PYGZus{}each\PYGZus{}job} \PYG{p}{(}\PYG{o}{*}\PYG{n}{args}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{print} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{Calling function before each job using these args}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{n}{args}
+ \PYG{c}{\PYGZsh{} Remember to delegate to the default *Ruffus* code for checking if}
+ \PYG{c}{\PYGZsh{} jobs need to run.}
+ \PYG{k}{return} \PYG{n}{needs\PYGZus{}update\PYGZus{}check\PYGZus{}modify\PYGZus{}time}\PYG{p}{(}\PYG{o}{*}\PYG{n}{args}\PYG{p}{)}
+
+\PYG{n+nd}{@check\PYGZus{}if\PYGZus{}uptodate}\PYG{p}{(}\PYG{n}{run\PYGZus{}this\PYGZus{}before\PYGZus{}each\PYGZus{}job}\PYG{p}{)}
+\PYG{n+nd}{@files}\PYG{p}{(}\PYG{p}{[}\PYG{p}{[}\PYG{n+nb+bp}{None}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{a.1}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{,} \PYG{p}{[}\PYG{n+nb+bp}{None}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{b.1}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{]}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{task\PYGZus{}func}\PYG{p}{(}\PYG{n+nb}{input}\PYG{p}{,} \PYG{n}{output}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{pass}
+
+\PYG{n}{pipeline\PYGZus{}printout}\PYG{p}{(}\PYG{n}{sys}\PYG{o}{.}\PYG{n}{stdout}\PYG{p}{,} \PYG{p}{[}\PYG{n}{task\PYGZus{}func}\PYG{p}{]}\PYG{p}{)}
+\end{Verbatim}
+
+This results in:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_\_
+\textgreater{}\textgreater{}\textgreater{} pipeline\_run([task\_func])
+Calling function before each job using these args (None, 'a.1')
+Calling function before each job using these args (None, 'a.1')
+Calling function before each job using these args (None, 'b.1')
+ Job = [None -\textgreater{} a.1] completed
+ Job = [None -\textgreater{} b.1] completed
+Completed Task = task\_func
+\end{Verbatim}
+
+\begin{notice}{note}{Note:}
+Because \code{run\_this\_before\_each\_job(...)} is called whenever \emph{Ruffus} checks to see if
+a job is up to date or not, the function may be called twice for some jobs
+(e.g. \code{(None, 'a.1')} above).
+\end{notice}
+\end{quote}
+\end{quote}
+
+
+\subsubsection{Q. Does \emph{Ruffus} allow checkpointing: to distinguish interrupted and completed results?}
+\label{faq:q-does-ruffus-allow-checkpointing-to-distinguish-interrupted-and-completed-results}
+
+\paragraph{A. Use the builtin sqlite checkpointing}
+\label{faq:a-use-the-builtin-sqlite-checkpointing}\begin{quote}
+
+By default, \code{pipeline\_run(...)} will save the timestamps for output files from successfully run jobs to an sqlite database file (\code{.ruffus\_history.sqlite}) in the current directory.
+\begin{itemize}
+\item {}
+If you are using \code{Ruffus.cmdline}, you can change the checksum / timestamp database file name on the command line using \code{-{-}checksum\_file\_name NNNN}
+
+\end{itemize}
+
+The level of timestamping / checksumming can be set via the \code{checksum\_level} parameter:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{o}{.}\PYG{o}{.}\PYG{o}{.}\PYG{p}{,} \PYG{n}{checksum\PYGZus{}level} \PYG{o}{=} \PYG{n}{N}\PYG{p}{,} \PYG{o}{.}\PYG{o}{.}\PYG{o}{.}\PYG{p}{)}
+\end{Verbatim}
+
+where the default is 1:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+level 0 : Use only file timestamps
+level 1 : above, plus timestamp of successful job completion
+level 2 : above, plus a checksum of the pipeline function body
+level 3 : above, plus a checksum of the pipeline function default arguments and the additional arguments passed in by task decorators
+\end{Verbatim}
+\end{quote}
+
+
+\paragraph{A. Use a flag file}
+\label{faq:a-use-a-flag-file}\begin{quote}
+
+When gmake is interrupted, it will delete the target file it is updating so that the target is
+remade from scratch when make is next run. Ruffus, by design, does not do this because, more often than
+not, the partial / incomplete file may be useful, if only to reveal, for example, what might have caused an interrupting error
+or exception. It also seems a bit too clever and underhand to go around the programmer's back to delete files...
+
+A common \emph{Ruffus} convention is to create an empty checkpoint or ``flag'' file whose sole purpose
+is to record a modification-time and the successful completion of a job.
+
+This would be a task with a completion flag:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} Assuming a pipelined task function named \PYGZdq{}stage1\PYGZdq{}}
+\PYG{c}{\PYGZsh{}}
+\PYG{n+nd}{@transform}\PYG{p}{(}\PYG{n}{stage1}\PYG{p}{,} \PYG{n}{suffix}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.stage1}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.stage2}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.stage2\PYGZus{}finished}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]} \PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{stage2} \PYG{p}{(}\PYG{n}{input\PYGZus{}files}\PYG{p}{,} \PYG{n}{output\PYGZus{}files}\PYG{p}{)}\PYG{p}{:}
+ \PYG{n}{task\PYGZus{}output\PYGZus{}file}\PYG{p}{,} \PYG{n}{flag\PYGZus{}file} \PYG{o}{=} \PYG{n}{output\PYGZus{}files}
+ \PYG{n}{cmd} \PYG{o}{=} \PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{do\PYGZus{}something2 }\PYG{l+s+si}{\PYGZpc{}(input\PYGZus{}file)s}\PYG{l+s}{ \PYGZgt{}\textbar{} }\PYG{l+s+si}{\PYGZpc{}(task\PYGZus{}output\PYGZus{}file)s}\PYG{l+s}{ }\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+ \PYG{n}{cmd} \PYG{o}{=} \PYG{n}{cmd} \PYG{o}{\PYGZpc{}} \PYG{p}{\PYGZob{}}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{input\PYGZus{}file}\PYG{l+s}{\PYGZdq{}}\PYG{p}{:} \PYG{n}{input\PYGZus{}files}\PYG{p}{[}\PYG{l+m+mi}{0}\PYG{p}{]}\PYG{p}{,}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{task\PYGZus{}output\PYGZus{}file}\PYG{l+s}{\PYGZdq{}}\PYG{p}{:} \PYG{n}{task\PYGZus{}output\PYGZus{}file}
+ \PYG{p}{\PYGZcb{}}
+ \PYG{k}{if} \PYG{o+ow}{not} \PYG{n}{os}\PYG{o}{.}\PYG{n}{system}\PYG{p}{(} \PYG{n}{cmd} \PYG{p}{)}\PYG{p}{:}
+ \PYG{c}{\PYGZsh{}88888888888888888888888888888888888888888888888888888888888888888888888888888}
+ \PYG{c}{\PYGZsh{}}
+ \PYG{c}{\PYGZsh{} It worked: Create completion flag\PYGZus{}file}
+ \PYG{c}{\PYGZsh{}}
+ \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{flag\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+ \PYG{c}{\PYGZsh{}}
+ \PYG{c}{\PYGZsh{}88888888888888888888888888888888888888888888888888888888888888888888888888888}
+\end{Verbatim}
+
+The flag files \code{xxx.stage2\_finished} indicate that each job has finished. If a flag file is missing,
+the corresponding \code{xxx.stage2} is only a partial, interrupted result.
+
+The only thing to be aware of is that the flag file will appear in the list of inputs of the
+downstream task, which should accordingly look like this:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n+nd}{@transform}\PYG{p}{(}\PYG{n}{stage2}\PYG{p}{,} \PYG{n}{suffix}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.stage2}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.stage3}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.stage3\PYGZus{}finished}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]} \PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{stage3} \PYG{p}{(}\PYG{n}{input\PYGZus{}files}\PYG{p}{,} \PYG{n}{output\PYGZus{}files}\PYG{p}{)}\PYG{p}{:}
+
+ \PYG{c}{\PYGZsh{}888888888888888888888888888888888888888888888888888888888888888888888888888888888}
+ \PYG{c}{\PYGZsh{}}
+ \PYG{c}{\PYGZsh{} Note that the first parameter is a LIST of input files, the last of which}
+ \PYG{c}{\PYGZsh{} is the flag file from the previous task which we can ignore}
+ \PYG{c}{\PYGZsh{}}
+ \PYG{n}{input\PYGZus{}file}\PYG{p}{,} \PYG{n}{previous\PYGZus{}flag\PYGZus{}file} \PYG{o}{=} \PYG{n}{input\PYGZus{}files}
+ \PYG{c}{\PYGZsh{}}
+ \PYG{c}{\PYGZsh{}888888888888888888888888888888888888888888888888888888888888888888888888888888888}
+ \PYG{n}{task\PYGZus{}output\PYGZus{}file}\PYG{p}{,} \PYG{n}{flag\PYGZus{}file} \PYG{o}{=} \PYG{n}{output\PYGZus{}files}
+ \PYG{n}{cmd} \PYG{o}{=} \PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{do\PYGZus{}something3 }\PYG{l+s+si}{\PYGZpc{}(input\PYGZus{}file)s}\PYG{l+s}{ \PYGZgt{}\textbar{} }\PYG{l+s+si}{\PYGZpc{}(task\PYGZus{}output\PYGZus{}file)s}\PYG{l+s}{ }\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+ \PYG{n}{cmd} \PYG{o}{=} \PYG{n}{cmd} \PYG{o}{\PYGZpc{}} \PYG{p}{\PYGZob{}}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{input\PYGZus{}file}\PYG{l+s}{\PYGZdq{}}\PYG{p}{:} \PYG{n}{input\PYGZus{}file}\PYG{p}{,}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{task\PYGZus{}output\PYGZus{}file}\PYG{l+s}{\PYGZdq{}}\PYG{p}{:} \PYG{n}{task\PYGZus{}output\PYGZus{}file}
+ \PYG{p}{\PYGZcb{}}
+ \PYG{c}{\PYGZsh{} completion flag file for this task}
+ \PYG{k}{if} \PYG{o+ow}{not} \PYG{n}{os}\PYG{o}{.}\PYG{n}{system}\PYG{p}{(} \PYG{n}{cmd} \PYG{p}{)}\PYG{p}{:}
+ \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{flag\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\end{Verbatim}
+
+The {\hyperref[examples/bioinformatics/part2:examples-bioinformatics-part2-step2]{\emph{Bioinformatics example}}} contains {\hyperref[examples/bioinformatics/part2_code:examples-bioinformatics-part2-code]{\emph{code}}} for checkpointing.
+\end{quote}
+
+
+\paragraph{A. Use a temp file}
+\label{faq:a-use-a-temp-file}\begin{quote}
+
+Thanks to Martin Goodson for suggesting this and providing an example. In his words:
+
+``I normally use a decorator to create a temporary file which is only renamed after the task has completed without any problems. This seems a more elegant solution to the problem:''
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{k}{def} \PYG{n+nf}{usetemp}\PYG{p}{(}\PYG{n}{task\PYGZus{}func}\PYG{p}{)}\PYG{p}{:}
+ \PYG{l+s+sd}{\PYGZdq{}\PYGZdq{}\PYGZdq{} Decorate a function to write to a tmp file and then rename it. So half finished tasks cannot create up to date targets.}
+\PYG{l+s+sd}{ \PYGZdq{}\PYGZdq{}\PYGZdq{}}
+ \PYG{n+nd}{@wraps}\PYG{p}{(}\PYG{n}{task\PYGZus{}func}\PYG{p}{)}
+ \PYG{k}{def} \PYG{n+nf}{wrapper\PYGZus{}function}\PYG{p}{(}\PYG{o}{*}\PYG{n}{args}\PYG{p}{,} \PYG{o}{*}\PYG{o}{*}\PYG{n}{kwargs}\PYG{p}{)}\PYG{p}{:}
+ \PYG{n}{args}\PYG{o}{=}\PYG{n+nb}{list}\PYG{p}{(}\PYG{n}{args}\PYG{p}{)}
+ \PYG{n}{outnames}\PYG{o}{=}\PYG{n}{args}\PYG{p}{[}\PYG{l+m+mi}{1}\PYG{p}{]}
+ \PYG{k}{if} \PYG{o+ow}{not} \PYG{n+nb}{isinstance}\PYG{p}{(}\PYG{n}{outnames}\PYG{p}{,} \PYG{n+nb}{basestring}\PYG{p}{)} \PYG{o+ow}{and} \PYG{n+nb}{hasattr}\PYG{p}{(}\PYG{n}{outnames}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{\PYGZus{}\PYGZus{}getitem\PYGZus{}\PYGZus{}}\PYG{l+s}{\PYGZsq{}}\PYG{p}{)}\PYG{p}{:}
+ \PYG{n}{tmpnames}\PYG{o}{=}\PYG{p}{[}\PYG{n+nb}{str}\PYG{p}{(}\PYG{n}{x}\PYG{p}{)}\PYG{o}{+}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.tmp}\PYG{l+s}{\PYGZdq{}} \PYG{k}{for} \PYG{n}{x} \PYG{o+ow}{in} \PYG{n}{outnames}\PYG{p}{]}
+ \PYG{n}{args}\PYG{p}{[}\PYG{l+m+mi}{1}\PYG{p}{]}\PYG{o}{=}\PYG{n}{tmpnames}
+ \PYG{n}{task\PYGZus{}func}\PYG{p}{(}\PYG{o}{*}\PYG{n}{args}\PYG{p}{,} \PYG{o}{*}\PYG{o}{*}\PYG{n}{kwargs}\PYG{p}{)}
+ \PYG{k}{try}\PYG{p}{:}
+ \PYG{k}{for} \PYG{n}{tmp}\PYG{p}{,} \PYG{n}{name} \PYG{o+ow}{in} \PYG{n+nb}{zip}\PYG{p}{(}\PYG{n}{tmpnames}\PYG{p}{,} \PYG{n}{outnames}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{if} \PYG{n}{os}\PYG{o}{.}\PYG{n}{path}\PYG{o}{.}\PYG{n}{exists}\PYG{p}{(}\PYG{n}{tmp}\PYG{p}{)}\PYG{p}{:}
+ \PYG{n}{os}\PYG{o}{.}\PYG{n}{rename}\PYG{p}{(}\PYG{n}{tmp}\PYG{p}{,} \PYG{n+nb}{str}\PYG{p}{(}\PYG{n}{name}\PYG{p}{)}\PYG{p}{)}
+ \PYG{k}{except} \PYG{n+ne}{BaseException} \PYG{k}{as} \PYG{n}{e}\PYG{p}{:}
+ \PYG{k}{for} \PYG{n}{name} \PYG{o+ow}{in} \PYG{n}{outnames}\PYG{p}{:}
+ \PYG{k}{if} \PYG{n}{os}\PYG{o}{.}\PYG{n}{path}\PYG{o}{.}\PYG{n}{exists}\PYG{p}{(}\PYG{n}{name}\PYG{p}{)}\PYG{p}{:}
+ \PYG{n}{os}\PYG{o}{.}\PYG{n}{remove}\PYG{p}{(}\PYG{n}{name}\PYG{p}{)}
+ \PYG{k}{raise} \PYG{p}{(}\PYG{n}{e}\PYG{p}{)}
+ \PYG{k}{else}\PYG{p}{:}
+ \PYG{n}{tmp}\PYG{o}{=}\PYG{n+nb}{str}\PYG{p}{(}\PYG{n}{outnames}\PYG{p}{)}\PYG{o}{+}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{.tmp}\PYG{l+s}{\PYGZsq{}}
+ \PYG{n}{args}\PYG{p}{[}\PYG{l+m+mi}{1}\PYG{p}{]}\PYG{o}{=}\PYG{n}{tmp}
+ \PYG{n}{task\PYGZus{}func}\PYG{p}{(}\PYG{o}{*}\PYG{n}{args}\PYG{p}{,} \PYG{o}{*}\PYG{o}{*}\PYG{n}{kwargs}\PYG{p}{)}
+ \PYG{n}{os}\PYG{o}{.}\PYG{n}{rename}\PYG{p}{(}\PYG{n}{tmp}\PYG{p}{,} \PYG{n+nb}{str}\PYG{p}{(}\PYG{n}{outnames}\PYG{p}{)}\PYG{p}{)}
+ \PYG{k}{return} \PYG{n}{wrapper\PYGZus{}function}
+\end{Verbatim}
+\end{quote}
+
+Use like this:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n+nd}{@files}\PYG{p}{(}\PYG{n+nb+bp}{None}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{client1.price}\PYG{l+s}{\PYGZsq{}}\PYG{p}{)}
+\PYG{n+nd}{@usetemp}
+\PYG{k}{def} \PYG{n+nf}{getusers}\PYG{p}{(}\PYG{n}{inputfile}\PYG{p}{,} \PYG{n}{outputname}\PYG{p}{)}\PYG{p}{:}
+ \PYG{c}{\PYGZsh{}**************************************************}
+ \PYG{c}{\PYGZsh{} code goes here}
+ \PYG{c}{\PYGZsh{} outputname now refers to temporary file}
+ \PYG{k}{pass}
+\end{Verbatim}
+\end{quote}
+\end{quote}
+
+
+\subsection{Windows}
+\label{faq:windows}
+
+\subsubsection{Q. Windows seems to spawn \emph{ruffus} processes recursively}
+\label{faq:q-windows-seems-to-spawn-ruffus-processes-recursively}\begin{quote}
+
+A. It is necessary to protect the ``entry point'' of the program under Windows.
+Otherwise, a new process will be started each time the main module is imported
+by a new Python interpreter as an unintended side effect, causing a cascade
+of new processes.
+
+See: \href{http://docs.python.org/library/multiprocessing.html\#multiprocessing-programming}{http://docs.python.org/library/multiprocessing.html\#multiprocessing-programming}
+
+This code works:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+if \_\_name\_\_ == '\_\_main\_\_':
+    try:
+        pipeline\_run([parallel\_task], multiprocess = 5)
+    except Exception, e:
+        print e.args
+\end{Verbatim}
+\end{quote}
+
+
+\subsection{Sun Grid Engine / PBS / SLURM etc}
+\label{faq:sun-grid-engine-pbs-slurm-etc}
+
+\subsubsection{Q. Can Ruffus be used to manage a cluster or grid based pipeline?}
+\label{faq:q-can-ruffus-be-used-to-manage-a-cluster-or-grid-based-pipeline}\begin{quote}
+\begin{enumerate}
+\item {}
+Some minimal modifications have to be made to your \emph{Ruffus} script to allow it to submit jobs to a cluster
+
+\end{enumerate}
+
+See {\hyperref[tutorials/new_tutorial/multiprocessing:new-manual-ruffus-drmaa-wrapper-run-job]{\emph{ruffus.drmaa\_wrapper}}}
+
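+As a rough sketch (the command string, queue option and task names below are
+illustrative, not prescriptive), each task hands its command line to
+\code{ruffus.drmaa\_wrapper.run\_job}, re-using a single shared drmaa session:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+from ruffus import *
+from ruffus.drmaa\_wrapper import run\_job
+import drmaa
+
+# one drmaa session shared by every job in the pipeline
+drmaa\_session = drmaa.Session()
+drmaa\_session.initialize()
+
+@transform(previous\_task, suffix(".foo"), ".bar")
+def cluster\_task(input\_file, output\_file):
+    # job\_other\_options is whatever your scheduler expects (site specific)
+    stdout\_res, stderr\_res = run\_job(
+        cmd\_str           = "my\_program %s \textgreater{} %s" % (input\_file, output\_file),
+        job\_name          = "cluster\_task",
+        job\_other\_options = "-q short.q",
+        drmaa\_session     = drmaa\_session,
+        run\_locally       = False)
+\end{Verbatim}
+
+Remember to run such pipelines with \code{pipeline\_run(..., multithread = NNN)} rather than
+\code{multiprocess} (see the next question).
+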
+Thanks to Andreas Heger and others at CGAT and Bernie Pope for contributing ideas and code.
+\end{quote}
+
+
+\subsubsection{Q. When I submit lots of jobs via Sun Grid Engine (SGE), the head node occasionally freezes and dies}
+\label{faq:q-when-i-submit-lots-of-jobs-via-sun-grid-engine-sge-the-head-node-occassionally-freezes-and-dies}\begin{enumerate}
+\item {}
+You need to use multithreading rather than multiprocessing, as sketched below. See {\hyperref[tutorials/new_tutorial/multiprocessing:new-manual-ruffus-drmaa-wrapper-run-job]{\emph{ruffus.drmaa\_wrapper}}}
+
+\end{enumerate}
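+
+Concretely, a minimal sketch (the task name and thread count are illustrative):
+
+\begin{Verbatim}[commandchars=\\\{\}]
+# drmaa jobs spend most of their time waiting for the cluster, so
+# lightweight threads are preferable to full python processes
+pipeline\_run([final\_task], multithread = 100)
+\end{Verbatim}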
+
+
+\subsubsection{Q. Keeping large intermediate files}
+\label{faq:q-keeping-large-intermediate-files}\begin{quote}
+
+Sometimes pipelines create a large number of intermediate files which might not be needed later.
+
+Unfortunately, the current design of \emph{Ruffus} requires these files to hang around; otherwise the pipeline
+will not know that it ran successfully.
+
+We have some tentative plans to get around this but in the meantime, Bernie Pope suggests
+truncating intermediate files in place, preserving timestamps:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{c}{\PYGZsh{} truncate a file to zero bytes, and preserve its original modification time}
+\PYG{k}{def} \PYG{n+nf}{zeroFile}\PYG{p}{(}\PYG{n+nb}{file}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{if} \PYG{n}{os}\PYG{o}{.}\PYG{n}{path}\PYG{o}{.}\PYG{n}{exists}\PYG{p}{(}\PYG{n+nb}{file}\PYG{p}{)}\PYG{p}{:}
+ \PYG{c}{\PYGZsh{} save the current time of the file}
+ \PYG{n}{timeInfo} \PYG{o}{=} \PYG{n}{os}\PYG{o}{.}\PYG{n}{stat}\PYG{p}{(}\PYG{n+nb}{file}\PYG{p}{)}
+ \PYG{k}{try}\PYG{p}{:}
+ \PYG{n}{f} \PYG{o}{=} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n+nb}{file}\PYG{p}{,}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZsq{}}\PYG{p}{)}
+ \PYG{k}{except} \PYG{n+ne}{IOError}\PYG{p}{:}
+ \PYG{k}{pass}
+ \PYG{k}{else}\PYG{p}{:}
+ \PYG{n}{f}\PYG{o}{.}\PYG{n}{truncate}\PYG{p}{(}\PYG{l+m+mi}{0}\PYG{p}{)}
+ \PYG{n}{f}\PYG{o}{.}\PYG{n}{close}\PYG{p}{(}\PYG{p}{)}
+ \PYG{c}{\PYGZsh{} change the time of the file back to what it was}
+ \PYG{n}{os}\PYG{o}{.}\PYG{n}{utime}\PYG{p}{(}\PYG{n+nb}{file}\PYG{p}{,}\PYG{p}{(}\PYG{n}{timeInfo}\PYG{o}{.}\PYG{n}{st\PYGZus{}atime}\PYG{p}{,} \PYG{n}{timeInfo}\PYG{o}{.}\PYG{n}{st\PYGZus{}mtime}\PYG{p}{)}\PYG{p}{)}
+\end{Verbatim}
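+
+For example, one possible (hedged) way to wire this in, with illustrative file and task names,
+is to truncate the intermediate file from a \code{@posttask} hook once the downstream task that
+consumes it has completed:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+# a sketch: "big.intermediate" and the task names are illustrative
+@posttask(lambda: zeroFile("big.intermediate"))
+@transform("big.intermediate", suffix(".intermediate"), ".final")
+def downstream\_task(input\_file, output\_file):
+    # do the real work; zeroFile() empties the intermediate afterwards,
+    # preserving its timestamp so the pipeline still appears up to date
+    pass
+\end{Verbatim}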
+\end{quote}
+
+
+\subsection{Sharing python objects between Ruffus processes running concurrently}
+\label{faq:sharing-python-objects-between-ruffus-processes-running-concurrently}\begin{quote}
+
+The design of Ruffus envisages that much of the data flow in pipelines occurs in files but it is also possible to pass python objects in memory.
+
+Ruffus uses the \href{http://docs.python.org/2/library/multiprocessing.html}{multiprocessing} module and much of the following is a summary of what is covered
+in depth in the Python Standard Library \href{http://docs.python.org/2/library/multiprocessing.html\#sharing-state-between-processes}{Documentation}.
+
+Running Ruffus using \code{pipeline\_run(..., multiprocess = NNN)} where \code{NNN} \textgreater{} 1 runs each job concurrently on up to \code{NNN} separate local processes.
+Each task function runs independently in a different python interpreter, possibly on a different CPU, in the most efficient way.
+However, this does mean we have to pay some attention to how data is sent across process boundaries (unlike the situation with \code{pipeline\_run(..., multithread = NNN)} ).
+
+The python code and data which comprises your multitasking Ruffus job is sent to a separate process in three ways:
+\begin{enumerate}
+\item {}
+The python function code and data objects are \href{http://docs.python.org/2/library/pickle.html}{pickled}, i.e. converted into a byte stream, by the master process and sent to the remote process
+before being converted back into normal python objects (unpickling).
+
+\item {}
+The parameters for your jobs, i.e. what Ruffus calls your task functions with, are separately \href{http://docs.python.org/2/library/pickle.html}{pickled} and sent to the remote process via
+\href{http://docs.python.org/2/library/multiprocessing.html\#multiprocessing.Queue}{multiprocessing.Queue}
+
+\item {}
+You can share and synchronise other data yourselves. The canonical example is the logger provided by \code{Ruffus.cmdline.setup\_logging}
+
+\end{enumerate}
+
+\begin{notice}{note}{Note:}
+Check that your function code and data can be \href{http://docs.python.org/2/library/pickle.html\#what-can-be-pickled-and-unpickled}{pickled}.
+
+Only functions, built-in functions and classes defined at the top level of a module are picklable.
+\end{notice}
+
+The following answers are a short ``how-to'' for sharing and synchronising data yourselves.
+\end{quote}
+
+
+\subsubsection{Can ordinary python objects be shared between processes?}
+\label{faq:can-ordinary-python-objects-be-shared-between-processes}\begin{enumerate}
+\item {}
+Objects which can be \href{http://docs.python.org/2/library/pickle.html}{pickled} can be shared as is. These include
+\begin{itemize}
+\item {}
+numbers
+
+\item {}
+strings
+
+\item {}
+tuples, lists, sets, and dictionaries containing only objects which can be \href{http://docs.python.org/2/library/pickle.html}{pickled}.
+
+\end{itemize}
+
+\item {}
+If these do not change during your pipeline, you can just use them without any further effort in your task.
+
+\item {}
+If you need to use the value at the point when the task function is \emph{called}, then you need to pass the python object as parameters to your task.
+For example:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+ \PYG{c}{\PYGZsh{} changing\PYGZus{}list changes...}
+ \PYG{n+nd}{@transform}\PYG{p}{(}\PYG{n}{previous\PYGZus{}task}\PYG{p}{,} \PYG{n}{suffix}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.foo}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.bar}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{n}{changing\PYGZus{}list}\PYG{p}{)}
+ \PYG{k}{def} \PYG{n+nf}{next\PYGZus{}task}\PYG{p}{(}\PYG{n}{input\PYGZus{}file}\PYG{p}{,} \PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{n}{changing\PYGZus{}list}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{pass}
+\end{Verbatim}
+
+\item {}
+If you need to use the value when the task function is \emph{run} then see {\hyperref[faq:how-about-synchronising-python-objects-in-real-time]{\emph{the following answer.}}}.
+
+\end{enumerate}
+
+
+\subsubsection{Why am I getting \texttt{PicklingError}?}
+\label{faq:why-am-i-getting-picklingerror}\begin{quote}
+
+What is happening? Didn't \href{https://en.wikipedia.org/wiki/Battle\_of\_the\_Herrings}{Joan of Arc} solve this once and for all?
+\begin{enumerate}
+\item {}
+Some of the data or code in your function cannot be \href{http://docs.python.org/2/library/pickle.html}{pickled} but has to be sent by python \code{multiprocessing} across process boundaries.
+\begin{quote}
+
+When you run your pipeline using multiprocess:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{p}{[}\PYG{p}{]}\PYG{p}{,} \PYG{n}{verbose} \PYG{o}{=} \PYG{l+m+mi}{5}\PYG{p}{,} \PYG{n}{multiprocess} \PYG{o}{=} \PYG{l+m+mi}{5}\PYG{p}{,} \PYG{n}{logger} \PYG{o}{=} \PYG{n}{ruffusLoggerProxy}\PYG{p}{)}
+\end{Verbatim}
+\end{quote}
+
+You will get the following errors:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n+ne}{Exception} \PYG{o+ow}{in} \PYG{n}{thread} \PYG{n}{Thread}\PYG{o}{\PYGZhy{}}\PYG{l+m+mi}{2}\PYG{p}{:}
+\PYG{n}{Traceback} \PYG{p}{(}\PYG{n}{most} \PYG{n}{recent} \PYG{n}{call} \PYG{n}{last}\PYG{p}{)}\PYG{p}{:}
+ \PYG{n}{File} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{/path/to/python/python2.7/threading.py}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{n}{line} \PYG{l+m+mi}{808}\PYG{p}{,} \PYG{o+ow}{in} \PYG{n}{\PYGZus{}\PYGZus{}bootstrap\PYGZus{}inner}
+ \PYG{n+nb+bp}{self}\PYG{o}{.}\PYG{n}{run}\PYG{p}{(}\PYG{p}{)}
+ \PYG{n}{File} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{/path/to/python/python2.7/threading.py}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{n}{line} \PYG{l+m+mi}{761}\PYG{p}{,} \PYG{o+ow}{in} \PYG{n}{run}
+ \PYG{n+nb+bp}{self}\PYG{o}{.}\PYG{n}{\PYGZus{}\PYGZus{}target}\PYG{p}{(}\PYG{o}{*}\PYG{n+nb+bp}{self}\PYG{o}{.}\PYG{n}{\PYGZus{}\PYGZus{}args}\PYG{p}{,} \PYG{o}{*} \PYG{o}{*}\PYG{n+nb+bp}{self}\PYG{o}{.}\PYG{n}{\PYGZus{}\PYGZus{}kwargs}\PYG{p}{)}
+ \PYG{n}{File} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{/path/to/python/python2.7/multiprocessing/pool.py}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{n}{line} \PYG{l+m+mi}{342}\PYG{p}{,} \PYG{o+ow}{in} \PYG{n}{\PYGZus{}handle\PYGZus{}tasks}
+ \PYG{n}{put}\PYG{p}{(}\PYG{n}{task}\PYG{p}{)}
+\PYG{n}{PicklingError}\PYG{p}{:} \PYG{n}{Can}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{t pickle \PYGZlt{}type }\PYG{l+s}{\PYGZsq{}}\PYG{n}{function}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{\PYGZgt{}: attribute lookup \PYGZus{}\PYGZus{}builtin\PYGZus{}\PYGZus{}.function failed}
+\end{Verbatim}
+\end{quote}
+
+which go away when you set \code{pipeline\_run({[}{]}, multiprocess = 1, ...)}
+\end{quote}
+
+\end{enumerate}
+
+Unfortunately, pickling errors are particularly ill-served by standard python error messages. The only really good advice is to take the offending
+code and try to \href{http://docs.python.org/2/library/pickle.html}{pickle} it yourself to narrow down the errors. Check your objects against the list
+in the \href{http://docs.python.org/2/library/pickle.html\#what-can-be-pickled-and-unpickled}{pickle} module.
+Watch out especially for nested functions. These will have to be moved to file scope.
+Other objects may have to be passed in proxy (see below).
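+
+For example, a quick (illustrative) way to narrow things down:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+import pickle
+
+# my\_task\_function and my\_extra\_parameter stand in for whatever your pipeline uses
+pickle.dumps(my\_task\_function)       # fails for nested functions and lambdas
+pickle.dumps(["input.txt", "output.txt", my\_extra\_parameter])
+\end{Verbatim}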
+\end{quote}
+
+
+\subsubsection{How about synchronising python objects in real time?}
+\label{faq:id1}\label{faq:how-about-synchronising-python-objects-in-real-time}\begin{quote}
+\begin{enumerate}
+\item {}
+You can use managers and proxy objects from the \href{http://docs.python.org/library/multiprocessing.html}{multiprocessing} module.
+
+\end{enumerate}
+
+The underlying python object would be owned and managed by a (hidden) server process. Other processes can access the shared objects transparently by using proxies. This is how the logger provided by
+\code{Ruffus.cmdline.setup\_logging} works:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{c}{\PYGZsh{} optional logger which can be passed to ruffus tasks}
+\PYG{n}{logger}\PYG{p}{,} \PYG{n}{logger\PYGZus{}mutex} \PYG{o}{=} \PYG{n}{cmdline}\PYG{o}{.}\PYG{n}{setup\PYGZus{}logging} \PYG{p}{(}\PYG{n}{\PYGZus{}\PYGZus{}name\PYGZus{}\PYGZus{}}\PYG{p}{,} \PYG{n}{options}\PYG{o}{.}\PYG{n}{log\PYGZus{}file}\PYG{p}{,} \PYG{n}{options}\PYG{o}{.}\PYG{n}{verbose}\PYG{p}{)}
+\end{Verbatim}
+
+\code{logger} is a proxy for the underlying python \href{http://docs.python.org/2/library/logging.html}{logger} object, and it can be shared freely between processes.
+
+The best course is to pass \code{logger} as a parameter to a \emph{Ruffus} task.
+
+The only caveat is that we should make sure multiple jobs are not writing to the log at the same time. To synchronise logging, we use a proxy to a non-reentrant \href{http://docs.python.org/2/library/multiprocessing.html\#multiprocessing.Lock}{multiprocessing.Lock}.
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n}{logger}\PYG{p}{,} \PYG{n}{logger\PYGZus{}mutex} \PYG{o}{=} \PYG{n}{cmdline}\PYG{o}{.}\PYG{n}{setup\PYGZus{}logging} \PYG{p}{(}\PYG{n}{\PYGZus{}\PYGZus{}name\PYGZus{}\PYGZus{}}\PYG{p}{,} \PYG{n}{options}\PYG{o}{.}\PYG{n}{log\PYGZus{}file}\PYG{p}{,} \PYG{n}{options}\PYG{o}{.}\PYG{n}{verbose}\PYG{p}{)}
+
+
+\PYG{n+nd}{@transform}\PYG{p}{(}\PYG{n}{previous\PYGZus{}task}\PYG{p}{,} \PYG{n}{suffix}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.foo}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.bar}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{n}{logger}\PYG{p}{,} \PYG{n}{logger\PYGZus{}mutex}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{next\PYGZus{}task}\PYG{p}{(}\PYG{n}{input\PYGZus{}file}\PYG{p}{,} \PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{n}{logger}\PYG{p}{,} \PYG{n}{logger\PYGZus{}mutex}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{with} \PYG{n}{logger\PYGZus{}mutex}\PYG{p}{:}
+ \PYG{n}{logger}\PYG{o}{.}\PYG{n}{info}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{We are in the middle of next\PYGZus{}task: }\PYG{l+s+si}{\PYGZpc{}s}\PYG{l+s}{ \PYGZhy{}\PYGZgt{} }\PYG{l+s+si}{\PYGZpc{}s}\PYG{l+s}{\PYGZdq{}} \PYG{o}{\PYGZpc{}} \PYG{p}{(}\PYG{n}{input\PYGZus{}file}\PYG{p}{,} \PYG{n}{output\PYGZus{}file}\PYG{p}{)}\PYG{p}{)}
+\end{Verbatim}
+\end{quote}
+
+
+\subsubsection{Can I share and synchronise my own python classes via proxies?}
+\label{faq:can-i-share-and-synchronise-my-own-python-classes-via-proxies}\begin{quote}
+\begin{enumerate}
+\item {}
+\href{http://docs.python.org/2/library/multiprocessing.html\#multiprocessing.managers.SyncManager}{multiprocessing.managers.SyncManager} provides out of the box support for lists, arrays and dicts etc.
+\begin{quote}
+
+Most of the time, we can use a ``vanilla'' manager provided by \href{http://docs.python.org/2/library/multiprocessing.html\#multiprocessing.sharedctypes.multiprocessing.Manager}{multiprocessing.Manager()}:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{k+kn}{import} \PYG{n+nn}{multiprocessing}
+\PYG{n}{manager} \PYG{o}{=} \PYG{n}{multiprocessing}\PYG{o}{.}\PYG{n}{Manager}\PYG{p}{(}\PYG{p}{)}
+
+\PYG{n}{list\PYGZus{}proxy} \PYG{o}{=} \PYG{n}{manager}\PYG{o}{.}\PYG{n}{list}\PYG{p}{(}\PYG{p}{)}
+\PYG{n}{dict\PYGZus{}proxy} \PYG{o}{=} \PYG{n}{manager}\PYG{o}{.}\PYG{n}{dict}\PYG{p}{(}\PYG{p}{)}
+\PYG{n}{lock\PYGZus{}proxy} \PYG{o}{=} \PYG{n}{manager}\PYG{o}{.}\PYG{n}{Lock}\PYG{p}{(}\PYG{p}{)}
+\PYG{n}{namespace\PYGZus{}proxy} \PYG{o}{=} \PYG{n}{manager}\PYG{o}{.}\PYG{n}{Namespace}\PYG{p}{(}\PYG{p}{)}
+\PYG{n}{queue\PYGZus{}proxy} \PYG{o}{=} \PYG{n}{manager}\PYG{o}{.}\PYG{n}{Queue}\PYG{p}{(}\PYG{p}{[}\PYG{n}{maxsize}\PYG{p}{]}\PYG{p}{)}
+\PYG{n}{reentrant\PYGZus{}lock\PYGZus{}proxy} \PYG{o}{=} \PYG{n}{manager}\PYG{o}{.}\PYG{n}{RLock}\PYG{p}{(}\PYG{p}{)}
+\PYG{n}{semaphore\PYGZus{}proxy} \PYG{o}{=} \PYG{n}{manager}\PYG{o}{.}\PYG{n}{Semaphore}\PYG{p}{(}\PYG{p}{[}\PYG{n}{value}\PYG{p}{]}\PYG{p}{)}
+\PYG{n}{char\PYGZus{}array\PYGZus{}proxy} \PYG{o}{=} \PYG{n}{manager}\PYG{o}{.}\PYG{n}{Array}\PYG{p}{(}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{c}\PYG{l+s}{\PYGZsq{}}\PYG{p}{)}
+\PYG{n}{integer\PYGZus{}proxy} \PYG{o}{=} \PYG{n}{manager}\PYG{o}{.}\PYG{n}{Value}\PYG{p}{(}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{i}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+m+mi}{6}\PYG{p}{)}
+
+\PYG{n+nd}{@transform}\PYG{p}{(}\PYG{n}{previous\PYGZus{}task}\PYG{p}{,} \PYG{n}{suffix}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.foo}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.bar}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{n}{lock\PYGZus{}proxy}\PYG{p}{,} \PYG{n}{dict\PYGZus{}proxy}\PYG{p}{,} \PYG{n}{list\PYGZus{}proxy}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{next\PYGZus{}task}\PYG{p}{(}\PYG{n}{input\PYGZus{}file}\PYG{p}{,} \PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{n}{lock\PYGZus{}proxy}\PYG{p}{,} \PYG{n}{dict\PYGZus{}proxy}\PYG{p}{,} \PYG{n}{list\PYGZus{}proxy}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{with} \PYG{n}{lock\PYGZus{}proxy}\PYG{p}{:}
+ \PYG{n}{list\PYGZus{}proxy}\PYG{o}{.}\PYG{n}{append}\PYG{p}{(}\PYG{l+m+mi}{3}\PYG{p}{)}
+ \PYG{n}{dict\PYGZus{}proxy}\PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{a}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]} \PYG{o}{=} \PYG{l+m+mi}{5}
+\end{Verbatim}
+\end{quote}
+
+\end{enumerate}
+
+However, you can also create proxy custom classes for your own objects.
+
+In this case you may need to derive from \href{http://docs.python.org/2/library/multiprocessing.html\#multiprocessing.managers.SyncManager}{multiprocessing.managers.SyncManager}
+and register proxy functions. See \code{Ruffus.proxy\_logger} for an example of how to do this.
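+
+A bare-bones sketch (the \code{JobCounter} class and its registration are purely illustrative,
+not part of \emph{Ruffus}):
+
+\begin{Verbatim}[commandchars=\\\{\}]
+from multiprocessing.managers import SyncManager
+
+class JobCounter(object):
+    def \_\_init\_\_(self):
+        self.count = 0
+    def increment(self):
+        self.count += 1
+        return self.count
+
+class MyManager(SyncManager):
+    pass
+
+# register the class so the manager can serve proxies for it
+MyManager.register("JobCounter", JobCounter)
+
+manager = MyManager()
+manager.start()
+
+# counter\_proxy can be passed to Ruffus tasks as an extra parameter,
+# exactly like the "vanilla" proxies above
+counter\_proxy = manager.JobCounter()
+\end{Verbatim}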
+\end{quote}
+
+
+\subsubsection{How do I send python objects back and forth without tangling myself in horrible synchronisation code?}
+\label{faq:how-do-i-send-python-objects-back-and-forth-without-tangling-myself-in-horrible-synchronisation-code}\begin{quote}
+\begin{enumerate}
+\item {}
+Sharing python objects by passing messages is a much more modern and safer way to coordinate multitasking than using synchronization primitives like locks.
+
+\end{enumerate}
+
+The python \href{http://docs.python.org/2/library/multiprocessing.html\#pipes-and-queues}{multiprocessing} module provides support for passing python objects as messages between processes.
+You can either use \href{http://docs.python.org/2/library/multiprocessing.html\#multiprocessing.Pipe}{pipes}
+or \href{http://docs.python.org/2/library/multiprocessing.html\#multiprocessing.Queue}{queues}.
+The idea is that one process pushes an object onto a \href{http://docs.python.org/2/library/multiprocessing.html\#multiprocessing.Pipe}{pipe} or \href{http://docs.python.org/2/library/multiprocessing.html\#multiprocessing.Queue}{queue}
+and another process pops it off at the other end. \href{http://docs.python.org/2/library/multiprocessing.html\#multiprocessing.Pipe}{Pipes} are
+only two-ended, so \href{http://docs.python.org/2/library/multiprocessing.html\#multiprocessing.Queue}{queues} are usually a better fit for sending data to multiple Ruffus jobs.
+
+Proxies for \href{http://docs.python.org/2/library/multiprocessing.html\#multiprocessing.managers.SyncManager.Queue}{queues} can be passed between processes as in the previous section.
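+
+For example, a minimal sketch (mirroring the illustrative task names used above) in which each
+job pushes a message onto a shared queue:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+import multiprocessing
+from ruffus import *
+
+manager = multiprocessing.Manager()
+queue\_proxy = manager.Queue()
+
+# pass the queue proxy to the task as an extra parameter
+@transform(previous\_task, suffix(".foo"), ".bar", queue\_proxy)
+def next\_task(input\_file, output\_file, queue\_proxy):
+    # each job pushes a message; another process can pop it off elsewhere
+    queue\_proxy.put(("finished", input\_file))
+\end{Verbatim}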
+\end{quote}
+
+
+\subsubsection{How do I share large amounts of data efficiently across processes?}
+\label{faq:how-do-i-share-large-amounts-of-data-efficiently-across-processes}\begin{quote}
+\begin{enumerate}
+\item {}
+If it is really impractical to use data files on disk, you can put the data in shared memory.
+
+\end{enumerate}
+
+It is possible to create shared objects using shared memory which can be inherited by child processes or passed as Ruffus parameters.
+This is probably most efficiently done via the \href{http://docs.python.org/2/library/multiprocessing.html\#multiprocessing.Array}{array}
+interface. Again, it is easy to create locks and proxies for synchronised access:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{k+kn}{import} \PYG{n+nn}{multiprocessing}
+\PYG{k+kn}{from} \PYG{n+nn}{multiprocessing} \PYG{k+kn}{import} \PYG{n}{Process}\PYG{p}{,} \PYG{n}{Lock}
+\PYG{k+kn}{from} \PYG{n+nn}{multiprocessing.sharedctypes} \PYG{k+kn}{import} \PYG{n}{Value}\PYG{p}{,} \PYG{n}{Array}
+\PYG{k+kn}{from} \PYG{n+nn}{ctypes} \PYG{k+kn}{import} \PYG{n}{Structure}\PYG{p}{,} \PYG{n}{c\PYGZus{}double}
+
+\PYG{n}{manager} \PYG{o}{=} \PYG{n}{multiprocessing}\PYG{o}{.}\PYG{n}{Manager}\PYG{p}{(}\PYG{p}{)}
+
+\PYG{n}{lock\PYGZus{}proxy} \PYG{o}{=} \PYG{n}{manager}\PYG{o}{.}\PYG{n}{Lock}\PYG{p}{(}\PYG{p}{)}
+\PYG{n}{int\PYGZus{}array\PYGZus{}proxy} \PYG{o}{=} \PYG{n}{manager}\PYG{o}{.}\PYG{n}{Array}\PYG{p}{(}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{i}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{p}{[}\PYG{l+m+mi}{123}\PYG{p}{]} \PYG{o}{*} \PYG{l+m+mi}{100}\PYG{p}{)}
+
+\PYG{n+nd}{@transform}\PYG{p}{(}\PYG{n}{previous\PYGZus{}task}\PYG{p}{,} \PYG{n}{suffix}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.foo}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.bar}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{n}{lock\PYGZus{}proxy}\PYG{p}{,} \PYG{n}{int\PYGZus{}array\PYGZus{}proxy}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{next\PYGZus{}task}\PYG{p}{(}\PYG{n}{input\PYGZus{}file}\PYG{p}{,} \PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{n}{lock\PYGZus{}proxy}\PYG{p}{,} \PYG{n}{int\PYGZus{}array\PYGZus{}proxy}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{with} \PYG{n}{lock\PYGZus{}proxy}\PYG{p}{:}
+ \PYG{n}{int\PYGZus{}array\PYGZus{}proxy}\PYG{p}{[}\PYG{l+m+mi}{23}\PYG{p}{]} \PYG{o}{=} \PYG{l+m+mi}{71}
+\end{Verbatim}
+\end{quote}
+
+
+\section{Glossary}
+\label{glossary:glossary}\label{glossary:glob}\label{glossary::doc}\phantomsection\label{glossary:id1}\phantomsection\label{glossary:glossary-task}\begin{description}
+\item[{\index{task|textbf}task}] \leavevmode\phantomsection\label{glossary:term-task}
+A stage in a computational pipeline.
+
+Each \textbf{task} in \emph{ruffus} is represented by a python function.
+
+For example, a task might be to find the products of sets of two numbers:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+4 x 5 = 20
+5 x 6 = 30
+2 x 7 = 14
+\end{Verbatim}
+
+\item[{\index{job|textbf}job}] \leavevmode\phantomsection\label{glossary:term-job}
+Any number of operations which can be run in parallel and make up
+the work in a stage of a computational pipeline.
+
+Each \textbf{job} in \emph{ruffus} is a separate call to the \textbf{task} function.
+
+For example, if a task is to find products of numbers, each of these will be a separate job.
+\begin{quote}
+
+Job1:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+4 x 5 = 20
+\end{Verbatim}
+
+Job2:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+5 x 6 = 30
+\end{Verbatim}
+
+Job3:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+2 x 7 = 14
+\end{Verbatim}
+\end{quote}
+
+Jobs need not complete in order.
+
+\item[{\index{decorator|textbf}decorator}] \leavevmode\phantomsection\label{glossary:term-decorator}
+Ruffus decorators allow functions to be incorporated into a computational
+pipeline, with automatic generation of parameters, dependency checking etc.,
+without modifying any code within the function.
+Quoting from the \href{http://wiki.python.org/moin/PythonDecorators}{python wiki}:
+\begin{quote}
+
+A Python decorator is a specific change to the Python syntax that
+allows us to more conveniently alter functions and methods.
+
+Decorators dynamically alter the functionality of a function, method, or
+class without having to directly use subclasses or change the source code
+of the function being decorated.
+\end{quote}
+
+\item[{\index{generator|textbf}generator}] \leavevmode\phantomsection\label{glossary:term-generator}
+python generators were introduced in python 2.2
+(see \href{http://www.ibm.com/developerworks/library/l-pycon.html}{Charming Python: Iterators and simple generators}).
+They allow iterable data to be generated on the fly.
+
+Ruffus asks for generators when you want to generate \textbf{job} parameters dynamically.
+
+Each set of job parameters is returned by the \code{yield} keyword for
+greater clarity. For example:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{k}{def} \PYG{n+nf}{generate\PYGZus{}job\PYGZus{}parameters}\PYG{p}{(}\PYG{p}{)}\PYG{p}{:}
+
+ \PYG{k}{for} \PYG{n}{file\PYGZus{}index}\PYG{p}{,} \PYG{n}{file\PYGZus{}name} \PYG{o+ow}{in} \PYG{n}{iterate}\PYG{p}{(}\PYG{n}{all\PYGZus{}file\PYGZus{}names}\PYG{p}{)}\PYG{p}{:}
+
+ \PYG{c}{\PYGZsh{} parameter for each job}
+ \PYG{k}{yield} \PYG{n}{file\PYGZus{}index}\PYG{p}{,} \PYG{n}{file\PYGZus{}name}
+\end{Verbatim}
+
+Each job takes the parameters \code{file\_index} and \code{file\_name}.
+
+\end{description}
+
+\includegraphics{logo.jpg}
+
+
+\section{Hall of Fame: User contributed flowcharts}
+\label{gallery:hall-of-fame-user-contributed-flowcharts}\label{gallery::doc}
+Please contribute your own workflows in your favourite colours, with an (optional) short description,
+to email: ruffus\_lib at llew.org.uk
+
+
+\subsection{RNASeq pipeline}
+\label{gallery:rnaseq-pipeline}\begin{quote}
+
+\href{http://en.wikipedia.org/wiki/RNA-Seq}{http://en.wikipedia.org/wiki/RNA-Seq}
+
+Mapping transcripts onto genomes using high-throughput sequencing technologies (\code{svg}).
+\includegraphics{gallery_rna_seq.png}\end{quote}
+
+
+\subsection{non-coding evolutionary constraints}
+\label{gallery:non-coding-evolutionary-constraints}\begin{quote}
+
+\href{http://en.wikipedia.org/wiki/Noncoding\_DNA}{http://en.wikipedia.org/wiki/Noncoding\_DNA}
+
+Non-protein coding evolutionary constraints in different species (\code{svg}).
+\includegraphics{gallery_dless.png}\end{quote}
+
+
+\subsection{SNP annotation}
+\label{gallery:snp-annotation}
+Predicting the impact of different Single Nucleotide Polymorphisms
+
+\href{http://en.wikipedia.org/wiki/Single-nucleotide\_polymorphism}{http://en.wikipedia.org/wiki/Single-nucleotide\_polymorphism}
+
+Population variation across genomes (\code{svg}).
+\includegraphics{gallery_snp_annotation.png}
+Using ``pseudo'' targets to run only part of the pipeline (\code{svg}).
+\includegraphics{gallery_snp_annotation_consequences.png}
+
+\subsection{Chip-Seq analysis}
+\label{gallery:chip-seq-analysis}
+Analysing DNA binding sites with Chip-Seq
+\href{http://en.wikipedia.org/wiki/Chip-Sequencing}{http://en.wikipedia.org/wiki/Chip-Sequencing}
+\begin{quote}
+
+(\code{svg})
+\includegraphics{gallery_big_pipeline.png}\end{quote}
+
+\index{Ruffus!Etymology}\index{Etymology!Ruffus}\index{Ruffus!Name origins}\index{Name origins!Ruffus}
+
+\section{Why \emph{Ruffus}?}
+\label{why_ruffus:index-0}\label{why_ruffus:why-ruffus}\label{why_ruffus::doc}\label{why_ruffus:design-why-ruffus}
+\textbf{Cylindrophis ruffus} is the name of the
+\href{http://en.wikipedia.org/wiki/Cylindrophis\_ruffus}{red-tailed pipe snake} (bad python-y pun)
+which can be found in \href{http://www.discoverhongkong.com/eng/index.html}{Hong Kong} where the original author comes from.
+
+\emph{Ruffus} is a shy creature, and pretends to be a cobra or a \href{http://en.wikipedia.org/wiki/File:Bandedkrait.jpg}{banded krait} by putting up its red tail and ducking its
+head in its coils when startled.
+
+\begin{tabular}{|p{0.475\linewidth}|p{0.475\linewidth}|}
+\hline
+
+\includegraphics{wikimedia_cyl_ruffus.jpg}
+ &
+\scalebox{0.770000}{\includegraphics{wikimedia_bandedkrait.jpg}}
+\\\hline
+\begin{itemize}
+\item {}
+Not venomous
+
+\item {}
+\href{http://en.wikipedia.org/wiki/Mostly\_Harmless}{Mostly Harmless}
+
+\end{itemize}
+ & \begin{itemize}
+\item {}
+Deadly poisonous
+
+\item {}
+\href{http://en.wikipedia.org/wiki/List\_of\_races\_and\_species\_in\_The\_Hitchhiker's\_Guide\_to\_the\_Galaxy\#Ravenous\_Bugblatter\_Beast\_of\_Traal}{Seriously unfriendly}
+
+\end{itemize}
+\\\hline
+\end{tabular}
+
+
+Be careful not to step on one when running down country park lanes at full speed
+in Hong Kong: this snake is a \href{http://www.hkras.org/eng/info/hkspp.htm}{rare breed}!
+
+\emph{Ruffus} does most of its work at night and sleeps during the day: typical of many (but alas not all) python programmers!
+
+The original \href{http://upload.wikimedia.org/wikipedia/commons/a/a1/Cyl\_ruffus\_061212\_2025\_tdp.jpg}{red-tail pipe} and \href{http://en.wikipedia.org/wiki/File:AB\_054\_Banded\_Krait.JPG}{banded krait} images are from wikimedia.
+
+
+\chapter{Examples}
+\label{contents:examples}
+
+\section{Construction of a simple pipeline to run BLAST jobs}
+\label{examples/bioinformatics/index:construction-of-a-simple-pipeline-to-run-blast-jobs}\label{examples/bioinformatics/index::doc}\label{examples/bioinformatics/index:examples-bioinformatics-part1}
+
+\subsection{Overview}
+\label{examples/bioinformatics/index:overview}\begin{quote}
+
+This is a simple example to illustrate the convenience \textbf{Ruffus}
+brings to simple tasks in bioinformatics.
+\begin{enumerate}
+\item {}
+\textbf{Split} a problem into multiple fragments that can be
+
+\item {}
+\textbf{Run in parallel} giving partial solutions that can be
+
+\item {}
+\textbf{Recombined} into the complete solution.
+
+\end{enumerate}
+
+The example code runs an \href{http://blast.ncbi.nlm.nih.gov/}{ncbi}
+\href{http://en.wikipedia.org/wiki/BLAST}{blast} search for four sequences
+against the human \href{http://en.wikipedia.org/wiki/RefSeq}{refseq} protein sequence database.
+\begin{enumerate}
+\item {}
+\textbf{Split} each of the four sequences into a separate file.
+
+\item {}
+\textbf{Run in parallel} Blastall on each sequence file
+
+\item {}
+\textbf{Recombine} the BLAST results by simple concatenation.
+
+\end{enumerate}
+
+In real life,
+\begin{itemize}
+\item {}
+\href{http://blast.ncbi.nlm.nih.gov/}{BLAST} already provides support for multiprocessing
+
+\item {}
+Sequence files would be split into much larger chunks, each containing many sequences
+
+\item {}
+The jobs would be submitted to large computational farms (in our case, using the Sun Grid Engine).
+
+\item {}
+The High Scoring Pairs (HSPs) would be parsed / filtered / stored in your own formats.
+
+\end{itemize}
+
+\begin{notice}{note}{Note:}
+This bioinformatics example is intended to showcase \emph{some} of the features of Ruffus.
+\begin{enumerate}
+\item {}
+See the {\hyperref[tutorials/new_tutorial/introduction:new-manual-introduction]{\emph{manual}}} to learn about the various features in Ruffus.
+
+\end{enumerate}
+\end{notice}
+\end{quote}
+
+
+\subsection{Prerequisites}
+\label{examples/bioinformatics/index:prerequisites}
+
+\subsubsection{1. Ruffus}
+\label{examples/bioinformatics/index:ruffus}\begin{quote}
+
+To install Ruffus on most systems with python installed:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+easy\_install -U ruffus
+\end{Verbatim}
+\end{quote}
+
+Otherwise, \href{http://code.google.com/p/ruffus/downloads/list}{download} Ruffus and run:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+tar -xvzf ruffus-xxx.tar.gz
+cd ruffus-xxx
+python setup.py install
+\end{Verbatim}
+\end{quote}
+
+where xxx is the latest Ruffus version.
+\end{quote}
+
+
+\subsubsection{2. BLAST}
+\label{examples/bioinformatics/index:blast}\begin{quote}
+
+This example assumes that the \href{http://blast.ncbi.nlm.nih.gov/}{BLAST} \code{blastall} and \code{formatdb} executables are
+installed and on the search path. Otherwise download from \href{http://blast.ncbi.nlm.nih.gov/Blast.cgi?CMD=Web\&PAGE\_TYPE=BlastDocs\&DOC\_TYPE=Download}{here}.
+\end{quote}
+
+
+\subsubsection{3. human refseq sequence database}
+\label{examples/bioinformatics/index:human-refseq-sequence-database}\begin{quote}
+
+We also need to download the human refseq sequence file and format the ncbi database:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+wget ftp://ftp.ncbi.nih.gov/refseq/H\_sapiens/mRNA\_Prot/human.protein.faa.gz
+gunzip human.protein.faa.gz
+
+formatdb -i human.protein.faa
+\end{Verbatim}
+\end{quote}
+\end{quote}
+
+
+\subsubsection{4. test sequences}
+\label{examples/bioinformatics/index:test-sequences}\begin{quote}
+
+Query sequences in FASTA format can be found in original.fa
+\end{quote}
+
+
+\subsection{Code}
+\label{examples/bioinformatics/index:code}\begin{quote}
+
+The code for this example can be found {\hyperref[examples/bioinformatics/part1_code:examples-bioinformatics-part1-code]{\emph{here}}},
+suitable for pasting into the python command shell.
+\end{quote}
+
+
+\subsection{Step 1. Splitting up the query sequences}
+\label{examples/bioinformatics/index:step-1-splitting-up-the-query-sequences}\begin{quote}
+
+We want each of our sequences in the query file original.fa to be placed
+in a separate file named \code{XXX.segment}, where \code{XXX} runs from 1 to the number of sequences.
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n}{current\PYGZus{}file\PYGZus{}index} \PYG{o}{=} \PYG{l+m+mi}{0}
+\PYG{k}{for} \PYG{n}{line} \PYG{o+ow}{in} \PYG{n+nb}{open}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{original.fa}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{:}
+ \PYG{c}{\PYGZsh{} start a new file for each accession line}
+ \PYG{k}{if} \PYG{n}{line}\PYG{p}{[}\PYG{l+m+mi}{0}\PYG{p}{]} \PYG{o}{==} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{\PYGZgt{}}\PYG{l+s}{\PYGZsq{}}\PYG{p}{:}
+ \PYG{n}{current\PYGZus{}file\PYGZus{}index} \PYG{o}{+}\PYG{o}{=} \PYG{l+m+mi}{1}
+ \PYG{n}{current\PYGZus{}file} \PYG{o}{=} \PYG{n+nb}{open}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s+si}{\PYGZpc{}d}\PYG{l+s}{.segment}\PYG{l+s}{\PYGZdq{}} \PYG{o}{\PYGZpc{}} \PYG{n}{current\PYGZus{}file\PYGZus{}index}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+ \PYG{n}{current\PYGZus{}file}\PYG{o}{.}\PYG{n}{write}\PYG{p}{(}\PYG{n}{line}\PYG{p}{)}
+\end{Verbatim}
+\end{quote}
+
+To use this in a pipeline, we only need to wrap this in a function, ``decorated'' with the Ruffus
+keyword {\hyperref[tutorials/new_tutorial/split:new-manual-split]{\emph{@split}}}:
+\begin{quote}
+
+\includegraphics{examples_bioinformatics_split.jpg}
+\end{quote}
+
+\begin{DUlineblock}{0em}
+\item[] This indicates that we are splitting up the input file original.fa into as many
+\code{*.segment} files as it takes.
+\item[] The pipelined function itself takes two arguments, for the input and output.
+\end{DUlineblock}
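+
+In outline, the decorated function looks something like the following sketch (see the linked
+code page for the exact version used in this example):
+
+\begin{Verbatim}[commandchars=\\\{\}]
+@split("original.fa", "*.segment")
+def splitFasta(seqFile, segments):
+    # write each sequence from original.fa into its own NNN.segment file
+    current\_file\_index = 0
+    for line in open(seqFile):
+        # start a new file for each accession line
+        if line[0] == '\textgreater{}':
+            current\_file\_index += 1
+            current\_file = open("%d.segment" % current\_file\_index, "w")
+        current\_file.write(line)
+\end{Verbatim}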
+
+We shall see later this simple {\hyperref[tutorials/new_tutorial/split:new-manual-split]{\emph{@split}}} decorator already gives all the benefits of:
+\begin{itemize}
+\item {}
+Dependency checking
+
+\item {}
+Flowchart printing
+
+\end{itemize}
+\end{quote}
+
+
+\subsection{Step 2. Run BLAST jobs in parallel}
+\label{examples/bioinformatics/index:step-2-run-blast-jobs-in-parallel}\begin{quote}
+
+Assuming that blast is already installed, sequence matches can be found with this python
+code:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n}{os}\PYG{o}{.}\PYG{n}{system}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{blastall \PYGZhy{}p blastp \PYGZhy{}d human.protein.faa \PYGZhy{}i 1.segment \PYGZgt{} 1.blastResult}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\end{Verbatim}
+\end{quote}
+
+To pipeline this, we simply need to wrap it in a function, decorated with the \textbf{Ruffus}
+keyword {\hyperref[tutorials/new_tutorial/transform:new-manual-transform]{\emph{@transform}}}.
+\begin{quote}
+
+\includegraphics{examples_bioinformatics_transform.jpg}
+\end{quote}
+
+This indicates that we are taking all the output files from the previous \code{splitFasta}
+operation (\code{*.segment}) and {\hyperref[tutorials/new_tutorial/transform:new-manual-transform]{\emph{@transform}}}-ing each to a new file with the \code{.blastResult}
+suffix. Each of these transformation operations can run in parallel if specified.
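+
+Again in outline (a sketch; the linked code page has the exact version):
+
+\begin{Verbatim}[commandchars=\\\{\}]
+import os
+
+@transform(splitFasta, suffix(".segment"), ".blastResult")
+def runBlast(seqFile, blastResultFile):
+    os.system("blastall -p blastp -d human.protein.faa -i %s \textgreater{} %s" %
+              (seqFile, blastResultFile))
+\end{Verbatim}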
+\end{quote}
+
+
+\subsection{Step 3. Combining BLAST results}
+\label{examples/bioinformatics/index:step-3-combining-blast-results}\begin{quote}
+\begin{description}
+\item[{The following python code will concatenate the results together}] \leavevmode
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n}{output\PYGZus{}file} \PYG{o}{=} \PYG{n+nb}{open}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{final.blast\PYGZus{}results}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{k}{for} \PYG{n}{i} \PYG{o+ow}{in} \PYG{n}{glob}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{*.blastResult}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{:}
+ \PYG{n}{output\PYGZus{}file}\PYG{o}{.}\PYG{n}{write}\PYG{p}{(}\PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{i}\PYG{p}{)}\PYG{o}{.}\PYG{n}{read}\PYG{p}{(}\PYG{p}{)}\PYG{p}{)}
+\end{Verbatim}
+
+\end{description}
+
+To pipeline this, we need again to decorate with the \textbf{Ruffus} keyword {\hyperref[tutorials/new_tutorial/merge:new-manual-merge]{\emph{@merge}}}.
+\begin{quote}
+
+\includegraphics{examples_bioinformatics_merge.jpg}
+\end{quote}
+
+This indicates that we are taking all the output files from the previous \code{runBlast}
+operation (\code{*.blastResult}) and {\hyperref[tutorials/new_tutorial/merge:new-manual-merge]{\emph{@merge}}}-ing them into the new file \code{final.blast\_results}.
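+
+In outline (again a sketch; the linked code page has the exact version):
+
+\begin{Verbatim}[commandchars=\\\{\}]
+@merge(runBlast, "final.blast\_results")
+def combineBlastResults(blastResultFiles, combinedBlastResultFile):
+    output\_file = open(combinedBlastResultFile, "w")
+    for i in blastResultFiles:
+        output\_file.write(open(i).read())
+\end{Verbatim}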
+\end{quote}
+
+
+\subsection{Step 4. Running the pipeline}
+\label{examples/bioinformatics/index:step-4-running-the-pipeline}\begin{quote}
+
+We can run the completed pipeline using a maximum of 4 parallel processes by calling
+{\hyperref[pipeline_functions:pipeline-functions-pipeline-run]{\emph{pipeline\_run}}} :
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{p}{[}\PYG{n}{combineBlastResults}\PYG{p}{]}\PYG{p}{,} \PYG{n}{verbose} \PYG{o}{=} \PYG{l+m+mi}{2}\PYG{p}{,} \PYG{n}{multiprocess} \PYG{o}{=} \PYG{l+m+mi}{4}\PYG{p}{)}
+\end{Verbatim}
+\end{quote}
+
+Though we have only asked Ruffus to run \code{combineBlastResults}, it traces all the dependencies
+of this task and runs all the necessary parts of the pipeline.
+
+\begin{notice}{note}{Note:}
+The full code for this example can be found {\hyperref[examples/bioinformatics/part1_code:examples-bioinformatics-part1-code]{\emph{here}}},
+suitable for pasting into the Python command shell.
+\end{notice}
+
+The \code{verbose} parameter causes the following output to be printed to stderr as the pipeline
+runs:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{p}{[}\PYG{n}{combineBlastResults}\PYG{p}{]}\PYG{p}{,} \PYG{n}{verbose} \PYG{o}{=} \PYG{l+m+mi}{2}\PYG{p}{,} \PYG{n}{multiprocess} \PYG{o}{=} \PYG{l+m+mi}{4}\PYG{p}{)}
+\PYG{g+go}{ Job = [original.fa \PYGZhy{}\PYGZgt{} *.segment] completed}
+\PYG{g+go}{Completed Task = splitFasta}
+\PYG{g+go}{ Job = [1.segment \PYGZhy{}\PYGZgt{} 1.blastResult] completed}
+\PYG{g+go}{ Job = [3.segment \PYGZhy{}\PYGZgt{} 3.blastResult] completed}
+\PYG{g+go}{ Job = [2.segment \PYGZhy{}\PYGZgt{} 2.blastResult] completed}
+\PYG{g+go}{ Job = [4.segment \PYGZhy{}\PYGZgt{} 4.blastResult] completed}
+\PYG{g+go}{Completed Task = runBlast}
+\PYG{g+go}{ Job = [[1.blastResult, 2.blastResult, 3.blastResult, 4.blastResult] \PYGZhy{}\PYGZgt{} final.blast\PYGZus{}results] completed}
+\PYG{g+go}{Completed Task = combineBlastResults}
+\end{Verbatim}
+\end{quote}
+\end{quote}
+
+
+\subsection{Step 5. Testing dependencies}
+\label{examples/bioinformatics/index:step-5-testing-dependencies}\begin{quote}
+
+If we invoked {\hyperref[pipeline_functions:pipeline-functions-pipeline-run]{\emph{pipeline\_run}}} again, nothing
+further would happen because the
+pipeline is now up-to-date. But what if the pipeline had not run to completion?
+
+We can simulate the failure of one of the \code{blastall} jobs by deleting its results:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n}{os}\PYG{o}{.}\PYG{n}{unlink}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{4.blastResult}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\end{Verbatim}
+\end{quote}
+
+Let us use the {\hyperref[pipeline_functions:pipeline-functions-pipeline-printout]{\emph{pipeline\_printout}}}
+function to print out the dependencies of the pipeline at a high \code{verbose} level, which
+will show both complete and incomplete jobs:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{k+kn}{import} \PYG{n+nn}{sys}
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{n}{pipeline\PYGZus{}printout}\PYG{p}{(}\PYG{n}{sys}\PYG{o}{.}\PYG{n}{stdout}\PYG{p}{,} \PYG{p}{[}\PYG{n}{combineBlastResults}\PYG{p}{]}\PYG{p}{,} \PYG{n}{verbose} \PYG{o}{=} \PYG{l+m+mi}{4}\PYG{p}{)}
+
+\PYG{g+go}{\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}}
+\PYG{g+go}{Tasks which are up\PYGZhy{}to\PYGZhy{}date:}
+
+\PYG{g+go}{Task = splitFasta}
+\PYG{g+go}{ \PYGZdq{}Split sequence file into as many fragments as appropriate depending on the size of}
+\PYG{g+go}{ original\PYGZus{}fasta\PYGZdq{}}
+
+
+\PYG{g+go}{\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}}
+\PYG{g+go}{Tasks which will be run:}
+
+\PYG{g+go}{Task = runBlast}
+\PYG{g+go}{ \PYGZdq{}Run blast\PYGZdq{}}
+\PYG{g+go}{ Job = [4.segment}
+\PYG{g+go}{ \PYGZhy{}\PYGZgt{}4.blastResult]}
+\PYG{g+go}{ Job needs update: Missing file 4.blastResult}
+
+\PYG{g+go}{Task = combineBlastResults}
+\PYG{g+go}{ \PYGZdq{}Combine blast results\PYGZdq{}}
+\PYG{g+go}{ Job = [[1.blastResult, 2.blastResult, 3.blastResult, 4.blastResult]}
+\PYG{g+go}{ \PYGZhy{}\PYGZgt{}final.blast\PYGZus{}results]}
+\PYG{g+go}{ Job needs update: Missing file 4.blastResult}
+
+\PYG{g+go}{\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}}
+\end{Verbatim}
+\end{quote}
+
+Only the parts of the pipeline which involve the missing BLAST result will be rerun.
+We can confirm this by re-running the pipeline:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{p}{[}\PYG{n}{combineBlastResults}\PYG{p}{]}\PYG{p}{,} \PYG{n}{verbose} \PYG{o}{=} \PYG{l+m+mi}{2}\PYG{p}{,} \PYG{n}{multiprocess} \PYG{o}{=} \PYG{l+m+mi}{4}\PYG{p}{)}
+
+\PYG{g+go}{ Job = [1.segment \PYGZhy{}\PYGZgt{} 1.blastResult] unnecessary: already up to date}
+\PYG{g+go}{ Job = [2.segment \PYGZhy{}\PYGZgt{} 2.blastResult] unnecessary: already up to date}
+\PYG{g+go}{ Job = [3.segment \PYGZhy{}\PYGZgt{} 3.blastResult] unnecessary: already up to date}
+\PYG{g+go}{ Job = [4.segment \PYGZhy{}\PYGZgt{} 4.blastResult] completed}
+\PYG{g+go}{Completed Task = runBlast}
+\PYG{g+go}{ Job = [[1.blastResult, 2.blastResult, 3.blastResult, 4.blastResult] \PYGZhy{}\PYGZgt{} final.blast\PYGZus{}results] completed}
+\PYG{g+go}{Completed Task = combineBlastResults}
+\end{Verbatim}
+\end{quote}
+\end{quote}
+
+
+\subsection{What is next?}
+\label{examples/bioinformatics/index:what-is-next}\begin{quote}
+
+In the {\hyperref[examples/bioinformatics/part2:examples-bioinformatics-part2]{\emph{next (short) part}}},
+we shall add some standard (boilerplate) code to
+turn this BLAST pipeline into a (slightly more) useful Python program.
+\end{quote}
+
+
+\section{Part 2: A slightly more practical pipeline to run BLAST jobs}
+\label{examples/bioinformatics/part2:part-2-a-slightly-more-practical-pipeline-to-run-blasts-jobs}\label{examples/bioinformatics/part2::doc}\label{examples/bioinformatics/part2:examples-bioinformatics-part2}
+
+\subsection{Overview}
+\label{examples/bioinformatics/part2:overview}\begin{quote}
+
+{\hyperref[examples/bioinformatics/index:examples-bioinformatics-part1]{\emph{Previously}}}, we built
+a simple pipeline to split up a FASTA file of query sequences so
+that they can be matched against a sequence database in parallel.
+
+We shall wrap this code so that
+\begin{itemize}
+\item {}
+It is more robust to interruptions
+
+\item {}
+We can specify the file names on the command line
+
+\end{itemize}
+\end{quote}
+
+
+\subsection{Step 1. Cleaning up any leftover junk from previous pipeline runs}
+\label{examples/bioinformatics/part2:step-1-cleaning-up-any-leftover-junk-from-previous-pipeline-runs}\begin{quote}
+
+\begin{DUlineblock}{0em}
+\item[] We split up the query file \code{original.fa} so that each of its sequences goes
+into a separate file named \code{XXX.segment}, where \code{XXX} runs from 1 up to the
+number of sequences in the FASTA file.
+\end{DUlineblock}
+
+\begin{DUlineblock}{0em}
+\item[] However, if we start with 6 sequences (giving \code{1.segment} ... \code{6.segment}) and we
+then edit \code{original.fa}
+so that only 5 are left, the file \code{6.segment} would still be left
+hanging around as an unwanted, extraneous and confusing orphan.
+\end{DUlineblock}
+
+As a general rule, it is a good idea to clean up the results of any previous run at the
+start of a \emph{@split} operation:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n+nd}{@split}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{original.fa}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{*.segment}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{splitFasta} \PYG{p}{(}\PYG{n}{seqFile}\PYG{p}{,} \PYG{n}{segments}\PYG{p}{)}\PYG{p}{:}
+
+ \PYG{c}{\PYGZsh{}}
+ \PYG{c}{\PYGZsh{} Clean up any segment files from previous runs before creating new one}
+ \PYG{c}{\PYGZsh{}}
+ \PYG{k}{for} \PYG{n}{i} \PYG{o+ow}{in} \PYG{n}{glob}\PYG{o}{.}\PYG{n}{glob}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{*.segment}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{:}
+ \PYG{n}{os}\PYG{o}{.}\PYG{n}{unlink}\PYG{p}{(}\PYG{n}{i}\PYG{p}{)}
+
+ \PYG{c}{\PYGZsh{} code as before...}
+\end{Verbatim}
+\end{quote}
+\end{quote}
+
+
+\subsection{Step 2. Adding a ``flag'' file to mark successful completion}
+\label{examples/bioinformatics/part2:step-2-adding-a-flag-file-to-mark-successful-completion}\label{examples/bioinformatics/part2:examples-bioinformatics-part2-step2}\begin{quote}
+
+When pipelined tasks are interrupted halfway through an operation, the output may
+contain only part of the results, in an incomplete or inconsistent state.
+There are three general options to deal with this:
+\begin{enumerate}
+\item {}
+Catch any interrupting conditions and delete the incomplete output
+
+\item {}
+Tag successfully completed output with a special marker at the end of the file
+
+\item {}
+Create an empty ``flag'' file whose only point is to signal success
+
+\end{enumerate}
+
+Option (3) is the most reliable approach and involves the least work in Ruffus.
+We add flag files with the suffix \code{.blastSuccess} for our parallel BLAST jobs:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n+nd}{@transform}\PYG{p}{(}\PYG{n}{splitFasta}\PYG{p}{,} \PYG{n}{suffix}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.segment}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.blastResult}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.blastSuccess}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{runBlast}\PYG{p}{(}\PYG{n}{seqFile}\PYG{p}{,} \PYG{n}{output\PYGZus{}files}\PYG{p}{)}\PYG{p}{:}
+
+ \PYG{n}{blastResultFile}\PYG{p}{,} \PYG{n}{flag\PYGZus{}file} \PYG{o}{=} \PYG{n}{output\PYGZus{}files}
+
+ \PYG{c}{\PYGZsh{}}
+ \PYG{c}{\PYGZsh{} Existing code unchanged}
+ \PYG{c}{\PYGZsh{}}
+ \PYG{n}{os}\PYG{o}{.}\PYG{n}{system}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{blastall \PYGZhy{}p blastp \PYGZhy{}d human.protein.faa }\PYG{l+s}{\PYGZdq{}}\PYG{o}{+}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZhy{}i }\PYG{l+s+si}{\PYGZpc{}s}\PYG{l+s}{ \PYGZgt{} }\PYG{l+s+si}{\PYGZpc{}s}\PYG{l+s}{\PYGZdq{}} \PYG{o}{\PYGZpc{}} \PYG{p}{(}\PYG{n}{seqFile}\PYG{p}{,} \PYG{n}{blastResultFile}\PYG{p}{)}\PYG{p}{)}
+
+ \PYG{c}{\PYGZsh{}}
+ \PYG{c}{\PYGZsh{} \PYGZdq{}touch\PYGZdq{} flag file to indicate success}
+ \PYG{c}{\PYGZsh{}}
+ \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{flag\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\end{Verbatim}
+\end{quote}
+\end{quote}
+
+
+\subsection{Step 3. Allowing the script to be invoked on the command line}
+\label{examples/bioinformatics/part2:step-3-allowing-the-script-to-be-invoked-on-the-command-line}\begin{quote}
+
+We allow the query sequence file, the sequence database and the results file
+to be specified at runtime using the standard Python \href{http://docs.python.org/library/optparse.html}{optparse} module.
+We find this approach to run-time arguments generally useful for many Ruffus scripts.
+The full code can be {\hyperref[examples/bioinformatics/part2_code:examples-bioinformatics-part2-code]{\emph{viewed here}}} and
+downloaded as run\_parallel\_blast.py.
+
+The different options can be inspected by running the script with the \code{-{-}help} or \code{-h}
+argument.
+
+The following options are useful for developing Ruffus scripts:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+--verbose \textbar{} -v : Print more detailed messages for each additional verbose level.
+ E.g. run\_parallel\_blast --verbose --verbose --verbose ... (or -vvv)
+
+--jobs \textbar{} -j : Specifies the number of jobs (operations) to run in parallel.
+
+--flowchart FILE : Print flowchart of the pipeline to FILE. Flowchart format
+ depends on extension. Alternatives include (".dot", ".jpg",
+ "*.svg", "*.png" etc). Formats other than ".dot" require
+ the dot program to be installed (http://www.graphviz.org/).
+
+--just\_print \textbar{} -n Only print a trace (description) of the pipeline.
+ The level of detail is set by --verbose.
+\end{Verbatim}
+\end{quote}
+\end{quote}
+
+
+\subsection{Step 4. Printing out a flowchart for the pipeline}
+\label{examples/bioinformatics/part2:step-4-printing-out-a-flowchart-for-the-pipeline}\begin{quote}
+
+The \code{-{-}flowchart} argument results in a call to \code{pipeline\_printout\_graph(...)},
+which prints out a flowchart of the pipeline. Valid formats include \code{.dot}, \code{.jpg}, \code{.svg} and \code{.png},
+but all except the first require the \code{dot} program to be installed
+(\href{http://www.graphviz.org/}{http://www.graphviz.org/}).
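+
+Internally this boils down to something like the following excerpt from the full listing
+(\code{options.flowchart} holds the FILE argument given on the command line):
+\begin{quote}
+
+\begin{Verbatim}
+# use the file extension (e.g. "jpg" for "flowchart.jpg") as the output format
+output_format = os.path.splitext(options.flowchart)[1][1:]
+pipeline_printout_graph(open(options.flowchart, "w"),
+                        output_format,
+                        [combineBlastResults],
+                        no_key_legend = True)
+\end{Verbatim}
+\end{quote}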
+
+The state of the pipeline is reflected in the flowchart:
+
+\includegraphics{examples_bioinformatics_pipeline.jpg}
+\end{quote}
+
+
+\subsection{Step 5. Errors}
+\label{examples/bioinformatics/part2:step-5-errors}\begin{quote}
+
+Because Ruffus scripts are just normal Python functions, you can debug them using
+your usual tools, or jump to the offending line(s) even when the pipeline is running in
+parallel.
+
+For example, this is what the error messages would look like if we had misspelt the command as \code{blastal}.
+In {\hyperref[examples/bioinformatics/part2_code:examples-bioinformatics-part2-code]{\emph{run\_parallel\_blast.py}}},
+Python exceptions are raised if the \code{blastall} command fails.
+
+Each of the exceptions from the parallel operations is printed out with the
+offending line (line 204) and the problem (\code{blastal} not found)
+highlighted in red.
+\begin{quote}
+
+\includegraphics{examples_bioinformatics_error.png}
+\end{quote}
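+
+The exception itself comes from the \code{run\_cmd} helper in the full listing, which raises
+whenever the shell command exits with a non-zero status:
+\begin{quote}
+
+\begin{Verbatim}
+import subprocess
+
+def run_cmd(cmd_str):
+    """
+    Throw exception if run command fails
+    """
+    process = subprocess.Popen(cmd_str, stdout = subprocess.PIPE,
+                               stderr = subprocess.PIPE, shell = True)
+    stdout_str, stderr_str = process.communicate()
+    if process.returncode != 0:
+        raise Exception("Failed to run '%s'\n%s%sNon-zero exit status %s" %
+                        (cmd_str, stdout_str, stderr_str, process.returncode))
+\end{Verbatim}
+\end{quote}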
+\end{quote}
+
+
+\subsection{Step 6. Will it run?}
+\label{examples/bioinformatics/part2:step-6-will-it-run}\begin{quote}
+
+The full code can be {\hyperref[examples/bioinformatics/part2_code:examples-bioinformatics-part2-code]{\emph{viewed here}}} and
+downloaded as run\_parallel\_blast.py.
+\end{quote}
+
+
+\section{Ruffus code}
+\label{examples/bioinformatics/part1_code:examples-bioinformatics-part1-code}\label{examples/bioinformatics/part1_code:ruffus-code}\label{examples/bioinformatics/part1_code::doc}
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{k+kn}{import} \PYG{n+nn}{os}\PYG{o}{,} \PYG{n+nn}{sys}
+
+\PYG{n}{exe\PYGZus{}path} \PYG{o}{=} \PYG{n}{os}\PYG{o}{.}\PYG{n}{path}\PYG{o}{.}\PYG{n}{split}\PYG{p}{(}\PYG{n}{os}\PYG{o}{.}\PYG{n}{path}\PYG{o}{.}\PYG{n}{abspath}\PYG{p}{(}\PYG{n}{sys}\PYG{o}{.}\PYG{n}{argv}\PYG{p}{[}\PYG{l+m+mi}{0}\PYG{p}{]}\PYG{p}{)}\PYG{p}{)}\PYG{p}{[}\PYG{l+m+mi}{0}\PYG{p}{]}
+\PYG{n}{sys}\PYG{o}{.}\PYG{n}{path}\PYG{o}{.}\PYG{n}{insert}\PYG{p}{(}\PYG{l+m+mi}{0}\PYG{p}{,} \PYG{n}{os}\PYG{o}{.}\PYG{n}{path}\PYG{o}{.}\PYG{n}{abspath}\PYG{p}{(}\PYG{n}{os}\PYG{o}{.}\PYG{n}{path}\PYG{o}{.}\PYG{n}{join}\PYG{p}{(}\PYG{n}{exe\PYGZus{}path}\PYG{p}{,}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{..}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{..}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{..}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{)}\PYG{p}{)}
+
+\PYG{k+kn}{from} \PYG{n+nn}{ruffus} \PYG{k+kn}{import} \PYG{o}{*}
+
+
+\PYG{n}{original\PYGZus{}fasta} \PYG{o}{=} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{original.fa}\PYG{l+s}{\PYGZdq{}}
+\PYG{n}{database\PYGZus{}file} \PYG{o}{=} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{human.protein.faa}\PYG{l+s}{\PYGZdq{}}
+
+\PYG{n+nd}{@split}\PYG{p}{(}\PYG{n}{original\PYGZus{}fasta}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{*.segment}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{splitFasta} \PYG{p}{(}\PYG{n}{seqFile}\PYG{p}{,} \PYG{n}{segments}\PYG{p}{)}\PYG{p}{:}
+ \PYG{l+s+sd}{\PYGZdq{}\PYGZdq{}\PYGZdq{}Split sequence file into}
+\PYG{l+s+sd}{ as many fragments as appropriate}
+\PYG{l+s+sd}{ depending on the size of original\PYGZus{}fasta\PYGZdq{}\PYGZdq{}\PYGZdq{}}
+ \PYG{n}{current\PYGZus{}file\PYGZus{}index} \PYG{o}{=} \PYG{l+m+mi}{0}
+ \PYG{k}{for} \PYG{n}{line} \PYG{o+ow}{in} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{original\PYGZus{}fasta}\PYG{p}{)}\PYG{p}{:}
+ \PYG{c}{\PYGZsh{}}
+ \PYG{c}{\PYGZsh{} start a new file for each accession line}
+ \PYG{c}{\PYGZsh{}}
+ \PYG{k}{if} \PYG{n}{line}\PYG{p}{[}\PYG{l+m+mi}{0}\PYG{p}{]} \PYG{o}{==} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{\PYGZgt{}}\PYG{l+s}{\PYGZsq{}}\PYG{p}{:}
+ \PYG{n}{current\PYGZus{}file\PYGZus{}index} \PYG{o}{+}\PYG{o}{=} \PYG{l+m+mi}{1}
+ \PYG{n}{current\PYGZus{}file} \PYG{o}{=} \PYG{n+nb}{open}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s+si}{\PYGZpc{}d}\PYG{l+s}{.segment}\PYG{l+s}{\PYGZdq{}} \PYG{o}{\PYGZpc{}} \PYG{n}{current\PYGZus{}file\PYGZus{}index}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+ \PYG{n}{current\PYGZus{}file}\PYG{o}{.}\PYG{n}{write}\PYG{p}{(}\PYG{n}{line}\PYG{p}{)}
+
+
+
+\PYG{n+nd}{@transform}\PYG{p}{(}\PYG{n}{splitFasta}\PYG{p}{,} \PYG{n}{suffix}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.segment}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.blastResult}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{runBlast}\PYG{p}{(}\PYG{n}{seqFile}\PYG{p}{,} \PYG{n}{blastResultFile}\PYG{p}{)}\PYG{p}{:}
+ \PYG{l+s+sd}{\PYGZdq{}\PYGZdq{}\PYGZdq{}Run blast\PYGZdq{}\PYGZdq{}\PYGZdq{}}
+ \PYG{n}{os}\PYG{o}{.}\PYG{n}{system}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{blastall \PYGZhy{}p blastp \PYGZhy{}d }\PYG{l+s+si}{\PYGZpc{}s}\PYG{l+s}{ \PYGZhy{}i }\PYG{l+s+si}{\PYGZpc{}s}\PYG{l+s}{ \PYGZgt{} }\PYG{l+s+si}{\PYGZpc{}s}\PYG{l+s}{\PYGZdq{}} \PYG{o}{\PYGZpc{}}
+ \PYG{p}{(}\PYG{n}{database\PYGZus{}file}\PYG{p}{,} \PYG{n}{seqFile}\PYG{p}{,} \PYG{n}{blastResultFile}\PYG{p}{)}\PYG{p}{)}
+
+
+\PYG{n+nd}{@merge}\PYG{p}{(}\PYG{n}{runBlast}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{final.blast\PYGZus{}results}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{combineBlastResults} \PYG{p}{(}\PYG{n}{blastResultFiles}\PYG{p}{,} \PYG{n}{combinedBlastResultFile}\PYG{p}{)}\PYG{p}{:}
+ \PYG{l+s+sd}{\PYGZdq{}\PYGZdq{}\PYGZdq{}Combine blast results\PYGZdq{}\PYGZdq{}\PYGZdq{}}
+ \PYG{n}{output\PYGZus{}file} \PYG{o}{=} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{combinedBlastResultFile}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+ \PYG{k}{for} \PYG{n}{i} \PYG{o+ow}{in} \PYG{n}{blastResultFiles}\PYG{p}{:}
+ \PYG{n}{output\PYGZus{}file}\PYG{o}{.}\PYG{n}{write}\PYG{p}{(}\PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{i}\PYG{p}{)}\PYG{o}{.}\PYG{n}{read}\PYG{p}{(}\PYG{p}{)}\PYG{p}{)}
+
+
+\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{p}{[}\PYG{n}{combineBlastResults}\PYG{p}{]}\PYG{p}{,} \PYG{n}{verbose} \PYG{o}{=} \PYG{l+m+mi}{2}\PYG{p}{,} \PYG{n}{multiprocess} \PYG{o}{=} \PYG{l+m+mi}{4}\PYG{p}{)}
+
+
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} Simulate interuption of the pipeline by}
+\PYG{c}{\PYGZsh{} deleting the output of one of the BLAST jobs}
+\PYG{c}{\PYGZsh{}}
+\PYG{n}{os}\PYG{o}{.}\PYG{n}{unlink}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{4.blastResult}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+
+\PYG{n}{pipeline\PYGZus{}printout}\PYG{p}{(}\PYG{n}{sys}\PYG{o}{.}\PYG{n}{stdout}\PYG{p}{,} \PYG{p}{[}\PYG{n}{combineBlastResults}\PYG{p}{]}\PYG{p}{,} \PYG{n}{verbose} \PYG{o}{=} \PYG{l+m+mi}{4}\PYG{p}{)}
+
+
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} Re\PYGZhy{}running the pipeline}
+\PYG{c}{\PYGZsh{}}
+\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{p}{[}\PYG{n}{combineBlastResults}\PYG{p}{]}\PYG{p}{,} \PYG{n}{verbose} \PYG{o}{=} \PYG{l+m+mi}{2}\PYG{p}{,} \PYG{n}{multiprocess} \PYG{o}{=} \PYG{l+m+mi}{4}\PYG{p}{)}
+\end{Verbatim}
+
+
+\section{Ruffus code}
+\label{examples/bioinformatics/part2_code:ruffus-code}\label{examples/bioinformatics/part2_code:examples-bioinformatics-part2-code}\label{examples/bioinformatics/part2_code::doc}
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{c}{\PYGZsh{}!/usr/bin/env python}
+\PYG{l+s+sd}{\PYGZdq{}\PYGZdq{}\PYGZdq{}}
+
+\PYG{l+s+sd}{ run\PYGZus{}parallel\PYGZus{}blast.py}
+\PYG{l+s+sd}{ [\PYGZhy{}\PYGZhy{}log\PYGZus{}file PATH]}
+\PYG{l+s+sd}{ [\PYGZhy{}\PYGZhy{}quiet]}
+
+\PYG{l+s+sd}{\PYGZdq{}\PYGZdq{}\PYGZdq{}}
+
+\PYG{c}{\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh [...]
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} run\PYGZus{}parallel\PYGZus{}blast}
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} Copyright (c) 4/21/2010 Leo Goodstadt}
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} Permission is hereby granted, free of charge, to any person obtaining a copy}
+\PYG{c}{\PYGZsh{} of this software and associated documentation files (the \PYGZdq{}Software\PYGZdq{}), to deal}
+\PYG{c}{\PYGZsh{} in the Software without restriction, including without limitation the rights}
+\PYG{c}{\PYGZsh{} to use, copy, modify, merge, publish, distribute, sublicense, and/or sell}
+\PYG{c}{\PYGZsh{} copies of the Software, and to permit persons to whom the Software is}
+\PYG{c}{\PYGZsh{} furnished to do so, subject to the following conditions:}
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} The above copyright notice and this permission notice shall be included in}
+\PYG{c}{\PYGZsh{} all copies or substantial portions of the Software.}
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} THE SOFTWARE IS PROVIDED \PYGZdq{}AS IS\PYGZdq{}, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR}
+\PYG{c}{\PYGZsh{} IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,}
+\PYG{c}{\PYGZsh{} FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE}
+\PYG{c}{\PYGZsh{} AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER}
+\PYG{c}{\PYGZsh{} LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,}
+\PYG{c}{\PYGZsh{} OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN}
+\PYG{c}{\PYGZsh{} THE SOFTWARE.}
+\PYG{c}{\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh{}\PYGZsh [...]
+\PYG{k+kn}{import} \PYG{n+nn}{os}\PYG{o}{,} \PYG{n+nn}{sys}
+\PYG{n}{exe\PYGZus{}path} \PYG{o}{=} \PYG{n}{os}\PYG{o}{.}\PYG{n}{path}\PYG{o}{.}\PYG{n}{split}\PYG{p}{(}\PYG{n}{os}\PYG{o}{.}\PYG{n}{path}\PYG{o}{.}\PYG{n}{abspath}\PYG{p}{(}\PYG{n}{sys}\PYG{o}{.}\PYG{n}{argv}\PYG{p}{[}\PYG{l+m+mi}{0}\PYG{p}{]}\PYG{p}{)}\PYG{p}{)}\PYG{p}{[}\PYG{l+m+mi}{0}\PYG{p}{]}
+\PYG{n}{sys}\PYG{o}{.}\PYG{n}{path}\PYG{o}{.}\PYG{n}{insert}\PYG{p}{(}\PYG{l+m+mi}{0}\PYG{p}{,}\PYG{n}{os}\PYG{o}{.}\PYG{n}{path}\PYG{o}{.}\PYG{n}{abspath}\PYG{p}{(}\PYG{n}{os}\PYG{o}{.}\PYG{n}{path}\PYG{o}{.}\PYG{n}{join}\PYG{p}{(}\PYG{n}{exe\PYGZus{}path}\PYG{p}{,}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{..}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{..}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{)}\PYG{p}{)}
+
+
+\PYG{c}{\PYGZsh{}88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888}
+
+\PYG{c}{\PYGZsh{} options}
+
+
+\PYG{c}{\PYGZsh{}88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888}
+
+\PYG{k+kn}{from} \PYG{n+nn}{optparse} \PYG{k+kn}{import} \PYG{n}{OptionParser}
+\PYG{k+kn}{import} \PYG{n+nn}{sys}\PYG{o}{,} \PYG{n+nn}{os}
+
+\PYG{n}{exe\PYGZus{}path} \PYG{o}{=} \PYG{n}{os}\PYG{o}{.}\PYG{n}{path}\PYG{o}{.}\PYG{n}{split}\PYG{p}{(}\PYG{n}{os}\PYG{o}{.}\PYG{n}{path}\PYG{o}{.}\PYG{n}{abspath}\PYG{p}{(}\PYG{n}{sys}\PYG{o}{.}\PYG{n}{argv}\PYG{p}{[}\PYG{l+m+mi}{0}\PYG{p}{]}\PYG{p}{)}\PYG{p}{)}\PYG{p}{[}\PYG{l+m+mi}{0}\PYG{p}{]}
+
+
+\PYG{n}{parser} \PYG{o}{=} \PYG{n}{OptionParser}\PYG{p}{(}\PYG{n}{version}\PYG{o}{=}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZpc{}}\PYG{l+s}{prog 1.0}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{n}{usage} \PYG{o}{=} \PYG{l+s}{\PYGZdq{}}\PYG{l+s+se}{\PYGZbs{}n}\PYG{l+s+se}{\PYGZbs{}n}\PYG{l+s}{ }\PYG{l+s}{\PYGZpc{}}\PYG{l+s}{prog \PYGZhy{}\PYGZhy{}input\PYGZus{}file QUERY\PYGZus{}FASTA \PYGZhy{}\PYGZhy{}database\PYGZus{}file FASTA\PYGZus{}DATABASE [more\PYGZus{}options]}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{n}{parser}\PYG{o}{.}\PYG{n}{add\PYGZus{}option}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZhy{}i}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZhy{}\PYGZhy{}input\PYGZus{}file}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{n}{dest}\PYG{o}{=}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{input\PYGZus{}file}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{n}{metavar}\PYG{o}{=}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{FILE}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{n+nb}{type}\PYG{o}{=}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{string}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{n}{help}\PYG{o}{=}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{Name and path of query sequence file in FASTA format. }\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{n}{parser}\PYG{o}{.}\PYG{n}{add\PYGZus{}option}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZhy{}d}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZhy{}\PYGZhy{}database\PYGZus{}file}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{n}{dest}\PYG{o}{=}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{database\PYGZus{}file}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{n}{metavar}\PYG{o}{=}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{FILE}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{n+nb}{type}\PYG{o}{=}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{string}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{n}{help}\PYG{o}{=}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{Name and path of FASTA database to search. }\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{n}{parser}\PYG{o}{.}\PYG{n}{add\PYGZus{}option}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZhy{}\PYGZhy{}result\PYGZus{}file}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{n}{dest}\PYG{o}{=}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{result\PYGZus{}file}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{n}{metavar}\PYG{o}{=}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{FILE}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{n+nb}{type}\PYG{o}{=}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{string}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{n}{default}\PYG{o}{=}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{final.blast\PYGZus{}results}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{n}{help}\PYG{o}{=}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{Name and path of where the files should end up. }\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{n}{parser}\PYG{o}{.}\PYG{n}{add\PYGZus{}option}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZhy{}t}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZhy{}\PYGZhy{}temp\PYGZus{}directory}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{n}{dest}\PYG{o}{=}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{temp\PYGZus{}directory}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{n}{metavar}\PYG{o}{=}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{PATH}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{n+nb}{type}\PYG{o}{=}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{string}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{n}{default}\PYG{o}{=}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{tmp}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{n}{help}\PYG{o}{=}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{Name and path of temporary directory where calculations }\PYG{l+s}{\PYGZdq{}}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{should take place. }\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} general options: verbosity / logging}
+\PYG{c}{\PYGZsh{}}
+\PYG{n}{parser}\PYG{o}{.}\PYG{n}{add\PYGZus{}option}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZhy{}v}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZhy{}\PYGZhy{}verbose}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{n}{dest} \PYG{o}{=} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{verbose}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{n}{action}\PYG{o}{=}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{count}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{n}{default}\PYG{o}{=}\PYG{l+m+mi}{0}\PYG{p}{,}
+ \PYG{n}{help}\PYG{o}{=}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{Print more detailed messages for each additional verbose level.}\PYG{l+s}{\PYGZdq{}}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{ E.g. run\PYGZus{}parallel\PYGZus{}blast \PYGZhy{}\PYGZhy{}verbose \PYGZhy{}\PYGZhy{}verbose \PYGZhy{}\PYGZhy{}verbose ... (or \PYGZhy{}vvv)}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} pipeline}
+\PYG{c}{\PYGZsh{}}
+\PYG{n}{parser}\PYG{o}{.}\PYG{n}{add\PYGZus{}option}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZhy{}j}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZhy{}\PYGZhy{}jobs}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{n}{dest}\PYG{o}{=}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{jobs}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{n}{default}\PYG{o}{=}\PYG{l+m+mi}{1}\PYG{p}{,}
+ \PYG{n}{metavar}\PYG{o}{=}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{jobs}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{n+nb}{type}\PYG{o}{=}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{int}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{n}{help}\PYG{o}{=}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{Specifies the number of jobs (operations) to run in parallel.}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{n}{parser}\PYG{o}{.}\PYG{n}{add\PYGZus{}option}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZhy{}\PYGZhy{}flowchart}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{n}{dest}\PYG{o}{=}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{flowchart}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{n}{metavar}\PYG{o}{=}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{FILE}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{n+nb}{type}\PYG{o}{=}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{string}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{n}{help}\PYG{o}{=}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{Print flowchart of the pipeline to FILE. Flowchart format }\PYG{l+s}{\PYGZdq{}}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{depends on extension. Alternatives include (}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{.dot}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{, }\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{.jpg}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{, }\PYG{l+s}{\PYGZdq{}}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{*.svg}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{, }\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{*.png}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{ etc). Formats other than }\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{.dot}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{ require }\PYG{l+s}{\PYGZdq{}}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{the dot program to be installed (http://www.graphviz.org/).}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{n}{parser}\PYG{o}{.}\PYG{n}{add\PYGZus{}option}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZhy{}n}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZhy{}\PYGZhy{}just\PYGZus{}print}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{n}{dest}\PYG{o}{=}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{just\PYGZus{}print}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{n}{action}\PYG{o}{=}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{store\PYGZus{}true}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{n}{default}\PYG{o}{=}\PYG{n+nb+bp}{False}\PYG{p}{,}
+ \PYG{n}{help}\PYG{o}{=}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{Only print a trace (description) of the pipeline. }\PYG{l+s}{\PYGZdq{}}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{ The level of detail is set by \PYGZhy{}\PYGZhy{}verbose.}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+
+\PYG{p}{(}\PYG{n}{options}\PYG{p}{,} \PYG{n}{remaining\PYGZus{}args}\PYG{p}{)} \PYG{o}{=} \PYG{n}{parser}\PYG{o}{.}\PYG{n}{parse\PYGZus{}args}\PYG{p}{(}\PYG{p}{)}
+
+
+\PYG{k}{if} \PYG{o+ow}{not} \PYG{n}{options}\PYG{o}{.}\PYG{n}{flowchart}\PYG{p}{:}
+ \PYG{k}{if} \PYG{o+ow}{not} \PYG{n}{options}\PYG{o}{.}\PYG{n}{database\PYGZus{}file}\PYG{p}{:}
+ \PYG{n}{parser}\PYG{o}{.}\PYG{n}{error}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s+se}{\PYGZbs{}n}\PYG{l+s+se}{\PYGZbs{}n}\PYG{l+s+se}{\PYGZbs{}t}\PYG{l+s}{Missing parameter \PYGZhy{}\PYGZhy{}database\PYGZus{}file FILE}\PYG{l+s+se}{\PYGZbs{}n}\PYG{l+s+se}{\PYGZbs{}n}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+ \PYG{k}{if} \PYG{o+ow}{not} \PYG{n}{options}\PYG{o}{.}\PYG{n}{input\PYGZus{}file}\PYG{p}{:}
+ \PYG{n}{parser}\PYG{o}{.}\PYG{n}{error}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s+se}{\PYGZbs{}n}\PYG{l+s+se}{\PYGZbs{}n}\PYG{l+s+se}{\PYGZbs{}t}\PYG{l+s}{Missing parameter \PYGZhy{}\PYGZhy{}input\PYGZus{}file FILE}\PYG{l+s+se}{\PYGZbs{}n}\PYG{l+s+se}{\PYGZbs{}n}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+
+\PYG{c}{\PYGZsh{}88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888}
+
+\PYG{c}{\PYGZsh{} imports}
+
+
+\PYG{c}{\PYGZsh{}88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888}
+
+\PYG{k+kn}{from} \PYG{n+nn}{ruffus} \PYG{k+kn}{import} \PYG{o}{*}
+\PYG{k+kn}{import} \PYG{n+nn}{subprocess}
+
+
+
+\PYG{c}{\PYGZsh{}88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888}
+
+\PYG{c}{\PYGZsh{} Functions}
+
+
+\PYG{c}{\PYGZsh{}88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888}
+\PYG{k}{def} \PYG{n+nf}{run\PYGZus{}cmd}\PYG{p}{(}\PYG{n}{cmd\PYGZus{}str}\PYG{p}{)}\PYG{p}{:}
+ \PYG{l+s+sd}{\PYGZdq{}\PYGZdq{}\PYGZdq{}}
+\PYG{l+s+sd}{ Throw exception if run command fails}
+\PYG{l+s+sd}{ \PYGZdq{}\PYGZdq{}\PYGZdq{}}
+ \PYG{n}{process} \PYG{o}{=} \PYG{n}{subprocess}\PYG{o}{.}\PYG{n}{Popen}\PYG{p}{(}\PYG{n}{cmd\PYGZus{}str}\PYG{p}{,} \PYG{n}{stdout} \PYG{o}{=} \PYG{n}{subprocess}\PYG{o}{.}\PYG{n}{PIPE}\PYG{p}{,}
+ \PYG{n}{stderr} \PYG{o}{=} \PYG{n}{subprocess}\PYG{o}{.}\PYG{n}{PIPE}\PYG{p}{,} \PYG{n}{shell} \PYG{o}{=} \PYG{n+nb+bp}{True}\PYG{p}{)}
+ \PYG{n}{stdout\PYGZus{}str}\PYG{p}{,} \PYG{n}{stderr\PYGZus{}str} \PYG{o}{=} \PYG{n}{process}\PYG{o}{.}\PYG{n}{communicate}\PYG{p}{(}\PYG{p}{)}
+ \PYG{k}{if} \PYG{n}{process}\PYG{o}{.}\PYG{n}{returncode} \PYG{o}{!=} \PYG{l+m+mi}{0}\PYG{p}{:}
+ \PYG{k}{raise} \PYG{n+ne}{Exception}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{Failed to run }\PYG{l+s}{\PYGZsq{}}\PYG{l+s+si}{\PYGZpc{}s}\PYG{l+s}{\PYGZsq{}}\PYG{l+s+se}{\PYGZbs{}n}\PYG{l+s+si}{\PYGZpc{}s}\PYG{l+s+si}{\PYGZpc{}s}\PYG{l+s}{Non\PYGZhy{}zero exit status }\PYG{l+s+si}{\PYGZpc{}s}\PYG{l+s}{\PYGZdq{}} \PYG{o}{\PYGZpc{}}
+ \PYG{p}{(}\PYG{n}{cmd\PYGZus{}str}\PYG{p}{,} \PYG{n}{stdout\PYGZus{}str}\PYG{p}{,} \PYG{n}{stderr\PYGZus{}str}\PYG{p}{,} \PYG{n}{process}\PYG{o}{.}\PYG{n}{returncode}\PYG{p}{)}\PYG{p}{)}
+
+
+\PYG{c}{\PYGZsh{}88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888}
+
+\PYG{c}{\PYGZsh{} Logger}
+
+
+\PYG{c}{\PYGZsh{}88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888}
+
+\PYG{k+kn}{import} \PYG{n+nn}{logging}
+\PYG{n}{logger} \PYG{o}{=} \PYG{n}{logging}\PYG{o}{.}\PYG{n}{getLogger}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{run\PYGZus{}parallel\PYGZus{}blast}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} We are interesting in all messages}
+\PYG{c}{\PYGZsh{}}
+\PYG{k}{if} \PYG{n}{options}\PYG{o}{.}\PYG{n}{verbose}\PYG{p}{:}
+ \PYG{n}{logger}\PYG{o}{.}\PYG{n}{setLevel}\PYG{p}{(}\PYG{n}{logging}\PYG{o}{.}\PYG{n}{DEBUG}\PYG{p}{)}
+ \PYG{n}{stderrhandler} \PYG{o}{=} \PYG{n}{logging}\PYG{o}{.}\PYG{n}{StreamHandler}\PYG{p}{(}\PYG{n}{sys}\PYG{o}{.}\PYG{n}{stderr}\PYG{p}{)}
+ \PYG{n}{stderrhandler}\PYG{o}{.}\PYG{n}{setFormatter}\PYG{p}{(}\PYG{n}{logging}\PYG{o}{.}\PYG{n}{Formatter}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{ }\PYG{l+s+si}{\PYGZpc{}(message)s}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{)}
+ \PYG{n}{stderrhandler}\PYG{o}{.}\PYG{n}{setLevel}\PYG{p}{(}\PYG{n}{logging}\PYG{o}{.}\PYG{n}{DEBUG}\PYG{p}{)}
+ \PYG{n}{logger}\PYG{o}{.}\PYG{n}{addHandler}\PYG{p}{(}\PYG{n}{stderrhandler}\PYG{p}{)}
+
+
+
+\PYG{c}{\PYGZsh{}88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888}
+
+\PYG{c}{\PYGZsh{} Pipeline tasks}
+
+
+\PYG{c}{\PYGZsh{}88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888}
+\PYG{n}{original\PYGZus{}fasta} \PYG{o}{=} \PYG{n}{options}\PYG{o}{.}\PYG{n}{input\PYGZus{}file}
+\PYG{n}{database\PYGZus{}file} \PYG{o}{=} \PYG{n}{options}\PYG{o}{.}\PYG{n}{database\PYGZus{}file}
+\PYG{n}{temp\PYGZus{}directory} \PYG{o}{=} \PYG{n}{options}\PYG{o}{.}\PYG{n}{temp\PYGZus{}directory}
+\PYG{n}{result\PYGZus{}file} \PYG{o}{=} \PYG{n}{options}\PYG{o}{.}\PYG{n}{result\PYGZus{}file}
+
+\PYG{n+nd}{@follows}\PYG{p}{(}\PYG{n}{mkdir}\PYG{p}{(}\PYG{n}{temp\PYGZus{}directory}\PYG{p}{)}\PYG{p}{)}
+
+\PYG{n+nd}{@split}\PYG{p}{(}\PYG{n}{original\PYGZus{}fasta}\PYG{p}{,} \PYG{n}{os}\PYG{o}{.}\PYG{n}{path}\PYG{o}{.}\PYG{n}{join}\PYG{p}{(}\PYG{n}{temp\PYGZus{}directory}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{*.segment}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{splitFasta} \PYG{p}{(}\PYG{n}{seqFile}\PYG{p}{,} \PYG{n}{segments}\PYG{p}{)}\PYG{p}{:}
+ \PYG{l+s+sd}{\PYGZdq{}\PYGZdq{}\PYGZdq{}Split sequence file into}
+\PYG{l+s+sd}{ as many fragments as appropriate}
+\PYG{l+s+sd}{ depending on the size of original\PYGZus{}fasta\PYGZdq{}\PYGZdq{}\PYGZdq{}}
+ \PYG{c}{\PYGZsh{}}
+ \PYG{c}{\PYGZsh{} Clean up any segment files from previous runs before creating new one}
+ \PYG{c}{\PYGZsh{}}
+ \PYG{k}{for} \PYG{n}{i} \PYG{o+ow}{in} \PYG{n}{segments}\PYG{p}{:}
+ \PYG{n}{os}\PYG{o}{.}\PYG{n}{unlink}\PYG{p}{(}\PYG{n}{i}\PYG{p}{)}
+ \PYG{c}{\PYGZsh{}}
+ \PYG{n}{current\PYGZus{}file\PYGZus{}index} \PYG{o}{=} \PYG{l+m+mi}{0}
+ \PYG{k}{for} \PYG{n}{line} \PYG{o+ow}{in} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{original\PYGZus{}fasta}\PYG{p}{)}\PYG{p}{:}
+ \PYG{c}{\PYGZsh{}}
+ \PYG{c}{\PYGZsh{} start a new file for each accession line}
+ \PYG{c}{\PYGZsh{}}
+ \PYG{k}{if} \PYG{n}{line}\PYG{p}{[}\PYG{l+m+mi}{0}\PYG{p}{]} \PYG{o}{==} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{\PYGZgt{}}\PYG{l+s}{\PYGZsq{}}\PYG{p}{:}
+ \PYG{n}{current\PYGZus{}file\PYGZus{}index} \PYG{o}{+}\PYG{o}{=} \PYG{l+m+mi}{1}
+ \PYG{n}{file\PYGZus{}name} \PYG{o}{=} \PYG{l+s}{\PYGZdq{}}\PYG{l+s+si}{\PYGZpc{}d}\PYG{l+s}{.segment}\PYG{l+s}{\PYGZdq{}} \PYG{o}{\PYGZpc{}} \PYG{n}{current\PYGZus{}file\PYGZus{}index}
+ \PYG{n}{file\PYGZus{}path} \PYG{o}{=} \PYG{n}{os}\PYG{o}{.}\PYG{n}{path}\PYG{o}{.}\PYG{n}{join}\PYG{p}{(}\PYG{n}{temp\PYGZus{}directory}\PYG{p}{,} \PYG{n}{file\PYGZus{}name}\PYG{p}{)}
+ \PYG{n}{current\PYGZus{}file} \PYG{o}{=} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{file\PYGZus{}path}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+ \PYG{n}{current\PYGZus{}file}\PYG{o}{.}\PYG{n}{write}\PYG{p}{(}\PYG{n}{line}\PYG{p}{)}
+
+
+\PYG{n+nd}{@transform}\PYG{p}{(}\PYG{n}{splitFasta}\PYG{p}{,} \PYG{n}{suffix}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.segment}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.blastResult}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.blastSuccess}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{runBlast}\PYG{p}{(}\PYG{n}{seqFile}\PYG{p}{,} \PYG{n}{output\PYGZus{}files}\PYG{p}{)}\PYG{p}{:}
+ \PYG{c}{\PYGZsh{}}
+ \PYG{n}{blastResultFile}\PYG{p}{,} \PYG{n}{flag\PYGZus{}file} \PYG{o}{=} \PYG{n}{output\PYGZus{}files}
+ \PYG{c}{\PYGZsh{}}
+ \PYG{n}{run\PYGZus{}cmd}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{blastall \PYGZhy{}p blastp \PYGZhy{}d human.protein.faa \PYGZhy{}i }\PYG{l+s+si}{\PYGZpc{}s}\PYG{l+s}{ \PYGZgt{} }\PYG{l+s+si}{\PYGZpc{}s}\PYG{l+s}{\PYGZdq{}} \PYG{o}{\PYGZpc{}} \PYG{p}{(}\PYG{n}{seqFile}\PYG{p}{,} \PYG{n}{blastResultFile}\PYG{p}{)}\PYG{p}{)}
+ \PYG{c}{\PYGZsh{}}
+ \PYG{c}{\PYGZsh{} \PYGZdq{}touch\PYGZdq{} flag file to indicate success}
+ \PYG{c}{\PYGZsh{}}
+ \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{flag\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+
+
+\PYG{n+nd}{@merge}\PYG{p}{(}\PYG{n}{runBlast}\PYG{p}{,} \PYG{n}{result\PYGZus{}file}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{combineBlastResults} \PYG{p}{(}\PYG{n}{blastResult\PYGZus{}and\PYGZus{}flag\PYGZus{}Files}\PYG{p}{,} \PYG{n}{combinedBlastResultFile}\PYG{p}{)}\PYG{p}{:}
+ \PYG{l+s+sd}{\PYGZdq{}\PYGZdq{}\PYGZdq{}Combine blast results\PYGZdq{}\PYGZdq{}\PYGZdq{}}
+ \PYG{c}{\PYGZsh{}}
+ \PYG{n}{output\PYGZus{}file} \PYG{o}{=} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{combinedBlastResultFile}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+ \PYG{k}{for} \PYG{n}{blastResult\PYGZus{}file}\PYG{p}{,} \PYG{n}{flag\PYGZus{}file} \PYG{o+ow}{in} \PYG{n}{blastResult\PYGZus{}and\PYGZus{}flag\PYGZus{}Files}\PYG{p}{:}
+ \PYG{n}{output\PYGZus{}file}\PYG{o}{.}\PYG{n}{write}\PYG{p}{(}\PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{blastResult\PYGZus{}file}\PYG{p}{)}\PYG{o}{.}\PYG{n}{read}\PYG{p}{(}\PYG{p}{)}\PYG{p}{)}
+
+
+
+
+
+
+
+
+
+\PYG{c}{\PYGZsh{}88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888}
+
+\PYG{c}{\PYGZsh{} Print list of tasks}
+
+\PYG{c}{\PYGZsh{}88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888}
+\PYG{k}{if} \PYG{n}{options}\PYG{o}{.}\PYG{n}{just\PYGZus{}print}\PYG{p}{:}
+ \PYG{n}{pipeline\PYGZus{}printout}\PYG{p}{(}\PYG{n}{sys}\PYG{o}{.}\PYG{n}{stdout}\PYG{p}{,} \PYG{p}{[}\PYG{n}{combineBlastResults}\PYG{p}{]}\PYG{p}{,} \PYG{n}{verbose}\PYG{o}{=}\PYG{n}{options}\PYG{o}{.}\PYG{n}{verbose}\PYG{p}{)}
+
+
+\PYG{c}{\PYGZsh{}88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888}
+
+\PYG{c}{\PYGZsh{} Print flowchart}
+
+\PYG{c}{\PYGZsh{}88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888}
+\PYG{k}{elif} \PYG{n}{options}\PYG{o}{.}\PYG{n}{flowchart}\PYG{p}{:}
+ \PYG{c}{\PYGZsh{} use file extension for output format}
+ \PYG{n}{output\PYGZus{}format} \PYG{o}{=} \PYG{n}{os}\PYG{o}{.}\PYG{n}{path}\PYG{o}{.}\PYG{n}{splitext}\PYG{p}{(}\PYG{n}{options}\PYG{o}{.}\PYG{n}{flowchart}\PYG{p}{)}\PYG{p}{[}\PYG{l+m+mi}{1}\PYG{p}{]}\PYG{p}{[}\PYG{l+m+mi}{1}\PYG{p}{:}\PYG{p}{]}
+ \PYG{n}{pipeline\PYGZus{}printout\PYGZus{}graph} \PYG{p}{(}\PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{options}\PYG{o}{.}\PYG{n}{flowchart}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,}
+ \PYG{n}{output\PYGZus{}format}\PYG{p}{,}
+ \PYG{p}{[}\PYG{n}{combineBlastResults}\PYG{p}{]}\PYG{p}{,}
+ \PYG{n}{no\PYGZus{}key\PYGZus{}legend} \PYG{o}{=} \PYG{n+nb+bp}{True}\PYG{p}{)}
+\PYG{c}{\PYGZsh{}88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888}
+
+\PYG{c}{\PYGZsh{} Run Pipeline}
+
+\PYG{c}{\PYGZsh{}88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888}
+\PYG{k}{else}\PYG{p}{:}
+ \PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{p}{[}\PYG{n}{combineBlastResults}\PYG{p}{]}\PYG{p}{,} \PYG{n}{multiprocess} \PYG{o}{=} \PYG{n}{options}\PYG{o}{.}\PYG{n}{jobs}\PYG{p}{,}
+ \PYG{n}{logger} \PYG{o}{=} \PYG{n}{logger}\PYG{p}{,} \PYG{n}{verbose}\PYG{o}{=}\PYG{n}{options}\PYG{o}{.}\PYG{n}{verbose}\PYG{p}{)}
+\end{Verbatim}
+
+
+\section{Example code for \emph{FAQ Good practices: ``What is the best way of handling data in file pairs (or triplets etc.)?''}}
+\label{examples/paired_end_data.py:example-code-for-faq-good-practices-what-is-the-best-way-of-handling-data-in-file-pairs-or-triplets-etc}\label{examples/paired_end_data.py:faq-paired-files-code}\label{examples/paired_end_data.py::doc}\begin{quote}
+
+
+\strong{See also:}
+
+\begin{itemize}
+\item {}
+{\hyperref[tutorials/new_tutorial/subdivide_collate:new-manual-collate]{\emph{@collate}}}
+
+\end{itemize}
+
+
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{c}{\PYGZsh{}!/usr/bin/env python}
+\PYG{k+kn}{import} \PYG{n+nn}{sys}\PYG{o}{,} \PYG{n+nn}{os}
+
+\PYG{k+kn}{from} \PYG{n+nn}{ruffus} \PYG{k+kn}{import} \PYG{o}{*}
+\PYG{k+kn}{import} \PYG{n+nn}{ruffus.cmdline} \PYG{k+kn}{as} \PYG{n+nn}{cmdline}
+\PYG{k+kn}{from} \PYG{n+nn}{subprocess} \PYG{k+kn}{import} \PYG{n}{check\PYGZus{}call}
+
+\PYG{n}{parser} \PYG{o}{=} \PYG{n}{cmdline}\PYG{o}{.}\PYG{n}{get\PYGZus{}argparse}\PYG{p}{(}\PYG{n}{description}\PYG{o}{=}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{Parimala}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{s pipeline?}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+
+\PYG{c}{\PYGZsh{} .}
+\PYG{c}{\PYGZsh{} Very flexible handling of input files .}
+\PYG{c}{\PYGZsh{} .}
+\PYG{c}{\PYGZsh{} input files can be specified flexibly as: .}
+\PYG{c}{\PYGZsh{} \PYGZhy{}\PYGZhy{}input a.fastq b.fastq .}
+\PYG{c}{\PYGZsh{} \PYGZhy{}\PYGZhy{}input a.fastq \PYGZhy{}\PYGZhy{}input b.fastq .}
+\PYG{c}{\PYGZsh{} \PYGZhy{}\PYGZhy{}input *.fastq \PYGZhy{}\PYGZhy{}input other/*.fastq .}
+\PYG{c}{\PYGZsh{} \PYGZhy{}\PYGZhy{}input \PYGZdq{}*.fastq\PYGZdq{} .}
+\PYG{c}{\PYGZsh{} .}
+\PYG{c}{\PYGZsh{} The last form is expanded in the script and avoids limitations on command .}
+\PYG{c}{\PYGZsh{} line lengths .}
+\PYG{c}{\PYGZsh{} .}
+\PYG{n}{parser}\PYG{o}{.}\PYG{n}{add\PYGZus{}argument}\PYG{p}{(}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{\PYGZhy{}i}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{\PYGZhy{}\PYGZhy{}input}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{n}{nargs}\PYG{o}{=}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{+}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{n}{metavar}\PYG{o}{=}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{FILE}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{n}{action}\PYG{o}{=}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{append}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} [...]
+
+\PYG{n}{options} \PYG{o}{=} \PYG{n}{parser}\PYG{o}{.}\PYG{n}{parse\PYGZus{}args}\PYG{p}{(}\PYG{p}{)}
+
+\PYG{c}{\PYGZsh{} standard python logger which can be synchronised across concurrent Ruffus tasks}
+\PYG{n}{logger}\PYG{p}{,} \PYG{n}{logger\PYGZus{}mutex} \PYG{o}{=} \PYG{n}{cmdline}\PYG{o}{.}\PYG{n}{setup\PYGZus{}logging} \PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{PARIMALA}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{n}{options}\PYG{o}{.}\PYG{n}{log\PYGZus{}file}\PYG{p}{,} \PYG{n}{options}\PYG{o}{.}\PYG{n}{verbose}\PYG{p}{)}
+
+\PYG{c}{\PYGZsh{} .}
+\PYG{c}{\PYGZsh{} Useful code to turn input files into a flat list .}
+\PYG{c}{\PYGZsh{} .}
+\PYG{k+kn}{from} \PYG{n+nn}{glob} \PYG{k+kn}{import} \PYG{n}{glob}
+\PYG{n}{original\PYGZus{}data\PYGZus{}files} \PYG{o}{=} \PYG{p}{[}\PYG{n}{fn} \PYG{k}{for} \PYG{n}{grouped} \PYG{o+ow}{in} \PYG{n}{options}\PYG{o}{.}\PYG{n}{input} \PYG{k}{for} \PYG{n}{glob\PYGZus{}spec} \PYG{o+ow}{in} \PYG{n}{grouped} \PYG{k}{for} \PYG{n}{fn} \PYG{o+ow}{in} \PYG{n}{glob}\PYG{p}{(}\PYG{n}{glob\PYGZus{}spec}\PYG{p}{)}\PYG{p}{]} \PYG{k}{if} \PYG{n}{options}\PYG{o}{.}\PYG{n}{input} \PYG{k}{else} \PYG{p}{[}\PYG{p}{]}
+\PYG{k}{if} \PYG{o+ow}{not} \PYG{n}{original\PYGZus{}data\PYGZus{}files}\PYG{p}{:}
+ \PYG{n}{original\PYGZus{}data\PYGZus{}files} \PYG{o}{=} \PYG{p}{[}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{C1W1\PYGZus{}R1.fastq.gz}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{C1W1\PYGZus{}R2.fastq.gz}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{]}
+ \PYG{c}{\PYGZsh{}raise Exception (\PYGZdq{}No matching files specified with \PYGZhy{}\PYGZhy{}input.\PYGZdq{})}
+
+\PYG{c}{\PYGZsh{} \PYGZlt{}\PYGZlt{}\PYGZlt{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{} pipelined functions go here}
+
+\PYG{c}{\PYGZsh{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus [...]
+\PYG{c}{\PYGZsh{} .}
+\PYG{c}{\PYGZsh{} Group together file pairs .}
+\PYG{c}{\PYGZsh{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus [...]
+\PYG{n+nd}{@collate}\PYG{p}{(}\PYG{n}{original\PYGZus{}data\PYGZus{}files}\PYG{p}{,}
+ \PYG{c}{\PYGZsh{} match file name up to the \PYGZdq{}R1.fastq.gz\PYGZdq{}}
+ \PYG{n}{formatter}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{([\PYGZca{}/]+)R[12].fastq.gz\PYGZdl{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,}
+ \PYG{c}{\PYGZsh{} Create output parameter supplied to next task}
+ \PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}path[0]\PYGZcb{}/\PYGZob{}1[0]\PYGZcb{}paired.R1.fastq.gz}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{c}{\PYGZsh{} paired file 1}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}path[0]\PYGZcb{}/\PYGZob{}1[0]\PYGZcb{}paired.R2.fastq.gz}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{,} \PYG{c}{\PYGZsh{} paired file 2}
+ \PYG{c}{\PYGZsh{} Extra parameters for our own convenience and use}
+ \PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}path[0]\PYGZcb{}/\PYGZob{}1[0]\PYGZcb{}unpaired.R1.fastq.gz}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{c}{\PYGZsh{} unpaired file 1}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}path[0]\PYGZcb{}/\PYGZob{}1[0]\PYGZcb{}unpaired.R2.fastq.gz}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{,} \PYG{c}{\PYGZsh{} unpaired file 2}
+ \PYG{n}{logger}\PYG{p}{,} \PYG{n}{logger\PYGZus{}mutex}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{trim\PYGZus{}fastq}\PYG{p}{(}\PYG{n}{input\PYGZus{}files}\PYG{p}{,} \PYG{n}{output\PYGZus{}paired\PYGZus{}files}\PYG{p}{,} \PYG{n}{discarded\PYGZus{}unpaired\PYGZus{}files}\PYG{p}{,} \PYG{n}{logger}\PYG{p}{,} \PYG{n}{logger\PYGZus{}mutex}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{if} \PYG{n+nb}{len}\PYG{p}{(}\PYG{n}{input\PYGZus{}files}\PYG{p}{)} \PYG{o}{!=} \PYG{l+m+mi}{2}\PYG{p}{:}
+ \PYG{k}{raise} \PYG{n+ne}{Exception}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{One of read pairs }\PYG{l+s+si}{\PYGZpc{}s}\PYG{l+s}{ missing}\PYG{l+s}{\PYGZdq{}} \PYG{o}{\PYGZpc{}} \PYG{p}{(}\PYG{n}{input\PYGZus{}files}\PYG{p}{,}\PYG{p}{)}\PYG{p}{)}
+ \PYG{n}{cmd} \PYG{o}{=} \PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{java \PYGZhy{}jar \PYGZti{}/SPRING\PYGZhy{}SUMMER\PYGZus{}2014/Softwares/Trimmomatic/Trimmomatic\PYGZhy{}0.32/trimmomatic\PYGZhy{}0.32.jar }\PYG{l+s}{\PYGZdq{}}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{ PE \PYGZhy{}phred33 }\PYG{l+s}{\PYGZdq{}}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{ \PYGZob{}input\PYGZus{}files[0]\PYGZcb{} \PYGZob{}input\PYGZus{}files[1]\PYGZcb{} }\PYG{l+s}{\PYGZdq{}}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{ \PYGZob{}output\PYGZus{}paired\PYGZus{}files[0]\PYGZcb{} \PYGZob{}output\PYGZus{}paired\PYGZus{}files[1]\PYGZcb{} }\PYG{l+s}{\PYGZdq{}}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{ \PYGZob{}discarded\PYGZus{}unpaired\PYGZus{}files[0]\PYGZcb{} \PYGZob{}discarded\PYGZus{}unpaired\PYGZus{}files[1]\PYGZcb{} }\PYG{l+s}{\PYGZdq{}}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{ LEADING:30 TRAILING:30 SLIDINGWINDOW:4:15 MINLEN:50 }\PYG{l+s}{\PYGZdq{}}
+ \PYG{p}{)}
+
+ \PYG{n}{check\PYGZus{}call}\PYG{p}{(}\PYG{n}{cmd}\PYG{o}{.}\PYG{n}{format}\PYG{p}{(}\PYG{o}{*}\PYG{o}{*}\PYG{n+nb}{locals}\PYG{p}{(}\PYG{p}{)}\PYG{p}{)}\PYG{p}{)}
+
+ \PYG{k}{with} \PYG{n}{logger\PYGZus{}mutex}\PYG{p}{:}
+ \PYG{n}{logger}\PYG{o}{.}\PYG{n}{debug}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{Hooray trim\PYGZus{}fastq worked}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+
+\PYG{c}{\PYGZsh{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus [...]
+\PYG{c}{\PYGZsh{} .}
+\PYG{c}{\PYGZsh{} Each file pair now makes its way down the rest of the pipeline as .}
+\PYG{c}{\PYGZsh{} a couple .}
+\PYG{c}{\PYGZsh{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus{}\PYGZus [...]
+\PYG{n+nd}{@transform}\PYG{p}{(}\PYG{n}{trim\PYGZus{}fastq}\PYG{p}{,}
+ \PYG{c}{\PYGZsh{} regular expression match on first of pe files}
+ \PYG{n}{formatter}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{([\PYGZca{}/]+)paired.R1.fastq.gz\PYGZdl{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,}
+ \PYG{c}{\PYGZsh{} Output parameter supplied to next task}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}path[0]\PYGZcb{}/\PYGZob{}1[0]\PYGZcb{}.sam}\PYG{l+s}{\PYGZdq{}}
+
+ \PYG{c}{\PYGZsh{} Extra parameters for our own convenience and use}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}path[0]\PYGZcb{}/\PYGZob{}1[0]\PYGZcb{}.pe\PYGZus{}soap\PYGZus{}pe}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{c}{\PYGZsh{} soap intermediate file}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}path[0]\PYGZcb{}/\PYGZob{}1[0]\PYGZcb{}.pe\PYGZus{}soap\PYGZus{}se}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{c}{\PYGZsh{} soap intermediate file}
+ \PYG{n}{logger}\PYG{p}{,} \PYG{n}{logger\PYGZus{}mutex}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{align\PYGZus{}seq}\PYG{p}{(}\PYG{n}{input\PYGZus{}files}\PYG{p}{,} \PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{n}{soap\PYGZus{}pe\PYGZus{}output\PYGZus{}file}\PYG{p}{,} \PYG{n}{soap\PYGZus{}se\PYGZus{}output\PYGZus{}file}\PYG{p}{,} \PYG{n}{logger}\PYG{p}{,} \PYG{n}{logger\PYGZus{}mutex}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{if} \PYG{n+nb}{len}\PYG{p}{(}\PYG{n}{input\PYGZus{}files}\PYG{p}{)} \PYG{o}{!=} \PYG{l+m+mi}{2}\PYG{p}{:}
+ \PYG{k}{raise} \PYG{n+ne}{Exception}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{One of read pairs }\PYG{l+s+si}{\PYGZpc{}s}\PYG{l+s}{ missing}\PYG{l+s}{\PYGZdq{}} \PYG{o}{\PYGZpc{}} \PYG{p}{(}\PYG{n}{input\PYGZus{}files}\PYG{p}{,}\PYG{p}{)}\PYG{p}{)}
+ \PYG{n}{cmd} \PYG{o}{=} \PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZti{}/SPRING\PYGZhy{}SUMMER\PYGZus{}2014/Softwares/soap2.21release/soap }\PYG{l+s}{\PYGZdq{}}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{ \PYGZhy{}a \PYGZob{}input\PYGZus{}files[0]\PYGZcb{} }\PYG{l+s}{\PYGZdq{}}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{ \PYGZhy{}b \PYGZob{}input\PYGZus{}files[1]\PYGZcb{} }\PYG{l+s}{\PYGZdq{}}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{ \PYGZhy{}D Y55\PYGZus{}genome.fa.index* }\PYG{l+s}{\PYGZdq{}}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{ \PYGZhy{}o \PYGZob{}soap\PYGZus{}pe\PYGZus{}output\PYGZus{}file\PYGZcb{} \PYGZhy{}2 \PYGZob{}soap\PYGZus{}se\PYGZus{}output\PYGZus{}file\PYGZcb{} \PYGZhy{}m 400 \PYGZhy{}x 600}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+
+ \PYG{n}{check\PYGZus{}call}\PYG{p}{(}\PYG{n}{cmd}\PYG{o}{.}\PYG{n}{format}\PYG{p}{(}\PYG{o}{*}\PYG{o}{*}\PYG{n+nb}{locals}\PYG{p}{(}\PYG{p}{)}\PYG{p}{)}\PYG{p}{)}
+
+
+ \PYG{c}{\PYGZsh{}Soap\PYGZus{}to\PYGZus{}sam}
+ \PYG{n}{cmd} \PYG{o}{=} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{ perl \PYGZti{}/SPRING\PYGZhy{}SUMMER\PYGZus{}2014/Softwares/soap2sam.pl \PYGZhy{}p \PYGZob{}soap\PYGZus{}pe\PYGZus{}output\PYGZus{}file\PYGZcb{} \PYGZgt{} \PYGZob{}output\PYGZus{}file\PYGZcb{}}\PYG{l+s}{\PYGZdq{}}
+
+ \PYG{n}{check\PYGZus{}call}\PYG{p}{(}\PYG{n}{cmd}\PYG{o}{.}\PYG{n}{format}\PYG{p}{(}\PYG{o}{*}\PYG{o}{*}\PYG{n+nb}{locals}\PYG{p}{(}\PYG{p}{)}\PYG{p}{)}\PYG{p}{)}
+
+
+ \PYG{k}{with} \PYG{n}{logger\PYGZus{}mutex}\PYG{p}{:}
+ \PYG{n}{logger}\PYG{o}{.}\PYG{n}{debug}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{Hooray align\PYGZus{}seq worked}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+
+
+\PYG{n}{cmdline}\PYG{o}{.}\PYG{n}{run} \PYG{p}{(}\PYG{n}{options}\PYG{p}{)}
+\end{Verbatim}
+\end{quote}
+
+
+\chapter{Reference:}
+\label{contents:reference}
+
+\section{Decorators}
+\label{contents:decorators}
+
+\subsection{Ruffus Decorators}
+\label{decorators/decorators:glob}\label{decorators/decorators::doc}\label{decorators/decorators:ruffus-decorators}
+
+\strong{See also:}
+
+
+{\hyperref[decorators/indicator_objects:decorators-indicator-objects]{\emph{Indicator objects}}}
+
+
+
+
+\subsubsection{\emph{Core}}
+\label{decorators/decorators:core}\label{decorators/decorators:decorators}
+\begin{tabular}{|p{0.317\linewidth}|p{0.317\linewidth}|p{0.317\linewidth}|}
+\hline
+\textbf{\relax
+Decorator
+} & \textbf{\relax
+Examples
+} & \textbf{\relax }\\\hline
+
+\textbf{@originate} ({\hyperref[decorators/originate:decorators-originate]{\emph{Summary}}} / {\hyperref[tutorials/new_tutorial/originate:new-manual-originate]{\emph{Manual}}})
+\begin{itemize}
+\item {}
+Creates (originates) a set of starting files without dependencies from scratch (\emph{ex nihilo}!)
+
+\item {}
+Only called to create files which do not exist.
+
+\item {}
+Invoked once (a job is created) per item in the \code{output\_files} list.
+
+\end{itemize}
+ & \begin{itemize}
+\item {} \begin{description}
+\item[{{\hyperref[decorators/originate:decorators-originate]{\emph{@originate}}} ( \code{output\_files}, {[}\code{extra\_parameters},...{]} )}] \leavevmode
+
+
+\end{description}
+
+\end{itemize}
+ & \\\hline
+
+\textbf{@split} ({\hyperref[decorators/split:decorators-split]{\emph{Summary}}} / {\hyperref[tutorials/new_tutorial/split:new-manual-split]{\emph{Manual}}})
+\begin{itemize}
+\item {}
+Splits a single input into multiple outputs
+
+\item {}
+Globs in \code{output} can specify an indeterminate number of files.
+
+\end{itemize}
+ & \begin{itemize}
+\item {} \begin{description}
+\item[{{\hyperref[decorators/split:decorators-split]{\emph{@split}}} ( \code{tasks\_or\_file\_names}, \code{output\_files}, {[}\code{extra\_parameters},...{]} )}] \leavevmode
+
+
+\end{description}
+
+\end{itemize}
+ & \\\hline
+
+\textbf{@transform} ({\hyperref[decorators/transform:decorators-transform]{\emph{Summary}}} / {\hyperref[tutorials/new_tutorial/transform:new-manual-transform]{\emph{Manual}}})
+\begin{itemize}
+\item {}
+Applies the task function to transform input data to output.
+
+\end{itemize}
+ & \begin{itemize}
+\item {} \begin{description}
+\item[{{\hyperref[decorators/transform:decorators-transform]{\emph{@transform}}} ( \code{tasks\_or\_file\_names}, {\hyperref[decorators/transform_ex:decorators-transform-suffix-string]{\emph{suffix}}}\emph{(}\code{suffix\_string}\emph{)}, \code{output\_pattern}, {[}\code{extra\_parameters},...{]} )}] \leavevmode
+
+
+\end{description}
+
+\item {} \begin{description}
+\item[{{\hyperref[decorators/transform:decorators-transform]{\emph{@transform}}} ( \code{tasks\_or\_file\_names}, {\hyperref[decorators/transform_ex:decorators-transform-matching-regex]{\emph{regex}}}\emph{(}\code{regex\_pattern}\emph{)}, \code{output\_pattern}, {[}\code{extra\_parameters},...{]} )}] \leavevmode
+
+
+\end{description}
+
+\item {} \begin{description}
+\item[{{\hyperref[decorators/transform:decorators-transform]{\emph{@transform}}} ( \code{tasks\_or\_file\_names}, {\hyperref[decorators/transform_ex:decorators-transform-matching-formatter]{\emph{formatter}}}\emph{(}\code{regex\_pattern}\emph{)}, \code{output\_pattern}, {[}\code{extra\_parameters},...{]} )}] \leavevmode
+
+
+\end{description}
+
+\end{itemize}
+ & \\\hline
+
+\textbf{@merge} ({\hyperref[decorators/merge:decorators-merge]{\emph{Summary}}} / {\hyperref[tutorials/new_tutorial/merge:new-manual-merge]{\emph{Manual}}})
+\begin{itemize}
+\item {}
+Merges multiple input files into a single output.
+
+\end{itemize}
+ & \begin{itemize}
+\item {} \begin{description}
+\item[{{\hyperref[decorators/merge:decorators-merge]{\emph{@merge}}} (\code{tasks\_or\_file\_names}, \code{output}, {[}\code{extra\_parameters},...{]} )}] \leavevmode
+
+
+\end{description}
+
+\end{itemize}
+ & \\\hline
+\end{tabular}
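+
+The table above can be read as a minimal recipe: \code{@originate} creates the starting
+files, \code{@transform} processes them one by one, and \code{@merge} gathers the results.
+The following sketch is illustrative only (the \code{.start} / \code{.processed} file names
+are hypothetical, not part of the Ruffus distribution):
+
+\begin{Verbatim}
+from ruffus import *
+
+@originate(["a.start", "b.start"])
+def create_initial_files(output_file):
+    # one job per missing starting file
+    open(output_file, "w").close()
+
+@transform(create_initial_files, suffix(".start"), ".processed")
+def process(input_file, output_file):
+    # one job per *.start file
+    open(output_file, "w").close()
+
+@merge(process, "summary.all")
+def summarise(input_files, output_file):
+    # a single job combining every *.processed file
+    open(output_file, "w").close()
+
+pipeline_run()
+\end{Verbatim}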
+
+
+
+\subsubsection{\emph{Combinatorics}}
+\label{decorators/decorators:combinatorics}\label{decorators/decorators:decorators-combinatorics}
+\begin{tabular}{|p{0.317\linewidth}|p{0.317\linewidth}|p{0.317\linewidth}|}
+\hline
+\textbf{\relax
+Decorator
+} & \textbf{\relax
+Examples
+} & \textbf{\relax }\\\hline
+
+\textbf{@product} ({\hyperref[decorators/product:decorators-product]{\emph{Summary}}} / {\hyperref[tutorials/new_tutorial/combinatorics:new-manual-product]{\emph{Manual}}})
+\begin{itemize}
+\item {}
+Generates the \textbf{product}, i.e. all vs all comparisons, between sets of input files.
+
+\end{itemize}
+ & \begin{itemize}
+\item {} \begin{description}
+\item[{{\hyperref[decorators/product:decorators-product]{\emph{@product}}} ( \code{tasks\_or\_file\_names}, {\hyperref[decorators/product:decorators-product-matching-formatter]{\emph{formatter}}} \emph{({[}} \code{regex\_pattern} \emph{{]})} ,*{[}* \code{tasks\_or\_file\_names}, {\hyperref[decorators/product:decorators-product-matching-formatter]{\emph{formatter}}} \emph{({[}} \code{regex\_pattern} \emph{{]}), {]}}, \code{output\_pattern}, {[}\code{extra\_parameters},...{]} )}] \leavevmode
+
+
+\end{description}
+
+\end{itemize}
+ & \\\hline
+
+\textbf{@permutations} ({\hyperref[decorators/permutations:decorators-permutations]{\emph{Summary}}} / {\hyperref[tutorials/new_tutorial/combinatorics:new-manual-permutations]{\emph{Manual}}})
+\begin{itemize}
+\item {}
+Generates the \textbf{permutations} between all the elements of a set of \textbf{Input}
+
+\item {}
+Analogous to the python \href{http://docs.python.org/2/library/itertools.html\#itertools.permutations}{itertools.permutations}
+
+\item {}
+permutations(`ABCD', 2) --\textgreater{} AB AC AD BA BC BD CA CB CD DA DB DC
+
+\end{itemize}
+ & \begin{itemize}
+\item {} \begin{description}
+\item[{{\hyperref[decorators/permutations:decorators-permutations]{\emph{@permutations}}} ( \code{tasks\_or\_file\_names}, {\hyperref[decorators/product:decorators-product-matching-formatter]{\emph{formatter}}} \emph{({[}} \code{regex\_pattern} \emph{{]})}, \code{tuple\_size}, \code{output\_pattern}, {[}\code{extra\_parameters},...{]} )}] \leavevmode
+
+
+\end{description}
+
+\end{itemize}
+ & \\\hline
+
+\textbf{@combinations} ({\hyperref[decorators/combinations:decorators-combinations]{\emph{Summary}}} / {\hyperref[tutorials/new_tutorial/combinatorics:new-manual-combinations]{\emph{Manual}}})
+\begin{itemize}
+\item {}
+Generates the \textbf{combinations} between all the elements of a set of \textbf{Input}:
+i.e. r-length tuples of \emph{input} elements with no repeated elements (no \textbf{A A})
+and where the order within each tuple is irrelevant (either \textbf{A B} or \textbf{B A}, not both).
+
+\item {}
+Analogous to the python \href{http://docs.python.org/2/library/itertools.html\#itertools.combinations}{itertools.combinations}
+
+\item {}
+combinations(`ABCD', 3) --\textgreater{} ABC ABD ACD BCD
+
+\end{itemize}
+ & \begin{itemize}
+\item {} \begin{description}
+\item[{{\hyperref[decorators/permutations:decorators-permutations]{\emph{@combinations}}} ( \code{tasks\_or\_file\_names}, {\hyperref[decorators/product:decorators-product-matching-formatter]{\emph{formatter}}} \emph{({[}} \code{regex\_pattern} \emph{{]})}, \code{tuple\_size}, \code{output\_pattern}, {[}\code{extra\_parameters},...{]} )}] \leavevmode
+
+
+\end{description}
+
+\end{itemize}
+ & \\\hline
+
+\textbf{@combinations\_with\_replacement} ({\hyperref[decorators/combinations_with_replacement:decorators-combinations-with-replacement]{\emph{Summary}}} / {\hyperref[tutorials/new_tutorial/combinatorics:new-manual-combinations-with-replacement]{\emph{Manual}}})
+\begin{itemize}
+\item {}
+Generates the \textbf{combinations\_with\_replacement} between all the elements of a set of \textbf{Input}:
+i.e. r-length tuples of \emph{input} elements in which elements may be repeated (\textbf{A A} is allowed)
+but where the order within each tuple is irrelevant (either \textbf{A B} or \textbf{B A}, not both).
+
+\item {}
+Analogous to the python \href{http://docs.python.org/2/library/itertools.html\#itertools.combinations\_with\_replacement}{itertools.combinations\_with\_replacement}
+
+\item {}
+combinations\_with\_replacement(`ABCD', 2) --\textgreater{} AA AB AC AD BB BC BD CC CD DD
+
+\end{itemize}
+ & \begin{itemize}
+\item {} \begin{description}
+\item[{{\hyperref[decorators/permutations:decorators-permutations]{\emph{@combinations\_with\_replacement}}} ( \code{tasks\_or\_file\_names}, {\hyperref[decorators/product:decorators-product-matching-formatter]{\emph{formatter}}} \emph{({[}} \code{regex\_pattern} \emph{{]})}, \code{tuple\_size}, \code{output\_pattern}, {[}\code{extra\_parameters},...{]} )}] \leavevmode
+
+
+\end{description}
+
+\end{itemize}
+ & \\\hline
+\end{tabular}
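+
+As a quick illustration of the signatures above, the sketch below (with hypothetical
+\code{.A} / \code{.B} starting files) runs \code{@product} over two sets of inputs;
+note that \code{formatter} components are indexed first by input set and then by file:
+
+\begin{Verbatim}
+from ruffus import *
+from ruffus.combinatorics import *
+
+@originate(["1.A", "2.A"])
+def set_a(output_file):
+    open(output_file, "w").close()
+
+@originate(["x.B", "y.B"])
+def set_b(output_file):
+    open(output_file, "w").close()
+
+@product(set_a, formatter(),        # first set of Inputs
+         set_b, formatter(),        # second set of Inputs
+         "{basename[0][0]}_vs_{basename[1][0]}.out")
+def compare(input_files, output_file):
+    # one job per (A, B) pair: 2 x 2 = 4 jobs
+    open(output_file, "w").close()
+
+pipeline_run()
+\end{Verbatim}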
+
+
+
+\subsubsection{\emph{Advanced}}
+\label{decorators/decorators:advanced}\begin{quote}
+
+\begin{tabular}{|p{0.317\linewidth}|p{0.317\linewidth}|p{0.317\linewidth}|}
+\hline
+\textbf{\relax
+Decorator
+} & \textbf{\relax
+Examples
+} & \textbf{\relax }\\\hline
+
+\textbf{@subdivide} ({\hyperref[decorators/subdivide:decorators-subdivide]{\emph{Summary}}} / {\hyperref[tutorials/new_tutorial/subdivide_collate:new-manual-subdivide]{\emph{Manual}}})
+\begin{itemize}
+\item {}
+Subdivides each of a set of \emph{Inputs} further into multiple \emph{Outputs}.
+
+\item {}
+The number of files in each \emph{Output} can be set at runtime by the use of globs.
+
+\item {}
+\textbf{Many to Even More} operator.
+
+\item {}
+The use of \textbf{split} as a synonym for \textbf{subdivide} is deprecated.
+
+\end{itemize}
+ & \begin{itemize}
+\item {} \begin{description}
+\item[{{\hyperref[decorators/subdivide:decorators-subdivide]{\emph{@subdivide}}} ( \code{tasks\_or\_file\_names}, {\hyperref[decorators/subdivide:decorators-subdivide-matching-regex]{\emph{regex}}}\emph{(}\code{regex\_pattern}\emph{)}, {[} {\hyperref[decorators/indicator_objects:decorators-inputs]{\emph{inputs}}} \textbar{} {\hyperref[decorators/indicator_objects:decorators-add-inputs]{\emph{add\_inputs}}}\emph{(}\code{input\_pattern}\emph{)}, {]} \code{output\_pattern}, {[}\code{extra\_ [...]
+
+
+\end{description}
+
+\item {} \begin{description}
+\item[{{\hyperref[decorators/subdivide:decorators-subdivide]{\emph{@subdivide}}} ( \code{tasks\_or\_file\_names}, {\hyperref[decorators/subdivide:decorators-subdivide-matching-formatter]{\emph{formatter}}}\emph{(}{[}\code{regex\_pattern}{]} \emph{)}, {[} {\hyperref[decorators/indicator_objects:decorators-inputs]{\emph{inputs}}} \textbar{} {\hyperref[decorators/indicator_objects:decorators-add-inputs]{\emph{add\_inputs}}}\emph{(}\code{input\_pattern}\emph{)}, {]} \code{output\_pattern}, { [...]
+
+
+\end{description}
+
+\end{itemize}
+ & \\\hline
+
+\textbf{@transform} ({\hyperref[decorators/transform_ex:decorators-transform-ex]{\emph{Summary}}} / {\hyperref[tutorials/new_tutorial/inputs:new-manual-inputs]{\emph{Manual}}})
+\begin{itemize}
+\item {}
+Infers input as well as output from regular expression substitutions
+
+\item {}
+Useful for adding additional file dependencies
+
+\end{itemize}
+ & \begin{itemize}
+\item {} \begin{description}
+\item[{{\hyperref[decorators/transform_ex:decorators-transform-ex]{\emph{@transform}}} ( \code{tasks\_or\_file\_names}, {\hyperref[decorators/transform_ex:decorators-transform-matching-regex]{\emph{regex}}}\emph{(}\code{regex\_pattern}\emph{)}, {[} {\hyperref[decorators/indicator_objects:decorators-inputs]{\emph{inputs}}} \textbar{} {\hyperref[decorators/indicator_objects:decorators-add-inputs]{\emph{add\_inputs}}}\emph{(}\code{input\_pattern}\emph{)}, {]} \code{output\_pattern}, {[}\cod [...]
+
+
+\end{description}
+
+\item {} \begin{description}
+\item[{{\hyperref[decorators/transform_ex:decorators-transform-ex]{\emph{@transform}}} ( \code{tasks\_or\_file\_names}, {\hyperref[decorators/transform_ex:decorators-transform-matching-formatter]{\emph{formatter}}}\emph{(}\code{regex\_pattern}\emph{)}, {[} {\hyperref[decorators/indicator_objects:decorators-inputs]{\emph{inputs}}} \textbar{} {\hyperref[decorators/indicator_objects:decorators-add-inputs]{\emph{add\_inputs}}}\emph{(}\code{input\_pattern}\emph{)}, {]} \code{output\_pattern}, [...]
+
+
+\end{description}
+
+\end{itemize}
+ & \\\hline
+
+\textbf{@collate} ({\hyperref[decorators/collate:decorators-collate]{\emph{Summary}}} / {\hyperref[tutorials/new_tutorial/subdivide_collate:new-manual-collate]{\emph{Manual}}})
+\begin{itemize}
+\item {}
+Groups multiple input files using regular expression matching
+
+\item {}
+Inputs resulting in the same output after substitution will be collated together.
+
+\end{itemize}
+ & \begin{itemize}
+\item {} \begin{description}
+\item[{{\hyperref[decorators/collate:decorators-collate]{\emph{@collate}}} (\code{tasks\_or\_file\_names}, {\hyperref[decorators/collate:decorators-collate-matching-regex]{\emph{regex}}}\emph{(}\code{regex\_pattern}\emph{)}, \code{output\_pattern}, {[}\code{extra\_parameters},...{]} )}] \leavevmode
+
+
+\end{description}
+
+\item {} \begin{description}
+\item[{{\hyperref[decorators/collate_ex:decorators-collate-ex]{\emph{@collate}}} (\code{tasks\_or\_file\_names}, {\hyperref[decorators/collate_ex:decorators-collate-ex-matching-regex]{\emph{regex}}}\emph{(}\code{regex\_pattern}\emph{)}, {\hyperref[decorators/indicator_objects:decorators-inputs]{\emph{inputs}}} \textbar{} {\hyperref[decorators/indicator_objects:decorators-add-inputs]{\emph{add\_inputs}}}\emph{(}\code{input\_pattern}\emph{)}, \code{output\_pattern}, {[}\code{extra\_paramet [...]
+
+
+\end{description}
+
+\item {} \begin{description}
+\item[{{\hyperref[decorators/collate:decorators-collate]{\emph{@collate}}} (\code{tasks\_or\_file\_names}, {\hyperref[decorators/collate:decorators-collate-matching-formatter]{\emph{formatter}}}\emph{(}\code{formatter\_pattern}\emph{)}, \code{output\_pattern}, {[}\code{extra\_parameters},...{]} )}] \leavevmode
+
+
+\end{description}
+
+\item {} \begin{description}
+\item[{{\hyperref[decorators/collate_ex:decorators-collate-ex]{\emph{@collate}}} (\code{tasks\_or\_file\_names}, {\hyperref[decorators/collate_ex:decorators-collate-ex-matching-formatter]{\emph{formatter}}}\emph{(}\code{formatter\_pattern}\emph{)}, {\hyperref[decorators/indicator_objects:decorators-inputs]{\emph{inputs}}} \textbar{} {\hyperref[decorators/indicator_objects:decorators-add-inputs]{\emph{add\_inputs}}}\emph{(}\code{input\_pattern}\emph{)}, \code{output\_pattern}, {[}\code{ex [...]
+
+
+\end{description}
+
+\end{itemize}
+ & \\\hline
+
+\textbf{@follows} ({\hyperref[decorators/follows:decorators-follows]{\emph{Summary}}} / {\hyperref[tutorials/new_tutorial/transform_in_parallel:new-manual-follows]{\emph{Manual}}})
+\begin{itemize}
+\item {}
+Indicates task dependency
+
+\item {}
+optional {\hyperref[decorators/follows:decorators-follows-directory-name]{\emph{mkdir}}} prerequisite ({\hyperref[tutorials/new_tutorial/transform_in_parallel:new-manual-follows-mkdir]{\emph{see Manual}}})
+
+\end{itemize}
+ & \begin{itemize}
+\item {} \begin{description}
+\item[{{\hyperref[decorators/follows:decorators-follows]{\emph{@follows}}} ( \code{task1}, \code{'task2'} )}] \leavevmode
+
+
+\end{description}
+
+\item {} \begin{description}
+\item[{{\hyperref[decorators/follows:decorators-follows]{\emph{@follows}}} ( \code{task1}, {\hyperref[decorators/follows:decorators-follows-directory-name]{\emph{mkdir}}}( \code{'my/directory/'} ))}] \leavevmode
+
+
+\end{description}
+
+\end{itemize}
+ & \\\hline
+
+\textbf{@posttask} ({\hyperref[decorators/posttask:decorators-posttask]{\emph{Summary}}} / {\hyperref[tutorials/new_tutorial/posttask:new-manual-posttask]{\emph{Manual}}})
+\begin{itemize}
+\item {}
+Calls function after task completes
+
+\item {}
+Optional {\hyperref[decorators/posttask:decorators-posttask-file-name]{\emph{touch\_file}}} indicator ({\hyperref[tutorials/new_tutorial/posttask:new-manual-posttask-touch-file]{\emph{Manual}}})
+
+\end{itemize}
+ & \begin{itemize}
+\item {} \begin{description}
+\item[{{\hyperref[decorators/posttask:decorators-posttask]{\emph{@posttask}}} ( \code{signal\_task\_completion\_function} )}] \leavevmode
+
+
+\end{description}
+
+\item {} \begin{description}
+\item[{{\hyperref[decorators/posttask:decorators-posttask]{\emph{@posttask}}} ({\hyperref[decorators/indicator_objects:decorators-touch-file]{\emph{touch\_file}}}( \code{'task1.completed'} ))}] \leavevmode
+
+
+\end{description}
+
+\end{itemize}
+ & \\\hline
+
+\textbf{@active\_if} ({\hyperref[decorators/active_if:decorators-active-if]{\emph{Summary}}} / {\hyperref[tutorials/new_tutorial/active_if:new-manual-active-if]{\emph{Manual}}})
+\begin{itemize}
+\item {}
+Switches a task on or off at run time depending on its parameters
+
+\item {}
+Evaluated each time {\hyperref[pipeline_functions:pipeline-functions-pipeline-run]{\emph{pipeline\_run(...)}}}, {\hyperref[pipeline_functions:pipeline-functions-pipeline-printout]{\emph{pipeline\_printout(...)}}} or {\hyperref[pipeline_functions:pipeline-functions-pipeline-printout-graph]{\emph{pipeline\_printout\_graph(...)}}} is called.
+
+\item {}
+Dormant tasks behave as if they are up to date and have no output.
+
+\end{itemize}
+ & \begin{itemize}
+\item {} \begin{description}
+\item[{{\hyperref[decorators/active_if:decorators-active-if]{\emph{@active\_if}}} ( \code{on\_or\_off1, {[}on\_or\_off2, ...{]}} )}] \leavevmode
+
+
+\end{description}
+
+\end{itemize}
+ & \\\hline
+
+\textbf{@jobs\_limit} ({\hyperref[decorators/jobs_limit:decorators-jobs-limit]{\emph{Summary}}} / {\hyperref[tutorials/new_tutorial/multiprocessing:new-manual-jobs-limit]{\emph{Manual}}})
+\begin{itemize}
+\item {}
+Limits the amount of multiprocessing for the specified task
+
+\item {}
+Ensures that no more than N jobs for this task run in parallel
+
+\item {}
+Overrides \code{multiprocess} parameter in {\hyperref[pipeline_functions:pipeline-functions-pipeline-run]{\emph{pipeline\_run(...)}}}
+
+\end{itemize}
+ & \begin{itemize}
+\item {} \begin{description}
+\item[{{\hyperref[decorators/jobs_limit:decorators-jobs-limit]{\emph{@jobs\_limit}}} ( \code{NUMBER\_OF\_JOBS\_RUNNING\_CONCURRENTLY} )}] \leavevmode
+
+
+\end{description}
+
+\end{itemize}
+ & \\\hline
+
+\textbf{@mkdir} ({\hyperref[decorators/mkdir:decorators-mkdir]{\emph{Summary}}} / {\hyperref[tutorials/new_tutorial/mkdir:new-manual-mkdir]{\emph{Manual}}})
+\begin{itemize}
+\item {}
+Generates paths for \href{http://docs.python.org/2/library/os.html\#os.makedirs}{os.makedirs}
+
+\end{itemize}
+ & \begin{itemize}
+\item {} \begin{description}
+\item[{{\hyperref[decorators/mkdir:decorators-mkdir]{\emph{@mkdir}}} ( \code{tasks\_or\_file\_names}, {\hyperref[decorators/mkdir:decorators-mkdir-suffix-string]{\emph{suffix}}}\emph{(}\code{suffix\_string}\emph{)}, \code{output\_pattern} )}] \leavevmode
+
+
+\end{description}
+
+\item {} \begin{description}
+\item[{{\hyperref[decorators/mkdir:decorators-mkdir]{\emph{@mkdir}}} ( \code{tasks\_or\_file\_names}, {\hyperref[decorators/mkdir:decorators-mkdir-matching-regex]{\emph{regex}}}\emph{(}\code{regex\_pattern}\emph{)}, \code{output\_pattern} )}] \leavevmode
+
+
+\end{description}
+
+\item {} \begin{description}
+\item[{{\hyperref[decorators/mkdir:decorators-mkdir]{\emph{@mkdir}}} ( \code{tasks\_or\_file\_names}, {\hyperref[decorators/mkdir:decorators-mkdir-matching-formatter]{\emph{formatter}}}\emph{(}\code{regex\_pattern}\emph{)}, \code{output\_pattern})}] \leavevmode
+
+
+\end{description}
+
+\end{itemize}
+ & \\\hline
+
+\textbf{@graphviz} ({\hyperref[decorators/graphviz:decorators-graphviz]{\emph{Summary}}} / {\hyperref[tutorials/new_tutorial/pipeline_printout_graph:new-manual-pipeline-printout-graph]{\emph{Manual}}})
+\begin{itemize}
+\item {}
+Customise the graphic for each task in printed flowcharts
+
+\end{itemize}
+ & \begin{itemize}
+\item {} \begin{description}
+\item[{{\hyperref[decorators/graphviz:decorators-graphviz]{\emph{@graphviz}}} ( \code{graphviz\_parameter = XXX}, \code{{[}graphviz\_parameter2 = YYY ...{]}})}] \leavevmode
+
+
+\end{description}
+
+\end{itemize}
+ & \\\hline
+\end{tabular}
+
+\end{quote}
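+
+For example, the \code{@collate} signature in the table above groups together all
+\emph{Inputs} whose regular expression substitution yields the same \emph{Output}.
+A minimal sketch (the paired \code{.fastq} file names are hypothetical):
+
+\begin{Verbatim}
+from ruffus import *
+
+@originate(["sample1.R1.fastq", "sample1.R2.fastq",
+            "sample2.R1.fastq", "sample2.R2.fastq"])
+def make_reads(output_file):
+    open(output_file, "w").close()
+
+# Inputs whose substituted Output is identical are grouped into one job:
+#   sample1.R1.fastq + sample1.R2.fastq -> sample1.merged
+#   sample2.R1.fastq + sample2.R2.fastq -> sample2.merged
+@collate(make_reads, regex(r"(.+)\.R[12]\.fastq$"), r"\1.merged")
+def merge_pairs(input_files, output_file):
+    open(output_file, "w").close()
+
+pipeline_run()
+\end{Verbatim}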
+
+
+\subsubsection{\emph{Esoteric!}}
+\label{decorators/decorators:esoteric}\begin{quote}
+
+\begin{tabular}{|p{0.317\linewidth}|p{0.317\linewidth}|p{0.317\linewidth}|}
+\hline
+\textbf{\relax
+Decorator
+} & \textbf{\relax
+Examples
+} & \textbf{\relax }\\\hline
+
+\textbf{@files} ({\hyperref[decorators/files:decorators-files]{\emph{Summary}}} / {\hyperref[tutorials/new_tutorial/deprecated_files:new-manual-deprecated-files]{\emph{Manual}}})
+\begin{itemize}
+\item {}
+I/O parameters
+
+\item {}
+skips up-to-date jobs
+
+\item {}
+Should use {\hyperref[decorators/transform:decorators-transform]{\emph{@transform}}} etc instead
+
+\end{itemize}
+ & \begin{itemize}
+\item {} \begin{description}
+\item[{{\hyperref[decorators/files:decorators-files]{\emph{@files}}}( \code{parameter\_list} )}] \leavevmode
+
+
+\end{description}
+
+\item {} \begin{description}
+\item[{{\hyperref[decorators/files:decorators-files]{\emph{@files}}}( \code{parameter\_generating\_function} )}] \leavevmode
+
+
+\end{description}
+
+\item {} \begin{description}
+\item[{{\hyperref[decorators/files:decorators-files]{\emph{@files}}} ( \code{input\_file}, \code{output\_file}, \code{other\_params}, ... )}] \leavevmode
+
+
+\end{description}
+
+\end{itemize}
+ & \\\hline
+
+\textbf{@parallel} ({\hyperref[decorators/parallel:decorators-parallel]{\emph{Summary}}} / {\hyperref[tutorials/new_tutorial/parallel:new-manual-deprecated-parallel]{\emph{Manual}}})
+\begin{itemize}
+\item {}
+By default, does not check if jobs are up to date
+
+\item {}
+Best used in conjunction with {\hyperref[decorators/check_if_uptodate:decorators-check-if-uptodate]{\emph{@check\_if\_uptodate}}}
+
+\end{itemize}
+ & \begin{itemize}
+\item {} \begin{description}
+\item[{{\hyperref[decorators/parallel:decorators-parallel]{\emph{@parallel}}} ( \code{parameter\_list} ) ({\hyperref[tutorials/new_tutorial/parallel:new-manual-deprecated-parallel]{\emph{see Manual}}})}] \leavevmode
+
+
+\end{description}
+
+\item {} \begin{description}
+\item[{{\hyperref[decorators/parallel:decorators-parallel]{\emph{@parallel}}} ( \code{parameter\_generating\_function} ) ({\hyperref[tutorials/new_tutorial/onthefly:new-manual-on-the-fly]{\emph{see Manual}}})}] \leavevmode
+
+
+\end{description}
+
+\end{itemize}
+ & \\\hline
+
+\textbf{@check\_if\_uptodate} ({\hyperref[decorators/check_if_uptodate:decorators-check-if-uptodate]{\emph{Summary}}} / {\hyperref[tutorials/new_tutorial/check_if_uptodate:new-manual-check-if-uptodate]{\emph{Manual}}})
+\begin{itemize}
+\item {}
+Custom function to determine if jobs need to be run
+
+\end{itemize}
+ & \begin{itemize}
+\item {} \begin{description}
+\item[{{\hyperref[decorators/check_if_uptodate:decorators-check-if-uptodate]{\emph{@check\_if\_uptodate}}} ( \code{is\_task\_up\_to\_date\_function} )}] \leavevmode
+
+
+\end{description}
+
+\end{itemize}
+ & \\\hline
+
+\begin{notice}{tip}{Tip:}\begin{description}
+\item[{The use of this overly complicated decorator is discouraged.}] \leavevmode
+\textbf{@files\_re} ({\hyperref[decorators/files_re:decorators-files-re]{\emph{Summary}}})
+\begin{itemize}
+\item {}
+I/O file names via regular
+expressions
+
+\item {}
+start from lists of file names
+or \href{http://docs.python.org/library/glob.html}{\emph{glob}} results
+
+\item {}
+skips up-to-date jobs
+
+\end{itemize}
+
+\end{description}
+\end{notice}
+ & \begin{itemize}
+\item {} \begin{description}
+\item[{{\hyperref[decorators/files_re:decorators-files-re]{\emph{@files\_re}}} ( \code{tasks\_or\_file\_names}, \code{matching\_regex}, {[}\code{input\_pattern},{]} \code{output\_pattern}, \code{...} )}] \leavevmode
+\code{input\_pattern}/\code{output\_pattern} are regex patterns
+used to create input/output file names from the starting
+list of either glob\_str or file names
+
+\end{description}
+
+\end{itemize}
+ & \\\hline
+\end{tabular}
+
+\end{quote}
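+
+As an illustration of \code{@check\_if\_uptodate} from the table above, a hedged sketch
+of a custom up-to-date check follows. It assumes the check function receives the same
+parameters as each job and returns a tuple of (\emph{needs-update flag}, \emph{message});
+the helper name \code{check\_output\_exists} and the \code{.out} file names are hypothetical.
+
+\begin{Verbatim}
+import os
+from ruffus import *
+
+def check_output_exists(input_file, output_file):
+    # re-run the job whenever its output file is missing
+    if not os.path.exists(output_file):
+        return True, "Missing file %s" % output_file
+    return False, "File %s exists" % output_file
+
+@parallel([[None, "A.out"], [None, "B.out"]])
+@check_if_uptodate(check_output_exists)
+def process(input_file, output_file):
+    open(output_file, "w").close()
+
+pipeline_run()
+\end{Verbatim}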
+
+
+\strong{See also:}
+
+
+{\hyperref[decorators/decorators:decorators]{\emph{Decorators}}}
+
+
+
+\index{Indicator Object (Disambiguating parameters)}
+
+\subsection{Indicator Objects}
+\label{decorators/indicator_objects:decorators-indicator-objects}\label{decorators/indicator_objects:index-0}\label{decorators/indicator_objects::doc}\label{decorators/indicator_objects:indicator-objects}\begin{quote}
+
+Indicator objects are how \emph{ruffus} disambiguates certain parameters to decorators.
+
+They are like \href{http://docs.python.org/tutorial/controlflow.html\#keyword-arguments}{keyword arguments} in python: a little more verbose, but they make the syntax much easier to read.
+
+Indicator objects are also ``self-documenting'', so you can see exactly what each
+parameter is for, as in the sketch that follows.
+\end{quote}
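+
+A minimal hypothetical sketch (file names are illustrative only): wrapping \code{".c"}
+in \code{suffix(...)} tells \emph{ruffus} unambiguously that it is a suffix to be
+matched, rather than an extra parameter passed through to the task.
+
+\begin{Verbatim}
+from ruffus import *
+
+@originate(["module.c"])
+def make_source(output_file):
+    open(output_file, "w").close()
+
+# suffix(".c") is self-documenting: match the ".c" ending and replace it with ".o"
+@transform(make_source, suffix(".c"), ".o")
+def compile_source(input_file, output_file):
+    open(output_file, "w").close()
+
+pipeline_run()
+\end{Verbatim}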
+
+\index{formatter!Indicator Object (Disambiguating parameters)}\index{Indicator Object (Disambiguating parameters)!formatter}
+
+\subsubsection{\emph{formatter}}
+\label{decorators/indicator_objects:decorators-formatter}\label{decorators/indicator_objects:formatter}\label{decorators/indicator_objects:index-1}\begin{quote}
+
+\textbf{formatter({[}} \code{regex \textbar{} None} \textbf{, regex \textbar{} None...{]})}
+\begin{itemize}
+\item {}
+The optional enclosed parameters are python regular expression strings
+
+\item {}
+Each regular expression matches a corresponding \emph{Input} file name string
+
+\item {}
+\emph{formatter} parses each file name string into path and regular expression components
+
+\item {}
+Parsing fails altogether if the regular expression is not matched
+
+\end{itemize}
+
+Path components include:
+\begin{itemize}
+\item {}
+\code{basename}: The \href{http://docs.python.org/2/library/os.path.html\#os.path.basename}{base name} \emph{excluding} \href{http://docs.python.org/2/library/os.path.html\#os.path.splitext}{extension}, \code{"file.name"}
+
+\item {}
+\code{ext} : The \href{http://docs.python.org/2/library/os.path.html\#os.path.splitext}{extension}, \code{".ext"}
+
+\item {}
+\code{path} : The \href{http://docs.python.org/2/library/os.path.html\#os.path.dirname}{dirname}, \code{"/directory/to/a"}
+
+\item {}
+\code{subdir} : A list of sub-directories in the \code{path} in reverse order, \code{{[}"a", "to", "directory", "/"{]}}
+
+\item {}
+\code{subpath} : A list of descending sub-paths in reverse order, \code{{[}"/directory/to/a", "/directory/to", "/directory", "/"{]}}
+
+\end{itemize}
+
+The replacement string refers to these components using python \href{http://docs.python.org/2/library/string.html\#string-formatting}{string.format} style curly braces, e.g. \code{\{NAME\}}.
+
+We refer to an element from the Nth input string by index, for example:
+\begin{itemize}
+\item {}
+\code{"\{ext{[}0{]}\}"} is the extension of the first input string.
+
+\item {}
+\code{"\{basename{[}1{]}\}"} is the basename of the second input string.
+
+\item {}
+\code{"\{basename{[}1{]}{[}0:3{]}\}"} are the first three letters from the basename of the second input string.
+
+\end{itemize}
+\begin{description}
+\item[{\textbf{Used by:}}] \leavevmode\begin{itemize}
+\item {}
+{\hyperref[decorators/split:decorators-split]{\emph{@split}}}
+
+\item {}
+{\hyperref[decorators/transform:decorators-transform]{\emph{@transform}}}
+
+\item {}
+{\hyperref[decorators/merge:decorators-merge]{\emph{@merge}}}
+
+\item {}
+{\hyperref[decorators/subdivide:decorators-subdivide]{\emph{@subdivide}}}
+
+\item {}
+{\hyperref[decorators/collate:decorators-collate]{\emph{@collate}}}
+
+\item {}
+{\hyperref[decorators/product:decorators-product]{\emph{@product}}}
+
+\item {}
+{\hyperref[decorators/permutations:decorators-permutations]{\emph{@permutations}}}
+
+\item {}
+{\hyperref[decorators/combinations:decorators-combinations]{\emph{@combinations}}}
+
+\item {}
+{\hyperref[decorators/combinations_with_replacement:decorators-combinations-with-replacement]{\emph{@combinations\_with\_replacement}}}
+
+\end{itemize}
+
+\end{description}
+
+\textbf{@transform example}:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{k+kn}{from} \PYG{n+nn}{ruffus} \PYG{k+kn}{import} \PYG{o}{*}
+
+\PYG{c}{\PYGZsh{} create initial file pairs}
+\PYG{n+nd}{@originate}\PYG{p}{(}\PYG{p}{[} \PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job1.a.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job1.b.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]}\PYG{p}{,}
+ \PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job2.a.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job2.b.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]}\PYG{p}{,}
+ \PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job3.a.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{job3.c.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]} \PYG{p}{]}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{create\PYGZus{}initial\PYGZus{}file\PYGZus{}pairs}\PYG{p}{(}\PYG{n}{output\PYGZus{}files}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{for} \PYG{n}{output\PYGZus{}file} \PYG{o+ow}{in} \PYG{n}{output\PYGZus{}files}\PYG{p}{:}
+ \PYG{k}{with} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)} \PYG{k}{as} \PYG{n}{oo}\PYG{p}{:} \PYG{k}{pass}
+
+
+\PYG{c}{\PYGZsh{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy [...]
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} formatter}
+\PYG{c}{\PYGZsh{}}
+\PYG{n+nd}{@transform}\PYG{p}{(}\PYG{n}{create\PYGZus{}initial\PYGZus{}file\PYGZus{}pairs}\PYG{p}{,} \PYG{c}{\PYGZsh{} Input}
+
+ \PYG{n}{formatter}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.+/job(?P\PYGZlt{}JOBNUMBER\PYGZgt{}}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{d+).a.start}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{c}{\PYGZsh{} Extract job number}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.+/job[123].b.start}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{c}{\PYGZsh{} Match only \PYGZdq{}b\PYGZdq{} files}
+
+ \PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}path[0]\PYGZcb{}/jobs\PYGZob{}JOBNUMBER[0]\PYGZcb{}.output.a.1}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{c}{\PYGZsh{} Replacement list}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}path[1]\PYGZcb{}/jobs\PYGZob{}JOBNUMBER[0]\PYGZcb{}.output.b.1}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{first\PYGZus{}task}\PYG{p}{(}\PYG{n}{input\PYGZus{}files}\PYG{p}{,} \PYG{n}{output\PYGZus{}parameters}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{print} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{input\PYGZus{}parameters = }\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{n}{input\PYGZus{}files}
+ \PYG{k}{print} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{output\PYGZus{}parameters = }\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{n}{output\PYGZus{}parameters}
+
+
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} Run}
+\PYG{c}{\PYGZsh{}}
+\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{n}{verbose}\PYG{o}{=}\PYG{l+m+mi}{0}\PYG{p}{)}
+\end{Verbatim}
+
+This produces:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{g+go}{input\PYGZus{}parameters = [\PYGZsq{}job1.a.start\PYGZsq{},}
+\PYG{g+go}{ \PYGZsq{}job1.b.start\PYGZsq{}]}
+\PYG{g+go}{output\PYGZus{}parameters = [\PYGZsq{}/home/lg/src/temp/jobs1.output.a.1\PYGZsq{},}
+\PYG{g+go}{ \PYGZsq{}/home/lg/src/temp/jobs1.output.b.1\PYGZsq{}, 45]}
+
+\PYG{g+go}{input\PYGZus{}parameters = [\PYGZsq{}job2.a.start\PYGZsq{},}
+\PYG{g+go}{ \PYGZsq{}job2.b.start\PYGZsq{}]}
+\PYG{g+go}{output\PYGZus{}parameters = [\PYGZsq{}/home/lg/src/temp/jobs2.output.a.1\PYGZsq{},}
+\PYG{g+go}{ \PYGZsq{}/home/lg/src/temp/jobs2.output.b.1\PYGZsq{}, 45]}
+\end{Verbatim}
+\end{quote}
+
+\textbf{@permutations example}:
+\begin{quote}
+
+Combinatoric decorators such as {\hyperref[decorators/product:decorators-product]{\emph{@product}}} or
+{\hyperref[decorators/permutations:decorators-permutations]{\emph{@permutations}}} behave much
+like nested for loops in enumerating, combining, and permuting the original sets
+of inputs.
+
+The replacement strings require an extra level of indirection to refer to
+parsed components:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{k+kn}{from} \PYG{n+nn}{ruffus} \PYG{k+kn}{import} \PYG{o}{*}
+\PYG{k+kn}{from} \PYG{n+nn}{ruffus.combinatorics} \PYG{k+kn}{import} \PYG{o}{*}
+
+\PYG{c}{\PYGZsh{} create initial files}
+\PYG{n+nd}{@originate}\PYG{p}{(}\PYG{p}{[} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{a.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{b.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{c.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{create\PYGZus{}initial\PYGZus{}files}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{with} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)} \PYG{k}{as} \PYG{n}{oo}\PYG{p}{:} \PYG{k}{pass}
+
+
+\PYG{c}{\PYGZsh{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy{}\PYGZhy [...]
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} formatter}
+\PYG{c}{\PYGZsh{}}
+\PYG{n+nd}{@permutations}\PYG{p}{(}\PYG{n}{create\PYGZus{}initial\PYGZus{}files}\PYG{p}{,} \PYG{c}{\PYGZsh{} Input}
+
+ \PYG{n}{formatter}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{(.start)\PYGZdl{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{c}{\PYGZsh{} match input file in permutations}
+ \PYG{l+m+mi}{2}\PYG{p}{,}
+
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}path[0][0]\PYGZcb{}/\PYGZob{}basename[0][0]\PYGZcb{}\PYGZus{}vs\PYGZus{}\PYGZob{}basename[1][0]\PYGZcb{}.product}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{c}{\PYGZsh{} Output Replacement string}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}path[0][0]\PYGZcb{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{c}{\PYGZsh{} path for 1st set of files, 1st file name}
+ \PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}basename[0][0]\PYGZcb{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{c}{\PYGZsh{} basename for 1st set of files, 1st file name}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}basename[1][0]\PYGZcb{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{)} \PYG{c}{\PYGZsh{} basename for 2nd set of files, 1st file name}
+\PYG{k}{def} \PYG{n+nf}{product\PYGZus{}task}\PYG{p}{(}\PYG{n}{input\PYGZus{}file}\PYG{p}{,} \PYG{n}{output\PYGZus{}parameter}\PYG{p}{,} \PYG{n}{shared\PYGZus{}path}\PYG{p}{,} \PYG{n}{basenames}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{print} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{input\PYGZus{}parameter = }\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{n}{input\PYGZus{}file}
+ \PYG{k}{print} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{output\PYGZus{}parameter = }\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{n}{output\PYGZus{}parameter}
+ \PYG{k}{print} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{shared\PYGZus{}path = }\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{n}{shared\PYGZus{}path}
+ \PYG{k}{print} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{basenames = }\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{n}{basenames}
+
+
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} Run}
+\PYG{c}{\PYGZsh{}}
+\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{n}{verbose}\PYG{o}{=}\PYG{l+m+mi}{0}\PYG{p}{)}
+\end{Verbatim}
+
+This produces:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{n}{verbose}\PYG{o}{=}\PYG{l+m+mi}{0}\PYG{p}{)}
+\PYG{g+go}{input\PYGZus{}parameter = (\PYGZsq{}a.start\PYGZsq{}, \PYGZsq{}b.start\PYGZsq{})}
+\PYG{g+go}{output\PYGZus{}parameter = /home/lg/src/oss/ruffus/a\PYGZus{}vs\PYGZus{}b.product}
+\PYG{g+go}{shared\PYGZus{}path = /home/lg/src/oss/ruffus}
+\PYG{g+go}{basenames = [\PYGZsq{}a\PYGZsq{}, \PYGZsq{}b\PYGZsq{}]}
+
+\PYG{g+go}{input\PYGZus{}parameter = (\PYGZsq{}a.start\PYGZsq{}, \PYGZsq{}c.start\PYGZsq{})}
+\PYG{g+go}{output\PYGZus{}parameter = /home/lg/src/oss/ruffus/a\PYGZus{}vs\PYGZus{}c.product}
+\PYG{g+go}{shared\PYGZus{}path = /home/lg/src/oss/ruffus}
+\PYG{g+go}{basenames = [\PYGZsq{}a\PYGZsq{}, \PYGZsq{}c\PYGZsq{}]}
+
+\PYG{g+go}{input\PYGZus{}parameter = (\PYGZsq{}b.start\PYGZsq{}, \PYGZsq{}a.start\PYGZsq{})}
+\PYG{g+go}{output\PYGZus{}parameter = /home/lg/src/oss/ruffus/b\PYGZus{}vs\PYGZus{}a.product}
+\PYG{g+go}{shared\PYGZus{}path = /home/lg/src/oss/ruffus}
+\PYG{g+go}{basenames = [\PYGZsq{}b\PYGZsq{}, \PYGZsq{}a\PYGZsq{}]}
+
+\PYG{g+go}{input\PYGZus{}parameter = (\PYGZsq{}b.start\PYGZsq{}, \PYGZsq{}c.start\PYGZsq{})}
+\PYG{g+go}{output\PYGZus{}parameter = /home/lg/src/oss/ruffus/b\PYGZus{}vs\PYGZus{}c.product}
+\PYG{g+go}{shared\PYGZus{}path = /home/lg/src/oss/ruffus}
+\PYG{g+go}{basenames = [\PYGZsq{}b\PYGZsq{}, \PYGZsq{}c\PYGZsq{}]}
+
+\PYG{g+go}{input\PYGZus{}parameter = (\PYGZsq{}c.start\PYGZsq{}, \PYGZsq{}a.start\PYGZsq{})}
+\PYG{g+go}{output\PYGZus{}parameter = /home/lg/src/oss/ruffus/c\PYGZus{}vs\PYGZus{}a.product}
+\PYG{g+go}{shared\PYGZus{}path = /home/lg/src/oss/ruffus}
+\PYG{g+go}{basenames = [\PYGZsq{}c\PYGZsq{}, \PYGZsq{}a\PYGZsq{}]}
+
+\PYG{g+go}{input\PYGZus{}parameter = (\PYGZsq{}c.start\PYGZsq{}, \PYGZsq{}b.start\PYGZsq{})}
+\PYG{g+go}{output\PYGZus{}parameter = /home/lg/src/oss/ruffus/c\PYGZus{}vs\PYGZus{}b.product}
+\PYG{g+go}{shared\PYGZus{}path = /home/lg/src/oss/ruffus}
+\PYG{g+go}{basenames = [\PYGZsq{}c\PYGZsq{}, \PYGZsq{}b\PYGZsq{}]}
+\end{Verbatim}
+\end{quote}
+\end{quote}
+
+\index{suffix!Indicator Object (Disambiguating parameters)}\index{Indicator Object (Disambiguating parameters)!suffix}
+
+\subsubsection{\emph{suffix}}
+\label{decorators/indicator_objects:decorators-suffix}\label{decorators/indicator_objects:index-2}\label{decorators/indicator_objects:suffix}\begin{quote}
+
+\textbf{suffix(} \code{string} \textbf{)}
+
+The enclosed parameter is a string which must match the end of a file name
+\emph{exactly}.
+\begin{description}
+\item[{\textbf{Used by:}}] \leavevmode\begin{itemize}
+\item {}
+{\hyperref[decorators/transform:decorators-transform]{\emph{@transform}}}
+
+\end{itemize}
+
+\item[{\textbf{Example}:}] \leavevmode
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} Transforms {}`{}`*.c{}`{}` to {}`{}`*.o{}`{}`::}
+\PYG{c}{\PYGZsh{}}
+\PYG{n+nd}{@transform}\PYG{p}{(}\PYG{n}{previous\PYGZus{}task}\PYG{p}{,} \PYG{n}{suffix}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.c}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.o}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{compile}\PYG{p}{(}\PYG{n}{infile}\PYG{p}{,} \PYG{n}{outfile}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{pass}
+\end{Verbatim}
+
+\end{description}
+\end{quote}
+
+\index{regex!Indicator Object (Disambiguating parameters)}\index{Indicator Object (Disambiguating parameters)!regex}
+
+\subsubsection{\emph{regex}}
+\label{decorators/indicator_objects:decorators-regex}\label{decorators/indicator_objects:regex}\label{decorators/indicator_objects:index-3}\begin{quote}
+
+\textbf{regex(} \code{regular\_expression} \textbf{)}
+
+The enclosed parameter is a python regular expression string,
+which must be wrapped in a \code{regex} indicator object.
+
+See the python \href{http://docs.python.org/library/re.html}{regular expression (re)}
+documentation for details of regular expression syntax.
+
+\textbf{Used by:}
+\begin{itemize}
+\item {}
+{\hyperref[decorators/transform:decorators-transform]{\emph{@transform}}}
+
+\item {}
+{\hyperref[decorators/subdivide:decorators-subdivide]{\emph{@subdivide}}}
+
+\item {}
+{\hyperref[decorators/collate:decorators-collate]{\emph{@collate}}}
+
+\item {}
+The deprecated {\hyperref[decorators/files_re:decorators-files-re]{\emph{@files\_re}}}
+
+\end{itemize}
+\begin{description}
+\item[{\textbf{Example}:}] \leavevmode
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n+nd}{@transform}\PYG{p}{(}\PYG{n}{previous\PYGZus{}task}\PYG{p}{,} \PYG{n}{regex}\PYG{p}{(}\PYG{l+s}{r\PYGZdq{}}\PYG{l+s}{.c\PYGZdl{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.o}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{compile}\PYG{p}{(}\PYG{n}{infile}\PYG{p}{,} \PYG{n}{outfile}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{pass}
+\end{Verbatim}
+
+\end{description}
+\end{quote}
+
+\index{add\_inputs!Indicator Object (Adding additional input parameters)}\index{Indicator Object (Adding additional input parameters)!add\_inputs}
+
+\subsubsection{\emph{add\_inputs}}
+\label{decorators/indicator_objects:index-4}\label{decorators/indicator_objects:add-inputs}\label{decorators/indicator_objects:decorators-add-inputs}\begin{quote}
+
+\textbf{add\_inputs(} \code{input\_file\_pattern} \textbf{)}
+
+The enclosed parameter(s) are pattern strings or a nested structure which is added to the
+input for each job.
+\begin{description}
+\item[{\textbf{Used by:}}] \leavevmode\begin{itemize}
+\item {}
+{\hyperref[decorators/transform_ex:decorators-transform-ex]{\emph{@transform}}}
+
+\item {}
+{\hyperref[decorators/transform_ex:decorators-transform-ex]{\emph{@collate}}}
+
+\item {}
+{\hyperref[decorators/subdivide:decorators-subdivide]{\emph{@subdivide}}}
+
+\end{itemize}
+
+\end{description}
+
+\textbf{Example @transform with suffix(...)}
+\begin{quote}
+
+A common task in compiling C code is to include the corresponding header file for the source.
+To compile \code{*.c} to \code{*.o}, adding \code{*.h} and the common header \code{universal.h}:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n+nd}{@transform}\PYG{p}{(}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{1.c}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{2.c}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{,} \PYG{n}{suffix}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.c}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{n}{add\PYGZus{}inputs}\PYG{p}{(}\PYG{p}{[}\PYG{l+s}{r\PYGZdq{}}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{1.h}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{universal.h}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p [...]
+\PYG{k}{def} \PYG{n+nf}{compile}\PYG{p}{(}\PYG{n}{infile}\PYG{p}{,} \PYG{n}{outfile}\PYG{p}{)}\PYG{p}{:}
+ \PYG{c}{\PYGZsh{} do something here}
+ \PYG{k}{pass}
+\end{Verbatim}
+\end{quote}
+
+\begin{DUlineblock}{0em}
+\item[] The starting files names are \code{1.c} and \code{2.c}.
+\item[] \code{suffix(".c")} matches \code{".c"}, so \code{\textbackslash{}1} stands for the unmatched prefixes \code{"1"} and \code{"2"}
+\end{DUlineblock}
+\begin{description}
+\item[{This will result in the following functional calls:}] \leavevmode
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n+nb}{compile}\PYG{p}{(}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{1.c}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{1.h}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{universal.h}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{1.o}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{n+nb}{compile}\PYG{p}{(}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{2.c}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{2.h}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{universal.h}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{2.o}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\end{Verbatim}
+
+\end{description}
+
+A string like \code{universal.h} in \code{add\_inputs} will be added \emph{as is}.
+\code{r"\textbackslash{}1.h"}, however, performs suffix substitution, with the special form \code{r"\textbackslash{}1"} matching everything up to the suffix.
+Remember to `escape' \code{r"\textbackslash{}1"} otherwise Ruffus will complain and throw an \code{Exception} to remind you.
+The most convenient way is to use a python ``raw'' string.
+\end{quote}
+
+\textbf{Example of add\_inputs(...) with regex(...)}
+\begin{quote}
+\begin{description}
+\item[{The suffix match (\code{suffix(...)}) is exactly equivalent to the following code using regular expression (\code{regex(...)}):}] \leavevmode
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n+nd}{@transform}\PYG{p}{(}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{1.c}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{2.c}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{,} \PYG{n}{regex}\PYG{p}{(}\PYG{l+s}{r\PYGZdq{}}\PYG{l+s}{\PYGZca{}(.+)}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{.c\PYGZdl{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{n}{add\PYGZus{}inputs}\PYG{p}{(}\PYG{p}{[}\PYG{l+s}{r\PYGZdq{}}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{1.h}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{ [...]
+\PYG{k}{def} \PYG{n+nf}{compile}\PYG{p}{(}\PYG{n}{infile}\PYG{p}{,} \PYG{n}{outfile}\PYG{p}{)}\PYG{p}{:}
+ \PYG{c}{\PYGZsh{} do something here}
+ \PYG{k}{pass}
+\end{Verbatim}
+
+\end{description}
+
+The \code{suffix(..)} code is much simpler, but the regular expression allows more complex substitutions, as in the sketch below.
+\end{quote}
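+
+For example, a regular expression can relocate the output into a different
+(hypothetical) \code{obj/} directory while keeping the file stem, something
+\code{suffix(...)} alone cannot express:
+
+\begin{Verbatim}
+from ruffus import *
+
+# "src/1.c" -> "obj/1.o", "src/2.c" -> "obj/2.o"
+@transform(["src/1.c", "src/2.c"],
+           regex(r"^src/(.+)\.c$"),
+           add_inputs([r"src/\1.h", "universal.h"]),
+           r"obj/\1.o")
+def compile(infile, outfile):
+    # do something here
+    pass
+\end{Verbatim}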
+
+\textbf{add\_inputs(...) preserves original inputs}
+\begin{quote}
+
+\code{add\_inputs} nests the original input parameters in a list before adding additional dependencies.
+\begin{description}
+\item[{This can be seen in the following example:}] \leavevmode
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n+nd}{@transform}\PYG{p}{(}\PYG{p}{[} \PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{1.c}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{A.c}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+m+mi}{2}\PYG{p}{]}\PYG{p}{,}
+ \PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{2.c}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{B.c}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{C.c}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+m+mi}{3}\PYG{p}{]}\PYG{p}{]}\PYG{p}{,}
+ \PYG{n}{suffix}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.c}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{n}{add\PYGZus{}inputs}\PYG{p}{(}\PYG{p}{[}\PYG{l+s}{r\PYGZdq{}}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{1.h}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{universal.h}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{)}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.o}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{compile}\PYG{p}{(}\PYG{n}{infile}\PYG{p}{,} \PYG{n}{outfile}\PYG{p}{)}\PYG{p}{:}
+ \PYG{c}{\PYGZsh{} do something here}
+ \PYG{k}{pass}
+\end{Verbatim}
+
+\item[{This will result in the following functional calls:}] \leavevmode
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n+nb}{compile}\PYG{p}{(}\PYG{p}{[}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{1.c}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{A.c}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+m+mi}{2}\PYG{p}{]}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{1.h}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{universal.h}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{1.o}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{n+nb}{compile}\PYG{p}{(}\PYG{p}{[}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{2.c}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{B.c}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{C.c}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+m+mi}{3}\PYG{p}{]}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{2.h}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{universal.h}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{2.o}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\end{Verbatim}
+
+\end{description}
+
+The original parameters are retained unchanged as the first item in a list
+\end{quote}
+\end{quote}
+
+\index{inputs!Indicator Object (Replacing input parameters)}\index{Indicator Object (Replacing input parameters)!inputs}
+
+\subsubsection{\emph{inputs}}
+\label{decorators/indicator_objects:decorators-inputs}\label{decorators/indicator_objects:index-5}\label{decorators/indicator_objects:inputs}\begin{quote}
+
+\textbf{inputs(} \code{input\_file\_pattern} \textbf{)}
+\begin{description}
+\item[{\textbf{Used by:}}] \leavevmode\begin{itemize}
+\item {}
+{\hyperref[decorators/transform_ex:decorators-transform-ex]{\emph{@transform}}}
+
+\item {}
+{\hyperref[decorators/transform_ex:decorators-transform-ex]{\emph{@collate}}}
+
+\item {}
+{\hyperref[decorators/subdivide:decorators-subdivide]{\emph{@subdivide}}}
+
+\end{itemize}
+
+\end{description}
+
+The enclosed single parameter is a pattern string or a nested structure which is
+used to construct the input for each job.
+
+If more than one argument is supplied to inputs, an exception will be raised.
+
+Use a tuple or list (as in the following example) to send multiple input arguments to each job.
+\begin{description}
+\item[{\textbf{Used by:}}] \leavevmode\begin{itemize}
+\item {}
+The advanced form of {\hyperref[decorators/transform_ex:decorators-transform-ex]{\emph{@transform}}}
+
+\end{itemize}
+
+\end{description}
+
+\textbf{inputs(...) replaces original inputs}
+\begin{quote}
+
+\code{inputs(...)} allows the original input parameters to be replaced wholesale.
+\begin{description}
+\item[{This can be seen in the following example:}] \leavevmode
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n+nd}{@transform}\PYG{p}{(}\PYG{p}{[} \PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{1.c}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{A.c}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+m+mi}{2}\PYG{p}{]}\PYG{p}{,}
+ \PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{2.c}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{B.c}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{C.c}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+m+mi}{3}\PYG{p}{]}\PYG{p}{]}\PYG{p}{,}
+ \PYG{n}{suffix}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.c}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{n}{inputs}\PYG{p}{(}\PYG{p}{[}\PYG{l+s}{r\PYGZdq{}}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{1.py}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{docs.rst}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{)}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.pyc}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{compile}\PYG{p}{(}\PYG{n}{infile}\PYG{p}{,} \PYG{n}{outfile}\PYG{p}{)}\PYG{p}{:}
+ \PYG{c}{\PYGZsh{} do something here}
+ \PYG{k}{pass}
+\end{Verbatim}
+
+\item[{This will result in the following functional calls:}] \leavevmode
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n+nb}{compile}\PYG{p}{(}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{1.py}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{docs.rst}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{1.pyc}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{n+nb}{compile}\PYG{p}{(}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{2.py}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{docs.rst}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{2.pyc}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\end{Verbatim}
+
+\end{description}
+
+In this example, the corresponding python files have been sneakily substituted
+without trace in the place of the C source files.
+\end{quote}
+\end{quote}
+
+\index{@follows!mkdir (Syntax)}\index{mkdir!@follows (Syntax)}\index{Indicator Object (Disambiguating parameters)!mkdir}
+
+\subsubsection{\emph{mkdir}}
+\label{decorators/indicator_objects:index-6}\label{decorators/indicator_objects:mkdir}\label{decorators/indicator_objects:decorators-indicator-objects-mkdir}\begin{quote}
+
+\textbf{mkdir(} \code{directory\_name1} \textbf{, {[}} \code{directory\_name2} \textbf{, ...{]} )}
+
+The enclosed parameter is a directory name or a sequence of directory names.
+These directories will be created as part of the prerequisites of running a task.
+\begin{description}
+\item[{\textbf{Used by:}}] \leavevmode\begin{itemize}
+\item {}
+{\hyperref[decorators/follows:decorators-follows]{\emph{@follows}}}
+
+\end{itemize}
+
+\item[{\textbf{Example:}}] \leavevmode
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n+nd}{@follows}\PYG{p}{(}\PYG{n}{mkdir}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{/output/directory}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{task}\PYG{p}{(}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{pass}
+\end{Verbatim}
+
+\end{description}
+\end{quote}
+
+\index{@posttask!touch\_file (Syntax)}\index{touch\_file!@posttask (Syntax)}\index{Indicator Object (Disambiguating parameters)!touch\_file}
+
+\subsubsection{\emph{touch\_file}}
+\label{decorators/indicator_objects:touch-file}\label{decorators/indicator_objects:index-7}\label{decorators/indicator_objects:decorators-touch-file}\begin{quote}
+
+\textbf{touch\_file(} \code{file\_name} \textbf{)}
+
+The enclosed parameter is a file name. This file will be \code{touch}-ed after a
+task is executed.
+
+This will change the date/time stamp of the \code{file\_name} to the current date/time.
+If the file does not exist, an empty file will be created.
+\begin{description}
+\item[{\textbf{Used by:}}] \leavevmode\begin{itemize}
+\item {}
+{\hyperref[decorators/posttask:decorators-posttask]{\emph{@posttask}}}
+
+\end{itemize}
+
+\item[{\textbf{Example:}}] \leavevmode
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n+nd}{@posttask}\PYG{p}{(}\PYG{n}{touch\PYGZus{}file}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{task\PYGZus{}completed.flag}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{)}
+\PYG{n+nd}{@files}\PYG{p}{(}\PYG{n+nb+bp}{None}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{a.1}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{do\PYGZus{}task}\PYG{p}{(}\PYG{n}{input\PYGZus{}file}\PYG{p}{,} \PYG{n}{output\PYGZus{}file}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{pass}
+\end{Verbatim}
+
+\end{description}
+\end{quote}
+
+\index{output\_from!Indicator Object (Disambiguating parameters)}\index{Indicator Object (Disambiguating parameters)!output\_from}
+
+\subsubsection{\emph{output\_from}}
+\label{decorators/indicator_objects:index-8}\label{decorators/indicator_objects:decorators-output-from}\label{decorators/indicator_objects:output-from}\begin{quote}
+
+\textbf{output\_from (} \code{file\_name\_string1} \textbf{{[},} \code{file\_name\_string2} \textbf{, ...{]} )}
+
+Indicates that any enclosed strings are not file names but refer to task functions.
+\begin{description}
+\item[{\textbf{Used by:}}] \leavevmode\begin{itemize}
+\item {}
+{\hyperref[decorators/split:decorators-split]{\emph{@split}}}
+
+\item {}
+{\hyperref[decorators/transform:decorators-transform]{\emph{@transform}}}
+
+\item {}
+{\hyperref[decorators/merge:decorators-merge]{\emph{@merge}}}
+
+\item {}
+{\hyperref[decorators/collate:decorators-collate]{\emph{@collate}}}
+
+\item {}
+{\hyperref[decorators/subdivide:decorators-subdivide]{\emph{@subdivide}}}
+
+\item {}
+{\hyperref[decorators/product:decorators-product]{\emph{@product}}}
+
+\item {}
+{\hyperref[decorators/permutations:decorators-permutations]{\emph{@permutations}}}
+
+\item {}
+{\hyperref[decorators/combinations:decorators-combinations]{\emph{@combinations}}}
+
+\item {}
+{\hyperref[decorators/combinations_with_replacement:decorators-combinations-with-replacement]{\emph{@combinations\_with\_replacement}}}
+
+\item {}
+{\hyperref[decorators/files:decorators-files]{\emph{@files}}}
+
+\end{itemize}
+
+\item[{\textbf{Example:}}] \leavevmode
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n+nd}{@split}\PYG{p}{(}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{a.file}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{b.file}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{n}{output\PYGZus{}from}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{task1}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+m+mi}{76}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{task2}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{)}\PYG{p}{]}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{*.split}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{task2}\PYG{p}{(}\PYG{n+nb}{input}\PYG{p}{,} \PYG{n}{output}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{pass}
+\end{Verbatim}
+
+is equivalent to:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n+nd}{@split}\PYG{p}{(}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{a.file}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{b.file}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{p}{(}\PYG{n}{task1}\PYG{p}{,} \PYG{l+m+mi}{76}\PYG{p}{,} \PYG{n}{task2}\PYG{p}{)}\PYG{p}{)}\PYG{p}{]}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{*.split}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{task2}\PYG{p}{(}\PYG{n+nb}{input}\PYG{p}{,} \PYG{n}{output}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{pass}
+\end{Verbatim}
+
+\end{description}
+\end{quote}
+
+\index{@files\_re!combine (Deprecated Syntax)}\index{combine!@follows (Deprecated Syntax)}\index{Indicator Object (Disambiguating parameters)!combine}
+
+\subsubsection{\emph{combine}}
+\label{decorators/indicator_objects:combine}\label{decorators/indicator_objects:decorators-combine}\label{decorators/indicator_objects:index-9}\begin{quote}
+
+\textbf{combine(} \code{arguments} \textbf{)}
+
+\begin{notice}{warning}{Warning:}
+This is deprecated syntax.
+
+Please do not use!
+
+{\hyperref[decorators/merge:decorators-merge]{\emph{@merge}}} and {\hyperref[decorators/collate:decorators-collate]{\emph{@collate}}} are more powerful
+and have straightforward syntax.
+\end{notice}
+
+Indicates that the \emph{inputs} of {\hyperref[decorators/files_re:decorators-files-re]{\emph{@files\_re}}} will be collated
+or summarised into \emph{outputs} by category. See the {\hyperref[tutorials/new_tutorial/deprecated_files_re:new-manual-files-re-combine]{\emph{Manual}}} or
+{\hyperref[decorators/collate:decorators-collate]{\emph{@collate}}} for examples.
+\begin{description}
+\item[{\textbf{Used by:}}] \leavevmode\begin{itemize}
+\item {}
+{\hyperref[tutorials/new_tutorial/deprecated_files_re:new-manual-files-re-combine]{\emph{@files\_re}}}
+
+\end{itemize}
+
+\item[{\textbf{Example:}}] \leavevmode
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n+nd}{@files\PYGZus{}re}\PYG{p}{(}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{*.animals}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{c}{\PYGZsh{} inputs = all *.animal files}
+ \PYG{l+s}{r\PYGZsq{}}\PYG{l+s}{mammals.([\PYGZca{}.]+)}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{c}{\PYGZsh{} regular expression}
+ \PYG{n}{combine}\PYG{p}{(}\PYG{l+s}{r\PYGZsq{}}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{1/animals.in\PYGZus{}my\PYGZus{}zoo}\PYG{l+s}{\PYGZsq{}}\PYG{p}{)}\PYG{p}{,} \PYG{c}{\PYGZsh{} single output file per species}
+ \PYG{l+s}{r\PYGZsq{}}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{1}\PYG{l+s}{\PYGZsq{}} \PYG{p}{)} \PYG{c}{\PYGZsh{} species name}
+\PYG{k}{def} \PYG{n+nf}{capture\PYGZus{}mammals}\PYG{p}{(}\PYG{n}{infiles}\PYG{p}{,} \PYG{n}{outfile}\PYG{p}{,} \PYG{n}{species}\PYG{p}{)}\PYG{p}{:}
+ \PYG{c}{\PYGZsh{} summarise all animals of this species}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZdq{}}
+\end{Verbatim}
+
+\end{description}
+\end{quote}
+\setbox0\vbox{
+\begin{minipage}{0.95\linewidth}
+\textbf{Core}
+
+\medskip
+
+\phantomsection\label{decorators/originate:decorators-originate}
+\index{@originate!Syntax}\index{Syntax!@originate}
+
+\strong{See also:}
+
+\begin{itemize}
+\item {}
+{\hyperref[decorators/decorators:decorators]{\emph{Decorators}}} for more decorators
+
+\end{itemize}
+
+
+
+
+\subsection{@originate}
+\label{decorators/originate:originate}\label{decorators/originate::doc}\phantomsection\label{decorators/originate:decorators-originate-output-files}\phantomsection\label{decorators/originate:output-files}
+
+\subsubsection{\emph{@originate} ( \emph{output\_files}, {[}\emph{extra\_parameters},...{]} )}
+\label{decorators/originate:decorators-originate-extra-parameters}\label{decorators/originate:originate-output-files-extra-parameters}\label{decorators/originate:extra-parameters}\begin{quote}
+\begin{description}
+\item[{\textbf{Purpose:}}] \leavevmode\begin{itemize}
+\item {}
+Creates (originates) a set of starting files without dependencies from scratch (\emph{ex nihilo}!)
+
+\item {}
+Only called to create files which do not exist.
+
+\item {}
+Invoked once (one job is created) per item in the \code{output\_files} list.
+
+\end{itemize}
+
+\begin{notice}{note}{Note:}
+The first argument for the task function is the \emph{Output}. There is by definition no
+\emph{Input} for \code{@originate}.
+\end{notice}
+
+\end{description}
+
+\textbf{Example}:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{k+kn}{from} \PYG{n+nn}{ruffus} \PYG{k+kn}{import} \PYG{o}{*}
+\PYG{n+nd}{@originate}\PYG{p}{(}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{a}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{b}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{c}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{d}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{extra}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{test}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{n}{extra}\PYG{p}{)}\PYG{p}{:}
+ \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+
+\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{p}{)}
+\end{Verbatim}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{p}{)}
+\PYG{g+go}{ Job = [None \PYGZhy{}\PYGZgt{} a, extra] completed}
+\PYG{g+go}{ Job = [None \PYGZhy{}\PYGZgt{} b, extra] completed}
+\PYG{g+go}{ Job = [None \PYGZhy{}\PYGZgt{} c, extra] completed}
+\PYG{g+go}{ Job = [None \PYGZhy{}\PYGZgt{} d, extra] completed}
+\PYG{g+go}{Completed Task = test}
+
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{c}{\PYGZsh{} all files exist: nothing to do}
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{p}{)}
+
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{c}{\PYGZsh{} delete \PYGZsq{}a\PYGZsq{} so that it is missing}
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{k+kn}{import} \PYG{n+nn}{os}
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{n}{os}\PYG{o}{.}\PYG{n}{unlink}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{a}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{p}{)}
+\PYG{g+go}{ Job = [None \PYGZhy{}\PYGZgt{} a, extra] completed}
+\PYG{g+go}{Completed Task = test}
+\end{Verbatim}
+\end{quote}
+
+\textbf{Parameters:}
+\end{quote}
+\phantomsection\label{decorators/originate:decorators-originate-output-files}\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{output\_files}}] \leavevmode\begin{itemize}
+\item {}
+Can be a single file name or a list of files
+
+\item {}
+Each item in the list is treated as the \emph{Output} of a separate job
+
+\end{itemize}
+
+\end{description}
+
+\end{itemize}
+\phantomsection\label{decorators/originate:decorators-originate-extra-parameters}\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{extra\_parameters}}] \leavevmode
+Any extra parameters are passed verbatim to the task function
+
+\end{description}
+
+\end{itemize}
+\phantomsection\label{decorators/split:decorators-split}
+\index{@split!Syntax}\index{Syntax!@split}
+
+\strong{See also:}
+
+\begin{itemize}
+\item {}
+{\hyperref[decorators/decorators:decorators]{\emph{Decorators}}} for more decorators
+
+\end{itemize}
+
+
+
+
+\subsection{@split}
+\label{decorators/split::doc}\label{decorators/split:split}\phantomsection\label{decorators/split:decorators-split-tasks-or-file-names}\phantomsection\label{decorators/split:tasks-or-file-names}\phantomsection\label{decorators/split:decorators-split-extra-parameters}\phantomsection\label{decorators/split:extra-parameters}
+
+\subsubsection{\emph{@split} ( \emph{tasks\_or\_file\_names}, \emph{output\_files}, {[}\emph{extra\_parameters},...{]} )}
+\label{decorators/split:output-files}\label{decorators/split:decorators-split-output-files}\label{decorators/split:split-tasks-or-file-names-output-files-extra-parameters}\begin{quote}
+\begin{description}
+\item[{\textbf{Purpose:}}] \leavevmode
+\begin{DUlineblock}{0em}
+\item[] Splits a single set of input files into multiple output file names, where the number of
+output files may not be known beforehand.
+\item[] Only out of date tasks (comparing input and output files) will be run
+\end{DUlineblock}
+
+\end{description}
+
+\textbf{Example}:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n+nd}{@split}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{big\PYGZus{}file}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{*.little\PYGZus{}files}\PYG{l+s}{\PYGZsq{}}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{split\PYGZus{}big\PYGZus{}to\PYGZus{}small}\PYG{p}{(}\PYG{n}{input\PYGZus{}file}\PYG{p}{,} \PYG{n}{output\PYGZus{}files}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{print} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{input\PYGZus{}file = }\PYG{l+s+si}{\PYGZpc{}s}\PYG{l+s}{\PYGZdq{}} \PYG{o}{\PYGZpc{}} \PYG{n}{input\PYGZus{}file}
+ \PYG{k}{print} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{output\PYGZus{}file = }\PYG{l+s+si}{\PYGZpc{}s}\PYG{l+s}{\PYGZdq{}} \PYG{o}{\PYGZpc{}} \PYG{n}{output\PYGZus{}files}
+\end{Verbatim}
+
+\begin{quote}
+
+will produce:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+input\_file = big\_file
+output\_file = *.little\_files
+\end{Verbatim}
+\end{quote}
+
+\textbf{Parameters:}
+\end{quote}
+\phantomsection\label{decorators/split:decorators-split-tasks-or-file-names}\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{tasks\_or\_file\_names}}] \leavevmode
+can be a:
+\begin{enumerate}
+\item {}
+(Nested) list of file name strings (as in the example above).
+\begin{quote}
+
+\begin{DUlineblock}{0em}
+\item[] File names containing \code{*{[}{]}?} will be expanded as a \href{http://docs.python.org/library/glob.html}{\emph{glob}}.
+\item[] E.g.:\code{"a.*" =\textgreater{} "a.1", "a.2"}
+\end{DUlineblock}
+\end{quote}
+
+\item {}
+Task / list of tasks.
+\begin{quote}
+
+File names are taken from the output of the specified task(s)
+\end{quote}
+
+\end{enumerate}
+
+\end{description}
+
+\end{itemize}
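+
+For instance, a minimal sketch (hypothetical file and task names) in which the \emph{Input} of
+\code{@split} is taken from the output of an upstream task:
+
+\begin{Verbatim}
+from ruffus import *
+
+@originate("combined.data")
+def make_combined(output_file):
+    open(output_file, "w").close()
+
+# Input comes from the output of make_combined; the glob "chunk.*.data" is
+# only used to check whether the task is up to date
+@split(make_combined, "chunk.*.data")
+def split_combined(input_file, output_files):
+    pass
+\end{Verbatim}
+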
+\phantomsection\label{decorators/split:decorators-split-output-files}\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{output\_files}}] \leavevmode
+Specifies the resulting output file name(s).
+
+\begin{DUlineblock}{0em}
+\item[] These are used \textbf{only} to check if the task is up to date.
+\item[] Normally you would use either a \href{http://docs.python.org/library/glob.html}{\emph{glob}} (e.g. \code{*.little\_files} as above) or a ``sentinel file''
+to indicate that the task has completed successfully.
+\item[] You can of course do both:
+\end{DUlineblock}
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n+nd}{@split}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{big\PYGZus{}file}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{sentinel.file}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{*.little\PYGZus{}files}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{split\PYGZus{}big\PYGZus{}to\PYGZus{}small}\PYG{p}{(}\PYG{n}{input\PYGZus{}file}\PYG{p}{,} \PYG{n}{output\PYGZus{}files}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{pass}
+\end{Verbatim}
+\end{quote}
+
+\end{description}
+
+\end{itemize}
+\phantomsection\label{decorators/split:decorators-split-extra-parameters}\begin{itemize}
+\item {} \begin{description}
+\item[{{[}\emph{extra\_parameters, ...}{]}}] \leavevmode
+Any extra parameters are passed verbatim to the task function
+
+\end{description}
+
+\end{itemize}
+
+
+\subsection{@split with \texttt{regex(...)}, \texttt{add\_inputs} and \texttt{inputs}}
+\label{decorators/split:split-with-regex-add-inputs-and-inputs}\begin{quote}
+
+This deprecated syntax is a synonym for {\hyperref[decorators/subdivide:decorators-subdivide]{\emph{@subdivide}}}.
+\end{quote}
+\phantomsection\label{decorators/transform:decorators-transform}
+\index{@transform!Syntax}\index{Syntax!@transform}
+
+\strong{See also:}
+
+\begin{itemize}
+\item {}
+{\hyperref[decorators/decorators:decorators]{\emph{Decorators}}} for more decorators
+
+\end{itemize}
+
+
+
+
+\subsection{@transform}
+\label{decorators/transform::doc}\label{decorators/transform:transform}\phantomsection\label{decorators/transform:decorators-transform-tasks-or-file-names}\phantomsection\label{decorators/transform:tasks-or-file-names}\phantomsection\label{decorators/transform:decorators-transform-extra-parameters}\phantomsection\label{decorators/transform:extra-parameters}\phantomsection\label{decorators/transform:decorators-transform-output-pattern}\phantomsection\label{decorators/transform:output-patt [...]
+
+\subsubsection{\emph{@transform} ( \emph{tasks\_or\_file\_names}, \emph{suffix}\emph{(}\emph{suffix\_string}\emph{)}\textbar{} \emph{regex}\emph{(}\emph{matching\_regex}\emph{)} \textbar{} \emph{formatter}\emph{(}\emph{matching\_formatter}\emph{)}, \emph{output\_pattern}, {[}\emph{extra\_parameters},...{]} )}
+\label{decorators/transform:suffix-string}\label{decorators/transform:transform-tasks-or-file-names-suffix-suffix-string-regex-matching-regex-formatter-matching-formatter-output-pattern-extra-parameters}\label{decorators/transform:decorators-transform-suffix-string}\begin{quote}
+\begin{description}
+\item[{\textbf{Purpose:}}] \leavevmode
+Applies the task function to transform data from input to output files.
+
+Output file names are specified from {\hyperref[decorators/transform:decorators-transform-tasks-or-file-names]{\emph{tasks\_or\_file\_names}}}, i.e. from the output
+of specified tasks, or a list of file names, or a \href{http://docs.python.org/library/glob.html}{\emph{glob}} matching pattern.
+
+String replacement occurs either through suffix matches via {\hyperref[decorators/indicator_objects:decorators-suffix]{\emph{suffix}}} or
+the {\hyperref[decorators/indicator_objects:decorators-formatter]{\emph{formatter}}} or {\hyperref[decorators/indicator_objects:decorators-regex]{\emph{regex}}} indicators.
+
+Only out of date tasks (comparing input and output files) will be run
+
+\end{description}
+
+\textbf{Simple Example}
+\begin{quote}
+
+Transforms \code{*.c} to \code{*.o}:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n+nd}{@transform}\PYG{p}{(}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{1.c}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{2.c}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{,} \PYG{n}{suffix}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.c}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.o}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{compile}\PYG{p}{(}\PYG{n}{infile}\PYG{p}{,} \PYG{n}{outfile}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{pass}
+\end{Verbatim}
+
+Same example with a regular expression:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n+nd}{@transform}\PYG{p}{(}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{1.c}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{2.c}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{,} \PYG{n}{regex}\PYG{p}{(}\PYG{l+s}{r\PYGZdq{}}\PYG{l+s}{.c\PYGZdl{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.o}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{compile}\PYG{p}{(}\PYG{n}{infile}\PYG{p}{,} \PYG{n}{outfile}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{pass}
+\end{Verbatim}
+
+Both result in the following function calls:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{c}{\PYGZsh{} 1.c \PYGZhy{}\PYGZgt{} 1.o}
+\PYG{c}{\PYGZsh{} 2.c \PYGZhy{}\PYGZgt{} 2.o}
+\PYG{n+nb}{compile}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{1.c}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{1.o}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{n+nb}{compile}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{2.c}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{2.o}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\end{Verbatim}
+\end{quote}
+\end{quote}
+
+\textbf{Escaping regular expression patterns}
+\begin{quote}
+
+A string like \code{universal.h} in \code{add\_inputs} will be added \emph{as is}.
+\code{r"\textbackslash{}1.h"}, however, performs suffix substitution, with the special form \code{r"\textbackslash{}1"} matching everything up to the suffix.
+Remember to `escape' \code{r"\textbackslash{}1"} otherwise Ruffus will complain and throw an Exception to remind you.
+The most convenient way is to use a python ``raw'' string.
+\end{quote}
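+
+As a minimal sketch (hypothetical file names), mixing a plain string, which is added \emph{as is},
+with a raw \code{r"\textbackslash{}1"} pattern, which is substituted:
+
+\begin{Verbatim}
+from ruffus import *
+
+# "universal.h" is added unchanged; r"\1.h" is substituted with the
+# non-suffix part of each input file name (the raw string keeps the
+# backslash intact so Ruffus sees "\1" rather than a control character)
+@transform(["1.c", "2.c"],
+           suffix(".c"),
+           add_inputs([r"\1.h", "universal.h"]),
+           r"\1.o")
+def compile(infile, outfile):
+    pass
+\end{Verbatim}
+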
+
+\textbf{Parameters:}
+\end{quote}
+\phantomsection\label{decorators/transform:decorators-transform-tasks-or-file-names}\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{tasks\_or\_file\_names}}] \leavevmode
+can be a:
+\begin{enumerate}
+\item {} \begin{description}
+\item[{Task / list of tasks (as in the example above).}] \leavevmode
+File names are taken from the output of the specified task(s)
+
+\end{description}
+
+\item {} \begin{description}
+\item[{(Nested) list of file name strings.}] \leavevmode\begin{description}
+\item[{File names containing \code{*{[}{]}?} will be expanded as a \href{http://docs.python.org/library/glob.html}{\emph{glob}}.}] \leavevmode
+E.g.:\code{"a.*" =\textgreater{} "a.1", "a.2"}
+
+\end{description}
+
+\end{description}
+
+\end{enumerate}
+
+\end{description}
+
+\end{itemize}
+\phantomsection\label{decorators/transform:decorators-transform-suffix-string}\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{suffix\_string}}] \leavevmode
+must be wrapped in a {\hyperref[decorators/indicator_objects:decorators-suffix]{\emph{suffix}}} indicator object.
+The end of each input file name which matches \code{suffix\_string} will be replaced by \code{output\_pattern}.
+
+Input file names which do not match suffix\_string will be ignored
+
+The non-suffix part of the match can be referred to using the \code{"\textbackslash{}1"} pattern. This
+can be useful for putting the output in a different directory, for example:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n+nd}{@transform}\PYG{p}{(}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{1.c}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{2.c}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{,} \PYG{n}{suffix}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.c}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{l+s}{r\PYGZdq{}}\PYG{l+s}{my\PYGZus{}path/}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{1.o}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{compile}\PYG{p}{(}\PYG{n}{infile}\PYG{p}{,} \PYG{n}{outfile}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{pass}
+\end{Verbatim}
+
+This results in the following function calls:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{c}{\PYGZsh{} 1.c \PYGZhy{}\PYGZgt{} my\PYGZus{}path/1.o}
+\PYG{c}{\PYGZsh{} 2.c \PYGZhy{}\PYGZgt{} my\PYGZus{}path/2.o}
+\PYG{n+nb}{compile}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{1.c}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{my\PYGZus{}path/1.o}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{n+nb}{compile}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{2.c}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{my\PYGZus{}path/2.o}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\end{Verbatim}
+\end{quote}
+
+For convenience and visual clarity, the \code{"\textbackslash{}1"} can be omitted from the output parameter.
+However, the \code{"\textbackslash{}1"} is mandatory for string substitutions in additional parameters,
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n+nd}{@transform}\PYG{p}{(}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{1.c}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{2.c}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{,} \PYG{n}{suffix}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.c}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{p}{[}\PYG{l+s}{r\PYGZdq{}}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{1.o}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.o}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{Compiling } [...]
+\PYG{k}{def} \PYG{n+nf}{compile}\PYG{p}{(}\PYG{n}{infile}\PYG{p}{,} \PYG{n}{outfile}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{pass}
+\end{Verbatim}
+
+Results in the following function calls:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n+nb}{compile}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{1.c}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{1.o}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{1.o}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{Compiling 1}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{verbatim}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{n+nb}{compile}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{2.c}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{2.o}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{2.o}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{Compiling 2}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{verbatim}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\end{Verbatim}
+\end{quote}
+
+Since \code{r"\textbackslash{}1"} is optional for the output parameter, \code{"\textbackslash{}1.o"} and \code{".o"} are equivalent.
+However, strings in other parameters which do not contain \code{r"\textbackslash{}1"} will be included verbatim, much
+like the string \code{"verbatim"} in the above example.
+
+\end{description}
+
+\end{itemize}
+\phantomsection\label{decorators/transform:decorators-transform-matching-regex}\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{matching\_regex}}] \leavevmode
+is a python regular expression string, which must be wrapped in
+a {\hyperref[decorators/indicator_objects:decorators-regex]{\emph{regex}}} indicator object.
+See the python \href{http://docs.python.org/library/re.html}{regular expression (re)}
+documentation for details of regular expression syntax.
+Each output file name is created using regular expression substitution with \code{output\_pattern}.
+
+\end{description}
+
+\end{itemize}
+\phantomsection\label{decorators/transform:decorators-transform-matching-formatter}\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{matching\_formatter}}] \leavevmode
+a {\hyperref[decorators/indicator_objects:decorators-formatter]{\emph{formatter}}} indicator object containing optionally
+a python \href{http://docs.python.org/library/re.html}{regular expression (re)}.
+
+\end{description}
+
+\end{itemize}
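+
+As a minimal sketch (hypothetical file names), \code{formatter} can be used with \code{@transform}
+to build the output name from components of the input file name, such as its base name:
+
+\begin{Verbatim}
+from ruffus import *
+
+# formatter() optionally takes a regular expression; "{basename[0]}" is the
+# first input file name without its directory or extension
+@transform(["a.c", "b.c"],
+           formatter(r"\.c$"),
+           "{basename[0]}.o")
+def compile(infile, outfile):
+    pass
+
+# resulting calls:
+#   compile("a.c", "a.o")
+#   compile("b.c", "b.o")
+\end{Verbatim}
+
+Unlike \code{suffix}, \code{formatter} gives access to the directory, base name and extension of
+each input file name.
+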
+\phantomsection\label{decorators/transform:decorators-transform-output-pattern}\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{output\_pattern}}] \leavevmode
+Specifies the resulting output file name(s).
+
+\end{description}
+
+\end{itemize}
+\phantomsection\label{decorators/transform:decorators-transform-extra-parameters}\begin{itemize}
+\item {} \begin{description}
+\item[{{[}\emph{extra\_parameters, ...}{]}}] \leavevmode
+Any extra parameters are passed to the task function.
+
+If \code{regex(matching\_regex)} or \code{formatter(...)} is used, then substitution
+is first applied to (even nested) string parameters. Other data types are passed
+verbatim.
+
+For example:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n+nd}{@transform}\PYG{p}{(}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{a.c}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{b.c}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{,} \PYG{n}{regex}\PYG{p}{(}\PYG{l+s}{r\PYGZdq{}}\PYG{l+s}{(.*).c}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{l+s}{r\PYGZdq{}}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{1.o}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{r\PYGZdq{}}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{1}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{compile}\PYG{p}{(}\PYG{n}{infile}\PYG{p}{,} \PYG{n}{outfile}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{pass}
+\end{Verbatim}
+
+will result in the following function calls:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n+nb}{compile}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{a.c}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{a.o}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{a}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{n+nb}{compile}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{b.c}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{b.o}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{b}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\end{Verbatim}
+
+\end{description}
+
+\end{itemize}
+
+See {\hyperref[decorators/transform_ex:decorators-transform-ex]{\emph{here}}} for more advanced uses of transform.
+\phantomsection\label{decorators/merge:decorators-merge}
+\index{@merge!Syntax}\index{Syntax!@merge}
+
+\strong{See also:}
+
+\begin{itemize}
+\item {}
+{\hyperref[decorators/decorators:decorators]{\emph{Decorators}}} for more decorators
+
+\end{itemize}
+
+
+\phantomsection\label{decorators/merge:decorators-merge-tasks-or-file-names}\phantomsection\label{decorators/merge:tasks-or-file-names}\phantomsection\label{decorators/merge:decorators-merge-extra-parameters}\phantomsection\label{decorators/merge:extra-parameters}
+
+\subsection{@merge}
+\label{decorators/merge:decorators-merge-output-file}\label{decorators/merge:merge}\label{decorators/merge::doc}\label{decorators/merge:output-file}
+
+\subsubsection{\emph{@merge} ( \emph{tasks\_or\_file\_names}, \emph{output\_file}, {[}\emph{extra\_parameters},...{]} )}
+\label{decorators/merge:merge-tasks-or-file-names-output-file-extra-parameters}\begin{quote}
+\begin{description}
+\item[{\textbf{Purpose:}}] \leavevmode
+Merges multiple input files into a single output.
+
+Only out of date tasks (comparing input and output files) will be run
+
+\end{description}
+
+\textbf{Example}:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n+nd}{@merge}\PYG{p}{(}\PYG{n}{previous\PYGZus{}task}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{all.summary}\PYG{l+s}{\PYGZsq{}}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{summarize}\PYG{p}{(}\PYG{n}{infiles}\PYG{p}{,} \PYG{n}{summary\PYGZus{}file}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{pass}
+\end{Verbatim}
+
+\textbf{Parameters:}
+\end{quote}
+\phantomsection\label{decorators/merge:decorators-merge-tasks-or-file-names}\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{tasks\_or\_file\_names}}] \leavevmode
+can be a:
+\begin{enumerate}
+\item {} \begin{description}
+\item[{Task / list of tasks (as in the example above).}] \leavevmode
+File names are taken from the output of the specified task(s)
+
+\end{description}
+
+\item {} \begin{description}
+\item[{(Nested) list of file name strings.}] \leavevmode\begin{description}
+\item[{File names containing \code{*{[}{]}?} will be expanded as a \href{http://docs.python.org/library/glob.html}{\emph{glob}}.}] \leavevmode
+E.g.:\code{"a.*" =\textgreater{} "a.1", "a.2"}
+
+\end{description}
+
+\end{description}
+
+\end{enumerate}
+
+\end{description}
+
+\end{itemize}
+\phantomsection\label{decorators/merge:decorators-merge-output-file}\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{output\_file}}] \leavevmode
+Specifies the resulting output file name(s).
+
+\end{description}
+
+\end{itemize}
+\phantomsection\label{decorators/merge:decorators-merge-extra-parameters}\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{extra\_parameters, ...}}] \leavevmode
+Any optional extra parameters are passed verbatim to the task function
+
+\end{description}
+
+\end{itemize}
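+
+As a minimal sketch (hypothetical file names), combining a glob \emph{Input} with an extra
+parameter that is passed through verbatim:
+
+\begin{Verbatim}
+from ruffus import *
+
+# all *.count files are merged into a single summary file; the label string
+# is passed unchanged to the task function
+@merge("*.count", "all.summary", "grand total")
+def summarize(input_files, summary_file, label):
+    pass
+\end{Verbatim}
+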
+
+See {\hyperref[decorators/collate:decorators-collate]{\emph{here}}} for more advanced uses of merging.
+\end{minipage}}
+\begin{center}\setlength{\fboxsep}{5pt}\shadowbox{\box0}\end{center}
+\setbox0\vbox{
+\begin{minipage}{0.95\linewidth}
+\textbf{For advanced users}
+
+\medskip
+
+\phantomsection\label{decorators/subdivide:decorators-subdivide}
+\index{@subdivide!Syntax}\index{Syntax!@subdivide}
+
+\strong{See also:}
+
+\begin{itemize}
+\item {}
+{\hyperref[decorators/decorators:decorators]{\emph{Decorators}}} for more decorators
+
+\end{itemize}
+
+
+
+
+\subsection{@subdivide}
+\label{decorators/subdivide:subdivide}\label{decorators/subdivide::doc}\phantomsection\label{decorators/subdivide:decorators-subdivide-tasks-or-file-names}\phantomsection\label{decorators/subdivide:tasks-or-file-names}\phantomsection\label{decorators/subdivide:decorators-subdivide-extra-parameters}\phantomsection\label{decorators/subdivide:extra-parameters}\phantomsection\label{decorators/subdivide:decorators-subdivide-output-pattern}\phantomsection\label{decorators/subdivide:output-patt [...]
+
+\subsubsection{\emph{@subdivide} ( \emph{tasks\_or\_file\_names}, \emph{regex}\emph{(}\emph{matching\_regex}\emph{)} \textbar{} \emph{formatter}\emph{(}\emph{matching\_formatter}\emph{)}, {[} \emph{inputs} \emph{(}\emph{input\_pattern\_or\_glob}\emph{)} \textbar{} \emph{add\_inputs} \emph{(}\emph{input\_pattern\_or\_glob}\emph{)} {]}, \emph{output\_pattern}, {[}\emph{extra\_parameters},...{]} )}
+\label{decorators/subdivide:input-pattern-or-glob}\label{decorators/subdivide:decorators-subdivide-input-pattern-or-glob}\label{decorators/subdivide:subdivide-tasks-or-file-names-regex-matching-regex-formatter-matching-formatter-inputs-input-pattern-or-glob-add-inputs-input-pattern-or-glob-output-pattern-extra-parameters}\begin{quote}
+
+\textbf{Purpose:}
+\begin{quote}
+\begin{itemize}
+\item {}
+Subdivides each \emph{Input} in a set further into multiple \emph{Outputs}.
+
+\item {}
+\textbf{Many to Even More} operator
+
+\item {}
+The number of files in each \emph{Output} can be set at runtime by the use of globs
+
+\item {}
+Output file names are specified using the {\hyperref[decorators/indicator_objects:decorators-formatter]{\emph{formatter}}} or {\hyperref[decorators/indicator_objects:decorators-regex]{\emph{regex}}} indicators from {\hyperref[decorators/subdivide:decorators-subdivide-tasks-or-file-names]{\emph{tasks\_or\_file\_names}}}, i.e. from the output
+of specified tasks, or a list of file names, or a \href{http://docs.python.org/library/glob.html}{\emph{glob}} matching pattern.
+
+\item {} \begin{description}
+\item[{Additional inputs or dependencies can be added dynamically to the task:}] \leavevmode
+{\hyperref[decorators/indicator_objects:decorators-add-inputs]{\emph{add\_inputs}}} nests the original input parameters in a list before adding additional dependencies.
+
+{\hyperref[decorators/indicator_objects:decorators-inputs]{\emph{inputs}}} replaces the original input parameters wholesale.
+
+\end{description}
+
+\item {}
+Only out of date tasks (comparing input and output files) will be run.
+
+\end{itemize}
+
+\begin{notice}{note}{Note:}
+The use of \textbf{split} as a synonym for \textbf{subdivide} is deprecated.
+\end{notice}
+\end{quote}
+
+\textbf{Example}:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{k+kn}{from} \PYG{n+nn}{ruffus} \PYG{k+kn}{import} \PYG{o}{*}
+\PYG{k+kn}{from} \PYG{n+nn}{random} \PYG{k+kn}{import} \PYG{n}{randint}
+\PYG{k+kn}{import} \PYG{n+nn}{os}
+
+\PYG{n+nd}{@originate}\PYG{p}{(}\PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{0.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{1.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{2.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{create\PYGZus{}files}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{with} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{pass}
+
+
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} Subdivide each of 3 start files further into [NNN1, NNN2, NNN3] number of files}
+\PYG{c}{\PYGZsh{} where NNN1, NNN2, NNN3 are determined at run time}
+\PYG{c}{\PYGZsh{}}
+\PYG{n+nd}{@subdivide}\PYG{p}{(}\PYG{n}{create\PYGZus{}files}\PYG{p}{,} \PYG{n}{formatter}\PYG{p}{(}\PYG{p}{)}\PYG{p}{,}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}path[0]\PYGZcb{}/\PYGZob{}basename[0]\PYGZcb{}.*.step1}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{c}{\PYGZsh{} Output parameter: Glob matches any number of output file names}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}path[0]\PYGZcb{}/\PYGZob{}basename[0]\PYGZcb{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)} \PYG{c}{\PYGZsh{} Extra parameter: Append to this for output file names}
+\PYG{k}{def} \PYG{n+nf}{subdivide\PYGZus{}files}\PYG{p}{(}\PYG{n}{input\PYGZus{}file}\PYG{p}{,} \PYG{n}{output\PYGZus{}files}\PYG{p}{,} \PYG{n}{output\PYGZus{}file\PYGZus{}name\PYGZus{}root}\PYG{p}{)}\PYG{p}{:}
+ \PYG{c}{\PYGZsh{}}
+ \PYG{c}{\PYGZsh{} IMPORTANT: cleanup rubbish from previous run first}
+ \PYG{c}{\PYGZsh{}}
+ \PYG{k}{for} \PYG{n}{oo} \PYG{o+ow}{in} \PYG{n}{output\PYGZus{}files}\PYG{p}{:}
+ \PYG{n}{os}\PYG{o}{.}\PYG{n}{unlink}\PYG{p}{(}\PYG{n}{oo}\PYG{p}{)}
+ \PYG{c}{\PYGZsh{} The number of output files is decided at run time}
+ \PYG{n}{number\PYGZus{}of\PYGZus{}output\PYGZus{}files} \PYG{o}{=} \PYG{n}{randint}\PYG{p}{(}\PYG{l+m+mi}{2}\PYG{p}{,}\PYG{l+m+mi}{4}\PYG{p}{)}
+ \PYG{k}{for} \PYG{n}{ii} \PYG{o+ow}{in} \PYG{n+nb}{range}\PYG{p}{(}\PYG{n}{number\PYGZus{}of\PYGZus{}output\PYGZus{}files}\PYG{p}{)}\PYG{p}{:}
+ \PYG{n}{output\PYGZus{}file\PYGZus{}name} \PYG{o}{=} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}output\PYGZus{}file\PYGZus{}name\PYGZus{}root\PYGZcb{}.\PYGZob{}ii\PYGZcb{}.step1}\PYG{l+s}{\PYGZdq{}}\PYG{o}{.}\PYG{n}{format}\PYG{p}{(}\PYG{o}{*}\PYG{o}{*}\PYG{n+nb}{locals}\PYG{p}{(}\PYG{p}{)}\PYG{p}{)}
+ \PYG{k}{with} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file\PYGZus{}name}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{pass}
+
+
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} Each output of subdivide\PYGZus{}files results in a separate job for downstream tasks}
+\PYG{c}{\PYGZsh{}}
+\PYG{n+nd}{@transform}\PYG{p}{(}\PYG{n}{subdivide\PYGZus{}files}\PYG{p}{,} \PYG{n}{suffix}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.step1}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.step2}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{analyse\PYGZus{}files}\PYG{p}{(}\PYG{n}{input\PYGZus{}file}\PYG{p}{,} \PYG{n}{output\PYGZus{}file\PYGZus{}name}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{with} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file\PYGZus{}name}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{pass}
+
+\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{p}{)}
+\end{Verbatim}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{p}{)}
+\PYG{g+go}{ Job = [None \PYGZhy{}\PYGZgt{} 0.start] completed}
+\PYG{g+go}{ Job = [None \PYGZhy{}\PYGZgt{} 1.start] completed}
+\PYG{g+go}{ Job = [None \PYGZhy{}\PYGZgt{} 2.start] completed}
+\PYG{g+go}{Completed Task = create\PYGZus{}files}
+\PYG{g+go}{ Job = [0.start \PYGZhy{}\PYGZgt{} 0.*.step1, 0] completed}
+\PYG{g+go}{ Job = [1.start \PYGZhy{}\PYGZgt{} 1.*.step1, 1] completed}
+\PYG{g+go}{ Job = [2.start \PYGZhy{}\PYGZgt{} 2.*.step1, 2] completed}
+\PYG{g+go}{Completed Task = subdivide\PYGZus{}files}
+\PYG{g+go}{ Job = [0.0.step1 \PYGZhy{}\PYGZgt{} 0.0.step2] completed}
+\PYG{g+go}{ Job = [0.1.step1 \PYGZhy{}\PYGZgt{} 0.1.step2] completed}
+\PYG{g+go}{ Job = [0.2.step1 \PYGZhy{}\PYGZgt{} 0.2.step2] completed}
+\PYG{g+go}{ Job = [1.0.step1 \PYGZhy{}\PYGZgt{} 1.0.step2] completed}
+\PYG{g+go}{ Job = [1.1.step1 \PYGZhy{}\PYGZgt{} 1.1.step2] completed}
+\PYG{g+go}{ Job = [1.2.step1 \PYGZhy{}\PYGZgt{} 1.2.step2] completed}
+\PYG{g+go}{ Job = [1.3.step1 \PYGZhy{}\PYGZgt{} 1.3.step2] completed}
+\PYG{g+go}{ Job = [2.0.step1 \PYGZhy{}\PYGZgt{} 2.0.step2] completed}
+\PYG{g+go}{ Job = [2.1.step1 \PYGZhy{}\PYGZgt{} 2.1.step2] completed}
+\PYG{g+go}{ Job = [2.2.step1 \PYGZhy{}\PYGZgt{} 2.2.step2] completed}
+\PYG{g+go}{ Job = [2.3.step1 \PYGZhy{}\PYGZgt{} 2.3.step2] completed}
+\PYG{g+go}{Completed Task = analyse\PYGZus{}files}
+\end{Verbatim}
+\end{quote}
+
+\textbf{Parameters:}
+\end{quote}
+\phantomsection\label{decorators/subdivide:decorators-subdivide-tasks-or-file-names}\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{tasks\_or\_file\_names}}] \leavevmode
+can be a:
+\begin{enumerate}
+\item {} \begin{description}
+\item[{Task / list of tasks (as in the example above).}] \leavevmode
+File names are taken from the output of the specified task(s)
+
+\end{description}
+
+\item {} \begin{description}
+\item[{(Nested) list of file name strings.}] \leavevmode\begin{description}
+\item[{File names containing \code{*{[}{]}?} will be expanded as a \href{http://docs.python.org/library/glob.html}{\emph{glob}}.}] \leavevmode
+E.g.:\code{"a.*" =\textgreater{} "a.1", "a.2"}
+
+\end{description}
+
+\end{description}
+
+\end{enumerate}
+
+\end{description}
+
+\end{itemize}
+\phantomsection\label{decorators/subdivide:decorators-subdivide-matching-regex}\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{matching\_regex}}] \leavevmode
+is a python regular expression string, which must be wrapped in
+a {\hyperref[decorators/indicator_objects:decorators-regex]{\emph{regex}}} indicator object.
+See the python \href{http://docs.python.org/library/re.html}{regular expression (re)}
+documentation for details of regular expression syntax.
+
+\end{description}
+
+\end{itemize}
+\phantomsection\label{decorators/subdivide:decorators-subdivide-matching-formatter}\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{matching\_formatter}}] \leavevmode
+a {\hyperref[decorators/indicator_objects:decorators-formatter]{\emph{formatter}}} indicator object containing optionally
+a python \href{http://docs.python.org/library/re.html}{regular expression (re)}.
+
+\end{description}
+
+\end{itemize}
+\phantomsection\label{decorators/subdivide:decorators-subdivide-output-pattern}\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{output\_pattern}}] \leavevmode
+Specifies the resulting output file name(s). Can include glob patterns.
+Strings are subject to {\hyperref[decorators/indicator_objects:decorators-regex]{\emph{regex}}} or {\hyperref[decorators/indicator_objects:decorators-formatter]{\emph{formatter}}}
+substitution.
+
+\end{description}
+
+\end{itemize}
+\phantomsection\label{decorators/subdivide:decorators-subdivide-input-pattern-or-glob}\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{input\_pattern}}] \leavevmode
+Specifies the resulting input(s) to each job.
+Must be wrapped in an {\hyperref[decorators/indicator_objects:decorators-inputs]{\emph{inputs}}} or an {\hyperref[decorators/indicator_objects:decorators-add-inputs]{\emph{add\_inputs}}} indicator object.
+
+Can be a:
+\begin{enumerate}
+\item {} \begin{description}
+\item[{Task / list of tasks (as in the example above).}] \leavevmode
+File names are taken from the output of the specified task(s)
+
+\end{description}
+
+\item {}
+(Nested) list of file name strings.
+
+\end{enumerate}
+
+Strings are subject to {\hyperref[decorators/indicator_objects:decorators-regex]{\emph{regex}}} or {\hyperref[decorators/indicator_objects:decorators-formatter]{\emph{formatter}}} substitution.
+
+\end{description}
+
+\end{itemize}
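+
+As a minimal sketch (hypothetical file names, assuming \code{add\_inputs} behaves here exactly as
+described in the syntax above), a shared dependency can be attached to every subdivided job:
+
+\begin{Verbatim}
+from ruffus import *
+
+# each *.start file is subdivided as before; add_inputs() additionally makes
+# every job depend on the (hypothetical) shared file "config.txt"
+@subdivide(["0.start", "1.start"],
+           formatter(),
+           add_inputs("config.txt"),
+           "{path[0]}/{basename[0]}.*.step1",   # glob: any number of outputs
+           "{path[0]}/{basename[0]}")           # extra: root for output names
+def subdivide_with_config(input_files, output_files, output_name_root):
+    pass
+\end{Verbatim}
+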
+\phantomsection\label{decorators/subdivide:decorators-subdivide-extra-parameters}\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{extra\_parameters}}] \leavevmode
+Any extra parameters are consumed by the task function and not forwarded further down the pipeline.
+Strings are subject to {\hyperref[decorators/indicator_objects:decorators-regex]{\emph{regex}}} or {\hyperref[decorators/indicator_objects:decorators-formatter]{\emph{formatter}}}
+substitution.
+
+\end{description}
+
+\end{itemize}
+\phantomsection\label{decorators/transform_ex:decorators-transform-ex}
+\index{@transform, inputs(...)!Syntax}\index{Syntax!@transform, inputs(...)}\index{@transform, add\_inputs(...)!Syntax}\index{Syntax!@transform, add\_inputs(...)}
+
+\strong{See also:}
+
+\begin{itemize}
+\item {}
+{\hyperref[decorators/decorators:decorators]{\emph{Decorators}}} for more decorators
+
+\end{itemize}
+
+
+
+
+\subsection{@transform with \texttt{add\_inputs} and \texttt{inputs}}
+\label{decorators/transform_ex:transform-with-add-inputs-and-inputs}\label{decorators/transform_ex::doc}\phantomsection\label{decorators/transform_ex:decorators-transform-tasks-or-file-names}\phantomsection\label{decorators/transform_ex:tasks-or-file-names}\phantomsection\label{decorators/transform_ex:decorators-transform-extra-parameters}\phantomsection\label{decorators/transform_ex:extra-parameters}\phantomsection\label{decorators/transform_ex:decorators-transform-output-pattern}\phant [...]
+
+\subsubsection{\emph{@transform} ( \emph{tasks\_or\_file\_names}, \emph{suffix}\emph{(}\emph{suffix\_string}\emph{)}\textbar{} \emph{regex}\emph{(}\emph{matching\_regex}\emph{)} \textbar{} \emph{formatter}\emph{(}\emph{matching\_formatter}\emph{)}, \emph{inputs} \textbar{} \emph{add\_inputs}\emph{(}\emph{input\_pattern\_or\_glob}\emph{)}, \emph{output\_pattern}, {[}\emph{extra\_parameters},...{]} )}
+\label{decorators/transform_ex:suffix-string}\label{decorators/transform_ex:transform-tasks-or-file-names-suffix-suffix-string-regex-matching-regex-formatter-matching-formatter-inputs-add-inputs-input-pattern-or-glob-output-pattern-extra-parameters}\label{decorators/transform_ex:decorators-transform-suffix-string}\begin{quote}
+\begin{description}
+\item[{\textbf{Purpose:}}] \leavevmode
+This variant of \code{@transform} allows additional inputs or dependencies to be added
+dynamically to the task.
+
+Output file names and strings in the extra parameters
+are determined from {\hyperref[decorators/transform_ex:decorators-transform-tasks-or-file-names]{\emph{tasks\_or\_file\_names}}}, i.e. from the output
+of up stream tasks, or a list of file names.
+
+This variant of \code{@transform} allows input file names to be derived in the same way.
+
+String replacement occurs either through suffix matches via {\hyperref[decorators/indicator_objects:decorators-suffix]{\emph{suffix}}} or
+the {\hyperref[decorators/indicator_objects:decorators-formatter]{\emph{formatter}}} or {\hyperref[decorators/indicator_objects:decorators-regex]{\emph{regex}}} indicators.
+
+{\hyperref[decorators/indicator_objects:decorators-add-inputs]{\emph{add\_inputs}}} nests the original input parameters in a list before adding additional dependencies.
+
+{\hyperref[decorators/indicator_objects:decorators-inputs]{\emph{inputs}}} replaces the original input parameters wholesale.
+
+Only out of date tasks (comparing input and output files) will be run
+
+\end{description}
+
+\textbf{Example of} {\hyperref[decorators/indicator_objects:decorators-add-inputs]{\emph{add\_inputs}}}
+\begin{quote}
+
+A common task in compiling C code is to include the corresponding header file for the source.
+\begin{description}
+\item[{To compile \code{*.c} to \code{*.o}, adding \code{*.h} and the common header \code{universal.h}:}] \leavevmode
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n+nd}{@transform}\PYG{p}{(}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{1.c}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{2.c}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{,} \PYG{n}{suffix}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.c}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{n}{add\PYGZus{}inputs}\PYG{p}{(}\PYG{p}{[}\PYG{l+s}{r\PYGZdq{}}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{1.h}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{universal.h}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p [...]
+\PYG{k}{def} \PYG{n+nf}{compile}\PYG{p}{(}\PYG{n}{infile}\PYG{p}{,} \PYG{n}{outfile}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{pass}
+\end{Verbatim}
+
+\item[{This will result in the following function calls:}] \leavevmode
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n+nb}{compile}\PYG{p}{(}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{1.c}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{1.h}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{universal.h}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{1.o}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{n+nb}{compile}\PYG{p}{(}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{2.c}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{2.h}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{universal.h}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{2.o}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\end{Verbatim}
+
+\end{description}
+\end{quote}
+
+\textbf{Example of} {\hyperref[decorators/indicator_objects:decorators-inputs]{\emph{inputs}}}
+\begin{quote}
+
+\code{inputs(...)} allows the original input parameters to be replaced wholesale.
+\begin{description}
+\item[{This can be seen in the following example:}] \leavevmode
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n+nd}{@transform}\PYG{p}{(}\PYG{p}{[} \PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{1.c}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{A.c}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+m+mi}{2}\PYG{p}{]}\PYG{p}{,}
+ \PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{2.c}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{B.c}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{C.c}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+m+mi}{3}\PYG{p}{]}\PYG{p}{]}\PYG{p}{,}
+ \PYG{n}{suffix}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.c}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{n}{inputs}\PYG{p}{(}\PYG{p}{[}\PYG{l+s}{r\PYGZdq{}}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{1.py}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{docs.rst}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{)}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.pyc}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{compile}\PYG{p}{(}\PYG{n}{infile}\PYG{p}{,} \PYG{n}{outfile}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{pass}
+\end{Verbatim}
+
+\item[{This will result in the following function calls:}] \leavevmode
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n+nb}{compile}\PYG{p}{(}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{1.py}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{docs.rst}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{1.pyc}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{n+nb}{compile}\PYG{p}{(}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{2.py}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{docs.rst}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{2.pyc}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\end{Verbatim}
+
+\end{description}
+\end{quote}
+
+\textbf{Parameters:}
+\end{quote}
+\phantomsection\label{decorators/transform_ex:decorators-transform-tasks-or-file-names}\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{tasks\_or\_file\_names}}] \leavevmode
+can be a:
+\begin{enumerate}
+\item {} \begin{description}
+\item[{Task / list of tasks (as in the example above).}] \leavevmode
+File names are taken from the output of the specified task(s)
+
+\end{description}
+
+\item {} \begin{description}
+\item[{(Nested) list of file name strings.}] \leavevmode\begin{description}
+\item[{File names containing \code{*{[}{]}?} will be expanded as a \href{http://docs.python.org/library/glob.html}{\emph{glob}}.}] \leavevmode
+E.g.:\code{"a.*" =\textgreater{} "a.1", "a.2"}
+
+\end{description}
+
+\end{description}
+
+\end{enumerate}
+
+\end{description}
+
+\end{itemize}
+\phantomsection\label{decorators/transform_ex:decorators-transform-suffix-string}\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{suffix\_string}}] \leavevmode
+must be wrapped in a {\hyperref[decorators/indicator_objects:decorators-suffix]{\emph{suffix}}} indicator object.
+The end of each file name which matches suffix\_string will be replaced by \emph{output\_pattern}.
+Thus:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n+nd}{@transform}\PYG{p}{(}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{a.c}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{b.c}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{,} \PYG{n}{suffix}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.c}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.o}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{compile}\PYG{p}{(}\PYG{n}{infile}\PYG{p}{,} \PYG{n}{outfile}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{pass}
+\end{Verbatim}
+
+will result in the following function calls:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n+nb}{compile}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{a.c}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{a.o}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{n+nb}{compile}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{b.c}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{b.o}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\end{Verbatim}
+
+File names which do not match suffix\_string will be ignored
+
+\end{description}
+
+\end{itemize}
+\phantomsection\label{decorators/transform_ex:decorators-transform-matching-regex}\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{matching\_regex}}] \leavevmode
+is a python regular expression string, which must be wrapped in
+a {\hyperref[decorators/indicator_objects:decorators-regex]{\emph{regex}}} indicator object.
+See the python \href{http://docs.python.org/library/re.html}{regular expression (re)}
+documentation for details of regular expression syntax.
+Each output file name is created using regular expression substitution with \code{output\_pattern}.
+
+\end{description}
+
+\end{itemize}
+\phantomsection\label{decorators/transform_ex:decorators-transform-matching-formatter}\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{matching\_formatter}}] \leavevmode
+a {\hyperref[decorators/indicator_objects:decorators-formatter]{\emph{formatter}}} indicator object containing optionally
+a python \href{http://docs.python.org/library/re.html}{regular expression (re)}.
+
+\end{description}
+
+\end{itemize}
+\phantomsection\label{decorators/transform_ex:decorators-transform-input-pattern-or-glob}\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{input\_pattern}}] \leavevmode
+Specifies the resulting input(s) to each job.
+Must be wrapped in an {\hyperref[decorators/indicator_objects:decorators-inputs]{\emph{inputs}}} or an {\hyperref[decorators/indicator_objects:decorators-add-inputs]{\emph{add\_inputs}}} indicator object.
+
+Can be a:
+\begin{enumerate}
+\item {} \begin{description}
+\item[{Task / list of tasks (as in the example above).}] \leavevmode
+File names are taken from the output of the specified task(s)
+
+\end{description}
+
+\item {} \begin{description}
+\item[{(Nested) list of file name strings.}] \leavevmode
+Strings will be subject to substitution.
+File names containing \code{*{[}{]}?} will be expanded as a \href{http://docs.python.org/library/glob.html}{\emph{glob}}.
+E.g.:\code{"a.*" =\textgreater{} "a.1", "a.2"}
+
+\end{description}
+
+\end{enumerate}
+
+\end{description}
+
+\end{itemize}
+\phantomsection\label{decorators/transform_ex:decorators-transform-output-pattern}\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{output\_pattern}}] \leavevmode
+Specifies the resulting output file name(s).
+
+\end{description}
+
+\end{itemize}
+\phantomsection\label{decorators/transform_ex:decorators-transform-extra-parameters}\begin{itemize}
+\item {} \begin{description}
+\item[{{[}\emph{extra\_parameters, ...}{]}}] \leavevmode
+Any extra parameters are passed to the task function.
+
+If the \code{regex(...)} or \code{formatter(...)} parameter is used, then substitution
+is first applied to (even nested) string parameters. Other data types are passed
+verbatim.
+
+For example:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n+nd}{@transform}\PYG{p}{(}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{a.c}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{b.c}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{,} \PYG{n}{regex}\PYG{p}{(}\PYG{l+s}{r\PYGZdq{}}\PYG{l+s}{(.*).c}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{n}{inputs}\PYG{p}{(}\PYG{l+s}{r\PYGZdq{}}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{1.c}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{r\PYGZdq{}}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{1.h}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{ [...]
+\PYG{k}{def} \PYG{n+nf}{compile}\PYG{p}{(}\PYG{n}{infiles}\PYG{p}{,} \PYG{n}{outfile}\PYG{p}{,} \PYG{n}{file\PYGZus{}name\PYGZus{}root}\PYG{p}{)}\PYG{p}{:}
+ \PYG{c}{\PYGZsh{} do something here}
+ \PYG{k}{pass}
+\end{Verbatim}
+
+will result in the following function calls:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n+nb}{compile}\PYG{p}{(}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{1.c}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{1.h}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{universal.h}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{1.o}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{1}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{n+nb}{compile}\PYG{p}{(}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{2.c}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{2.h}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{universal.h}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{2.o}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{2}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\end{Verbatim}
+
+\end{description}
+
+\end{itemize}
+
+See {\hyperref[decorators/transform:decorators-transform]{\emph{here}}} for more straightforward ways to use transform.
+\phantomsection\label{decorators/collate:decorators-collate}
+\index{@collate!Syntax}\index{Syntax!@collate}
+
+\strong{See also:}
+
+\begin{itemize}
+\item {}
+{\hyperref[decorators/decorators:decorators]{\emph{Decorators}}} for more decorators
+
+\end{itemize}
+
+
+
+
+\subsection{@collate}
+\label{decorators/collate::doc}\label{decorators/collate:collate}\phantomsection\label{decorators/collate:decorators-collate-tasks-or-file-names}\phantomsection\label{decorators/collate:tasks-or-file-names}\phantomsection\label{decorators/collate:decorators-collate-extra-parameters}\phantomsection\label{decorators/collate:extra-parameters}\phantomsection\label{decorators/collate:decorators-collate-output-pattern}\phantomsection\label{decorators/collate:output-pattern}\phantomsection\labe [...]
+
+\subsubsection{\emph{@collate} ( \emph{tasks\_or\_file\_names}, \emph{regex}\emph{(}\emph{matching\_regex}\emph{)} \textbar{} \emph{formatter}\emph{(}\emph{matching\_formatter}\emph{)}, \emph{output\_pattern}, {[}\emph{extra\_parameters},...{]} )}
+\label{decorators/collate:collate-tasks-or-file-names-regex-matching-regex-formatter-matching-formatter-output-pattern-extra-parameters}\label{decorators/collate:matching-formatter}\label{decorators/collate:decorators-collate-matching-formatter}\begin{quote}
+\begin{description}
+\item[{\textbf{Purpose:}}] \leavevmode
+Groups / collates sets of input files, each into a separate summary.
+
+Only out of date tasks (comparing input and output files) will be run
+
+Output file names and strings in the extra parameters
+are determined from {\hyperref[decorators/collate:decorators-collate-tasks-or-file-names]{\emph{tasks\_or\_file\_names}}}, i.e. from the output
+of up stream tasks, or a list of file names.
+
+String replacement occurs either through suffix matches via {\hyperref[decorators/indicator_objects:decorators-suffix]{\emph{suffix}}} or
+the {\hyperref[decorators/indicator_objects:decorators-formatter]{\emph{formatter}}} or {\hyperref[decorators/indicator_objects:decorators-regex]{\emph{regex}}} indicators.
+
+\code{@collate} groups together all \textbf{Input} which result in identical \textbf{Output} and \textbf{extra}
+parameters.
+
+It is a \textbf{many to fewer} operation.
+
+\item[{\textbf{Example}:}] \leavevmode
+\code{regex(r".*(\textbackslash{}..+)"), "\textbackslash{}1.summary"} creates a separate summary file for each suffix:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n}{animal\PYGZus{}files} \PYG{o}{=} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{a.fish}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{b.fish}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{c.mammals}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{d.mammals}\PYG{l+s}{\PYGZdq{}}
+\PYG{c}{\PYGZsh{} summarise by file suffix:}
+\PYG{n+nd}{@collate}\PYG{p}{(}\PYG{n}{animal\PYGZus{}files}\PYG{p}{,} \PYG{n}{regex}\PYG{p}{(}\PYG{l+s}{r\PYGZdq{}}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{.(.+)\PYGZdl{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{l+s}{r\PYGZsq{}}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{1.summary}\PYG{l+s}{\PYGZsq{}}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{summarize}\PYG{p}{(}\PYG{n}{infiles}\PYG{p}{,} \PYG{n}{summary\PYGZus{}file}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{pass}
+\end{Verbatim}
+
+\end{description}
+
+\textbf{Parameters:}
+\end{quote}
+\phantomsection\label{decorators/collate:decorators-collate-tasks-or-file-names}\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{tasks\_or\_file\_names}}] \leavevmode
+can be a:
+\begin{enumerate}
+\item {} \begin{description}
+\item[{Task / list of tasks (as in the example above).}] \leavevmode
+File names are taken from the output of the specified task(s)
+
+\end{description}
+
+\item {} \begin{description}
+\item[{(Nested) list of file name strings.}] \leavevmode\begin{description}
+\item[{File names containing \code{*{[}{]}?} will be expanded as a \href{http://docs.python.org/library/glob.html}{\emph{glob}}.}] \leavevmode
+E.g.:\code{"a.*" =\textgreater{} "a.1", "a.2"}
+
+\end{description}
+
+\end{description}
+
+\end{enumerate}
+
+\end{description}
+
+\end{itemize}
+\phantomsection\label{decorators/collate:decorators-collate-matching-regex}\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{matching\_regex}}] \leavevmode
+is a python regular expression string, which must be wrapped in
+a {\hyperref[decorators/indicator_objects:decorators-regex]{\emph{regex}}} indicator object.
+See the python \href{http://docs.python.org/library/re.html}{regular expression (re)}
+documentation for details of regular expression syntax.
+
+\end{description}
+
+\end{itemize}
+\phantomsection\label{decorators/collate:decorators-collate-matching-formatter}\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{matching\_formatter}}] \leavevmode
+a {\hyperref[decorators/indicator_objects:decorators-formatter]{\emph{formatter}}} indicator object containing optionally
+a python \href{http://docs.python.org/library/re.html}{regular expression (re)}.
+
+\end{description}
+
+\end{itemize}
+\phantomsection\label{decorators/collate:decorators-collate-output-pattern}\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{output\_pattern}}] \leavevmode
+Specifies the resulting output file name(s).
+
+\end{description}
+
+\end{itemize}
+\phantomsection\label{decorators/collate:decorators-collate-extra-parameters}\begin{quote}
+\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{extra\_parameters}}] \leavevmode
+Any extra parameters are passed verbatim to the task function
+
+\end{description}
+
+\end{itemize}
+\begin{enumerate}
+\item {}
+\emph{Outputs} and optional extra parameters are passed to the task function after string
+substitution is applied to any strings. Non-string values are passed through unchanged.
+
+\item {}
+Each collate job consists of all the input files whose string substitution produces
+the same set of output / extra parameters.
+
+\item {}
+In the above case, \code{a.fish} and \code{b.fish} both produce \code{fish.summary} after regular
+expression substitution, and so are collated into a single job:
+\code{{[}"a.fish", "b.fish" -\textgreater{} "fish.summary"{]}},
+while \code{c.mammals} and \code{d.mammals} both produce \code{mammals.summary} and are collated into a separate job:
+\code{{[}"c.mammals", "d.mammals" -\textgreater{} "mammals.summary"{]}}
+
+\end{enumerate}
+
+\textbf{Example 2}:
+\begin{quote}
+
+Suppose we had the following files:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n}{cows}\PYG{o}{.}\PYG{n}{mammals}\PYG{o}{.}\PYG{n}{animal}
+\PYG{n}{horses}\PYG{o}{.}\PYG{n}{mammals}\PYG{o}{.}\PYG{n}{animal}
+\PYG{n}{sheep}\PYG{o}{.}\PYG{n}{mammals}\PYG{o}{.}\PYG{n}{animal}
+
+\PYG{n}{snake}\PYG{o}{.}\PYG{n}{reptile}\PYG{o}{.}\PYG{n}{animal}
+\PYG{n}{lizard}\PYG{o}{.}\PYG{n}{reptile}\PYG{o}{.}\PYG{n}{animal}
+\PYG{n}{crocodile}\PYG{o}{.}\PYG{n}{reptile}\PYG{o}{.}\PYG{n}{animal}
+
+\PYG{n}{pufferfish}\PYG{o}{.}\PYG{n}{fish}\PYG{o}{.}\PYG{n}{animal}
+\end{Verbatim}
+
+and we wanted to end up with three different resulting outputs:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+cow.mammals.animal
+horse.mammals.animal
+sheep.mammals.animal
+ -\textgreater{} mammals.results
+
+snake.reptile.animal
+lizard.reptile.animal
+crocodile.reptile.animal
+ -\textgreater{} reptile.results
+
+pufferfish.fish.animal
+ -\textgreater{} fish.results
+\end{Verbatim}
+
+This is the \code{@collate} code required:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n}{animals} \PYG{o}{=} \PYG{p}{[} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{cows.mammals.animal}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{horses.mammals.animal}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{sheep.mammals.animal}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{snake.reptile.animal}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{lizard.reptile.animal}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{crocodile.reptile.animal}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{pufferfish.fish.animal}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}
+
+\PYG{n+nd}{@collate}\PYG{p}{(}\PYG{n}{animals}\PYG{p}{,} \PYG{n}{regex}\PYG{p}{(}\PYG{l+s}{r\PYGZdq{}}\PYG{l+s}{(.+)}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{.(.+)}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{.animal}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{l+s}{r\PYGZdq{}}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{2.results}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{c}{\PYGZsh{} \PYGZbs{}1 = species [cow, horse]}
+\PYG{c}{\PYGZsh{} \PYGZbs{}2 = phylogenetics group [mammals, reptile, fish]}
+\PYG{k}{def} \PYG{n+nf}{summarize\PYGZus{}animals\PYGZus{}into\PYGZus{}groups}\PYG{p}{(}\PYG{n}{species\PYGZus{}file}\PYG{p}{,} \PYG{n}{result\PYGZus{}file}\PYG{p}{)}\PYG{p}{:}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{ ... more code here}\PYG{l+s}{\PYGZdq{}}
+ \PYG{k}{pass}
+\end{Verbatim}
+\end{quote}
+\end{quote}
+
+See {\hyperref[decorators/merge:decorators-merge]{\emph{@merge}}} for an alternative way to summarise files.
+\phantomsection\label{decorators/collate_ex:decorators-collate-ex}
+\index{@collate (Advanced Usage)!Syntax}\index{Syntax!@collate (Advanced Usage)}\index{@collate, inputs(...)!Syntax}\index{Syntax!@collate, inputs(...)}\index{@collate, add\_inputs(...)!Syntax}\index{Syntax!@collate, add\_inputs(...)}
+
+\strong{See also:}
+
+\begin{itemize}
+\item {}
+{\hyperref[decorators/decorators:decorators]{\emph{Decorators}}} for more decorators
+
+\end{itemize}
+
+
+
+
+\subsection{@collate with \texttt{add\_inputs} and \texttt{inputs}}
+\label{decorators/collate_ex:collate-with-add-inputs-and-inputs}\label{decorators/collate_ex::doc}\phantomsection\label{decorators/collate_ex:decorators-collate-ex-tasks-or-file-names}\phantomsection\label{decorators/collate_ex:tasks-or-file-names}\phantomsection\label{decorators/collate_ex:decorators-collate-ex-extra-parameters}\phantomsection\label{decorators/collate_ex:extra-parameters}\phantomsection\label{decorators/collate_ex:decorators-collate-ex-output-pattern}\phantomsection\lab [...]
+
+\subsubsection{\emph{@collate} ( \emph{tasks\_or\_file\_names}, \emph{regex}\emph{(}\emph{matching\_regex}\emph{)} \textbar{} \emph{formatter}\emph{(}\emph{matching\_formatter}\emph{)}, {[}\emph{inputs}\emph{(}\emph{input\_pattern\_or\_glob}\emph{)} \textbar{} \emph{add\_inputs}\emph{(}\emph{input\_pattern\_or\_glob}\emph{)}{]} , \emph{output\_pattern}, {[}\emph{extra\_parameters},...{]} )}
+\label{decorators/collate_ex:decorators-collate-ex-matching-formatter}\label{decorators/collate_ex:matching-formatter}\label{decorators/collate_ex:collate-tasks-or-file-names-regex-matching-regex-formatter-matching-formatter-inputs-input-pattern-or-glob-add-inputs-input-pattern-or-glob-output-pattern-extra-parameters}\begin{quote}
+\begin{description}
+\item[{\textbf{Purpose:}}] \leavevmode
+Groups / collates sets of input files, each into a separate summary.
+
+This variant of \code{@collate} allows additional inputs or dependencies to be added
+dynamically to the task.
+
+Output file names are determined from {\hyperref[decorators/collate_ex:decorators-collate-ex-tasks-or-file-names]{\emph{tasks\_or\_file\_names}}}, i.e. from the output
+of upstream tasks, or a list of file names.
+
+This variant of \code{@collate} allows input file names to be derived in the same way.
+
+{\hyperref[decorators/indicator_objects:decorators-add-inputs]{\emph{add\_inputs}}} nests the original input parameters in a list before adding additional dependencies.
+
+{\hyperref[decorators/indicator_objects:decorators-inputs]{\emph{inputs}}} replaces the original input parameters wholesale.
+
+Only out-of-date tasks (comparing input and output files) will be run.
+
+\end{description}
+
+\textbf{Example of} {\hyperref[decorators/indicator_objects:decorators-add-inputs]{\emph{add\_inputs}}}
+\begin{quote}
+
+\code{regex(r".*(\textbackslash{}..+)"), "\textbackslash{}1.summary"} creates a separate summary file for each suffix.
+But we also add date of birth data for each species:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n}{animal\PYGZus{}files} \PYG{o}{=} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{tuna.fish}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{shark.fish}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{dog.mammals}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{cat.mammals}\PYG{l+s}{\PYGZdq{}}
+\PYG{c}{\PYGZsh{} summarise by file suffix:}
+\PYG{n+nd}{@collate}\PYG{p}{(}\PYG{n}{animal\PYGZus{}files}\PYG{p}{,} \PYG{n}{regex}\PYG{p}{(}\PYG{l+s}{r\PYGZdq{}}\PYG{l+s}{.+}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{.(.+)\PYGZdl{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{n}{add\PYGZus{}inputs}\PYG{p}{(}\PYG{l+s}{r\PYGZdq{}}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{1.date\PYGZus{}of\PYGZus{}birth}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{l+s}{r\PYGZsq{}}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{1.summary}\PYG{l+s}{\PYGZsq{}}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{summarize}\PYG{p}{(}\PYG{n}{infiles}\PYG{p}{,} \PYG{n}{summary\PYGZus{}file}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{pass}
+\end{Verbatim}
+
+This results in the following equivalent function calls:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n}{summarize}\PYG{p}{(}\PYG{p}{[} \PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{shark.fish}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{fish.date\PYGZus{}of\PYGZus{}birth}\PYG{l+s}{\PYGZdq{}} \PYG{p}{]}\PYG{p}{,}
+ \PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{tuna.fish}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{fish.date\PYGZus{}of\PYGZus{}birth}\PYG{l+s}{\PYGZdq{}} \PYG{p}{]} \PYG{p}{]}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{fish.summary}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{n}{summarize}\PYG{p}{(}\PYG{p}{[} \PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{cat.mammals}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{mammals.date\PYGZus{}of\PYGZus{}birth}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{,}
+ \PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{dog.mammals}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{mammals.date\PYGZus{}of\PYGZus{}birth}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]} \PYG{p}{]}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{mammals.summary}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\end{Verbatim}
+\end{quote}
+
+\textbf{Example of} {\hyperref[decorators/indicator_objects:decorators-inputs]{\emph{inputs}}}
+\begin{quote}
+
+Using \code{inputs(...)} summarises only the dates of birth for each species group:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n}{animal\PYGZus{}files} \PYG{o}{=} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{tuna.fish}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{shark.fish}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{dog.mammals}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{cat.mammals}\PYG{l+s}{\PYGZdq{}}
+\PYG{c}{\PYGZsh{} summarise by file suffix:}
+\PYG{n+nd}{@collate}\PYG{p}{(}\PYG{n}{animal\PYGZus{}files}\PYG{p}{,} \PYG{n}{regex}\PYG{p}{(}\PYG{l+s}{r\PYGZdq{}}\PYG{l+s}{.+}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{.(.+)\PYGZdl{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{n}{inputs}\PYG{p}{(}\PYG{l+s}{r\PYGZdq{}}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{1.date\PYGZus{}of\PYGZus{}birth}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{l+s}{r\PYGZsq{}}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{1.summary}\PYG{l+s}{\PYGZsq{}}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{summarize}\PYG{p}{(}\PYG{n}{infiles}\PYG{p}{,} \PYG{n}{summary\PYGZus{}file}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{pass}
+\end{Verbatim}
+
+This results in the following equivalent function calls:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n}{summarize}\PYG{p}{(}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{fish.date\PYGZus{}of\PYGZus{}birth}\PYG{l+s}{\PYGZdq{}} \PYG{p}{]}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{fish.summary}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{n}{summarize}\PYG{p}{(}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{mammals.date\PYGZus{}of\PYGZus{}birth}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{mammals.summary}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\end{Verbatim}
+\end{quote}
+
+\textbf{Parameters:}
+\end{quote}
+\phantomsection\label{decorators/collate_ex:decorators-collate-ex-tasks-or-file-names}\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{tasks\_or\_file\_names}}] \leavevmode
+can be a:
+\begin{enumerate}
+\item {} \begin{description}
+\item[{Task / list of tasks (as in the example above).}] \leavevmode
+File names are taken from the output of the specified task(s)
+
+\end{description}
+
+\item {} \begin{description}
+\item[{(Nested) list of file name strings.}] \leavevmode\begin{description}
+\item[{File names containing \code{*{[}{]}?} will be expanded as a \href{http://docs.python.org/library/glob.html}{\emph{glob}}.}] \leavevmode
+E.g.:\code{"a.*" =\textgreater{} "a.1", "a.2"}
+
+\end{description}
+
+\end{description}
+
+\end{enumerate}
+
+\end{description}
+
+\end{itemize}
+\phantomsection\label{decorators/collate_ex:decorators-collate-ex-matching-regex}\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{matching\_regex}}] \leavevmode
+is a python regular expression string, which must be wrapped in
+a {\hyperref[decorators/indicator_objects:decorators-regex]{\emph{regex}}} indicator object
+See python \href{http://docs.python.org/library/re.html}{regular expression (re)}
+documentation for details of regular expression syntax
+
+\end{description}
+
+\end{itemize}
+\phantomsection\label{decorators/collate_ex:decorators-collate-ex-matching-formatter}\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{matching\_formatter}}] \leavevmode
+a {\hyperref[decorators/indicator_objects:decorators-formatter]{\emph{formatter}}} indicator object containing optionally
+a python \href{http://docs.python.org/library/re.html}{regular expression (re)}.
+
+\end{description}
+
+\end{itemize}
+\phantomsection\label{decorators/collate_ex:decorators-collate-ex-input-pattern-or-glob}\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{input\_pattern}}] \leavevmode
+Specifies the resulting input(s) to each job.
+Must be wrapped in an {\hyperref[decorators/indicator_objects:decorators-inputs]{\emph{inputs}}} or an {\hyperref[decorators/indicator_objects:decorators-add-inputs]{\emph{add\_inputs}}} indicator object.
+
+Can be a:
+\begin{enumerate}
+\item {} \begin{description}
+\item[{Task / list of tasks (as in the example above).}] \leavevmode
+File names are taken from the output of the specified task(s)
+
+\end{description}
+
+\item {} \begin{description}
+\item[{(Nested) list of file name strings.}] \leavevmode
+Strings will be subject to substitution.
+File names containing \code{*{[}{]}?} will be expanded as a \href{http://docs.python.org/library/glob.html}{\emph{glob}}.
+E.g.:\code{"a.*" =\textgreater{} "a.1", "a.2"}
+
+\end{description}
+
+\end{enumerate}
+
+\end{description}
+
+\end{itemize}
+\phantomsection\label{decorators/collate_ex:decorators-collate-ex-output-pattern}\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{output\_pattern}}] \leavevmode
+Specifies the resulting output file name(s).
+
+\end{description}
+
+\end{itemize}
+\phantomsection\label{decorators/collate_ex:decorators-collate-ex-extra-parameters}\begin{quote}
+\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{extra\_parameters}}] \leavevmode
+Any extra parameters are passed verbatim to the task function
+
+\end{description}
+
+\end{itemize}
+\begin{enumerate}
+\item {}
+\emph{outputs} and optional extra parameters are passed to the functions after string
+substitution in any strings. Non-string values are passed through unchanged.
+
+\item {}
+Each collate job consists of input files which are aggregated by string substitution
+to a single set of output / extra parameter matches
+
+\end{enumerate}
+\end{quote}
+
+See {\hyperref[decorators/collate:decorators-collate]{\emph{@collate}}} for more straightforward ways to use collate.
+\phantomsection\label{decorators/graphviz:decorators-graphviz}
+\index{@graphviz!Syntax}\index{Syntax!@graphviz}
+
+\strong{See also:}
+
+\begin{itemize}
+\item {}
+{\hyperref[decorators/decorators:decorators]{\emph{Decorators}}} for more decorators
+
+\end{itemize}
+
+
+
+
+\subsection{@graphviz}
+\label{decorators/graphviz::doc}\label{decorators/graphviz:graphviz}
+
+\subsubsection{\emph{@graphviz} ( \emph{graphviz\_parameters},...{]} )}
+\label{decorators/graphviz:graphviz-graphviz-parameters}\label{decorators/graphviz:decorators-graphviz-graphviz-parameters}\label{decorators/graphviz:graphviz-parameters}\begin{quote}
+
+\emph{Contributed by Sean Davis, with improved syntax via Jake Biesinger}
+\begin{description}
+\item[{\textbf{Purpose:}}] \leavevmode
+Customise the graphic for each task in printed flowcharts by adding
+\href{http://www.graphviz.org/doc/info/attrs.html}{graphviz attributes}
+(URL, shape, colour) to that node.
+\begin{itemize}
+\item {}
+This allows HTML formatting in the task names (using the \code{label} parameter as in the following example).
+HTML labels \textbf{must} be enclosed in \code{\textless{}} and \code{\textgreater{}}. E.g.
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n}{label} \PYG{o}{=} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZlt{}Line \PYGZlt{}BR/\PYGZgt{} wrapped task\PYGZus{}name()\PYGZgt{}}\PYG{l+s}{\PYGZdq{}}
+\end{Verbatim}
+
+\item {}
+You can also opt to keep the task name and wrap it with a prefix and suffix:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n}{label\PYGZus{}suffix} \PYG{o}{=} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{??? }\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{n}{label\PYGZus{}prefix} \PYG{o}{=} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{: What is this?}\PYG{l+s}{\PYGZdq{}}
+\end{Verbatim}
+
+\item {}
+The \code{URL} attribute allows the generation of clickable SVG files, as well as client- / server-side
+image maps usable in web pages.
+See \href{http://www.graphviz.org/content/output-formats\#dimap}{Graphviz documentation}
+
+\end{itemize}
+
+\item[{\textbf{Example}:}] \leavevmode
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n+nd}{@graphviz}\PYG{p}{(}\PYG{n}{URL}\PYG{o}{=}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{http://cnn.com}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{n}{fillcolor} \PYG{o}{=} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZsh{}FFCCCC}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,}
+ \PYG{n}{color} \PYG{o}{=} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZsh{}FF0000}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{n}{pencolor}\PYG{o}{=}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZsh{}FF0000}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{n}{fontcolor}\PYG{o}{=}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZsh{}4B6000}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,}
+ \PYG{n}{label\PYGZus{}suffix} \PYG{o}{=} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{???}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{n}{label\PYGZus{}prefix} \PYG{o}{=} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{What is this?\PYGZlt{}BR/\PYGZgt{} }\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{n}{label} \PYG{o}{=} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZlt{}What \PYGZlt{}FONT COLOR=}\PYG{l+s+se}{\PYGZbs{}\PYGZdq{}}\PYG{l+s}{red}\PYG{l+s+se}{\PYGZbs{}\PYGZdq{}}\PYG{l+s}{\PYGZgt{}is\PYGZlt{}/FONT\PYGZgt{}this\PYGZgt{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{n}{shape}\PYG{o}{=} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{component}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{n}{height} \PYG{o}{=} \PYG{l+m+mf}{1.5}\PYG{p}{,} \PYG{n}{peripheries} \PYG{o}{=} \PYG{l+m+mi}{5}\PYG{p}{,}
+ \PYG{n}{style}\PYG{o}{=}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{dashed}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{Up\PYGZus{}to\PYGZus{}date\PYGZus{}task2}\PYG{p}{(}\PYG{n}{infile}\PYG{p}{,} \PYG{n}{outfile}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{pass}
+
+\PYG{c}{\PYGZsh{} Can use dictionary if you wish...}
+\PYG{n}{graphviz\PYGZus{}params} \PYG{o}{=} \PYG{p}{\PYGZob{}}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{URL}\PYG{l+s}{\PYGZdq{}}\PYG{p}{:}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{http://cnn.com}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{fontcolor}\PYG{l+s}{\PYGZdq{}}\PYG{p}{:} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZsh{}FF00FF}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZsq{}}\PYG{p}{\PYGZcb{}}
+\PYG{n+nd}{@graphviz}\PYG{p}{(}\PYG{o}{*}\PYG{o}{*}\PYG{n}{graphviz\PYGZus{}params}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{myTask}\PYG{p}{(}\PYG{n+nb}{input}\PYG{p}{,}\PYG{n}{output}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{pass}
+\end{Verbatim}
+
+\scalebox{0.300000}{\includegraphics{history_html_flowchart1.png}}
+
+\end{description}
+
+\textbf{Parameters:}
+\end{quote}
+\phantomsection\label{decorators/graphviz:decorators-graphviz-graphviz-parameters}\begin{itemize}
+\item {}
+named \emph{graphviz\_parameters}
+\begin{quote}
+
+Including among others:
+\begin{itemize}
+\item {}
+URL (e.g. \code{"www.ruffus.org.uk"})
+
+\item {}
+fillcolor
+
+\item {}
+color
+
+\item {}
+pencolor
+
+\item {}
+fontcolor
+
+\item {}
+label\_suffix (appended to task name)
+
+\item {}
+label\_prefix (precedes task name)
+
+\item {}
+label (replaces task name)
+
+\item {}
+shape (e.g. \code{"component", "box", "diamond", "doubleoctagon"} etc., see \href{http://www.graphviz.org/doc/info/shapes.html}{graphviz} )
+
+\item {}
+height
+
+\item {}
+peripheries (Number of borders)
+
+\item {}
+style (e.g. \code{"solid", "wedged", "dashed"} etc., see \href{http://www.graphviz.org/doc/info/attrs.html\#k:style}{graphviz} )
+
+\end{itemize}
+
+Colours may be specified as \code{'"\#FFCCCC"', 'red', 'red:blue', '/bugn9/7'} etc. see \href{http://www.graphviz.org/doc/info/attrs.html\#k:color}{color names} and \href{http://www.graphviz.org/doc/info/colors.html}{colour schemes}
+\end{quote}
+
+\end{itemize}
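+
+To actually render the customised nodes, the flowchart can be written out with
+\code{pipeline\_printout\_graph()}. A minimal sketch, assuming the \code{myTask} definition
+from the example above and \code{from ruffus import *} (the output file name
+\code{flowchart.svg} is illustrative):
+
+\begin{Verbatim}
+# write the pipeline flowchart, including the @graphviz attributes, as svg
+pipeline_printout_graph("flowchart.svg", "svg", [myTask])
+\end{Verbatim}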
+\phantomsection\label{decorators/mkdir:decorators-mkdir}
+\index{@mkdir!Syntax}\index{Syntax!@mkdir}
+
+\strong{See also:}
+
+\begin{itemize}
+\item {}
+{\hyperref[decorators/decorators:decorators]{\emph{Decorators}}} for more decorators
+
+\item {}
+More on @mkdir in the \code{Ruffus} {\hyperref[tutorials/new_tutorial/mkdir:new-manual-mkdir]{\emph{Manual}}}
+
+\item {}
+{\hyperref[decorators/follows:decorators-follows]{\emph{@follows(mkdir(``dir''))}}} specifies the creation of a \emph{single} directory as a task pre-requisite.
+
+\end{itemize}
+
+
+
+
+\subsection{@mkdir}
+\label{decorators/mkdir:mkdir}\label{decorators/mkdir::doc}\phantomsection\label{decorators/mkdir:decorators-mkdir-tasks-or-file-names}\phantomsection\label{decorators/mkdir:tasks-or-file-names}\phantomsection\label{decorators/mkdir:decorators-mkdir-output-pattern}\phantomsection\label{decorators/mkdir:output-pattern}\phantomsection\label{decorators/mkdir:decorators-mkdir-matching-regex}\phantomsection\label{decorators/mkdir:matching-regex}\phantomsection\label{decorators/mkdir:decorator [...]
+
+\subsubsection{\emph{@mkdir} ( \emph{tasks\_or\_file\_names}, \emph{suffix}\emph{(}\emph{suffix\_string}\emph{)}\textbar{} \emph{regex}\emph{(}\emph{matching\_regex}\emph{)} \textbar{} \emph{formatter}\emph{(}\emph{matching\_formatter}\emph{)}, \emph{output\_pattern})}
+\label{decorators/mkdir:suffix-string}\label{decorators/mkdir:mkdir-tasks-or-file-names-suffix-suffix-string-regex-matching-regex-formatter-matching-formatter-output-pattern}\label{decorators/mkdir:decorators-mkdir-suffix-string}\begin{quote}
+
+\textbf{Purpose:}
+\begin{quote}
+\begin{itemize}
+\item {}
+Prepares directories to receive \emph{Output} files
+
+\item {}
+Used when \emph{Output} path names are generated at runtime from \emph{Inputs}. \textbf{mkdir} can make sure these runtime-specified paths exist.
+
+\item {}
+Directory names are generated from \textbf{Input} using string substitution via {\hyperref[decorators/indicator_objects:decorators-formatter]{\emph{formatter()}}}, {\hyperref[decorators/indicator_objects:decorators-suffix]{\emph{suffix()}}} or {\hyperref[decorators/indicator_objects:decorators-regex]{\emph{regex()}}}.
+
+\item {}
+Behaves essentially like \code{@transform} but with its own (internal) function which does the actual work of making a directory
+
+\item {}
+Does \emph{not} invoke the host task function to which it is attached
+
+\item {}
+Makes specified directories using \href{http://docs.python.org/2/library/os.html\#os.makedirs}{os.makedirs}
+
+\item {}
+Multiple directories can be created in a list
+
+\end{itemize}
+
+\begin{notice}{note}{Note:}
+Only missing directories are created.
+
+In other words, the same directory can be specified multiple times safely without, for example, being recreated repeatedly.
+
+Sometimes, for pipelines with multiple entry points, this is the only way to make sure that certain working or output
+directories are always created or available \emph{before} the pipeline runs.
+\end{notice}
+\end{quote}
+
+\textbf{Simple Example}
+\begin{quote}
+
+Creates multiple directories per job to hold the results of {\hyperref[decorators/transform:decorators-transform]{\emph{@transform}}}
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{k+kn}{from} \PYG{n+nn}{ruffus} \PYG{k+kn}{import} \PYG{o}{*}
+
+\PYG{c}{\PYGZsh{} initial files}
+\PYG{n+nd}{@originate}\PYG{p}{(}\PYG{p}{[} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{A.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,}
+ \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{B.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{create\PYGZus{}initial\PYGZus{}files}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{with} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)} \PYG{k}{as} \PYG{n}{oo}\PYG{p}{:} \PYG{k}{pass}
+
+
+\PYG{c}{\PYGZsh{} create files without making directories \PYGZhy{}\PYGZgt{} ERROR}
+\PYG{n+nd}{@transform}\PYG{p}{(} \PYG{n}{create\PYGZus{}initial\PYGZus{}files}\PYG{p}{,}
+ \PYG{n}{formatter}\PYG{p}{(}\PYG{p}{)}\PYG{p}{,}
+ \PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}path[0]\PYGZcb{}/\PYGZob{}basename[0]\PYGZcb{}/processed.txt}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}path[0]\PYGZcb{}/\PYGZob{}basename[0]\PYGZcb{}.tmp/tmp.processed.txt}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{create\PYGZus{}files\PYGZus{}without\PYGZus{}mkdir}\PYG{p}{(}\PYG{n}{input\PYGZus{}file}\PYG{p}{,} \PYG{n}{output\PYGZus{}files}\PYG{p}{)}\PYG{p}{:}
+ \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}files}\PYG{p}{[}\PYG{l+m+mi}{0}\PYG{p}{]}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+ \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}files}\PYG{p}{[}\PYG{l+m+mi}{1}\PYG{p}{]}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+
+
+\PYG{c}{\PYGZsh{} create files after making corresponding directories}
+\PYG{n+nd}{@mkdir}\PYG{p}{(} \PYG{n}{create\PYGZus{}initial\PYGZus{}files}\PYG{p}{,}
+ \PYG{n}{formatter}\PYG{p}{(}\PYG{p}{)}\PYG{p}{,}
+ \PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}path[0]\PYGZcb{}/\PYGZob{}basename[0]\PYGZcb{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{c}{\PYGZsh{} create directory}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}path[0]\PYGZcb{}/\PYGZob{}basename[0]\PYGZcb{}.tmp}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{)} \PYG{c}{\PYGZsh{} create directory.tmp}
+\PYG{n+nd}{@transform}\PYG{p}{(} \PYG{n}{create\PYGZus{}initial\PYGZus{}files}\PYG{p}{,}
+ \PYG{n}{formatter}\PYG{p}{(}\PYG{p}{)}\PYG{p}{,}
+ \PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}path[0]\PYGZcb{}/\PYGZob{}basename[0]\PYGZcb{}/processed.txt}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}path[0]\PYGZcb{}/\PYGZob{}basename[0]\PYGZcb{}.tmp/tmp.processed.txt}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{create\PYGZus{}files\PYGZus{}with\PYGZus{}mkdir}\PYG{p}{(}\PYG{n}{input\PYGZus{}file}\PYG{p}{,} \PYG{n}{output\PYGZus{}files}\PYG{p}{)}\PYG{p}{:}
+ \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}files}\PYG{p}{[}\PYG{l+m+mi}{0}\PYG{p}{]}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+ \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}files}\PYG{p}{[}\PYG{l+m+mi}{1}\PYG{p}{]}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+
+\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{p}{[}\PYG{n}{create\PYGZus{}files\PYGZus{}without\PYGZus{}mkdir}\PYG{p}{]}\PYG{p}{)}
+\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{p}{[}\PYG{n}{create\PYGZus{}files\PYGZus{}with\PYGZus{}mkdir}\PYG{p}{]}\PYG{p}{)}
+\end{Verbatim}
+\end{quote}
+
+Running without making the directories first gives errors:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{p}{[}\PYG{n}{create\PYGZus{}files\PYGZus{}without\PYGZus{}mkdir}\PYG{p}{]}\PYG{p}{)}
+\PYG{g+go}{ Job = [None \PYGZhy{}\PYGZgt{} A.start] completed}
+\PYG{g+go}{ Job = [None \PYGZhy{}\PYGZgt{} B.start] completed}
+\PYG{g+go}{Completed Task = create\PYGZus{}initial\PYGZus{}files}
+
+\PYG{g+go}{ Traceback (most recent call last):}
+\PYG{g+go}{ File \PYGZdq{}\PYGZlt{}stdin\PYGZgt{}\PYGZdq{}, line 1, in \PYGZlt{}module\PYGZgt{}}
+\PYG{g+go}{ File \PYGZdq{}/usr/local/lib/python2.7/dist\PYGZhy{}packages/ruffus/task.py\PYGZdq{}, line 3738, in pipeline\PYGZus{}run}
+\PYG{g+go}{ raise job\PYGZus{}errors}
+\PYG{g+go}{ ruffus.ruffus\PYGZus{}exceptions.RethrownJobError:}
+
+\PYG{g+go}{ Original exception:}
+
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{c}{\PYGZsh{} Exception \PYGZsh{}1}
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{c}{\PYGZsh{} \PYGZsq{}exceptions.IOError([Errno 2] No such file or directory: \PYGZsq{}A/processed.txt\PYGZsq{})\PYGZsq{} raised in ...}
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{c}{\PYGZsh{} Task = def create\PYGZus{}files\PYGZus{}without\PYGZus{}mkdir(...):}
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{c}{\PYGZsh{} Job = [A.start \PYGZhy{}\PYGZgt{} [processed.txt, tmp.processed.txt]]}
+\end{Verbatim}
+\end{quote}
+
+Running after making the directories first:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{p}{[}\PYG{n}{create\PYGZus{}files\PYGZus{}with\PYGZus{}mkdir}\PYG{p}{]}\PYG{p}{)}
+\PYG{g+go}{ Job = [None \PYGZhy{}\PYGZgt{} A.start] completed}
+\PYG{g+go}{ Job = [None \PYGZhy{}\PYGZgt{} B.start] completed}
+\PYG{g+go}{Completed Task = create\PYGZus{}initial\PYGZus{}files}
+\PYG{g+go}{ Make directories [A, A.tmp] completed}
+\PYG{g+go}{ Make directories [B, B.tmp] completed}
+\PYG{g+go}{Completed Task = (mkdir 1) before create\PYGZus{}files\PYGZus{}with\PYGZus{}mkdir}
+\PYG{g+go}{ Job = [A.start \PYGZhy{}\PYGZgt{} [processed.txt, tmp.processed.txt]] completed}
+\PYG{g+go}{ Job = [B.start \PYGZhy{}\PYGZgt{} [processed.txt, tmp.processed.txt]] completed}
+\PYG{g+go}{Completed Task = create\PYGZus{}files\PYGZus{}with\PYGZus{}mkdir}
+\end{Verbatim}
+\end{quote}
+\end{quote}
+
+\textbf{Escaping regular expression patterns}
+\begin{quote}
+
+A string like \code{universal.h} in \code{add\_inputs} will be added \emph{as is}.
+\code{r"\textbackslash{}1.h"}, however, performs suffix substitution, with the special form \code{r"\textbackslash{}1"} matching everything up to the suffix.
+Remember to `escape' \code{r"\textbackslash{}1"}, otherwise Ruffus will complain and throw an Exception to remind you.
+The most convenient way is to use a python ``raw'' string.
+\end{quote}
+
+\textbf{Parameters:}
+\end{quote}
+\phantomsection\label{decorators/mkdir:decorators-mkdir-tasks-or-file-names}\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{tasks\_or\_file\_names}}] \leavevmode
+can be a:
+\begin{enumerate}
+\item {} \begin{description}
+\item[{Task / list of tasks (as in the example above).}] \leavevmode
+File names are taken from the output of the specified task(s)
+
+\end{description}
+
+\item {} \begin{description}
+\item[{(Nested) list of file name strings.}] \leavevmode\begin{description}
+\item[{File names containing \code{*{[}{]}?} will be expanded as a \href{http://docs.python.org/library/glob.html}{\emph{glob}}.}] \leavevmode
+E.g.:\code{"a.*" =\textgreater{} "a.1", "a.2"}
+
+\end{description}
+
+\end{description}
+
+\end{enumerate}
+
+\end{description}
+
+\end{itemize}
+\phantomsection\label{decorators/mkdir:decorators-mkdir-suffix-string}\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{suffix\_string}}] \leavevmode
+must be wrapped in a {\hyperref[decorators/indicator_objects:decorators-suffix]{\emph{suffix}}} indicator object.
+The end of each input file name which matches \code{suffix\_string} will be replaced by \code{output\_pattern}.
+
+Input file names which do not match \code{suffix\_string} will be ignored.
+
+The non-suffix part of the match can be referred to using the \code{"\textbackslash{}1"} pattern. This
+can be useful for putting the output in a different directory, for example:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n+nd}{@mkdir}\PYG{p}{(}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{1.c}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{2.c}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{,} \PYG{n}{suffix}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.c}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{l+s}{r\PYGZdq{}}\PYG{l+s}{my\PYGZus{}path/}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{1.o}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{compile}\PYG{p}{(}\PYG{n}{infile}\PYG{p}{,} \PYG{n}{outfile}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{pass}
+\end{Verbatim}
+
+This results in the following function calls:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{c}{\PYGZsh{} 1.c \PYGZhy{}\PYGZgt{} my\PYGZus{}path/1.o}
+\PYG{c}{\PYGZsh{} 2.c \PYGZhy{}\PYGZgt{} my\PYGZus{}path/2.o}
+\PYG{n+nb}{compile}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{1.c}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{my\PYGZus{}path/1.o}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{n+nb}{compile}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{2.c}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{my\PYGZus{}path/2.o}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\end{Verbatim}
+\end{quote}
+
+For convenience and visual clarity, the \code{"\textbackslash{}1"} can be omitted from the output parameter.
+However, the \code{"\textbackslash{}1"} is mandatory for string substitutions in additional parameters:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n+nd}{@mkdir}\PYG{p}{(}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{1.c}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{2.c}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{,} \PYG{n}{suffix}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.c}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{p}{[}\PYG{l+s}{r\PYGZdq{}}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{1.o}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.o}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{Compiling }\PYG [...]
+\PYG{k}{def} \PYG{n+nf}{compile}\PYG{p}{(}\PYG{n}{infile}\PYG{p}{,} \PYG{n}{outfile}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{pass}
+\end{Verbatim}
+
+Results in the following function calls:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n+nb}{compile}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{1.c}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{1.o}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{1.o}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{Compiling 1}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{verbatim}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{n+nb}{compile}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{2.c}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{2.o}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{2.o}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{Compiling 2}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{verbatim}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\end{Verbatim}
+\end{quote}
+
+Since \code{r"\textbackslash{}1"} is optional for the output parameter, \code{r"\textbackslash{}1.o"} and \code{".o"} are equivalent.
+However, strings in other parameters which do not contain \code{r"\textbackslash{}1"} will be included verbatim, much
+like the string \code{"verbatim"} in the above example.
+
+\end{description}
+
+\end{itemize}
+\phantomsection\label{decorators/mkdir:decorators-mkdir-matching-regex}\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{matching\_regex}}] \leavevmode
+is a python regular expression string, which must be wrapped in
+a {\hyperref[decorators/indicator_objects:decorators-regex]{\emph{regex}}} indicator object
+See python \href{http://docs.python.org/library/re.html}{regular expression (re)}
+documentation for details of regular expression syntax
+Each output directory name is created using regular expression substitution with \code{output\_pattern} (see the sketch after this list)
+
+\end{description}
+
+\end{itemize}
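+
+A minimal sketch of \code{@mkdir} with \code{regex()}, following the pattern of the simple
+example above (the file and directory names are illustrative):
+
+\begin{Verbatim}
+from ruffus import *
+
+# hypothetical starting files
+@originate(["sample1.fastq", "sample2.fastq"])
+def make_samples(output_file):
+    open(output_file, "w").close()
+
+# create one results directory per sample before writing into it
+@mkdir(make_samples, regex(r"(.+)\.fastq$"), r"\1.results_dir")
+@transform(make_samples, regex(r"(.+)\.fastq$"), r"\1.results_dir/out.txt")
+def analyse(input_file, output_file):
+    open(output_file, "w").close()
+
+pipeline_run([analyse])
+\end{Verbatim}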
+\phantomsection\label{decorators/mkdir:decorators-mkdir-matching-formatter}\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{matching\_formatter}}] \leavevmode
+a {\hyperref[decorators/indicator_objects:decorators-formatter]{\emph{formatter}}} indicator object containing optionally
+a python \href{http://docs.python.org/library/re.html}{regular expression (re)}.
+
+\end{description}
+
+\end{itemize}
+\phantomsection\label{decorators/mkdir:decorators-mkdir-output-pattern}\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{output\_pattern}}] \leavevmode
+Specifies the resulting output file name(s).
+
+\end{description}
+
+\end{itemize}
+\phantomsection\label{decorators/jobs_limit:decorators-jobs-limit}
+\index{@jobs\_limit!Syntax}\index{Syntax!@jobs\_limit}
+
+\strong{See also:}
+
+\begin{itemize}
+\item {}
+{\hyperref[decorators/decorators:decorators]{\emph{Decorators}}} for more decorators
+
+\end{itemize}
+
+
+
+
+\subsection{@jobs\_limit}
+\label{decorators/jobs_limit:jobs-limit}\label{decorators/jobs_limit::doc}\phantomsection\label{decorators/jobs_limit:decorators-jobs-limit-maximum-num-of-jobs}\phantomsection\label{decorators/jobs_limit:maximum-num-of-jobs}
+
+\subsubsection{\emph{@jobs\_limit} ( \emph{maximum\_num\_of\_jobs}, {[} \emph{name} {]})}
+\label{decorators/jobs_limit:decorators-jobs-limit-name}\label{decorators/jobs_limit:name}\label{decorators/jobs_limit:jobs-limit-maximum-num-of-jobs-name}\begin{quote}
+\begin{description}
+\item[{\textbf{Purpose:}}] \leavevmode
+\begin{DUlineblock}{0em}
+\item[] Manages the resources available for a task.
+\item[] Limits the number of concurrent jobs which can be run in parallel for this task
+\item[] Overrides the value for \code{multiprocess} in {\hyperref[pipeline_functions:pipeline-functions-pipeline-run]{\emph{pipeline\_run}}}
+\item[] If an optional \code{name} is given, the same limit is shared across all tasks with the same @jobs\_limit name.
+\end{DUlineblock}
+
+\end{description}
+
+\textbf{Parameters:}
+\end{quote}
+\phantomsection\label{decorators/jobs_limit:decorators-jobs-limit-maximum-num-of-jobs}\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{maximum\_num\_of\_jobs}}] \leavevmode
+The maximum number of concurrent jobs for this task. Must be an integer number
+greater than or equal to 1.
+
+\end{description}
+
+\end{itemize}
+\phantomsection\label{decorators/jobs_limit:decorators-jobs-limit-name}\begin{quote}
+\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{name}}] \leavevmode
+Optional name for the limit. All tasks with the same name share the same limit if they
+are running concurrently (see the sketch after this list).
+
+\end{description}
+
+\end{itemize}
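+
+A minimal sketch of a shared, named limit (the task and limit names are illustrative):
+at most 3 jobs across the two tasks run at any one time, even though
+\code{pipeline\_run} allows 10 processes overall.
+
+\begin{Verbatim}
+from ruffus import *
+
+# both tasks share the "shared_db" limit of 3 concurrent jobs
+@jobs_limit(3, "shared_db")
+@originate(["a.load", "b.load", "c.load", "d.load"])
+def load_files(output_file):
+    open(output_file, "w").close()
+
+@jobs_limit(3, "shared_db")
+@transform(load_files, suffix(".load"), ".index")
+def index_files(input_file, output_file):
+    open(output_file, "w").close()
+
+pipeline_run([index_files], multiprocess = 10)
+\end{Verbatim}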
+\begin{description}
+\item[{\textbf{Example}}] \leavevmode
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{k+kn}{from} \PYG{n+nn}{ruffus} \PYG{k+kn}{import} \PYG{o}{*}
+
+\PYG{c}{\PYGZsh{} make list of 10 files}
+\PYG{n+nd}{@split}\PYG{p}{(}\PYG{n+nb+bp}{None}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{*.stage1}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{make\PYGZus{}files}\PYG{p}{(}\PYG{n}{input\PYGZus{}file}\PYG{p}{,} \PYG{n}{output\PYGZus{}files}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{for} \PYG{n}{i} \PYG{o+ow}{in} \PYG{n+nb}{range}\PYG{p}{(}\PYG{l+m+mi}{10}\PYG{p}{)}\PYG{p}{:}
+ \PYG{n+nb}{open}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s+si}{\PYGZpc{}d}\PYG{l+s}{.stage1}\PYG{l+s}{\PYGZdq{}} \PYG{o}{\PYGZpc{}} \PYG{n}{i}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+
+\PYG{n+nd}{@jobs\PYGZus{}limit}\PYG{p}{(}\PYG{l+m+mi}{2}\PYG{p}{)}
+\PYG{n+nd}{@transform}\PYG{p}{(}\PYG{n}{make\PYGZus{}files}\PYG{p}{,} \PYG{n}{suffix}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.stage1}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.stage2}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{stage1}\PYG{p}{(}\PYG{n}{input\PYGZus{}file}\PYG{p}{,} \PYG{n}{output\PYGZus{}file}\PYG{p}{)}\PYG{p}{:}
+ \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+
+\PYG{n+nd}{@transform}\PYG{p}{(}\PYG{n}{stage1}\PYG{p}{,} \PYG{n}{suffix}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.stage2}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.stage3}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{stage2}\PYG{p}{(}\PYG{n}{input\PYGZus{}file}\PYG{p}{,} \PYG{n}{output\PYGZus{}file}\PYG{p}{)}\PYG{p}{:}
+ \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+
+\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{p}{[}\PYG{n}{stage2}\PYG{p}{]}\PYG{p}{,} \PYG{n}{multiprocess} \PYG{o}{=} \PYG{l+m+mi}{5}\PYG{p}{)}
+\end{Verbatim}
+
+will run the 10 jobs of \code{stage1} 2 at a time, while \code{stage2} will
+run 5 at a time (from \code{multiprocess = 5}):
+
+\includegraphics{jobs_limit.png}
+
+\end{description}
+\end{quote}
+\phantomsection\label{decorators/posttask:decorators-posttask}
+\index{@posttask!Syntax}\index{Syntax!@posttask}
+
+\strong{See also:}
+
+\begin{itemize}
+\item {}
+{\hyperref[decorators/decorators:decorators]{\emph{Decorators}}} for more decorators
+
+\end{itemize}
+
+
+\phantomsection\label{decorators/posttask:decorators-posttask-function}\phantomsection\label{decorators/posttask:function}
+
+\subsection{@posttask}
+\label{decorators/posttask:decorators-posttask-file-name}\label{decorators/posttask::doc}\label{decorators/posttask:posttask}\label{decorators/posttask:file-name}
+
+\subsubsection{\emph{@posttask} (\emph{function} \textbar{} \emph{touch\_file}\emph{(}\emph{file\_name}\emph{)})}
+\label{decorators/posttask:posttask-function-touch-file-file-name}\begin{quote}
+\begin{description}
+\item[{\textbf{Purpose:}}] \leavevmode
+Calls functions to signal the completion of each task
+
+\end{description}
+
+\textbf{Example}:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{k+kn}{from} \PYG{n+nn}{ruffus} \PYG{k+kn}{import} \PYG{o}{*}
+
+\PYG{k}{def} \PYG{n+nf}{task\PYGZus{}finished}\PYG{p}{(}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{print} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{hooray}\PYG{l+s}{\PYGZdq{}}
+
+\PYG{n+nd}{@posttask}\PYG{p}{(}\PYG{n}{task\PYGZus{}finished}\PYG{p}{)}
+\PYG{n+nd}{@files}\PYG{p}{(}\PYG{n+nb+bp}{None}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{a.1}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{create\PYGZus{}if\PYGZus{}necessary}\PYG{p}{(}\PYG{n}{input\PYGZus{}file}\PYG{p}{,} \PYG{n}{output\PYGZus{}file}\PYG{p}{)}\PYG{p}{:}
+ \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+
+\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{p}{[}\PYG{n}{create\PYGZus{}if\PYGZus{}necessary}\PYG{p}{]}\PYG{p}{)}
+\end{Verbatim}
+
+\textbf{Parameters:}
+\end{quote}
+\phantomsection\label{decorators/posttask:decorators-posttask-function}\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{function}:}] \leavevmode
+\code{function()} will be called when Ruffus passes through a task.
+
+This may happen even if all of the jobs are up-to-date:
+when an upstream task is out-of-date, and the execution passes through
+this point in the pipeline.
+
+\end{description}
+
+\end{itemize}
+\phantomsection\label{decorators/posttask:decorators-posttask-file-name}\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{file\_name}}] \leavevmode
+Files to be \code{touch}-ed after the task is executed.
+
+This will change the date/time stamp of the \code{file\_name} to the current date/time.
+If the file does not exist, an empty file will be created.
+
+Must be wrapped in a {\hyperref[decorators/indicator_objects:decorators-touch-file]{\emph{touch\_file}}} indicator object:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{k+kn}{from} \PYG{n+nn}{ruffus} \PYG{k+kn}{import} \PYG{o}{*}
+
+\PYG{n+nd}{@posttask}\PYG{p}{(}\PYG{n}{touch\PYGZus{}file}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{task\PYGZus{}completed.flag}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{)}
+\PYG{n+nd}{@files}\PYG{p}{(}\PYG{n+nb+bp}{None}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{a.1}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{create\PYGZus{}if\PYGZus{}necessary}\PYG{p}{(}\PYG{n}{input\PYGZus{}file}\PYG{p}{,} \PYG{n}{output\PYGZus{}file}\PYG{p}{)}\PYG{p}{:}
+ \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+
+\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{p}{[}\PYG{n}{create\PYGZus{}if\PYGZus{}necessary}\PYG{p}{]}\PYG{p}{)}
+\end{Verbatim}
+
+\end{description}
+
+\end{itemize}
+\phantomsection\label{decorators/active_if:decorators-active-if}
+\index{@active\_if!Syntax}\index{Syntax!@active\_if}
+
+\strong{See also:}
+
+\begin{itemize}
+\item {}
+{\hyperref[decorators/decorators:decorators]{\emph{Decorators}}} for more decorators
+
+\item {}
+More on @active\_if in the \code{Ruffus} {\hyperref[tutorials/new_tutorial/active_if:new-manual-active-if]{\emph{Manual}}}
+
+\end{itemize}
+
+
+
+
+\subsection{@active\_if}
+\label{decorators/active_if:active-if}\label{decorators/active_if::doc}
+
+\subsubsection{\emph{@active\_if}(on\_or\_off1, {[}on\_or\_off2,...{]})}
+\label{decorators/active_if:active-if-on-or-off1-on-or-off2}\label{decorators/active_if:decorators-active-if-on-or-off}\label{decorators/active_if:on-or-off}\begin{quote}
+
+\textbf{Purpose:}
+\begin{itemize}
+\item {}
+Switches a task on or off at run time depending on its parameters.
+
+\item {}
+Evaluated each time \code{pipeline\_run}, \code{pipeline\_printout} or \code{pipeline\_printout\_graph} is called.
+
+\item {}
+The design and initial implementation were contributed by Jacob Biesinger.
+
+\item {}
+Dormant tasks behave as if they are up to date and have no output.
+
+\end{itemize}
+
+\textbf{Example}:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{k+kn}{from} \PYG{n+nn}{ruffus} \PYG{k+kn}{import} \PYG{o}{*}
+\PYG{n}{run\PYGZus{}if\PYGZus{}true\PYGZus{}1} \PYG{o}{=} \PYG{n+nb+bp}{True}
+\PYG{n}{run\PYGZus{}if\PYGZus{}true\PYGZus{}2} \PYG{o}{=} \PYG{n+nb+bp}{False}
+\PYG{n}{run\PYGZus{}if\PYGZus{}true\PYGZus{}3} \PYG{o}{=} \PYG{n+nb+bp}{True}
+
+
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} task1}
+\PYG{c}{\PYGZsh{}}
+\PYG{n+nd}{@originate}\PYG{p}{(}\PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{a.foo}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{b.foo}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{create\PYGZus{}files}\PYG{p}{(}\PYG{n}{outfile}\PYG{p}{)}\PYG{p}{:}
+ \PYG{l+s+sd}{\PYGZdq{}\PYGZdq{}\PYGZdq{}}
+\PYG{l+s+sd}{ create\PYGZus{}files}
+\PYG{l+s+sd}{ \PYGZdq{}\PYGZdq{}\PYGZdq{}}
+ \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{outfile}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{o}{.}\PYG{n}{write}\PYG{p}{(}\PYG{n}{outfile} \PYG{o}{+} \PYG{l+s}{\PYGZdq{}}\PYG{l+s+se}{\PYGZbs{}n}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} Only runs if all three run\PYGZus{}if\PYGZus{}true conditions are met}
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} @active\PYGZus{}if determines if task is active}
+\PYG{n+nd}{@active\PYGZus{}if}\PYG{p}{(}\PYG{n}{run\PYGZus{}if\PYGZus{}true\PYGZus{}1}\PYG{p}{,} \PYG{k}{lambda}\PYG{p}{:} \PYG{n}{run\PYGZus{}if\PYGZus{}true\PYGZus{}2}\PYG{p}{)}
+\PYG{n+nd}{@active\PYGZus{}if}\PYG{p}{(}\PYG{n}{run\PYGZus{}if\PYGZus{}true\PYGZus{}3}\PYG{p}{)}
+\PYG{n+nd}{@transform}\PYG{p}{(}\PYG{n}{create\PYGZus{}files}\PYG{p}{,} \PYG{n}{suffix}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.foo}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{.bar}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{this\PYGZus{}task\PYGZus{}might\PYGZus{}be\PYGZus{}inactive}\PYG{p}{(}\PYG{n}{infile}\PYG{p}{,} \PYG{n}{outfile}\PYG{p}{)}\PYG{p}{:}
+ \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{outfile}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{o}{.}\PYG{n}{write}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s+si}{\PYGZpc{}s}\PYG{l+s}{ \PYGZhy{}\PYGZgt{} }\PYG{l+s+si}{\PYGZpc{}s}\PYG{l+s+se}{\PYGZbs{}n}\PYG{l+s}{\PYGZdq{}} \PYG{o}{\PYGZpc{}} \PYG{p}{(}\PYG{n}{infile}\PYG{p}{,} \PYG{n}{outfile}\PYG{p}{)}\PYG{p}{)}
+
+
+\PYG{c}{\PYGZsh{} @active\PYGZus{}if switches off task because run\PYGZus{}if\PYGZus{}true\PYGZus{}2 == False}
+\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{n}{verbose} \PYG{o}{=} \PYG{l+m+mi}{3}\PYG{p}{)}
+
+\PYG{c}{\PYGZsh{} @active\PYGZus{}if switches on task because all run\PYGZus{}if\PYGZus{}true conditions are met}
+\PYG{n}{run\PYGZus{}if\PYGZus{}true\PYGZus{}2} \PYG{o}{=} \PYG{n+nb+bp}{True}
+\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{n}{verbose} \PYG{o}{=} \PYG{l+m+mi}{3}\PYG{p}{)}
+\end{Verbatim}
+
+Produces the following output:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{c}{\PYGZsh{} @active\PYGZus{}if switches off task \PYGZdq{}this\PYGZus{}task\PYGZus{}might\PYGZus{}be\PYGZus{}inactive\PYGZdq{} because run\PYGZus{}if\PYGZus{}true\PYGZus{}2 == False}
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{n}{verbose} \PYG{o}{=} \PYG{l+m+mi}{3}\PYG{p}{)}
+
+\PYG{g+go}{Task enters queue = create\PYGZus{}files}
+\PYG{g+go}{create\PYGZus{}files}
+\PYG{g+go}{ Job = [None \PYGZhy{}\PYGZgt{} a.foo] Missing file [a.foo]}
+\PYG{g+go}{ Job = [None \PYGZhy{}\PYGZgt{} b.foo] Missing file [b.foo]}
+\PYG{g+go}{ Job = [None \PYGZhy{}\PYGZgt{} a.foo] completed}
+\PYG{g+go}{ Job = [None \PYGZhy{}\PYGZgt{} b.foo] completed}
+\PYG{g+go}{Completed Task = create\PYGZus{}files}
+\PYG{g+go}{Inactive Task = this\PYGZus{}task\PYGZus{}might\PYGZus{}be\PYGZus{}inactive}
+
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{c}{\PYGZsh{} @active\PYGZus{}if switches on task \PYGZdq{}this\PYGZus{}task\PYGZus{}might\PYGZus{}be\PYGZus{}inactive\PYGZdq{} because all run\PYGZus{}if\PYGZus{}true conditions are met}
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{n}{run\PYGZus{}if\PYGZus{}true\PYGZus{}2} \PYG{o}{=} \PYG{n+nb+bp}{True}
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{n}{verbose} \PYG{o}{=} \PYG{l+m+mi}{3}\PYG{p}{)}
+
+\PYG{g+go}{Task enters queue = this\PYGZus{}task\PYGZus{}might\PYGZus{}be\PYGZus{}inactive}
+
+\PYG{g+go}{ Job = [a.foo \PYGZhy{}\PYGZgt{} a.bar] Missing file [a.bar]}
+\PYG{g+go}{ Job = [b.foo \PYGZhy{}\PYGZgt{} b.bar] Missing file [b.bar]}
+\PYG{g+go}{ Job = [a.foo \PYGZhy{}\PYGZgt{} a.bar] completed}
+\PYG{g+go}{ Job = [b.foo \PYGZhy{}\PYGZgt{} b.bar] completed}
+\PYG{g+go}{Completed Task = this\PYGZus{}task\PYGZus{}might\PYGZus{}be\PYGZus{}inactive}
+\end{Verbatim}
+\end{quote}
+\end{quote}
+
+\textbf{Parameters:}
+\end{quote}
+\phantomsection\label{decorators/active_if:decorators-active-if-on-or-off}\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{on\_or\_off}:}] \leavevmode
+A comma-separated list of boolean conditions. These can be values, functions or callable objects which return \code{True} / \code{False}.
+
+Multiple \code{@active\_if} decorators can be stacked for clarity as in the example
+
+\end{description}
+
+\end{itemize}
+\phantomsection\label{decorators/follows:decorators-follows}
+\index{@follows!Syntax}\index{Syntax!@follows}
+
+\strong{See also:}
+
+\begin{itemize}
+\item {}
+{\hyperref[decorators/decorators:decorators]{\emph{Decorators}}} for more decorators
+
+\item {}
+More on @follows in the \code{Ruffus} {\hyperref[tutorials/new_tutorial/transform_in_parallel:new-manual-follows]{\emph{Manual}}}
+
+\end{itemize}
+
+\begin{notice}{note}{Note:}
+Only missing directories are created.
+
+In other words, the same directory can be specified multiple times safely without, for example, being recreated repeatedly.
+Sometimes, for pipelines with multiple entry points, this is the only way to make sure that certain working or output
+directories are always created or available \emph{before} the pipeline runs.
+\end{notice}
+
+
+
+
+\subsection{@follows}
+\label{decorators/follows:follows}\label{decorators/follows::doc}\phantomsection\label{decorators/follows:decorators-follows-mkdir}\phantomsection\label{decorators/follows:decorators-follows-task}\phantomsection\label{decorators/follows:task}\phantomsection\label{decorators/follows:decorators-follows-task-name}\phantomsection\label{decorators/follows:task-name}
+
+\subsubsection{\emph{@follows}(\emph{task} \textbar{} \emph{``task\_name''} \textbar{} \emph{mkdir} (\emph{directory\_name}), {[}more\_tasks, ...{]})}
+\label{decorators/follows:directory-name}\label{decorators/follows:decorators-follows-directory-name}\label{decorators/follows:follows-task-task-name-mkdir-directory-name-more-tasks}\begin{quote}
+
+\textbf{Purpose:}
+\begin{quote}
+
+Indicates either
+\begin{itemize}
+\item {}
+task dependencies
+
+\item {}
+that the task requires a directory to be created first \emph{if necessary}. (Existing directories will not be overwritten)
+
+\end{itemize}
+\end{quote}
+
+\textbf{Example}:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{k}{def} \PYG{n+nf}{task1}\PYG{p}{(}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{print} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{doing task 1}\PYG{l+s}{\PYGZdq{}}
+
+\PYG{n+nd}{@follows}\PYG{p}{(}\PYG{n}{task1}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{task2}\PYG{p}{(}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{print} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{doing task 2}\PYG{l+s}{\PYGZdq{}}
+\end{Verbatim}
+
+\textbf{Parameters:}
+\end{quote}
+\phantomsection\label{decorators/follows:decorators-follows-task}\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{task}:}] \leavevmode
+a list of tasks which have to be run \textbf{before} this function
+
+\end{description}
+
+\end{itemize}
+\phantomsection\label{decorators/follows:decorators-follows-task-name}\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{``task\_name''}:}] \leavevmode
+Dependencies can be given as quoted function names, which allows a dependency to be
+added before the function is defined (see the sketch after this list).
+
+Functions in other modules need to be fully qualified.
+
+\end{description}
+
+\end{itemize}
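+
+A minimal sketch of a quoted dependency (the task names are illustrative); a task in another
+module would be written fully qualified, e.g. \code{"my\_module.make\_reference"}:
+
+\begin{Verbatim}
+from ruffus import *
+
+# depend on a task that is only defined further down, by quoting its name
+@follows("make_reference")
+@originate("report.txt")
+def write_report(output_file):
+    open(output_file, "w").close()
+
+@originate("reference.fa")
+def make_reference(output_file):
+    open(output_file, "w").close()
+
+pipeline_run([write_report])
+\end{Verbatim}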
+\phantomsection\label{decorators/follows:decorators-follows-directory-name}\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{directory\_name}:}] \leavevmode
+Directories which need to be created (\emph{only if they don't exist}) before
+the task is run can be specified via a \code{mkdir} indicator object:
+\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+@follows(task\_x, mkdir("/output/directory") ...)
+def task():
+ pass
+\end{Verbatim}
+\end{quote}
+
+\end{description}
+
+\end{itemize}
+\end{minipage}}
+\begin{center}\setlength{\fboxsep}{5pt}\shadowbox{\box0}\end{center}
+\setbox0\vbox{
+\begin{minipage}{0.95\linewidth}
+\textbf{Combinatorics}
+
+\medskip
+
+\phantomsection\label{decorators/product:decorators-product}
+\index{@product!Syntax}\index{Syntax!@product}
+
+\strong{See also:}
+
+\begin{itemize}
+\item {}
+{\hyperref[decorators/decorators:decorators]{\emph{Decorators}}} for more decorators
+
+\end{itemize}
+
+
+
+
+\subsection{@product}
+\label{decorators/product:product}\label{decorators/product::doc}\phantomsection\label{decorators/product:decorators-product-tasks-or-file-names}\phantomsection\label{decorators/product:tasks-or-file-names}\phantomsection\label{decorators/product:decorators-product-extra-parameters}\phantomsection\label{decorators/product:extra-parameters}\phantomsection\label{decorators/product:decorators-product-output-pattern}\phantomsection\label{decorators/product:output-pattern}
+
+\subsubsection{\emph{@product} ( \emph{tasks\_or\_file\_names}, \emph{formatter}\emph{(}\emph{matching\_formatter}\emph{)}, {[}\emph{tasks\_or\_file\_names}, \emph{formatter}\emph{(}\emph{matching\_formatter}\emph{)}, ... {]}, \emph{output\_pattern}, {[}\emph{extra\_parameters},...{]} )}
+\label{decorators/product:decorators-product-matching-formatter}\label{decorators/product:product-tasks-or-file-names-formatter-matching-formatter-tasks-or-file-names-formatter-matching-formatter-output-pattern-extra-parameters}\label{decorators/product:matching-formatter}\begin{quote}
+
+\textbf{Purpose:}
+\begin{quote}
+
+Generates the Cartesian \textbf{product}, i.e. all vs all comparisons, between sets of input files.
+
+The effect is analogous to the python \href{http://docs.python.org/2/library/itertools.html\#itertools.product}{itertools}
+function of the same name, i.e. a nested for loop.
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{k+kn}{from} \PYG{n+nn}{itertools} \PYG{k+kn}{import} \PYG{n}{product}
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{c}{\PYGZsh{} product(\PYGZsq{}ABC\PYGZsq{}, \PYGZsq{}XYZ\PYGZsq{}) \PYGZhy{}\PYGZhy{}\PYGZgt{} AX AY AZ BX BY BZ CX CY CZ}
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{p}{[} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZdq{}}\PYG{o}{.}\PYG{n}{join}\PYG{p}{(}\PYG{n}{a}\PYG{p}{)} \PYG{k}{for} \PYG{n}{a} \PYG{o+ow}{in} \PYG{n}{product}\PYG{p}{(}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{ABC}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{XYZ}\PYG{l+s}{\PYGZsq{}}\PYG{p}{)}\PYG{p}{]}
+\PYG{g+go}{[\PYGZsq{}AX\PYGZsq{}, \PYGZsq{}AY\PYGZsq{}, \PYGZsq{}AZ\PYGZsq{}, \PYGZsq{}BX\PYGZsq{}, \PYGZsq{}BY\PYGZsq{}, \PYGZsq{}BZ\PYGZsq{}, \PYGZsq{}CX\PYGZsq{}, \PYGZsq{}CY\PYGZsq{}, \PYGZsq{}CZ\PYGZsq{}]}
+\end{Verbatim}
+
+Only out-of-date tasks (comparing input and output files) will be run.
+
+Output file names and strings in the extra parameters
+are determined from {\hyperref[decorators/product:decorators-product-tasks-or-file-names]{\emph{tasks\_or\_file\_names}}}, i.e. from the output
+of upstream tasks, or a list of file names, after string replacement via
+{\hyperref[decorators/indicator_objects:decorators-formatter]{\emph{formatter}}}.
+
+The replacement strings require an extra level of indirection to refer to
+parsed components:
+\begin{enumerate}
+\item {}
+The first level refers to which \emph{set} of inputs (e.g. \textbf{A,B} or \textbf{P,Q} or \textbf{X,Y}
+in the following example).
+
+\item {}
+The second level refers to which input file in any particular \emph{set} of inputs.
+
+\end{enumerate}
+\begin{description}
+\item[{For example, \code{'\{basename{[}2{]}{[}0{]}\}'} is the \href{http://docs.python.org/2/library/os.path.html\#os.path.basename}{basename} for}] \leavevmode\begin{itemize}
+\item {}
+the third set of inputs (\textbf{X,Y}) and
+
+\item {}
+the first file name string in each \textbf{Input} of that set (\code{"x.1\_start"} and \code{"y.1\_start"})
+
+\end{itemize}
+
+\end{description}
+\end{quote}
+
+\textbf{Example}:
+\begin{quote}
+
+Calculates the \textbf{@product} of \textbf{A,B} and \textbf{P,Q} and \textbf{X, Y} files
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{k+kn}{from} \PYG{n+nn}{ruffus} \PYG{k+kn}{import} \PYG{o}{*}
+\PYG{k+kn}{from} \PYG{n+nn}{ruffus.combinatorics} \PYG{k+kn}{import} \PYG{o}{*}
+
+\PYG{c}{\PYGZsh{} Three sets of initial files}
+\PYG{n+nd}{@originate}\PYG{p}{(}\PYG{p}{[} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{a.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{b.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{create\PYGZus{}initial\PYGZus{}files\PYGZus{}ab}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{with} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)} \PYG{k}{as} \PYG{n}{oo}\PYG{p}{:} \PYG{k}{pass}
+
+\PYG{n+nd}{@originate}\PYG{p}{(}\PYG{p}{[} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{p.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{q.start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{create\PYGZus{}initial\PYGZus{}files\PYGZus{}pq}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{with} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)} \PYG{k}{as} \PYG{n}{oo}\PYG{p}{:} \PYG{k}{pass}
+
+\PYG{n+nd}{@originate}\PYG{p}{(}\PYG{p}{[} \PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{x.1\PYGZus{}start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{x.2\PYGZus{}start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]}\PYG{p}{,}
+ \PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{y.1\PYGZus{}start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{y.2\PYGZus{}start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]} \PYG{p}{]}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{create\PYGZus{}initial\PYGZus{}files\PYGZus{}xy}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{with} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)} \PYG{k}{as} \PYG{n}{oo}\PYG{p}{:} \PYG{k}{pass}
+
+\PYG{c}{\PYGZsh{} @product}
+\PYG{n+nd}{@product}\PYG{p}{(} \PYG{n}{create\PYGZus{}initial\PYGZus{}files\PYGZus{}ab}\PYG{p}{,} \PYG{c}{\PYGZsh{} Input}
+ \PYG{n}{formatter}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{(.start)\PYGZdl{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{c}{\PYGZsh{} match input file set \PYGZsh{} 1}
+
+ \PYG{n}{create\PYGZus{}initial\PYGZus{}files\PYGZus{}pq}\PYG{p}{,} \PYG{c}{\PYGZsh{} Input}
+ \PYG{n}{formatter}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{(.start)\PYGZdl{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{c}{\PYGZsh{} match input file set \PYGZsh{} 2}
+
+ \PYG{n}{create\PYGZus{}initial\PYGZus{}files\PYGZus{}xy}\PYG{p}{,} \PYG{c}{\PYGZsh{} Input}
+ \PYG{n}{formatter}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{(.start)\PYGZdl{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{,} \PYG{c}{\PYGZsh{} match input file set \PYGZsh{} 3}
+
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}path[0][0]\PYGZcb{}/}\PYG{l+s}{\PYGZdq{}} \PYG{c}{\PYGZsh{} Output Replacement string}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}basename[0][0]\PYGZcb{}\PYGZus{}vs\PYGZus{}}\PYG{l+s}{\PYGZdq{}} \PYG{c}{\PYGZsh{}}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}basename[1][0]\PYGZcb{}\PYGZus{}vs\PYGZus{}}\PYG{l+s}{\PYGZdq{}} \PYG{c}{\PYGZsh{}}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}basename[2][0]\PYGZcb{}.product}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{c}{\PYGZsh{}}
+
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}path[0][0]\PYGZcb{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{c}{\PYGZsh{} Extra parameter: path for 1st set of files, 1st file name}
+
+ \PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}basename[0][0]\PYGZcb{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{c}{\PYGZsh{} Extra parameter: basename for 1st set of files, 1st file name}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}basename[1][0]\PYGZcb{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{c}{\PYGZsh{} 2nd}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}basename[2][0]\PYGZcb{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{c}{\PYGZsh{} 3rd}
+ \PYG{p}{]}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{product\PYGZus{}task}\PYG{p}{(}\PYG{n}{input\PYGZus{}file}\PYG{p}{,} \PYG{n}{output\PYGZus{}parameter}\PYG{p}{,} \PYG{n}{shared\PYGZus{}path}\PYG{p}{,} \PYG{n}{basenames}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{print} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZsh{} basenames = }\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{ }\PYG{l+s}{\PYGZdq{}}\PYG{o}{.}\PYG{n}{join}\PYG{p}{(}\PYG{n}{basenames}\PYG{p}{)}
+ \PYG{k}{print} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{input\PYGZus{}parameter = }\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{n}{input\PYGZus{}file}
+ \PYG{k}{print} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{output\PYGZus{}parameter = }\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{n}{output\PYGZus{}parameter}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s+se}{\PYGZbs{}n}\PYG{l+s}{\PYGZdq{}}
+
+
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} Run}
+\PYG{c}{\PYGZsh{}}
+\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{n}{verbose}\PYG{o}{=}\PYG{l+m+mi}{0}\PYG{p}{)}
+\end{Verbatim}
+
+This results in:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{n}{verbose}\PYG{o}{=}\PYG{l+m+mi}{0}\PYG{p}{)}
+
+\PYG{g+go}{\PYGZsh{} basenames = a p x}
+\PYG{g+go}{input\PYGZus{}parameter = (\PYGZsq{}a.start\PYGZsq{}, \PYGZsq{}p.start\PYGZsq{}, \PYGZsq{}x.start\PYGZsq{})}
+\PYG{g+go}{output\PYGZus{}parameter = /home/lg/temp/a\PYGZus{}vs\PYGZus{}p\PYGZus{}vs\PYGZus{}x.product}
+
+\PYG{g+go}{\PYGZsh{} basenames = a p y}
+\PYG{g+go}{input\PYGZus{}parameter = (\PYGZsq{}a.start\PYGZsq{}, \PYGZsq{}p.start\PYGZsq{}, \PYGZsq{}y.start\PYGZsq{})}
+\PYG{g+go}{output\PYGZus{}parameter = /home/lg/temp/a\PYGZus{}vs\PYGZus{}p\PYGZus{}vs\PYGZus{}y.product}
+
+\PYG{g+go}{\PYGZsh{} basenames = a q x}
+\PYG{g+go}{input\PYGZus{}parameter = (\PYGZsq{}a.start\PYGZsq{}, \PYGZsq{}q.start\PYGZsq{}, \PYGZsq{}x.start\PYGZsq{})}
+\PYG{g+go}{output\PYGZus{}parameter = /home/lg/temp/a\PYGZus{}vs\PYGZus{}q\PYGZus{}vs\PYGZus{}x.product}
+
+\PYG{g+go}{\PYGZsh{} basenames = a q y}
+\PYG{g+go}{input\PYGZus{}parameter = (\PYGZsq{}a.start\PYGZsq{}, \PYGZsq{}q.start\PYGZsq{}, \PYGZsq{}y.start\PYGZsq{})}
+\PYG{g+go}{output\PYGZus{}parameter = /home/lg/temp/a\PYGZus{}vs\PYGZus{}q\PYGZus{}vs\PYGZus{}y.product}
+
+\PYG{g+go}{\PYGZsh{} basenames = b p x}
+\PYG{g+go}{input\PYGZus{}parameter = (\PYGZsq{}b.start\PYGZsq{}, \PYGZsq{}p.start\PYGZsq{}, \PYGZsq{}x.start\PYGZsq{})}
+\PYG{g+go}{output\PYGZus{}parameter = /home/lg/temp/b\PYGZus{}vs\PYGZus{}p\PYGZus{}vs\PYGZus{}x.product}
+
+\PYG{g+go}{\PYGZsh{} basenames = b p y}
+\PYG{g+go}{input\PYGZus{}parameter = (\PYGZsq{}b.start\PYGZsq{}, \PYGZsq{}p.start\PYGZsq{}, \PYGZsq{}y.start\PYGZsq{})}
+\PYG{g+go}{output\PYGZus{}parameter = /home/lg/temp/b\PYGZus{}vs\PYGZus{}p\PYGZus{}vs\PYGZus{}y.product}
+
+\PYG{g+go}{\PYGZsh{} basenames = b q x}
+\PYG{g+go}{input\PYGZus{}parameter = (\PYGZsq{}b.start\PYGZsq{}, \PYGZsq{}q.start\PYGZsq{}, \PYGZsq{}x.start\PYGZsq{})}
+\PYG{g+go}{output\PYGZus{}parameter = /home/lg/temp/b\PYGZus{}vs\PYGZus{}q\PYGZus{}vs\PYGZus{}x.product}
+
+\PYG{g+go}{\PYGZsh{} basenames = b q y}
+\PYG{g+go}{input\PYGZus{}parameter = (\PYGZsq{}b.start\PYGZsq{}, \PYGZsq{}q.start\PYGZsq{}, \PYGZsq{}y.start\PYGZsq{})}
+\PYG{g+go}{output\PYGZus{}parameter = /home/lg/temp/b\PYGZus{}vs\PYGZus{}q\PYGZus{}vs\PYGZus{}y.product}
+\end{Verbatim}
+\end{quote}
+
+\textbf{Parameters:}
+\end{quote}
+\phantomsection\label{decorators/product:decorators-product-tasks-or-file-names}\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{tasks\_or\_file\_names}}] \leavevmode
+can be a:
+\begin{enumerate}
+\item {} \begin{description}
+\item[{Task / list of tasks (as in the example above).}] \leavevmode
+File names are taken from the output of the specified task(s)
+
+\end{description}
+
+\item {} \begin{description}
+\item[{(Nested) list of file name strings.}] \leavevmode\begin{description}
+\item[{File names containing \code{*{[}{]}?} will be expanded as a \href{http://docs.python.org/library/glob.html}{\emph{glob}}.}] \leavevmode
+E.g. \code{"a.*" =\textgreater{} "a.1", "a.2"}
+
+\end{description}
+
+\end{description}
+
+\end{enumerate}
+
+\end{description}
+
+\end{itemize}
+\phantomsection\label{decorators/product:decorators-product-matching-formatter}\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{matching\_formatter}}] \leavevmode
+a {\hyperref[decorators/indicator_objects:decorators-formatter]{\emph{formatter}}} indicator object, optionally containing
+a python \href{http://docs.python.org/library/re.html}{regular expression (re)}.
+
+\end{description}
+
+\end{itemize}
+\phantomsection\label{decorators/product:decorators-product-output-pattern}\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{output\_pattern}}] \leavevmode
+Specifies the resulting output file name(s) after string
+substitution
+
+\end{description}
+
+\end{itemize}
+\phantomsection\label{decorators/product:decorators-product-extra-parameters}\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{extra\_parameters}}] \leavevmode
+Optional extra parameters are passed to the functions after string
+substitution
+
+\end{description}
+
+\end{itemize}
+\phantomsection\label{decorators/permutations:decorators-permutations}
+\index{@permutations!Syntax}\index{Syntax!@permutations}
+
+\strong{See also:}
+
+\begin{itemize}
+\item {}
+{\hyperref[decorators/decorators:decorators]{\emph{Decorators}}} for more decorators
+
+\end{itemize}
+
+
+
+
+\subsection{@permutations}
+\label{decorators/permutations:permutations}\label{decorators/permutations::doc}\phantomsection\label{decorators/permutations:decorators-permutations-tasks-or-file-names}\phantomsection\label{decorators/permutations:tasks-or-file-names}\phantomsection\label{decorators/permutations:decorators-permutations-extra-parameters}\phantomsection\label{decorators/permutations:extra-parameters}\phantomsection\label{decorators/permutations:decorators-permutations-output-pattern}\phantomsection\label [...]
+
+\subsubsection{\emph{@permutations} ( \emph{tasks\_or\_file\_names}, \emph{formatter}\emph{(}\emph{matching\_formatter}\emph{)}, \emph{output\_pattern}, {[}\emph{extra\_parameters},...{]} )}
+\label{decorators/permutations:matching-formatter}\label{decorators/permutations:decorators-permutations-matching-formatter}\label{decorators/permutations:permutations-tasks-or-file-names-formatter-matching-formatter-output-pattern-extra-parameters}\begin{quote}
+
+\textbf{Purpose:}
+\begin{quote}
+
+Generates the \textbf{permutations} between all the elements of a set of \textbf{Input}.
+
+The effect is analogous to the python \href{http://docs.python.org/2/library/itertools.html\#itertools.permutations}{itertools}
+function of the same name:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{k+kn}{from} \PYG{n+nn}{itertools} \PYG{k+kn}{import} \PYG{n}{permutations}
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{c}{\PYGZsh{} permutations(\PYGZsq{}ABCD\PYGZsq{}, 2) \PYGZhy{}\PYGZhy{}\PYGZgt{} AB AC AD BA BC BD CA CB CD DA DB DC}
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{p}{[} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZdq{}}\PYG{o}{.}\PYG{n}{join}\PYG{p}{(}\PYG{n}{a}\PYG{p}{)} \PYG{k}{for} \PYG{n}{a} \PYG{o+ow}{in} \PYG{n}{permutations}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{ABCD}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+m+mi}{2}\PYG{p}{)}\PYG{p}{]}
+\PYG{g+go}{[\PYGZsq{}AB\PYGZsq{}, \PYGZsq{}AC\PYGZsq{}, \PYGZsq{}AD\PYGZsq{}, \PYGZsq{}BA\PYGZsq{}, \PYGZsq{}BC\PYGZsq{}, \PYGZsq{}BD\PYGZsq{}, \PYGZsq{}CA\PYGZsq{}, \PYGZsq{}CB\PYGZsq{}, \PYGZsq{}CD\PYGZsq{}, \PYGZsq{}DA\PYGZsq{}, \PYGZsq{}DB\PYGZsq{}, \PYGZsq{}DC\PYGZsq{}]}
+\end{Verbatim}
+
+Only out of date tasks (comparing input and output files) will be run.
+
+Output file names and strings in the extra parameters
+are determined from {\hyperref[decorators/permutations:decorators-permutations-tasks-or-file-names]{\emph{tasks\_or\_file\_names}}}, i.e. from the output
+of upstream tasks, or a list of file names, after string replacement via
+{\hyperref[decorators/indicator_objects:decorators-formatter]{\emph{formatter}}}.
+
+The replacement strings require an extra level of indirection to refer to
+parsed components:
+\begin{enumerate}
+\item {}
+The first level refers to which \emph{set} in each tuple of inputs.
+
+\item {}
+The second level refers to which input file in any particular \emph{set} of inputs.
+
+\end{enumerate}
+\end{quote}
+
+\textbf{Example}:
+\begin{quote}
+
+Calculates the \textbf{@permutations} of \textbf{A,B,C,D} files
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{k+kn}{from} \PYG{n+nn}{ruffus} \PYG{k+kn}{import} \PYG{o}{*}
+\PYG{k+kn}{from} \PYG{n+nn}{ruffus.combinatorics} \PYG{k+kn}{import} \PYG{o}{*}
+
+\PYG{c}{\PYGZsh{} initial file pairs}
+\PYG{n+nd}{@originate}\PYG{p}{(}\PYG{p}{[} \PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{A.1\PYGZus{}start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{A.2\PYGZus{}start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]}\PYG{p}{,}
+ \PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{B.1\PYGZus{}start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{B.2\PYGZus{}start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]}\PYG{p}{,}
+ \PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{C.1\PYGZus{}start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{C.2\PYGZus{}start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]}\PYG{p}{,}
+ \PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{D.1\PYGZus{}start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{D.2\PYGZus{}start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]}\PYG{p}{]}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{create\PYGZus{}initial\PYGZus{}files\PYGZus{}ABCD}\PYG{p}{(}\PYG{n}{output\PYGZus{}files}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{for} \PYG{n}{output\PYGZus{}file} \PYG{o+ow}{in} \PYG{n}{output\PYGZus{}files}\PYG{p}{:}
+ \PYG{k}{with} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)} \PYG{k}{as} \PYG{n}{oo}\PYG{p}{:} \PYG{k}{pass}
+
+\PYG{c}{\PYGZsh{} @permutations}
+\PYG{n+nd}{@permutations}\PYG{p}{(}\PYG{n}{create\PYGZus{}initial\PYGZus{}files\PYGZus{}ABCD}\PYG{p}{,} \PYG{c}{\PYGZsh{} Input}
+ \PYG{n}{formatter}\PYG{p}{(}\PYG{p}{)}\PYG{p}{,} \PYG{c}{\PYGZsh{} match input files}
+
+ \PYG{c}{\PYGZsh{} tuple of 2 at a time}
+ \PYG{l+m+mi}{2}\PYG{p}{,}
+
+ \PYG{c}{\PYGZsh{} Output Replacement string}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}path[0][0]\PYGZcb{}/}\PYG{l+s}{\PYGZdq{}}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}basename[0][1]\PYGZcb{}\PYGZus{}vs\PYGZus{}}\PYG{l+s}{\PYGZdq{}}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}basename[1][1]\PYGZcb{}.permutations}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+
+ \PYG{c}{\PYGZsh{} Extra parameter: path for 1st set of files, 1st file name}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}path[0][0]\PYGZcb{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+
+ \PYG{c}{\PYGZsh{} Extra parameter}
+ \PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}basename[0][0]\PYGZcb{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{c}{\PYGZsh{} basename for 1st set of files, 1st file name}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}basename[1][0]\PYGZcb{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{c}{\PYGZsh{} 2nd}
+ \PYG{p}{]}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{permutations\PYGZus{}task}\PYG{p}{(}\PYG{n}{input\PYGZus{}file}\PYG{p}{,} \PYG{n}{output\PYGZus{}parameter}\PYG{p}{,} \PYG{n}{shared\PYGZus{}path}\PYG{p}{,} \PYG{n}{basenames}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{print} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{ \PYGZhy{} }\PYG{l+s}{\PYGZdq{}}\PYG{o}{.}\PYG{n}{join}\PYG{p}{(}\PYG{n}{basenames}\PYG{p}{)}
+
+
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} Run}
+\PYG{c}{\PYGZsh{}}
+\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{n}{verbose}\PYG{o}{=}\PYG{l+m+mi}{0}\PYG{p}{)}
+\end{Verbatim}
+
+This results in:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{n}{verbose}\PYG{o}{=}\PYG{l+m+mi}{0}\PYG{p}{)}
+
+\PYG{g+go}{A \PYGZhy{} B}
+\PYG{g+go}{A \PYGZhy{} C}
+\PYG{g+go}{A \PYGZhy{} D}
+\PYG{g+go}{B \PYGZhy{} A}
+\PYG{g+go}{B \PYGZhy{} C}
+\PYG{g+go}{B \PYGZhy{} D}
+\PYG{g+go}{C \PYGZhy{} A}
+\PYG{g+go}{C \PYGZhy{} B}
+\PYG{g+go}{C \PYGZhy{} D}
+\PYG{g+go}{D \PYGZhy{} A}
+\PYG{g+go}{D \PYGZhy{} B}
+\PYG{g+go}{D \PYGZhy{} C}
+\end{Verbatim}
+\end{quote}
+
+\textbf{Parameters:}
+\end{quote}
+\phantomsection\label{decorators/permutations:decorators-permutations-tasks-or-file-names}\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{tasks\_or\_file\_names}}] \leavevmode
+can be a:
+\begin{enumerate}
+\item {} \begin{description}
+\item[{Task / list of tasks (as in the example above).}] \leavevmode
+File names are taken from the output of the specified task(s)
+
+\end{description}
+
+\item {} \begin{description}
+\item[{(Nested) list of file name strings.}] \leavevmode\begin{description}
+\item[{File names containing \code{*{[}{]}?} will be expanded as a \href{http://docs.python.org/library/glob.html}{\emph{glob}}.}] \leavevmode
+E.g. \code{"a.*" =\textgreater{} "a.1", "a.2"}
+
+\end{description}
+
+\end{description}
+
+\end{enumerate}
+
+\end{description}
+
+\end{itemize}
+\phantomsection\label{decorators/permutations:decorators-permutations-matching-formatter}\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{matching\_formatter}}] \leavevmode
+a {\hyperref[decorators/indicator_objects:decorators-formatter]{\emph{formatter}}} indicator object, optionally containing
+a python \href{http://docs.python.org/library/re.html}{regular expression (re)}.
+
+\end{description}
+
+\end{itemize}
+\phantomsection\label{decorators/permutations:decorators-permutations-output-pattern}\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{output\_pattern}}] \leavevmode
+Specifies the resulting output file name(s) after string
+substitution
+
+\end{description}
+
+\end{itemize}
+\phantomsection\label{decorators/permutations:decorators-permutations-extra-parameters}\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{extra\_parameters}}] \leavevmode
+Optional extra parameters are passed to the functions after string
+substitution
+
+\end{description}
+
+\end{itemize}
+\phantomsection\label{decorators/combinations:decorators-combinations}
+\index{@combinations!Syntax}\index{Syntax!@combinations}
+
+\strong{See also:}
+
+\begin{itemize}
+\item {}
+{\hyperref[decorators/decorators:decorators]{\emph{Decorators}}} for more decorators
+
+\end{itemize}
+
+
+
+
+\subsection{@combinations}
+\label{decorators/combinations:combinations}\label{decorators/combinations::doc}\phantomsection\label{decorators/combinations:decorators-combinations-tasks-or-file-names}\phantomsection\label{decorators/combinations:tasks-or-file-names}\phantomsection\label{decorators/combinations:decorators-combinations-extra-parameters}\phantomsection\label{decorators/combinations:extra-parameters}\phantomsection\label{decorators/combinations:decorators-combinations-output-pattern}\phantomsection\label [...]
+
+\subsubsection{\emph{@combinations} ( \emph{tasks\_or\_file\_names}, \emph{formatter}\emph{(}\emph{matching\_formatter}\emph{)}, \emph{output\_pattern}, {[}\emph{extra\_parameters},...{]} )}
+\label{decorators/combinations:combinations-tasks-or-file-names-formatter-matching-formatter-output-pattern-extra-parameters}\label{decorators/combinations:matching-formatter}\label{decorators/combinations:decorators-combinations-matching-formatter}\begin{quote}
+
+\textbf{Purpose:}
+\begin{quote}
+
+Generates the \textbf{combinations} between all the elements of a set of \textbf{Input} (e.g. \textbf{A B C D}),
+i.e. r-length tuples of \emph{input} elements with no repeated elements (no \textbf{A A})
+and where the order within each tuple is irrelevant (either \textbf{A B} or \textbf{B A}, not both).
+
+The effect is analogous to the python \href{http://docs.python.org/2/library/itertools.html\#itertools.combinations}{itertools}
+function of the same name:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{k+kn}{from} \PYG{n+nn}{itertools} \PYG{k+kn}{import} \PYG{n}{combinations}
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{c}{\PYGZsh{} combinations(\PYGZsq{}ABCD\PYGZsq{}, 3) \PYGZhy{}\PYGZhy{}\PYGZgt{} ABC ABD ACD BCD}
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{p}{[} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZdq{}}\PYG{o}{.}\PYG{n}{join}\PYG{p}{(}\PYG{n}{a}\PYG{p}{)} \PYG{k}{for} \PYG{n}{a} \PYG{o+ow}{in} \PYG{n}{combinations}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{ABCD}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+m+mi}{3}\PYG{p}{)}\PYG{p}{]}
+\PYG{g+go}{[\PYGZsq{}ABC\PYGZsq{}, \PYGZsq{}ABD\PYGZsq{}, \PYGZsq{}ACD\PYGZsq{}, \PYGZsq{}BCD\PYGZsq{}]}
+\end{Verbatim}
+
+Only out of date tasks (comparing input and output files) will be run.
+
+Output file names and strings in the extra parameters
+are determined from {\hyperref[decorators/combinations:decorators-combinations-tasks-or-file-names]{\emph{tasks\_or\_file\_names}}}, i.e. from the output
+of upstream tasks, or a list of file names, after string replacement via
+{\hyperref[decorators/indicator_objects:decorators-formatter]{\emph{formatter}}}.
+
+The replacement strings require an extra level of indirection to refer to
+parsed components:
+\begin{enumerate}
+\item {}
+The first level refers to which \emph{set} in each tuple of inputs.
+
+\item {}
+The second level refers to which input file in any particular \emph{set} of inputs.
+
+\end{enumerate}
+\end{quote}
+
+\textbf{Example}:
+\begin{quote}
+
+Calculates the \textbf{@combinations} of \textbf{A,B,C,D} files
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{k+kn}{from} \PYG{n+nn}{ruffus} \PYG{k+kn}{import} \PYG{o}{*}
+\PYG{k+kn}{from} \PYG{n+nn}{ruffus.combinatorics} \PYG{k+kn}{import} \PYG{o}{*}
+
+\PYG{c}{\PYGZsh{} initial file pairs}
+\PYG{n+nd}{@originate}\PYG{p}{(}\PYG{p}{[} \PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{A.1\PYGZus{}start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{A.2\PYGZus{}start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]}\PYG{p}{,}
+ \PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{B.1\PYGZus{}start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{B.2\PYGZus{}start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]}\PYG{p}{,}
+ \PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{C.1\PYGZus{}start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{C.2\PYGZus{}start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]}\PYG{p}{,}
+ \PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{D.1\PYGZus{}start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{D.2\PYGZus{}start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]}\PYG{p}{]}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{create\PYGZus{}initial\PYGZus{}files\PYGZus{}ABCD}\PYG{p}{(}\PYG{n}{output\PYGZus{}files}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{for} \PYG{n}{output\PYGZus{}file} \PYG{o+ow}{in} \PYG{n}{output\PYGZus{}files}\PYG{p}{:}
+ \PYG{k}{with} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)} \PYG{k}{as} \PYG{n}{oo}\PYG{p}{:} \PYG{k}{pass}
+
+\PYG{c}{\PYGZsh{} @combinations}
+\PYG{n+nd}{@combinations}\PYG{p}{(}\PYG{n}{create\PYGZus{}initial\PYGZus{}files\PYGZus{}ABCD}\PYG{p}{,} \PYG{c}{\PYGZsh{} Input}
+ \PYG{n}{formatter}\PYG{p}{(}\PYG{p}{)}\PYG{p}{,} \PYG{c}{\PYGZsh{} match input files}
+
+ \PYG{c}{\PYGZsh{} tuple of 3 at a time}
+ \PYG{l+m+mi}{3}\PYG{p}{,}
+
+ \PYG{c}{\PYGZsh{} Output Replacement string}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}path[0][0]\PYGZcb{}/}\PYG{l+s}{\PYGZdq{}}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}basename[0][1]\PYGZcb{}\PYGZus{}vs\PYGZus{}}\PYG{l+s}{\PYGZdq{}}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}basename[1][1]\PYGZcb{}\PYGZus{}vs\PYGZus{}}\PYG{l+s}{\PYGZdq{}}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}basename[2][1]\PYGZcb{}.combinations}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+
+ \PYG{c}{\PYGZsh{} Extra parameter: path for 1st set of files, 1st file name}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}path[0][0]\PYGZcb{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+
+ \PYG{c}{\PYGZsh{} Extra parameter}
+ \PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}basename[0][0]\PYGZcb{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{c}{\PYGZsh{} basename for 1st set of files, 1st file name}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}basename[1][0]\PYGZcb{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{c}{\PYGZsh{} 2nd}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}basename[2][0]\PYGZcb{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{c}{\PYGZsh{} 3rd}
+ \PYG{p}{]}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{combinations\PYGZus{}task}\PYG{p}{(}\PYG{n}{input\PYGZus{}file}\PYG{p}{,} \PYG{n}{output\PYGZus{}parameter}\PYG{p}{,} \PYG{n}{shared\PYGZus{}path}\PYG{p}{,} \PYG{n}{basenames}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{print} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{ \PYGZhy{} }\PYG{l+s}{\PYGZdq{}}\PYG{o}{.}\PYG{n}{join}\PYG{p}{(}\PYG{n}{basenames}\PYG{p}{)}
+
+
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} Run}
+\PYG{c}{\PYGZsh{}}
+\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{n}{verbose}\PYG{o}{=}\PYG{l+m+mi}{0}\PYG{p}{)}
+\end{Verbatim}
+
+This results in:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{n}{verbose}\PYG{o}{=}\PYG{l+m+mi}{0}\PYG{p}{)}
+\PYG{g+go}{A \PYGZhy{} B \PYGZhy{} C}
+\PYG{g+go}{A \PYGZhy{} B \PYGZhy{} D}
+\PYG{g+go}{A \PYGZhy{} C \PYGZhy{} D}
+\PYG{g+go}{B \PYGZhy{} C \PYGZhy{} D}
+\end{Verbatim}
+\end{quote}
+
+\textbf{Parameters:}
+\end{quote}
+\phantomsection\label{decorators/combinations:decorators-combinations-tasks-or-file-names}\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{tasks\_or\_file\_names}}] \leavevmode
+can be a:
+\begin{enumerate}
+\item {} \begin{description}
+\item[{Task / list of tasks (as in the example above).}] \leavevmode
+File names are taken from the output of the specified task(s)
+
+\end{description}
+
+\item {} \begin{description}
+\item[{(Nested) list of file name strings.}] \leavevmode\begin{description}
+\item[{File names containing \code{*{[}{]}?} will be expanded as a \href{http://docs.python.org/library/glob.html}{\emph{glob}}.}] \leavevmode
+E.g. \code{"a.*" =\textgreater{} "a.1", "a.2"}
+
+\end{description}
+
+\end{description}
+
+\end{enumerate}
+
+\end{description}
+
+\end{itemize}
+\phantomsection\label{decorators/combinations:decorators-combinations-matching-formatter}\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{matching\_formatter}}] \leavevmode
+a {\hyperref[decorators/indicator_objects:decorators-formatter]{\emph{formatter}}} indicator object, optionally containing
+a python \href{http://docs.python.org/library/re.html}{regular expression (re)}.
+
+\end{description}
+
+\end{itemize}
+\phantomsection\label{decorators/combinations:decorators-combinations-output-pattern}\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{output\_pattern}}] \leavevmode
+Specifies the resulting output file name(s) after string
+substitution
+
+\end{description}
+
+\end{itemize}
+\phantomsection\label{decorators/combinations:decorators-combinations-extra-parameters}\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{extra\_parameters}}] \leavevmode
+Optional extra parameters are passed to the functions after string
+substitution
+
+\end{description}
+
+\end{itemize}
+\phantomsection\label{decorators/combinations_with_replacement:decorators-combinations-with-replacement}
+\index{@combinations\_with\_replacement!Syntax}\index{Syntax!@combinations\_with\_replacement}
+
+\strong{See also:}
+
+\begin{itemize}
+\item {}
+{\hyperref[decorators/decorators:decorators]{\emph{Decorators}}} for more decorators
+
+\end{itemize}
+
+
+
+
+\subsection{@combinations\_with\_replacement}
+\label{decorators/combinations_with_replacement::doc}\label{decorators/combinations_with_replacement:combinations-with-replacement}\phantomsection\label{decorators/combinations_with_replacement:decorators-combinations-with-replacement-tasks-or-file-names}\phantomsection\label{decorators/combinations_with_replacement:tasks-or-file-names}\phantomsection\label{decorators/combinations_with_replacement:decorators-combinations-with-replacement-extra-parameters}\phantomsection\label{decorators/ [...]
+
+\subsubsection{\emph{@combinations\_with\_replacement} ( \emph{tasks\_or\_file\_names}, \emph{formatter}\emph{(}\emph{matching\_formatter}\emph{)}, \emph{output\_pattern}, {[}\emph{extra\_parameters},...{]} )}
+\label{decorators/combinations_with_replacement:combinations-with-replacement-tasks-or-file-names-formatter-matching-formatter-output-pattern-extra-parameters}\label{decorators/combinations_with_replacement:matching-formatter}\label{decorators/combinations_with_replacement:decorators-combinations-with-replacement-matching-formatter}\begin{quote}
+
+\textbf{Purpose:}
+\begin{quote}
+
+Generates the \textbf{combinations\_with\_replacement} between all the elements of a set of \textbf{Input} (e.g. \textbf{A B C D}),
+i.e. r-length tuples of \emph{input} elements including repeated elements (\textbf{A A})
+and where the order within each tuple is irrelevant (either \textbf{A B} or \textbf{B A}, not both).
+
+The effect is analogous to the python \href{http://docs.python.org/2/library/itertools.html\#itertools.combinations\_with\_replacement}{itertools}
+function of the same name:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{k+kn}{from} \PYG{n+nn}{itertools} \PYG{k+kn}{import} \PYG{n}{combinations\PYGZus{}with\PYGZus{}replacement}
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{c}{\PYGZsh{} combinations\PYGZus{}with\PYGZus{}replacement(\PYGZsq{}ABCD\PYGZsq{}, 2) \PYGZhy{}\PYGZhy{}\PYGZgt{} AA AB AC AD BB BC BD CC CD DD}
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{p}{[} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZdq{}}\PYG{o}{.}\PYG{n}{join}\PYG{p}{(}\PYG{n}{a}\PYG{p}{)} \PYG{k}{for} \PYG{n}{a} \PYG{o+ow}{in} \PYG{n}{combinations\PYGZus{}with\PYGZus{}replacement}\PYG{p}{(}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{ABCD}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+m+mi}{2}\PYG{p}{)}\PYG{p}{]}
+\PYG{g+go}{[\PYGZsq{}AA\PYGZsq{}, \PYGZsq{}AB\PYGZsq{}, \PYGZsq{}AC\PYGZsq{}, \PYGZsq{}AD\PYGZsq{}, \PYGZsq{}BB\PYGZsq{}, \PYGZsq{}BC\PYGZsq{}, \PYGZsq{}BD\PYGZsq{}, \PYGZsq{}CC\PYGZsq{}, \PYGZsq{}CD\PYGZsq{}, \PYGZsq{}DD\PYGZsq{}]}
+\end{Verbatim}
+
+Only out of date tasks (comparing input and output files) will be run.
+
+Output file names and strings in the extra parameters
+are determined from {\hyperref[decorators/combinations_with_replacement:decorators-combinations-with-replacement-tasks-or-file-names]{\emph{tasks\_or\_file\_names}}}, i.e. from the output
+of upstream tasks, or a list of file names, after string replacement via
+{\hyperref[decorators/indicator_objects:decorators-formatter]{\emph{formatter}}}.
+
+The replacement strings require an extra level of indirection to refer to
+parsed components:
+\begin{enumerate}
+\item {}
+The first level refers to which \emph{set} in each tuple of inputs.
+
+\item {}
+The second level refers to which input file in any particular \emph{set} of inputs.
+
+\end{enumerate}
+\end{quote}
+
+\textbf{Example}:
+\begin{quote}
+
+Calculates the \textbf{@combinations\_with\_replacement} of \textbf{A,B,C,D} files
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{k+kn}{from} \PYG{n+nn}{ruffus} \PYG{k+kn}{import} \PYG{o}{*}
+\PYG{k+kn}{from} \PYG{n+nn}{ruffus.combinatorics} \PYG{k+kn}{import} \PYG{o}{*}
+
+\PYG{c}{\PYGZsh{} initial file pairs}
+\PYG{n+nd}{@originate}\PYG{p}{(}\PYG{p}{[} \PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{A.1\PYGZus{}start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{A.2\PYGZus{}start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]}\PYG{p}{,}
+ \PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{B.1\PYGZus{}start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{B.2\PYGZus{}start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]}\PYG{p}{,}
+ \PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{C.1\PYGZus{}start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{C.2\PYGZus{}start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]}\PYG{p}{,}
+ \PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{D.1\PYGZus{}start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{D.2\PYGZus{}start}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]}\PYG{p}{]}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{create\PYGZus{}initial\PYGZus{}files\PYGZus{}ABCD}\PYG{p}{(}\PYG{n}{output\PYGZus{}files}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{for} \PYG{n}{output\PYGZus{}file} \PYG{o+ow}{in} \PYG{n}{output\PYGZus{}files}\PYG{p}{:}
+ \PYG{k}{with} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)} \PYG{k}{as} \PYG{n}{oo}\PYG{p}{:} \PYG{k}{pass}
+
+\PYG{c}{\PYGZsh{} @combinations\PYGZus{}with\PYGZus{}replacement}
+\PYG{n+nd}{@combinations\PYGZus{}with\PYGZus{}replacement}\PYG{p}{(}\PYG{n}{create\PYGZus{}initial\PYGZus{}files\PYGZus{}ABCD}\PYG{p}{,} \PYG{c}{\PYGZsh{} Input}
+ \PYG{n}{formatter}\PYG{p}{(}\PYG{p}{)}\PYG{p}{,} \PYG{c}{\PYGZsh{} match input files}
+
+ \PYG{c}{\PYGZsh{} tuple of 2 at a time}
+ \PYG{l+m+mi}{2}\PYG{p}{,}
+
+ \PYG{c}{\PYGZsh{} Output Replacement string}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}path[0][0]\PYGZcb{}/}\PYG{l+s}{\PYGZdq{}}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}basename[0][1]\PYGZcb{}\PYGZus{}vs\PYGZus{}}\PYG{l+s}{\PYGZdq{}}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}basename[1][1]\PYGZcb{}.combinations\PYGZus{}with\PYGZus{}replacement}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+
+ \PYG{c}{\PYGZsh{} Extra parameter: path for 1st set of files, 1st file name}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}path[0][0]\PYGZcb{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}
+
+ \PYG{c}{\PYGZsh{} Extra parameter}
+ \PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}basename[0][0]\PYGZcb{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{c}{\PYGZsh{} basename for 1st set of files, 1st file name}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{\PYGZob{}basename[1][0]\PYGZcb{}}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{c}{\PYGZsh{} 2rd}
+ \PYG{p}{]}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{combinations\PYGZus{}with\PYGZus{}replacement\PYGZus{}task}\PYG{p}{(}\PYG{n}{input\PYGZus{}file}\PYG{p}{,} \PYG{n}{output\PYGZus{}parameter}\PYG{p}{,} \PYG{n}{shared\PYGZus{}path}\PYG{p}{,} \PYG{n}{basenames}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{print} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{ \PYGZhy{} }\PYG{l+s}{\PYGZdq{}}\PYG{o}{.}\PYG{n}{join}\PYG{p}{(}\PYG{n}{basenames}\PYG{p}{)}
+
+
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} Run}
+\PYG{c}{\PYGZsh{}}
+\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{n}{verbose}\PYG{o}{=}\PYG{l+m+mi}{0}\PYG{p}{)}
+\end{Verbatim}
+
+This results in:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{g+gp}{\PYGZgt{}\PYGZgt{}\PYGZgt{} }\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{n}{verbose}\PYG{o}{=}\PYG{l+m+mi}{0}\PYG{p}{)}
+\PYG{g+go}{A \PYGZhy{} A}
+\PYG{g+go}{A \PYGZhy{} B}
+\PYG{g+go}{A \PYGZhy{} C}
+\PYG{g+go}{A \PYGZhy{} D}
+\PYG{g+go}{B \PYGZhy{} B}
+\PYG{g+go}{B \PYGZhy{} C}
+\PYG{g+go}{B \PYGZhy{} D}
+\PYG{g+go}{C \PYGZhy{} C}
+\PYG{g+go}{C \PYGZhy{} D}
+\PYG{g+go}{D \PYGZhy{} D}
+\end{Verbatim}
+\end{quote}
+
+\textbf{Parameters:}
+\end{quote}
+\phantomsection\label{decorators/combinations_with_replacement:decorators-combinations-with-replacement-tasks-or-file-names}\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{tasks\_or\_file\_names}}] \leavevmode
+can be a:
+\begin{enumerate}
+\item {} \begin{description}
+\item[{Task / list of tasks (as in the example above).}] \leavevmode
+File names are taken from the output of the specified task(s)
+
+\end{description}
+
+\item {} \begin{description}
+\item[{(Nested) list of file name strings.}] \leavevmode\begin{description}
+\item[{File names containing \code{*{[}{]}?} will be expanded as a \href{http://docs.python.org/library/glob.html}{\emph{glob}}.}] \leavevmode
+E.g. \code{"a.*" =\textgreater{} "a.1", "a.2"}
+
+\end{description}
+
+\end{description}
+
+\end{enumerate}
+
+\end{description}
+
+\end{itemize}
+\phantomsection\label{decorators/combinations_with_replacement:decorators-combinations-with-replacement-matching-formatter}\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{matching\_formatter}}] \leavevmode
+a {\hyperref[decorators/indicator_objects:decorators-formatter]{\emph{formatter}}} indicator object, optionally containing
+a python \href{http://docs.python.org/library/re.html}{regular expression (re)}.
+
+\end{description}
+
+\end{itemize}
+\phantomsection\label{decorators/combinations_with_replacement:decorators-combinations-with-replacement-output-pattern}\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{output\_pattern}}] \leavevmode
+Specifies the resulting output file name(s) after string
+substitution
+
+\end{description}
+
+\end{itemize}
+\phantomsection\label{decorators/combinations_with_replacement:decorators-combinations-with-replacement-extra-parameters}\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{extra\_parameters}}] \leavevmode
+Optional extra parameters are passed to the functions after string
+substitution
+
+\end{description}
+
+\end{itemize}
+\end{minipage}}
+\begin{center}\setlength{\fboxsep}{5pt}\shadowbox{\box0}\end{center}
+\setbox0\vbox{
+\begin{minipage}{0.95\linewidth}
+\textbf{Esoteric}
+
+\medskip
+
+\phantomsection\label{decorators/files_ex:decorators-files-on-the-fly}
+\index{@files (on-the-fly parameter generation)!Syntax}\index{Syntax!@files (on-the-fly parameter generation)}
+
+\strong{See also:}
+
+\begin{itemize}
+\item {}
+{\hyperref[decorators/decorators:decorators]{\emph{Decorators}}} for more decorators
+
+\end{itemize}
+
+
+
+
+\subsection{Generating parameters on the fly for @files}
+\label{decorators/files_ex:decorators-files-custom-function}\label{decorators/files_ex::doc}\label{decorators/files_ex:custom-function}\label{decorators/files_ex:generating-parameters-on-the-fly-for-files}
+
+\subsubsection{\emph{@files} (\emph{custom\_function})}
+\label{decorators/files_ex:files-custom-function}\begin{quote}
+
+\textbf{Purpose:}
+\begin{quote}
+
+Uses a custom function to generate sets of parameters for separate jobs, which can run in parallel.
+
+The first two parameters in each set represent the input and output which are
+used to see if the job is out of date and needs to be (re-)run.
+
+By default, out of date checking uses input/output file timestamps.
+(On some file systems, timestamps have a resolution in seconds.)
+See {\hyperref[decorators/check_if_uptodate:decorators-check-if-uptodate]{\emph{@check\_if\_uptodate()}}} for alternatives.
+\end{quote}
+\begin{description}
+\item[{\textbf{Example}:}] \leavevmode
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{k+kn}{from} \PYG{n+nn}{ruffus} \PYG{k+kn}{import} \PYG{o}{*}
+\PYG{k}{def} \PYG{n+nf}{generate\PYGZus{}parameters\PYGZus{}on\PYGZus{}the\PYGZus{}fly}\PYG{p}{(}\PYG{p}{)}\PYG{p}{:}
+ \PYG{n}{parameters} \PYG{o}{=} \PYG{p}{[}
+ \PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{input\PYGZus{}file1}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{output\PYGZus{}file1}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+m+mi}{1}\PYG{p}{,} \PYG{l+m+mi}{2}\PYG{p}{]}\PYG{p}{,} \PYG{c}{\PYGZsh{} 1st job}
+ \PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{input\PYGZus{}file2}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{output\PYGZus{}file2}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+m+mi}{3}\PYG{p}{,} \PYG{l+m+mi}{4}\PYG{p}{]}\PYG{p}{,} \PYG{c}{\PYGZsh{} 2nd job}
+ \PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{input\PYGZus{}file3}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{output\PYGZus{}file3}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+m+mi}{5}\PYG{p}{,} \PYG{l+m+mi}{6}\PYG{p}{]}\PYG{p}{,} \PYG{c}{\PYGZsh{} 3rd job}
+ \PYG{p}{]}
+ \PYG{k}{for} \PYG{n}{job\PYGZus{}parameters} \PYG{o+ow}{in} \PYG{n}{parameters}\PYG{p}{:}
+ \PYG{k}{yield} \PYG{n}{job\PYGZus{}parameters}
+
+\PYG{n+nd}{@files}\PYG{p}{(}\PYG{n}{generate\PYGZus{}parameters\PYGZus{}on\PYGZus{}the\PYGZus{}fly}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{parallel\PYGZus{}io\PYGZus{}task}\PYG{p}{(}\PYG{n}{input\PYGZus{}file}\PYG{p}{,} \PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{n}{param1}\PYG{p}{,} \PYG{n}{param2}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{pass}
+
+\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{p}{[}\PYG{n}{parallel\PYGZus{}io\PYGZus{}task}\PYG{p}{]}\PYG{p}{)}
+\end{Verbatim}
+
+\item[{is the equivalent of calling:}] \leavevmode
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n}{parallel\PYGZus{}io\PYGZus{}task}\PYG{p}{(}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{input\PYGZus{}file1}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{output\PYGZus{}file1}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+m+mi}{1}\PYG{p}{,} \PYG{l+m+mi}{2}\PYG{p}{)}
+\PYG{n}{parallel\PYGZus{}io\PYGZus{}task}\PYG{p}{(}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{input\PYGZus{}file2}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{output\PYGZus{}file2}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+m+mi}{3}\PYG{p}{,} \PYG{l+m+mi}{4}\PYG{p}{)}
+\PYG{n}{parallel\PYGZus{}io\PYGZus{}task}\PYG{p}{(}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{input\PYGZus{}file3}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{output\PYGZus{}file3}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+m+mi}{5}\PYG{p}{,} \PYG{l+m+mi}{6}\PYG{p}{)}
+\end{Verbatim}
+
+\end{description}
+
+\textbf{Parameters:}
+\end{quote}
+\phantomsection\label{decorators/files_ex:decorators-files-custom-function}\begin{quote}
+\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{custom\_function}:}] \leavevmode
+Generator function which yields a complete set of parameters for one job at a time (see the sketch below)
+
+\end{description}
+
+\end{itemize}
+\begin{description}
+\item[{\textbf{Checking if jobs are up to date:}}] \leavevmode
+Strings in \code{input} and \code{output} (including in nested sequences) are interpreted as file names and
+used to check if jobs are up-to-date.
+
+See {\hyperref[decorators/files:decorators-files-check-up-to-date]{\emph{above}}} for more details
+
+\end{description}
+\end{quote}
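+
+Because the parameters are generated each time the pipeline is run, the custom
+function can also discover its jobs at run time. The following is a minimal
+sketch only, using hypothetical file names and a hypothetical
+\code{.processed} suffix, in which the generator globs a directory and pairs
+each input with an output:
+
+\begin{Verbatim}
+from ruffus import *
+import glob
+import os
+
+def scan_for_parameters():
+    # hypothetical: pair every "*.raw" file found at run time
+    # with a corresponding ".processed" output file
+    for input_file in glob.glob("*.raw"):
+        output_file = os.path.splitext(input_file)[0] + ".processed"
+        yield [input_file, output_file]
+
+@files(scan_for_parameters)
+def process_file(input_file, output_file):
+    open(output_file, "w").close()
+
+pipeline_run([process_file])
+\end{Verbatim}
+
+Each \code{[input\_file, output\_file]} pair yielded by the generator becomes
+one job, checked for up-to-dateness in the usual way.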
+\phantomsection\label{decorators/check_if_uptodate:decorators-check-if-uptodate}
+\index{@check\_if\_uptodate!Syntax}\index{Syntax!@check\_if\_uptodate}
+
+\strong{See also:}
+
+\begin{itemize}
+\item {}
+{\hyperref[decorators/decorators:decorators]{\emph{Decorators}}} for more decorators
+
+\end{itemize}
+
+
+
+
+\subsection{@check\_if\_uptodate}
+\label{decorators/check_if_uptodate:decorators-check-if-uptodate-dependency-checking-function}\label{decorators/check_if_uptodate:check-if-uptodate}\label{decorators/check_if_uptodate::doc}\label{decorators/check_if_uptodate:dependency-checking-function}
+
+\subsubsection{\emph{@check\_if\_uptodate} (\emph{dependency\_checking\_function})}
+\label{decorators/check_if_uptodate:check-if-uptodate-dependency-checking-function}\begin{quote}
+\begin{description}
+\item[{\textbf{Purpose:}}] \leavevmode
+Checks to see whether a job is up to date and needs to be run.
+
+Usually used in conjunction with {\hyperref[decorators/parallel:decorators-parallel]{\emph{@parallel()}}}
+
+\end{description}
+
+\textbf{Example}:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{k+kn}{from} \PYG{n+nn}{ruffus} \PYG{k+kn}{import} \PYG{o}{*}
+\PYG{k+kn}{import} \PYG{n+nn}{os}
+\PYG{k}{def} \PYG{n+nf}{check\PYGZus{}file\PYGZus{}exists}\PYG{p}{(}\PYG{n}{input\PYGZus{}file}\PYG{p}{,} \PYG{n}{output\PYGZus{}file}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{if} \PYG{o+ow}{not} \PYG{n}{os}\PYG{o}{.}\PYG{n}{path}\PYG{o}{.}\PYG{n}{exists}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{return} \PYG{n+nb+bp}{True}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{Missing file }\PYG{l+s+si}{\PYGZpc{}s}\PYG{l+s}{\PYGZdq{}} \PYG{o}{\PYGZpc{}} \PYG{n}{output\PYGZus{}file}
+ \PYG{k}{else}\PYG{p}{:}
+ \PYG{k}{return} \PYG{n+nb+bp}{False}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{File }\PYG{l+s+si}{\PYGZpc{}s}\PYG{l+s}{ exists}\PYG{l+s}{\PYGZdq{}} \PYG{o}{\PYGZpc{}} \PYG{n}{output\PYGZus{}file}
+
+\PYG{n+nd}{@parallel}\PYG{p}{(}\PYG{p}{[}\PYG{p}{[}\PYG{n+nb+bp}{None}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{a.1}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{]}\PYG{p}{)}
+\PYG{n+nd}{@check\PYGZus{}if\PYGZus{}uptodate}\PYG{p}{(}\PYG{n}{check\PYGZus{}file\PYGZus{}exists}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{create\PYGZus{}if\PYGZus{}necessary}\PYG{p}{(}\PYG{n}{input\PYGZus{}file}\PYG{p}{,} \PYG{n}{output\PYGZus{}file}\PYG{p}{)}\PYG{p}{:}
+ \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+
+\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{p}{[}\PYG{n}{create\PYGZus{}if\PYGZus{}necessary}\PYG{p}{]}\PYG{p}{)}
+\end{Verbatim}
+
+Is equivalent to:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{k+kn}{from} \PYG{n+nn}{ruffus} \PYG{k+kn}{import} \PYG{o}{*}
+\PYG{n+nd}{@files}\PYG{p}{(}\PYG{n+nb+bp}{None}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{a.1}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{create\PYGZus{}if\PYGZus{}necessary}\PYG{p}{(}\PYG{n}{input\PYGZus{}file}\PYG{p}{,} \PYG{n}{output\PYGZus{}file}\PYG{p}{)}\PYG{p}{:}
+ \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{output\PYGZus{}file}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+
+\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{p}{[}\PYG{n}{create\PYGZus{}if\PYGZus{}necessary}\PYG{p}{]}\PYG{p}{)}
+\end{Verbatim}
+
+Both produce the same output:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+Task = create\_if\_necessary
+ Job = [null, "a.1"] completed
+\end{Verbatim}
+
+\textbf{Parameters:}
+\end{quote}
+\phantomsection\label{decorators/check_if_uptodate:decorators-check-if-uptodate-dependency-checking-function}\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{dependency\_checking\_function}:}] \leavevmode
+Returns two values: whether the job needs to be run, and a message explaining why.
+
+\emph{dependency\_checking\_function}() needs to accept the same number of parameters as the
+task function, e.g. \code{input\_file} and \code{output\_file} above. A further sketch is given below.
+
+\end{description}
+
+\end{itemize}
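+
+Because the checking function receives the same parameters as the task, it can
+implement checks other than simple file existence. The following is a minimal
+sketch only; the size-based rule is a hypothetical example, not something
+provided by Ruffus:
+
+\begin{Verbatim}
+import os
+
+def output_missing_or_smaller(input_file, output_file):
+    # hypothetical rule: re-run if the output is absent
+    # or smaller than the input
+    if not os.path.exists(output_file):
+        return True, "Missing file %s" % output_file
+    if os.path.getsize(output_file) < os.path.getsize(input_file):
+        return True, "%s is smaller than %s" % (output_file, input_file)
+    return False, "File %s is up to date" % output_file
+\end{Verbatim}
+
+Such a function would be attached to a task with
+\code{@check\_if\_uptodate(output\_missing\_or\_smaller)}, exactly as
+\code{check\_file\_exists} is in the example above.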
+\phantomsection\label{decorators/parallel:decorators-parallel}
+\index{@parallel!Syntax}\index{Syntax!@parallel}
+
+\strong{See also:}
+
+\begin{itemize}
+\item {}
+{\hyperref[decorators/decorators:decorators]{\emph{Decorators}}} for more decorators
+
+\end{itemize}
+
+
+
+
+\subsection{@parallel}
+\label{decorators/parallel::doc}\label{decorators/parallel:parallel}\phantomsection\label{decorators/parallel:decorators-parallel-job-params}\phantomsection\label{decorators/parallel:job-params}
+
+\subsubsection{\emph{@parallel} ( {[} {[}\emph{job\_params}, ...{]}, {[}\emph{job\_params}, ...{]}...{]} \textbar{} \emph{parameter\_generating\_function})}
+\label{decorators/parallel:decorators-parallel-parameter-generating-function}\label{decorators/parallel:parallel-job-params-job-params-parameter-generating-function}\label{decorators/parallel:parameter-generating-function}\begin{quote}
+\begin{description}
+\item[{\textbf{Purpose:}}] \leavevmode
+To apply the (task) function to a set of parameters in parallel without file dependency checking.
+
+Most useful when combined with {\hyperref[decorators/check_if_uptodate:decorators-check-if-uptodate]{\emph{@check\_if\_uptodate()}}}
+
+\end{description}
+
+\textbf{Example}:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{k+kn}{from} \PYG{n+nn}{ruffus} \PYG{k+kn}{import} \PYG{o}{*}
+\PYG{k+kn}{import} \PYG{n+nn}{sys}
+\PYG{n}{parameters} \PYG{o}{=} \PYG{p}{[}
+ \PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{A}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+m+mi}{1}\PYG{p}{,} \PYG{l+m+mi}{2}\PYG{p}{]}\PYG{p}{,} \PYG{c}{\PYGZsh{} 1st job}
+ \PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{B}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+m+mi}{3}\PYG{p}{,} \PYG{l+m+mi}{4}\PYG{p}{]}\PYG{p}{,} \PYG{c}{\PYGZsh{} 2nd job}
+ \PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{C}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+m+mi}{5}\PYG{p}{,} \PYG{l+m+mi}{6}\PYG{p}{]}\PYG{p}{,} \PYG{c}{\PYGZsh{} 3rd job}
+ \PYG{p}{]}
+\PYG{n+nd}{@parallel}\PYG{p}{(}\PYG{n}{parameters}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{parallel\PYGZus{}task}\PYG{p}{(}\PYG{n}{name}\PYG{p}{,} \PYG{n}{param1}\PYG{p}{,} \PYG{n}{param2}\PYG{p}{)}\PYG{p}{:}
+ \PYG{n}{sys}\PYG{o}{.}\PYG{n}{stderr}\PYG{o}{.}\PYG{n}{write}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{ Parallel task }\PYG{l+s+si}{\PYGZpc{}s}\PYG{l+s}{: }\PYG{l+s}{\PYGZdq{}} \PYG{o}{\PYGZpc{}} \PYG{n}{name}\PYG{p}{)}
+ \PYG{n}{sys}\PYG{o}{.}\PYG{n}{stderr}\PYG{o}{.}\PYG{n}{write}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s+si}{\PYGZpc{}d}\PYG{l+s}{ + }\PYG{l+s+si}{\PYGZpc{}d}\PYG{l+s}{ = }\PYG{l+s+si}{\PYGZpc{}d}\PYG{l+s+se}{\PYGZbs{}\PYGZbs{}}\PYG{l+s}{n}\PYG{l+s}{\PYGZdq{}} \PYG{o}{\PYGZpc{}} \PYG{p}{(}\PYG{n}{param1}\PYG{p}{,} \PYG{n}{param2}\PYG{p}{,} \PYG{n}{param1} \PYG{o}{+} \PYG{n}{param2}\PYG{p}{)}\PYG{p}{)}
+
+\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{p}{[}\PYG{n}{parallel\PYGZus{}task}\PYG{p}{]}\PYG{p}{)}
+\end{Verbatim}
+
+\textbf{Parameters:}
+\end{quote}
+\phantomsection\label{decorators/parallel:decorators-parallel-job-params}\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{job\_params}:}] \leavevmode
+Requires a sequence of parameters, one set for each job.
+
+Each set of parameters can be one or more items in a sequence, which will be passed to
+the decorated task function iteratively (or in parallel).
+
+For example:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n}{parameters} \PYG{o}{=} \PYG{p}{[}
+ \PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{A}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+m+mi}{1}\PYG{p}{,} \PYG{l+m+mi}{2}\PYG{p}{]}\PYG{p}{,} \PYG{c}{\PYGZsh{} 1st job}
+ \PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{B}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+m+mi}{3}\PYG{p}{,} \PYG{l+m+mi}{4}\PYG{p}{]}\PYG{p}{,} \PYG{c}{\PYGZsh{} 2nd job}
+ \PYG{p}{[}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{C}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+m+mi}{5}\PYG{p}{,} \PYG{l+m+mi}{6}\PYG{p}{]}\PYG{p}{,} \PYG{c}{\PYGZsh{} 3rd job}
+ \PYG{p}{]}
+\PYG{n+nd}{@parallel}\PYG{p}{(}\PYG{n}{parameters}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{parallel\PYGZus{}task}\PYG{p}{(}\PYG{n}{name}\PYG{p}{,} \PYG{n}{param1}\PYG{p}{,} \PYG{n}{param2}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{pass}
+\end{Verbatim}
+
+Will result in the following function calls:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n}{parallel\PYGZus{}task}\PYG{p}{(}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{A}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+m+mi}{1}\PYG{p}{,} \PYG{l+m+mi}{2}\PYG{p}{)}
+\PYG{n}{parallel\PYGZus{}task}\PYG{p}{(}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{B}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+m+mi}{3}\PYG{p}{,} \PYG{l+m+mi}{4}\PYG{p}{)}
+\PYG{n}{parallel\PYGZus{}task}\PYG{p}{(}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{C}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+m+mi}{5}\PYG{p}{,} \PYG{l+m+mi}{6}\PYG{p}{)}
+\end{Verbatim}
+
+\end{description}
+
+\end{itemize}
+\phantomsection\label{decorators/parallel:decorators-parallel-parameter-generating-function}\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{parameter\_generating\_function}}] \leavevmode\begin{enumerate}
+\item {}
+A generator yielding sets of parameters (as above), one job at a time and on the fly (see the sketch below)
+
+\item {}
+A function returning a sequence of parameter sets, as above
+
+\end{enumerate}
+
+\end{description}
+
+\end{itemize}
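+
+As with \code{@files}, the job parameters can also be produced on the fly. The
+following is a minimal sketch only; the job names and the number of jobs are
+hypothetical:
+
+\begin{Verbatim}
+from ruffus import *
+import sys
+
+def generate_job_params():
+    # hypothetical: yield one [name, param1, param2] set per job, on the fly
+    for i in range(3):
+        yield ["job_%d" % i, i, i * 2]
+
+@parallel(generate_job_params)
+def parallel_task(name, param1, param2):
+    sys.stderr.write("%s: %d + %d = %d\n" % (name, param1, param2, param1 + param2))
+
+pipeline_run([parallel_task])
+\end{Verbatim}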
+\end{minipage}}
+\begin{center}\setlength{\fboxsep}{5pt}\shadowbox{\box0}\end{center}
+\setbox0\vbox{
+\begin{minipage}{0.95\linewidth}
+\textbf{Deprecated}
+
+\medskip
+
+\phantomsection\label{decorators/files:decorators-files}
+\index{@files!Syntax}\index{Syntax!@files}
+
+\strong{See also:}
+
+\begin{itemize}
+\item {}
+{\hyperref[decorators/decorators:decorators]{\emph{Decorators}}} for more decorators
+
+\end{itemize}
+
+
+\phantomsection\label{decorators/files:decorators-files-input}\phantomsection\label{decorators/files:input}\phantomsection\label{decorators/files:decorators-files-input1}\phantomsection\label{decorators/files:input1}\phantomsection\label{decorators/files:decorators-files-output}\phantomsection\label{decorators/files:output}\phantomsection\label{decorators/files:decorators-files-output1}\phantomsection\label{decorators/files:output1}\phantomsection\label{decorators/files:decorators-files- [...]
+
+\subsection{@files}
+\label{decorators/files:files}\label{decorators/files:decorators-files-extra-parameters1}\label{decorators/files::doc}\label{decorators/files:extra-parameters1}
+
+\subsubsection{\emph{@files} (\emph{input1}, \emph{output1}, {[}\emph{extra\_parameters1}, ...{]})}
+\label{decorators/files:files-input1-output1-extra-parameters1}
+
+\paragraph{@files for single jobs}
+\label{decorators/files:files-for-single-jobs}\begin{quote}
+\begin{description}
+\item[{\textbf{Purpose:}}] \leavevmode
+Provides parameters to run a task.
+
+The first two parameters in each set represent the input and output which are
+used to see if the job is out of date and needs to be (re-)run.
+
+By default, out of date checking uses input/output file timestamps.
+(On some file systems, timestamps have a resolution in seconds.)
+See {\hyperref[decorators/check_if_uptodate:decorators-check-if-uptodate]{\emph{@check\_if\_uptodate()}}} for alternatives.
+
+\item[{\textbf{Example}:}] \leavevmode
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{k+kn}{from} \PYG{n+nn}{ruffus} \PYG{k+kn}{import} \PYG{o}{*}
+\PYG{n+nd}{@files}\PYG{p}{(}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{a.1}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{a.2}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{A file}\PYG{l+s}{\PYGZsq{}}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{transform\PYGZus{}files}\PYG{p}{(}\PYG{n}{infile}\PYG{p}{,} \PYG{n}{outfile}\PYG{p}{,} \PYG{n}{text}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{pass}
+\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{p}{[}\PYG{n}{transform\PYGZus{}files}\PYG{p}{]}\PYG{p}{)}
+\end{Verbatim}
+
+\item[{If \code{a.2} is missing or was created before \code{a.1}, then the following will be called:}] \leavevmode
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n}{transform\PYGZus{}files}\PYG{p}{(}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{a.1}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{a.2}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{A file}\PYG{l+s}{\PYGZsq{}}\PYG{p}{)}
+\end{Verbatim}
+
+\end{description}
+
+\textbf{Parameters:}
+\end{quote}
+\phantomsection\label{decorators/files:decorators-files-input1}\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{input}}] \leavevmode
+Input file names
+
+\end{description}
+
+\end{itemize}
+\phantomsection\label{decorators/files:decorators-files-output1}\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{output}}] \leavevmode
+Output file names
+
+\end{description}
+
+\end{itemize}
+\phantomsection\label{decorators/files:decorators-files-extra-parameters1}\begin{quote}
+\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{extra\_parameters}}] \leavevmode
+Optional \code{extra\_parameters} are passed verbatim to each job.
+
+\end{description}
+
+\end{itemize}
+\begin{description}
+\item[{\textbf{Checking if jobs are up to date:}}] \leavevmode
+Strings in \code{input} and \code{output} (including in nested sequences) are interpreted as file names and
+used to check if jobs are up-to-date.
+
+See {\hyperref[decorators/files:decorators-files-check-up-to-date]{\emph{above}}} for more details
+
+\end{description}
+\end{quote}
+
+
+\subsubsection{\emph{@files} ( \emph{((} \emph{input}, \emph{output}, {[}\emph{extra\_parameters},...{]} \emph{), (...), ...)} )}
+\label{decorators/files:files-input-output-extra-parameters}
+
+\paragraph{@files in parallel}
+\label{decorators/files:files-in-parallel}\begin{quote}
+
+\textbf{Purpose:}
+\begin{quote}
+
+Passes each set of parameters to separate jobs, which can run in parallel.
+
+The first two parameters in each set represent the input and output which are
+used to see if the job is out of date and needs to be (re-)run.
+
+By default, out of date checking uses input/output file timestamps.
+(On some file systems, timestamps have a resolution in seconds.)
+See {\hyperref[decorators/check_if_uptodate:decorators-check-if-uptodate]{\emph{@check\_if\_uptodate()}}} for alternatives.
+\end{quote}
+\begin{description}
+\item[{\textbf{Example}:}] \leavevmode
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{k+kn}{from} \PYG{n+nn}{ruffus} \PYG{k+kn}{import} \PYG{o}{*}
+\PYG{n}{parameters} \PYG{o}{=} \PYG{p}{[}
+ \PYG{p}{[} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{a.1}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{a.2}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{A file}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]}\PYG{p}{,} \PYG{c}{\PYGZsh{} 1st job}
+ \PYG{p}{[} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{b.1}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{b.2}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{B file}\PYG{l+s}{\PYGZsq{}}\PYG{p}{]}\PYG{p}{,} \PYG{c}{\PYGZsh{} 2nd job}
+ \PYG{p}{]}
+
+\PYG{n+nd}{@files}\PYG{p}{(}\PYG{n}{parameters}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{parallel\PYGZus{}io\PYGZus{}task}\PYG{p}{(}\PYG{n}{infile}\PYG{p}{,} \PYG{n}{outfile}\PYG{p}{,} \PYG{n}{text}\PYG{p}{)}\PYG{p}{:}
+ \PYG{k}{pass}
+\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{p}{[}\PYG{n}{parallel\PYGZus{}io\PYGZus{}task}\PYG{p}{]}\PYG{p}{)}
+\end{Verbatim}
+
+\item[{is the equivalent of calling:}] \leavevmode
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n}{parallel\PYGZus{}io\PYGZus{}task}\PYG{p}{(}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{a.1}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{a.2}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{A file}\PYG{l+s}{\PYGZsq{}}\PYG{p}{)}
+\PYG{n}{parallel\PYGZus{}io\PYGZus{}task}\PYG{p}{(}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{b.1}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{b.2}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{B file}\PYG{l+s}{\PYGZsq{}}\PYG{p}{)}
+\end{Verbatim}
+
+\end{description}
+
+\textbf{Parameters:}
+\end{quote}
+\phantomsection\label{decorators/files:decorators-files-input}\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{input}}] \leavevmode
+Input file names
+
+\end{description}
+
+\end{itemize}
+\phantomsection\label{decorators/files:decorators-files-output}\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{output}}] \leavevmode
+Output file names
+
+\end{description}
+
+\end{itemize}
+\phantomsection\label{decorators/files:decorators-files-extra-parameters}\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{extra\_parameters}}] \leavevmode
+optional \code{extra\_parameters} are passed verbatim to each job.
+
+\end{description}
+
+\end{itemize}
+\phantomsection\label{decorators/files:decorators-files-check-up-to-date}\begin{quote}
+\begin{description}
+\item[{\textbf{Checking if jobs are up to date:}}] \leavevmode\begin{enumerate}
+\item {}
+Strings in \code{input} and \code{output} (including in nested sequences) are interpreted as file names and
+used to check if jobs are up-to-date.
+
+\item {}
+In the absence of input files (e.g. \code{input == None}), the job will run if any output file is missing.
+
+\item {}
+In the absence of output files (e.g. \code{output == None}), the job will always run.
+
+\item {}
+If any of the output files is missing, the job will run.
+
+\item {}
+If any of the input files is missing when the job is run, a
+\code{MissingInputFileError} exception will be raised.
+
+\end{enumerate}
+
+\end{description}
+\end{quote}
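+
+A minimal sketch of rules 2 and 4 above (the file name \code{start.txt} is chosen purely for illustration): a task with no input files is re-run only when its output file is missing.
+
+\begin{Verbatim}
+from ruffus import *
+
+# No inputs (rule 2): the job runs only when "start.txt" is missing (rule 4)
+@files(None, 'start.txt')
+def create_if_missing(input_file, output_file):
+    open(output_file, 'w').write('seed data\n')
+
+pipeline_run([create_if_missing])
+\end{Verbatim}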
+\phantomsection\label{decorators/files_re:decorators-files-re}
+\index{@files\_re!Syntax}\index{Syntax!@files\_re}
+
+\strong{See also:}
+
+\begin{itemize}
+\item {}
+{\hyperref[decorators/decorators:decorators]{\emph{Decorators}}} for more decorators
+
+\end{itemize}
+
+
+
+
+\subsection{@files\_re}
+\label{decorators/files_re:files-re}\label{decorators/files_re::doc}\phantomsection\label{decorators/files_re:decorators-files-re-tasks-or-file-names}\phantomsection\label{decorators/files_re:tasks-or-file-names}\phantomsection\label{decorators/files_re:decorators-files-re-extra-parameters}\phantomsection\label{decorators/files_re:extra-parameters}\phantomsection\label{decorators/files_re:decorators-files-re-output-pattern}\phantomsection\label{decorators/files_re:output-pattern}\phantom [...]
+
+\subsubsection{\emph{@files\_re} (\emph{tasks\_or\_file\_names}, \emph{matching\_regex}, {[}\emph{input\_pattern}{]}, \emph{output\_pattern}, {[}\emph{extra\_parameters},...{]})}
+\label{decorators/files_re:files-re-tasks-or-file-names-matching-regex-input-pattern-output-pattern-extra-parameters}\label{decorators/files_re:matching-regex}\label{decorators/files_re:decorators-files-re-matching-regex}
+
+\paragraph{Legacy design now deprecated. We suggest using \emph{@transform()} instead}
+\label{decorators/files_re:legacy-design-now-deprecated-we-suggest-using-transform-instead}\begin{quote}
+
+\textbf{Purpose:}
+\begin{quote}
+
+All singing, all dancing decorator which can do everything that {\hyperref[decorators/merge:decorators-merge]{\emph{@merge()}}} and
+{\hyperref[decorators/transform:decorators-transform]{\emph{@transform()}}} can do.
+
+Applies the task function to transform data from input to output files.
+
+Output file names are determined from {\hyperref[decorators/files_re:decorators-files-re-tasks-or-file-names]{\emph{tasks\_or\_file\_names}}}, i.e. from the output
+of specified tasks, or a list of file names, using regular expression pattern substitutions.
+
+Only out of date tasks (comparing input and output files) will be run.
+\end{quote}
+\begin{description}
+\item[{\textbf{Example}:}] \leavevmode
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{k+kn}{from} \PYG{n+nn}{ruffus} \PYG{k+kn}{import} \PYG{o}{*}
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} convert all files ending in \PYGZdq{}.1\PYGZdq{} into files ending in \PYGZdq{}.2\PYGZdq{}}
+\PYG{c}{\PYGZsh{}}
+\PYG{n+nd}{@files\PYGZus{}re}\PYG{p}{(}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{*.1}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{(.*).1}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{r\PYGZsq{}}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{1.2}\PYG{l+s}{\PYGZsq{}}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{transform\PYGZus{}func}\PYG{p}{(}\PYG{n}{infile}\PYG{p}{,} \PYG{n}{outfile}\PYG{p}{)}\PYG{p}{:}
+ \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{outfile}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{o}{.}\PYG{n}{write}\PYG{p}{(}\PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{infile}\PYG{p}{)}\PYG{o}{.}\PYG{n}{read}\PYG{p}{(}\PYG{p}{)} \PYG{o}{+} \PYG{l+s}{\PYGZdq{}}\PYG{l+s+se}{\PYGZbs{}n}\PYG{l+s}{converted}\PYG{l+s+se}{\PYGZbs{}n}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+
+\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{p}{[}\PYG{n}{transform\PYGZus{}func}\PYG{p}{]}\PYG{p}{)}
+\end{Verbatim}
+
+\item[{If the following files are present \code{a.1}, \code{b.1}, \code{c.1}, this will result in the following function calls:}] \leavevmode
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n}{transform\PYGZus{}func}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{a.1}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{a.2}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{n}{transform\PYGZus{}func}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{b.1}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{b.2}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\PYG{n}{transform\PYGZus{}func}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{c.1}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{c.2}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}
+\end{Verbatim}
+
+\end{description}
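+
+Since \emph{@files\_re} is deprecated, the same pipeline is better written with \emph{@transform()} and \code{regex()}. The following sketch (shown for comparison only, with the \code{.1} suffix escaped in the regular expression) is equivalent to the example above:
+
+\begin{Verbatim}
+from ruffus import *
+
+# Recommended replacement for the deprecated @files_re example above:
+# convert all files ending in ".1" into files ending in ".2"
+@transform('*.1', regex(r'(.*)\.1'), r'\1.2')
+def transform_func(infile, outfile):
+    open(outfile, 'w').write(open(infile).read() + '\nconverted\n')
+
+pipeline_run([transform_func])
+\end{Verbatim}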
+
+\textbf{Parameters:}
+\end{quote}
+\phantomsection\label{decorators/files_re:decorators-files-re-tasks-or-file-names}\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{tasks\_or\_file\_names}}] \leavevmode
+can be a:
+\begin{enumerate}
+\item {} \begin{description}
+\item[{Task / list of tasks (as in the example above).}] \leavevmode
+File names are taken from the output of the specified task(s)
+
+\end{description}
+
+\item {} \begin{description}
+\item[{(Nested) list of file name strings.}] \leavevmode\begin{description}
+\item[{File names containing \code{*{[}{]}?} will be expanded as a \href{http://docs.python.org/library/glob.html}{\emph{glob}}.}] \leavevmode
+E.g.: \code{"a.*" =\textgreater{} "a.1", "a.2"}
+
+\end{description}
+
+\end{description}
+
+\end{enumerate}
+
+\end{description}
+
+\end{itemize}
+\phantomsection\label{decorators/files_re:decorators-files-re-matching-regex}\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{matching\_regex}}] \leavevmode
+a python regular expression string.
+
+\begin{DUlineblock}{0em}
+\item[] See python \href{http://docs.python.org/library/re.html}{regular expression (re)} documentation for details of regular expression syntax
+\item[] Each output file name is created using regular expression substitution with {\hyperref[decorators/files_re:decorators-files-re-output-pattern]{\emph{output\_pattern}}}
+\end{DUlineblock}
+
+\end{description}
+
+\end{itemize}
+\phantomsection\label{decorators/files_re:decorators-files-re-input-pattern}\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{input\_pattern}}] \leavevmode
+Optionally specifies the resulting input file name(s).
+
+\end{description}
+
+\end{itemize}
+\phantomsection\label{decorators/files_re:decorators-files-re-output-pattern}\begin{itemize}
+\item {} \begin{description}
+\item[{\emph{output\_pattern}}] \leavevmode
+Specifies the resulting output file name(s).
+
+\end{description}
+
+\end{itemize}
+\phantomsection\label{decorators/files_re:decorators-files-re-extra-parameters}\begin{itemize}
+\item {} \begin{description}
+\item[{{[}\emph{extra\_parameters, ...}{]}}] \leavevmode
+Any extra parameters are passed to the task function.
+
+\begin{DUlineblock}{0em}
+\item[] Regular expression substitution is first applied to (even nested) string parameters.
+\item[] Other data types are passed verbatim.
+\end{DUlineblock}
+\begin{description}
+\item[{For example:}] \leavevmode
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{k+kn}{from} \PYG{n+nn}{ruffus} \PYG{k+kn}{import} \PYG{o}{*}
+\PYG{c}{\PYGZsh{}}
+\PYG{c}{\PYGZsh{} convert all files ending in \PYGZdq{}.1\PYGZdq{} into files ending in \PYGZdq{}.2\PYGZdq{}}
+\PYG{c}{\PYGZsh{}}
+\PYG{n+nd}{@files\PYGZus{}re}\PYG{p}{(}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{*.1}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{(.*).1}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s}{r\PYGZsq{}}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{1.2}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{p}{[}\PYG{l+s}{r\PYGZsq{}}\PYG{l+s}{\PYGZbs{}}\PYG{l+s}{1}\PYG{l+s}{\PYGZsq{}}\PYG{p}{,} \PYG{l+m+mi}{55}\PYG{p}{]}\PYG{p}{,} \PYG{l+m+mi}{17}\PYG{p}{)}
+\PYG{k}{def} \PYG{n+nf}{transform\PYGZus{}func}\PYG{p}{(}\PYG{n}{infile}\PYG{p}{,} \PYG{n}{outfile}\PYG{p}{,} \PYG{n}{extras}\PYG{p}{,} \PYG{n}{extra3}\PYG{p}{)}\PYG{p}{:}
+ \PYG{n}{extra1}\PYG{p}{,} \PYG{n}{extra2} \PYG{o}{=} \PYG{n}{extras}
+ \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{outfile}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{w}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{o}{.}\PYG{n}{write}\PYG{p}{(}\PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{infile}\PYG{p}{)}\PYG{o}{.}\PYG{n}{read}\PYG{p}{(}\PYG{p}{)} \PYG{o}{+} \PYG{l+s}{\PYGZdq{}}\PYG{l+s+se}{\PYGZbs{}n}\PYG{l+s}{converted}\PYG{l+s+si}{\PYGZpc{}s}\PYG{l+s+se}{\PYGZbs{}n}\PYG{l+s}{\PYGZdq{}} \PYG{o}{\PYGZpc{}} \PYG{p}{(}\PYG{n}{extra1}\PYG{p}{,} \PYG{n}{extra2}\PYG{p}{,} \PYG{n}{extra3}\PYG [...]
+
+\PYG{n}{pipeline\PYGZus{}run}\PYG{p}{(}\PYG{p}{[}\PYG{n}{transform\PYGZus{}func}\PYG{p}{]}\PYG{p}{)}
+\end{Verbatim}
+
+\item[{If the following files are present \code{a.1}, \code{b.1}, \code{c.1}, this will result in the following function calls:}] \leavevmode
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{n}{transform\PYGZus{}func}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{a.1}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{a.2}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{a}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+m+mi}{55}\PYG{p}{]}\PYG{p}{,} \PYG{l+m+mi}{17}\PYG{p}{)}
+\PYG{n}{transform\PYGZus{}func}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{b.1}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{b.2}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{b}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+m+mi}{55}\PYG{p}{]}\PYG{p}{,} \PYG{l+m+mi}{17}\PYG{p}{)}
+\PYG{n}{transform\PYGZus{}func}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{c.1}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{c.2}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{c}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{l+m+mi}{55}\PYG{p}{]}\PYG{p}{,} \PYG{l+m+mi}{17}\PYG{p}{)}
+\end{Verbatim}
+
+\end{description}
+
+\end{description}
+
+\end{itemize}
+\end{minipage}}
+\begin{center}\setlength{\fboxsep}{5pt}\shadowbox{\box0}\end{center}
+
+
+\section{Modules:}
+\label{contents:modules}
+
+\subsection{ruffus.Task}
+\label{task:ruffus-task}\label{task:glob}\label{task::doc}
+
+\subsubsection{Decorators}
+\label{task:decorators}\begin{quote}
+
+Basic Task decorators are:
+\begin{quote}
+
+{\hyperref[decorators/follows:decorators-follows]{\emph{@follows()}}}
+
+and
+
+{\hyperref[decorators/files:decorators-files]{\emph{@files()}}}
+\end{quote}
+
+Task decorators include:
+\begin{quote}
+
+{\hyperref[decorators/files:decorators-files]{\emph{@split()}}}
+
+{\hyperref[decorators/files:decorators-files]{\emph{@transform()}}}
+
+{\hyperref[decorators/files:decorators-files]{\emph{@merge()}}}
+
+{\hyperref[decorators/posttask:decorators-posttask]{\emph{@posttask()}}}
+\end{quote}
+
+More advanced users may require:
+\begin{quote}
+
+{\hyperref[decorators/transform_ex:decorators-transform-ex]{\emph{@transform()}}}
+
+{\hyperref[decorators/collate:decorators-collate]{\emph{@collate()}}}
+
+{\hyperref[decorators/parallel:decorators-parallel]{\emph{@parallel()}}}
+
+{\hyperref[decorators/check_if_uptodate:decorators-check-if-uptodate]{\emph{@check\_if\_uptodate()}}}
+
+{\hyperref[decorators/files_re:decorators-files-re]{\emph{@files\_re()}}}
+\end{quote}
+\end{quote}
+
+
+\subsubsection{Pipeline functions}
+\label{task:pipeline-functions}
+
+\paragraph{pipeline\_run}
+\label{task:pipeline-run}\index{pipeline\_run() (in module ruffus.task)}
+
+\begin{fulllineitems}
+\phantomsection\label{task:ruffus.task.pipeline_run}\pysiglinewithargsret{\code{ruffus.task.}\bfcode{pipeline\_run}}{\emph{target\_tasks}, \emph{forcedtorun\_tasks=}\optional{}, \emph{multiprocess=1}, \emph{logger=stderr\_logger}, \emph{gnu\_make\_maximal\_rebuild\_mode=True}}{}
+Run pipelines.
+\begin{quote}\begin{description}
+\item[{Parameters}] \leavevmode\begin{itemize}
+\item {}
+\textbf{target\_tasks} -- target task functions which will be run if they are out-of-date
+
+\item {}
+\textbf{forcedtorun\_tasks} -- task functions which will be run whether or not they are out-of-date
+
+\item {}
+\textbf{multiprocess} -- The number of concurrent jobs running on different processes.
+
+\item {}
+\textbf{multithread} -- The number of concurrent jobs running as different threads. If \textgreater{} 1, ruffus will use multithreading \emph{instead of} multiprocessing (and ignore the multiprocess parameter). Using multithreading is particularly useful for managing high performance clusters, which are otherwise prone to ``processor storms'' when a large number of cores finish jobs at the same time. (Thanks Andreas Heger)
+
+\item {}
+\textbf{logger} (\href{http://docs.python.org/library/logging.html}{logging} objects) -- Where progress will be logged. Defaults to stderr output.
+
+\item {}
+\textbf{verbose} -- level 0 : nothing
+level 1 : Out-of-date Task names
+level 2 : All Tasks (including any task function docstrings)
+level 3 : Out-of-date Jobs in Out-of-date Tasks, no explanation
+level 4 : Out-of-date Jobs in Out-of-date Tasks, with explanations and warnings
+level 5 : All Jobs in Out-of-date Tasks, (include only list of up-to-date tasks)
+level 6 : All jobs in All Tasks whether out of date or not
+level 10: logs messages useful only for debugging ruffus pipeline code
+
+\item {}
+\textbf{touch\_files\_only} -- Create or update input/output files only to simulate running the pipeline. Do not run jobs. If set to CHECKSUM\_REGENERATE, will regenerate the checksum history file to reflect the existing i/o files on disk.
+
+\item {}
+\textbf{exceptions\_terminate\_immediately} -- Exceptions cause immediate termination
+rather than waiting for N jobs to finish where N = multiprocess
+
+\item {}
+\textbf{log\_exceptions} -- Print exceptions to the logger as soon as they occur.
+
+\item {}
+\textbf{checksum\_level} -- Several options for checking up-to-dateness are available: Default is level 1.
+level 0 : Use only file timestamps
+level 1 : above, plus timestamp of successful job completion
+level 2 : above, plus a checksum of the pipeline function body
+level 3 : above, plus a checksum of the pipeline function default arguments and the additional arguments passed in by task decorators
+
+\item {}
+\textbf{one\_second\_per\_job} -- To work around poor file timestamp resolution on some file systems. Defaults to True if checksum\_level is 0, forcing Tasks to take a minimum of 1 second to complete.
+
+\item {}
+\textbf{runtime\_data} -- Experimental feature for passing data to tasks at run time
+
+\item {}
+\textbf{gnu\_make\_maximal\_rebuild\_mode} -- Defaults to re-running \emph{all} out-of-date tasks. Runs only the minimal
+set of tasks needed to build the targets if set to \code{False}. Use with caution.
+
+\item {}
+\textbf{history\_file} -- The database file which stores checksums and file timestamps for input/output files.
+
+\item {}
+\textbf{verbose\_abbreviated\_path} -- Whether input and output paths are abbreviated.
+level 0: The full (expanded, abspath) input or output path
+level \textgreater{} 1: The number of subdirectories to include. Abbreviated paths are prefixed with \code{{[},,,{]}/}
+level \textless{} 0: Input / Output parameters are truncated to \code{MMM} letters where \code{verbose\_abbreviated\_path == -MMM}. Subdirectories are first removed to see if this allows the paths to fit in the specified limit. Otherwise abbreviated paths are prefixed by \code{\textless{}???\textgreater{}}
+
+\end{itemize}
+
+\end{description}\end{quote}
+
+\end{fulllineitems}
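+
+A minimal usage sketch combining some of the parameters documented above; the name \code{final\_task} is a placeholder for the last task function of a pipeline:
+
+\begin{Verbatim}
+from ruffus import *
+
+# "final_task" is a placeholder for the last task function in your pipeline
+pipeline_run([final_task],
+             multiprocess   = 4,                     # up to 4 concurrent jobs
+             verbose        = 3,                     # out-of-date jobs, no explanation
+             checksum_level = 1,                     # timestamps + job completion times
+             history_file   = "my_pipeline.sqlite")  # where checksums are stored
+\end{Verbatim}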
+
+
+
+\paragraph{pipeline\_printout}
+\label{task:pipeline-printout}\index{pipeline\_printout() (in module ruffus.task)}
+
+\begin{fulllineitems}
+\phantomsection\label{task:ruffus.task.pipeline_printout}\pysiglinewithargsret{\code{ruffus.task.}\bfcode{pipeline\_printout}}{\emph{output\_stream=None}, \emph{target\_tasks=}\optional{}, \emph{forcedtorun\_tasks=}\optional{}, \emph{verbose=None}, \emph{indent=4}, \emph{gnu\_make\_maximal\_rebuild\_mode=True}, \emph{wrap\_width=100}, \emph{runtime\_data=None}, \emph{checksum\_level=None}, \emph{history\_file=None}, \emph{verbose\_abbreviated\_path=None}}{}
+Prints out the parts of the pipeline which will be run
+
+Because the parameters of some jobs depend on the results of previous tasks, this function
+produces only the current snapshot of task jobs. In particular, tasks which generate
+a variable number of inputs into following tasks will not produce the full range of jobs.
+\begin{description}
+\item[{Verbosity levels:}] \leavevmode
+verbose = 0 : Nothing
+verbose = 1 : Out-of-date Task names
+verbose = 2 : All Tasks (including any task function docstrings)
+verbose = 3 : Out-of-date Jobs in Out-of-date Tasks, no explanation
+verbose = 4 : Out-of-date Jobs in Out-of-date Tasks, with explanations and warnings
+verbose = 5 : All Jobs in Out-of-date Tasks, (include only list of up-to-date tasks)
+verbose = 6 : All jobs in All Tasks whether out of date or not
+
+\end{description}
+\begin{quote}\begin{description}
+\item[{Parameters}] \leavevmode\begin{itemize}
+\item {}
+\textbf{output\_stream} (file-like object with \code{write()} function) -- where to print to
+
+\item {}
+\textbf{target\_tasks} -- target task functions which will be run if they are out-of-date
+
+\item {}
+\textbf{forcedtorun\_tasks} -- task functions which will be run whether or not they are out-of-date
+
+\item {}
+\textbf{verbose} -- level 0 : nothing
+level 1 : Out-of-date Task names
+level 2 : All Tasks (including any task function docstrings)
+level 3 : Out-of-date Jobs in Out-of-date Tasks, no explanation
+level 4 : Out-of-date Jobs in Out-of-date Tasks, with explanations and warnings
+level 5 : All Jobs in Out-of-date Tasks, (include only list of up-to-date tasks)
+level 6 : All jobs in All Tasks whether out of date or not
+level 10: logs messages useful only for debugging ruffus pipeline code
+
+\item {}
+\textbf{indent} -- How much indentation to use for pretty formatting.
+
+\item {}
+\textbf{gnu\_make\_maximal\_rebuild\_mode} -- Defaults to re-running \emph{all} out-of-date tasks. Runs only the minimal
+set of tasks needed to build the targets if set to \code{False}. Use with caution.
+
+\item {}
+\textbf{wrap\_width} -- The maximum length of each line
+
+\item {}
+\textbf{runtime\_data} -- Experimental feature for passing data to tasks at run time
+
+\item {}
+\textbf{checksum\_level} -- Several options for checking up-to-dateness are available: Default is level 1.
+level 0 : Use only file timestamps
+level 1 : As above, plus timestamp of successful job completion
+level 2 : As above, plus a checksum of the pipeline function body
+level 3 : As above, plus a checksum of the pipeline function default arguments and the additional arguments passed in by task decorators
+
+\item {}
+\textbf{history\_file} -- The database file which stores checksums and file timestamps for input/output files.
+
+\item {}
+\textbf{verbose\_abbreviated\_path} -- Whether input and output paths are abbreviated.
+level 0: The full (expanded, abspath) input or output path
+level \textgreater{} 1: The number of subdirectories to include. Abbreviated paths are prefixed with \code{{[},,,{]}/}
+level \textless{} 0: Input / Output parameters are truncated to \code{MMM} letters where \code{verbose\_abbreviated\_path == -MMM}. Subdirectories are first removed to see if this allows the paths to fit in the specified limit. Otherwise abbreviated paths are prefixed by \code{\textless{}???\textgreater{}}
+
+\end{itemize}
+
+\end{description}\end{quote}
+
+\end{fulllineitems}
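+
+A sketch of a typical call, printing the out-of-date jobs to stdout without running anything (\code{final\_task} is again a placeholder task function):
+
+\begin{Verbatim}
+import sys
+from ruffus import *
+
+# Show which jobs would be run, without actually running the pipeline
+pipeline_printout(sys.stdout, [final_task], verbose=3)
+\end{Verbatim}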
+
+
+
+\paragraph{pipeline\_printout\_graph}
+\label{task:pipeline-printout-graph}\index{pipeline\_printout\_graph() (in module ruffus.task)}
+
+\begin{fulllineitems}
+\phantomsection\label{task:ruffus.task.pipeline_printout_graph}\pysiglinewithargsret{\code{ruffus.task.}\bfcode{pipeline\_printout\_graph}}{\emph{stream}, \emph{output\_format=None}, \emph{target\_tasks=}\optional{}, \emph{forcedtorun\_tasks=}\optional{}, \emph{draw\_vertically=True}, \emph{ignore\_upstream\_of\_target=False}, \emph{skip\_uptodate\_tasks=False}, \emph{gnu\_make\_maximal\_rebuild\_mode=True}, \emph{test\_all\_task\_for\_update=True}, \emph{no\_key\_legend=False}, \emph{mi [...]
+Prints out pipeline dependencies in various formats
+\begin{quote}\begin{description}
+\item[{Parameters}] \leavevmode\begin{itemize}
+\item {}
+\textbf{stream} (file-like object with \code{write()} function) -- where to print to
+
+\item {}
+\textbf{output\_format} -- {[}``dot'', ``jpg'', ``svg'', ``ps'', ``png''{]}. All but the first depend on the \href{http://www.graphviz.org}{dot} program.
+
+\item {}
+\textbf{target\_tasks} -- target task functions which will be run if they are out-of-date.
+
+\item {}
+\textbf{forcedtorun\_tasks} -- task functions which will be run whether or not they are out-of-date.
+
+\item {}
+\textbf{draw\_vertically} -- Top to bottom instead of left to right.
+
+\item {}
+\textbf{ignore\_upstream\_of\_target} -- Don't draw upstream tasks of targets.
+
+\item {}
+\textbf{skip\_uptodate\_tasks} -- Don't draw up-to-date tasks if possible.
+
+\item {}
+\textbf{gnu\_make\_maximal\_rebuild\_mode} -- Defaults to re-running \emph{all} out-of-date tasks. Runs only the minimal
+set of tasks needed to build the targets if set to \code{False}. Use with caution.
+
+\item {}
+\textbf{test\_all\_task\_for\_update} -- Ask all task functions if they are up-to-date.
+
+\item {}
+\textbf{no\_key\_legend} -- Don't draw key/legend for graph.
+
+\item {}
+\textbf{minimal\_key\_legend} -- Only add entries to the legend for task types which appear
+
+\item {}
+\textbf{user\_colour\_scheme} -- Dictionary specifying colour scheme for flowchart
+
+\item {}
+\textbf{pipeline\_name} -- Pipeline Title
+
+\item {}
+\textbf{size} -- tuple of x and y dimensions
+
+\item {}
+\textbf{dpi} -- print resolution
+
+\item {}
+\textbf{runtime\_data} -- Experimental feature for passing data to tasks at run time
+
+\item {}
+\textbf{history\_file} -- The database file which stores checksums and file timestamps for input/output files.
+
+\item {}
+\textbf{checksum\_level} -- Several options for checking up-to-dateness are available: Default is level 1.
+level 0 : Use only file timestamps
+level 1 : above, plus timestamp of successful job completion
+level 2 : above, plus a checksum of the pipeline function body
+level 3 : above, plus a checksum of the pipeline function default arguments and the additional arguments passed in by task decorators
+
+\end{itemize}
+
+\end{description}\end{quote}
+
+\end{fulllineitems}
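+
+A sketch writing the pipeline flowchart to an SVG file (any of the output formats listed above would do; \code{final\_task} is a placeholder task function):
+
+\begin{Verbatim}
+from ruffus import *
+
+# Write the flowchart to "flowchart.svg"; formats other than "dot"
+# require the external "dot" program from graphviz
+pipeline_printout_graph(open("flowchart.svg", "w"), "svg", [final_task])
+\end{Verbatim}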
+
+
+
+\subsubsection{Logging}
+\label{task:id1}\index{t\_black\_hole\_logger (class in ruffus.task)}
+
+\begin{fulllineitems}
+\phantomsection\label{task:ruffus.task.t_black_hole_logger}\pysigline{\strong{class }\code{ruffus.task.}\bfcode{t\_black\_hole\_logger}}
+Does nothing!
+
+\end{fulllineitems}
+
+\index{t\_stderr\_logger (class in ruffus.task)}
+
+\begin{fulllineitems}
+\phantomsection\label{task:ruffus.task.t_stderr_logger}\pysigline{\strong{class }\code{ruffus.task.}\bfcode{t\_stderr\_logger}}
+Everything to stderr
+
+\end{fulllineitems}
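+
+For example (a sketch), all progress messages can be discarded by passing an instance of \code{t\_black\_hole\_logger} as the \code{logger} parameter of \code{pipeline\_run()}; \code{final\_task} is a placeholder task:
+
+\begin{Verbatim}
+from ruffus import *
+from ruffus.task import t_black_hole_logger
+
+# Discard all progress messages ("final_task" is a placeholder task)
+pipeline_run([final_task], logger=t_black_hole_logger())
+\end{Verbatim}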
+
+
+
+\subsubsection{Implementation:}
+\label{task:implementation}
+
+\paragraph{Parameter factories:}
+\label{task:parameter-factories}\index{merge\_param\_factory() (in module ruffus.task)}
+
+\begin{fulllineitems}
+\phantomsection\label{task:ruffus.task.merge_param_factory}\pysiglinewithargsret{\code{ruffus.task.}\bfcode{merge\_param\_factory}}{\emph{input\_files\_task\_globs}, \emph{output\_param}, \emph{*extra\_params}}{}
+Factory for task\_merge
+
+\end{fulllineitems}
+
+\index{collate\_param\_factory() (in module ruffus.task)}
+
+\begin{fulllineitems}
+\phantomsection\label{task:ruffus.task.collate_param_factory}\pysiglinewithargsret{\code{ruffus.task.}\bfcode{collate\_param\_factory}}{\emph{input\_files\_task\_globs}, \emph{flatten\_input}, \emph{file\_names\_transform}, \emph{extra\_input\_files\_task\_globs}, \emph{replace\_inputs}, \emph{output\_pattern}, \emph{*extra\_specs}}{}
+Factory for task\_collate
+
+Looks exactly like @transform except that all {[}input{]} which lead to the same {[}output / extra{]} are combined together
+
+\end{fulllineitems}
+
+\index{transform\_param\_factory() (in module ruffus.task)}
+
+\begin{fulllineitems}
+\phantomsection\label{task:ruffus.task.transform_param_factory}\pysiglinewithargsret{\code{ruffus.task.}\bfcode{transform\_param\_factory}}{\emph{input\_files\_task\_globs}, \emph{flatten\_input}, \emph{file\_names\_transform}, \emph{extra\_input\_files\_task\_globs}, \emph{replace\_inputs}, \emph{output\_pattern}, \emph{*extra\_specs}}{}
+Factory for task\_transform
+
+\end{fulllineitems}
+
+\index{files\_param\_factory() (in module ruffus.task)}
+
+\begin{fulllineitems}
+\phantomsection\label{task:ruffus.task.files_param_factory}\pysiglinewithargsret{\code{ruffus.task.}\bfcode{files\_param\_factory}}{\emph{input\_files\_task\_globs}, \emph{flatten\_input}, \emph{do\_not\_expand\_single\_job\_tasks}, \emph{output\_extras}}{}~\begin{description}
+\item[{Factory for functions which}] \leavevmode
+yield tuples of inputs, outputs / extras
+
+\end{description}
+
+Note:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+1. Each job requires input/output file names
+2. Input/output file names can be a string, an arbitrarily nested sequence
+3. Non-string types are ignored
+4. Either input or output file names must contain at least one string
+\end{Verbatim}
+
+\end{fulllineitems}
+
+\index{args\_param\_factory() (in module ruffus.task)}
+
+\begin{fulllineitems}
+\phantomsection\label{task:ruffus.task.args_param_factory}\pysiglinewithargsret{\code{ruffus.task.}\bfcode{args\_param\_factory}}{\emph{orig\_args}}{}~\begin{description}
+\item[{Factory for functions which}] \leavevmode
+yield tuples of inputs, outputs / extras
+
+\end{description}
+
+Note:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+1. Each job requires input/output file names
+2. Input/output file names can be a string, an arbitrarily nested sequence
+3. Non-string types are ignored
+4. Either input or output file names must contain at least one string
+\end{Verbatim}
+
+\end{fulllineitems}
+
+\index{split\_param\_factory() (in module ruffus.task)}
+
+\begin{fulllineitems}
+\phantomsection\label{task:ruffus.task.split_param_factory}\pysiglinewithargsret{\code{ruffus.task.}\bfcode{split\_param\_factory}}{\emph{input\_files\_task\_globs}, \emph{output\_files\_task\_globs}, \emph{*extra\_params}}{}
+Factory for task\_split
+
+\end{fulllineitems}
+
+
+
+\paragraph{Wrappers around jobs:}
+\label{task:wrappers-around-jobs}\index{job\_wrapper\_generic() (in module ruffus.task)}
+
+\begin{fulllineitems}
+\phantomsection\label{task:ruffus.task.job_wrapper_generic}\pysiglinewithargsret{\code{ruffus.task.}\bfcode{job\_wrapper\_generic}}{\emph{param}, \emph{user\_defined\_work\_func}, \emph{register\_cleanup}, \emph{touch\_files\_only}}{}
+run func
+
+\end{fulllineitems}
+
+\index{job\_wrapper\_io\_files() (in module ruffus.task)}
+
+\begin{fulllineitems}
+\phantomsection\label{task:ruffus.task.job_wrapper_io_files}\pysiglinewithargsret{\code{ruffus.task.}\bfcode{job\_wrapper\_io\_files}}{\emph{param}, \emph{user\_defined\_work\_func}, \emph{register\_cleanup}, \emph{touch\_files\_only}, \emph{output\_files\_only=False}}{}
+run func on any i/o if not up to date
+
+\end{fulllineitems}
+
+\index{job\_wrapper\_mkdir() (in module ruffus.task)}
+
+\begin{fulllineitems}
+\phantomsection\label{task:ruffus.task.job_wrapper_mkdir}\pysiglinewithargsret{\code{ruffus.task.}\bfcode{job\_wrapper\_mkdir}}{\emph{param}, \emph{user\_defined\_work\_func}, \emph{register\_cleanup}, \emph{touch\_files\_only}}{}
+Make directories if they do not already exist
+
+\end{fulllineitems}
+
+
+
+\paragraph{Checking if a job is up to date:}
+\label{task:checking-if-job-is-update}\index{needs\_update\_check\_modify\_time() (in module ruffus.task)}
+
+\begin{fulllineitems}
+\phantomsection\label{task:ruffus.task.needs_update_check_modify_time}\pysiglinewithargsret{\code{ruffus.task.}\bfcode{needs\_update\_check\_modify\_time}}{\emph{*params}, \emph{**kwargs}}{}
+Given input and output files, see if all exist and whether output files are later than input files.
+Each can be:
+\begin{enumerate}
+\item {}
+string: assumed to be a filename ``file1''
+
+\item {}
+any other type
+
+\item {}
+arbitrary nested sequence of (1) and (2)
+
+\end{enumerate}
+
+\end{fulllineitems}
+
+\index{needs\_update\_check\_directory\_missing() (in module ruffus.task)}
+
+\begin{fulllineitems}
+\phantomsection\label{task:ruffus.task.needs_update_check_directory_missing}\pysiglinewithargsret{\code{ruffus.task.}\bfcode{needs\_update\_check\_directory\_missing}}{\emph{*params}, \emph{**kwargs}}{}~\begin{description}
+\item[{Called per directory:}] \leavevmode
+Does it exist?
+Is it an ordinary file rather than a directory? (throws an exception)
+
+\end{description}
+
+\end{fulllineitems}
+
+
+
+\subsubsection{Exceptions and Errors}
+\label{task:exceptions-and-errors}
+
+\subsection{ruffus.proxy\_logger}
+\label{proxy_logger:glob}\label{proxy_logger::doc}\label{proxy_logger:ruffus-proxy-logger}\phantomsection\label{proxy_logger:proxy-logger}\phantomsection\label{proxy_logger:module-ruffus.proxy_logger}\phantomsection\label{proxy_logger:proxy-logger}\index{ruffus.proxy\_logger (module)}
+
+\subsubsection{Create proxy for logging for use with multiprocessing}
+\label{proxy_logger:create-proxy-for-logging-for-use-with-multiprocessing}
+These can be safely sent (marshalled) across process boundaries.
+
+
+\paragraph{Example 1}
+\label{proxy_logger:example-1}\begin{quote}
+
+Set up logger from config file:
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{k+kn}{from} \PYG{n+nn}{proxy\PYGZus{}logger} \PYG{k+kn}{import} \PYG{o}{*}
+\PYG{n}{args}\PYG{o}{=}\PYG{p}{\PYGZob{}}\PYG{p}{\PYGZcb{}}
+\PYG{n}{args}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{config\PYGZus{}file}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]} \PYG{o}{=} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{/my/config/file}\PYG{l+s}{\PYGZdq{}}
+
+\PYG{p}{(}\PYG{n}{logger\PYGZus{}proxy}\PYG{p}{,}
+ \PYG{n}{logging\PYGZus{}mutex}\PYG{p}{)} \PYG{o}{=} \PYG{n}{make\PYGZus{}shared\PYGZus{}logger\PYGZus{}and\PYGZus{}proxy} \PYG{p}{(}\PYG{n}{setup\PYGZus{}std\PYGZus{}shared\PYGZus{}logger}\PYG{p}{,}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{my\PYGZus{}logger}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{n}{args}\PYG{p}{)}
+\end{Verbatim}
+\end{quote}
+
+
+\paragraph{Example 2}
+\label{proxy_logger:example-2}\begin{quote}
+
+Log to file \code{"/my/lg.log"} in the specified format (Time / Log name / Event type / Message).
+
+Delay file creation until first log.
+
+Only log \code{Debug} messages
+\begin{quote}
+
+Other alternatives for the logging threshold (\code{args{[}"level"{]}}) include
+\begin{itemize}
+\item {}
+\code{logging.DEBUG}
+
+\item {}
+\code{logging.INFO}
+
+\item {}
+\code{logging.WARNING}
+
+\item {}
+\code{logging.ERROR}
+
+\item {}
+\code{logging.CRITICAL}
+
+\end{itemize}
+\end{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{k+kn}{from} \PYG{n+nn}{proxy\PYGZus{}logger} \PYG{k+kn}{import} \PYG{o}{*}
+\PYG{n}{args}\PYG{o}{=}\PYG{p}{\PYGZob{}}\PYG{p}{\PYGZcb{}}
+\PYG{n}{args}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{file\PYGZus{}name}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]} \PYG{o}{=} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{/my/lg.log}\PYG{l+s}{\PYGZdq{}}
+\PYG{n}{args}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{formatter}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]} \PYG{o}{=} \PYG{l+s}{\PYGZdq{}}\PYG{l+s+si}{\PYGZpc{}(asctime)s}\PYG{l+s}{ \PYGZhy{} }\PYG{l+s+si}{\PYGZpc{}(name)s}\PYG{l+s}{ \PYGZhy{} }\PYG{l+s+si}{\PYGZpc{}(levelname)6s}\PYG{l+s}{ \PYGZhy{} }\PYG{l+s+si}{\PYGZpc{}(message)s}\PYG{l+s}{\PYGZdq{}}
+\PYG{n}{args}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{delay}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]} \PYG{o}{=} \PYG{n+nb+bp}{True}
+\PYG{n}{args}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{level}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]} \PYG{o}{=} \PYG{n}{logging}\PYG{o}{.}\PYG{n}{DEBUG}
+
+\PYG{p}{(}\PYG{n}{logger\PYGZus{}proxy}\PYG{p}{,}
+ \PYG{n}{logging\PYGZus{}mutex}\PYG{p}{)} \PYG{o}{=} \PYG{n}{make\PYGZus{}shared\PYGZus{}logger\PYGZus{}and\PYGZus{}proxy} \PYG{p}{(}\PYG{n}{setup\PYGZus{}std\PYGZus{}shared\PYGZus{}logger}\PYG{p}{,}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{my\PYGZus{}logger}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{n}{args}\PYG{p}{)}
+\end{Verbatim}
+\end{quote}
+
+
+\paragraph{Example 3}
+\label{proxy_logger:example-3}\begin{quote}
+
+Rotate log files every 20 Kb, with up to 10 backups.
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{k+kn}{from} \PYG{n+nn}{proxy\PYGZus{}logger} \PYG{k+kn}{import} \PYG{o}{*}
+\PYG{n}{args}\PYG{o}{=}\PYG{p}{\PYGZob{}}\PYG{p}{\PYGZcb{}}
+\PYG{n}{args}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{file\PYGZus{}name}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]} \PYG{o}{=} \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{/my/lg.log}\PYG{l+s}{\PYGZdq{}}
+\PYG{n}{args}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{rotating}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]} \PYG{o}{=} \PYG{n+nb+bp}{True}
+\PYG{n}{args}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{maxBytes}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{o}{=}\PYG{l+m+mi}{20000}
+\PYG{n}{args}\PYG{p}{[}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{backupCount}\PYG{l+s}{\PYGZdq{}}\PYG{p}{]}\PYG{o}{=}\PYG{l+m+mi}{10}
+\PYG{p}{(}\PYG{n}{logger\PYGZus{}proxy}\PYG{p}{,}
+ \PYG{n}{logging\PYGZus{}mutex}\PYG{p}{)} \PYG{o}{=} \PYG{n}{make\PYGZus{}shared\PYGZus{}logger\PYGZus{}and\PYGZus{}proxy} \PYG{p}{(}\PYG{n}{setup\PYGZus{}std\PYGZus{}shared\PYGZus{}logger}\PYG{p}{,}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{my\PYGZus{}logger}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{n}{args}\PYG{p}{)}
+\end{Verbatim}
+\end{quote}
+
+
+\paragraph{To use:}
+\label{proxy_logger:to-use}\begin{quote}
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{p}{(}\PYG{n}{logger\PYGZus{}proxy}\PYG{p}{,}
+ \PYG{n}{logging\PYGZus{}mutex}\PYG{p}{)} \PYG{o}{=} \PYG{n}{make\PYGZus{}shared\PYGZus{}logger\PYGZus{}and\PYGZus{}proxy} \PYG{p}{(}\PYG{n}{setup\PYGZus{}std\PYGZus{}shared\PYGZus{}logger}\PYG{p}{,}
+ \PYG{l+s}{\PYGZdq{}}\PYG{l+s}{my\PYGZus{}logger}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,} \PYG{n}{args}\PYG{p}{)}
+
+\PYG{k}{with} \PYG{n}{logging\PYGZus{}mutex}\PYG{p}{:}
+ \PYG{n}{logger\PYGZus{}proxy}\PYG{o}{.}\PYG{n}{debug}\PYG{p}{(}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{This is a debug message}\PYG{l+s}{\PYGZsq{}}\PYG{p}{)}
+ \PYG{n}{logger\PYGZus{}proxy}\PYG{o}{.}\PYG{n}{info}\PYG{p}{(}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{This is an info message}\PYG{l+s}{\PYGZsq{}}\PYG{p}{)}
+ \PYG{n}{logger\PYGZus{}proxy}\PYG{o}{.}\PYG{n}{warning}\PYG{p}{(}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{This is a warning message}\PYG{l+s}{\PYGZsq{}}\PYG{p}{)}
+ \PYG{n}{logger\PYGZus{}proxy}\PYG{o}{.}\PYG{n}{error}\PYG{p}{(}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{This is an error message}\PYG{l+s}{\PYGZsq{}}\PYG{p}{)}
+ \PYG{n}{logger\PYGZus{}proxy}\PYG{o}{.}\PYG{n}{critical}\PYG{p}{(}\PYG{l+s}{\PYGZsq{}}\PYG{l+s}{This is a critical error message}\PYG{l+s}{\PYGZsq{}}\PYG{p}{)}
+ \PYG{n}{logger\PYGZus{}proxy}\PYG{o}{.}\PYG{n}{log}\PYG{p}{(}\PYG{n}{logging}\PYG{o}{.}\PYG{n}{DEBUG}\PYG{p}{,} \PYG{l+s}{\PYGZsq{}}\PYG{l+s}{This is a debug message}\PYG{l+s}{\PYGZsq{}}\PYG{p}{)}
+\end{Verbatim}
+
+Note that the logging function \code{exception()} is not included because python
+stack trace information is not well-marshalled
+(\href{http://docs.python.org/library/pickle.html}{pickle}d) across processes.
+\end{quote}
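+
+A further sketch (the task and file suffixes shown are placeholders): the proxy and mutex created above can also be used from inside a \emph{Ruffus} job.
+
+\begin{Verbatim}
+from ruffus import *
+
+# Logging from inside a Ruffus job via the shared proxy created above
+@transform("*.input", suffix(".input"), ".output")
+def a_task(infile, outfile):
+    with logging_mutex:
+        logger_proxy.info("Processing %s -> %s" % (infile, outfile))
+    open(outfile, "w").write(open(infile).read())
+\end{Verbatim}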
+
+
+\subsubsection{Proxies for a log:}
+\label{proxy_logger:proxies-for-a-log}\index{make\_shared\_logger\_and\_proxy() (in module ruffus.proxy\_logger)}
+
+\begin{fulllineitems}
+\phantomsection\label{proxy_logger:ruffus.proxy_logger.make_shared_logger_and_proxy}\pysiglinewithargsret{\code{ruffus.proxy\_logger.}\bfcode{make\_shared\_logger\_and\_proxy}}{\emph{logger\_factory}, \emph{logger\_name}, \emph{args}}{}
+Make a \href{http://docs.python.org/library/logging.html}{logging} object
+called ``\code{logger\_name}'' by calling \code{logger\_factory}(\code{args})
+
+This function will return a proxy to the shared logger which can be copied to jobs
+in other processes, as well as a mutex which can be used to prevent simultaneous logging
+from happening.
+\begin{quote}\begin{description}
+\item[{Parameters}] \leavevmode\begin{itemize}
+\item {}
+\textbf{logger\_factory} --
+a function which creates and returns an object with the
+\href{http://docs.python.org/library/logging.html}{logging} interface.
+\code{setup\_std\_shared\_logger()} is one example of a logger factory.
+
+
+\item {}
+\textbf{logger\_name} -- name of log
+
+\item {}
+\textbf{args} -- parameters passed (as a single argument) to \code{logger\_factory}
+
+\end{itemize}
+
+\item[{Returns}] \leavevmode
+a proxy to the shared logger which can be copied to jobs in other processes
+
+\item[{Returns}] \leavevmode
+a mutex which can be used to prevent simultaneous logging from happening
+
+\end{description}\end{quote}
+
+\end{fulllineitems}
+
+
+
+\subsubsection{Create a logging object}
+\label{proxy_logger:create-a-logging-object}\index{setup\_std\_shared\_logger() (in module ruffus.proxy\_logger)}
+
+\begin{fulllineitems}
+\phantomsection\label{proxy_logger:ruffus.proxy_logger.setup_std_shared_logger}\pysiglinewithargsret{\code{ruffus.proxy\_logger.}\bfcode{setup\_std\_shared\_logger}}{\emph{logger\_name}, \emph{args}}{}
+This function is a simple wrapper around the python
+\href{http://docs.python.org/library/logging.html}{logging} module.
+
+This \emph{logger\_factory} example creates logging objects which can
+then be managed by proxy via \code{ruffus.proxy\_logger.make\_shared\_logger\_and\_proxy()}
+
+This can be:
+\begin{itemize}
+\item {}
+a \href{http://docs.python.org/library/logging.html\#filehandler}{disk log file}
+
+\item {}
+an automatically backed-up \href{http://docs.python.org/library/logging.html\#rotatingfilehandler}{(rotating) log}.
+
+\item {}
+any log specified in a \href{http://docs.python.org/library/logging.html\#configuration-file-format}{configuration file}
+
+\end{itemize}
+
+These are specified in the \code{args} dictionary forwarded by \code{make\_shared\_logger\_and\_proxy()}
+\begin{quote}\begin{description}
+\item[{Parameters}] \leavevmode\begin{itemize}
+\item {}
+\textbf{logger\_name} -- name of log
+
+\item {}
+\textbf{args} --
+a dictionary of parameters forwarded from \code{make\_shared\_logger\_and\_proxy()}
+
+Valid entries include:
+\begin{quote}
+
+
+\begin{fulllineitems}
+\pysigline{\bfcode{"level"}}
+Sets the \href{http://docs.python.org/library/logging.html\#logging.Handler.setLevel}{threshold} for the logger.
+
+\end{fulllineitems}
+
+
+
+\begin{fulllineitems}
+\pysigline{\bfcode{"config\_file"}}
+The logging object is configured from this \href{http://docs.python.org/library/logging.html\#configuration-file-format}{configuration file}.
+
+\end{fulllineitems}
+
+
+
+\begin{fulllineitems}
+\pysigline{\bfcode{"file\_name"}}
+Sets disk log file name.
+
+\end{fulllineitems}
+
+
+
+\begin{fulllineitems}
+\pysigline{\bfcode{"rotating"}}
+Chooses a \href{http://docs.python.org/library/logging.html\#rotatingfilehandler}{(rotating) log}.
+
+\end{fulllineitems}
+
+
+
+\begin{fulllineitems}
+\pysigline{\bfcode{"maxBytes"}}
+Allows the file to roll over at a predetermined size
+
+\end{fulllineitems}
+
+
+
+\begin{fulllineitems}
+\pysigline{\bfcode{"backupCount"}}
+If backupCount is non-zero, the system will save old log files by appending the extensions \code{.1}, \code{.2}, \code{.3} etc., to the filename.
+
+\end{fulllineitems}
+
+
+
+\begin{fulllineitems}
+\pysigline{\bfcode{"delay"}}
+Defer file creation until the log is written to.
+
+\end{fulllineitems}
+
+
+
+\begin{fulllineitems}
+\pysigline{\bfcode{"formatter"}}
+\href{http://docs.python.org/library/logging.html\#formatter-objects}{Converts} the message to a logged entry string.
+For example,
+
+\begin{Verbatim}[commandchars=\\\{\}]
+\PYG{l+s}{\PYGZdq{}}\PYG{l+s+si}{\PYGZpc{}(asctime)s}\PYG{l+s}{ \PYGZhy{} }\PYG{l+s+si}{\PYGZpc{}(name)s}\PYG{l+s}{ \PYGZhy{} }\PYG{l+s+si}{\PYGZpc{}(levelname)6s}\PYG{l+s}{ \PYGZhy{} }\PYG{l+s+si}{\PYGZpc{}(message)s}\PYG{l+s}{\PYGZdq{}}
+\end{Verbatim}
+
+\end{fulllineitems}
+
+\end{quote}
+
+
+\end{itemize}
+
+\end{description}\end{quote}
+
+\end{fulllineitems}
+
+
+
+\chapter{Indices and tables}
+\label{contents:indices-and-tables}\begin{itemize}
+\item {}
+\emph{genindex}
+
+\item {}
+\emph{modindex}
+
+\item {}
+\emph{search}
+
+\end{itemize}
+
+
+\renewcommand{\indexname}{Python Module Index}
+\begin{theindex}
+\def\bigletter#1{{\Large\sffamily#1}\nopagebreak\vspace{1mm}}
+\bigletter{r}
+\item {\texttt{ruffus.proxy\_logger}}, \pageref{proxy_logger:module-ruffus.proxy_logger}
+\end{theindex}
+
+\renewcommand{\indexname}{Index}
+\printindex
+\end{document}
diff --git a/doc/_build/latex/ruffus.toc b/doc/_build/latex/ruffus.toc
new file mode 100644
index 0000000..50ebbe0
--- /dev/null
+++ b/doc/_build/latex/ruffus.toc
@@ -0,0 +1,544 @@
+\select@language {english}
+\contentsline {chapter}{\numberline {1}Start Here:}{1}{chapter.1}
+\contentsline {section}{\numberline {1.1}Installation}{1}{section.1.1}
+\contentsline {subsection}{\numberline {1.1.1}The easy way}{1}{subsection.1.1.1}
+\contentsline {subsection}{\numberline {1.1.2}The most up-to-date code:}{1}{subsection.1.1.2}
+\contentsline {subsubsection}{Graphical flowcharts}{1}{subsubsection*.3}
+\contentsline {section}{\numberline {1.2}\textbf {Ruffus} Manual: List of Chapters and Example code}{2}{section.1.2}
+\contentsline {section}{\numberline {1.3}\textbf {Chapter 1}: An introduction to basic \emph {Ruffus} syntax}{3}{section.1.3}
+\contentsline {subsection}{\numberline {1.3.1}Overview}{3}{subsection.1.3.1}
+\contentsline {subsection}{\numberline {1.3.2}Importing \emph {Ruffus}}{4}{subsection.1.3.2}
+\contentsline {subsection}{\numberline {1.3.3}\emph {Ruffus} decorators}{4}{subsection.1.3.3}
+\contentsline {subsection}{\numberline {1.3.4}Your first \emph {Ruffus} pipeline}{5}{subsection.1.3.4}
+\contentsline {subsubsection}{1. Write down the file names}{5}{subsubsection*.5}
+\contentsline {subsubsection}{2. Write the python functions for each stage}{5}{subsubsection*.6}
+\contentsline {subsubsection}{3. Link the python functions into a pipeline}{7}{subsubsection*.7}
+\contentsline {subsubsection}{4. @transform syntax}{7}{subsubsection*.8}
+\contentsline {subsubsection}{5. Run the pipeline!}{8}{subsubsection*.9}
+\contentsline {section}{\numberline {1.4}\textbf {Chapter 2}: Transforming data in a pipeline with \emph {@transform}}{9}{section.1.4}
+\contentsline {subsection}{\numberline {1.4.1}Review}{9}{subsection.1.4.1}
+\contentsline {subsection}{\numberline {1.4.2}Task functions as recipes}{9}{subsection.1.4.2}
+\contentsline {subsection}{\numberline {1.4.3}\emph {@transform} is a 1 to 1 operation}{9}{subsection.1.4.3}
+\contentsline {subsubsection}{A pair of files as the \textbf {Input}}{10}{subsubsection*.10}
+\contentsline {subsection}{\numberline {1.4.4}\textbf {Input} and \textbf {Output} parameters}{10}{subsection.1.4.4}
+\contentsline {section}{\numberline {1.5}\textbf {Chapter 3}: More on \texttt {@transform}-ing data}{11}{section.1.5}
+\contentsline {subsection}{\numberline {1.5.1}Review}{11}{subsection.1.5.1}
+\contentsline {subsection}{\numberline {1.5.2}Running pipelines in parallel}{12}{subsection.1.5.2}
+\contentsline {subsection}{\numberline {1.5.3}Up-to-date jobs are not re-run unnecessarily}{12}{subsection.1.5.3}
+\contentsline {subsection}{\numberline {1.5.4}Defining pipeline tasks out of order}{13}{subsection.1.5.4}
+\contentsline {subsection}{\numberline {1.5.5}Multiple dependencies}{14}{subsection.1.5.5}
+\contentsline {subsection}{\numberline {1.5.6}\emph {@follows}}{14}{subsection.1.5.6}
+\contentsline {subsection}{\numberline {1.5.7}Making directories automatically with \emph {@follows} and \emph {mkdir}}{15}{subsection.1.5.7}
+\contentsline {subsection}{\numberline {1.5.8}Globs in the \textbf {Input} parameter}{15}{subsection.1.5.8}
+\contentsline {subsection}{\numberline {1.5.9}Mixing Tasks and Globs in the \textbf {Input} parameter}{16}{subsection.1.5.9}
+\contentsline {section}{\numberline {1.6}\textbf {Chapter 4}: Creating files with \texttt {@originate}}{16}{section.1.6}
+\contentsline {subsection}{\numberline {1.6.1}Simplifying our example with \emph {@originate}}{16}{subsection.1.6.1}
+\contentsline {section}{\numberline {1.7}\textbf {Chapter 5}: Understanding how your pipeline works with \emph {pipeline\_printout(...)}}{17}{section.1.7}
+\contentsline {subsection}{\numberline {1.7.1}Printing out which jobs will be run}{18}{subsection.1.7.1}
+\contentsline {subsection}{\numberline {1.7.2}Determining which jobs are out-of-date or not}{18}{subsection.1.7.2}
+\contentsline {subsection}{\numberline {1.7.3}Verbosity levels}{19}{subsection.1.7.3}
+\contentsline {subsection}{\numberline {1.7.4}Abbreviating long file paths with \texttt {verbose\_abbreviated\_path}}{19}{subsection.1.7.4}
+\contentsline {subsection}{\numberline {1.7.5}Getting a list of all tasks in a pipeline}{20}{subsection.1.7.5}
+\contentsline {section}{\numberline {1.8}\textbf {Chapter 6}: Running \emph {Ruffus} from the command line with ruffus.cmdline}{20}{section.1.8}
+\contentsline {subsection}{\numberline {1.8.1}Template for argparse}{21}{subsection.1.8.1}
+\contentsline {subsection}{\numberline {1.8.2}Command Line Arguments}{21}{subsection.1.8.2}
+\contentsline {subsection}{\numberline {1.8.3}1) Logging}{21}{subsection.1.8.3}
+\contentsline {subsubsection}{A) Only to the log file:}{22}{subsubsection*.13}
+\contentsline {subsubsection}{B) Only to the display:}{22}{subsubsection*.14}
+\contentsline {subsubsection}{C) To both simultaneously:}{22}{subsubsection*.15}
+\contentsline {subsection}{\numberline {1.8.4}2) Tracing pipeline progress}{22}{subsection.1.8.4}
+\contentsline {subsection}{\numberline {1.8.5}3) Printing a flowchart}{23}{subsection.1.8.5}
+\contentsline {subsection}{\numberline {1.8.6}4) Running in parallel on multiple processors}{23}{subsection.1.8.6}
+\contentsline {subsection}{\numberline {1.8.7}5) Setup checkpointing so that \emph {Ruffus} knows which files are out of date}{23}{subsection.1.8.7}
+\contentsline {subsubsection}{Recreating checkpoints}{23}{subsubsection*.16}
+\contentsline {subsubsection}{Touch files}{23}{subsubsection*.17}
+\contentsline {subsection}{\numberline {1.8.8}6) Skipping specified options}{23}{subsection.1.8.8}
+\contentsline {subsection}{\numberline {1.8.9}7) Specifying verbosity and abbreviating long paths}{24}{subsection.1.8.9}
+\contentsline {subsection}{\numberline {1.8.10}8) Displaying the version}{24}{subsection.1.8.10}
+\contentsline {subsection}{\numberline {1.8.11}Template for optparse}{24}{subsection.1.8.11}
+\contentsline {section}{\numberline {1.9}\textbf {Chapter 7}: Displaying the pipeline visually with \emph {pipeline\_printout\_graph(...)}}{25}{section.1.9}
+\contentsline {subsection}{\numberline {1.9.1}Printing out a flowchart of our pipeline}{25}{subsection.1.9.1}
+\contentsline {subsection}{\numberline {1.9.2}Command line options made easier with \texttt {ruffus.cmdline}}{26}{subsection.1.9.2}
+\contentsline {subsection}{\numberline {1.9.3}Horribly complicated pipelines!}{26}{subsection.1.9.3}
+\contentsline {subsection}{\numberline {1.9.4}Circular dependency errors in pipelines!}{27}{subsection.1.9.4}
+\contentsline {subsection}{\numberline {1.9.5}\texttt {@graphviz}: Customising the appearance of each task}{28}{subsection.1.9.5}
+\contentsline {section}{\numberline {1.10}\textbf {Chapter 8}: Specifying output file names with \emph {formatter()} and \emph {regex()}}{29}{section.1.10}
+\contentsline {subsection}{\numberline {1.10.1}Review}{29}{subsection.1.10.1}
+\contentsline {subsection}{\numberline {1.10.2}A different file name \emph {suffix()} for each pipeline stage}{30}{subsection.1.10.2}
+\contentsline {subsection}{\numberline {1.10.3}\emph {formatter()} manipulates pathnames and regular expression}{31}{subsection.1.10.3}
+\contentsline {subsubsection}{Path name components}{32}{subsubsection*.18}
+\contentsline {subsubsection}{Filter and parse using regular expressions}{33}{subsubsection*.19}
+\contentsline {subsubsection}{Using \emph {@transform()} with \emph {formatter()}}{33}{subsubsection*.20}
+\contentsline {subsubsection}{string substitution for ``extra'' arguments}{34}{subsubsection*.21}
+\contentsline {subsubsection}{Changing directories using \emph {formatter()} in a zoo...}{35}{subsubsection*.22}
+\contentsline {subsection}{\numberline {1.10.4}\emph {regex()} manipulates via regular expressions}{36}{subsection.1.10.4}
+\contentsline {section}{\numberline {1.11}\textbf {Chapter 9}: Preparing directories for output with \emph {@mkdir()}}{37}{section.1.11}
+\contentsline {subsection}{\numberline {1.11.1}Overview}{37}{subsection.1.11.1}
+\contentsline {subsection}{\numberline {1.11.2}Creating directories after string substitution in a zoo...}{38}{subsection.1.11.2}
+\contentsline {subsubsection}{using \emph {formatter()}}{38}{subsubsection*.23}
+\contentsline {subsubsection}{using \emph {regex()}}{39}{subsubsection*.24}
+\contentsline {section}{\numberline {1.12}\textbf {Chapter 10}: Checkpointing: Interrupted Pipelines and Exceptions}{39}{section.1.12}
+\contentsline {subsection}{\numberline {1.12.1}Overview}{40}{subsection.1.12.1}
+\contentsline {subsection}{\numberline {1.12.2}Interrupting tasks}{40}{subsection.1.12.2}
+\contentsline {subsection}{\numberline {1.12.3}Checkpointing: only log completed jobs}{41}{subsection.1.12.3}
+\contentsline {subsection}{\numberline {1.12.4}Do not share the same checkpoint file across multiple pipelines!}{41}{subsection.1.12.4}
+\contentsline {subsection}{\numberline {1.12.5}Setting checkpoint file names}{41}{subsection.1.12.5}
+\contentsline {subsubsection}{environment variable \texttt {DEFAULT\_RUFFUS\_HISTORY\_FILE}}{42}{subsubsection*.25}
+\contentsline {subsubsection}{Setting the checkpoint file name manually}{42}{subsubsection*.26}
+\contentsline {subsection}{\numberline {1.12.6}Useful checkpoint file name policies \texttt {DEFAULT\_RUFFUS\_HISTORY\_FILE}}{42}{subsection.1.12.6}
+\contentsline {subsubsection}{Example 1: same directory, different name}{42}{subsubsection*.27}
+\contentsline {subsubsection}{Example 2: Different directory, same name}{42}{subsubsection*.28}
+\contentsline {subsubsection}{Example 2: Different directory, same name but keep one level of subdirectory to disambiguate}{43}{subsubsection*.29}
+\contentsline {subsubsection}{Example 2: nested in common directory}{43}{subsubsection*.30}
+\contentsline {subsection}{\numberline {1.12.7}Regenerating the checkpoint file}{43}{subsection.1.12.7}
+\contentsline {subsection}{\numberline {1.12.8}Rules for determining if files are up to date}{43}{subsection.1.12.8}
+\contentsline {subsection}{\numberline {1.12.9}Missing files generate exceptions}{44}{subsection.1.12.9}
+\contentsline {subsection}{\numberline {1.12.10}Caveats: Coarse Timestamp resolution}{44}{subsection.1.12.10}
+\contentsline {subsection}{\numberline {1.12.11}Flag files: Checkpointing for the paranoid}{44}{subsection.1.12.11}
+\contentsline {section}{\numberline {1.13}\textbf {Chapter 11}: Pipeline topologies and a compendium of \emph {Ruffus} decorators}{44}{section.1.13}
+\contentsline {subsection}{\numberline {1.13.1}Overview}{44}{subsection.1.13.1}
+\contentsline {subsection}{\numberline {1.13.2}\emph {@transform}}{45}{subsection.1.13.2}
+\contentsline {subsection}{\numberline {1.13.3}A bestiary of \emph {Ruffus} decorators}{45}{subsection.1.13.3}
+\contentsline {subsection}{\numberline {1.13.4}\emph {@originate}}{45}{subsection.1.13.4}
+\contentsline {subsection}{\numberline {1.13.5}\emph {@merge}}{45}{subsection.1.13.5}
+\contentsline {subsection}{\numberline {1.13.6}\emph {@split}}{45}{subsection.1.13.6}
+\contentsline {subsection}{\numberline {1.13.7}\emph {@subdivide}}{46}{subsection.1.13.7}
+\contentsline {subsection}{\numberline {1.13.8}\emph {@collate}}{46}{subsection.1.13.8}
+\contentsline {subsection}{\numberline {1.13.9}Combinatorics}{46}{subsection.1.13.9}
+\contentsline {subsection}{\numberline {1.13.10}\emph {@product}}{47}{subsection.1.13.10}
+\contentsline {subsection}{\numberline {1.13.11}\emph {@combinations}}{47}{subsection.1.13.11}
+\contentsline {subsection}{\numberline {1.13.12}\emph {@combinations\_with\_replacement}}{47}{subsection.1.13.12}
+\contentsline {subsection}{\numberline {1.13.13}\emph {@permutations}}{47}{subsection.1.13.13}
+\contentsline {section}{\numberline {1.14}\textbf {Chapter 12}: Splitting up large tasks / files with \textbf {@split}}{47}{section.1.14}
+\contentsline {subsection}{\numberline {1.14.1}Overview}{48}{subsection.1.14.1}
+\contentsline {subsection}{\numberline {1.14.2}Example: Calculate variance for a large list of numbers in parallel}{48}{subsection.1.14.2}
+\contentsline {subsection}{\numberline {1.14.3}Output files for \emph {@split}}{48}{subsection.1.14.3}
+\contentsline {subsection}{\numberline {1.14.4}Be careful in specifying \textbf {Output} globs}{49}{subsection.1.14.4}
+\contentsline {subsection}{\numberline {1.14.5}Clean up previous pipeline runs}{49}{subsection.1.14.5}
+\contentsline {subsection}{\numberline {1.14.6}1 to many}{49}{subsection.1.14.6}
+\contentsline {subsection}{\numberline {1.14.7}Nothing to many}{50}{subsection.1.14.7}
+\contentsline {section}{\numberline {1.15}\textbf {Chapter 13}: \texttt {@merge} multiple input into a single result}{51}{section.1.15}
+\contentsline {subsection}{\numberline {1.15.1}Overview of \emph {@merge}}{51}{subsection.1.15.1}
+\contentsline {subsection}{\numberline {1.15.2}\emph {@merge} is a many to one operator}{51}{subsection.1.15.2}
+\contentsline {subsection}{\numberline {1.15.3}Example: Combining partial solutions: Calculating variances}{51}{subsection.1.15.3}
+\contentsline {section}{\numberline {1.16}\textbf {Chapter 14}: Multiprocessing, \texttt {drmaa} and Computation Clusters}{53}{section.1.16}
+\contentsline {subsection}{\numberline {1.16.1}Overview}{53}{subsection.1.16.1}
+\contentsline {subsubsection}{Multi Processing}{53}{subsubsection*.31}
+\contentsline {subsubsection}{Data sharing}{53}{subsubsection*.32}
+\contentsline {subsection}{\numberline {1.16.2}Restricting parallelism with \emph {@jobs\_limit}}{53}{subsection.1.16.2}
+\contentsline {subsection}{\numberline {1.16.3}Using \texttt {drmaa} to dispatch work to Computational Clusters or Grid engines from Ruffus jobs}{54}{subsection.1.16.3}
+\contentsline {subsubsection}{1) Use a shared drmaa session:}{54}{subsubsection*.33}
+\contentsline {subsubsection}{2) import \texttt {ruffus.drmaa\_wrapper}}{54}{subsubsection*.34}
+\contentsline {subsubsection}{3) call \emph {drmaa\_wrapper.run\_job()}}{54}{subsubsection*.35}
+\contentsline {subsubsection}{4) Use multithread: \emph {pipeline\_run(multithread = NNN)}}{55}{subsubsection*.36}
+\contentsline {subsubsection}{5) Develop locally}{55}{subsubsection*.37}
+\contentsline {subsection}{\numberline {1.16.4}Forcing a pipeline to appear up to date}{56}{subsection.1.16.4}
+\contentsline {section}{\numberline {1.17}\textbf {Chapter 15}: Logging progress through a pipeline}{56}{section.1.17}
+\contentsline {subsection}{\numberline {1.17.1}Overview}{56}{subsection.1.17.1}
+\contentsline {subsection}{\numberline {1.17.2}Logging task/job completion}{57}{subsection.1.17.2}
+\contentsline {subsubsection}{Controlling logging verbosity}{57}{subsubsection*.38}
+\contentsline {subsection}{\numberline {1.17.3}Use \emph {ruffus.cmdline}}{57}{subsection.1.17.3}
+\contentsline {subsection}{\numberline {1.17.4}Customising logging}{57}{subsection.1.17.4}
+\contentsline {subsection}{\numberline {1.17.5}Log your own messages}{58}{subsection.1.17.5}
+\contentsline {subsubsection}{1. Set up logging}{58}{subsubsection*.39}
+\contentsline {subsubsection}{2. Share the proxy}{58}{subsubsection*.40}
+\contentsline {section}{\numberline {1.18}\textbf {Chapter 16}: \emph {@subdivide} tasks to run efficiently and regroup with \emph {@collate}}{59}{section.1.18}
+\contentsline {subsection}{\numberline {1.18.1}Overview}{59}{subsection.1.18.1}
+\contentsline {subsection}{\numberline {1.18.2}\emph {@subdivide} in parallel}{59}{subsection.1.18.2}
+\contentsline {subsection}{\numberline {1.18.3}Grouping using \emph {@collate}}{61}{subsection.1.18.3}
+\contentsline {section}{\numberline {1.19}\textbf {Chapter 17}: \emph {@combinations}, \emph {@permutations} and all versus all \emph {@product}}{62}{section.1.19}
+\contentsline {subsection}{\numberline {1.19.1}Overview}{62}{subsection.1.19.1}
+\contentsline {subsection}{\numberline {1.19.2}Generating output with \emph {formatter()}}{63}{subsection.1.19.2}
+\contentsline {subsection}{\numberline {1.19.3}All vs all comparisons with \emph {@product}}{63}{subsection.1.19.3}
+\contentsline {subsection}{\numberline {1.19.4}Permute all k-tuple orderings of inputs without repeats using \emph {@permutations}}{65}{subsection.1.19.4}
+\contentsline {subsection}{\numberline {1.19.5}Select unordered k-tuples within inputs excluding repeated elements using \emph {@combinations}}{66}{subsection.1.19.5}
+\contentsline {subsection}{\numberline {1.19.6}Select unordered k-tuples within inputs \emph {including} repeated elements with \emph {@combinations\_with\_replacement}}{67}{subsection.1.19.6}
+\contentsline {section}{\numberline {1.20}\textbf {Chapter 18}: Turning parts of the pipeline on and off at runtime with \emph {@active\_if}}{69}{section.1.20}
+\contentsline {subsection}{\numberline {1.20.1}Overview}{69}{subsection.1.20.1}
+\contentsline {subsection}{\numberline {1.20.2}\emph {@active\_if} controls the state of tasks}{69}{subsection.1.20.2}
+\contentsline {section}{\numberline {1.21}\textbf {Chapter 19}: Signal the completion of each stage of our pipeline with \emph {@posttask}}{71}{section.1.21}
+\contentsline {subsection}{\numberline {1.21.1}Overview}{71}{subsection.1.21.1}
+\contentsline {subsubsection}{\textbf {@posttask}}{71}{subsubsection*.41}
+\contentsline {subsubsection}{\emph {touch\_file}}{72}{subsubsection*.42}
+\contentsline {subsubsection}{Adding several post task actions}{72}{subsubsection*.43}
+\contentsline {section}{\numberline {1.22}\textbf {Chapter 20}: Manipulating task inputs via string substitution using \emph {inputs()} and \emph {add\_inputs()}}{72}{section.1.22}
+\contentsline {subsection}{\numberline {1.22.1}Overview}{72}{subsection.1.22.1}
+\contentsline {subsection}{\numberline {1.22.2}Adding additional \emph {input} prerequisites per job with \emph {add\_inputs()}}{73}{subsection.1.22.2}
+\contentsline {subsubsection}{1. Example: compiling c++ code}{73}{subsubsection*.44}
+\contentsline {subsubsection}{2. Example: Adding a common header file with \emph {add\_inputs()}}{73}{subsubsection*.45}
+\contentsline {subsubsection}{3. Example: Additional \emph {Input} can be tasks}{74}{subsubsection*.46}
+\contentsline {subsubsection}{4. Example: Add corresponding files using \emph {add\_inputs()} with \emph {formatter} or \emph {regex}}{74}{subsubsection*.47}
+\contentsline {subsection}{\numberline {1.22.3}Replacing all input parameters with \emph {inputs()}}{75}{subsection.1.22.3}
+\contentsline {subsubsection}{5. Example: Running matching python scripts using \emph {inputs()}}{75}{subsubsection*.48}
+\contentsline {section}{\numberline {1.23}\textbf {Chapter 21}: Esoteric: Generating parameters on the fly with \emph {@files}}{75}{section.1.23}
+\contentsline {subsection}{\numberline {1.23.1}Overview}{76}{subsection.1.23.1}
+\contentsline {subsection}{\numberline {1.23.2}\emph {@files} syntax}{76}{subsection.1.23.2}
+\contentsline {subsection}{\numberline {1.23.3}A Cartesian Product, all vs all example}{77}{subsection.1.23.3}
+\contentsline {section}{\numberline {1.24}\textbf {Chapter 22}: Esoteric: Running jobs in parallel without files using \emph {@parallel}}{78}{section.1.24}
+\contentsline {subsection}{\numberline {1.24.1}\textbf {@parallel}}{78}{subsection.1.24.1}
+\contentsline {section}{\numberline {1.25}\textbf {Chapter 23}: Esoteric: Writing custom functions to decide which jobs are up to date with \emph {@check\_if\_uptodate}}{79}{section.1.25}
+\contentsline {subsection}{\numberline {1.25.1}\textbf {@check\_if\_uptodate} : Manual dependency checking}{79}{subsection.1.25.1}
+\contentsline {section}{\numberline {1.26}\textbf {Appendix 1}: Flow Chart Colours with \emph {pipeline\_printout\_graph(...)}}{80}{section.1.26}
+\contentsline {subsection}{\numberline {1.26.1}Flowchart colours}{80}{subsection.1.26.1}
+\contentsline {section}{\numberline {1.27}\textbf {Appendix 2}: How dependency is checked}{81}{section.1.27}
+\contentsline {subsection}{\numberline {1.27.1}Overview}{81}{subsection.1.27.1}
+\contentsline {subsubsection}{Running all out-of-date tasks and dependents}{81}{subsubsection*.49}
+\contentsline {subsubsection}{Forced Reruns}{82}{subsubsection*.51}
+\contentsline {subsubsection}{Esoteric option: Minimal Reruns}{82}{subsubsection*.52}
+\contentsline {section}{\numberline {1.28}\textbf {Appendix 3}: Exceptions thrown inside pipelines}{83}{section.1.28}
+\contentsline {subsection}{\numberline {1.28.1}Overview}{83}{subsection.1.28.1}
+\contentsline {subsection}{\numberline {1.28.2}Pipelines running in parallel accumulate Exceptions}{84}{subsection.1.28.2}
+\contentsline {subsection}{\numberline {1.28.3}Terminate pipeline immediately upon Exceptions}{84}{subsection.1.28.3}
+\contentsline {subsubsection}{Set \emph {pipeline\_run(exceptions\_terminate\_immediately = True)}}{84}{subsubsection*.54}
+\contentsline {subsubsection}{raise \texttt {Ruffus.JobSignalledBreak}}{84}{subsubsection*.55}
+\contentsline {subsection}{\numberline {1.28.4}Display exceptions as they occur}{85}{subsection.1.28.4}
+\contentsline {section}{\numberline {1.29}\textbf {Appendix 4}: Names exported from Ruffus}{85}{section.1.29}
+\contentsline {subsection}{\numberline {1.29.1}Ruffus Names}{85}{subsection.1.29.1}
+\contentsline {section}{\numberline {1.30}\textbf {Appendix 5}: \textbf {@files}: Deprecated syntax}{87}{section.1.30}
+\contentsline {subsection}{\numberline {1.30.1}Overview}{87}{subsection.1.30.1}
+\contentsline {subsection}{\numberline {1.30.2}\textbf {@files}}{87}{subsection.1.30.2}
+\contentsline {subsection}{\numberline {1.30.3}Running the same code on different parameters in parallel}{88}{subsection.1.30.3}
+\contentsline {subsubsection}{Checking if jobs are up to date}{89}{subsubsection*.57}
+\contentsline {section}{\numberline {1.31}\textbf {Appendix 6}: \textbf {@files\_re}: Deprecated \emph {syntax using regular expressions}}{90}{section.1.31}
+\contentsline {subsection}{\numberline {1.31.1}Overview}{90}{subsection.1.31.1}
+\contentsline {subsubsection}{Transforming input and output filenames}{90}{subsubsection*.58}
+\contentsline {subsubsection}{Collating many \emph {inputs} into a single \emph {output}}{91}{subsubsection*.60}
+\contentsline {subsubsection}{Generating \emph {input} and \emph {output} parameters using regular expressions}{91}{subsubsection*.61}
+\contentsline {section}{\numberline {1.32}\textbf {Chapter 1}: Python Code for An introduction to basic Ruffus syntax}{92}{section.1.32}
+\contentsline {subsection}{\numberline {1.32.1}Your first Ruffus script}{92}{subsection.1.32.1}
+\contentsline {subsection}{\numberline {1.32.2}Resulting Output}{93}{subsection.1.32.2}
+\contentsline {section}{\numberline {1.33}\textbf {Chapter 2}: Python Code for Transforming data in a pipeline with \texttt {@transform}}{93}{section.1.33}
+\contentsline {subsection}{\numberline {1.33.1}Your first Ruffus script}{93}{subsection.1.33.1}
+\contentsline {subsection}{\numberline {1.33.2}Resulting Output}{94}{subsection.1.33.2}
+\contentsline {section}{\numberline {1.34}\textbf {Chapter 3}: Python Code for More on \texttt {@transform}-ing data}{95}{section.1.34}
+\contentsline {subsection}{\numberline {1.34.1}Producing several items / files per job}{95}{subsection.1.34.1}
+\contentsline {subsubsection}{Resulting Output}{96}{subsubsection*.62}
+\contentsline {subsection}{\numberline {1.34.2}Defining task functions out of order}{96}{subsection.1.34.2}
+\contentsline {subsubsection}{Resulting Output}{97}{subsubsection*.63}
+\contentsline {subsection}{\numberline {1.34.3}Multiple dependencies}{97}{subsection.1.34.3}
+\contentsline {subsubsection}{Resulting Output}{98}{subsubsection*.64}
+\contentsline {subsection}{\numberline {1.34.4}Multiple dependencies after @follows}{99}{subsection.1.34.4}
+\contentsline {subsubsection}{Resulting Output: \texttt {first\_task} completes before \texttt {second\_task}}{100}{subsubsection*.65}
+\contentsline {section}{\numberline {1.35}\textbf {Chapter 4}: Python Code for Creating files with \texttt {@originate}}{100}{section.1.35}
+\contentsline {subsection}{\numberline {1.35.1}Using \texttt {@originate}}{101}{subsection.1.35.1}
+\contentsline {subsection}{\numberline {1.35.2}Resulting Output}{101}{subsection.1.35.2}
+\contentsline {section}{\numberline {1.36}\textbf {Chapter 5}: Python Code for Understanding how your pipeline works with \emph {pipeline\_printout(...)}}{101}{section.1.36}
+\contentsline {subsection}{\numberline {1.36.1}Display the initial state of the pipeline}{102}{subsection.1.36.1}
+\contentsline {subsection}{\numberline {1.36.2}Normal Output}{102}{subsection.1.36.2}
+\contentsline {subsection}{\numberline {1.36.3}High Verbosity Output}{102}{subsection.1.36.3}
+\contentsline {subsection}{\numberline {1.36.4}Display the partially up-to-date pipeline}{103}{subsection.1.36.4}
+\contentsline {section}{\numberline {1.37}\textbf {Chapter 7}: Python Code for Displaying the pipeline visually with \emph {pipeline\_printout\_graph(...)}}{105}{section.1.37}
+\contentsline {subsection}{\numberline {1.37.1}Code}{105}{subsection.1.37.1}
+\contentsline {subsection}{\numberline {1.37.2}Resulting Flowcharts}{106}{subsection.1.37.2}
+\contentsline {section}{\numberline {1.38}\textbf {Chapter 8}: Python Code for Specifying output file names with \emph {formatter()} and \emph {regex()}}{107}{section.1.38}
+\contentsline {subsection}{\numberline {1.38.1}Example Code for \emph {suffix()}}{107}{subsection.1.38.1}
+\contentsline {subsection}{\numberline {1.38.2}Example Code for \emph {formatter()}}{107}{subsection.1.38.2}
+\contentsline {subsection}{\numberline {1.38.3}Example Code for \emph {formatter()} with replacements in \emph {extra} arguments}{108}{subsection.1.38.3}
+\contentsline {subsection}{\numberline {1.38.4}Example Code for \emph {formatter()} in Zoos}{109}{subsection.1.38.4}
+\contentsline {subsection}{\numberline {1.38.5}Example Code for \emph {regex()} in zoos}{110}{subsection.1.38.5}
+\contentsline {section}{\numberline {1.39}\textbf {Chapter 9}: Python Code for Preparing directories for output with \emph {@mkdir()}}{110}{section.1.39}
+\contentsline {subsection}{\numberline {1.39.1}Code for \emph {formatter()} Zoo example}{111}{subsection.1.39.1}
+\contentsline {subsection}{\numberline {1.39.2}Code for \emph {regex()} Zoo example}{111}{subsection.1.39.2}
+\contentsline {section}{\numberline {1.40}\textbf {Chapter 10}: Python Code for Checkpointing: Interrupted Pipelines and Exceptions}{112}{section.1.40}
+\contentsline {subsection}{\numberline {1.40.1}Code for the \emph {suffix()} example}{112}{subsection.1.40.1}
+\contentsline {section}{\numberline {1.41}\textbf {Chapter 12}: Python Code for Splitting up large tasks / files with \textbf {@split}}{113}{section.1.41}
+\contentsline {subsection}{\numberline {1.41.1}Splitting large jobs}{113}{subsection.1.41.1}
+\contentsline {subsection}{\numberline {1.41.2}Resulting Output}{114}{subsection.1.41.2}
+\contentsline {section}{\numberline {1.42}\textbf {Chapter 13}: Python Code for \texttt {@merge} multiple inputs into a single result}{114}{section.1.42}
+\contentsline {subsection}{\numberline {1.42.1}Splitting large jobs}{115}{subsection.1.42.1}
+\contentsline {subsection}{\numberline {1.42.2}Resulting Output}{116}{subsection.1.42.2}
+\contentsline {section}{\numberline {1.43}\textbf {Chapter 14}: Python Code for Multiprocessing, \texttt {drmaa} and Computation Clusters}{117}{section.1.43}
+\contentsline {subsection}{\numberline {1.43.1}\emph {@jobs\_limit}}{117}{subsection.1.43.1}
+\contentsline {subsection}{\numberline {1.43.2}Using \texttt {ruffus.drmaa\_wrapper}}{119}{subsection.1.43.2}
+\contentsline {section}{\numberline {1.44}\textbf {Chapter 15}: Python Code for Logging progress through a pipeline}{120}{section.1.44}
+\contentsline {subsection}{\numberline {1.44.1}Rotating set of file logs}{120}{subsection.1.44.1}
+\contentsline {section}{\numberline {1.45}\textbf {Chapter 16}: Python Code for \emph {@subdivide} tasks to run efficiently and regroup with \emph {@collate}}{121}{section.1.45}
+\contentsline {subsection}{\numberline {1.45.1}\emph {@subdivide} and regroup with \emph {@collate} example}{121}{subsection.1.45.1}
+\contentsline {section}{\numberline {1.46}\textbf {Chapter 17}: Python Code for \emph {@combinations}, \emph {@permutations} and all versus all \emph {@product}}{123}{section.1.46}
+\contentsline {subsection}{\numberline {1.46.1}Example code for \emph {@product}}{123}{subsection.1.46.1}
+\contentsline {subsection}{\numberline {1.46.2}Example code for \emph {@permutations}}{125}{subsection.1.46.2}
+\contentsline {subsection}{\numberline {1.46.3}Example code for \emph {@combinations}}{126}{subsection.1.46.3}
+\contentsline {subsection}{\numberline {1.46.4}Example code for \emph {@combinations\_with\_replacement}}{127}{subsection.1.46.4}
+\contentsline {section}{\numberline {1.47}\textbf {Chapter 20}: Python Code for Manipulating task inputs via string substitution using \emph {inputs()} and \emph {add\_inputs()}}{128}{section.1.47}
+\contentsline {subsection}{\numberline {1.47.1}Example code for adding additional \emph {input} prerequisites per job with \emph {add\_inputs()}}{128}{subsection.1.47.1}
+\contentsline {subsubsection}{1. Example: compiling c++ code}{128}{subsubsection*.66}
+\contentsline {subsubsection}{2. Example: Adding a common header file with \emph {add\_inputs()}}{129}{subsubsection*.67}
+\contentsline {subsubsection}{3. Example: Additional \emph {Input} can be tasks}{129}{subsubsection*.68}
+\contentsline {subsubsection}{4. Example: Add corresponding files using \emph {add\_inputs()} with \emph {formatter} or \emph {regex}}{130}{subsubsection*.69}
+\contentsline {subsection}{\numberline {1.47.2}Example code for replacing all input parameters with \emph {inputs()}}{130}{subsection.1.47.2}
+\contentsline {subsubsection}{5. Example: Running matching python scripts using \emph {inputs()}}{130}{subsubsection*.70}
+\contentsline {section}{\numberline {1.48}\textbf {Chapter 21}: Esoteric: Python Code for Generating parameters on the fly with \emph {@files}}{131}{section.1.48}
+\contentsline {subsection}{\numberline {1.48.1}Introduction}{131}{subsection.1.48.1}
+\contentsline {subsection}{\numberline {1.48.2}Code}{132}{subsection.1.48.2}
+\contentsline {subsection}{\numberline {1.48.3}Resulting Output}{136}{subsection.1.48.3}
+\contentsline {section}{\numberline {1.49}\textbf {Appendix 1}: Python code for Flow Chart Colours with \emph {pipeline\_printout\_graph(...)}}{136}{section.1.49}
+\contentsline {subsection}{\numberline {1.49.1}Code}{137}{subsection.1.49.1}
+\contentsline {chapter}{\numberline {2}Overview:}{143}{chapter.2}
+\contentsline {section}{\numberline {2.1}Cheat Sheet}{143}{section.2.1}
+\contentsline {subsection}{\numberline {2.1.1}1. Annotate functions with \textbf {Ruffus} decorators}{144}{subsection.2.1.1}
+\contentsline {subsubsection}{Core}{144}{subsubsection*.71}
+\contentsline {subsubsection}{See \emph {Decorators} for a complete list of decorators}{144}{subsubsection*.72}
+\contentsline {subsection}{\numberline {2.1.2}2. Print dependency graph if necessary}{144}{subsection.2.1.2}
+\contentsline {subsection}{\numberline {2.1.3}3. Run the pipeline}{145}{subsection.2.1.3}
+\contentsline {section}{\numberline {2.2}Pipeline functions}{145}{section.2.2}
+\contentsline {subsection}{\numberline {2.2.1}\emph {pipeline\_run}}{145}{subsection.2.2.1}
+\contentsline {subsection}{\numberline {2.2.2}\emph {pipeline\_printout}}{147}{subsection.2.2.2}
+\contentsline {subsection}{\numberline {2.2.3}\emph {pipeline\_printout\_graph}}{148}{subsection.2.2.3}
+\contentsline {subsection}{\numberline {2.2.4}\emph {pipeline\_get\_task\_names}}{151}{subsection.2.2.4}
+\contentsline {section}{\numberline {2.3}drmaa functions}{151}{section.2.3}
+\contentsline {subsection}{\numberline {2.3.1}\emph {run\_job}}{151}{subsection.2.3.1}
+\contentsline {section}{\numberline {2.4}Installation}{154}{section.2.4}
+\contentsline {subsection}{\numberline {2.4.1}The easy way}{154}{subsection.2.4.1}
+\contentsline {subsection}{\numberline {2.4.2}The most up-to-date code:}{154}{subsection.2.4.2}
+\contentsline {subsubsection}{Graphical flowcharts}{154}{subsubsection*.254}
+\contentsline {section}{\numberline {2.5}Design \& Architecture}{155}{section.2.5}
+\contentsline {subsection}{\numberline {2.5.1}\emph {GNU Make}}{155}{subsection.2.5.1}
+\contentsline {subsubsection}{Deficiencies of \emph {make} / \emph {gmake}}{155}{subsubsection*.255}
+\contentsline {subsection}{\numberline {2.5.2}\emph {Scons}, \emph {Rake} and other \emph {Make} alternatives}{155}{subsection.2.5.2}
+\contentsline {subsubsection}{Implicit dependencies: disadvantages of \emph {make} / \emph {scons} / \emph {rake}}{156}{subsubsection*.256}
+\contentsline {subsubsection}{Explicit dependencies in \emph {Ruffus}}{156}{subsubsection*.257}
+\contentsline {subsubsection}{Static dependencies: What \emph {make} / \emph {scons} / \emph {rake} can't do (easily)}{156}{subsubsection*.258}
+\contentsline {subsection}{\numberline {2.5.3}Managing pipelines stage-by-stage using \textbf {Ruffus}}{157}{subsection.2.5.3}
+\contentsline {subsubsection}{Disadvantages of the Ruffus design}{158}{subsubsection*.259}
+\contentsline {subsection}{\numberline {2.5.4}Alternatives to \textbf {Ruffus}}{159}{subsection.2.5.4}
+\contentsline {subsubsection}{Acknowledgements}{159}{subsubsection*.260}
+\contentsline {section}{\numberline {2.6}Major Features added to Ruffus}{160}{section.2.6}
+\contentsline {subsection}{\numberline {2.6.1}version 2.5RC}{160}{subsection.2.6.1}
+\contentsline {subsubsection}{1) Python3 compatibility (but at least Python 2.6 is now required)}{160}{subsubsection*.261}
+\contentsline {subsubsection}{2) Ctrl-C interrupts}{160}{subsubsection*.262}
+\contentsline {subsubsection}{3) Customising flowcharts in pipeline\_printout\_graph() with \texttt {@graphviz}}{160}{subsubsection*.263}
+\contentsline {subsubsection}{4) Consistent verbosity levels}{161}{subsubsection*.264}
+\contentsline {subsubsection}{5) Allow abbreviated paths from \texttt {pipeline\_run} or \texttt {pipeline\_printout}}{162}{subsubsection*.265}
+\contentsline {subsubsection}{Other changes}{163}{subsubsection*.266}
+\contentsline {subsection}{\numberline {2.6.2}version 2.4.1}{163}{subsection.2.6.2}
+\contentsline {subsection}{\numberline {2.6.3}version 2.4}{163}{subsection.2.6.3}
+\contentsline {subsubsection}{Additions to \texttt {ruffus} namespace}{163}{subsubsection*.267}
+\contentsline {subsubsection}{Installation: use pip}{163}{subsubsection*.268}
+\contentsline {subsubsection}{1) Command Line support}{163}{subsubsection*.269}
+\contentsline {subsubsection}{2) Checkpointing}{164}{subsubsection*.270}
+\contentsline {subsubsection}{3) \emph {subdivide()} (\emph {syntax})}{164}{subsubsection*.271}
+\contentsline {subsubsection}{4) \emph {mkdir()} (\emph {syntax}) with \emph {formatter()}, \emph {suffix()} and \emph {regex()}}{164}{subsubsection*.272}
+\contentsline {subsubsection}{5) \emph {originate()} (\emph {syntax})}{164}{subsubsection*.273}
+\contentsline {subsubsection}{6) New flexible \emph {formatter()} (\emph {syntax}) alternative to \emph {regex()} \& \emph {suffix()}}{165}{subsubsection*.274}
+\contentsline {subsubsection}{7) Combinatorics (all vs. all decorators)}{165}{subsubsection*.275}
+\contentsline {subsubsection}{8) drmaa support and multithreading:}{165}{subsubsection*.276}
+\contentsline {subsubsection}{9) \texttt {pipeline\_run(...)} and exceptions}{165}{subsubsection*.277}
+\contentsline {subsubsection}{10) Miscellaneous}{165}{subsubsection*.278}
+\contentsline {subsection}{\numberline {2.6.4}version 2.3}{166}{subsection.2.6.4}
+\contentsline {subsection}{\numberline {2.6.5}version 2.2}{167}{subsection.2.6.5}
+\contentsline {subsection}{\numberline {2.6.6}version 2.1.1}{168}{subsection.2.6.6}
+\contentsline {subsection}{\numberline {2.6.7}version 2.1.0}{169}{subsection.2.6.7}
+\contentsline {subsection}{\numberline {2.6.8}version 2.0.10}{169}{subsection.2.6.8}
+\contentsline {subsection}{\numberline {2.6.9}version 2.0.9}{170}{subsection.2.6.9}
+\contentsline {subsection}{\numberline {2.6.10}version 2.0.8}{171}{subsection.2.6.10}
+\contentsline {subsection}{\numberline {2.6.11}version 2.0.2}{171}{subsection.2.6.11}
+\contentsline {subsection}{\numberline {2.6.12}version 2.0}{171}{subsection.2.6.12}
+\contentsline {subsection}{\numberline {2.6.13}version 1.1.4}{171}{subsection.2.6.13}
+\contentsline {subsection}{\numberline {2.6.14}version 1.0.7}{171}{subsection.2.6.14}
+\contentsline {subsection}{\numberline {2.6.15}version 1.0}{171}{subsection.2.6.15}
+\contentsline {section}{\numberline {2.7}Fixed Bugs}{172}{section.2.7}
+\contentsline {section}{\numberline {2.8}Future Changes to Ruffus}{172}{section.2.8}
+\contentsline {subsection}{\numberline {2.8.1}Todo: pipeline\_printout\_graph should print inactive tasks}{172}{subsection.2.8.1}
+\contentsline {subsection}{\numberline {2.8.2}Todo: Mark input strings as non-file names, and add support for dynamically returned parameters}{172}{subsection.2.8.2}
+\contentsline {subsection}{\numberline {2.8.3}Todo: Allow ``extra'' parameters to be used in output substitution}{172}{subsection.2.8.3}
+\contentsline {subsection}{\numberline {2.8.4}Todo: Extra signalling before and after each task and job}{173}{subsection.2.8.4}
+\contentsline {subsection}{\numberline {2.8.5}Todo: \texttt {@split} / \texttt {@subdivide} returns the actual output created}{173}{subsection.2.8.5}
+\contentsline {subsubsection}{Checkpointing}{174}{subsubsection*.279}
+\contentsline {subsection}{\numberline {2.8.6}Todo: New decorators}{174}{subsection.2.8.6}
+\contentsline {subsubsection}{Todo: \texttt {@originate}}{174}{subsubsection*.280}
+\contentsline {subsubsection}{Todo: \texttt {@recombine}}{174}{subsubsection*.281}
+\contentsline {subsection}{\numberline {2.8.7}Todo: Named parameters in decorators for clarity}{174}{subsection.2.8.7}
+\contentsline {subsection}{\numberline {2.8.8}Todo: Bioinformatics example to end all examples}{174}{subsection.2.8.8}
+\contentsline {subsection}{\numberline {2.8.9}Todo: Allow the next task to start before all jobs in the previous task have finished}{175}{subsection.2.8.9}
+\contentsline {subsubsection}{Converting to per-job rather than per-task dependencies}{175}{subsubsection*.282}
+\contentsline {subsubsection}{Implementation}{175}{subsubsection*.283}
+\contentsline {section}{\numberline {2.9}Planned Improvements to Ruffus}{176}{section.2.9}
+\contentsline {subsection}{\numberline {2.9.1}Planned: Running python code (task functions) transparently on remote cluster nodes}{176}{subsection.2.9.1}
+\contentsline {subsection}{\numberline {2.9.2}Planned: Custom parameter generator}{177}{subsection.2.9.2}
+\contentsline {subsection}{\numberline {2.9.3}Planned: Ruffus GUI interface.}{177}{subsection.2.9.3}
+\contentsline {subsection}{\numberline {2.9.4}Planned: Non-decorator / Function interface to Ruffus}{177}{subsection.2.9.4}
+\contentsline {subsection}{\numberline {2.9.5}Planned: Remove intermediate files}{177}{subsection.2.9.5}
+\contentsline {subsection}{\numberline {2.9.6}Planned: @retry\_on\_error(NUM\_OF\_RETRIES)}{178}{subsection.2.9.6}
+\contentsline {subsection}{\numberline {2.9.7}Planned: Clean up}{178}{subsection.2.9.7}
+\contentsline {section}{\numberline {2.10}Implementation Tips}{179}{section.2.10}
+\contentsline {subsection}{\numberline {2.10.1}Release}{179}{subsection.2.10.1}
+\contentsline {subsection}{\numberline {2.10.2}dbdict.py}{180}{subsection.2.10.2}
+\contentsline {subsection}{\numberline {2.10.3}how to write new decorators}{180}{subsection.2.10.3}
+\contentsline {section}{\numberline {2.11}Implementation notes}{181}{section.2.11}
+\contentsline {subsection}{\numberline {2.11.1}\texttt {Ctrl-C} handling}{181}{subsection.2.11.1}
+\contentsline {subsection}{\numberline {2.11.2}Python3 compatibility}{182}{subsection.2.11.2}
+\contentsline {subsection}{\numberline {2.11.3}Refactoring: parameter handling}{183}{subsection.2.11.3}
+\contentsline {subsection}{\numberline {2.11.4}\texttt {formatter}}{183}{subsection.2.11.4}
+\contentsline {subsubsection}{\texttt {formatter()}: \texttt {regex()} and \texttt {suffix()}}{184}{subsubsection*.286}
+\contentsline {subsection}{\numberline {2.11.5}@product()}{184}{subsection.2.11.5}
+\contentsline {subsection}{\numberline {2.11.6}\texttt {@permutations(...),} \texttt {@combinations(...),} \texttt {@combinations\_with\_replacement(...)}}{185}{subsection.2.11.6}
+\contentsline {subsection}{\numberline {2.11.7}drmaa alternatives}{185}{subsection.2.11.7}
+\contentsline {subsection}{\numberline {2.11.8}Task completion monitoring}{185}{subsection.2.11.8}
+\contentsline {subsubsection}{How easy is it to abstract out the database?}{185}{subsubsection*.287}
+\contentsline {subsubsection}{Can we query the database, get Job history / stats?}{185}{subsubsection*.288}
+\contentsline {subsubsection}{What are the run time performance implications?}{186}{subsubsection*.289}
+\contentsline {subsubsection}{Avoid pauses between tasks}{186}{subsubsection*.290}
+\contentsline {subsection}{\numberline {2.11.9}\texttt {@mkdir(...),}}{186}{subsection.2.11.9}
+\contentsline {section}{\numberline {2.12}FAQ}{186}{section.2.12}
+\contentsline {subsection}{\numberline {2.12.1}Citations}{186}{subsection.2.12.1}
+\contentsline {subsubsection}{Q. How should \emph {Ruffus} be cited in academic publications?}{186}{subsubsection*.291}
+\contentsline {subsection}{\numberline {2.12.2}Good practices}{186}{subsection.2.12.2}
+\contentsline {subsubsection}{Q. What is the best way of keeping my data and workings separate?}{186}{subsubsection*.292}
+\contentsline {subsubsection}{Q. What is the best way of handling data in file pairs (or triplets etc.)?}{187}{subsubsection*.293}
+\contentsline {subsection}{\numberline {2.12.3}General}{188}{subsection.2.12.3}
+\contentsline {subsubsection}{Q. \emph {Ruffus} won't create dependency graphs}{188}{subsubsection*.294}
+\contentsline {subsubsection}{Q. \emph {Ruffus} seems to be hanging in the same place}{188}{subsubsection*.295}
+\contentsline {subsubsection}{Q. Regular expression substitutions don't work}{188}{subsubsection*.296}
+\contentsline {subsubsection}{Q. How to force a pipeline to appear up to date?}{189}{subsubsection*.297}
+\contentsline {subsubsection}{Q. How can I use my own decorators with Ruffus?}{189}{subsubsection*.298}
+\contentsline {paragraph}{1. Use @wraps from \texttt {functools} or Michele Simionato's decorator module}{189}{paragraph*.299}
+\contentsline {paragraph}{2. Always call Ruffus decorators before your own decorators.}{189}{paragraph*.300}
+\contentsline {paragraph}{Example decorator:}{190}{paragraph*.301}
+\contentsline {paragraph}{1. Using functools @wraps}{190}{paragraph*.302}
+\contentsline {paragraph}{2. Using Michele Simionato's decorator module}{191}{paragraph*.303}
+\contentsline {paragraph}{3. By hand, using a callable object}{191}{paragraph*.304}
+\contentsline {subsubsection}{Q. Can a task function in a \emph {Ruffus} pipeline be called normally outside of Ruffus?}{191}{subsubsection*.305}
+\contentsline {subsubsection}{Q. My \emph {Ruffus} tasks create two files at a time. Why is the second one ignored in successive stages of my pipeline?}{191}{subsubsection*.306}
+\contentsline {subsubsection}{Q. How can a \emph {Ruffus} task produce output which goes off in different directions?}{192}{subsubsection*.307}
+\contentsline {subsubsection}{Q. Can I call extra code before each job?}{193}{subsubsection*.308}
+\contentsline {subsubsection}{Q. Does \emph {Ruffus} allow checkpointing: to distinguish interrupted and completed results?}{194}{subsubsection*.309}
+\contentsline {paragraph}{A. Use the builtin sqlite checkpointing}{194}{paragraph*.310}
+\contentsline {paragraph}{A. Use a flag file}{194}{paragraph*.311}
+\contentsline {paragraph}{A. Use a temp file}{195}{paragraph*.312}
+\contentsline {subsection}{\numberline {2.12.4}Windows}{196}{subsection.2.12.4}
+\contentsline {subsubsection}{Q. Windows seems to spawn \emph {ruffus} processes recursively}{196}{subsubsection*.313}
+\contentsline {subsection}{\numberline {2.12.5}Sun Grid Engine / PBS / SLURM etc}{196}{subsection.2.12.5}
+\contentsline {subsubsection}{Q. Can Ruffus be used to manage a cluster or grid based pipeline?}{196}{subsubsection*.314}
+\contentsline {subsubsection}{Q. When I submit lots of jobs via Sun Grid Engine (SGE), the head node occasionally freezes and dies}{197}{subsubsection*.315}
+\contentsline {subsubsection}{Q. Keeping large intermediate files}{197}{subsubsection*.316}
+\contentsline {subsection}{\numberline {2.12.6}Sharing python objects between Ruffus processes running concurrently}{197}{subsection.2.12.6}
+\contentsline {subsubsection}{Can ordinary python objects be shared between processes?}{198}{subsubsection*.317}
+\contentsline {subsubsection}{Why am I getting \texttt {PicklingError}?}{198}{subsubsection*.318}
+\contentsline {subsubsection}{How about synchronising python objects in real time?}{199}{subsubsection*.319}
+\contentsline {subsubsection}{Can I share and synchronise my own python classes via proxies?}{199}{subsubsection*.320}
+\contentsline {subsubsection}{How do I send python objects back and forth without tangling myself in horrible synchronisation code?}{200}{subsubsection*.321}
+\contentsline {subsubsection}{How do I share large amounts of data efficiently across processes?}{200}{subsubsection*.322}
+\contentsline {section}{\numberline {2.13}Glossary}{200}{section.2.13}
+\contentsline {section}{\numberline {2.14}Hall of Fame: User contributed flowcharts}{201}{section.2.14}
+\contentsline {subsection}{\numberline {2.14.1}RNASeq pipeline}{201}{subsection.2.14.1}
+\contentsline {subsection}{\numberline {2.14.2}non-coding evolutionary constraints}{203}{subsection.2.14.2}
+\contentsline {subsection}{\numberline {2.14.3}SNP annotation}{203}{subsection.2.14.3}
+\contentsline {subsection}{\numberline {2.14.4}Chip-Seq analysis}{204}{subsection.2.14.4}
+\contentsline {section}{\numberline {2.15}Why \emph {Ruffus}?}{204}{section.2.15}
+\contentsline {chapter}{\numberline {3}Examples}{207}{chapter.3}
+\contentsline {section}{\numberline {3.1}Construction of a simple pipeline to run BLAST jobs}{207}{section.3.1}
+\contentsline {subsection}{\numberline {3.1.1}Overview}{207}{subsection.3.1.1}
+\contentsline {subsection}{\numberline {3.1.2}Prerequisites}{207}{subsection.3.1.2}
+\contentsline {subsubsection}{1. Ruffus}{207}{subsubsection*.329}
+\contentsline {subsubsection}{2. BLAST}{208}{subsubsection*.330}
+\contentsline {subsubsection}{3. human refseq sequence database}{208}{subsubsection*.331}
+\contentsline {subsubsection}{4. test sequences}{208}{subsubsection*.332}
+\contentsline {subsection}{\numberline {3.1.3}Code}{208}{subsection.3.1.3}
+\contentsline {subsection}{\numberline {3.1.4}Step 1. Splitting up the query sequences}{208}{subsection.3.1.4}
+\contentsline {subsection}{\numberline {3.1.5}Step 2. Run BLAST jobs in parallel}{209}{subsection.3.1.5}
+\contentsline {subsection}{\numberline {3.1.6}Step 3. Combining BLAST results}{209}{subsection.3.1.6}
+\contentsline {subsection}{\numberline {3.1.7}Step 4. Running the pipeline}{210}{subsection.3.1.7}
+\contentsline {subsection}{\numberline {3.1.8}Step 5. Testing dependencies}{210}{subsection.3.1.8}
+\contentsline {subsection}{\numberline {3.1.9}What is next?}{211}{subsection.3.1.9}
+\contentsline {section}{\numberline {3.2}Part 2: A slightly more practical pipeline to run BLAST jobs}{211}{section.3.2}
+\contentsline {subsection}{\numberline {3.2.1}Overview}{211}{subsection.3.2.1}
+\contentsline {subsection}{\numberline {3.2.2}Step 1. Cleaning up any leftover junk from previous pipeline runs}{212}{subsection.3.2.2}
+\contentsline {subsection}{\numberline {3.2.3}Step 2. Adding a ``flag'' file to mark successful completion}{212}{subsection.3.2.3}
+\contentsline {subsection}{\numberline {3.2.4}Step 3. Allowing the script to be invoked on the command line}{213}{subsection.3.2.4}
+\contentsline {subsection}{\numberline {3.2.5}Step 4. Printing out a flowchart for the pipeline}{213}{subsection.3.2.5}
+\contentsline {subsection}{\numberline {3.2.6}Step 5. Errors}{213}{subsection.3.2.6}
+\contentsline {subsection}{\numberline {3.2.7}Step 6. Will it run?}{214}{subsection.3.2.7}
+\contentsline {section}{\numberline {3.3}Ruffus code}{215}{section.3.3}
+\contentsline {section}{\numberline {3.4}Ruffus code}{216}{section.3.4}
+\contentsline {section}{\numberline {3.5}Example code for \emph {FAQ Good practices: ``What is the best way of handling data in file pairs (or triplets etc.)?''}}{220}{section.3.5}
+\contentsline {chapter}{\numberline {4}Reference:}{223}{chapter.4}
+\contentsline {section}{\numberline {4.1}Decorators}{223}{section.4.1}
+\contentsline {subsection}{\numberline {4.1.1}Ruffus Decorators}{223}{subsection.4.1.1}
+\contentsline {subsubsection}{\emph {Core}}{223}{subsubsection*.333}
+\contentsline {subsubsection}{\emph {Combinatorics}}{225}{subsubsection*.334}
+\contentsline {subsubsection}{\emph {Advanced}}{227}{subsubsection*.335}
+\contentsline {subsubsection}{\emph {Esoteric!}}{228}{subsubsection*.336}
+\contentsline {subsection}{\numberline {4.1.2}Indicator Objects}{228}{subsection.4.1.2}
+\contentsline {subsubsection}{\emph {formatter}}{228}{subsubsection*.337}
+\contentsline {subsubsection}{\emph {suffix}}{231}{subsubsection*.338}
+\contentsline {subsubsection}{\emph {regex}}{232}{subsubsection*.339}
+\contentsline {subsubsection}{\emph {add\_inputs}}{232}{subsubsection*.340}
+\contentsline {subsubsection}{\emph {inputs}}{233}{subsubsection*.341}
+\contentsline {subsubsection}{\emph {mkdir}}{234}{subsubsection*.342}
+\contentsline {subsubsection}{\emph {touch\_file}}{234}{subsubsection*.343}
+\contentsline {subsubsection}{\emph {output\_from}}{235}{subsubsection*.344}
+\contentsline {subsubsection}{\emph {combine}}{235}{subsubsection*.345}
+\contentsline {subsection}{\numberline {4.1.3}@originate}{237}{subsection.4.1.3}
+\contentsline {subsubsection}{\emph {@originate} ( \emph {output\_files}, {[}\emph {extra\_parameters},...{]} )}{237}{subsubsection*.349}
+\contentsline {subsection}{\numberline {4.1.4}@split}{237}{subsection.4.1.4}
+\contentsline {subsubsection}{\emph {@split} ( \emph {tasks\_or\_file\_names}, \emph {output\_files}, {[}\emph {extra\_parameters},...{]} )}{237}{subsubsection*.357}
+\contentsline {subsection}{\numberline {4.1.5}@split with \texttt {regex(...)}, \texttt {add\_inputs} and \texttt {inputs}}{237}{subsection.4.1.5}
+\contentsline {subsection}{\numberline {4.1.6}@transform}{237}{subsection.4.1.6}
+\contentsline {subsubsection}{\emph {@transform} ( \emph {tasks\_or\_file\_names}, \emph {suffix}\emph {(}\emph {suffix\_string}\emph {)}\textbar {} \emph {regex}\emph {(}\emph {matching\_regex}\emph {)} \textbar {} \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, \emph {output\_pattern}, {[}\emph {extra\_parameters},...{]} )}{237}{subsubsection*.372}
+\contentsline {subsection}{\numberline {4.1.7}@merge}{237}{subsection.4.1.7}
+\contentsline {subsubsection}{\emph {@merge} ( \emph {tasks\_or\_file\_names}, \emph {output\_file}, {[}\emph {extra\_parameters},...{]} )}{237}{subsubsection*.384}
+\contentsline {subsection}{\numberline {4.1.8}@subdivide}{239}{subsection.4.1.8}
+\contentsline {subsubsection}{\emph {@subdivide} ( \emph {tasks\_or\_file\_names}, \emph {regex}\emph {(}\emph {matching\_regex}\emph {)} \textbar {} \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, {[} \emph {inputs} \emph {(}\emph {input\_pattern\_or\_glob}\emph {)} \textbar {} \emph {add\_inputs} \emph {(}\emph {input\_pattern\_or\_glob}\emph {)} {]}, \emph {output\_pattern}, {[}\emph {extra\_parameters},...{]} )}{239}{subsubsection*.399}
+\contentsline {subsection}{\numberline {4.1.9}@transform with \texttt {add\_inputs} and \texttt {inputs}}{239}{subsection.4.1.9}
+\contentsline {subsubsection}{\emph {@transform} ( \emph {tasks\_or\_file\_names}, \emph {suffix}\emph {(}\emph {suffix\_string}\emph {)}\textbar {} \emph {regex}\emph {(}\emph {matching\_regex}\emph {)} \textbar {} \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, \emph {inputs} \textbar {} \emph {add\_inputs}\emph {(}\emph {input\_pattern\_or\_glob}\emph {)}, \emph {output\_pattern}, {[}\emph {extra\_parameters},...{]} )}{239}{subsubsection*.419}
+\contentsline {subsection}{\numberline {4.1.10}@collate}{239}{subsection.4.1.10}
+\contentsline {subsubsection}{\emph {@collate} ( \emph {tasks\_or\_file\_names}, \emph {regex}\emph {(}\emph {matching\_regex}\emph {)} \textbar {} \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, \emph {output\_pattern}, {[}\emph {extra\_parameters},...{]} )}{239}{subsubsection*.436}
+\contentsline {subsection}{\numberline {4.1.11}@collate with \texttt {add\_inputs} and \texttt {inputs}}{239}{subsection.4.1.11}
+\contentsline {subsubsection}{\emph {@collate} ( \emph {tasks\_or\_file\_names}, \emph {regex}\emph {(}\emph {matching\_regex}\emph {)} \textbar {} \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, {[}\emph {inputs}\emph {(}\emph {input\_pattern\_or\_glob}\emph {)} \textbar {} \emph {add\_inputs}\emph {(}\emph {input\_pattern\_or\_glob}\emph {)}{]} , \emph {output\_pattern}, {[}\emph {extra\_parameters},...{]} )}{239}{subsubsection*.453}
+\contentsline {subsection}{\numberline {4.1.12}@graphviz}{239}{subsection.4.1.12}
+\contentsline {subsubsection}{\emph {@graphviz} ( \emph {graphviz\_parameters},...{]} )}{239}{subsubsection*.461}
+\contentsline {subsection}{\numberline {4.1.13}@mkdir}{239}{subsection.4.1.13}
+\contentsline {subsubsection}{\emph {@mkdir} ( \emph {tasks\_or\_file\_names}, \emph {suffix}\emph {(}\emph {suffix\_string}\emph {)}\textbar {} \emph {regex}\emph {(}\emph {matching\_regex}\emph {)} \textbar {} \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, \emph {output\_pattern})}{239}{subsubsection*.472}
+\contentsline {subsection}{\numberline {4.1.14}@jobs\_limit}{239}{subsection.4.1.14}
+\contentsline {subsubsection}{\emph {@jobs\_limit} ( \emph {maximum\_num\_of\_jobs}, {[} \emph {name} {]})}{239}{subsubsection*.481}
+\contentsline {subsection}{\numberline {4.1.15}@posttask}{239}{subsection.4.1.15}
+\contentsline {subsubsection}{\emph {@posttask} (\emph {function} \textbar {} \emph {touch\_file}\emph {(}\emph {file\_name}\emph {)})}{239}{subsubsection*.487}
+\contentsline {subsection}{\numberline {4.1.16}@active\_if}{239}{subsection.4.1.16}
+\contentsline {subsubsection}{\emph {@active\_if}(on\_or\_off1, {[}on\_or\_off2,...{]})}{239}{subsubsection*.491}
+\contentsline {subsection}{\numberline {4.1.17}@follows}{239}{subsection.4.1.17}
+\contentsline {subsubsection}{\emph {@follows}(\emph {task} \textbar {} \emph {``task\_name''} \textbar {} \emph {mkdir} (\emph {directory\_name}), {[}more\_tasks, ...{]})}{239}{subsubsection*.499}
+\contentsline {subsection}{\numberline {4.1.18}@product}{241}{subsection.4.1.18}
+\contentsline {subsubsection}{\emph {@product} ( \emph {tasks\_or\_file\_names}, \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, {[}\emph {tasks\_or\_file\_names}, \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, ... {]}, \emph {output\_pattern}, {[}\emph {extra\_parameters},...{]} )}{241}{subsubsection*.510}
+\contentsline {subsection}{\numberline {4.1.19}@permutations}{241}{subsection.4.1.19}
+\contentsline {subsubsection}{\emph {@permutations} ( \emph {tasks\_or\_file\_names}, \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, \emph {output\_pattern}, {[}\emph {extra\_parameters},...{]} )}{241}{subsubsection*.522}
+\contentsline {subsection}{\numberline {4.1.20}@combinations}{241}{subsection.4.1.20}
+\contentsline {subsubsection}{\emph {@combinations} ( \emph {tasks\_or\_file\_names}, \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, \emph {output\_pattern}, {[}\emph {extra\_parameters},...{]} )}{241}{subsubsection*.534}
+\contentsline {subsection}{\numberline {4.1.21}@combinations\_with\_replacement}{241}{subsection.4.1.21}
+\contentsline {subsubsection}{\emph {@combinations\_with\_replacement} ( \emph {tasks\_or\_file\_names}, \emph {formatter}\emph {(}\emph {matching\_formatter}\emph {)}, \emph {output\_pattern}, {[}\emph {extra\_parameters},...{]} )}{241}{subsubsection*.546}
+\contentsline {subsection}{\numberline {4.1.22}Generating parameters on the fly for @files}{243}{subsection.4.1.22}
+\contentsline {subsubsection}{\emph {@files} (\emph {custom\_function})}{243}{subsubsection*.552}
+\contentsline {subsection}{\numberline {4.1.23}@check\_if\_uptodate}{243}{subsection.4.1.23}
+\contentsline {subsubsection}{\emph {@check\_if\_uptodate} (\emph {dependency\_checking\_function})}{243}{subsubsection*.555}
+\contentsline {subsection}{\numberline {4.1.24}@parallel}{243}{subsection.4.1.24}
+\contentsline {subsubsection}{\emph {@parallel} ( {[} {[}\emph {job\_params}, ...{]}, {[}\emph {job\_params}, ...{]}...{]} \textbar {} \emph {parameter\_generating\_function})}{243}{subsubsection*.560}
+\contentsline {subsection}{\numberline {4.1.25}@files}{245}{subsection.4.1.25}
+\contentsline {subsubsection}{\emph {@files} (\emph {input1}, \emph {output1}, {[}\emph {extra\_parameters1}, ...{]})}{245}{subsubsection*.574}
+\contentsline {paragraph}{@files for single jobs}{245}{paragraph*.575}
+\contentsline {subsubsection}{\emph {@files} ( \emph {((} \emph {input}, \emph {output}, {[}\emph {extra\_parameters},...{]} \emph {), (...), ...)} )}{245}{subsubsection*.579}
+\contentsline {paragraph}{@files in parallel}{245}{paragraph*.580}
+\contentsline {subsection}{\numberline {4.1.26}@files\_re}{245}{subsection.4.1.26}
+\contentsline {subsubsection}{\emph {@files\_re} (\emph {tasks\_or\_file\_names}, \emph {matching\_regex}, {[}\emph {input\_pattern}{]}, \emph {output\_pattern}, {[}\emph {extra\_parameters},...{]})}{245}{subsubsection*.594}
+\contentsline {paragraph}{Legacy design now deprecated. We suggest using \emph {@transform()} instead}{245}{paragraph*.595}
+\contentsline {section}{\numberline {4.2}Modules:}{246}{section.4.2}
+\contentsline {subsection}{\numberline {4.2.1}ruffus.Task}{246}{subsection.4.2.1}
+\contentsline {subsubsection}{Decorators}{246}{subsubsection*.601}
+\contentsline {subsubsection}{Pipeline functions}{246}{subsubsection*.602}
+\contentsline {paragraph}{pipeline\_run}{246}{paragraph*.603}
+\contentsline {paragraph}{pipeline\_printout}{247}{paragraph*.605}
+\contentsline {paragraph}{pipeline\_printout\_graph}{248}{paragraph*.607}
+\contentsline {subsubsection}{Logging}{249}{subsubsection*.609}
+\contentsline {subsubsection}{Implementation:}{249}{subsubsection*.612}
+\contentsline {paragraph}{Parameter factories:}{249}{paragraph*.613}
+\contentsline {paragraph}{Wrappers around jobs:}{250}{paragraph*.620}
+\contentsline {paragraph}{Checking if a job is up to date:}{250}{paragraph*.624}
+\contentsline {subsubsection}{Exceptions and Errors}{251}{subsubsection*.627}
+\contentsline {subsection}{\numberline {4.2.2}ruffus.proxy\_logger}{251}{subsection.4.2.2}
+\contentsline {subsubsection}{Create proxy for logging for use with multiprocessing}{251}{subsubsection*.631}
+\contentsline {paragraph}{Example 1}{251}{paragraph*.632}
+\contentsline {paragraph}{Example 2}{251}{paragraph*.633}
+\contentsline {paragraph}{Example 3}{251}{paragraph*.634}
+\contentsline {paragraph}{To use:}{252}{paragraph*.635}
+\contentsline {subsubsection}{Proxies for a log:}{252}{subsubsection*.636}
+\contentsline {subsubsection}{Create a logging object}{252}{subsubsection*.638}
+\contentsline {chapter}{\numberline {5}Indices and tables}{255}{chapter.5}
+\contentsline {chapter}{Python Module Index}{257}{section*.640}
diff --git a/doc/_build/latex/simple_tutorial_complex_flowchart.png b/doc/_build/latex/simple_tutorial_complex_flowchart.png
new file mode 100644
index 0000000..63b4d85
Binary files /dev/null and b/doc/_build/latex/simple_tutorial_complex_flowchart.png differ
diff --git a/doc/_build/latex/simple_tutorial_complex_flowchart_error.png b/doc/_build/latex/simple_tutorial_complex_flowchart_error.png
new file mode 100644
index 0000000..cb1f604
Binary files /dev/null and b/doc/_build/latex/simple_tutorial_complex_flowchart_error.png differ
diff --git a/doc/_build/latex/simple_tutorial_stage5_after.png b/doc/_build/latex/simple_tutorial_stage5_after.png
new file mode 100644
index 0000000..ac66986
Binary files /dev/null and b/doc/_build/latex/simple_tutorial_stage5_after.png differ
diff --git a/doc/_build/latex/simple_tutorial_stage5_before.png b/doc/_build/latex/simple_tutorial_stage5_before.png
new file mode 100644
index 0000000..96d5a4b
Binary files /dev/null and b/doc/_build/latex/simple_tutorial_stage5_before.png differ
diff --git a/doc/_build/latex/simple_tutorial_stage5_flowchart.png b/doc/_build/latex/simple_tutorial_stage5_flowchart.png
new file mode 100644
index 0000000..3714afb
Binary files /dev/null and b/doc/_build/latex/simple_tutorial_stage5_flowchart.png differ
diff --git a/doc/_build/latex/simple_tutorial_zoo_animals_formatter_example.jpg b/doc/_build/latex/simple_tutorial_zoo_animals_formatter_example.jpg
new file mode 100644
index 0000000..cfbfcc4
Binary files /dev/null and b/doc/_build/latex/simple_tutorial_zoo_animals_formatter_example.jpg differ
diff --git a/doc/_build/latex/sphinx.sty b/doc/_build/latex/sphinx.sty
new file mode 100644
index 0000000..9b083cc
--- /dev/null
+++ b/doc/_build/latex/sphinx.sty
@@ -0,0 +1,520 @@
+%
+% sphinx.sty
+%
+% Adapted from the old python.sty, mostly written by Fred Drake,
+% by Georg Brandl.
+%
+
+\NeedsTeXFormat{LaTeX2e}[1995/12/01]
+\ProvidesPackage{sphinx}[2010/01/15 LaTeX package (Sphinx markup)]
+
+\@ifclassloaded{memoir}{}{\RequirePackage{fancyhdr}}
+
+\RequirePackage{textcomp}
+\RequirePackage{fancybox}
+\RequirePackage{titlesec}
+\RequirePackage{tabulary}
+\RequirePackage{amsmath} % for \text
+\RequirePackage{makeidx}
+\RequirePackage{framed}
+\RequirePackage{ifthen}
+\RequirePackage{color}
+% For highlighted code.
+\RequirePackage{fancyvrb}
+% For table captions.
+\RequirePackage{threeparttable}
+% Handle footnotes in tables.
+\RequirePackage{footnote}
+\makesavenoteenv{tabulary}
+% For floating figures in the text.
+\RequirePackage{wrapfig}
+% Separate paragraphs by space by default.
+\RequirePackage{parskip}
+
+% Redefine these colors to your liking in the preamble.
+\definecolor{TitleColor}{rgb}{0.126,0.263,0.361}
+\definecolor{InnerLinkColor}{rgb}{0.208,0.374,0.486}
+\definecolor{OuterLinkColor}{rgb}{0.216,0.439,0.388}
+% Redefine these colors to something not white if you want to have colored
+% background and border for code examples.
+\definecolor{VerbatimColor}{rgb}{1,1,1}
+\definecolor{VerbatimBorderColor}{rgb}{1,1,1}
+
+% Uncomment these two lines to ignore the paper size and make the page
+% size more like a typical published manual.
+%\renewcommand{\paperheight}{9in}
+%\renewcommand{\paperwidth}{8.5in} % typical squarish manual
+%\renewcommand{\paperwidth}{7in} % O'Reilly ``Programming Python''
+
+% use pdfoutput for pTeX and dvipdfmx
+\ifx\kanjiskip\undefined\else
+ \ifx\Gin@driver{dvipdfmx.def}\undefined\else
+ \newcount\pdfoutput\pdfoutput=0
+ \fi
+\fi
+
+% For graphicx, check if we are compiling under latex or pdflatex.
+\ifx\pdftexversion\undefined
+ \usepackage{graphicx}
+\else
+ \usepackage[pdftex]{graphicx}
+\fi
+
+% for PDF output, use colors and maximal compression
+\newif\ifsphinxpdfoutput\sphinxpdfoutputfalse
+\ifx\pdfoutput\undefined\else\ifcase\pdfoutput
+ \let\py@NormalColor\relax
+ \let\py@TitleColor\relax
+\else
+ \sphinxpdfoutputtrue
+ \input{pdfcolor}
+ \def\py@NormalColor{\color[rgb]{0.0,0.0,0.0}}
+ \def\py@TitleColor{\color{TitleColor}}
+ \pdfcompresslevel=9
+\fi\fi
+
+% XeLaTeX can do colors, too
+\ifx\XeTeXrevision\undefined\else
+ \def\py@NormalColor{\color[rgb]{0.0,0.0,0.0}}
+ \def\py@TitleColor{\color{TitleColor}}
+\fi
+
+% Increase printable page size (copied from fullpage.sty)
+\topmargin 0pt
+\advance \topmargin by -\headheight
+\advance \topmargin by -\headsep
+
+% attempt to work a little better for A4 users
+\textheight \paperheight
+\advance\textheight by -2in
+
+\oddsidemargin 0pt
+\evensidemargin 0pt
+%\evensidemargin -.25in % for ``manual size'' documents
+\marginparwidth 0.5in
+
+\textwidth \paperwidth
+\advance\textwidth by -2in
+
+
+% Style parameters and macros used by most documents here
+\raggedbottom
+\sloppy
+\hbadness = 5000 % don't print trivial gripes
+
+\pagestyle{empty} % start this way
+
+% Use this to set the font family for headers and other decor:
+\newcommand{\py@HeaderFamily}{\sffamily\bfseries}
+
+% Redefine the 'normal' header/footer style when using "fancyhdr" package:
+\@ifundefined{fancyhf}{}{
+ % Use \pagestyle{normal} as the primary pagestyle for text.
+ \fancypagestyle{normal}{
+ \fancyhf{}
+ \fancyfoot[LE,RO]{{\py@HeaderFamily\thepage}}
+ \fancyfoot[LO]{{\py@HeaderFamily\nouppercase{\rightmark}}}
+ \fancyfoot[RE]{{\py@HeaderFamily\nouppercase{\leftmark}}}
+ \fancyhead[LE,RO]{{\py@HeaderFamily \@title, \py@release}}
+ \renewcommand{\headrulewidth}{0.4pt}
+ \renewcommand{\footrulewidth}{0.4pt}
+ % define chaptermark with \@chappos when \@chappos is available for Japanese
+ \ifx\@chappos\undefined\else
+ \def\chaptermark##1{\markboth{\@chapapp\space\thechapter\space\@chappos\space ##1}{}}
+ \fi
+ }
+ % Update the plain style so we get the page number & footer line,
+ % but not a chapter or section title. This is to keep the first
+ % page of a chapter and the blank page between chapters `clean.'
+ \fancypagestyle{plain}{
+ \fancyhf{}
+ \fancyfoot[LE,RO]{{\py@HeaderFamily\thepage}}
+ \renewcommand{\headrulewidth}{0pt}
+ \renewcommand{\footrulewidth}{0.4pt}
+ }
+}
+
+% Some custom font markup commands.
+%
+\newcommand{\strong}[1]{{\textbf{#1}}}
+\newcommand{\code}[1]{\texttt{#1}}
+\newcommand{\bfcode}[1]{\code{\bfseries#1}}
+\newcommand{\email}[1]{\textsf{#1}}
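+% Usage sketch (hypothetical examples, not part of the package): the generated
+% LaTeX marks up inline text with these commands, e.g. \strong{important} for
+% bold, \code{pipeline_run()} for monospace and \bfcode{pipeline_run()} for
+% bold monospace; \email{...} sets an address in sans-serif.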
+
+% Redefine the Verbatim environment to allow border and background colors.
+% The original environment is still used for verbatims within tables.
+\let\OriginalVerbatim=\Verbatim
+\let\endOriginalVerbatim=\endVerbatim
+
+% Play with vspace to be able to keep the indentation.
+\newlength\distancetoright
+\def\mycolorbox#1{%
+ \setlength\distancetoright{\linewidth}%
+ \advance\distancetoright -\@totalleftmargin %
+ \fcolorbox{VerbatimBorderColor}{VerbatimColor}{%
+ \begin{minipage}{\distancetoright}%
+ #1
+ \end{minipage}%
+ }%
+}
+\def\FrameCommand{\mycolorbox}
+
+\renewcommand{\Verbatim}[1][1]{%
+ % list starts new par, but we don't want it to be set apart vertically
+ \bgroup\parskip=0pt%
+ \smallskip%
+ % The list environment is needed to control the vertical
+ % space perfectly.
+ \list{}{%
+ \setlength\parskip{0pt}%
+ \setlength\itemsep{0ex}%
+ \setlength\topsep{0ex}%
+ \setlength\partopsep{0pt}%
+ \setlength\leftmargin{0pt}%
+ }%
+ \item\MakeFramed {\FrameRestore}%
+ \small%
+ \OriginalVerbatim[#1]%
+}
+\renewcommand{\endVerbatim}{%
+ \endOriginalVerbatim%
+ \endMakeFramed%
+ \endlist%
+ % close group to restore \parskip
+ \egroup%
+}
+
+
+% \moduleauthor{name}{email}
+\newcommand{\moduleauthor}[2]{}
+
+% \sectionauthor{name}{email}
+\newcommand{\sectionauthor}[2]{}
+
+% Augment the sectioning commands used to get our own font family in place,
+% and reset some internal data items:
+\titleformat{\section}{\Large\py@HeaderFamily}%
+ {\py@TitleColor\thesection}{0.5em}{\py@TitleColor}{\py@NormalColor}
+\titleformat{\subsection}{\large\py@HeaderFamily}%
+ {\py@TitleColor\thesubsection}{0.5em}{\py@TitleColor}{\py@NormalColor}
+\titleformat{\subsubsection}{\py@HeaderFamily}%
+ {\py@TitleColor\thesubsubsection}{0.5em}{\py@TitleColor}{\py@NormalColor}
+\titleformat{\paragraph}{\small\py@HeaderFamily}%
+ {\py@TitleColor}{0em}{\py@TitleColor}{\py@NormalColor}
+
+% {fulllineitems} is the main environment for object descriptions.
+%
+\newcommand{\py@itemnewline}[1]{%
+ \@tempdima\linewidth%
+ \advance\@tempdima \leftmargin\makebox[\@tempdima][l]{#1}%
+}
+
+\newenvironment{fulllineitems}{
+ \begin{list}{}{\labelwidth \leftmargin \labelsep 0pt
+ \rightmargin 0pt \topsep -\parskip \partopsep \parskip
+ \itemsep -\parsep
+ \let\makelabel=\py@itemnewline}
+}{\end{list}}
+
+% \optional is used for ``[, arg]``, i.e. desc_optional nodes.
+\newcommand{\optional}[1]{%
+ {\textnormal{\Large[}}{#1}\hspace{0.5mm}{\textnormal{\Large]}}}
+
+\newlength{\py@argswidth}
+\newcommand{\py@sigparams}[2]{%
+ \parbox[t]{\py@argswidth}{#1\code{)}#2}}
+\newcommand{\pysigline}[1]{\item[#1]\nopagebreak}
+\newcommand{\pysiglinewithargsret}[3]{%
+ \settowidth{\py@argswidth}{#1\code{(}}%
+ \addtolength{\py@argswidth}{-2\py@argswidth}%
+ \addtolength{\py@argswidth}{\linewidth}%
+ \item[#1\code{(}\py@sigparams{#2}{#3}]}
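+% Usage sketch (hypothetical call, for illustration only): inside a
+% fulllineitems environment a callable signature is typeset as
+%   \pysiglinewithargsret{\bfcode{pipeline\_run}}{tasks, verbose=1}{}
+% where the three arguments are the name, the parameter list and an
+% optional return annotation appended after the closing parenthesis.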
+
+% Production lists
+%
+\newenvironment{productionlist}{
+% \def\optional##1{{\Large[}##1{\Large]}}
+ \def\production##1##2{\\\code{##1}&::=&\code{##2}}
+ \def\productioncont##1{\\& &\code{##1}}
+ \parindent=2em
+ \indent
+ \begin{tabular}{lcl}
+}{%
+ \end{tabular}
+}
+
+% Notices / Admonitions
+%
+\newlength{\py@noticelength}
+
+\newcommand{\py@heavybox}{
+ \setlength{\fboxrule}{1pt}
+ \setlength{\fboxsep}{6pt}
+ \setlength{\py@noticelength}{\linewidth}
+ \addtolength{\py@noticelength}{-2\fboxsep}
+ \addtolength{\py@noticelength}{-2\fboxrule}
+ %\setlength{\shadowsize}{3pt}
+ \noindent\Sbox
+ \minipage{\py@noticelength}
+}
+\newcommand{\py@endheavybox}{
+ \endminipage
+ \endSbox
+ \fbox{\TheSbox}
+}
+
+\newcommand{\py@lightbox}{{%
+ \setlength\parskip{0pt}\par
+ \noindent\rule[0ex]{\linewidth}{0.5pt}%
+ \par\noindent\vspace{-0.5ex}%
+ }}
+\newcommand{\py@endlightbox}{{%
+ \setlength{\parskip}{0pt}%
+ \par\noindent\rule[0.5ex]{\linewidth}{0.5pt}%
+ \par\vspace{-0.5ex}%
+ }}
+
+% Some are quite plain:
+\newcommand{\py@noticestart@note}{\py@lightbox}
+\newcommand{\py@noticeend@note}{\py@endlightbox}
+\newcommand{\py@noticestart@hint}{\py@lightbox}
+\newcommand{\py@noticeend@hint}{\py@endlightbox}
+\newcommand{\py@noticestart@important}{\py@lightbox}
+\newcommand{\py@noticeend@important}{\py@endlightbox}
+\newcommand{\py@noticestart@tip}{\py@lightbox}
+\newcommand{\py@noticeend@tip}{\py@endlightbox}
+
+% Others get a more visible distinction:
+\newcommand{\py@noticestart@warning}{\py@heavybox}
+\newcommand{\py@noticeend@warning}{\py@endheavybox}
+\newcommand{\py@noticestart@caution}{\py@heavybox}
+\newcommand{\py@noticeend@caution}{\py@endheavybox}
+\newcommand{\py@noticestart@attention}{\py@heavybox}
+\newcommand{\py@noticeend@attention}{\py@endheavybox}
+\newcommand{\py@noticestart@danger}{\py@heavybox}
+\newcommand{\py@noticeend@danger}{\py@endheavybox}
+\newcommand{\py@noticestart@error}{\py@heavybox}
+\newcommand{\py@noticeend@error}{\py@endheavybox}
+
+\newenvironment{notice}[2]{
+ \def\py@noticetype{#1}
+ \csname py@noticestart@#1\endcsname
+ \strong{#2}
+}{\csname py@noticeend@\py@noticetype\endcsname}
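+% Usage sketch (hypothetical content, for illustration only): admonitions are
+% wrapped in the notice environment defined above, passing the notice type
+% and a title, e.g.
+%   \begin{notice}{warning}{Warning:}
+%     body of the warning
+%   \end{notice}
+% which opens with \py@noticestart@warning and closes with \py@noticeend@warning.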
+
+% Allow the release number to be specified independently of the
+% \date{}. This allows the date to reflect the document's date and
+% release to specify the release that is documented.
+%
+\newcommand{\py@release}{}
+\newcommand{\version}{}
+\newcommand{\shortversion}{}
+\newcommand{\releaseinfo}{}
+\newcommand{\releasename}{Release}
+\newcommand{\release}[1]{%
+ \renewcommand{\py@release}{\releasename\space\version}%
+ \renewcommand{\version}{#1}}
+\newcommand{\setshortversion}[1]{%
+ \renewcommand{\shortversion}{#1}}
+\newcommand{\setreleaseinfo}[1]{%
+ \renewcommand{\releaseinfo}{#1}}
+
+% Allow specification of the author's address separately from the
+% author's name. This can be used to format them differently, which
+% is a good thing.
+%
+\newcommand{\py@authoraddress}{}
+\newcommand{\authoraddress}[1]{\renewcommand{\py@authoraddress}{#1}}
+
+% This sets up the fancy chapter headings that make the documents look
+% at least a little better than the usual LaTeX output.
+%
+\@ifundefined{ChTitleVar}{}{
+ \ChNameVar{\raggedleft\normalsize\py@HeaderFamily}
+ \ChNumVar{\raggedleft \bfseries\Large\py@HeaderFamily}
+ \ChTitleVar{\raggedleft \textrm{\Huge\py@HeaderFamily}}
+ % This creates chapter heads without the leading \vspace*{}:
+ \def\@makechapterhead#1{%
+ {\parindent \z@ \raggedright \normalfont
+ \ifnum \c@secnumdepth >\m@ne
+ \DOCH
+ \fi
+ \interlinepenalty\@M
+ \DOTI{#1}
+ }
+ }
+}
+
+% Redefine description environment so that it is usable inside fulllineitems.
+%
+\renewcommand{\description}{%
+ \list{}{\labelwidth\z@%
+ \itemindent-\leftmargin%
+ \labelsep5pt%
+ \let\makelabel=\descriptionlabel}}
+
+% Definition lists; requested by AMK for HOWTO documents. Probably useful
+% elsewhere as well, so keep it in the general style support.
+%
+\newenvironment{definitions}{%
+ \begin{description}%
+ \def\term##1{\item[##1]\mbox{}\\*[0mm]}
+}{%
+ \end{description}%
+}
+
+% Tell TeX about pathological hyphenation cases:
+\hyphenation{Base-HTTP-Re-quest-Hand-ler}
+
+
+% The following is stuff copied from docutils' latex writer.
+%
+\newcommand{\optionlistlabel}[1]{\bf #1 \hfill}
+\newenvironment{optionlist}[1]
+{\begin{list}{}
+ {\setlength{\labelwidth}{#1}
+ \setlength{\rightmargin}{1cm}
+ \setlength{\leftmargin}{\rightmargin}
+ \addtolength{\leftmargin}{\labelwidth}
+ \addtolength{\leftmargin}{\labelsep}
+ \renewcommand{\makelabel}{\optionlistlabel}}
+}{\end{list}}
+
+\newlength{\lineblockindentation}
+\setlength{\lineblockindentation}{2.5em}
+\newenvironment{lineblock}[1]
+{\begin{list}{}
+ {\setlength{\partopsep}{\parskip}
+ \addtolength{\partopsep}{\baselineskip}
+ \topsep0pt\itemsep0.15\baselineskip\parsep0pt
+ \leftmargin#1}
+ \raggedright}
+{\end{list}}
+
+% Redefine \includegraphics to avoid images larger than the line width
+% when no size is specified.
+\let\py at Oldincludegraphics\includegraphics
+
+\newbox\image at box%
+\newdimen\image at width%
+\renewcommand\includegraphics[2][\@empty]{%
+ \ifx#1\@empty%
+ \setbox\image at box=\hbox{\py at Oldincludegraphics{#2}}%
+ \image at width\wd\image at box%
+ \ifdim \image at width>\linewidth%
+ \setbox\image at box=\hbox{\py at Oldincludegraphics[width=\linewidth]{#2}}%
+ \box\image at box%
+ \else%
+ \py at Oldincludegraphics{#2}%
+ \fi%
+ \else%
+ \py at Oldincludegraphics[#1]{#2}%
+ \fi%
+}
+
+% to make pdf with correct encoded bookmarks in Japanese
+% this should precede the hyperref package
+\ifx\kanjiskip\undefined\else
+ \usepackage{atbegshi}
+ \ifx\ucs\undefined
+ \ifnum 42146=\euc"A4A2
+ \AtBeginShipoutFirst{\special{pdf:tounicode EUC-UCS2}}
+ \else
+ \AtBeginShipoutFirst{\special{pdf:tounicode 90ms-RKSJ-UCS2}}
+ \fi
+ \else
+ \AtBeginShipoutFirst{\special{pdf:tounicode UTF8-UCS2}}
+ \fi
+\fi
+
+% Include hyperref last.
+\RequirePackage[colorlinks,breaklinks,
+ linkcolor=InnerLinkColor,filecolor=OuterLinkColor,
+ menucolor=OuterLinkColor,urlcolor=OuterLinkColor,
+ citecolor=InnerLinkColor]{hyperref}
+% Fix anchor placement for figures with captions.
+% (Note: we don't use a package option here; instead, we give an explicit
+% \capstart for figures that actually have a caption.)
+\RequirePackage{hypcap}
+
+% From docutils.writers.latex2e
+\providecommand{\DUspan}[2]{%
+ {% group ("span") to limit the scope of styling commands
+ \@for\node at class@name:=#1\do{%
+ \ifcsname docutilsrole\node at class@name\endcsname%
+ \csname docutilsrole\node at class@name\endcsname%
+ \fi%
+ }%
+ {#2}% node content
+ }% close "span"
+}
+
+\providecommand*{\DUprovidelength}[2]{
+ \ifthenelse{\isundefined{#1}}{\newlength{#1}\setlength{#1}{#2}}{}
+}
+
+\DUprovidelength{\DUlineblockindent}{2.5em}
+\ifthenelse{\isundefined{\DUlineblock}}{
+ \newenvironment{DUlineblock}[1]{%
+ \list{}{\setlength{\partopsep}{\parskip}
+ \addtolength{\partopsep}{\baselineskip}
+ \setlength{\topsep}{0pt}
+ \setlength{\itemsep}{0.15\baselineskip}
+ \setlength{\parsep}{0pt}
+ \setlength{\leftmargin}{#1}}
+ \raggedright
+ }
+ {\endlist}
+}{}
+
+
+% From footmisc.sty: allows footnotes in titles
+\let\FN at sf@@footnote\footnote
+\def\footnote{\ifx\protect\@typeset at protect
+ \expandafter\FN at sf@@footnote
+ \else
+ \expandafter\FN at sf@gobble at opt
+ \fi
+}
+\edef\FN at sf@gobble at opt{\noexpand\protect
+ \expandafter\noexpand\csname FN at sf@gobble at opt \endcsname}
+\expandafter\def\csname FN at sf@gobble at opt \endcsname{%
+ \@ifnextchar[%]
+ \FN at sf@gobble at twobracket
+ \@gobble
+}
+\def\FN at sf@gobble at twobracket[#1]#2{}
+
+% adjust the margins for footer,
+% this works with the jsclasses only (Japanese standard document classes)
+\ifx\@jsc at uplatextrue\undefined\else
+ \hypersetup{setpagesize=false}
+ \setlength\footskip{2\baselineskip}
+ \addtolength{\textheight}{-2\baselineskip}
+\fi
+
+% fix the double index and bibliography on the table of contents
+% in jsclasses (Japanese standard document classes)
+\ifx\@jsc at uplatextrue\undefined\else
+ \renewcommand{\theindex}{
+ \cleardoublepage
+ \phantomsection
+ \py at OldTheindex
+ }
+ \renewcommand{\thebibliography}[1]{
+ \cleardoublepage
+ \phantomsection
+ \py at OldThebibliography{1}
+ }
+\fi
+
+% do not use \@chappos in Appendix in pTeX
+\ifx\kanjiskip\undefined\else
+ \renewcommand{\appendix}{\par
+ \setcounter{chapter}{0}
+ \setcounter{section}{0}
+ \gdef\@chapapp{\appendixname}
+ \gdef\@chappos{}
+ \gdef\thechapter{\@Alph\c at chapter}
+ }
+\fi
diff --git a/doc/_build/latex/sphinxhowto.cls b/doc/_build/latex/sphinxhowto.cls
new file mode 100644
index 0000000..26e63a7
--- /dev/null
+++ b/doc/_build/latex/sphinxhowto.cls
@@ -0,0 +1,104 @@
+%
+% sphinxhowto.cls for Sphinx (http://sphinx-doc.org/)
+%
+
+\NeedsTeXFormat{LaTeX2e}[1995/12/01]
+\ProvidesClass{sphinxhowto}[2009/06/02 Document class (Sphinx HOWTO)]
+
+% 'oneside' option overriding the 'twoside' default
+\newif\if at oneside
+\DeclareOption{oneside}{\@onesidetrue}
+% Pass remaining document options to the parent class.
+\DeclareOption*{\PassOptionsToClass{\CurrentOption}{\sphinxdocclass}}
+\ProcessOptions\relax
+
+% Default to a two-sided document
+\if at oneside
+% nothing to do (oneside is the default)
+\else
+\PassOptionsToClass{twoside}{\sphinxdocclass}
+\fi
+
+\LoadClass{\sphinxdocclass}
+
+% Set some sane defaults for section numbering depth and TOC depth. You can
+% reset these counters in your preamble.
+%
+\setcounter{secnumdepth}{2}
+
+% Change the title page to look a bit better, and fit in with the fncychap
+% ``Bjarne'' style a bit better.
+%
+\renewcommand{\maketitle}{
+ \rule{\textwidth}{1pt}
+ \ifsphinxpdfoutput
+ \begingroup
+ % These \defs are required to deal with multi-line authors; it
+ % changes \\ to ', ' (comma-space), making it pass muster for
+ % generating document info in the PDF file.
+ \def\\{, }
+ \def\and{and }
+ \pdfinfo{
+ /Author (\@author)
+ /Title (\@title)
+ }
+ \endgroup
+ \fi
+ \begin{flushright}
+ \sphinxlogo%
+ {\rm\Huge\py at HeaderFamily \@title} \par
+ {\em\large\py at HeaderFamily \py at release\releaseinfo} \par
+ \vspace{25pt}
+ {\Large\py at HeaderFamily
+ \begin{tabular}[t]{c}
+ \@author
+ \end{tabular}} \par
+ \vspace{25pt}
+ \@date \par
+ \py at authoraddress \par
+ \end{flushright}
+ \@thanks
+ \setcounter{footnote}{0}
+ \let\thanks\relax\let\maketitle\relax
+ %\gdef\@thanks{}\gdef\@author{}\gdef\@title{}
+}
+
+\let\py at OldTableofcontents=\tableofcontents
+\renewcommand{\tableofcontents}{
+ \begingroup
+ \parskip = 0mm
+ \py at OldTableofcontents
+ \endgroup
+ \rule{\textwidth}{1pt}
+ \vspace{12pt}
+}
+
+\@ifundefined{fancyhf}{
+ \pagestyle{plain}}{
+ \pagestyle{normal}} % start this way; change for
+\pagenumbering{arabic} % ToC & chapters
+
+\thispagestyle{empty}
+
+% Fix the bibliography environment to add an entry to the Table of
+% Contents.
+% For an article document class this environment is a section,
+% so no page break before it.
+\let\py at OldThebibliography=\thebibliography
+\renewcommand{\thebibliography}[1]{
+ \phantomsection
+ \py at OldThebibliography{1}
+ \addcontentsline{toc}{section}{\bibname}
+}
+
+% Same for the indices.
+% The memoir class already does this, so we don't duplicate it in that case.
+%
+\@ifclassloaded{memoir}{}{
+ \let\py at OldTheindex=\theindex
+ \renewcommand{\theindex}{
+ \phantomsection
+ \py at OldTheindex
+ \addcontentsline{toc}{section}{\indexname}
+ }
+}
diff --git a/doc/_build/latex/sphinxmanual.cls b/doc/_build/latex/sphinxmanual.cls
new file mode 100644
index 0000000..26df488
--- /dev/null
+++ b/doc/_build/latex/sphinxmanual.cls
@@ -0,0 +1,147 @@
+%
+% sphinxmanual.cls for Sphinx (http://sphinx-doc.org/)
+%
+
+\NeedsTeXFormat{LaTeX2e}[1995/12/01]
+\ProvidesClass{sphinxmanual}[2009/06/02 Document class (Sphinx manual)]
+
+% chapters starting at odd pages (overridden by 'openany' document option)
+\PassOptionsToClass{openright}{\sphinxdocclass}
+
+% 'oneside' option overriding the 'twoside' default
+\newif\if at oneside
+\DeclareOption{oneside}{\@onesidetrue}
+% Pass remaining document options to the parent class.
+\DeclareOption*{\PassOptionsToClass{\CurrentOption}{\sphinxdocclass}}
+\ProcessOptions\relax
+
+% Default to a two-sided document
+\if at oneside
+% nothing to do (oneside is the default)
+\else
+\PassOptionsToClass{twoside}{\sphinxdocclass}
+\fi
+
+\LoadClass{\sphinxdocclass}
+
+% Set some sane defaults for section numbering depth and TOC depth. You can
+% reset these counters in your preamble.
+%
+\setcounter{secnumdepth}{2}
+\setcounter{tocdepth}{1}
+
+% Change the title page to look a bit better, and fit in with the fncychap
+% ``Bjarne'' style a bit better.
+%
+\renewcommand{\maketitle}{%
+ \begin{titlepage}%
+ \let\footnotesize\small
+ \let\footnoterule\relax
+ \rule{\textwidth}{1pt}%
+ \ifsphinxpdfoutput
+ \begingroup
+ % These \defs are required to deal with multi-line authors; it
+ % changes \\ to ', ' (comma-space), making it pass muster for
+ % generating document info in the PDF file.
+ \def\\{, }
+ \def\and{and }
+ \pdfinfo{
+ /Author (\@author)
+ /Title (\@title)
+ }
+ \endgroup
+ \fi
+ \begin{flushright}%
+ \sphinxlogo%
+ {\rm\Huge\py at HeaderFamily \@title \par}%
+ {\em\LARGE\py at HeaderFamily \py at release\releaseinfo \par}
+ \vfill
+ {\LARGE\py at HeaderFamily
+ \begin{tabular}[t]{c}
+ \@author
+ \end{tabular}
+ \par}
+ \vfill\vfill
+ {\large
+ \@date \par
+ \vfill
+ \py at authoraddress \par
+ }%
+ \end{flushright}%\par
+ \@thanks
+ \end{titlepage}%
+ \cleardoublepage%
+ \setcounter{footnote}{0}%
+ \let\thanks\relax\let\maketitle\relax
+ %\gdef\@thanks{}\gdef\@author{}\gdef\@title{}
+}
+
+
+% Catch the end of the {abstract} environment, but here make sure the abstract
+% is followed by a blank page if the 'openright' option is used.
+%
+\let\py at OldEndAbstract=\endabstract
+\renewcommand{\endabstract}{
+ \if at openright
+ \ifodd\value{page}
+ \typeout{Adding blank page after the abstract.}
+ \vfil\pagebreak
+ \fi
+ \fi
+ \py at OldEndAbstract
+}
+
+% This wraps the \tableofcontents macro with all the magic to get the spacing
+% right and have the right number of pages if the 'openright' option has been
+% used. This eliminates a fair amount of crud in the individual document files.
+%
+\let\py at OldTableofcontents=\tableofcontents
+\renewcommand{\tableofcontents}{%
+ \setcounter{page}{1}%
+ \pagebreak%
+ \pagestyle{plain}%
+ {%
+ \parskip = 0mm%
+ \py at OldTableofcontents%
+ \if at openright%
+ \ifodd\value{page}%
+ \typeout{Adding blank page after the table of contents.}%
+ \pagebreak\hspace{0pt}%
+ \fi%
+ \fi%
+ \cleardoublepage%
+ }%
+ \pagenumbering{arabic}%
+ \@ifundefined{fancyhf}{}{\pagestyle{normal}}%
+}
+\pagenumbering{roman}
+
+% This is needed to get the width of the section # area wide enough in the
+% library reference. Doing it here keeps it the same for all the manuals.
+%
+\renewcommand*\l at section{\@dottedtocline{1}{1.5em}{2.6em}}
+\renewcommand*\l at subsection{\@dottedtocline{2}{4.1em}{3.5em}}
+
+% Fix the bibliography environment to add an entry to the Table of
+% Contents.
+% For a report document class this environment is a chapter.
+\let\py at OldThebibliography=\thebibliography
+\renewcommand{\thebibliography}[1]{
+ \cleardoublepage
+ \phantomsection
+ \py at OldThebibliography{1}
+ \addcontentsline{toc}{chapter}{\bibname}
+}
+
+% Same for the indices.
+% The memoir class already does this, so we don't duplicate it in that case.
+%
+\@ifclassloaded{memoir}{}{
+ \let\py at OldTheindex=\theindex
+ \renewcommand{\theindex}{
+ \cleardoublepage
+ \phantomsection
+ \py at OldTheindex
+ \addcontentsline{toc}{chapter}{\indexname}
+ }
+}
diff --git a/doc/_build/latex/tabulary.sty b/doc/_build/latex/tabulary.sty
new file mode 100644
index 0000000..ba83c0a
--- /dev/null
+++ b/doc/_build/latex/tabulary.sty
@@ -0,0 +1,452 @@
+%%
+%% This is file `tabulary.sty',
+%% generated with the docstrip utility.
+%%
+%% The original source files were:
+%%
+%% tabulary.dtx (with options: `package')
+%% DRAFT VERSION
+%%
+%% File `tabulary.dtx'.
+%% Copyright (C) 1995 1996 2003 David Carlisle
+%% This file may be distributed under the terms of the LPPL.
+%% See 00readme.txt for details.
+%%
+\NeedsTeXFormat{LaTeX2e}
+\ProvidesPackage{tabulary}
+ [2007/10/02 v0.9 tabulary package (DPC)]
+\RequirePackage{array}
+\catcode`\Z=14
+\DeclareOption{debugshow}{\catcode`\Z=9\relax}
+\ProcessOptions
+\def\arraybackslash{\let\\=\@arraycr}
+\def\@finalstrut#1{%
+ \unskip\ifhmode\nobreak\fi\vrule\@width\z@\@height\z@\@depth\dp#1}
+\newcount\TY at count
+\def\tabulary{%
+ \let\TY at final\tabular
+ \let\endTY at final\endtabular
+ \TY at tabular}
+\def\TY at tabular#1{%
+ \edef\TY@{\@currenvir}%
+ {\ifnum0=`}\fi
+ \@ovxx\TY at linewidth
+ \@ovyy\TY at tablewidth
+ \count@\z@
+ \@tempswatrue
+ \@whilesw\if at tempswa\fi{%
+ \advance\count@\@ne
+ \expandafter\ifx\csname TY at F\the\count@\endcsname\relax
+ \@tempswafalse
+ \else
+ \expandafter\let\csname TY at SF\the\count@\expandafter\endcsname
+ \csname TY at F\the\count@\endcsname
+ \global\expandafter\let\csname TY at F\the\count@\endcsname\relax
+ \expandafter\let\csname TY at S\the\count@\expandafter\endcsname
+ \csname TY@\the\count@\endcsname
+ \fi}%
+ \global\TY at count\@ne
+ \TY at width\xdef{0pt}%
+ \global\TY at tablewidth\z@
+ \global\TY at linewidth#1\relax
+Z\message{^^J^^JTable^^J%
+Z Target Width: \the\TY at linewidth^^J%
+Z \string\tabcolsep: \the\tabcolsep\space
+Z \string\arrayrulewidth: \the\arrayrulewidth\space
+Z \string\doublerulesep: \the\doublerulesep^^J%
+Z \string\tymin: \the\tymin\space
+Z \string\tymax: \the\tymax^^J}%
+ \let\@classz\TY at classz
+ \let\verb\TX at verb
+ \toks@{}\TY at get@body}
+\let\TY@@mkpream\@mkpream
+\def\TY at mkpream{%
+ \def\@addamp{%
+ \if at firstamp \@firstampfalse \else
+ \global\advance\TY at count\@ne
+ \edef\@preamble{\@preamble &}\fi
+ \TY at width\xdef{0pt}}%
+ \def\@acol{%
+ \TY at subwidth\col at sep
+ \@addtopreamble{\hskip\col at sep}}%
+ \let\@arrayrule\TY at arrayrule
+ \let\@classvi\TY at classvi
+ \def\@classv{\save at decl
+ \expandafter\NC at ecs\@nextchar\extracolsep{}\extracolsep\@@@
+ \sbox\z@{\d at llarbegin\@nextchar\d at llarend}%
+ \TY at subwidth{\wd\z@}%
+ \@addtopreamble{\d at llarbegin\the at toks\the\count@\relax\d at llarend}%
+ \prepnext at tok}%
+ \global\let\@mkpream\TY@@mkpream
+ \TY@@mkpream}
+\def\TY at arrayrule{%
+ \TY at subwidth\arrayrulewidth
+ \@addtopreamble \vline}
+\def\TY at classvi{\ifcase \@lastchclass
+ \@acol \or
+ \TY at subwidth\doublerulesep
+ \@addtopreamble{\hskip \doublerulesep}\or
+ \@acol \or
+ \@classvii
+ \fi}
+\def\TY at tab{%
+ \setbox\z@\hbox\bgroup
+ \let\[$\let\]$%
+ \let\equation$\let\endequation$%
+ \col at sep\tabcolsep
+ \let\d at llarbegin\begingroup\let\d at llarend\endgroup
+ \let\@mkpream\TY at mkpream
+ \def\multicolumn##1##2##3{\multispan##1\relax}%
+ \CT at start\TY at tabarray}
+\def\TY at tabarray{\@ifnextchar[{\TY at array}{\@array[t]}}
+\def\TY at array[#1]{\@array[t]}
+\def\TY at width#1{%
+ \expandafter#1\csname TY@\the\TY at count\endcsname}
+\def\TY at subwidth#1{%
+ \TY at width\dimen@
+ \advance\dimen at -#1\relax
+ \TY at width\xdef{\the\dimen@}%
+ \global\advance\TY at linewidth-#1\relax}
+\def\endtabulary{%
+ \gdef\@halignto{}%
+ \let\TY at footnote\footnote%
+ \def\footnote{}% prevent footnotes from doing anything
+ \expandafter\TY at tab\the\toks@
+ \crcr\omit
+ {\xdef\TY at save@row{}%
+ \loop
+ \advance\TY at count\m at ne
+ \ifnum\TY at count>\z@
+ \xdef\TY at save@row{\TY at save@row&\omit}%
+ \repeat}\TY at save@row
+ \endarray\global\setbox1=\lastbox\setbox0=\vbox{\unvbox1
+ \unskip\global\setbox1=\lastbox}\egroup
+ \dimen@\TY at linewidth
+ \divide\dimen@\TY at count
+ \ifdim\dimen@<\tymin
+ \TY at warn{tymin too large (\the\tymin), resetting to \the\dimen@}%
+ \tymin\dimen@
+ \fi
+ \setbox\tw@=\hbox{\unhbox\@ne
+ \loop
+\@tempdima=\lastskip
+\ifdim\@tempdima>\z@
+Z \message{ecs=\the\@tempdima^^J}%
+ \global\advance\TY at linewidth-\@tempdima
+\fi
+ \unskip
+ \setbox\tw@=\lastbox
+ \ifhbox\tw@
+Z \message{Col \the\TY at count: Initial=\the\wd\tw@\space}%
+ \ifdim\wd\tw@>\tymax
+ \wd\tw@\tymax
+Z \message{> max\space}%
+Z \else
+Z \message{ \@spaces\space}%
+ \fi
+ \TY at width\dimen@
+Z \message{\the\dimen@\space}%
+ \advance\dimen@\wd\tw@
+Z \message{Final=\the\dimen@\space}%
+ \TY at width\xdef{\the\dimen@}%
+ \ifdim\dimen@<\tymin
+Z \message{< tymin}%
+ \global\advance\TY at linewidth-\dimen@
+ \expandafter\xdef\csname TY at F\the\TY at count\endcsname
+ {\the\dimen@}%
+ \else
+ \expandafter\ifx\csname TY at F\the\TY at count\endcsname\z@
+Z \message{***}%
+ \global\advance\TY at linewidth-\dimen@
+ \expandafter\xdef\csname TY at F\the\TY at count\endcsname
+ {\the\dimen@}%
+ \else
+Z \message{> tymin}%
+ \global\advance\TY at tablewidth\dimen@
+ \global\expandafter\let\csname TY at F\the\TY at count\endcsname
+ \maxdimen
+ \fi\fi
+ \advance\TY at count\m at ne
+ \repeat}%
+ \TY at checkmin
+ \TY at checkmin
+ \TY at checkmin
+ \TY at checkmin
+ \TY at count\z@
+ \let\TY at box\TY at box@v
+ \let\footnote\TY at footnote % restore footnotes
+ {\expandafter\TY at final\the\toks@\endTY at final}%
+ \count@\z@
+ \@tempswatrue
+ \@whilesw\if at tempswa\fi{%
+ \advance\count@\@ne
+ \expandafter\ifx\csname TY at SF\the\count@\endcsname\relax
+ \@tempswafalse
+ \else
+ \global\expandafter\let\csname TY at F\the\count@\expandafter\endcsname
+ \csname TY at SF\the\count@\endcsname
+ \global\expandafter\let\csname TY@\the\count@\expandafter\endcsname
+ \csname TY at S\the\count@\endcsname
+ \fi}%
+ \TY at linewidth\@ovxx
+ \TY at tablewidth\@ovyy
+ \ifnum0=`{\fi}}
+\def\TY at checkmin{%
+ \let\TY at checkmin\relax
+\ifdim\TY at tablewidth>\z@
+ \Gscale at div\TY at ratio\TY at linewidth\TY at tablewidth
+ \ifdim\TY at tablewidth <\linewidth
+ \def\TY at ratio{1}%
+ \fi
+\else
+ \TY at warn{No suitable columns!}%
+ \def\TY at ratio{1}%
+\fi
+\count@\z@
+Z \message{^^JLine Width: \the\TY at linewidth,
+Z Natural Width: \the\TY at tablewidth,
+Z Ratio: \TY at ratio^^J}%
+\@tempdima\z@
+\loop
+\ifnum\count@<\TY at count
+\advance\count@\@ne
+ \ifdim\csname TY at F\the\count@\endcsname>\tymin
+ \dimen@\csname TY@\the\count@\endcsname
+ \dimen@\TY at ratio\dimen@
+ \ifdim\dimen@<\tymin
+Z \message{Column \the\count@\space ->}%
+ \global\expandafter\let\csname TY at F\the\count@\endcsname\tymin
+ \global\advance\TY at linewidth-\tymin
+ \global\advance\TY at tablewidth-\csname TY@\the\count@\endcsname
+ \let\TY at checkmin\TY@@checkmin
+ \else
+ \expandafter\xdef\csname TY at F\the\count@\endcsname{\the\dimen@}%
+ \advance\@tempdima\csname TY at F\the\count@\endcsname
+ \fi
+ \fi
+Z \dimen@\csname TY at F\the\count@\endcsname\message{\the\dimen@, }%
+\repeat
+Z \message{^^JTotal:\the\@tempdima^^J}%
+}
+\let\TY@@checkmin\TY at checkmin
+\newdimen\TY at linewidth
+\def\tyformat{\everypar{{\nobreak\hskip\z at skip}}}
+\newdimen\tymin
+\tymin=10pt
+\newdimen\tymax
+\tymax=2\textwidth
+\def\@testpach{\@chclass
+ \ifnum \@lastchclass=6 \@ne \@chnum \@ne \else
+ \ifnum \@lastchclass=7 5 \else
+ \ifnum \@lastchclass=8 \tw@ \else
+ \ifnum \@lastchclass=9 \thr@@
+ \else \z@
+ \ifnum \@lastchclass = 10 \else
+ \edef\@nextchar{\expandafter\string\@nextchar}%
+ \@chnum
+ \if \@nextchar c\z@ \else
+ \if \@nextchar l\@ne \else
+ \if \@nextchar r\tw@ \else
+ \if \@nextchar C7 \else
+ \if \@nextchar L8 \else
+ \if \@nextchar R9 \else
+ \if \@nextchar J10 \else
+ \z@ \@chclass
+ \if\@nextchar |\@ne \else
+ \if \@nextchar !6 \else
+ \if \@nextchar @7 \else
+ \if \@nextchar <8 \else
+ \if \@nextchar >9 \else
+ 10
+ \@chnum
+ \if \@nextchar m\thr@@\else
+ \if \@nextchar p4 \else
+ \if \@nextchar b5 \else
+ \z@ \@chclass \z@ \@preamerr \z@ \fi \fi \fi \fi\fi \fi \fi\fi \fi
+ \fi \fi \fi \fi \fi \fi \fi \fi \fi \fi \fi}
+\def\TY at classz{%
+ \@classx
+ \@tempcnta\count@
+ \ifx\TY at box\TY at box@v
+ \global\advance\TY at count\@ne
+ \fi
+ \let\centering c%
+ \let\raggedright\noindent
+ \let\raggedleft\indent
+ \let\arraybackslash\relax
+ \prepnext at tok
+ \ifnum\@chnum<4
+ \global\expandafter\let\csname TY at F\the\TY at count\endcsname\z@
+ \fi
+ \ifnum\@chnum=6
+ \global\expandafter\let\csname TY at F\the\TY at count\endcsname\z@
+ \fi
+ \@addtopreamble{%
+ \ifcase\@chnum
+ \hfil \d at llarbegin\insert at column\d at llarend \hfil \or
+ \kern\z@
+ \d at llarbegin \insert at column \d at llarend \hfil \or
+ \hfil\kern\z@ \d at llarbegin \insert at column \d at llarend \or
+ $\vcenter\@startpbox{\@nextchar}\insert at column \@endpbox $\or
+ \vtop \@startpbox{\@nextchar}\insert at column \@endpbox \or
+ \vbox \@startpbox{\@nextchar}\insert at column \@endpbox \or
+ \d at llarbegin \insert at column \d at llarend \or% dubious "s" case
+ \TY at box\centering\or
+ \TY at box\raggedright\or
+ \TY at box\raggedleft\or
+ \TY at box\relax
+ \fi}\prepnext at tok}
+\def\TY at box#1{%
+ \ifx\centering#1%
+ \hfil \d at llarbegin\insert at column\d at llarend \hfil \else
+ \ifx\raggedright#1%
+ \kern\z@%<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+ \d at llarbegin \insert at column \d at llarend \hfil \else
+ \ifx\raggedleft#1%
+ \hfil\kern\z@ \d at llarbegin \insert at column \d at llarend \else
+ \ifx\relax#1%
+ \d at llarbegin \insert at column \d at llarend
+ \fi \fi \fi \fi}
+\def\TY at box@v#1{%
+ \vtop \@startpbox{\csname TY at F\the\TY at count\endcsname}%
+ #1\arraybackslash\tyformat
+ \insert at column\@endpbox}
+\newdimen\TY at tablewidth
+\def\Gscale at div#1#2#3{%
+ \setlength\dimen@{#3}%
+ \ifdim\dimen@=\z@
+ \PackageError{graphics}{Division by 0}\@eha
+ \dimen@#2%
+ \fi
+ \edef\@tempd{\the\dimen@}%
+ \setlength\dimen@{#2}%
+ \count at 65536\relax
+ \ifdim\dimen@<\z@
+ \dimen at -\dimen@
+ \count at -\count@
+ \fi
+ \loop
+ \ifdim\dimen@<8192\p@
+ \dimen@\tw@\dimen@
+ \divide\count@\tw@
+ \repeat
+ \dimen at ii=\@tempd\relax
+ \divide\dimen at ii\count@
+ \divide\dimen@\dimen at ii
+ \edef#1{\strip at pt\dimen@}}
+\long\def\TY at get@body#1\end
+ {\toks@\expandafter{\the\toks@#1}\TY at find@end}
+\def\TY at find@end#1{%
+ \def\@tempa{#1}%
+ \ifx\@tempa\TY@\def\@tempa{\end{#1}}\expandafter\@tempa
+ \else\toks@\expandafter
+ {\the\toks@\end{#1}}\expandafter\TY at get@body\fi}
+\def\TY at warn{%
+ \PackageWarning{tabulary}}
+\catcode`\Z=11
+\AtBeginDocument{
+\@ifpackageloaded{colortbl}{%
+\expandafter\def\expandafter\@mkpream\expandafter#\expandafter1%
+ \expandafter{%
+ \expandafter\let\expandafter\CT at setup\expandafter\relax
+ \expandafter\let\expandafter\CT at color\expandafter\relax
+ \expandafter\let\expandafter\CT at do@color\expandafter\relax
+ \expandafter\let\expandafter\color\expandafter\relax
+ \expandafter\let\expandafter\CT at column@color\expandafter\relax
+ \expandafter\let\expandafter\CT at row@color\expandafter\relax
+ \@mkpream{#1}}
+\let\TY@@mkpream\@mkpream
+\def\TY at classz{%
+ \@classx
+ \@tempcnta\count@
+ \ifx\TY at box\TY at box@v
+ \global\advance\TY at count\@ne
+ \fi
+ \let\centering c%
+ \let\raggedright\noindent
+ \let\raggedleft\indent
+ \let\arraybackslash\relax
+ \prepnext at tok
+\expandafter\CT at extract\the\toks\@tempcnta\columncolor!\@nil
+ \ifnum\@chnum<4
+ \global\expandafter\let\csname TY at F\the\TY at count\endcsname\z@
+ \fi
+ \ifnum\@chnum=6
+ \global\expandafter\let\csname TY at F\the\TY at count\endcsname\z@
+ \fi
+ \@addtopreamble{%
+ \setbox\z@\hbox\bgroup\bgroup
+ \ifcase\@chnum
+ \hskip\stretch{.5}\kern\z@
+ \d at llarbegin\insert at column\d at llarend\hskip\stretch{.5}\or
+ \kern\z@%<<<<<<<<<<<<<<<<<<<<<<<<<<<
+ \d at llarbegin \insert at column \d at llarend \hfill \or
+ \hfill\kern\z@ \d at llarbegin \insert at column \d at llarend \or
+ $\vcenter\@startpbox{\@nextchar}\insert at column \@endpbox $\or
+ \vtop \@startpbox{\@nextchar}\insert at column \@endpbox \or
+ \vbox \@startpbox{\@nextchar}\insert at column \@endpbox \or
+ \d at llarbegin \insert at column \d at llarend \or% dubious s case
+ \TY at box\centering\or
+ \TY at box\raggedright\or
+ \TY at box\raggedleft\or
+ \TY at box\relax
+ \fi
+ \egroup\egroup
+\begingroup
+ \CT at setup
+ \CT at column@color
+ \CT at row@color
+ \CT at do@color
+\endgroup
+ \@tempdima\ht\z@
+ \advance\@tempdima\minrowclearance
+ \vrule\@height\@tempdima\@width\z@
+\unhbox\z@
+}\prepnext at tok}%
+ \def\TY at arrayrule{%
+ \TY at subwidth\arrayrulewidth
+ \@addtopreamble{{\CT at arc@\vline}}}%
+ \def\TY at classvi{\ifcase \@lastchclass
+ \@acol \or
+ \TY at subwidth\doublerulesep
+ \ifx\CT at drsc@\relax
+ \@addtopreamble{\hskip\doublerulesep}%
+ \else
+ \@addtopreamble{{\CT at drsc@\vrule\@width\doublerulesep}}%
+ \fi\or
+ \@acol \or
+ \@classvii
+ \fi}%
+}{%
+\let\CT at start\relax
+}
+}
+{\uccode`\*=`\ %
+\uppercase{\gdef\TX at verb{%
+ \leavevmode\null\TX at vwarn
+ {\ifnum0=`}\fi\ttfamily\let\\\ignorespaces
+ \@ifstar{\let~*\TX at vb}{\TX at vb}}}}
+\def\TX at vb#1{\def\@tempa##1#1{\toks@{##1}\edef\@tempa{\the\toks@}%
+ \expandafter\TX at v\meaning\@tempa\\ \\\ifnum0=`{\fi}}\@tempa!}
+\def\TX at v#1!{\afterassignment\TX at vfirst\let\@tempa= }
+\begingroup
+\catcode`\*=\catcode`\#
+\catcode`\#=12
+\gdef\TX at vfirst{%
+ \if\@tempa#%
+ \def\@tempb{\TX at v@#}%
+ \else
+ \let\@tempb\TX at v@
+ \if\@tempa\space~\else\@tempa\fi
+ \fi
+ \@tempb}
+\gdef\TX at v@*1 *2{%
+ \TX at v@hash*1##\relax\if*2\\\else~\expandafter\TX at v@\fi*2}
+\gdef\TX at v@hash*1##*2{*1\ifx*2\relax\else#\expandafter\TX at v@hash\fi*2}
+\endgroup
+\def\TX at vwarn{%
+ \@warning{\noexpand\verb may be unreliable inside tabularx/y}%
+ \global\let\TX at vwarn\@empty}
+\endinput
+%%
+%% End of file `tabulary.sty'.
diff --git a/doc/_build/latex/theoretical_pipeline_schematic.png b/doc/_build/latex/theoretical_pipeline_schematic.png
new file mode 100644
index 0000000..a84cd9d
Binary files /dev/null and b/doc/_build/latex/theoretical_pipeline_schematic.png differ
diff --git a/doc/_build/latex/transform_1_to_1_example.png b/doc/_build/latex/transform_1_to_1_example.png
new file mode 100644
index 0000000..ed794d6
Binary files /dev/null and b/doc/_build/latex/transform_1_to_1_example.png differ
diff --git a/doc/_build/latex/tutorial_key.png b/doc/_build/latex/tutorial_key.png
new file mode 100644
index 0000000..a65505b
Binary files /dev/null and b/doc/_build/latex/tutorial_key.png differ
diff --git a/doc/_build/latex/tutorial_ruffus_files.jpg b/doc/_build/latex/tutorial_ruffus_files.jpg
new file mode 100644
index 0000000..00afb51
Binary files /dev/null and b/doc/_build/latex/tutorial_ruffus_files.jpg differ
diff --git a/doc/_build/latex/tutorial_step1_decorator_syntax.png b/doc/_build/latex/tutorial_step1_decorator_syntax.png
new file mode 100644
index 0000000..0fc68ab
Binary files /dev/null and b/doc/_build/latex/tutorial_step1_decorator_syntax.png differ
diff --git a/doc/_build/latex/wikimedia_bandedkrait.jpg b/doc/_build/latex/wikimedia_bandedkrait.jpg
new file mode 100644
index 0000000..e227f02
Binary files /dev/null and b/doc/_build/latex/wikimedia_bandedkrait.jpg differ
diff --git a/doc/_build/latex/wikimedia_cyl_ruffus.jpg b/doc/_build/latex/wikimedia_cyl_ruffus.jpg
new file mode 100644
index 0000000..d60e3bc
Binary files /dev/null and b/doc/_build/latex/wikimedia_cyl_ruffus.jpg differ
diff --git a/doc/_templates/index.html b/doc/_templates/index.html
new file mode 100644
index 0000000..4ade6f5
--- /dev/null
+++ b/doc/_templates/index.html
@@ -0,0 +1,160 @@
+{% extends "layout.html" %}
+{% set title = 'Ruffus' %}
+
+{% block body %}
+
+<p>
+<a href="why_ruffus.html#why-ruffus"> <img src="_images/logo.jpg" alt="logo"></a>
+</p>
+
+
+Ruffus is a computation pipeline library for Python. It is open source,
+powerful and user-friendly, and widely used in science and bioinformatics.
+
+<h1>Welcome</h1>
+
+<table class="Introduction" align="center" style="margin-left: 30px">
+<tr>
+ <td width="50%">
+ <p><I>Ruffus</I> is designed to allow scientific and other analyses to
+ be automated with the minimum of fuss and the least effort.</p>
+
+ These are <I>Ruffus</I>'s strengths:
+ <ul>
+ <li><b>Lightweight</b>: Suitable for the simplest of tasks</li>
+
+ <li><b>Scalable</b>: Handles even fiendishly complicated pipelines
+ which would cause <i>make</i> or <i>scons</i> to go cross-eyed and recursive.</li>
+
+ <li><b>Standard python</b>: No "clever magic", no pre-processing. </li>
+ <li><b>Unintrusive</b>: Unambitious, lightweight syntax which tries to do this
+ one small thing well. </li>
+ </ul>
+
+ <p>Please join me (email: ruffus_lib at llew.org.uk) in setting the direction of
+ this project if you are interested.
+ </p>
+
+ </td>
+
+ <td width="50%">
+
+ <img src="_images/front_page_flowchart.png" alt="flowchart">
+
+ </td>
+
+</tr>
+</table>
+
+
+
+
+
+
+<h1> Documentation</h1>
+
+<table class="contentstable" align="center" style="margin-left: 30px">
+<tr>
+ <td width="50%">
+
+ <p class="biglink"><a class="biglink"
+ href="installation.html">Download</a><br/>
+ <span class="linkdescr">to install <I>Ruffus</I></span></p>
+
+ <p class="biglink"><a class="biglink"
+ href="tutorials/new_tutorial/introduction.html">Simple Tutorial</a><br/>
+ <span class="linkdescr">Start here for a quick introduction to <i>Ruffus</i></span></p>
+
+ <p class="biglink"><a class="biglink"
+ href="tutorials/new_tutorial/manual_contents.html">Manual</a>
+ <a class="biglink" href="_downloads/ruffus.pdf">(pdf)</a><br/>
+ <span class="linkdescr">for an in-depth demonstration of all
+ <I>Ruffus</I> features</span></p>
+
+ </td>
+
+ <td width="50%">
+
+
+ <p class="biglink"><a class="biglink"
+ href="contents.html">Table of contents</a><br/>
+ <span class="linkdescr">for a complete listing of all the documentation</span></p>
+
+ <p class="biglink"><a class="biglink"
+ href="faq.html">Frequently Asked Questions</a><br/>
+ <span class="linkdescr">for common problems and clever solutions from the community</span></p>
+
+ <p class="biglink"><a class="biglink"
+ href="design.html">Design</a><br/>
+ <span class="linkdescr">to understand the design of <I>Ruffus</I></span></p>
+
+ <p class="biglink"><a class="biglink"
+ href="cheatsheet.html">Cheat Sheet</a><br/>
+ <span class="linkdescr">for Ruffus syntax</span></p>
+ </td>
+
+</tr>
+</table>
+
+
+
+
+<h2>Get <I>Ruffus</I></h2>
+
+<p>
+ <i>Ruffus</i> is available as an <a
+ href="http://peak.telecommunity.com/DevCenter/EasyInstall">easy-install</a>able package on the <a href="http://pypi.python.org/pypi/ruffus">Python Package
+ Index</a>.
+ </p>
+Just run:
+
+<div style="margin-left: 30px" class="highlight-python"><pre>sudo pip install ruffus --upgrade</pre> or
+</div>
+
+
+<div style="margin-left: 30px" class="highlight-python"><pre>easy_install -U ruffus</pre>
+</div>
+
+<br>
+<p>
+The very latest (in development) code can be obtained via <a href="https://code.google.com/p/ruffus/source/checkout"> git </a>:
+ <div style="color: #ff0000" >
+ <pre>git clone https://bunbun68@code.google.com/p/ruffus/</pre>
+</div>
+</p>
+
+<h1>Feedback and Getting Involved:</h1>
+<ul>
+ <li>
+ The <I>Ruffus</I> project is hosted at <b><a
+ href="http://code.google.com/p/ruffus">Google
+ Code</a></b>.<br><br>
+ </li>
+
+ <li>
+ <img src="https://groups.google.com/forum/my-groups-color.png"
+ height=30 alt="Google Groups"> <b>Subscribe to the <a href="https://groups.google.com/forum/#!forum/ruffus_discuss"><i>ruffus_discuss</i> mailing list </a></b>
+ <br>
+ <form action="http://groups.google.com/group/ruffus_discuss/boxsubscribe">
+ Email: <input type=text name=email>
+ <input type=submit name="sub" value="Subscribe">
+ </form><br>
+ </li>
+
+
+ <li>
+ <a href="http://groups.google.com/group/ruffus_discuss">Check out the mailing list without subscribing here.</a>
+ <br>
+ </li>
+
+ <li>
+ <a href="http://code.google.com/p/ruffus/issues/list">Bugs or feature requests can be posted here.</a>
+ <br>
+ </li>
+
+
+</ul>
+
+
+{% endblock %}
+
diff --git a/doc/_templates/layout.html b/doc/_templates/layout.html
new file mode 100644
index 0000000..554a7c9
--- /dev/null
+++ b/doc/_templates/layout.html
@@ -0,0 +1,76 @@
+{% extends "!layout.html" %}
+
+{% block sidebarsearch %}
+ {{ super() }}
+ <h3> Quick Reference:</h3>
+ <ul>
+ <h4><a href="{{ pathto('decorators/decorators') }}#core">Core:</a></h4>
+ <ul>
+ <li><a href="{{ pathto('decorators/originate') }}">@originate</a> </li>
+ <li><a href="{{ pathto('decorators/split') }}">@split</a> </li>
+ <li><a href="{{ pathto('decorators/transform') }}">@transform</a> </li>
+ <li><a href="{{ pathto('decorators/merge') }}">@merge</a> </li>
+ </ul>
+
+ <h4><a href="{{ pathto('decorators/decorators') }}#advanced">Advanced</a></h4>
+ <ul>
+ <li><a href="{{ pathto('decorators/subdivide') }}">@subdivide</a> </li>
+ <li><a href="{{ pathto('decorators/transform_ex') }}">@transform (add_inputs) </a> </li>
+ <li><a href="{{ pathto('decorators/collate') }}">@collate</a> </li>
+ <li><a href="{{ pathto('decorators/collate_ex') }}">@collate (add_inputs)</a> </li>
+ <li><a href="{{ pathto('decorators/graphviz') }}">@graphviz</a> </li>
+ <li><a href="{{ pathto('decorators/mkdir') }}">@mkdir</a> </li>
+ <li><a href="{{ pathto('decorators/follows') }}">@follows / mkdir</a> </li>
+ <li><a href="{{ pathto('decorators/posttask') }}">@posttask touch_file</a> </li>
+ <li><a href="{{ pathto('decorators/active_if') }}">@active_if</a> </li>
+ <li><a href="{{ pathto('decorators/jobs_limit') }}">@jobs_limit</a> </li>
+ </ul>
+
+ <h4><a href="{{ pathto('decorators/decorators') }}#combinatorics">Combinatorial:</a></h4>
+ <ul>
+ <li><a href="{{ pathto('decorators/product') }}">@product </a> </li>
+ <li><a href="{{ pathto('decorators/permutations') }}">@permutations </a> </li>
+ <li><a href="{{ pathto('decorators/combinations') }}">@combinations </a> </li>
+ <li><a href="{{ pathto('decorators/combinations_with_replacement') }}">@combinations_ _with_replacement </a> </li>
+ </ul>
+
+
+ <h4><a href="{{ pathto('decorators/decorators') }}#esoteric">Esoteric</a></h4>
+ <ul>
+ <li><a href="{{ pathto('decorators/files_ex') }}">@files (on the fly)</a> </li>
+ <li><a href="{{ pathto('decorators/parallel') }}">@parallel</a> </li>
+ <li><a href="{{ pathto('decorators/check_if_uptodate') }}">@check_if_uptodate</a> </li>
+ </ul>
+
+
+ <h4 ><a href="{{ pathto('decorators/indicator_objects') }}">Indicator objects</a></h4>
+ <h4 >Pipeline functions</h4>
+ <ul>
+ <li><a href="{{ pathto('pipeline_functions') }}#pipeline-functions-pipeline-run">pipeline_run</a> </li>
+ <li><a href="{{ pathto('pipeline_functions') }}#pipeline-functions-pipeline-printout">pipeline_printout</a> </li>
+ <li><a href="{{ pathto('pipeline_functions') }}#pipeline-functions-pipeline-printout-graph">pipeline_printout_graph</a> </li>
+ <li><a href="{{ pathto('pipeline_functions') }}#pipeline-functions-pipeline-get-task-names">pipeline_get_task_names</a> </li>
+ <li><a href="{{ pathto('drmaa_wrapper_functions') }}#drmaa-wrapper-functions-drmaa_wrapper-run_job">drmaa_wrapper.run_job</a> </li>
+ </ul>
+
+ <a href="{{ pathto('todo') }}">Future plans</a>
+ </ul>
+{% endblock %}
+
+{% block rootrellink %}
+ Ruffus v. {{ release }}
+ <li><a href="{{ pathto('index') }}">Home</a> | </li>
+ <li><a href="{{ pathto('contents') }}">Contents</a> | </li>
+ <li><a href="{{ pathto('installation') }}">Install</a> | </li>
+ <li><a href="{{ pathto('tutorials/new_tutorial/introduction') }}">Manual</a> / </li>
+ <li><a href="{{ pathto('tutorials/new_tutorial/manual_contents') }}">(TOC)</a> | </li>
+ <li><a href="{{ pathto('faq') }}">FAQ</a> | </li>
+ <li><a href="{{ pathto('cheatsheet') }}">Cheat sheet</a> | </li>
+ <li><a href="{{ pathto('tutorials/new_tutorial/command_line') }}">Command Line</a> | </li>
+ <li><a href="{{ pathto('gallery') }}">Gallery</a> | </li>
+ <li><a href="{{ pathto('history') }}">Latest Changes</a> » </li>
+{% endblock %}
+
+
+
+
diff --git a/doc/cheatsheet.rst b/doc/cheatsheet.rst
new file mode 100644
index 0000000..aa5585d
--- /dev/null
+++ b/doc/cheatsheet.rst
@@ -0,0 +1,85 @@
+.. include:: global.inc
+.. _cheat_sheet:
+
+
+#####################
+Cheat Sheet
+#####################
+
+The ``ruffus`` module is a lightweight way to add support
+for running computational pipelines.
+
+| Each stage or **task** in a computational pipeline is represented by a Python function.
+| Each Python function can be called in parallel to run multiple **jobs**.
+
+================================================
+1. Annotate functions with **Ruffus** decorators
+================================================
+
+
+******
+Core
+******
+.. csv-table::
+ :header: "Decorator", "Syntax"
+ :widths: 100, 600,1
+
+ "@originate (:ref:`Manual <new_manual.originate>`)
+ ", "
+ :ref:`@originate <decorators.originate>` ( ``output_files``, [``extra_parameters``,...] )
+ ", ""
+ "@split (:ref:`Manual <new_manual.split>`)
+ ", "
+ :ref:`@split <decorators.split>` ( ``tasks_or_file_names``, ``output_files``, [``extra_parameters``,...] )
+ ", ""
+ "@transform (:ref:`Manual <new_manual.transform>`)
+ ", "
+ | :ref:`@transform <decorators.transform>` ( ``tasks_or_file_names``, :ref:`suffix <decorators.transform.suffix_string>`\ *(*\ ``suffix_string``\ *)*\ , ``output_pattern``, [``extra_parameters``,...] )
+ | :ref:`@transform <decorators.transform>` ( ``tasks_or_file_names``, :ref:`regex <decorators.transform.matching_regex>`\ *(*\ ``regex_pattern``\ *)*\ , ``output_pattern``, [``extra_parameters``,...] )
+
+ ", ""
+ "@merge (:ref:`Manual <new_manual.merge>`)
+ ", "
+ :ref:`@merge <decorators.merge>` (``tasks_or_file_names``, ``output``, [``extra_parameters``,...] )
+ ", ""
+ "@posttask (:ref:`Manual <new_manual.posttask>`)
+ ", "
+ | :ref:`@posttask <decorators.posttask>` ( ``signal_task_completion_function`` )
+ | :ref:`@posttask <decorators.posttask>` (:ref:`touch_file <decorators.touch_file>`\ ( ``'task1.completed'`` ))
+
+ ", ""
+
+************************************************************************************************
+See :ref:`Decorators <decorators>` for a complete list of decorators
+************************************************************************************************
+
+
+
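+A minimal sketch of step 1, assuming two hypothetical starting files and
+trivial task bodies (names chosen purely for illustration)::
+
+    from ruffus import *
+
+    # one job per starting file
+    @originate(["a.start", "b.start"])
+    def create_initial_files(output_file):
+        open(output_file, "w").close()
+
+    # a.start -> a.processed, b.start -> b.processed
+    @transform(create_initial_files, suffix(".start"), ".processed")
+    def process_files(input_file, output_file):
+        open(output_file, "w").close()
+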
+================================================
+2. Print dependency graph if necessary
+================================================
+
+- For a graphical flowchart in ``jpg``, ``svg``, ``dot``, ``png``, ``ps``, ``gif`` formats::
+
+ pipeline_printout_graph ( "flowchart.svg")
+
+.. comment
+
+ This requires the `dot programme <http://www.graphviz.org/>`_ to be installed
+
+- For a text printout of all jobs ::
+
+ pipeline_printout()
+
+
+================================================
+3. Run the pipeline
+================================================
+
+::
+
+ pipeline_run(multiprocess = N_PARALLEL_JOBS)
+
+
+
+
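+A sketch putting steps 2 and 3 together for the hypothetical tasks above
+(``process_files`` is the final task from the step 1 sketch)::
+
+    import sys
+
+    # preview the jobs that would run, without running them
+    pipeline_printout(sys.stdout, [process_files], verbose = 3)
+
+    # draw the dependency graph, then run up to 4 jobs in parallel
+    pipeline_printout_graph("flowchart.svg", "svg", [process_files])
+    pipeline_run([process_files], multiprocess = 4)
+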
diff --git a/doc/complex_dags/dot/all.dot b/doc/complex_dags/dot/all.dot
new file mode 100644
index 0000000..4efa939
--- /dev/null
+++ b/doc/complex_dags/dot/all.dot
@@ -0,0 +1,83 @@
+digraph tree
+{
+size="8,11!";
+splines=true;
+fontsize=30;
+ranksep = 0.3;
+subgraph clustertasks
+{
+label = "Pipeline:";
+edge[minlen=2];
+node[fontsize=20];
+t0[fontcolor=blue, shape=plaintext, label="task1"];
+t2[color=olivedrab, fontcolor=blue, shape=tripleoctagon, label="task7"];
+t0 -> t2[color=blue];
+t3[color=olivedrab, fontcolor=blue, shape=tripleoctagon, label="task8"];
+t2 -> t3[color=blue];
+t4[fontcolor=blue, shape=plaintext, label="task9"];
+t3 -> t4[color=blue];
+t5[fontcolor=blue, shape=plaintext, label="task10"];
+t4 -> t5[color=blue];
+t6[color=olivedrab, fontcolor=blue, shape=tripleoctagon, label="task11"];
+t5 -> t6[color=blue];
+t1[fontcolor=blue, shape=plaintext, label="task2"];
+t0 -> t1[color=blue];
+t8[fontcolor=blue, shape=plaintext, label="task3"];
+t1 -> t8[color=blue];
+t9[fontcolor=blue, shape=plaintext, label="task4"];
+t8 -> t9[color=blue];
+t10[fontcolor=blue, shape=plaintext, label="task5"];
+t9 -> t10[color=blue];
+t11[fontcolor=blue, shape=plaintext, label="task6"];
+t10 -> t11[color=blue];
+t7[fontcolor=blue, shape=plaintext, label="task12"];
+t11 -> t7[color=blue];
+t6 -> t7[color=blue];
+t13[fontcolor=blue, shape=plaintext, label="task18"];
+t7 -> t13[color=blue];
+t19[fontcolor=blue, shape=plaintext, label="task19"];
+t13 -> t19[color=blue];
+t20[color=olivedrab, fontcolor=blue, shape=tripleoctagon, label="task20"];
+t19 -> t20[color=blue];
+t21[fontcolor=blue, shape=plaintext, label="task21"];
+t20 -> t21[color=blue];
+t8 -> t21[color=blue];
+t22[fontcolor=blue, shape=plaintext, label="task22"];
+t21 -> t22[color=blue];
+t23[fontcolor=blue, shape=plaintext, label="task23"];
+t22 -> t23[color=blue];
+t12[fontcolor=blue, shape=plaintext, label="task13"];
+t7 -> t12[color=blue];
+t14[color=olivedrab, fontcolor=blue, shape=tripleoctagon, label="task14"];
+t12 -> t14[color=blue];
+t15[fontcolor=blue, shape=plaintext, label="task15"];
+t14 -> t15[color=blue];
+t4 -> t15[color=blue];
+t16[fontcolor=blue, shape=plaintext, label="task16"];
+t15 -> t16[color=blue];
+t6 -> t16[color=blue];
+t17[fontcolor=blue, shape=plaintext, label="task17"];
+t16 -> t17[color=blue];
+t18[fontcolor=blue, shape=plaintext, label="task24"];
+t17 -> t18[color=blue];
+t24[color=orange, fontcolor=orange, shape=tripleoctagon, label="task25"];
+t23 -> t24[color=blue];
+t18 -> t24[color=blue];
+}
+subgraph clusterkey
+{
+style=filled;
+fontsize=30;
+color=gray90;
+label = "Key:";
+node[fontsize=10];
+k1[color=orange, fontcolor=orange, shape=tripleoctagon, fontsize=15, label="Final target"];
+k2[shape=box, style=filled, fontsize=15, fillcolor=red, label="Vicious cycle"];
+k3[fontcolor=blue, shape=plaintext, fontsize=15, label="Task to run"];
+k4[color=blue, fontcolor=blue, shape=tripleoctagon, fontsize=15, label="Force pipeline run from this task"];
+k5[color=gray, fontcolor=gray, shape=tripleoctagon, fontsize=15, label="Up-to-date Final target"];
+k6[color=olivedrab, fontcolor=blue, shape=tripleoctagon, fontsize=15, label="Up-to-date task forced to rerun"];
+k7[style=filled, color=olivedrab, label="Up-to-date task", shape=octagon, fontsize=15, fillcolor=olivedrab, fontcolor=black];
+k8[style=filled, color=gray, label="Up-to-date dependence", shape=octagon, fontsize=15, fillcolor=white, fontcolor=gray];
+k1->k2[color=red];k2->k1 [color=red];k2->k3->k4->k5[color=blue];k5->k6->k7->k8[color=gray];}
+}
diff --git a/doc/complex_dags/dot/non_dag.dot b/doc/complex_dags/dot/non_dag.dot
new file mode 100644
index 0000000..244c06b
--- /dev/null
+++ b/doc/complex_dags/dot/non_dag.dot
@@ -0,0 +1,86 @@
+digraph tree
+{
+size="8,11!";
+splines=true;
+fontsize=30;
+ranksep = 0.3;
+subgraph clustertasks
+{
+label = "Pipeline:";
+edge[minlen=2];
+node[fontsize=20];
+t2[fontcolor=blue, shape=plaintext, label="task7"];
+t0 -> t2[color=blue];
+t3[fontcolor=blue, shape=plaintext, label="task8"];
+t2 -> t3[color=blue];
+t4[fontcolor=blue, shape=plaintext, label="task9"];
+t3 -> t4[color=blue];
+t4 -> t5[color=blue];
+t8[fontcolor=blue, shape=plaintext, label="task3"];
+t1 -> t8[color=blue];
+t9[fontcolor=blue, shape=plaintext, label="task4"];
+t8 -> t9[color=blue];
+t9 -> t10[color=blue];
+t13[fontcolor=blue, shape=plaintext, label="task18"];
+t7 -> t13[color=blue];
+t19[fontcolor=blue, shape=plaintext, label="task19"];
+t13 -> t19[color=blue];
+t20[fontcolor=blue, shape=plaintext, label="task20"];
+t19 -> t20[color=blue];
+t21[fontcolor=blue, shape=plaintext, label="task21"];
+t20 -> t21[color=blue];
+t8 -> t21[color=blue];
+t22[fontcolor=blue, shape=plaintext, label="task22"];
+t21 -> t22[color=blue];
+t23[fontcolor=blue, shape=plaintext, label="task23"];
+t22 -> t23[color=blue];
+t12[fontcolor=blue, shape=plaintext, label="task13"];
+t7 -> t12[color=blue];
+t14[fontcolor=blue, shape=plaintext, label="task14"];
+t12 -> t14[color=blue];
+t15[fontcolor=blue, shape=plaintext, label="task15"];
+t14 -> t15[color=blue];
+t4 -> t15[color=blue];
+t16[fontcolor=blue, shape=plaintext, label="task16"];
+t15 -> t16[color=blue];
+t6 -> t16[color=blue];
+t17[fontcolor=blue, shape=plaintext, label="task17"];
+t16 -> t17[color=blue];
+t18[fontcolor=blue, shape=plaintext, label="task24"];
+t17 -> t18[color=blue];
+t24[color=orange, fontcolor=orange, shape=tripleoctagon, label="task25"];
+t23 -> t24[color=blue];
+t18 -> t24[color=blue];
+t1[shape=box, style=filled, fillcolor=red, label="task2"];
+t0 -> t1[color=red ];
+t0[shape=box, style=filled, fillcolor=red, label="task1"];
+t1 -> t0[color=red , constraint=false];
+t5[shape=box, style=filled, fillcolor=red, label="task10"];
+t7 -> t5[color=red , constraint=false];
+t6[shape=box, style=filled, fillcolor=red, label="task11"];
+t5 -> t6[color=red ];
+t10[shape=box, style=filled, fillcolor=red, label="task5"];
+t5 -> t10[color=red ];
+t11[shape=box, style=filled, fillcolor=red, label="task6"];
+t10 -> t11[color=red ];
+t7[shape=box, style=filled, fillcolor=red, label="task12"];
+t11 -> t7[color=red , constraint=false];
+t6 -> t7[color=red ];
+}
+subgraph clusterkey
+{
+style=filled;
+fontsize=30;
+color=gray90;
+label = "Key:";
+node[fontsize=10];
+k1[color=orange, fontcolor=orange, shape=tripleoctagon, fontsize=15, label="Final target"];
+k2[shape=box, style=filled, fontsize=15, fillcolor=red, label="Vicious cycle"];
+k3[fontcolor=blue, shape=plaintext, fontsize=15, label="Task to run"];
+k4[color=blue, fontcolor=blue, shape=tripleoctagon, fontsize=15, label="Force pipeline run from this task"];
+k5[color=gray, fontcolor=gray, shape=tripleoctagon, fontsize=15, label="Up-to-date Final target"];
+k6[color=olivedrab, fontcolor=blue, shape=tripleoctagon, fontsize=15, label="Up-to-date task forced to rerun"];
+k7[style=filled, color=olivedrab, label="Up-to-date task", shape=octagon, fontsize=15, fillcolor=olivedrab, fontcolor=black];
+k8[style=filled, color=gray, label="Up-to-date dependence", shape=octagon, fontsize=15, fillcolor=white, fontcolor=gray];
+k1->k2[color=red];k2->k1 [color=red];k2->k3->k4->k5[color=blue];k5->k6->k7->k8[color=gray];}
+}
diff --git a/doc/complex_dags/dot/task17.dot b/doc/complex_dags/dot/task17.dot
new file mode 100644
index 0000000..6395d4f
--- /dev/null
+++ b/doc/complex_dags/dot/task17.dot
@@ -0,0 +1,83 @@
+digraph tree
+{
+size="8,11!";
+splines=true;
+fontsize=30;
+ranksep = 0.3;
+subgraph clustertasks
+{
+label = "Pipeline:";
+edge[minlen=2];
+node[fontsize=20];
+t0[fontcolor=blue, shape=plaintext, label="task1"];
+t2[color=olivedrab, fontcolor=blue, shape=tripleoctagon, label="task7"];
+t0 -> t2[color=blue];
+t3[color=olivedrab, fontcolor=blue, shape=tripleoctagon, label="task8"];
+t2 -> t3[color=blue];
+t4[fontcolor=blue, shape=plaintext, label="task9"];
+t3 -> t4[color=blue];
+t5[fontcolor=blue, shape=plaintext, label="task10"];
+t4 -> t5[color=blue];
+t6[color=olivedrab, fontcolor=blue, shape=tripleoctagon, label="task11"];
+t5 -> t6[color=blue];
+t1[fontcolor=blue, shape=plaintext, label="task2"];
+t0 -> t1[color=blue];
+t8[fontcolor=blue, shape=plaintext, label="task3"];
+t1 -> t8[color=blue];
+t9[fontcolor=blue, shape=plaintext, label="task4"];
+t8 -> t9[color=blue];
+t10[fontcolor=blue, shape=plaintext, label="task5"];
+t9 -> t10[color=blue];
+t11[fontcolor=blue, shape=plaintext, label="task6"];
+t10 -> t11[color=blue];
+t7[fontcolor=blue, shape=plaintext, label="task12"];
+t11 -> t7[color=blue];
+t6 -> t7[color=blue];
+t12[fontcolor=blue, shape=plaintext, label="task13"];
+t7 -> t12[color=blue];
+t14[color=olivedrab, fontcolor=blue, shape=tripleoctagon, label="task14"];
+t12 -> t14[color=blue];
+t15[fontcolor=blue, shape=plaintext, label="task15"];
+t14 -> t15[color=blue];
+t4 -> t15[color=blue];
+t16[fontcolor=blue, shape=plaintext, label="task16"];
+t15 -> t16[color=blue];
+t6 -> t16[color=blue];
+t17[color=orange, fontcolor=orange, shape=tripleoctagon, label="task17"];
+t16 -> t17[color=blue];
+t13[style=filled, color=gray, label="task18", shape=octagon, fillcolor=white, fontcolor=gray];
+t7 -> t13[color=gray, arrowtype=normal];
+t19[style=filled, color=gray, label="task19", shape=octagon, fillcolor=white, fontcolor=gray];
+t13 -> t19[color=gray, arrowtype=normal];
+t20[style=filled, color=gray, label="task20", shape=octagon, fillcolor=white, fontcolor=gray];
+t19 -> t20[color=gray, arrowtype=normal];
+t21[style=filled, color=gray, label="task21", shape=octagon, fillcolor=white, fontcolor=gray];
+t20 -> t21[color=gray, arrowtype=normal];
+t8 -> t21[color=gray, arrowtype=normal];
+t22[style=filled, color=gray, label="task22", shape=octagon, fillcolor=white, fontcolor=gray];
+t21 -> t22[color=gray, arrowtype=normal];
+t23[style=filled, color=gray, label="task23", shape=octagon, fillcolor=white, fontcolor=gray];
+t22 -> t23[color=gray, arrowtype=normal];
+t18[style=filled, color=gray, label="task24", shape=octagon, fillcolor=white, fontcolor=gray];
+t17 -> t18[color=gray, arrowtype=normal];
+t24[style=filled, color=gray, label="task25", shape=octagon, fillcolor=white, fontcolor=gray];
+t23 -> t24[color=gray, arrowtype=normal];
+t18 -> t24[color=gray, arrowtype=normal];
+}
+subgraph clusterkey
+{
+style=filled;
+fontsize=30;
+color=gray90;
+label = "Key:";
+node[fontsize=10];
+k1[color=orange, fontcolor=orange, shape=tripleoctagon, fontsize=15, label="Final target"];
+k2[shape=box, style=filled, fontsize=15, fillcolor=red, label="Vicious cycle"];
+k3[fontcolor=blue, shape=plaintext, fontsize=15, label="Task to run"];
+k4[color=blue, fontcolor=blue, shape=tripleoctagon, fontsize=15, label="Force pipeline run from this task"];
+k5[color=gray, fontcolor=gray, shape=tripleoctagon, fontsize=15, label="Up-to-date Final target"];
+k6[color=olivedrab, fontcolor=blue, shape=tripleoctagon, fontsize=15, label="Up-to-date task forced to rerun"];
+k7[style=filled, color=olivedrab, label="Up-to-date task", shape=octagon, fontsize=15, fillcolor=olivedrab, fontcolor=black];
+k8[style=filled, color=gray, label="Up-to-date dependence", shape=octagon, fontsize=15, fillcolor=white, fontcolor=gray];
+k1->k2[color=red];k2->k1 [color=red];k2->k3->k4->k5[color=blue];k5->k6->k7->k8[color=gray];}
+}
diff --git a/doc/complex_dags/dot/task17_from_task9.dot b/doc/complex_dags/dot/task17_from_task9.dot
new file mode 100644
index 0000000..22dcd21
--- /dev/null
+++ b/doc/complex_dags/dot/task17_from_task9.dot
@@ -0,0 +1,83 @@
+digraph tree
+{
+size="8,11!";
+splines=true;
+fontsize=30;
+ranksep = 0.3;
+subgraph clustertasks
+{
+label = "Pipeline:";
+edge[minlen=2];
+node[fontsize=20];
+t0[fontcolor=blue, shape=plaintext, label="task1"];
+t2[color=olivedrab, fontcolor=blue, shape=tripleoctagon, label="task7"];
+t0 -> t2[color=blue];
+t3[color=olivedrab, fontcolor=blue, shape=tripleoctagon, label="task8"];
+t2 -> t3[color=blue];
+t4[color=blue, fontcolor=blue, shape=tripleoctagon, label="task9"];
+t3 -> t4[color=blue];
+t5[fontcolor=blue, shape=plaintext, label="task10"];
+t4 -> t5[color=blue];
+t6[color=olivedrab, fontcolor=blue, shape=tripleoctagon, label="task11"];
+t5 -> t6[color=blue];
+t1[fontcolor=blue, shape=plaintext, label="task2"];
+t0 -> t1[color=blue];
+t8[fontcolor=blue, shape=plaintext, label="task3"];
+t1 -> t8[color=blue];
+t9[fontcolor=blue, shape=plaintext, label="task4"];
+t8 -> t9[color=blue];
+t10[fontcolor=blue, shape=plaintext, label="task5"];
+t9 -> t10[color=blue];
+t11[fontcolor=blue, shape=plaintext, label="task6"];
+t10 -> t11[color=blue];
+t7[fontcolor=blue, shape=plaintext, label="task12"];
+t11 -> t7[color=blue];
+t6 -> t7[color=blue];
+t12[fontcolor=blue, shape=plaintext, label="task13"];
+t7 -> t12[color=blue];
+t14[color=olivedrab, fontcolor=blue, shape=tripleoctagon, label="task14"];
+t12 -> t14[color=blue];
+t15[fontcolor=blue, shape=plaintext, label="task15"];
+t14 -> t15[color=blue];
+t4 -> t15[color=blue];
+t16[fontcolor=blue, shape=plaintext, label="task16"];
+t15 -> t16[color=blue];
+t6 -> t16[color=blue];
+t17[color=orange, fontcolor=orange, shape=tripleoctagon, label="task17"];
+t16 -> t17[color=blue];
+t13[style=filled, color=gray, label="task18", shape=octagon, fillcolor=white, fontcolor=gray];
+t7 -> t13[color=gray, arrowtype=normal];
+t19[style=filled, color=gray, label="task19", shape=octagon, fillcolor=white, fontcolor=gray];
+t13 -> t19[color=gray, arrowtype=normal];
+t20[style=filled, color=gray, label="task20", shape=octagon, fillcolor=white, fontcolor=gray];
+t19 -> t20[color=gray, arrowtype=normal];
+t21[style=filled, color=gray, label="task21", shape=octagon, fillcolor=white, fontcolor=gray];
+t20 -> t21[color=gray, arrowtype=normal];
+t8 -> t21[color=gray, arrowtype=normal];
+t22[style=filled, color=gray, label="task22", shape=octagon, fillcolor=white, fontcolor=gray];
+t21 -> t22[color=gray, arrowtype=normal];
+t23[style=filled, color=gray, label="task23", shape=octagon, fillcolor=white, fontcolor=gray];
+t22 -> t23[color=gray, arrowtype=normal];
+t18[style=filled, color=gray, label="task24", shape=octagon, fillcolor=white, fontcolor=gray];
+t17 -> t18[color=gray, arrowtype=normal];
+t24[style=filled, color=gray, label="task25", shape=octagon, fillcolor=white, fontcolor=gray];
+t23 -> t24[color=gray, arrowtype=normal];
+t18 -> t24[color=gray, arrowtype=normal];
+}
+subgraph clusterkey
+{
+style=filled;
+fontsize=30;
+color=gray90;
+label = "Key:";
+node[fontsize=10];
+k1[color=orange, fontcolor=orange, shape=tripleoctagon, fontsize=15, label="Final target"];
+k2[shape=box, style=filled, fontsize=15, fillcolor=red, label="Vicious cycle"];
+k3[fontcolor=blue, shape=plaintext, fontsize=15, label="Task to run"];
+k4[color=blue, fontcolor=blue, shape=tripleoctagon, fontsize=15, label="Force pipeline run from this task"];
+k5[color=gray, fontcolor=gray, shape=tripleoctagon, fontsize=15, label="Up-to-date Final target"];
+k6[color=olivedrab, fontcolor=blue, shape=tripleoctagon, fontsize=15, label="Up-to-date task forced to rerun"];
+k7[style=filled, color=olivedrab, label="Up-to-date task", shape=octagon, fontsize=15, fillcolor=olivedrab, fontcolor=black];
+k8[style=filled, color=gray, label="Up-to-date dependence", shape=octagon, fontsize=15, fillcolor=white, fontcolor=gray];
+k1->k2[color=red];k2->k1 [color=red];k2->k3->k4->k5[color=blue];k5->k6->k7->k8[color=gray];}
+}
diff --git a/doc/complex_dags/dot/task25_from_task9.dot b/doc/complex_dags/dot/task25_from_task9.dot
new file mode 100644
index 0000000..56a59d5
--- /dev/null
+++ b/doc/complex_dags/dot/task25_from_task9.dot
@@ -0,0 +1,83 @@
+digraph tree
+{
+size="8,11!";
+splines=true;
+fontsize=30;
+ranksep = 0.3;
+subgraph clustertasks
+{
+label = "Pipeline:";
+edge[minlen=2];
+node[fontsize=20];
+t0[fontcolor=blue, shape=plaintext, label="task1"];
+t2[color=olivedrab, fontcolor=blue, shape=tripleoctagon, label="task7"];
+t0 -> t2[color=blue];
+t3[color=olivedrab, fontcolor=blue, shape=tripleoctagon, label="task8"];
+t2 -> t3[color=blue];
+t4[color=blue, fontcolor=blue, shape=tripleoctagon, label="task9"];
+t3 -> t4[color=blue];
+t5[fontcolor=blue, shape=plaintext, label="task10"];
+t4 -> t5[color=blue];
+t6[color=olivedrab, fontcolor=blue, shape=tripleoctagon, label="task11"];
+t5 -> t6[color=blue];
+t1[fontcolor=blue, shape=plaintext, label="task2"];
+t0 -> t1[color=blue];
+t8[fontcolor=blue, shape=plaintext, label="task3"];
+t1 -> t8[color=blue];
+t9[fontcolor=blue, shape=plaintext, label="task4"];
+t8 -> t9[color=blue];
+t10[fontcolor=blue, shape=plaintext, label="task5"];
+t9 -> t10[color=blue];
+t11[fontcolor=blue, shape=plaintext, label="task6"];
+t10 -> t11[color=blue];
+t7[fontcolor=blue, shape=plaintext, label="task12"];
+t11 -> t7[color=blue];
+t6 -> t7[color=blue];
+t13[fontcolor=blue, shape=plaintext, label="task18"];
+t7 -> t13[color=blue];
+t19[fontcolor=blue, shape=plaintext, label="task19"];
+t13 -> t19[color=blue];
+t20[color=olivedrab, fontcolor=blue, shape=tripleoctagon, label="task20"];
+t19 -> t20[color=blue];
+t21[fontcolor=blue, shape=plaintext, label="task21"];
+t20 -> t21[color=blue];
+t8 -> t21[color=blue];
+t22[fontcolor=blue, shape=plaintext, label="task22"];
+t21 -> t22[color=blue];
+t23[fontcolor=blue, shape=plaintext, label="task23"];
+t22 -> t23[color=blue];
+t12[fontcolor=blue, shape=plaintext, label="task13"];
+t7 -> t12[color=blue];
+t14[color=olivedrab, fontcolor=blue, shape=tripleoctagon, label="task14"];
+t12 -> t14[color=blue];
+t15[fontcolor=blue, shape=plaintext, label="task15"];
+t14 -> t15[color=blue];
+t4 -> t15[color=blue];
+t16[fontcolor=blue, shape=plaintext, label="task16"];
+t15 -> t16[color=blue];
+t6 -> t16[color=blue];
+t17[fontcolor=blue, shape=plaintext, label="task17"];
+t16 -> t17[color=blue];
+t18[fontcolor=blue, shape=plaintext, label="task24"];
+t17 -> t18[color=blue];
+t24[color=orange, fontcolor=orange, shape=tripleoctagon, label="task25"];
+t23 -> t24[color=blue];
+t18 -> t24[color=blue];
+}
+subgraph clusterkey
+{
+style=filled;
+fontsize=30;
+color=gray90;
+label = "Key:";
+node[fontsize=10];
+k1[color=orange, fontcolor=orange, shape=tripleoctagon, fontsize=15, label="Final target"];
+k2[shape=box, style=filled, fontsize=15, fillcolor=red, label="Vicious cycle"];
+k3[fontcolor=blue, shape=plaintext, fontsize=15, label="Task to run"];
+k4[color=blue, fontcolor=blue, shape=tripleoctagon, fontsize=15, label="Force pipeline run from this task"];
+k5[color=gray, fontcolor=gray, shape=tripleoctagon, fontsize=15, label="Up-to-date Final target"];
+k6[color=olivedrab, fontcolor=blue, shape=tripleoctagon, fontsize=15, label="Up-to-date task forced to rerun"];
+k7[style=filled, color=olivedrab, label="Up-to-date task", shape=octagon, fontsize=15, fillcolor=olivedrab, fontcolor=black];
+k8[style=filled, color=gray, label="Up-to-date dependence", shape=octagon, fontsize=15, fillcolor=white, fontcolor=gray];
+k1->k2[color=red];k2->k1 [color=red];k2->k3->k4->k5[color=blue];k5->k6->k7->k8[color=gray];}
+}
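The dot files under doc/complex_dags/ are pre-rendered Graphviz flowcharts shipped with the documentation. As a rough illustration only (not the script that produced the files above), a Ruffus pipeline can emit a comparable flowchart with pipeline_printout_graph(); every task name and file name below is hypothetical:

    # Sketch: emit a dependency flowchart from a (toy) Ruffus pipeline.
    # The "dot" output format needs no external tools; "svg"/"png" renderings
    # like the ones added in the following hunks assume Graphviz is installed.
    from ruffus import originate, transform, suffix, pipeline_printout_graph

    @originate(["start.txt"])
    def task1(output_file):
        open(output_file, "w").close()

    @transform(task1, suffix(".txt"), ".out")
    def task25(input_file, output_file):   # stand-in for the final target task
        open(output_file, "w").close()

    pipeline_printout_graph("task25_flowchart.dot", "dot",
                            [task25],                  # target task(s) to trace back from
                            forcedtorun_tasks=[task1],  # optional: force rerun from here
                            no_key_legend=False)        # keep the "Key:" legend cluster
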
diff --git a/doc/complex_dags/jpg/all.jpg b/doc/complex_dags/jpg/all.jpg
new file mode 100644
index 0000000..740d132
Binary files /dev/null and b/doc/complex_dags/jpg/all.jpg differ
diff --git a/doc/complex_dags/jpg/all_sm.jpg b/doc/complex_dags/jpg/all_sm.jpg
new file mode 100644
index 0000000..445a16b
Binary files /dev/null and b/doc/complex_dags/jpg/all_sm.jpg differ
diff --git a/doc/complex_dags/jpg/non_dag.jpg b/doc/complex_dags/jpg/non_dag.jpg
new file mode 100644
index 0000000..6d89976
Binary files /dev/null and b/doc/complex_dags/jpg/non_dag.jpg differ
diff --git a/doc/complex_dags/jpg/non_dag_sm.jpg b/doc/complex_dags/jpg/non_dag_sm.jpg
new file mode 100644
index 0000000..d07de50
Binary files /dev/null and b/doc/complex_dags/jpg/non_dag_sm.jpg differ
diff --git a/doc/complex_dags/jpg/task17.jpg b/doc/complex_dags/jpg/task17.jpg
new file mode 100644
index 0000000..8674c53
Binary files /dev/null and b/doc/complex_dags/jpg/task17.jpg differ
diff --git a/doc/complex_dags/jpg/task17_from_task9.jpg b/doc/complex_dags/jpg/task17_from_task9.jpg
new file mode 100644
index 0000000..8f148b2
Binary files /dev/null and b/doc/complex_dags/jpg/task17_from_task9.jpg differ
diff --git a/doc/complex_dags/jpg/task17_from_task9_sm.jpg b/doc/complex_dags/jpg/task17_from_task9_sm.jpg
new file mode 100644
index 0000000..1add029
Binary files /dev/null and b/doc/complex_dags/jpg/task17_from_task9_sm.jpg differ
diff --git a/doc/complex_dags/jpg/task17_sm.jpg b/doc/complex_dags/jpg/task17_sm.jpg
new file mode 100644
index 0000000..cf4e1eb
Binary files /dev/null and b/doc/complex_dags/jpg/task17_sm.jpg differ
diff --git a/doc/complex_dags/jpg/task25_from_task9.jpg b/doc/complex_dags/jpg/task25_from_task9.jpg
new file mode 100644
index 0000000..a70f217
Binary files /dev/null and b/doc/complex_dags/jpg/task25_from_task9.jpg differ
diff --git a/doc/complex_dags/jpg/task25_from_task9_sm.jpg b/doc/complex_dags/jpg/task25_from_task9_sm.jpg
new file mode 100644
index 0000000..83060b0
Binary files /dev/null and b/doc/complex_dags/jpg/task25_from_task9_sm.jpg differ
diff --git a/doc/complex_dags/png/all.png b/doc/complex_dags/png/all.png
new file mode 100644
index 0000000..3007c66
Binary files /dev/null and b/doc/complex_dags/png/all.png differ
diff --git a/doc/complex_dags/png/non_dag.png b/doc/complex_dags/png/non_dag.png
new file mode 100644
index 0000000..023b706
Binary files /dev/null and b/doc/complex_dags/png/non_dag.png differ
diff --git a/doc/complex_dags/png/task17.png b/doc/complex_dags/png/task17.png
new file mode 100644
index 0000000..57399b2
Binary files /dev/null and b/doc/complex_dags/png/task17.png differ
diff --git a/doc/complex_dags/png/task17_from_task9.png b/doc/complex_dags/png/task17_from_task9.png
new file mode 100644
index 0000000..f3a68cd
Binary files /dev/null and b/doc/complex_dags/png/task17_from_task9.png differ
diff --git a/doc/complex_dags/png/task25_from_task9.png b/doc/complex_dags/png/task25_from_task9.png
new file mode 100644
index 0000000..80fa2c0
Binary files /dev/null and b/doc/complex_dags/png/task25_from_task9.png differ
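The jpg/png entries above and the svg files below appear to be renderings of the same dot sources. Assuming the Graphviz "dot" binary is on PATH, equivalent renderings could be regenerated along these lines (a sketch, not part of the package):

    # Sketch: re-render a checked-in .dot flowchart to svg/png with Graphviz.
    import subprocess

    def render(dot_file, fmt, out_file):
        # equivalent to: dot -T<fmt> <dot_file> -o <out_file>
        subprocess.run(["dot", f"-T{fmt}", dot_file, "-o", out_file], check=True)

    render("doc/complex_dags/dot/task25_from_task9.dot", "svg",
           "doc/complex_dags/svg/task25_from_task9.svg")
    render("doc/complex_dags/dot/task25_from_task9.dot", "png",
           "doc/complex_dags/png/task25_from_task9.png")
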
diff --git a/doc/complex_dags/svg/all.svg b/doc/complex_dags/svg/all.svg
new file mode 100644
index 0000000..c9fc3c8
--- /dev/null
+++ b/doc/complex_dags/svg/all.svg
@@ -0,0 +1,373 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.0//EN"
+ "http://www.w3.org/TR/2001/REC-SVG-20010904/DTD/svg10.dtd" [
+ <!ATTLIST svg xmlns:xlink CDATA #FIXED "http://www.w3.org/1999/xlink">
+]>
+<!-- Generated by Graphviz version 2.20.2 (Mon Mar 30 10:09:11 UTC 2009)
+ For user: (lg) leo goodstadt -->
+<!-- Title: tree Pages: 1 -->
+<svg width="278pt" height="576pt"
+ viewBox="0.00 0.00 278.30 576.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<g id="graph0" class="graph" transform="scale(0.373057 0.373057) rotate(0) translate(4 1540)">
+<title>tree</title>
+<polygon style="fill:white;stroke:white;" points="-4,4 -4,-1540 742,-1540 742,4 -4,4"/>
+<g id="cluster2" class="cluster"><title>clustertasks</title>
+<polygon style="fill:none;stroke:black;" points="8,-16 8,-1519 322,-1519 322,-16 8,-16"/>
+<text text-anchor="middle" x="165" y="-1488" style="font-family:Times New Roman;font-size:30.00;">Pipeline:</text>
+</g>
+<g id="cluster3" class="cluster"><title>clusterkey</title>
+<polygon style="fill:#e5e5e5;stroke:#e5e5e5;" points="330,-916 330,-1528 730,-1528 730,-916 330,-916"/>
+<text text-anchor="middle" x="530" y="-1497" style="font-family:Times New Roman;font-size:30.00;">Key:</text>
+</g>
+<!-- t0 -->
+<g id="node2" class="node"><title>t0</title>
+<text text-anchor="middle" x="214" y="-1443" style="font-family:Times New Roman;font-size:20.00;fill:blue;">task1</text>
+</g>
+<!-- t2 -->
+<g id="node3" class="node"><title>t2</title>
+<polygon style="fill:none;stroke:olivedrab;" points="208.912,-1301.63 208.912,-1320.37 179.088,-1333.63 136.912,-1333.63 107.088,-1320.37 107.088,-1301.63 136.912,-1288.37 179.088,-1288.37 208.912,-1301.63"/>
+<polygon style="fill:none;stroke:olivedrab;" points="212.912,-1299.03 212.912,-1322.97 179.937,-1337.63 136.063,-1337.63 103.088,-1322.97 103.088,-1299.03 136.063,-1284.37 179.937,-1284.37 212.912,-1299.03"/>
+<polygon style="fill:none;stroke:olivedrab;" points="216.912,-1296.43 216.912,-1325.57 180.786,-1341.63 135.214,-1341.63 99.0883,-1325.57 99.0883,-1296.43 135.214,-1280.37 180.786,-1280.37 216.912,-1296.43"/>
+<text text-anchor="middle" x="158" y="-1305" style="font-family:Times New Roman;font-size:20.00;fill:blue;">task7</text>
+</g>
+<!-- t0->t2 -->
+<g id="edge3" class="edge"><title>t0->t2</title>
+<path style="fill:none;stroke:blue;" d="M207,-1431C198,-1411 184,-1378 174,-1351"/>
+<polygon style="fill:blue;stroke:blue;" points="177.26,-1349.72 170,-1342 170.863,-1352.56 177.26,-1349.72"/>
+</g>
+<!-- t1 -->
+<g id="node13" class="node"><title>t1</title>
+<text text-anchor="middle" x="271" y="-1305" style="font-family:Times New Roman;font-size:20.00;fill:blue;">task2</text>
+</g>
+<!-- t0->t1 -->
+<g id="edge13" class="edge"><title>t0->t1</title>
+<path style="fill:none;stroke:blue;" d="M221,-1431C231,-1408 249,-1366 260,-1339"/>
+<polygon style="fill:blue;stroke:blue;" points="263.536,-1339.58 264,-1329 257.036,-1336.98 263.536,-1339.58"/>
+</g>
+<!-- t3 -->
+<g id="node5" class="node"><title>t3</title>
+<polygon style="fill:none;stroke:olivedrab;" points="208.912,-1141.63 208.912,-1160.37 179.088,-1173.63 136.912,-1173.63 107.088,-1160.37 107.088,-1141.63 136.912,-1128.37 179.088,-1128.37 208.912,-1141.63"/>
+<polygon style="fill:none;stroke:olivedrab;" points="212.912,-1139.03 212.912,-1162.97 179.937,-1177.63 136.063,-1177.63 103.088,-1162.97 103.088,-1139.03 136.063,-1124.37 179.937,-1124.37 212.912,-1139.03"/>
+<polygon style="fill:none;stroke:olivedrab;" points="216.912,-1136.43 216.912,-1165.57 180.786,-1181.63 135.214,-1181.63 99.0883,-1165.57 99.0883,-1136.43 135.214,-1120.37 180.786,-1120.37 216.912,-1136.43"/>
+<text text-anchor="middle" x="158" y="-1145" style="font-family:Times New Roman;font-size:20.00;fill:blue;">task8</text>
+</g>
+<!-- t2->t3 -->
+<g id="edge5" class="edge"><title>t2->t3</title>
+<path style="fill:none;stroke:blue;" d="M158,-1280C158,-1255 158,-1219 158,-1192"/>
+<polygon style="fill:blue;stroke:blue;" points="161.5,-1192 158,-1182 154.5,-1192 161.5,-1192"/>
+</g>
+<!-- t4 -->
+<g id="node7" class="node"><title>t4</title>
+<text text-anchor="middle" x="158" y="-997" style="font-family:Times New Roman;font-size:20.00;fill:blue;">task9</text>
+</g>
+<!-- t3->t4 -->
+<g id="edge7" class="edge"><title>t3->t4</title>
+<path style="fill:none;stroke:blue;" d="M158,-1120C158,-1094 158,-1057 158,-1031"/>
+<polygon style="fill:blue;stroke:blue;" points="161.5,-1031 158,-1021 154.5,-1031 161.5,-1031"/>
+</g>
+<!-- t5 -->
+<g id="node9" class="node"><title>t5</title>
+<text text-anchor="middle" x="103" y="-878" style="font-family:Times New Roman;font-size:20.00;fill:blue;">task10</text>
+</g>
+<!-- t4->t5 -->
+<g id="edge9" class="edge"><title>t4->t5</title>
+<path style="fill:none;stroke:blue;" d="M150,-985C140,-966 126,-934 115,-911"/>
+<polygon style="fill:blue;stroke:blue;" points="118.26,-909.717 111,-902 111.863,-912.56 118.26,-909.717"/>
+</g>
+<!-- t15 -->
+<g id="node43" class="node"><title>t15</title>
+<text text-anchor="middle" x="81" y="-403" style="font-family:Times New Roman;font-size:20.00;fill:blue;">task15</text>
+</g>
+<!-- t4->t15 -->
+<g id="edge47" class="edge"><title>t4->t15</title>
+<path style="fill:none;stroke:blue;" d="M161,-985C165,-962 171,-920 171,-884 171,-884 171,-884 171,-843 171,-804 178,-792 161,-758 149,-732 132,-735 116,-712 73,-646 63,-625 47,-548 42,-520 42,-512 47,-486 51,-469 58,-451 65,-436"/>
+<polygon style="fill:blue;stroke:blue;" points="68.2031,-437.441 70,-427 62.084,-434.042 68.2031,-437.441"/>
+</g>
+<!-- t6 -->
+<g id="node11" class="node"><title>t6</title>
+<polygon style="fill:none;stroke:olivedrab;" points="144.104,-779.627 144.104,-798.373 108.896,-811.627 59.1041,-811.627 23.8959,-798.373 23.8959,-779.627 59.1041,-766.373 108.896,-766.373 144.104,-779.627"/>
+<polygon style="fill:none;stroke:olivedrab;" points="148.104,-776.859 148.104,-801.141 109.624,-815.627 58.3761,-815.627 19.8959,-801.141 19.8959,-776.859 58.3761,-762.373 109.624,-762.373 148.104,-776.859"/>
+<polygon style="fill:none;stroke:olivedrab;" points="152.104,-774.091 152.104,-803.909 110.352,-819.627 57.6481,-819.627 15.8959,-803.909 15.8959,-774.091 57.6481,-758.373 110.352,-758.373 152.104,-774.091"/>
+<text text-anchor="middle" x="84" y="-783" style="font-family:Times New Roman;font-size:20.00;fill:blue;">task11</text>
+</g>
+<!-- t5->t6 -->
+<g id="edge11" class="edge"><title>t5->t6</title>
+<path style="fill:none;stroke:blue;" d="M99,-866C97,-856 95,-842 92,-830"/>
+<polygon style="fill:blue;stroke:blue;" points="95.3933,-829.119 90,-820 88.5292,-830.492 95.3933,-829.119"/>
+</g>
+<!-- t7 -->
+<g id="node23" class="node"><title>t7</title>
+<text text-anchor="middle" x="167" y="-688" style="font-family:Times New Roman;font-size:20.00;fill:blue;">task12</text>
+</g>
+<!-- t6->t7 -->
+<g id="edge25" class="edge"><title>t6->t7</title>
+<path style="fill:none;stroke:blue;" d="M111,-758C122,-746 134,-732 144,-720"/>
+<polygon style="fill:blue;stroke:blue;" points="147.049,-721.831 151,-712 141.781,-717.221 147.049,-721.831"/>
+</g>
+<!-- t16 -->
+<g id="node46" class="node"><title>t16</title>
+<text text-anchor="middle" x="81" y="-308" style="font-family:Times New Roman;font-size:20.00;fill:blue;">task16</text>
+</g>
+<!-- t6->t16 -->
+<g id="edge51" class="edge"><title>t6->t16</title>
+<path style="fill:none;stroke:blue;" d="M50,-761C34,-743 17,-720 17,-694 17,-694 17,-694 17,-409 17,-382 35,-357 52,-339"/>
+<polygon style="fill:blue;stroke:blue;" points="54.4038,-341.546 59,-332 49.454,-336.596 54.4038,-341.546"/>
+</g>
+<!-- t8 -->
+<g id="node15" class="node"><title>t8</title>
+<text text-anchor="middle" x="271" y="-1145" style="font-family:Times New Roman;font-size:20.00;fill:blue;">task3</text>
+</g>
+<!-- t1->t8 -->
+<g id="edge15" class="edge"><title>t1->t8</title>
+<path style="fill:none;stroke:blue;" d="M271,-1293C271,-1265 271,-1212 271,-1179"/>
+<polygon style="fill:blue;stroke:blue;" points="274.5,-1179 271,-1169 267.5,-1179 274.5,-1179"/>
+</g>
+<!-- t9 -->
+<g id="node17" class="node"><title>t9</title>
+<text text-anchor="middle" x="254" y="-997" style="font-family:Times New Roman;font-size:20.00;fill:blue;">task4</text>
+</g>
+<!-- t8->t9 -->
+<g id="edge17" class="edge"><title>t8->t9</title>
+<path style="fill:none;stroke:blue;" d="M269,-1133C265,-1107 260,-1061 257,-1031"/>
+<polygon style="fill:blue;stroke:blue;" points="260.478,-1030.6 256,-1021 253.512,-1031.3 260.478,-1030.6"/>
+</g>
+<!-- t21 -->
+<g id="node32" class="node"><title>t21</title>
+<text text-anchor="middle" x="226" y="-308" style="font-family:Times New Roman;font-size:20.00;fill:blue;">task21</text>
+</g>
+<!-- t8->t21 -->
+<g id="edge35" class="edge"><title>t8->t21</title>
+<path style="fill:none;stroke:blue;" d="M280,-1133C292,-1105 313,-1051 313,-1003 313,-1003 313,-1003 313,-409 313,-379 290,-355 267,-338"/>
+<polygon style="fill:blue;stroke:blue;" points="269.1,-335.2 259,-332 264.9,-340.8 269.1,-335.2"/>
+</g>
+<!-- t10 -->
+<g id="node19" class="node"><title>t10</title>
+<text text-anchor="middle" x="230" y="-878" style="font-family:Times New Roman;font-size:20.00;fill:blue;">task5</text>
+</g>
+<!-- t9->t10 -->
+<g id="edge19" class="edge"><title>t9->t10</title>
+<path style="fill:none;stroke:blue;" d="M250,-985C247,-966 240,-935 236,-912"/>
+<polygon style="fill:blue;stroke:blue;" points="239.393,-911.119 234,-902 232.529,-912.492 239.393,-911.119"/>
+</g>
+<!-- t11 -->
+<g id="node21" class="node"><title>t11</title>
+<text text-anchor="middle" x="227" y="-783" style="font-family:Times New Roman;font-size:20.00;fill:blue;">task6</text>
+</g>
+<!-- t10->t11 -->
+<g id="edge21" class="edge"><title>t10->t11</title>
+<path style="fill:none;stroke:blue;" d="M229,-866C229,-852 228,-833 228,-817"/>
+<polygon style="fill:blue;stroke:blue;" points="231.5,-817 228,-807 224.5,-817 231.5,-817"/>
+</g>
+<!-- t11->t7 -->
+<g id="edge23" class="edge"><title>t11->t7</title>
+<path style="fill:none;stroke:blue;" d="M215,-771C207,-757 194,-737 184,-721"/>
+<polygon style="fill:blue;stroke:blue;" points="186.916,-719.042 179,-712 180.797,-722.441 186.916,-719.042"/>
+</g>
+<!-- t13 -->
+<g id="node26" class="node"><title>t13</title>
+<text text-anchor="middle" x="239" y="-606" style="font-family:Times New Roman;font-size:20.00;fill:blue;">task18</text>
+</g>
+<!-- t7->t13 -->
+<g id="edge27" class="edge"><title>t7->t13</title>
+<path style="fill:none;stroke:blue;" d="M183,-676C193,-665 206,-650 216,-638"/>
+<polygon style="fill:blue;stroke:blue;" points="219.049,-639.831 223,-630 213.781,-635.221 219.049,-639.831"/>
+</g>
+<!-- t12 -->
+<g id="node39" class="node"><title>t12</title>
+<text text-anchor="middle" x="132" y="-606" style="font-family:Times New Roman;font-size:20.00;fill:blue;">task13</text>
+</g>
+<!-- t7->t12 -->
+<g id="edge41" class="edge"><title>t7->t12</title>
+<path style="fill:none;stroke:blue;" d="M159,-676C154,-666 149,-652 144,-639"/>
+<polygon style="fill:blue;stroke:blue;" points="147.26,-637.717 140,-630 140.863,-640.56 147.26,-637.717"/>
+</g>
+<!-- t19 -->
+<g id="node28" class="node"><title>t19</title>
+<text text-anchor="middle" x="252" y="-511" style="font-family:Times New Roman;font-size:20.00;fill:blue;">task19</text>
+</g>
+<!-- t13->t19 -->
+<g id="edge29" class="edge"><title>t13->t19</title>
+<path style="fill:none;stroke:blue;" d="M242,-594C244,-580 246,-561 248,-545"/>
+<polygon style="fill:blue;stroke:blue;" points="251.488,-545.299 249,-535 244.522,-544.602 251.488,-545.299"/>
+</g>
+<!-- t20 -->
+<g id="node30" class="node"><title>t20</title>
+<polygon style="fill:none;stroke:olivedrab;" points="286.104,-399.627 286.104,-418.373 250.896,-431.627 201.104,-431.627 165.896,-418.373 165.896,-399.627 201.104,-386.373 250.896,-386.373 286.104,-399.627"/>
+<polygon style="fill:none;stroke:olivedrab;" points="290.104,-396.859 290.104,-421.141 251.624,-435.627 200.376,-435.627 161.896,-421.141 161.896,-396.859 200.376,-382.373 251.624,-382.373 290.104,-396.859"/>
+<polygon style="fill:none;stroke:olivedrab;" points="294.104,-394.091 294.104,-423.909 252.352,-439.627 199.648,-439.627 157.896,-423.909 157.896,-394.091 199.648,-378.373 252.352,-378.373 294.104,-394.091"/>
+<text text-anchor="middle" x="226" y="-403" style="font-family:Times New Roman;font-size:20.00;fill:blue;">task20</text>
+</g>
+<!-- t19->t20 -->
+<g id="edge31" class="edge"><title>t19->t20</title>
+<path style="fill:none;stroke:blue;" d="M248,-499C245,-486 240,-467 236,-450"/>
+<polygon style="fill:blue;stroke:blue;" points="239.226,-448.573 233,-440 232.521,-450.584 239.226,-448.573"/>
+</g>
+<!-- t20->t21 -->
+<g id="edge33" class="edge"><title>t20->t21</title>
+<path style="fill:none;stroke:blue;" d="M226,-378C226,-367 226,-354 226,-342"/>
+<polygon style="fill:blue;stroke:blue;" points="229.5,-342 226,-332 222.5,-342 229.5,-342"/>
+</g>
+<!-- t22 -->
+<g id="node35" class="node"><title>t22</title>
+<text text-anchor="middle" x="195" y="-226" style="font-family:Times New Roman;font-size:20.00;fill:blue;">task22</text>
+</g>
+<!-- t21->t22 -->
+<g id="edge37" class="edge"><title>t21->t22</title>
+<path style="fill:none;stroke:blue;" d="M219,-296C215,-286 210,-272 205,-259"/>
+<polygon style="fill:blue;stroke:blue;" points="208.483,-258.38 202,-250 201.842,-260.594 208.483,-258.38"/>
+</g>
+<!-- t23 -->
+<g id="node37" class="node"><title>t23</title>
+<text text-anchor="middle" x="187" y="-144" style="font-family:Times New Roman;font-size:20.00;fill:blue;">task23</text>
+</g>
+<!-- t22->t23 -->
+<g id="edge39" class="edge"><title>t22->t23</title>
+<path style="fill:none;stroke:blue;" d="M193,-214C192,-204 191,-190 190,-178"/>
+<polygon style="fill:blue;stroke:blue;" points="193.478,-177.602 189,-168 186.512,-178.299 193.478,-177.602"/>
+</g>
+<!-- t24 -->
+<g id="node53" class="node"><title>t24</title>
+<polygon style="fill:none;stroke:orange;" points="174.104,-45.6274 174.104,-64.3726 138.896,-77.6274 89.1041,-77.6274 53.8959,-64.3726 53.8959,-45.6274 89.1041,-32.3726 138.896,-32.3726 174.104,-45.6274"/>
+<polygon style="fill:none;stroke:orange;" points="178.104,-42.8592 178.104,-67.1408 139.624,-81.6274 88.3761,-81.6274 49.8959,-67.1408 49.8959,-42.8592 88.3761,-28.3726 139.624,-28.3726 178.104,-42.8592"/>
+<polygon style="fill:none;stroke:orange;" points="182.104,-40.091 182.104,-69.909 140.352,-85.6274 87.6481,-85.6274 45.8959,-69.909 45.8959,-40.091 87.6481,-24.3726 140.352,-24.3726 182.104,-40.091"/>
+<text text-anchor="middle" x="114" y="-49" style="font-family:Times New Roman;font-size:20.00;fill:orange;">task25</text>
+</g>
+<!-- t23->t24 -->
+<g id="edge57" class="edge"><title>t23->t24</title>
+<path style="fill:none;stroke:blue;" d="M173,-132C165,-121 154,-107 144,-94"/>
+<polygon style="fill:blue;stroke:blue;" points="146.8,-91.9 138,-86 141.2,-96.1 146.8,-91.9"/>
+</g>
+<!-- t14 -->
+<g id="node41" class="node"><title>t14</title>
+<polygon style="fill:none;stroke:olivedrab;" points="184.104,-507.627 184.104,-526.373 148.896,-539.627 99.1041,-539.627 63.8959,-526.373 63.8959,-507.627 99.1041,-494.373 148.896,-494.373 184.104,-507.627"/>
+<polygon style="fill:none;stroke:olivedrab;" points="188.104,-504.859 188.104,-529.141 149.624,-543.627 98.3761,-543.627 59.8959,-529.141 59.8959,-504.859 98.3761,-490.373 149.624,-490.373 188.104,-504.859"/>
+<polygon style="fill:none;stroke:olivedrab;" points="192.104,-502.091 192.104,-531.909 150.352,-547.627 97.6481,-547.627 55.8959,-531.909 55.8959,-502.091 97.6481,-486.373 150.352,-486.373 192.104,-502.091"/>
+<text text-anchor="middle" x="124" y="-511" style="font-family:Times New Roman;font-size:20.00;fill:blue;">task14</text>
+</g>
+<!-- t12->t14 -->
+<g id="edge43" class="edge"><title>t12->t14</title>
+<path style="fill:none;stroke:blue;" d="M130,-594C129,-584 128,-571 127,-558"/>
+<polygon style="fill:blue;stroke:blue;" points="130.478,-557.602 126,-548 123.512,-558.299 130.478,-557.602"/>
+</g>
+<!-- t14->t15 -->
+<g id="edge45" class="edge"><title>t14->t15</title>
+<path style="fill:none;stroke:blue;" d="M112,-486C105,-470 98,-451 92,-436"/>
+<polygon style="fill:blue;stroke:blue;" points="95.2598,-434.717 88,-427 88.8631,-437.56 95.2598,-434.717"/>
+</g>
+<!-- t15->t16 -->
+<g id="edge49" class="edge"><title>t15->t16</title>
+<path style="fill:none;stroke:blue;" d="M81,-391C81,-377 81,-358 81,-342"/>
+<polygon style="fill:blue;stroke:blue;" points="84.5001,-342 81,-332 77.5001,-342 84.5001,-342"/>
+</g>
+<!-- t17 -->
+<g id="node49" class="node"><title>t17</title>
+<text text-anchor="middle" x="83" y="-226" style="font-family:Times New Roman;font-size:20.00;fill:blue;">task17</text>
+</g>
+<!-- t16->t17 -->
+<g id="edge53" class="edge"><title>t16->t17</title>
+<path style="fill:none;stroke:blue;" d="M81,-296C82,-286 82,-272 82,-260"/>
+<polygon style="fill:blue;stroke:blue;" points="85.4875,-260.299 83,-250 78.5222,-259.602 85.4875,-260.299"/>
+</g>
+<!-- t18 -->
+<g id="node51" class="node"><title>t18</title>
+<text text-anchor="middle" x="83" y="-144" style="font-family:Times New Roman;font-size:20.00;fill:blue;">task24</text>
+</g>
+<!-- t17->t18 -->
+<g id="edge55" class="edge"><title>t17->t18</title>
+<path style="fill:none;stroke:blue;" d="M83,-214C83,-204 83,-190 83,-178"/>
+<polygon style="fill:blue;stroke:blue;" points="86.5001,-178 83,-168 79.5001,-178 86.5001,-178"/>
+</g>
+<!-- t18->t24 -->
+<g id="edge59" class="edge"><title>t18->t24</title>
+<path style="fill:none;stroke:blue;" d="M89,-132C92,-122 97,-108 101,-96"/>
+<polygon style="fill:blue;stroke:blue;" points="104.479,-96.584 104,-86 97.7741,-94.5725 104.479,-96.584"/>
+</g>
+<!-- k1 -->
+<g id="node57" class="node"><title>k1</title>
+<polygon style="fill:none;stroke:orange;" points="602.125,-1441.38 602.125,-1456.62 559.875,-1467.38 500.125,-1467.38 457.875,-1456.62 457.875,-1441.38 500.125,-1430.62 559.875,-1430.62 602.125,-1441.38"/>
+<polygon style="fill:none;stroke:orange;" points="606.125,-1438.28 606.125,-1459.72 560.377,-1471.38 499.623,-1471.38 453.875,-1459.72 453.875,-1438.28 499.623,-1426.62 560.377,-1426.62 606.125,-1438.28"/>
+<polygon style="fill:none;stroke:orange;" points="610.125,-1435.17 610.125,-1462.83 560.879,-1475.38 499.121,-1475.38 449.875,-1462.83 449.875,-1435.17 499.121,-1422.62 560.879,-1422.62 610.125,-1435.17"/>
+<text text-anchor="middle" x="530" y="-1444.5" style="font-family:Times New Roman;font-size:15.00;fill:orange;">Final target</text>
+</g>
+<!-- k2 -->
+<g id="node58" class="node"><title>k2</title>
+<polygon style="fill:red;stroke:black;" points="586,-1400 474,-1400 474,-1364 586,-1364 586,-1400"/>
+<text text-anchor="middle" x="530" y="-1377.5" style="font-family:Times New Roman;font-size:15.00;">Vicious cycle</text>
+</g>
+<!-- k1->k2 -->
+<g id="edge62" class="edge"><title>k1->k2</title>
+<path style="fill:none;stroke:red;" d="M523,-1423C523,-1419 523,-1414 523,-1410"/>
+<polygon style="fill:red;stroke:red;" points="526.488,-1410.3 524,-1400 519.522,-1409.6 526.488,-1410.3"/>
+</g>
+<!-- k2->k1 -->
+<g id="edge64" class="edge"><title>k2->k1</title>
+<path style="fill:none;stroke:red;" d="M536,-1400C537,-1404 537,-1408 537,-1413"/>
+<polygon style="fill:red;stroke:red;" points="533.5,-1413 537,-1423 540.5,-1413 533.5,-1413"/>
+</g>
+<!-- k3 -->
+<g id="node59" class="node"><title>k3</title>
+<text text-anchor="middle" x="530" y="-1306.5" style="font-family:Times New Roman;font-size:15.00;fill:blue;">Task to run</text>
+</g>
+<!-- k2->k3 -->
+<g id="edge66" class="edge"><title>k2->k3</title>
+<path style="fill:none;stroke:blue;" d="M530,-1364C530,-1356 530,-1348 530,-1339"/>
+<polygon style="fill:blue;stroke:blue;" points="533.5,-1339 530,-1329 526.5,-1339 533.5,-1339"/>
+</g>
+<!-- k4 -->
+<g id="node60" class="node"><title>k4</title>
+<polygon style="fill:none;stroke:blue;" points="713.848,-1223.38 713.848,-1238.62 606.152,-1249.38 453.848,-1249.38 346.152,-1238.62 346.152,-1223.38 453.848,-1212.62 606.152,-1212.62 713.848,-1223.38"/>
+<polygon style="fill:none;stroke:blue;" points="717.848,-1219.76 717.848,-1242.24 606.352,-1253.38 453.648,-1253.38 342.152,-1242.24 342.152,-1219.76 453.648,-1208.62 606.352,-1208.62 717.848,-1219.76"/>
+<polygon style="fill:none;stroke:blue;" points="721.848,-1216.14 721.848,-1245.86 606.551,-1257.38 453.449,-1257.38 338.152,-1245.86 338.152,-1216.14 453.449,-1204.62 606.551,-1204.62 721.848,-1216.14"/>
+<text text-anchor="middle" x="530" y="-1226.5" style="font-family:Times New Roman;font-size:15.00;fill:blue;">Force pipeline run from this task</text>
+</g>
+<!-- k3->k4 -->
+<g id="edge67" class="edge"><title>k3->k4</title>
+<path style="fill:none;stroke:blue;" d="M530,-1293C530,-1285 530,-1277 530,-1268"/>
+<polygon style="fill:blue;stroke:blue;" points="533.5,-1268 530,-1258 526.5,-1268 533.5,-1268"/>
+</g>
+<!-- k5 -->
+<g id="node61" class="node"><title>k5</title>
+<polygon style="fill:none;stroke:gray;" points="662.229,-1143.38 662.229,-1158.62 584.771,-1169.38 475.229,-1169.38 397.771,-1158.62 397.771,-1143.38 475.229,-1132.62 584.771,-1132.62 662.229,-1143.38"/>
+<polygon style="fill:none;stroke:gray;" points="666.229,-1139.9 666.229,-1162.1 585.048,-1173.38 474.952,-1173.38 393.771,-1162.1 393.771,-1139.9 474.952,-1128.62 585.048,-1128.62 666.229,-1139.9"/>
+<polygon style="fill:none;stroke:gray;" points="670.229,-1136.42 670.229,-1165.58 585.325,-1177.38 474.675,-1177.38 389.771,-1165.58 389.771,-1136.42 474.675,-1124.62 585.325,-1124.62 670.229,-1136.42"/>
+<text text-anchor="middle" x="530" y="-1146.5" style="font-family:Times New Roman;font-size:15.00;fill:gray;">Up-to-date Final target</text>
+</g>
+<!-- k4->k5 -->
+<g id="edge68" class="edge"><title>k4->k5</title>
+<path style="fill:none;stroke:blue;" d="M530,-1204C530,-1198 530,-1193 530,-1187"/>
+<polygon style="fill:blue;stroke:blue;" points="533.5,-1187 530,-1177 526.5,-1187 533.5,-1187"/>
+</g>
+<!-- k6 -->
+<g id="node62" class="node"><title>k6</title>
+<polygon style="fill:none;stroke:olivedrab;" points="706.07,-1063.38 706.07,-1078.62 602.93,-1089.38 457.07,-1089.38 353.93,-1078.62 353.93,-1063.38 457.07,-1052.62 602.93,-1052.62 706.07,-1063.38"/>
+<polygon style="fill:none;stroke:olivedrab;" points="710.07,-1059.78 710.07,-1082.22 603.139,-1093.38 456.861,-1093.38 349.93,-1082.22 349.93,-1059.78 456.861,-1048.62 603.139,-1048.62 710.07,-1059.78"/>
+<polygon style="fill:none;stroke:olivedrab;" points="714.07,-1056.18 714.07,-1085.82 603.347,-1097.38 456.653,-1097.38 345.93,-1085.82 345.93,-1056.18 456.653,-1044.62 603.347,-1044.62 714.07,-1056.18"/>
+<text text-anchor="middle" x="530" y="-1066.5" style="font-family:Times New Roman;font-size:15.00;fill:blue;">Up-to-date task forced to rerun</text>
+</g>
+<!-- k5->k6 -->
+<g id="edge70" class="edge"><title>k5->k6</title>
+<path style="fill:none;stroke:gray;" d="M530,-1124C530,-1118 530,-1113 530,-1107"/>
+<polygon style="fill:gray;stroke:gray;" points="533.5,-1107 530,-1097 526.5,-1107 533.5,-1107"/>
+</g>
+<!-- k7 -->
+<g id="node63" class="node"><title>k7</title>
+<polygon style="fill:olivedrab;stroke:olivedrab;" points="622.839,-995.385 622.839,-1010.62 568.455,-1021.38 491.545,-1021.38 437.161,-1010.62 437.161,-995.385 491.545,-984.615 568.455,-984.615 622.839,-995.385"/>
+<text text-anchor="middle" x="530" y="-998.5" style="font-family:Times New Roman;font-size:15.00;">Up-to-date task</text>
+</g>
+<!-- k6->k7 -->
+<g id="edge71" class="edge"><title>k6->k7</title>
+<path style="fill:none;stroke:gray;" d="M530,-1045C530,-1041 530,-1036 530,-1032"/>
+<polygon style="fill:gray;stroke:gray;" points="533.5,-1032 530,-1022 526.5,-1032 533.5,-1032"/>
+</g>
+<!-- k8 -->
+<g id="node64" class="node"><title>k8</title>
+<polygon style="fill:white;stroke:gray;" points="667.179,-935.385 667.179,-950.615 586.821,-961.385 473.179,-961.385 392.821,-950.615 392.821,-935.385 473.179,-924.615 586.821,-924.615 667.179,-935.385"/>
+<text text-anchor="middle" x="530" y="-938.5" style="font-family:Times New Roman;font-size:15.00;fill:gray;">Up-to-date dependence</text>
+</g>
+<!-- k7->k8 -->
+<g id="edge72" class="edge"><title>k7->k8</title>
+<path style="fill:none;stroke:gray;" d="M530,-984C530,-980 530,-976 530,-971"/>
+<polygon style="fill:gray;stroke:gray;" points="533.5,-971 530,-961 526.5,-971 533.5,-971"/>
+</g>
+</g>
+</svg>
diff --git a/doc/complex_dags/svg/non_dag.svg b/doc/complex_dags/svg/non_dag.svg
new file mode 100644
index 0000000..5912dbe
--- /dev/null
+++ b/doc/complex_dags/svg/non_dag.svg
@@ -0,0 +1,380 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.0//EN"
+ "http://www.w3.org/TR/2001/REC-SVG-20010904/DTD/svg10.dtd" [
+ <!ATTLIST svg xmlns:xlink CDATA #FIXED "http://www.w3.org/1999/xlink">
+]>
+<!-- Generated by Graphviz version 2.20.2 (Mon Mar 30 10:09:11 UTC 2009)
+ For user: (lg) leo goodstadt -->
+<!-- Title: tree Pages: 1 -->
+<svg width="282pt" height="576pt"
+ viewBox="0.00 0.00 281.56 576.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<g id="graph0" class="graph" transform="scale(0.402235 0.402235) rotate(0) translate(4 1428)">
+<title>tree</title>
+<polygon style="fill:white;stroke:white;" points="-4,4 -4,-1428 696,-1428 696,4 -4,4"/>
+<g id="cluster2" class="cluster"><title>clustertasks</title>
+<polygon style="fill:none;stroke:black;" points="8,-16 8,-1407 276,-1407 276,-16 8,-16"/>
+<text text-anchor="middle" x="142" y="-1376" style="font-family:Times New Roman;font-size:30.00;">Pipeline:</text>
+</g>
+<g id="cluster3" class="cluster"><title>clusterkey</title>
+<polygon style="fill:#e5e5e5;stroke:#e5e5e5;" points="284,-838 284,-1416 684,-1416 684,-838 284,-838"/>
+<text text-anchor="middle" x="484" y="-1385" style="font-family:Times New Roman;font-size:30.00;">Key:</text>
+</g>
+<!-- t2 -->
+<g id="node2" class="node"><title>t2</title>
+<text text-anchor="middle" x="83" y="-1206" style="font-family:Times New Roman;font-size:20.00;fill:blue;">task7</text>
+</g>
+<!-- t3 -->
+<g id="node5" class="node"><title>t3</title>
+<text text-anchor="middle" x="79" y="-1063" style="font-family:Times New Roman;font-size:20.00;fill:blue;">task8</text>
+</g>
+<!-- t2->t3 -->
+<g id="edge5" class="edge"><title>t2->t3</title>
+<path style="fill:none;stroke:blue;" d="M82,-1194C81,-1169 80,-1126 79,-1097"/>
+<polygon style="fill:blue;stroke:blue;" points="82.5001,-1097 79,-1087 75.5001,-1097 82.5001,-1097"/>
+</g>
+<!-- t0 -->
+<g id="node3" class="node"><title>t0</title>
+<polygon style="fill:red;stroke:black;" points="167,-1355 95,-1355 95,-1319 167,-1319 167,-1355"/>
+<text text-anchor="middle" x="131" y="-1331" style="font-family:Times New Roman;font-size:20.00;">task1</text>
+</g>
+<!-- t0->t2 -->
+<g id="edge3" class="edge"><title>t0->t2</title>
+<path style="fill:none;stroke:blue;" d="M124,-1319C116,-1298 103,-1264 94,-1239"/>
+<polygon style="fill:blue;stroke:blue;" points="97.2598,-1237.72 90,-1230 90.8631,-1240.56 97.2598,-1237.72"/>
+</g>
+<!-- t1 -->
+<g id="node12" class="node"><title>t1</title>
+<polygon style="fill:red;stroke:black;" points="199,-1163 127,-1163 127,-1127 199,-1127 199,-1163"/>
+<text text-anchor="middle" x="163" y="-1139" style="font-family:Times New Roman;font-size:20.00;">task2</text>
+</g>
+<!-- t0->t1 -->
+<g id="edge51" class="edge"><title>t0->t1</title>
+<path style="fill:none;stroke:red;" d="M130,-1319C132,-1285 144,-1213 154,-1173"/>
+<polygon style="fill:red;stroke:red;" points="157.471,-1173.49 156,-1163 150.607,-1172.12 157.471,-1173.49"/>
+</g>
+<!-- t4 -->
+<g id="node7" class="node"><title>t4</title>
+<text text-anchor="middle" x="78" y="-919" style="font-family:Times New Roman;font-size:20.00;fill:blue;">task9</text>
+</g>
+<!-- t3->t4 -->
+<g id="edge7" class="edge"><title>t3->t4</title>
+<path style="fill:none;stroke:blue;" d="M79,-1051C79,-1026 78,-982 78,-953"/>
+<polygon style="fill:blue;stroke:blue;" points="81.5001,-953 78,-943 74.5001,-953 81.5001,-953"/>
+</g>
+<!-- t5 -->
+<g id="node10" class="node"><title>t5</title>
+<polygon style="fill:red;stroke:black;" points="120,-824 36,-824 36,-788 120,-788 120,-824"/>
+<text text-anchor="middle" x="78" y="-800" style="font-family:Times New Roman;font-size:20.00;">task10</text>
+</g>
+<!-- t4->t5 -->
+<g id="edge9" class="edge"><title>t4->t5</title>
+<path style="fill:none;stroke:blue;" d="M78,-907C78,-888 78,-857 78,-834"/>
+<polygon style="fill:blue;stroke:blue;" points="81.5001,-834 78,-824 74.5001,-834 81.5001,-834"/>
+</g>
+<!-- t15 -->
+<g id="node36" class="node"><title>t15</title>
+<text text-anchor="middle" x="84" y="-390" style="font-family:Times New Roman;font-size:20.00;fill:blue;">task15</text>
+</g>
+<!-- t4->t15 -->
+<g id="edge37" class="edge"><title>t4->t15</title>
+<path style="fill:none;stroke:blue;" d="M62,-907C44,-885 17,-845 17,-806 17,-806 17,-806 17,-478 17,-455 33,-435 48,-421"/>
+<polygon style="fill:blue;stroke:blue;" points="50.779,-423.219 56,-414 46.1694,-417.951 50.779,-423.219"/>
+</g>
+<!-- t10 -->
+<g id="node17" class="node"><title>t10</title>
+<polygon style="fill:red;stroke:black;" points="110,-742 38,-742 38,-706 110,-706 110,-742"/>
+<text text-anchor="middle" x="74" y="-718" style="font-family:Times New Roman;font-size:20.00;">task5</text>
+</g>
+<!-- t5->t10 -->
+<g id="edge59" class="edge"><title>t5->t10</title>
+<path style="fill:none;stroke:red;" d="M77,-788C76,-778 76,-764 76,-752"/>
+<polygon style="fill:red;stroke:red;" points="79.4778,-751.602 75,-742 72.5125,-752.299 79.4778,-751.602"/>
+</g>
+<!-- t6 -->
+<g id="node41" class="node"><title>t6</title>
+<polygon style="fill:red;stroke:black;" points="232,-742 148,-742 148,-706 232,-706 232,-742"/>
+<text text-anchor="middle" x="190" y="-718" style="font-family:Times New Roman;font-size:20.00;">task11</text>
+</g>
+<!-- t5->t6 -->
+<g id="edge57" class="edge"><title>t5->t6</title>
+<path style="fill:none;stroke:red;" d="M103,-788C119,-777 140,-761 157,-748"/>
+<polygon style="fill:red;stroke:red;" points="159.1,-750.8 165,-742 154.9,-745.2 159.1,-750.8"/>
+</g>
+<!-- t8 -->
+<g id="node11" class="node"><title>t8</title>
+<text text-anchor="middle" x="174" y="-919" style="font-family:Times New Roman;font-size:20.00;fill:blue;">task3</text>
+</g>
+<!-- t9 -->
+<g id="node14" class="node"><title>t9</title>
+<text text-anchor="middle" x="174" y="-800" style="font-family:Times New Roman;font-size:20.00;fill:blue;">task4</text>
+</g>
+<!-- t8->t9 -->
+<g id="edge13" class="edge"><title>t8->t9</title>
+<path style="fill:none;stroke:blue;" d="M174,-907C174,-888 174,-857 174,-834"/>
+<polygon style="fill:blue;stroke:blue;" points="177.5,-834 174,-824 170.5,-834 177.5,-834"/>
+</g>
+<!-- t21 -->
+<g id="node25" class="node"><title>t21</title>
+<text text-anchor="middle" x="216" y="-308" style="font-family:Times New Roman;font-size:20.00;fill:blue;">task21</text>
+</g>
+<!-- t8->t21 -->
+<g id="edge25" class="edge"><title>t8->t21</title>
+<path style="fill:none;stroke:blue;" d="M201,-907C228,-886 267,-849 267,-806 267,-806 267,-806 267,-396 267,-375 255,-355 243,-340"/>
+<polygon style="fill:blue;stroke:blue;" points="245.219,-337.221 236,-332 239.951,-341.831 245.219,-337.221"/>
+</g>
+<!-- t1->t0 -->
+<g id="edge53" class="edge"><title>t1->t0</title>
+<path style="fill:none;stroke:red;" d="M164,-1163C161,-1197 150,-1269 140,-1309"/>
+<polygon style="fill:red;stroke:red;" points="136.529,-1308.51 138,-1319 143.393,-1309.88 136.529,-1308.51"/>
+</g>
+<!-- t1->t8 -->
+<g id="edge11" class="edge"><title>t1->t8</title>
+<path style="fill:none;stroke:blue;" d="M164,-1127C166,-1088 170,-1000 173,-953"/>
+<polygon style="fill:blue;stroke:blue;" points="176.5,-953 173,-943 169.5,-953 176.5,-953"/>
+</g>
+<!-- t9->t10 -->
+<g id="edge15" class="edge"><title>t9->t10</title>
+<path style="fill:none;stroke:blue;" d="M152,-788C138,-777 119,-762 104,-749"/>
+<polygon style="fill:blue;stroke:blue;" points="105.831,-745.951 96,-742 101.221,-751.219 105.831,-745.951"/>
+</g>
+<!-- t11 -->
+<g id="node55" class="node"><title>t11</title>
+<polygon style="fill:red;stroke:black;" points="108,-660 36,-660 36,-624 108,-624 108,-660"/>
+<text text-anchor="middle" x="72" y="-636" style="font-family:Times New Roman;font-size:20.00;">task6</text>
+</g>
+<!-- t10->t11 -->
+<g id="edge61" class="edge"><title>t10->t11</title>
+<path style="fill:none;stroke:red;" d="M74,-706C73,-696 73,-682 73,-670"/>
+<polygon style="fill:red;stroke:red;" points="76.4778,-669.602 72,-660 69.5125,-670.299 76.4778,-669.602"/>
+</g>
+<!-- t13 -->
+<g id="node18" class="node"><title>t13</title>
+<text text-anchor="middle" x="186" y="-554" style="font-family:Times New Roman;font-size:20.00;fill:blue;">task18</text>
+</g>
+<!-- t19 -->
+<g id="node21" class="node"><title>t19</title>
+<text text-anchor="middle" x="186" y="-472" style="font-family:Times New Roman;font-size:20.00;fill:blue;">task19</text>
+</g>
+<!-- t13->t19 -->
+<g id="edge19" class="edge"><title>t13->t19</title>
+<path style="fill:none;stroke:blue;" d="M186,-542C186,-532 186,-518 186,-506"/>
+<polygon style="fill:blue;stroke:blue;" points="189.5,-506 186,-496 182.5,-506 189.5,-506"/>
+</g>
+<!-- t7 -->
+<g id="node19" class="node"><title>t7</title>
+<polygon style="fill:red;stroke:black;" points="228,-660 144,-660 144,-624 228,-624 228,-660"/>
+<text text-anchor="middle" x="186" y="-636" style="font-family:Times New Roman;font-size:20.00;">task12</text>
+</g>
+<!-- t7->t5 -->
+<g id="edge55" class="edge"><title>t7->t5</title>
+<path style="fill:none;stroke:red;" d="M172,-660C162,-673 150,-690 139,-706 130,-721 129,-726 119,-742 112,-755 103,-768 96,-779"/>
+<polygon style="fill:red;stroke:red;" points="92.6349,-777.738 90,-788 98.4592,-781.621 92.6349,-777.738"/>
+</g>
+<!-- t7->t13 -->
+<g id="edge17" class="edge"><title>t7->t13</title>
+<path style="fill:none;stroke:blue;" d="M186,-624C186,-614 186,-600 186,-588"/>
+<polygon style="fill:blue;stroke:blue;" points="189.5,-588 186,-578 182.5,-588 189.5,-588"/>
+</g>
+<!-- t12 -->
+<g id="node32" class="node"><title>t12</title>
+<text text-anchor="middle" x="84" y="-554" style="font-family:Times New Roman;font-size:20.00;fill:blue;">task13</text>
+</g>
+<!-- t7->t12 -->
+<g id="edge31" class="edge"><title>t7->t12</title>
+<path style="fill:none;stroke:blue;" d="M163,-624C149,-613 130,-598 114,-585"/>
+<polygon style="fill:blue;stroke:blue;" points="115.831,-581.951 106,-578 111.221,-587.219 115.831,-581.951"/>
+</g>
+<!-- t20 -->
+<g id="node23" class="node"><title>t20</title>
+<text text-anchor="middle" x="186" y="-390" style="font-family:Times New Roman;font-size:20.00;fill:blue;">task20</text>
+</g>
+<!-- t19->t20 -->
+<g id="edge21" class="edge"><title>t19->t20</title>
+<path style="fill:none;stroke:blue;" d="M186,-460C186,-450 186,-436 186,-424"/>
+<polygon style="fill:blue;stroke:blue;" points="189.5,-424 186,-414 182.5,-424 189.5,-424"/>
+</g>
+<!-- t20->t21 -->
+<g id="edge23" class="edge"><title>t20->t21</title>
+<path style="fill:none;stroke:blue;" d="M193,-378C196,-368 201,-354 206,-341"/>
+<polygon style="fill:blue;stroke:blue;" points="209.158,-342.594 209,-332 202.517,-340.38 209.158,-342.594"/>
+</g>
+<!-- t22 -->
+<g id="node28" class="node"><title>t22</title>
+<text text-anchor="middle" x="216" y="-226" style="font-family:Times New Roman;font-size:20.00;fill:blue;">task22</text>
+</g>
+<!-- t21->t22 -->
+<g id="edge27" class="edge"><title>t21->t22</title>
+<path style="fill:none;stroke:blue;" d="M216,-296C216,-286 216,-272 216,-260"/>
+<polygon style="fill:blue;stroke:blue;" points="219.5,-260 216,-250 212.5,-260 219.5,-260"/>
+</g>
+<!-- t23 -->
+<g id="node30" class="node"><title>t23</title>
+<text text-anchor="middle" x="216" y="-144" style="font-family:Times New Roman;font-size:20.00;fill:blue;">task23</text>
+</g>
+<!-- t22->t23 -->
+<g id="edge29" class="edge"><title>t22->t23</title>
+<path style="fill:none;stroke:blue;" d="M216,-214C216,-204 216,-190 216,-178"/>
+<polygon style="fill:blue;stroke:blue;" points="219.5,-178 216,-168 212.5,-178 219.5,-178"/>
+</g>
+<!-- t24 -->
+<g id="node47" class="node"><title>t24</title>
+<polygon style="fill:none;stroke:orange;" points="204.104,-45.6274 204.104,-64.3726 168.896,-77.6274 119.104,-77.6274 83.8959,-64.3726 83.8959,-45.6274 119.104,-32.3726 168.896,-32.3726 204.104,-45.6274"/>
+<polygon style="fill:none;stroke:orange;" points="208.104,-42.8592 208.104,-67.1408 169.624,-81.6274 118.376,-81.6274 79.8959,-67.1408 79.8959,-42.8592 118.376,-28.3726 169.624,-28.3726 208.104,-42.8592"/>
+<polygon style="fill:none;stroke:orange;" points="212.104,-40.091 212.104,-69.909 170.352,-85.6274 117.648,-85.6274 75.8959,-69.909 75.8959,-40.091 117.648,-24.3726 170.352,-24.3726 212.104,-40.091"/>
+<text text-anchor="middle" x="144" y="-49" style="font-family:Times New Roman;font-size:20.00;fill:orange;">task25</text>
+</g>
+<!-- t23->t24 -->
+<g id="edge47" class="edge"><title>t23->t24</title>
+<path style="fill:none;stroke:blue;" d="M202,-132C194,-121 183,-107 173,-94"/>
+<polygon style="fill:blue;stroke:blue;" points="175.8,-91.9 167,-86 170.2,-96.1 175.8,-91.9"/>
+</g>
+<!-- t14 -->
+<g id="node34" class="node"><title>t14</title>
+<text text-anchor="middle" x="84" y="-472" style="font-family:Times New Roman;font-size:20.00;fill:blue;">task14</text>
+</g>
+<!-- t12->t14 -->
+<g id="edge33" class="edge"><title>t12->t14</title>
+<path style="fill:none;stroke:blue;" d="M84,-542C84,-532 84,-518 84,-506"/>
+<polygon style="fill:blue;stroke:blue;" points="87.5001,-506 84,-496 80.5001,-506 87.5001,-506"/>
+</g>
+<!-- t14->t15 -->
+<g id="edge35" class="edge"><title>t14->t15</title>
+<path style="fill:none;stroke:blue;" d="M84,-460C84,-450 84,-436 84,-424"/>
+<polygon style="fill:blue;stroke:blue;" points="87.5001,-424 84,-414 80.5001,-424 87.5001,-424"/>
+</g>
+<!-- t16 -->
+<g id="node39" class="node"><title>t16</title>
+<text text-anchor="middle" x="114" y="-308" style="font-family:Times New Roman;font-size:20.00;fill:blue;">task16</text>
+</g>
+<!-- t15->t16 -->
+<g id="edge39" class="edge"><title>t15->t16</title>
+<path style="fill:none;stroke:blue;" d="M91,-378C94,-368 99,-354 104,-341"/>
+<polygon style="fill:blue;stroke:blue;" points="107.158,-342.594 107,-332 100.517,-340.38 107.158,-342.594"/>
+</g>
+<!-- t17 -->
+<g id="node43" class="node"><title>t17</title>
+<text text-anchor="middle" x="114" y="-226" style="font-family:Times New Roman;font-size:20.00;fill:blue;">task17</text>
+</g>
+<!-- t16->t17 -->
+<g id="edge43" class="edge"><title>t16->t17</title>
+<path style="fill:none;stroke:blue;" d="M114,-296C114,-286 114,-272 114,-260"/>
+<polygon style="fill:blue;stroke:blue;" points="117.5,-260 114,-250 110.5,-260 117.5,-260"/>
+</g>
+<!-- t6->t7 -->
+<g id="edge65" class="edge"><title>t6->t7</title>
+<path style="fill:none;stroke:red;" d="M189,-706C188,-696 188,-682 188,-670"/>
+<polygon style="fill:red;stroke:red;" points="191.478,-669.602 187,-660 184.512,-670.299 191.478,-669.602"/>
+</g>
+<!-- t6->t16 -->
+<g id="edge41" class="edge"><title>t6->t16</title>
+<path style="fill:none;stroke:blue;" d="M212,-706C228,-691 247,-667 247,-642 247,-642 247,-642 247,-396 247,-376 203,-352 165,-335"/>
+<polygon style="fill:blue;stroke:blue;" points="166.56,-331.863 156,-331 163.717,-338.26 166.56,-331.863"/>
+</g>
+<!-- t18 -->
+<g id="node45" class="node"><title>t18</title>
+<text text-anchor="middle" x="114" y="-144" style="font-family:Times New Roman;font-size:20.00;fill:blue;">task24</text>
+</g>
+<!-- t17->t18 -->
+<g id="edge45" class="edge"><title>t17->t18</title>
+<path style="fill:none;stroke:blue;" d="M114,-214C114,-204 114,-190 114,-178"/>
+<polygon style="fill:blue;stroke:blue;" points="117.5,-178 114,-168 110.5,-178 117.5,-178"/>
+</g>
+<!-- t18->t24 -->
+<g id="edge49" class="edge"><title>t18->t24</title>
+<path style="fill:none;stroke:blue;" d="M120,-132C123,-122 128,-108 132,-96"/>
+<polygon style="fill:blue;stroke:blue;" points="135.479,-96.584 135,-86 128.774,-94.5725 135.479,-96.584"/>
+</g>
+<!-- t11->t7 -->
+<g id="edge63" class="edge"><title>t11->t7</title>
+<path style="fill:none;stroke:red;" d="M108,-642C117,-642 125,-642 134,-642"/>
+<polygon style="fill:red;stroke:red;" points="134,-645.5 144,-642 134,-638.5 134,-645.5"/>
+</g>
+<!-- k1 -->
+<g id="node60" class="node"><title>k1</title>
+<polygon style="fill:none;stroke:orange;" points="556.125,-1329.38 556.125,-1344.62 513.875,-1355.38 454.125,-1355.38 411.875,-1344.62 411.875,-1329.38 454.125,-1318.62 513.875,-1318.62 556.125,-1329.38"/>
+<polygon style="fill:none;stroke:orange;" points="560.125,-1326.28 560.125,-1347.72 514.377,-1359.38 453.623,-1359.38 407.875,-1347.72 407.875,-1326.28 453.623,-1314.62 514.377,-1314.62 560.125,-1326.28"/>
+<polygon style="fill:none;stroke:orange;" points="564.125,-1323.17 564.125,-1350.83 514.879,-1363.38 453.121,-1363.38 403.875,-1350.83 403.875,-1323.17 453.121,-1310.62 514.879,-1310.62 564.125,-1323.17"/>
+<text text-anchor="middle" x="484" y="-1332.5" style="font-family:Times New Roman;font-size:15.00;fill:orange;">Final target</text>
+</g>
+<!-- k2 -->
+<g id="node61" class="node"><title>k2</title>
+<polygon style="fill:red;stroke:black;" points="540,-1288 428,-1288 428,-1252 540,-1252 540,-1288"/>
+<text text-anchor="middle" x="484" y="-1265.5" style="font-family:Times New Roman;font-size:15.00;">Vicious cycle</text>
+</g>
+<!-- k1->k2 -->
+<g id="edge68" class="edge"><title>k1->k2</title>
+<path style="fill:none;stroke:red;" d="M477,-1311C477,-1307 477,-1302 477,-1298"/>
+<polygon style="fill:red;stroke:red;" points="480.488,-1298.3 478,-1288 473.522,-1297.6 480.488,-1298.3"/>
+</g>
+<!-- k2->k1 -->
+<g id="edge70" class="edge"><title>k2->k1</title>
+<path style="fill:none;stroke:red;" d="M490,-1288C491,-1292 491,-1296 491,-1301"/>
+<polygon style="fill:red;stroke:red;" points="487.5,-1301 491,-1311 494.5,-1301 487.5,-1301"/>
+</g>
+<!-- k3 -->
+<g id="node62" class="node"><title>k3</title>
+<text text-anchor="middle" x="484" y="-1207.5" style="font-family:Times New Roman;font-size:15.00;fill:blue;">Task to run</text>
+</g>
+<!-- k2->k3 -->
+<g id="edge72" class="edge"><title>k2->k3</title>
+<path style="fill:none;stroke:blue;" d="M484,-1252C484,-1248 484,-1244 484,-1240"/>
+<polygon style="fill:blue;stroke:blue;" points="487.5,-1240 484,-1230 480.5,-1240 487.5,-1240"/>
+</g>
+<!-- k4 -->
+<g id="node63" class="node"><title>k4</title>
+<polygon style="fill:none;stroke:blue;" points="667.848,-1137.38 667.848,-1152.62 560.152,-1163.38 407.848,-1163.38 300.152,-1152.62 300.152,-1137.38 407.848,-1126.62 560.152,-1126.62 667.848,-1137.38"/>
+<polygon style="fill:none;stroke:blue;" points="671.848,-1133.76 671.848,-1156.24 560.352,-1167.38 407.648,-1167.38 296.152,-1156.24 296.152,-1133.76 407.648,-1122.62 560.352,-1122.62 671.848,-1133.76"/>
+<polygon style="fill:none;stroke:blue;" points="675.848,-1130.14 675.848,-1159.86 560.551,-1171.38 407.449,-1171.38 292.152,-1159.86 292.152,-1130.14 407.449,-1118.62 560.551,-1118.62 675.848,-1130.14"/>
+<text text-anchor="middle" x="484" y="-1140.5" style="font-family:Times New Roman;font-size:15.00;fill:blue;">Force pipeline run from this task</text>
+</g>
+<!-- k3->k4 -->
+<g id="edge73" class="edge"><title>k3->k4</title>
+<path style="fill:none;stroke:blue;" d="M484,-1194C484,-1190 484,-1186 484,-1181"/>
+<polygon style="fill:blue;stroke:blue;" points="487.5,-1181 484,-1171 480.5,-1181 487.5,-1181"/>
+</g>
+<!-- k5 -->
+<g id="node64" class="node"><title>k5</title>
+<polygon style="fill:none;stroke:gray;" points="616.229,-1061.38 616.229,-1076.62 538.771,-1087.38 429.229,-1087.38 351.771,-1076.62 351.771,-1061.38 429.229,-1050.62 538.771,-1050.62 616.229,-1061.38"/>
+<polygon style="fill:none;stroke:gray;" points="620.229,-1057.9 620.229,-1080.1 539.048,-1091.38 428.952,-1091.38 347.771,-1080.1 347.771,-1057.9 428.952,-1046.62 539.048,-1046.62 620.229,-1057.9"/>
+<polygon style="fill:none;stroke:gray;" points="624.229,-1054.42 624.229,-1083.58 539.325,-1095.38 428.675,-1095.38 343.771,-1083.58 343.771,-1054.42 428.675,-1042.62 539.325,-1042.62 624.229,-1054.42"/>
+<text text-anchor="middle" x="484" y="-1064.5" style="font-family:Times New Roman;font-size:15.00;fill:gray;">Up-to-date Final target</text>
+</g>
+<!-- k4->k5 -->
+<g id="edge74" class="edge"><title>k4->k5</title>
+<path style="fill:none;stroke:blue;" d="M484,-1118C484,-1114 484,-1110 484,-1106"/>
+<polygon style="fill:blue;stroke:blue;" points="487.5,-1106 484,-1096 480.5,-1106 487.5,-1106"/>
+</g>
+<!-- k6 -->
+<g id="node65" class="node"><title>k6</title>
+<polygon style="fill:none;stroke:olivedrab;" points="660.07,-985.385 660.07,-1000.62 556.93,-1011.38 411.07,-1011.38 307.93,-1000.62 307.93,-985.385 411.07,-974.615 556.93,-974.615 660.07,-985.385"/>
+<polygon style="fill:none;stroke:olivedrab;" points="664.07,-981.781 664.07,-1004.22 557.139,-1015.38 410.861,-1015.38 303.93,-1004.22 303.93,-981.781 410.861,-970.615 557.139,-970.615 664.07,-981.781"/>
+<polygon style="fill:none;stroke:olivedrab;" points="668.07,-978.177 668.07,-1007.82 557.347,-1019.38 410.653,-1019.38 299.93,-1007.82 299.93,-978.177 410.653,-966.615 557.347,-966.615 668.07,-978.177"/>
+<text text-anchor="middle" x="484" y="-988.5" style="font-family:Times New Roman;font-size:15.00;fill:blue;">Up-to-date task forced to rerun</text>
+</g>
+<!-- k5->k6 -->
+<g id="edge76" class="edge"><title>k5->k6</title>
+<path style="fill:none;stroke:gray;" d="M484,-1042C484,-1038 484,-1034 484,-1030"/>
+<polygon style="fill:gray;stroke:gray;" points="487.5,-1030 484,-1020 480.5,-1030 487.5,-1030"/>
+</g>
+<!-- k7 -->
+<g id="node66" class="node"><title>k7</title>
+<polygon style="fill:olivedrab;stroke:olivedrab;" points="576.839,-917.385 576.839,-932.615 522.455,-943.385 445.545,-943.385 391.161,-932.615 391.161,-917.385 445.545,-906.615 522.455,-906.615 576.839,-917.385"/>
+<text text-anchor="middle" x="484" y="-920.5" style="font-family:Times New Roman;font-size:15.00;">Up-to-date task</text>
+</g>
+<!-- k6->k7 -->
+<g id="edge77" class="edge"><title>k6->k7</title>
+<path style="fill:none;stroke:gray;" d="M484,-967C484,-963 484,-958 484,-954"/>
+<polygon style="fill:gray;stroke:gray;" points="487.5,-954 484,-944 480.5,-954 487.5,-954"/>
+</g>
+<!-- k8 -->
+<g id="node67" class="node"><title>k8</title>
+<polygon style="fill:white;stroke:gray;" points="621.179,-857.385 621.179,-872.615 540.821,-883.385 427.179,-883.385 346.821,-872.615 346.821,-857.385 427.179,-846.615 540.821,-846.615 621.179,-857.385"/>
+<text text-anchor="middle" x="484" y="-860.5" style="font-family:Times New Roman;font-size:15.00;fill:gray;">Up-to-date dependence</text>
+</g>
+<!-- k7->k8 -->
+<g id="edge78" class="edge"><title>k7->k8</title>
+<path style="fill:none;stroke:gray;" d="M484,-906C484,-902 484,-898 484,-893"/>
+<polygon style="fill:gray;stroke:gray;" points="487.5,-893 484,-883 480.5,-893 487.5,-893"/>
+</g>
+</g>
+</svg>
diff --git a/doc/complex_dags/svg/task17.svg b/doc/complex_dags/svg/task17.svg
new file mode 100644
index 0000000..fcfda0f
--- /dev/null
+++ b/doc/complex_dags/svg/task17.svg
@@ -0,0 +1,378 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.0//EN"
+ "http://www.w3.org/TR/2001/REC-SVG-20010904/DTD/svg10.dtd" [
+ <!ATTLIST svg xmlns:xlink CDATA #FIXED "http://www.w3.org/1999/xlink">
+]>
+<!-- Generated by Graphviz version 2.20.2 (Mon Mar 30 10:09:11 UTC 2009)
+ For user: (lg) leo goodstadt -->
+<!-- Title: tree Pages: 1 -->
+<svg width="287pt" height="576pt"
+ viewBox="0.00 0.00 287.27 576.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<g id="graph0" class="graph" transform="scale(0.367347 0.367347) rotate(0) translate(4 1564)">
+<title>tree</title>
+<polygon style="fill:white;stroke:white;" points="-4,4 -4,-1564 778,-1564 778,4 -4,4"/>
+<g id="cluster2" class="cluster"><title>clustertasks</title>
+<polygon style="fill:none;stroke:black;" points="8,-16 8,-1543 358,-1543 358,-16 8,-16"/>
+<text text-anchor="middle" x="183" y="-1512" style="font-family:Times New Roman;font-size:30.00;">Pipeline:</text>
+</g>
+<g id="cluster3" class="cluster"><title>clusterkey</title>
+<polygon style="fill:#e5e5e5;stroke:#e5e5e5;" points="366,-940 366,-1552 766,-1552 766,-940 366,-940"/>
+<text text-anchor="middle" x="566" y="-1521" style="font-family:Times New Roman;font-size:30.00;">Key:</text>
+</g>
+<!-- t0 -->
+<g id="node2" class="node"><title>t0</title>
+<text text-anchor="middle" x="221" y="-1467" style="font-family:Times New Roman;font-size:20.00;fill:blue;">task1</text>
+</g>
+<!-- t2 -->
+<g id="node3" class="node"><title>t2</title>
+<polygon style="fill:none;stroke:olivedrab;" points="215.912,-1325.63 215.912,-1344.37 186.088,-1357.63 143.912,-1357.63 114.088,-1344.37 114.088,-1325.63 143.912,-1312.37 186.088,-1312.37 215.912,-1325.63"/>
+<polygon style="fill:none;stroke:olivedrab;" points="219.912,-1323.03 219.912,-1346.97 186.937,-1361.63 143.063,-1361.63 110.088,-1346.97 110.088,-1323.03 143.063,-1308.37 186.937,-1308.37 219.912,-1323.03"/>
+<polygon style="fill:none;stroke:olivedrab;" points="223.912,-1320.43 223.912,-1349.57 187.786,-1365.63 142.214,-1365.63 106.088,-1349.57 106.088,-1320.43 142.214,-1304.37 187.786,-1304.37 223.912,-1320.43"/>
+<text text-anchor="middle" x="165" y="-1329" style="font-family:Times New Roman;font-size:20.00;fill:blue;">task7</text>
+</g>
+<!-- t0->t2 -->
+<g id="edge3" class="edge"><title>t0->t2</title>
+<path style="fill:none;stroke:blue;" d="M214,-1455C205,-1435 191,-1402 181,-1375"/>
+<polygon style="fill:blue;stroke:blue;" points="184.26,-1373.72 177,-1366 177.863,-1376.56 184.26,-1373.72"/>
+</g>
+<!-- t1 -->
+<g id="node13" class="node"><title>t1</title>
+<text text-anchor="middle" x="278" y="-1329" style="font-family:Times New Roman;font-size:20.00;fill:blue;">task2</text>
+</g>
+<!-- t0->t1 -->
+<g id="edge13" class="edge"><title>t0->t1</title>
+<path style="fill:none;stroke:blue;" d="M228,-1455C238,-1432 256,-1390 267,-1363"/>
+<polygon style="fill:blue;stroke:blue;" points="270.536,-1363.58 271,-1353 264.036,-1360.98 270.536,-1363.58"/>
+</g>
+<!-- t3 -->
+<g id="node5" class="node"><title>t3</title>
+<polygon style="fill:none;stroke:olivedrab;" points="215.912,-1165.63 215.912,-1184.37 186.088,-1197.63 143.912,-1197.63 114.088,-1184.37 114.088,-1165.63 143.912,-1152.37 186.088,-1152.37 215.912,-1165.63"/>
+<polygon style="fill:none;stroke:olivedrab;" points="219.912,-1163.03 219.912,-1186.97 186.937,-1201.63 143.063,-1201.63 110.088,-1186.97 110.088,-1163.03 143.063,-1148.37 186.937,-1148.37 219.912,-1163.03"/>
+<polygon style="fill:none;stroke:olivedrab;" points="223.912,-1160.43 223.912,-1189.57 187.786,-1205.63 142.214,-1205.63 106.088,-1189.57 106.088,-1160.43 142.214,-1144.37 187.786,-1144.37 223.912,-1160.43"/>
+<text text-anchor="middle" x="165" y="-1169" style="font-family:Times New Roman;font-size:20.00;fill:blue;">task8</text>
+</g>
+<!-- t2->t3 -->
+<g id="edge5" class="edge"><title>t2->t3</title>
+<path style="fill:none;stroke:blue;" d="M165,-1304C165,-1279 165,-1243 165,-1216"/>
+<polygon style="fill:blue;stroke:blue;" points="168.5,-1216 165,-1206 161.5,-1216 168.5,-1216"/>
+</g>
+<!-- t4 -->
+<g id="node7" class="node"><title>t4</title>
+<text text-anchor="middle" x="165" y="-1021" style="font-family:Times New Roman;font-size:20.00;fill:blue;">task9</text>
+</g>
+<!-- t3->t4 -->
+<g id="edge7" class="edge"><title>t3->t4</title>
+<path style="fill:none;stroke:blue;" d="M165,-1144C165,-1118 165,-1081 165,-1055"/>
+<polygon style="fill:blue;stroke:blue;" points="168.5,-1055 165,-1045 161.5,-1055 168.5,-1055"/>
+</g>
+<!-- t5 -->
+<g id="node9" class="node"><title>t5</title>
+<text text-anchor="middle" x="103" y="-902" style="font-family:Times New Roman;font-size:20.00;fill:blue;">task10</text>
+</g>
+<!-- t4->t5 -->
+<g id="edge9" class="edge"><title>t4->t5</title>
+<path style="fill:none;stroke:blue;" d="M156,-1009C145,-990 129,-958 117,-935"/>
+<polygon style="fill:blue;stroke:blue;" points="119.916,-933.042 112,-926 113.797,-936.441 119.916,-933.042"/>
+</g>
+<!-- t15 -->
+<g id="node30" class="node"><title>t15</title>
+<text text-anchor="middle" x="81" y="-425" style="font-family:Times New Roman;font-size:20.00;fill:blue;">task15</text>
+</g>
+<!-- t4->t15 -->
+<g id="edge33" class="edge"><title>t4->t15</title>
+<path style="fill:none;stroke:blue;" d="M166,-1009C168,-985 171,-943 171,-908 171,-908 171,-908 171,-867 171,-828 178,-816 161,-782 149,-756 132,-759 116,-736 70,-666 62,-643 47,-562 43,-534 41,-526 47,-500 50,-485 57,-470 64,-458"/>
+<polygon style="fill:blue;stroke:blue;" points="67.2031,-459.441 69,-449 61.084,-456.042 67.2031,-459.441"/>
+</g>
+<!-- t6 -->
+<g id="node11" class="node"><title>t6</title>
+<polygon style="fill:none;stroke:olivedrab;" points="144.104,-803.627 144.104,-822.373 108.896,-835.627 59.1041,-835.627 23.8959,-822.373 23.8959,-803.627 59.1041,-790.373 108.896,-790.373 144.104,-803.627"/>
+<polygon style="fill:none;stroke:olivedrab;" points="148.104,-800.859 148.104,-825.141 109.624,-839.627 58.3761,-839.627 19.8959,-825.141 19.8959,-800.859 58.3761,-786.373 109.624,-786.373 148.104,-800.859"/>
+<polygon style="fill:none;stroke:olivedrab;" points="152.104,-798.091 152.104,-827.909 110.352,-843.627 57.6481,-843.627 15.8959,-827.909 15.8959,-798.091 57.6481,-782.373 110.352,-782.373 152.104,-798.091"/>
+<text text-anchor="middle" x="84" y="-807" style="font-family:Times New Roman;font-size:20.00;fill:blue;">task11</text>
+</g>
+<!-- t5->t6 -->
+<g id="edge11" class="edge"><title>t5->t6</title>
+<path style="fill:none;stroke:blue;" d="M99,-890C97,-880 95,-866 92,-854"/>
+<polygon style="fill:blue;stroke:blue;" points="95.3933,-853.119 90,-844 88.5292,-854.492 95.3933,-853.119"/>
+</g>
+<!-- t7 -->
+<g id="node23" class="node"><title>t7</title>
+<text text-anchor="middle" x="167" y="-712" style="font-family:Times New Roman;font-size:20.00;fill:blue;">task12</text>
+</g>
+<!-- t6->t7 -->
+<g id="edge25" class="edge"><title>t6->t7</title>
+<path style="fill:none;stroke:blue;" d="M111,-782C122,-770 134,-756 144,-744"/>
+<polygon style="fill:blue;stroke:blue;" points="147.049,-745.831 151,-736 141.781,-741.221 147.049,-745.831"/>
+</g>
+<!-- t16 -->
+<g id="node33" class="node"><title>t16</title>
+<text text-anchor="middle" x="81" y="-333" style="font-family:Times New Roman;font-size:20.00;fill:blue;">task16</text>
+</g>
+<!-- t6->t16 -->
+<g id="edge37" class="edge"><title>t6->t16</title>
+<path style="fill:none;stroke:blue;" d="M50,-785C34,-767 17,-744 17,-718 17,-718 17,-718 17,-431 17,-405 34,-381 51,-364"/>
+<polygon style="fill:blue;stroke:blue;" points="53.4038,-366.546 58,-357 48.454,-361.596 53.4038,-366.546"/>
+</g>
+<!-- t8 -->
+<g id="node15" class="node"><title>t8</title>
+<text text-anchor="middle" x="278" y="-1169" style="font-family:Times New Roman;font-size:20.00;fill:blue;">task3</text>
+</g>
+<!-- t1->t8 -->
+<g id="edge15" class="edge"><title>t1->t8</title>
+<path style="fill:none;stroke:blue;" d="M278,-1317C278,-1289 278,-1236 278,-1203"/>
+<polygon style="fill:blue;stroke:blue;" points="281.5,-1203 278,-1193 274.5,-1203 281.5,-1203"/>
+</g>
+<!-- t9 -->
+<g id="node17" class="node"><title>t9</title>
+<text text-anchor="middle" x="261" y="-1021" style="font-family:Times New Roman;font-size:20.00;fill:blue;">task4</text>
+</g>
+<!-- t8->t9 -->
+<g id="edge17" class="edge"><title>t8->t9</title>
+<path style="fill:none;stroke:blue;" d="M276,-1157C272,-1131 267,-1085 264,-1055"/>
+<polygon style="fill:blue;stroke:blue;" points="267.478,-1054.6 263,-1045 260.512,-1055.3 267.478,-1054.6"/>
+</g>
+<!-- t21 -->
+<g id="node44" class="node"><title>t21</title>
+<polygon style="fill:white;stroke:gray;" points="330.104,-329.627 330.104,-348.373 294.896,-361.627 245.104,-361.627 209.896,-348.373 209.896,-329.627 245.104,-316.373 294.896,-316.373 330.104,-329.627"/>
+<text text-anchor="middle" x="270" y="-333" style="font-family:Times New Roman;font-size:20.00;fill:gray;">task21</text>
+</g>
+<!-- t8->t21 -->
+<g id="edge49" class="edge"><title>t8->t21</title>
+<path style="fill:none;stroke:gray;" d="M293,-1157C314,-1130 349,-1078 349,-1027 349,-1027 349,-1027 349,-431 349,-405 330,-381 310,-365"/>
+<polygon style="fill:gray;stroke:gray;" points="312.1,-362.2 302,-359 307.9,-367.8 312.1,-362.2"/>
+</g>
+<!-- t10 -->
+<g id="node19" class="node"><title>t10</title>
+<text text-anchor="middle" x="231" y="-902" style="font-family:Times New Roman;font-size:20.00;fill:blue;">task5</text>
+</g>
+<!-- t9->t10 -->
+<g id="edge19" class="edge"><title>t9->t10</title>
+<path style="fill:none;stroke:blue;" d="M256,-1009C252,-990 244,-959 238,-936"/>
+<polygon style="fill:blue;stroke:blue;" points="241.393,-935.119 236,-926 234.529,-936.492 241.393,-935.119"/>
+</g>
+<!-- t11 -->
+<g id="node21" class="node"><title>t11</title>
+<text text-anchor="middle" x="227" y="-807" style="font-family:Times New Roman;font-size:20.00;fill:blue;">task6</text>
+</g>
+<!-- t10->t11 -->
+<g id="edge21" class="edge"><title>t10->t11</title>
+<path style="fill:none;stroke:blue;" d="M230,-890C229,-876 228,-857 228,-841"/>
+<polygon style="fill:blue;stroke:blue;" points="231.5,-841 228,-831 224.5,-841 231.5,-841"/>
+</g>
+<!-- t11->t7 -->
+<g id="edge23" class="edge"><title>t11->t7</title>
+<path style="fill:none;stroke:blue;" d="M215,-795C207,-781 194,-761 184,-745"/>
+<polygon style="fill:blue;stroke:blue;" points="186.916,-743.042 179,-736 180.797,-746.441 186.916,-743.042"/>
+</g>
+<!-- t12 -->
+<g id="node26" class="node"><title>t12</title>
+<text text-anchor="middle" x="132" y="-625" style="font-family:Times New Roman;font-size:20.00;fill:blue;">task13</text>
+</g>
+<!-- t7->t12 -->
+<g id="edge27" class="edge"><title>t7->t12</title>
+<path style="fill:none;stroke:blue;" d="M160,-700C155,-688 148,-672 143,-658"/>
+<polygon style="fill:blue;stroke:blue;" points="146.26,-656.717 139,-649 139.863,-659.56 146.26,-656.717"/>
+</g>
+<!-- t13 -->
+<g id="node38" class="node"><title>t13</title>
+<polygon style="fill:white;stroke:gray;" points="317.104,-621.627 317.104,-640.373 281.896,-653.627 232.104,-653.627 196.896,-640.373 196.896,-621.627 232.104,-608.373 281.896,-608.373 317.104,-621.627"/>
+<text text-anchor="middle" x="257" y="-625" style="font-family:Times New Roman;font-size:20.00;fill:gray;">task18</text>
+</g>
+<!-- t7->t13 -->
+<g id="edge41" class="edge"><title>t7->t13</title>
+<path style="fill:none;stroke:gray;" d="M186,-700C198,-688 212,-674 225,-661"/>
+<polygon style="fill:gray;stroke:gray;" points="227.779,-663.219 233,-654 223.169,-657.951 227.779,-663.219"/>
+</g>
+<!-- t14 -->
+<g id="node28" class="node"><title>t14</title>
+<polygon style="fill:none;stroke:olivedrab;" points="184.104,-521.627 184.104,-540.373 148.896,-553.627 99.1041,-553.627 63.8959,-540.373 63.8959,-521.627 99.1041,-508.373 148.896,-508.373 184.104,-521.627"/>
+<polygon style="fill:none;stroke:olivedrab;" points="188.104,-518.859 188.104,-543.141 149.624,-557.627 98.3761,-557.627 59.8959,-543.141 59.8959,-518.859 98.3761,-504.373 149.624,-504.373 188.104,-518.859"/>
+<polygon style="fill:none;stroke:olivedrab;" points="192.104,-516.091 192.104,-545.909 150.352,-561.627 97.6481,-561.627 55.8959,-545.909 55.8959,-516.091 97.6481,-500.373 150.352,-500.373 192.104,-516.091"/>
+<text text-anchor="middle" x="124" y="-525" style="font-family:Times New Roman;font-size:20.00;fill:blue;">task14</text>
+</g>
+<!-- t12->t14 -->
+<g id="edge29" class="edge"><title>t12->t14</title>
+<path style="fill:none;stroke:blue;" d="M131,-613C130,-602 129,-586 127,-572"/>
+<polygon style="fill:blue;stroke:blue;" points="130.478,-571.602 126,-562 123.512,-572.299 130.478,-571.602"/>
+</g>
+<!-- t14->t15 -->
+<g id="edge31" class="edge"><title>t14->t15</title>
+<path style="fill:none;stroke:blue;" d="M111,-500C105,-486 99,-471 93,-458"/>
+<polygon style="fill:blue;stroke:blue;" points="96.2598,-456.717 89,-449 89.8631,-459.56 96.2598,-456.717"/>
+</g>
+<!-- t15->t16 -->
+<g id="edge35" class="edge"><title>t15->t16</title>
+<path style="fill:none;stroke:blue;" d="M81,-413C81,-400 81,-382 81,-367"/>
+<polygon style="fill:blue;stroke:blue;" points="84.5001,-367 81,-357 77.5001,-367 84.5001,-367"/>
+</g>
+<!-- t17 -->
+<g id="node36" class="node"><title>t17</title>
+<polygon style="fill:none;stroke:orange;" points="154.104,-229.627 154.104,-248.373 118.896,-261.627 69.1041,-261.627 33.8959,-248.373 33.8959,-229.627 69.1041,-216.373 118.896,-216.373 154.104,-229.627"/>
+<polygon style="fill:none;stroke:orange;" points="158.104,-226.859 158.104,-251.141 119.624,-265.627 68.3761,-265.627 29.8959,-251.141 29.8959,-226.859 68.3761,-212.373 119.624,-212.373 158.104,-226.859"/>
+<polygon style="fill:none;stroke:orange;" points="162.104,-224.091 162.104,-253.909 120.352,-269.627 67.6481,-269.627 25.8959,-253.909 25.8959,-224.091 67.6481,-208.373 120.352,-208.373 162.104,-224.091"/>
+<text text-anchor="middle" x="94" y="-233" style="font-family:Times New Roman;font-size:20.00;fill:orange;">task17</text>
+</g>
+<!-- t16->t17 -->
+<g id="edge39" class="edge"><title>t16->t17</title>
+<path style="fill:none;stroke:blue;" d="M83,-321C85,-310 87,-294 89,-280"/>
+<polygon style="fill:blue;stroke:blue;" points="92.4875,-280.299 90,-270 85.5222,-279.602 92.4875,-280.299"/>
+</g>
+<!-- t18 -->
+<g id="node51" class="node"><title>t18</title>
+<polygon style="fill:white;stroke:gray;" points="162.104,-129.627 162.104,-148.373 126.896,-161.627 77.1041,-161.627 41.8959,-148.373 41.8959,-129.627 77.1041,-116.373 126.896,-116.373 162.104,-129.627"/>
+<text text-anchor="middle" x="102" y="-133" style="font-family:Times New Roman;font-size:20.00;fill:gray;">task24</text>
+</g>
+<!-- t17->t18 -->
+<g id="edge55" class="edge"><title>t17->t18</title>
+<path style="fill:none;stroke:gray;" d="M96,-208C98,-197 98,-184 99,-172"/>
+<polygon style="fill:gray;stroke:gray;" points="102.488,-172.299 100,-162 95.5222,-171.602 102.488,-172.299"/>
+</g>
+<!-- t19 -->
+<g id="node40" class="node"><title>t19</title>
+<polygon style="fill:white;stroke:gray;" points="330.104,-521.627 330.104,-540.373 294.896,-553.627 245.104,-553.627 209.896,-540.373 209.896,-521.627 245.104,-508.373 294.896,-508.373 330.104,-521.627"/>
+<text text-anchor="middle" x="270" y="-525" style="font-family:Times New Roman;font-size:20.00;fill:gray;">task19</text>
+</g>
+<!-- t13->t19 -->
+<g id="edge43" class="edge"><title>t13->t19</title>
+<path style="fill:none;stroke:gray;" d="M260,-608C262,-595 264,-578 266,-564"/>
+<polygon style="fill:gray;stroke:gray;" points="269.488,-564.299 267,-554 262.522,-563.602 269.488,-564.299"/>
+</g>
+<!-- t20 -->
+<g id="node42" class="node"><title>t20</title>
+<polygon style="fill:white;stroke:gray;" points="330.104,-421.627 330.104,-440.373 294.896,-453.627 245.104,-453.627 209.896,-440.373 209.896,-421.627 245.104,-408.373 294.896,-408.373 330.104,-421.627"/>
+<text text-anchor="middle" x="270" y="-425" style="font-family:Times New Roman;font-size:20.00;fill:gray;">task20</text>
+</g>
+<!-- t19->t20 -->
+<g id="edge45" class="edge"><title>t19->t20</title>
+<path style="fill:none;stroke:gray;" d="M270,-508C270,-495 270,-479 270,-464"/>
+<polygon style="fill:gray;stroke:gray;" points="273.5,-464 270,-454 266.5,-464 273.5,-464"/>
+</g>
+<!-- t20->t21 -->
+<g id="edge47" class="edge"><title>t20->t21</title>
+<path style="fill:none;stroke:gray;" d="M270,-408C270,-397 270,-384 270,-372"/>
+<polygon style="fill:gray;stroke:gray;" points="273.5,-372 270,-362 266.5,-372 273.5,-372"/>
+</g>
+<!-- t22 -->
+<g id="node47" class="node"><title>t22</title>
+<polygon style="fill:white;stroke:gray;" points="305.104,-229.627 305.104,-248.373 269.896,-261.627 220.104,-261.627 184.896,-248.373 184.896,-229.627 220.104,-216.373 269.896,-216.373 305.104,-229.627"/>
+<text text-anchor="middle" x="245" y="-233" style="font-family:Times New Roman;font-size:20.00;fill:gray;">task22</text>
+</g>
+<!-- t21->t22 -->
+<g id="edge51" class="edge"><title>t21->t22</title>
+<path style="fill:none;stroke:gray;" d="M264,-316C261,-303 257,-286 253,-272"/>
+<polygon style="fill:gray;stroke:gray;" points="256.393,-271.119 251,-262 249.529,-272.492 256.393,-271.119"/>
+</g>
+<!-- t23 -->
+<g id="node49" class="node"><title>t23</title>
+<polygon style="fill:white;stroke:gray;" points="301.104,-129.627 301.104,-148.373 265.896,-161.627 216.104,-161.627 180.896,-148.373 180.896,-129.627 216.104,-116.373 265.896,-116.373 301.104,-129.627"/>
+<text text-anchor="middle" x="241" y="-133" style="font-family:Times New Roman;font-size:20.00;fill:gray;">task23</text>
+</g>
+<!-- t22->t23 -->
+<g id="edge53" class="edge"><title>t22->t23</title>
+<path style="fill:none;stroke:gray;" d="M244,-216C243,-203 243,-187 243,-172"/>
+<polygon style="fill:gray;stroke:gray;" points="246.478,-171.602 242,-162 239.512,-172.299 246.478,-171.602"/>
+</g>
+<!-- t24 -->
+<g id="node53" class="node"><title>t24</title>
+<polygon style="fill:white;stroke:gray;" points="202.104,-37.6274 202.104,-56.3726 166.896,-69.6274 117.104,-69.6274 81.8959,-56.3726 81.8959,-37.6274 117.104,-24.3726 166.896,-24.3726 202.104,-37.6274"/>
+<text text-anchor="middle" x="142" y="-41" style="font-family:Times New Roman;font-size:20.00;fill:gray;">task25</text>
+</g>
+<!-- t23->t24 -->
+<g id="edge57" class="edge"><title>t23->t24</title>
+<path style="fill:none;stroke:gray;" d="M217,-116C204,-104 188,-90 175,-77"/>
+<polygon style="fill:gray;stroke:gray;" points="176.831,-73.9511 167,-70 172.221,-79.2191 176.831,-73.9511"/>
+</g>
+<!-- t18->t24 -->
+<g id="edge59" class="edge"><title>t18->t24</title>
+<path style="fill:none;stroke:gray;" d="M112,-116C117,-105 123,-91 128,-79"/>
+<polygon style="fill:gray;stroke:gray;" points="131.137,-80.5596 132,-70 124.74,-77.7166 131.137,-80.5596"/>
+</g>
+<!-- k1 -->
+<g id="node57" class="node"><title>k1</title>
+<polygon style="fill:none;stroke:orange;" points="638.125,-1465.38 638.125,-1480.62 595.875,-1491.38 536.125,-1491.38 493.875,-1480.62 493.875,-1465.38 536.125,-1454.62 595.875,-1454.62 638.125,-1465.38"/>
+<polygon style="fill:none;stroke:orange;" points="642.125,-1462.28 642.125,-1483.72 596.377,-1495.38 535.623,-1495.38 489.875,-1483.72 489.875,-1462.28 535.623,-1450.62 596.377,-1450.62 642.125,-1462.28"/>
+<polygon style="fill:none;stroke:orange;" points="646.125,-1459.17 646.125,-1486.83 596.879,-1499.38 535.121,-1499.38 485.875,-1486.83 485.875,-1459.17 535.121,-1446.62 596.879,-1446.62 646.125,-1459.17"/>
+<text text-anchor="middle" x="566" y="-1468.5" style="font-family:Times New Roman;font-size:15.00;fill:orange;">Final target</text>
+</g>
+<!-- k2 -->
+<g id="node58" class="node"><title>k2</title>
+<polygon style="fill:red;stroke:black;" points="622,-1424 510,-1424 510,-1388 622,-1388 622,-1424"/>
+<text text-anchor="middle" x="566" y="-1401.5" style="font-family:Times New Roman;font-size:15.00;">Vicious cycle</text>
+</g>
+<!-- k1->k2 -->
+<g id="edge62" class="edge"><title>k1->k2</title>
+<path style="fill:none;stroke:red;" d="M559,-1447C559,-1443 559,-1438 559,-1434"/>
+<polygon style="fill:red;stroke:red;" points="562.488,-1434.3 560,-1424 555.522,-1433.6 562.488,-1434.3"/>
+</g>
+<!-- k2->k1 -->
+<g id="edge64" class="edge"><title>k2->k1</title>
+<path style="fill:none;stroke:red;" d="M572,-1424C573,-1428 573,-1432 573,-1437"/>
+<polygon style="fill:red;stroke:red;" points="569.5,-1437 573,-1447 576.5,-1437 569.5,-1437"/>
+</g>
+<!-- k3 -->
+<g id="node59" class="node"><title>k3</title>
+<text text-anchor="middle" x="566" y="-1330.5" style="font-family:Times New Roman;font-size:15.00;fill:blue;">Task to run</text>
+</g>
+<!-- k2->k3 -->
+<g id="edge66" class="edge"><title>k2->k3</title>
+<path style="fill:none;stroke:blue;" d="M566,-1388C566,-1380 566,-1372 566,-1363"/>
+<polygon style="fill:blue;stroke:blue;" points="569.5,-1363 566,-1353 562.5,-1363 569.5,-1363"/>
+</g>
+<!-- k4 -->
+<g id="node60" class="node"><title>k4</title>
+<polygon style="fill:none;stroke:blue;" points="749.848,-1247.38 749.848,-1262.62 642.152,-1273.38 489.848,-1273.38 382.152,-1262.62 382.152,-1247.38 489.848,-1236.62 642.152,-1236.62 749.848,-1247.38"/>
+<polygon style="fill:none;stroke:blue;" points="753.848,-1243.76 753.848,-1266.24 642.352,-1277.38 489.648,-1277.38 378.152,-1266.24 378.152,-1243.76 489.648,-1232.62 642.352,-1232.62 753.848,-1243.76"/>
+<polygon style="fill:none;stroke:blue;" points="757.848,-1240.14 757.848,-1269.86 642.551,-1281.38 489.449,-1281.38 374.152,-1269.86 374.152,-1240.14 489.449,-1228.62 642.551,-1228.62 757.848,-1240.14"/>
+<text text-anchor="middle" x="566" y="-1250.5" style="font-family:Times New Roman;font-size:15.00;fill:blue;">Force pipeline run from this task</text>
+</g>
+<!-- k3->k4 -->
+<g id="edge67" class="edge"><title>k3->k4</title>
+<path style="fill:none;stroke:blue;" d="M566,-1317C566,-1309 566,-1301 566,-1292"/>
+<polygon style="fill:blue;stroke:blue;" points="569.5,-1292 566,-1282 562.5,-1292 569.5,-1292"/>
+</g>
+<!-- k5 -->
+<g id="node61" class="node"><title>k5</title>
+<polygon style="fill:none;stroke:gray;" points="698.229,-1167.38 698.229,-1182.62 620.771,-1193.38 511.229,-1193.38 433.771,-1182.62 433.771,-1167.38 511.229,-1156.62 620.771,-1156.62 698.229,-1167.38"/>
+<polygon style="fill:none;stroke:gray;" points="702.229,-1163.9 702.229,-1186.1 621.048,-1197.38 510.952,-1197.38 429.771,-1186.1 429.771,-1163.9 510.952,-1152.62 621.048,-1152.62 702.229,-1163.9"/>
+<polygon style="fill:none;stroke:gray;" points="706.229,-1160.42 706.229,-1189.58 621.325,-1201.38 510.675,-1201.38 425.771,-1189.58 425.771,-1160.42 510.675,-1148.62 621.325,-1148.62 706.229,-1160.42"/>
+<text text-anchor="middle" x="566" y="-1170.5" style="font-family:Times New Roman;font-size:15.00;fill:gray;">Up-to-date Final target</text>
+</g>
+<!-- k4->k5 -->
+<g id="edge68" class="edge"><title>k4->k5</title>
+<path style="fill:none;stroke:blue;" d="M566,-1228C566,-1222 566,-1217 566,-1211"/>
+<polygon style="fill:blue;stroke:blue;" points="569.5,-1211 566,-1201 562.5,-1211 569.5,-1211"/>
+</g>
+<!-- k6 -->
+<g id="node62" class="node"><title>k6</title>
+<polygon style="fill:none;stroke:olivedrab;" points="742.07,-1087.38 742.07,-1102.62 638.93,-1113.38 493.07,-1113.38 389.93,-1102.62 389.93,-1087.38 493.07,-1076.62 638.93,-1076.62 742.07,-1087.38"/>
+<polygon style="fill:none;stroke:olivedrab;" points="746.07,-1083.78 746.07,-1106.22 639.139,-1117.38 492.861,-1117.38 385.93,-1106.22 385.93,-1083.78 492.861,-1072.62 639.139,-1072.62 746.07,-1083.78"/>
+<polygon style="fill:none;stroke:olivedrab;" points="750.07,-1080.18 750.07,-1109.82 639.347,-1121.38 492.653,-1121.38 381.93,-1109.82 381.93,-1080.18 492.653,-1068.62 639.347,-1068.62 750.07,-1080.18"/>
+<text text-anchor="middle" x="566" y="-1090.5" style="font-family:Times New Roman;font-size:15.00;fill:blue;">Up-to-date task forced to rerun</text>
+</g>
+<!-- k5->k6 -->
+<g id="edge70" class="edge"><title>k5->k6</title>
+<path style="fill:none;stroke:gray;" d="M566,-1148C566,-1142 566,-1137 566,-1131"/>
+<polygon style="fill:gray;stroke:gray;" points="569.5,-1131 566,-1121 562.5,-1131 569.5,-1131"/>
+</g>
+<!-- k7 -->
+<g id="node63" class="node"><title>k7</title>
+<polygon style="fill:olivedrab;stroke:olivedrab;" points="658.839,-1019.38 658.839,-1034.62 604.455,-1045.38 527.545,-1045.38 473.161,-1034.62 473.161,-1019.38 527.545,-1008.62 604.455,-1008.62 658.839,-1019.38"/>
+<text text-anchor="middle" x="566" y="-1022.5" style="font-family:Times New Roman;font-size:15.00;">Up-to-date task</text>
+</g>
+<!-- k6->k7 -->
+<g id="edge71" class="edge"><title>k6->k7</title>
+<path style="fill:none;stroke:gray;" d="M566,-1069C566,-1065 566,-1060 566,-1056"/>
+<polygon style="fill:gray;stroke:gray;" points="569.5,-1056 566,-1046 562.5,-1056 569.5,-1056"/>
+</g>
+<!-- k8 -->
+<g id="node64" class="node"><title>k8</title>
+<polygon style="fill:white;stroke:gray;" points="703.179,-959.385 703.179,-974.615 622.821,-985.385 509.179,-985.385 428.821,-974.615 428.821,-959.385 509.179,-948.615 622.821,-948.615 703.179,-959.385"/>
+<text text-anchor="middle" x="566" y="-962.5" style="font-family:Times New Roman;font-size:15.00;fill:gray;">Up-to-date dependence</text>
+</g>
+<!-- k7->k8 -->
+<g id="edge72" class="edge"><title>k7->k8</title>
+<path style="fill:none;stroke:gray;" d="M566,-1008C566,-1004 566,-1000 566,-995"/>
+<polygon style="fill:gray;stroke:gray;" points="569.5,-995 566,-985 562.5,-995 569.5,-995"/>
+</g>
+</g>
+</svg>
diff --git a/doc/complex_dags/svg/task17_from_task9.svg b/doc/complex_dags/svg/task17_from_task9.svg
new file mode 100644
index 0000000..2352940
--- /dev/null
+++ b/doc/complex_dags/svg/task17_from_task9.svg
@@ -0,0 +1,381 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.0//EN"
+ "http://www.w3.org/TR/2001/REC-SVG-20010904/DTD/svg10.dtd" [
+ <!ATTLIST svg xmlns:xlink CDATA #FIXED "http://www.w3.org/1999/xlink">
+]>
+<!-- Generated by Graphviz version 2.20.2 (Mon Mar 30 10:09:11 UTC 2009)
+ For user: (lg) leo goodstadt -->
+<!-- Title: tree Pages: 1 -->
+<svg width="283pt" height="576pt"
+ viewBox="0.00 0.00 282.93 576.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<g id="graph0" class="graph" transform="scale(0.361809 0.361809) rotate(0) translate(4 1588)">
+<title>tree</title>
+<polygon style="fill:white;stroke:white;" points="-4,4 -4,-1588 778,-1588 778,4 -4,4"/>
+<g id="cluster2" class="cluster"><title>clustertasks</title>
+<polygon style="fill:none;stroke:black;" points="8,-16 8,-1567 358,-1567 358,-16 8,-16"/>
+<text text-anchor="middle" x="183" y="-1536" style="font-family:Times New Roman;font-size:30.00;">Pipeline:</text>
+</g>
+<g id="cluster3" class="cluster"><title>clusterkey</title>
+<polygon style="fill:#e5e5e5;stroke:#e5e5e5;" points="366,-940 366,-1576 766,-1576 766,-940 366,-940"/>
+<text text-anchor="middle" x="566" y="-1545" style="font-family:Times New Roman;font-size:30.00;">Key:</text>
+</g>
+<!-- t0 -->
+<g id="node2" class="node"><title>t0</title>
+<text text-anchor="middle" x="224" y="-1491" style="font-family:Times New Roman;font-size:20.00;fill:blue;">task1</text>
+</g>
+<!-- t2 -->
+<g id="node3" class="node"><title>t2</title>
+<polygon style="fill:none;stroke:olivedrab;" points="218.912,-1349.63 218.912,-1368.37 189.088,-1381.63 146.912,-1381.63 117.088,-1368.37 117.088,-1349.63 146.912,-1336.37 189.088,-1336.37 218.912,-1349.63"/>
+<polygon style="fill:none;stroke:olivedrab;" points="222.912,-1347.03 222.912,-1370.97 189.937,-1385.63 146.063,-1385.63 113.088,-1370.97 113.088,-1347.03 146.063,-1332.37 189.937,-1332.37 222.912,-1347.03"/>
+<polygon style="fill:none;stroke:olivedrab;" points="226.912,-1344.43 226.912,-1373.57 190.786,-1389.63 145.214,-1389.63 109.088,-1373.57 109.088,-1344.43 145.214,-1328.37 190.786,-1328.37 226.912,-1344.43"/>
+<text text-anchor="middle" x="168" y="-1353" style="font-family:Times New Roman;font-size:20.00;fill:blue;">task7</text>
+</g>
+<!-- t0->t2 -->
+<g id="edge3" class="edge"><title>t0->t2</title>
+<path style="fill:none;stroke:blue;" d="M217,-1479C208,-1459 194,-1426 184,-1399"/>
+<polygon style="fill:blue;stroke:blue;" points="187.26,-1397.72 180,-1390 180.863,-1400.56 187.26,-1397.72"/>
+</g>
+<!-- t1 -->
+<g id="node13" class="node"><title>t1</title>
+<text text-anchor="middle" x="281" y="-1353" style="font-family:Times New Roman;font-size:20.00;fill:blue;">task2</text>
+</g>
+<!-- t0->t1 -->
+<g id="edge13" class="edge"><title>t0->t1</title>
+<path style="fill:none;stroke:blue;" d="M231,-1479C241,-1456 259,-1414 270,-1387"/>
+<polygon style="fill:blue;stroke:blue;" points="273.536,-1387.58 274,-1377 267.036,-1384.98 273.536,-1387.58"/>
+</g>
+<!-- t3 -->
+<g id="node5" class="node"><title>t3</title>
+<polygon style="fill:none;stroke:olivedrab;" points="218.912,-1189.63 218.912,-1208.37 189.088,-1221.63 146.912,-1221.63 117.088,-1208.37 117.088,-1189.63 146.912,-1176.37 189.088,-1176.37 218.912,-1189.63"/>
+<polygon style="fill:none;stroke:olivedrab;" points="222.912,-1187.03 222.912,-1210.97 189.937,-1225.63 146.063,-1225.63 113.088,-1210.97 113.088,-1187.03 146.063,-1172.37 189.937,-1172.37 222.912,-1187.03"/>
+<polygon style="fill:none;stroke:olivedrab;" points="226.912,-1184.43 226.912,-1213.57 190.786,-1229.63 145.214,-1229.63 109.088,-1213.57 109.088,-1184.43 145.214,-1168.37 190.786,-1168.37 226.912,-1184.43"/>
+<text text-anchor="middle" x="168" y="-1193" style="font-family:Times New Roman;font-size:20.00;fill:blue;">task8</text>
+</g>
+<!-- t2->t3 -->
+<g id="edge5" class="edge"><title>t2->t3</title>
+<path style="fill:none;stroke:blue;" d="M168,-1328C168,-1303 168,-1267 168,-1240"/>
+<polygon style="fill:blue;stroke:blue;" points="171.5,-1240 168,-1230 164.5,-1240 171.5,-1240"/>
+</g>
+<!-- t4 -->
+<g id="node7" class="node"><title>t4</title>
+<polygon style="fill:none;stroke:blue;" points="218.912,-1029.63 218.912,-1048.37 189.088,-1061.63 146.912,-1061.63 117.088,-1048.37 117.088,-1029.63 146.912,-1016.37 189.088,-1016.37 218.912,-1029.63"/>
+<polygon style="fill:none;stroke:blue;" points="222.912,-1027.03 222.912,-1050.97 189.937,-1065.63 146.063,-1065.63 113.088,-1050.97 113.088,-1027.03 146.063,-1012.37 189.937,-1012.37 222.912,-1027.03"/>
+<polygon style="fill:none;stroke:blue;" points="226.912,-1024.43 226.912,-1053.57 190.786,-1069.63 145.214,-1069.63 109.088,-1053.57 109.088,-1024.43 145.214,-1008.37 190.786,-1008.37 226.912,-1024.43"/>
+<text text-anchor="middle" x="168" y="-1033" style="font-family:Times New Roman;font-size:20.00;fill:blue;">task9</text>
+</g>
+<!-- t3->t4 -->
+<g id="edge7" class="edge"><title>t3->t4</title>
+<path style="fill:none;stroke:blue;" d="M168,-1168C168,-1143 168,-1107 168,-1080"/>
+<polygon style="fill:blue;stroke:blue;" points="171.5,-1080 168,-1070 164.5,-1080 171.5,-1080"/>
+</g>
+<!-- t5 -->
+<g id="node9" class="node"><title>t5</title>
+<text text-anchor="middle" x="103" y="-902" style="font-family:Times New Roman;font-size:20.00;fill:blue;">task10</text>
+</g>
+<!-- t4->t5 -->
+<g id="edge9" class="edge"><title>t4->t5</title>
+<path style="fill:none;stroke:blue;" d="M153,-1008C142,-986 127,-956 117,-935"/>
+<polygon style="fill:blue;stroke:blue;" points="119.916,-933.042 112,-926 113.797,-936.441 119.916,-933.042"/>
+</g>
+<!-- t15 -->
+<g id="node30" class="node"><title>t15</title>
+<text text-anchor="middle" x="81" y="-425" style="font-family:Times New Roman;font-size:20.00;fill:blue;">task15</text>
+</g>
+<!-- t4->t15 -->
+<g id="edge33" class="edge"><title>t4->t15</title>
+<path style="fill:none;stroke:blue;" d="M169,-1008C170,-982 171,-942 171,-908 171,-908 171,-908 171,-867 171,-828 174,-817 161,-782 139,-718 110,-714 80,-654 61,-614 55,-604 47,-562 43,-534 41,-526 47,-500 50,-485 57,-470 64,-458"/>
+<polygon style="fill:blue;stroke:blue;" points="67.2031,-459.441 69,-449 61.084,-456.042 67.2031,-459.441"/>
+</g>
+<!-- t6 -->
+<g id="node11" class="node"><title>t6</title>
+<polygon style="fill:none;stroke:olivedrab;" points="144.104,-803.627 144.104,-822.373 108.896,-835.627 59.1041,-835.627 23.8959,-822.373 23.8959,-803.627 59.1041,-790.373 108.896,-790.373 144.104,-803.627"/>
+<polygon style="fill:none;stroke:olivedrab;" points="148.104,-800.859 148.104,-825.141 109.624,-839.627 58.3761,-839.627 19.8959,-825.141 19.8959,-800.859 58.3761,-786.373 109.624,-786.373 148.104,-800.859"/>
+<polygon style="fill:none;stroke:olivedrab;" points="152.104,-798.091 152.104,-827.909 110.352,-843.627 57.6481,-843.627 15.8959,-827.909 15.8959,-798.091 57.6481,-782.373 110.352,-782.373 152.104,-798.091"/>
+<text text-anchor="middle" x="84" y="-807" style="font-family:Times New Roman;font-size:20.00;fill:blue;">task11</text>
+</g>
+<!-- t5->t6 -->
+<g id="edge11" class="edge"><title>t5->t6</title>
+<path style="fill:none;stroke:blue;" d="M99,-890C97,-880 95,-866 92,-854"/>
+<polygon style="fill:blue;stroke:blue;" points="95.3933,-853.119 90,-844 88.5292,-854.492 95.3933,-853.119"/>
+</g>
+<!-- t7 -->
+<g id="node23" class="node"><title>t7</title>
+<text text-anchor="middle" x="205" y="-712" style="font-family:Times New Roman;font-size:20.00;fill:blue;">task12</text>
+</g>
+<!-- t6->t7 -->
+<g id="edge25" class="edge"><title>t6->t7</title>
+<path style="fill:none;stroke:blue;" d="M119,-786C136,-772 157,-756 174,-742"/>
+<polygon style="fill:blue;stroke:blue;" points="176.1,-744.8 182,-736 171.9,-739.2 176.1,-744.8"/>
+</g>
+<!-- t16 -->
+<g id="node33" class="node"><title>t16</title>
+<text text-anchor="middle" x="81" y="-333" style="font-family:Times New Roman;font-size:20.00;fill:blue;">task16</text>
+</g>
+<!-- t6->t16 -->
+<g id="edge37" class="edge"><title>t6->t16</title>
+<path style="fill:none;stroke:blue;" d="M50,-785C34,-767 17,-744 17,-718 17,-718 17,-718 17,-431 17,-405 34,-381 51,-364"/>
+<polygon style="fill:blue;stroke:blue;" points="53.4038,-366.546 58,-357 48.454,-361.596 53.4038,-366.546"/>
+</g>
+<!-- t8 -->
+<g id="node15" class="node"><title>t8</title>
+<text text-anchor="middle" x="281" y="-1193" style="font-family:Times New Roman;font-size:20.00;fill:blue;">task3</text>
+</g>
+<!-- t1->t8 -->
+<g id="edge15" class="edge"><title>t1->t8</title>
+<path style="fill:none;stroke:blue;" d="M281,-1341C281,-1313 281,-1260 281,-1227"/>
+<polygon style="fill:blue;stroke:blue;" points="284.5,-1227 281,-1217 277.5,-1227 284.5,-1227"/>
+</g>
+<!-- t9 -->
+<g id="node17" class="node"><title>t9</title>
+<text text-anchor="middle" x="281" y="-1033" style="font-family:Times New Roman;font-size:20.00;fill:blue;">task4</text>
+</g>
+<!-- t8->t9 -->
+<g id="edge17" class="edge"><title>t8->t9</title>
+<path style="fill:none;stroke:blue;" d="M281,-1181C281,-1153 281,-1100 281,-1067"/>
+<polygon style="fill:blue;stroke:blue;" points="284.5,-1067 281,-1057 277.5,-1067 284.5,-1067"/>
+</g>
+<!-- t21 -->
+<g id="node44" class="node"><title>t21</title>
+<polygon style="fill:white;stroke:gray;" points="330.104,-329.627 330.104,-348.373 294.896,-361.627 245.104,-361.627 209.896,-348.373 209.896,-329.627 245.104,-316.373 294.896,-316.373 330.104,-329.627"/>
+<text text-anchor="middle" x="270" y="-333" style="font-family:Times New Roman;font-size:20.00;fill:gray;">task21</text>
+</g>
+<!-- t8->t21 -->
+<g id="edge49" class="edge"><title>t8->t21</title>
+<path style="fill:none;stroke:gray;" d="M294,-1181C314,-1152 349,-1094 349,-1039 349,-1039 349,-1039 349,-431 349,-405 330,-381 310,-365"/>
+<polygon style="fill:gray;stroke:gray;" points="312.1,-362.2 302,-359 307.9,-367.8 312.1,-362.2"/>
+</g>
+<!-- t10 -->
+<g id="node19" class="node"><title>t10</title>
+<text text-anchor="middle" x="243" y="-902" style="font-family:Times New Roman;font-size:20.00;fill:blue;">task5</text>
+</g>
+<!-- t9->t10 -->
+<g id="edge19" class="edge"><title>t9->t10</title>
+<path style="fill:none;stroke:blue;" d="M276,-1021C269,-999 258,-962 251,-936"/>
+<polygon style="fill:blue;stroke:blue;" points="254.226,-934.573 248,-926 247.521,-936.584 254.226,-934.573"/>
+</g>
+<!-- t11 -->
+<g id="node21" class="node"><title>t11</title>
+<text text-anchor="middle" x="236" y="-807" style="font-family:Times New Roman;font-size:20.00;fill:blue;">task6</text>
+</g>
+<!-- t10->t11 -->
+<g id="edge21" class="edge"><title>t10->t11</title>
+<path style="fill:none;stroke:blue;" d="M242,-890C241,-876 240,-857 238,-841"/>
+<polygon style="fill:blue;stroke:blue;" points="241.478,-840.602 237,-831 234.512,-841.299 241.478,-840.602"/>
+</g>
+<!-- t11->t7 -->
+<g id="edge23" class="edge"><title>t11->t7</title>
+<path style="fill:none;stroke:blue;" d="M230,-795C226,-781 219,-762 214,-746"/>
+<polygon style="fill:blue;stroke:blue;" points="217.226,-744.573 211,-736 210.521,-746.584 217.226,-744.573"/>
+</g>
+<!-- t12 -->
+<g id="node26" class="node"><title>t12</title>
+<text text-anchor="middle" x="131" y="-625" style="font-family:Times New Roman;font-size:20.00;fill:blue;">task13</text>
+</g>
+<!-- t7->t12 -->
+<g id="edge27" class="edge"><title>t7->t12</title>
+<path style="fill:none;stroke:blue;" d="M190,-700C179,-687 165,-671 153,-657"/>
+<polygon style="fill:blue;stroke:blue;" points="155.8,-654.9 147,-649 150.2,-659.1 155.8,-654.9"/>
+</g>
+<!-- t13 -->
+<g id="node38" class="node"><title>t13</title>
+<polygon style="fill:white;stroke:gray;" points="324.104,-621.627 324.104,-640.373 288.896,-653.627 239.104,-653.627 203.896,-640.373 203.896,-621.627 239.104,-608.373 288.896,-608.373 324.104,-621.627"/>
+<text text-anchor="middle" x="264" y="-625" style="font-family:Times New Roman;font-size:20.00;fill:gray;">task18</text>
+</g>
+<!-- t7->t13 -->
+<g id="edge41" class="edge"><title>t7->t13</title>
+<path style="fill:none;stroke:gray;" d="M217,-700C224,-689 234,-675 243,-662"/>
+<polygon style="fill:gray;stroke:gray;" points="245.8,-664.1 249,-654 240.2,-659.9 245.8,-664.1"/>
+</g>
+<!-- t14 -->
+<g id="node28" class="node"><title>t14</title>
+<polygon style="fill:none;stroke:olivedrab;" points="184.104,-521.627 184.104,-540.373 148.896,-553.627 99.1041,-553.627 63.8959,-540.373 63.8959,-521.627 99.1041,-508.373 148.896,-508.373 184.104,-521.627"/>
+<polygon style="fill:none;stroke:olivedrab;" points="188.104,-518.859 188.104,-543.141 149.624,-557.627 98.3761,-557.627 59.8959,-543.141 59.8959,-518.859 98.3761,-504.373 149.624,-504.373 188.104,-518.859"/>
+<polygon style="fill:none;stroke:olivedrab;" points="192.104,-516.091 192.104,-545.909 150.352,-561.627 97.6481,-561.627 55.8959,-545.909 55.8959,-516.091 97.6481,-500.373 150.352,-500.373 192.104,-516.091"/>
+<text text-anchor="middle" x="124" y="-525" style="font-family:Times New Roman;font-size:20.00;fill:blue;">task14</text>
+</g>
+<!-- t12->t14 -->
+<g id="edge29" class="edge"><title>t12->t14</title>
+<path style="fill:none;stroke:blue;" d="M130,-613C129,-602 128,-586 127,-572"/>
+<polygon style="fill:blue;stroke:blue;" points="130.478,-571.602 126,-562 123.512,-572.299 130.478,-571.602"/>
+</g>
+<!-- t14->t15 -->
+<g id="edge31" class="edge"><title>t14->t15</title>
+<path style="fill:none;stroke:blue;" d="M111,-500C105,-486 99,-471 93,-458"/>
+<polygon style="fill:blue;stroke:blue;" points="96.2598,-456.717 89,-449 89.8631,-459.56 96.2598,-456.717"/>
+</g>
+<!-- t15->t16 -->
+<g id="edge35" class="edge"><title>t15->t16</title>
+<path style="fill:none;stroke:blue;" d="M81,-413C81,-400 81,-382 81,-367"/>
+<polygon style="fill:blue;stroke:blue;" points="84.5001,-367 81,-357 77.5001,-367 84.5001,-367"/>
+</g>
+<!-- t17 -->
+<g id="node36" class="node"><title>t17</title>
+<polygon style="fill:none;stroke:orange;" points="154.104,-229.627 154.104,-248.373 118.896,-261.627 69.1041,-261.627 33.8959,-248.373 33.8959,-229.627 69.1041,-216.373 118.896,-216.373 154.104,-229.627"/>
+<polygon style="fill:none;stroke:orange;" points="158.104,-226.859 158.104,-251.141 119.624,-265.627 68.3761,-265.627 29.8959,-251.141 29.8959,-226.859 68.3761,-212.373 119.624,-212.373 158.104,-226.859"/>
+<polygon style="fill:none;stroke:orange;" points="162.104,-224.091 162.104,-253.909 120.352,-269.627 67.6481,-269.627 25.8959,-253.909 25.8959,-224.091 67.6481,-208.373 120.352,-208.373 162.104,-224.091"/>
+<text text-anchor="middle" x="94" y="-233" style="font-family:Times New Roman;font-size:20.00;fill:orange;">task17</text>
+</g>
+<!-- t16->t17 -->
+<g id="edge39" class="edge"><title>t16->t17</title>
+<path style="fill:none;stroke:blue;" d="M83,-321C85,-310 87,-294 89,-280"/>
+<polygon style="fill:blue;stroke:blue;" points="92.4875,-280.299 90,-270 85.5222,-279.602 92.4875,-280.299"/>
+</g>
+<!-- t18 -->
+<g id="node51" class="node"><title>t18</title>
+<polygon style="fill:white;stroke:gray;" points="162.104,-129.627 162.104,-148.373 126.896,-161.627 77.1041,-161.627 41.8959,-148.373 41.8959,-129.627 77.1041,-116.373 126.896,-116.373 162.104,-129.627"/>
+<text text-anchor="middle" x="102" y="-133" style="font-family:Times New Roman;font-size:20.00;fill:gray;">task24</text>
+</g>
+<!-- t17->t18 -->
+<g id="edge55" class="edge"><title>t17->t18</title>
+<path style="fill:none;stroke:gray;" d="M96,-208C98,-197 98,-184 99,-172"/>
+<polygon style="fill:gray;stroke:gray;" points="102.488,-172.299 100,-162 95.5222,-171.602 102.488,-172.299"/>
+</g>
+<!-- t19 -->
+<g id="node40" class="node"><title>t19</title>
+<polygon style="fill:white;stroke:gray;" points="330.104,-521.627 330.104,-540.373 294.896,-553.627 245.104,-553.627 209.896,-540.373 209.896,-521.627 245.104,-508.373 294.896,-508.373 330.104,-521.627"/>
+<text text-anchor="middle" x="270" y="-525" style="font-family:Times New Roman;font-size:20.00;fill:gray;">task19</text>
+</g>
+<!-- t13->t19 -->
+<g id="edge43" class="edge"><title>t13->t19</title>
+<path style="fill:none;stroke:gray;" d="M265,-608C266,-595 267,-579 268,-564"/>
+<polygon style="fill:gray;stroke:gray;" points="271.488,-564.299 269,-554 264.522,-563.602 271.488,-564.299"/>
+</g>
+<!-- t20 -->
+<g id="node42" class="node"><title>t20</title>
+<polygon style="fill:white;stroke:gray;" points="330.104,-421.627 330.104,-440.373 294.896,-453.627 245.104,-453.627 209.896,-440.373 209.896,-421.627 245.104,-408.373 294.896,-408.373 330.104,-421.627"/>
+<text text-anchor="middle" x="270" y="-425" style="font-family:Times New Roman;font-size:20.00;fill:gray;">task20</text>
+</g>
+<!-- t19->t20 -->
+<g id="edge45" class="edge"><title>t19->t20</title>
+<path style="fill:none;stroke:gray;" d="M270,-508C270,-495 270,-479 270,-464"/>
+<polygon style="fill:gray;stroke:gray;" points="273.5,-464 270,-454 266.5,-464 273.5,-464"/>
+</g>
+<!-- t20->t21 -->
+<g id="edge47" class="edge"><title>t20->t21</title>
+<path style="fill:none;stroke:gray;" d="M270,-408C270,-397 270,-384 270,-372"/>
+<polygon style="fill:gray;stroke:gray;" points="273.5,-372 270,-362 266.5,-372 273.5,-372"/>
+</g>
+<!-- t22 -->
+<g id="node47" class="node"><title>t22</title>
+<polygon style="fill:white;stroke:gray;" points="305.104,-229.627 305.104,-248.373 269.896,-261.627 220.104,-261.627 184.896,-248.373 184.896,-229.627 220.104,-216.373 269.896,-216.373 305.104,-229.627"/>
+<text text-anchor="middle" x="245" y="-233" style="font-family:Times New Roman;font-size:20.00;fill:gray;">task22</text>
+</g>
+<!-- t21->t22 -->
+<g id="edge51" class="edge"><title>t21->t22</title>
+<path style="fill:none;stroke:gray;" d="M264,-316C261,-303 257,-286 253,-272"/>
+<polygon style="fill:gray;stroke:gray;" points="256.393,-271.119 251,-262 249.529,-272.492 256.393,-271.119"/>
+</g>
+<!-- t23 -->
+<g id="node49" class="node"><title>t23</title>
+<polygon style="fill:white;stroke:gray;" points="301.104,-129.627 301.104,-148.373 265.896,-161.627 216.104,-161.627 180.896,-148.373 180.896,-129.627 216.104,-116.373 265.896,-116.373 301.104,-129.627"/>
+<text text-anchor="middle" x="241" y="-133" style="font-family:Times New Roman;font-size:20.00;fill:gray;">task23</text>
+</g>
+<!-- t22->t23 -->
+<g id="edge53" class="edge"><title>t22->t23</title>
+<path style="fill:none;stroke:gray;" d="M244,-216C243,-203 243,-187 243,-172"/>
+<polygon style="fill:gray;stroke:gray;" points="246.478,-171.602 242,-162 239.512,-172.299 246.478,-171.602"/>
+</g>
+<!-- t24 -->
+<g id="node53" class="node"><title>t24</title>
+<polygon style="fill:white;stroke:gray;" points="202.104,-37.6274 202.104,-56.3726 166.896,-69.6274 117.104,-69.6274 81.8959,-56.3726 81.8959,-37.6274 117.104,-24.3726 166.896,-24.3726 202.104,-37.6274"/>
+<text text-anchor="middle" x="142" y="-41" style="font-family:Times New Roman;font-size:20.00;fill:gray;">task25</text>
+</g>
+<!-- t23->t24 -->
+<g id="edge57" class="edge"><title>t23->t24</title>
+<path style="fill:none;stroke:gray;" d="M217,-116C204,-104 188,-90 175,-77"/>
+<polygon style="fill:gray;stroke:gray;" points="176.831,-73.9511 167,-70 172.221,-79.2191 176.831,-73.9511"/>
+</g>
+<!-- t18->t24 -->
+<g id="edge59" class="edge"><title>t18->t24</title>
+<path style="fill:none;stroke:gray;" d="M112,-116C117,-105 123,-91 128,-79"/>
+<polygon style="fill:gray;stroke:gray;" points="131.137,-80.5596 132,-70 124.74,-77.7166 131.137,-80.5596"/>
+</g>
+<!-- k1 -->
+<g id="node57" class="node"><title>k1</title>
+<polygon style="fill:none;stroke:orange;" points="638.125,-1489.38 638.125,-1504.62 595.875,-1515.38 536.125,-1515.38 493.875,-1504.62 493.875,-1489.38 536.125,-1478.62 595.875,-1478.62 638.125,-1489.38"/>
+<polygon style="fill:none;stroke:orange;" points="642.125,-1486.28 642.125,-1507.72 596.377,-1519.38 535.623,-1519.38 489.875,-1507.72 489.875,-1486.28 535.623,-1474.62 596.377,-1474.62 642.125,-1486.28"/>
+<polygon style="fill:none;stroke:orange;" points="646.125,-1483.17 646.125,-1510.83 596.879,-1523.38 535.121,-1523.38 485.875,-1510.83 485.875,-1483.17 535.121,-1470.62 596.879,-1470.62 646.125,-1483.17"/>
+<text text-anchor="middle" x="566" y="-1492.5" style="font-family:Times New Roman;font-size:15.00;fill:orange;">Final target</text>
+</g>
+<!-- k2 -->
+<g id="node58" class="node"><title>k2</title>
+<polygon style="fill:red;stroke:black;" points="622,-1448 510,-1448 510,-1412 622,-1412 622,-1448"/>
+<text text-anchor="middle" x="566" y="-1425.5" style="font-family:Times New Roman;font-size:15.00;">Vicious cycle</text>
+</g>
+<!-- k1->k2 -->
+<g id="edge62" class="edge"><title>k1->k2</title>
+<path style="fill:none;stroke:red;" d="M559,-1471C559,-1467 559,-1462 559,-1458"/>
+<polygon style="fill:red;stroke:red;" points="562.488,-1458.3 560,-1448 555.522,-1457.6 562.488,-1458.3"/>
+</g>
+<!-- k2->k1 -->
+<g id="edge64" class="edge"><title>k2->k1</title>
+<path style="fill:none;stroke:red;" d="M572,-1448C573,-1452 573,-1456 573,-1461"/>
+<polygon style="fill:red;stroke:red;" points="569.5,-1461 573,-1471 576.5,-1461 569.5,-1461"/>
+</g>
+<!-- k3 -->
+<g id="node59" class="node"><title>k3</title>
+<text text-anchor="middle" x="566" y="-1354.5" style="font-family:Times New Roman;font-size:15.00;fill:blue;">Task to run</text>
+</g>
+<!-- k2->k3 -->
+<g id="edge66" class="edge"><title>k2->k3</title>
+<path style="fill:none;stroke:blue;" d="M566,-1412C566,-1404 566,-1396 566,-1387"/>
+<polygon style="fill:blue;stroke:blue;" points="569.5,-1387 566,-1377 562.5,-1387 569.5,-1387"/>
+</g>
+<!-- k4 -->
+<g id="node60" class="node"><title>k4</title>
+<polygon style="fill:none;stroke:blue;" points="749.848,-1271.38 749.848,-1286.62 642.152,-1297.38 489.848,-1297.38 382.152,-1286.62 382.152,-1271.38 489.848,-1260.62 642.152,-1260.62 749.848,-1271.38"/>
+<polygon style="fill:none;stroke:blue;" points="753.848,-1267.76 753.848,-1290.24 642.352,-1301.38 489.648,-1301.38 378.152,-1290.24 378.152,-1267.76 489.648,-1256.62 642.352,-1256.62 753.848,-1267.76"/>
+<polygon style="fill:none;stroke:blue;" points="757.848,-1264.14 757.848,-1293.86 642.551,-1305.38 489.449,-1305.38 374.152,-1293.86 374.152,-1264.14 489.449,-1252.62 642.551,-1252.62 757.848,-1264.14"/>
+<text text-anchor="middle" x="566" y="-1274.5" style="font-family:Times New Roman;font-size:15.00;fill:blue;">Force pipeline run from this task</text>
+</g>
+<!-- k3->k4 -->
+<g id="edge67" class="edge"><title>k3->k4</title>
+<path style="fill:none;stroke:blue;" d="M566,-1341C566,-1333 566,-1325 566,-1316"/>
+<polygon style="fill:blue;stroke:blue;" points="569.5,-1316 566,-1306 562.5,-1316 569.5,-1316"/>
+</g>
+<!-- k5 -->
+<g id="node61" class="node"><title>k5</title>
+<polygon style="fill:none;stroke:gray;" points="698.229,-1191.38 698.229,-1206.62 620.771,-1217.38 511.229,-1217.38 433.771,-1206.62 433.771,-1191.38 511.229,-1180.62 620.771,-1180.62 698.229,-1191.38"/>
+<polygon style="fill:none;stroke:gray;" points="702.229,-1187.9 702.229,-1210.1 621.048,-1221.38 510.952,-1221.38 429.771,-1210.1 429.771,-1187.9 510.952,-1176.62 621.048,-1176.62 702.229,-1187.9"/>
+<polygon style="fill:none;stroke:gray;" points="706.229,-1184.42 706.229,-1213.58 621.325,-1225.38 510.675,-1225.38 425.771,-1213.58 425.771,-1184.42 510.675,-1172.62 621.325,-1172.62 706.229,-1184.42"/>
+<text text-anchor="middle" x="566" y="-1194.5" style="font-family:Times New Roman;font-size:15.00;fill:gray;">Up-to-date Final target</text>
+</g>
+<!-- k4->k5 -->
+<g id="edge68" class="edge"><title>k4->k5</title>
+<path style="fill:none;stroke:blue;" d="M566,-1252C566,-1246 566,-1241 566,-1235"/>
+<polygon style="fill:blue;stroke:blue;" points="569.5,-1235 566,-1225 562.5,-1235 569.5,-1235"/>
+</g>
+<!-- k6 -->
+<g id="node62" class="node"><title>k6</title>
+<polygon style="fill:none;stroke:olivedrab;" points="742.07,-1111.38 742.07,-1126.62 638.93,-1137.38 493.07,-1137.38 389.93,-1126.62 389.93,-1111.38 493.07,-1100.62 638.93,-1100.62 742.07,-1111.38"/>
+<polygon style="fill:none;stroke:olivedrab;" points="746.07,-1107.78 746.07,-1130.22 639.139,-1141.38 492.861,-1141.38 385.93,-1130.22 385.93,-1107.78 492.861,-1096.62 639.139,-1096.62 746.07,-1107.78"/>
+<polygon style="fill:none;stroke:olivedrab;" points="750.07,-1104.18 750.07,-1133.82 639.347,-1145.38 492.653,-1145.38 381.93,-1133.82 381.93,-1104.18 492.653,-1092.62 639.347,-1092.62 750.07,-1104.18"/>
+<text text-anchor="middle" x="566" y="-1114.5" style="font-family:Times New Roman;font-size:15.00;fill:blue;">Up-to-date task forced to rerun</text>
+</g>
+<!-- k5->k6 -->
+<g id="edge70" class="edge"><title>k5->k6</title>
+<path style="fill:none;stroke:gray;" d="M566,-1172C566,-1166 566,-1161 566,-1155"/>
+<polygon style="fill:gray;stroke:gray;" points="569.5,-1155 566,-1145 562.5,-1155 569.5,-1155"/>
+</g>
+<!-- k7 -->
+<g id="node63" class="node"><title>k7</title>
+<polygon style="fill:olivedrab;stroke:olivedrab;" points="658.839,-1031.38 658.839,-1046.62 604.455,-1057.38 527.545,-1057.38 473.161,-1046.62 473.161,-1031.38 527.545,-1020.62 604.455,-1020.62 658.839,-1031.38"/>
+<text text-anchor="middle" x="566" y="-1034.5" style="font-family:Times New Roman;font-size:15.00;">Up-to-date task</text>
+</g>
+<!-- k6->k7 -->
+<g id="edge71" class="edge"><title>k6->k7</title>
+<path style="fill:none;stroke:gray;" d="M566,-1092C566,-1084 566,-1076 566,-1068"/>
+<polygon style="fill:gray;stroke:gray;" points="569.5,-1068 566,-1058 562.5,-1068 569.5,-1068"/>
+</g>
+<!-- k8 -->
+<g id="node64" class="node"><title>k8</title>
+<polygon style="fill:white;stroke:gray;" points="703.179,-959.385 703.179,-974.615 622.821,-985.385 509.179,-985.385 428.821,-974.615 428.821,-959.385 509.179,-948.615 622.821,-948.615 703.179,-959.385"/>
+<text text-anchor="middle" x="566" y="-962.5" style="font-family:Times New Roman;font-size:15.00;fill:gray;">Up-to-date dependence</text>
+</g>
+<!-- k7->k8 -->
+<g id="edge72" class="edge"><title>k7->k8</title>
+<path style="fill:none;stroke:gray;" d="M566,-1020C566,-1013 566,-1004 566,-996"/>
+<polygon style="fill:gray;stroke:gray;" points="569.5,-996 566,-986 562.5,-996 569.5,-996"/>
+</g>
+</g>
+</svg>
diff --git a/doc/complex_dags/svg/task25_from_task9.svg b/doc/complex_dags/svg/task25_from_task9.svg
new file mode 100644
index 0000000..d227604
--- /dev/null
+++ b/doc/complex_dags/svg/task25_from_task9.svg
@@ -0,0 +1,376 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.0//EN"
+ "http://www.w3.org/TR/2001/REC-SVG-20010904/DTD/svg10.dtd" [
+ <!ATTLIST svg xmlns:xlink CDATA #FIXED "http://www.w3.org/1999/xlink">
+]>
+<!-- Generated by Graphviz version 2.20.2 (Mon Mar 30 10:09:11 UTC 2009)
+ For user: (lg) leo goodstadt -->
+<!-- Title: tree Pages: 1 -->
+<svg width="274pt" height="576pt"
+ viewBox="0.00 0.00 274.04 576.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<g id="graph0" class="graph" transform="scale(0.367347 0.367347) rotate(0) translate(4 1564)">
+<title>tree</title>
+<polygon style="fill:white;stroke:white;" points="-4,4 -4,-1564 742,-1564 742,4 -4,4"/>
+<g id="cluster2" class="cluster"><title>clustertasks</title>
+<polygon style="fill:none;stroke:black;" points="8,-16 8,-1543 322,-1543 322,-16 8,-16"/>
+<text text-anchor="middle" x="165" y="-1512" style="font-family:Times New Roman;font-size:30.00;">Pipeline:</text>
+</g>
+<g id="cluster3" class="cluster"><title>clusterkey</title>
+<polygon style="fill:#e5e5e5;stroke:#e5e5e5;" points="330,-916 330,-1552 730,-1552 730,-916 330,-916"/>
+<text text-anchor="middle" x="530" y="-1521" style="font-family:Times New Roman;font-size:30.00;">Key:</text>
+</g>
+<!-- t0 -->
+<g id="node2" class="node"><title>t0</title>
+<text text-anchor="middle" x="197" y="-1467" style="font-family:Times New Roman;font-size:20.00;fill:blue;">task1</text>
+</g>
+<!-- t2 -->
+<g id="node3" class="node"><title>t2</title>
+<polygon style="fill:none;stroke:olivedrab;" points="191.912,-1325.63 191.912,-1344.37 162.088,-1357.63 119.912,-1357.63 90.0883,-1344.37 90.0883,-1325.63 119.912,-1312.37 162.088,-1312.37 191.912,-1325.63"/>
+<polygon style="fill:none;stroke:olivedrab;" points="195.912,-1323.03 195.912,-1346.97 162.937,-1361.63 119.063,-1361.63 86.0883,-1346.97 86.0883,-1323.03 119.063,-1308.37 162.937,-1308.37 195.912,-1323.03"/>
+<polygon style="fill:none;stroke:olivedrab;" points="199.912,-1320.43 199.912,-1349.57 163.786,-1365.63 118.214,-1365.63 82.0883,-1349.57 82.0883,-1320.43 118.214,-1304.37 163.786,-1304.37 199.912,-1320.43"/>
+<text text-anchor="middle" x="141" y="-1329" style="font-family:Times New Roman;font-size:20.00;fill:blue;">task7</text>
+</g>
+<!-- t0->t2 -->
+<g id="edge3" class="edge"><title>t0->t2</title>
+<path style="fill:none;stroke:blue;" d="M190,-1455C181,-1435 167,-1402 157,-1375"/>
+<polygon style="fill:blue;stroke:blue;" points="160.26,-1373.72 153,-1366 153.863,-1376.56 160.26,-1373.72"/>
+</g>
+<!-- t1 -->
+<g id="node13" class="node"><title>t1</title>
+<text text-anchor="middle" x="254" y="-1329" style="font-family:Times New Roman;font-size:20.00;fill:blue;">task2</text>
+</g>
+<!-- t0->t1 -->
+<g id="edge13" class="edge"><title>t0->t1</title>
+<path style="fill:none;stroke:blue;" d="M204,-1455C214,-1432 232,-1390 243,-1363"/>
+<polygon style="fill:blue;stroke:blue;" points="246.536,-1363.58 247,-1353 240.036,-1360.98 246.536,-1363.58"/>
+</g>
+<!-- t3 -->
+<g id="node5" class="node"><title>t3</title>
+<polygon style="fill:none;stroke:olivedrab;" points="191.912,-1165.63 191.912,-1184.37 162.088,-1197.63 119.912,-1197.63 90.0883,-1184.37 90.0883,-1165.63 119.912,-1152.37 162.088,-1152.37 191.912,-1165.63"/>
+<polygon style="fill:none;stroke:olivedrab;" points="195.912,-1163.03 195.912,-1186.97 162.937,-1201.63 119.063,-1201.63 86.0883,-1186.97 86.0883,-1163.03 119.063,-1148.37 162.937,-1148.37 195.912,-1163.03"/>
+<polygon style="fill:none;stroke:olivedrab;" points="199.912,-1160.43 199.912,-1189.57 163.786,-1205.63 118.214,-1205.63 82.0883,-1189.57 82.0883,-1160.43 118.214,-1144.37 163.786,-1144.37 199.912,-1160.43"/>
+<text text-anchor="middle" x="141" y="-1169" style="font-family:Times New Roman;font-size:20.00;fill:blue;">task8</text>
+</g>
+<!-- t2->t3 -->
+<g id="edge5" class="edge"><title>t2->t3</title>
+<path style="fill:none;stroke:blue;" d="M141,-1304C141,-1279 141,-1243 141,-1216"/>
+<polygon style="fill:blue;stroke:blue;" points="144.5,-1216 141,-1206 137.5,-1216 144.5,-1216"/>
+</g>
+<!-- t4 -->
+<g id="node7" class="node"><title>t4</title>
+<polygon style="fill:none;stroke:blue;" points="191.912,-1005.63 191.912,-1024.37 162.088,-1037.63 119.912,-1037.63 90.0883,-1024.37 90.0883,-1005.63 119.912,-992.373 162.088,-992.373 191.912,-1005.63"/>
+<polygon style="fill:none;stroke:blue;" points="195.912,-1003.03 195.912,-1026.97 162.937,-1041.63 119.063,-1041.63 86.0883,-1026.97 86.0883,-1003.03 119.063,-988.373 162.937,-988.373 195.912,-1003.03"/>
+<polygon style="fill:none;stroke:blue;" points="199.912,-1000.43 199.912,-1029.57 163.786,-1045.63 118.214,-1045.63 82.0883,-1029.57 82.0883,-1000.43 118.214,-984.373 163.786,-984.373 199.912,-1000.43"/>
+<text text-anchor="middle" x="141" y="-1009" style="font-family:Times New Roman;font-size:20.00;fill:blue;">task9</text>
+</g>
+<!-- t3->t4 -->
+<g id="edge7" class="edge"><title>t3->t4</title>
+<path style="fill:none;stroke:blue;" d="M141,-1144C141,-1119 141,-1083 141,-1056"/>
+<polygon style="fill:blue;stroke:blue;" points="144.5,-1056 141,-1046 137.5,-1056 144.5,-1056"/>
+</g>
+<!-- t5 -->
+<g id="node9" class="node"><title>t5</title>
+<text text-anchor="middle" x="103" y="-878" style="font-family:Times New Roman;font-size:20.00;fill:blue;">task10</text>
+</g>
+<!-- t4->t5 -->
+<g id="edge9" class="edge"><title>t4->t5</title>
+<path style="fill:none;stroke:blue;" d="M132,-984C126,-963 117,-933 111,-912"/>
+<polygon style="fill:blue;stroke:blue;" points="114.226,-910.573 108,-902 107.521,-912.584 114.226,-910.573"/>
+</g>
+<!-- t15 -->
+<g id="node43" class="node"><title>t15</title>
+<text text-anchor="middle" x="81" y="-403" style="font-family:Times New Roman;font-size:20.00;fill:blue;">task15</text>
+</g>
+<!-- t4->t15 -->
+<g id="edge47" class="edge"><title>t4->t15</title>
+<path style="fill:none;stroke:blue;" d="M152,-984C161,-958 171,-919 171,-884 171,-884 171,-884 171,-843 171,-804 177,-792 161,-758 150,-732 135,-735 119,-712 76,-645 63,-625 47,-548 42,-521 42,-512 47,-486 51,-469 58,-451 65,-436"/>
+<polygon style="fill:blue;stroke:blue;" points="68.2031,-437.441 70,-427 62.084,-434.042 68.2031,-437.441"/>
+</g>
+<!-- t6 -->
+<g id="node11" class="node"><title>t6</title>
+<polygon style="fill:none;stroke:olivedrab;" points="144.104,-779.627 144.104,-798.373 108.896,-811.627 59.1041,-811.627 23.8959,-798.373 23.8959,-779.627 59.1041,-766.373 108.896,-766.373 144.104,-779.627"/>
+<polygon style="fill:none;stroke:olivedrab;" points="148.104,-776.859 148.104,-801.141 109.624,-815.627 58.3761,-815.627 19.8959,-801.141 19.8959,-776.859 58.3761,-762.373 109.624,-762.373 148.104,-776.859"/>
+<polygon style="fill:none;stroke:olivedrab;" points="152.104,-774.091 152.104,-803.909 110.352,-819.627 57.6481,-819.627 15.8959,-803.909 15.8959,-774.091 57.6481,-758.373 110.352,-758.373 152.104,-774.091"/>
+<text text-anchor="middle" x="84" y="-783" style="font-family:Times New Roman;font-size:20.00;fill:blue;">task11</text>
+</g>
+<!-- t5->t6 -->
+<g id="edge11" class="edge"><title>t5->t6</title>
+<path style="fill:none;stroke:blue;" d="M99,-866C97,-856 95,-842 92,-830"/>
+<polygon style="fill:blue;stroke:blue;" points="95.3933,-829.119 90,-820 88.5292,-830.492 95.3933,-829.119"/>
+</g>
+<!-- t7 -->
+<g id="node23" class="node"><title>t7</title>
+<text text-anchor="middle" x="170" y="-688" style="font-family:Times New Roman;font-size:20.00;fill:blue;">task12</text>
+</g>
+<!-- t6->t7 -->
+<g id="edge25" class="edge"><title>t6->t7</title>
+<path style="fill:none;stroke:blue;" d="M111,-759C122,-747 136,-732 147,-720"/>
+<polygon style="fill:blue;stroke:blue;" points="150.049,-721.831 154,-712 144.781,-717.221 150.049,-721.831"/>
+</g>
+<!-- t16 -->
+<g id="node46" class="node"><title>t16</title>
+<text text-anchor="middle" x="81" y="-308" style="font-family:Times New Roman;font-size:20.00;fill:blue;">task16</text>
+</g>
+<!-- t6->t16 -->
+<g id="edge51" class="edge"><title>t6->t16</title>
+<path style="fill:none;stroke:blue;" d="M50,-761C34,-743 17,-720 17,-694 17,-694 17,-694 17,-409 17,-382 35,-357 52,-339"/>
+<polygon style="fill:blue;stroke:blue;" points="54.4038,-341.546 59,-332 49.454,-336.596 54.4038,-341.546"/>
+</g>
+<!-- t8 -->
+<g id="node15" class="node"><title>t8</title>
+<text text-anchor="middle" x="254" y="-1169" style="font-family:Times New Roman;font-size:20.00;fill:blue;">task3</text>
+</g>
+<!-- t1->t8 -->
+<g id="edge15" class="edge"><title>t1->t8</title>
+<path style="fill:none;stroke:blue;" d="M254,-1317C254,-1289 254,-1236 254,-1203"/>
+<polygon style="fill:blue;stroke:blue;" points="257.5,-1203 254,-1193 250.5,-1203 257.5,-1203"/>
+</g>
+<!-- t9 -->
+<g id="node17" class="node"><title>t9</title>
+<text text-anchor="middle" x="254" y="-1009" style="font-family:Times New Roman;font-size:20.00;fill:blue;">task4</text>
+</g>
+<!-- t8->t9 -->
+<g id="edge17" class="edge"><title>t8->t9</title>
+<path style="fill:none;stroke:blue;" d="M254,-1157C254,-1129 254,-1076 254,-1043"/>
+<polygon style="fill:blue;stroke:blue;" points="257.5,-1043 254,-1033 250.5,-1043 257.5,-1043"/>
+</g>
+<!-- t21 -->
+<g id="node32" class="node"><title>t21</title>
+<text text-anchor="middle" x="226" y="-308" style="font-family:Times New Roman;font-size:20.00;fill:blue;">task21</text>
+</g>
+<!-- t8->t21 -->
+<g id="edge35" class="edge"><title>t8->t21</title>
+<path style="fill:none;stroke:blue;" d="M265,-1157C283,-1128 313,-1069 313,-1015 313,-1015 313,-1015 313,-409 313,-379 290,-355 267,-338"/>
+<polygon style="fill:blue;stroke:blue;" points="269.1,-335.2 259,-332 264.9,-340.8 269.1,-335.2"/>
+</g>
+<!-- t10 -->
+<g id="node19" class="node"><title>t10</title>
+<text text-anchor="middle" x="230" y="-878" style="font-family:Times New Roman;font-size:20.00;fill:blue;">task5</text>
+</g>
+<!-- t9->t10 -->
+<g id="edge19" class="edge"><title>t9->t10</title>
+<path style="fill:none;stroke:blue;" d="M251,-997C247,-975 240,-938 235,-912"/>
+<polygon style="fill:blue;stroke:blue;" points="238.393,-911.119 233,-902 231.529,-912.492 238.393,-911.119"/>
+</g>
+<!-- t11 -->
+<g id="node21" class="node"><title>t11</title>
+<text text-anchor="middle" x="227" y="-783" style="font-family:Times New Roman;font-size:20.00;fill:blue;">task6</text>
+</g>
+<!-- t10->t11 -->
+<g id="edge21" class="edge"><title>t10->t11</title>
+<path style="fill:none;stroke:blue;" d="M229,-866C229,-852 228,-833 228,-817"/>
+<polygon style="fill:blue;stroke:blue;" points="231.5,-817 228,-807 224.5,-817 231.5,-817"/>
+</g>
+<!-- t11->t7 -->
+<g id="edge23" class="edge"><title>t11->t7</title>
+<path style="fill:none;stroke:blue;" d="M216,-771C208,-757 196,-737 186,-721"/>
+<polygon style="fill:blue;stroke:blue;" points="188.916,-719.042 181,-712 182.797,-722.441 188.916,-719.042"/>
+</g>
+<!-- t13 -->
+<g id="node26" class="node"><title>t13</title>
+<text text-anchor="middle" x="239" y="-606" style="font-family:Times New Roman;font-size:20.00;fill:blue;">task18</text>
+</g>
+<!-- t7->t13 -->
+<g id="edge27" class="edge"><title>t7->t13</title>
+<path style="fill:none;stroke:blue;" d="M185,-676C195,-665 207,-651 217,-638"/>
+<polygon style="fill:blue;stroke:blue;" points="220.049,-639.831 224,-630 214.781,-635.221 220.049,-639.831"/>
+</g>
+<!-- t12 -->
+<g id="node39" class="node"><title>t12</title>
+<text text-anchor="middle" x="133" y="-606" style="font-family:Times New Roman;font-size:20.00;fill:blue;">task13</text>
+</g>
+<!-- t7->t12 -->
+<g id="edge41" class="edge"><title>t7->t12</title>
+<path style="fill:none;stroke:blue;" d="M162,-676C157,-666 151,-652 145,-639"/>
+<polygon style="fill:blue;stroke:blue;" points="148.26,-637.717 141,-630 141.863,-640.56 148.26,-637.717"/>
+</g>
+<!-- t19 -->
+<g id="node28" class="node"><title>t19</title>
+<text text-anchor="middle" x="252" y="-511" style="font-family:Times New Roman;font-size:20.00;fill:blue;">task19</text>
+</g>
+<!-- t13->t19 -->
+<g id="edge29" class="edge"><title>t13->t19</title>
+<path style="fill:none;stroke:blue;" d="M242,-594C244,-580 246,-561 248,-545"/>
+<polygon style="fill:blue;stroke:blue;" points="251.488,-545.299 249,-535 244.522,-544.602 251.488,-545.299"/>
+</g>
+<!-- t20 -->
+<g id="node30" class="node"><title>t20</title>
+<polygon style="fill:none;stroke:olivedrab;" points="286.104,-399.627 286.104,-418.373 250.896,-431.627 201.104,-431.627 165.896,-418.373 165.896,-399.627 201.104,-386.373 250.896,-386.373 286.104,-399.627"/>
+<polygon style="fill:none;stroke:olivedrab;" points="290.104,-396.859 290.104,-421.141 251.624,-435.627 200.376,-435.627 161.896,-421.141 161.896,-396.859 200.376,-382.373 251.624,-382.373 290.104,-396.859"/>
+<polygon style="fill:none;stroke:olivedrab;" points="294.104,-394.091 294.104,-423.909 252.352,-439.627 199.648,-439.627 157.896,-423.909 157.896,-394.091 199.648,-378.373 252.352,-378.373 294.104,-394.091"/>
+<text text-anchor="middle" x="226" y="-403" style="font-family:Times New Roman;font-size:20.00;fill:blue;">task20</text>
+</g>
+<!-- t19->t20 -->
+<g id="edge31" class="edge"><title>t19->t20</title>
+<path style="fill:none;stroke:blue;" d="M248,-499C245,-486 240,-467 236,-450"/>
+<polygon style="fill:blue;stroke:blue;" points="239.226,-448.573 233,-440 232.521,-450.584 239.226,-448.573"/>
+</g>
+<!-- t20->t21 -->
+<g id="edge33" class="edge"><title>t20->t21</title>
+<path style="fill:none;stroke:blue;" d="M226,-378C226,-367 226,-354 226,-342"/>
+<polygon style="fill:blue;stroke:blue;" points="229.5,-342 226,-332 222.5,-342 229.5,-342"/>
+</g>
+<!-- t22 -->
+<g id="node35" class="node"><title>t22</title>
+<text text-anchor="middle" x="195" y="-226" style="font-family:Times New Roman;font-size:20.00;fill:blue;">task22</text>
+</g>
+<!-- t21->t22 -->
+<g id="edge37" class="edge"><title>t21->t22</title>
+<path style="fill:none;stroke:blue;" d="M219,-296C215,-286 210,-272 205,-259"/>
+<polygon style="fill:blue;stroke:blue;" points="208.483,-258.38 202,-250 201.842,-260.594 208.483,-258.38"/>
+</g>
+<!-- t23 -->
+<g id="node37" class="node"><title>t23</title>
+<text text-anchor="middle" x="187" y="-144" style="font-family:Times New Roman;font-size:20.00;fill:blue;">task23</text>
+</g>
+<!-- t22->t23 -->
+<g id="edge39" class="edge"><title>t22->t23</title>
+<path style="fill:none;stroke:blue;" d="M193,-214C192,-204 191,-190 190,-178"/>
+<polygon style="fill:blue;stroke:blue;" points="193.478,-177.602 189,-168 186.512,-178.299 193.478,-177.602"/>
+</g>
+<!-- t24 -->
+<g id="node53" class="node"><title>t24</title>
+<polygon style="fill:none;stroke:orange;" points="174.104,-45.6274 174.104,-64.3726 138.896,-77.6274 89.1041,-77.6274 53.8959,-64.3726 53.8959,-45.6274 89.1041,-32.3726 138.896,-32.3726 174.104,-45.6274"/>
+<polygon style="fill:none;stroke:orange;" points="178.104,-42.8592 178.104,-67.1408 139.624,-81.6274 88.3761,-81.6274 49.8959,-67.1408 49.8959,-42.8592 88.3761,-28.3726 139.624,-28.3726 178.104,-42.8592"/>
+<polygon style="fill:none;stroke:orange;" points="182.104,-40.091 182.104,-69.909 140.352,-85.6274 87.6481,-85.6274 45.8959,-69.909 45.8959,-40.091 87.6481,-24.3726 140.352,-24.3726 182.104,-40.091"/>
+<text text-anchor="middle" x="114" y="-49" style="font-family:Times New Roman;font-size:20.00;fill:orange;">task25</text>
+</g>
+<!-- t23->t24 -->
+<g id="edge57" class="edge"><title>t23->t24</title>
+<path style="fill:none;stroke:blue;" d="M173,-132C165,-121 154,-107 144,-94"/>
+<polygon style="fill:blue;stroke:blue;" points="146.8,-91.9 138,-86 141.2,-96.1 146.8,-91.9"/>
+</g>
+<!-- t14 -->
+<g id="node41" class="node"><title>t14</title>
+<polygon style="fill:none;stroke:olivedrab;" points="184.104,-507.627 184.104,-526.373 148.896,-539.627 99.1041,-539.627 63.8959,-526.373 63.8959,-507.627 99.1041,-494.373 148.896,-494.373 184.104,-507.627"/>
+<polygon style="fill:none;stroke:olivedrab;" points="188.104,-504.859 188.104,-529.141 149.624,-543.627 98.3761,-543.627 59.8959,-529.141 59.8959,-504.859 98.3761,-490.373 149.624,-490.373 188.104,-504.859"/>
+<polygon style="fill:none;stroke:olivedrab;" points="192.104,-502.091 192.104,-531.909 150.352,-547.627 97.6481,-547.627 55.8959,-531.909 55.8959,-502.091 97.6481,-486.373 150.352,-486.373 192.104,-502.091"/>
+<text text-anchor="middle" x="124" y="-511" style="font-family:Times New Roman;font-size:20.00;fill:blue;">task14</text>
+</g>
+<!-- t12->t14 -->
+<g id="edge43" class="edge"><title>t12->t14</title>
+<path style="fill:none;stroke:blue;" d="M131,-594C130,-584 129,-571 128,-558"/>
+<polygon style="fill:blue;stroke:blue;" points="131.478,-557.602 127,-548 124.512,-558.299 131.478,-557.602"/>
+</g>
+<!-- t14->t15 -->
+<g id="edge45" class="edge"><title>t14->t15</title>
+<path style="fill:none;stroke:blue;" d="M112,-486C105,-470 98,-451 92,-436"/>
+<polygon style="fill:blue;stroke:blue;" points="95.2598,-434.717 88,-427 88.8631,-437.56 95.2598,-434.717"/>
+</g>
+<!-- t15->t16 -->
+<g id="edge49" class="edge"><title>t15->t16</title>
+<path style="fill:none;stroke:blue;" d="M81,-391C81,-377 81,-358 81,-342"/>
+<polygon style="fill:blue;stroke:blue;" points="84.5001,-342 81,-332 77.5001,-342 84.5001,-342"/>
+</g>
+<!-- t17 -->
+<g id="node49" class="node"><title>t17</title>
+<text text-anchor="middle" x="83" y="-226" style="font-family:Times New Roman;font-size:20.00;fill:blue;">task17</text>
+</g>
+<!-- t16->t17 -->
+<g id="edge53" class="edge"><title>t16->t17</title>
+<path style="fill:none;stroke:blue;" d="M81,-296C82,-286 82,-272 82,-260"/>
+<polygon style="fill:blue;stroke:blue;" points="85.4875,-260.299 83,-250 78.5222,-259.602 85.4875,-260.299"/>
+</g>
+<!-- t18 -->
+<g id="node51" class="node"><title>t18</title>
+<text text-anchor="middle" x="83" y="-144" style="font-family:Times New Roman;font-size:20.00;fill:blue;">task24</text>
+</g>
+<!-- t17->t18 -->
+<g id="edge55" class="edge"><title>t17->t18</title>
+<path style="fill:none;stroke:blue;" d="M83,-214C83,-204 83,-190 83,-178"/>
+<polygon style="fill:blue;stroke:blue;" points="86.5001,-178 83,-168 79.5001,-178 86.5001,-178"/>
+</g>
+<!-- t18->t24 -->
+<g id="edge59" class="edge"><title>t18->t24</title>
+<path style="fill:none;stroke:blue;" d="M89,-132C92,-122 97,-108 101,-96"/>
+<polygon style="fill:blue;stroke:blue;" points="104.479,-96.584 104,-86 97.7741,-94.5725 104.479,-96.584"/>
+</g>
+<!-- k1 -->
+<g id="node57" class="node"><title>k1</title>
+<polygon style="fill:none;stroke:orange;" points="602.125,-1465.38 602.125,-1480.62 559.875,-1491.38 500.125,-1491.38 457.875,-1480.62 457.875,-1465.38 500.125,-1454.62 559.875,-1454.62 602.125,-1465.38"/>
+<polygon style="fill:none;stroke:orange;" points="606.125,-1462.28 606.125,-1483.72 560.377,-1495.38 499.623,-1495.38 453.875,-1483.72 453.875,-1462.28 499.623,-1450.62 560.377,-1450.62 606.125,-1462.28"/>
+<polygon style="fill:none;stroke:orange;" points="610.125,-1459.17 610.125,-1486.83 560.879,-1499.38 499.121,-1499.38 449.875,-1486.83 449.875,-1459.17 499.121,-1446.62 560.879,-1446.62 610.125,-1459.17"/>
+<text text-anchor="middle" x="530" y="-1468.5" style="font-family:Times New Roman;font-size:15.00;fill:orange;">Final target</text>
+</g>
+<!-- k2 -->
+<g id="node58" class="node"><title>k2</title>
+<polygon style="fill:red;stroke:black;" points="586,-1424 474,-1424 474,-1388 586,-1388 586,-1424"/>
+<text text-anchor="middle" x="530" y="-1401.5" style="font-family:Times New Roman;font-size:15.00;">Vicious cycle</text>
+</g>
+<!-- k1->k2 -->
+<g id="edge62" class="edge"><title>k1->k2</title>
+<path style="fill:none;stroke:red;" d="M523,-1447C523,-1443 523,-1438 523,-1434"/>
+<polygon style="fill:red;stroke:red;" points="526.488,-1434.3 524,-1424 519.522,-1433.6 526.488,-1434.3"/>
+</g>
+<!-- k2->k1 -->
+<g id="edge64" class="edge"><title>k2->k1</title>
+<path style="fill:none;stroke:red;" d="M536,-1424C537,-1428 537,-1432 537,-1437"/>
+<polygon style="fill:red;stroke:red;" points="533.5,-1437 537,-1447 540.5,-1437 533.5,-1437"/>
+</g>
+<!-- k3 -->
+<g id="node59" class="node"><title>k3</title>
+<text text-anchor="middle" x="530" y="-1330.5" style="font-family:Times New Roman;font-size:15.00;fill:blue;">Task to run</text>
+</g>
+<!-- k2->k3 -->
+<g id="edge66" class="edge"><title>k2->k3</title>
+<path style="fill:none;stroke:blue;" d="M530,-1388C530,-1380 530,-1372 530,-1363"/>
+<polygon style="fill:blue;stroke:blue;" points="533.5,-1363 530,-1353 526.5,-1363 533.5,-1363"/>
+</g>
+<!-- k4 -->
+<g id="node60" class="node"><title>k4</title>
+<polygon style="fill:none;stroke:blue;" points="713.848,-1247.38 713.848,-1262.62 606.152,-1273.38 453.848,-1273.38 346.152,-1262.62 346.152,-1247.38 453.848,-1236.62 606.152,-1236.62 713.848,-1247.38"/>
+<polygon style="fill:none;stroke:blue;" points="717.848,-1243.76 717.848,-1266.24 606.352,-1277.38 453.648,-1277.38 342.152,-1266.24 342.152,-1243.76 453.648,-1232.62 606.352,-1232.62 717.848,-1243.76"/>
+<polygon style="fill:none;stroke:blue;" points="721.848,-1240.14 721.848,-1269.86 606.551,-1281.38 453.449,-1281.38 338.152,-1269.86 338.152,-1240.14 453.449,-1228.62 606.551,-1228.62 721.848,-1240.14"/>
+<text text-anchor="middle" x="530" y="-1250.5" style="font-family:Times New Roman;font-size:15.00;fill:blue;">Force pipeline run from this task</text>
+</g>
+<!-- k3->k4 -->
+<g id="edge67" class="edge"><title>k3->k4</title>
+<path style="fill:none;stroke:blue;" d="M530,-1317C530,-1309 530,-1301 530,-1292"/>
+<polygon style="fill:blue;stroke:blue;" points="533.5,-1292 530,-1282 526.5,-1292 533.5,-1292"/>
+</g>
+<!-- k5 -->
+<g id="node61" class="node"><title>k5</title>
+<polygon style="fill:none;stroke:gray;" points="662.229,-1167.38 662.229,-1182.62 584.771,-1193.38 475.229,-1193.38 397.771,-1182.62 397.771,-1167.38 475.229,-1156.62 584.771,-1156.62 662.229,-1167.38"/>
+<polygon style="fill:none;stroke:gray;" points="666.229,-1163.9 666.229,-1186.1 585.048,-1197.38 474.952,-1197.38 393.771,-1186.1 393.771,-1163.9 474.952,-1152.62 585.048,-1152.62 666.229,-1163.9"/>
+<polygon style="fill:none;stroke:gray;" points="670.229,-1160.42 670.229,-1189.58 585.325,-1201.38 474.675,-1201.38 389.771,-1189.58 389.771,-1160.42 474.675,-1148.62 585.325,-1148.62 670.229,-1160.42"/>
+<text text-anchor="middle" x="530" y="-1170.5" style="font-family:Times New Roman;font-size:15.00;fill:gray;">Up-to-date Final target</text>
+</g>
+<!-- k4->k5 -->
+<g id="edge68" class="edge"><title>k4->k5</title>
+<path style="fill:none;stroke:blue;" d="M530,-1228C530,-1222 530,-1217 530,-1211"/>
+<polygon style="fill:blue;stroke:blue;" points="533.5,-1211 530,-1201 526.5,-1211 533.5,-1211"/>
+</g>
+<!-- k6 -->
+<g id="node62" class="node"><title>k6</title>
+<polygon style="fill:none;stroke:olivedrab;" points="706.07,-1087.38 706.07,-1102.62 602.93,-1113.38 457.07,-1113.38 353.93,-1102.62 353.93,-1087.38 457.07,-1076.62 602.93,-1076.62 706.07,-1087.38"/>
+<polygon style="fill:none;stroke:olivedrab;" points="710.07,-1083.78 710.07,-1106.22 603.139,-1117.38 456.861,-1117.38 349.93,-1106.22 349.93,-1083.78 456.861,-1072.62 603.139,-1072.62 710.07,-1083.78"/>
+<polygon style="fill:none;stroke:olivedrab;" points="714.07,-1080.18 714.07,-1109.82 603.347,-1121.38 456.653,-1121.38 345.93,-1109.82 345.93,-1080.18 456.653,-1068.62 603.347,-1068.62 714.07,-1080.18"/>
+<text text-anchor="middle" x="530" y="-1090.5" style="font-family:Times New Roman;font-size:15.00;fill:blue;">Up-to-date task forced to rerun</text>
+</g>
+<!-- k5->k6 -->
+<g id="edge70" class="edge"><title>k5->k6</title>
+<path style="fill:none;stroke:gray;" d="M530,-1148C530,-1142 530,-1137 530,-1131"/>
+<polygon style="fill:gray;stroke:gray;" points="533.5,-1131 530,-1121 526.5,-1131 533.5,-1131"/>
+</g>
+<!-- k7 -->
+<g id="node63" class="node"><title>k7</title>
+<polygon style="fill:olivedrab;stroke:olivedrab;" points="622.839,-1007.38 622.839,-1022.62 568.455,-1033.38 491.545,-1033.38 437.161,-1022.62 437.161,-1007.38 491.545,-996.615 568.455,-996.615 622.839,-1007.38"/>
+<text text-anchor="middle" x="530" y="-1010.5" style="font-family:Times New Roman;font-size:15.00;">Up-to-date task</text>
+</g>
+<!-- k6->k7 -->
+<g id="edge71" class="edge"><title>k6->k7</title>
+<path style="fill:none;stroke:gray;" d="M530,-1068C530,-1060 530,-1052 530,-1044"/>
+<polygon style="fill:gray;stroke:gray;" points="533.5,-1044 530,-1034 526.5,-1044 533.5,-1044"/>
+</g>
+<!-- k8 -->
+<g id="node64" class="node"><title>k8</title>
+<polygon style="fill:white;stroke:gray;" points="667.179,-935.385 667.179,-950.615 586.821,-961.385 473.179,-961.385 392.821,-950.615 392.821,-935.385 473.179,-924.615 586.821,-924.615 667.179,-935.385"/>
+<text text-anchor="middle" x="530" y="-938.5" style="font-family:Times New Roman;font-size:15.00;fill:gray;">Up-to-date dependence</text>
+</g>
+<!-- k7->k8 -->
+<g id="edge72" class="edge"><title>k7->k8</title>
+<path style="fill:none;stroke:gray;" d="M530,-996C530,-989 530,-980 530,-972"/>
+<polygon style="fill:gray;stroke:gray;" points="533.5,-972 530,-962 526.5,-972 533.5,-972"/>
+</g>
+</g>
+</svg>
diff --git a/doc/conf.py b/doc/conf.py
new file mode 100644
index 0000000..467682e
--- /dev/null
+++ b/doc/conf.py
@@ -0,0 +1,329 @@
+# -*- coding: utf-8 -*-
+#
+# ruffus documentation build configuration file, created by
+# sphinx-quickstart on Tue Apr 21 16:55:56 2009.
+#
+# This file is execfile()d with the current directory set to its containing dir.
+#
+# Note that not all possible configuration values are present in this
+# autogenerated file.
+#
+# All configuration values have a default; values that are commented out
+# serve to show the default.
+
+import sys, os
+sys.path.insert(0, os.path.abspath(os.path.join("..")))
+print sys.path
+import ruffus, ruffus.task, ruffus.ruffus_version
+
+
+# If extensions (or modules to document with autodoc) are in another directory,
+# add these directories to sys.path here. If the directory is relative to the
+# documentation root, use os.path.abspath to make it absolute, like shown here.
+#sys.path.insert(0, os.path.abspath('.'))
+
+# -- General configuration -----------------------------------------------------
+
+# If your documentation needs a minimal Sphinx version, state it here.
+#needs_sphinx = '1.0'
+
+# Add any Sphinx extension module names here, as strings. They can be extensions
+# coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
+extensions = ['sphinx.ext.autodoc', 'sphinx.ext.doctest', 'sphinx.ext.intersphinx', 'sphinx.ext.todo',
+ 'sphinx.ext.coverage', 'sphinx.ext.pngmath', 'sphinx.ext.mathjax', 'sphinx.ext.ifconfig', 'sphinx.ext.viewcode']
+
+# Add any paths that contain templates here, relative to this directory.
+templates_path = ['_templates']
+
+# The suffix of source filenames.
+source_suffix = '.rst'
+
+# The encoding of source files.
+#source_encoding = 'utf-8-sig'
+
+# The master toctree document.
+master_doc = 'contents'
+
+# General information about the project.
+project = u'ruffus'
+copyright = u'2009-2013 Leo Goodstadt'
+
+# The version info for the project you're documenting, acts as replacement for
+# |version| and |release|, also used in various other places throughout the
+# built documents.
+#
+# The short X.Y version.
+version=ruffus.ruffus_version.__version #major.minor[.patch[.sub]]
+# The full version, without alpha/beta/rc tags.
+import re
+release = re.subn("([\d\.]+).*", r"\1", version)[0]
+print version, release
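+# For illustration, the substitution above keeps only the leading numeric part of
+# the version string; e.g. a hypothetical version "2.5b3" would give:
+#     re.subn("([\d\.]+).*", r"\1", "2.5b3")[0]    # -> release == "2.5"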
+
+
+# The language for content autogenerated by Sphinx. Refer to documentation
+# for a list of supported languages.
+#language = None
+
+# There are two options for replacing |today|: either, you set today to some
+# non-false value, then it is used:
+#today = ''
+# Else, today_fmt is used as the format for a strftime call.
+#today_fmt = '%B %d, %Y'
+
+# List of patterns, relative to source directory, that match files and
+# directories to ignore when looking for source files.
+exclude_patterns = ['_build']
+
+# The reST default role (used for this markup: `text`) to use for all documents.
+#default_role = None
+
+# If true, '()' will be appended to :func: etc. cross-reference text.
+#add_function_parentheses = True
+
+# If true, the current module name will be prepended to all description
+# unit titles (such as .. function::).
+#add_module_names = True
+
+# If true, sectionauthor and moduleauthor directives will be shown in the
+# output. They are ignored by default.
+#show_authors = False
+
+# The name of the Pygments (syntax highlighting) style to use.
+pygments_style = 'sphinx'
+
+# A list of ignored prefixes for module index sorting.
+#modindex_common_prefix = []
+
+# If true, keep warnings as "system message" paragraphs in the built documents.
+#keep_warnings = False
+
+
+# -- Options for HTML output ---------------------------------------------------
+
+# The style sheet to use for HTML and HTML Help pages. A file of that name
+# must exist either in Sphinx' static/ path, or in one of the custom paths
+# given in html_static_path.
+import os
+on_rtd = os.environ.get('READTHEDOCS', None) == 'True'
+if not on_rtd:
+ html_style = 'ruffus.css'
+#html_style = 'ruffus.css'
+
+# a list of builtin themes.
+html_theme = 'default'
+
+# Theme options are theme-specific and customize the look and feel of a theme
+# further. For a list of options available for each theme, see the
+# documentation.
+#html_theme_options = {}
+
+# Add any paths that contain custom themes here, relative to this directory.
+#html_theme_path = []
+
+# The name for this set of Sphinx documents. If None, it defaults to
+# "<project> v<release> documentation".
+#html_title = None
+
+# A shorter title for the navigation bar. Default is the same as html_title.
+#html_short_title = None
+
+# The name of an image file (relative to this directory) to place at the top
+# of the sidebar.
+#html_logo = None
+
+# The name of an image file (within the static path) to use as favicon of the
+# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
+# pixels large.
+#html_favicon = None
+
+# Add any paths that contain custom static files (such as style sheets) here,
+# relative to this directory. They are copied after the builtin static files,
+# so a file named "default.css" will overwrite the builtin "default.css".
+html_static_path = ['static_data']
+#html_static_path = ['_static']
+
+# If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
+# using the given strftime format.
+#html_last_updated_fmt = '%b %d, %Y'
+
+# If true, SmartyPants will be used to convert quotes and dashes to
+# typographically correct entities.
+#html_use_smartypants = True
+
+# Custom sidebar templates, maps document names to template names.
+#html_sidebars = {
+# '**': ['globaltoc.html', 'sourcelink.html', 'searchbox.html'],
+#}
+# Additional templates that should be rendered to pages, maps page names to
+# template names.
+#html_additional_pages = {}
+html_additional_pages = { 'index' : 'index.html'}
+
+# If false, no module index is generated.
+html_domain_indices = False
+
+# If false, no index is generated.
+#html_use_index = True
+
+# If true, the index is split into individual pages for each letter.
+#html_split_index = False
+
+# If true, links to the reST sources are added to the pages.
+#html_show_sourcelink = True
+
+# If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
+#html_show_sphinx = True
+
+# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
+#html_show_copyright = True
+
+# If true, an OpenSearch description file will be output, and all pages will
+# contain a <link> tag referring to it. The value of this option must be the
+# base URL from which the finished HTML is served.
+#html_use_opensearch = ''
+
+# This is the file name suffix for HTML files (e.g. ".xhtml").
+#html_file_suffix = None
+
+# Output file base name for HTML help builder.
+htmlhelp_basename = 'ruffusdoc'
+
+
+# -- Options for LaTeX output --------------------------------------------------
+
+latex_elements = {
+# The paper size ('letterpaper' or 'a4paper').
+'papersize': 'A4',
+
+# The font size ('10pt', '11pt' or '12pt').
+#'pointsize': '10pt',
+
+# Additional stuff for the LaTeX preamble.
+#'preamble': '',
+}
+
+# Grouping the document tree into LaTeX files. List of tuples
+# (source start file, target name, title, author, documentclass [howto/manual]).
+latex_documents = [
+ ('contents', 'ruffus.tex', u'ruffus Documentation',
+ u'Leo Goodstadt', 'manual'),
+]
+
+# The name of an image file (relative to this directory) to place at the top of
+# the title page.
+#latex_logo = None
+
+# For "manual" documents, if this is true, then toplevel headings are parts,
+# not chapters.
+#latex_use_parts = False
+
+# If true, show page references after internal links.
+#latex_show_pagerefs = False
+
+# If true, show URL addresses after external links.
+#latex_show_urls = False
+
+# Documents to append as an appendix to all manuals.
+#latex_appendices = []
+
+# If false, no module index is generated.
+#latex_domain_indices = True
+
+
+# -- Options for manual page output --------------------------------------------
+
+# One entry per manual page. List of tuples
+# (source start file, name, description, authors, manual section).
+man_pages = [
+ ('contents', 'ruffus', u'ruffus Documentation',
+ [u'Leo Goodstadt'], 1)
+]
+
+# If true, show URL addresses after external links.
+#man_show_urls = False
+
+
+# -- Options for Texinfo output ------------------------------------------------
+
+# Grouping the document tree into Texinfo files. List of tuples
+# (source start file, target name, title, author,
+# dir menu entry, description, category)
+texinfo_documents = [
+ ('contents', 'ruffus', u'ruffus Documentation',
+ u'Leo Goodstadt', 'ruffus', 'One line description of project.',
+ 'Miscellaneous'),
+]
+
+# Documents to append as an appendix to all manuals.
+#texinfo_appendices = []
+
+# If false, no module index is generated.
+#texinfo_domain_indices = True
+
+# How to display URL addresses: 'footnote', 'no', or 'inline'.
+#texinfo_show_urls = 'footnote'
+
+# If true, do not generate a @detailmenu in the "Top" node's menu.
+#texinfo_no_detailmenu = False
+
+
+# -- Options for Epub output ---------------------------------------------------
+
+# Bibliographic Dublin Core info.
+epub_title = u'ruffus'
+epub_author = u'Llew S. Goodstadt'
+epub_publisher = u'Llew S. Goodstadt'
+epub_copyright = u'2013, Llew S. Goodstadt'
+
+# The language of the text. It defaults to the language option
+# or en if the language is not set.
+#epub_language = ''
+
+# The scheme of the identifier. Typical schemes are ISBN or URL.
+#epub_scheme = ''
+
+# The unique identifier of the text. This can be a ISBN number
+# or the project homepage.
+#epub_identifier = ''
+
+# A unique identification for the text.
+#epub_uid = ''
+
+# A tuple containing the cover image and cover page html template filenames.
+#epub_cover = ()
+
+# A sequence of (type, uri, title) tuples for the guide element of content.opf.
+#epub_guide = ()
+
+# HTML files that should be inserted before the pages created by sphinx.
+# The format is a list of tuples containing the path and title.
+#epub_pre_files = []
+
+# HTML files that should be inserted after the pages created by sphinx.
+# The format is a list of tuples containing the path and title.
+#epub_post_files = []
+
+# A list of files that should not be packed into the epub file.
+#epub_exclude_files = []
+
+# The depth of the table of contents in toc.ncx.
+#epub_tocdepth = 3
+
+# Allow duplicate toc entries.
+#epub_tocdup = True
+
+# Fix unsupported image types using the PIL.
+#epub_fix_images = False
+
+# Scale large images.
+#epub_max_image_width = 0
+
+# If 'no', URL addresses will not be shown.
+#epub_show_urls = 'inline'
+
+# If false, no index is generated.
+#epub_use_index = True
+
+
+# Example configuration for intersphinx: refer to the Python standard library.
+intersphinx_mapping = {'http://docs.python.org/': None}
diff --git a/doc/contents.rst b/doc/contents.rst
new file mode 100644
index 0000000..0e91439
--- /dev/null
+++ b/doc/contents.rst
@@ -0,0 +1,197 @@
+.. include:: global.inc
+***************************
+**Ruffus** documentation
+***************************
+==========================================
+Start Here:
+==========================================
+.. toctree::
+ :maxdepth: 2
+
+ installation.rst
+ tutorials/new_tutorial/manual_contents.rst
+ tutorials/new_tutorial/introduction.rst
+ tutorials/new_tutorial/transform.rst
+ tutorials/new_tutorial/transform_in_parallel.rst
+ tutorials/new_tutorial/originate.rst
+ tutorials/new_tutorial/pipeline_printout.rst
+ tutorials/new_tutorial/command_line.rst
+ tutorials/new_tutorial/pipeline_printout_graph.rst
+ tutorials/new_tutorial/output_file_names.rst
+ tutorials/new_tutorial/mkdir.rst
+ tutorials/new_tutorial/checkpointing.rst
+ tutorials/new_tutorial/decorators_compendium.rst
+ tutorials/new_tutorial/split.rst
+ tutorials/new_tutorial/merge.rst
+ tutorials/new_tutorial/multiprocessing.rst
+ tutorials/new_tutorial/logging.rst
+ tutorials/new_tutorial/subdivide_collate.rst
+ tutorials/new_tutorial/combinatorics.rst
+ tutorials/new_tutorial/active_if.rst
+ tutorials/new_tutorial/posttask.rst
+ tutorials/new_tutorial/inputs.rst
+ tutorials/new_tutorial/onthefly.rst
+ tutorials/new_tutorial/parallel.rst
+ tutorials/new_tutorial/check_if_uptodate.rst
+ tutorials/new_tutorial/flowchart_colours.rst
+ tutorials/new_tutorial/dependencies.rst
+ tutorials/new_tutorial/exceptions.rst
+ tutorials/new_tutorial/list_of_ruffus_names.rst
+ tutorials/new_tutorial/deprecated_files.rst
+ tutorials/new_tutorial/deprecated_files_re.rst
+
+
+Example code for:
+
+.. toctree::
+ :maxdepth: 1
+
+ tutorials/new_tutorial/introduction_code.rst
+ tutorials/new_tutorial/transform_code.rst
+ tutorials/new_tutorial/transform_in_parallel_code.rst
+ tutorials/new_tutorial/originate_code.rst
+ tutorials/new_tutorial/pipeline_printout_code.rst
+ tutorials/new_tutorial/pipeline_printout_graph_code.rst
+ tutorials/new_tutorial/output_file_names_code.rst
+ tutorials/new_tutorial/mkdir_code.rst
+ tutorials/new_tutorial/checkpointing_code.rst
+ tutorials/new_tutorial/split_code.rst
+ tutorials/new_tutorial/merge_code.rst
+ tutorials/new_tutorial/multiprocessing_code.rst
+ tutorials/new_tutorial/logging_code.rst
+ tutorials/new_tutorial/subdivide_collate_code.rst
+ tutorials/new_tutorial/combinatorics_code.rst
+ tutorials/new_tutorial/inputs_code.rst
+ tutorials/new_tutorial/onthefly_code.rst
+ tutorials/new_tutorial/flowchart_colours_code.rst
+
+
+
+=====================
+Overview:
+=====================
+.. toctree::
+ :maxdepth: 2
+
+ cheatsheet.rst
+ pipeline_functions.rst
+ drmaa_wrapper_functions.rst
+ installation.rst
+ design.rst
+ Bugs and Updates <history>
+ Future plans <todo>
+ Implementation_notes <implementation_notes.rst>
+ faq.rst
+ glossary.rst
+ gallery.rst
+ why_ruffus.rst
+
+=====================
+Examples
+=====================
+.. toctree::
+ :maxdepth: 2
+
+ examples/bioinformatics/index.rst
+ examples/bioinformatics/part2.rst
+ examples/bioinformatics/part1_code.rst
+ examples/bioinformatics/part2_code.rst
+ examples/paired_end_data.py.rst
+
+
+
+=====================
+Reference:
+=====================
+######################
+Decorators
+######################
+.. toctree::
+ :maxdepth: 1
+
+ decorators/decorators.rst
+ decorators/indicator_objects.rst
+
+
+.. topic::
+ Core
+
+ .. toctree::
+ :maxdepth: 1
+
+ decorators/originate.rst
+ decorators/split.rst
+ decorators/transform.rst
+ decorators/merge.rst
+
+.. topic::
+ For advanced users
+
+ .. toctree::
+ :maxdepth: 1
+
+ decorators/subdivide.rst
+ decorators/transform_ex.rst
+ decorators/collate.rst
+ decorators/collate_ex.rst
+ decorators/graphviz.rst
+ decorators/mkdir.rst
+ decorators/jobs_limit.rst
+ decorators/posttask.rst
+ decorators/active_if.rst
+ decorators/follows.rst
+
+.. topic::
+ Combinatorics
+
+ .. toctree::
+ :maxdepth: 1
+
+ decorators/product.rst
+ decorators/permutations.rst
+ decorators/combinations.rst
+ decorators/combinations_with_replacement.rst
+
+.. topic::
+ Esoteric
+
+ .. toctree::
+ :maxdepth: 1
+
+ decorators/files_ex.rst
+ decorators/check_if_uptodate.rst
+ decorators/parallel.rst
+
+.. topic::
+ Deprecated
+
+ .. toctree::
+ :maxdepth: 1
+
+ decorators/files.rst
+ decorators/files_re.rst
+
+
+######################
+Modules:
+######################
+
+.. toctree::
+ :maxdepth: 2
+
+ task.rst
+ proxy_logger.rst
+
+.. comment
+ graph.rst
+ print_dependencies.rst
+ adjacent_pairs_iterate.rst
+
+
+=====================
+Indices and tables
+=====================
+
+* :ref:`genindex`
+* :ref:`modindex`
+* :ref:`search`
diff --git a/doc/decorators/active_if.rst b/doc/decorators/active_if.rst
new file mode 100644
index 0000000..1b02e1f
--- /dev/null
+++ b/doc/decorators/active_if.rst
@@ -0,0 +1,110 @@
+.. include:: ../global.inc
+.. _decorators.active_if:
+.. index::
+ pair: @active_if; Syntax
+
+.. seealso::
+
+ * :ref:`Decorators <decorators>` for more decorators
+ * More on @active_if in the ``Ruffus`` :ref:`Manual <new_manual.active_if>`
+
+
+############
+@active_if
+############
+
+.. Comment. These are parameter names
+
+.. |on_or_off| replace:: `on_or_off`
+.. _on_or_off: `decorators.active_if.on_or_off`_
+
+***************************************************************************************************************************************************
+*@active_if*\ (on_or_off1, [on_or_off2,...])
+***************************************************************************************************************************************************
+ **Purpose:**
+
+ * Switches a task on or off at run time depending on the value of its parameters
+ * Evaluated each time ``pipeline_run``, ``pipeline_printout`` or ``pipeline_printout_graph`` is called.
+ * The design and initial implementation were contributed by Jacob Biesinger.
+ * Dormant tasks behave as if they are up to date and have no output.
+
+ **Example**:
+
+ .. code-block:: python
+ :emphasize-lines: 20
+
+ from ruffus import *
+ run_if_true_1 = True
+ run_if_true_2 = False
+ run_if_true_3 = True
+
+
+ #
+ # task1
+ #
+ @originate(['a.foo', 'b.foo'])
+ def create_files(outfile):
+ """
+ create_files
+ """
+ open(outfile, "w").write(outfile + "\n")
+
+ #
+ # Only runs if all three run_if_true conditions are met
+ #
+ # @active_if determines if task is active
+ @active_if(run_if_true_1, lambda: run_if_true_2)
+ @active_if(run_if_true_3)
+ @transform(create_files, suffix(".foo"), ".bar")
+ def this_task_might_be_inactive(infile, outfile):
+ open(outfile, "w").write("%s -> %s\n" % (infile, outfile))
+
+
+ # @active_if switches off task because run_if_true_2 == False
+ pipeline_run(verbose = 3)
+
+ # @active_if switches on task because all run_if_true conditions are met
+ run_if_true_2 = True
+ pipeline_run(verbose = 3)
+
+
+ Produces the following output:
+
+ .. code-block:: pycon
+ :emphasize-lines: 1,13
+
+ >>> # @active_if switches off task "this_task_might_be_inactive" because run_if_true_2 == False
+ >>> pipeline_run(verbose = 3)
+
+ Task enters queue = create_files
+ create_files
+ Job = [None -> a.foo] Missing file [a.foo]
+ Job = [None -> b.foo] Missing file [b.foo]
+ Job = [None -> a.foo] completed
+ Job = [None -> b.foo] completed
+ Completed Task = create_files
+ Inactive Task = this_task_might_be_inactive
+
+ >>> # @active_if switches on task "this_task_might_be_inactive" because all run_if_true conditions are met
+ >>> run_if_true_2 = True
+ >>> pipeline_run(verbose = 3)
+
+ Task enters queue = this_task_might_be_inactive
+
+ Job = [a.foo -> a.bar] Missing file [a.bar]
+ Job = [b.foo -> b.bar] Missing file [b.bar]
+ Job = [a.foo -> a.bar] completed
+ Job = [b.foo -> b.bar] completed
+ Completed Task = this_task_might_be_inactive
+
+
+ **Parameters:**
+
+.. _decorators.active_if.on_or_off:
+
+ * *on_or_off*:
+ A comma-separated list of boolean conditions. These can be values, functions or callable objects which return ``True`` / ``False``.
+
+ Multiple ``@active_if`` decorators can be stacked for clarity, as in the example above (see also the sketch below).
+
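+ For example, the following minimal sketch (building on ``create_files`` from the
+ example above; the weekend-only condition is purely hypothetical) uses a callable,
+ so the condition is re-evaluated every time the pipeline is run::
+
+     import datetime
+
+     def is_weekend():
+         return datetime.date.today().weekday() >= 5
+
+     @active_if(is_weekend)
+     @transform(create_files, suffix(".foo"), ".weekend")
+     def weekend_only_task(infile, outfile):
+         open(outfile, "w").write("%s -> %s\n" % (infile, outfile))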
+
diff --git a/doc/decorators/check_if_uptodate.rst b/doc/decorators/check_if_uptodate.rst
new file mode 100644
index 0000000..893f430
--- /dev/null
+++ b/doc/decorators/check_if_uptodate.rst
@@ -0,0 +1,68 @@
+.. include:: ../global.inc
+.. _decorators.check_if_uptodate:
+
+.. index::
+ pair: @check_if_uptodate; Syntax
+
+.. seealso::
+
+ * :ref:`Decorators <decorators>` for more decorators
+
+.. |dependency_checking_function| replace:: `dependency_checking_function`
+.. _dependency_checking_function: `decorators.check_if_uptodate.dependency_checking_function`_
+
+########################
+@check_if_uptodate
+########################
+
+*******************************************************************************************
+*@check_if_uptodate* (|dependency_checking_function|_)
+*******************************************************************************************
+
+ **Purpose:**
+ Checks to see if a job is up to date, and needs to be run.
+
+ Usually used in conjunction with :ref:`@parallel() <decorators.parallel>`
+
+ **Example**::
+
+ from ruffus import *
+ import os
+ def check_file_exists(input_file, output_file):
+ if not os.path.exists(output_file):
+ return True, "Missing file %s" % output_file
+ else:
+ return False, "File %s exists" % output_file
+
+ @parallel([[None, "a.1"]])
+ @check_if_uptodate(check_file_exists)
+ def create_if_necessary(input_file, output_file):
+ open(output_file, "w")
+
+ pipeline_run([create_if_necessary])
+
+ Is equivalent to::
+
+ from ruffus import *
+ @files(None, "a.1")
+ def create_if_necessary(input_file, output_file):
+ open(output_file, "w")
+
+ pipeline_run([create_if_necessary])
+
+ Both produce the same output::
+
+ Task = create_if_necessary
+ Job = [null, "a.1"] completed
+
+ **Parameters:**
+
+.. _decorators.check_if_uptodate.dependency_checking_function:
+
+ * *dependency_checking_function*:
+ returns two values: whether the job needs to be run, and a message explaining why
+
+ The dependency checking function must accept the same number of parameters as the
+ task function, e.g. ``input_file`` and ``output_file`` above.
+
+
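+ For example, a checker could also compare file timestamps. The sketch below is an
+ illustration only (``check_file_timestamps`` is not part of Ruffus); it plugs into
+ ``@check_if_uptodate`` exactly like ``check_file_exists`` above::
+
+     import os
+     def check_file_timestamps(input_file, output_file):
+         # rerun if the output is missing
+         if not os.path.exists(output_file):
+             return True, "Missing file %s" % output_file
+         # rerun if the output is older than the input
+         if input_file is not None and \
+                 os.path.getmtime(output_file) < os.path.getmtime(input_file):
+             return True, "File %s is older than %s" % (output_file, input_file)
+         return False, "File %s is up to date" % output_file
+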
diff --git a/doc/decorators/collate.rst b/doc/decorators/collate.rst
new file mode 100644
index 0000000..1121925
--- /dev/null
+++ b/doc/decorators/collate.rst
@@ -0,0 +1,154 @@
+.. include:: ../global.inc
+.. _decorators.collate:
+.. index::
+ pair: @collate; Syntax
+
+.. seealso::
+
+ * :ref:`Decorators <decorators>` for more decorators
+
+########################
+@collate
+########################
+
+.. |tasks_or_file_names| replace:: `tasks_or_file_names`
+.. _tasks_or_file_names: `decorators.collate.tasks_or_file_names`_
+.. |extra_parameters| replace:: `extra_parameters`
+.. _extra_parameters: `decorators.collate.extra_parameters`_
+.. |output_pattern| replace:: `output_pattern`
+.. _output_pattern: `decorators.collate.output_pattern`_
+.. |matching_regex| replace:: `matching_regex`
+.. _matching_regex: `decorators.collate.matching_regex`_
+.. |matching_formatter| replace:: `matching_formatter`
+.. _matching_formatter: `decorators.collate.matching_formatter`_
+
+
+********************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
+*@collate* ( |tasks_or_file_names|_, :ref:`regex<decorators.regex>`\ *(*\ |matching_regex|_\ *)* | :ref:`formatter<decorators.formatter>`\ *(*\ |matching_formatter|_\ *)*\, |output_pattern|_, [|extra_parameters|_,...] )
+********************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
+ **Purpose:**
+ Groups / collates sets of input files, each into a separate summary.
+
+ Only out of date tasks (comparing input and output files) will be run
+
+ Output file names and strings in the extra parameters
+ are determined from |tasks_or_file_names|_, i.e. from the output
+ of upstream tasks, or a list of file names.
+
+ String replacement occurs either through suffix matches via :ref:`suffix<decorators.suffix>` or
+ the :ref:`formatter<decorators.formatter>` or :ref:`regex<decorators.regex>` indicators.
+
+ ``@collate`` groups together all **Input** which result in identical **Output** and **extra**
+ parameters.
+
+ It is a **many to fewer** operation.
+
+
+ **Example**:
+ ``regex(r"\.(.+)$"), r"\1.summary"`` creates a separate summary file for each suffix::
+
+ animal_files = "a.fish", "b.fish", "c.mammals", "d.mammals"
+ # summarise by file suffix:
+ @collate(animal_files, regex(r"\.(.+)$"), r'\1.summary')
+ def summarize(infiles, summary_file):
+ pass
+
+ **Parameters:**
+
+
+.. _decorators.collate.tasks_or_file_names:
+
+ * *tasks_or_file_names*
+ can be a:
+
+ #. Task / list of tasks (as in the example above).
+ File names are taken from the output of the specified task(s)
+ #. (Nested) list of file name strings.
+ File names containing ``*[]?`` will be expanded as a |glob|_.
+ E.g.:``"a.*" => "a.1", "a.2"``
+
+
+.. _decorators.collate.matching_regex:
+
+ * *matching_regex*
+ is a python regular expression string, which must be wrapped in
+ a :ref:`regex<decorators.regex>` indicator object
+ See python `regular expression (re) <http://docs.python.org/library/re.html>`_
+ documentation for details of regular expression syntax
+
+.. _decorators.collate.matching_formatter:
+
+ * *matching_formatter*
+ a :ref:`formatter<decorators.formatter>` indicator object containing optionally
+ a python `regular expression (re) <http://docs.python.org/library/re.html>`_.
+
+
+.. _decorators.collate.output_pattern:
+
+ * *output_pattern*
+ Specifies the resulting output file name(s).
+
+.. _decorators.collate.extra_parameters:
+
+ * *extra_parameters*
+ Any extra parameters are passed verbatim to the task function
+
+ #. *outputs* and optional extra parameters are passed to the functions after string
+ substitution in any strings. Non-string values are passed through unchanged.
+ #. Each collate job consists of input files which are aggregated by string substitution
+ to a single set of output / extra parameter matches
+ #. In the example above, ``a.fish`` and ``b.fish`` both produce ``fish.summary`` after regular
+ expression substitution, and so are collated into a single job:
+ ``["a.fish", "b.fish" -> "fish.summary"]``
+ while ``c.mammals`` and ``d.mammals`` both produce ``mammals.summary`` and are collated into a separate job:
+ ``["c.mammals", "d.mammals" -> "mammals.summary"]``
+
+ **Example2**:
+
+ Suppose we had the following files::
+
+ cows.mammals.animal
+ horses.mammals.animal
+ sheep.mammals.animal
+
+ snake.reptile.animal
+ lizard.reptile.animal
+ crocodile.reptile.animal
+
+ pufferfish.fish.animal
+
+ and we wanted to end up with three different resulting outputs::
+
+ cows.mammals.animal
+ horses.mammals.animal
+ sheep.mammals.animal
+ -> mammals.results
+
+ snake.reptile.animal
+ lizard.reptile.animal
+ crocodile.reptile.animal
+ -> reptile.results
+
+ pufferfish.fish.animal
+ -> fish.results
+
+ This is the ``@collate`` code required::
+
+ animals = [ "cows.mammals.animal",
+ "horses.mammals.animal",
+ "sheep.mammals.animal",
+ "snake.reptile.animal",
+ "lizard.reptile.animal",
+ "crocodile.reptile.animal",
+ "pufferfish.fish.animal"]
+
+ @collate(animals, regex(r"(.+)\.(.+)\.animal"), r"\2.results")
+ # \1 = species [cow, horse]
+ # \2 = phylogenetics group [mammals, reptile, fish]
+ def summarize_animals_into_groups(species_file, result_file):
+ " ... more code here"
+ pass
+
+
+
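+ Under these rules this is roughly equivalent to the following three calls, one
+ collated job per phylogenetic group (the exact nesting of the input parameter is
+ sketched, not definitive)::
+
+     summarize_animals_into_groups(["cows.mammals.animal",
+                                    "horses.mammals.animal",
+                                    "sheep.mammals.animal"],
+                                   "mammals.results")
+     summarize_animals_into_groups(["snake.reptile.animal",
+                                    "lizard.reptile.animal",
+                                    "crocodile.reptile.animal"],
+                                   "reptile.results")
+     summarize_animals_into_groups(["pufferfish.fish.animal"],
+                                   "fish.results")
+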
+See :ref:`@merge <decorators.merge>` for an alternative way to summarise files.
diff --git a/doc/decorators/collate_ex.rst b/doc/decorators/collate_ex.rst
new file mode 100644
index 0000000..50175f4
--- /dev/null
+++ b/doc/decorators/collate_ex.rst
@@ -0,0 +1,145 @@
+.. include:: ../global.inc
+.. _decorators.collate_ex:
+.. index::
+ pair: @collate (Advanced Usage); Syntax
+ pair: @collate, inputs(...); Syntax
+ pair: @collate, add_inputs(...); Syntax
+
+.. seealso::
+
+ * :ref:`Decorators <decorators>` for more decorators
+
+####################################################
+@collate with ``add_inputs`` and ``inputs``
+####################################################
+
+.. |tasks_or_file_names| replace:: `tasks_or_file_names`
+.. _tasks_or_file_names: `decorators.collate_ex.tasks_or_file_names`_
+.. |extra_parameters| replace:: `extra_parameters`
+.. _extra_parameters: `decorators.collate_ex.extra_parameters`_
+.. |output_pattern| replace:: `output_pattern`
+.. _output_pattern: `decorators.collate_ex.output_pattern`_
+.. |input_pattern_or_glob| replace:: `input_pattern_or_glob`
+.. _input_pattern_or_glob: `decorators.collate_ex.input_pattern_or_glob`_
+.. |matching_regex| replace:: `matching_regex`
+.. _matching_regex: `decorators.collate_ex.matching_regex`_
+.. |matching_formatter| replace:: `matching_formatter`
+.. _matching_formatter: `decorators.collate_ex.matching_formatter`_
+
+
+********************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
+*@collate* ( |tasks_or_file_names|_, :ref:`regex<decorators.regex>`\ *(*\ |matching_regex|_\ *)* | :ref:`formatter<decorators.formatter>`\ *(*\ |matching_formatter|_\ *)*\, [:ref:`inputs<decorators.inputs>`\ *(*\ |input_pattern_or_glob|_\ *)* | :ref:`add_inputs<decorators.add_inputs>`\ *(*\ |input_pattern_or_glob|_\ *)*\] , |output_pattern|_, [|extra_parameters|_,...] )
+********************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
+ **Purpose:**
+ Groups / collates sets of input files, each into a separate summary.
+
+ This variant of ``@collate`` allows additional inputs or dependencies to be added
+ dynamically to the task.
+
+ Output file names are determined from |tasks_or_file_names|_, i.e. from the output
+ of upstream tasks, or a list of file names.
+
+ This variant of ``@collate`` allows input file names to be derived in the same way.
+
+ :ref:`add_inputs<decorators.add_inputs>` nests the original input parameters in a list before adding additional dependencies.
+
+ :ref:`inputs<decorators.inputs>` replaces the original input parameters wholesale.
+
+ Only out of date tasks (comparing input and output files) will be run
+
+ **Example of** :ref:`add_inputs<decorators.add_inputs>`
+
+ ``regex(r".+\.(.+)$"), r"\1.summary"`` creates a separate summary file for each suffix.
+ But we also add date of birth data for each species::
+
+ animal_files = "tuna.fish", "shark.fish", "dog.mammals", "cat.mammals"
+ # summarise by file suffix:
+ @collate(animal_files, regex(r".+\.(.+)$"), add_inputs(r"\1.date_of_birth"), r'\1.summary')
+ def summarize(infiles, summary_file):
+ pass
+
+ This results in the following equivalent function calls::
+
+ summarize([ ["shark.fish", "fish.date_of_birth" ],
+ ["tuna.fish", "fish.date_of_birth" ] ], "fish.summary")
+ summarize([ ["cat.mammals", "mammals.date_of_birth"],
+ ["dog.mammals", "mammals.date_of_birth"] ], "mammals.summary")
+
+ **Example of** :ref:`inputs<decorators.inputs>`
+
+ Using ``inputs(...)`` will summarise only the dates of birth for each species group::
+
+ animal_files = "tuna.fish", "shark.fish", "dog.mammals", "cat.mammals"
+ # summarise by file suffix:
+ @collate(animal_files, regex(r".+\.(.+)$"), inputs(r"\1.date_of_birth"), r'\1.summary')
+ def summarize(infiles, summary_file):
+ pass
+
+ This results in the following equivalent function calls::
+
+ summarize(["fish.date_of_birth" ], "fish.summary")
+ summarize(["mammals.date_of_birth"], "mammals.summary")
+
+ **Parameters:**
+
+
+.. _decorators.collate_ex.tasks_or_file_names:
+
+ * *tasks_or_file_names*
+ can be a:
+
+ #. Task / list of tasks (as in the example above).
+ File names are taken from the output of the specified task(s)
+ #. (Nested) list of file name strings.
+ File names containing ``*[]?`` will be expanded as a |glob|_.
+ E.g.:``"a.*" => "a.1", "a.2"``
+
+
+.. _decorators.collate_ex.matching_regex:
+
+ * *matching_regex*
+ is a python regular expression string, which must be wrapped in
+ a :ref:`regex<decorators.regex>` indicator object
+ See python `regular expression (re) <http://docs.python.org/library/re.html>`_
+ documentation for details of regular expression syntax
+
+.. _decorators.collate_ex.matching_formatter:
+
+ * *matching_formatter*
+ a :ref:`formatter<decorators.formatter>` indicator object containing optionally
+ a python `regular expression (re) <http://docs.python.org/library/re.html>`_.
+
+.. _decorators.collate_ex.input_pattern_or_glob:
+
+ * *input_pattern*
+ Specifies the resulting input(s) to each job.
+ Must be wrapped in an :ref:`inputs<decorators.inputs>` or an :ref:`add_inputs<decorators.add_inputs>` indicator object.
+
+ Can be a:
+
+ #. Task / list of tasks (as in the example above).
+ File names are taken from the output of the specified task(s)
+ #. (Nested) list of file name strings.
+ Strings will be subject to substitution.
+ File names containing ``*[]?`` will be expanded as a |glob|_.
+ E.g.:``"a.*" => "a.1", "a.2"``
+
+
+
+.. _decorators.collate_ex.output_pattern:
+
+ * *output_pattern*
+ Specifies the resulting output file name(s).
+
+.. _decorators.collate_ex.extra_parameters:
+
+ * *extra_parameters*
+ Any extra parameters are passed verbatim to the task function
+
+ #. *outputs* and optional extra parameters are passed to the functions after string
+ substitution in any strings. Non-string values are passed through unchanged.
+ #. Each collate job consists of input files which are aggregated by string substitution
+ to a single set of output / extra parameter matches
+
+
+See :ref:`@collate <decorators.collate>` for more straightforward ways to use collate.
diff --git a/doc/decorators/combinations.rst b/doc/decorators/combinations.rst
new file mode 100644
index 0000000..65f9ec9
--- /dev/null
+++ b/doc/decorators/combinations.rst
@@ -0,0 +1,153 @@
+.. include:: ../global.inc
+.. _decorators.combinations:
+.. index::
+ pair: @combinations; Syntax
+
+.. seealso::
+
+ * :ref:`Decorators <decorators>` for more decorators
+
+########################
+@combinations
+########################
+
+.. |tasks_or_file_names| replace:: `tasks_or_file_names`
+.. _tasks_or_file_names: `decorators.combinations.tasks_or_file_names`_
+.. |extra_parameters| replace:: `extra_parameters`
+.. _extra_parameters: `decorators.combinations.extra_parameters`_
+.. |output_pattern| replace:: `output_pattern`
+.. _output_pattern: `decorators.combinations.output_pattern`_
+.. |matching_formatter| replace:: `matching_formatter`
+.. _matching_formatter: `decorators.combinations.matching_formatter`_
+
+
+
+********************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
+*@combinations* ( |tasks_or_file_names|_, :ref:`formatter<decorators.formatter>`\ *(*\ |matching_formatter|_\ *)*\, |output_pattern|_, [|extra_parameters|_,...] )
+********************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
+ **Purpose:**
+
+ Generates the **combinations** of all the elements of a set of **Input** (e.g. **A B C D**),
+ i.e. r-length tuples of *Input* elements with no repeated elements (no **A A**),
+ where the order of elements within each tuple is irrelevant (either **A B** or **B A**, not both).
+
+ The effect is analogous to the python `itertools <http://docs.python.org/2/library/itertools.html#itertools.combinations>`__
+ function of the same name:
+
+ .. code-block:: pycon
+ :emphasize-lines: 2
+
+ >>> from itertools import combinations
+ >>> # combinations('ABCD', 3) --> ABC ABD ACD BCD
+ >>> [ "".join(a) for a in combinations("ABCD", 3)]
+ ['ABC', 'ABD', 'ACD', 'BCD']
+
+ Only out of date tasks (comparing input and output files) will be run
+
+ Output file names and strings in the extra parameters
+ are determined from |tasks_or_file_names|_, i.e. from the output
+ of upstream tasks, or a list of file names, after string replacement via
+ :ref:`formatter<decorators.formatter>`.
+
+ The replacement strings require an extra level of indirection to refer to
+ parsed components:
+
+ #. The first level refers to which *set* in each tuple of inputs.
+ #. The second level refers to which input file in any particular *set* of inputs.
+
+ **Example**:
+
+ Calculates the **@combinations** of **A,B,C,D** files
+
+ .. code-block:: python
+ :emphasize-lines: 13,17,20,25,28-30
+
+ from ruffus import *
+ from ruffus.combinatorics import *
+
+ # initial file pairs
+ @originate([ ['A.1_start', 'A.2_start'],
+ ['B.1_start', 'B.2_start'],
+ ['C.1_start', 'C.2_start'],
+ ['D.1_start', 'D.2_start']])
+ def create_initial_files_ABCD(output_files):
+ for output_file in output_files:
+ with open(output_file, "w") as oo: pass
+
+ # @combinations
+ @combinations(create_initial_files_ABCD, # Input
+ formatter(), # match input files
+
+ # tuple of 3 at a time
+ 3,
+
+ # Output Replacement string
+ "{path[0][0]}/"
+ "{basename[0][1]}_vs_"
+ "{basename[1][1]}_vs_"
+ "{basename[2][1]}.combinations",
+
+ # Extra parameter: path for 1st set of files, 1st file name
+ "{path[0][0]}",
+
+ # Extra parameter
+ ["{basename[0][0]}", # basename for 1st set of files, 1st file name
+ "{basename[1][0]}", # 2nd
+ "{basename[2][0]}", # 3rd
+ ])
+ def combinations_task(input_file, output_parameter, shared_path, basenames):
+ print " - ".join(basenames)
+
+
+ #
+ # Run
+ #
+ pipeline_run(verbose=0)
+
+
+ This results in:
+
+ .. code-block:: pycon
+
+ >>> pipeline_run(verbose=0)
+ A - B - C
+ A - B - D
+ A - C - D
+ B - C - D
+
+
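+ As a rough illustration of the two levels of indexing, for the first job above
+ (which combines the **A**, **B** and **C** file pairs) the **Output** replacement
+ string expands approximately as follows::
+
+     # input sets for this job (3 at a time):
+     #   set 0 = ('A.1_start', 'A.2_start')
+     #   set 1 = ('B.1_start', 'B.2_start')
+     #   set 2 = ('C.1_start', 'C.2_start')
+     #
+     # "{basename[0][1]}" = basename of the 2nd file of the 1st set = "A"
+     "{path[0][0]}/{basename[0][1]}_vs_{basename[1][1]}_vs_{basename[2][1]}.combinations"
+     #   -> "<path of A.1_start>/A_vs_B_vs_C.combinations"
+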
+ **Parameters:**
+
+
+.. _decorators.combinations.tasks_or_file_names:
+
+ * *tasks_or_file_names*
+ can be a:
+
+ #. Task / list of tasks (as in the example above).
+ File names are taken from the output of the specified task(s)
+ #. (Nested) list of file name strings.
+ File names containing ``*[]?`` will be expanded as a |glob|_.
+ E.g.:``"a.*" => "a.1", "a.2"``
+
+
+.. _decorators.combinations.matching_formatter:
+
+ * *matching_formatter*
+ a :ref:`formatter<decorators.formatter>` indicator object containing optionally
+ a python `regular expression (re) <http://docs.python.org/library/re.html>`_.
+
+
+.. _decorators.combinations.output_pattern:
+
+ * *output_pattern*
+ Specifies the resulting output file name(s) after string
+ substitution
+
+
+.. _decorators.combinations.extra_parameters:
+
+ * *extra_parameters*
+ Optional extra parameters are passed to the functions after string
+ substitution
+
diff --git a/doc/decorators/combinations_with_replacement.rst b/doc/decorators/combinations_with_replacement.rst
new file mode 100644
index 0000000..fbb0daf
--- /dev/null
+++ b/doc/decorators/combinations_with_replacement.rst
@@ -0,0 +1,157 @@
+.. include:: ../global.inc
+.. _decorators.combinations_with_replacement:
+.. index::
+ pair: @combinations_with_replacement; Syntax
+
+.. seealso::
+
+ * :ref:`Decorators <decorators>` for more decorators
+
+################################################
+@combinations_with_replacement
+################################################
+
+.. |tasks_or_file_names| replace:: `tasks_or_file_names`
+.. _tasks_or_file_names: `decorators.combinations_with_replacement.tasks_or_file_names`_
+.. |extra_parameters| replace:: `extra_parameters`
+.. _extra_parameters: `decorators.combinations_with_replacement.extra_parameters`_
+.. |output_pattern| replace:: `output_pattern`
+.. _output_pattern: `decorators.combinations_with_replacement.output_pattern`_
+.. |matching_formatter| replace:: `matching_formatter`
+.. _matching_formatter: `decorators.combinations_with_replacement.matching_formatter`_
+
+
+
+********************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
+*@combinations_with_replacement* ( |tasks_or_file_names|_, :ref:`formatter<decorators.formatter>`\ *(*\ |matching_formatter|_\ *)*\, |output_pattern|_, [|extra_parameters|_,...] )
+********************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
+ **Purpose:**
+
+ Generates the **combinations_with_replacement** of all the elements of a set of **Input** (e.g. **A B C D**),
+ i.e. r-length tuples of *Input* elements including repeated elements (**A A**),
+ where the order of elements within each tuple is irrelevant (either **A B** or **B A**, not both).
+
+ The effect is analogous to the python `itertools <http://docs.python.org/2/library/itertools.html#itertools.combinations_with_replacement>`__
+ function of the same name:
+
+ .. code-block:: pycon
+ :emphasize-lines: 2
+
+ >>> from itertools import combinations_with_replacement
+ >>> # combinations_with_replacement('ABCD', 2) --> AA AB AC AD BB BC BD CC CD DD
+ >>> [ "".join(a) for a in combinations_with_replacement('ABCD', 2)]
+ ['AA', 'AB', 'AC', 'AD', 'BB', 'BC', 'BD', 'CC', 'CD', 'DD']
+
+ Only out of date tasks (comparing input and output files) will be run
+
+ Output file names and strings in the extra parameters
+ are determined from |tasks_or_file_names|_, i.e. from the output
+ of upstream tasks, or a list of file names, after string replacement via
+ :ref:`formatter<decorators.formatter>`.
+
+ The replacement strings require an extra level of indirection to refer to
+ parsed components:
+
+    #. The first level refers to which *set* of inputs within each tuple.
+    #. The second level refers to which input file within that particular *set* of inputs.
+
+ **Example**:
+
+ Calculates the **@combinations_with_replacement** of **A,B,C,D** files
+
+ .. code-block:: python
+ :emphasize-lines: 13,17,20,25,28-30
+
+ from ruffus import *
+ from ruffus.combinatorics import *
+
+ # initial file pairs
+ @originate([ ['A.1_start', 'A.2_start'],
+ ['B.1_start', 'B.2_start'],
+ ['C.1_start', 'C.2_start'],
+ ['D.1_start', 'D.2_start']])
+ def create_initial_files_ABCD(output_files):
+ for output_file in output_files:
+ with open(output_file, "w") as oo: pass
+
+ # @combinations_with_replacement
+ @combinations_with_replacement(create_initial_files_ABCD, # Input
+ formatter(), # match input files
+
+ # tuple of 2 at a time
+ 2,
+
+ # Output Replacement string
+ "{path[0][0]}/"
+ "{basename[0][1]}_vs_"
+ "{basename[1][1]}.combinations_with_replacement",
+
+ # Extra parameter: path for 1st set of files, 1st file name
+ "{path[0][0]}",
+
+ # Extra parameter
+ ["{basename[0][0]}", # basename for 1st set of files, 1st file name
+                            "{basename[1][0]}",  # basename for 2nd set of files, 1st file name
+ ])
+ def combinations_with_replacement_task(input_file, output_parameter, shared_path, basenames):
+ print " - ".join(basenames)
+
+
+ #
+ # Run
+ #
+ pipeline_run(verbose=0)
+
+
+ This results in:
+
+ .. code-block:: pycon
+
+ >>> pipeline_run(verbose=0)
+ A - A
+ A - B
+ A - C
+ A - D
+ B - B
+ B - C
+ B - D
+ C - C
+ C - D
+ D - D
+
+
+ **Parameters:**
+
+
+.. _decorators.combinations_with_replacement.tasks_or_file_names:
+
+ * *tasks_or_file_names*
+ can be a:
+
+ #. Task / list of tasks (as in the example above).
+ File names are taken from the output of the specified task(s)
+ #. (Nested) list of file name strings.
+ File names containing ``*[]?`` will be expanded as a |glob|_.
+ E.g.:``"a.*" => "a.1", "a.2"``
+
+
+.. _decorators.combinations_with_replacement.matching_formatter:
+
+ * *matching_formatter*
+ a :ref:`formatter<decorators.formatter>` indicator object containing optionally
+ a python `regular expression (re) <http://docs.python.org/library/re.html>`_.
+
+
+.. _decorators.combinations_with_replacement.output_pattern:
+
+ * *output_pattern*
+ Specifies the resulting output file name(s) after string
+ substitution
+
+
+.. _decorators.combinations_with_replacement.extra_parameters:
+
+ * *extra_parameters*
+ Optional extra parameters are passed to the functions after string
+ substitution
+
diff --git a/doc/decorators/decorators.rst b/doc/decorators/decorators.rst
new file mode 100644
index 0000000..5d70df8
--- /dev/null
+++ b/doc/decorators/decorators.rst
@@ -0,0 +1,296 @@
+.. include:: ../global.inc
+#######################
+Ruffus Decorators
+#######################
+
+.. seealso::
+ :ref:`Indicator objects <decorators.indicator_objects>`
+
+.. _decorators:
+
+
+=============================================
+*Core*
+=============================================
+.. csv-table::
+ :header: "Decorator", "Examples"
+ :widths: 400, 600,1
+
+ "**@originate** (:ref:`Summary <decorators.originate>` / :ref:`Manual <new_manual.originate>`)
+
+       - Creates (originates) a set of starting files without dependencies from scratch (*ex nihilo*!)
+       - Only called to create files which do not exist.
+       - Invoked once (one job is created) per item in the ``output_files`` list.
+
+ ", "
+ * :ref:`@originate <decorators.originate>` ( ``output_files``, [``extra_parameters``,...] )
+ \
+
+ ", ""
+ "**@split** (:ref:`Summary <decorators.split>` / :ref:`Manual <new_manual.split>`)
+
+       - Splits a single input into multiple outputs
+ - Globs in ``output`` can specify an indeterminate number of files.
+
+ ", "
+ * :ref:`@split <decorators.split>` ( ``tasks_or_file_names``, ``output_files``, [``extra_parameters``,...] )
+ \
+
+ ", ""
+ "**@transform** (:ref:`Summary <decorators.transform>` / :ref:`Manual <new_manual.transform>`)
+
+ - Applies the task function to transform input data to output.
+
+ ", "
+ * :ref:`@transform <decorators.transform>` ( ``tasks_or_file_names``, :ref:`suffix <decorators.transform.suffix_string>`\ *(*\ ``suffix_string``\ *)*\ , ``output_pattern``, [``extra_parameters``,...] )
+ \
+ * :ref:`@transform <decorators.transform>` ( ``tasks_or_file_names``, :ref:`regex <decorators.transform.matching_regex>`\ *(*\ ``regex_pattern``\ *)*\ , ``output_pattern``, [``extra_parameters``,...] )
+ \
+ * :ref:`@transform <decorators.transform>` ( ``tasks_or_file_names``, :ref:`formatter <decorators.transform.matching_formatter>`\ *(*\ ``regex_pattern``\ *)*\ , ``output_pattern``, [``extra_parameters``,...] )
+ \
+
+ ", ""
+ "**@merge** (:ref:`Summary <decorators.merge>` / :ref:`Manual <new_manual.merge>`)
+
+ - Merges multiple input files into a single output.
+
+ ", "
+ * :ref:`@merge <decorators.merge>` (``tasks_or_file_names``, ``output``, [``extra_parameters``,...] )
+ \
+
+ ", ""
+
+.. _decorators.combinatorics:
+
+=============================================
+*Combinatorics*
+=============================================
+.. csv-table::
+ :header: "Decorator", "Examples"
+ :widths: 400, 600,1
+
+ "**@product** (:ref:`Summary <decorators.product>` / :ref:`Manual <new_manual.product>`)
+
+ - Generates the **product**, i.e. all vs all comparisons, between sets of input files.
+ ", "
+ * :ref:`@product <decorators.product>` ( ``tasks_or_file_names``, :ref:`formatter <decorators.product.matching_formatter>` *([* ``regex_pattern`` *])* ,*[* ``tasks_or_file_names``, :ref:`formatter <decorators.product.matching_formatter>` *([* ``regex_pattern`` *]), ]*, ``output_pattern``, [``extra_parameters``,...] )
+ \
+
+ ", ""
+ "**@permutations** (:ref:`Summary <decorators.permutations>` / :ref:`Manual <new_manual.permutations>`)
+
+ - Generates the **permutations**, between all the elements of a set of **Input**
+ - Analogous to the python `itertools.permutations <http://docs.python.org/2/library/itertools.html#itertools.permutations>`__
+ - permutations('ABCD', 2) --> AB AC AD BA BC BD CA CB CD DA DB DC
+
+ ", "
+ * :ref:`@permutations <decorators.permutations>` ( ``tasks_or_file_names``, :ref:`formatter <decorators.product.matching_formatter>` *([* ``regex_pattern`` *])*, ``tuple_size``, ``output_pattern``, [``extra_parameters``,...] )
+ \
+ ", ""
+ "**@combinations** (:ref:`Summary <decorators.combinations>` / :ref:`Manual <new_manual.combinations>`)
+
+       - Generates the **combinations**, between all the elements of a set of **Input**:
+         i.e. r-length tuples of *input* elements with no repeated elements (**A A**)
+         and where the order of elements within each tuple is irrelevant (either **A B** or **B A**, not both).
+       - Analogous to the python `itertools.combinations <http://docs.python.org/2/library/itertools.html#itertools.combinations>`__
+       - combinations('ABCD', 3) --> ABC ABD ACD BCD
+
+ ", "
+       * :ref:`@combinations <decorators.combinations>` ( ``tasks_or_file_names``, :ref:`formatter <decorators.product.matching_formatter>` *([* ``regex_pattern`` *])*, ``tuple_size``, ``output_pattern``, [``extra_parameters``,...] )
+ \
+
+ ", ""
+ "**@combinations_with_replacement** (:ref:`Summary <decorators.combinations_with_replacement>` / :ref:`Manual <new_manual.combinations_with_replacement>`)
+
+       - Generates the **combinations_with_replacement**, between all the elements of a set of **Input**:
+         i.e. r-length tuples of *input* elements including repeated elements (**A A**)
+         and where the order of elements within each tuple is irrelevant (either **A B** or **B A**, not both).
+       - Analogous to the python `itertools.combinations_with_replacement <http://docs.python.org/2/library/itertools.html#itertools.combinations_with_replacement>`__
+       - combinations_with_replacement('ABCD', 2) --> AA AB AC AD BB BC BD CC CD DD
+
+ ", "
+       * :ref:`@combinations_with_replacement <decorators.combinations_with_replacement>` ( ``tasks_or_file_names``, :ref:`formatter <decorators.product.matching_formatter>` *([* ``regex_pattern`` *])*, ``tuple_size``, ``output_pattern``, [``extra_parameters``,...] )
+ \
+
+ ", ""
+
+=============================================
+*Advanced*
+=============================================
+ .. csv-table::
+ :header: "Decorator", "Examples"
+ :widths: 400, 600,1
+
+ "**@subdivide** (:ref:`Summary <decorators.subdivide>` / :ref:`Manual <new_manual.subdivide>`)
+       - Subdivides each of a set of *Inputs* further into multiple *Outputs*.
+       - The number of files in each *Output* can be set at runtime by the use of globs.
+       - **Many to Even More** operator.
+       - The use of **@split** as a synonym for **@subdivide** is deprecated.
+
+ ", "
+ * :ref:`@subdivide <decorators.subdivide>` ( ``tasks_or_file_names``, :ref:`regex <decorators.subdivide.matching_regex>`\ *(*\ ``regex_pattern``\ *)*\ , [ :ref:`inputs <decorators.inputs>` | :ref:`add_inputs <decorators.add_inputs>`\ *(*\ ``input_pattern``\ *)*\ , ] ``output_pattern``, [``extra_parameters``,...] )
+ \
+ * :ref:`@subdivide <decorators.subdivide>` ( ``tasks_or_file_names``, :ref:`formatter <decorators.subdivide.matching_formatter>`\ *(*\ [``regex_pattern``] *)*\ , [ :ref:`inputs <decorators.inputs>` | :ref:`add_inputs <decorators.add_inputs>`\ *(*\ ``input_pattern``\ *)*\ , ] ``output_pattern``, [``extra_parameters``,...] )
+ \
+
+ ", ""
+ "**@transform** (:ref:`Summary <decorators.transform_ex>` / :ref:`Manual <new_manual.inputs>`)
+
+ - Infers input as well as output from regular expression substitutions
+ - Useful for adding additional file dependencies
+
+ ", "
+ * :ref:`@transform <decorators.transform_ex>` ( ``tasks_or_file_names``, :ref:`regex <decorators.transform.matching_regex>`\ *(*\ ``regex_pattern``\ *)*\ , [ :ref:`inputs <decorators.inputs>` | :ref:`add_inputs <decorators.add_inputs>`\ *(*\ ``input_pattern``\ *)*\ , ] ``output_pattern``, [``extra_parameters``,...] )
+ \
+ * :ref:`@transform <decorators.transform_ex>` ( ``tasks_or_file_names``, :ref:`formatter <decorators.transform.matching_formatter>`\ *(*\ ``regex_pattern``\ *)*\ , [ :ref:`inputs <decorators.inputs>` | :ref:`add_inputs <decorators.add_inputs>`\ *(*\ ``input_pattern``\ *)*\ , ] ``output_pattern``, [``extra_parameters``,...] )
+ \
+
+ ", ""
+ "**@collate** (:ref:`Summary <decorators.collate>` / :ref:`Manual <new_manual.collate>`)
+
+ - Groups multiple input files using regular expression matching
+       - Inputs resulting in the same output after substitution will be collated together.
+
+ ", "
+ * :ref:`@collate <decorators.collate>` (``tasks_or_file_names``, :ref:`regex <decorators.collate.matching_regex>`\ *(*\ ``regex_pattern``\ *)*\ , ``output_pattern``, [``extra_parameters``,...] )
+ \
+ * :ref:`@collate <decorators.collate_ex>` (``tasks_or_file_names``, :ref:`regex <decorators.collate_ex.matching_regex>`\ *(*\ ``regex_pattern``\ *)*\ , :ref:`inputs <decorators.inputs>` | :ref:`add_inputs <decorators.add_inputs>`\ *(*\ ``input_pattern``\ *)*\ , ``output_pattern``, [``extra_parameters``,...] )
+ \
+ * :ref:`@collate <decorators.collate>` (``tasks_or_file_names``, :ref:`formatter <decorators.collate.matching_formatter>`\ *(*\ ``formatter_pattern``\ *)*\ , ``output_pattern``, [``extra_parameters``,...] )
+ \
+ * :ref:`@collate <decorators.collate_ex>` (``tasks_or_file_names``, :ref:`formatter <decorators.collate_ex.matching_formatter>`\ *(*\ ``formatter_pattern``\ *)*\ , :ref:`inputs <decorators.inputs>` | :ref:`add_inputs <decorators.add_inputs>`\ *(*\ ``input_pattern``\ *)*\ , ``output_pattern``, [``extra_parameters``,...] )
+ \
+
+ ", ""
+ "**@follows** (:ref:`Summary <decorators.follows>` / :ref:`Manual <new_manual.follows>`)
+
+ - Indicates task dependency
+ - optional :ref:`mkdir <decorators.follows.directory_name>` prerequisite (:ref:`see Manual <new_manual.follows.mkdir>`)
+
+ ", "
+       * :ref:`@follows <decorators.follows>` ( ``task1``, ``'task2'`` )
+ \
+ * :ref:`@follows <decorators.follows>` ( ``task1``, :ref:`mkdir <decorators.follows.directory_name>`\ ( ``'my/directory/'`` ))
+ \
+
+ ", ""
+ "**@posttask** (:ref:`Summary <decorators.posttask>` / :ref:`Manual <new_manual.posttask>`)
+
+ - Calls function after task completes
+ - Optional :ref:`touch_file <decorators.posttask.file_name>` indicator (:ref:`Manual <new_manual.posttask.touch_file>`)
+
+ ", "
+ * :ref:`@posttask <decorators.posttask>` ( ``signal_task_completion_function`` )
+ \
+ * :ref:`@posttask <decorators.posttask>` (:ref:`touch_file <decorators.touch_file>`\ ( ``'task1.completed'`` ))
+ \
+
+ ", ""
+ "**@active_if** (:ref:`Summary <decorators.active_if>` / :ref:`Manual <new_manual.active_if>`)
+
+       - Switches tasks on and off at run time depending on the supplied parameters
+ - Evaluated each time :ref:`pipeline_run(...) <pipeline_functions.pipeline_run>`, :ref:`pipeline_printout(...) <pipeline_functions.pipeline_printout>` or :ref:`pipeline_printout_graph(...) <pipeline_functions.pipeline_printout_graph>` is called.
+ - Dormant tasks behave as if they are up to date and have no output.
+
+ ", "
+ * :ref:`@active_if <decorators.active_if>` ( ``on_or_off1, [on_or_off2, ...]`` )
+ \
+
+ ", ""
+ "**@jobs_limit** (:ref:`Summary <decorators.jobs_limit>` / :ref:`Manual <new_manual.jobs_limit>`)
+
+ - Limits the amount of multiprocessing for the specified task
+ - Ensures that fewer than N jobs for this task are run in parallel
+ - Overrides ``multiprocess`` parameter in :ref:`pipeline_run(...) <pipeline_functions.pipeline_run>`
+ ", "
+ * :ref:`@jobs_limit <decorators.jobs_limit>` ( ``NUMBER_OF_JOBS_RUNNING_CONCURRENTLY`` )
+ \
+
+ ", ""
+ "**@mkdir** (:ref:`Summary <decorators.mkdir>` / :ref:`Manual <new_manual.mkdir>`)
+
+ - Generates paths for `os.makedirs <http://docs.python.org/2/library/os.html#os.makedirs>`__
+
+ ", "
+ * :ref:`@mkdir <decorators.mkdir>` ( ``tasks_or_file_names``, :ref:`suffix <decorators.mkdir.suffix_string>`\ *(*\ ``suffix_string``\ *)*\ , ``output_pattern`` )
+ \
+ * :ref:`@mkdir <decorators.mkdir>` ( ``tasks_or_file_names``, :ref:`regex <decorators.mkdir.matching_regex>`\ *(*\ ``regex_pattern``\ *)*\ , ``output_pattern`` )
+ \
+ * :ref:`@mkdir <decorators.mkdir>` ( ``tasks_or_file_names``, :ref:`formatter <decorators.mkdir.matching_formatter>`\ *(*\ ``regex_pattern``\ *)*\ , ``output_pattern``)
+ \
+
+ ", ""
+ "**@graphviz** (:ref:`Summary <decorators.graphviz>` / :ref:`Manual <new_manual.pipeline_printout_graph>`)
+
+ - Customise the graphic for each task in printed flowcharts
+
+ ", "
+ * :ref:`@graphviz <decorators.graphviz>` ( ``graphviz_parameter = XXX``, ``[graphviz_parameter2 = YYY ...]``)
+ \
+
+ ", ""
+
+
+
+=============================================
+*Esoteric!*
+=============================================
+ .. csv-table::
+ :header: "Decorator", "Examples"
+ :widths: 400, 600,1
+
+ "**@files** (:ref:`Summary <decorators.files>` / :ref:`Manual <new_manual.deprecated_files>`)
+
+ - I/O parameters
+ - skips up-to-date jobs
+ - Should use :ref:`@transform <decorators.transform>` etc instead
+
+ ", "
+ * :ref:`@files <decorators.files>`\ ( ``parameter_list`` )
+ \
+ * :ref:`@files <decorators.files>`\ ( ``parameter_generating_function`` )
+ \
+ * :ref:`@files <decorators.files>` ( ``input_file``, ``output_file``, ``other_params``, ... )
+ \
+
+ ", ""
+ "**@parallel** (:ref:`Summary <decorators.parallel>` / :ref:`Manual <new_manual.deprecated_parallel>`)
+
+ - By default, does not check if jobs are up to date
+       - Best used in conjunction with :ref:`@check_if_uptodate <decorators.check_if_uptodate>`
+
+ ", "
+ * :ref:`@parallel <decorators.parallel>` ( ``parameter_list`` ) (:ref:`see Manual <new_manual.deprecated_parallel>`)
+ \
+ * :ref:`@parallel <decorators.parallel>` ( ``parameter_generating_function`` ) (:ref:`see Manual <new_manual.on_the_fly>`)
+ \
+
+ ", ""
+ "**@check_if_uptodate** (:ref:`Summary <decorators.check_if_uptodate>` / :ref:`Manual <new_manual.check_if_uptodate>`)
+
+ - Custom function to determine if jobs need to be run
+
+ ", "
+ * :ref:`@check_if_uptodate <decorators.check_if_uptodate>` ( ``is_task_up_to_date_function`` )
+ \
+
+ ", ""
+ ".. tip::
+ The use of this overly complicated function is discouraged.
+ **@files_re** (:ref:`Summary <decorators.files_re>`)
+
+ - I/O file names via regular
+ expressions
+ - start from lists of file names
+ or |glob|_ results
+ - skips up-to-date jobs
+ ", "
+ * :ref:`@files_re <decorators.files_re>` ( ``tasks_or_file_names``, ``matching_regex``, [``input_pattern``,] ``output_pattern``, ``...`` )
+ ``input_pattern``/``output_pattern`` are regex patterns
+ used to create input/output file names from the starting
+ list of either glob_str or file names
+
+ ", ""
+
diff --git a/doc/decorators/files.rst b/doc/decorators/files.rst
new file mode 100644
index 0000000..8f73ba5
--- /dev/null
+++ b/doc/decorators/files.rst
@@ -0,0 +1,155 @@
+.. include:: ../global.inc
+.. _decorators.files:
+.. index::
+ pair: @files; Syntax
+
+.. seealso::
+
+ * :ref:`Decorators <decorators>` for more decorators
+
+
+.. |input| replace:: `input`
+.. _input: `decorators.files.input`_
+.. |input1| replace:: `input1`
+.. _input1: `decorators.files.input1`_
+.. |output| replace:: `output`
+.. _output: `decorators.files.output`_
+.. |output1| replace:: `output1`
+.. _output1: `decorators.files.output1`_
+.. |extra_parameters| replace:: `extra_parameters`
+.. _extra_parameters: `decorators.files.extra_parameters`_
+.. |extra_parameters1| replace:: `extra_parameters1`
+.. _extra_parameters1: `decorators.files.extra_parameters1`_
+
+
+########################
+ at files
+########################
+
+*******************************************************************************************
+*@files* (|input1|_, |output1|_, [|extra_parameters1|_, ...])
+*******************************************************************************************
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+ at files for single jobs
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+ **Purpose:**
+ Provides parameters to run a task.
+
+ The first two parameters in each set represent the input and output which are
+ used to see if the job is out of date and needs to be (re-)run.
+
+ By default, out of date checking uses input/output file timestamps.
+ (On some file systems, timestamps have a resolution in seconds.)
+ See :ref:`@check_if_uptodate() <decorators.check_if_uptodate>` for alternatives.
+
+
+ **Example**:
+ ::
+
+ from ruffus import *
+ @files('a.1', 'a.2', 'A file')
+ def transform_files(infile, outfile, text):
+ pass
+ pipeline_run([transform_files])
+
+ If ``a.2`` is missing or was created before ``a.1``, then the following will be called:
+ ::
+
+ transform_files('a.1', 'a.2', 'A file')
+
+ **Parameters:**
+
+.. _decorators.files.input1:
+
+ * *input*
+ Input file names
+
+
+.. _decorators.files.output1:
+
+ * *output*
+ Output file names
+
+
+.. _decorators.files.extra_parameters1:
+
+ * *extra_parameters*
+ optional ``extra_parameters`` are passed verbatim to each job.
+
+
+ **Checking if jobs are up to date:**
+ Strings in ``input`` and ``output`` (including in nested sequences) are interpreted as file names and
+ used to check if jobs are up-to-date.
+
+       See :ref:`below <decorators.files.check_up_to_date>` for more details
+
+
+*******************************************************************************************
+*@files* ( *((* |input|_, |output|_, [|extra_parameters|_,...] *), (...), ...)* )
+*******************************************************************************************
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+ at files in parallel
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+ **Purpose:**
+
+ Passes each set of parameters to separate jobs which can run in parallel
+
+ The first two parameters in each set represent the input and output which are
+ used to see if the job is out of date and needs to be (re-)run.
+
+ By default, out of date checking uses input/output file timestamps.
+ (On some file systems, timestamps have a resolution in seconds.)
+ See :ref:`@check_if_uptodate() <decorators.check_if_uptodate>` for alternatives.
+
+ **Example**:
+ ::
+
+ from ruffus import *
+ parameters = [
+ [ 'a.1', 'a.2', 'A file'], # 1st job
+ [ 'b.1', 'b.2', 'B file'], # 2nd job
+ ]
+
+ @files(parameters)
+ def parallel_io_task(infile, outfile, text):
+ pass
+ pipeline_run([parallel_io_task])
+
+ is the equivalent of calling:
+ ::
+
+ parallel_io_task('a.1', 'a.2', 'A file')
+ parallel_io_task('b.1', 'b.2', 'B file')
+
+ **Parameters:**
+
+.. _decorators.files.input:
+
+ * *input*
+ Input file names
+
+
+.. _decorators.files.output:
+
+ * *output*
+ Output file names
+
+
+.. _decorators.files.extra_parameters:
+
+ * *extra_parameters*
+ optional ``extra_parameters`` are passed verbatim to each job.
+
+.. _decorators.files.check_up_to_date:
+
+ **Checking if jobs are up to date:**
+ #. Strings in ``input`` and ``output`` (including in nested sequences) are interpreted as file names and
+ used to check if jobs are up-to-date.
+ #. In the absence of input files (e.g. ``input == None``), the job will run if any output file is missing.
+ #. In the absence of output files (e.g. ``output == None``), the job will always run.
+ #. If any of the output files is missing, the job will run.
+ #. If any of the input files is missing when the job is run, a
+ ``MissingInputFileError`` exception will be raised.
+
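+    For example, a minimal sketch (the file name is hypothetical) relying on the second rule
+    above: because ``input`` is ``None``, the job is re-run only when ``version.txt`` is missing:
+    ::
+
+        from ruffus import *
+
+        @files(None, "version.txt")
+        def write_version(input_file, output_file):
+            # no input: runs only if the output file is missing
+            with open(output_file, "w") as oo:
+                oo.write("2.5\n")
+
+        pipeline_run([write_version])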
+
diff --git a/doc/decorators/files_ex.rst b/doc/decorators/files_ex.rst
new file mode 100644
index 0000000..d200d85
--- /dev/null
+++ b/doc/decorators/files_ex.rst
@@ -0,0 +1,77 @@
+.. include:: ../global.inc
+.. _decorators.files_on_the_fly:
+.. index::
+ pair: @files (on-the-fly parameter generation); Syntax
+
+.. seealso::
+
+ * :ref:`Decorators <decorators>` for more decorators
+
+
+.. |custom_function| replace:: `custom_function`
+.. _custom_function: `decorators.files.custom_function`_
+
+
+################################################
+Generating parameters on the fly for @files
+################################################
+
+*******************************************************************************************
+*@files* (|custom_function|_)
+*******************************************************************************************
+ **Purpose:**
+
+ Uses a custom function to generate sets of parameters to separate jobs which can run in parallel.
+
+ The first two parameters in each set represent the input and output which are
+ used to see if the job is out of date and needs to be (re-)run.
+
+ By default, out of date checking uses input/output file timestamps.
+ (On some file systems, timestamps have a resolution in seconds.)
+ See :ref:`@check_if_uptodate() <decorators.check_if_uptodate>` for alternatives.
+
+ **Example**:
+ ::
+
+ from ruffus import *
+ def generate_parameters_on_the_fly():
+ parameters = [
+ ['input_file1', 'output_file1', 1, 2], # 1st job
+ ['input_file2', 'output_file2', 3, 4], # 2nd job
+ ['input_file3', 'output_file3', 5, 6], # 3rd job
+ ]
+ for job_parameters in parameters:
+ yield job_parameters
+
+ @files(generate_parameters_on_the_fly)
+ def parallel_io_task(input_file, output_file, param1, param2):
+ pass
+
+        pipeline_run([parallel_io_task])
+
+ is the equivalent of calling:
+ ::
+
+ parallel_io_task('input_file1', 'output_file1', 1, 2)
+ parallel_io_task('input_file2', 'output_file2', 3, 4)
+ parallel_io_task('input_file3', 'output_file3', 5, 6)
+
+
+ **Parameters:**
+
+
+.. _decorators.files.custom_function:
+
+ * *custom_function*:
+       Generator function which yields, one at a time, a complete set of parameters for each job
+
+ **Checking if jobs are up to date:**
+ Strings in ``input`` and ``output`` (including in nested sequences) are interpreted as file names and
+ used to check if jobs are up-to-date.
+
+       See :ref:`@files <decorators.files.check_up_to_date>` for more details
+
+
+
+
+
diff --git a/doc/decorators/files_re.rst b/doc/decorators/files_re.rst
new file mode 100644
index 0000000..8d3df68
--- /dev/null
+++ b/doc/decorators/files_re.rst
@@ -0,0 +1,130 @@
+.. include:: ../global.inc
+.. _decorators.files_re:
+
+.. index::
+ pair: @files_re; Syntax
+
+.. seealso::
+
+ * :ref:`Decorators <decorators>` for more decorators
+
+
+########################
+ at files_re
+########################
+.. |tasks_or_file_names| replace:: `tasks_or_file_names`
+.. _tasks_or_file_names: `decorators.files_re.tasks_or_file_names`_
+.. |extra_parameters| replace:: `extra_parameters`
+.. _extra_parameters: `decorators.files_re.extra_parameters`_
+.. |output_pattern| replace:: `output_pattern`
+.. _output_pattern: `decorators.files_re.output_pattern`_
+.. |input_pattern| replace:: `input_pattern`
+.. _input_pattern: `decorators.files_re.input_pattern`_
+.. |matching_regex| replace:: `matching_regex`
+.. _matching_regex: `decorators.files_re.matching_regex`_
+
+*****************************************************************************************************************************************
+*@files_re* (|tasks_or_file_names|_, |matching_regex|_, [|input_pattern|_], |output_pattern|_, [|extra_parameters|_,...])
+*****************************************************************************************************************************************
+
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+Legacy design now deprecated. We suggest using :ref:`@transform() <decorators.transform>` instead
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+ **Purpose:**
+
+ All singing, all dancing decorator which can do everything that :ref:`@merge() <decorators.merge>` and
+ :ref:`@transform() <decorators.transform>` can do.
+
+ Applies the task function to transform data from input to output files.
+
+ Output file names are determined from |tasks_or_file_names|_, i.e. from the output
+ of specified tasks, or a list of file names, using regular expression pattern substitutions.
+
+ Only out of date tasks (comparing input and output files) will be run.
+
+ **Example**:
+ ::
+
+ from ruffus import *
+ #
+ # convert all files ending in ".1" into files ending in ".2"
+ #
+ @files_re('*.1', '(.*).1', r'\1.2')
+ def transform_func(infile, outfile):
+ open(outfile, "w").write(open(infile).read() + "\nconverted\n")
+
+        pipeline_run([transform_func])
+
+ If the following files are present ``a.1``, ``b.1``, ``c.1``, this will result in the following function calls:
+ ::
+
+ transform_func("a.1", "a.2")
+ transform_func("b.1", "b.2")
+ transform_func("c.1", "c.2")
+
+ **Parameters:**
+
+.. _decorators.files_re.tasks_or_file_names:
+
+ * *tasks_or_file_names*
+ can be a:
+
+ #. Task / list of tasks (as in the example above).
+ File names are taken from the output of the specified task(s)
+ #. (Nested) list of file name strings.
+ File names containing ``*[]?`` will be expanded as a |glob|_ .
+ E.g.:``"a.*" => "a.1", "a.2"``
+
+.. _decorators.files_re.matching_regex:
+
+ * *matching_regex*
+ a python regular expression string.
+
+ | See python `regular expression (re) <http://docs.python.org/library/re.html>`_ documentation for details of regular expression syntax
+ | Each output file name is created using regular expression substitution with |output_pattern|_
+
+.. _decorators.files_re.input_pattern:
+
+ * *input_pattern*
+ Optionally specifies the resulting input file name(s).
+
+.. _decorators.files_re.output_pattern:
+
+ * *output_pattern*
+ Specifies the resulting output file name(s).
+
+.. _decorators.files_re.extra_parameters:
+
+ * [*extra_parameters, ...*]
+ Any extra parameters are passed to the task function.
+
+ | Regular expression substitution is first applied to (even nested) string parameters.
+ | Other data types are passed verbatim.
+
+ For example:
+ ::
+
+ from ruffus import *
+ #
+ # convert all files ending in ".1" into files ending in ".2"
+ #
+ @files_re('*.1', '(.*).1', r'\1.2', [r'\1', 55], 17)
+ def transform_func(infile, outfile, extras, extra3):
+ extra1, extra2 = extras
+            open(outfile, "w").write(open(infile).read() + "\nconverted %s %s %s\n" % (extra1, extra2, extra3))
+
+ pipeline_run([transform_func])
+
+ If the following files are present ``a.1``, ``b.1``, ``c.1``, this will result in the following function calls:
+ ::
+
+ transform_func("a.1", "a.2", ["a", 55], 17)
+ transform_func("b.1", "b.2", ["b", 55], 17)
+ transform_func("c.1", "c.2", ["c", 55], 17)
+
+
+
+
+
+
diff --git a/doc/decorators/follows.rst b/doc/decorators/follows.rst
new file mode 100644
index 0000000..58e6d5c
--- /dev/null
+++ b/doc/decorators/follows.rst
@@ -0,0 +1,82 @@
+.. include:: ../global.inc
+.. _decorators.follows:
+.. index::
+ pair: @follows; Syntax
+
+.. seealso::
+
+ * :ref:`Decorators <decorators>` for more decorators
+ * More on @follows in the ``Ruffus`` :ref:`Manual <new_manual.follows>`
+
+ .. note::
+
+ Only missing directories are created.
+
+ In other words, the same directory can be specified multiple times safely without, for example, being recreated repeatedly.
+ Sometimes, for pipelines with multiple entry points, this is the only way to make sure that certain working or output
+ directories are always created or available *before* the pipeline runs.
+
+
+############
+ at follows
+############
+
+.. _decorators.follows.mkdir:
+
+.. |task| replace:: `task`
+.. _task: `decorators.follows.task`_
+.. |task_name| replace:: `"task_name"`
+.. _task_name: `decorators.follows.task_name`_
+.. |directory_name| replace:: `directory_name`
+.. _directory_name: `decorators.follows.directory_name`_
+
+***************************************************************************************************************************************************
+*@follows*\ (|task|_ | |task_name|_ | :ref:`mkdir<decorators.mkdir>` (|directory_name|_), [more_tasks, ...])
+***************************************************************************************************************************************************
+ **Purpose:**
+
+ Indicates either
+
+ * task dependencies
+ * that the task requires a directory to be created first *if necessary*. (Existing directories will not be overwritten)
+
+
+ **Example**::
+
+ def task1():
+ print "doing task 1"
+
+ @follows(task1)
+ def task2():
+ print "doing task 2"
+
+
+ **Parameters:**
+
+.. _decorators.follows.task:
+
+ * *task*:
+ a list of tasks which have to be run **before** this function
+
+.. _decorators.follows.task_name:
+
+ * *"task_name"*:
+ Dependencies can be quoted function names.
+ Quoted function names allow dependencies to be added before the function is defined.
+
+ Functions in other modules need to be fully qualified.
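+
+       For example, a minimal sketch (the task and module names here are hypothetical):
+
+       ::
+
+           @follows("build_index", "other_module.download_data")
+           def run_analysis():
+               pass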
+
+
+.. _decorators.follows.directory_name:
+
+ * *directory_name*:
+ Directories which need to be created (*only if they don't exist*) before
+ the task is run can be specified via a ``mkdir`` indicator object:
+
+ ::
+
+ @follows(task_x, mkdir("/output/directory") ...)
+ def task():
+ pass
+
+
diff --git a/doc/decorators/graphviz.rst b/doc/decorators/graphviz.rst
new file mode 100644
index 0000000..ebda4df
--- /dev/null
+++ b/doc/decorators/graphviz.rst
@@ -0,0 +1,92 @@
+.. include:: ../global.inc
+.. _decorators.graphviz:
+.. index::
+ pair: @graphviz; Syntax
+
+.. seealso::
+
+ * :ref:`Decorators <decorators>` for more decorators
+
+########################
+ at graphviz
+########################
+
+.. |graphviz_parameters| replace:: `graphviz_parameters`
+.. _graphviz_parameters: `decorators.graphviz.graphviz_parameters`_
+
+
+********************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
+*@graphviz* ( |graphviz_parameters|_, ... )
+********************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
+ *Contributed by Sean Davis, with improved syntax via Jake Biesinger*
+
+ **Purpose:**
+    Customise the graphic for each task in printed flowcharts by adding
+    `graphviz attributes <http://www.graphviz.org/doc/info/attrs.html>`__
+    (URL, shape, colour) to that node.
+
+ * This allows HTML formatting in the task names (using the ``label`` parameter as in the following example).
+ HTML labels **must** be enclosed in ``<`` and ``>``. E.g.
+
+ .. code-block:: python
+
+ label = "<Line <BR/> wrapped task_name()>"
+
+ * You can also opt to keep the task name and wrap it with a prefix and suffix:
+
+ .. code-block:: python
+
+ label_suffix = "??? ", label_prefix = ": What is this?"
+
+ * The ``URL`` attribute allows the generation of clickable svg, and also client / server
+ side image maps usable in web pages.
+ See `Graphviz documentation <http://www.graphviz.org/content/output-formats#dimap>`__
+
+
+ **Example**:
+ .. code-block:: python
+
+
+ @graphviz(URL='"http://cnn.com"', fillcolor = '"#FFCCCC"',
+ color = '"#FF0000"', pencolor='"#FF0000"', fontcolor='"#4B6000"',
+ label_suffix = "???", label_prefix = "What is this?<BR/> ",
+ label = "<What <FONT COLOR=\"red\">is</FONT>this>",
+ shape= "component", height = 1.5, peripheries = 5,
+ style="dashed")
+ def Up_to_date_task2(infile, outfile):
+ pass
+
+ # Can use dictionary if you wish...
+ graphviz_params = {"URL":"http://cnn.com", "fontcolor": '"#FF00FF"'}
+ @graphviz(**graphviz_params)
+ def myTask(input,output):
+ pass
+
+ .. **
+
+ .. image:: ../images/history_html_flowchart.png
+ :scale: 30
+
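+    The customised attributes only take effect when a flowchart is printed. As a rough sketch
+    (the output file name is arbitrary; see
+    :ref:`pipeline_printout_graph(...) <pipeline_functions.pipeline_printout_graph>` for the exact arguments):
+
+    .. code-block:: python
+
+        # write the decorated pipeline out as a clickable svg flowchart
+        pipeline_printout_graph(open("flowchart.svg", "w"),
+                                "svg",
+                                [myTask])
+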
+ **Parameters:**
+
+
+.. _decorators.graphviz.graphviz_parameters:
+
+ * named *graphviz_parameters*
+
+ Including among others:
+
+ * URL (e.g. ``"www.ruffus.org.uk"``)
+ * fillcolor
+ * color
+ * pencolor
+ * fontcolor
+ * label_suffix (appended to task name)
+ * label_prefix (precedes task name)
+ * label (replaces task name)
+ * shape (e.g. ``"component", "box", "diamond", "doubleoctagon"`` etc., see `graphviz <http://www.graphviz.org/doc/info/shapes.html>`__ )
+ * height
+ * peripheries (Number of borders)
+ * style (e.g. ``"solid", "wedged", "dashed"`` etc., see `graphviz <http://www.graphviz.org/doc/info/attrs.html#k:style>`__ )
+
+    Colours may be specified as ``'"#FFCCCC"', 'red', 'red:blue', '/bugn9/7'`` etc. see `color names <http://www.graphviz.org/doc/info/attrs.html#k:color>`__ and `colour schemes <http://www.graphviz.org/doc/info/colors.html>`__
diff --git a/doc/decorators/indicator_objects.rst b/doc/decorators/indicator_objects.rst
new file mode 100644
index 0000000..e41cc4c
--- /dev/null
+++ b/doc/decorators/indicator_objects.rst
@@ -0,0 +1,547 @@
+.. include:: ../global.inc
+
+
+.. seealso::
+ :ref:`Decorators <decorators>`
+
+.. index::
+ single: Indicator Object (Disambiguating parameters)
+
+.. _decorators.indicator_objects:
+
+
+########################
+Indicator Objects
+########################
+
+
+
+    Indicator objects are how *ruffus* disambiguates certain parameters to decorators.
+
+    They are like `keyword arguments <http://docs.python.org/tutorial/controlflow.html#keyword-arguments>`_ in python: a little more verbose, but they make the syntax much simpler.
+
+ Indicator objects are also "self-documenting" so you can see
+ exactly what is happening clearly.
+
+
+.. index::
+ pair: formatter; Indicator Object (Disambiguating parameters)
+
+.. _decorators.formatter:
+
+
+*********************************************
+*formatter*
+*********************************************
+
+ **formatter([** ``regex | None`` **, regex | None...])**
+
+    * The optional enclosed parameters are python regular expression strings
+ * Each regular expression matches a corresponding *Input* file name string
+ * *formatter* parses each file name string into path and regular expression components
+ * Parsing fails altogether if the regular expression is not matched
+
+ Path components include:
+
+ * ``basename``: The `base name <http://docs.python.org/2/library/os.path.html#os.path.basename>`__ *excluding* `extension <http://docs.python.org/2/library/os.path.html#os.path.splitext>`__, ``"file.name"``
+ * ``ext`` : The `extension <http://docs.python.org/2/library/os.path.html#os.path.splitext>`__, ``".ext"``
+ * ``path`` : The `dirname <http://docs.python.org/2/library/os.path.html#os.path.dirname>`__, ``"/directory/to/a"``
+ * ``subdir`` : A list of sub-directories in the ``path`` in reverse order, ``["a", "to", "directory", "/"]``
+ * ``subpath`` : A list of descending sub-paths in reverse order, ``["/directory/to/a", "/directory/to", "/directory", "/"]``
+
+ The replacement string refers to these components using python `string.format <http://docs.python.org/2/library/string.html#string-formatting>`__ style curly braces. ``{NAME}``
+
+ We refer to an element from the Nth input string by index, for example:
+
+ * ``"{ext[0]}"`` is the extension of the first input string.
+ * ``"{basename[1]}"`` is the basename of the second input string.
+ * ``"{basename[1][0:3]}"`` are the first three letters from the basename of the second input string.
+
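+    For example, given a single (hypothetical) **Input** string ``"/directory/to/a/file.name.ext"``,
+    the components above would be expected to expand along these lines:
+    ::
+
+        "{basename[0]}"    ->  "file.name"
+        "{ext[0]}"         ->  ".ext"
+        "{path[0]}"        ->  "/directory/to/a"
+        "{subdir[0][0]}"   ->  "a"
+        "{subpath[0][1]}"  ->  "/directory/to"
+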
+ **Used by:**
+ * :ref:`@split <decorators.split>`
+ * :ref:`@transform <decorators.transform>`
+ * :ref:`@merge <decorators.merge>`
+ * :ref:`@subdivide <decorators.subdivide>`
+ * :ref:`@collate <decorators.collate>`
+ * :ref:`@product <decorators.product>`
+ * :ref:`@permutations <decorators.permutations>`
+ * :ref:`@combinations <decorators.combinations>`
+ * :ref:`@combinations_with_replacement <decorators.combinations_with_replacement>`
+
+ **@transform example**:
+
+ .. code-block:: python
+ :emphasize-lines: 14, 18,19
+
+ from ruffus import *
+
+ # create initial file pairs
+ @originate([ ['job1.a.start', 'job1.b.start'],
+ ['job2.a.start', 'job2.b.start'],
+ ['job3.a.start', 'job3.c.start'] ])
+ def create_initial_file_pairs(output_files):
+ for output_file in output_files:
+ with open(output_file, "w") as oo: pass
+
+
+ #---------------------------------------------------------------
+ #
+ # formatter
+ #
+ @transform(create_initial_file_pairs, # Input
+
+ formatter(".+/job(?P<JOBNUMBER>\d+).a.start", # Extract job number
+ ".+/job[123].b.start"), # Match only "b" files
+
+ ["{path[0]}/jobs{JOBNUMBER[0]}.output.a.1", # Replacement list
+ "{path[1]}/jobs{JOBNUMBER[0]}.output.b.1"])
+ def first_task(input_files, output_parameters):
+ print "input_parameters = ", input_files
+ print "output_parameters = ", output_parameters
+
+
+ #
+ # Run
+ #
+ pipeline_run(verbose=0)
+
+ This produces:
+
+ .. code-block:: pycon
+
+ input_parameters = ['job1.a.start',
+ 'job1.b.start']
+ output_parameters = ['/home/lg/src/temp/jobs1.output.a.1',
+                              '/home/lg/src/temp/jobs1.output.b.1']
+
+ input_parameters = ['job2.a.start',
+ 'job2.b.start']
+ output_parameters = ['/home/lg/src/temp/jobs2.output.a.1',
+                              '/home/lg/src/temp/jobs2.output.b.1']
+
+ **@permutations example**:
+
+    Combinatoric decorators such as :ref:`@product <decorators.product>` or
+    :ref:`@permutations <decorators.permutations>` behave much
+    like nested for loops in enumerating, combining, and permuting the original sets
+    of inputs.
+
+ The replacement strings require an extra level of indirection to refer to
+ parsed components:
+
+ .. code-block:: python
+ :emphasize-lines: 14, 18,19
+
+ from ruffus import *
+ from ruffus.combinatorics import *
+
+ # create initial files
+ @originate([ 'a.start', 'b.start', 'c.start'])
+ def create_initial_files(output_file):
+ with open(output_file, "w") as oo: pass
+
+
+ #---------------------------------------------------------------
+ #
+ # formatter
+ #
+ @permutations(create_initial_files, # Input
+
+ formatter("(.start)$"), # match input file in permutations
+ 2,
+
+ "{path[0][0]}/{basename[0][0]}_vs_{basename[1][0]}.product", # Output Replacement string
+ "{path[0][0]}", # path for 1st set of files, 1st file name
+ ["{basename[0][0]}", # basename for 1st set of files, 1st file name
+ "{basename[1][0]}"]) # basename for 2nd set of files, 1st file name
+ def product_task(input_file, output_parameter, shared_path, basenames):
+ print "input_parameter = ", input_file
+ print "output_parameter = ", output_parameter
+ print "shared_path = ", shared_path
+ print "basenames = ", basenames
+
+
+ #
+ # Run
+ #
+ pipeline_run(verbose=0)
+
+ This produces:
+
+ .. code-block:: pycon
+
+ >>> pipeline_run(verbose=0)
+ input_parameter = ('a.start', 'b.start')
+ output_parameter = /home/lg/src/oss/ruffus/a_vs_b.product
+ shared_path = /home/lg/src/oss/ruffus
+ basenames = ['a', 'b']
+
+ input_parameter = ('a.start', 'c.start')
+ output_parameter = /home/lg/src/oss/ruffus/a_vs_c.product
+ shared_path = /home/lg/src/oss/ruffus
+ basenames = ['a', 'c']
+
+ input_parameter = ('b.start', 'a.start')
+ output_parameter = /home/lg/src/oss/ruffus/b_vs_a.product
+ shared_path = /home/lg/src/oss/ruffus
+ basenames = ['b', 'a']
+
+ input_parameter = ('b.start', 'c.start')
+ output_parameter = /home/lg/src/oss/ruffus/b_vs_c.product
+ shared_path = /home/lg/src/oss/ruffus
+ basenames = ['b', 'c']
+
+ input_parameter = ('c.start', 'a.start')
+ output_parameter = /home/lg/src/oss/ruffus/c_vs_a.product
+ shared_path = /home/lg/src/oss/ruffus
+ basenames = ['c', 'a']
+
+ input_parameter = ('c.start', 'b.start')
+ output_parameter = /home/lg/src/oss/ruffus/c_vs_b.product
+ shared_path = /home/lg/src/oss/ruffus
+ basenames = ['c', 'b']
+
+
+
+.. index::
+ pair: suffix; Indicator Object (Disambiguating parameters)
+
+.. _decorators.suffix:
+
+
+*********************************************
+*suffix*
+*********************************************
+
+ **suffix(** ``string`` **)**
+
+ The enclosed parameter is a string which must match *exactly* to the end
+ of a file name.
+
+
+ **Used by:**
+ * :ref:`@transform <decorators.transform>`
+
+ **Example**:
+ ::
+
+ #
+ # Transforms ``*.c`` to ``*.o``::
+ #
+ @transform(previous_task, suffix(".c"), ".o")
+ def compile(infile, outfile):
+ pass
+
+.. index::
+ pair: regex; Indicator Object (Disambiguating parameters)
+
+.. _decorators.regex:
+
+*********************************************
+*regex*
+*********************************************
+
+ **regex(** ``regular_expression`` **)**
+
+
+ The enclosed parameter is a python regular expression string,
+ which must be wrapped in a ``regex`` indicator object.
+
+ See python `regular expression (re) <http://docs.python.org/library/re.html>`_
+ documentation for details of regular expression syntax
+
+
+ **Used by:**
+
+ * :ref:`@transform <decorators.transform>`
+ * :ref:`@subdivide <decorators.subdivide>`
+ * :ref:`@collate <decorators.collate>`
+ * The deprecated :ref:`@files_re <decorators.files_re>`
+
+ **Example**:
+ ::
+
+ @transform(previous_task, regex(r".c$"), ".o")
+ def compile(infile, outfile):
+ pass
+
+.. index::
+ pair: add_inputs; Indicator Object (Adding additional input parameters)
+
+.. _decorators.add_inputs:
+
+***********************************************
+*add_inputs*
+***********************************************
+
+ **add_inputs(** ``input_file_pattern`` **)**
+
+ The enclosed parameter(s) are pattern strings or a nested structure which is added to the
+ input for each job.
+
+ **Used by:**
+ * :ref:`@transform <decorators.transform_ex>`
+ * :ref:`@collate <decorators.transform_ex>`
+ * :ref:`@subdivide <decorators.subdivide>`
+
+ **Example @transform with suffix(...)**
+
+ A common task in compiling C code is to include the corresponding header file for the source.
+ To compile ``*.c`` to ``*.o``, adding ``*.h`` and the common header ``universal.h``:
+
+ ::
+
+ @transform(["1.c", "2.c"], suffix(".c"), add_inputs([r"\1.h", "universal.h"]), ".o")
+ def compile(infile, outfile):
+ # do something here
+ pass
+
+    | The starting file names are ``1.c`` and ``2.c``.
+    | ``suffix(".c")`` matches ".c" so ``\1`` stands for the unmatched prefixes ``"1"`` and ``"2"``
+
+ This will result in the following functional calls:
+ ::
+
+ compile(["1.c", "1.h", "universal.h"], "1.o")
+ compile(["2.c", "2.h", "universal.h"], "2.o")
+
+
+    A string like ``universal.h`` in ``add_inputs`` will be added *as is*.
+ ``r"\1.h"``, however, performs suffix substitution, with the special form ``r"\1"`` matching everything up to the suffix.
+ Remember to 'escape' ``r"\1"`` otherwise Ruffus will complain and throw an ``Exception`` to remind you.
+ The most convenient way is to use a python "raw" string.
+
+ **Example of add_inputs(...) with regex(...)**
+
+ The suffix match (``suffix(...)``) is exactly equivalent to the following code using regular expression (``regex(...)``):
+ ::
+
+ @transform(["1.c", "2.c"], regex(r"^(.+)\.c$"), add_inputs([r"\1.h", "universal.h"]), r"\1.o")
+ def compile(infile, outfile):
+ # do something here
+ pass
+
+ The ``suffix(..)`` code is much simpler but the regular expression allows more complex substitutions.
+
+ **add_inputs(...) preserves original inputs**
+
+    ``add_inputs`` nests the original input parameters in a list before adding additional dependencies.
+
+ This can be seen in the following example:
+ ::
+
+        @transform([ ["1.c", "A.c", 2],
+                     ["2.c", "B.c", "C.c", 3]],
+ suffix(".c"), add_inputs([r"\1.h", "universal.h"]), ".o")
+ def compile(infile, outfile):
+ # do something here
+ pass
+
+ This will result in the following functional calls:
+ ::
+
+ compile([["1.c", "A.c", 2], "1.h", "universal.h"], "1.o")
+        compile([["2.c", "B.c", "C.c", 3], "2.h", "universal.h"], "2.o")
+
+
+ The original parameters are retained unchanged as the first item in a list
+
+
+
+
+.. index::
+ pair: inputs; Indicator Object (Replacing input parameters)
+
+.. _decorators.inputs:
+
+***************************************
+*inputs*
+***************************************
+
+ **inputs(** ``input_file_pattern`` **)**
+
+ **Used by:**
+ * :ref:`@transform <decorators.transform_ex>`
+ * :ref:`@collate <decorators.transform_ex>`
+ * :ref:`@subdivide <decorators.subdivide>`
+
+ The enclosed single parameter is a pattern string or a nested structure which is
+ used to construct the input for each job.
+
+ If more than one argument is supplied to inputs, an exception will be raised.
+
+ Use a tuple or list (as in the following example) to send multiple input arguments to each job.
+
+ **inputs(...) replaces original inputs**
+
+ ``inputs(...)`` allows the original input parameters to be replaced wholescale.
+
+ This can be seen in the following example:
+ ::
+
+        @transform([ ["1.c", "A.c", 2],
+                     ["2.c", "B.c", "C.c", 3]],
+ suffix(".c"), inputs([r"\1.py", "docs.rst"]), ".pyc")
+ def compile(infile, outfile):
+ # do something here
+ pass
+
+ This will result in the following functional calls:
+ ::
+
+ compile(["1.py", "docs.rst"], "1.pyc")
+ compile(["2.py", "docs.rst"], "2.pyc")
+
+ In this example, the corresponding python files have been sneakily substituted
+ without trace in the place of the C source files.
+
+
+.. index::
+ single: @follows; mkdir (Syntax)
+ single: mkdir; @follows (Syntax)
+ single: Indicator Object (Disambiguating parameters); mkdir
+
+.. _decorators.indicator_objects.mkdir:
+
+
+******************************************************************************************
+*mkdir*
+******************************************************************************************
+
+ **mkdir(** ``directory_name1`` **, [** ``directory_name2`` **, ...] )**
+
+ The enclosed parameter is a directory name or a sequence of directory names.
+ These directories will be created as part of the prerequisites of running a task.
+
+ **Used by:**
+ * :ref:`@follows <decorators.follows>`
+
+ **Example:**
+ ::
+
+ @follows(mkdir("/output/directory"))
+ def task():
+ pass
+
+
+.. index::
+ single: @posttask; touch_file (Syntax)
+ single: touch_file; @posttask (Syntax)
+ single: Indicator Object (Disambiguating parameters); touch_file
+
+.. _decorators.touch_file:
+
+
+******************************************************************************************
+*touch_file*
+******************************************************************************************
+
+ **touch_file(** ``file_name`` **)**
+
+ The enclosed parameter is a file name. This file will be ``touch``\ -ed after a
+ task is executed.
+
+ This will change the date/time stamp of the ``file_name`` to the current date/time.
+ If the file does not exist, an empty file will be created.
+
+
+ **Used by:**
+ * :ref:`@posttask <decorators.posttask>`
+
+ **Example:**
+ ::
+
+ @posttask(touch_file("task_completed.flag"))
+ @files(None, "a.1")
+ def do_task(input_file, output_file):
+ pass
+
+
+.. index::
+ pair: output_from; Indicator Object (Disambiguating parameters)
+
+.. _decorators.output_from:
+
+******************************************************************************************
+*output_from*
+******************************************************************************************
+
+    **output_from (** ``file_name_string1`` **[,** ``file_name_string2`` **, ...] )**
+
+ Indicates that any enclosed strings are not file names but refer to task functions.
+
+ **Used by:**
+ * :ref:`@split <decorators.split>`
+ * :ref:`@transform <decorators.transform>`
+ * :ref:`@merge <decorators.merge>`
+ * :ref:`@collate <decorators.collate>`
+ * :ref:`@subdivide <decorators.subdivide>`
+ * :ref:`@product <decorators.product>`
+ * :ref:`@permutations <decorators.permutations>`
+ * :ref:`@combinations <decorators.combinations>`
+ * :ref:`@combinations_with_replacement <decorators.combinations_with_replacement>`
+ * :ref:`@files <decorators.files>`
+
+ **Example:**
+ ::
+
+ @split(["a.file", ("b.file", output_from("task1", 76, "task2"))], "*.split")
+ def task2(input, output):
+ pass
+
+
+ is equivalent to:
+
+ ::
+
+ @split(["a.file", ("b.file", (task1, 76, task2))], "*.split")
+ def task2(input, output):
+ pass
+
+
+
+
+.. index::
+ single: @files_re; combine (Deprecated Syntax)
+    single: combine; @files_re (Deprecated Syntax)
+ single: Indicator Object (Disambiguating parameters); combine
+
+.. _decorators.combine:
+
+******************************************************************************************
+*combine*
+******************************************************************************************
+
+ **combine(** ``arguments`` **)**
+
+ .. warning::
+
+ This is deprecated syntax.
+
+ Please do not use!
+
+ :ref:`@merge <decorators.merge>` and :ref:`@collate <decorators.collate>` are more powerful
+ and have straightforward syntax.
+
+ Indicates that the *inputs* of :ref:`@files_re <decorators.files_re>` will be collated
+ or summarised into *outputs* by category. See the :ref:`Manual <new_manual.files_re.combine>` or
+    :ref:`@collate <new_manual.collate>` for examples.
+
+
+ **Used by:**
+ * :ref:`@files_re <new_manual.files_re.combine>`
+
+ **Example:**
+ ::
+
+        @files_re('*.animals',                     # inputs = all *.animals files
+ r'mammals.([^.]+)', # regular expression
+ combine(r'\1/animals.in_my_zoo'), # single output file per species
+ r'\1' ) # species name
+ def capture_mammals(infiles, outfile, species):
+ # summarise all animals of this species
+ ""
+
diff --git a/doc/decorators/jobs_limit.rst b/doc/decorators/jobs_limit.rst
new file mode 100644
index 0000000..1e0ee41
--- /dev/null
+++ b/doc/decorators/jobs_limit.rst
@@ -0,0 +1,73 @@
+.. include:: ../global.inc
+.. _decorators.jobs_limit:
+.. index::
+ pair: @jobs_limit; Syntax
+
+.. seealso::
+
+ * :ref:`Decorators <decorators>` for more decorators
+
+
+########################
+ at jobs_limit
+########################
+
+.. |maximum_num_of_jobs| replace:: `maximum_num_of_jobs`
+.. _maximum_num_of_jobs: `decorators.jobs_limit.maximum_num_of_jobs`_
+.. |name| replace:: `name`
+.. _name: `decorators.jobs_limit.name`_
+
+*****************************************************************************************************************************************
+*@jobs_limit* ( |maximum_num_of_jobs|_, [ |name|_ ])
+*****************************************************************************************************************************************
+ **Purpose:**
+ | Manages the resources available for a task.
+ | Limits the number of concurrent jobs which can be run in parallel for this task
+ | Overrides the value for ``multiprocess`` in :ref:`pipeline_run <pipeline_functions.pipeline_run>`
+    | If an optional ``name`` is given, the same limit is shared across all tasks with the same ``@jobs_limit`` name.
+
+
+ **Parameters:**
+
+.. _decorators.jobs_limit.maximum_num_of_jobs:
+
+
+ * *maximum_num_of_jobs*
+ The maximum number of concurrent jobs for this task. Must be an integer number
+ greater than or equal to 1.
+
+.. _decorators.jobs_limit.name:
+
+ * *name*
+ Optional name for the limit. All tasks with the same name share the same limit if they
+ are running concurrently.
+
+ **Example**
+ ::
+
+ from ruffus import *
+
+ # make list of 10 files
+ @split(None, "*.stage1")
+ def make_files(input_file, output_files):
+ for i in range(10):
+ open("%d.stage1" % i, "w")
+
+ @jobs_limit(2)
+ @transform(make_files, suffix(".stage1"), ".stage2")
+ def stage1(input_file, output_file):
+ open(output_file, "w")
+
+ @transform(stage1, suffix(".stage2"), ".stage3")
+ def stage2(input_file, output_file):
+ open(output_file, "w")
+
+ pipeline_run([stage2], multiprocess = 5)
+
+    will run the 10 jobs of ``stage1`` 2 at a time, while ``stage2`` will
+ run 5 at a time (from ``multiprocess = 5``):
+
+ .. image:: ../images/jobs_limit.png
+
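+    To share one limit between several tasks, give them the same limit name. The following
+    sketch (the task names and the ``"db"`` label are hypothetical) reuses ``make_files`` from
+    the example above:
+    ::
+
+        @jobs_limit(3, "db")
+        @transform(make_files, suffix(".stage1"), ".stage4")
+        def query_db_1(input_file, output_file):
+            open(output_file, "w")
+
+        @jobs_limit(3, "db")
+        @transform(make_files, suffix(".stage1"), ".stage5")
+        def query_db_2(input_file, output_file):
+            open(output_file, "w")
+
+    At most 3 jobs from ``query_db_1`` and ``query_db_2`` *combined* will then run at any one time.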
+
+
diff --git a/doc/decorators/merge.rst b/doc/decorators/merge.rst
new file mode 100644
index 0000000..bd85ee8
--- /dev/null
+++ b/doc/decorators/merge.rst
@@ -0,0 +1,64 @@
+.. include:: ../global.inc
+.. _decorators.merge:
+.. index::
+ pair: @merge; Syntax
+
+.. seealso::
+
+ * :ref:`Decorators <decorators>` for more decorators
+
+.. |tasks_or_file_names| replace:: `tasks_or_file_names`
+.. _tasks_or_file_names: `decorators.merge.tasks_or_file_names`_
+.. |extra_parameters| replace:: `extra_parameters`
+.. _extra_parameters: `decorators.merge.extra_parameters`_
+.. |output_file| replace:: `output_file`
+.. _output_file: `decorators.merge.output_file`_
+
+########################
+ at merge
+########################
+
+************************************************************************************
+*@merge* ( |tasks_or_file_names|_, |output_file|_, [|extra_parameters|_,...] )
+************************************************************************************
+ **Purpose:**
+ Merges multiple input files into a single output.
+
+ Only out of date tasks (comparing input and output files) will be run
+
+ **Example**::
+
+ @merge(previous_task, 'all.summary')
+ def summarize(infiles, summary_file):
+ pass
+
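+    As a slightly fuller (hypothetical) sketch, the merged output might simply concatenate
+    all of the input files:
+    ::
+
+        @merge(previous_task, 'all.summary')
+        def summarize(infiles, summary_file):
+            # write every input file, in order, into the single summary file
+            with open(summary_file, "w") as oo:
+                for infile in infiles:
+                    oo.write(open(infile).read())
+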
+ **Parameters:**
+
+
+.. _decorators.merge.tasks_or_file_names:
+
+ * *tasks_or_file_names*
+ can be a:
+
+ #. Task / list of tasks (as in the example above).
+ File names are taken from the output of the specified task(s)
+ #. (Nested) list of file name strings.
+ File names containing ``*[]?`` will be expanded as a |glob|_.
+ E.g.:``"a.*" => "a.1", "a.2"``
+
+
+.. _decorators.merge.output_file:
+
+ * *output_file*
+ Specifies the resulting output file name(s).
+
+.. _decorators.merge.extra_parameters:
+
+ * *extra_parameters, ...*
+ Any optional extra parameters are passed verbatim to the task function
+
+
+
+See :ref:`here <decorators.collate>` for more advanced uses of merging.
+
+
diff --git a/doc/decorators/mkdir.rst b/doc/decorators/mkdir.rst
new file mode 100644
index 0000000..b516b1a
--- /dev/null
+++ b/doc/decorators/mkdir.rst
@@ -0,0 +1,220 @@
+.. include:: ../global.inc
+.. _decorators.mkdir:
+.. index::
+ pair: @mkdir; Syntax
+
+.. seealso::
+
+ * :ref:`Decorators <decorators>` for more decorators
+ * More on @mkdir in the ``Ruffus`` :ref:`Manual <new_manual.mkdir>`
+ * :ref:`@follows(mkdir("dir")) <decorators.follows>` specifies the creation of a *single* directory as a task pre-requisite.
+
+########################
+ at mkdir
+########################
+
+.. |tasks_or_file_names| replace:: `tasks_or_file_names`
+.. _tasks_or_file_names: `decorators.mkdir.tasks_or_file_names`_
+.. |output_pattern| replace:: `output_pattern`
+.. _output_pattern: `decorators.mkdir.output_pattern`_
+.. |matching_regex| replace:: `matching_regex`
+.. _matching_regex: `decorators.mkdir.matching_regex`_
+.. |matching_formatter| replace:: `matching_formatter`
+.. _matching_formatter: `decorators.mkdir.matching_formatter`_
+.. |suffix_string| replace:: `suffix_string`
+.. _suffix_string: `decorators.mkdir.suffix_string`_
+
+******************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
+*@mkdir* ( |tasks_or_file_names|_, :ref:`suffix<decorators.suffix>`\ *(*\ |suffix_string|_\ *)*\ | :ref:`regex<decorators.regex>`\ *(*\ |matching_regex|_\ *)* | :ref:`formatter<decorators.formatter>`\ *(*\ |matching_formatter|_\ *)*\, |output_pattern|_)
+******************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
+ **Purpose:**
+
+ * Prepares directories to receive *Output* files
+ * Used when *Output* path names are generated at runtime from *Inputs*. **mkdir** can make sure these runtime specified paths exist.
+ * Directory names are generated from **Input** using string substitution via :ref:`formatter() <decorators.formatter>`, :ref:`suffix() <decorators.suffix>` or :ref:`regex() <decorators.regex>`.
+ * Behaves essentially like ``@transform`` but with its own (internal) function which does the actual work of making a directory
+ * Does *not* invoke the host task function to which it is attached
+ * Makes specified directories using `os.makedirs <http://docs.python.org/2/library/os.html#os.makedirs>`__
+ * Multiple directories can be created in a list
+
+ .. note::
+
+ Only missing directories are created.
+
+ In other words, the same directory can be specified multiple times safely without, for example, being recreated repeatedly.
+
+ Sometimes, for pipelines with multiple entry points, this is the only way to make sure that certain working or output
+ directories are always created or available *before* the pipeline runs.
+
+ **Simple Example**
+
+ Creates multiple directories per job to hold the results of :ref:`@transform<decorators.transform>`
+
+ .. code-block:: python
+ :emphasize-lines: 10,20
+
+ from ruffus import *
+
+ # initial files
+ @originate([ 'A.start',
+ 'B.start'])
+ def create_initial_files(output_file):
+ with open(output_file, "w") as oo: pass
+
+
+ # create files without making directories -> ERROR
+ @transform( create_initial_files,
+ formatter(),
+ ["{path[0]}/{basename[0]}/processed.txt",
+ "{path[0]}/{basename[0]}.tmp/tmp.processed.txt"])
+ def create_files_without_mkdir(input_file, output_files):
+ open(output_files[0], "w")
+ open(output_files[1], "w")
+
+
+ # create files after making corresponding directories
+ @mkdir( create_initial_files,
+ formatter(),
+ ["{path[0]}/{basename[0]}", # create directory
+ "{path[0]}/{basename[0]}.tmp"]) # create directory.tmp
+ @transform( create_initial_files,
+ formatter(),
+ ["{path[0]}/{basename[0]}/processed.txt",
+ "{path[0]}/{basename[0]}.tmp/tmp.processed.txt"])
+ def create_files_with_mkdir(input_file, output_files):
+ open(output_files[0], "w")
+ open(output_files[1], "w")
+
+ pipeline_run([create_files_without_mkdir])
+ pipeline_run([create_files_with_mkdir])
+
+ Running without making the directories first gives errors:
+
+ .. code-block:: python
+ :emphasize-lines: 14-19
+
+ >>> pipeline_run([create_files_without_mkdir])
+ Job = [None -> A.start] completed
+ Job = [None -> B.start] completed
+ Completed Task = create_initial_files
+
+ Traceback (most recent call last):
+ File "<stdin>", line 1, in <module>
+ File "/usr/local/lib/python2.7/dist-packages/ruffus/task.py", line 3738, in pipeline_run
+ raise job_errors
+ ruffus.ruffus_exceptions.RethrownJobError:
+
+ Original exception:
+
+ >>> # Exception #1
+ >>> # 'exceptions.IOError([Errno 2] No such file or directory: 'A/processed.txt')' raised in ...
+ >>> # Task = def create_files_without_mkdir(...):
+ >>> # Job = [A.start -> [processed.txt, tmp.processed.txt]]
+
+
+ Running after making the directories first:
+
+ .. code-block:: python
+ :emphasize-lines: 15
+
+ >>> pipeline_run([create_files_with_mkdir])
+ Job = [None -> A.start] completed
+ Job = [None -> B.start] completed
+ Completed Task = create_initial_files
+ Make directories [A, A.tmp] completed
+ Make directories [B, B.tmp] completed
+ Completed Task = (mkdir 1) before create_files_with_mkdir
+ Job = [A.start -> [processed.txt, tmp.processed.txt]] completed
+ Job = [B.start -> [processed.txt, tmp.processed.txt]] completed
+ Completed Task = create_files_with_mkdir
+
+
+
+ **Escaping regular expression patterns**
+
+ A string like ``universal.h`` in ``add_inputs`` will be added *as is*.
+ ``r"\1.h"``, however, performs suffix substitution, with the special form ``r"\1"`` matching everything up to the suffix.
+ Remember to 'escape' ``r"\1"`` otherwise Ruffus will complain and throw an Exception to remind you.
+ The most convenient way is to use a python "raw" string.
+
+ **Parameters:**
+
+.. _decorators.mkdir.tasks_or_file_names:
+
+ * *tasks_or_file_names*
+ can be a:
+
+ #. Task / list of tasks (as in the example above).
+ File names are taken from the output of the specified task(s)
+ #. (Nested) list of file name strings.
+ File names containing ``*[]?`` will be expanded as a |glob|_.
+ E.g.:``"a.*" => "a.1", "a.2"``
+
+.. _decorators.mkdir.suffix_string:
+
+ * *suffix_string*
+ must be wrapped in a :ref:`suffix<decorators.suffix>` indicator object.
+ The end of each input file name which matches ``suffix_string`` will be replaced by ``output_pattern``.
+
+ Input file names which do not match suffix_string will be ignored
+
+
+ The non-suffix part of the match can be referred to using the ``"\1"`` pattern. This
+ can be useful for putting the output in a different directory, for example::
+
+
+ @mkdir(["1.c", "2.c"], suffix(".c"), r"my_path/\1.o")
+ def compile(infile, outfile):
+ pass
+
+ This results in the following function calls:
+
+ ::
+
+ # 1.c -> my_path/1.o
+ # 2.c -> my_path/2.o
+ compile("1.c", "my_path/1.o")
+ compile("2.c", "my_path/2.o")
+
+ For convenience and visual clarity, the ``"\1"`` can be omitted from the output parameter.
+ However, the ``"\1"`` is mandatory for string substitutions in additional parameters, ::
+
+
+ @mkdir(["1.c", "2.c"], suffix(".c"), [r"\1.o", ".o"], r"Compiling \1", "verbatim")
+ def compile(infile, outfile):
+ pass
+
+ Results in the following function calls:
+
+ ::
+
+ compile("1.c", ["1.o", "1.o"], "Compiling 1", "verbatim")
+ compile("2.c", ["2.o", "2.o"], "Compiling 2", "verbatim")
+
+ Since r"\1" is optional for the output parameter, ``"\1.o"`` and ``".o"`` are equivalent.
+ However, strings in other parameters which do not contain r"\1" will be included verbatim, much
+ like the string ``"verbatim"`` in the above example.
+
+
+
+
+.. _decorators.mkdir.matching_regex:
+
+ * *matching_regex*
+ is a python regular expression string, which must be wrapped in
+ a :ref:`regex<decorators.regex>` indicator object.
+ See the python `regular expression (re) <http://docs.python.org/library/re.html>`_
+ documentation for details of regular expression syntax.
+ Each output directory name is created using regular expression substitution with
+ ``output_pattern`` (see the sketch at the end of this section).
+
+.. _decorators.mkdir.matching_formatter:
+
+ * *matching_formatter*
+ a :ref:`formatter<decorators.formatter>` indicator object containing optionally
+ a python `regular expression (re) <http://docs.python.org/library/re.html>`_.
+
+.. _decorators.mkdir.output_pattern:
+
+ * *output_pattern*
+ Specifies the resulting directory name(s) to be created.
+
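+ A minimal sketch of ``@mkdir`` with ``regex(...)``, stacked above the task whose
+ *Output* will live in the new directories (the file and directory names are
+ illustrative only)::
+
+     from ruffus import *
+
+     # create one ".results" directory per input file before writing into it
+     @mkdir(["sample1.fastq", "sample2.fastq"],
+            regex(r"(.+)\.fastq$"),
+            r"\1.results")
+     @transform(["sample1.fastq", "sample2.fastq"],
+                regex(r"(.+)\.fastq$"),
+                r"\1.results/\1.counts")
+     def count_reads(input_file, output_file):
+         open(output_file, "w").close()
+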
diff --git a/doc/decorators/originate.rst b/doc/decorators/originate.rst
new file mode 100644
index 0000000..513a285
--- /dev/null
+++ b/doc/decorators/originate.rst
@@ -0,0 +1,79 @@
+.. include:: ../global.inc
+.. _decorators.originate:
+.. index::
+ pair: @originate; Syntax
+
+.. seealso::
+
+ * :ref:`Decorators <decorators>` for more decorators
+
+########################
+ at originate
+########################
+
+.. |output_files| replace:: `output_files`
+.. _output_files: `decorators.originate.output_files`_
+.. |extra_parameters| replace:: `extra_parameters`
+.. _extra_parameters: `decorators.originate.extra_parameters`_
+
+
+***********************************************************************************************************************************************************
+*@originate* ( |output_files|_, [|extra_parameters|_,...] )
+***********************************************************************************************************************************************************
+ **Purpose:**
+ * Creates (originates) a set of starting files without dependencies, from scratch (*ex nihilo*!)
+ * Only called to create files which do not exist.
+ * Invoked once (as a separate job) per item in the ``output_files`` list.
+
+ .. note::
+
+ The first argument for the task function is the *Output*. There is by definition no
+ *Input* for ``@originate``
+
+ **Example**:
+
+ .. code-block:: python
+
+ from ruffus import *
+ @originate(["a", "b", "c", "d"], "extra")
+ def test(output_file, extra):
+ open(output_file, "w")
+
+ pipeline_run()
+
+ .. code-block:: pycon
+ :emphasize-lines: 8,11
+
+ >>> pipeline_run()
+ Job = [None -> a, extra] completed
+ Job = [None -> b, extra] completed
+ Job = [None -> c, extra] completed
+ Job = [None -> d, extra] completed
+ Completed Task = test
+
+ >>> # all files exist: nothing to do
+ >>> pipeline_run()
+
+ >>> # delete 'a' so that it is missing
+ >>> import os
+ >>> os.unlink("a")
+
+ >>> pipeline_run()
+ Job = [None -> a, extra] completed
+ Completed Task = test
+
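+ Each item of *output_files* may itself be a list, in which case a single job
+ creates that whole group of files together. A minimal sketch (the file names
+ are illustrative)::
+
+     from ruffus import *
+
+     # two jobs, each creating a *pair* of files
+     @originate([["a.1", "a.2"],
+                 ["b.1", "b.2"]])
+     def create_file_pairs(output_files):
+         for output_file in output_files:
+             open(output_file, "w").close()
+
+     pipeline_run()
+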
+ **Parameters:**
+
+
+.. _decorators.originate.output_files:
+
+ * *output_files*
+ * Can be a single file name or a list of files
+ * Each item in the list is treated as the *Output* of a separate job
+
+
+.. _decorators.originate.extra_parameters:
+
+ * *extra_parameters*
+ Any extra parameters are passed verbatim to the task function
+
diff --git a/doc/decorators/parallel.rst b/doc/decorators/parallel.rst
new file mode 100644
index 0000000..f16cd48
--- /dev/null
+++ b/doc/decorators/parallel.rst
@@ -0,0 +1,81 @@
+.. include:: ../global.inc
+.. _decorators.parallel:
+.. index::
+ pair: @parallel; Syntax
+
+.. seealso::
+
+ * :ref:`Decorators <decorators>` for more decorators
+
+
+########################
+ at parallel
+########################
+
+.. |job_params| replace:: `job_params`
+.. _job_params: `decorators.parallel.job_params`_
+.. |parameter_generating_function| replace:: `parameter_generating_function`
+.. _parameter_generating_function: `decorators.parallel.parameter_generating_function`_
+
+
+*****************************************************************************************************************************************
+*@parallel* ( [ [|job_params|_, ...], [|job_params|_, ...]...] | |parameter_generating_function|_)
+*****************************************************************************************************************************************
+ **Purpose:**
+ To apply the (task) function to a set of parameters in parallel without file dependency checking.
+
+ Most useful allied to :ref:`@check_if_uptodate() <decorators.check_if_uptodate>`
+
+ **Example**::
+
+ import sys
+ from ruffus import *
+ parameters = [
+ ['A', 1, 2], # 1st job
+ ['B', 3, 4], # 2nd job
+ ['C', 5, 6], # 3rd job
+ ]
+ @parallel(parameters)
+ def parallel_task(name, param1, param2):
+ sys.stderr.write(" Parallel task %s: " % name)
+ sys.stderr.write("%d + %d = %d\n" % (param1, param2, param1 + param2))
+
+ pipeline_run([parallel_task])
+
+ **Parameters:**
+
+
+.. _decorators.parallel.job_params:
+
+ * *job_params*:
+ Requires a sequence of parameters, one set for each job.
+
+ Each set of parameters can be one or more items in a sequence which will be passed to
+ the decorated task function iteratively (or in parallel)
+
+ For example::
+
+ parameters = [
+ ['A', 1, 2], # 1st job
+ ['B', 3, 4], # 2nd job
+ ['C', 5, 6], # 3rd job
+ ]
+ @parallel(parameters)
+ def parallel_task(name, param1, param2):
+ pass
+
+ Will result in the following function calls::
+
+ parallel_task('A', 1, 2)
+ parallel_task('B', 3, 4)
+ parallel_task('C', 5, 6)
+
+
+
+.. _decorators.parallel.parameter_generating_function:
+
+ * *parameter_generating_function*
+ #. A generator yielding sets of parameters (as above), one at a time and on the fly (see the sketch below)
+ #. A function returning a sequence of parameter sets, as above
+
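+ A minimal sketch of a generating function, reusing the illustrative parameters
+ from the example above (on the assumption that the function object itself is
+ passed to the decorator, as with on-the-fly parameter generation elsewhere in Ruffus)::
+
+     from ruffus import *
+
+     def generate_parameters():
+         # yields one parameter set per job, computed on the fly
+         for name, param1, param2 in [['A', 1, 2], ['B', 3, 4], ['C', 5, 6]]:
+             yield [name, param1, param2]
+
+     @parallel(generate_parameters)
+     def parallel_task(name, param1, param2):
+         pass
+
+     pipeline_run([parallel_task])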
+
+
diff --git a/doc/decorators/permutations.rst b/doc/decorators/permutations.rst
new file mode 100644
index 0000000..1ef39a6
--- /dev/null
+++ b/doc/decorators/permutations.rst
@@ -0,0 +1,158 @@
+.. include:: ../global.inc
+.. _decorators.permutations:
+.. index::
+ pair: @permutations; Syntax
+
+.. seealso::
+
+ * :ref:`Decorators <decorators>` for more decorators
+
+########################
+ at permutations
+########################
+
+.. |tasks_or_file_names| replace:: `tasks_or_file_names`
+.. _tasks_or_file_names: `decorators.permutations.tasks_or_file_names`_
+.. |extra_parameters| replace:: `extra_parameters`
+.. _extra_parameters: `decorators.permutations.extra_parameters`_
+.. |output_pattern| replace:: `output_pattern`
+.. _output_pattern: `decorators.permutations.output_pattern`_
+.. |matching_formatter| replace:: `matching_formatter`
+.. _matching_formatter: `decorators.permutations.matching_formatter`_
+
+
+
+********************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
+*@permutations* ( |tasks_or_file_names|_, :ref:`formatter<decorators.formatter>`\ *(*\ |matching_formatter|_\ *)*\, ``tuple_size``, |output_pattern|_, [|extra_parameters|_,...] )
+********************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
+ **Purpose:**
+
+ Generates the **permutations** of a set of **Input** files, ``tuple_size`` elements at a time
+
+ The effect is analogous to the python `itertools <http://docs.python.org/2/library/itertools.html#itertools.permutations>`__
+ function of the same name:
+
+ .. code-block:: pycon
+ :emphasize-lines: 2
+
+ >>> from itertools import permutations
+ >>> # permutations('ABCD', 2) --> AB AC AD BA BC BD CA CB CD DA DB DC
+ >>> [ "".join(a) for a in permutations("ABCD", 2)]
+ ['AB', 'AC', 'AD', 'BA', 'BC', 'BD', 'CA', 'CB', 'CD', 'DA', 'DB', 'DC']
+
+ Only out of date tasks (comparing input and output files) will be run
+
+ Output file names and strings in the extra parameters
+ are determined from |tasks_or_file_names|_, i.e. from the output
+ of upstream tasks, or a list of file names, after string replacement via
+ :ref:`formatter<decorators.formatter>`.
+
+ The replacement strings require an extra level of indirection to refer to
+ parsed components:
+
+ #. The first level refers to which *set* in each tuple of inputs.
+ #. The second level refers to which input file in any particular *set* of inputs.
+
+ **Example**:
+
+ Calculates the **@permutations** of **A,B,C,D** files
+
+ .. code-block:: python
+ :emphasize-lines: 13,17,20,25,28-30
+
+ from ruffus import *
+ from ruffus.combinatorics import *
+
+ # initial file pairs
+ @originate([ ['A.1_start', 'A.2_start'],
+ ['B.1_start', 'B.2_start'],
+ ['C.1_start', 'C.2_start'],
+ ['D.1_start', 'D.2_start']])
+ def create_initial_files_ABCD(output_files):
+ for output_file in output_files:
+ with open(output_file, "w") as oo: pass
+
+ # @permutations
+ @permutations(create_initial_files_ABCD, # Input
+ formatter(), # match input files
+
+ # tuple of 2 at a time
+ 2,
+
+ # Output Replacement string
+ "{path[0][0]}/"
+ "{basename[0][1]}_vs_"
+ "{basename[1][1]}.permutations",
+
+ # Extra parameter: path for 1st set of files, 1st file name
+ "{path[0][0]}",
+
+ # Extra parameter
+ ["{basename[0][0]}", # basename for 1st set of files, 1st file name
+ "{basename[1][0]}", # 2nd
+ ])
+ def permutations_task(input_file, output_parameter, shared_path, basenames):
+ print " - ".join(basenames)
+
+
+ #
+ # Run
+ #
+ pipeline_run(verbose=0)
+
+
+ This results in:
+
+ .. code-block:: pycon
+
+ >>> pipeline_run(verbose=0)
+
+ A - B
+ A - C
+ A - D
+ B - A
+ B - C
+ B - D
+ C - A
+ C - B
+ C - D
+ D - A
+ D - B
+ D - C
+
+
+ **Parameters:**
+
+
+.. _decorators.permutations.tasks_or_file_names:
+
+ * *tasks_or_file_names*
+ can be a:
+
+ #. Task / list of tasks (as in the example above).
+ File names are taken from the output of the specified task(s)
+ #. (Nested) list of file name strings.
+ File names containing ``*[]?`` will be expanded as a |glob|_.
+ E.g.:``"a.*" => "a.1", "a.2"``
+
+
+.. _decorators.permutations.matching_formatter:
+
+ * *matching_formatter*
+ a :ref:`formatter<decorators.formatter>` indicator object containing optionally
+ a python `regular expression (re) <http://docs.python.org/library/re.html>`_.
+
+
+.. _decorators.permutations.output_pattern:
+
+ * *output_pattern*
+ Specifies the resulting output file name(s) after string
+ substitution
+
+
+.. _decorators.permutations.extra_parameters:
+
+ * *extra_parameters*
+ Optional extra parameters are passed to the functions after string
+ substitution
+
diff --git a/doc/decorators/posttask.rst b/doc/decorators/posttask.rst
new file mode 100644
index 0000000..e0a6a2f
--- /dev/null
+++ b/doc/decorators/posttask.rst
@@ -0,0 +1,70 @@
+.. include:: ../global.inc
+.. _decorators.posttask:
+.. index::
+ pair: @posttask; Syntax
+
+.. seealso::
+
+ * :ref:`Decorators <decorators>` for more decorators
+
+.. |function| replace:: `function`
+.. _function: `decorators.posttask.function`_
+.. |file_name| replace:: `file_name`
+.. _file_name: `decorators.posttask.file_name`_
+
+########################
+ at posttask
+########################
+
+*****************************************************************************************************************************************
+*@posttask* (|function|_ | :ref:`touch_file<decorators.touch_file>`\ *(*\ |file_name|_\ *)*\)
+*****************************************************************************************************************************************
+ **Purpose:**
+ Calls functions to signal the completion of each task
+
+ **Example**::
+
+ from ruffus import *
+
+ def task_finished():
+ print "hooray"
+
+ @posttask(task_finished)
+ @files(None, "a.1")
+ def create_if_necessary(input_file, output_file):
+ open(output_file, "w")
+
+ pipeline_run([create_if_necessary])
+
+ **Parameters:**
+
+.. _decorators.posttask.function:
+
+ * *function*:
+ ``function()`` will be called whenever Ruffus passes through a task.
+
+ This may happen even if all of the jobs are up-to-date:
+ for example, when an upstream task is out-of-date and execution passes through
+ this point in the pipeline.
+
+.. _decorators.posttask.file_name:
+
+ * *file_name*
+ Files to be ``touch``\ -ed after the task is executed.
+
+ This will change the date/time stamp of the ``file_name`` to the current date/time.
+ If the file does not exist, an empty file will be created.
+
+ Must be wrapped in a :ref:`touch_file<decorators.touch_file>` indicator object::
+
+ from ruffus import *
+
+ @posttask(touch_file("task_completed.flag"))
+ @files(None, "a.1")
+ def create_if_necessary(input_file, output_file):
+ open(output_file, "w")
+
+ pipeline_run([create_if_necessary])
+
+
+
diff --git a/doc/decorators/product.rst b/doc/decorators/product.rst
new file mode 100644
index 0000000..5c77af4
--- /dev/null
+++ b/doc/decorators/product.rst
@@ -0,0 +1,192 @@
+.. include:: ../global.inc
+.. _decorators.product:
+.. index::
+ pair: @product; Syntax
+
+.. seealso::
+
+ * :ref:`Decorators <decorators>` for more decorators
+
+########################
+ at product
+########################
+
+.. |tasks_or_file_names| replace:: `tasks_or_file_names`
+.. _tasks_or_file_names: `decorators.product.tasks_or_file_names`_
+.. |extra_parameters| replace:: `extra_parameters`
+.. _extra_parameters: `decorators.product.extra_parameters`_
+.. |output_pattern| replace:: `output_pattern`
+.. _output_pattern: `decorators.product.output_pattern`_
+.. |matching_formatter| replace:: `matching_formatter`
+.. _matching_formatter: `decorators.product.matching_formatter`_
+
+
+
+********************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
+*@product* ( |tasks_or_file_names|_, :ref:`formatter<decorators.formatter>`\ *(*\ |matching_formatter|_\ *)*\, [|tasks_or_file_names|_, :ref:`formatter<decorators.formatter>`\ *(*\ |matching_formatter|_\ *)*\, ... ], |output_pattern|_, [|extra_parameters|_,...] )
+********************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
+ **Purpose:**
+
+ Generates the Cartesian **product**, i.e. all vs all comparisons, between sets of input files.
+
+ The effect is analogous to the python `itertools <http://docs.python.org/2/library/itertools.html#itertools.product>`__
+ function of the same name, i.e. a nested for loop.
+
+ .. code-block:: pycon
+ :emphasize-lines: 2
+
+ >>> from itertools import product
+ >>> # product('ABC', 'XYZ') --> AX AY AZ BX BY BZ CX CY CZ
+ >>> [ "".join(a) for a in product('ABC', 'XYZ')]
+ ['AX', 'AY', 'AZ', 'BX', 'BY', 'BZ', 'CX', 'CY', 'CZ']
+
+ Only out of date tasks (comparing input and output files) will be run
+
+ Output file names and strings in the extra parameters
+ are determined from |tasks_or_file_names|_, i.e. from the output
+ of upstream tasks, or a list of file names, after string replacement via
+ :ref:`formatter<decorators.formatter>`.
+
+ The replacement strings require an extra level of indirection to refer to
+ parsed components:
+
+ #. The first level refers to which *set* of inputs (e.g. **A,B** or **P,Q** or **X,Y**
+ in the following example.)
+ #. The second level refers to which input file in any particular *set* of inputs.
+
+ For example, ``'{basename[2][0]}'`` is the `basename <http://docs.python.org/2/library/os.path.html#os.path.basename>`__ for
+ * the third set of inputs (**X,Y**) and
+ * the first file name string in each **Input** of that set (``"x.1_start"`` and ``"y.1_start"``)
+
+ **Example**:
+
+ Calculates the **@product** of **A,B** and **P,Q** and **X, Y** files
+
+ .. code-block:: python
+ :emphasize-lines: 4,17,19,22,25,27,28,29,30,32,34,35,36
+
+ from ruffus import *
+ from ruffus.combinatorics import *
+
+ # Three sets of initial files
+ @originate([ 'a.start', 'b.start'])
+ def create_initial_files_ab(output_file):
+ with open(output_file, "w") as oo: pass
+
+ @originate([ 'p.start', 'q.start'])
+ def create_initial_files_pq(output_file):
+ with open(output_file, "w") as oo: pass
+
+ @originate([ ['x.1_start', 'x.2_start'],
+ ['y.1_start', 'y.2_start'] ])
+ def create_initial_files_xy(output_file):
+ with open(output_file, "w") as oo: pass
+
+ # @product
+ @product( create_initial_files_ab, # Input
+ formatter("(.start)$"), # match input file set # 1
+
+ create_initial_files_pq, # Input
+ formatter("(.start)$"), # match input file set # 2
+
+ create_initial_files_xy, # Input
+ formatter("(.start)$"), # match input file set # 3
+
+ "{path[0][0]}/" # Output Replacement string
+ "{basename[0][0]}_vs_" #
+ "{basename[1][0]}_vs_" #
+ "{basename[2][0]}.product", #
+
+ "{path[0][0]}", # Extra parameter: path for 1st set of files, 1st file name
+
+ ["{basename[0][0]}", # Extra parameter: basename for 1st set of files, 1st file name
+ "{basename[1][0]}", # 2nd
+ "{basename[2][0]}", # 3rd
+ ])
+ def product_task(input_file, output_parameter, shared_path, basenames):
+ print "# basenames = ", " ".join(basenames)
+ print "input_parameter = ", input_file
+ print "output_parameter = ", output_parameter, "\n"
+
+
+ #
+ # Run
+ #
+ pipeline_run(verbose=0)
+
+
+ This results in:
+
+ .. code-block:: pycon
+ :emphasize-lines: 2,6,10,14,18,22,26,30
+
+ >>> pipeline_run(verbose=0)
+
+ # basenames = a p x
+ input_parameter = ('a.start', 'p.start', 'x.start')
+ output_parameter = /home/lg/temp/a_vs_p_vs_x.product
+
+ # basenames = a p y
+ input_parameter = ('a.start', 'p.start', 'y.start')
+ output_parameter = /home/lg/temp/a_vs_p_vs_y.product
+
+ # basenames = a q x
+ input_parameter = ('a.start', 'q.start', 'x.start')
+ output_parameter = /home/lg/temp/a_vs_q_vs_x.product
+
+ # basenames = a q y
+ input_parameter = ('a.start', 'q.start', 'y.start')
+ output_parameter = /home/lg/temp/a_vs_q_vs_y.product
+
+ # basenames = b p x
+ input_parameter = ('b.start', 'p.start', 'x.start')
+ output_parameter = /home/lg/temp/b_vs_p_vs_x.product
+
+ # basenames = b p y
+ input_parameter = ('b.start', 'p.start', 'y.start')
+ output_parameter = /home/lg/temp/b_vs_p_vs_y.product
+
+ # basenames = b q x
+ input_parameter = ('b.start', 'q.start', 'x.start')
+ output_parameter = /home/lg/temp/b_vs_q_vs_x.product
+
+ # basenames = b q y
+ input_parameter = ('b.start', 'q.start', 'y.start')
+ output_parameter = /home/lg/temp/b_vs_q_vs_y.product
+
+
+ **Parameters:**
+
+
+.. _decorators.product.tasks_or_file_names:
+
+ * *tasks_or_file_names*
+ can be a:
+
+ #. Task / list of tasks (as in the example above).
+ File names are taken from the output of the specified task(s)
+ #. (Nested) list of file name strings.
+ File names containing ``*[]?`` will be expanded as a |glob|_.
+ E.g.:``"a.*" => "a.1", "a.2"``
+
+
+.. _decorators.product.matching_formatter:
+
+ * *matching_formatter*
+ a :ref:`formatter<decorators.formatter>` indicator object containing optionally
+ a python `regular expression (re) <http://docs.python.org/library/re.html>`_.
+
+
+.. _decorators.product.output_pattern:
+
+ * *output_pattern*
+ Specifies the resulting output file name(s) after string
+ substitution
+
+
+.. _decorators.product.extra_parameters:
+
+ * *extra_parameters*
+ Optional extra parameters are passed to the functions after string
+ substitution
+
diff --git a/doc/decorators/split.rst b/doc/decorators/split.rst
new file mode 100644
index 0000000..e0a6f5f
--- /dev/null
+++ b/doc/decorators/split.rst
@@ -0,0 +1,92 @@
+.. include:: ../global.inc
+.. _decorators.split:
+.. index::
+ pair: @split; Syntax
+
+.. seealso::
+
+ * :ref:`Decorators <decorators>` for more decorators
+
+
+########################
+ at split
+########################
+
+.. |tasks_or_file_names| replace:: `tasks_or_file_names`
+.. _tasks_or_file_names: `decorators.split.tasks_or_file_names`_
+.. |extra_parameters| replace:: `extra_parameters`
+.. _extra_parameters: `decorators.split.extra_parameters`_
+.. |output_files| replace:: `output_files`
+.. _output_files: `decorators.split.output_files`_
+
+*****************************************************************************************************************************************
+*@split* ( |tasks_or_file_names|_, |output_files|_, [|extra_parameters|_,...] )
+*****************************************************************************************************************************************
+ **Purpose:**
+ | Splits a single set of input files into multiple output file names, where the number of
+ output files may not be known beforehand.
+ | Only out of date tasks (comparing input and output files) will be run
+
+ **Example**::
+
+ @split("big_file", '*.little_files')
+ def split_big_to_small(input_file, output_files):
+ print "input_file = %s" % input_file
+ print "output_files = %s" % output_files
+
+
+ will produce::
+
+ input_file = big_file
+ output_files = *.little_files
+
+
+ **Parameters:**
+
+.. _decorators.split.tasks_or_file_names:
+
+
+ * *tasks_or_file_names*
+ can be a:
+
+ #. (Nested) list of file name strings (as in the example above).
+
+ | File names containing ``*[]?`` will be expanded as a |glob|_.
+ | E.g.:``"a.*" => "a.1", "a.2"``
+
+ #. Task / list of tasks.
+
+ File names are taken from the output of the specified task(s)
+
+
+.. _decorators.split.output_files:
+
+ * *output_files*
+ Specifies the resulting output file name(s).
+
+ | These are used **only** to check if the task is up to date.
+ | Normally you would use either a |glob|_ (e.g. ``*.little_files`` as above) or a "sentinel file"
+ to indicate that the task has completed successfully.
+ | You can of course do both:
+
+ ::
+
+ @split("big_file", ["sentinel.file", "*.little_files"])
+ def split_big_to_small(input_file, output_files):
+ pass
+
+
+.. _decorators.split.extra_parameters:
+
+ * [*extra_parameters, ...*]
+ Any extra parameters are passed verbatim to the task function
+
+
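+ A minimal runnable sketch pulling the pieces above together (the file names and
+ the number of output pieces are purely illustrative)::
+
+     import os
+     import glob
+     from ruffus import *
+
+     @split("big_file", "*.little_files")
+     def split_big_to_small(input_file, output_files):
+         # clean up any output left over from a previous, interrupted run
+         for stale in glob.glob("*.little_files"):
+             os.unlink(stale)
+         # the number of pieces is only decided at run time
+         for ii in range(3):
+             with open("%d.little_files" % ii, "w"):
+                 pass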
+
+########################################################################
+ at split with ``regex(...)``, ``add_inputs`` and ``inputs``
+########################################################################
+
+ This deprecated syntax is a synonym for :ref:`@subdivide <decorators.subdivide>`.
+
diff --git a/doc/decorators/subdivide.rst b/doc/decorators/subdivide.rst
new file mode 100644
index 0000000..b8df277
--- /dev/null
+++ b/doc/decorators/subdivide.rst
@@ -0,0 +1,189 @@
+.. include:: ../global.inc
+.. _decorators.subdivide:
+.. index::
+ pair: @subdivide; Syntax
+
+.. seealso::
+
+ * :ref:`Decorators <decorators>` for more decorators
+
+########################
+ at subdivide
+########################
+
+.. |tasks_or_file_names| replace:: `tasks_or_file_names`
+.. _tasks_or_file_names: `decorators.subdivide.tasks_or_file_names`_
+.. |extra_parameters| replace:: `extra_parameters`
+.. _extra_parameters: `decorators.subdivide.extra_parameters`_
+.. |output_pattern| replace:: `output_pattern`
+.. _output_pattern: `decorators.subdivide.output_pattern`_
+.. |matching_regex| replace:: `matching_regex`
+.. _matching_regex: `decorators.subdivide.matching_regex`_
+.. |matching_formatter| replace:: `matching_formatter`
+.. _matching_formatter: `decorators.subdivide.matching_formatter`_
+.. |input_pattern_or_glob| replace:: `input_pattern_or_glob`
+.. _input_pattern_or_glob: `decorators.subdivide.input_pattern_or_glob`_
+
+
+************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
+*@subdivide* ( |tasks_or_file_names|_, :ref:`regex<decorators.regex>`\ *(*\ |matching_regex|_\ *)* | :ref:`formatter<decorators.formatter>`\ *(*\ |matching_formatter|_\ *)*\, [ :ref:`inputs<decorators.inputs>` *(*\ |input_pattern_or_glob|_\ *)* | :ref:`add_inputs<decorators.add_inputs>` *(*\ |input_pattern_or_glob|_\ *)* ], |output_pattern|_, [|extra_parameters|_,...] )
+************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
+ **Purpose:**
+
+ * Subdivides a set of *Inputs* each further into multiple *Outputs*.
+
+ * **Many to Even More** operator
+
+ * The number of files in each *Output* can be set at runtime by the use of globs
+
+ * Output file names are specified using the :ref:`formatter<decorators.formatter>` or :ref:`regex<decorators.regex>` indicators from |tasks_or_file_names|_, i.e. from the output
+ of specified tasks, or a list of file names, or a |glob|_ matching pattern.
+
+ * Additional inputs or dependencies can be added dynamically to the task:
+ :ref:`add_inputs<decorators.add_inputs>` nests the original input parameters in a list before adding additional dependencies.
+
+ :ref:`inputs<decorators.inputs>` replaces the original input parameters wholescale.
+
+ * Only out of date tasks (comparing input and output files) will be run.
+
+ .. note::
+
+ The use of **@split** (with ``regex(...)``) as a synonym for **@subdivide** is deprecated.
+
+
+ **Example**:
+
+ .. code-block:: python
+ :emphasize-lines: 12,13,20
+
+ from ruffus import *
+ from random import randint
+ import os
+
+ @originate(['0.start', '1.start', '2.start'])
+ def create_files(output_file):
+ with open(output_file, "w"):
+ pass
+
+
+ #
+ # Subdivide each of 3 start files further into [NNN1, NNN2, NNN3] number of files
+ # where NNN1, NNN2, NNN3 are determined at run time
+ #
+ @subdivide(create_files, formatter(),
+ "{path[0]}/{basename[0]}.*.step1", # Output parameter: Glob matches any number of output file names
+ "{path[0]}/{basename[0]}") # Extra parameter: Append to this for output file names
+ def subdivide_files(input_file, output_files, output_file_name_root):
+ #
+ # IMPORTANT: cleanup rubbish from previous run first
+ #
+ for oo in output_files:
+ os.unlink(oo)
+ # The number of output files is decided at run time
+ number_of_output_files = randint(2,4)
+ for ii in range(number_of_output_files):
+ output_file_name = "{output_file_name_root}.{ii}.step1".format(**locals())
+ with open(output_file_name, "w"):
+ pass
+
+
+ #
+ # Each output of subdivide_files results in a separate job for downstream tasks
+ #
+ @transform(subdivide_files, suffix(".step1"), ".step2")
+ def analyse_files(input_file, output_file_name):
+ with open(output_file_name, "w"):
+ pass
+
+ pipeline_run()
+
+ .. comment **
+
+ The Ruffus printout shows how each of the jobs in ``subdivide_files()`` spawns
+ multiple *Output* leading to more jobs in ``analyse_files()``
+
+
+ .. code-block:: pycon
+
+ >>> pipeline_run()
+ Job = [None -> 0.start] completed
+ Job = [None -> 1.start] completed
+ Job = [None -> 2.start] completed
+ Completed Task = create_files
+ Job = [0.start -> 0.*.step1, 0] completed
+ Job = [1.start -> 1.*.step1, 1] completed
+ Job = [2.start -> 2.*.step1, 2] completed
+ Completed Task = subdivide_files
+ Job = [0.0.step1 -> 0.0.step2] completed
+ Job = [0.1.step1 -> 0.1.step2] completed
+ Job = [0.2.step1 -> 0.2.step2] completed
+ Job = [1.0.step1 -> 1.0.step2] completed
+ Job = [1.1.step1 -> 1.1.step2] completed
+ Job = [1.2.step1 -> 1.2.step2] completed
+ Job = [1.3.step1 -> 1.3.step2] completed
+ Job = [2.0.step1 -> 2.0.step2] completed
+ Job = [2.1.step1 -> 2.1.step2] completed
+ Job = [2.2.step1 -> 2.2.step2] completed
+ Job = [2.3.step1 -> 2.3.step2] completed
+ Completed Task = analyse_files
+
+
+
+
+ **Parameters:**
+
+
+.. _decorators.subdivide.tasks_or_file_names:
+
+ * *tasks_or_file_names*
+ can be a:
+
+ #. Task / list of tasks (as in the example above).
+ File names are taken from the output of the specified task(s)
+ #. (Nested) list of file name strings.
+ File names containing ``*[]?`` will be expanded as a |glob|_.
+ E.g.:``"a.*" => "a.1", "a.2"``
+
+
+.. _decorators.subdivide.matching_regex:
+
+ * *matching_regex*
+ is a python regular expression string, which must be wrapped in
+ a :ref:`regex<decorators.regex>` indicator object.
+ See the python `regular expression (re) <http://docs.python.org/library/re.html>`_
+ documentation for details of regular expression syntax.
+
+.. _decorators.subdivide.matching_formatter:
+
+ * *matching_formatter*
+ a :ref:`formatter<decorators.formatter>` indicator object containing optionally
+ a python `regular expression (re) <http://docs.python.org/library/re.html>`_.
+
+.. _decorators.subdivide.output_pattern:
+
+ * *output_pattern*
+ Specifies the resulting output file name(s). Can include glob patterns.
+ Strings are subject to :ref:`regex<decorators.regex>` or :ref:`formatter<decorators.formatter>`
+ substitution.
+
+.. _decorators.subdivide.input_pattern_or_glob:
+
+ * *input_pattern*
+ Specifies the resulting input(s) to each job.
+ Must be wrapped in an :ref:`inputs<decorators.inputs>` or an :ref:`add_inputs<decorators.add_inputs>` indicator object.
+
+ Can be a:
+
+ #. Task / list of tasks (as in the example above).
+ File names are taken from the output of the specified task(s)
+ #. (Nested) list of file name strings.
+
+ Strings are subject to :ref:`regex<decorators.regex>` or :ref:`formatter<decorators.formatter>` substitution.
+
+
+.. _decorators.subdivide.extra_parameters:
+
+ * *extra_parameters*
+ Any extra parameters are consumed by the task function and not forwarded further down the pipeline.
+ Strings are subject to :ref:`regex<decorators.regex>` or :ref:`formatter<decorators.formatter>`
+ substitution.
diff --git a/doc/decorators/todo.sphinx b/doc/decorators/todo.sphinx
new file mode 100644
index 0000000..94245d9
--- /dev/null
+++ b/doc/decorators/todo.sphinx
@@ -0,0 +1,40 @@
+html_logo
+
+ If given, this must be the name of an image file that is the logo of the docs. It is placed at the top of the sidebar; its width should therefore not exceed 200 pixels. Default: None.
+
+ New in version 0.4.1: The image file will be copied to the _static directory of the output HTML, so an already existing file with that name will be overwritten.
+
+html_favicon
+html_show_sphinx
+
+ If true, “Created using Sphinx” is shown in the HTML footer. Default is True.
+
+ New in version 0.4
+
+:term:` `
+
+:file:` `
+
+.. literalinclude:: example.py
+ :lines: 1,3,5-10,20-
+
+
+.. warning::
+
+.. versionadded:: version
+
+.. centered:: LICENSE AGREEMENT
+
+
+.. glossary::
+
+ environment
+ A structure where information about all documents under the root is
+ saved, and used for cross-referencing. The environment is pickled
+ after the parsing stage, so that successive runs only need to read
+ and parse new and changed documents.
+
+ source directory
+ The directory which, including its subdirectories, contains all
+ source files for one Sphinx project.
+
diff --git a/doc/decorators/transform.rst b/doc/decorators/transform.rst
new file mode 100644
index 0000000..494674e
--- /dev/null
+++ b/doc/decorators/transform.rst
@@ -0,0 +1,176 @@
+.. include:: ../global.inc
+.. _decorators.transform:
+.. index::
+ pair: @transform; Syntax
+
+.. seealso::
+
+ * :ref:`Decorators <decorators>` for more decorators
+
+########################
+ at transform
+########################
+
+.. |tasks_or_file_names| replace:: `tasks_or_file_names`
+.. _tasks_or_file_names: `decorators.transform.tasks_or_file_names`_
+.. |extra_parameters| replace:: `extra_parameters`
+.. _extra_parameters: `decorators.transform.extra_parameters`_
+.. |output_pattern| replace:: `output_pattern`
+.. _output_pattern: `decorators.transform.output_pattern`_
+.. |matching_regex| replace:: `matching_regex`
+.. _matching_regex: `decorators.transform.matching_regex`_
+.. |matching_formatter| replace:: `matching_formatter`
+.. _matching_formatter: `decorators.transform.matching_formatter`_
+.. |suffix_string| replace:: `suffix_string`
+.. _suffix_string: `decorators.transform.suffix_string`_
+
+******************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
+*@transform* ( |tasks_or_file_names|_, :ref:`suffix<decorators.suffix>`\ *(*\ |suffix_string|_\ *)*\ | :ref:`regex<decorators.regex>`\ *(*\ |matching_regex|_\ *)* | :ref:`formatter<decorators.formatter>`\ *(*\ |matching_formatter|_\ *)*\, |output_pattern|_, [|extra_parameters|_,...] )
+******************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
+ **Purpose:**
+ Applies the task function to transform data from input to output files.
+
+ Output file names are specified from |tasks_or_file_names|_, i.e. from the output
+ of specified tasks, or a list of file names, or a |glob|_ matching pattern.
+
+ String replacement occurs either through suffix matches via :ref:`suffix<decorators.suffix>` or
+ the :ref:`formatter<decorators.formatter>` or :ref:`regex<decorators.regex>` indicators.
+
+ Only out of date tasks (comparing input and output files) will be run
+
+ **Simple Example**
+
+ Transforms ``*.c`` to ``*.o``::
+
+ @transform(["1.c", "2.c"], suffix(".c"), ".o")
+ def compile(infile, outfile):
+ pass
+
+ Same example with a regular expression::
+
+ @transform(["1.c", "2.c"], regex(r".c$"), ".o")
+ def compile(infile, outfile):
+ pass
+
+ Both result in the following function calls:
+
+ ::
+
+ # 1.c -> 1.o
+ # 2.c -> 2.o
+ compile("1.c", "1.o")
+ compile("2.c", "2.o")
+
+
+ **Escaping regular expression patterns**
+
+ A string like ``universal.h`` in ``add_inputs`` will be added *as is*.
+ ``r"\1.h"``, however, performs suffix substitution, with the special form ``r"\1"`` matching everything up to the suffix.
+ Remember to 'escape' ``r"\1"`` otherwise Ruffus will complain and throw an Exception to remind you.
+ The most convenient way is to use a python "raw" string.
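+
+ As a quick sketch of the point above (the variable name is only illustrative)::
+
+     output_pattern = r"\1.o"      # raw string: the backslash survives
+     # output_pattern = "\1.o"     # without the r prefix, "\1" is the control
+     #                             # character '\x01', which is not what you want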
+
+ **Parameters:**
+
+.. _decorators.transform.tasks_or_file_names:
+
+ * *tasks_or_file_names*
+ can be a:
+
+ #. Task / list of tasks (as in the example above).
+ File names are taken from the output of the specified task(s)
+ #. (Nested) list of file name strings.
+ File names containing ``*[]?`` will be expanded as a |glob|_.
+ E.g.:``"a.*" => "a.1", "a.2"``
+
+.. _decorators.transform.suffix_string:
+
+ * *suffix_string*
+ must be wrapped in a :ref:`suffix<decorators.suffix>` indicator object.
+ The end of each input file name which matches ``suffix_string`` will be replaced by ``output_pattern``.
+
+ Input file names which do not match suffix_string will be ignored
+
+
+ The non-suffix part of the match can be referred to using the ``"\1"`` pattern. This
+ can be useful for putting the output in a different directory, for example::
+
+
+ @transform(["1.c", "2.c"], suffix(".c"), r"my_path/\1.o")
+ def compile(infile, outfile):
+ pass
+
+ This results in the following function calls:
+
+ ::
+
+ # 1.c -> my_path/1.o
+ # 2.c -> my_path/2.o
+ compile("1.c", "my_path/1.o")
+ compile("2.c", "my_path/2.o")
+
+ For convenience and visual clarity, the ``"\1"`` can be omitted from the output parameter.
+ However, the ``"\1"`` is mandatory for string substitutions in additional parameters, ::
+
+
+ @transform(["1.c", "2.c"], suffix(".c"), [r"\1.o", ".o"], r"Compiling \1", "verbatim")
+ def compile(infile, outfile):
+ pass
+
+ Results in the following function calls:
+
+ ::
+
+ compile("1.c", ["1.o", "1.o"], "Compiling 1", "verbatim")
+ compile("2.c", ["2.o", "2.o"], "Compiling 2", "verbatim")
+
+ Since r"\1" is optional for the output parameter, ``"\1.o"`` and ``".o"`` are equivalent.
+ However, strings in other parameters which do not contain r"\1" will be included verbatim, much
+ like the string ``"verbatim"`` in the above example.
+
+
+
+
+.. _decorators.transform.matching_regex:
+
+ * *matching_regex*
+ is a python regular expression string, which must be wrapped in
+ a :ref:`regex<decorators.regex>` indicator object.
+ See the python `regular expression (re) <http://docs.python.org/library/re.html>`_
+ documentation for details of regular expression syntax.
+ Each output file name is created using regular expression substitution with ``output_pattern``.
+
+.. _decorators.transform.matching_formatter:
+
+ * *matching_formatter*
+ a :ref:`formatter<decorators.formatter>` indicator object containing optionally
+ a python `regular expression (re) <http://docs.python.org/library/re.html>`_.
+
+.. _decorators.transform.output_pattern:
+
+ * *output_pattern*
+ Specifies the resulting output file name(s).
+
+.. _decorators.transform.extra_parameters:
+
+ * [*extra_parameters, ...*]
+ Any extra parameters are passed to the task function.
+
+ If ``regex(matching_regex)`` or ``formatter(...)`` is used, then substitution
+ is first applied to (even nested) string parameters. Other data types are passed
+ verbatim.
+
+ For example::
+
+ @transform(["a.c", "b.c"], regex(r"(.*).c"), r"\1.o", r"\1")
+ def compile(infile, outfile):
+ pass
+
+ will result in the following function calls::
+
+ compile("a.c", "a.o", "a")
+ compile("b.c", "b.o", "b")
+
+
+
+
+See :ref:`here <decorators.transform_ex>` for more advanced uses of transform.
diff --git a/doc/decorators/transform_ex.rst b/doc/decorators/transform_ex.rst
new file mode 100644
index 0000000..2436696
--- /dev/null
+++ b/doc/decorators/transform_ex.rst
@@ -0,0 +1,190 @@
+.. include:: ../global.inc
+.. _decorators.transform_ex:
+.. index::
+ pair: @transform, inputs(...); Syntax
+ pair: @transform, add_inputs(...); Syntax
+
+
+.. seealso::
+
+ * :ref:`Decorators <decorators>` for more decorators
+
+####################################################
+ at transform with ``add_inputs`` and ``inputs``
+####################################################
+
+.. |tasks_or_file_names| replace:: `tasks_or_file_names`
+.. _tasks_or_file_names: `decorators.transform.tasks_or_file_names`_
+.. |extra_parameters| replace:: `extra_parameters`
+.. _extra_parameters: `decorators.transform.extra_parameters`_
+.. |output_pattern| replace:: `output_pattern`
+.. _output_pattern: `decorators.transform.output_pattern`_
+.. |input_pattern_or_glob| replace:: `input_pattern_or_glob`
+.. _input_pattern_or_glob: `decorators.transform.input_pattern_or_glob`_
+.. |matching_regex| replace:: `matching_regex`
+.. _matching_regex: `decorators.transform.matching_regex`_
+.. |matching_formatter| replace:: `matching_formatter`
+.. _matching_formatter: `decorators.transform.matching_formatter`_
+.. |suffix_string| replace:: `suffix_string`
+.. _suffix_string: `decorators.transform.suffix_string`_
+
+
+
+
+
+************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************ [...]
+*@transform* ( |tasks_or_file_names|_, :ref:`suffix<decorators.suffix>`\ *(*\ |suffix_string|_\ *)*\ | :ref:`regex<decorators.regex>`\ *(*\ |matching_regex|_\ *)* | :ref:`formatter<decorators.formatter>`\ *(*\ |matching_formatter|_\ *)*\, :ref:`inputs<decorators.inputs>` | :ref:`add_inputs<decorators.add_inputs>`\ *(*\ |input_pattern_or_glob|_\ *)*\ , |output_pattern|_, [|extra_parameters|_,...] )
+************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************ [...]
+ **Purpose:**
+ This variant of ``@transform`` allows additional inputs or dependencies to be added
+ dynamically to the task.
+
+ Output file names and strings in the extra parameters
+ are determined from |tasks_or_file_names|_, i.e. from the output
+ of upstream tasks, or a list of file names.
+
+ This variant of ``@transform`` allows input file names to be derived in the same way.
+
+ String replacement occurs either through suffix matches via :ref:`suffix<decorators.suffix>` or
+ the :ref:`formatter<decorators.formatter>` or :ref:`regex<decorators.regex>` indicators.
+
+ Even with ``add_inputs`` or ``inputs``, ``@transform`` remains a **one to one**
+ operation: each *Input* still produces exactly one *Output*.
+
+ :ref:`add_inputs<decorators.add_inputs>` nests the original input parameters in a list before adding additional dependencies.
+
+ :ref:`inputs<decorators.inputs>` replaces the original input parameters wholescale.
+
+ Only out of date tasks (comparing input and output files) will be run
+
+ **Example of** :ref:`add_inputs<decorators.add_inputs>`
+
+ A common task in compiling C code is to include the corresponding header file for the source.
+
+ To compile ``*.c`` to ``*.o``, adding ``*.h`` and the common header ``universal.h``:
+ ::
+
+ @transform(["1.c", "2.c"], suffix(".c"), add_inputs([r"\1.h", "universal.h"]), ".o")
+ def compile(infile, outfile):
+ pass
+
+ This will result in the following function calls:
+ ::
+
+ compile(["1.c", "1.h", "universal.h"], "1.o")
+ compile(["2.c", "2.h", "universal.h"], "2.o")
+
+ **Example of** :ref:`inputs<decorators.inputs>`
+
+ ``inputs(...)`` allows the original input parameters to be replaced wholescale.
+
+ This can be seen in the following example:
+ ::
+
+ @transform([ ["1.c", "A.c", 2],
+ ["2.c", "B.c", "C.c", 3]],
+ suffix(".c"), inputs([r"\1.py", "docs.rst"]), ".pyc")
+ def compile(infile, outfile):
+ pass
+
+ This will result in the following function calls:
+ ::
+
+ compile(["1.py", "docs.rst"], "1.pyc")
+ compile(["2.py", "docs.rst"], "2.pyc")
+
+
+
+ **Parameters:**
+
+.. _decorators.transform.tasks_or_file_names:
+
+ * *tasks_or_file_names*
+ can be a:
+
+ #. Task / list of tasks (as in the example above).
+ File names are taken from the output of the specified task(s)
+ #. (Nested) list of file name strings.
+ File names containing ``*[]?`` will be expanded as a |glob|_.
+ E.g.:``"a.*" => "a.1", "a.2"``
+
+.. _decorators.transform.suffix_string:
+
+ * *suffix_string*
+ must be wrapped in a :ref:`suffix<decorators.suffix>` indicator object.
+ The end of each file name which matches suffix_string will be replaced by `output_pattern`.
+ Thus::
+
+ @transform(["a.c", "b.c"], suffix(".c"), ".o")
+ def compile(infile, outfile):
+ pass
+
+ will result in the following function calls::
+
+ compile("a.c", "a.o")
+ compile("b.c", "b.o")
+
+ File names which do not match suffix_string will be ignored
+
+.. _decorators.transform.matching_regex:
+
+ * *matching_regex*
+ is a python regular expression string, which must be wrapped in
+ a :ref:`regex<decorators.regex>` indicator object.
+ See the python `regular expression (re) <http://docs.python.org/library/re.html>`_
+ documentation for details of regular expression syntax.
+ Each output file name is created using regular expression substitution with ``output_pattern``.
+
+.. _decorators.transform.matching_formatter:
+
+ * *matching_formatter*
+ a :ref:`formatter<decorators.formatter>` indicator object containing optionally
+ a python `regular expression (re) <http://docs.python.org/library/re.html>`_.
+
+.. _decorators.transform.input_pattern_or_glob:
+
+ * *input_pattern*
+ Specifies the resulting input(s) to each job.
+ Must be wrapped in an :ref:`inputs<decorators.inputs>` or an :ref:`add_inputs<decorators.add_inputs>` indicator object.
+
+ Can be a:
+
+ #. Task / list of tasks (as in the example above).
+ File names are taken from the output of the specified task(s)
+ #. (Nested) list of file name strings.
+ Strings will be subject to substitution.
+ File names containing ``*[]?`` will be expanded as a |glob|_.
+ E.g.:``"a.*" => "a.1", "a.2"``
+
+
+
+.. _decorators.transform.output_pattern:
+
+ * *output_pattern*
+ Specifies the resulting output file name(s).
+
+.. _decorators.transform.extra_parameters:
+
+ * [*extra_parameters, ...*]
+ Any extra parameters are passed to the task function.
+
+ If the ``regex(...)`` or ``formatter(...)`` parameter is used, then substitution
+ is first applied to (even nested) string parameters. Other data types are passed
+ verbatim.
+
+ For example::
+
+ @transform(["a.c", "b.c"], regex(r"(.*).c"), inputs(r"\1.c", r"\1.h", "universal.h"), r"\1.o", r"\1")
+ def compile(infiles, outfile, file_name_root):
+ # do something here
+ pass
+
+ will result in the following function calls::
+
+ compile(["a.c", "a.h", "universal.h"], "a.o", "a")
+ compile(["b.c", "b.h", "universal.h"], "b.o", "b")
+
+
+See :ref:`here <decorators.transform>` for more straightforward ways to use transform.
diff --git a/doc/design.rst b/doc/design.rst
new file mode 100644
index 0000000..2dd0b12
--- /dev/null
+++ b/doc/design.rst
@@ -0,0 +1,304 @@
+.. Design:
+
+.. include:: global.inc
+
+.. index::
+ pair: Design; Ruffus
+
+###############################
+Design & Architecture
+###############################
+
+ The *ruffus* module has the following design goals:
+
+ * Simple
+ * Intuitive
+ * Lightweight
+ * Unintrusive
+ * Flexible/Powerful
+
+
+ Computational pipelines, especially in science, are best thought of in terms of data
+ flowing through successive, dependent stages (**ruffus** calls these :term:`task`\ s).
+ Traditionally, files have been used to
+ link pipelined stages together. This means that computational pipelines can be managed
+ using traditional software construction (`build`) systems.
+
+=================================================
+`GNU Make`
+=================================================
+ The grand-daddy of these is UNIX `make <http://en.wikipedia.org/wiki/Make_(software)>`_.
+ `GNU make <http://www.gnu.org/software/make/>`_ is ubiquitous in the linux world for
+ installing and compiling software.
+ It has been widely used to build computational pipelines because it supports:
+
+ * Stopping and restarting computational processes
+ * Running multiple, even thousands of jobs in parallel
+
+.. _design.make_syntax_ugly:
+
+******************************************************
+Deficiencies of `make` / `gmake`
+******************************************************
+
+ However, make and `GNU make <http://www.gnu.org/software/make/>`_ use a specialised (domain-specific)
+ language, which has been much criticised for its poor support for modern
+ programming language features, such as variable scoping, pattern matching and debugging.
+ Make scripts require large amounts of often obscure shell scripting
+ and makefiles can quickly become unmaintainable.
+
+.. _design.scons_and_rake:
+
+=================================================
+`Scons`, `Rake` and other `Make` alternatives
+=================================================
+
+ Many attempts have been made to produce a more modern version of make, with less of its
+ historical baggage. These include the Java-based `Apache ant <http://ant.apache.org/>`_ which is specified in xml.
+
+ More interesting are a new breed of build systems whose scripts are written in modern programming
+ languages, rather than a specially-invented "build" specification syntax.
+ These include the Python `scons <http://www.scons.org/>`_, Ruby `rake <http://rake.rubyforge.org/>`_ and
+ its python port `Smithy <http://packages.python.org/Smithy/>`_.
+
+ The great advantage is that computational pipelines do not need to be artificially parcelled out
+ between (the often second-class) workflow management code, and the logic which does the real computation
+ in the pipeline. It also means that workflow management can use all the standard language and library
+ features, for example, to read in directories, match file names using regular expressions and so on.
+
+ **Ruffus** is much like scons in that the modern dynamic programming language python is used seamlessly
+ throughout its pipeline scripts.
+
+.. _design.implicit_dependencies:
+
+**************************************************************************
+Implicit dependencies: disadvantages of `make` / `scons` / `rake`
+**************************************************************************
+
+ Although Python `scons <http://www.scons.org/>`_ and Ruby `rake <http://rake.rubyforge.org/>`_
+ are in many ways more powerful and easier to use for building software, they are still an
+ imperfect fit to the world of computational pipelines.
+
+ This is a result of the way dependencies are specified, an essential part of their design inherited
+ from `GNU make <http://www.gnu.org/software/make/>`_.
+
+    The order of operations in all of these tools is specified in a *declarative* rather than
+    *imperative* manner. This means that the sequence of steps that a build should take is
+    not spelled out explicitly and directly. Instead, recipes are provided for turning input files
+    of one type into another.
+
+ So, for example, knowing that ``a->b``, ``b->c``, ``c->d``, the build
+ system can infer how to get from ``a`` to ``d`` by performing the necessary operations in the correct order.
+
+ This is immensely powerful for three reasons:
+        #) The plumbing, such as dependency checking and passing output
+           from one stage to another, is handled automatically by the build system. (This is the whole point!)
+ #) The same *recipe* can be re-used at different points in the build.
+ #) | Intermediate files do not need to be retained.
+ | Given the automatic inference that ``a->b->c->d``,
+ we don't need to keep ``b`` and ``c`` files around once ``d`` has been produced.
+ |
+
+
+ The disadvantage is that because stages are specified only indirectly, in terms of
+ file name matches, the flow through a complex build or a pipeline can be difficult to trace, and nigh
+ impossible to debug when there are problems.
+
+
+.. _design.explicit_dependencies_in_ruffus:
+
+**************************************************************************
+Explicit dependencies in `Ruffus`
+**************************************************************************
+
+ **Ruffus** takes a different approach. The order of operations is specified explicitly rather than inferred
+ indirectly from the input and output types. So, for example, we would explicitly specify three successive and
+ linked operations ``a->b``, ``b->c``, ``c->d``. The build system knows that the operations always proceed in
+ this order.
+
+    Looking at a **Ruffus** script, it is always immediately clear what succession of computational steps
+    will be taken.
+
+ **Ruffus** values clarity over syntactic cleverness.
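+
+    As a minimal sketch of what this looks like in practice (with hypothetical file names and trivial
+    task bodies), the explicit ``a->b``, ``b->c``, ``c->d`` chain above could be written as three
+    decorated python functions, each naming the previous task as its input:
+
+    .. code-block:: python
+
+        from ruffus import *
+
+        # a -> b  (assumes an existing file "start.a")
+        @transform("start.a", suffix(".a"), ".b")
+        def a_to_b(input_file, output_file):
+            open(output_file, "w").write(open(input_file).read())
+
+        # b -> c : always runs after a_to_b because a_to_b *is* its input
+        @transform(a_to_b, suffix(".b"), ".c")
+        def b_to_c(input_file, output_file):
+            open(output_file, "w").write(open(input_file).read())
+
+        # c -> d
+        @transform(b_to_c, suffix(".c"), ".d")
+        def c_to_d(input_file, output_file):
+            open(output_file, "w").write(open(input_file).read())
+
+        pipeline_run([c_to_d])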
+
+.. _design.static_dependencies:
+
+**************************************************************************
+Static dependencies: What `make` / `scons` / `rake` can't do (easily)
+**************************************************************************
+
+ `GNU make <http://www.gnu.org/software/make/>`_, `scons <http://www.scons.org/>`_ and `rake <http://rake.rubyforge.org/>`_
+    work by inferring a static dependency (acyclic) graph between all the files which
+    are used by a computational pipeline. These tools locate the target that they are supposed
+    to build and work backwards through the dependency graph from that target,
+    rebuilding anything that is out of date. This is perfect for building software,
+    where the list of data files can be computed **statically** at the beginning of the build.
+
+    This is not an ideal match for scientific computational pipelines because:
+
+ * | Though the *stages* of a pipeline (i.e. `compile` or `DNA alignment`) are
+ invariably well-specified in advance, the number of
+ operations (*job*\s) involved at each stage may not be.
+ |
+
+ * | A common approach is to break up large data sets into manageable chunks which
+ can be operated on in parallel in computational clusters or farms
+        (See `embarrassingly parallel problems <http://en.wikipedia.org/wiki/Embarrassingly_parallel>`_).
+ | This means that the number of parallel operations or jobs varies with the data (the number of manageable chunks),
+ and dependency trees cannot be calculated statically beforehand.
+ |
+
+ Computational pipelines require **dynamic** dependencies which are not calculated up-front, but
+    at each stage of the pipeline.
+
+    This is a *known* issue with traditional build systems, each of which has partial strategies to work around
+ this problem:
+
+ * gmake always builds the dependencies when first invoked, so dynamic dependencies require (complex!) recursive calls to gmake
+ * `Rake dependencies unknown prior to running tasks <http://objectmix.com/ruby/759716-rake-dependencies-unknown-prior-running-tasks-2.html>`_.
+ * `Scons: Using a Source Generator to Add Targets Dynamically <http://www.scons.org/wiki/DynamicSourceGenerator>`_
+
+
+    **Ruffus** explicitly and straightforwardly handles tasks which produce an indeterminate (i.e. runtime dependent)
+    number of outputs, using its **@split**, **@transform** and **@merge** decorators.
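+
+    As a rough sketch (with a hypothetical input file and chunk size), a task whose number of outputs
+    is only known at run time might look like this:
+
+    .. code-block:: python
+
+        from ruffus import *
+        import glob, os
+
+        # The number of "*.chunk" files depends on the size of "big_input.txt"
+        # (a hypothetical input file) and is only known when the task actually runs.
+        @split("big_input.txt", "*.chunk")
+        def split_into_chunks(input_file, output_files):
+            # clean up chunks left over from any previous run
+            for old_chunk in glob.glob("*.chunk"):
+                os.unlink(old_chunk)
+            lines = open(input_file).readlines()
+            chunk_size = 1000                                   # hypothetical chunk size
+            for i in range(0, len(lines), chunk_size):
+                chunk_file = open("%d.chunk" % (i // chunk_size + 1), "w")
+                chunk_file.writelines(lines[i:i + chunk_size])
+
+        # one job per chunk, however many chunks there turn out to be
+        @transform(split_into_chunks, suffix(".chunk"), ".processed")
+        def process_chunk(input_file, output_file):
+            open(output_file, "w").write(open(input_file).read().upper())
+
+        # gather all the per-chunk results back into a single file
+        @merge(process_chunk, "all.results")
+        def merge_results(input_files, output_file):
+            output = open(output_file, "w")
+            for input_file in input_files:
+                output.write(open(input_file).read())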
+
+=============================================================================
+Managing pipelines stage-by-stage using **Ruffus**
+=============================================================================
+ **Ruffus** manages pipeline stages directly.
+
+ #) | The computational operations for each stage of the pipeline are written by you, in
+ separate python functions.
+ | (These correspond to `gmake pattern rules <http://www.gnu.org/software/make/manual/make.html#Pattern-Rules>`_)
+ |
+
+ #) | The dependencies between pipeline stages (python functions) are specified up-front.
+ | These can be displayed as a flow chart.
+
+ .. image:: images/front_page_flowchart.png
+
+ #) **Ruffus** makes sure pipeline stage functions are called in the right order,
+ with the right parameters, running in parallel using multiprocessing if necessary.
+
+ #) Data file timestamps can be used to automatically determine if all or any parts
+ of the pipeline are out-of-date and need to be rerun.
+
+ #) Separate pipeline stages, and operations within each pipeline stage,
+ can be run in parallel provided they are not inter-dependent.
+
+    Another way of looking at this is that **ruffus** reconstructs data file dependencies dynamically,
+    on the fly, when it gets to each stage of the pipeline, giving much more flexibility.
+
+**************************************************************************
+Disadvantages of the Ruffus design
+**************************************************************************
+ Are there any disadvantages to this trade-off for additional clarity?
+
+    #) Each pipeline stage needs to take the right input and output. For example, if we specified the
+       steps in the wrong order: ``a->b``, ``c->d``, ``b->c``, then no useful output would be produced.
+    #) We cannot re-use the same recipes in different parts of the pipeline.
+    #) Intermediate files need to be retained.
+
+
+    In our experience, it is always obvious when pipeline operations are in the wrong order, precisely because the
+    order of computation is the very essence of the design of each pipeline. Ruffus produces extra diagnostics when
+    no output is created in a pipeline stage (this usually happens with incorrectly specified regular expressions).
+
+ Re-use of recipes is as simple as an extra call to common function code.
+
+ Finally, some users have proposed future enhancements to **Ruffus** to handle unnecessary temporary / intermediate files.
+
+
+.. index::
+ pair: Design; Comparison of Ruffus with alternatives
+
+=================================================
+Alternatives to **Ruffus**
+=================================================
+
+ A comparison of more make-like tools is available from `Ian Holmes' group <http://biowiki.org/MakeComparison>`_.
+
+ Build systems include:
+
+ * `GNU make <http://www.gnu.org/software/make/>`_
+ * `scons <http://www.scons.org/>`_
+ * `ant <http://ant.apache.org/>`_
+ * `rake <http://rake.rubyforge.org/>`_
+
+    There are also complete workload management systems such as Condor.
+    Various bioinformatics pipelines are also available, including that used by the
+    leading genome annotation website Ensembl, as well as Pegasys, GPIPE, Taverna, Wildfire, MOWserv,
+    Triana, Cyrille2 etc. These are all either hardwired to specific databases and tasks,
+    or have steep learning curves for both the scientist/developer and the IT system
+    administrator.
+
+ **Ruffus** is designed to be lightweight and unintrusive enough to use for writing pipelines
+ with just 10 lines of code.
+
+
+.. seealso::
+
+
+    **Bioinformatics workload management systems**
+
+ Condor:
+ http://www.cs.wisc.edu/condor/description.html
+
+ Ensembl Analysis pipeline:
+ http://www.ncbi.nlm.nih.gov/pubmed/15123589
+
+
+ Pegasys:
+ http://www.ncbi.nlm.nih.gov/pubmed/15096276
+
+ GPIPE:
+ http://www.biomedcentral.com/pubmed/15096276
+
+ Taverna:
+ http://www.ncbi.nlm.nih.gov/pubmed/15201187
+
+ Wildfire:
+ http://www.biomedcentral.com/pubmed/15788106
+
+ MOWserv:
+ http://www.biomedcentral.com/pubmed/16257987
+
+ Triana:
+ http://dx.doi.org/10.1007/s10723-005-9007-3
+
+ Cyrille2:
+ http://www.biomedcentral.com/1471-2105/9/96
+
+
+.. index::
+ single: Acknowledgements
+
+**************************************************
+Acknowledgements
+**************************************************
+ * Bruce Eckel's insightful article on
+ `A Decorator Based Build System <http://www.artima.com/weblogs/viewpost.jsp?thread=241209>`_
+ was the obvious inspiration for the use of decorators in *Ruffus*.
+
+      The rest of *Ruffus* takes a different approach. In particular:
+ #. *Ruffus* uses task-based not file-based dependencies
+ #. *Ruffus* tries to have minimal impact on the functions it decorates.
+
+ Bruce Eckel's design wraps functions in "rule" objects.
+
+      *Ruffus* tasks are added as attributes of the functions, which can still be
+      called normally. This is how *Ruffus* decorators can be layered in any order
+ onto the same task.
+
+    * Languages like C++ and Java would probably use a "mixin" approach.
+ Python's easy support for reflection and function references,
+ as well as the necessity of marshalling over process boundaries, dictated the
+ internal architecture of *Ruffus*.
+    * The `Boost Graph library <http://www.boost.org>`_ for textbook implementations of directed
+ graph traversals.
+ * `Graphviz <http://www.graphviz.org/>`_. Just works. Wonderful.
+ * Andreas Heger, Christoffer Nellåker and Grant Belgard for driving Ruffus towards
+ ever simpler syntax.
+
+
+
diff --git a/doc/drmaa_wrapper_functions.rst b/doc/drmaa_wrapper_functions.rst
new file mode 100644
index 0000000..5ad9ddc
--- /dev/null
+++ b/doc/drmaa_wrapper_functions.rst
@@ -0,0 +1,234 @@
+.. include:: global.inc
+.. _drmaa_functions:
+
+.. comments: function name
+
+.. |run_job| replace:: `drmaa_wrapper.run_job`
+.. _run_job: `drmaa_wrapper.run_job`_
+
+.. comments: parameters
+
+.. |dw_cmd_str| replace:: `cmd_str`
+.. _dw_cmd_str: `drmaa_wrapper.run_job.cmd_str`_
+
+.. |dw_job_script_directory| replace:: `job_script_directory`
+.. _dw_job_script_directory: `drmaa_wrapper.run_job.job_script_directory`_
+.. |dw_job_environment| replace:: `job_environment`
+.. _dw_job_environment: `drmaa_wrapper.run_job.job_environment`_
+.. |dw_working_directory| replace:: `working_directory`
+.. _dw_working_directory: `drmaa_wrapper.run_job.working_directory`_
+.. |dw_retain_job_scripts| replace:: `retain_job_scripts`
+.. _dw_retain_job_scripts: `drmaa_wrapper.run_job.retain_job_scripts`_
+.. |dw_job_name| replace:: `job_name`
+.. _dw_job_name: `drmaa_wrapper.run_job.job_name`_
+.. |dw_job_other_options| replace:: `job_other_options`
+.. _dw_job_other_options: `drmaa_wrapper.run_job.job_other_options`_
+.. |dw_logger| replace:: `logger`
+.. _dw_logger: `drmaa_wrapper.run_job.logger`_
+.. |dw_drmaa_session| replace:: `drmaa_session`
+.. _dw_drmaa_session: `drmaa_wrapper.run_job.drmaa_session`_
+.. |dw_run_locally| replace:: `run_locally`
+.. _dw_run_locally: `drmaa_wrapper.run_job.run_locally`_
+.. |dw_output_files| replace:: `output_files`
+.. _dw_output_files: `drmaa_wrapper.run_job.output_files`_
+.. |dw_touch_only| replace:: `touch_only`
+.. _dw_touch_only: `drmaa_wrapper.run_job.touch_only`_
+
+
+################################################
+drmaa functions
+################################################
+
+    ``drmaa_wrapper`` is not exported automatically by ruffus and must be imported explicitly:
+
+ .. code-block:: python
+ :emphasize-lines: 1
+
+
+ # imported ruffus.drmaa_wrapper explicitly
+ from ruffus.drmaa_wrapper import run_job, error_drmaa_job
+
+.. _drmaa_wrapper.run_job:
+
+.. index::
+ single: drmaa ; run_job
+ pair: run_job; Run drmaa
+
+
+************************************************************************************************************************************************************************************************************************************************************************************
+*run_job*
+************************************************************************************************************************************************************************************************************************************************************************************
+**run_job** (|dw_cmd_str|_, |dw_job_name|_ = None, |dw_job_other_options|_ = None, |dw_job_script_directory|_ = None, |dw_job_environment|_ = None, |dw_working_directory|_ = None, |dw_logger|_ = None, |dw_drmaa_session|_ = None, |dw_retain_job_scripts|_ = False, |dw_run_locally|_ = False, |dw_output_files|_ = None, |dw_touch_only|_ = False)
+
+ **Purpose:**
+
+ ``ruffus.drmaa_wrapper.run_job`` dispatches a command with arguments to a cluster or Grid Engine node and waits for the command to complete.
+
+ It is the semantic equivalent of calling `os.system <http://docs.python.org/2/library/os.html#os.system>`__ or
+        `subprocess.check_output <http://docs.python.org/2/library/subprocess.html#subprocess.check_output>`__.
+
+ **Example**:
+
+ .. code-block:: python
+
+ from ruffus.drmaa_wrapper import run_job, error_drmaa_job
+ import drmaa
+ my_drmaa_session = drmaa.Session()
+ my_drmaa_session.initialize()
+
+ run_job("ls",
+ job_name = "test",
+ job_other_options="-P mott-flint.prja -q short.qa",
+ job_script_directory = "test_dir",
+ job_environment={ 'BASH_ENV' : '~/.bashrc' },
+ retain_job_scripts = True, drmaa_session=my_drmaa_session)
+ run_job("ls",
+ job_name = "test",
+ job_other_options="-P mott-flint.prja -q short.qa",
+ job_script_directory = "test_dir",
+ job_environment={ 'BASH_ENV' : '~/.bashrc' },
+ retain_job_scripts = True,
+ drmaa_session=my_drmaa_session,
+ working_directory = "/gpfs1/well/mott-flint/lg/src/oss/ruffus/doc")
+
+ #
+ # catch exceptions
+ #
+ try:
+ stdout_res, stderr_res = run_job(cmd,
+ job_name = job_name,
+ logger = logger,
+ drmaa_session = drmaa_session,
+ run_locally = options.local_run,
+ job_other_options = get_queue_name())
+
+ # relay all the stdout, stderr, drmaa output to diagnose failures
+ except error_drmaa_job as err:
+ raise Exception("\n".join(map(str,
+ ["Failed to run:",
+ cmd,
+ err,
+ stdout_res,
+ stderr_res])))
+
+ my_drmaa_session.exit()
+
+
+
+ **Parameters:**
+
+.. _drmaa_wrapper.run_job.cmd_str:
+
+ * *cmd_str*
+
+        The command which will be run remotely, including all parameters.
+
+.. _drmaa_wrapper.run_job.job_name:
+
+ * *job_name*
+
+ A descriptive name for the command. This will be displayed by `SGE qstat <http://gridscheduler.sourceforge.net/htmlman/htmlman1/qstat.html>`__, for example.
+ Defaults to "ruffus_job"
+
+.. _drmaa_wrapper.run_job.job_other_options:
+
+ * *job_other_options*
+
+ Other drmaa parameters can be passed verbatim as a string.
+
+ Examples for SGE include project name (``-P project_name``), parallel environment (``-pe parallel_environ``), account (``-A account_string``), resource (``-l resource=expression``),
+ queue name (``-q a_queue_name``), queue priority (``-p 15``).
+
+ These are parameters which you normally need to include when submitting jobs interactively, for example via
+ `SGE qsub <http://gridscheduler.sourceforge.net/htmlman/htmlman1/qsub.html>`__
+ or `SLURM <http://apps.man.poznan.pl/trac/slurm-drmaa/wiki/WikiStart#Nativespecification>`__ (`srun <https://computing.llnl.gov/linux/slurm/srun.html>`__)
+
+.. _drmaa_wrapper.run_job.job_script_directory:
+
+ * *job_script_directory*
+
+ The directory where drmaa temporary script files will be found. Defaults to the current working directory.
+
+
+.. _drmaa_wrapper.run_job.job_environment:
+
+ * *job_environment*
+
+        A dictionary of key / value pairs with environment variables. E.g. ``{'BASH_ENV': '~/.bashrc'}``
+
+
+.. _drmaa_wrapper.run_job.working_directory:
+
+ * *working_directory*
+
+ * Sets the working directory.
+ * Should be a fully qualified path.
+ * Defaults to the current working directory.
+
+
+.. _drmaa_wrapper.run_job.retain_job_scripts:
+
+ * *retain_job_scripts*
+
+        Do not delete temporary script files containing drmaa commands. Useful for
+        debugging or running on the command line directly, and provides a useful record of the commands.
+
+.. _drmaa_wrapper.run_job.logger:
+
+ * *logger*
+
+ For logging messages indicating the progress of the pipeline in terms of tasks and jobs. Takes objects with the standard python
+ `logging <https://docs.python.org/2/library/logging.html>`__ module interface.
+
+.. _drmaa_wrapper.run_job.drmaa_session:
+
+ * *drmaa_session*
+
+ A shared drmaa session created and managed separately.
+
+        Somewhere in the main part of your **Ruffus** pipeline script there should be code looking like this:
+
+ .. code-block:: python
+
+ #
+ # start shared drmaa session for all jobs / tasks in pipeline
+ #
+ import drmaa
+ drmaa_session = drmaa.Session()
+ drmaa_session.initialize()
+
+
+ #
+ # pipeline functions
+ #
+
+ if __name__ == '__main__':
+ cmdline.run (options, multithread = options.jobs)
+ drmaa_session.exit()
+
+.. _drmaa_wrapper.run_job.run_locally:
+
+ * *run_locally*
+
+ Runs commands locally using the standard python `subprocess <https://docs.python.org/2/library/subprocess.html>`__ module
+        rather than dispatching remotely. This allows scripts to be debugged easily.
+
+.. _drmaa_wrapper.run_job.touch_only:
+
+ * *touch_only*
+
+ Create or update :ref:`Output files <drmaa_wrapper.run_job.output_files>`
+ only to simulate the running of the pipeline.
+ Does not dispatch commands remotely or locally. This is most useful to force a
+ pipeline to acknowledge that a particular part is now up-to-date.
+
+ See also: :ref:`pipeline_run(touch_files_only=True) <pipeline_functions.pipeline_run.touch_files_only>`
+
+
+.. _drmaa_wrapper.run_job.output_files:
+
+ * *output_files*
+
+ Output files which will be created or updated if :ref:`touch_only <drmaa_wrapper.run_job.touch_only>` ``=True``
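+
+        For example, a short sketch (with hypothetical file names, reusing the ``my_drmaa_session``
+        created in the example above) combining ``touch_only`` and ``output_files``:
+
+        .. code-block:: python
+
+            # pretend to run the command: only create / update the listed output files
+            run_job("md5sum input.txt > input.txt.md5",
+                    touch_only    = True,
+                    output_files  = ["input.txt.md5"],
+                    drmaa_session = my_drmaa_session)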
+
+
diff --git a/doc/examples/bioinformatics/index.rst b/doc/examples/bioinformatics/index.rst
new file mode 100644
index 0000000..fe17943
--- /dev/null
+++ b/doc/examples/bioinformatics/index.rst
@@ -0,0 +1,290 @@
+.. _examples_bioinformatics_part1:
+
+
+
+###################################################################
+Construction of a simple pipeline to run BLAST jobs
+###################################################################
+
+============
+Overview
+============
+
+ This is a simple example to illustrate the convenience **Ruffus**
+ brings to simple tasks in bioinformatics.
+
+ 1. **Split** a problem into multiple fragments that can be
+ 2. **Run in parallel** giving partial solutions that can be
+ 3. **Recombined** into the complete solution.
+
+ The example code runs a `ncbi <http://blast.ncbi.nlm.nih.gov/>`__
+ `blast <http://en.wikipedia.org/wiki/BLAST>`__ search for four sequences
+ against the human `refseq <http://en.wikipedia.org/wiki/RefSeq>`_ protein sequence database.
+
+ #. **Split** each of the four sequences into a separate file.
+ #. **Run in parallel** Blastall on each sequence file
+ #. **Recombine** the BLAST results by simple concatenation.
+
+
+ In real life,
+
+ * `BLAST <http://blast.ncbi.nlm.nih.gov/>`__ already provides support for multiprocessing
+ * Sequence files would be split in much larger chunks, with many sequences
+ * The jobs would be submitted to large computational farms (in our case, using the SunGrid Engine).
+ * The High Scoring Pairs (HSPs) would be parsed / filtered / stored in your own formats.
+
+
+
+
+ .. note::
+
+ This bioinformatics example is intended to showcase *some* of the features of Ruffus.
+
+ #. See the :ref:`manual <new_manual.introduction>` to learn about the various features in Ruffus.
+
+
+========================
+Prerequisites
+========================
+
+-------------
+1. Ruffus
+-------------
+ To install Ruffus on most systems with python installed:
+
+ ::
+
+ easy_install -U ruffus
+
+ Otherwise, `download <http://code.google.com/p/ruffus/downloads/list>`_ Ruffus and run:
+
+ ::
+
+ tar -xvzf ruffus-xxx.tar.gz
+ cd ruffus-xxx
+        ./setup.py install
+
+ where xxx is the latest Ruffus version.
+
+
+-------------
+2. BLAST
+-------------
+ This example assumes that the `BLAST <http://blast.ncbi.nlm.nih.gov/>`__ ``blastall`` and ``formatdb`` executables are
+ installed and on the search path. Otherwise download from `here <http://blast.ncbi.nlm.nih.gov/Blast.cgi?CMD=Web&PAGE_TYPE=BlastDocs&DOC_TYPE=Download>`_.
+
+
+---------------------------------------
+3. human refseq sequence database
+---------------------------------------
+
+ We also need to download the human refseq sequence file and format the ncbi database:
+
+ ::
+
+ wget ftp://ftp.ncbi.nih.gov/refseq/H_sapiens/mRNA_Prot/human.protein.faa.gz
+ gunzip human.protein.faa.gz
+
+ formatdb -i human.protein.faa
+
+---------------------------------------
+4. test sequences
+---------------------------------------
+ Query sequences in FASTA format can be found in `original.fa <../../_static/examples/bioinformatics/original.fa>`_
+
+
+=========================
+Code
+=========================
+ The code for this example can be found :ref:`here <examples_bioinformatics_part1_code>` and
+ pasted into the python command shell.
+
+
+================================================
+Step 1. Splitting up the query sequences
+================================================
+
+    We want each of our sequences in the query file `original.fa <../../_static/examples/bioinformatics/original.fa>`_ to be placed
+    in separate files named ``XXX.segment``, where ``XXX`` runs from 1 to the number of sequences.
+
+ ::
+
+ current_file_index = 0
+ for line in open("original.fa"):
+ # start a new file for each accession line
+ if line[0] == '>':
+ current_file_index += 1
+ current_file = open("%d.segment" % current_file_index, "w")
+ current_file.write(line)
+
+
+
+ To use this in a pipeline, we only need to wrap this in a function, "decorated" with the Ruffus
+ keyword :ref:`@split <new_manual.split>`:
+
+
+
+ .. image:: ../../images/examples_bioinformatics_split.jpg
+
+
+    | This indicates that we are splitting up the input file `original.fa <../../_static/examples/bioinformatics/original.fa>`_ into as many
+      ``*.segment`` files as it takes.
+ | The pipelined function itself takes two arguments, for the input and output.
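+
+    In text form (the exact version appears in the full code listing linked above), the decorated
+    function looks roughly like this:
+
+    ::
+
+        @split("original.fa", "*.segment")
+        def splitFasta (seqFile, segments):
+            """Split sequence file into as many fragments as appropriate
+               depending on the size of original.fa"""
+            current_file_index = 0
+            for line in open(seqFile):
+                # start a new file for each accession line
+                if line[0] == '>':
+                    current_file_index += 1
+                    current_file = open("%d.segment" % current_file_index, "w")
+                current_file.write(line)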
+
+    We shall see later that this simple :ref:`@split <new_manual.split>` decorator already gives us all the benefits of:
+
+ * Dependency checking
+ * Flowchart printing
+
+================================================
+Step 2. Run BLAST jobs in parallel
+================================================
+
+ Assuming that blast is already installed, sequence matches can be found with this python
+ code:
+
+ ::
+
+ os.system("blastall -p blastp -d human.protein.faa -i 1.segment > 1.blastResult")
+
+    To pipeline this, we simply need to wrap it in a function decorated with the **Ruffus**
+    keyword :ref:`@transform <new_manual.transform>`.
+
+ .. image:: ../../images/examples_bioinformatics_transform.jpg
+
+ This indicates that we are taking all the output files from the previous ``splitFasta``
+ operation (``*.segment``) and :ref:`@transform <new_manual.transform>`-ing each to a new file with the ``.blastResult``
+ suffix. Each of these transformation operations can run in parallel if specified.
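+
+    In text form (again, see the full code listing for the exact version), the decorated function
+    looks roughly like this:
+
+    ::
+
+        @transform(splitFasta, suffix(".segment"), ".blastResult")
+        def runBlast(seqFile, blastResultFile):
+            """Run blast"""
+            os.system("blastall -p blastp -d human.protein.faa -i %s > %s" %
+                      (seqFile, blastResultFile))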
+
+
+================================================
+Step 3. Combining BLAST results
+================================================
+
+    The following python code will concatenate the results together:
+ ::
+
+        from glob import glob
+
+        output_file = open("final.blast_results", "w")
+        for i in glob("*.blastResult"):
+            output_file.write(open(i).read())
+
+
+
+ To pipeline this, we need again to decorate with the **Ruffus** keyword :ref:`@merge <new_manual.merge>`.
+
+ .. image:: ../../images/examples_bioinformatics_merge.jpg
+
+ This indicates that we are taking all the output files from the previous ``runBlast``
+    operation (``*.blastResult``) and :ref:`@merge <new_manual.merge>`-ing them to the new file ``final.blast_results``.
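+
+    The decorated function, in text form, looks roughly like this (the exact version is in the
+    full code listing):
+
+    ::
+
+        @merge(runBlast, "final.blast_results")
+        def combineBlastResults (blastResultFiles, combinedBlastResultFile):
+            """Combine blast results"""
+            output_file = open(combinedBlastResultFile, "w")
+            for i in blastResultFiles:
+                output_file.write(open(i).read())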
+
+
+================================================
+Step 4. Running the pipeline
+================================================
+
+ We can run the completed pipeline using a maximum of 4 parallel processes by calling
+ :ref:`pipeline_run <pipeline_functions.pipeline_run>` :
+
+ ::
+
+ pipeline_run([combineBlastResults], verbose = 2, multiprocess = 4)
+
+
+ Though we have only asked Ruffus to run ``combineBlastResults``, it traces all the dependencies
+ of this task and runs all the necessary parts of the pipeline.
+
+
+ .. note ::
+
+ The full code for this example can be found :ref:`here <examples_bioinformatics_part1_code>`
+ suitable for pasting into the python command shell.
+
+ The ``verbose`` parameter causes the following output to be printed to stderr as the pipeline
+ runs:
+
+ ::
+
+ >>> pipeline_run([combineBlastResults], verbose = 2, multiprocess = 4)
+ Job = [original.fa -> *.segment] completed
+ Completed Task = splitFasta
+ Job = [1.segment -> 1.blastResult] completed
+ Job = [3.segment -> 3.blastResult] completed
+ Job = [2.segment -> 2.blastResult] completed
+ Job = [4.segment -> 4.blastResult] completed
+ Completed Task = runBlast
+ Job = [[1.blastResult, 2.blastResult, 3.blastResult, 4.blastResult] -> final.blast_results] completed
+ Completed Task = combineBlastResults
+
+
+================================================
+Step 5. Testing dependencies
+================================================
+
+ If we invoked :ref:`pipeline_run <pipeline_functions.pipeline_run>` again, nothing
+ further would happen because the
+ pipeline is now up-to-date. But what if the pipeline had not run to completion?
+
+ We can simulate the failure of one of the ``blastall`` jobs by deleting its results:
+
+ ::
+
+ os.unlink("4.blastResult")
+
+ Let us use the :ref:`pipeline_printout <pipeline_functions.pipeline_printout>`
+ function to print out the dependencies of the pipeline at a high ``verbose`` level which
+ will show both complete and incomplete jobs:
+
+ ::
+
+ >>> import sys
+ >>> pipeline_printout(sys.stdout, [combineBlastResults], verbose = 4)
+
+ ________________________________________
+ Tasks which are up-to-date:
+
+ Task = splitFasta
+ "Split sequence file into as many fragments as appropriate depending on the size of
+ original_fasta"
+
+
+ ________________________________________
+ Tasks which will be run:
+
+ Task = runBlast
+ "Run blast"
+ Job = [4.segment
+ ->4.blastResult]
+ Job needs update: Missing file 4.blastResult
+
+ Task = combineBlastResults
+ "Combine blast results"
+ Job = [[1.blastResult, 2.blastResult, 3.blastResult, 4.blastResult]
+ ->final.blast_results]
+ Job needs update: Missing file 4.blastResult
+
+ ________________________________________
+
+ Only the parts of the pipeline which involve the missing BLAST result will be rerun.
+ We can confirm this by invoking the pipeline.
+
+ ::
+
+ >>> pipeline_run([combineBlastResults], verbose = 2, multiprocess = 4)
+
+ Job = [1.segment -> 1.blastResult] unnecessary: already up to date
+ Job = [2.segment -> 2.blastResult] unnecessary: already up to date
+ Job = [3.segment -> 3.blastResult] unnecessary: already up to date
+ Job = [4.segment -> 4.blastResult] completed
+ Completed Task = runBlast
+ Job = [[1.blastResult, 2.blastResult, 3.blastResult, 4.blastResult] -> final.blast_results] completed
+ Completed Task = combineBlastResults
+
+================================================
+What is next?
+================================================
+
+
+ In the :ref:`next (short) part <examples_bioinformatics_part2>`,
+ we shall add some standard (boilerplate) code to
+ turn this BLAST pipeline into a (slightly more) useful python program.
+
diff --git a/doc/examples/bioinformatics/part1_code.rst b/doc/examples/bioinformatics/part1_code.rst
new file mode 100644
index 0000000..c02232f
--- /dev/null
+++ b/doc/examples/bioinformatics/part1_code.rst
@@ -0,0 +1,70 @@
+.. include:: ../../global.inc
+.. _examples_bioinformatics_part1_code:
+
+
+###################################################################
+Ruffus code
+###################################################################
+
+::
+
+ import os, sys
+
+ exe_path = os.path.split(os.path.abspath(sys.argv[0]))[0]
+ sys.path.insert(0, os.path.abspath(os.path.join(exe_path,"..", "..","..")))
+
+ from ruffus import *
+
+
+ original_fasta = "original.fa"
+ database_file = "human.protein.faa"
+
+ @split(original_fasta, "*.segment")
+ def splitFasta (seqFile, segments):
+ """Split sequence file into
+ as many fragments as appropriate
+ depending on the size of original_fasta"""
+ current_file_index = 0
+ for line in open(original_fasta):
+ #
+ # start a new file for each accession line
+ #
+ if line[0] == '>':
+ current_file_index += 1
+ current_file = open("%d.segment" % current_file_index, "w")
+ current_file.write(line)
+
+
+
+ @transform(splitFasta, suffix(".segment"), ".blastResult")
+ def runBlast(seqFile, blastResultFile):
+ """Run blast"""
+ os.system("blastall -p blastp -d %s -i %s > %s" %
+ (database_file, seqFile, blastResultFile))
+
+
+ @merge(runBlast, "final.blast_results")
+ def combineBlastResults (blastResultFiles, combinedBlastResultFile):
+ """Combine blast results"""
+ output_file = open(combinedBlastResultFile, "w")
+ for i in blastResultFiles:
+ output_file.write(open(i).read())
+
+
+ pipeline_run([combineBlastResults], verbose = 2, multiprocess = 4)
+
+
+ #
+    # Simulate interruption of the pipeline by
+ # deleting the output of one of the BLAST jobs
+ #
+ os.unlink("4.blastResult")
+
+ pipeline_printout(sys.stdout, [combineBlastResults], verbose = 4)
+
+
+ #
+ # Re-running the pipeline
+ #
+ pipeline_run([combineBlastResults], verbose = 2, multiprocess = 4)
+
diff --git a/doc/examples/bioinformatics/part2.rst b/doc/examples/bioinformatics/part2.rst
new file mode 100644
index 0000000..30e1343
--- /dev/null
+++ b/doc/examples/bioinformatics/part2.rst
@@ -0,0 +1,152 @@
+.. _examples_bioinformatics_part2:
+
+
+###################################################################
+Part 2: A slightly more practical pipeline to run blasts jobs
+###################################################################
+
+============
+Overview
+============
+
+ :ref:`Previously <examples_bioinformatics_part1>`, we had built
+ a simple pipeline to split up a FASTA file of query sequences so
+ that these can be matched against a sequence database in parallel.
+
+ We shall wrap this code so that
+
+ * It is more robust to interruptions
+ * We can specify the file names on the command line
+
+==================================================================
+Step 1. Cleaning up any leftover junk from previous pipeline runs
+==================================================================
+
+ | We split up each of our sequences in the query file `original.fa <../../_static/examples/bioinformatics/original.fa>`_
+      into separate files named ``XXX.segment``, where ``XXX`` runs from 1 to the number of sequences in
+      the FASTA file.
+
+    | However, if we start with 6 sequences (giving ``1.segment`` ... ``6.segment``) and we
+      then edit `original.fa <../../_static/examples/bioinformatics/original.fa>`_
+      so that only 5 are left, the file ``6.segment`` would still be left
+ hanging around as an unwanted, extraneous and confusing orphan.
+
+ As a general rule, it is a good idea to clean up the results of a previous run in
+    a :ref:`@split <new_manual.split>` operation:
+
+ ::
+
+ @split("original.fa", "*.segment")
+ def splitFasta (seqFile, segments):
+
+ #
+ # Clean up any segment files from previous runs before creating new one
+ #
+ for i in glob.glob("*.segment"):
+ os.unlink(i)
+
+ # code as before...
+
+.. _examples_bioinformatics_part2.step2:
+
+===============================================================
+Step 2. Adding a "flag" file to mark successful completion
+===============================================================
+
+ When pipelined tasks are interrupted half way through an operation, the output may
+ only contain part of the results in an incomplete or inconsistent state.
+ There are three general options to deal with this:
+
+ #. Catch any interrupting conditions and delete the incomplete output
+ #. Tag successfully completed output with a special marker at the end of the file
+ #. Create an empty "flag" file whose only point is to signal success
+
+ Option (3) is the most reliable way and involves the least amount of work in Ruffus.
+ We add flag files with the suffix ``.blastSuccess`` for our parallel BLAST jobs:
+
+ ::
+
+ @transform(splitFasta, suffix(".segment"), [".blastResult", ".blastSuccess"])
+ def runBlast(seqFile, output_files):
+
+ blastResultFile, flag_file = output_files
+
+ #
+ # Existing code unchanged
+ #
+ os.system("blastall -p blastp -d human.protein.faa "+
+ "-i %s > %s" % (seqFile, blastResultFile))
+
+ #
+ # "touch" flag file to indicate success
+ #
+ open(flag_file, "w")
+
+
+==============================================================
+Step 3. Allowing the script to be invoked on the command line
+==============================================================
+
+ We allow the query sequence file, as well as the sequence database and end results
+ to be specified at runtime using the standard python `optparse <http://docs.python.org/library/optparse.html>`_ module.
+ We find this approach to run time arguments generally useful for many Ruffus scripts.
+ The full code can be :ref:`viewed here <examples_bioinformatics_part2_code>` and
+ `downloaded from run_parallel_blast.py <../../_static/examples/bioinformatics/run_parallel_blast.py>`_.
+
+ The different options can be inspected by running the script with the ``--help`` or ``-h``
+ argument.
+
+ The following options are useful for developing Ruffus scripts:
+
+ ::
+
+ --verbose | -v : Print more detailed messages for each additional verbose level.
+ E.g. run_parallel_blast --verbose --verbose --verbose ... (or -vvv)
+
+ --jobs | -j : Specifies the number of jobs (operations) to run in parallel.
+
+ --flowchart FILE : Print flowchart of the pipeline to FILE. Flowchart format
+ depends on extension. Alternatives include (".dot", ".jpg",
+ "*.svg", "*.png" etc). Formats other than ".dot" require
+ the dot program to be installed (http://www.graphviz.org/).
+
+ --just_print | -n Only print a trace (description) of the pipeline.
+ The level of detail is set by --verbose.
+
+
+============================================================
+Step 4. Printing out a flowchart for the pipeline
+============================================================
+    The ``--flowchart`` argument results in a call to ``pipeline_printout_graph(...)``.
+ This prints out a flowchart of the pipeline. Valid formats include ".dot", ".jpg", ".svg", ".png"
+ but all except for the first require the ``dot`` program to be installed
+ (http://www.graphviz.org/).
+
+ The state of the pipeline is reflected in the flowchart:
+
+ .. image:: ../../images/examples_bioinformatics_pipeline.jpg
+
+
+============================================================
+Step 5. Errors
+============================================================
+ Because Ruffus scripts are just normal python functions, you can debug them using
+ your usual tools, or jump to the offending line(s) even when the pipeline is running in
+ parallel.
+
+    For example, this is what the error messages would look like if we had mis-spelt ``blastall`` as ``blastal``.
+ In :ref:`run_parallel_blast.py <examples_bioinformatics_part2_code>`,
+ python exceptions are raised if the ``blastall`` command fails.
+
+    Each of the exceptions for the parallel operations is printed out with the
+    offending lines (line 204), and the problems (``blastal`` not found)
+    highlighted in red.
+
+ .. image:: ../../images/examples_bioinformatics_error.png
+
+============================================================
+Step 6. Will it run?
+============================================================
+ The full code can be :ref:`viewed here <examples_bioinformatics_part2_code>` and
+ `downloaded from run_parallel_blast.py <../../_static/examples/bioinformatics/run_parallel_blast.py>`_.
+
diff --git a/doc/examples/bioinformatics/part2_code.rst b/doc/examples/bioinformatics/part2_code.rst
new file mode 100644
index 0000000..93b628d
--- /dev/null
+++ b/doc/examples/bioinformatics/part2_code.rst
@@ -0,0 +1,267 @@
+.. include:: ../../global.inc
+.. _examples_bioinformatics_part2_code:
+
+
+###################################################################
+Ruffus code
+###################################################################
+
+::
+
+ #!/usr/bin/env python
+ """
+
+ run_parallel_blast.py
+ [--log_file PATH]
+ [--quiet]
+
+ """
+
+ ################################################################################
+ #
+ # run_parallel_blast
+ #
+ #
+ # Copyright (c) 4/21/2010 Leo Goodstadt
+ #
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
+ # of this software and associated documentation files (the "Software"), to deal
+ # in the Software without restriction, including without limitation the rights
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ # copies of the Software, and to permit persons to whom the Software is
+ # furnished to do so, subject to the following conditions:
+ #
+ # The above copyright notice and this permission notice shall be included in
+ # all copies or substantial portions of the Software.
+ #
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ # THE SOFTWARE.
+ #################################################################################
+ import os, sys
+ exe_path = os.path.split(os.path.abspath(sys.argv[0]))[0]
+ sys.path.insert(0,os.path.abspath(os.path.join(exe_path,"..", "..")))
+
+
+ #88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+ # options
+
+
+ #88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+ from optparse import OptionParser
+ import sys, os
+
+ exe_path = os.path.split(os.path.abspath(sys.argv[0]))[0]
+
+
+ parser = OptionParser(version="%prog 1.0", usage = "\n\n %prog --input_file QUERY_FASTA --database_file FASTA_DATABASE [more_options]")
+ parser.add_option("-i", "--input_file", dest="input_file",
+ metavar="FILE",
+ type="string",
+ help="Name and path of query sequence file in FASTA format. ")
+ parser.add_option("-d", "--database_file", dest="database_file",
+ metavar="FILE",
+ type="string",
+ help="Name and path of FASTA database to search. ")
+ parser.add_option("--result_file", dest="result_file",
+ metavar="FILE",
+ type="string",
+ default="final.blast_results",
+ help="Name and path of where the files should end up. ")
+ parser.add_option("-t", "--temp_directory", dest="temp_directory",
+ metavar="PATH",
+ type="string",
+ default="tmp",
+ help="Name and path of temporary directory where calculations "
+ "should take place. ")
+
+ #
+ # general options: verbosity / logging
+ #
+ parser.add_option("-v", "--verbose", dest = "verbose",
+ action="count", default=0,
+ help="Print more detailed messages for each additional verbose level."
+ " E.g. run_parallel_blast --verbose --verbose --verbose ... (or -vvv)")
+
+ #
+ # pipeline
+ #
+ parser.add_option("-j", "--jobs", dest="jobs",
+ default=1,
+ metavar="jobs",
+ type="int",
+ help="Specifies the number of jobs (operations) to run in parallel.")
+ parser.add_option("--flowchart", dest="flowchart",
+ metavar="FILE",
+ type="string",
+ help="Print flowchart of the pipeline to FILE. Flowchart format "
+ "depends on extension. Alternatives include ('.dot', '.jpg', "
+ "'*.svg', '*.png' etc). Formats other than '.dot' require "
+ "the dot program to be installed (http://www.graphviz.org/).")
+ parser.add_option("-n", "--just_print", dest="just_print",
+ action="store_true", default=False,
+ help="Only print a trace (description) of the pipeline. "
+ " The level of detail is set by --verbose.")
+
+ (options, remaining_args) = parser.parse_args()
+
+
+ if not options.flowchart:
+ if not options.database_file:
+ parser.error("\n\n\tMissing parameter --database_file FILE\n\n")
+ if not options.input_file:
+ parser.error("\n\n\tMissing parameter --input_file FILE\n\n")
+
+ #88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+ # imports
+
+
+ #88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+ from ruffus import *
+ import subprocess
+
+
+
+ #88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+ # Functions
+
+
+ #88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+ def run_cmd(cmd_str):
+ """
+ Throw exception if run command fails
+ """
+ process = subprocess.Popen(cmd_str, stdout = subprocess.PIPE,
+ stderr = subprocess.PIPE, shell = True)
+ stdout_str, stderr_str = process.communicate()
+ if process.returncode != 0:
+ raise Exception("Failed to run '%s'\n%s%sNon-zero exit status %s" %
+ (cmd_str, stdout_str, stderr_str, process.returncode))
+
+
+ #88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+ # Logger
+
+
+ #88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+ import logging
+ logger = logging.getLogger("run_parallel_blast")
+ #
+    # We are interested in all messages
+ #
+ if options.verbose:
+ logger.setLevel(logging.DEBUG)
+ stderrhandler = logging.StreamHandler(sys.stderr)
+ stderrhandler.setFormatter(logging.Formatter(" %(message)s"))
+ stderrhandler.setLevel(logging.DEBUG)
+ logger.addHandler(stderrhandler)
+
+
+
+ #88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+ # Pipeline tasks
+
+
+ #88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+ original_fasta = options.input_file
+ database_file = options.database_file
+ temp_directory = options.temp_directory
+ result_file = options.result_file
+
+    @follows(mkdir(temp_directory))
+    @split(original_fasta, os.path.join(temp_directory, "*.segment"))
+ def splitFasta (seqFile, segments):
+ """Split sequence file into
+ as many fragments as appropriate
+ depending on the size of original_fasta"""
+ #
+ # Clean up any segment files from previous runs before creating new one
+ #
+ for i in segments:
+ os.unlink(i)
+ #
+ current_file_index = 0
+ for line in open(original_fasta):
+ #
+ # start a new file for each accession line
+ #
+ if line[0] == '>':
+ current_file_index += 1
+ file_name = "%d.segment" % current_file_index
+ file_path = os.path.join(temp_directory, file_name)
+ current_file = open(file_path, "w")
+ current_file.write(line)
+
+
+ @transform(splitFasta, suffix(".segment"), [".blastResult", ".blastSuccess"])
+ def runBlast(seqFile, output_files):
+ #
+ blastResultFile, flag_file = output_files
+ #
+        run_cmd("blastall -p blastp -d %s -i %s > %s" % (database_file, seqFile, blastResultFile))
+ #
+ # "touch" flag file to indicate success
+ #
+ open(flag_file, "w")
+
+
+ @merge(runBlast, result_file)
+ def combineBlastResults (blastResult_and_flag_Files, combinedBlastResultFile):
+ """Combine blast results"""
+ #
+ output_file = open(combinedBlastResultFile, "w")
+ for blastResult_file, flag_file in blastResult_and_flag_Files:
+ output_file.write(open(blastResult_file).read())
+
+
+
+
+
+
+
+
+
+ #88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+ # Print list of tasks
+
+ #88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+ if options.just_print:
+ pipeline_printout(sys.stdout, [combineBlastResults], verbose=options.verbose)
+
+
+ #88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+ # Print flowchart
+
+ #88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+ elif options.flowchart:
+ # use file extension for output format
+ output_format = os.path.splitext(options.flowchart)[1][1:]
+ pipeline_printout_graph (open(options.flowchart, "w"),
+ output_format,
+ [combineBlastResults],
+ no_key_legend = True)
+ #88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+ # Run Pipeline
+
+ #88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+ else:
+ pipeline_run([combineBlastResults], multiprocess = options.jobs,
+ logger = logger, verbose=options.verbose)
+
+
diff --git a/doc/examples/paired_end_data.py.rst b/doc/examples/paired_end_data.py.rst
new file mode 100644
index 0000000..cbf1b58
--- /dev/null
+++ b/doc/examples/paired_end_data.py.rst
@@ -0,0 +1,122 @@
+.. _faq.paired_files.code:
+
+############################################################################################################################################################################################################
+Example code for :ref:`FAQ Good practices: "What is the best way of handling data in file pairs (or triplets etc.)?" <faq.paired_files>`
+############################################################################################################################################################################################################
+
+ .. seealso::
+
+ * :ref:`@collate <new_manual.collate>`
+
+
+ .. code-block:: python
+ :emphasize-lines: 10-21,29-31,40-43,70-74
+
+ #!/usr/bin/env python
+ import sys, os
+
+ from ruffus import *
+ import ruffus.cmdline as cmdline
+ from subprocess import check_call
+
+ parser = cmdline.get_argparse(description="Parimala's pipeline?")
+
+ # .
+ # Very flexible handling of input files .
+ # .
+ # input files can be specified flexibly as: .
+ # --input a.fastq b.fastq .
+ # --input a.fastq --input b.fastq .
+ # --input *.fastq --input other/*.fastq .
+ # --input "*.fastq" .
+ # .
+ # The last form is expanded in the script and avoids limitations on command .
+ # line lengths .
+ # .
+ parser.add_argument('-i', '--input', nargs='+', metavar="FILE", action="append", help = "Fastq files")
+
+ options = parser.parse_args()
+
+ # standard python logger which can be synchronised across concurrent Ruffus tasks
+ logger, logger_mutex = cmdline.setup_logging ("PARIMALA", options.log_file, options.verbose)
+
+ # .
+ # Useful code to turn input files into a flat list .
+ # .
+ from glob import glob
+ original_data_files = [fn for grouped in options.input for glob_spec in grouped for fn in glob(glob_spec)] if options.input else []
+ if not original_data_files:
+ original_data_files = [["C1W1_R1.fastq.gz", "C1W1_R2.fastq.gz"]]
+ #raise Exception ("No matching files specified with --input.")
+
+ # <<<---- pipelined functions go here
+
+ #_________________________________________________________________________________
+ # .
+ # Group together file pairs .
+ #_________________________________________________________________________________
+ @collate(original_data_files,
+ # match file name up to the "R1.fastq.gz"
+ formatter("([^/]+)R[12].fastq.gz$"),
+ # Create output parameter supplied to next task
+ ["{path[0]}/{1[0]}paired.R1.fastq.gz", # paired file 1
+ "{path[0]}/{1[0]}paired.R2.fastq.gz"], # paired file 2
+ # Extra parameters for our own convenience and use
+ ["{path[0]}/{1[0]}unpaired.R1.fastq.gz", # unpaired file 1
+ "{path[0]}/{1[0]}unpaired.R2.fastq.gz"], # unpaired file 2
+ logger, logger_mutex)
+ def trim_fastq(input_files, output_paired_files, discarded_unpaired_files, logger, logger_mutex):
+ if len(input_files) != 2:
+ raise Exception("One of read pairs %s missing" % (input_files,))
+ cmd = ("java -jar ~/SPRING-SUMMER_2014/Softwares/Trimmomatic/Trimmomatic-0.32/trimmomatic-0.32.jar "
+ " PE -phred33 "
+ " {input_files[0]} {input_files[1]} "
+ " {output_paired_files[0]} {output_paired_files[1]} "
+ " {discarded_unpaired_files[0]} {discarded_unpaired_files[1]} "
+ " LEADING:30 TRAILING:30 SLIDINGWINDOW:4:15 MINLEN:50 "
+ )
+
+ check_call(cmd.format(**locals()))
+
+ with logger_mutex:
+ logger.debug("Hooray trim_fastq worked")
+
+ #_________________________________________________________________________________
+ # .
+ # Each file pair now makes its way down the rest of the pipeline as .
+ # a couple .
+ #_________________________________________________________________________________
+ @transform(trim_fastq,
+ # regular expression match on first of pe files
+ formatter("([^/]+)paired.R1.fastq.gz$"),
+ # Output parameter supplied to next task
+                   "{path[0]}/{1[0]}.sam",
+
+ # Extra parameters for our own convenience and use
+ "{path[0]}/{1[0]}.pe_soap_pe", # soap intermediate file
+ "{path[0]}/{1[0]}.pe_soap_se", # soap intermediate file
+ logger, logger_mutex)
+ def align_seq(input_files, output_file, soap_pe_output_file, soap_se_output_file, logger, logger_mutex):
+ if len(input_files) != 2:
+ raise Exception("One of read pairs %s missing" % (input_files,))
+ cmd = ("~/SPRING-SUMMER_2014/Softwares/soap2.21release/soap "
+ " -a {input_files[0]} "
+ " -b {input_files[1]} "
+ " -D Y55_genome.fa.index* "
+ " -o {soap_pe_output_file} -2 {soap_se_output_file} -m 400 -x 600")
+
+ check_call(cmd.format(**locals()))
+
+
+ #Soap_to_sam
+ cmd = " perl ~/SPRING-SUMMER_2014/Softwares/soap2sam.pl -p {soap_pe_output_file} > {output_file}"
+
+ check_call(cmd.format(**locals()))
+
+
+ with logger_mutex:
+ logger.debug("Hooray align_seq worked")
+
+
+ cmdline.run (options)
+
diff --git a/doc/faq.rst b/doc/faq.rst
new file mode 100644
index 0000000..2b70f66
--- /dev/null
+++ b/doc/faq.rst
@@ -0,0 +1,980 @@
+.. include:: global.inc
+#############
+FAQ
+#############
+
+**********************************************************
+Citations
+**********************************************************
+
+===============================================================
+Q. How should *Ruffus* be cited in academic publications?
+===============================================================
+
+ The official publication describing the original version of *Ruffus* is:
+
+ `Leo Goodstadt (2010) <http://bioinformatics.oxfordjournals.org/content/early/2010/09/16/bioinformatics.btq524>`_ : **Ruffus: a lightweight Python library for computational pipelines.** *Bioinformatics* 26(21): 2778-2779
+
+
+**********************************************************
+Good practices
+**********************************************************
+
+==================================================================================================================
+Q. What is the best way of keeping my data and workings separate?
+==================================================================================================================
+
+ It is good practice to run your pipeline in a temporary, "working" directory away from your original data.
+
+ The first step of your pipeline might be to make softlinks to your original data in your working directory.
+    Here is some example (relatively paranoid) code to do just this:
+
+ .. code-block:: python
+ :emphasize-lines: 3,5
+
+ def re_symlink (input_file, soft_link_name, logger, logging_mutex):
+ """
+ Helper function: relinks soft symbolic link if necessary
+ """
+            # Guard against soft linking to oneself: disastrous consequences of deleting the original files!!
+ if input_file == soft_link_name:
+ logger.debug("Warning: No symbolic link made. You are using the original data directory as the working directory.")
+ return
+ # Soft link already exists: delete for relink?
+ if os.path.lexists(soft_link_name):
+ # do not delete or overwrite real (non-soft link) file
+ if not os.path.islink(soft_link_name):
+ raise Exception("%s exists and is not a link" % soft_link_name)
+ try:
+ os.unlink(soft_link_name)
+ except:
+ with logging_mutex:
+ logger.debug("Can't unlink %s" % (soft_link_name))
+ with logging_mutex:
+ logger.debug("os.symlink(%s, %s)" % (input_file, soft_link_name))
+ #
+ # symbolic link relative to original directory so that the entire path
+            # can be moved around without breaking everything
+ #
+ os.symlink( os.path.relpath(os.path.abspath(input_file),
+ os.path.abspath(os.path.dirname(soft_link_name))), soft_link_name)
+
+ #
+ # First task should soft link data to working directory
+ #
+ @jobs_limit(1)
+ @mkdir(options.working_dir)
+ @transform( input_files,
+ formatter(),
+ # move to working directory
+ os.path.join(options.working_dir, "{basename[0]}{ext[0]}"),
+ logger, logging_mutex
+ )
+ def soft_link_inputs_to_working_directory (input_file, soft_link_name, logger, logging_mutex):
+ """
+ Make soft link in working directory
+ """
+ with logging_mutex:
+ logger.info("Linking files %(input_file)s -> %(soft_link_name)s\n" % locals())
+ re_symlink(input_file, soft_link_name, logger, logging_mutex)
+
+
+.. _faq.paired_files:
+
+==================================================================================================================
+Q. What is the best way of handling data in file pairs (or triplets etc.)?
+==================================================================================================================
+
+
+ In Bioinformatics, DNA data often consists of only the nucleotide sequence at the two ends of larger fragments.
+ The `paired_end <http://www.illumina.com/technology/next-generation-sequencing/paired-end-sequencing_assay.ilmn>`__ or
+ `mate pair <http://en.wikipedia.org/wiki/Shotgun_sequencing#Whole_genome_shotgun_sequencing>`__ data frequently
+    consists of file pairs with conveniently related names such as "*.R1.fastq" and "*.R2.fastq".
+
+    At some point in the data pipeline, these file pairs or triplets must find each other and be analysed in the same job.
+
+    Provided these file pairs or triplets are named consistently, the easiest way to regroup them is to use the
+ Ruffus :ref:`@collate <new_manual.collate>` decorator. For example:
+
+
+ .. code-block:: python
+
+ @collate(original_data_files,
+
+ # match file name up to the "R1.fastq.gz"
+ formatter("([^/]+)R[12].fastq.gz$"),
+
+ # Create output parameter supplied to next task
+ "{path[0]}/{1[0]}.sam",
+ logger, logger_mutex)
+ def handle_paired_end(input_files, output_paired_files, logger, logger_mutex):
+ # check that we really have a pair of two files not an orphaned singleton
+ if len(input_files) != 2:
+ raise Exception("One of read pairs %s missing" % (input_files,))
+
+ # do stuff here
+
+
+
+ This (incomplete, untested) :ref:`example code <faq.paired_files.code>` shows what this would look like *in vivo*.
+
+
+
+**********************************************************
+General
+**********************************************************
+
+=========================================================
+Q. *Ruffus* won't create dependency graphs
+=========================================================
+
+ A. You need to have installed ``dot`` from `Graphviz <http://www.graphviz.org/>`_ to produce
+    pretty flowcharts like this:
+
+ .. image:: images/pretty_flowchart.png
+
+
+
+
+=========================================================
+Q. *Ruffus* seems to be hanging in the same place
+=========================================================
+
+ A. If *ruffus* is interrupted, for example, by a Ctrl-C,
+ you will often find the following lines of code highlighted::
+
+ File "build/bdist.linux-x86_64/egg/ruffus/task.py", line 1904, in pipeline_run
+ File "build/bdist.linux-x86_64/egg/ruffus/task.py", line 1380, in run_all_jobs_in_task
+ File "/xxxx/python2.6/multiprocessing/pool.py", line 507, in next
+ self._cond.wait(timeout)
+ File "/xxxxx/python2.6/threading.py", line 237, in wait
+ waiter.acquire()
+
+    This is *not* where *ruffus* is hanging, but the boundary between the main programme process
+    and the sub-processes which run *ruffus* jobs in parallel.
+
+    This is naturally where broken execution threads get washed up.
+
+
+
+
+=========================================================
+Q. Regular expression substitutions don't work
+=========================================================
+
+ A. If you are using the special regular expression forms ``"\1"``, ``"\2"`` etc.
+ to refer to matching groups, remember to 'escape' the substitution pattern string.
+ The best option is to use `'raw' python strings <http://docs.python.org/library/re.html>`_.
+ For example:
+
+ ::
+
+ r"\1_substitutes\2correctly\3four\4times"
+
+ Ruffus will throw an exception if it sees an unescaped ``"\1"`` or ``"\2"`` in a file name.
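+
+ For example, a minimal (untested) sketch with hypothetical file names, using raw strings for both the regular expression and the substitution:
+
+ .. code-block:: python
+
+ from ruffus import *
+
+ # hypothetical starting files
+ starting_files = ["run1.sample.fastq", "run2.sample.fastq"]
+
+ @transform(starting_files,
+            # note the r"..." raw strings
+            regex(r"(run\d+)\.(.+)\.fastq$"),
+            r"\1.\2.bam")
+ def map_reads(input_file, output_file):
+     pass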
+
+========================================================================================
+Q. How to force a pipeline to appear up to date?
+========================================================================================
+
+ *I have made a trivial modification to one of my data files and now Ruffus wants to rerun my month long pipeline. How can I convince Ruffus that everything is fine and to leave things as they are?*
+
+ The standard way to do this is to touch all the files downstream...
+ That way the modification times of your analysis files will postdate your existing files.
+ You can do this manually but Ruffus also provides direct support:
+
+ .. code-block:: python
+
+ pipeline_run (touch_files_only = True)
+
+ pipeline_run will run your script normally, stepping over up-to-date tasks and starting
+ with jobs which look out of date. However, none of your pipeline task functions
+ will actually be called; instead, each out-of-date file is `touch <https://en.wikipedia.org/wiki/Touch_(Unix)>`__-ed in
+ turn so that the file modification dates follow on successively.
+
+ See the documentation for :ref:`pipeline_run() <pipeline_functions.pipeline_run>`.
+
+ It is even simpler if you are using the new Ruffus.cmdline support from version 2.4. You can just type
+
+ .. code-block:: bash
+
+ your_script.py --touch_files_only [--other_options_of_your_own_etc]
+
+ See :ref:`command line <new_manual.cmdline>` documentation.
+
+========================================================================================
+Q. How can I use my own decorators with Ruffus?
+========================================================================================
+
+(Thanks to Radhouane Aniba for contributing to this answer.)
+
+A. With care! If the following two points are observed:
+
+____________________________________________________________________________________________________________________________________________________________________________________________________________________
+1. Use `@wraps <https://docs.python.org/2/library/functools.html#functools.wraps>`__ from ``functools`` or Michele Simionato's `decorator <https://pypi.python.org/pypi/decorator>`__ module
+____________________________________________________________________________________________________________________________________________________________________________________________________________________
+
+ These will automatically forward attributes from the task function correctly:
+
+ * ``__name__`` and ``__module__`` are used to identify functions uniquely in a Ruffus pipeline, and
+ * ``pipeline_task`` is used to hold per-task data
+
+__________________________________________________________________________________________________________
+2. Always call Ruffus decorators first before your own decorators.
+__________________________________________________________________________________________________________
+
+ Otherwise, your decorator will be ignored.
+
+ So this works:
+
+ .. code-block:: python
+
+ @follows(prev_task)
+ @custom_decorator(something)
+ def test():
+ pass
+
+ This is a bit futile:
+
+ .. code-block:: python
+
+ # ignore @custom_decorator
+ @custom_decorator(something)
+ @follows(prev_task)
+ def test():
+ pass
+
+
+ This order dependency is an unfortunate quirk of how python decorators work. The last (rather futile)
+ piece of code is equivalent to:
+
+ .. code-block:: python
+
+ test = custom_decorator(something)(ruffus.follows(prev_task)(test))
+
+ Unfortunately, Ruffus has no idea that someone else (``custom_decorator``) is also modifying the ``test()`` function
+ after it (``ruffus.follows``) has had its go.
+
+
+
+_____________________________________________________
+Example decorator:
+_____________________________________________________
+
+ Let us look at a decorator to time jobs:
+
+ .. code-block:: python
+
+ import sys, time
+ def time_func_call(func, stream, *args, **kwargs):
+ """prints elapsed time to standard out, or any other file-like object with a .write() method.
+ """
+ start = time.time()
+ # Run the decorated function.
+ ret = func(*args, **kwargs)
+ # Stop the timer.
+ end = time.time()
+ elapsed = end - start
+ stream.write("{} took {} seconds\n".format(func.__name__, elapsed))
+ return ret
+
+
+ from ruffus import *
+ import sys
+ import time
+
+ @time_job(sys.stderr)
+ def first_task():
+ print "First task"
+
+
+ @follows(first_task)
+ @time_job(sys.stderr)
+ def second_task():
+ print "Second task"
+
+
+ @follows(second_task)
+ @time_job(sys.stderr)
+ def final_task():
+ print "Final task"
+
+ pipeline_run()
+
+
+ What would ``@time_job`` look like?
+
+__________________________________________________________________________________________________________
+1. Using functools `@wraps <https://docs.python.org/2/library/functools.html#functools.wraps>`__
+__________________________________________________________________________________________________________
+
+
+ .. code-block:: python
+
+ import functools
+ def time_job(stream=sys.stdout):
+ def actual_time_job(func):
+ @functools.wraps(func)
+ def wrapper(*args, **kwargs):
+ return time_func_call(func, stream, *args, **kwargs)
+ return wrapper
+ return actual_time_job
+
+__________________________________________________________________________________________________________
+2. Using Michele Simionato's `decorator <https://pypi.python.org/pypi/decorator>`__ module
+__________________________________________________________________________________________________________
+
+
+ .. code-block:: python
+
+ import decorator
+ def time_job(stream=sys.stdout):
+ def time_job(func, *args, **kwargs):
+ return time_func_call(func, stream, *args, **kwargs)
+ return decorator.decorator(time_job)
+
+
+_______________________________________________________________________________________________________________________________________________________________
+3. By hand, using a `callable object <https://docs.python.org/2/reference/datamodel.html#emulating-callable-objects>`__
+_______________________________________________________________________________________________________________________________________________________________
+
+
+ .. code-block:: python
+
+ class time_job(object):
+ def __init__(self, stream=sys.stdout):
+ self.stream = stream
+ def __call__(self, func):
+ def inner(*args, **kwargs):
+ return time_func_call(func, self.stream, *args, **kwargs)
+ # remember to forward __name__
+ inner.__name__ = func.__name__
+ inner.__module__ = func.__module__
+ inner.__doc__ = func.__doc__
+ if hasattr(func, "pipeline_task"):
+ inner.pipeline_task = func.pipeline_task
+ return inner
+
+
+
+
+
+========================================================================================
+Q. Can a task function in a *Ruffus* pipeline be called normally outside of Ruffus?
+========================================================================================
+ A. Yes. Most python decorators wrap themselves around a function. However, *Ruffus* leaves the
+ original function untouched and unwrapped. Instead, *Ruffus* adds a ``pipeline_task`` attribute
+ to the task function to signal that this is a pipelined function.
+
+ This means the original task function can be called just like any other python function.
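+
+ For example, a minimal sketch (with hypothetical file names) of calling a pipelined task function directly, e.g. from a unit test:
+
+ .. code-block:: python
+
+ from ruffus import *
+
+ @transform(["data.txt"], suffix(".txt"), ".summary")
+ def summarise(input_file, output_file):
+     pass
+
+ # summarise() is still a plain python function and can be called directly
+ summarise("other_data.txt", "other_data.summary")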
+
+=====================================================================================================================
+Q. My *Ruffus* tasks create two files at a time. Why is the second one ignored in successive stages of my pipeline?
+=====================================================================================================================
+ *This is my code:*
+
+ ::
+
+ from ruffus import *
+ import sys
+ @transform("start.input", regex(".+"), ("first_output.txt", "second_output.txt"))
+ def task1(i,o):
+ pass
+
+ @transform(task1, suffix(".txt"), ".result")
+ def task2(i, o):
+ pass
+
+ pipeline_printout(sys.stdout, [task2], verbose=3)
+
+ ::
+
+ ________________________________________
+ Tasks which will be run:
+
+ Task = task1
+ Job = [start.input
+ ->[first_output.txt, second_output.txt]]
+
+ Task = task2
+ Job = [[first_output.txt, second_output.txt]
+ ->first_output.result]
+
+ ________________________________________
+
+ A: This code produces a single output of a tuple of 2 files. In fact, you want two
+ outputs, each consisting of 1 file.
+
+ You want a single job (single input) to produce multiple outputs (multiple jobs
+ in downstream tasks). This is a one-to-many operation which calls for
+ :ref:`@split <decorators.split>`:
+
+ ::
+
+ from ruffus import *
+ import sys
+ @split("start.input", ("first_output.txt", "second_output.txt"))
+ def task1(i,o):
+ pass
+
+ @transform(task1, suffix(".txt"), ".result")
+ def task2(i, o):
+ pass
+
+ pipeline_printout(sys.stdout, [task2], verbose=3)
+
+ ::
+
+ ________________________________________
+ Tasks which will be run:
+
+ Task = task1
+ Job = [start.input
+ ->[first_output.txt, second_output.txt]]
+
+ Task = task2
+ Job = [first_output.txt
+ ->first_output.result]
+ Job = [second_output.txt
+ ->second_output.result]
+
+ ________________________________________
+
+
+=======================================================================================
+Q. How can a *Ruffus* task produce output which goes off in different directions?
+=======================================================================================
+
+ A. As above, anytime there is a situation which requires a one-to-many operation, you should reach
+ for :ref:`@subdivide <decorators.subdivide>`. The advanced form takes a regular expression, making
+ it easier to produce multiple derivatives of the input file. The following example subdivides
+ *2* jobs each into *3*, so that the subsequent task will run *2* x *3* = *6* jobs.
+
+ ::
+
+ from ruffus import *
+ import sys
+ @subdivide(["1.input_file",
+ "2.input_file"],
+ regex(r"(.+).input_file"), # match file prefix
+ [r"\1.file_type1",
+ r"\1.file_type2",
+ r"\1.file_type3"])
+ def split_task(input, output):
+ pass
+
+
+ @transform(split_task, regex("(.+)"), r"\1.test")
+ def test_split_output(i, o):
+ pass
+
+ pipeline_printout(sys.stdout, [test_split_output], verbose = 3)
+
+ Each of the original 2 files has been split into three, so that test_split_output will run
+ 6 jobs simultaneously.
+
+ ::
+
+ ________________________________________
+ Tasks which will be run:
+
+ Task = split_task
+ Job = [1.input_file ->[1.file_type1, 1.file_type2, 1.file_type3]]
+ Job = [2.input_file ->[2.file_type1, 2.file_type2, 2.file_type3]]
+
+ Task = test_split_output
+ Job = [1.file_type1 ->1.file_type1.test]
+ Job = [1.file_type2 ->1.file_type2.test]
+ Job = [1.file_type3 ->1.file_type3.test]
+ Job = [2.file_type1 ->2.file_type1.test]
+ Job = [2.file_type2 ->2.file_type2.test]
+ Job = [2.file_type3 ->2.file_type3.test]
+ ________________________________________
+
+
+
+=======================================================================================
+Q. Can I call extra code before each job?
+=======================================================================================
+ A. This is easily accomplished by hijacking the process
+ for checking if jobs are up to date or not (:ref:`@check_if_uptodate <decorators.check_if_uptodate>`):
+
+ ::
+
+ from ruffus import *
+ import sys
+
+ def run_this_before_each_job (*args):
+ print "Calling function before each job using these args", args
+ # Remember to delegate to the default *Ruffus* code for checking if
+ # jobs need to run.
+ return needs_update_check_modify_time(*args)
+
+ @check_if_uptodate(run_this_before_each_job)
+ @files([[None, "a.1"], [None, "b.1"]])
+ def task_func(input, output):
+ pass
+
+ pipeline_printout(sys.stdout, [task_func])
+
+ This results in:
+ ::
+
+ ________________________________________
+ >>> pipeline_run([task_func])
+ Calling function before each job using these args (None, 'a.1')
+ Calling function before each job using these args (None, 'a.1')
+ Calling function before each job using these args (None, 'b.1')
+ Job = [None -> a.1] completed
+ Job = [None -> b.1] completed
+ Completed Task = task_func
+
+ .. note::
+
+ Because ``run_this_before_each_job(...)`` is called whenever *Ruffus* checks to see if
+ a job is up to date or not, the function may be called twice for some jobs
+ (e.g. ``(None, 'a.1')`` above).
+
+
+=========================================================================================================
+Q. Does *Ruffus* allow checkpointing: to distinguish interrupted and completed results?
+=========================================================================================================
+
+_____________________________________________________
+A. Use the builtin sqlite checkpointing
+_____________________________________________________
+
+
+ By default, ``pipeline_run(...)`` will save the timestamps for output files from successfully run jobs to an sqlite database file (``.ruffus_history.sqlite``) in the current directory.
+
+ * If you are using ``Ruffus.cmdline``, you can change the checksum / timestamp database file name on the command line using ``--checksum_file_name NNNN``
+
+
+ The level of timestamping / checksumming can be set via the ``checksum_level`` parameter:
+
+ .. code-block:: python
+
+ pipeline_run(..., checksum_level = N, ...)
+
+ where the default is 1::
+
+ level 0 : Use only file timestamps
+ level 1 : above, plus timestamp of successful job completion
+ level 2 : above, plus a checksum of the pipeline function body
+ level 3 : above, plus a checksum of the pipeline function default arguments and the additional arguments passed in by task decorators
+
+_____________________________________________________
+A. Use a flag file
+_____________________________________________________
+
+ When gmake is interrupted, it will delete the target file it is updating so that the target is
+ remade from scratch when make is next run. Ruffus, by design, does not do this because, more often than
+ not, the partial / incomplete file may be useful, if only to reveal, for example, what caused the interrupting error
+ or exception. It also seems a bit too clever and underhand to go around the programmer's back to delete files...
+
+ A common *Ruffus* convention is to create an empty checkpoint or "flag" file whose sole purpose
+ is to record a modification-time and the successful completion of a job.
+
+ This would be a task with a completion flag:
+
+ ::
+
+ #
+ # Assuming a pipelined task function named "stage1"
+ #
+ @transform(stage1, suffix(".stage1"), [".stage2", ".stage2_finished"] )
+ def stage2 (input_files, output_files):
+ task_output_file, flag_file = output_files
+ cmd = ("do_something2 %(input_file)s >| %(task_output_file)s ")
+ cmd = cmd % {
+ "input_file": input_files[0],
+ "task_output_file": task_output_file
+ }
+ if not os.system( cmd ):
+ #88888888888888888888888888888888888888888888888888888888888888888888888888888
+ #
+ # It worked: Create completion flag_file
+ #
+ open(flag_file, "w")
+ #
+ #88888888888888888888888888888888888888888888888888888888888888888888888888888
+
+
+ The flag files ``xxx.stage2_finished`` indicate that each job is finished. If this is missing,
+ ``xxx.stage2`` is only a partial, interrupted result.
+
+
+ The only thing to be aware of is that the flag file will appear in the list of inputs of the
+ downstream task, which should accordingly look like this:
+
+
+ ::
+
+ @transform(stage2, suffix(".stage2"), [".stage3", ".stage3_finished"] )
+ def stage3 (input_files, output_files):
+
+ #888888888888888888888888888888888888888888888888888888888888888888888888888888888
+ #
+ # Note that the first parameter is a LIST of input files, the last of which
+ # is the flag file from the previous task which we can ignore
+ #
+ input_file, previous_flag_file = input_files
+ #
+ #888888888888888888888888888888888888888888888888888888888888888888888888888888888
+ task_output_file, flag_file = output_files
+ cmd = ("do_something3 %(input_file)s >| %(task_output_file)s ")
+ cmd = cmd % {
+ "input_file": input_file,
+ "task_output_file": task_output_file
+ }
+ # completion flag file for this task
+ if not os.system( cmd ):
+ open(flag_file, "w")
+
+
+ The :ref:`Bioinformatics example<examples_bioinformatics_part2.step2>` contains :ref:`code <examples_bioinformatics_part2_code>` for checkpointing.
+
+
+_____________________________________________________
+A. Use a temp file
+_____________________________________________________
+
+ Thanks to Martin Goodson for suggesting this and providing an example. In his words:
+
+ "I normally use a decorator to create a temporary file which is only renamed after the task has completed without any problems. This seems a more elegant solution to the problem:"
+
+
+ .. code-block:: python
+
+ def usetemp(task_func):
+ """ Decorate a function to write to a tmp file and then rename it. So half finished tasks cannot create up to date targets.
+ """
+ @wraps(task_func)
+ def wrapper_function(*args, **kwargs):
+ args=list(args)
+ outnames=args[1]
+ if not isinstance(outnames, basestring) and hasattr(outnames, '__getitem__'):
+ tmpnames=[str(x)+".tmp" for x in outnames]
+ args[1]=tmpnames
+ task_func(*args, **kwargs)
+ try:
+ for tmp, name in zip(tmpnames, outnames):
+ if os.path.exists(tmp):
+ os.rename(tmp, str(name))
+ except BaseException as e:
+ for name in outnames:
+ if os.path.exists(name):
+ os.remove(name)
+ raise (e)
+ else:
+ tmp=str(outnames)+'.tmp'
+ args[1]=tmp
+ task_func(*args, **kwargs)
+ os.rename(tmp, str(outnames))
+ return wrapper_function
+
+
+ Use like this:
+
+ .. code-block:: python
+
+ @files(None, 'client1.price')
+ @usetemp
+ def getusers(inputfile, outputname):
+ #**************************************************
+ # code goes here
+ # outputname now refers to temporary file
+ pass
+
+
+
+
+
+**********************************************************
+Windows
+**********************************************************
+
+=========================================================
+Q. Windows seems to spawn *ruffus* processes recursively
+=========================================================
+
+ A. It is necessary to protect the "entry point" of the program under windows.
+ Otherwise, a new process will be started each time the main module is imported
+ by a new Python interpreter as an unintended side effect, causing a cascade
+ of new processes.
+
+ See: http://docs.python.org/library/multiprocessing.html#multiprocessing-programming
+
+ This code works::
+
+ if __name__ == '__main__':
+ try:
+ pipeline_run([parallel_task], multiprocess = 5)
+ except Exception, e:
+ print e.args
+
+
+
+**********************************************************
+Sun Grid Engine / PBS / SLURM etc
+**********************************************************
+
+==========================================================================================================================================
+Q. Can Ruffus be used to manage a cluster or grid based pipeline?
+==========================================================================================================================================
+ A. Some minimal modifications have to be made to your *Ruffus* script to allow it to submit jobs to a cluster.
+
+ See :ref:`ruffus.drmaa_wrapper <new_manual.ruffus.drmaa_wrapper.run_job>`
+
+ Thanks to Andreas Heger and others at CGAT and Bernie Pope for contributing ideas and code.
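+
+ A minimal, untested sketch of the general pattern (the ``run_job()`` parameters are documented at the link above; the shell command and file names here are purely illustrative):
+
+ .. code-block:: python
+
+ from ruffus import *
+ from ruffus.drmaa_wrapper import run_job
+ import drmaa
+
+ # one shared drmaa session for the whole pipeline
+ drmaa_session = drmaa.Session()
+ drmaa_session.initialize()
+
+ @transform(["input.data"], suffix(".data"), ".result")
+ def cluster_task(input_file, output_file):
+     # submit the (illustrative) command to the cluster and wait for it to finish
+     run_job(cmd_str       = "do_something %s > %s" % (input_file, output_file),
+             drmaa_session = drmaa_session)
+
+ # run with multithread (not multiprocess); see the next question
+ pipeline_run([cluster_task], multithread = 5)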
+
+
+==========================================================================================================================================
+Q. When I submit lots of jobs via Sun Grid Engine (SGE), the head node occasionally freezes and dies
+==========================================================================================================================================
+
+ A. You need to use multithreading rather than multiprocessing. See :ref:`ruffus.drmaa_wrapper <new_manual.ruffus.drmaa_wrapper.run_job>`
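+
+ For example (a minimal sketch; ``final_task`` is a hypothetical target task):
+
+ .. code-block:: python
+
+ # instead of: pipeline_run([final_task], multiprocess = 100)
+ # use threads, since the jobs mostly sit waiting for the cluster:
+ pipeline_run([final_task], multithread = 100)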
+
+
+=====================================================================
+Q. Keeping large intermediate files
+=====================================================================
+
+ Sometimes pipelines create a large number of intermediate files which might not be needed later.
+
+ Unfortunately, the current design of *Ruffus* requires these files to hang around; otherwise the pipeline
+ will not know that it ran successfully.
+
+ We have some tentative plans to get around this but in the meantime, Bernie Pope suggests
+ truncating intermediate files in place, preserving timestamps::
+
+
+ # truncate a file to zero bytes, and preserve its original modification time
+ def zeroFile(file):
+ if os.path.exists(file):
+ # save the current time of the file
+ timeInfo = os.stat(file)
+ try:
+ f = open(file,'w')
+ except IOError:
+ pass
+ else:
+ f.truncate(0)
+ f.close()
+ # change the time of the file back to what it was
+ os.utime(file,(timeInfo.st_atime, timeInfo.st_mtime))
+
+**********************************************************************************
+Sharing python objects between Ruffus processes running concurrently
+**********************************************************************************
+
+ The design of Ruffus envisages that much of the data flow in pipelines occurs in files but it is also possible to pass python objects in memory.
+
+ Ruffus uses the `multiprocessing <http://docs.python.org/2/library/multiprocessing.html>`_ module and much of the following is a summary of what is covered
+ in depth in the Python Standard Library `Documentation <http://docs.python.org/2/library/multiprocessing.html#sharing-state-between-processes>`_.
+
+ Running Ruffus using ``pipeline_run(..., multiprocess = NNN)`` where ``NNN`` > 1 runs each job concurrently on up to ``NNN`` separate local processes.
+ Each task function runs independently in a different python interpreter, possibly on a different CPU, in the most efficient way.
+ However, this does mean we have to pay some attention to how data is sent across process boundaries (unlike the situation with ``pipeline_run(..., multithread = NNN)`` ).
+
+ The python code and data which comprise your multitasking Ruffus job are sent to a separate process in three ways:
+
+ #. The python function code and data objects are `pickled <http://docs.python.org/2/library/pickle.html>`__, i.e. converted into a byte stream, by the master process and sent to the remote process
+ before being converted back into normal python (unpickled).
+ #. The parameters for your jobs, i.e. what Ruffus calls your task functions with, are separately `pickled <http://docs.python.org/2/library/pickle.html>`__ and sent to the remote process via
+ `multiprocessing.Queue <http://docs.python.org/2/library/multiprocessing.html#multiprocessing.Queue>`_
+ #. You can share and synchronise other data yourselves. The canonical example is the logger provided by ``Ruffus.cmdline.setup_logging``
+
+ .. note::
+
+ Check that your function code and data can be `pickled <http://docs.python.org/2/library/pickle.html#what-can-be-pickled-and-unpickled>`_.
+
+ Only functions, built-in functions and classes defined at the top level of a module are picklable.
+
+
+ The following answers are a short "how-to" for sharing and synchronising data yourselves.
+
+
+==============================================================================
+Can ordinary python objects be shared between processes?
+==============================================================================
+
+ A. Objects which can be `pickled <http://docs.python.org/2/library/pickle.html>`__ can be shared as is. These include
+
+ * numbers
+ * strings
+ * tuples, lists, sets, and dictionaries containing only objects which can be `pickled <http://docs.python.org/2/library/pickle.html>`__.
+
+ #. If these do not change during your pipeline, you can just use them without any further effort in your task.
+ #. If you need to use the value at the point when the task function is *called*, then you need to pass the python object as a parameter to your task.
+ For example:
+
+ .. code-block:: python
+ :emphasize-lines: 1
+
+ # changing_list changes...
+ @transform(previous_task, suffix(".foo"), ".bar", changing_list)
+ def next_task(input_file, output_file, changing_list):
+ pass
+
+ #. If you need to use the value when the task function is *run* then see :ref:`the following answer. <how-about-synchronising-python-objects-in-real-time>`.
+
+
+================================================================================================
+Why am I getting ``PicklingError``?
+================================================================================================
+
+ What is happening? Didn't `Joan of Arc <https://en.wikipedia.org/wiki/Battle_of_the_Herrings>`_ solve this once and for all?
+
+ A. Some of the data or code in your function cannot be `pickled <http://docs.python.org/2/library/pickle.html>`__ but needs to be sent by python ``multiprocessing`` across process boundaries.
+
+
+ When you run your pipeline using multiprocess:
+
+ .. code-block:: python
+
+ pipeline_run([], verbose = 5, multiprocess = 5, logger = ruffusLoggerProxy)
+
+ You will get the following errors:
+
+ .. code-block:: python
+
+ Exception in thread Thread-2:
+ Traceback (most recent call last):
+ File "/path/to/python/python2.7/threading.py", line 808, in __bootstrap_inner
+ self.run()
+ File "/path/to/python/python2.7/threading.py", line 761, in run
+ self.__target(*self.__args, **self.__kwargs)
+ File "/path/to/python/python2.7/multiprocessing/pool.py", line 342, in _handle_tasks
+ put(task)
+ PicklingError: Can't pickle <type 'function'>: attribute lookup __builtin__.function failed
+
+
+ which go away when you set ``pipeline_run([], multiprocess = 1, ...)``
+
+
+
+
+ Unfortunately, pickling errors are particularly ill-served by standard python error messages. The only really good advice is to take the offending
+ code, try to `pickle <http://docs.python.org/2/library/pickle.html>`__ it yourself and narrow down the errors. Check your objects against the list
+ in the `pickle <http://docs.python.org/2/library/pickle.html#what-can-be-pickled-and-unpickled>`_ module.
+ Watch out especially for nested functions. These will have to be moved to file scope.
+ Other objects may have to be passed in proxy (see below).
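+
+ A minimal sketch of this kind of detective work (``suspect_function`` and ``suspect_parameter`` are hypothetical stand-ins for your own code and data):
+
+ .. code-block:: python
+
+ import pickle
+
+ # try to pickle each suspect object / function in turn
+ for name, suspect in [("suspect_function",  suspect_function),
+                       ("suspect_parameter", suspect_parameter)]:
+     try:
+         pickle.dumps(suspect)
+     except Exception as e:
+         print "%s cannot be pickled: %s" % (name, e)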
+
+
+.. _how-about-synchronising-python-objects-in-real-time:
+
+================================================================================================
+How about synchronising python objects in real time?
+================================================================================================
+
+ A. You can use managers and proxy objects from the `multiprocessing <http://docs.python.org/library/multiprocessing.html>`__ module.
+
+ The underlying python object would be owned and managed by a (hidden) server process. Other processes can access the shared objects transparently by using proxies. This is how the logger provided by
+ ``Ruffus.cmdline.setup_logging`` works:
+
+ .. code-block:: python
+
+ # optional logger which can be passed to ruffus tasks
+ logger, logger_mutex = cmdline.setup_logging (__name__, options.log_file, options.verbose)
+
+ ``logger`` is a proxy for the underlying python `logger <http://docs.python.org/2/library/logging.html>`__ object, and it can be shared freely between processes.
+
+ The best course is to pass ``logger`` as a parameter to a *Ruffus* task.
+
+ The only caveat is that we should make sure multiple jobs are not writing to the log at the same time. To synchronise logging, we use a proxy to a non-reentrant `multiprocessing.lock <http://docs.python.org/2/library/multiprocessing.html#multiprocessing.Lock>`_.
+
+ .. code-block:: python
+
+ logger, logger_mutex = cmdline.setup_logging (__name__, options.log_file, options.verbose)
+
+
+ @transform(previous_task, suffix(".foo"), ".bar", logger, logger_mutex)
+ def next_task(input_file, output_file, logger, logger_mutex):
+ with logger_mutex:
+ logger.info("We are in the middle of next_task: %s -> %s" % (input_file, output_file))
+
+
+==============================================================================
+Can I share and synchronise my own python classes via proxies?
+==============================================================================
+
+ A. `multiprocessing.managers.SyncManager <http://docs.python.org/2/library/multiprocessing.html#multiprocessing.managers.SyncManager>`__ provides out of the box support for lists, arrays and dicts etc.
+
+ Most of the time, we can use a "vanilla" manager provided by `multiprocessing.Manager() <http://docs.python.org/2/library/multiprocessing.html#multiprocessing.sharedctypes.multiprocessing.Manager>`_:
+
+ .. code-block:: python
+
+
+ import multiprocessing
+ manager = multiprocessing.Manager()
+
+ list_proxy = manager.list()
+ dict_proxy = manager.dict()
+ lock_proxy = manager.Lock()
+ namespace_proxy = manager.Namespace()
+ queue_proxy = manager.Queue()              # optionally manager.Queue(maxsize)
+ reentrant_lock_proxy = manager.RLock()
+ semaphore_proxy = manager.Semaphore()      # optionally manager.Semaphore(value)
+ char_array_proxy = manager.Array('c', "hello")
+ integer_proxy = manager.Value('i', 6)
+
+ @transform(previous_task, suffix(".foo"), ".bar", lock_proxy, dict_proxy, list_proxy)
+ def next_task(input_file, output_file, lock_proxy, dict_proxy, list_proxy):
+ with lock_proxy:
+ list_proxy.append(3)
+ dict_proxy['a'] = 5
+
+
+ However, you can also create proxies for your own custom classes.
+
+ In this case you may need to derive from `multiprocessing.managers.SyncManager <http://docs.python.org/2/library/multiprocessing.html#multiprocessing.managers.SyncManager>`_
+ and register proxy functions. See ``Ruffus.proxy_logger`` for an example of how to do this.
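+
+ A minimal, untested sketch of the general approach, using a hypothetical ``Counter`` class:
+
+ .. code-block:: python
+
+ from multiprocessing.managers import SyncManager
+
+ # an ordinary class we would like to share between processes
+ class Counter(object):
+     def __init__(self):
+         self.count = 0
+     def increment(self):
+         self.count += 1
+         return self.count
+
+ class PipelineManager(SyncManager):
+     pass
+
+ # register the class so the manager can create instances and hand out proxies
+ PipelineManager.register("Counter", Counter)
+
+ manager = PipelineManager()
+ manager.start()
+
+ # counter_proxy can be passed to Ruffus tasks like any other proxy
+ counter_proxy = manager.Counter()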
+
+============================================================================================================================================================
+How do I send python objects back and forth without tangling myself in horrible synchronisation code?
+============================================================================================================================================================
+
+ A. Sharing python objects by passing messages is a much more modern and safer way to coordinate multitasking than using synchronization primitives like locks.
+
+ The python `multiprocessing <http://docs.python.org/2/library/multiprocessing.html#pipes-and-queues>`__ module provides support for passing python objects as messages between processes.
+ You can either use `pipes <http://docs.python.org/2/library/multiprocessing.html#multiprocessing.Pipe>`__
+ or `queues <http://docs.python.org/2/library/multiprocessing.html#multiprocessing.Queue>`__.
+ The idea is that one process pushes an object onto a `pipe <http://docs.python.org/2/library/multiprocessing.html#multiprocessing.Pipe>`__ or `queue <http://docs.python.org/2/library/multiprocessing.html#multiprocessing.Queue>`__
+ and another process pops it out at the other end. `Pipes <http://docs.python.org/2/library/multiprocessing.html#multiprocessing.Pipe>`__ are
+ only two ended so `queues <http://docs.python.org/2/library/multiprocessing.html#multiprocessing.Queue>`__ are usually a better fit for sending data to multiple Ruffus jobs.
+
+ Proxies for `queues <http://docs.python.org/2/library/multiprocessing.html#multiprocessing.managers.SyncManager.Queue>`__ can be passed between processes as in the previous section.
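+
+ For example, a minimal sketch in which each job pushes a message onto a shared queue proxy (``previous_task`` is assumed to be an upstream task):
+
+ .. code-block:: python
+
+ import multiprocessing
+ from ruffus import *
+
+ manager = multiprocessing.Manager()
+ result_queue = manager.Queue()
+
+ @transform(previous_task, suffix(".foo"), ".bar", result_queue)
+ def next_task(input_file, output_file, result_queue):
+     # push a message; the main process pops it off after the pipeline finishes
+     result_queue.put((input_file, "finished"))
+
+ pipeline_run([next_task], multiprocess = 5)
+
+ # drain the queue in the main process
+ while not result_queue.empty():
+     print result_queue.get()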
+
+
+==============================================================================
+How do I share large amounts of data efficiently across processes?
+==============================================================================
+
+ A. If it is really impractical to use data files on disk, you can put the data in shared memory.
+
+ It is possible to create shared objects using shared memory which can be inherited by child processes or passed as Ruffus parameters.
+ This is probably most efficiently done via the `array <http://docs.python.org/2/library/multiprocessing.html#multiprocessing.Array>`_
+ interface. Again, it is easy to create locks and proxies for synchronised access:
+
+
+ .. code-block:: python
+
+ import multiprocessing
+
+ manager = multiprocessing.Manager()
+
+ lock_proxy = manager.Lock()
+ int_array_proxy = manager.Array('i', [123] * 100)
+
+ @transform(previous_task, suffix(".foo"), ".bar", lock_proxy, int_array_proxy)
+ def next_task(input_file, output_file, lock_proxy, int_array_proxy):
+ with lock_proxy:
+ int_array_proxy[23] = 71
+
+
+
+
diff --git a/doc/gallery.rst b/doc/gallery.rst
new file mode 100644
index 0000000..90ff20a
--- /dev/null
+++ b/doc/gallery.rst
@@ -0,0 +1,63 @@
+.. include:: global.inc
+
+.. image:: images/logo.jpg
+
+******************************************************
+Hall of Fame: User contributed flowcharts
+******************************************************
+Please contribute your own workflows in your favourite colours with an (optional) short description
+to email: ruffus_lib at llew.org.uk
+
+
+
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+RNASeq pipeline
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+ http://en.wikipedia.org/wiki/RNA-Seq
+
+ Mapping transcripts onto genomes using high-throughput sequencing technologies (:download:`svg <images/gallery/gallery_rna_seq.svg>`).
+
+ .. image:: images/gallery/gallery_rna_seq.png
+ :target: _downloads/gallery_rna_seq.svg
+
+
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+non-coding evolutionary constraints
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+ http://en.wikipedia.org/wiki/Noncoding_DNA
+
+ Non-protein coding evolutionary constraints in different species (:download:`svg <images/gallery/gallery_dless.svg>`).
+
+ .. image:: images/gallery/gallery_dless.png
+ :target: _downloads/gallery_dless.svg
+
+^^^^^^^^^^^^^^^^^
+SNP annotation
+^^^^^^^^^^^^^^^^^
+Predicting impact of different Single Nucleotide Polymorphisms
+
+http://en.wikipedia.org/wiki/Single-nucleotide_polymorphism
+
+Population variation across genomes (:download:`svg <images/gallery/gallery_snp_annotation.svg>`).
+
+.. image:: images/gallery/gallery_snp_annotation.png
+ :target: _downloads/gallery_snp_annotation.svg
+
+Using "pseudo" targets to run only part of the pipeline (:download:`svg <images/gallery/gallery_snp_annotation_consequences.svg>`).
+
+.. image:: images/gallery/gallery_snp_annotation_consequences.png
+ :target: _downloads/gallery_snp_annotation_consequences.svg
+
+
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+ChIP-Seq analysis
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Analysing DNA binding sites with ChIP-Seq
+http://en.wikipedia.org/wiki/Chip-Sequencing
+
+ (:download:`svg <images/gallery/gallery_big_pipeline.svg>`)
+
+ .. image:: images/gallery/gallery_big_pipeline.png
+ :target: _downloads/gallery_big_pipeline.svg
+
diff --git a/doc/global.inc b/doc/global.inc
new file mode 100644
index 0000000..034a3b6
--- /dev/null
+++ b/doc/global.inc
@@ -0,0 +1,4 @@
+.. |glob| replace:: `glob`
+.. _glob: http://docs.python.org/library/glob.html
+
+
diff --git a/doc/glossary.rst b/doc/glossary.rst
new file mode 100644
index 0000000..9d86bab
--- /dev/null
+++ b/doc/glossary.rst
@@ -0,0 +1,81 @@
+.. include:: global.inc
+****************
+Glossary
+****************
+.. _Glossary:
+
+.. _glossary.task:
+
+.. glossary::
+
+
+
+ task
+ A stage in a computational pipeline.
+
+ Each **task** in *ruffus* is represented by a python function.
+
+ For example, a task might be to find the products of sets of two numbers::
+
+ 4 x 5 = 20
+ 5 x 6 = 30
+ 2 x 7 = 14
+
+ job
+ Any number of operations which can be run in parallel and make up
+ the work in a stage of a computational pipeline.
+
+ Each **job** in *ruffus* is a separate call to the **task** function.
+
+ For example, if a task is to find the products of sets of numbers, each multiplication will be a separate job.
+
+ Job1::
+
+ 4 x 5 = 20
+
+ Job2::
+
+ 5 x 6 = 30
+
+ Job3::
+
+ 2 x 7 = 14
+
+ Jobs need not complete in order.
+
+
+ decorator
+ Ruffus decorators allow functions to be incorporated into a computational
+ pipeline, with automatic generation of parameters, dependency checking etc.,
+ without modifying any code within the function.
+ Quoting from the `python wiki <http://wiki.python.org/moin/PythonDecorators>`_:
+
+ A Python decorator is a specific change to the Python syntax that
+ allows us to more conveniently alter functions and methods.
+
+ Decorators dynamically alter the functionality of a function, method, or
+ class without having to directly use subclasses or change the source code
+ of the function being decorated.
+
+
+
+ generator
+ python generators are new to python 2.2
+ (see `Charming Python: Iterators and simple generators <http://www.ibm.com/developerworks/library/l-pycon.html>`_).
+ They allow iterable data to be generated on the fly.
+
+ Ruffus asks for generators when you want to generate **job** parameters dynamically.
+
+ Each set of job parameters is returned by the ``yield`` keyword for
+ greater clarity. For example::
+
+ def generate_job_parameters():
+
+ for file_index, file_name in enumerate(all_file_names):
+
+ # parameter for each job
+ yield file_index, file_name
+
+ Each job takes the parameters ``file_index`` and ``file_name``
+
+
diff --git a/doc/history.rst b/doc/history.rst
new file mode 100644
index 0000000..c0a8f05
--- /dev/null
+++ b/doc/history.rst
@@ -0,0 +1,733 @@
+.. include:: global.inc
+
+
+########################################
+Major Features added to Ruffus
+########################################
+
+.. note::
+
+ See :ref:`To do list <todo>` for future enhancements to Ruffus
+
+
+********************************************************************
+version 2.5RC
+********************************************************************
+
+ 31st July 2014: Release Candidate
+
+ 5th August 2014: Release
+
+============================================================================================================================================================
+1) Python3 compatibility (but at least python 2.6 is now required)
+============================================================================================================================================================
+
+ Ruffus v2.5 is now python3 compatible. This has required surprisingly many changes to the codebase. Please report any bugs to me.
+
+ .. note::
+
+ **Ruffus now requires at least python 2.6**
+
+ It proved to be impossible to support python 2.5 and python 3.x at the same time.
+
+============================================================================================================================================================
+2) Ctrl-C interrupts
+============================================================================================================================================================
+
+ Ruffus now mostly(!) terminates gracefully when interrupted by Ctrl-C.
+
+ Please send me bug reports for when this doesn't work with a minimally reproducible case.
+
+ This means that, in general, if an ``Exception`` is thrown during your pipeline and you don't want to wait for the rest of the jobs to complete, you can press Ctrl-C at any point.
+ Note that you may still need to clean up spawned processes, for example, using ``qdel`` if you are using ``Ruffus.drmaa_wrapper``
+
+============================================================================================================================================================
+3) Customising flowcharts in pipeline_printout_graph() with ``@graphviz``
+============================================================================================================================================================
+
+ *Contributed by Sean Davis, with improved syntax via Jake Biesinger*
+
+ The graphic for each task can have its own attributes (URL, shape, colour etc.) by adding
+ `graphviz attributes <http://www.graphviz.org/doc/info/attrs.html>`__
+ using the ``@graphviz`` decorator.
+
+ * This allows HTML formatting in the task names (using the ``label`` parameter as in the following example).
+ HTML labels **must** be enclosed in ``<`` and ``>``. E.g.
+
+ .. code-block:: python
+
+ label = "<Line <BR/> wrapped task_name()>"
+
+ * You can also opt to keep the task name and wrap it with a prefix and suffix:
+
+ .. code-block:: python
+
+ label_suffix = "??? ", label_prefix = ": What is this?"
+
+ * The ``URL`` attribute allows the generation of clickable svg, and also client / server
+ side image maps usable in web pages.
+ See `Graphviz documentation <http://www.graphviz.org/content/output-formats#dimap>`__
+
+
+ Example:
+
+ .. code-block:: python
+
+
+ @graphviz(URL='"http://cnn.com"', fillcolor = '"#FFCCCC"',
+ color = '"#FF0000"', pencolor='"#FF0000"', fontcolor='"#4B6000"',
+ label_suffix = "???", label_prefix = "What is this?<BR/> ",
+ label = "<What <FONT COLOR=\"red\">is</FONT>this>",
+ shape= "component", height = 1.5, peripheries = 5,
+ style="dashed")
+ def Up_to_date_task2(infile, outfile):
+ pass
+
+ # Can use dictionary if you wish...
+ graphviz_params = {"URL":"http://cnn.com", "fontcolor": '"#FF00FF"'}
+ @graphviz(**graphviz_params)
+ def myTask(input,output):
+ pass
+
+ .. **
+
+ .. image:: images/history_html_flowchart.png
+ :scale: 30
+
+
+============================================================================================================================================================
+4. Consistent verbosity levels
+============================================================================================================================================================
+
+ The verbosity levels are now more fine-grained and consistent between pipeline_printout and pipeline_run.
+ Note that at verbosity > 2, ``pipeline_run`` outputs lists of up-to-date tasks before running the pipeline.
+ Many users who defaulted to using a verbosity of 3 may want to move up to ``verbose = 4``.
+
+ * **level 0** : *Nothing*
+ * **level 1** : *Out-of-date Task names*
+ * **level 2** : *All Tasks (including any task function docstrings)*
+ * **level 3** : *Out-of-date Jobs in Out-of-date Tasks, no explanation*
+ * **level 4** : *Out-of-date Jobs in Out-of-date Tasks, with explanations and warnings*
+ * **level 5** : *All Jobs in Out-of-date Tasks, (include only list of up-to-date tasks)*
+ * **level 6** : *All jobs in All Tasks whether out of date or not*
+ * **level 10**: *Logs messages useful only for debugging ruffus pipeline code*
+
+ * Defaults to **level 4** for pipeline_printout: *Out of date jobs, with explanations and warnings*
+ * Defaults to **level 1** for pipeline_run: *Out-of-date Task names*
+
+============================================================================================================================================================
+5. Allow abbreviated paths from ``pipeline_run`` or ``pipeline_printout``
+============================================================================================================================================================
+
+ .. note ::
+
+ Please contact me with suggestions if you find the abbreviations useful but "aesthetically challenged"!
+
+ Some pipelines produce interminable lists of long filenames. It would be nice to be able to abbreviate this
+ to just enough information to follow the progress.
+
+ Ruffus now allows either
+ 1) Only the nth top level sub-directories to be included
+ 2) The message to be truncated to a specified number of characters (to fit on a line, for example)
+
+ Note that the number of characters specified is the separate length of the input and output parameters,
+ not the entire message. You may need to specify a smaller limit than you expect (e.g. ``60`` rather than ``80``).
+
+ .. code-block:: python
+
+ pipeline_printout(verbose_abbreviated_path = NNN)
+ pipeline_run(verbose_abbreviated_path = -MMM)
+
+
+ The ``verbose_abbreviated_path`` parameter restricts the length of input / output file paths to either
+
+ * NNN levels of nested paths
+ * A total of MMM characters, MMM is specified by setting ``verbose_abbreviated_path`` to -MMM (i.e. negative values)
+
+ ``verbose_abbreviated_path`` defaults to ``2``
+
+
+ For example:
+
+ Given ``["aa/bb/cc/dddd.txt", "aaa/bbbb/cccc/eeed/eeee/ffff/gggg.txt"]``
+
+
+ .. code-block:: python
+ :emphasize-lines: 1,4,8,19
+
+ # Original relative paths
+ "[aa/bb/cc/dddd.txt, aaa/bbbb/cccc/eeed/eeee/ffff/gggg.txt]"
+
+ # Full abspath
+ verbose_abbreviated_path = 0
+ "[/test/ruffus/src/aa/bb/cc/dddd.txt, /test/ruffus/src/aaa/bbbb/cccc/eeed/eeee/ffff/gggg.txt]"
+
+ # Specified level of nested directories
+ verbose_abbreviated_path = 1
+ "[.../dddd.txt, .../gggg.txt]"
+
+ verbose_abbreviated_path = 2
+ "[.../cc/dddd.txt, .../ffff/gggg.txt]"
+
+ verbose_abbreviated_path = 3
+ "[.../bb/cc/dddd.txt, .../eeee/ffff/gggg.txt]"
+
+
+ # Truncated to MMM characters
+ verbose_abbreviated_path = -60
+ "<???> /bb/cc/dddd.txt, aaa/bbbb/cccc/eeed/eeee/ffff/gggg.txt]"
+
+
+ If you are using ``ruffus.cmdline``, the abbreviated path lengths can be specified on
+ the command line as an extension to the verbosity:
+
+ .. code-block:: bash
+ :emphasize-lines: 4,7
+
+ # verbosity of 4
+ yourscript.py --verbose 4
+
+ # display three levels of nested directories
+ yourscript.py --verbose 4:3
+
+ # restrict input and output parameters to 60 letters
+ yourscript.py --verbose 4:-60
+
+
+ The number after the colon is the abbreviated path length
+
+
+============================================================================================================================================================
+Other changes
+============================================================================================================================================================
+ * BUG FIX: Output producing wild cards was not saved in the checksum files!!!
+ * BUG FIX: @mkdir bug under Windows. Thanks to Sean Turley. (Aargh! Different exceptions are thrown in Windows vs. Linux for the same condition!)
+ * Added :ref:`pipeline_get_task_names(...) <pipeline_functions.pipeline_get_task_names>` which returns all task names as a list of strings. Thanks to Clare Sloggett
+
+
+********************************************************************
+version 2.4.1
+********************************************************************
+
+ 26th April 2014
+
+ * Breaking changes to drmaa API suggested by Bernie Pope to ensure portability across different drmaa implementations (SGE, SLURM etc.)
+
+********************************************************************
+version 2.4
+********************************************************************
+
+ 4th April 2014
+
+============================================================================================================================================================
+Additions to ``ruffus`` namespace
+============================================================================================================================================================
+
+ * :ref:`formatter() <new_manual.formatter>` (:ref:`syntax <decorators.formatter>`)
+ * :ref:`originate() <new_manual.originate>` (:ref:`syntax <decorators.originate>`)
+ * :ref:`subdivide() <new_manual.subdivide>` (:ref:`syntax <decorators.subdivide>`)
+
+============================================================================================================================================================
+Installation: use pip
+============================================================================================================================================================
+
+ ::
+
+ sudo pip install ruffus --upgrade
+
+============================================================================================================================================================
+1) Command Line support
+============================================================================================================================================================
+
+ The optional ``Ruffus.cmdline`` module provides support for a set of common command
+ line arguments which make writing *Ruffus* pipelines much more pleasant.
+ See :ref:`manual <new_manual.cmdline>`
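+
+ The standard boilerplate looks roughly like this (a sketch; see the linked manual for the authoritative version):
+
+ .. code-block:: python
+
+ from ruffus import *
+
+ parser = cmdline.get_argparse(description = "WHAT DOES THIS PIPELINE DO?")
+ options = parser.parse_args()
+
+ # logger / mutex which can be passed to Ruffus tasks
+ logger, logger_mutex = cmdline.setup_logging(__name__, options.log_file, options.verbose)
+
+ # <pipeline task definitions go here>
+
+ cmdline.run(options)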
+
+============================================================================================================================================================
+2) Check pointing
+============================================================================================================================================================
+
+ * Contributed by **Jake Biesinger**
+ * See :ref:`Manual <new_manual.checkpointing>`
+ * Uses a fault resistant sqlite database file to log i/o files, and additional checksums
+ * defaults to checking file timestamps stored in the current directory (``ruffus_utility.RUFFUS_HISTORY_FILE = '.ruffus_history.sqlite'``)
+ * :ref:`pipeline_run(..., checksum_level = N, ...) <pipeline_functions.pipeline_run>`
+
+ * level 0 = CHECKSUM_FILE_TIMESTAMPS : Classic mode. Use only file timestamps (no checksum file will be created)
+ * level 1 = CHECKSUM_HISTORY_TIMESTAMPS : Also store timestamps in a database after successful job completion
+ * level 2 = CHECKSUM_FUNCTIONS : As above, plus a checksum of the pipeline function body
+ * level 3 = CHECKSUM_FUNCTIONS_AND_PARAMS : As above, plus a checksum of the pipeline function default arguments and the additional arguments passed in by task decorators
+
+ * defaults to level 1
+
+ * Can speed up trivial tasks: Previously Ruffus always added an extra 1 second pause between tasks
+ to guard against file systems (Ext3, FAT, some NFS) with low timestamp granularity.
+
+
+============================================================================================================================================================
+3) :ref:`subdivide() <new_manual.subdivide>` (:ref:`syntax <decorators.subdivide>`)
+============================================================================================================================================================
+
+ * Takes a list of input jobs (like :ref:`@transform <decorators.transform>`) but further splits each one into multiple jobs, i.e. it is a **many->even more** relationship
+ * synonym for the deprecated ``@split(..., regex(), ...)``
+
+========================================================================================================================================================================================================================================================================================================================
+4) :ref:`mkdir() <new_manual.mkdir>` (:ref:`syntax <decorators.mkdir>`) with :ref:`formatter() <new_manual.formatter>`, :ref:`suffix() <decorators.suffix>` and :ref:`regex() <decorators.regex>`
+========================================================================================================================================================================================================================================================================================================================
+
+ * allows directories to be created depending on runtime parameters or the output of previous tasks
+ * behaves just like :ref:`@transform <decorators.transform>` but with its own (internal) function which does the actual work of making a directory
+ * Previous behavior is retained: ``mkdir`` continues to work seamlessly inside :ref:`@follows <decorators.follows>`
+
+============================================================================================================================================================
+5) :ref:`originate() <new_manual.originate>` (:ref:`syntax <decorators.originate>`)
+============================================================================================================================================================
+
+ * Generates output files without dependencies from scratch (*ex nihilo*!)
+ * For first step in a pipeline
+ * Task function obviously only takes output and not input parameters. (There *are* no inputs!)
+ * synonym for :ref:`@split(None,...) <decorators.split>`
+ * See :ref:`Summary <decorators.originate>` / :ref:`Manual <new_manual.originate>`
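+
+ A minimal sketch (with hypothetical file names):
+
+ .. code-block:: python
+
+ from ruffus import *
+
+ @originate(["a.start", "b.start"])
+ def create_initial_files(output_file):
+     # one job per output file, each created from scratch
+     open(output_file, "w").close()
+
+ @transform(create_initial_files, suffix(".start"), ".processed")
+ def process(input_file, output_file):
+     pass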
+
+========================================================================================================================================================================================================================================================================================================================
+6) New flexible :ref:`formatter() <new_manual.formatter>` (:ref:`syntax <decorators.formatter>`) alternative to :ref:`regex() <decorators.regex>` & :ref:`suffix() <decorators.suffix>`
+========================================================================================================================================================================================================================================================================================================================
+
+ * Easy manipulation of path subcomponents in the style of `os.path.split() <http://docs.python.org/2/library/os.path.html#os.path.split>`__
+ * Regular expressions are no longer necessary for path manipulation
+ * Familiar python syntax
+ * Optional regular expression matches
+ * Can refer to any file in the list of N input files (not only the first file as for ``regex(...)``)
+ * Can even refer to individual letters within a match
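+
+ A minimal sketch (hypothetical file name; the named capture group and replacement fields follow the formatter() documentation linked above):
+
+ .. code-block:: python
+
+ from ruffus import *
+
+ @transform(["/data/run1/sample1.fastq.gz"],
+
+            # optional regular expression with a named capture group
+            formatter(r"(?P<SAMPLE>[^/]+)\.fastq\.gz$"),
+
+            # reuse the input directory and the captured sample name
+            "{path[0]}/{SAMPLE[0]}.bam")
+ def map_reads(input_file, output_file):
+     pass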
+
+============================================================================================================================================================
+7) Combinatorics (all vs. all decorators)
+============================================================================================================================================================
+
+ * :ref:`@product <new_manual.product>` (See `itertools.product <http://docs.python.org/2/library/itertools.html#itertools.product>`__)
+ * :ref:`@permutations <new_manual.permutations>` (See `itertools.permutations <http://docs.python.org/2/library/itertools.html#itertools.permutations>`__)
+ * :ref:`@combinations <new_manual.combinations>` (See `itertools.combinations <http://docs.python.org/2/library/itertools.html#itertools.combinations>`__)
+ * :ref:`@combinations_with_replacement <new_manual.combinations_with_replacement>` (See `itertools.combinations_with_replacement <http://docs.python.org/2/library/itertools.html#itertools.combinations_with_replacement>`__)
+ * in optional :ref:`combinatorics <new_manual.combinatorics>` module
+ * Only :ref:`formatter() <new_manual.formatter>` provides the necessary flexibility to construct the output. (:ref:`suffix() <decorators.suffix>` and :ref:`regex() <decorators.regex>` are not supported.)
+ * See :ref:`Summary <decorators.combinatorics>` / :ref:`Manual <new_manual.combinatorics>`
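+
+ A minimal sketch of ``@product`` (hypothetical file names; 2 x 2 = 4 jobs):
+
+ .. code-block:: python
+
+ from ruffus import *
+ from ruffus.combinatorics import *
+
+ @product(["a.fasta", "b.fasta"], formatter(),
+          ["x.db",    "y.db"],    formatter(),
+          # the first index selects the input set, the second the file within it
+          "{basename[0][0]}.vs.{basename[1][0]}.comparison")
+ def compare_all_vs_all(input_files, output_file):
+     pass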
+
+
+
+============================================================================================================================================================
+8) drmaa support and multithreading:
+============================================================================================================================================================
+
+ * :ref:`ruffus.drmaa_wrapper.run_job() <new_manual.ruffus.drmaa_wrapper.run_job>` (:ref:`syntax <drmaa_wrapper.run_job>`)
+ * Optional helper module allows jobs to dispatch work to a computational cluster and wait until it completes.
+ * Requires ``multithread`` rather than ``multiprocess``
+
+============================================================================================================================================================
+9) ``pipeline_run(...)`` and exceptions
+============================================================================================================================================================
+ See :ref:`Manual <new_manual.exceptions>`
+
+ * Optionally terminate pipeline after first exception
+ * Display exceptions without delay
+
+
+============================================================================================================================================================
+10) Miscellaneous
+============================================================================================================================================================
+
+   Better error messages for ``formatter()``, ``suffix()`` and ``regex()`` when running ``pipeline_printout(..., verbose >= 3, ...)``:
+
+    * Error messages now show the mismatching regular expression and the offending file name
+    * Wrong capture group names or out-of-range indices raise an informative exception
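+
+   For example (``sys.stdout`` and the hypothetical ``final_task`` are illustrative)::
+
+       import sys
+       pipeline_printout(sys.stdout, [final_task], verbose = 3)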
+
+********************************************************************
+version 2.3
+********************************************************************
+ 1st September, 2013
+
+ * ``@active_if`` turns off tasks at runtime
+      The design and initial implementation were contributed by Jacob Biesinger.
+
+      Takes one or more parameters, each of which can be a boolean, a function, or a callable object returning True / False::
+
+ run_if_true_1 = True
+ run_if_true_2 = False
+
+        @active_if(run_if_true_1, lambda: run_if_true_2)
+ def this_task_might_be_inactive():
+ pass
+
+      The expressions inside ``@active_if`` are evaluated each time
+ ``pipeline_run``, ``pipeline_printout`` or ``pipeline_printout_graph`` is called.
+
+ Dormant tasks behave as if they are up to date and have no output.
+
+ * Command line parsing
+ * Supports both argparse (python 2.7) and optparse (python 2.6):
+ * ``Ruffus.cmdline`` module is optional.
+ * See :ref:`manual <new_manual.cmdline>`
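+
+      A typical skeleton (a sketch based on the ``Ruffus.cmdline`` documentation; treat the helper and option names as assumptions)::
+
+          from ruffus import *
+
+          parser = cmdline.get_argparse(description = "My pipeline")
+          options = parser.parse_args()
+
+          #  standard python logger which can be shared across concurrent Ruffus tasks
+          logger, logger_mutex = cmdline.setup_logging(__name__, options.log_file, options.verbose)
+
+          #  run, print out or draw the pipeline according to the command line options
+          cmdline.run(options)
+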
+ * Optionally terminate pipeline after first exception
+ To have all exceptions interrupt immediately::
+
+ pipeline_run(..., exceptions_terminate_immediately = True)
+
+ By default ruffus accumulates ``NN`` errors before interrupting the pipeline prematurely. ``NN`` is the specified parallelism for ``pipeline_run(..., multiprocess = NN)``.
+
+ Otherwise, a pipeline will only be interrupted immediately if exceptions of type ``ruffus.JobSignalledBreak`` are thrown.
+
+ * Display exceptions without delay
+
+      By default, Ruffus accumulates exceptions and re-throws them together after the pipeline terminates.
+
+ To see exceptions as they occur::
+
+ pipeline_run(..., log_exceptions = True)
+
+      ``logger.error(...)`` will be invoked with the string representation of each exception and its associated stack trace.
+
+      The default logger prints to ``sys.stderr``, but this can be replaced with any logger from the logging module (or a compatible object) via ``pipeline_run(..., logger = ???)``.
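+
+      For example, to route errors through a standard python logger (the logger name is illustrative)::
+
+          import logging
+          my_logger = logging.getLogger("my_pipeline")
+          my_logger.addHandler(logging.StreamHandler())
+
+          pipeline_run(log_exceptions = True, logger = my_logger)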
+
+ * Improved ``pipeline_printout()``
+
+ * `@split` operations now show the 1->many output in pipeline_printout
+
+      This makes it clearer that ``@split`` is creating multiple output parameters (rather than a single output parameter consisting of a list)::
+
+ Task = split_animals
+ Job = [None
+ -> cows
+ -> horses
+ -> pigs
+ , any_extra_parameters]
+    * File dates and times are displayed in human-readable form, and out-of-date files are flagged with asterisks.
+
+
+
+********************************************************************
+version 2.2
+********************************************************************
+ 22nd July, 2010
+
+ * Simplifying **@transform** syntax with **suffix(...)**
+
+ Regular expressions within ruffus are very powerful, and can allow files to be moved
+ from one directory to another and renamed at will.
+
+ However, using consistent file extensions and
+ ``@transform(..., suffix(...))`` makes the code much simpler and easier to read.
+
+ Previously, ``suffix(...)`` did not cooperate well with ``inputs(...)``.
+ For example, finding the corresponding header file (".h") for the matching input
+     required a complicated ``regex(...)`` regular expression and ``inputs(...)``. This simple case,
+ e.g. matching "something.c" with "something.h", is now much easier in Ruffus.
+
+
+ For example:
+ ::
+
+ source_files = ["something.c", "more_code.c"]
+ @transform(source_files, suffix(".c"), add_inputs(r"\1.h", "common.h"), ".o")
+ def compile(input_files, output_file):
+ ( source_file,
+ header_file,
+ common_header) = input_files
+ # call compiler to make object file
+
+ This is equivalent to calling:
+
+ ::
+
+ compile(["something.c", "something.h", "common.h"], "something.o")
+ compile(["more_code.c", "more_code.h", "common.h"], "more_code.o")
+
+ The ``\1`` matches everything *but* the suffix and will be applied to both ``glob``\ s and file names.
+
+ For simplicity and compatibility with previous versions, there is always an implied r"\1" before
+     the output parameters, i.e. output parameter strings are *always* substituted.
+
+
+ * Tasks and glob in **inputs(...)** and **add_inputs(...)**
+
+ ``glob``\ s and tasks can be added as the prerequisites / input files using
+ ``inputs(...)`` and ``add_inputs(...)``. ``glob`` expansions will take place when the task
+ is run.
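+
+     For example (a sketch: ``make_headers`` stands in for some upstream task, and the ``glob`` pattern is illustrative)::
+
+         @transform(["prog.c"], suffix(".c"),
+                    add_inputs(make_headers, "config/*.h"), ".o")
+         def compile_source(input_files, output_file):
+             # input_files = ["prog.c"] + outputs of make_headers + files matching "config/*.h"
+             pass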
+
+ * Advanced form of **@split** with **regex**:
+
+     The standard ``@split`` divides one set of inputs into multiple outputs (the number of which
+ can be determined at runtime).
+
+ This is a ``one->many`` operation.
+
+
+ An advanced form of ``@split`` has been added which can split each of several files further.
+
+ In other words, this is a ``many->"many more"`` operation.
+
+ For example, given three starting files:
+ ::
+
+ original_files = ["original_0.file",
+ "original_1.file",
+ "original_2.file"]
+ We can split each into its own set of sub-sections:
+ ::
+
+        @split(original_files,
+              regex(r"original_(\d+)\.file"),    # match starting files
+              r"files.split.\1.*.fa",            # glob pattern for the output files
+              r"\1")                             # index of original file
+ def split_files(input_file, output_files, original_index):
+ """
+ Code to split each input_file
+ "original_0.file" -> "files.split.0.*.fa"
+ "original_1.file" -> "files.split.1.*.fa"
+ "original_2.file" -> "files.split.2.*.fa"
+ """
+
+
+     This is, conceptually, the reverse of the ``@collate(...)`` decorator.
+
+ * Ruffus will complain about unescaped regular expression special characters:
+
+ Ruffus uses "\\1" and "\\2" in regular expression substitutions. Even seasoned python
+ users may not remember that these have to be 'escaped' in strings. The best option is
+ to use 'raw' python strings e.g.
+
+ ::
+
+ r"\1_substitutes\2correctly\3four\4times"
+
+ Ruffus will throw an exception if it sees an unescaped "\\1" or "\\2" in a file name,
+ which should catch most of these bugs.
+
+ * Prettier output from *pipeline_printout_graph*
+
+ Changed to nicer colours, symbols etc. for a more professional look.
+ @split and @merge tasks now look different from @transform.
+ Colours, size and resolution are now fully customisable::
+
+         pipeline_printout_graph( # ...
+                                  user_colour_scheme = {
+                                      "colour_scheme_index" : 1,
+                                      "Task to run"         : {"fillcolor" : "blue"},
+                                      },
+                                  pipeline_name = "My flowchart",
+                                  size = (11, 8),
+                                  dpi = 120)
+
+      An SVG bug in Firefox has been worked around so that font sizes are displayed correctly.
+
+
+
+
+********************************************************************
+version 2.1.1
+********************************************************************
+ * **@transform(.., add_inputs(...))**
+ ``add_inputs(...)`` allows the addition of extra input dependencies / parameters for each job.
+
+ Unlike ``inputs(...)``, the original input parameter is retained:
+ ::
+
+ from ruffus import *
+ @transform(["a.input", "b.input"], suffix(".input"), add_inputs("just.1.more","just.2.more"), ".output")
+ def task(i, o):
+ ""
+
+ Produces:
+ ::
+
+ Job = [[a.input, just.1.more, just.2.more] ->a.output]
+ Job = [[b.input, just.1.more, just.2.more] ->b.output]
+
+
+      Like ``inputs``, ``add_inputs`` accepts strings, tasks and ``glob``\ s.
+      This minor syntactic change promises to add much clarity to Ruffus code.
+      ``add_inputs()`` is available for ``@transform``, ``@collate`` and ``@split``.
+
+
+********************************************************************
+version 2.1.0
+********************************************************************
+ * **@jobs_limit**
+ Some tasks are resource intensive and too many jobs should not be run at the
+ same time. Examples include disk intensive operations such as unzipping, or
+ downloading from FTP sites.
+
+ Adding::
+
+ @jobs_limit(4)
+ @transform(new_data_list, suffix(".big_data.gz"), ".big_data")
+ def unzip(i, o):
+ "unzip code goes here"
+
+ would limit the unzip operation to 4 jobs at a time, even if the rest of the
+ pipeline runs highly in parallel.
+
+ (Thanks to Rob Young for suggesting this.)
+
+********************************************************************
+version 2.0.10
+********************************************************************
+ * **touch_files_only** option for **pipeline_run**
+
+ When the pipeline runs, task functions will not be run. Instead, the output files for
+ each job (in each task) will be ``touch``\ -ed if necessary.
+ This can be useful for simulating a pipeline run so that all files look as
+ if they are up-to-date.
+
+ Caveats:
+
+ * This may not work correctly where output files are only determined at runtime, e.g. with **@split**
+ * Only the output from pipelined jobs which are currently out-of-date will be ``touch``\ -ed.
+ In other words, the pipeline runs *as normal*, the only difference is that the
+ output files are ``touch``\ -ed instead of being created by the python task functions
+ which would otherwise have been called.
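+
+     For example, to bring the timestamps of all out-of-date output files up to date without running any task code::
+
+         pipeline_run(touch_files_only = True)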
+
+ * Parameter substitution for **inputs(...)**
+
+ The **inputs(...)** parameter in **@transform**, **@collate** can now take tasks and ``glob`` s,
+ and these will be expanded appropriately (after regular expression replacement).
+
+ For example::
+
+ @transform("dir/a.input", regex(r"(.*)\/(.+).input"),
+ inputs((r"\1/\2.other", r"\1/*.more")), r"elsewhere/\2.output")
+ def task1(i, o):
+ """
+ Some pipeline task
+ """
+
+ Is equivalent to calling::
+
+ task1(("dir/a.other", "dir/1.more", "dir/2.more"), "elsewhere/a.output")
+
+ \
+
+ Here::
+
+ r"\1/*.more"
+
+ is first converted to::
+
+ r"dir/*.more"
+
+ which matches::
+
+ "dir/1.more"
+ "dir/2.more"
+
+
+********************************************************************
+version 2.0.9
+********************************************************************
+
+ * Better display of logging output
+ * Advanced form of **@split**
+ This is an experimental feature.
+
+    Hitherto, **@split** only took one set of inputs (tasks/files/``glob``\ s) and split these
+    into an indeterminate number of outputs.
+
+ This is a one->many operation.
+
+ Sometimes it is desirable to take multiple input files, and split each of them further.
+
+ This is a many->many (more) operation.
+
+    It is possible to hack something together using **@transform** but downstream tasks would not be
+ aware that each job in **@transform** produces multiple outputs (rather than one input,
+ one output per job).
+
+ The syntax looks like::
+
+ @split(get_files, regex(r"(.+).original"), r"\1.*.split")
+ def split_files(i, o):
+ pass
+
+ If ``get_files()`` returned ``A.original``, ``B.original`` and ``C.original``,
+ ``split_files()`` might lead to the following operations::
+
+            A.original
+                -> A.1.split
+                -> A.2.split
+                -> A.3.split
+            B.original
+                -> B.1.split
+                -> B.2.split
+            C.original
+                -> C.1.split
+                -> C.2.split
+                -> C.3.split
+                -> C.4.split
+                -> C.5.split
+
+    Note that each input (``A/B/C.original``) can produce a number of output files, the exact
+    number of which does not have to be pre-determined.
+    This is similar to the standard **@split**.
+
+ Tasks following ``split_files`` will have ten inputs corresponding to each of the
+    outputs from ``split_files``.
+
+ If **@transform** was used instead of **@split**, then tasks following ``split_files``
+ would only have 3 inputs.
+
+********************************************************************
+version 2.0.8
+********************************************************************
+
+ * File names can be in unicode
+ * File systems with 1 second timestamp granularity no longer cause problems.
+
+********************************************************************
+version 2.0.2
+********************************************************************
+
+ * Much prettier /useful output from :ref:`pipeline_printout <pipeline_functions.pipeline_printout>`
+ * New tutorial / manual
+
+
+
+********************************************************************
+version 2.0
+********************************************************************
+ * Revamped documentation:
+
+ * Rewritten tutorial
+ * Comprehensive manual
+ * New syntax help
+
+ * Major redesign. New decorators include
+
+ * :ref:`@split <new_manual.split>`
+ * :ref:`@transform <new_manual.transform>`
+ * :ref:`@merge <new_manual.merge>`
+ * :ref:`@collate <new_manual.collate>`
+
+ * Major redesign. Decorator *inputs* can mix
+
+ * Output from previous tasks
+ * |glob|_ patterns e.g. ``*.txt``
+    * File names
+ * Any other data type
+
+********************************************************************
+version 1.1.4
+********************************************************************
+ Tasks can get their input by automatically chaining to the output from one or more parent tasks using :ref:`@files_re <decorators.files_re>`
+
+********************************************************************
+version 1.0.7
+********************************************************************
+ Added `proxy_logger` module for accessing a shared log across multiple jobs in different processes.
+
+********************************************************************
+version 1.0
+********************************************************************
+
+ Initial Release in Oxford
+
+########################################
+Fixed Bugs
+########################################
+
+ Full list at `"Latest Changes wiki entry" <http://code.google.com/p/ruffus/wiki/LatestChanges>`_
diff --git a/doc/images/bestiary_combinatorics.png b/doc/images/bestiary_combinatorics.png
new file mode 100644
index 0000000..da876ce
Binary files /dev/null and b/doc/images/bestiary_combinatorics.png differ
diff --git a/doc/images/bestiary_decorators.png b/doc/images/bestiary_decorators.png
new file mode 100644
index 0000000..a4bb53d
Binary files /dev/null and b/doc/images/bestiary_decorators.png differ
diff --git a/doc/images/bestiary_transform.png b/doc/images/bestiary_transform.png
new file mode 100644
index 0000000..b184bfc
Binary files /dev/null and b/doc/images/bestiary_transform.png differ
diff --git a/doc/images/colour_schemes.svg b/doc/images/colour_schemes.svg
new file mode 100644
index 0000000..b53b1e9
--- /dev/null
+++ b/doc/images/colour_schemes.svg
@@ -0,0 +1,799 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.0//EN"
+ "http://www.w3.org/TR/2001/REC-SVG-20010904/DTD/svg10.dtd" [
+ <!ATTLIST svg xmlns:xlink CDATA #FIXED "http://www.w3.org/1999/xlink">
+]>
+<!-- Generated by Graphviz version 2.20.2 (Mon Aug 4 08:59:22 UTC 2008)
+ For user: (lg) Leo Goodstadt -->
+<!-- Title: Colour schemes Pages: 1 -->
+<svg width="792pt" height="364pt"
+ viewBox="0.00 0.00 792.00 364.12" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<g id="graph0" class="graph" transform="scale(0.486785 0.486785) rotate(0) translate(4 744)">
+<title>Colour schemes</title>
+<polygon style="fill:white;stroke:white;" points="-4,4 -4,-744 1623,-744 1623,4 -4,4"/>
+<g id="cluster2" class="cluster"><title>clusterkey0</title>
+<polygon style="fill:#f6f4f4;stroke:#f6f4f4;" points="8,-16 8,-732 230,-732 230,-16 8,-16"/>
+<text text-anchor="middle" x="119" y="-700.892" style="font-family:Times New Roman;font-size:30.0px;">Colour Scheme 0</text>
+</g>
+<g id="cluster3" class="cluster"><title>clusterkey1</title>
+<polygon style="fill:#f6f4f4;stroke:#f6f4f4;" points="238,-16 238,-732 460,-732 460,-16 238,-16"/>
+<text text-anchor="middle" x="349" y="-700.892" style="font-family:Times New Roman;font-size:30.0px;">Colour Scheme 1</text>
+</g>
+<g id="cluster4" class="cluster"><title>clusterkey2</title>
+<polygon style="fill:#f6f4f4;stroke:#f6f4f4;" points="468,-16 468,-732 690,-732 690,-16 468,-16"/>
+<text text-anchor="middle" x="579" y="-700.892" style="font-family:Times New Roman;font-size:30.0px;">Colour Scheme 2</text>
+</g>
+<g id="cluster5" class="cluster"><title>clusterkey3</title>
+<polygon style="fill:#f6f4f4;stroke:#f6f4f4;" points="698,-16 698,-732 920,-732 920,-16 698,-16"/>
+<text text-anchor="middle" x="809" y="-700.892" style="font-family:Times New Roman;font-size:30.0px;">Colour Scheme 3</text>
+</g>
+<g id="cluster6" class="cluster"><title>clusterkey4</title>
+<polygon style="fill:#f6f4f4;stroke:#f6f4f4;" points="928,-16 928,-732 1150,-732 1150,-16 928,-16"/>
+<text text-anchor="middle" x="1039" y="-700.892" style="font-family:Times New Roman;font-size:30.0px;">Colour Scheme 4</text>
+</g>
+<g id="cluster7" class="cluster"><title>clusterkey5</title>
+<polygon style="fill:#f6f4f4;stroke:#f6f4f4;" points="1158,-16 1158,-732 1380,-732 1380,-16 1158,-16"/>
+<text text-anchor="middle" x="1269" y="-700.892" style="font-family:Times New Roman;font-size:30.0px;">Colour Scheme 5</text>
+</g>
+<g id="cluster8" class="cluster"><title>clusterkey6</title>
+<polygon style="fill:#f6f4f4;stroke:#f6f4f4;" points="1388,-16 1388,-732 1611,-732 1611,-16 1388,-16"/>
+<text text-anchor="middle" x="1499" y="-700.892" style="font-family:Times New Roman;font-size:30.0px;">Colour Scheme 6</text>
+</g>
+<!-- k1_0 -->
+<g id="node2" class="node"><title>k1_0</title>
+<polygon style="fill:#ff3232;stroke:white;" points="187,-681.5 55,-681.5 51,-677.5 51,-630.5 183,-630.5 187,-634.5 187,-681.5"/>
+<polyline style="fill:none;stroke:white;" points="183,-677.5 51,-677.5 "/>
+<polyline style="fill:none;stroke:white;" points="183,-677.5 183,-630.5 "/>
+<polyline style="fill:none;stroke:white;" points="183,-677.5 187,-681.5 "/>
+<text text-anchor="middle" x="119" y="-649.392" style="font-family:Times New Roman;font-size:20.0px;fill:white;">Vicious cycle</text>
+</g>
+<!-- k2_0 -->
+<g id="node3" class="node"><title>k2_0</title>
+<polygon style="fill:white;stroke:gray;" points="184,-608 58,-608 54,-604 54,-534 180,-534 184,-538 184,-608"/>
+<polyline style="fill:none;stroke:gray;" points="180,-604 54,-604 "/>
+<polyline style="fill:none;stroke:gray;" points="180,-604 180,-534 "/>
+<polyline style="fill:none;stroke:gray;" points="180,-604 184,-608 "/>
+<text text-anchor="middle" x="119" y="-575.892" style="font-family:Times New Roman;font-size:20.0px;fill:gray;">Up-to-date</text>
+<text text-anchor="middle" x="119" y="-552.892" style="font-family:Times New Roman;font-size:20.0px;fill:gray;">down stream</text>
+</g>
+<!-- k1_0->k2_0 -->
+<g id="edge3" class="edge"><title>k1_0->k2_0</title>
+<path style="fill:none;stroke:#ff3232;" d="M113,-630C112,-626 112,-622 112,-618"/>
+<polygon style="fill:#ff3232;stroke:#ff3232;" points="115.5,-618 112,-608 108.5,-618 115.5,-618"/>
+</g>
+<!-- k2_0->k1_0 -->
+<g id="edge5" class="edge"><title>k2_0->k1_0</title>
+<path style="fill:none;stroke:#ff3232;" d="M126,-608C126,-612 126,-616 126,-620"/>
+<polygon style="fill:#ff3232;stroke:#ff3232;" points="122.512,-619.701 125,-630 129.478,-620.398 122.512,-619.701"/>
+</g>
+<!-- k3_0 -->
+<g id="node6" class="node"><title>k3_0</title>
+<polygon style="fill:#b8cc6e;stroke:#006000;" points="195,-511.5 47,-511.5 43,-507.5 43,-460.5 191,-460.5 195,-464.5 195,-511.5"/>
+<polyline style="fill:none;stroke:#006000;" points="191,-507.5 43,-507.5 "/>
+<polyline style="fill:none;stroke:#006000;" points="191,-507.5 191,-460.5 "/>
+<polyline style="fill:none;stroke:#006000;" points="191,-507.5 195,-511.5 "/>
+<text text-anchor="middle" x="119" y="-479.392" style="font-family:Times New Roman;font-size:20.0px;fill:#006000;">Up-to-date task</text>
+</g>
+<!-- k2_0->k3_0 -->
+<g id="edge7" class="edge"><title>k2_0->k3_0</title>
+<path style="fill:none;stroke:gray;" d="M119,-534C119,-530 119,-526 119,-522"/>
+<polygon style="fill:gray;stroke:gray;" points="122.5,-522 119,-512 115.5,-522 122.5,-522"/>
+</g>
+<!-- k4_0 -->
+<g id="node8" class="node"><title>k4_0</title>
+<polygon style="fill:none;stroke:black;" points="206,-438 36,-438 32,-434 32,-364 202,-364 206,-368 206,-438"/>
+<polyline style="fill:none;stroke:black;" points="202,-434 32,-434 "/>
+<polyline style="fill:none;stroke:black;" points="202,-434 202,-364 "/>
+<polyline style="fill:none;stroke:black;" points="202,-434 206,-438 "/>
+<text text-anchor="middle" x="119" y="-405.892" style="font-family:Times New Roman;font-size:20.0px;">Force pipeline run</text>
+<text text-anchor="middle" x="119" y="-382.892" style="font-family:Times New Roman;font-size:20.0px;">from this task</text>
+</g>
+<!-- k3_0->k4_0 -->
+<g id="edge9" class="edge"><title>k3_0->k4_0</title>
+<path style="fill:none;stroke:gray;" d="M119,-460C119,-456 119,-452 119,-448"/>
+<polygon style="fill:gray;stroke:gray;" points="122.5,-448 119,-438 115.5,-448 122.5,-448"/>
+</g>
+<!-- k5_0 -->
+<g id="node10" class="node"><title>k5_0</title>
+<polygon style="fill:#ebf3ff;stroke:#0044a0;" points="177,-341.5 65,-341.5 61,-337.5 61,-290.5 173,-290.5 177,-294.5 177,-341.5"/>
+<polyline style="fill:none;stroke:#0044a0;" points="173,-337.5 61,-337.5 "/>
+<polyline style="fill:none;stroke:#0044a0;" points="173,-337.5 173,-290.5 "/>
+<polyline style="fill:none;stroke:#0044a0;" points="173,-337.5 177,-341.5 "/>
+<text text-anchor="middle" x="119" y="-309.392" style="font-family:Times New Roman;font-size:20.0px;fill:#0044a0;">Task to run</text>
+</g>
+<!-- k4_0->k5_0 -->
+<g id="edge11" class="edge"><title>k4_0->k5_0</title>
+<path style="fill:none;stroke:#0044a0;" d="M119,-364C119,-360 119,-356 119,-352"/>
+<polygon style="fill:#0044a0;stroke:#0044a0;" points="122.5,-352 119,-342 115.5,-352 122.5,-352"/>
+</g>
+<!-- k6_0 -->
+<g id="node12" class="node"><title>k6_0</title>
+<polygon style="fill:none;stroke:#0044a0;stroke-dasharray:5,2;" points="195,-268 47,-268 43,-264 43,-194 191,-194 195,-198 195,-268"/>
+<polyline style="fill:none;stroke:#0044a0;stroke-dasharray:5,2;" points="191,-264 43,-264 "/>
+<polyline style="fill:none;stroke:#0044a0;stroke-dasharray:5,2;" points="191,-264 191,-194 "/>
+<polyline style="fill:none;stroke:#0044a0;stroke-dasharray:5,2;" points="191,-264 195,-268 "/>
+<text text-anchor="middle" x="119" y="-235.892" style="font-family:Times New Roman;font-size:20.0px;fill:#0044a0;">Up-to-date task</text>
+<text text-anchor="middle" x="119" y="-212.892" style="font-family:Times New Roman;font-size:20.0px;fill:#0044a0;">forced to rerun</text>
+</g>
+<!-- k5_0->k6_0 -->
+<g id="edge13" class="edge"><title>k5_0->k6_0</title>
+<path style="fill:none;stroke:#0044a0;" d="M119,-290C119,-286 119,-282 119,-278"/>
+<polygon style="fill:#0044a0;stroke:#0044a0;" points="122.5,-278 119,-268 115.5,-278 122.5,-278"/>
+</g>
+<!-- k7_0 -->
+<g id="node14" class="node"><title>k7_0</title>
+<polygon style="fill:#efa03b;stroke:#006000;" points="179,-172 63,-172 59,-168 59,-98 175,-98 179,-102 179,-172"/>
+<polyline style="fill:none;stroke:#006000;" points="175,-168 59,-168 "/>
+<polyline style="fill:none;stroke:#006000;" points="175,-168 175,-98 "/>
+<polyline style="fill:none;stroke:#006000;" points="175,-168 179,-172 "/>
+<text text-anchor="middle" x="119" y="-139.892" style="font-family:Times New Roman;font-size:20.0px;fill:#006000;">Up-to-date</text>
+<text text-anchor="middle" x="119" y="-116.892" style="font-family:Times New Roman;font-size:20.0px;fill:#006000;">Final target</text>
+</g>
+<!-- k6_0->k7_0 -->
+<g id="edge15" class="edge"><title>k6_0->k7_0</title>
+<path style="fill:none;stroke:#0044a0;" d="M119,-194C119,-190 119,-186 119,-182"/>
+<polygon style="fill:#0044a0;stroke:#0044a0;" points="122.5,-182 119,-172 115.5,-182 122.5,-182"/>
+</g>
+<!-- k8_0 -->
+<g id="node16" class="node"><title>k8_0</title>
+<polygon style="fill:#efa03b;stroke:black;" points="179,-75.5 63,-75.5 59,-71.5 59,-24.5 175,-24.5 179,-28.5 179,-75.5"/>
+<polyline style="fill:none;stroke:black;" points="175,-71.5 59,-71.5 "/>
+<polyline style="fill:none;stroke:black;" points="175,-71.5 175,-24.5 "/>
+<polyline style="fill:none;stroke:black;" points="175,-71.5 179,-75.5 "/>
+<text text-anchor="middle" x="119" y="-43.392" style="font-family:Times New Roman;font-size:20.0px;">Final target</text>
+</g>
+<!-- k7_0->k8_0 -->
+<g id="edge17" class="edge"><title>k7_0->k8_0</title>
+<path style="fill:none;stroke:gray;" d="M119,-98C119,-94 119,-90 119,-86"/>
+<polygon style="fill:gray;stroke:gray;" points="122.5,-86 119,-76 115.5,-86 122.5,-86"/>
+</g>
+<!-- k1_1 -->
+<g id="node19" class="node"><title>k1_1</title>
+<polygon style="fill:#d93611;stroke:white;" points="417,-681.5 285,-681.5 281,-677.5 281,-630.5 413,-630.5 417,-634.5 417,-681.5"/>
+<polyline style="fill:none;stroke:white;" points="413,-677.5 281,-677.5 "/>
+<polyline style="fill:none;stroke:white;" points="413,-677.5 413,-630.5 "/>
+<polyline style="fill:none;stroke:white;" points="413,-677.5 417,-681.5 "/>
+<text text-anchor="middle" x="349" y="-649.392" style="font-family:Times New Roman;font-size:20.0px;fill:white;">Vicious cycle</text>
+</g>
+<!-- k2_1 -->
+<g id="node20" class="node"><title>k2_1</title>
+<polygon style="fill:white;stroke:gray;" points="414,-608 288,-608 284,-604 284,-534 410,-534 414,-538 414,-608"/>
+<polyline style="fill:none;stroke:gray;" points="410,-604 284,-604 "/>
+<polyline style="fill:none;stroke:gray;" points="410,-604 410,-534 "/>
+<polyline style="fill:none;stroke:gray;" points="410,-604 414,-608 "/>
+<text text-anchor="middle" x="349" y="-575.892" style="font-family:Times New Roman;font-size:20.0px;fill:gray;">Up-to-date</text>
+<text text-anchor="middle" x="349" y="-552.892" style="font-family:Times New Roman;font-size:20.0px;fill:gray;">down stream</text>
+</g>
+<!-- k1_1->k2_1 -->
+<g id="edge20" class="edge"><title>k1_1->k2_1</title>
+<path style="fill:none;stroke:#d93611;" d="M343,-630C342,-626 342,-622 342,-618"/>
+<polygon style="fill:#d93611;stroke:#d93611;" points="345.5,-618 342,-608 338.5,-618 345.5,-618"/>
+</g>
+<!-- k2_1->k1_1 -->
+<g id="edge22" class="edge"><title>k2_1->k1_1</title>
+<path style="fill:none;stroke:#d93611;" d="M356,-608C356,-612 356,-616 356,-620"/>
+<polygon style="fill:#d93611;stroke:#d93611;" points="352.512,-619.701 355,-630 359.478,-620.398 352.512,-619.701"/>
+</g>
+<!-- k3_1 -->
+<g id="node23" class="node"><title>k3_1</title>
+<polygon style="fill:#9ed983;stroke:#4b8c2e;" points="425,-511.5 277,-511.5 273,-507.5 273,-460.5 421,-460.5 425,-464.5 425,-511.5"/>
+<polyline style="fill:none;stroke:#4b8c2e;" points="421,-507.5 273,-507.5 "/>
+<polyline style="fill:none;stroke:#4b8c2e;" points="421,-507.5 421,-460.5 "/>
+<polyline style="fill:none;stroke:#4b8c2e;" points="421,-507.5 425,-511.5 "/>
+<text text-anchor="middle" x="349" y="-479.392" style="font-family:Times New Roman;font-size:20.0px;fill:#4b8c2e;">Up-to-date task</text>
+</g>
+<!-- k2_1->k3_1 -->
+<g id="edge24" class="edge"><title>k2_1->k3_1</title>
+<path style="fill:none;stroke:gray;" d="M349,-534C349,-530 349,-526 349,-522"/>
+<polygon style="fill:gray;stroke:gray;" points="352.5,-522 349,-512 345.5,-522 352.5,-522"/>
+</g>
+<!-- k4_1 -->
+<g id="node25" class="node"><title>k4_1</title>
+<polygon style="fill:none;stroke:black;" points="436,-438 266,-438 262,-434 262,-364 432,-364 436,-368 436,-438"/>
+<polyline style="fill:none;stroke:black;" points="432,-434 262,-434 "/>
+<polyline style="fill:none;stroke:black;" points="432,-434 432,-364 "/>
+<polyline style="fill:none;stroke:black;" points="432,-434 436,-438 "/>
+<text text-anchor="middle" x="349" y="-405.892" style="font-family:Times New Roman;font-size:20.0px;">Force pipeline run</text>
+<text text-anchor="middle" x="349" y="-382.892" style="font-family:Times New Roman;font-size:20.0px;">from this task</text>
+</g>
+<!-- k3_1->k4_1 -->
+<g id="edge26" class="edge"><title>k3_1->k4_1</title>
+<path style="fill:none;stroke:gray;" d="M349,-460C349,-456 349,-452 349,-448"/>
+<polygon style="fill:gray;stroke:gray;" points="352.5,-448 349,-438 345.5,-448 352.5,-448"/>
+</g>
+<!-- k5_1 -->
+<g id="node27" class="node"><title>k5_1</title>
+<polygon style="fill:none;stroke:#000ddf;" points="407,-341.5 295,-341.5 291,-337.5 291,-290.5 403,-290.5 407,-294.5 407,-341.5"/>
+<polyline style="fill:none;stroke:#000ddf;" points="403,-337.5 291,-337.5 "/>
+<polyline style="fill:none;stroke:#000ddf;" points="403,-337.5 403,-290.5 "/>
+<polyline style="fill:none;stroke:#000ddf;" points="403,-337.5 407,-341.5 "/>
+<text text-anchor="middle" x="349" y="-309.392" style="font-family:Times New Roman;font-size:20.0px;fill:#000ddf;">Task to run</text>
+</g>
+<!-- k4_1->k5_1 -->
+<g id="edge28" class="edge"><title>k4_1->k5_1</title>
+<path style="fill:none;stroke:#000ddf;" d="M349,-364C349,-360 349,-356 349,-352"/>
+<polygon style="fill:#000ddf;stroke:#000ddf;" points="352.5,-352 349,-342 345.5,-352 352.5,-352"/>
+</g>
+<!-- k6_1 -->
+<g id="node29" class="node"><title>k6_1</title>
+<polygon style="fill:none;stroke:#000ddf;stroke-dasharray:5,2;" points="425,-268 277,-268 273,-264 273,-194 421,-194 425,-198 425,-268"/>
+<polyline style="fill:none;stroke:#000ddf;stroke-dasharray:5,2;" points="421,-264 273,-264 "/>
+<polyline style="fill:none;stroke:#000ddf;stroke-dasharray:5,2;" points="421,-264 421,-194 "/>
+<polyline style="fill:none;stroke:#000ddf;stroke-dasharray:5,2;" points="421,-264 425,-268 "/>
+<text text-anchor="middle" x="349" y="-235.892" style="font-family:Times New Roman;font-size:20.0px;fill:#000ddf;">Up-to-date task</text>
+<text text-anchor="middle" x="349" y="-212.892" style="font-family:Times New Roman;font-size:20.0px;fill:#000ddf;">forced to rerun</text>
+</g>
+<!-- k5_1->k6_1 -->
+<g id="edge30" class="edge"><title>k5_1->k6_1</title>
+<path style="fill:none;stroke:#000ddf;" d="M349,-290C349,-286 349,-282 349,-278"/>
+<polygon style="fill:#000ddf;stroke:#000ddf;" points="352.5,-278 349,-268 345.5,-278 352.5,-278"/>
+</g>
+<!-- k7_1 -->
+<g id="node31" class="node"><title>k7_1</title>
+<polygon style="fill:#d98100;stroke:#4b8c2e;" points="409,-172 293,-172 289,-168 289,-98 405,-98 409,-102 409,-172"/>
+<polyline style="fill:none;stroke:#4b8c2e;" points="405,-168 289,-168 "/>
+<polyline style="fill:none;stroke:#4b8c2e;" points="405,-168 405,-98 "/>
+<polyline style="fill:none;stroke:#4b8c2e;" points="405,-168 409,-172 "/>
+<text text-anchor="middle" x="349" y="-139.892" style="font-family:Times New Roman;font-size:20.0px;fill:#4b8c2e;">Up-to-date</text>
+<text text-anchor="middle" x="349" y="-116.892" style="font-family:Times New Roman;font-size:20.0px;fill:#4b8c2e;">Final target</text>
+</g>
+<!-- k6_1->k7_1 -->
+<g id="edge32" class="edge"><title>k6_1->k7_1</title>
+<path style="fill:none;stroke:#000ddf;" d="M349,-194C349,-190 349,-186 349,-182"/>
+<polygon style="fill:#000ddf;stroke:#000ddf;" points="352.5,-182 349,-172 345.5,-182 352.5,-182"/>
+</g>
+<!-- k8_1 -->
+<g id="node33" class="node"><title>k8_1</title>
+<polygon style="fill:#d98100;stroke:black;" points="409,-75.5 293,-75.5 289,-71.5 289,-24.5 405,-24.5 409,-28.5 409,-75.5"/>
+<polyline style="fill:none;stroke:black;" points="405,-71.5 289,-71.5 "/>
+<polyline style="fill:none;stroke:black;" points="405,-71.5 405,-24.5 "/>
+<polyline style="fill:none;stroke:black;" points="405,-71.5 409,-75.5 "/>
+<text text-anchor="middle" x="349" y="-43.392" style="font-family:Times New Roman;font-size:20.0px;">Final target</text>
+</g>
+<!-- k7_1->k8_1 -->
+<g id="edge34" class="edge"><title>k7_1->k8_1</title>
+<path style="fill:none;stroke:gray;" d="M349,-98C349,-94 349,-90 349,-86"/>
+<polygon style="fill:gray;stroke:gray;" points="352.5,-86 349,-76 345.5,-86 352.5,-86"/>
+</g>
+<!-- k1_2 -->
+<g id="node36" class="node"><title>k1_2</title>
+<polygon style="fill:#a54a64;stroke:white;" points="647,-681.5 515,-681.5 511,-677.5 511,-630.5 643,-630.5 647,-634.5 647,-681.5"/>
+<polyline style="fill:none;stroke:white;" points="643,-677.5 511,-677.5 "/>
+<polyline style="fill:none;stroke:white;" points="643,-677.5 643,-630.5 "/>
+<polyline style="fill:none;stroke:white;" points="643,-677.5 647,-681.5 "/>
+<text text-anchor="middle" x="579" y="-649.392" style="font-family:Times New Roman;font-size:20.0px;fill:white;">Vicious cycle</text>
+</g>
+<!-- k2_2 -->
+<g id="node37" class="node"><title>k2_2</title>
+<polygon style="fill:white;stroke:gray;" points="644,-608 518,-608 514,-604 514,-534 640,-534 644,-538 644,-608"/>
+<polyline style="fill:none;stroke:gray;" points="640,-604 514,-604 "/>
+<polyline style="fill:none;stroke:gray;" points="640,-604 640,-534 "/>
+<polyline style="fill:none;stroke:gray;" points="640,-604 644,-608 "/>
+<text text-anchor="middle" x="579" y="-575.892" style="font-family:Times New Roman;font-size:20.0px;fill:gray;">Up-to-date</text>
+<text text-anchor="middle" x="579" y="-552.892" style="font-family:Times New Roman;font-size:20.0px;fill:gray;">down stream</text>
+</g>
+<!-- k1_2->k2_2 -->
+<g id="edge37" class="edge"><title>k1_2->k2_2</title>
+<path style="fill:none;stroke:#a54a64;" d="M573,-630C572,-626 572,-622 572,-618"/>
+<polygon style="fill:#a54a64;stroke:#a54a64;" points="575.5,-618 572,-608 568.5,-618 575.5,-618"/>
+</g>
+<!-- k2_2->k1_2 -->
+<g id="edge39" class="edge"><title>k2_2->k1_2</title>
+<path style="fill:none;stroke:#a54a64;" d="M586,-608C586,-612 586,-616 586,-620"/>
+<polygon style="fill:#a54a64;stroke:#a54a64;" points="582.512,-619.701 585,-630 589.478,-620.398 582.512,-619.701"/>
+</g>
+<!-- k3_2 -->
+<g id="node40" class="node"><title>k3_2</title>
+<polygon style="fill:#99d1c1;stroke:#4a92a5;" points="655,-511.5 507,-511.5 503,-507.5 503,-460.5 651,-460.5 655,-464.5 655,-511.5"/>
+<polyline style="fill:none;stroke:#4a92a5;" points="651,-507.5 503,-507.5 "/>
+<polyline style="fill:none;stroke:#4a92a5;" points="651,-507.5 651,-460.5 "/>
+<polyline style="fill:none;stroke:#4a92a5;" points="651,-507.5 655,-511.5 "/>
+<text text-anchor="middle" x="579" y="-479.392" style="font-family:Times New Roman;font-size:20.0px;fill:#4a92a5;">Up-to-date task</text>
+</g>
+<!-- k2_2->k3_2 -->
+<g id="edge41" class="edge"><title>k2_2->k3_2</title>
+<path style="fill:none;stroke:gray;" d="M579,-534C579,-530 579,-526 579,-522"/>
+<polygon style="fill:gray;stroke:gray;" points="582.5,-522 579,-512 575.5,-522 582.5,-522"/>
+</g>
+<!-- k4_2 -->
+<g id="node42" class="node"><title>k4_2</title>
+<polygon style="fill:none;stroke:black;" points="666,-438 496,-438 492,-434 492,-364 662,-364 666,-368 666,-438"/>
+<polyline style="fill:none;stroke:black;" points="662,-434 492,-434 "/>
+<polyline style="fill:none;stroke:black;" points="662,-434 662,-364 "/>
+<polyline style="fill:none;stroke:black;" points="662,-434 666,-438 "/>
+<text text-anchor="middle" x="579" y="-405.892" style="font-family:Times New Roman;font-size:20.0px;">Force pipeline run</text>
+<text text-anchor="middle" x="579" y="-382.892" style="font-family:Times New Roman;font-size:20.0px;">from this task</text>
+</g>
+<!-- k3_2->k4_2 -->
+<g id="edge43" class="edge"><title>k3_2->k4_2</title>
+<path style="fill:none;stroke:gray;" d="M579,-460C579,-456 579,-452 579,-448"/>
+<polygon style="fill:gray;stroke:gray;" points="582.5,-448 579,-438 575.5,-448 582.5,-448"/>
+</g>
+<!-- k5_2 -->
+<g id="node44" class="node"><title>k5_2</title>
+<polygon style="fill:none;stroke:#4a64a5;" points="637,-341.5 525,-341.5 521,-337.5 521,-290.5 633,-290.5 637,-294.5 637,-341.5"/>
+<polyline style="fill:none;stroke:#4a64a5;" points="633,-337.5 521,-337.5 "/>
+<polyline style="fill:none;stroke:#4a64a5;" points="633,-337.5 633,-290.5 "/>
+<polyline style="fill:none;stroke:#4a64a5;" points="633,-337.5 637,-341.5 "/>
+<text text-anchor="middle" x="579" y="-309.392" style="font-family:Times New Roman;font-size:20.0px;fill:#4a64a5;">Task to run</text>
+</g>
+<!-- k4_2->k5_2 -->
+<g id="edge45" class="edge"><title>k4_2->k5_2</title>
+<path style="fill:none;stroke:#4a64a5;" d="M579,-364C579,-360 579,-356 579,-352"/>
+<polygon style="fill:#4a64a5;stroke:#4a64a5;" points="582.5,-352 579,-342 575.5,-352 582.5,-352"/>
+</g>
+<!-- k6_2 -->
+<g id="node46" class="node"><title>k6_2</title>
+<polygon style="fill:none;stroke:#4a64a5;stroke-dasharray:5,2;" points="655,-268 507,-268 503,-264 503,-194 651,-194 655,-198 655,-268"/>
+<polyline style="fill:none;stroke:#4a64a5;stroke-dasharray:5,2;" points="651,-264 503,-264 "/>
+<polyline style="fill:none;stroke:#4a64a5;stroke-dasharray:5,2;" points="651,-264 651,-194 "/>
+<polyline style="fill:none;stroke:#4a64a5;stroke-dasharray:5,2;" points="651,-264 655,-268 "/>
+<text text-anchor="middle" x="579" y="-235.892" style="font-family:Times New Roman;font-size:20.0px;fill:#4a64a5;">Up-to-date task</text>
+<text text-anchor="middle" x="579" y="-212.892" style="font-family:Times New Roman;font-size:20.0px;fill:#4a64a5;">forced to rerun</text>
+</g>
+<!-- k5_2->k6_2 -->
+<g id="edge47" class="edge"><title>k5_2->k6_2</title>
+<path style="fill:none;stroke:#4a64a5;" d="M579,-290C579,-286 579,-282 579,-278"/>
+<polygon style="fill:#4a64a5;stroke:#4a64a5;" points="582.5,-278 579,-268 575.5,-278 582.5,-278"/>
+</g>
+<!-- k7_2 -->
+<g id="node48" class="node"><title>k7_2</title>
+<polygon style="fill:#d2c24a;stroke:#4a92a5;" points="639,-172 523,-172 519,-168 519,-98 635,-98 639,-102 639,-172"/>
+<polyline style="fill:none;stroke:#4a92a5;" points="635,-168 519,-168 "/>
+<polyline style="fill:none;stroke:#4a92a5;" points="635,-168 635,-98 "/>
+<polyline style="fill:none;stroke:#4a92a5;" points="635,-168 639,-172 "/>
+<text text-anchor="middle" x="579" y="-139.892" style="font-family:Times New Roman;font-size:20.0px;fill:#4a92a5;">Up-to-date</text>
+<text text-anchor="middle" x="579" y="-116.892" style="font-family:Times New Roman;font-size:20.0px;fill:#4a92a5;">Final target</text>
+</g>
+<!-- k6_2->k7_2 -->
+<g id="edge49" class="edge"><title>k6_2->k7_2</title>
+<path style="fill:none;stroke:#4a64a5;" d="M579,-194C579,-190 579,-186 579,-182"/>
+<polygon style="fill:#4a64a5;stroke:#4a64a5;" points="582.5,-182 579,-172 575.5,-182 582.5,-182"/>
+</g>
+<!-- k8_2 -->
+<g id="node50" class="node"><title>k8_2</title>
+<polygon style="fill:#d2c24a;stroke:black;" points="639,-75.5 523,-75.5 519,-71.5 519,-24.5 635,-24.5 639,-28.5 639,-75.5"/>
+<polyline style="fill:none;stroke:black;" points="635,-71.5 519,-71.5 "/>
+<polyline style="fill:none;stroke:black;" points="635,-71.5 635,-24.5 "/>
+<polyline style="fill:none;stroke:black;" points="635,-71.5 639,-75.5 "/>
+<text text-anchor="middle" x="579" y="-43.392" style="font-family:Times New Roman;font-size:20.0px;">Final target</text>
+</g>
+<!-- k7_2->k8_2 -->
+<g id="edge51" class="edge"><title>k7_2->k8_2</title>
+<path style="fill:none;stroke:gray;" d="M579,-98C579,-94 579,-90 579,-86"/>
+<polygon style="fill:gray;stroke:gray;" points="582.5,-86 579,-76 575.5,-86 582.5,-86"/>
+</g>
+<!-- k1_3 -->
+<g id="node53" class="node"><title>k1_3</title>
+<polygon style="fill:#ff3e68;stroke:white;" points="877,-681.5 745,-681.5 741,-677.5 741,-630.5 873,-630.5 877,-634.5 877,-681.5"/>
+<polyline style="fill:none;stroke:white;" points="873,-677.5 741,-677.5 "/>
+<polyline style="fill:none;stroke:white;" points="873,-677.5 873,-630.5 "/>
+<polyline style="fill:none;stroke:white;" points="873,-677.5 877,-681.5 "/>
+<text text-anchor="middle" x="809" y="-649.392" style="font-family:Times New Roman;font-size:20.0px;fill:white;">Vicious cycle</text>
+</g>
+<!-- k2_3 -->
+<g id="node54" class="node"><title>k2_3</title>
+<polygon style="fill:white;stroke:gray;" points="874,-608 748,-608 744,-604 744,-534 870,-534 874,-538 874,-608"/>
+<polyline style="fill:none;stroke:gray;" points="870,-604 744,-604 "/>
+<polyline style="fill:none;stroke:gray;" points="870,-604 870,-534 "/>
+<polyline style="fill:none;stroke:gray;" points="870,-604 874,-608 "/>
+<text text-anchor="middle" x="809" y="-575.892" style="font-family:Times New Roman;font-size:20.0px;fill:gray;">Up-to-date</text>
+<text text-anchor="middle" x="809" y="-552.892" style="font-family:Times New Roman;font-size:20.0px;fill:gray;">down stream</text>
+</g>
+<!-- k1_3->k2_3 -->
+<g id="edge54" class="edge"><title>k1_3->k2_3</title>
+<path style="fill:none;stroke:#ff3e68;" d="M803,-630C802,-626 802,-622 802,-618"/>
+<polygon style="fill:#ff3e68;stroke:#ff3e68;" points="805.5,-618 802,-608 798.5,-618 805.5,-618"/>
+</g>
+<!-- k2_3->k1_3 -->
+<g id="edge56" class="edge"><title>k2_3->k1_3</title>
+<path style="fill:none;stroke:#ff3e68;" d="M816,-608C816,-612 816,-616 816,-620"/>
+<polygon style="fill:#ff3e68;stroke:#ff3e68;" points="812.512,-619.701 815,-630 819.478,-620.398 812.512,-619.701"/>
+</g>
+<!-- k3_3 -->
+<g id="node57" class="node"><title>k3_3</title>
+<polygon style="fill:#c9d787;stroke:#7d8a2e;" points="885,-511.5 737,-511.5 733,-507.5 733,-460.5 881,-460.5 885,-464.5 885,-511.5"/>
+<polyline style="fill:none;stroke:#7d8a2e;" points="881,-507.5 733,-507.5 "/>
+<polyline style="fill:none;stroke:#7d8a2e;" points="881,-507.5 881,-460.5 "/>
+<polyline style="fill:none;stroke:#7d8a2e;" points="881,-507.5 885,-511.5 "/>
+<text text-anchor="middle" x="809" y="-479.392" style="font-family:Times New Roman;font-size:20.0px;fill:#7d8a2e;">Up-to-date task</text>
+</g>
+<!-- k2_3->k3_3 -->
+<g id="edge58" class="edge"><title>k2_3->k3_3</title>
+<path style="fill:none;stroke:gray;" d="M809,-534C809,-530 809,-526 809,-522"/>
+<polygon style="fill:gray;stroke:gray;" points="812.5,-522 809,-512 805.5,-522 812.5,-522"/>
+</g>
+<!-- k4_3 -->
+<g id="node59" class="node"><title>k4_3</title>
+<polygon style="fill:none;stroke:black;" points="896,-438 726,-438 722,-434 722,-364 892,-364 896,-368 896,-438"/>
+<polyline style="fill:none;stroke:black;" points="892,-434 722,-434 "/>
+<polyline style="fill:none;stroke:black;" points="892,-434 892,-364 "/>
+<polyline style="fill:none;stroke:black;" points="892,-434 896,-438 "/>
+<text text-anchor="middle" x="809" y="-405.892" style="font-family:Times New Roman;font-size:20.0px;">Force pipeline run</text>
+<text text-anchor="middle" x="809" y="-382.892" style="font-family:Times New Roman;font-size:20.0px;">from this task</text>
+</g>
+<!-- k3_3->k4_3 -->
+<g id="edge60" class="edge"><title>k3_3->k4_3</title>
+<path style="fill:none;stroke:gray;" d="M809,-460C809,-456 809,-452 809,-448"/>
+<polygon style="fill:gray;stroke:gray;" points="812.5,-448 809,-438 805.5,-448 812.5,-448"/>
+</g>
+<!-- k5_3 -->
+<g id="node61" class="node"><title>k5_3</title>
+<polygon style="fill:none;stroke:#bfb5ff;" points="867,-341.5 755,-341.5 751,-337.5 751,-290.5 863,-290.5 867,-294.5 867,-341.5"/>
+<polyline style="fill:none;stroke:#bfb5ff;" points="863,-337.5 751,-337.5 "/>
+<polyline style="fill:none;stroke:#bfb5ff;" points="863,-337.5 863,-290.5 "/>
+<polyline style="fill:none;stroke:#bfb5ff;" points="863,-337.5 867,-341.5 "/>
+<text text-anchor="middle" x="809" y="-309.392" style="font-family:Times New Roman;font-size:20.0px;fill:#bfb5ff;">Task to run</text>
+</g>
+<!-- k4_3->k5_3 -->
+<g id="edge62" class="edge"><title>k4_3->k5_3</title>
+<path style="fill:none;stroke:#bfb5ff;" d="M809,-364C809,-360 809,-356 809,-352"/>
+<polygon style="fill:#bfb5ff;stroke:#bfb5ff;" points="812.5,-352 809,-342 805.5,-352 812.5,-352"/>
+</g>
+<!-- k6_3 -->
+<g id="node63" class="node"><title>k6_3</title>
+<polygon style="fill:none;stroke:#bfb5ff;stroke-dasharray:5,2;" points="885,-268 737,-268 733,-264 733,-194 881,-194 885,-198 885,-268"/>
+<polyline style="fill:none;stroke:#bfb5ff;stroke-dasharray:5,2;" points="881,-264 733,-264 "/>
+<polyline style="fill:none;stroke:#bfb5ff;stroke-dasharray:5,2;" points="881,-264 881,-194 "/>
+<polyline style="fill:none;stroke:#bfb5ff;stroke-dasharray:5,2;" points="881,-264 885,-268 "/>
+<text text-anchor="middle" x="809" y="-235.892" style="font-family:Times New Roman;font-size:20.0px;fill:#bfb5ff;">Up-to-date task</text>
+<text text-anchor="middle" x="809" y="-212.892" style="font-family:Times New Roman;font-size:20.0px;fill:#bfb5ff;">forced to rerun</text>
+</g>
+<!-- k5_3->k6_3 -->
+<g id="edge64" class="edge"><title>k5_3->k6_3</title>
+<path style="fill:none;stroke:#bfb5ff;" d="M809,-290C809,-286 809,-282 809,-278"/>
+<polygon style="fill:#bfb5ff;stroke:#bfb5ff;" points="812.5,-278 809,-268 805.5,-278 812.5,-278"/>
+</g>
+<!-- k7_3 -->
+<g id="node65" class="node"><title>k7_3</title>
+<polygon style="fill:#fff1dc;stroke:#7d8a2e;" points="869,-172 753,-172 749,-168 749,-98 865,-98 869,-102 869,-172"/>
+<polyline style="fill:none;stroke:#7d8a2e;" points="865,-168 749,-168 "/>
+<polyline style="fill:none;stroke:#7d8a2e;" points="865,-168 865,-98 "/>
+<polyline style="fill:none;stroke:#7d8a2e;" points="865,-168 869,-172 "/>
+<text text-anchor="middle" x="809" y="-139.892" style="font-family:Times New Roman;font-size:20.0px;fill:#7d8a2e;">Up-to-date</text>
+<text text-anchor="middle" x="809" y="-116.892" style="font-family:Times New Roman;font-size:20.0px;fill:#7d8a2e;">Final target</text>
+</g>
+<!-- k6_3->k7_3 -->
+<g id="edge66" class="edge"><title>k6_3->k7_3</title>
+<path style="fill:none;stroke:#bfb5ff;" d="M809,-194C809,-190 809,-186 809,-182"/>
+<polygon style="fill:#bfb5ff;stroke:#bfb5ff;" points="812.5,-182 809,-172 805.5,-182 812.5,-182"/>
+</g>
+<!-- k8_3 -->
+<g id="node67" class="node"><title>k8_3</title>
+<polygon style="fill:#fff1dc;stroke:black;" points="869,-75.5 753,-75.5 749,-71.5 749,-24.5 865,-24.5 869,-28.5 869,-75.5"/>
+<polyline style="fill:none;stroke:black;" points="865,-71.5 749,-71.5 "/>
+<polyline style="fill:none;stroke:black;" points="865,-71.5 865,-24.5 "/>
+<polyline style="fill:none;stroke:black;" points="865,-71.5 869,-75.5 "/>
+<text text-anchor="middle" x="809" y="-43.392" style="font-family:Times New Roman;font-size:20.0px;">Final target</text>
+</g>
+<!-- k7_3->k8_3 -->
+<g id="edge68" class="edge"><title>k7_3->k8_3</title>
+<path style="fill:none;stroke:gray;" d="M809,-98C809,-94 809,-90 809,-86"/>
+<polygon style="fill:gray;stroke:gray;" points="812.5,-86 809,-76 805.5,-86 812.5,-86"/>
+</g>
+<!-- k1_4 -->
+<g id="node70" class="node"><title>k1_4</title>
+<polygon style="fill:#f54f29;stroke:white;" points="1107,-681.5 975,-681.5 971,-677.5 971,-630.5 1103,-630.5 1107,-634.5 1107,-681.5"/>
+<polyline style="fill:none;stroke:white;" points="1103,-677.5 971,-677.5 "/>
+<polyline style="fill:none;stroke:white;" points="1103,-677.5 1103,-630.5 "/>
+<polyline style="fill:none;stroke:white;" points="1103,-677.5 1107,-681.5 "/>
+<text text-anchor="middle" x="1039" y="-649.392" style="font-family:Times New Roman;font-size:20.0px;fill:white;">Vicious cycle</text>
+</g>
+<!-- k2_4 -->
+<g id="node71" class="node"><title>k2_4</title>
+<polygon style="fill:white;stroke:gray;" points="1104,-608 978,-608 974,-604 974,-534 1100,-534 1104,-538 1104,-608"/>
+<polyline style="fill:none;stroke:gray;" points="1100,-604 974,-604 "/>
+<polyline style="fill:none;stroke:gray;" points="1100,-604 1100,-534 "/>
+<polyline style="fill:none;stroke:gray;" points="1100,-604 1104,-608 "/>
+<text text-anchor="middle" x="1039" y="-575.892" style="font-family:Times New Roman;font-size:20.0px;fill:gray;">Up-to-date</text>
+<text text-anchor="middle" x="1039" y="-552.892" style="font-family:Times New Roman;font-size:20.0px;fill:gray;">down stream</text>
+</g>
+<!-- k1_4->k2_4 -->
+<g id="edge71" class="edge"><title>k1_4->k2_4</title>
+<path style="fill:none;stroke:#f54f29;" d="M1033,-630C1032,-626 1032,-622 1032,-618"/>
+<polygon style="fill:#f54f29;stroke:#f54f29;" points="1035.5,-618 1032,-608 1028.5,-618 1035.5,-618"/>
+</g>
+<!-- k2_4->k1_4 -->
+<g id="edge73" class="edge"><title>k2_4->k1_4</title>
+<path style="fill:none;stroke:#f54f29;" d="M1046,-608C1046,-612 1046,-616 1046,-620"/>
+<polygon style="fill:#f54f29;stroke:#f54f29;" points="1042.51,-619.701 1045,-630 1049.48,-620.398 1042.51,-619.701"/>
+</g>
+<!-- k3_4 -->
+<g id="node74" class="node"><title>k3_4</title>
+<polygon style="fill:#b8cc6e;stroke:#4b6000;" points="1115,-511.5 967,-511.5 963,-507.5 963,-460.5 1111,-460.5 1115,-464.5 1115,-511.5"/>
+<polyline style="fill:none;stroke:#4b6000;" points="1111,-507.5 963,-507.5 "/>
+<polyline style="fill:none;stroke:#4b6000;" points="1111,-507.5 1111,-460.5 "/>
+<polyline style="fill:none;stroke:#4b6000;" points="1111,-507.5 1115,-511.5 "/>
+<text text-anchor="middle" x="1039" y="-479.392" style="font-family:Times New Roman;font-size:20.0px;fill:#4b6000;">Up-to-date task</text>
+</g>
+<!-- k2_4->k3_4 -->
+<g id="edge75" class="edge"><title>k2_4->k3_4</title>
+<path style="fill:none;stroke:gray;" d="M1039,-534C1039,-530 1039,-526 1039,-522"/>
+<polygon style="fill:gray;stroke:gray;" points="1042.5,-522 1039,-512 1035.5,-522 1042.5,-522"/>
+</g>
+<!-- k4_4 -->
+<g id="node76" class="node"><title>k4_4</title>
+<polygon style="fill:none;stroke:black;" points="1126,-438 956,-438 952,-434 952,-364 1122,-364 1126,-368 1126,-438"/>
+<polyline style="fill:none;stroke:black;" points="1122,-434 952,-434 "/>
+<polyline style="fill:none;stroke:black;" points="1122,-434 1122,-364 "/>
+<polyline style="fill:none;stroke:black;" points="1122,-434 1126,-438 "/>
+<text text-anchor="middle" x="1039" y="-405.892" style="font-family:Times New Roman;font-size:20.0px;">Force pipeline run</text>
+<text text-anchor="middle" x="1039" y="-382.892" style="font-family:Times New Roman;font-size:20.0px;">from this task</text>
+</g>
+<!-- k3_4->k4_4 -->
+<g id="edge77" class="edge"><title>k3_4->k4_4</title>
+<path style="fill:none;stroke:gray;" d="M1039,-460C1039,-456 1039,-452 1039,-448"/>
+<polygon style="fill:gray;stroke:gray;" points="1042.5,-448 1039,-438 1035.5,-448 1042.5,-448"/>
+</g>
+<!-- k5_4 -->
+<g id="node78" class="node"><title>k5_4</title>
+<polygon style="fill:none;stroke:#004460;" points="1097,-341.5 985,-341.5 981,-337.5 981,-290.5 1093,-290.5 1097,-294.5 1097,-341.5"/>
+<polyline style="fill:none;stroke:#004460;" points="1093,-337.5 981,-337.5 "/>
+<polyline style="fill:none;stroke:#004460;" points="1093,-337.5 1093,-290.5 "/>
+<polyline style="fill:none;stroke:#004460;" points="1093,-337.5 1097,-341.5 "/>
+<text text-anchor="middle" x="1039" y="-309.392" style="font-family:Times New Roman;font-size:20.0px;fill:#004460;">Task to run</text>
+</g>
+<!-- k4_4->k5_4 -->
+<g id="edge79" class="edge"><title>k4_4->k5_4</title>
+<path style="fill:none;stroke:#004460;" d="M1039,-364C1039,-360 1039,-356 1039,-352"/>
+<polygon style="fill:#004460;stroke:#004460;" points="1042.5,-352 1039,-342 1035.5,-352 1042.5,-352"/>
+</g>
+<!-- k6_4 -->
+<g id="node80" class="node"><title>k6_4</title>
+<polygon style="fill:none;stroke:#004460;stroke-dasharray:5,2;" points="1115,-268 967,-268 963,-264 963,-194 1111,-194 1115,-198 1115,-268"/>
+<polyline style="fill:none;stroke:#004460;stroke-dasharray:5,2;" points="1111,-264 963,-264 "/>
+<polyline style="fill:none;stroke:#004460;stroke-dasharray:5,2;" points="1111,-264 1111,-194 "/>
+<polyline style="fill:none;stroke:#004460;stroke-dasharray:5,2;" points="1111,-264 1115,-268 "/>
+<text text-anchor="middle" x="1039" y="-235.892" style="font-family:Times New Roman;font-size:20.0px;fill:#004460;">Up-to-date task</text>
+<text text-anchor="middle" x="1039" y="-212.892" style="font-family:Times New Roman;font-size:20.0px;fill:#004460;">forced to rerun</text>
+</g>
+<!-- k5_4->k6_4 -->
+<g id="edge81" class="edge"><title>k5_4->k6_4</title>
+<path style="fill:none;stroke:#004460;" d="M1039,-290C1039,-286 1039,-282 1039,-278"/>
+<polygon style="fill:#004460;stroke:#004460;" points="1042.5,-278 1039,-268 1035.5,-278 1042.5,-278"/>
+</g>
+<!-- k7_4 -->
+<g id="node82" class="node"><title>k7_4</title>
+<polygon style="fill:#fff0a3;stroke:#4b6000;" points="1099,-172 983,-172 979,-168 979,-98 1095,-98 1099,-102 1099,-172"/>
+<polyline style="fill:none;stroke:#4b6000;" points="1095,-168 979,-168 "/>
+<polyline style="fill:none;stroke:#4b6000;" points="1095,-168 1095,-98 "/>
+<polyline style="fill:none;stroke:#4b6000;" points="1095,-168 1099,-172 "/>
+<text text-anchor="middle" x="1039" y="-139.892" style="font-family:Times New Roman;font-size:20.0px;fill:#4b6000;">Up-to-date</text>
+<text text-anchor="middle" x="1039" y="-116.892" style="font-family:Times New Roman;font-size:20.0px;fill:#4b6000;">Final target</text>
+</g>
+<!-- k6_4->k7_4 -->
+<g id="edge83" class="edge"><title>k6_4->k7_4</title>
+<path style="fill:none;stroke:#004460;" d="M1039,-194C1039,-190 1039,-186 1039,-182"/>
+<polygon style="fill:#004460;stroke:#004460;" points="1042.5,-182 1039,-172 1035.5,-182 1042.5,-182"/>
+</g>
+<!-- k8_4 -->
+<g id="node84" class="node"><title>k8_4</title>
+<polygon style="fill:#fff0a3;stroke:black;" points="1099,-75.5 983,-75.5 979,-71.5 979,-24.5 1095,-24.5 1099,-28.5 1099,-75.5"/>
+<polyline style="fill:none;stroke:black;" points="1095,-71.5 979,-71.5 "/>
+<polyline style="fill:none;stroke:black;" points="1095,-71.5 1095,-24.5 "/>
+<polyline style="fill:none;stroke:black;" points="1095,-71.5 1099,-75.5 "/>
+<text text-anchor="middle" x="1039" y="-43.392" style="font-family:Times New Roman;font-size:20.0px;">Final target</text>
+</g>
+<!-- k7_4->k8_4 -->
+<g id="edge85" class="edge"><title>k7_4->k8_4</title>
+<path style="fill:none;stroke:gray;" d="M1039,-98C1039,-94 1039,-90 1039,-86"/>
+<polygon style="fill:gray;stroke:gray;" points="1042.5,-86 1039,-76 1035.5,-86 1042.5,-86"/>
+</g>
+<!-- k1_5 -->
+<g id="node87" class="node"><title>k1_5</title>
+<polygon style="fill:#ff5555;stroke:white;" points="1337,-681.5 1205,-681.5 1201,-677.5 1201,-630.5 1333,-630.5 1337,-634.5 1337,-681.5"/>
+<polyline style="fill:none;stroke:white;" points="1333,-677.5 1201,-677.5 "/>
+<polyline style="fill:none;stroke:white;" points="1333,-677.5 1333,-630.5 "/>
+<polyline style="fill:none;stroke:white;" points="1333,-677.5 1337,-681.5 "/>
+<text text-anchor="middle" x="1269" y="-649.392" style="font-family:Times New Roman;font-size:20.0px;fill:white;">Vicious cycle</text>
+</g>
+<!-- k2_5 -->
+<g id="node88" class="node"><title>k2_5</title>
+<polygon style="fill:white;stroke:gray;" points="1334,-608 1208,-608 1204,-604 1204,-534 1330,-534 1334,-538 1334,-608"/>
+<polyline style="fill:none;stroke:gray;" points="1330,-604 1204,-604 "/>
+<polyline style="fill:none;stroke:gray;" points="1330,-604 1330,-534 "/>
+<polyline style="fill:none;stroke:gray;" points="1330,-604 1334,-608 "/>
+<text text-anchor="middle" x="1269" y="-575.892" style="font-family:Times New Roman;font-size:20.0px;fill:gray;">Up-to-date</text>
+<text text-anchor="middle" x="1269" y="-552.892" style="font-family:Times New Roman;font-size:20.0px;fill:gray;">down stream</text>
+</g>
+<!-- k1_5->k2_5 -->
+<g id="edge88" class="edge"><title>k1_5->k2_5</title>
+<path style="fill:none;stroke:#ff5555;" d="M1263,-630C1262,-626 1262,-622 1262,-618"/>
+<polygon style="fill:#ff5555;stroke:#ff5555;" points="1265.5,-618 1262,-608 1258.5,-618 1265.5,-618"/>
+</g>
+<!-- k2_5->k1_5 -->
+<g id="edge90" class="edge"><title>k2_5->k1_5</title>
+<path style="fill:none;stroke:#ff5555;" d="M1276,-608C1276,-612 1276,-616 1276,-620"/>
+<polygon style="fill:#ff5555;stroke:#ff5555;" points="1272.51,-619.701 1275,-630 1279.48,-620.398 1272.51,-619.701"/>
+</g>
+<!-- k3_5 -->
+<g id="node91" class="node"><title>k3_5</title>
+<polygon style="fill:#44ff44;stroke:#007700;" points="1345,-511.5 1197,-511.5 1193,-507.5 1193,-460.5 1341,-460.5 1345,-464.5 1345,-511.5"/>
+<polyline style="fill:none;stroke:#007700;" points="1341,-507.5 1193,-507.5 "/>
+<polyline style="fill:none;stroke:#007700;" points="1341,-507.5 1341,-460.5 "/>
+<polyline style="fill:none;stroke:#007700;" points="1341,-507.5 1345,-511.5 "/>
+<text text-anchor="middle" x="1269" y="-479.392" style="font-family:Times New Roman;font-size:20.0px;fill:#007700;">Up-to-date task</text>
+</g>
+<!-- k2_5->k3_5 -->
+<g id="edge92" class="edge"><title>k2_5->k3_5</title>
+<path style="fill:none;stroke:gray;" d="M1269,-534C1269,-530 1269,-526 1269,-522"/>
+<polygon style="fill:gray;stroke:gray;" points="1272.5,-522 1269,-512 1265.5,-522 1272.5,-522"/>
+</g>
+<!-- k4_5 -->
+<g id="node93" class="node"><title>k4_5</title>
+<polygon style="fill:none;stroke:black;" points="1356,-438 1186,-438 1182,-434 1182,-364 1352,-364 1356,-368 1356,-438"/>
+<polyline style="fill:none;stroke:black;" points="1352,-434 1182,-434 "/>
+<polyline style="fill:none;stroke:black;" points="1352,-434 1352,-364 "/>
+<polyline style="fill:none;stroke:black;" points="1352,-434 1356,-438 "/>
+<text text-anchor="middle" x="1269" y="-405.892" style="font-family:Times New Roman;font-size:20.0px;">Force pipeline run</text>
+<text text-anchor="middle" x="1269" y="-382.892" style="font-family:Times New Roman;font-size:20.0px;">from this task</text>
+</g>
+<!-- k3_5->k4_5 -->
+<g id="edge94" class="edge"><title>k3_5->k4_5</title>
+<path style="fill:none;stroke:gray;" d="M1269,-460C1269,-456 1269,-452 1269,-448"/>
+<polygon style="fill:gray;stroke:gray;" points="1272.5,-448 1269,-438 1265.5,-448 1272.5,-448"/>
+</g>
+<!-- k5_5 -->
+<g id="node95" class="node"><title>k5_5</title>
+<polygon style="fill:#aabbff;stroke:#1122ff;" points="1327,-341.5 1215,-341.5 1211,-337.5 1211,-290.5 1323,-290.5 1327,-294.5 1327,-341.5"/>
+<polyline style="fill:none;stroke:#1122ff;" points="1323,-337.5 1211,-337.5 "/>
+<polyline style="fill:none;stroke:#1122ff;" points="1323,-337.5 1323,-290.5 "/>
+<polyline style="fill:none;stroke:#1122ff;" points="1323,-337.5 1327,-341.5 "/>
+<text text-anchor="middle" x="1269" y="-309.392" style="font-family:Times New Roman;font-size:20.0px;fill:#1122ff;">Task to run</text>
+</g>
+<!-- k4_5->k5_5 -->
+<g id="edge96" class="edge"><title>k4_5->k5_5</title>
+<path style="fill:none;stroke:#1122ff;" d="M1269,-364C1269,-360 1269,-356 1269,-352"/>
+<polygon style="fill:#1122ff;stroke:#1122ff;" points="1272.5,-352 1269,-342 1265.5,-352 1272.5,-352"/>
+</g>
+<!-- k6_5 -->
+<g id="node97" class="node"><title>k6_5</title>
+<polygon style="fill:none;stroke:#1122ff;stroke-dasharray:5,2;" points="1345,-268 1197,-268 1193,-264 1193,-194 1341,-194 1345,-198 1345,-268"/>
+<polyline style="fill:none;stroke:#1122ff;stroke-dasharray:5,2;" points="1341,-264 1193,-264 "/>
+<polyline style="fill:none;stroke:#1122ff;stroke-dasharray:5,2;" points="1341,-264 1341,-194 "/>
+<polyline style="fill:none;stroke:#1122ff;stroke-dasharray:5,2;" points="1341,-264 1345,-268 "/>
+<text text-anchor="middle" x="1269" y="-235.892" style="font-family:Times New Roman;font-size:20.0px;fill:#1122ff;">Up-to-date task</text>
+<text text-anchor="middle" x="1269" y="-212.892" style="font-family:Times New Roman;font-size:20.0px;fill:#1122ff;">forced to rerun</text>
+</g>
+<!-- k5_5->k6_5 -->
+<g id="edge98" class="edge"><title>k5_5->k6_5</title>
+<path style="fill:none;stroke:#1122ff;" d="M1269,-290C1269,-286 1269,-282 1269,-278"/>
+<polygon style="fill:#1122ff;stroke:#1122ff;" points="1272.5,-278 1269,-268 1265.5,-278 1272.5,-278"/>
+</g>
+<!-- k7_5 -->
+<g id="node99" class="node"><title>k7_5</title>
+<polygon style="fill:#ff883b;stroke:#007700;" points="1329,-172 1213,-172 1209,-168 1209,-98 1325,-98 1329,-102 1329,-172"/>
+<polyline style="fill:none;stroke:#007700;" points="1325,-168 1209,-168 "/>
+<polyline style="fill:none;stroke:#007700;" points="1325,-168 1325,-98 "/>
+<polyline style="fill:none;stroke:#007700;" points="1325,-168 1329,-172 "/>
+<text text-anchor="middle" x="1269" y="-139.892" style="font-family:Times New Roman;font-size:20.0px;fill:#007700;">Up-to-date</text>
+<text text-anchor="middle" x="1269" y="-116.892" style="font-family:Times New Roman;font-size:20.0px;fill:#007700;">Final target</text>
+</g>
+<!-- k6_5->k7_5 -->
+<g id="edge100" class="edge"><title>k6_5->k7_5</title>
+<path style="fill:none;stroke:#1122ff;" d="M1269,-194C1269,-190 1269,-186 1269,-182"/>
+<polygon style="fill:#1122ff;stroke:#1122ff;" points="1272.5,-182 1269,-172 1265.5,-182 1272.5,-182"/>
+</g>
+<!-- k8_5 -->
+<g id="node101" class="node"><title>k8_5</title>
+<polygon style="fill:#ff883b;stroke:black;" points="1329,-75.5 1213,-75.5 1209,-71.5 1209,-24.5 1325,-24.5 1329,-28.5 1329,-75.5"/>
+<polyline style="fill:none;stroke:black;" points="1325,-71.5 1209,-71.5 "/>
+<polyline style="fill:none;stroke:black;" points="1325,-71.5 1325,-24.5 "/>
+<polyline style="fill:none;stroke:black;" points="1325,-71.5 1329,-75.5 "/>
+<text text-anchor="middle" x="1269" y="-43.392" style="font-family:Times New Roman;font-size:20.0px;">Final target</text>
+</g>
+<!-- k7_5->k8_5 -->
+<g id="edge102" class="edge"><title>k7_5->k8_5</title>
+<path style="fill:none;stroke:gray;" d="M1269,-98C1269,-94 1269,-90 1269,-86"/>
+<polygon style="fill:gray;stroke:gray;" points="1272.5,-86 1269,-76 1265.5,-86 1272.5,-86"/>
+</g>
+<!-- k1_6 -->
+<g id="node104" class="node"><title>k1_6</title>
+<polygon style="fill:#d3181f;stroke:white;" points="1567,-681.5 1435,-681.5 1431,-677.5 1431,-630.5 1563,-630.5 1567,-634.5 1567,-681.5"/>
+<polyline style="fill:none;stroke:white;" points="1563,-677.5 1431,-677.5 "/>
+<polyline style="fill:none;stroke:white;" points="1563,-677.5 1563,-630.5 "/>
+<polyline style="fill:none;stroke:white;" points="1563,-677.5 1567,-681.5 "/>
+<text text-anchor="middle" x="1499" y="-649.392" style="font-family:Times New Roman;font-size:20.0px;fill:white;">Vicious cycle</text>
+</g>
+<!-- k2_6 -->
+<g id="node105" class="node"><title>k2_6</title>
+<polygon style="fill:white;stroke:gray;" points="1564,-608 1438,-608 1434,-604 1434,-534 1560,-534 1564,-538 1564,-608"/>
+<polyline style="fill:none;stroke:gray;" points="1560,-604 1434,-604 "/>
+<polyline style="fill:none;stroke:gray;" points="1560,-604 1560,-534 "/>
+<polyline style="fill:none;stroke:gray;" points="1560,-604 1564,-608 "/>
+<text text-anchor="middle" x="1499" y="-575.892" style="font-family:Times New Roman;font-size:20.0px;fill:gray;">Up-to-date</text>
+<text text-anchor="middle" x="1499" y="-552.892" style="font-family:Times New Roman;font-size:20.0px;fill:gray;">down stream</text>
+</g>
+<!-- k1_6->k2_6 -->
+<g id="edge105" class="edge"><title>k1_6->k2_6</title>
+<path style="fill:none;stroke:#d3181f;" d="M1493,-630C1492,-626 1492,-622 1492,-618"/>
+<polygon style="fill:#d3181f;stroke:#d3181f;" points="1495.5,-618 1492,-608 1488.5,-618 1495.5,-618"/>
+</g>
+<!-- k2_6->k1_6 -->
+<g id="edge107" class="edge"><title>k2_6->k1_6</title>
+<path style="fill:none;stroke:#d3181f;" d="M1506,-608C1506,-612 1506,-616 1506,-620"/>
+<polygon style="fill:#d3181f;stroke:#d3181f;" points="1502.51,-619.701 1505,-630 1509.48,-620.398 1502.51,-619.701"/>
+</g>
+<!-- k3_6 -->
+<g id="node108" class="node"><title>k3_6</title>
+<polygon style="fill:#d3fae3;stroke:#87b379;" points="1575,-511.5 1427,-511.5 1423,-507.5 1423,-460.5 1571,-460.5 1575,-464.5 1575,-511.5"/>
+<polyline style="fill:none;stroke:#87b379;" points="1571,-507.5 1423,-507.5 "/>
+<polyline style="fill:none;stroke:#87b379;" points="1571,-507.5 1571,-460.5 "/>
+<polyline style="fill:none;stroke:#87b379;" points="1571,-507.5 1575,-511.5 "/>
+<text text-anchor="middle" x="1499" y="-479.392" style="font-family:Times New Roman;font-size:20.0px;fill:#87b379;">Up-to-date task</text>
+</g>
+<!-- k2_6->k3_6 -->
+<g id="edge109" class="edge"><title>k2_6->k3_6</title>
+<path style="fill:none;stroke:gray;" d="M1499,-534C1499,-530 1499,-526 1499,-522"/>
+<polygon style="fill:gray;stroke:gray;" points="1502.5,-522 1499,-512 1495.5,-522 1502.5,-522"/>
+</g>
+<!-- k4_6 -->
+<g id="node110" class="node"><title>k4_6</title>
+<polygon style="fill:none;stroke:black;" points="1586,-438 1416,-438 1412,-434 1412,-364 1582,-364 1586,-368 1586,-438"/>
+<polyline style="fill:none;stroke:black;" points="1582,-434 1412,-434 "/>
+<polyline style="fill:none;stroke:black;" points="1582,-434 1582,-364 "/>
+<polyline style="fill:none;stroke:black;" points="1582,-434 1586,-438 "/>
+<text text-anchor="middle" x="1499" y="-405.892" style="font-family:Times New Roman;font-size:20.0px;">Force pipeline run</text>
+<text text-anchor="middle" x="1499" y="-382.892" style="font-family:Times New Roman;font-size:20.0px;">from this task</text>
+</g>
+<!-- k3_6->k4_6 -->
+<g id="edge111" class="edge"><title>k3_6->k4_6</title>
+<path style="fill:none;stroke:gray;" d="M1499,-460C1499,-456 1499,-452 1499,-448"/>
+<polygon style="fill:gray;stroke:gray;" points="1502.5,-448 1499,-438 1495.5,-448 1502.5,-448"/>
+</g>
+<!-- k5_6 -->
+<g id="node112" class="node"><title>k5_6</title>
+<polygon style="fill:none;stroke:#87bae4;" points="1557,-341.5 1445,-341.5 1441,-337.5 1441,-290.5 1553,-290.5 1557,-294.5 1557,-341.5"/>
+<polyline style="fill:none;stroke:#87bae4;" points="1553,-337.5 1441,-337.5 "/>
+<polyline style="fill:none;stroke:#87bae4;" points="1553,-337.5 1553,-290.5 "/>
+<polyline style="fill:none;stroke:#87bae4;" points="1553,-337.5 1557,-341.5 "/>
+<text text-anchor="middle" x="1499" y="-309.392" style="font-family:Times New Roman;font-size:20.0px;fill:#87bae4;">Task to run</text>
+</g>
+<!-- k4_6->k5_6 -->
+<g id="edge113" class="edge"><title>k4_6->k5_6</title>
+<path style="fill:none;stroke:#87bae4;" d="M1499,-364C1499,-360 1499,-356 1499,-352"/>
+<polygon style="fill:#87bae4;stroke:#87bae4;" points="1502.5,-352 1499,-342 1495.5,-352 1502.5,-352"/>
+</g>
+<!-- k6_6 -->
+<g id="node114" class="node"><title>k6_6</title>
+<polygon style="fill:none;stroke:#87bae4;stroke-dasharray:5,2;" points="1575,-268 1427,-268 1423,-264 1423,-194 1571,-194 1575,-198 1575,-268"/>
+<polyline style="fill:none;stroke:#87bae4;stroke-dasharray:5,2;" points="1571,-264 1423,-264 "/>
+<polyline style="fill:none;stroke:#87bae4;stroke-dasharray:5,2;" points="1571,-264 1571,-194 "/>
+<polyline style="fill:none;stroke:#87bae4;stroke-dasharray:5,2;" points="1571,-264 1575,-268 "/>
+<text text-anchor="middle" x="1499" y="-235.892" style="font-family:Times New Roman;font-size:20.0px;fill:#87bae4;">Up-to-date task</text>
+<text text-anchor="middle" x="1499" y="-212.892" style="font-family:Times New Roman;font-size:20.0px;fill:#87bae4;">forced to rerun</text>
+</g>
+<!-- k5_6->k6_6 -->
+<g id="edge115" class="edge"><title>k5_6->k6_6</title>
+<path style="fill:none;stroke:#87bae4;" d="M1499,-290C1499,-286 1499,-282 1499,-278"/>
+<polygon style="fill:#87bae4;stroke:#87bae4;" points="1502.5,-278 1499,-268 1495.5,-278 1502.5,-278"/>
+</g>
+<!-- k7_6 -->
+<g id="node116" class="node"><title>k7_6</title>
+<polygon style="fill:#fdba40;stroke:#87b379;" points="1559,-172 1443,-172 1439,-168 1439,-98 1555,-98 1559,-102 1559,-172"/>
+<polyline style="fill:none;stroke:#87b379;" points="1555,-168 1439,-168 "/>
+<polyline style="fill:none;stroke:#87b379;" points="1555,-168 1555,-98 "/>
+<polyline style="fill:none;stroke:#87b379;" points="1555,-168 1559,-172 "/>
+<text text-anchor="middle" x="1499" y="-139.892" style="font-family:Times New Roman;font-size:20.0px;fill:#87b379;">Up-to-date</text>
+<text text-anchor="middle" x="1499" y="-116.892" style="font-family:Times New Roman;font-size:20.0px;fill:#87b379;">Final target</text>
+</g>
+<!-- k6_6->k7_6 -->
+<g id="edge117" class="edge"><title>k6_6->k7_6</title>
+<path style="fill:none;stroke:#87bae4;" d="M1499,-194C1499,-190 1499,-186 1499,-182"/>
+<polygon style="fill:#87bae4;stroke:#87bae4;" points="1502.5,-182 1499,-172 1495.5,-182 1502.5,-182"/>
+</g>
+<!-- k8_6 -->
+<g id="node118" class="node"><title>k8_6</title>
+<polygon style="fill:#fdba40;stroke:black;" points="1559,-75.5 1443,-75.5 1439,-71.5 1439,-24.5 1555,-24.5 1559,-28.5 1559,-75.5"/>
+<polyline style="fill:none;stroke:black;" points="1555,-71.5 1439,-71.5 "/>
+<polyline style="fill:none;stroke:black;" points="1555,-71.5 1555,-24.5 "/>
+<polyline style="fill:none;stroke:black;" points="1555,-71.5 1559,-75.5 "/>
+<text text-anchor="middle" x="1499" y="-43.392" style="font-family:Times New Roman;font-size:20.0px;">Final target</text>
+</g>
+<!-- k7_6->k8_6 -->
+<g id="edge119" class="edge"><title>k7_6->k8_6</title>
+<path style="fill:none;stroke:gray;" d="M1499,-98C1499,-94 1499,-90 1499,-86"/>
+<polygon style="fill:gray;stroke:gray;" points="1502.5,-86 1499,-76 1495.5,-86 1502.5,-86"/>
+</g>
+</g>
+</svg>
diff --git a/doc/images/complete.svg b/doc/images/complete.svg
new file mode 100644
index 0000000..03f31e2
--- /dev/null
+++ b/doc/images/complete.svg
@@ -0,0 +1,144 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.0//EN"
+ "http://www.w3.org/TR/2001/REC-SVG-20010904/DTD/svg10.dtd" [
+ <!ATTLIST svg xmlns:xlink CDATA #FIXED "http://www.w3.org/1999/xlink">
+]>
+<!-- Generated by Graphviz version 2.20.2 (Mon Mar 30 10:09:11 UTC 2009)
+ For user: (lg) leo goodstadt -->
+<!-- Title: tree Pages: 1 -->
+<svg width="432pt" height="476pt"
+ viewBox="0.00 0.00 432.00 476.33" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<g id="graph0" class="graph" transform="scale(0.751304 0.751304) rotate(0) translate(4 630)">
+<title>tree</title>
+<polygon style="fill:white;stroke:white;" points="-4,4 -4,-630 571,-630 571,4 -4,4"/>
+<g id="cluster2" class="cluster"><title>clustertasks</title>
+<polygon style="fill:none;stroke:black;" points="8,-76 8,-609 151,-609 151,-76 8,-76"/>
+<text text-anchor="middle" x="80" y="-578" style="font-family:Times New Roman;font-size:30.00;">Pipeline:</text>
+</g>
+<g id="cluster3" class="cluster"><title>clusterkey</title>
+<polygon style="fill:#e5e5e5;stroke:#e5e5e5;" points="159,-16 159,-618 559,-618 559,-16 159,-16"/>
+<text text-anchor="middle" x="359" y="-587" style="font-family:Times New Roman;font-size:30.00;">Key:</text>
+</g>
+<!-- t0 -->
+<g id="node2" class="node"><title>t0</title>
+<text text-anchor="middle" x="79" y="-533" style="font-family:Times New Roman;font-size:20.00;fill:blue;">task1</text>
+</g>
+<!-- t1 -->
+<g id="node3" class="node"><title>t1</title>
+<text text-anchor="middle" x="79" y="-408" style="font-family:Times New Roman;font-size:20.00;fill:blue;">task2</text>
+</g>
+<!-- t0->t1 -->
+<g id="edge3" class="edge"><title>t0->t1</title>
+<path style="fill:none;stroke:blue;" d="M79,-521C79,-500 79,-466 79,-442"/>
+<polygon style="fill:blue;stroke:blue;" points="82.5001,-442 79,-432 75.5001,-442 82.5001,-442"/>
+</g>
+<!-- t2 -->
+<g id="node5" class="node"><title>t2</title>
+<text text-anchor="middle" x="79" y="-265" style="font-family:Times New Roman;font-size:20.00;fill:blue;">task3</text>
+</g>
+<!-- t1->t2 -->
+<g id="edge5" class="edge"><title>t1->t2</title>
+<path style="fill:none;stroke:blue;" d="M79,-396C79,-371 79,-328 79,-299"/>
+<polygon style="fill:blue;stroke:blue;" points="82.5001,-299 79,-289 75.5001,-299 82.5001,-299"/>
+</g>
+<!-- t3 -->
+<g id="node7" class="node"><title>t3</title>
+<polygon style="fill:none;stroke:orange;" points="129.912,-105.627 129.912,-124.373 100.088,-137.627 57.9117,-137.627 28.0883,-124.373 28.0883,-105.627 57.9117,-92.3726 100.088,-92.3726 129.912,-105.627"/>
+<polygon style="fill:none;stroke:orange;" points="133.912,-103.028 133.912,-126.972 100.937,-141.627 57.0628,-141.627 24.0883,-126.972 24.0883,-103.028 57.0628,-88.3726 100.937,-88.3726 133.912,-103.028"/>
+<polygon style="fill:none;stroke:orange;" points="137.912,-100.428 137.912,-129.572 101.786,-145.627 56.214,-145.627 20.0883,-129.572 20.0883,-100.428 56.214,-84.3726 101.786,-84.3726 137.912,-100.428"/>
+<text text-anchor="middle" x="79" y="-109" style="font-family:Times New Roman;font-size:20.00;fill:orange;">task4</text>
+</g>
+<!-- t2->t3 -->
+<g id="edge7" class="edge"><title>t2->t3</title>
+<path style="fill:none;stroke:blue;" d="M79,-253C79,-230 79,-188 79,-156"/>
+<polygon style="fill:blue;stroke:blue;" points="82.5001,-156 79,-146 75.5001,-156 82.5001,-156"/>
+</g>
+<!-- k1 -->
+<g id="node10" class="node"><title>k1</title>
+<polygon style="fill:none;stroke:orange;" points="431.125,-531.385 431.125,-546.615 388.875,-557.385 329.125,-557.385 286.875,-546.615 286.875,-531.385 329.125,-520.615 388.875,-520.615 431.125,-531.385"/>
+<polygon style="fill:none;stroke:orange;" points="435.125,-528.276 435.125,-549.724 389.377,-561.385 328.623,-561.385 282.875,-549.724 282.875,-528.276 328.623,-516.615 389.377,-516.615 435.125,-528.276"/>
+<polygon style="fill:none;stroke:orange;" points="439.125,-525.168 439.125,-552.832 389.879,-565.385 328.121,-565.385 278.875,-552.832 278.875,-525.168 328.121,-512.615 389.879,-512.615 439.125,-525.168"/>
+<text text-anchor="middle" x="359" y="-534.5" style="font-family:Times New Roman;font-size:15.00;fill:orange;">Final target</text>
+</g>
+<!-- k2 -->
+<g id="node11" class="node"><title>k2</title>
+<polygon style="fill:red;stroke:black;" points="415,-490 303,-490 303,-454 415,-454 415,-490"/>
+<text text-anchor="middle" x="359" y="-467.5" style="font-family:Times New Roman;font-size:15.00;">Vicious cycle</text>
+</g>
+<!-- k1->k2 -->
+<g id="edge10" class="edge"><title>k1->k2</title>
+<path style="fill:none;stroke:red;" d="M352,-513C352,-509 352,-504 352,-500"/>
+<polygon style="fill:red;stroke:red;" points="355.488,-500.299 353,-490 348.522,-499.602 355.488,-500.299"/>
+</g>
+<!-- k2->k1 -->
+<g id="edge12" class="edge"><title>k2->k1</title>
+<path style="fill:none;stroke:red;" d="M365,-490C366,-494 366,-498 366,-503"/>
+<polygon style="fill:red;stroke:red;" points="362.5,-503 366,-513 369.5,-503 362.5,-503"/>
+</g>
+<!-- k3 -->
+<g id="node12" class="node"><title>k3</title>
+<text text-anchor="middle" x="359" y="-409.5" style="font-family:Times New Roman;font-size:15.00;fill:blue;">Task to run</text>
+</g>
+<!-- k2->k3 -->
+<g id="edge14" class="edge"><title>k2->k3</title>
+<path style="fill:none;stroke:blue;" d="M359,-454C359,-450 359,-446 359,-442"/>
+<polygon style="fill:blue;stroke:blue;" points="362.5,-442 359,-432 355.5,-442 362.5,-442"/>
+</g>
+<!-- k4 -->
+<g id="node13" class="node"><title>k4</title>
+<polygon style="fill:none;stroke:blue;" points="542.848,-339.385 542.848,-354.615 435.152,-365.385 282.848,-365.385 175.152,-354.615 175.152,-339.385 282.848,-328.615 435.152,-328.615 542.848,-339.385"/>
+<polygon style="fill:none;stroke:blue;" points="546.848,-335.765 546.848,-358.235 435.352,-369.385 282.648,-369.385 171.152,-358.235 171.152,-335.765 282.648,-324.615 435.352,-324.615 546.848,-335.765"/>
+<polygon style="fill:none;stroke:blue;" points="550.848,-332.145 550.848,-361.855 435.551,-373.385 282.449,-373.385 167.152,-361.855 167.152,-332.145 282.449,-320.615 435.551,-320.615 550.848,-332.145"/>
+<text text-anchor="middle" x="359" y="-342.5" style="font-family:Times New Roman;font-size:15.00;fill:blue;">Force pipeline run from this task</text>
+</g>
+<!-- k3->k4 -->
+<g id="edge15" class="edge"><title>k3->k4</title>
+<path style="fill:none;stroke:blue;" d="M359,-396C359,-392 359,-388 359,-383"/>
+<polygon style="fill:blue;stroke:blue;" points="362.5,-383 359,-373 355.5,-383 362.5,-383"/>
+</g>
+<!-- k5 -->
+<g id="node14" class="node"><title>k5</title>
+<polygon style="fill:none;stroke:gray;" points="491.229,-263.385 491.229,-278.615 413.771,-289.385 304.229,-289.385 226.771,-278.615 226.771,-263.385 304.229,-252.615 413.771,-252.615 491.229,-263.385"/>
+<polygon style="fill:none;stroke:gray;" points="495.229,-259.902 495.229,-282.098 414.048,-293.385 303.952,-293.385 222.771,-282.098 222.771,-259.902 303.952,-248.615 414.048,-248.615 495.229,-259.902"/>
+<polygon style="fill:none;stroke:gray;" points="499.229,-256.42 499.229,-285.58 414.325,-297.385 303.675,-297.385 218.771,-285.58 218.771,-256.42 303.675,-244.615 414.325,-244.615 499.229,-256.42"/>
+<text text-anchor="middle" x="359" y="-266.5" style="font-family:Times New Roman;font-size:15.00;fill:gray;">Up-to-date Final target</text>
+</g>
+<!-- k4->k5 -->
+<g id="edge16" class="edge"><title>k4->k5</title>
+<path style="fill:none;stroke:blue;" d="M359,-320C359,-316 359,-312 359,-308"/>
+<polygon style="fill:blue;stroke:blue;" points="362.5,-308 359,-298 355.5,-308 362.5,-308"/>
+</g>
+<!-- k6 -->
+<g id="node15" class="node"><title>k6</title>
+<polygon style="fill:none;stroke:olivedrab;" points="535.07,-187.385 535.07,-202.615 431.93,-213.385 286.07,-213.385 182.93,-202.615 182.93,-187.385 286.07,-176.615 431.93,-176.615 535.07,-187.385"/>
+<polygon style="fill:none;stroke:olivedrab;" points="539.07,-183.781 539.07,-206.219 432.139,-217.385 285.861,-217.385 178.93,-206.219 178.93,-183.781 285.861,-172.615 432.139,-172.615 539.07,-183.781"/>
+<polygon style="fill:none;stroke:olivedrab;" points="543.07,-180.177 543.07,-209.823 432.347,-221.385 285.653,-221.385 174.93,-209.823 174.93,-180.177 285.653,-168.615 432.347,-168.615 543.07,-180.177"/>
+<text text-anchor="middle" x="359" y="-190.5" style="font-family:Times New Roman;font-size:15.00;fill:blue;">Up-to-date task forced to rerun</text>
+</g>
+<!-- k5->k6 -->
+<g id="edge18" class="edge"><title>k5->k6</title>
+<path style="fill:none;stroke:gray;" d="M359,-244C359,-240 359,-236 359,-232"/>
+<polygon style="fill:gray;stroke:gray;" points="362.5,-232 359,-222 355.5,-232 362.5,-232"/>
+</g>
+<!-- k7 -->
+<g id="node16" class="node"><title>k7</title>
+<polygon style="fill:olivedrab;stroke:olivedrab;" points="451.839,-107.385 451.839,-122.615 397.455,-133.385 320.545,-133.385 266.161,-122.615 266.161,-107.385 320.545,-96.6152 397.455,-96.6152 451.839,-107.385"/>
+<text text-anchor="middle" x="359" y="-110.5" style="font-family:Times New Roman;font-size:15.00;">Up-to-date task</text>
+</g>
+<!-- k6->k7 -->
+<g id="edge19" class="edge"><title>k6->k7</title>
+<path style="fill:none;stroke:gray;" d="M359,-168C359,-160 359,-152 359,-144"/>
+<polygon style="fill:gray;stroke:gray;" points="362.5,-144 359,-134 355.5,-144 362.5,-144"/>
+</g>
+<!-- k8 -->
+<g id="node17" class="node"><title>k8</title>
+<polygon style="fill:white;stroke:gray;" points="496.179,-35.3848 496.179,-50.6152 415.821,-61.3848 302.179,-61.3848 221.821,-50.6152 221.821,-35.3848 302.179,-24.6152 415.821,-24.6152 496.179,-35.3848"/>
+<text text-anchor="middle" x="359" y="-38.5" style="font-family:Times New Roman;font-size:15.00;fill:gray;">Up-to-date dependence</text>
+</g>
+<!-- k7->k8 -->
+<g id="edge20" class="edge"><title>k7->k8</title>
+<path style="fill:none;stroke:gray;" d="M359,-96C359,-89 359,-80 359,-72"/>
+<polygon style="fill:gray;stroke:gray;" points="362.5,-72 359,-62 355.5,-72 362.5,-72"/>
+</g>
+</g>
+</svg>
diff --git a/doc/images/complex_conceptual.jpg b/doc/images/complex_conceptual.jpg
new file mode 100644
index 0000000..d8ef33f
Binary files /dev/null and b/doc/images/complex_conceptual.jpg differ
diff --git a/doc/images/complex_file_dag.jpg b/doc/images/complex_file_dag.jpg
new file mode 100644
index 0000000..bd812ef
Binary files /dev/null and b/doc/images/complex_file_dag.jpg differ
diff --git a/doc/images/complex_ruffus.jpg b/doc/images/complex_ruffus.jpg
new file mode 100644
index 0000000..ea9302d
Binary files /dev/null and b/doc/images/complex_ruffus.jpg differ
diff --git a/doc/images/design.file_based_workflow.dot b/doc/images/design.file_based_workflow.dot
new file mode 100644
index 0000000..6bebf2e
--- /dev/null
+++ b/doc/images/design.file_based_workflow.dot
@@ -0,0 +1,59 @@
+digraph tree
+{
+size="8,11!";
+splines=true;
+mindist = 0.001;
+nodesep = 0.001;
+fontsize=50;
+#rankdir="LR";
+ranksep = 0.3;
+sep = 0.001;
+esep = 0.001;
+width = 0.001;
+packmode="node";
+subgraph clusterkey
+{
+fontsize=30;
+label = "File-based\nPipeline\nManagement:";
+node[fontsize=30];
+ss[color=blue, fontcolor=blue, shape=tripleoctagon, fontsize=13, label="starting_files"];
+a1[color=blue, fontcolor=blue, shape=none, fontsize=10, label="a.analysis1"];
+a2[color=blue, fontcolor=blue, shape=none, fontsize=10, label="a.analysis2"];
+a3[color=blue, fontcolor=blue, shape=none, fontsize=10, label="a.analysis3"];
+b1[color=blue, fontcolor=blue, shape=none, fontsize=10, label="b.analysis1"];
+b2[color=blue, fontcolor=blue, shape=none, fontsize=10, label="b.analysis2"];
+b3[color=blue, fontcolor=blue, shape=none, fontsize=10, label="b.analysis3"];
+c1[color=blue, fontcolor=blue, shape=none, fontsize=10, label="c.analysis1"];
+c2[color=blue, fontcolor=blue, shape=none, fontsize=10, label="c.analysis2"];
+c3[color=blue, fontcolor=blue, shape=none, fontsize=10, label="c.analysis3"];
+d1[color=blue, fontcolor=blue, shape=none, fontsize=10, label="d.analysis1"];
+d2[color=blue, fontcolor=blue, shape=none, fontsize=10, label="d.analysis2"];
+d3[color=blue, fontcolor=blue, shape=none, fontsize=10, label="d.analysis3"];
+e1[color=blue, fontcolor=blue, shape=none, fontsize=10, label="e.analysis1"];
+e2[color=blue, fontcolor=blue, shape=none, fontsize=10, label="e.analysis2"];
+e3[color=blue, fontcolor=blue, shape=none, fontsize=10, label="e.analysis3"];
+rr[color=red, fontcolor=red, shape=tripleoctagon, fontsize=13, label="results.summary"];
+
+ss->a1[color=blue];
+ss->b1[color=blue];
+ss->c1[color=blue];
+ss->d1[color=blue];
+ss->e1[color=blue];
+a1->a2[color=blue];
+b1->b2[color=blue];
+c1->c2[color=blue];
+d1->d2[color=blue];
+e1->e2[color=blue];
+a2->a3[color=blue];
+b2->b3[color=blue];
+c2->c3[color=blue];
+d2->d3[color=blue];
+e2->e3[color=blue];
+a3->rr[color=red];
+b3->rr[color=red];
+c3->rr[color=red];
+d3->rr[color=red];
+e3->rr[color=red];
+
+}
+}
diff --git a/doc/images/design.file_based_workflow.png b/doc/images/design.file_based_workflow.png
new file mode 100644
index 0000000..5538530
Binary files /dev/null and b/doc/images/design.file_based_workflow.png differ
diff --git a/doc/images/design.task_based_workflow.dot b/doc/images/design.task_based_workflow.dot
new file mode 100644
index 0000000..ce195a6
--- /dev/null
+++ b/doc/images/design.task_based_workflow.dot
@@ -0,0 +1,19 @@
+digraph tree
+{
+size="8,11!";
+splines=true;
+fontsize=40;
+ranksep = 0.3;
+#rankdir="LR";
+subgraph clusterkey
+{
+fontsize=30;
+label = "Task-based\nPipeline\nManagement:";
+node[fontsize=40];
+k1[color=red, fontcolor=red, shape=tripleoctagon, fontsize=15, label="Summarise results"];
+k2[color=blue, fontcolor=blue, shape=tripleoctagon, fontsize=15, label="Analysis 3"];
+k3[color=blue, fontcolor=blue, shape=tripleoctagon, fontsize=15, label="Analysis 2"];
+k4[color=blue, fontcolor=blue, shape=tripleoctagon, fontsize=15, label="Analysis 1"];
+k2->k1[color=red];k4->k3->k2[color=blue];
+}
+}
diff --git a/doc/images/design.task_based_workflow.png b/doc/images/design.task_based_workflow.png
new file mode 100644
index 0000000..b6dd626
Binary files /dev/null and b/doc/images/design.task_based_workflow.png differ
diff --git a/doc/images/examples_bioinformatics_before.jpg b/doc/images/examples_bioinformatics_before.jpg
new file mode 100644
index 0000000..29e0a0e
Binary files /dev/null and b/doc/images/examples_bioinformatics_before.jpg differ
diff --git a/doc/images/examples_bioinformatics_complete.jpg b/doc/images/examples_bioinformatics_complete.jpg
new file mode 100644
index 0000000..2bd3abb
Binary files /dev/null and b/doc/images/examples_bioinformatics_complete.jpg differ
diff --git a/doc/images/examples_bioinformatics_error.png b/doc/images/examples_bioinformatics_error.png
new file mode 100644
index 0000000..469905e
Binary files /dev/null and b/doc/images/examples_bioinformatics_error.png differ
diff --git a/doc/images/examples_bioinformatics_merge.jpg b/doc/images/examples_bioinformatics_merge.jpg
new file mode 100644
index 0000000..a83a17f
Binary files /dev/null and b/doc/images/examples_bioinformatics_merge.jpg differ
diff --git a/doc/images/examples_bioinformatics_pipeline.jpg b/doc/images/examples_bioinformatics_pipeline.jpg
new file mode 100644
index 0000000..fdd3839
Binary files /dev/null and b/doc/images/examples_bioinformatics_pipeline.jpg differ
diff --git a/doc/images/examples_bioinformatics_split.jpg b/doc/images/examples_bioinformatics_split.jpg
new file mode 100644
index 0000000..4a9c428
Binary files /dev/null and b/doc/images/examples_bioinformatics_split.jpg differ
diff --git a/doc/images/examples_bioinformatics_transform.jpg b/doc/images/examples_bioinformatics_transform.jpg
new file mode 100644
index 0000000..7a5aac5
Binary files /dev/null and b/doc/images/examples_bioinformatics_transform.jpg differ
diff --git a/doc/images/flowchart_colour_schemes.png b/doc/images/flowchart_colour_schemes.png
new file mode 100644
index 0000000..a576cf9
Binary files /dev/null and b/doc/images/flowchart_colour_schemes.png differ
diff --git a/doc/images/flowchart_colour_schemes.svg b/doc/images/flowchart_colour_schemes.svg
new file mode 100644
index 0000000..0b4d872
--- /dev/null
+++ b/doc/images/flowchart_colour_schemes.svg
@@ -0,0 +1,895 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.0//EN"
+ "http://www.w3.org/TR/2001/REC-SVG-20010904/DTD/svg10.dtd" [
+ <!ATTLIST svg xmlns:xlink CDATA #FIXED "http://www.w3.org/1999/xlink">
+]>
+<!-- Generated by Graphviz version 2.20.2 (Mon Aug 4 08:59:22 UTC 2008)
+ For user: (lg) Leo Goodstadt -->
+<!-- Title: Colour schemes Pages: 1 -->
+<svg width="792pt" height="283pt"
+ viewBox="0.00 0.00 792.00 283.32" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<g id="graph0" class="graph" transform="scale(0.402439 0.402439) rotate(0) translate(4 700)">
+<title>Colour schemes</title>
+<polygon style="fill:white;stroke:white;" points="-4,4 -4,-700 1964,-700 1964,4 -4,4"/>
+<g id="cluster2" class="cluster"><title>clusterkey0</title>
+<polygon style="fill:#f6f4f4;stroke:#f6f4f4;" points="8,-16 8,-688 244,-688 244,-16 8,-16"/>
+<text text-anchor="middle" x="126" y="-656.892" style="font-family:Times New Roman;font-size:30.0px;">Colour Scheme 0</text>
+</g>
+<g id="cluster3" class="cluster"><title>clusterkey1</title>
+<polygon style="fill:#f6f4f4;stroke:#f6f4f4;" points="252,-16 252,-688 488,-688 488,-16 252,-16"/>
+<text text-anchor="middle" x="370" y="-656.892" style="font-family:Times New Roman;font-size:30.0px;">Colour Scheme 1</text>
+</g>
+<g id="cluster4" class="cluster"><title>clusterkey2</title>
+<polygon style="fill:#f6f4f4;stroke:#f6f4f4;" points="496,-16 496,-688 732,-688 732,-16 496,-16"/>
+<text text-anchor="middle" x="614" y="-656.892" style="font-family:Times New Roman;font-size:30.0px;">Colour Scheme 2</text>
+</g>
+<g id="cluster5" class="cluster"><title>clusterkey3</title>
+<polygon style="fill:#f6f4f4;stroke:#f6f4f4;" points="740,-16 740,-688 976,-688 976,-16 740,-16"/>
+<text text-anchor="middle" x="858" y="-656.892" style="font-family:Times New Roman;font-size:30.0px;">Colour Scheme 3</text>
+</g>
+<g id="cluster6" class="cluster"><title>clusterkey4</title>
+<polygon style="fill:#f6f4f4;stroke:#f6f4f4;" points="984,-16 984,-688 1220,-688 1220,-16 984,-16"/>
+<text text-anchor="middle" x="1102" y="-656.892" style="font-family:Times New Roman;font-size:30.0px;">Colour Scheme 4</text>
+</g>
+<g id="cluster7" class="cluster"><title>clusterkey5</title>
+<polygon style="fill:#f6f4f4;stroke:#f6f4f4;" points="1228,-16 1228,-688 1464,-688 1464,-16 1228,-16"/>
+<text text-anchor="middle" x="1346" y="-656.892" style="font-family:Times New Roman;font-size:30.0px;">Colour Scheme 5</text>
+</g>
+<g id="cluster8" class="cluster"><title>clusterkey6</title>
+<polygon style="fill:#f6f4f4;stroke:#f6f4f4;" points="1472,-16 1472,-688 1708,-688 1708,-16 1472,-16"/>
+<text text-anchor="middle" x="1590" y="-656.892" style="font-family:Times New Roman;font-size:30.0px;">Colour Scheme 6</text>
+</g>
+<g id="cluster9" class="cluster"><title>clusterkey7</title>
+<polygon style="fill:#f6f4f4;stroke:#f6f4f4;" points="1716,-16 1716,-688 1952,-688 1952,-16 1716,-16"/>
+<text text-anchor="middle" x="1834" y="-656.892" style="font-family:Times New Roman;font-size:30.0px;">Colour Scheme 7</text>
+</g>
+<!-- k1_0 -->
+<g id="node2" class="node"><title>k1_0</title>
+<polygon style="fill:#ff3232;stroke:white;" points="194,-637.5 62,-637.5 58,-633.5 58,-586.5 190,-586.5 194,-590.5 194,-637.5"/>
+<polyline style="fill:none;stroke:white;" points="190,-633.5 58,-633.5 "/>
+<polyline style="fill:none;stroke:white;" points="190,-633.5 190,-586.5 "/>
+<polyline style="fill:none;stroke:white;" points="190,-633.5 194,-637.5 "/>
+<text text-anchor="middle" x="126" y="-605.392" style="font-family:Times New Roman;font-size:20.0px;fill:white;">Vicious cycle</text>
+</g>
+<!-- k2_0 -->
+<g id="node3" class="node"><title>k2_0</title>
+<polygon style="fill:white;stroke:gray;" points="193,-563.5 63,-563.5 59,-559.5 59,-512.5 189,-512.5 193,-516.5 193,-563.5"/>
+<polyline style="fill:none;stroke:gray;" points="189,-559.5 59,-559.5 "/>
+<polyline style="fill:none;stroke:gray;" points="189,-559.5 189,-512.5 "/>
+<polyline style="fill:none;stroke:gray;" points="189,-559.5 193,-563.5 "/>
+<text text-anchor="middle" x="126" y="-531.392" style="font-family:Times New Roman;font-size:20.0px;fill:gray;">Down stream</text>
+</g>
+<!-- k1_0->k2_0 -->
+<g id="edge3" class="edge"><title>k1_0->k2_0</title>
+<path style="fill:none;stroke:#ff3232;" d="M120,-586C119,-582 119,-578 119,-574"/>
+<polygon style="fill:#ff3232;stroke:#ff3232;" points="122.488,-574.299 120,-564 115.522,-573.602 122.488,-574.299"/>
+</g>
+<!-- k2_0->k1_0 -->
+<g id="edge5" class="edge"><title>k2_0->k1_0</title>
+<path style="fill:none;stroke:#ff3232;" d="M132,-564C133,-568 133,-572 133,-576"/>
+<polygon style="fill:#ff3232;stroke:#ff3232;" points="129.512,-575.701 132,-586 136.478,-576.398 129.512,-575.701"/>
+</g>
+<!-- k3_0 -->
+<g id="node6" class="node"><title>k3_0</title>
+<polygon style="fill:#b8cc6e;stroke:#006000;" points="202,-489.5 54,-489.5 50,-485.5 50,-438.5 198,-438.5 202,-442.5 202,-489.5"/>
+<polyline style="fill:none;stroke:#006000;" points="198,-485.5 50,-485.5 "/>
+<polyline style="fill:none;stroke:#006000;" points="198,-485.5 198,-438.5 "/>
+<polyline style="fill:none;stroke:#006000;" points="198,-485.5 202,-489.5 "/>
+<text text-anchor="middle" x="126" y="-457.392" style="font-family:Times New Roman;font-size:20.0px;fill:#006000;">Up-to-date task</text>
+</g>
+<!-- k2_0->k3_0 -->
+<g id="edge7" class="edge"><title>k2_0->k3_0</title>
+<path style="fill:none;stroke:gray;" d="M126,-512C126,-508 126,-504 126,-500"/>
+<polygon style="fill:gray;stroke:gray;" points="129.5,-500 126,-490 122.5,-500 129.5,-500"/>
+</g>
+<!-- k4_0 -->
+<g id="node8" class="node"><title>k4_0</title>
+<polygon style="fill:none;stroke:black;" points="236,-415.5 20,-415.5 16,-411.5 16,-364.5 232,-364.5 236,-368.5 236,-415.5"/>
+<polyline style="fill:none;stroke:black;" points="232,-411.5 16,-411.5 "/>
+<polyline style="fill:none;stroke:black;" points="232,-411.5 232,-364.5 "/>
+<polyline style="fill:none;stroke:black;" points="232,-411.5 236,-415.5 "/>
+<text text-anchor="middle" x="126" y="-383.392" style="font-family:Times New Roman;font-size:20.0px;">Explicitly specified task</text>
+</g>
+<!-- k3_0->k4_0 -->
+<g id="edge9" class="edge"><title>k3_0->k4_0</title>
+<path style="fill:none;stroke:gray;" d="M126,-438C126,-434 126,-430 126,-426"/>
+<polygon style="fill:gray;stroke:gray;" points="129.5,-426 126,-416 122.5,-426 129.5,-426"/>
+</g>
+<!-- k5_0 -->
+<g id="node10" class="node"><title>k5_0</title>
+<polygon style="fill:#ebf3ff;stroke:#0044a0;" points="184,-341.5 72,-341.5 68,-337.5 68,-290.5 180,-290.5 184,-294.5 184,-341.5"/>
+<polyline style="fill:none;stroke:#0044a0;" points="180,-337.5 68,-337.5 "/>
+<polyline style="fill:none;stroke:#0044a0;" points="180,-337.5 180,-290.5 "/>
+<polyline style="fill:none;stroke:#0044a0;" points="180,-337.5 184,-341.5 "/>
+<text text-anchor="middle" x="126" y="-309.392" style="font-family:Times New Roman;font-size:20.0px;fill:#0044a0;">Task to run</text>
+</g>
+<!-- k4_0->k5_0 -->
+<g id="edge11" class="edge"><title>k4_0->k5_0</title>
+<path style="fill:none;stroke:#0044a0;" d="M126,-364C126,-360 126,-356 126,-352"/>
+<polygon style="fill:#0044a0;stroke:#0044a0;" points="129.5,-352 126,-342 122.5,-352 129.5,-352"/>
+</g>
+<!-- k6_0 -->
+<g id="node12" class="node"><title>k6_0</title>
+<polygon style="fill:none;stroke:#0044a0;stroke-dasharray:5,2;" points="202,-268 54,-268 50,-264 50,-194 198,-194 202,-198 202,-268"/>
+<polyline style="fill:none;stroke:#0044a0;stroke-dasharray:5,2;" points="198,-264 50,-264 "/>
+<polyline style="fill:none;stroke:#0044a0;stroke-dasharray:5,2;" points="198,-264 198,-194 "/>
+<polyline style="fill:none;stroke:#0044a0;stroke-dasharray:5,2;" points="198,-264 202,-268 "/>
+<text text-anchor="middle" x="126" y="-235.892" style="font-family:Times New Roman;font-size:20.0px;fill:#0044a0;">Up-to-date task</text>
+<text text-anchor="middle" x="126" y="-212.892" style="font-family:Times New Roman;font-size:20.0px;fill:#0044a0;">forced to rerun</text>
+</g>
+<!-- k5_0->k6_0 -->
+<g id="edge13" class="edge"><title>k5_0->k6_0</title>
+<path style="fill:none;stroke:#0044a0;" d="M126,-290C126,-286 126,-282 126,-278"/>
+<polygon style="fill:#0044a0;stroke:#0044a0;" points="129.5,-278 126,-268 122.5,-278 129.5,-278"/>
+</g>
+<!-- k7_0 -->
+<g id="node14" class="node"><title>k7_0</title>
+<polygon style="fill:#efa03b;stroke:#006000;" points="186,-172 70,-172 66,-168 66,-98 182,-98 186,-102 186,-172"/>
+<polyline style="fill:none;stroke:#006000;" points="182,-168 66,-168 "/>
+<polyline style="fill:none;stroke:#006000;" points="182,-168 182,-98 "/>
+<polyline style="fill:none;stroke:#006000;" points="182,-168 186,-172 "/>
+<text text-anchor="middle" x="126" y="-139.892" style="font-family:Times New Roman;font-size:20.0px;fill:#006000;">Up-to-date</text>
+<text text-anchor="middle" x="126" y="-116.892" style="font-family:Times New Roman;font-size:20.0px;fill:#006000;">Final target</text>
+</g>
+<!-- k6_0->k7_0 -->
+<g id="edge15" class="edge"><title>k6_0->k7_0</title>
+<path style="fill:none;stroke:#0044a0;" d="M126,-194C126,-190 126,-186 126,-182"/>
+<polygon style="fill:#0044a0;stroke:#0044a0;" points="129.5,-182 126,-172 122.5,-182 129.5,-182"/>
+</g>
+<!-- k8_0 -->
+<g id="node16" class="node"><title>k8_0</title>
+<polygon style="fill:#efa03b;stroke:black;" points="186,-75.5 70,-75.5 66,-71.5 66,-24.5 182,-24.5 186,-28.5 186,-75.5"/>
+<polyline style="fill:none;stroke:black;" points="182,-71.5 66,-71.5 "/>
+<polyline style="fill:none;stroke:black;" points="182,-71.5 182,-24.5 "/>
+<polyline style="fill:none;stroke:black;" points="182,-71.5 186,-75.5 "/>
+<text text-anchor="middle" x="126" y="-43.392" style="font-family:Times New Roman;font-size:20.0px;">Final target</text>
+</g>
+<!-- k7_0->k8_0 -->
+<g id="edge17" class="edge"><title>k7_0->k8_0</title>
+<path style="fill:none;stroke:gray;" d="M126,-98C126,-94 126,-90 126,-86"/>
+<polygon style="fill:gray;stroke:gray;" points="129.5,-86 126,-76 122.5,-86 129.5,-86"/>
+</g>
+<!-- k1_1 -->
+<g id="node19" class="node"><title>k1_1</title>
+<polygon style="fill:#d93611;stroke:white;" points="438,-637.5 306,-637.5 302,-633.5 302,-586.5 434,-586.5 438,-590.5 438,-637.5"/>
+<polyline style="fill:none;stroke:white;" points="434,-633.5 302,-633.5 "/>
+<polyline style="fill:none;stroke:white;" points="434,-633.5 434,-586.5 "/>
+<polyline style="fill:none;stroke:white;" points="434,-633.5 438,-637.5 "/>
+<text text-anchor="middle" x="370" y="-605.392" style="font-family:Times New Roman;font-size:20.0px;fill:white;">Vicious cycle</text>
+</g>
+<!-- k2_1 -->
+<g id="node20" class="node"><title>k2_1</title>
+<polygon style="fill:white;stroke:gray;" points="437,-563.5 307,-563.5 303,-559.5 303,-512.5 433,-512.5 437,-516.5 437,-563.5"/>
+<polyline style="fill:none;stroke:gray;" points="433,-559.5 303,-559.5 "/>
+<polyline style="fill:none;stroke:gray;" points="433,-559.5 433,-512.5 "/>
+<polyline style="fill:none;stroke:gray;" points="433,-559.5 437,-563.5 "/>
+<text text-anchor="middle" x="370" y="-531.392" style="font-family:Times New Roman;font-size:20.0px;fill:gray;">Down stream</text>
+</g>
+<!-- k1_1->k2_1 -->
+<g id="edge20" class="edge"><title>k1_1->k2_1</title>
+<path style="fill:none;stroke:#d93611;" d="M364,-586C363,-582 363,-578 363,-574"/>
+<polygon style="fill:#d93611;stroke:#d93611;" points="366.488,-574.299 364,-564 359.522,-573.602 366.488,-574.299"/>
+</g>
+<!-- k2_1->k1_1 -->
+<g id="edge22" class="edge"><title>k2_1->k1_1</title>
+<path style="fill:none;stroke:#d93611;" d="M376,-564C377,-568 377,-572 377,-576"/>
+<polygon style="fill:#d93611;stroke:#d93611;" points="373.512,-575.701 376,-586 380.478,-576.398 373.512,-575.701"/>
+</g>
+<!-- k3_1 -->
+<g id="node23" class="node"><title>k3_1</title>
+<polygon style="fill:#9ed983;stroke:#4b8c2e;" points="446,-489.5 298,-489.5 294,-485.5 294,-438.5 442,-438.5 446,-442.5 446,-489.5"/>
+<polyline style="fill:none;stroke:#4b8c2e;" points="442,-485.5 294,-485.5 "/>
+<polyline style="fill:none;stroke:#4b8c2e;" points="442,-485.5 442,-438.5 "/>
+<polyline style="fill:none;stroke:#4b8c2e;" points="442,-485.5 446,-489.5 "/>
+<text text-anchor="middle" x="370" y="-457.392" style="font-family:Times New Roman;font-size:20.0px;fill:#4b8c2e;">Up-to-date task</text>
+</g>
+<!-- k2_1->k3_1 -->
+<g id="edge24" class="edge"><title>k2_1->k3_1</title>
+<path style="fill:none;stroke:gray;" d="M370,-512C370,-508 370,-504 370,-500"/>
+<polygon style="fill:gray;stroke:gray;" points="373.5,-500 370,-490 366.5,-500 373.5,-500"/>
+</g>
+<!-- k4_1 -->
+<g id="node25" class="node"><title>k4_1</title>
+<polygon style="fill:none;stroke:black;" points="480,-415.5 264,-415.5 260,-411.5 260,-364.5 476,-364.5 480,-368.5 480,-415.5"/>
+<polyline style="fill:none;stroke:black;" points="476,-411.5 260,-411.5 "/>
+<polyline style="fill:none;stroke:black;" points="476,-411.5 476,-364.5 "/>
+<polyline style="fill:none;stroke:black;" points="476,-411.5 480,-415.5 "/>
+<text text-anchor="middle" x="370" y="-383.392" style="font-family:Times New Roman;font-size:20.0px;">Explicitly specified task</text>
+</g>
+<!-- k3_1->k4_1 -->
+<g id="edge26" class="edge"><title>k3_1->k4_1</title>
+<path style="fill:none;stroke:gray;" d="M370,-438C370,-434 370,-430 370,-426"/>
+<polygon style="fill:gray;stroke:gray;" points="373.5,-426 370,-416 366.5,-426 373.5,-426"/>
+</g>
+<!-- k5_1 -->
+<g id="node27" class="node"><title>k5_1</title>
+<polygon style="fill:none;stroke:#000ddf;" points="428,-341.5 316,-341.5 312,-337.5 312,-290.5 424,-290.5 428,-294.5 428,-341.5"/>
+<polyline style="fill:none;stroke:#000ddf;" points="424,-337.5 312,-337.5 "/>
+<polyline style="fill:none;stroke:#000ddf;" points="424,-337.5 424,-290.5 "/>
+<polyline style="fill:none;stroke:#000ddf;" points="424,-337.5 428,-341.5 "/>
+<text text-anchor="middle" x="370" y="-309.392" style="font-family:Times New Roman;font-size:20.0px;fill:#000ddf;">Task to run</text>
+</g>
+<!-- k4_1->k5_1 -->
+<g id="edge28" class="edge"><title>k4_1->k5_1</title>
+<path style="fill:none;stroke:#000ddf;" d="M370,-364C370,-360 370,-356 370,-352"/>
+<polygon style="fill:#000ddf;stroke:#000ddf;" points="373.5,-352 370,-342 366.5,-352 373.5,-352"/>
+</g>
+<!-- k6_1 -->
+<g id="node29" class="node"><title>k6_1</title>
+<polygon style="fill:none;stroke:#000ddf;stroke-dasharray:5,2;" points="446,-268 298,-268 294,-264 294,-194 442,-194 446,-198 446,-268"/>
+<polyline style="fill:none;stroke:#000ddf;stroke-dasharray:5,2;" points="442,-264 294,-264 "/>
+<polyline style="fill:none;stroke:#000ddf;stroke-dasharray:5,2;" points="442,-264 442,-194 "/>
+<polyline style="fill:none;stroke:#000ddf;stroke-dasharray:5,2;" points="442,-264 446,-268 "/>
+<text text-anchor="middle" x="370" y="-235.892" style="font-family:Times New Roman;font-size:20.0px;fill:#000ddf;">Up-to-date task</text>
+<text text-anchor="middle" x="370" y="-212.892" style="font-family:Times New Roman;font-size:20.0px;fill:#000ddf;">forced to rerun</text>
+</g>
+<!-- k5_1->k6_1 -->
+<g id="edge30" class="edge"><title>k5_1->k6_1</title>
+<path style="fill:none;stroke:#000ddf;" d="M370,-290C370,-286 370,-282 370,-278"/>
+<polygon style="fill:#000ddf;stroke:#000ddf;" points="373.5,-278 370,-268 366.5,-278 373.5,-278"/>
+</g>
+<!-- k7_1 -->
+<g id="node31" class="node"><title>k7_1</title>
+<polygon style="fill:#d98100;stroke:#d9d911;" points="430,-172 314,-172 310,-168 310,-98 426,-98 430,-102 430,-172"/>
+<polyline style="fill:none;stroke:#d9d911;" points="426,-168 310,-168 "/>
+<polyline style="fill:none;stroke:#d9d911;" points="426,-168 426,-98 "/>
+<polyline style="fill:none;stroke:#d9d911;" points="426,-168 430,-172 "/>
+<text text-anchor="middle" x="370" y="-139.892" style="font-family:Times New Roman;font-size:20.0px;fill:#d9d911;">Up-to-date</text>
+<text text-anchor="middle" x="370" y="-116.892" style="font-family:Times New Roman;font-size:20.0px;fill:#d9d911;">Final target</text>
+</g>
+<!-- k6_1->k7_1 -->
+<g id="edge32" class="edge"><title>k6_1->k7_1</title>
+<path style="fill:none;stroke:#000ddf;" d="M370,-194C370,-190 370,-186 370,-182"/>
+<polygon style="fill:#000ddf;stroke:#000ddf;" points="373.5,-182 370,-172 366.5,-182 373.5,-182"/>
+</g>
+<!-- k8_1 -->
+<g id="node33" class="node"><title>k8_1</title>
+<polygon style="fill:#d98100;stroke:black;" points="430,-75.5 314,-75.5 310,-71.5 310,-24.5 426,-24.5 430,-28.5 430,-75.5"/>
+<polyline style="fill:none;stroke:black;" points="426,-71.5 310,-71.5 "/>
+<polyline style="fill:none;stroke:black;" points="426,-71.5 426,-24.5 "/>
+<polyline style="fill:none;stroke:black;" points="426,-71.5 430,-75.5 "/>
+<text text-anchor="middle" x="370" y="-43.392" style="font-family:Times New Roman;font-size:20.0px;">Final target</text>
+</g>
+<!-- k7_1->k8_1 -->
+<g id="edge34" class="edge"><title>k7_1->k8_1</title>
+<path style="fill:none;stroke:gray;" d="M370,-98C370,-94 370,-90 370,-86"/>
+<polygon style="fill:gray;stroke:gray;" points="373.5,-86 370,-76 366.5,-86 373.5,-86"/>
+</g>
+<!-- k1_2 -->
+<g id="node36" class="node"><title>k1_2</title>
+<polygon style="fill:#a54a64;stroke:white;" points="682,-637.5 550,-637.5 546,-633.5 546,-586.5 678,-586.5 682,-590.5 682,-637.5"/>
+<polyline style="fill:none;stroke:white;" points="678,-633.5 546,-633.5 "/>
+<polyline style="fill:none;stroke:white;" points="678,-633.5 678,-586.5 "/>
+<polyline style="fill:none;stroke:white;" points="678,-633.5 682,-637.5 "/>
+<text text-anchor="middle" x="614" y="-605.392" style="font-family:Times New Roman;font-size:20.0px;fill:white;">Vicious cycle</text>
+</g>
+<!-- k2_2 -->
+<g id="node37" class="node"><title>k2_2</title>
+<polygon style="fill:white;stroke:gray;" points="681,-563.5 551,-563.5 547,-559.5 547,-512.5 677,-512.5 681,-516.5 681,-563.5"/>
+<polyline style="fill:none;stroke:gray;" points="677,-559.5 547,-559.5 "/>
+<polyline style="fill:none;stroke:gray;" points="677,-559.5 677,-512.5 "/>
+<polyline style="fill:none;stroke:gray;" points="677,-559.5 681,-563.5 "/>
+<text text-anchor="middle" x="614" y="-531.392" style="font-family:Times New Roman;font-size:20.0px;fill:gray;">Down stream</text>
+</g>
+<!-- k1_2->k2_2 -->
+<g id="edge37" class="edge"><title>k1_2->k2_2</title>
+<path style="fill:none;stroke:#a54a64;" d="M608,-586C607,-582 607,-578 607,-574"/>
+<polygon style="fill:#a54a64;stroke:#a54a64;" points="610.488,-574.299 608,-564 603.522,-573.602 610.488,-574.299"/>
+</g>
+<!-- k2_2->k1_2 -->
+<g id="edge39" class="edge"><title>k2_2->k1_2</title>
+<path style="fill:none;stroke:#a54a64;" d="M620,-564C621,-568 621,-572 621,-576"/>
+<polygon style="fill:#a54a64;stroke:#a54a64;" points="617.512,-575.701 620,-586 624.478,-576.398 617.512,-575.701"/>
+</g>
+<!-- k3_2 -->
+<g id="node40" class="node"><title>k3_2</title>
+<polygon style="fill:#99d1c1;stroke:#4a92a5;" points="690,-489.5 542,-489.5 538,-485.5 538,-438.5 686,-438.5 690,-442.5 690,-489.5"/>
+<polyline style="fill:none;stroke:#4a92a5;" points="686,-485.5 538,-485.5 "/>
+<polyline style="fill:none;stroke:#4a92a5;" points="686,-485.5 686,-438.5 "/>
+<polyline style="fill:none;stroke:#4a92a5;" points="686,-485.5 690,-489.5 "/>
+<text text-anchor="middle" x="614" y="-457.392" style="font-family:Times New Roman;font-size:20.0px;fill:#4a92a5;">Up-to-date task</text>
+</g>
+<!-- k2_2->k3_2 -->
+<g id="edge41" class="edge"><title>k2_2->k3_2</title>
+<path style="fill:none;stroke:gray;" d="M614,-512C614,-508 614,-504 614,-500"/>
+<polygon style="fill:gray;stroke:gray;" points="617.5,-500 614,-490 610.5,-500 617.5,-500"/>
+</g>
+<!-- k4_2 -->
+<g id="node42" class="node"><title>k4_2</title>
+<polygon style="fill:none;stroke:black;" points="724,-415.5 508,-415.5 504,-411.5 504,-364.5 720,-364.5 724,-368.5 724,-415.5"/>
+<polyline style="fill:none;stroke:black;" points="720,-411.5 504,-411.5 "/>
+<polyline style="fill:none;stroke:black;" points="720,-411.5 720,-364.5 "/>
+<polyline style="fill:none;stroke:black;" points="720,-411.5 724,-415.5 "/>
+<text text-anchor="middle" x="614" y="-383.392" style="font-family:Times New Roman;font-size:20.0px;">Explicitly specified task</text>
+</g>
+<!-- k3_2->k4_2 -->
+<g id="edge43" class="edge"><title>k3_2->k4_2</title>
+<path style="fill:none;stroke:gray;" d="M614,-438C614,-434 614,-430 614,-426"/>
+<polygon style="fill:gray;stroke:gray;" points="617.5,-426 614,-416 610.5,-426 617.5,-426"/>
+</g>
+<!-- k5_2 -->
+<g id="node44" class="node"><title>k5_2</title>
+<polygon style="fill:none;stroke:#4a64a5;" points="672,-341.5 560,-341.5 556,-337.5 556,-290.5 668,-290.5 672,-294.5 672,-341.5"/>
+<polyline style="fill:none;stroke:#4a64a5;" points="668,-337.5 556,-337.5 "/>
+<polyline style="fill:none;stroke:#4a64a5;" points="668,-337.5 668,-290.5 "/>
+<polyline style="fill:none;stroke:#4a64a5;" points="668,-337.5 672,-341.5 "/>
+<text text-anchor="middle" x="614" y="-309.392" style="font-family:Times New Roman;font-size:20.0px;fill:#4a64a5;">Task to run</text>
+</g>
+<!-- k4_2->k5_2 -->
+<g id="edge45" class="edge"><title>k4_2->k5_2</title>
+<path style="fill:none;stroke:#4a64a5;" d="M614,-364C614,-360 614,-356 614,-352"/>
+<polygon style="fill:#4a64a5;stroke:#4a64a5;" points="617.5,-352 614,-342 610.5,-352 617.5,-352"/>
+</g>
+<!-- k6_2 -->
+<g id="node46" class="node"><title>k6_2</title>
+<polygon style="fill:none;stroke:#4a64a5;stroke-dasharray:5,2;" points="690,-268 542,-268 538,-264 538,-194 686,-194 690,-198 690,-268"/>
+<polyline style="fill:none;stroke:#4a64a5;stroke-dasharray:5,2;" points="686,-264 538,-264 "/>
+<polyline style="fill:none;stroke:#4a64a5;stroke-dasharray:5,2;" points="686,-264 686,-194 "/>
+<polyline style="fill:none;stroke:#4a64a5;stroke-dasharray:5,2;" points="686,-264 690,-268 "/>
+<text text-anchor="middle" x="614" y="-235.892" style="font-family:Times New Roman;font-size:20.0px;fill:#4a64a5;">Up-to-date task</text>
+<text text-anchor="middle" x="614" y="-212.892" style="font-family:Times New Roman;font-size:20.0px;fill:#4a64a5;">forced to rerun</text>
+</g>
+<!-- k5_2->k6_2 -->
+<g id="edge47" class="edge"><title>k5_2->k6_2</title>
+<path style="fill:none;stroke:#4a64a5;" d="M614,-290C614,-286 614,-282 614,-278"/>
+<polygon style="fill:#4a64a5;stroke:#4a64a5;" points="617.5,-278 614,-268 610.5,-278 617.5,-278"/>
+</g>
+<!-- k7_2 -->
+<g id="node48" class="node"><title>k7_2</title>
+<polygon style="fill:#d2c24a;stroke:#4a92a5;" points="674,-172 558,-172 554,-168 554,-98 670,-98 674,-102 674,-172"/>
+<polyline style="fill:none;stroke:#4a92a5;" points="670,-168 554,-168 "/>
+<polyline style="fill:none;stroke:#4a92a5;" points="670,-168 670,-98 "/>
+<polyline style="fill:none;stroke:#4a92a5;" points="670,-168 674,-172 "/>
+<text text-anchor="middle" x="614" y="-139.892" style="font-family:Times New Roman;font-size:20.0px;fill:#4a92a5;">Up-to-date</text>
+<text text-anchor="middle" x="614" y="-116.892" style="font-family:Times New Roman;font-size:20.0px;fill:#4a92a5;">Final target</text>
+</g>
+<!-- k6_2->k7_2 -->
+<g id="edge49" class="edge"><title>k6_2->k7_2</title>
+<path style="fill:none;stroke:#4a64a5;" d="M614,-194C614,-190 614,-186 614,-182"/>
+<polygon style="fill:#4a64a5;stroke:#4a64a5;" points="617.5,-182 614,-172 610.5,-182 617.5,-182"/>
+</g>
+<!-- k8_2 -->
+<g id="node50" class="node"><title>k8_2</title>
+<polygon style="fill:#d2c24a;stroke:black;" points="674,-75.5 558,-75.5 554,-71.5 554,-24.5 670,-24.5 674,-28.5 674,-75.5"/>
+<polyline style="fill:none;stroke:black;" points="670,-71.5 554,-71.5 "/>
+<polyline style="fill:none;stroke:black;" points="670,-71.5 670,-24.5 "/>
+<polyline style="fill:none;stroke:black;" points="670,-71.5 674,-75.5 "/>
+<text text-anchor="middle" x="614" y="-43.392" style="font-family:Times New Roman;font-size:20.0px;">Final target</text>
+</g>
+<!-- k7_2->k8_2 -->
+<g id="edge51" class="edge"><title>k7_2->k8_2</title>
+<path style="fill:none;stroke:gray;" d="M614,-98C614,-94 614,-90 614,-86"/>
+<polygon style="fill:gray;stroke:gray;" points="617.5,-86 614,-76 610.5,-86 617.5,-86"/>
+</g>
+<!-- k1_3 -->
+<g id="node53" class="node"><title>k1_3</title>
+<polygon style="fill:#ff3e68;stroke:white;" points="926,-637.5 794,-637.5 790,-633.5 790,-586.5 922,-586.5 926,-590.5 926,-637.5"/>
+<polyline style="fill:none;stroke:white;" points="922,-633.5 790,-633.5 "/>
+<polyline style="fill:none;stroke:white;" points="922,-633.5 922,-586.5 "/>
+<polyline style="fill:none;stroke:white;" points="922,-633.5 926,-637.5 "/>
+<text text-anchor="middle" x="858" y="-605.392" style="font-family:Times New Roman;font-size:20.0px;fill:white;">Vicious cycle</text>
+</g>
+<!-- k2_3 -->
+<g id="node54" class="node"><title>k2_3</title>
+<polygon style="fill:white;stroke:gray;" points="925,-563.5 795,-563.5 791,-559.5 791,-512.5 921,-512.5 925,-516.5 925,-563.5"/>
+<polyline style="fill:none;stroke:gray;" points="921,-559.5 791,-559.5 "/>
+<polyline style="fill:none;stroke:gray;" points="921,-559.5 921,-512.5 "/>
+<polyline style="fill:none;stroke:gray;" points="921,-559.5 925,-563.5 "/>
+<text text-anchor="middle" x="858" y="-531.392" style="font-family:Times New Roman;font-size:20.0px;fill:gray;">Down stream</text>
+</g>
+<!-- k1_3->k2_3 -->
+<g id="edge54" class="edge"><title>k1_3->k2_3</title>
+<path style="fill:none;stroke:#ff3e68;" d="M852,-586C851,-582 851,-578 851,-574"/>
+<polygon style="fill:#ff3e68;stroke:#ff3e68;" points="854.488,-574.299 852,-564 847.522,-573.602 854.488,-574.299"/>
+</g>
+<!-- k2_3->k1_3 -->
+<g id="edge56" class="edge"><title>k2_3->k1_3</title>
+<path style="fill:none;stroke:#ff3e68;" d="M864,-564C865,-568 865,-572 865,-576"/>
+<polygon style="fill:#ff3e68;stroke:#ff3e68;" points="861.512,-575.701 864,-586 868.478,-576.398 861.512,-575.701"/>
+</g>
+<!-- k3_3 -->
+<g id="node57" class="node"><title>k3_3</title>
+<polygon style="fill:#c9d787;stroke:#7d8a2e;" points="934,-489.5 786,-489.5 782,-485.5 782,-438.5 930,-438.5 934,-442.5 934,-489.5"/>
+<polyline style="fill:none;stroke:#7d8a2e;" points="930,-485.5 782,-485.5 "/>
+<polyline style="fill:none;stroke:#7d8a2e;" points="930,-485.5 930,-438.5 "/>
+<polyline style="fill:none;stroke:#7d8a2e;" points="930,-485.5 934,-489.5 "/>
+<text text-anchor="middle" x="858" y="-457.392" style="font-family:Times New Roman;font-size:20.0px;fill:#7d8a2e;">Up-to-date task</text>
+</g>
+<!-- k2_3->k3_3 -->
+<g id="edge58" class="edge"><title>k2_3->k3_3</title>
+<path style="fill:none;stroke:gray;" d="M858,-512C858,-508 858,-504 858,-500"/>
+<polygon style="fill:gray;stroke:gray;" points="861.5,-500 858,-490 854.5,-500 861.5,-500"/>
+</g>
+<!-- k4_3 -->
+<g id="node59" class="node"><title>k4_3</title>
+<polygon style="fill:none;stroke:black;" points="968,-415.5 752,-415.5 748,-411.5 748,-364.5 964,-364.5 968,-368.5 968,-415.5"/>
+<polyline style="fill:none;stroke:black;" points="964,-411.5 748,-411.5 "/>
+<polyline style="fill:none;stroke:black;" points="964,-411.5 964,-364.5 "/>
+<polyline style="fill:none;stroke:black;" points="964,-411.5 968,-415.5 "/>
+<text text-anchor="middle" x="858" y="-383.392" style="font-family:Times New Roman;font-size:20.0px;">Explicitly specified task</text>
+</g>
+<!-- k3_3->k4_3 -->
+<g id="edge60" class="edge"><title>k3_3->k4_3</title>
+<path style="fill:none;stroke:gray;" d="M858,-438C858,-434 858,-430 858,-426"/>
+<polygon style="fill:gray;stroke:gray;" points="861.5,-426 858,-416 854.5,-426 861.5,-426"/>
+</g>
+<!-- k5_3 -->
+<g id="node61" class="node"><title>k5_3</title>
+<polygon style="fill:none;stroke:#bfb5ff;" points="916,-341.5 804,-341.5 800,-337.5 800,-290.5 912,-290.5 916,-294.5 916,-341.5"/>
+<polyline style="fill:none;stroke:#bfb5ff;" points="912,-337.5 800,-337.5 "/>
+<polyline style="fill:none;stroke:#bfb5ff;" points="912,-337.5 912,-290.5 "/>
+<polyline style="fill:none;stroke:#bfb5ff;" points="912,-337.5 916,-341.5 "/>
+<text text-anchor="middle" x="858" y="-309.392" style="font-family:Times New Roman;font-size:20.0px;fill:#bfb5ff;">Task to run</text>
+</g>
+<!-- k4_3->k5_3 -->
+<g id="edge62" class="edge"><title>k4_3->k5_3</title>
+<path style="fill:none;stroke:#bfb5ff;" d="M858,-364C858,-360 858,-356 858,-352"/>
+<polygon style="fill:#bfb5ff;stroke:#bfb5ff;" points="861.5,-352 858,-342 854.5,-352 861.5,-352"/>
+</g>
+<!-- k6_3 -->
+<g id="node63" class="node"><title>k6_3</title>
+<polygon style="fill:none;stroke:#bfb5ff;stroke-dasharray:5,2;" points="934,-268 786,-268 782,-264 782,-194 930,-194 934,-198 934,-268"/>
+<polyline style="fill:none;stroke:#bfb5ff;stroke-dasharray:5,2;" points="930,-264 782,-264 "/>
+<polyline style="fill:none;stroke:#bfb5ff;stroke-dasharray:5,2;" points="930,-264 930,-194 "/>
+<polyline style="fill:none;stroke:#bfb5ff;stroke-dasharray:5,2;" points="930,-264 934,-268 "/>
+<text text-anchor="middle" x="858" y="-235.892" style="font-family:Times New Roman;font-size:20.0px;fill:#bfb5ff;">Up-to-date task</text>
+<text text-anchor="middle" x="858" y="-212.892" style="font-family:Times New Roman;font-size:20.0px;fill:#bfb5ff;">forced to rerun</text>
+</g>
+<!-- k5_3->k6_3 -->
+<g id="edge64" class="edge"><title>k5_3->k6_3</title>
+<path style="fill:none;stroke:#bfb5ff;" d="M858,-290C858,-286 858,-282 858,-278"/>
+<polygon style="fill:#bfb5ff;stroke:#bfb5ff;" points="861.5,-278 858,-268 854.5,-278 861.5,-278"/>
+</g>
+<!-- k7_3 -->
+<g id="node65" class="node"><title>k7_3</title>
+<polygon style="fill:#fff1dc;stroke:#7d8a2e;" points="918,-172 802,-172 798,-168 798,-98 914,-98 918,-102 918,-172"/>
+<polyline style="fill:none;stroke:#7d8a2e;" points="914,-168 798,-168 "/>
+<polyline style="fill:none;stroke:#7d8a2e;" points="914,-168 914,-98 "/>
+<polyline style="fill:none;stroke:#7d8a2e;" points="914,-168 918,-172 "/>
+<text text-anchor="middle" x="858" y="-139.892" style="font-family:Times New Roman;font-size:20.0px;fill:#7d8a2e;">Up-to-date</text>
+<text text-anchor="middle" x="858" y="-116.892" style="font-family:Times New Roman;font-size:20.0px;fill:#7d8a2e;">Final target</text>
+</g>
+<!-- k6_3->k7_3 -->
+<g id="edge66" class="edge"><title>k6_3->k7_3</title>
+<path style="fill:none;stroke:#bfb5ff;" d="M858,-194C858,-190 858,-186 858,-182"/>
+<polygon style="fill:#bfb5ff;stroke:#bfb5ff;" points="861.5,-182 858,-172 854.5,-182 861.5,-182"/>
+</g>
+<!-- k8_3 -->
+<g id="node67" class="node"><title>k8_3</title>
+<polygon style="fill:#fff1dc;stroke:black;" points="918,-75.5 802,-75.5 798,-71.5 798,-24.5 914,-24.5 918,-28.5 918,-75.5"/>
+<polyline style="fill:none;stroke:black;" points="914,-71.5 798,-71.5 "/>
+<polyline style="fill:none;stroke:black;" points="914,-71.5 914,-24.5 "/>
+<polyline style="fill:none;stroke:black;" points="914,-71.5 918,-75.5 "/>
+<text text-anchor="middle" x="858" y="-43.392" style="font-family:Times New Roman;font-size:20.0px;">Final target</text>
+</g>
+<!-- k7_3->k8_3 -->
+<g id="edge68" class="edge"><title>k7_3->k8_3</title>
+<path style="fill:none;stroke:gray;" d="M858,-98C858,-94 858,-90 858,-86"/>
+<polygon style="fill:gray;stroke:gray;" points="861.5,-86 858,-76 854.5,-86 861.5,-86"/>
+</g>
+<!-- k1_4 -->
+<g id="node70" class="node"><title>k1_4</title>
+<polygon style="fill:#f54f29;stroke:white;" points="1170,-637.5 1038,-637.5 1034,-633.5 1034,-586.5 1166,-586.5 1170,-590.5 1170,-637.5"/>
+<polyline style="fill:none;stroke:white;" points="1166,-633.5 1034,-633.5 "/>
+<polyline style="fill:none;stroke:white;" points="1166,-633.5 1166,-586.5 "/>
+<polyline style="fill:none;stroke:white;" points="1166,-633.5 1170,-637.5 "/>
+<text text-anchor="middle" x="1102" y="-605.392" style="font-family:Times New Roman;font-size:20.0px;fill:white;">Vicious cycle</text>
+</g>
+<!-- k2_4 -->
+<g id="node71" class="node"><title>k2_4</title>
+<polygon style="fill:white;stroke:gray;" points="1169,-563.5 1039,-563.5 1035,-559.5 1035,-512.5 1165,-512.5 1169,-516.5 1169,-563.5"/>
+<polyline style="fill:none;stroke:gray;" points="1165,-559.5 1035,-559.5 "/>
+<polyline style="fill:none;stroke:gray;" points="1165,-559.5 1165,-512.5 "/>
+<polyline style="fill:none;stroke:gray;" points="1165,-559.5 1169,-563.5 "/>
+<text text-anchor="middle" x="1102" y="-531.392" style="font-family:Times New Roman;font-size:20.0px;fill:gray;">Down stream</text>
+</g>
+<!-- k1_4->k2_4 -->
+<g id="edge71" class="edge"><title>k1_4->k2_4</title>
+<path style="fill:none;stroke:#f54f29;" d="M1096,-586C1095,-582 1095,-578 1095,-574"/>
+<polygon style="fill:#f54f29;stroke:#f54f29;" points="1098.49,-574.299 1096,-564 1091.52,-573.602 1098.49,-574.299"/>
+</g>
+<!-- k2_4->k1_4 -->
+<g id="edge73" class="edge"><title>k2_4->k1_4</title>
+<path style="fill:none;stroke:#f54f29;" d="M1108,-564C1109,-568 1109,-572 1109,-576"/>
+<polygon style="fill:#f54f29;stroke:#f54f29;" points="1105.51,-575.701 1108,-586 1112.48,-576.398 1105.51,-575.701"/>
+</g>
+<!-- k3_4 -->
+<g id="node74" class="node"><title>k3_4</title>
+<polygon style="fill:#b8cc6e;stroke:#4b6000;" points="1178,-489.5 1030,-489.5 1026,-485.5 1026,-438.5 1174,-438.5 1178,-442.5 1178,-489.5"/>
+<polyline style="fill:none;stroke:#4b6000;" points="1174,-485.5 1026,-485.5 "/>
+<polyline style="fill:none;stroke:#4b6000;" points="1174,-485.5 1174,-438.5 "/>
+<polyline style="fill:none;stroke:#4b6000;" points="1174,-485.5 1178,-489.5 "/>
+<text text-anchor="middle" x="1102" y="-457.392" style="font-family:Times New Roman;font-size:20.0px;fill:#4b6000;">Up-to-date task</text>
+</g>
+<!-- k2_4->k3_4 -->
+<g id="edge75" class="edge"><title>k2_4->k3_4</title>
+<path style="fill:none;stroke:gray;" d="M1102,-512C1102,-508 1102,-504 1102,-500"/>
+<polygon style="fill:gray;stroke:gray;" points="1105.5,-500 1102,-490 1098.5,-500 1105.5,-500"/>
+</g>
+<!-- k4_4 -->
+<g id="node76" class="node"><title>k4_4</title>
+<polygon style="fill:none;stroke:black;" points="1212,-415.5 996,-415.5 992,-411.5 992,-364.5 1208,-364.5 1212,-368.5 1212,-415.5"/>
+<polyline style="fill:none;stroke:black;" points="1208,-411.5 992,-411.5 "/>
+<polyline style="fill:none;stroke:black;" points="1208,-411.5 1208,-364.5 "/>
+<polyline style="fill:none;stroke:black;" points="1208,-411.5 1212,-415.5 "/>
+<text text-anchor="middle" x="1102" y="-383.392" style="font-family:Times New Roman;font-size:20.0px;">Explicitly specified task</text>
+</g>
+<!-- k3_4->k4_4 -->
+<g id="edge77" class="edge"><title>k3_4->k4_4</title>
+<path style="fill:none;stroke:gray;" d="M1102,-438C1102,-434 1102,-430 1102,-426"/>
+<polygon style="fill:gray;stroke:gray;" points="1105.5,-426 1102,-416 1098.5,-426 1105.5,-426"/>
+</g>
+<!-- k5_4 -->
+<g id="node78" class="node"><title>k5_4</title>
+<polygon style="fill:none;stroke:#004460;" points="1160,-341.5 1048,-341.5 1044,-337.5 1044,-290.5 1156,-290.5 1160,-294.5 1160,-341.5"/>
+<polyline style="fill:none;stroke:#004460;" points="1156,-337.5 1044,-337.5 "/>
+<polyline style="fill:none;stroke:#004460;" points="1156,-337.5 1156,-290.5 "/>
+<polyline style="fill:none;stroke:#004460;" points="1156,-337.5 1160,-341.5 "/>
+<text text-anchor="middle" x="1102" y="-309.392" style="font-family:Times New Roman;font-size:20.0px;fill:#004460;">Task to run</text>
+</g>
+<!-- k4_4->k5_4 -->
+<g id="edge79" class="edge"><title>k4_4->k5_4</title>
+<path style="fill:none;stroke:#004460;" d="M1102,-364C1102,-360 1102,-356 1102,-352"/>
+<polygon style="fill:#004460;stroke:#004460;" points="1105.5,-352 1102,-342 1098.5,-352 1105.5,-352"/>
+</g>
+<!-- k6_4 -->
+<g id="node80" class="node"><title>k6_4</title>
+<polygon style="fill:none;stroke:#004460;stroke-dasharray:5,2;" points="1178,-268 1030,-268 1026,-264 1026,-194 1174,-194 1178,-198 1178,-268"/>
+<polyline style="fill:none;stroke:#004460;stroke-dasharray:5,2;" points="1174,-264 1026,-264 "/>
+<polyline style="fill:none;stroke:#004460;stroke-dasharray:5,2;" points="1174,-264 1174,-194 "/>
+<polyline style="fill:none;stroke:#004460;stroke-dasharray:5,2;" points="1174,-264 1178,-268 "/>
+<text text-anchor="middle" x="1102" y="-235.892" style="font-family:Times New Roman;font-size:20.0px;fill:#004460;">Up-to-date task</text>
+<text text-anchor="middle" x="1102" y="-212.892" style="font-family:Times New Roman;font-size:20.0px;fill:#004460;">forced to rerun</text>
+</g>
+<!-- k5_4->k6_4 -->
+<g id="edge81" class="edge"><title>k5_4->k6_4</title>
+<path style="fill:none;stroke:#004460;" d="M1102,-290C1102,-286 1102,-282 1102,-278"/>
+<polygon style="fill:#004460;stroke:#004460;" points="1105.5,-278 1102,-268 1098.5,-278 1105.5,-278"/>
+</g>
+<!-- k7_4 -->
+<g id="node82" class="node"><title>k7_4</title>
+<polygon style="fill:#fff0a3;stroke:#4b6000;" points="1162,-172 1046,-172 1042,-168 1042,-98 1158,-98 1162,-102 1162,-172"/>
+<polyline style="fill:none;stroke:#4b6000;" points="1158,-168 1042,-168 "/>
+<polyline style="fill:none;stroke:#4b6000;" points="1158,-168 1158,-98 "/>
+<polyline style="fill:none;stroke:#4b6000;" points="1158,-168 1162,-172 "/>
+<text text-anchor="middle" x="1102" y="-139.892" style="font-family:Times New Roman;font-size:20.0px;fill:#4b6000;">Up-to-date</text>
+<text text-anchor="middle" x="1102" y="-116.892" style="font-family:Times New Roman;font-size:20.0px;fill:#4b6000;">Final target</text>
+</g>
+<!-- k6_4->k7_4 -->
+<g id="edge83" class="edge"><title>k6_4->k7_4</title>
+<path style="fill:none;stroke:#004460;" d="M1102,-194C1102,-190 1102,-186 1102,-182"/>
+<polygon style="fill:#004460;stroke:#004460;" points="1105.5,-182 1102,-172 1098.5,-182 1105.5,-182"/>
+</g>
+<!-- k8_4 -->
+<g id="node84" class="node"><title>k8_4</title>
+<polygon style="fill:#fff0a3;stroke:black;" points="1162,-75.5 1046,-75.5 1042,-71.5 1042,-24.5 1158,-24.5 1162,-28.5 1162,-75.5"/>
+<polyline style="fill:none;stroke:black;" points="1158,-71.5 1042,-71.5 "/>
+<polyline style="fill:none;stroke:black;" points="1158,-71.5 1158,-24.5 "/>
+<polyline style="fill:none;stroke:black;" points="1158,-71.5 1162,-75.5 "/>
+<text text-anchor="middle" x="1102" y="-43.392" style="font-family:Times New Roman;font-size:20.0px;">Final target</text>
+</g>
+<!-- k7_4->k8_4 -->
+<g id="edge85" class="edge"><title>k7_4->k8_4</title>
+<path style="fill:none;stroke:gray;" d="M1102,-98C1102,-94 1102,-90 1102,-86"/>
+<polygon style="fill:gray;stroke:gray;" points="1105.5,-86 1102,-76 1098.5,-86 1105.5,-86"/>
+</g>
+<!-- k1_5 -->
+<g id="node87" class="node"><title>k1_5</title>
+<polygon style="fill:#ff0000;stroke:white;" points="1414,-637.5 1282,-637.5 1278,-633.5 1278,-586.5 1410,-586.5 1414,-590.5 1414,-637.5"/>
+<polyline style="fill:none;stroke:white;" points="1410,-633.5 1278,-633.5 "/>
+<polyline style="fill:none;stroke:white;" points="1410,-633.5 1410,-586.5 "/>
+<polyline style="fill:none;stroke:white;" points="1410,-633.5 1414,-637.5 "/>
+<text text-anchor="middle" x="1346" y="-605.392" style="font-family:Times New Roman;font-size:20.0px;fill:white;">Vicious cycle</text>
+</g>
+<!-- k2_5 -->
+<g id="node88" class="node"><title>k2_5</title>
+<polygon style="fill:white;stroke:gray;" points="1413,-563.5 1283,-563.5 1279,-559.5 1279,-512.5 1409,-512.5 1413,-516.5 1413,-563.5"/>
+<polyline style="fill:none;stroke:gray;" points="1409,-559.5 1279,-559.5 "/>
+<polyline style="fill:none;stroke:gray;" points="1409,-559.5 1409,-512.5 "/>
+<polyline style="fill:none;stroke:gray;" points="1409,-559.5 1413,-563.5 "/>
+<text text-anchor="middle" x="1346" y="-531.392" style="font-family:Times New Roman;font-size:20.0px;fill:gray;">Down stream</text>
+</g>
+<!-- k1_5->k2_5 -->
+<g id="edge88" class="edge"><title>k1_5->k2_5</title>
+<path style="fill:none;stroke:#ff0000;" d="M1340,-586C1339,-582 1339,-578 1339,-574"/>
+<polygon style="fill:#ff0000;stroke:#ff0000;" points="1342.49,-574.299 1340,-564 1335.52,-573.602 1342.49,-574.299"/>
+</g>
+<!-- k2_5->k1_5 -->
+<g id="edge90" class="edge"><title>k2_5->k1_5</title>
+<path style="fill:none;stroke:#ff0000;" d="M1352,-564C1353,-568 1353,-572 1353,-576"/>
+<polygon style="fill:#ff0000;stroke:#ff0000;" points="1349.51,-575.701 1352,-586 1356.48,-576.398 1349.51,-575.701"/>
+</g>
+<!-- k3_5 -->
+<g id="node91" class="node"><title>k3_5</title>
+<polygon style="fill:#44ff44;stroke:#007700;" points="1422,-489.5 1274,-489.5 1270,-485.5 1270,-438.5 1418,-438.5 1422,-442.5 1422,-489.5"/>
+<polyline style="fill:none;stroke:#007700;" points="1418,-485.5 1270,-485.5 "/>
+<polyline style="fill:none;stroke:#007700;" points="1418,-485.5 1418,-438.5 "/>
+<polyline style="fill:none;stroke:#007700;" points="1418,-485.5 1422,-489.5 "/>
+<text text-anchor="middle" x="1346" y="-457.392" style="font-family:Times New Roman;font-size:20.0px;fill:#007700;">Up-to-date task</text>
+</g>
+<!-- k2_5->k3_5 -->
+<g id="edge92" class="edge"><title>k2_5->k3_5</title>
+<path style="fill:none;stroke:gray;" d="M1346,-512C1346,-508 1346,-504 1346,-500"/>
+<polygon style="fill:gray;stroke:gray;" points="1349.5,-500 1346,-490 1342.5,-500 1349.5,-500"/>
+</g>
+<!-- k4_5 -->
+<g id="node93" class="node"><title>k4_5</title>
+<polygon style="fill:none;stroke:black;" points="1456,-415.5 1240,-415.5 1236,-411.5 1236,-364.5 1452,-364.5 1456,-368.5 1456,-415.5"/>
+<polyline style="fill:none;stroke:black;" points="1452,-411.5 1236,-411.5 "/>
+<polyline style="fill:none;stroke:black;" points="1452,-411.5 1452,-364.5 "/>
+<polyline style="fill:none;stroke:black;" points="1452,-411.5 1456,-415.5 "/>
+<text text-anchor="middle" x="1346" y="-383.392" style="font-family:Times New Roman;font-size:20.0px;">Explicitly specified task</text>
+</g>
+<!-- k3_5->k4_5 -->
+<g id="edge94" class="edge"><title>k3_5->k4_5</title>
+<path style="fill:none;stroke:gray;" d="M1346,-438C1346,-434 1346,-430 1346,-426"/>
+<polygon style="fill:gray;stroke:gray;" points="1349.5,-426 1346,-416 1342.5,-426 1349.5,-426"/>
+</g>
+<!-- k5_5 -->
+<g id="node95" class="node"><title>k5_5</title>
+<polygon style="fill:#aabbff;stroke:#1122ff;" points="1404,-341.5 1292,-341.5 1288,-337.5 1288,-290.5 1400,-290.5 1404,-294.5 1404,-341.5"/>
+<polyline style="fill:none;stroke:#1122ff;" points="1400,-337.5 1288,-337.5 "/>
+<polyline style="fill:none;stroke:#1122ff;" points="1400,-337.5 1400,-290.5 "/>
+<polyline style="fill:none;stroke:#1122ff;" points="1400,-337.5 1404,-341.5 "/>
+<text text-anchor="middle" x="1346" y="-309.392" style="font-family:Times New Roman;font-size:20.0px;fill:#1122ff;">Task to run</text>
+</g>
+<!-- k4_5->k5_5 -->
+<g id="edge96" class="edge"><title>k4_5->k5_5</title>
+<path style="fill:none;stroke:#1122ff;" d="M1346,-364C1346,-360 1346,-356 1346,-352"/>
+<polygon style="fill:#1122ff;stroke:#1122ff;" points="1349.5,-352 1346,-342 1342.5,-352 1349.5,-352"/>
+</g>
+<!-- k6_5 -->
+<g id="node97" class="node"><title>k6_5</title>
+<polygon style="fill:none;stroke:#1122ff;stroke-dasharray:5,2;" points="1422,-268 1274,-268 1270,-264 1270,-194 1418,-194 1422,-198 1422,-268"/>
+<polyline style="fill:none;stroke:#1122ff;stroke-dasharray:5,2;" points="1418,-264 1270,-264 "/>
+<polyline style="fill:none;stroke:#1122ff;stroke-dasharray:5,2;" points="1418,-264 1418,-194 "/>
+<polyline style="fill:none;stroke:#1122ff;stroke-dasharray:5,2;" points="1418,-264 1422,-268 "/>
+<text text-anchor="middle" x="1346" y="-235.892" style="font-family:Times New Roman;font-size:20.0px;fill:#1122ff;">Up-to-date task</text>
+<text text-anchor="middle" x="1346" y="-212.892" style="font-family:Times New Roman;font-size:20.0px;fill:#1122ff;">forced to rerun</text>
+</g>
+<!-- k5_5->k6_5 -->
+<g id="edge98" class="edge"><title>k5_5->k6_5</title>
+<path style="fill:none;stroke:#1122ff;" d="M1346,-290C1346,-286 1346,-282 1346,-278"/>
+<polygon style="fill:#1122ff;stroke:#1122ff;" points="1349.5,-278 1346,-268 1342.5,-278 1349.5,-278"/>
+</g>
+<!-- k7_5 -->
+<g id="node99" class="node"><title>k7_5</title>
+<polygon style="fill:#efa03b;stroke:#ffcc3b;" points="1406,-172 1290,-172 1286,-168 1286,-98 1402,-98 1406,-102 1406,-172"/>
+<polyline style="fill:none;stroke:#ffcc3b;" points="1402,-168 1286,-168 "/>
+<polyline style="fill:none;stroke:#ffcc3b;" points="1402,-168 1402,-98 "/>
+<polyline style="fill:none;stroke:#ffcc3b;" points="1402,-168 1406,-172 "/>
+<text text-anchor="middle" x="1346" y="-139.892" style="font-family:Times New Roman;font-size:20.0px;fill:#ffcc3b;">Up-to-date</text>
+<text text-anchor="middle" x="1346" y="-116.892" style="font-family:Times New Roman;font-size:20.0px;fill:#ffcc3b;">Final target</text>
+</g>
+<!-- k6_5->k7_5 -->
+<g id="edge100" class="edge"><title>k6_5->k7_5</title>
+<path style="fill:none;stroke:#1122ff;" d="M1346,-194C1346,-190 1346,-186 1346,-182"/>
+<polygon style="fill:#1122ff;stroke:#1122ff;" points="1349.5,-182 1346,-172 1342.5,-182 1349.5,-182"/>
+</g>
+<!-- k8_5 -->
+<g id="node101" class="node"><title>k8_5</title>
+<polygon style="fill:#efa03b;stroke:black;" points="1406,-75.5 1290,-75.5 1286,-71.5 1286,-24.5 1402,-24.5 1406,-28.5 1406,-75.5"/>
+<polyline style="fill:none;stroke:black;" points="1402,-71.5 1286,-71.5 "/>
+<polyline style="fill:none;stroke:black;" points="1402,-71.5 1402,-24.5 "/>
+<polyline style="fill:none;stroke:black;" points="1402,-71.5 1406,-75.5 "/>
+<text text-anchor="middle" x="1346" y="-43.392" style="font-family:Times New Roman;font-size:20.0px;">Final target</text>
+</g>
+<!-- k7_5->k8_5 -->
+<g id="edge102" class="edge"><title>k7_5->k8_5</title>
+<path style="fill:none;stroke:gray;" d="M1346,-98C1346,-94 1346,-90 1346,-86"/>
+<polygon style="fill:gray;stroke:gray;" points="1349.5,-86 1346,-76 1342.5,-86 1349.5,-86"/>
+</g>
+<!-- k1_6 -->
+<g id="node104" class="node"><title>k1_6</title>
+<polygon style="fill:#ff3232;stroke:white;" points="1658,-637.5 1526,-637.5 1522,-633.5 1522,-586.5 1654,-586.5 1658,-590.5 1658,-637.5"/>
+<polyline style="fill:none;stroke:white;" points="1654,-633.5 1522,-633.5 "/>
+<polyline style="fill:none;stroke:white;" points="1654,-633.5 1654,-586.5 "/>
+<polyline style="fill:none;stroke:white;" points="1654,-633.5 1658,-637.5 "/>
+<text text-anchor="middle" x="1590" y="-605.392" style="font-family:Times New Roman;font-size:20.0px;">Vicious cycle</text>
+</g>
+<!-- k2_6 -->
+<g id="node105" class="node"><title>k2_6</title>
+<polygon style="fill:white;stroke:gray;" points="1657,-563.5 1527,-563.5 1523,-559.5 1523,-512.5 1653,-512.5 1657,-516.5 1657,-563.5"/>
+<polyline style="fill:none;stroke:gray;" points="1653,-559.5 1523,-559.5 "/>
+<polyline style="fill:none;stroke:gray;" points="1653,-559.5 1653,-512.5 "/>
+<polyline style="fill:none;stroke:gray;" points="1653,-559.5 1657,-563.5 "/>
+<text text-anchor="middle" x="1590" y="-531.392" style="font-family:Times New Roman;font-size:20.0px;fill:gray;">Down stream</text>
+</g>
+<!-- k1_6->k2_6 -->
+<g id="edge105" class="edge"><title>k1_6->k2_6</title>
+<path style="fill:none;stroke:#ff3232;" d="M1584,-586C1583,-582 1583,-578 1583,-574"/>
+<polygon style="fill:#ff3232;stroke:#ff3232;" points="1586.49,-574.299 1584,-564 1579.52,-573.602 1586.49,-574.299"/>
+</g>
+<!-- k2_6->k1_6 -->
+<g id="edge107" class="edge"><title>k2_6->k1_6</title>
+<path style="fill:none;stroke:#ff3232;" d="M1596,-564C1597,-568 1597,-572 1597,-576"/>
+<polygon style="fill:#ff3232;stroke:#ff3232;" points="1593.51,-575.701 1596,-586 1600.48,-576.398 1593.51,-575.701"/>
+</g>
+<!-- k3_6 -->
+<g id="node108" class="node"><title>k3_6</title>
+<polygon style="fill:#6cb924;stroke:black;" points="1666,-489.5 1518,-489.5 1514,-485.5 1514,-438.5 1662,-438.5 1666,-442.5 1666,-489.5"/>
+<polyline style="fill:none;stroke:black;" points="1662,-485.5 1514,-485.5 "/>
+<polyline style="fill:none;stroke:black;" points="1662,-485.5 1662,-438.5 "/>
+<polyline style="fill:none;stroke:black;" points="1662,-485.5 1666,-489.5 "/>
+<text text-anchor="middle" x="1590" y="-457.392" style="font-family:Times New Roman;font-size:20.0px;">Up-to-date task</text>
+</g>
+<!-- k2_6->k3_6 -->
+<g id="edge109" class="edge"><title>k2_6->k3_6</title>
+<path style="fill:none;stroke:gray;" d="M1590,-512C1590,-508 1590,-504 1590,-500"/>
+<polygon style="fill:gray;stroke:gray;" points="1593.5,-500 1590,-490 1586.5,-500 1593.5,-500"/>
+</g>
+<!-- k4_6 -->
+<g id="node110" class="node"><title>k4_6</title>
+<polygon style="fill:none;stroke:black;" points="1700,-415.5 1484,-415.5 1480,-411.5 1480,-364.5 1696,-364.5 1700,-368.5 1700,-415.5"/>
+<polyline style="fill:none;stroke:black;" points="1696,-411.5 1480,-411.5 "/>
+<polyline style="fill:none;stroke:black;" points="1696,-411.5 1696,-364.5 "/>
+<polyline style="fill:none;stroke:black;" points="1696,-411.5 1700,-415.5 "/>
+<text text-anchor="middle" x="1590" y="-383.392" style="font-family:Times New Roman;font-size:20.0px;">Explicitly specified task</text>
+</g>
+<!-- k3_6->k4_6 -->
+<g id="edge111" class="edge"><title>k3_6->k4_6</title>
+<path style="fill:none;stroke:gray;" d="M1590,-438C1590,-434 1590,-430 1590,-426"/>
+<polygon style="fill:gray;stroke:gray;" points="1593.5,-426 1590,-416 1586.5,-426 1593.5,-426"/>
+</g>
+<!-- k5_6 -->
+<g id="node112" class="node"><title>k5_6</title>
+<polygon style="fill:#5f52ee;stroke:#0044a0;" points="1648,-341.5 1536,-341.5 1532,-337.5 1532,-290.5 1644,-290.5 1648,-294.5 1648,-341.5"/>
+<polyline style="fill:none;stroke:#0044a0;" points="1644,-337.5 1532,-337.5 "/>
+<polyline style="fill:none;stroke:#0044a0;" points="1644,-337.5 1644,-290.5 "/>
+<polyline style="fill:none;stroke:#0044a0;" points="1644,-337.5 1648,-341.5 "/>
+<text text-anchor="middle" x="1590" y="-309.392" style="font-family:Times New Roman;font-size:20.0px;fill:lightgrey;">Task to run</text>
+</g>
+<!-- k4_6->k5_6 -->
+<g id="edge113" class="edge"><title>k4_6->k5_6</title>
+<path style="fill:none;stroke:#0044a0;" d="M1590,-364C1590,-360 1590,-356 1590,-352"/>
+<polygon style="fill:#0044a0;stroke:#0044a0;" points="1593.5,-352 1590,-342 1586.5,-352 1593.5,-352"/>
+</g>
+<!-- k6_6 -->
+<g id="node114" class="node"><title>k6_6</title>
+<polygon style="fill:none;stroke:#0044a0;stroke-dasharray:5,2;" points="1666,-268 1518,-268 1514,-264 1514,-194 1662,-194 1666,-198 1666,-268"/>
+<polyline style="fill:none;stroke:#0044a0;stroke-dasharray:5,2;" points="1662,-264 1514,-264 "/>
+<polyline style="fill:none;stroke:#0044a0;stroke-dasharray:5,2;" points="1662,-264 1662,-194 "/>
+<polyline style="fill:none;stroke:#0044a0;stroke-dasharray:5,2;" points="1662,-264 1666,-268 "/>
+<text text-anchor="middle" x="1590" y="-235.892" style="font-family:Times New Roman;font-size:20.0px;fill:#0044a0;">Up-to-date task</text>
+<text text-anchor="middle" x="1590" y="-212.892" style="font-family:Times New Roman;font-size:20.0px;fill:#0044a0;">forced to rerun</text>
+</g>
+<!-- k5_6->k6_6 -->
+<g id="edge115" class="edge"><title>k5_6->k6_6</title>
+<path style="fill:none;stroke:#0044a0;" d="M1590,-290C1590,-286 1590,-282 1590,-278"/>
+<polygon style="fill:#0044a0;stroke:#0044a0;" points="1593.5,-278 1590,-268 1586.5,-278 1593.5,-278"/>
+</g>
+<!-- k7_6 -->
+<g id="node116" class="node"><title>k7_6</title>
+<polygon style="fill:#ece116;stroke:black;" points="1650,-172 1534,-172 1530,-168 1530,-98 1646,-98 1650,-102 1650,-172"/>
+<polyline style="fill:none;stroke:black;" points="1646,-168 1530,-168 "/>
+<polyline style="fill:none;stroke:black;" points="1646,-168 1646,-98 "/>
+<polyline style="fill:none;stroke:black;" points="1646,-168 1650,-172 "/>
+<text text-anchor="middle" x="1590" y="-139.892" style="font-family:Times New Roman;font-size:20.0px;fill:#efa03b;">Up-to-date</text>
+<text text-anchor="middle" x="1590" y="-116.892" style="font-family:Times New Roman;font-size:20.0px;fill:#efa03b;">Final target</text>
+</g>
+<!-- k6_6->k7_6 -->
+<g id="edge117" class="edge"><title>k6_6->k7_6</title>
+<path style="fill:none;stroke:#0044a0;" d="M1590,-194C1590,-190 1590,-186 1590,-182"/>
+<polygon style="fill:#0044a0;stroke:#0044a0;" points="1593.5,-182 1590,-172 1586.5,-182 1593.5,-182"/>
+</g>
+<!-- k8_6 -->
+<g id="node118" class="node"><title>k8_6</title>
+<polygon style="fill:#ece116;stroke:black;" points="1650,-75.5 1534,-75.5 1530,-71.5 1530,-24.5 1646,-24.5 1650,-28.5 1650,-75.5"/>
+<polyline style="fill:none;stroke:black;" points="1646,-71.5 1530,-71.5 "/>
+<polyline style="fill:none;stroke:black;" points="1646,-71.5 1646,-24.5 "/>
+<polyline style="fill:none;stroke:black;" points="1646,-71.5 1650,-75.5 "/>
+<text text-anchor="middle" x="1590" y="-43.392" style="font-family:Times New Roman;font-size:20.0px;">Final target</text>
+</g>
+<!-- k7_6->k8_6 -->
+<g id="edge119" class="edge"><title>k7_6->k8_6</title>
+<path style="fill:none;stroke:gray;" d="M1590,-98C1590,-94 1590,-90 1590,-86"/>
+<polygon style="fill:gray;stroke:gray;" points="1593.5,-86 1590,-76 1586.5,-86 1593.5,-86"/>
+</g>
+<!-- k1_7 -->
+<g id="node121" class="node"><title>k1_7</title>
+<polygon style="fill:#b9495e;stroke:white;" points="1902,-637.5 1770,-637.5 1766,-633.5 1766,-586.5 1898,-586.5 1902,-590.5 1902,-637.5"/>
+<polyline style="fill:none;stroke:white;" points="1898,-633.5 1766,-633.5 "/>
+<polyline style="fill:none;stroke:white;" points="1898,-633.5 1898,-586.5 "/>
+<polyline style="fill:none;stroke:white;" points="1898,-633.5 1902,-637.5 "/>
+<text text-anchor="middle" x="1834" y="-605.392" style="font-family:Times New Roman;font-size:20.0px;fill:white;">Vicious cycle</text>
+</g>
+<!-- k2_7 -->
+<g id="node122" class="node"><title>k2_7</title>
+<polygon style="fill:white;stroke:gray;" points="1901,-563.5 1771,-563.5 1767,-559.5 1767,-512.5 1897,-512.5 1901,-516.5 1901,-563.5"/>
+<polyline style="fill:none;stroke:gray;" points="1897,-559.5 1767,-559.5 "/>
+<polyline style="fill:none;stroke:gray;" points="1897,-559.5 1897,-512.5 "/>
+<polyline style="fill:none;stroke:gray;" points="1897,-559.5 1901,-563.5 "/>
+<text text-anchor="middle" x="1834" y="-531.392" style="font-family:Times New Roman;font-size:20.0px;fill:gray;">Down stream</text>
+</g>
+<!-- k1_7->k2_7 -->
+<g id="edge122" class="edge"><title>k1_7->k2_7</title>
+<path style="fill:none;stroke:#b9495e;" d="M1828,-586C1827,-582 1827,-578 1827,-574"/>
+<polygon style="fill:#b9495e;stroke:#b9495e;" points="1830.49,-574.299 1828,-564 1823.52,-573.602 1830.49,-574.299"/>
+</g>
+<!-- k2_7->k1_7 -->
+<g id="edge124" class="edge"><title>k2_7->k1_7</title>
+<path style="fill:none;stroke:#b9495e;" d="M1840,-564C1841,-568 1841,-572 1841,-576"/>
+<polygon style="fill:#b9495e;stroke:#b9495e;" points="1837.51,-575.701 1840,-586 1844.48,-576.398 1837.51,-575.701"/>
+</g>
+<!-- k3_7 -->
+<g id="node125" class="node"><title>k3_7</title>
+<polygon style="fill:#d3fae3;stroke:#87b379;" points="1910,-489.5 1762,-489.5 1758,-485.5 1758,-438.5 1906,-438.5 1910,-442.5 1910,-489.5"/>
+<polyline style="fill:none;stroke:#87b379;" points="1906,-485.5 1758,-485.5 "/>
+<polyline style="fill:none;stroke:#87b379;" points="1906,-485.5 1906,-438.5 "/>
+<polyline style="fill:none;stroke:#87b379;" points="1906,-485.5 1910,-489.5 "/>
+<text text-anchor="middle" x="1834" y="-457.392" style="font-family:Times New Roman;font-size:20.0px;fill:#87b379;">Up-to-date task</text>
+</g>
+<!-- k2_7->k3_7 -->
+<g id="edge126" class="edge"><title>k2_7->k3_7</title>
+<path style="fill:none;stroke:gray;" d="M1834,-512C1834,-508 1834,-504 1834,-500"/>
+<polygon style="fill:gray;stroke:gray;" points="1837.5,-500 1834,-490 1830.5,-500 1837.5,-500"/>
+</g>
+<!-- k4_7 -->
+<g id="node127" class="node"><title>k4_7</title>
+<polygon style="fill:none;stroke:black;" points="1944,-415.5 1728,-415.5 1724,-411.5 1724,-364.5 1940,-364.5 1944,-368.5 1944,-415.5"/>
+<polyline style="fill:none;stroke:black;" points="1940,-411.5 1724,-411.5 "/>
+<polyline style="fill:none;stroke:black;" points="1940,-411.5 1940,-364.5 "/>
+<polyline style="fill:none;stroke:black;" points="1940,-411.5 1944,-415.5 "/>
+<text text-anchor="middle" x="1834" y="-383.392" style="font-family:Times New Roman;font-size:20.0px;">Explicitly specified task</text>
+</g>
+<!-- k3_7->k4_7 -->
+<g id="edge128" class="edge"><title>k3_7->k4_7</title>
+<path style="fill:none;stroke:gray;" d="M1834,-438C1834,-434 1834,-430 1834,-426"/>
+<polygon style="fill:gray;stroke:gray;" points="1837.5,-426 1834,-416 1830.5,-426 1837.5,-426"/>
+</g>
+<!-- k5_7 -->
+<g id="node129" class="node"><title>k5_7</title>
+<polygon style="fill:none;stroke:#87bae4;" points="1892,-341.5 1780,-341.5 1776,-337.5 1776,-290.5 1888,-290.5 1892,-294.5 1892,-341.5"/>
+<polyline style="fill:none;stroke:#87bae4;" points="1888,-337.5 1776,-337.5 "/>
+<polyline style="fill:none;stroke:#87bae4;" points="1888,-337.5 1888,-290.5 "/>
+<polyline style="fill:none;stroke:#87bae4;" points="1888,-337.5 1892,-341.5 "/>
+<text text-anchor="middle" x="1834" y="-309.392" style="font-family:Times New Roman;font-size:20.0px;fill:#87bae4;">Task to run</text>
+</g>
+<!-- k4_7->k5_7 -->
+<g id="edge130" class="edge"><title>k4_7->k5_7</title>
+<path style="fill:none;stroke:#87bae4;" d="M1834,-364C1834,-360 1834,-356 1834,-352"/>
+<polygon style="fill:#87bae4;stroke:#87bae4;" points="1837.5,-352 1834,-342 1830.5,-352 1837.5,-352"/>
+</g>
+<!-- k6_7 -->
+<g id="node131" class="node"><title>k6_7</title>
+<polygon style="fill:none;stroke:#87bae4;stroke-dasharray:5,2;" points="1910,-268 1762,-268 1758,-264 1758,-194 1906,-194 1910,-198 1910,-268"/>
+<polyline style="fill:none;stroke:#87bae4;stroke-dasharray:5,2;" points="1906,-264 1758,-264 "/>
+<polyline style="fill:none;stroke:#87bae4;stroke-dasharray:5,2;" points="1906,-264 1906,-194 "/>
+<polyline style="fill:none;stroke:#87bae4;stroke-dasharray:5,2;" points="1906,-264 1910,-268 "/>
+<text text-anchor="middle" x="1834" y="-235.892" style="font-family:Times New Roman;font-size:20.0px;fill:#87bae4;">Up-to-date task</text>
+<text text-anchor="middle" x="1834" y="-212.892" style="font-family:Times New Roman;font-size:20.0px;fill:#87bae4;">forced to rerun</text>
+</g>
+<!-- k5_7->k6_7 -->
+<g id="edge132" class="edge"><title>k5_7->k6_7</title>
+<path style="fill:none;stroke:#87bae4;" d="M1834,-290C1834,-286 1834,-282 1834,-278"/>
+<polygon style="fill:#87bae4;stroke:#87bae4;" points="1837.5,-278 1834,-268 1830.5,-278 1837.5,-278"/>
+</g>
+<!-- k7_7 -->
+<g id="node133" class="node"><title>k7_7</title>
+<polygon style="fill:#fdba40;stroke:#87b379;" points="1894,-172 1778,-172 1774,-168 1774,-98 1890,-98 1894,-102 1894,-172"/>
+<polyline style="fill:none;stroke:#87b379;" points="1890,-168 1774,-168 "/>
+<polyline style="fill:none;stroke:#87b379;" points="1890,-168 1890,-98 "/>
+<polyline style="fill:none;stroke:#87b379;" points="1890,-168 1894,-172 "/>
+<text text-anchor="middle" x="1834" y="-139.892" style="font-family:Times New Roman;font-size:20.0px;fill:#87b379;">Up-to-date</text>
+<text text-anchor="middle" x="1834" y="-116.892" style="font-family:Times New Roman;font-size:20.0px;fill:#87b379;">Final target</text>
+</g>
+<!-- k6_7->k7_7 -->
+<g id="edge134" class="edge"><title>k6_7->k7_7</title>
+<path style="fill:none;stroke:#87bae4;" d="M1834,-194C1834,-190 1834,-186 1834,-182"/>
+<polygon style="fill:#87bae4;stroke:#87bae4;" points="1837.5,-182 1834,-172 1830.5,-182 1837.5,-182"/>
+</g>
+<!-- k8_7 -->
+<g id="node135" class="node"><title>k8_7</title>
+<polygon style="fill:#fdba40;stroke:black;" points="1894,-75.5 1778,-75.5 1774,-71.5 1774,-24.5 1890,-24.5 1894,-28.5 1894,-75.5"/>
+<polyline style="fill:none;stroke:black;" points="1890,-71.5 1774,-71.5 "/>
+<polyline style="fill:none;stroke:black;" points="1890,-71.5 1890,-24.5 "/>
+<polyline style="fill:none;stroke:black;" points="1890,-71.5 1894,-75.5 "/>
+<text text-anchor="middle" x="1834" y="-43.392" style="font-family:Times New Roman;font-size:20.0px;">Final target</text>
+</g>
+<!-- k7_7->k8_7 -->
+<g id="edge136" class="edge"><title>k7_7->k8_7</title>
+<path style="fill:none;stroke:gray;" d="M1834,-98C1834,-94 1834,-90 1834,-86"/>
+<polygon style="fill:gray;stroke:gray;" points="1837.5,-86 1834,-76 1830.5,-86 1837.5,-86"/>
+</g>
+</g>
+</svg>
diff --git a/doc/images/front_page_flowchart.png b/doc/images/front_page_flowchart.png
new file mode 100644
index 0000000..2c0a8d2
Binary files /dev/null and b/doc/images/front_page_flowchart.png differ
diff --git a/doc/images/gallery/gallery_big_pipeline.png b/doc/images/gallery/gallery_big_pipeline.png
new file mode 100644
index 0000000..28da4be
Binary files /dev/null and b/doc/images/gallery/gallery_big_pipeline.png differ
diff --git a/doc/images/gallery/gallery_big_pipeline.svg b/doc/images/gallery/gallery_big_pipeline.svg
new file mode 100644
index 0000000..49ca7ed
--- /dev/null
+++ b/doc/images/gallery/gallery_big_pipeline.svg
@@ -0,0 +1,2699 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.0//EN"
+ "http://www.w3.org/TR/2001/REC-SVG-20010904/DTD/svg10.dtd" [
+ <!ATTLIST svg xmlns:xlink CDATA #FIXED "http://www.w3.org/1999/xlink">
+]>
+<!-- Generated by Graphviz version 2.20.2 (Mon Aug 4 08:59:22 UTC 2008)
+ For user: (andreas) Andreas Heger -->
+<!-- Title: tree Pages: 1 -->
+<svg width="4320pt" height="480pt"
+ viewBox="0.00 0.00 432.00 48.31" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<g id="graph0" class="graph" transform="scale(0.0388979 0.0388979) rotate(0) translate(4 1238)">
+<title>tree</title>
+<polygon style="fill:white;stroke:white;" points="-4,4 -4,-1238 11102,-1238 11102,4 -4,4"/>
+<g id="cluster2" class="cluster"><title>clustertasks</title>
+<polygon style="fill:none;stroke:black;" points="12,-16 12,-1218 10914,-1218 10914,-16 12,-16"/>
+<text text-anchor="middle" x="5463" y="-1187" style="font-family:Times New Roman;font-size:30.00;">Pipeline:</text>
+</g>
+<g id="cluster3" class="cluster"><title>clusterkey</title>
+<polygon style="fill:#e5e5e5;stroke:#e5e5e5;" points="10922,-872 10922,-1226 11090,-1226 11090,-872 10922,-872"/>
+<text text-anchor="middle" x="11006" y="-1195" style="font-family:Times New Roman;font-size:30.00;">Key:</text>
+</g>
+<!-- t0 -->
+<g id="node2" class="node"><title>t0</title>
+<polygon style="fill:#90ee90;stroke:green;" points="1983,-1168 1861,-1168 1857,-1164 1857,-1132 1979,-1132 1983,-1136 1983,-1168"/>
+<polyline style="fill:none;stroke:green;" points="1979,-1164 1857,-1164 "/>
+<polyline style="fill:none;stroke:green;" points="1979,-1164 1979,-1132 "/>
+<polyline style="fill:none;stroke:green;" points="1979,-1164 1983,-1168 "/>
+<text text-anchor="middle" x="1920" y="-1143.5" style="font-family:Times New Roman;font-size:20.00;">indexGenome</text>
+</g>
+<!-- t181 -->
+<g id="node32" class="node"><title>t181</title>
+<polygon style="fill:none;stroke:blue;" points="1134,-312 1062,-312 1058,-308 1058,-276 1130,-276 1134,-280 1134,-312"/>
+<polyline style="fill:none;stroke:blue;" points="1130,-308 1058,-308 "/>
+<polyline style="fill:none;stroke:blue;" points="1130,-308 1130,-276 "/>
+<polyline style="fill:none;stroke:blue;" points="1130,-308 1134,-312 "/>
+<text text-anchor="middle" x="1096" y="-287.5" style="font-family:Times New Roman;font-size:20.00;fill:blue;">prepare</text>
+</g>
+<!-- t0->t181 -->
+<g id="edge383" class="edge"><title>t0->t181</title>
+<path style="fill:none;stroke:gray;" d="M1913,-1132C1903,-1102 1885,-1043 1885,-991 1885,-991 1885,-991 1885,-664 1885,-602 1902,-577 1865,-528 1798,-436 1744,-449 1634,-418 1555,-395 1520,-440 1450,-396 1435,-385 1444,-369 1428,-360 1376,-327 1208,-361 1150,-338 1139,-333 1129,-326 1120,-319"/>
+<polygon style="fill:gray;stroke:gray;" points="1122.55,-316.596 1113,-312 1117.6,-321.546 1122.55,-316.596"/>
+</g>
+<!-- t16 -->
+<g id="node34" class="node"><title>t16</title>
+<polygon style="fill:#90ee90;stroke:green;" points="5835,-1094 5737,-1094 5733,-1090 5733,-1058 5831,-1058 5835,-1062 5835,-1094"/>
+<polyline style="fill:none;stroke:green;" points="5831,-1090 5733,-1090 "/>
+<polyline style="fill:none;stroke:green;" points="5831,-1090 5831,-1058 "/>
+<polyline style="fill:none;stroke:green;" points="5831,-1090 5835,-1094 "/>
+<text text-anchor="middle" x="5784" y="-1069.5" style="font-family:Times New Roman;font-size:20.00;">buildBAM</text>
+</g>
+<!-- t0->t16 -->
+<g id="edge21" class="edge"><title>t0->t16</title>
+<path style="fill:none;stroke:gray;" d="M1983,-1149C2442,-1140 5269,-1086 5723,-1077"/>
+<polygon style="fill:gray;stroke:gray;" points="5723,-1080.5 5733,-1077 5723,-1073.5 5723,-1080.5"/>
+</g>
+<!-- t39 -->
+<g id="node3" class="node"><title>t39</title>
+<polygon style="fill:#90ee90;stroke:green;" points="2811,-396 2693,-396 2689,-392 2689,-360 2807,-360 2811,-364 2811,-396"/>
+<polyline style="fill:none;stroke:green;" points="2807,-392 2689,-392 "/>
+<polyline style="fill:none;stroke:green;" points="2807,-392 2807,-360 "/>
+<polyline style="fill:none;stroke:green;" points="2807,-392 2811,-396 "/>
+<text text-anchor="middle" x="2750" y="-371.5" style="font-family:Times New Roman;font-size:20.00;">buildGenome</text>
+</g>
+<!-- t39->t181 -->
+<g id="edge349" class="edge"><title>t39->t181</title>
+<path style="fill:none;stroke:gray;" d="M2689,-362C2684,-361 2680,-361 2675,-360 2591,-349 1228,-367 1148,-338 1137,-334 1128,-327 1119,-320"/>
+<polygon style="fill:gray;stroke:gray;" points="1121.22,-317.221 1112,-312 1115.95,-321.831 1121.22,-317.221"/>
+</g>
+<!-- t36 -->
+<g id="node4" class="node"><title>t36</title>
+<polygon style="fill:#90ee90;stroke:green;" points="7517,-480 7359,-480 7355,-476 7355,-444 7513,-444 7517,-448 7517,-480"/>
+<polyline style="fill:none;stroke:green;" points="7513,-476 7355,-476 "/>
+<polyline style="fill:none;stroke:green;" points="7513,-476 7513,-444 "/>
+<polyline style="fill:none;stroke:green;" points="7513,-476 7517,-480 "/>
+<text text-anchor="middle" x="7436" y="-455.5" style="font-family:Times New Roman;font-size:20.00;">buildGeneRegions</text>
+</g>
+<!-- t42 -->
+<g id="node20" class="node"><title>t42</title>
+<polygon style="fill:#90ee90;stroke:green;" points="10199,-396 10001,-396 9997,-392 9997,-360 10195,-360 10199,-364 10199,-396"/>
+<polyline style="fill:none;stroke:green;" points="10195,-392 9997,-392 "/>
+<polyline style="fill:none;stroke:green;" points="10195,-392 10195,-360 "/>
+<polyline style="fill:none;stroke:green;" points="10195,-392 10199,-396 "/>
+<text text-anchor="middle" x="10098" y="-371.5" style="font-family:Times New Roman;font-size:20.00;">exportReferenceAsBed</text>
+</g>
+<!-- t36->t42 -->
+<g id="edge339" class="edge"><title>t36->t42</title>
+<path style="fill:none;stroke:gray;" d="M7517,-461C7894,-458 9471,-443 9987,-396"/>
+<polygon style="fill:gray;stroke:gray;" points="9987.4,-399.478 9997,-395 9986.7,-392.512 9987.4,-399.478"/>
+</g>
+<!-- t36->t181 -->
+<g id="edge355" class="edge"><title>t36->t181</title>
+<path style="fill:none;stroke:gray;" d="M7355,-460C7088,-453 6213,-430 5489,-418 5459,-417 3311,-411 3284,-396 3269,-386 3280,-369 3264,-360 3214,-330 1219,-355 1163,-338 1149,-334 1137,-326 1125,-318"/>
+<polygon style="fill:gray;stroke:gray;" points="1127.1,-315.2 1117,-312 1122.9,-320.8 1127.1,-315.2"/>
+</g>
+<!-- t97 -->
+<g id="node160" class="node"><title>t97</title>
+<polygon style="fill:#90ee90;stroke:green;" points="8589,-396 8473,-396 8469,-392 8469,-360 8585,-360 8589,-364 8589,-396"/>
+<polyline style="fill:none;stroke:green;" points="8585,-392 8469,-392 "/>
+<polyline style="fill:none;stroke:green;" points="8585,-392 8585,-360 "/>
+<polyline style="fill:none;stroke:green;" points="8585,-392 8589,-396 "/>
+<text text-anchor="middle" x="8529" y="-371.5" style="font-family:Times New Roman;font-size:20.00;">annotateTSS</text>
+</g>
+<!-- t36->t97 -->
+<g id="edge143" class="edge"><title>t36->t97</title>
+<path style="fill:none;stroke:gray;" d="M7517,-459C7722,-450 8260,-426 8459,-396"/>
+<polygon style="fill:gray;stroke:gray;" points="8459.88,-399.393 8469,-394 8458.51,-392.529 8459.88,-399.393"/>
+</g>
+<!-- t167 -->
+<g id="node167" class="node"><title>t167</title>
+<polygon style="fill:#90ee90;stroke:green;" points="8889,-396 8611,-396 8607,-392 8607,-360 8885,-360 8889,-364 8889,-396"/>
+<polyline style="fill:none;stroke:green;" points="8885,-392 8607,-392 "/>
+<polyline style="fill:none;stroke:green;" points="8885,-392 8885,-360 "/>
+<polyline style="fill:none;stroke:green;" points="8885,-392 8889,-396 "/>
+<text text-anchor="middle" x="8748" y="-371.5" style="font-family:Times New Roman;font-size:20.00;">annotateTSSIntervalAssociations</text>
+</g>
+<!-- t36->t167 -->
+<g id="edge149" class="edge"><title>t36->t167</title>
+<path style="fill:none;stroke:gray;" d="M7517,-459C7705,-452 8185,-431 8597,-396"/>
+<polygon style="fill:gray;stroke:gray;" points="8597.4,-399.478 8607,-395 8596.7,-392.512 8597.4,-399.478"/>
+</g>
+<!-- t165 -->
+<g id="node171" class="node"><title>t165</title>
+<polygon style="fill:#90ee90;stroke:green;" points="8451,-396 8203,-396 8199,-392 8199,-360 8447,-360 8451,-364 8451,-396"/>
+<polyline style="fill:none;stroke:green;" points="8447,-392 8199,-392 "/>
+<polyline style="fill:none;stroke:green;" points="8447,-392 8447,-360 "/>
+<polyline style="fill:none;stroke:green;" points="8447,-392 8451,-396 "/>
+<text text-anchor="middle" x="8325" y="-371.5" style="font-family:Times New Roman;font-size:20.00;">annotateTSSIntervalDistance</text>
+</g>
+<!-- t36->t165 -->
+<g id="edge153" class="edge"><title>t36->t165</title>
+<path style="fill:none;stroke:gray;" d="M7517,-456C7655,-445 7942,-422 8185,-396 8186,-396 8188,-396 8189,-395"/>
+<polygon style="fill:gray;stroke:gray;" points="8189.4,-398.478 8199,-394 8188.7,-391.512 8189.4,-398.478"/>
+</g>
+<!-- t106 -->
+<g id="node178" class="node"><title>t106</title>
+<polygon style="fill:#90ee90;stroke:green;" points="9083,-396 8911,-396 8907,-392 8907,-360 9079,-360 9083,-364 9083,-396"/>
+<polyline style="fill:none;stroke:green;" points="9079,-392 8907,-392 "/>
+<polyline style="fill:none;stroke:green;" points="9079,-392 9079,-360 "/>
+<polyline style="fill:none;stroke:green;" points="9079,-392 9083,-396 "/>
+<text text-anchor="middle" x="8995" y="-371.5" style="font-family:Times New Roman;font-size:20.00;">buildIntervalCounts</text>
+</g>
+<!-- t36->t106 -->
+<g id="edge159" class="edge"><title>t36->t106</title>
+<path style="fill:none;stroke:gray;" d="M7517,-460C7780,-452 8607,-428 8897,-396"/>
+<polygon style="fill:gray;stroke:gray;" points="8897.4,-399.478 8907,-395 8896.7,-392.512 8897.4,-399.478"/>
+</g>
+<!-- t178 -->
+<g id="node186" class="node"><title>t178</title>
+<polygon style="fill:#90ee90;stroke:green;" points="9651,-396 9457,-396 9453,-392 9453,-360 9647,-360 9651,-364 9651,-396"/>
+<polyline style="fill:none;stroke:green;" points="9647,-392 9453,-392 "/>
+<polyline style="fill:none;stroke:green;" points="9647,-392 9647,-360 "/>
+<polyline style="fill:none;stroke:green;" points="9647,-392 9651,-396 "/>
+<text text-anchor="middle" x="9552" y="-371.5" style="font-family:Times New Roman;font-size:20.00;">importOverlapRegions</text>
+</g>
+<!-- t36->t178 -->
+<g id="edge167" class="edge"><title>t36->t178</title>
+<path style="fill:none;stroke:gray;" d="M7517,-461C7840,-456 9040,-434 9443,-396"/>
+<polygon style="fill:gray;stroke:gray;" points="9443.4,-399.478 9453,-395 9442.7,-392.512 9443.4,-399.478"/>
+</g>
+<!-- t114 -->
+<g id="node195" class="node"><title>t114</title>
+<polygon style="fill:#90ee90;stroke:green;" points="5889,-202 5683,-202 5679,-198 5679,-166 5885,-166 5889,-170 5889,-202"/>
+<polyline style="fill:none;stroke:green;" points="5885,-198 5679,-198 "/>
+<polyline style="fill:none;stroke:green;" points="5885,-198 5885,-166 "/>
+<polyline style="fill:none;stroke:green;" points="5885,-198 5889,-202 "/>
+<text text-anchor="middle" x="5784" y="-177.5" style="font-family:Times New Roman;font-size:20.00;">makeAnnotatorDistance</text>
+</g>
+<!-- t36->t114 -->
+<g id="edge175" class="edge"><title>t36->t114</title>
+<path style="fill:none;stroke:gray;" d="M7426,-444C7419,-431 7409,-412 7401,-396 7394,-380 7401,-369 7386,-360 7344,-331 6509,-370 6469,-338 6438,-312 6480,-275 6449,-250 6409,-216 6030,-235 5977,-228 5938,-223 5895,-213 5860,-204"/>
+<polygon style="fill:gray;stroke:gray;" points="5860.49,-200.529 5850,-202 5859.12,-207.393 5860.49,-200.529"/>
+</g>
+<!-- t117 -->
+<g id="node197" class="node"><title>t117</title>
+<polygon style="fill:#90ee90;stroke:green;" points="8161,-396 7925,-396 7921,-392 7921,-360 8157,-360 8161,-364 8161,-396"/>
+<polyline style="fill:none;stroke:green;" points="8157,-392 7921,-392 "/>
+<polyline style="fill:none;stroke:green;" points="8157,-392 8157,-360 "/>
+<polyline style="fill:none;stroke:green;" points="8157,-392 8161,-396 "/>
+<text text-anchor="middle" x="8041" y="-371.5" style="font-family:Times New Roman;font-size:20.00;">makeAnnotatorArchitecture</text>
+</g>
+<!-- t36->t117 -->
+<g id="edge177" class="edge"><title>t36->t117</title>
+<path style="fill:none;stroke:gray;" d="M7517,-451C7618,-437 7791,-412 7911,-396"/>
+<polygon style="fill:gray;stroke:gray;" points="7911.4,-399.478 7921,-395 7910.7,-392.512 7911.4,-399.478"/>
+</g>
+<!-- t130 -->
+<g id="node205" class="node"><title>t130</title>
+<polygon style="fill:#90ee90;stroke:green;" points="6728,-312 6482,-312 6478,-308 6478,-276 6724,-276 6728,-280 6728,-312"/>
+<polyline style="fill:none;stroke:green;" points="6724,-308 6478,-308 "/>
+<polyline style="fill:none;stroke:green;" points="6724,-308 6724,-276 "/>
+<polyline style="fill:none;stroke:green;" points="6724,-308 6728,-312 "/>
+<text text-anchor="middle" x="6603" y="-287.5" style="font-family:Times New Roman;font-size:20.00;">makeAnnotatorPromotorsGO</text>
+</g>
+<!-- t36->t130 -->
+<g id="edge185" class="edge"><title>t36->t130</title>
+<path style="fill:none;stroke:gray;" d="M7436,-444C7434,-420 7428,-379 7401,-360 7372,-338 6774,-344 6737,-338 6711,-334 6682,-324 6657,-316"/>
+<polygon style="fill:gray;stroke:gray;" points="6658.56,-312.863 6648,-312 6655.72,-319.26 6658.56,-312.863"/>
+</g>
+<!-- t131 -->
+<g id="node209" class="node"><title>t131</title>
+<polygon style="fill:#90ee90;stroke:green;" points="7034,-312 6750,-312 6746,-308 6746,-276 7030,-276 7034,-280 7034,-312"/>
+<polyline style="fill:none;stroke:green;" points="7030,-308 6746,-308 "/>
+<polyline style="fill:none;stroke:green;" points="7030,-308 7030,-276 "/>
+<polyline style="fill:none;stroke:green;" points="7030,-308 7034,-312 "/>
+<text text-anchor="middle" x="6890" y="-287.5" style="font-family:Times New Roman;font-size:20.00;">makeAnnotatorPromotorsGOSlim</text>
+</g>
+<!-- t36->t131 -->
+<g id="edge189" class="edge"><title>t36->t131</title>
+<path style="fill:none;stroke:gray;" d="M7517,-456C7642,-446 7868,-423 7892,-396 7903,-383 7904,-371 7892,-360 7860,-326 7090,-345 7043,-338 7013,-334 6979,-324 6951,-315"/>
+<polygon style="fill:gray;stroke:gray;" points="6951.58,-311.521 6941,-312 6949.57,-318.226 6951.58,-311.521"/>
+</g>
+<!-- t132 -->
+<g id="node213" class="node"><title>t132</title>
+<polygon style="fill:#90ee90;stroke:green;" points="4810,-312 4562,-312 4558,-308 4558,-276 4806,-276 4810,-280 4810,-312"/>
+<polyline style="fill:none;stroke:green;" points="4806,-308 4558,-308 "/>
+<polyline style="fill:none;stroke:green;" points="4806,-308 4806,-276 "/>
+<polyline style="fill:none;stroke:green;" points="4806,-308 4810,-312 "/>
+<text text-anchor="middle" x="4684" y="-287.5" style="font-family:Times New Roman;font-size:20.00;">makeAnnotatorTerritoriesGO</text>
+</g>
+<!-- t36->t132 -->
+<g id="edge193" class="edge"><title>t36->t132</title>
+<path style="fill:none;stroke:gray;" d="M7355,-461C7056,-456 6030,-435 5973,-396 5959,-386 5973,-369 5958,-360 5906,-324 4882,-348 4819,-338 4792,-334 4763,-324 4738,-316"/>
+<polygon style="fill:gray;stroke:gray;" points="4739.56,-312.863 4729,-312 4736.72,-319.26 4739.56,-312.863"/>
+</g>
+<!-- t133 -->
+<g id="node217" class="node"><title>t133</title>
+<polygon style="fill:#90ee90;stroke:green;" points="5116,-312 4832,-312 4828,-308 4828,-276 5112,-276 5116,-280 5116,-312"/>
+<polyline style="fill:none;stroke:green;" points="5112,-308 4828,-308 "/>
+<polyline style="fill:none;stroke:green;" points="5112,-308 5112,-276 "/>
+<polyline style="fill:none;stroke:green;" points="5112,-308 5116,-312 "/>
+<text text-anchor="middle" x="4972" y="-287.5" style="font-family:Times New Roman;font-size:20.00;">makeAnnotatorTerritoriesGOSlim</text>
+</g>
+<!-- t36->t133 -->
+<g id="edge197" class="edge"><title>t36->t133</title>
+<path style="fill:none;stroke:gray;" d="M7355,-460C7060,-453 6058,-428 5998,-396 5981,-386 5991,-369 5973,-360 5932,-337 5172,-345 5125,-338 5095,-334 5061,-324 5033,-315"/>
+<polygon style="fill:gray;stroke:gray;" points="5033.58,-311.521 5023,-312 5031.57,-318.226 5033.58,-311.521"/>
+</g>
+<!-- t118 -->
+<g id="node225" class="node"><title>t118</title>
+<polygon style="fill:#90ee90;stroke:green;" points="7651,-396 7459,-396 7455,-392 7455,-360 7647,-360 7651,-364 7651,-396"/>
+<polyline style="fill:none;stroke:green;" points="7647,-392 7455,-392 "/>
+<polyline style="fill:none;stroke:green;" points="7647,-392 7647,-360 "/>
+<polyline style="fill:none;stroke:green;" points="7647,-392 7651,-396 "/>
+<text text-anchor="middle" x="7553" y="-371.5" style="font-family:Times New Roman;font-size:20.00;">makeAnnotatorGWAS</text>
+</g>
+<!-- t36->t118 -->
+<g id="edge205" class="edge"><title>t36->t118</title>
+<path style="fill:none;stroke:gray;" d="M7461,-444C7477,-432 7500,-416 7519,-402"/>
+<polygon style="fill:gray;stroke:gray;" points="7521.1,-404.8 7527,-396 7516.9,-399.2 7521.1,-404.8"/>
+</g>
+<!-- t119 -->
+<g id="node229" class="node"><title>t119</title>
+<polygon style="fill:#90ee90;stroke:green;" points="7883,-396 7673,-396 7669,-392 7669,-360 7879,-360 7883,-364 7883,-396"/>
+<polyline style="fill:none;stroke:green;" points="7879,-392 7669,-392 "/>
+<polyline style="fill:none;stroke:green;" points="7879,-392 7879,-360 "/>
+<polyline style="fill:none;stroke:green;" points="7879,-392 7883,-396 "/>
+<text text-anchor="middle" x="7776" y="-371.5" style="font-family:Times New Roman;font-size:20.00;">makeAnnotatorSelection</text>
+</g>
+<!-- t36->t119 -->
+<g id="edge209" class="edge"><title>t36->t119</title>
+<path style="fill:none;stroke:gray;" d="M7509,-444C7563,-431 7636,-412 7693,-398"/>
+<polygon style="fill:gray;stroke:gray;" points="7693.88,-401.393 7703,-396 7692.51,-394.529 7693.88,-401.393"/>
+</g>
+<!-- t147 -->
+<g id="node249" class="node"><title>t147</title>
+<polygon style="fill:none;stroke:blue;" points="5737,-396 5417,-396 5413,-392 5413,-360 5733,-360 5737,-364 5737,-396"/>
+<polyline style="fill:none;stroke:blue;" points="5733,-392 5413,-392 "/>
+<polyline style="fill:none;stroke:blue;" points="5733,-392 5733,-360 "/>
+<polyline style="fill:none;stroke:blue;" points="5733,-392 5737,-396 "/>
+<text text-anchor="middle" x="5575" y="-371.5" style="font-family:Times New Roman;font-size:20.00;fill:blue;">makeAnnotatorArchitectureWithMotif</text>
+</g>
+<!-- t36->t147 -->
+<g id="edge233" class="edge"><title>t36->t147</title>
+<path style="fill:none;stroke:gray;" d="M7355,-461C7112,-456 6372,-440 5747,-396"/>
+<polygon style="fill:gray;stroke:gray;" points="5747.3,-392.512 5737,-395 5746.6,-399.478 5747.3,-392.512"/>
+</g>
+<!-- t148 -->
+<g id="node253" class="node"><title>t148</title>
+<polygon style="fill:none;stroke:blue;" points="5395,-396 5049,-396 5045,-392 5045,-360 5391,-360 5395,-364 5395,-396"/>
+<polyline style="fill:none;stroke:blue;" points="5391,-392 5045,-392 "/>
+<polyline style="fill:none;stroke:blue;" points="5391,-392 5391,-360 "/>
+<polyline style="fill:none;stroke:blue;" points="5391,-392 5395,-396 "/>
+<text text-anchor="middle" x="5220" y="-371.5" style="font-family:Times New Roman;font-size:20.00;fill:blue;">makeAnnotatorArchitectureWithoutMotif</text>
+</g>
+<!-- t36->t148 -->
+<g id="edge237" class="edge"><title>t36->t148</title>
+<path style="fill:none;stroke:gray;" d="M7355,-461C7081,-458 6170,-445 5405,-396"/>
+<polygon style="fill:gray;stroke:gray;" points="5405.3,-392.512 5395,-395 5404.6,-399.478 5405.3,-392.512"/>
+</g>
+<!-- t139 -->
+<g id="node284" class="node"><title>t139</title>
+<polygon style="fill:none;stroke:blue;" points="6771,-396 6573,-396 6569,-392 6569,-360 6767,-360 6771,-364 6771,-396"/>
+<polyline style="fill:none;stroke:blue;" points="6767,-392 6569,-392 "/>
+<polyline style="fill:none;stroke:blue;" points="6767,-392 6767,-360 "/>
+<polyline style="fill:none;stroke:blue;" points="6767,-392 6771,-396 "/>
+<text text-anchor="middle" x="6670" y="-371.5" style="font-family:Times New Roman;font-size:20.00;fill:blue;">makeAnnotatorROIGO</text>
+</g>
+<!-- t36->t139 -->
+<g id="edge277" class="edge"><title>t36->t139</title>
+<path style="fill:none;stroke:gray;" d="M7355,-455C7233,-445 6995,-424 6781,-396"/>
+<polygon style="fill:gray;stroke:gray;" points="6781.3,-392.512 6771,-395 6780.6,-399.478 6781.3,-392.512"/>
+</g>
+<!-- t138 -->
+<g id="node288" class="node"><title>t138</title>
+<polygon style="fill:none;stroke:blue;" points="6247,-396 6011,-396 6007,-392 6007,-360 6243,-360 6247,-364 6247,-396"/>
+<polyline style="fill:none;stroke:blue;" points="6243,-392 6007,-392 "/>
+<polyline style="fill:none;stroke:blue;" points="6243,-392 6243,-360 "/>
+<polyline style="fill:none;stroke:blue;" points="6243,-392 6247,-396 "/>
+<text text-anchor="middle" x="6127" y="-371.5" style="font-family:Times New Roman;font-size:20.00;fill:blue;">makeAnnotatorROIGOSlim</text>
+</g>
+<!-- t36->t138 -->
+<g id="edge281" class="edge"><title>t36->t138</title>
+<path style="fill:none;stroke:gray;" d="M7355,-460C7165,-454 6676,-436 6257,-396"/>
+<polygon style="fill:gray;stroke:gray;" points="6257.3,-392.512 6247,-395 6256.6,-399.478 6257.3,-392.512"/>
+</g>
+<!-- t141 -->
+<g id="node292" class="node"><title>t141</title>
+<polygon style="fill:none;stroke:blue;" points="7055,-396 6793,-396 6789,-392 6789,-360 7051,-360 7055,-364 7055,-396"/>
+<polyline style="fill:none;stroke:blue;" points="7051,-392 6789,-392 "/>
+<polyline style="fill:none;stroke:blue;" points="7051,-392 7051,-360 "/>
+<polyline style="fill:none;stroke:blue;" points="7051,-392 7055,-396 "/>
+<text text-anchor="middle" x="6922" y="-371.5" style="font-family:Times New Roman;font-size:20.00;fill:blue;">makeAnnotatorROIOverlapGO</text>
+</g>
+<!-- t36->t141 -->
+<g id="edge285" class="edge"><title>t36->t141</title>
+<path style="fill:none;stroke:gray;" d="M7355,-449C7271,-435 7138,-414 7042,-398"/>
+<polygon style="fill:gray;stroke:gray;" points="7042.49,-394.529 7032,-396 7041.12,-401.393 7042.49,-394.529"/>
+</g>
+<!-- t140 -->
+<g id="node296" class="node"><title>t140</title>
+<polygon style="fill:none;stroke:blue;" points="7377,-396 7077,-396 7073,-392 7073,-360 7373,-360 7377,-364 7377,-396"/>
+<polyline style="fill:none;stroke:blue;" points="7373,-392 7073,-392 "/>
+<polyline style="fill:none;stroke:blue;" points="7373,-392 7373,-360 "/>
+<polyline style="fill:none;stroke:blue;" points="7373,-392 7377,-396 "/>
+<text text-anchor="middle" x="7225" y="-371.5" style="font-family:Times New Roman;font-size:20.00;fill:blue;">makeAnnotatorROIOverlapGOSlim</text>
+</g>
+<!-- t36->t140 -->
+<g id="edge289" class="edge"><title>t36->t140</title>
+<path style="fill:none;stroke:gray;" d="M7391,-444C7359,-431 7314,-413 7280,-400"/>
+<polygon style="fill:gray;stroke:gray;" points="7281.56,-396.863 7271,-396 7278.72,-403.26 7281.56,-396.863"/>
+</g>
+<!-- t37 -->
+<g id="node5" class="node"><title>t37</title>
+<polygon style="fill:#90ee90;stroke:green;" points="2618,-480 2498,-480 2494,-476 2494,-444 2614,-444 2618,-448 2618,-480"/>
+<!-- Graphviz-generated SVG flowchart markup omitted here (node and edge definitions only;
+     no hand-written content). The recoverable information is the set of pipeline task
+     nodes drawn in this portion of the graph: buildGeneSet, importGeneStats,
+     buildTranscripts, buildPromotorRegions, buildTSSRegions, importTranscripts,
+     buildRepeats, importRegionsOfInterest, exportRegionsOfInterest,
+     importRegionsOfInterestGenes, importGWAS, importMergedGWAS, importSNPsOfInterest,
+     buildSNPCoverage, importOverlapRegionsOfInterestEnsembl, importUCSCEncodeTracks,
+     exportUCSCEncodeTracks, importTranscriptInformation, importGeneInformation,
+     buildAnnotatorGeneTerritories, the makeAnnotator* family of tasks, buildBAMStats,
+     normalizeBAMPerReplicate, normalizeBAMControls, runMACS, importMACS, summarizeMACS,
+     importMACSSummary, exportIntervalsAsBed, combineReplicates, combineConditions,
+     combineUnstim, importCombinedIntervals, buildIntervals, importReferenceIntervals,
+     buildOverlap, buildUCSCOverlap, makeMerged, makeMotifs, exportMotifSequences,
+     exportMotifControlSequences, runMEME, runTomTom, importTomTom, filterMotifs, runMAST,
+     runGLAM2, runGLAM2SCAN, runBioProspector, build_motifs, intervals, and full,
+     together with the dependency edges connecting them. -->
+<polygon style="fill:gray;stroke:gray;" points="648.958,-319.916 656,-312 645.559,-313.797 648.958,-319.916"/>
+</g>
+<!-- t185->t191 -->
+<g id="edge325" class="edge"><title>t185->t191</title>
+<path style="fill:none;stroke:blue;" d="M707,-276C720,-266 737,-255 753,-250 808,-231 969,-265 1012,-228 1043,-201 1004,-168 1032,-140 1109,-62 1163,-95 1271,-82 1500,-52 3210,-43 3487,-42"/>
+<polygon style="fill:blue;stroke:blue;" points="3487,-45.5001 3497,-42 3487,-38.5001 3487,-45.5001"/>
+</g>
+<!-- t90 -->
+<g id="node84" class="node"><title>t90</title>
+<polygon style="fill:#90ee90;stroke:green;" points="1396,-202 1276,-202 1272,-198 1272,-166 1392,-166 1396,-170 1396,-202"/>
+<polyline style="fill:none;stroke:green;" points="1392,-198 1272,-198 "/>
+<polyline style="fill:none;stroke:green;" points="1392,-198 1392,-166 "/>
+<polyline style="fill:none;stroke:green;" points="1392,-198 1396,-202 "/>
+<text text-anchor="middle" x="1334" y="-177.5" style="font-family:Times New Roman;font-size:20.00;">importMAST</text>
+</g>
+<!-- t79->t90 -->
+<g id="edge67" class="edge"><title>t79->t90</title>
+<path style="fill:none;stroke:gray;" d="M1009,-276C1020,-266 1034,-255 1049,-250 1138,-214 1173,-258 1263,-228 1277,-223 1291,-216 1303,-208"/>
+<polygon style="fill:gray;stroke:gray;" points="1305.1,-210.8 1311,-202 1300.9,-205.2 1305.1,-210.8"/>
+</g>
+<!-- t186 -->
+<g id="node90" class="node"><title>t186</title>
+<polygon style="fill:none;stroke:blue;" points="1388,-118 1284,-118 1280,-114 1280,-82 1384,-82 1388,-86 1388,-118"/>
+<polyline style="fill:none;stroke:blue;" points="1384,-114 1280,-114 "/>
+<polyline style="fill:none;stroke:blue;" points="1384,-114 1384,-82 "/>
+<polyline style="fill:none;stroke:blue;" points="1384,-114 1388,-118 "/>
+<text text-anchor="middle" x="1334" y="-93.5" style="font-family:Times New Roman;font-size:20.00;fill:blue;">find_motifs</text>
+</g>
+<!-- t79->t186 -->
+<g id="edge431" class="edge"><title>t79->t186</title>
+<path style="fill:none;stroke:gray;" d="M1009,-276C1020,-266 1034,-255 1049,-250 1087,-236 1378,-256 1405,-228 1432,-199 1425,-173 1405,-140 1402,-134 1397,-129 1391,-124"/>
+<polygon style="fill:gray;stroke:gray;" points="1393.1,-121.2 1383,-118 1388.9,-126.8 1393.1,-121.2"/>
+</g>
+<!-- t90->t186 -->
+<g id="edge429" class="edge"><title>t90->t186</title>
+<path style="fill:none;stroke:gray;" d="M1334,-166C1334,-155 1334,-141 1334,-128"/>
+<polygon style="fill:gray;stroke:gray;" points="1337.5,-128 1334,-118 1330.5,-128 1337.5,-128"/>
+</g>
+<!-- t92 -->
+<g id="node88" class="node"><title>t92</title>
+<polygon style="fill:none;stroke:blue;" points="1254,-202 1070,-202 1066,-198 1066,-166 1250,-166 1254,-170 1254,-202"/>
+<polyline style="fill:none;stroke:blue;" points="1250,-198 1066,-198 "/>
+<polyline style="fill:none;stroke:blue;" points="1250,-198 1250,-166 "/>
+<polyline style="fill:none;stroke:blue;" points="1250,-198 1254,-202 "/>
+<text text-anchor="middle" x="1160" y="-177.5" style="font-family:Times New Roman;font-size:20.00;fill:blue;">importGLAM2SCAN</text>
+</g>
+<!-- t91->t92 -->
+<g id="edge71" class="edge"><title>t91->t92</title>
+<path style="fill:none;stroke:blue;" d="M874,-276C891,-267 913,-256 933,-250 985,-233 1001,-243 1052,-228 1072,-222 1093,-214 1112,-206"/>
+<polygon style="fill:blue;stroke:blue;" points="1113.28,-209.26 1121,-202 1110.44,-202.863 1113.28,-209.26"/>
+</g>
+<!-- t91->t186 -->
+<g id="edge75" class="edge"><title>t91->t186</title>
+<path style="fill:none;stroke:blue;" d="M875,-276C892,-267 913,-257 933,-250 976,-235 1000,-258 1032,-228 1062,-200 1027,-166 1057,-140 1073,-126 1193,-112 1270,-106"/>
+<polygon style="fill:blue;stroke:blue;" points="1270.4,-109.478 1280,-105 1269.7,-102.512 1270.4,-109.478"/>
+</g>
+<!-- t92->t186 -->
+<g id="edge73" class="edge"><title>t92->t186</title>
+<path style="fill:none;stroke:blue;" d="M1197,-166C1223,-153 1259,-136 1287,-122"/>
+<polygon style="fill:blue;stroke:blue;" points="1288.28,-125.26 1296,-118 1285.44,-118.863 1288.28,-125.26"/>
+</g>
+<!-- t186->t191 -->
+<g id="edge323" class="edge"><title>t186->t191</title>
+<path style="fill:none;stroke:blue;" d="M1388,-90C1407,-87 1429,-84 1449,-82 1659,-63 3223,-45 3487,-42"/>
+<polygon style="fill:blue;stroke:blue;" points="3487,-45.5001 3497,-42 3487,-38.5001 3487,-45.5001"/>
+</g>
+<!-- t44 -->
+<g id="node93" class="node"><title>t44</title>
+<polygon style="fill:#90ee90;stroke:green;" points="693,-924 451,-924 447,-920 447,-888 689,-888 693,-892 693,-924"/>
+<polyline style="fill:none;stroke:green;" points="689,-920 447,-920 "/>
+<polyline style="fill:none;stroke:green;" points="689,-920 689,-888 "/>
+<polyline style="fill:none;stroke:green;" points="689,-920 693,-924 "/>
+<text text-anchor="middle" x="570" y="-899.5" style="font-family:Times New Roman;font-size:20.00;">importAffymetrixAnnotation</text>
+</g>
+<!-- t45 -->
+<g id="node94" class="node"><title>t45</title>
+<polygon style="fill:#90ee90;stroke:green;" points="1105,-858 893,-858 889,-854 889,-822 1101,-822 1105,-826 1105,-858"/>
+<polyline style="fill:none;stroke:green;" points="1101,-854 889,-854 "/>
+<polyline style="fill:none;stroke:green;" points="1101,-854 1101,-822 "/>
+<polyline style="fill:none;stroke:green;" points="1101,-854 1105,-858 "/>
+<text text-anchor="middle" x="997" y="-833.5" style="font-family:Times New Roman;font-size:20.00;">buildProbeset2Transcript</text>
+</g>
+<!-- t44->t45 -->
+<g id="edge77" class="edge"><title>t44->t45</title>
+<path style="fill:none;stroke:gray;" d="M687,-888C747,-879 819,-867 879,-858"/>
+<polygon style="fill:gray;stroke:gray;" points="879.398,-861.478 889,-857 878.701,-854.512 879.398,-861.478"/>
+</g>
+<!-- t184 -->
+<g id="node128" class="node"><title>t184</title>
+<polygon style="fill:none;stroke:blue;" points="791,-118 695,-118 691,-114 691,-82 787,-82 791,-86 791,-118"/>
+<polyline style="fill:none;stroke:blue;" points="787,-114 691,-114 "/>
+<polyline style="fill:none;stroke:blue;" points="787,-114 787,-82 "/>
+<polyline style="fill:none;stroke:blue;" points="787,-114 791,-118 "/>
+<text text-anchor="middle" x="741" y="-93.5" style="font-family:Times New Roman;font-size:20.00;fill:blue;">expression</text>
+</g>
+<!-- t44->t184 -->
+<g id="edge461" class="edge"><title>t44->t184</title>
+<path style="fill:none;stroke:gray;" d="M447,-897C285,-882 21,-843 21,-756 21,-756 21,-756 21,-294 21,-225 -21,-188 26,-140 49,-116 512,-104 681,-101"/>
+<polygon style="fill:gray;stroke:gray;" points="681,-104.5 691,-101 681,-97.5001 681,-104.5"/>
+</g>
+<!-- t46 -->
+<g id="node96" class="node"><title>t46</title>
+<polygon style="fill:#90ee90;stroke:green;" points="1111,-774 887,-774 883,-770 883,-738 1107,-738 1111,-742 1111,-774"/>
+<polyline style="fill:none;stroke:green;" points="1107,-770 883,-770 "/>
+<polyline style="fill:none;stroke:green;" points="1107,-770 1107,-738 "/>
+<polyline style="fill:none;stroke:green;" points="1107,-770 1111,-774 "/>
+<text text-anchor="middle" x="997" y="-749.5" style="font-family:Times New Roman;font-size:20.00;">importProbeset2Transcript</text>
+</g>
+<!-- t45->t46 -->
+<g id="edge79" class="edge"><title>t45->t46</title>
+<path style="fill:none;stroke:gray;" d="M997,-822C997,-811 997,-797 997,-784"/>
+<polygon style="fill:gray;stroke:gray;" points="1000.5,-784 997,-774 993.5,-784 1000.5,-784"/>
+</g>
+<!-- t45->t184 -->
+<g id="edge459" class="edge"><title>t45->t184</title>
+<path style="fill:none;stroke:gray;" d="M1065,-822C1084,-816 1103,-809 1120,-800 1238,-740 1299,-736 1352,-616 1368,-580 1379,-556 1352,-528 1303,-474 1079,-556 1026,-506 998,-478 1045,-444 1016,-418 981,-384 611,-431 577,-396 566,-384 572,-375 577,-360 582,-348 590,-349 595,-338 614,-301 588,-275 619,-250 681,-199 922,-285 976,-228 1003,-199 1002,-169 976,-140 965,-127 867,-113 801,-106"/>
+<polygon style="fill:gray;stroke:gray;" points="801.299,-102.512 791,-105 800.602,-109.478 801.299,-102.512"/>
+</g>
+<!-- t49 -->
+<g id="node98" class="node"><title>t49</title>
+<polygon style="fill:#90ee90;stroke:green;" points="903.007,-670.774 763,-685.92 622.993,-670.774 623.124,-646.266 902.876,-646.266 903.007,-670.774"/>
+<polygon style="fill:none;stroke:green;" points="907.026,-674.362 763,-689.944 618.974,-674.362 619.145,-642.266 906.855,-642.266 907.026,-674.362"/>
+<text text-anchor="middle" x="763" y="-657.5" style="font-family:Times New Roman;font-size:20.00;">buildExpressionTracks</text>
+</g>
+<!-- t46->t49 -->
+<g id="edge81" class="edge"><title>t46->t49</title>
+<path style="fill:none;stroke:gray;" d="M951,-738C915,-723 864,-703 824,-688"/>
+<polygon style="fill:gray;stroke:gray;" points="825.56,-684.863 815,-684 822.717,-691.26 825.56,-684.863"/>
+</g>
+<!-- t46->t184 -->
+<g id="edge457" class="edge"><title>t46->t184</title>
+<path style="fill:none;stroke:gray;" d="M1071,-738C1196,-704 1427,-626 1335,-528 1287,-476 1072,-549 1016,-506 984,-480 1021,-443 988,-418 950,-388 581,-431 547,-396 536,-384 541,-374 547,-360 554,-345 566,-350 575,-338 599,-304 572,-275 604,-250 667,-201 906,-285 960,-228 987,-199 986,-170 960,-140 940,-116 859,-106 801,-103"/>
+<polygon style="fill:gray;stroke:gray;" points="801.299,-99.5125 791,-102 800.602,-106.478 801.299,-99.5125"/>
+</g>
+<!-- t50 -->
+<g id="node100" class="node"><title>t50</title>
+<polygon style="fill:#90ee90;stroke:green;" points="868,-590 662,-590 658,-586 658,-554 864,-554 868,-558 868,-590"/>
+<polyline style="fill:none;stroke:green;" points="864,-586 658,-586 "/>
+<polyline style="fill:none;stroke:green;" points="864,-586 864,-554 "/>
+<polyline style="fill:none;stroke:green;" points="864,-586 868,-590 "/>
+<text text-anchor="middle" x="763" y="-565.5" style="font-family:Times New Roman;font-size:20.00;">importExpressionTracks</text>
+</g>
+<!-- t49->t50 -->
+<g id="edge83" class="edge"><title>t49->t50</title>
+<path style="fill:none;stroke:gray;" d="M763,-642C763,-629 763,-614 763,-600"/>
+<polygon style="fill:gray;stroke:gray;" points="766.5,-600 763,-590 759.5,-600 766.5,-600"/>
+</g>
+<!-- t54 -->
+<g id="node102" class="node"><title>t54</title>
+<polygon style="fill:#90ee90;stroke:green;" points="1116,-590 890,-590 886,-586 886,-554 1112,-554 1116,-558 1116,-590"/>
+<polyline style="fill:none;stroke:green;" points="1112,-586 886,-586 "/>
+<polyline style="fill:none;stroke:green;" points="1112,-586 1112,-554 "/>
+<polyline style="fill:none;stroke:green;" points="1112,-586 1116,-590 "/>
+<text text-anchor="middle" x="1001" y="-565.5" style="font-family:Times New Roman;font-size:20.00;">importExpressionProbesets</text>
+</g>
+<!-- t49->t54 -->
+<g id="edge85" class="edge"><title>t49->t54</title>
+<path style="fill:none;stroke:gray;" d="M819,-642C857,-627 907,-608 944,-594"/>
+<polygon style="fill:gray;stroke:gray;" points="946.015,-596.964 954,-590 943.415,-590.464 946.015,-596.964"/>
+</g>
+<!-- t55 -->
+<g id="node104" class="node"><title>t55</title>
+<polygon style="fill:#90ee90;stroke:green;" points="1326,-590 1138,-590 1134,-586 1134,-554 1322,-554 1326,-558 1326,-590"/>
+<polyline style="fill:none;stroke:green;" points="1322,-586 1134,-586 "/>
+<polyline style="fill:none;stroke:green;" points="1322,-586 1322,-554 "/>
+<polyline style="fill:none;stroke:green;" points="1322,-586 1326,-590 "/>
+<text text-anchor="middle" x="1230" y="-565.5" style="font-family:Times New Roman;font-size:20.00;">importExpressionMap</text>
+</g>
+<!-- t49->t55 -->
+<g id="edge87" class="edge"><title>t49->t55</title>
+<path style="fill:none;stroke:gray;" d="M907,-654C974,-647 1055,-635 1125,-616 1145,-610 1166,-602 1184,-594"/>
+<polygon style="fill:gray;stroke:gray;" points="1185.28,-597.26 1193,-590 1182.44,-590.863 1185.28,-597.26"/>
+</g>
+<!-- t51 -->
+<g id="node106" class="node"><title>t51</title>
+<polygon style="fill:#90ee90;stroke:green;" points="44.2538,-559.794 172,-532.5 299.746,-559.794 299.627,-603.956 44.373,-603.956 44.2538,-559.794"/>
+<polygon style="fill:none;stroke:green;" points="40.2451,-556.56 172,-528.41 303.755,-556.56 303.616,-607.956 40.3838,-607.956 40.2451,-556.56"/>
+<text text-anchor="middle" x="172" y="-565.5" style="font-family:Times New Roman;font-size:20.00;">buildExpressionCorrelation</text>
+</g>
+<!-- t49->t51 -->
+<g id="edge89" class="edge"><title>t49->t51</title>
+<path style="fill:none;stroke:gray;" d="M619,-656C530,-649 414,-637 313,-616 305,-615 298,-613 291,-611"/>
+<polygon style="fill:gray;stroke:gray;" points="291.584,-607.521 281,-608 289.573,-614.226 291.584,-607.521"/>
+</g>
+<!-- t52 -->
+<g id="node108" class="node"><title>t52</title>
+<polygon style="fill:#90ee90;stroke:green;" points="325.802,-559.794 471,-532.5 616.198,-559.794 616.062,-603.956 325.938,-603.956 325.802,-559.794"/>
+<polygon style="fill:none;stroke:green;" points="321.805,-556.476 471,-528.43 620.195,-556.476 620.037,-607.956 321.963,-607.956 321.805,-556.476"/>
+<text text-anchor="middle" x="471" y="-565.5" style="font-family:Times New Roman;font-size:20.00;">buildExpressionFullCorrelation</text>
+</g>
+<!-- t49->t52 -->
+<g id="edge91" class="edge"><title>t49->t52</title>
+<path style="fill:none;stroke:gray;" d="M694,-642C664,-633 629,-622 595,-611"/>
+<polygon style="fill:gray;stroke:gray;" points="595.584,-607.521 585,-608 593.573,-614.226 595.584,-607.521"/>
+</g>
+<!-- t49->t184 -->
+<g id="edge455" class="edge"><title>t49->t184</title>
+<path style="fill:none;stroke:gray;" d="M684,-642C671,-636 658,-627 649,-616 624,-585 660,-554 629,-528 580,-485 385,-548 335,-506 305,-479 344,-445 315,-418 277,-380 247,-414 196,-396 146,-378 119,-383 91,-338 80,-319 76,-156 91,-140 112,-118 523,-106 681,-101"/>
+<polygon style="fill:gray;stroke:gray;" points="681,-104.5 691,-101 681,-97.5001 681,-104.5"/>
+</g>
+<!-- t102 -->
+<g id="node112" class="node"><title>t102</title>
+<polygon style="fill:#90ee90;stroke:green;" points="922,-480 648,-480 644,-476 644,-444 918,-444 922,-448 922,-480"/>
+<polyline style="fill:none;stroke:green;" points="918,-476 644,-476 "/>
+<polyline style="fill:none;stroke:green;" points="918,-476 918,-444 "/>
+<polyline style="fill:none;stroke:green;" points="918,-476 922,-480 "/>
+<text text-anchor="middle" x="783" y="-455.5" style="font-family:Times New Roman;font-size:20.00;">buildExpressionDifferencesSAM</text>
+</g>
+<!-- t50->t102 -->
+<g id="edge95" class="edge"><title>t50->t102</title>
+<path style="fill:none;stroke:gray;" d="M766,-554C769,-537 774,-511 778,-490"/>
+<polygon style="fill:gray;stroke:gray;" points="781.471,-490.492 780,-480 774.607,-489.119 781.471,-490.492"/>
+</g>
+<!-- t101 -->
+<g id="node114" class="node"><title>t101</title>
+<polygon style="fill:#90ee90;stroke:green;" points="626,-480 348,-480 344,-476 344,-444 622,-444 626,-448 626,-480"/>
+<polyline style="fill:none;stroke:green;" points="622,-476 344,-476 "/>
+<polyline style="fill:none;stroke:green;" points="622,-476 622,-444 "/>
+<polyline style="fill:none;stroke:green;" points="622,-476 626,-480 "/>
+<text text-anchor="middle" x="485" y="-455.5" style="font-family:Times New Roman;font-size:20.00;">buildExpressionDifferencesTTest</text>
+</g>
+<!-- t50->t101 -->
+<g id="edge97" class="edge"><title>t50->t101</title>
+<path style="fill:none;stroke:gray;" d="M717,-554C669,-535 592,-505 540,-484"/>
+<polygon style="fill:gray;stroke:gray;" points="541.56,-480.863 531,-480 538.717,-487.26 541.56,-480.863"/>
+</g>
+<!-- t50->t184 -->
+<g id="edge453" class="edge"><title>t50->t184</title>
+<path style="fill:none;stroke:gray;" d="M810,-554C859,-535 930,-507 931,-506 954,-473 958,-446 931,-418 897,-382 522,-431 487,-396 476,-384 480,-374 487,-360 495,-345 507,-351 516,-338 540,-304 512,-275 544,-250 609,-199 856,-287 912,-228 939,-199 937,-170 912,-140 898,-122 845,-112 801,-106"/>
+<polygon style="fill:gray;stroke:gray;" points="801.299,-102.512 791,-105 800.602,-109.478 801.299,-102.512"/>
+</g>
+<!-- t54->t184 -->
+<g id="edge447" class="edge"><title>t54->t184</title>
+<path style="fill:none;stroke:gray;" d="M994,-554C989,-541 982,-522 976,-506 962,-467 979,-442 945,-418 906,-388 541,-431 507,-396 496,-384 501,-374 507,-360 514,-345 526,-350 535,-338 559,-304 532,-275 564,-250 629,-200 873,-287 928,-228 955,-199 953,-170 928,-140 912,-119 850,-109 801,-104"/>
+<polygon style="fill:gray;stroke:gray;" points="801.299,-100.512 791,-103 800.602,-107.478 801.299,-100.512"/>
+</g>
+<!-- t55->t184 -->
+<g id="edge445" class="edge"><title>t55->t184</title>
+<path style="fill:none;stroke:gray;" d="M1193,-554C1173,-545 1148,-535 1125,-528 1070,-511 1041,-543 996,-506 965,-479 1000,-442 967,-418 929,-388 561,-431 527,-396 516,-384 521,-374 527,-360 534,-345 546,-350 555,-338 579,-304 552,-275 584,-250 648,-201 890,-286 944,-228 971,-199 969,-170 944,-140 926,-118 854,-108 801,-104"/>
+<polygon style="fill:gray;stroke:gray;" points="801.299,-100.512 791,-103 800.602,-107.478 801.299,-100.512"/>
+</g>
+<!-- t53 -->
+<g id="node110" class="node"><title>t53</title>
+<polygon style="fill:#90ee90;stroke:green;" points="306,-480 64,-480 60,-476 60,-444 302,-444 306,-448 306,-480"/>
+<polyline style="fill:none;stroke:green;" points="302,-476 60,-476 "/>
+<polyline style="fill:none;stroke:green;" points="302,-476 302,-444 "/>
+<polyline style="fill:none;stroke:green;" points="302,-476 306,-480 "/>
+<text text-anchor="middle" x="183" y="-455.5" style="font-family:Times New Roman;font-size:20.00;">importExpressionCorrelation</text>
+</g>
+<!-- t51->t53 -->
+<g id="edge433" class="edge"><title>t51->t53</title>
+<path style="fill:none;stroke:gray;" d="M176,-529C178,-516 179,-502 180,-490"/>
+<polygon style="fill:gray;stroke:gray;" points="183.488,-490.299 181,-480 176.522,-489.602 183.488,-490.299"/>
+</g>
+<!-- t51->t184 -->
+<g id="edge451" class="edge"><title>t51->t184</title>
+<path style="fill:none;stroke:gray;" d="M95,-545C78,-535 62,-522 51,-506 20,-458 41,-435 41,-378 41,-378 41,-378 41,-294 41,-225 6,-187 55,-140 78,-118 517,-106 681,-101"/>
+<polygon style="fill:gray;stroke:gray;" points="681,-104.5 691,-101 681,-97.5001 681,-104.5"/>
+</g>
+<!-- t52->t53 -->
+<g id="edge93" class="edge"><title>t52->t53</title>
+<path style="fill:none;stroke:gray;" d="M394,-543C346,-524 284,-501 240,-484"/>
+<polygon style="fill:gray;stroke:gray;" points="240.585,-480.464 230,-480 237.985,-486.964 240.585,-480.464"/>
+</g>
+<!-- t53->t184 -->
+<g id="edge449" class="edge"><title>t53->t184</title>
+<path style="fill:none;stroke:gray;" d="M160,-444C134,-422 93,-383 76,-338 61,-296 46,-172 76,-140 98,-118 521,-106 681,-101"/>
+<polygon style="fill:gray;stroke:gray;" points="681,-104.5 691,-101 681,-97.5001 681,-104.5"/>
+</g>
+<!-- t103 -->
+<g id="node116" class="node"><title>t103</title>
+<polygon style="fill:#90ee90;stroke:green;" points="453,-396 209,-396 205,-392 205,-360 449,-360 453,-364 453,-396"/>
+<polyline style="fill:none;stroke:green;" points="449,-392 205,-392 "/>
+<polyline style="fill:none;stroke:green;" points="449,-392 449,-360 "/>
+<polyline style="fill:none;stroke:green;" points="449,-392 453,-396 "/>
+<text text-anchor="middle" x="329" y="-371.5" style="font-family:Times New Roman;font-size:20.00;">importExpressionDifferences</text>
+</g>
+<!-- t102->t103 -->
+<g id="edge99" class="edge"><title>t102->t103</title>
+<path style="fill:none;stroke:gray;" d="M730,-444C703,-435 667,-425 635,-418 634,-417 545,-406 463,-395"/>
+<polygon style="fill:gray;stroke:gray;" points="463.299,-391.512 453,-394 462.602,-398.478 463.299,-391.512"/>
+</g>
+<!-- t101->t103 -->
+<g id="edge435" class="edge"><title>t101->t103</title>
+<path style="fill:none;stroke:gray;" d="M452,-444C429,-432 397,-414 372,-401"/>
+<polygon style="fill:gray;stroke:gray;" points="373.441,-397.797 363,-396 370.042,-403.916 373.441,-397.797"/>
+</g>
+<!-- t104 -->
+<g id="node118" class="node"><title>t104</title>
+<polygon style="fill:#90ee90;stroke:green;" points="406,-202 104,-202 100,-198 100,-166 402,-166 406,-170 406,-202"/>
+<polyline style="fill:none;stroke:green;" points="402,-198 100,-198 "/>
+<polyline style="fill:none;stroke:green;" points="402,-198 402,-166 "/>
+<polyline style="fill:none;stroke:green;" points="402,-198 406,-202 "/>
+<text text-anchor="middle" x="253" y="-177.5" style="font-family:Times New Roman;font-size:20.00;">exportDifferentiallyExpressedGenes</text>
+</g>
+<!-- t103->t104 -->
+<g id="edge101" class="edge"><title>t103->t104</title>
+<path style="fill:none;stroke:gray;" d="M205,-369C163,-363 124,-353 111,-338 86,-307 90,-282 111,-250 124,-230 145,-216 166,-206"/>
+<polygon style="fill:gray;stroke:gray;" points="167.283,-209.26 175,-202 164.44,-202.863 167.283,-209.26"/>
+</g>
+<!-- t172 -->
+<g id="node120" class="node"><title>t172</title>
+<polygon style="fill:#90ee90;stroke:green;" points="314,-312 124,-312 120,-308 120,-276 310,-276 314,-280 314,-312"/>
+<polyline style="fill:none;stroke:green;" points="310,-308 120,-308 "/>
+<polyline style="fill:none;stroke:green;" points="310,-308 310,-276 "/>
+<polyline style="fill:none;stroke:green;" points="310,-308 314,-312 "/>
+<text text-anchor="middle" x="217" y="-287.5" style="font-family:Times New Roman;font-size:20.00;">runExpressionGOSlim</text>
+</g>
+<!-- t103->t172 -->
+<g id="edge103" class="edge"><title>t103->t172</title>
+<path style="fill:none;stroke:gray;" d="M305,-360C289,-348 267,-332 249,-318"/>
+<polygon style="fill:gray;stroke:gray;" points="251.1,-315.2 241,-312 246.9,-320.8 251.1,-315.2"/>
+</g>
+<!-- t173 -->
+<g id="node122" class="node"><title>t173</title>
+<polygon style="fill:#90ee90;stroke:green;" points="490,-312 336,-312 332,-308 332,-276 486,-276 490,-280 490,-312"/>
+<polyline style="fill:none;stroke:green;" points="486,-308 332,-308 "/>
+<polyline style="fill:none;stroke:green;" points="486,-308 486,-276 "/>
+<polyline style="fill:none;stroke:green;" points="486,-308 490,-312 "/>
+<text text-anchor="middle" x="411" y="-287.5" style="font-family:Times New Roman;font-size:20.00;">runExpressionGO</text>
+</g>
+<!-- t103->t173 -->
+<g id="edge105" class="edge"><title>t103->t173</title>
+<path style="fill:none;stroke:gray;" d="M347,-360C359,-348 374,-332 386,-319"/>
+<polygon style="fill:gray;stroke:gray;" points="388.404,-321.546 393,-312 383.454,-316.596 388.404,-321.546"/>
+</g>
+<!-- t103->t184 -->
+<g id="edge443" class="edge"><title>t103->t184</title>
+<path style="fill:none;stroke:gray;" d="M442,-360C468,-354 490,-347 499,-338 529,-309 493,-275 524,-250 589,-198 841,-288 897,-228 924,-199 921,-170 897,-140 886,-124 840,-114 801,-108"/>
+<polygon style="fill:gray;stroke:gray;" points="801.492,-104.529 791,-106 800.119,-111.393 801.492,-104.529"/>
+</g>
+<!-- t104->t184 -->
+<g id="edge441" class="edge"><title>t104->t184</title>
+<path style="fill:none;stroke:gray;" d="M313,-166C345,-157 384,-147 420,-140 510,-122 617,-111 681,-105"/>
+<polygon style="fill:gray;stroke:gray;" points="681.398,-108.478 691,-104 680.701,-101.512 681.398,-108.478"/>
+</g>
+<!-- t175 -->
+<g id="node124" class="node"><title>t175</title>
+<polygon style="fill:none;stroke:blue;" points="666,-202 448,-202 444,-198 444,-166 662,-166 666,-170 666,-202"/>
+<polyline style="fill:none;stroke:blue;" points="662,-198 444,-198 "/>
+<polyline style="fill:none;stroke:blue;" points="662,-198 662,-166 "/>
+<polyline style="fill:none;stroke:blue;" points="662,-198 666,-202 "/>
+<text text-anchor="middle" x="555" y="-177.5" style="font-family:Times New Roman;font-size:20.00;fill:blue;">importExpressionGOSlim</text>
+</g>
+<!-- t172->t175 -->
+<g id="edge107" class="edge"><title>t172->t175</title>
+<path style="fill:none;stroke:gray;" d="M255,-276C275,-267 300,-257 323,-250 370,-235 384,-240 430,-228 453,-222 477,-214 499,-206"/>
+<polygon style="fill:gray;stroke:gray;" points="500.283,-209.26 508,-202 497.44,-202.863 500.283,-209.26"/>
+</g>
+<!-- t172->t184 -->
+<g id="edge439" class="edge"><title>t172->t184</title>
+<path style="fill:none;stroke:gray;" d="M256,-276C276,-267 300,-257 323,-250 363,-237 386,-257 415,-228 444,-199 406,-167 435,-140 453,-123 597,-110 681,-104"/>
+<polygon style="fill:gray;stroke:gray;" points="681.398,-107.478 691,-103 680.701,-100.512 681.398,-107.478"/>
+</g>
+<!-- t174 -->
+<g id="node126" class="node"><title>t174</title>
+<polygon style="fill:none;stroke:blue;" points="888,-202 708,-202 704,-198 704,-166 884,-166 888,-170 888,-202"/>
+<polyline style="fill:none;stroke:blue;" points="884,-198 704,-198 "/>
+<polyline style="fill:none;stroke:blue;" points="884,-198 884,-166 "/>
+<polyline style="fill:none;stroke:blue;" points="884,-198 888,-202 "/>
+<text text-anchor="middle" x="796" y="-177.5" style="font-family:Times New Roman;font-size:20.00;fill:blue;">importExpressionGO</text>
+</g>
+<!-- t173->t174 -->
+<g id="edge109" class="edge"><title>t173->t174</title>
+<path style="fill:none;stroke:gray;" d="M443,-276C460,-266 483,-256 504,-250 585,-227 610,-249 690,-228 710,-223 732,-214 751,-206"/>
+<polygon style="fill:gray;stroke:gray;" points="752.283,-209.26 760,-202 749.44,-202.863 752.283,-209.26"/>
+</g>
+<!-- t173->t184 -->
+<g id="edge437" class="edge"><title>t173->t184</title>
+<path style="fill:none;stroke:gray;" d="M443,-276C460,-266 483,-256 504,-250 541,-239 647,-253 675,-228 705,-201 675,-174 695,-140 698,-135 702,-130 707,-125"/>
+<polygon style="fill:gray;stroke:gray;" points="709.404,-127.546 714,-118 704.454,-122.596 709.404,-127.546"/>
+</g>
+<!-- t175->t184 -->
+<g id="edge111" class="edge"><title>t175->t184</title>
+<path style="fill:none;stroke:blue;" d="M595,-166C623,-153 662,-136 692,-122"/>
+<polygon style="fill:blue;stroke:blue;" points="693.283,-125.26 701,-118 690.44,-118.863 693.283,-125.26"/>
+</g>
+<!-- t174->t184 -->
+<g id="edge113" class="edge"><title>t174->t184</title>
+<path style="fill:none;stroke:blue;" d="M784,-166C777,-155 767,-139 758,-126"/>
+<polygon style="fill:blue;stroke:blue;" points="761.268,-124.625 753,-118 755.332,-128.335 761.268,-124.625"/>
+</g>
+<!-- t184->t191 -->
+<g id="edge327" class="edge"><title>t184->t191</title>
+<path style="fill:none;stroke:blue;" d="M791,-96C847,-92 941,-85 1022,-82 1532,-61 3214,-44 3487,-42"/>
+<polygon style="fill:blue;stroke:blue;" points="3487,-45.5001 3497,-42 3487,-38.5001 3487,-45.5001"/>
+</g>
+<!-- t69 -->
+<g id="node133" class="node"><title>t69</title>
+<polygon style="fill:#90ee90;stroke:green;" points="10456,-480 10248,-480 10244,-476 10244,-444 10452,-444 10456,-448 10456,-480"/>
+<polyline style="fill:none;stroke:green;" points="10452,-476 10244,-476 "/>
+<polyline style="fill:none;stroke:green;" points="10452,-476 10452,-444 "/>
+<polyline style="fill:none;stroke:green;" points="10452,-476 10456,-480 "/>
+<text text-anchor="middle" x="10350" y="-455.5" style="font-family:Times New Roman;font-size:20.00;">makePeakvalCorrelation</text>
+</g>
+<!-- t31->t69 -->
+<g id="edge117" class="edge"><title>t31->t69</title>
+<path style="fill:none;stroke:gray;" d="M10368,-554C10365,-537 10360,-511 10356,-490"/>
+<polygon style="fill:gray;stroke:gray;" points="10359.4,-489.119 10354,-480 10352.5,-490.492 10359.4,-489.119"/>
+</g>
+<!-- t70 -->
+<g id="node135" class="node"><title>t70</title>
+<polygon style="fill:#90ee90;stroke:green;" points="10682,-480 10478,-480 10474,-476 10474,-444 10678,-444 10682,-448 10682,-480"/>
+<polyline style="fill:none;stroke:green;" points="10678,-476 10474,-476 "/>
+<polyline style="fill:none;stroke:green;" points="10678,-476 10678,-444 "/>
+<polyline style="fill:none;stroke:green;" points="10678,-476 10682,-480 "/>
+<text text-anchor="middle" x="10578" y="-455.5" style="font-family:Times New Roman;font-size:20.00;">makeAvgvalCorrelation</text>
+</g>
+<!-- t31->t70 -->
+<g id="edge119" class="edge"><title>t31->t70</title>
+<path style="fill:none;stroke:gray;" d="M10406,-554C10442,-535 10497,-506 10535,-485"/>
+<polygon style="fill:gray;stroke:gray;" points="10537,-487.916 10544,-480 10533.6,-481.797 10537,-487.916"/>
+</g>
+<!-- t71 -->
+<g id="node137" class="node"><title>t71</title>
+<polygon style="fill:#90ee90;stroke:green;" points="10906,-480 10704,-480 10700,-476 10700,-444 10902,-444 10906,-448 10906,-480"/>
+<polyline style="fill:none;stroke:green;" points="10902,-476 10700,-476 "/>
+<polyline style="fill:none;stroke:green;" points="10902,-476 10902,-444 "/>
+<polyline style="fill:none;stroke:green;" points="10902,-476 10906,-480 "/>
+<text text-anchor="middle" x="10803" y="-455.5" style="font-family:Times New Roman;font-size:20.00;">makeLengthCorrelation</text>
+</g>
+<!-- t31->t71 -->
+<g id="edge121" class="edge"><title>t31->t71</title>
+<path style="fill:none;stroke:gray;" d="M10432,-563C10497,-552 10603,-532 10691,-506 10712,-500 10734,-492 10753,-484"/>
+<polygon style="fill:gray;stroke:gray;" points="10754.3,-487.26 10762,-480 10751.4,-480.863 10754.3,-487.26"/>
+</g>
+<!-- t72 -->
+<g id="node139" class="node"><title>t72</title>
+<polygon style="fill:#90ee90;stroke:green;" points="10897,-396 10741,-396 10737,-392 10737,-360 10893,-360 10897,-364 10897,-396"/>
+<polyline style="fill:none;stroke:green;" points="10893,-392 10737,-392 "/>
+<polyline style="fill:none;stroke:green;" points="10893,-392 10893,-360 "/>
+<polyline style="fill:none;stroke:green;" points="10893,-392 10897,-396 "/>
+<text text-anchor="middle" x="10817" y="-371.5" style="font-family:Times New Roman;font-size:20.00;">importCorrelation</text>
+</g>
+<!-- t69->t72 -->
+<g id="edge465" class="edge"><title>t69->t72</title>
+<path style="fill:none;stroke:gray;" d="M10389,-444C10411,-435 10439,-424 10465,-418 10578,-392 10609,-411 10723,-396 10724,-396 10726,-396 10727,-395"/>
+<polygon style="fill:gray;stroke:gray;" points="10727.4,-398.478 10737,-394 10726.7,-391.512 10727.4,-398.478"/>
+</g>
+<!-- t70->t72 -->
+<g id="edge463" class="edge"><title>t70->t72</title>
+<path style="fill:none;stroke:gray;" d="M10622,-444C10643,-436 10668,-426 10691,-418 10710,-411 10729,-405 10747,-399"/>
+<polygon style="fill:gray;stroke:gray;" points="10748.4,-402.226 10757,-396 10746.4,-395.521 10748.4,-402.226"/>
+</g>
+<!-- t71->t72 -->
+<g id="edge123" class="edge"><title>t71->t72</title>
+<path style="fill:none;stroke:gray;" d="M10806,-444C10808,-433 10810,-418 10812,-406"/>
+<polygon style="fill:gray;stroke:gray;" points="10815.5,-406.492 10814,-396 10808.6,-405.119 10815.5,-406.492"/>
+</g>
+<!-- t188 -->
+<g id="node154" class="node"><title>t188</title>
+<polygon style="fill:none;stroke:blue;" points="10627,-118 10529,-118 10525,-114 10525,-82 10623,-82 10627,-86 10627,-118"/>
+<polyline style="fill:none;stroke:blue;" points="10623,-114 10525,-114 "/>
+<polyline style="fill:none;stroke:blue;" points="10623,-114 10623,-82 "/>
+<polyline style="fill:none;stroke:blue;" points="10623,-114 10627,-118 "/>
+<text text-anchor="middle" x="10576" y="-93.5" style="font-family:Times New Roman;font-size:20.00;fill:blue;">correlation</text>
+</g>
+<!-- t72->t188 -->
+<g id="edge137" class="edge"><title>t72->t188</title>
+<path style="fill:none;stroke:gray;" d="M10872,-360C10881,-354 10889,-347 10895,-338 10942,-262 10938,-200 10873,-140 10857,-124 10719,-111 10637,-105"/>
+<polygon style="fill:gray;stroke:gray;" points="10637.3,-101.512 10627,-104 10636.6,-108.478 10637.3,-101.512"/>
+</g>
+<!-- t61 -->
+<g id="node143" class="node"><title>t61</title>
+<polygon style="fill:#90ee90;stroke:green;" points="10384,-202 10256,-202 10252,-198 10252,-166 10380,-166 10384,-170 10384,-202"/>
+<polyline style="fill:none;stroke:green;" points="10380,-198 10252,-198 "/>
+<polyline style="fill:none;stroke:green;" points="10380,-198 10380,-166 "/>
+<polyline style="fill:none;stroke:green;" points="10380,-198 10384,-202 "/>
+<text text-anchor="middle" x="10318" y="-177.5" style="font-family:Times New Roman;font-size:20.00;">importOverlap</text>
+</g>
+<!-- t60->t61 -->
+<g id="edge127" class="edge"><title>t60->t61</title>
+<path style="fill:none;stroke:gray;" d="M10195,-267C10222,-249 10257,-225 10283,-208"/>
+<polygon style="fill:gray;stroke:gray;" points="10285.1,-210.8 10291,-202 10280.9,-205.2 10285.1,-210.8"/>
+</g>
+<!-- t61->t188 -->
+<g id="edge479" class="edge"><title>t61->t188</title>
+<path style="fill:none;stroke:gray;" d="M10373,-166C10415,-152 10472,-134 10515,-120"/>
+<polygon style="fill:gray;stroke:gray;" points="10516.4,-123.226 10525,-117 10514.4,-116.521 10516.4,-123.226"/>
+</g>
+<!-- t63 -->
+<g id="node147" class="node"><title>t63</title>
+<polygon style="fill:#90ee90;stroke:green;" points="10864,-202 10682,-202 10678,-198 10678,-166 10860,-166 10864,-170 10864,-202"/>
+<polyline style="fill:none;stroke:green;" points="10860,-198 10678,-198 "/>
+<polyline style="fill:none;stroke:green;" points="10860,-198 10860,-166 "/>
+<polyline style="fill:none;stroke:green;" points="10860,-198 10864,-202 "/>
+<text text-anchor="middle" x="10771" y="-177.5" style="font-family:Times New Roman;font-size:20.00;">importUCSCOverlap</text>
+</g>
+<!-- t62->t63 -->
+<g id="edge131" class="edge"><title>t62->t63</title>
+<path style="fill:none;stroke:gray;" d="M10782,-252C10780,-239 10777,-225 10776,-212"/>
+<polygon style="fill:gray;stroke:gray;" points="10779.4,-211.119 10774,-202 10772.5,-212.492 10779.4,-211.119"/>
+</g>
+<!-- t63->t188 -->
+<g id="edge477" class="edge"><title>t63->t188</title>
+<path style="fill:none;stroke:gray;" d="M10729,-166C10699,-153 10659,-136 10628,-122"/>
+<polygon style="fill:gray;stroke:gray;" points="10628.6,-118.464 10618,-118 10626,-124.964 10628.6,-118.464"/>
+</g>
+<!-- t64 -->
+<g id="node149" class="node"><title>t64</title>
+<polygon style="fill:#90ee90;stroke:green;" points="10499,-396 10321,-396 10317,-392 10317,-360 10495,-360 10499,-364 10499,-396"/>
+<polyline style="fill:none;stroke:green;" points="10495,-392 10317,-392 "/>
+<polyline style="fill:none;stroke:green;" points="10495,-392 10495,-360 "/>
+<polyline style="fill:none;stroke:green;" points="10495,-392 10499,-396 "/>
+<text text-anchor="middle" x="10408" y="-371.5" style="font-family:Times New Roman;font-size:20.00;">makeReproducibility</text>
+</g>
+<!-- t65 -->
+<g id="node150" class="node"><title>t65</title>
+<polygon style="fill:#90ee90;stroke:green;" points="10672,-312 10484,-312 10480,-308 10480,-276 10668,-276 10672,-280 10672,-312"/>
+<polyline style="fill:none;stroke:green;" points="10668,-308 10480,-308 "/>
+<polyline style="fill:none;stroke:green;" points="10668,-308 10668,-276 "/>
+<polyline style="fill:none;stroke:green;" points="10668,-308 10672,-312 "/>
+<text text-anchor="middle" x="10576" y="-287.5" style="font-family:Times New Roman;font-size:20.00;">importReproducibility</text>
+</g>
+<!-- t64->t65 -->
+<g id="edge133" class="edge"><title>t64->t65</title>
+<path style="fill:none;stroke:gray;" d="M10444,-360C10469,-347 10503,-330 10531,-316"/>
+<polygon style="fill:gray;stroke:gray;" points="10532.3,-319.26 10540,-312 10529.4,-312.863 10532.3,-319.26"/>
+</g>
+<!-- t66 -->
+<g id="node152" class="node"><title>t66</title>
+<polygon style="fill:none;stroke:blue;" points="10643,-202 10513,-202 10509,-198 10509,-166 10639,-166 10643,-170 10643,-202"/>
+<polyline style="fill:none;stroke:blue;" points="10639,-198 10509,-198 "/>
+<polyline style="fill:none;stroke:blue;" points="10639,-198 10639,-166 "/>
+<polyline style="fill:none;stroke:blue;" points="10639,-198 10643,-202 "/>
+<text text-anchor="middle" x="10576" y="-177.5" style="font-family:Times New Roman;font-size:20.00;fill:blue;">reproducibility</text>
+</g>
+<!-- t65->t66 -->
+<g id="edge135" class="edge"><title>t65->t66</title>
+<path style="fill:none;stroke:gray;" d="M10576,-276C10576,-259 10576,-233 10576,-212"/>
+<polygon style="fill:gray;stroke:gray;" points="10579.5,-212 10576,-202 10572.5,-212 10579.5,-212"/>
+</g>
+<!-- t66->t188 -->
+<g id="edge139" class="edge"><title>t66->t188</title>
+<path style="fill:none;stroke:blue;" d="M10576,-166C10576,-155 10576,-141 10576,-128"/>
+<polygon style="fill:blue;stroke:blue;" points="10579.5,-128 10576,-118 10572.5,-128 10579.5,-128"/>
+</g>
+<!-- t188->t191 -->
+<g id="edge321" class="edge"><title>t188->t191</title>
+<path style="fill:none;stroke:blue;" d="M10525,-99C10391,-95 10019,-86 9709,-82 7128,-46 3939,-42 3561,-42"/>
+<polygon style="fill:blue;stroke:blue;" points="3561,-38.5001 3551,-42 3561,-45.5001 3561,-38.5001"/>
+</g>
+<!-- t93 -->
+<g id="node157" class="node"><title>t93</title>
+<polygon style="fill:#90ee90;stroke:green;" points="9979,-396 9829,-396 9825,-392 9825,-360 9975,-360 9979,-364 9979,-396"/>
+<polyline style="fill:none;stroke:green;" points="9975,-392 9825,-392 "/>
+<polyline style="fill:none;stroke:green;" points="9975,-392 9975,-360 "/>
+<polyline style="fill:none;stroke:green;" points="9975,-392 9979,-396 "/>
+<text text-anchor="middle" x="9902" y="-371.5" style="font-family:Times New Roman;font-size:20.00;">annotateIntervals</text>
+</g>
+<!-- t94 -->
+<g id="node158" class="node"><title>t94</title>
+<polygon style="fill:#90ee90;stroke:green;" points="10026,-312 9866,-312 9862,-308 9862,-276 10022,-276 10026,-280 10026,-312"/>
+<polyline style="fill:none;stroke:green;" points="10022,-308 9862,-308 "/>
+<polyline style="fill:none;stroke:green;" points="10022,-308 10022,-276 "/>
+<polyline style="fill:none;stroke:green;" points="10022,-308 10026,-312 "/>
+<text text-anchor="middle" x="9944" y="-287.5" style="font-family:Times New Roman;font-size:20.00;">importAnnotations</text>
+</g>
+<!-- t93->t94 -->
+<g id="edge141" class="edge"><title>t93->t94</title>
+<path style="fill:none;stroke:gray;" d="M9911,-360C9917,-349 9924,-334 9930,-321"/>
+<polygon style="fill:gray;stroke:gray;" points="9933.2,-322.441 9935,-312 9927.08,-319.042 9933.2,-322.441"/>
+</g>
+<!-- t189 -->
+<g id="node190" class="node"><title>t189</title>
+<polygon style="fill:none;stroke:blue;" points="9315,-202 9235,-202 9231,-198 9231,-166 9311,-166 9315,-170 9315,-202"/>
+<polyline style="fill:none;stroke:blue;" points="9311,-198 9231,-198 "/>
+<polyline style="fill:none;stroke:blue;" points="9311,-198 9311,-166 "/>
+<polyline style="fill:none;stroke:blue;" points="9311,-198 9315,-202 "/>
+<text text-anchor="middle" x="9273" y="-177.5" style="font-family:Times New Roman;font-size:20.00;fill:blue;">annotate</text>
+</g>
+<!-- t93->t189 -->
+<g id="edge517" class="edge"><title>t93->t189</title>
+<path style="fill:none;stroke:gray;" d="M9975,-360C10003,-352 10031,-343 10035,-338 10059,-306 10063,-278 10035,-250 10016,-230 9567,-230 9539,-228 9465,-220 9379,-205 9325,-194"/>
+<polygon style="fill:gray;stroke:gray;" points="9325.49,-190.529 9315,-192 9324.12,-197.393 9325.49,-190.529"/>
+</g>
+<!-- t94->t189 -->
+<g id="edge515" class="edge"><title>t94->t189</title>
+<path style="fill:none;stroke:gray;" d="M9915,-276C9897,-266 9875,-255 9853,-250 9718,-214 9678,-244 9539,-228 9465,-220 9379,-204 9325,-194"/>
+<polygon style="fill:gray;stroke:gray;" points="9325.49,-190.529 9315,-192 9324.12,-197.393 9325.49,-190.529"/>
+</g>
+<!-- t99 -->
+<g id="node162" class="node"><title>t99</title>
+<polygon style="fill:#90ee90;stroke:green;" points="8744,-312 8644,-312 8640,-308 8640,-276 8740,-276 8744,-280 8744,-312"/>
+<polyline style="fill:none;stroke:green;" points="8740,-308 8640,-308 "/>
+<polyline style="fill:none;stroke:green;" points="8740,-308 8740,-276 "/>
+<polyline style="fill:none;stroke:green;" points="8740,-308 8744,-312 "/>
+<text text-anchor="middle" x="8692" y="-287.5" style="font-family:Times New Roman;font-size:20.00;">importTSS</text>
+</g>
+<!-- t97->t99 -->
+<g id="edge145" class="edge"><title>t97->t99</title>
+<path style="fill:none;stroke:gray;" d="M8577,-360C8593,-353 8611,-346 8626,-338 8637,-332 8648,-325 8659,-318"/>
+<polygon style="fill:gray;stroke:gray;" points="8661.1,-320.8 8667,-312 8656.9,-315.2 8661.1,-320.8"/>
+</g>
+<!-- t97->t189 -->
+<g id="edge509" class="edge"><title>t97->t189</title>
+<path style="fill:none;stroke:gray;" d="M8583,-360C8594,-354 8604,-347 8611,-338 8636,-306 8601,-276 8631,-250 8675,-211 9076,-192 9221,-185"/>
+<polygon style="fill:gray;stroke:gray;" points="9221,-188.5 9231,-185 9221,-181.5 9221,-188.5"/>
+</g>
+<!-- t99->t189 -->
+<g id="edge505" class="edge"><title>t99->t189</title>
+<path style="fill:none;stroke:gray;" d="M8714,-276C8726,-266 8742,-256 8758,-250 8842,-218 9107,-195 9221,-188"/>
+<polygon style="fill:gray;stroke:gray;" points="9221.4,-191.478 9231,-187 9220.7,-184.512 9221.4,-191.478"/>
+</g>
+<!-- t98 -->
+<g id="node164" class="node"><title>t98</title>
+<polygon style="fill:#90ee90;stroke:green;" points="9247,-396 9105,-396 9101,-392 9101,-360 9243,-360 9247,-364 9247,-396"/>
+<polyline style="fill:none;stroke:green;" points="9243,-392 9101,-392 "/>
+<polyline style="fill:none;stroke:green;" points="9243,-392 9243,-360 "/>
+<polyline style="fill:none;stroke:green;" points="9243,-392 9247,-396 "/>
+<text text-anchor="middle" x="9174" y="-371.5" style="font-family:Times New Roman;font-size:20.00;">annotateRepeats</text>
+</g>
+<!-- t100 -->
+<g id="node165" class="node"><title>t100</title>
+<polygon style="fill:#90ee90;stroke:green;" points="9444,-312 9316,-312 9312,-308 9312,-276 9440,-276 9444,-280 9444,-312"/>
+<polyline style="fill:none;stroke:green;" points="9440,-308 9312,-308 "/>
+<polyline style="fill:none;stroke:green;" points="9440,-308 9440,-276 "/>
+<polyline style="fill:none;stroke:green;" points="9440,-308 9444,-312 "/>
+<text text-anchor="middle" x="9378" y="-287.5" style="font-family:Times New Roman;font-size:20.00;">importRepeats</text>
+</g>
+<!-- t98->t100 -->
+<g id="edge147" class="edge"><title>t98->t100</title>
+<path style="fill:none;stroke:gray;" d="M9237,-360C9257,-354 9279,-346 9298,-338 9312,-332 9326,-324 9339,-317"/>
+<polygon style="fill:gray;stroke:gray;" points="9340.96,-319.916 9348,-312 9337.56,-313.797 9340.96,-319.916"/>
+</g>
+<!-- t98->t189 -->
+<g id="edge507" class="edge"><title>t98->t189</title>
+<path style="fill:none;stroke:gray;" d="M9238,-360C9256,-353 9273,-346 9278,-338 9303,-300 9293,-245 9283,-212"/>
+<polygon style="fill:gray;stroke:gray;" points="9286.23,-210.573 9280,-202 9279.52,-212.584 9286.23,-210.573"/>
+</g>
+<!-- t100->t189 -->
+<g id="edge503" class="edge"><title>t100->t189</title>
+<path style="fill:none;stroke:gray;" d="M9361,-276C9343,-258 9317,-230 9297,-209"/>
+<polygon style="fill:gray;stroke:gray;" points="9299.55,-206.596 9290,-202 9294.6,-211.546 9299.55,-206.596"/>
+</g>
+<!-- t168 -->
+<g id="node169" class="node"><title>t168</title>
+<polygon style="fill:#90ee90;stroke:green;" points="9048,-312 8786,-312 8782,-308 8782,-276 9044,-276 9048,-280 9048,-312"/>
+<polyline style="fill:none;stroke:green;" points="9044,-308 8782,-308 "/>
+<polyline style="fill:none;stroke:green;" points="9044,-308 9044,-276 "/>
+<polyline style="fill:none;stroke:green;" points="9044,-308 9048,-312 "/>
+<text text-anchor="middle" x="8915" y="-287.5" style="font-family:Times New Roman;font-size:20.00;">importTSSIntervalAssociations</text>
+</g>
+<!-- t167->t168 -->
+<g id="edge151" class="edge"><title>t167->t168</title>
+<path style="fill:none;stroke:gray;" d="M8784,-360C8809,-347 8843,-330 8870,-317"/>
+<polygon style="fill:gray;stroke:gray;" points="8871.96,-319.916 8879,-312 8868.56,-313.797 8871.96,-319.916"/>
+</g>
+<!-- t167->t189 -->
+<g id="edge493" class="edge"><title>t167->t189</title>
+<path style="fill:none;stroke:gray;" d="M8750,-360C8755,-326 8765,-257 8773,-250 8805,-219 9099,-195 9221,-188"/>
+<polygon style="fill:gray;stroke:gray;" points="9221.4,-191.478 9231,-187 9220.7,-184.512 9221.4,-191.478"/>
+</g>
+<!-- t168->t189 -->
+<g id="edge491" class="edge"><title>t168->t189</title>
+<path style="fill:none;stroke:gray;" d="M8973,-276C8999,-268 9029,-258 9057,-250 9113,-232 9177,-213 9221,-200"/>
+<polygon style="fill:gray;stroke:gray;" points="9222.43,-203.226 9231,-197 9220.42,-196.521 9222.43,-203.226"/>
+</g>
+<!-- t166 -->
+<g id="node173" class="node"><title>t166</title>
+<polygon style="fill:#90ee90;stroke:green;" points="8602,-312 8370,-312 8366,-308 8366,-276 8598,-276 8602,-280 8602,-312"/>
+<polyline style="fill:none;stroke:green;" points="8598,-308 8366,-308 "/>
+<polyline style="fill:none;stroke:green;" points="8598,-308 8598,-276 "/>
+<polyline style="fill:none;stroke:green;" points="8598,-308 8602,-312 "/>
+<text text-anchor="middle" x="8484" y="-287.5" style="font-family:Times New Roman;font-size:20.00;">importTSSIntervalDistance</text>
+</g>
+<!-- t165->t166 -->
+<g id="edge155" class="edge"><title>t165->t166</title>
+<path style="fill:none;stroke:gray;" d="M8359,-360C8383,-348 8415,-330 8441,-317"/>
+<polygon style="fill:gray;stroke:gray;" points="8442.96,-319.916 8450,-312 8439.56,-313.797 8442.96,-319.916"/>
+</g>
+<!-- t165->t189 -->
+<g id="edge497" class="edge"><title>t165->t189</title>
+<path style="fill:none;stroke:gray;" d="M8331,-360C8333,-353 8335,-345 8337,-338 8348,-299 8327,-276 8357,-250 8390,-221 9032,-194 9221,-186"/>
+<polygon style="fill:gray;stroke:gray;" points="9221,-189.5 9231,-186 9221,-182.5 9221,-189.5"/>
+</g>
+<!-- t166->t189 -->
+<g id="edge495" class="edge"><title>t166->t189</title>
+<path style="fill:none;stroke:gray;" d="M8529,-276C8554,-267 8587,-256 8616,-250 8838,-202 9109,-189 9221,-185"/>
+<polygon style="fill:gray;stroke:gray;" points="9221,-188.5 9231,-185 9221,-181.5 9221,-188.5"/>
+</g>
+<!-- t95 -->
+<g id="node175" class="node"><title>t95</title>
+<polygon style="fill:#90ee90;stroke:green;" points="9807,-396 9673,-396 9669,-392 9669,-360 9803,-360 9807,-364 9807,-396"/>
+<polyline style="fill:none;stroke:green;" points="9803,-392 9669,-392 "/>
+<polyline style="fill:none;stroke:green;" points="9803,-392 9803,-360 "/>
+<polyline style="fill:none;stroke:green;" points="9803,-392 9807,-396 "/>
+<text text-anchor="middle" x="9738" y="-371.5" style="font-family:Times New Roman;font-size:20.00;">annotateTracks</text>
+</g>
+<!-- t96 -->
+<g id="node176" class="node"><title>t96</title>
+<polygon style="fill:#90ee90;stroke:green;" points="9844,-312 9726,-312 9722,-308 9722,-276 9840,-276 9844,-280 9844,-312"/>
+<polyline style="fill:none;stroke:green;" points="9840,-308 9722,-308 "/>
+<polyline style="fill:none;stroke:green;" points="9840,-308 9840,-276 "/>
+<polyline style="fill:none;stroke:green;" points="9840,-308 9844,-312 "/>
+<text text-anchor="middle" x="9783" y="-287.5" style="font-family:Times New Roman;font-size:20.00;">importTracks</text>
+</g>
+<!-- t95->t96 -->
+<g id="edge157" class="edge"><title>t95->t96</title>
+<path style="fill:none;stroke:gray;" d="M9748,-360C9754,-349 9762,-334 9768,-321"/>
+<polygon style="fill:gray;stroke:gray;" points="9771.2,-322.441 9773,-312 9765.08,-319.042 9771.2,-322.441"/>
+</g>
+<!-- t95->t189 -->
+<g id="edge513" class="edge"><title>t95->t189</title>
+<path style="fill:none;stroke:gray;" d="M9725,-360C9721,-353 9717,-345 9713,-338 9697,-300 9719,-276 9688,-250 9663,-227 9573,-233 9539,-228 9465,-216 9379,-202 9325,-193"/>
+<polygon style="fill:gray;stroke:gray;" points="9325.49,-189.529 9315,-191 9324.12,-196.393 9325.49,-189.529"/>
+</g>
+<!-- t96->t189 -->
+<g id="edge511" class="edge"><title>t96->t189</title>
+<path style="fill:none;stroke:gray;" d="M9758,-276C9744,-266 9726,-256 9708,-250 9637,-225 9614,-239 9539,-228 9465,-217 9379,-202 9325,-193"/>
+<polygon style="fill:gray;stroke:gray;" points="9325.49,-189.529 9315,-191 9324.12,-196.393 9325.49,-189.529"/>
+</g>
+<!-- t107 -->
+<g id="node180" class="node"><title>t107</title>
+<polygon style="fill:#90ee90;stroke:green;" points="9254,-312 9070,-312 9066,-308 9066,-276 9250,-276 9254,-280 9254,-312"/>
+<polyline style="fill:none;stroke:green;" points="9250,-308 9066,-308 "/>
+<polyline style="fill:none;stroke:green;" points="9250,-308 9250,-276 "/>
+<polyline style="fill:none;stroke:green;" points="9250,-308 9254,-312 "/>
+<text text-anchor="middle" x="9160" y="-287.5" style="font-family:Times New Roman;font-size:20.00;">importIntervalCounts</text>
+</g>
+<!-- t106->t107 -->
+<g id="edge161" class="edge"><title>t106->t107</title>
+<path style="fill:none;stroke:gray;" d="M9030,-360C9055,-347 9089,-330 9115,-317"/>
+<polygon style="fill:gray;stroke:gray;" points="9116.96,-319.916 9124,-312 9113.56,-313.797 9116.96,-319.916"/>
+</g>
+<!-- t106->t189 -->
+<g id="edge501" class="edge"><title>t106->t189</title>
+<path style="fill:none;stroke:gray;" d="M9083,-361C9086,-361 9089,-360 9092,-360 9130,-354 9237,-365 9263,-338 9279,-320 9278,-251 9276,-212"/>
+<polygon style="fill:gray;stroke:gray;" points="9279.48,-211.602 9275,-202 9272.51,-212.299 9279.48,-211.602"/>
+</g>
+<!-- t107->t189 -->
+<g id="edge499" class="edge"><title>t107->t189</title>
+<path style="fill:none;stroke:gray;" d="M9179,-276C9198,-258 9227,-230 9247,-209"/>
+<polygon style="fill:gray;stroke:gray;" points="9249.4,-211.546 9254,-202 9244.45,-206.596 9249.4,-211.546"/>
+</g>
+<!-- t180 -->
+<g id="node184" class="node"><title>t180</title>
+<polygon style="fill:#90ee90;stroke:green;" points="9664,-312 9486,-312 9482,-308 9482,-276 9660,-276 9664,-280 9664,-312"/>
+<polyline style="fill:none;stroke:green;" points="9660,-308 9482,-308 "/>
+<polyline style="fill:none;stroke:green;" points="9660,-308 9660,-276 "/>
+<polyline style="fill:none;stroke:green;" points="9660,-308 9664,-312 "/>
+<text text-anchor="middle" x="9573" y="-287.5" style="font-family:Times New Roman;font-size:20.00;">importSNPCoverage</text>
+</g>
+<!-- t179->t180 -->
+<g id="edge165" class="edge"><title>t179->t180</title>
+<path style="fill:none;stroke:gray;" d="M9406,-360C9425,-353 9448,-345 9468,-338 9486,-331 9506,-323 9523,-316"/>
+<polygon style="fill:gray;stroke:gray;" points="9524.28,-319.26 9532,-312 9521.44,-312.863 9524.28,-319.26"/>
+</g>
+<!-- t179->t189 -->
+<g id="edge485" class="edge"><title>t179->t189</title>
+<path style="fill:none;stroke:gray;" d="M9413,-360C9432,-353 9448,-346 9453,-338 9476,-305 9476,-282 9453,-250 9438,-229 9373,-208 9325,-196"/>
+<polygon style="fill:gray;stroke:gray;" points="9325.49,-192.529 9315,-194 9324.12,-199.393 9325.49,-192.529"/>
+</g>
+<!-- t180->t189 -->
+<g id="edge171" class="edge"><title>t180->t189</title>
+<path style="fill:none;stroke:gray;" d="M9533,-276C9513,-267 9490,-258 9468,-250 9420,-232 9364,-213 9325,-200"/>
+<polygon style="fill:gray;stroke:gray;" points="9325.58,-196.521 9315,-197 9323.57,-203.226 9325.58,-196.521"/>
+</g>
+<!-- t178->t189 -->
+<g id="edge487" class="edge"><title>t178->t189</title>
+<path style="fill:none;stroke:gray;" d="M9629,-360C9649,-354 9667,-346 9673,-338 9696,-306 9699,-279 9673,-250 9654,-226 9569,-232 9539,-228 9465,-216 9379,-201 9325,-193"/>
+<polygon style="fill:gray;stroke:gray;" points="9325.49,-189.529 9315,-191 9324.12,-196.393 9325.49,-189.529"/>
+</g>
+<!-- t176->t189 -->
+<g id="edge489" class="edge"><title>t176->t189</title>
+<path style="fill:none;stroke:gray;" d="M8219,-276C8255,-267 8301,-256 8342,-250 8675,-199 9078,-188 9221,-185"/>
+<polygon style="fill:gray;stroke:gray;" points="9221,-188.5 9231,-185 9221,-181.5 9221,-188.5"/>
+</g>
+<!-- t189->t191 -->
+<g id="edge319" class="edge"><title>t189->t191</title>
+<path style="fill:none;stroke:blue;" d="M9231,-182C9068,-176 8464,-155 7966,-140 6937,-109 6679,-104 5649,-82 4804,-63 3769,-46 3561,-42"/>
+<polygon style="fill:blue;stroke:blue;" points="3561,-38.5001 3551,-42 3561,-45.5001 3561,-38.5001"/>
+</g>
+<!-- t1 -->
+<g id="node192" class="node"><title>t1</title>
+<polygon style="fill:#90ee90;stroke:green;" points="5528,-590 5272,-590 5268,-586 5268,-554 5524,-554 5528,-558 5528,-590"/>
+<polyline style="fill:none;stroke:green;" points="5524,-586 5268,-586 "/>
+<polyline style="fill:none;stroke:green;" points="5524,-586 5524,-554 "/>
+<polyline style="fill:none;stroke:green;" points="5524,-586 5528,-590 "/>
+<text text-anchor="middle" x="5398" y="-565.5" style="font-family:Times New Roman;font-size:20.00;">buildGenomeGCSegmentation</text>
+</g>
+<!-- t2 -->
+<g id="node193" class="node"><title>t2</title>
+<polygon style="fill:#90ee90;stroke:green;" points="5480,-480 5320,-480 5316,-476 5316,-444 5476,-444 5480,-448 5480,-480"/>
+<polyline style="fill:none;stroke:green;" points="5476,-476 5316,-476 "/>
+<polyline style="fill:none;stroke:green;" points="5476,-476 5476,-444 "/>
+<polyline style="fill:none;stroke:green;" points="5476,-476 5480,-480 "/>
+<text text-anchor="middle" x="5398" y="-455.5" style="font-family:Times New Roman;font-size:20.00;">buildAnnotatorGC</text>
+</g>
+<!-- t1->t2 -->
+<g id="edge173" class="edge"><title>t1->t2</title>
+<path style="fill:none;stroke:gray;" d="M5398,-554C5398,-537 5398,-511 5398,-490"/>
+<polygon style="fill:gray;stroke:gray;" points="5401.5,-490 5398,-480 5394.5,-490 5401.5,-490"/>
+</g>
+<!-- t2->t117 -->
+<g id="edge519" class="edge"><title>t2->t117</title>
+<path style="fill:none;stroke:gray;" d="M5480,-460C5884,-452 7648,-414 7892,-396 7898,-395 7904,-395 7911,-394"/>
+<polygon style="fill:gray;stroke:gray;" points="7911.4,-397.478 7921,-393 7910.7,-390.512 7911.4,-397.478"/>
+</g>
+<!-- t2->t120 -->
+<g id="edge521" class="edge"><title>t2->t120</title>
+<path style="fill:none;stroke:gray;" d="M5480,-447C5555,-434 5665,-413 5745,-398"/>
+<polygon style="fill:gray;stroke:gray;" points="5745.88,-401.393 5755,-396 5744.51,-394.529 5745.88,-401.393"/>
+</g>
+<!-- t2->t121 -->
+<g id="edge523" class="edge"><title>t2->t121</title>
+<path style="fill:none;stroke:gray;" d="M5480,-457C5631,-447 5964,-425 6255,-396"/>
+<polygon style="fill:gray;stroke:gray;" points="6255.4,-399.478 6265,-395 6254.7,-392.512 6255.4,-399.478"/>
+</g>
+<!-- t2->t118 -->
+<g id="edge525" class="edge"><title>t2->t118</title>
+<path style="fill:none;stroke:gray;" d="M5480,-460C5810,-454 7035,-428 7421,-396 7429,-395 7437,-395 7445,-394"/>
+<polygon style="fill:gray;stroke:gray;" points="7445.4,-397.478 7455,-393 7444.7,-390.512 7445.4,-397.478"/>
+</g>
+<!-- t2->t119 -->
+<g id="edge527" class="edge"><title>t2->t119</title>
+<path style="fill:none;stroke:gray;" d="M5480,-461C5831,-456 7204,-435 7659,-396"/>
+<polygon style="fill:gray;stroke:gray;" points="7659.4,-399.478 7669,-395 7658.7,-392.512 7659.4,-399.478"/>
+</g>
+<!-- t2->t160 -->
+<g id="edge531" class="edge"><title>t2->t160</title>
+<path style="fill:none;stroke:gray;" d="M5316,-454C5208,-443 5013,-425 4845,-418 4304,-391 4165,-430 3619,-396"/>
+<polygon style="fill:gray;stroke:gray;" points="3619.3,-392.512 3609,-395 3618.6,-399.478 3619.3,-392.512"/>
+</g>
+<!-- t2->t161 -->
+<g id="edge533" class="edge"><title>t2->t161</title>
+<path style="fill:none;stroke:gray;" d="M5316,-454C5208,-443 5013,-425 4845,-418 4146,-385 3969,-434 3269,-396 3268,-396 3266,-396 3265,-396"/>
+<polygon style="fill:gray;stroke:gray;" points="3265.3,-392.512 3255,-395 3264.6,-399.478 3265.3,-392.512"/>
+</g>
+<!-- t2->t147 -->
+<g id="edge535" class="edge"><title>t2->t147</title>
+<path style="fill:none;stroke:gray;" d="M5436,-444C5463,-431 5499,-414 5528,-400"/>
+<polygon style="fill:gray;stroke:gray;" points="5529.28,-403.26 5537,-396 5526.44,-396.863 5529.28,-403.26"/>
+</g>
+<!-- t2->t148 -->
+<g id="edge537" class="edge"><title>t2->t148</title>
+<path style="fill:none;stroke:gray;" d="M5360,-444C5333,-431 5297,-414 5268,-400"/>
+<polygon style="fill:gray;stroke:gray;" points="5269.56,-396.863 5259,-396 5266.72,-403.26 5269.56,-396.863"/>
+</g>
+<!-- t2->t149 -->
+<g id="edge539" class="edge"><title>t2->t149</title>
+<path style="fill:none;stroke:gray;" d="M5316,-455C5190,-443 4940,-421 4717,-396"/>
+<polygon style="fill:gray;stroke:gray;" points="4717.3,-392.512 4707,-395 4716.6,-399.478 4717.3,-392.512"/>
+</g>
+<!-- t2->t150 -->
+<g id="edge541" class="edge"><title>t2->t150</title>
+<path style="fill:none;stroke:gray;" d="M5316,-449C5231,-435 5096,-414 4998,-398"/>
+<polygon style="fill:gray;stroke:gray;" points="4998.49,-394.529 4988,-396 4997.12,-401.393 4998.49,-394.529"/>
+</g>
+<!-- t2->t151 -->
+<g id="edge543" class="edge"><title>t2->t151</title>
+<path style="fill:none;stroke:gray;" d="M5316,-455C5208,-445 5013,-429 4845,-418 4660,-405 4611,-409 4421,-396"/>
+<polygon style="fill:gray;stroke:gray;" points="4421.3,-392.512 4411,-395 4420.6,-399.478 4421.3,-392.512"/>
+</g>
+<!-- t2->t152 -->
+<g id="edge545" class="edge"><title>t2->t152</title>
+<path style="fill:none;stroke:gray;" d="M5316,-454C5208,-444 5013,-427 4845,-418 4488,-398 4395,-416 4033,-396"/>
+<polygon style="fill:gray;stroke:gray;" points="4033.3,-392.512 4023,-395 4032.6,-399.478 4033.3,-392.512"/>
+</g>
+<!-- t190 -->
+<g id="node309" class="node"><title>t190</title>
+<polygon style="fill:none;stroke:blue;" points="5640,-118 5552,-118 5548,-114 5548,-82 5636,-82 5640,-86 5640,-118"/>
+<polyline style="fill:none;stroke:blue;" points="5636,-114 5548,-114 "/>
+<polyline style="fill:none;stroke:blue;" points="5636,-114 5636,-82 "/>
+<polyline style="fill:none;stroke:blue;" points="5636,-114 5640,-118 "/>
+<text text-anchor="middle" x="5594" y="-93.5" style="font-family:Times New Roman;font-size:20.00;fill:blue;">annotator</text>
+</g>
+<!-- t2->t190 -->
+<g id="edge577" class="edge"><title>t2->t190</title>
+<path style="fill:none;stroke:gray;" d="M5480,-460C5939,-450 8167,-399 8170,-396 8182,-384 8181,-371 8170,-360 8141,-326 8005,-367 7971,-338 7941,-311 7970,-285 7950,-250 7919,-191 7909,-166 7847,-140 7794,-116 5821,-127 5650,-118"/>
+<polygon style="fill:gray;stroke:gray;" points="5650.3,-114.512 5640,-117 5649.6,-121.478 5650.3,-114.512"/>
+</g>
+<!-- t114->t190 -->
+<g id="edge575" class="edge"><title>t114->t190</title>
+<path style="fill:none;stroke:gray;" d="M5743,-166C5714,-153 5675,-136 5644,-122"/>
+<polygon style="fill:gray;stroke:gray;" points="5645.56,-118.863 5635,-118 5642.72,-125.26 5645.56,-118.863"/>
+</g>
+<!-- t124 -->
+<g id="node199" class="node"><title>t124</title>
+<polygon style="fill:#90ee90;stroke:green;" points="7483.1,-171.794 7619,-144.5 7754.9,-171.794 7754.77,-215.956 7483.23,-215.956 7483.1,-171.794"/>
+<polygon style="fill:none;stroke:green;" points="7479.09,-168.519 7619,-140.42 7758.91,-168.519 7758.76,-219.956 7479.24,-219.956 7479.09,-168.519"/>
+<text text-anchor="middle" x="7619" y="-177.5" style="font-family:Times New Roman;font-size:20.00;">importAnnotatorArchitecture</text>
+</g>
+<!-- t117->t124 -->
+<g id="edge179" class="edge"><title>t117->t124</title>
+<path style="fill:none;stroke:gray;" d="M7968,-360C7956,-354 7945,-347 7936,-338 7909,-308 7941,-277 7911,-250 7889,-229 7829,-214 7769,-204"/>
+<polygon style="fill:gray;stroke:gray;" points="7769.49,-200.529 7759,-202 7768.12,-207.393 7769.49,-200.529"/>
+</g>
+<!-- t117->t190 -->
+<g id="edge573" class="edge"><title>t117->t190</title>
+<path style="fill:none;stroke:gray;" d="M7986,-360C7975,-354 7964,-347 7956,-338 7930,-307 7955,-282 7930,-250 7879,-179 7852,-166 7768,-140 7658,-105 5841,-137 5650,-118"/>
+<polygon style="fill:gray;stroke:gray;" points="5650.3,-114.512 5640,-117 5649.6,-121.478 5650.3,-114.512"/>
+</g>
+<!-- t124->t190 -->
+<g id="edge567" class="edge"><title>t124->t190</title>
+<path style="fill:none;stroke:gray;" d="M7518,-161C7478,-152 7431,-144 7388,-140 7212,-121 5936,-104 5650,-101"/>
+<polygon style="fill:gray;stroke:gray;" points="5650,-97.5001 5640,-101 5650,-104.5 5650,-97.5001"/>
+</g>
+<!-- t123 -->
+<g id="node203" class="node"><title>t123</title>
+<polygon style="fill:#90ee90;stroke:green;" points="4294.11,-171.794 4405,-144.5 4515.89,-171.794 4515.79,-215.956 4294.21,-215.956 4294.11,-171.794"/>
+<polygon style="fill:none;stroke:green;" points="4290.1,-168.661 4405,-140.381 4519.9,-168.661 4519.78,-219.956 4290.22,-219.956 4290.1,-168.661"/>
+<text text-anchor="middle" x="4405" y="-177.5" style="font-family:Times New Roman;font-size:20.00;">importAnnotatorTracks</text>
+</g>
+<!-- t120->t123 -->
+<g id="edge183" class="edge"><title>t120->t123</title>
+<path style="fill:none;stroke:gray;" d="M5755,-361C5752,-361 5749,-360 5746,-360 5713,-356 4562,-357 4534,-338 4501,-313 4530,-282 4504,-250 4497,-241 4489,-234 4480,-226"/>
+<polygon style="fill:gray;stroke:gray;" points="4482.1,-223.2 4472,-220 4477.9,-228.8 4482.1,-223.2"/>
+</g>
+<!-- t120->t190 -->
+<g id="edge571" class="edge"><title>t120->t190</title>
+<path style="fill:none;stroke:gray;" d="M5755,-361C5752,-361 5749,-360 5746,-360 5713,-356 4573,-361 4549,-338 4518,-306 4538,-168 4571,-140 4608,-109 5329,-101 5538,-100"/>
+<polygon style="fill:gray;stroke:gray;" points="5538,-103.5 5548,-100 5538,-96.5001 5538,-103.5"/>
+</g>
+<!-- t123->t190 -->
+<g id="edge569" class="edge"><title>t123->t190</title>
+<path style="fill:none;stroke:gray;" d="M4473,-157C4492,-150 4514,-144 4534,-140 4633,-121 5333,-106 5538,-101"/>
+<polygon style="fill:gray;stroke:gray;" points="5538,-104.5 5548,-101 5538,-97.5001 5538,-104.5"/>
+</g>
+<!-- t134 -->
+<g id="node207" class="node"><title>t134</title>
+<polygon style="fill:#90ee90;stroke:green;" points="5990.15,-171.794 6131,-144.5 6271.85,-171.794 6271.72,-215.956 5990.28,-215.956 5990.15,-171.794"/>
+<polygon style="fill:none;stroke:green;" points="5986.15,-168.494 6131,-140.426 6275.85,-168.494 6275.69,-219.956 5986.31,-219.956 5986.15,-168.494"/>
+<text text-anchor="middle" x="6131" y="-177.5" style="font-family:Times New Roman;font-size:20.00;">importAnnotatorPromotorsGO</text>
+</g>
+<!-- t130->t134 -->
+<g id="edge187" class="edge"><title>t130->t134</title>
+<path style="fill:none;stroke:gray;" d="M6552,-276C6526,-267 6494,-257 6464,-250 6405,-235 6389,-239 6329,-228 6315,-225 6301,-222 6286,-219"/>
+<polygon style="fill:gray;stroke:gray;" points="6286.49,-215.529 6276,-217 6285.12,-222.393 6286.49,-215.529"/>
+</g>
+<!-- t130->t190 -->
+<g id="edge565" class="edge"><title>t130->t190</title>
+<path style="fill:none;stroke:gray;" d="M6551,-276C6525,-267 6493,-257 6464,-250 6421,-238 6404,-250 6364,-228 6319,-201 6333,-162 6285,-140 6228,-113 5805,-103 5650,-101"/>
+<polygon style="fill:gray;stroke:gray;" points="5650,-97.5001 5640,-101 5650,-104.5 5650,-97.5001"/>
+</g>
+<!-- t134->t190 -->
+<g id="edge557" class="edge"><title>t134->t190</title>
+<path style="fill:none;stroke:gray;" d="M6021,-162C5983,-154 5939,-146 5898,-140 5812,-126 5712,-113 5650,-106"/>
+<polygon style="fill:gray;stroke:gray;" points="5650.3,-102.512 5640,-105 5649.6,-109.478 5650.3,-102.512"/>
+</g>
+<!-- t135 -->
+<g id="node211" class="node"><title>t135</title>
+<polygon style="fill:#90ee90;stroke:green;" points="6377.04,-171.794 6538,-144.5 6698.96,-171.794 6698.81,-215.956 6377.19,-215.956 6377.04,-171.794"/>
+<polygon style="fill:none;stroke:green;" points="6373.04,-168.415 6538,-140.443 6702.96,-168.415 6702.79,-219.956 6373.21,-219.956 6373.04,-168.415"/>
+<text text-anchor="middle" x="6538" y="-177.5" style="font-family:Times New Roman;font-size:20.00;">importAnnotatorPromotorsGOSlim</text>
+</g>
+<!-- t131->t135 -->
+<g id="edge191" class="edge"><title>t131->t135</title>
+<path style="fill:none;stroke:gray;" d="M6832,-276C6786,-262 6720,-241 6663,-223"/>
+<polygon style="fill:gray;stroke:gray;" points="6663.58,-219.521 6653,-220 6661.57,-226.226 6663.58,-219.521"/>
+</g>
+<!-- t131->t190 -->
+<g id="edge563" class="edge"><title>t131->t190</title>
+<path style="fill:none;stroke:gray;" d="M6876,-276C6850,-242 6786,-169 6712,-140 6663,-120 5869,-105 5650,-101"/>
+<polygon style="fill:gray;stroke:gray;" points="5650,-97.5001 5640,-101 5650,-104.5 5650,-97.5001"/>
+</g>
+<!-- t135->t190 -->
+<g id="edge555" class="edge"><title>t135->t190</title>
+<path style="fill:none;stroke:gray;" d="M6435,-158C6405,-151 6371,-144 6339,-140 6206,-121 5801,-107 5650,-101"/>
+<polygon style="fill:gray;stroke:gray;" points="5650,-97.5001 5640,-101 5650,-104.5 5650,-97.5001"/>
+</g>
+<!-- t136 -->
+<g id="node215" class="node"><title>t136</title>
+<polygon style="fill:#90ee90;stroke:green;" points="4584.12,-171.794 4726,-144.5 4867.88,-171.794 4867.75,-215.956 4584.25,-215.956 4584.12,-171.794"/>
+<polygon style="fill:none;stroke:green;" points="4580.11,-168.492 4726,-140.427 4871.89,-168.492 4871.74,-219.956 4580.26,-219.956 4580.11,-168.492"/>
+<text text-anchor="middle" x="4726" y="-177.5" style="font-family:Times New Roman;font-size:20.00;">importAnnotatorTerritoriesGO</text>
+</g>
+<!-- t132->t136 -->
+<g id="edge195" class="edge"><title>t132->t136</title>
+<path style="fill:none;stroke:gray;" d="M4691,-276C4696,-263 4702,-246 4708,-230"/>
+<polygon style="fill:gray;stroke:gray;" points="4711.54,-230.585 4712,-220 4705.04,-227.985 4711.54,-230.585"/>
+</g>
+<!-- t132->t190 -->
+<g id="edge561" class="edge"><title>t132->t190</title>
+<path style="fill:none;stroke:gray;" d="M4739,-276C4795,-258 4875,-231 4881,-228 4941,-197 4940,-162 5002,-140 5052,-121 5400,-107 5538,-102"/>
+<polygon style="fill:gray;stroke:gray;" points="5538,-105.5 5548,-102 5538,-98.5001 5538,-105.5"/>
+</g>
+<!-- t136->t190 -->
+<g id="edge553" class="edge"><title>t136->t190</title>
+<path style="fill:none;stroke:gray;" d="M4832,-161C4870,-153 4913,-145 4953,-140 5168,-112 5426,-104 5538,-101"/>
+<polygon style="fill:gray;stroke:gray;" points="5538,-104.5 5548,-101 5538,-97.5001 5538,-104.5"/>
+</g>
+<!-- t137 -->
+<g id="node219" class="node"><title>t137</title>
+<polygon style="fill:#90ee90;stroke:green;" points="5014.95,-171.794 5177,-144.5 5339.05,-171.794 5338.9,-215.956 5015.1,-215.956 5014.95,-171.794"/>
+<polygon style="fill:none;stroke:green;" points="5010.95,-168.411 5177,-140.444 5343.05,-168.411 5342.87,-219.956 5011.13,-219.956 5010.95,-168.411"/>
+<text text-anchor="middle" x="5177" y="-177.5" style="font-family:Times New Roman;font-size:20.00;">importAnnotatorTerritoriesGOSlim</text>
+</g>
+<!-- t133->t137 -->
+<g id="edge199" class="edge"><title>t133->t137</title>
+<path style="fill:none;stroke:gray;" d="M5006,-276C5032,-262 5068,-243 5101,-225"/>
+<polygon style="fill:gray;stroke:gray;" points="5102.96,-227.916 5110,-220 5099.56,-221.797 5102.96,-227.916"/>
+</g>
+<!-- t133->t190 -->
+<g id="edge559" class="edge"><title>t133->t190</title>
+<path style="fill:none;stroke:gray;" d="M5025,-276C5055,-267 5092,-256 5125,-250 5225,-231 5255,-257 5352,-228 5374,-221 5489,-158 5552,-123"/>
+<polygon style="fill:gray;stroke:gray;" points="5553.96,-125.916 5561,-118 5550.56,-119.797 5553.96,-125.916"/>
+</g>
+<!-- t137->t190 -->
+<g id="edge551" class="edge"><title>t137->t190</title>
+<path style="fill:none;stroke:gray;" d="M5286,-159C5314,-152 5345,-146 5374,-140 5430,-129 5493,-117 5538,-110"/>
+<polygon style="fill:gray;stroke:gray;" points="5538.88,-113.393 5548,-108 5537.51,-106.529 5538.88,-113.393"/>
+</g>
+<!-- t125 -->
+<g id="node223" class="node"><title>t125</title>
+<polygon style="fill:none;stroke:blue;stroke-dasharray:5,2;" points="7056.09,-281.794 7217,-254.5 7377.91,-281.794 7377.76,-325.956 7056.24,-325.956 7056.09,-281.794"/>
+<polygon style="fill:none;stroke:blue;stroke-dasharray:5,2;" points="7052.08,-278.417 7217,-250.443 7381.92,-278.417 7381.74,-329.956 7052.26,-329.956 7052.08,-278.417"/>
+<text text-anchor="middle" x="7217" y="-287.5" style="font-family:Times New Roman;font-size:20.00;fill:blue;">importAnnotatorRegionsOfInterest</text>
+</g>
+<!-- t121->t125 -->
+<g id="edge203" class="edge"><title>t121->t125</title>
+<path style="fill:none;stroke:blue;" d="M6551,-361C6554,-360 6557,-360 6560,-360 6775,-342 6831,-369 7043,-338 7054,-336 7065,-334 7076,-332"/>
+<polygon style="fill:blue;stroke:blue;" points="7076.88,-335.393 7086,-330 7075.51,-328.529 7076.88,-335.393"/>
+</g>
+<!-- t128 -->
+<g id="node233" class="node"><title>t128</title>
+<polygon style="fill:none;stroke:blue;" points="7379,-202 7223,-202 7219,-198 7219,-166 7375,-166 7379,-170 7379,-202"/>
+<polyline style="fill:none;stroke:blue;" points="7375,-198 7219,-198 "/>
+<polyline style="fill:none;stroke:blue;" points="7375,-198 7375,-166 "/>
+<polyline style="fill:none;stroke:blue;" points="7375,-198 7379,-202 "/>
+<text text-anchor="middle" x="7299" y="-177.5" style="font-family:Times New Roman;font-size:20.00;fill:blue;">annotator_regions</text>
+</g>
+<!-- t125->t128 -->
+<g id="edge215" class="edge"><title>t125->t128</title>
+<path style="fill:none;stroke:blue;" d="M7246,-255C7258,-240 7270,-224 7280,-210"/>
+<polygon style="fill:blue;stroke:blue;" points="7282.8,-212.1 7286,-202 7277.2,-207.9 7282.8,-212.1"/>
+</g>
+<!-- t126 -->
+<g id="node227" class="node"><title>t126</title>
+<polygon style="fill:#90ee90;stroke:green;" points="7404.02,-281.794 7516,-254.5 7627.98,-281.794 7627.88,-325.956 7404.12,-325.956 7404.02,-281.794"/>
+<polygon style="fill:none;stroke:green;" points="7400.01,-278.653 7516,-250.383 7631.99,-278.653 7631.87,-329.956 7400.13,-329.956 7400.01,-278.653"/>
+<text text-anchor="middle" x="7516" y="-287.5" style="font-family:Times New Roman;font-size:20.00;">importAnnotatorGWAS</text>
+</g>
+<!-- t118->t126 -->
+<g id="edge207" class="edge"><title>t118->t126</title>
+<path style="fill:none;stroke:gray;" d="M7545,-360C7542,-354 7539,-347 7536,-340"/>
+<polygon style="fill:gray;stroke:gray;" points="7538.96,-337.985 7532,-330 7532.46,-340.585 7538.96,-337.985"/>
+</g>
+<!-- t126->t128 -->
+<g id="edge529" class="edge"><title>t126->t128</title>
+<path style="fill:none;stroke:gray;" d="M7458,-265C7423,-247 7377,-224 7344,-207"/>
+<polygon style="fill:gray;stroke:gray;" points="7345.44,-203.797 7335,-202 7342.04,-209.916 7345.44,-203.797"/>
+</g>
+<!-- t127 -->
+<g id="node231" class="node"><title>t127</title>
+<polygon style="fill:#90ee90;stroke:green;" points="7654.17,-281.794 7776,-254.5 7897.83,-281.794 7897.71,-325.956 7654.29,-325.956 7654.17,-281.794"/>
+<polygon style="fill:none;stroke:green;" points="7650.18,-278.59 7776,-250.402 7901.82,-278.59 7901.69,-329.956 7650.31,-329.956 7650.18,-278.59"/>
+<text text-anchor="middle" x="7776" y="-287.5" style="font-family:Times New Roman;font-size:20.00;">importAnnotatorSelection</text>
+</g>
+<!-- t119->t127 -->
+<g id="edge211" class="edge"><title>t119->t127</title>
+<path style="fill:none;stroke:gray;" d="M7776,-360C7776,-354 7776,-347 7776,-340"/>
+<polygon style="fill:gray;stroke:gray;" points="7779.5,-340 7776,-330 7772.5,-340 7779.5,-340"/>
+</g>
+<!-- t127->t128 -->
+<g id="edge213" class="edge"><title>t127->t128</title>
+<path style="fill:none;stroke:gray;" d="M7702,-267C7683,-260 7661,-254 7641,-250 7567,-233 7546,-242 7470,-228 7438,-222 7402,-213 7372,-205"/>
+<polygon style="fill:gray;stroke:gray;" points="7372.58,-201.521 7362,-202 7370.57,-208.226 7372.58,-201.521"/>
+</g>
+<!-- t128->t190 -->
+<g id="edge315" class="edge"><title>t128->t190</title>
+<path style="fill:none;stroke:blue;" d="M7219,-177C7110,-167 6909,-150 6737,-140 6320,-117 5817,-105 5650,-101"/>
+<polygon style="fill:blue;stroke:blue;" points="5650,-97.5001 5640,-101 5650,-104.5 5650,-97.5001"/>
+</g>
+<!-- t162 -->
+<g id="node238" class="node"><title>t162</title>
+<polygon style="fill:none;stroke:blue;" points="1608.24,-281.794 1786,-254.5 1963.76,-281.794 1963.59,-325.956 1608.41,-325.956 1608.24,-281.794"/>
+<polygon style="fill:none;stroke:blue;" points="1604.23,-278.363 1786,-250.453 1967.77,-278.363 1967.58,-329.956 1604.42,-329.956 1604.23,-278.363"/>
+<text text-anchor="middle" x="1786" y="-287.5" style="font-family:Times New Roman;font-size:20.00;fill:blue;">importAnnotatorRegionsOfInterestTop</text>
+</g>
+<!-- t160->t162 -->
+<g id="edge219" class="edge"><title>t160->t162</title>
+<path style="fill:none;stroke:blue;" d="M3293,-361C3288,-361 3284,-360 3279,-360 2991,-342 2265,-373 1977,-338 1964,-336 1951,-334 1938,-332"/>
+<polygon style="fill:blue;stroke:blue;" points="1938.49,-328.529 1928,-330 1937.12,-335.393 1938.49,-328.529"/>
+</g>
+<!-- t164 -->
+<g id="node244" class="node"><title>t164</title>
+<polygon style="fill:none;stroke:blue;" points="1678,-202 1496,-202 1492,-198 1492,-166 1674,-166 1678,-170 1678,-202"/>
+<polyline style="fill:none;stroke:blue;" points="1674,-198 1492,-198 "/>
+<polyline style="fill:none;stroke:blue;" points="1674,-198 1674,-166 "/>
+<polyline style="fill:none;stroke:blue;" points="1674,-198 1678,-202 "/>
+<text text-anchor="middle" x="1585" y="-177.5" style="font-family:Times New Roman;font-size:20.00;fill:blue;">annotator_proportion</text>
+</g>
+<!-- t160->t164 -->
+<g id="edge231" class="edge"><title>t160->t164</title>
+<path style="fill:none;stroke:blue;" d="M3293,-361C3288,-361 3284,-360 3279,-360 3256,-358 1612,-354 1595,-338 1579,-322 1580,-252 1582,-212"/>
+<polygon style="fill:blue;stroke:blue;" points="1585.49,-212.299 1583,-202 1578.52,-211.602 1585.49,-212.299"/>
+</g>
+<!-- t162->t164 -->
+<g id="edge227" class="edge"><title>t162->t164</title>
+<path style="fill:none;stroke:blue;" d="M1724,-260C1692,-243 1655,-222 1627,-207"/>
+<polygon style="fill:blue;stroke:blue;" points="1628.44,-203.797 1618,-202 1625.04,-209.916 1628.44,-203.797"/>
+</g>
+<!-- t163 -->
+<g id="node242" class="node"><title>t163</title>
+<polygon style="fill:none;stroke:blue;" points="1175.97,-281.794 1369,-254.5 1562.03,-281.794 1561.85,-325.956 1176.15,-325.956 1175.97,-281.794"/>
+<polygon style="fill:none;stroke:blue;" points="1171.96,-278.32 1369,-250.46 1566.04,-278.32 1565.83,-329.956 1172.17,-329.956 1171.96,-278.32"/>
+<text text-anchor="middle" x="1369" y="-287.5" style="font-family:Times New Roman;font-size:20.00;fill:blue;">importAnnotatorRegionsOfInterestBottom</text>
+</g>
+<!-- t161->t163 -->
+<g id="edge223" class="edge"><title>t161->t163</title>
+<path style="fill:none;stroke:blue;" d="M2909,-361C2904,-360 2900,-360 2895,-360 2604,-343 1871,-370 1580,-338 1565,-336 1551,-334 1536,-332"/>
+<polygon style="fill:blue;stroke:blue;" points="1536.49,-328.529 1526,-330 1535.12,-335.393 1536.49,-328.529"/>
+</g>
+<!-- t161->t164 -->
+<g id="edge229" class="edge"><title>t161->t164</title>
+<path style="fill:none;stroke:blue;" d="M2909,-361C2904,-360 2900,-360 2895,-360 2871,-358 1180,-355 1163,-338 1150,-324 1150,-264 1163,-250 1186,-225 1427,-234 1459,-228 1484,-223 1510,-214 1532,-206"/>
+<polygon style="fill:blue;stroke:blue;" points="1534.02,-208.964 1542,-202 1531.42,-202.464 1534.02,-208.964"/>
+</g>
+<!-- t163->t164 -->
+<g id="edge225" class="edge"><title>t163->t164</title>
+<path style="fill:none;stroke:blue;" d="M1436,-260C1470,-243 1511,-222 1541,-207"/>
+<polygon style="fill:blue;stroke:blue;" points="1542.96,-209.916 1550,-202 1539.56,-203.797 1542.96,-209.916"/>
+</g>
+<!-- t164->t190 -->
+<g id="edge309" class="edge"><title>t164->t190</title>
+<path style="fill:none;stroke:blue;" d="M1678,-181C1926,-173 2624,-152 3204,-140 4138,-119 5274,-104 5538,-101"/>
+<polygon style="fill:blue;stroke:blue;" points="5538,-104.5 5548,-101 5538,-97.5001 5538,-104.5"/>
+</g>
+<!-- t153 -->
+<g id="node251" class="node"><title>t153</title>
+<polygon style="fill:none;stroke:blue;" points="4113.93,-281.794 4295,-254.5 4476.07,-281.794 4475.9,-325.956 4114.1,-325.956 4113.93,-281.794"/>
+<polygon style="fill:none;stroke:blue;" points="4109.92,-278.352 4295,-250.455 4480.08,-278.352 4479.88,-329.956 4110.12,-329.956 4109.92,-278.352"/>
+<text text-anchor="middle" x="4295" y="-287.5" style="font-family:Times New Roman;font-size:20.00;fill:blue;">importAnnotatorArchitectureWithMotif</text>
+</g>
+<!-- t147->t153 -->
+<g id="edge235" class="edge"><title>t147->t153</title>
+<path style="fill:none;stroke:blue;" d="M5413,-360C5410,-360 5407,-360 5404,-360 5001,-333 4896,-386 4494,-338 4481,-336 4467,-334 4453,-332"/>
+<polygon style="fill:blue;stroke:blue;" points="4453.49,-328.529 4443,-330 4452.12,-335.393 4453.49,-328.529"/>
+</g>
+<!-- t159 -->
+<g id="node273" class="node"><title>t159</title>
+<polygon style="fill:none;stroke:blue;" points="3365,-202 3217,-202 3213,-198 3213,-166 3361,-166 3365,-170 3365,-202"/>
+<polyline style="fill:none;stroke:blue;" points="3361,-198 3213,-198 "/>
+<polyline style="fill:none;stroke:blue;" points="3361,-198 3361,-166 "/>
+<polyline style="fill:none;stroke:blue;" points="3361,-198 3365,-202 "/>
+<text text-anchor="middle" x="3289" y="-177.5" style="font-family:Times New Roman;font-size:20.00;fill:blue;">annotator_motifs</text>
+</g>
+<!-- t147->t159 -->
+<g id="edge275" class="edge"><title>t147->t159</title>
+<path style="fill:none;stroke:blue;" d="M5413,-360C5410,-360 5407,-360 5404,-360 5380,-358 4523,-354 4504,-338 4474,-312 4519,-276 4489,-250 4469,-231 3647,-198 3375,-187"/>
+<polygon style="fill:blue;stroke:blue;" points="3375,-183.5 3365,-187 3375,-190.5 3375,-183.5"/>
+</g>
+<!-- t153->t159 -->
+<g id="edge265" class="edge"><title>t153->t159</title>
+<path style="fill:none;stroke:blue;" d="M4188,-267C4160,-260 4129,-254 4101,-250 3839,-210 3524,-193 3375,-187"/>
+<polygon style="fill:blue;stroke:blue;" points="3375,-183.5 3365,-187 3375,-190.5 3375,-183.5"/>
+</g>
+<!-- t154 -->
+<g id="node255" class="node"><title>t154</title>
+<polygon style="fill:none;stroke:blue;" points="3697.79,-281.794 3893,-254.5 4088.21,-281.794 4088.02,-325.956 3697.98,-325.956 3697.79,-281.794"/>
+<polygon style="fill:none;stroke:blue;" points="3693.79,-278.315 3893,-250.461 4092.21,-278.315 4092,-329.956 3694,-329.956 3693.79,-278.315"/>
+<text text-anchor="middle" x="3893" y="-287.5" style="font-family:Times New Roman;font-size:20.00;fill:blue;">importAnnotatorArchitectureWithoutMotif</text>
+</g>
+<!-- t148->t154 -->
+<g id="edge239" class="edge"><title>t148->t154</title>
+<path style="fill:none;stroke:blue;" d="M5045,-360C5042,-360 5039,-360 5036,-360 4622,-334 4514,-386 4101,-338 4087,-336 4073,-334 4058,-332"/>
+<polygon style="fill:blue;stroke:blue;" points="4058.49,-328.529 4048,-330 4057.12,-335.393 4058.49,-328.529"/>
+</g>
+<!-- t148->t159 -->
+<g id="edge273" class="edge"><title>t148->t159</title>
+<path style="fill:none;stroke:blue;" d="M5045,-360C5042,-360 5039,-360 5036,-360 4999,-357 3715,-361 3685,-338 3654,-312 3695,-277 3665,-250 3624,-211 3473,-195 3375,-189"/>
+<polygon style="fill:blue;stroke:blue;" points="3375.3,-185.512 3365,-188 3374.6,-192.478 3375.3,-185.512"/>
+</g>
+<!-- t154->t159 -->
+<g id="edge263" class="edge"><title>t154->t159</title>
+<path style="fill:none;stroke:blue;" d="M3771,-268C3741,-261 3710,-255 3680,-250 3575,-230 3454,-210 3375,-198"/>
+<polygon style="fill:blue;stroke:blue;" points="3375.49,-194.529 3365,-196 3374.12,-201.393 3375.49,-194.529"/>
+</g>
+<!-- t155 -->
+<g id="node259" class="node"><title>t155</title>
+<polygon style="fill:none;stroke:blue;" points="2953.93,-281.794 3110,-254.5 3266.07,-281.794 3265.92,-325.956 2954.08,-325.956 2953.93,-281.794"/>
+<polygon style="fill:none;stroke:blue;" points="2949.93,-278.433 3110,-250.44 3270.07,-278.433 3269.9,-329.956 2950.1,-329.956 2949.93,-278.433"/>
+<text text-anchor="middle" x="3110" y="-287.5" style="font-family:Times New Roman;font-size:20.00;fill:blue;">importAnnotatorTracksWithMotif</text>
+</g>
+<!-- t149->t155 -->
+<g id="edge243" class="edge"><title>t149->t155</title>
+<path style="fill:none;stroke:blue;" d="M4429,-361C4426,-360 4423,-360 4420,-360 4169,-342 3535,-371 3284,-338 3273,-336 3261,-334 3250,-332"/>
+<polygon style="fill:blue;stroke:blue;" points="3250.49,-328.529 3240,-330 3249.12,-335.393 3250.49,-328.529"/>
+</g>
+<!-- t149->t159 -->
+<g id="edge271" class="edge"><title>t149->t159</title>
+<path style="fill:none;stroke:blue;" d="M4429,-361C4426,-360 4423,-360 4420,-360 4380,-357 2970,-367 2941,-338 2928,-324 2929,-265 2941,-250 2974,-210 3111,-194 3203,-188"/>
+<polygon style="fill:blue;stroke:blue;" points="3203.4,-191.478 3213,-187 3202.7,-184.512 3203.4,-191.478"/>
+</g>
+<!-- t155->t159 -->
+<g id="edge261" class="edge"><title>t155->t159</title>
+<path style="fill:none;stroke:blue;" d="M3166,-260C3194,-243 3226,-223 3251,-207"/>
+<polygon style="fill:blue;stroke:blue;" points="3252.96,-209.916 3260,-202 3249.56,-203.797 3252.96,-209.916"/>
+</g>
+<!-- t156 -->
+<g id="node263" class="node"><title>t156</title>
+<polygon style="fill:#90ee90;stroke:green;" points="3311.85,-281.794 3482,-254.5 3652.15,-281.794 3651.99,-325.956 3312.01,-325.956 3311.85,-281.794"/>
+<polygon style="fill:none;stroke:green;" points="3307.84,-278.386 3482,-250.449 3656.16,-278.386 3655.97,-329.956 3308.03,-329.956 3307.84,-278.386"/>
+<text text-anchor="middle" x="3482" y="-287.5" style="font-family:Times New Roman;font-size:20.00;">importAnnotatorTracksWithoutMotif</text>
+</g>
+<!-- t150->t156 -->
+<g id="edge247" class="edge"><title>t150->t156</title>
+<path style="fill:none;stroke:gray;" d="M4725,-361C4722,-360 4719,-360 4716,-360 4485,-344 3901,-367 3670,-338 3657,-336 3645,-334 3632,-332"/>
+<polygon style="fill:gray;stroke:gray;" points="3632.49,-328.529 3622,-330 3631.12,-335.393 3632.49,-328.529"/>
+</g>
+<!-- t150->t159 -->
+<g id="edge549" class="edge"><title>t150->t159</title>
+<path style="fill:none;stroke:gray;" d="M4725,-361C4722,-360 4719,-360 4716,-360 4677,-357 3328,-365 3299,-338 3283,-322 3284,-252 3286,-212"/>
+<polygon style="fill:gray;stroke:gray;" points="3289.49,-212.299 3287,-202 3282.52,-211.602 3289.49,-212.299"/>
+</g>
+<!-- t156->t159 -->
+<g id="edge547" class="edge"><title>t156->t159</title>
+<path style="fill:none;stroke:gray;" d="M3422,-260C3392,-243 3356,-223 3330,-207"/>
+<polygon style="fill:gray;stroke:gray;" points="3331.44,-203.797 3321,-202 3328.04,-209.916 3331.44,-203.797"/>
+</g>
+<!-- t157 -->
+<g id="node267" class="node"><title>t157</title>
+<polygon style="fill:none;stroke:blue;" points="2475.92,-281.794 2682,-254.5 2888.08,-281.794 2887.89,-325.956 2476.11,-325.956 2475.92,-281.794"/>
+<polygon style="fill:none;stroke:blue;" points="2471.92,-278.29 2682,-250.465 2892.08,-278.29 2891.86,-329.956 2472.14,-329.956 2471.92,-278.29"/>
+<text text-anchor="middle" x="2682" y="-287.5" style="font-family:Times New Roman;font-size:20.00;fill:blue;">importAnnotatorRegionsOfInterestWithMotif</text>
+</g>
+<!-- t151->t157 -->
+<g id="edge251" class="edge"><title>t151->t157</title>
+<path style="fill:none;stroke:blue;" d="M4041,-360C4038,-360 4035,-360 4032,-360 3783,-345 3155,-364 2906,-338 2891,-336 2875,-334 2859,-332"/>
+<polygon style="fill:blue;stroke:blue;" points="2859.49,-328.529 2849,-330 2858.12,-335.393 2859.49,-328.529"/>
+</g>
+<!-- t151->t159 -->
+<g id="edge269" class="edge"><title>t151->t159</title>
+<path style="fill:none;stroke:blue;" d="M4041,-360C4038,-360 4035,-360 4032,-360 4002,-358 2948,-359 2926,-338 2899,-310 2901,-280 2926,-250 2961,-208 3107,-193 3203,-188"/>
+<polygon style="fill:blue;stroke:blue;" points="3203.4,-191.478 3213,-187 3202.7,-184.512 3203.4,-191.478"/>
+</g>
+<!-- t157->t159 -->
+<g id="edge259" class="edge"><title>t157->t159</title>
+<path style="fill:none;stroke:blue;" d="M2813,-268C2843,-262 2876,-255 2906,-250 3007,-231 3124,-211 3203,-198"/>
+<polygon style="fill:blue;stroke:blue;" points="3203.88,-201.393 3213,-196 3202.51,-194.529 3203.88,-201.393"/>
+</g>
+<!-- t158 -->
+<g id="node271" class="node"><title>t158</title>
+<polygon style="fill:none;stroke:blue;" points="1989.84,-281.794 2210,-254.5 2430.16,-281.794 2429.95,-325.956 1990.05,-325.956 1989.84,-281.794"/>
+<polygon style="fill:none;stroke:blue;" points="1985.83,-278.261 2210,-250.469 2434.17,-278.261 2433.93,-329.956 1986.07,-329.956 1985.83,-278.261"/>
+<text text-anchor="middle" x="2210" y="-287.5" style="font-family:Times New Roman;font-size:20.00;fill:blue;">importAnnotatorRegionsOfInterestWithoutMotif</text>
+</g>
+<!-- t152->t158 -->
+<g id="edge255" class="edge"><title>t152->t158</title>
+<path style="fill:none;stroke:blue;" d="M3627,-360C3624,-360 3621,-360 3618,-360 3359,-346 2707,-364 2448,-338 2431,-336 2415,-334 2397,-331"/>
+<polygon style="fill:blue;stroke:blue;" points="2397.3,-327.512 2387,-330 2396.6,-334.478 2397.3,-327.512"/>
+</g>
+<!-- t152->t159 -->
+<g id="edge267" class="edge"><title>t152->t159</title>
+<path style="fill:none;stroke:blue;" d="M3627,-360C3624,-360 3621,-360 3618,-360 3586,-358 2486,-360 2463,-338 2450,-324 2450,-264 2463,-250 2489,-223 2996,-197 3203,-187"/>
+<polygon style="fill:blue;stroke:blue;" points="3203,-190.5 3213,-187 3203,-183.5 3203,-190.5"/>
+</g>
+<!-- t158->t159 -->
+<g id="edge257" class="edge"><title>t158->t159</title>
+<path style="fill:none;stroke:blue;" d="M2343,-267C2377,-261 2414,-254 2448,-250 2723,-214 3050,-195 3203,-187"/>
+<polygon style="fill:blue;stroke:blue;" points="3203,-190.5 3213,-187 3203,-183.5 3203,-190.5"/>
+</g>
+<!-- t159->t190 -->
+<g id="edge311" class="edge"><title>t159->t190</title>
+<path style="fill:none;stroke:blue;" d="M3365,-180C3531,-172 3939,-152 4281,-140 4768,-121 5356,-106 5538,-101"/>
+<polygon style="fill:blue;stroke:blue;" points="5538,-104.5 5548,-101 5538,-97.5001 5538,-104.5"/>
+</g>
+<!-- t142 -->
+<g id="node286" class="node"><title>t142</title>
+<polygon style="fill:none;stroke:blue;" points="5474.24,-281.794 5589,-254.5 5703.76,-281.794 5703.65,-325.956 5474.35,-325.956 5474.24,-281.794"/>
+<polygon style="fill:none;stroke:blue;" points="5470.25,-278.632 5589,-250.389 5707.75,-278.632 5707.63,-329.956 5470.37,-329.956 5470.25,-278.632"/>
+<text text-anchor="middle" x="5589" y="-287.5" style="font-family:Times New Roman;font-size:20.00;fill:blue;">importAnnotatorROIGO</text>
+</g>
+<!-- t139->t142 -->
+<g id="edge279" class="edge"><title>t139->t142</title>
+<path style="fill:none;stroke:blue;" d="M6569,-361C6566,-361 6563,-360 6560,-360 6375,-342 5906,-370 5722,-338 5714,-337 5706,-335 5698,-333"/>
+<polygon style="fill:blue;stroke:blue;" points="5698.58,-329.521 5688,-330 5696.57,-336.226 5698.58,-329.521"/>
+</g>
+<!-- t146 -->
+<g id="node300" class="node"><title>t146</title>
+<polygon style="fill:none;stroke:blue;" points="5656,-202 5536,-202 5532,-198 5532,-166 5652,-166 5656,-170 5656,-202"/>
+<polyline style="fill:none;stroke:blue;" points="5652,-198 5532,-198 "/>
+<polyline style="fill:none;stroke:blue;" points="5652,-198 5652,-166 "/>
+<polyline style="fill:none;stroke:blue;" points="5652,-198 5656,-202 "/>
+<text text-anchor="middle" x="5594" y="-177.5" style="font-family:Times New Roman;font-size:20.00;fill:blue;">annotator_roi</text>
+</g>
+<!-- t139->t146 -->
+<g id="edge305" class="edge"><title>t139->t146</title>
+<path style="fill:none;stroke:blue;" d="M6569,-361C6566,-361 6563,-360 6560,-360 6530,-357 5483,-359 5461,-338 5448,-324 5451,-266 5461,-250 5475,-227 5499,-212 5522,-202"/>
+<polygon style="fill:blue;stroke:blue;" points="5523.43,-205.226 5532,-199 5521.42,-198.521 5523.43,-205.226"/>
+</g>
+<!-- t142->t146 -->
+<g id="edge299" class="edge"><title>t142->t146</title>
+<path style="fill:none;stroke:blue;" d="M5591,-251C5592,-238 5592,-224 5592,-212"/>
+<polygon style="fill:blue;stroke:blue;" points="5595.49,-212.299 5593,-202 5588.52,-211.602 5595.49,-212.299"/>
+</g>
+<!-- t143 -->
+<g id="node290" class="node"><title>t143</title>
+<polygon style="fill:none;stroke:blue;" points="5138.13,-281.794 5273,-254.5 5407.87,-281.794 5407.75,-325.956 5138.25,-325.956 5138.13,-281.794"/>
+<polygon style="fill:none;stroke:blue;" points="5134.13,-278.522 5273,-250.419 5411.87,-278.522 5411.72,-329.956 5134.28,-329.956 5134.13,-278.522"/>
+<text text-anchor="middle" x="5273" y="-287.5" style="font-family:Times New Roman;font-size:20.00;fill:blue;">importAnnotatorROIGOSlim</text>
+</g>
+<!-- t138->t143 -->
+<g id="edge283" class="edge"><title>t138->t143</title>
+<path style="fill:none;stroke:blue;" d="M6007,-361C6002,-361 5998,-360 5993,-360 5742,-338 5675,-378 5426,-338 5417,-337 5408,-334 5398,-332"/>
+<polygon style="fill:blue;stroke:blue;" points="5398.49,-328.529 5388,-330 5397.12,-335.393 5398.49,-328.529"/>
+</g>
+<!-- t138->t146 -->
+<g id="edge307" class="edge"><title>t138->t146</title>
+<path style="fill:none;stroke:blue;" d="M6007,-361C6002,-361 5998,-360 5993,-360 5963,-357 5468,-359 5446,-338 5419,-309 5425,-282 5446,-250 5463,-223 5494,-207 5522,-198"/>
+<polygon style="fill:blue;stroke:blue;" points="5523.43,-201.226 5532,-195 5521.42,-194.521 5523.43,-201.226"/>
+</g>
+<!-- t143->t146 -->
+<g id="edge297" class="edge"><title>t143->t146</title>
+<path style="fill:none;stroke:blue;" d="M5353,-267C5408,-248 5481,-223 5531,-205"/>
+<polygon style="fill:blue;stroke:blue;" points="5532.43,-208.226 5541,-202 5530.42,-201.521 5532.43,-208.226"/>
+</g>
+<!-- t144 -->
+<g id="node294" class="node"><title>t144</title>
+<polygon style="fill:none;stroke:blue;" points="5749.97,-281.794 5900,-254.5 6050.03,-281.794 6049.89,-325.956 5750.11,-325.956 5749.97,-281.794"/>
+<polygon style="fill:none;stroke:blue;" points="5745.96,-278.458 5900,-250.434 6054.04,-278.458 6053.88,-329.956 5746.12,-329.956 5745.96,-278.458"/>
+<text text-anchor="middle" x="5900" y="-287.5" style="font-family:Times New Roman;font-size:20.00;fill:blue;">importAnnotatorROIOverlapGO</text>
+</g>
+<!-- t141->t144 -->
+<g id="edge287" class="edge"><title>t141->t144</title>
+<path style="fill:none;stroke:blue;" d="M6789,-361C6786,-360 6783,-360 6780,-360 6465,-335 6382,-383 6068,-338 6057,-337 6047,-334 6037,-332"/>
+<polygon style="fill:blue;stroke:blue;" points="6037.49,-328.529 6027,-330 6036.12,-335.393 6037.49,-328.529"/>
+</g>
+<!-- t141->t146 -->
+<g id="edge301" class="edge"><title>t141->t146</title>
+<path style="fill:none;stroke:blue;" d="M6789,-361C6786,-360 6783,-360 6780,-360 6752,-357 5760,-356 5737,-338 5706,-312 5743,-281 5717,-250 5703,-232 5691,-238 5670,-228 5658,-222 5645,-214 5634,-207"/>
+<polygon style="fill:blue;stroke:blue;" points="5635.44,-203.797 5625,-202 5632.04,-209.916 5635.44,-203.797"/>
+</g>
+<!-- t144->t146 -->
+<g id="edge295" class="edge"><title>t144->t146</title>
+<path style="fill:none;stroke:blue;" d="M5802,-268C5751,-254 5694,-238 5670,-228 5657,-222 5643,-215 5631,-207"/>
+<polygon style="fill:blue;stroke:blue;" points="5632.44,-203.797 5622,-202 5629.04,-209.916 5632.44,-203.797"/>
+</g>
+<!-- t145 -->
+<g id="node298" class="node"><title>t145</title>
+<polygon style="fill:none;stroke:blue;" points="6095.8,-281.794 6266,-254.5 6436.2,-281.794 6436.04,-325.956 6095.96,-325.956 6095.8,-281.794"/>
+<polygon style="fill:none;stroke:blue;" points="6091.8,-278.385 6266,-250.449 6440.2,-278.385 6440.02,-329.956 6091.98,-329.956 6091.8,-278.385"/>
+<text text-anchor="middle" x="6266" y="-287.5" style="font-family:Times New Roman;font-size:20.00;fill:blue;">importAnnotatorROIOverlapGOSlim</text>
+</g>
+<!-- t140->t145 -->
+<g id="edge291" class="edge"><title>t140->t145</title>
+<path style="fill:none;stroke:blue;" d="M7073,-361C7070,-360 7067,-360 7064,-360 6794,-340 6723,-373 6454,-338 6442,-336 6430,-334 6418,-332"/>
+<polygon style="fill:blue;stroke:blue;" points="6418.49,-328.529 6408,-330 6417.12,-335.393 6418.49,-328.529"/>
+</g>
+<!-- t140->t146 -->
+<g id="edge303" class="edge"><title>t140->t146</title>
+<path style="fill:none;stroke:blue;" d="M7073,-361C7070,-360 7067,-360 7064,-360 7037,-358 6105,-355 6083,-338 6052,-312 6094,-275 6063,-250 6030,-221 5712,-240 5670,-228 5655,-223 5640,-216 5627,-208"/>
+<polygon style="fill:blue;stroke:blue;" points="5629.1,-205.2 5619,-202 5624.9,-210.8 5629.1,-205.2"/>
+</g>
+<!-- t145->t146 -->
+<g id="edge293" class="edge"><title>t145->t146</title>
+<path style="fill:none;stroke:blue;" d="M6164,-267C6136,-260 6106,-254 6078,-250 5989,-237 5758,-253 5670,-228 5655,-223 5640,-216 5627,-208"/>
+<polygon style="fill:blue;stroke:blue;" points="5629.1,-205.2 5619,-202 5624.9,-210.8 5629.1,-205.2"/>
+</g>
+<!-- t146->t190 -->
+<g id="edge313" class="edge"><title>t146->t190</title>
+<path style="fill:none;stroke:blue;" d="M5594,-166C5594,-155 5594,-141 5594,-128"/>
+<polygon style="fill:blue;stroke:blue;" points="5597.5,-128 5594,-118 5590.5,-128 5597.5,-128"/>
+</g>
+<!-- t190->t191 -->
+<g id="edge317" class="edge"><title>t190->t191</title>
+<path style="fill:none;stroke:blue;" d="M5548,-99C5269,-91 3813,-50 3561,-43"/>
+<polygon style="fill:blue;stroke:blue;" points="3561,-39.5001 3551,-43 3561,-46.5001 3561,-39.5001"/>
+</g>
+<!-- k1 -->
+<g id="node447" class="node"><title>k1</title>
+<polygon style="fill:#90ee90;stroke:green;" points="11082,-1175.5 10934,-1175.5 10930,-1171.5 10930,-1124.5 11078,-1124.5 11082,-1128.5 11082,-1175.5"/>
+<polyline style="fill:none;stroke:green;" points="11078,-1171.5 10930,-1171.5 "/>
+<polyline style="fill:none;stroke:green;" points="11078,-1171.5 11078,-1124.5 "/>
+<polyline style="fill:none;stroke:green;" points="11078,-1171.5 11082,-1175.5 "/>
+<text text-anchor="middle" x="11006" y="-1143.5" style="font-family:Times New Roman;font-size:20.00;">Up-to-date task</text>
+</g>
+<!-- k2 -->
+<g id="node448" class="node"><title>k2</title>
+<polygon style="fill:none;stroke:blue;" points="11065,-1101.5 10951,-1101.5 10947,-1097.5 10947,-1050.5 11061,-1050.5 11065,-1054.5 11065,-1101.5"/>
+<polyline style="fill:none;stroke:blue;" points="11061,-1097.5 10947,-1097.5 "/>
+<polyline style="fill:none;stroke:blue;" points="11061,-1097.5 11061,-1050.5 "/>
+<polyline style="fill:none;stroke:blue;" points="11061,-1097.5 11065,-1101.5 "/>
+<text text-anchor="middle" x="11006" y="-1069.5" style="font-family:Times New Roman;font-size:20.00;fill:blue;">Task to run</text>
+</g>
+<!-- k1->k2 -->
+<g id="edge580" class="edge"><title>k1->k2</title>
+<path style="fill:none;stroke:gray;" d="M11006,-1124C11006,-1120 11006,-1116 11006,-1112"/>
+<polygon style="fill:gray;stroke:gray;" points="11009.5,-1112 11006,-1102 11002.5,-1112 11009.5,-1112"/>
+</g>
+<!-- k3 -->
+<g id="node450" class="node"><title>k3</title>
+<polygon style="fill:none;stroke:blue;stroke-dasharray:5,2;" points="11082,-1028 10934,-1028 10930,-1024 10930,-954 11078,-954 11082,-958 11082,-1028"/>
+<polyline style="fill:none;stroke:blue;stroke-dasharray:5,2;" points="11078,-1024 10930,-1024 "/>
+<polyline style="fill:none;stroke:blue;stroke-dasharray:5,2;" points="11078,-1024 11078,-954 "/>
+<polyline style="fill:none;stroke:blue;stroke-dasharray:5,2;" points="11078,-1024 11082,-1028 "/>
+<text text-anchor="middle" x="11006" y="-996" style="font-family:Times New Roman;font-size:20.00;fill:blue;">Up-to-date task</text>
+<text text-anchor="middle" x="11006" y="-973" style="font-family:Times New Roman;font-size:20.00;fill:blue;">forced to rerun</text>
+</g>
+<!-- k2->k3 -->
+<g id="edge582" class="edge"><title>k2->k3</title>
+<path style="fill:none;stroke:blue;" d="M11006,-1050C11006,-1046 11006,-1042 11006,-1038"/>
+<polygon style="fill:blue;stroke:blue;" points="11009.5,-1038 11006,-1028 11002.5,-1038 11009.5,-1038"/>
+</g>
+<!-- k4 -->
+<g id="node452" class="node"><title>k4</title>
+<polygon style="fill:#fff68f;stroke:black;" points="11067,-931.5 10949,-931.5 10945,-927.5 10945,-880.5 11063,-880.5 11067,-884.5 11067,-931.5"/>
+<polyline style="fill:none;stroke:black;" points="11063,-927.5 10945,-927.5 "/>
+<polyline style="fill:none;stroke:black;" points="11063,-927.5 11063,-880.5 "/>
+<polyline style="fill:none;stroke:black;" points="11063,-927.5 11067,-931.5 "/>
+<text text-anchor="middle" x="11006" y="-899.5" style="font-family:Times New Roman;font-size:20.00;">Final target</text>
+</g>
+<!-- k3->k4 -->
+<g id="edge584" class="edge"><title>k3->k4</title>
+<path style="fill:none;stroke:blue;" d="M11006,-954C11006,-950 11006,-946 11006,-942"/>
+<polygon style="fill:blue;stroke:blue;" points="11009.5,-942 11006,-932 11002.5,-942 11009.5,-942"/>
+</g>
+</g>
+</svg>
diff --git a/doc/images/gallery/gallery_dless.png b/doc/images/gallery/gallery_dless.png
new file mode 100644
index 0000000..e05c9e2
Binary files /dev/null and b/doc/images/gallery/gallery_dless.png differ
diff --git a/doc/images/gallery/gallery_dless.py b/doc/images/gallery/gallery_dless.py
new file mode 100755
index 0000000..1a6fe2f
--- /dev/null
+++ b/doc/images/gallery/gallery_dless.py
@@ -0,0 +1,531 @@
+#!/usr/bin/env python
+"""
+
+ run_dless2_rebecca.py
+ [--log_file PATH]
+ [--verbose]
+
+"""
+
+################################################################################
+#
+# test
+#
+#
+# Copyright (c) 7/13/2010 Rebecca Chodroff
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+#################################################################################
+
+import sys, os, re, shutil
+
+# add self to search path for testing
+if __name__ == '__main__':
+ exe_path = os.path.split(os.path.abspath(sys.argv[0]))[0]
+ module_name = os.path.split(sys.argv[0])[1]
+ module_name = os.path.splitext(module_name)[0]
+else:
+ module_name = __name__
+
+# Use import path from <<../python_modules>>
+if __name__ == '__main__':
+ sys.path.append(os.path.abspath(os.path.join(exe_path,"..", "python_modules")))
+ sys.path.insert(0, "/net/cpp-group/Leo/inprogress/oss_projects/ruffus/installation")
+
+
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# options
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+
+if __name__ == '__main__':
+ from optparse import OptionParser
+ import StringIO
+
+ parser = OptionParser(version="%prog 1.0", usage = "\n\n %prog [options]")
+ parser.add_option("--targets", dest="targets",
+ metavar="INTEGERS",
+ type="string",
+ help="List of comma separated targets.")
+ parser.add_option("--working_dir", dest="working_dir",
+ metavar="PATH",
+ type="string",
+ help="Working directory.")
+ parser.add_option("--starting_dir", dest="starting_dir",
+ metavar="PATH",
+ type="string",
+ help="Starting directory.")
+
+
+ #
+ # general options: verbosity / logging
+ #
+ parser.add_option("-v", "--verbose", dest = "verbose",
+ action="count", default=0,
+ help="Print more verbose messages for each additional verbose level.")
+ parser.add_option("-L", "--log_file", dest="log_file",
+ metavar="FILE",
+ type="string",
+ help="Name and path of log file")
+ parser.add_option("--skip_parameter_logging", dest="skip_parameter_logging",
+ action="store_true", default=False,
+ help="Do not print program parameters to log.")
+ parser.add_option("--debug", dest="debug",
+ action="count", default=0,
+ help="Set default program parameters in debugging mode.")
+
+
+
+
+
+
+
+ #
+ # pipeline
+ #
+ parser.add_option("-t", "--target_tasks", dest="target_tasks",
+ action="append",
+ default = list(),
+ metavar="JOBNAME",
+ type="string",
+ help="Target task(s) of pipeline.")
+ parser.add_option("-j", "--jobs", dest="jobs",
+ default=1,
+ metavar="N",
+ type="int",
+ help="Allow N jobs (commands) to run simultaneously.")
+ parser.add_option("-n", "--just_print", dest="just_print",
+ action="store_true", default=False,
+ help="Don't actually run any commands; just print the pipeline.")
+ parser.add_option("--flowchart", dest="flowchart",
+ metavar="FILE",
+ type="string",
+ help="Don't actually run any commands; just print the pipeline "
+ "as a flowchart.")
+ parser.add_option("--colour_scheme_index", dest="colour_scheme_index",
+ metavar="INTEGER",
+ type="int",
+ help="Index of colour scheme for flow chart.")
+
+ #
+ # Less common pipeline options
+ #
+ parser.add_option("--key_legend_in_graph", dest="key_legend_in_graph",
+ action="store_true", default=False,
+ help="Print out legend and key for dependency graph.")
+ parser.add_option("--forced_tasks", dest="forced_tasks",
+ action="append",
+ default = list(),
+ metavar="JOBNAME",
+ type="string",
+ help="Pipeline task(s) which will be included even if they are up to date.")
+
+ # get help string
+ f = StringIO.StringIO()
+ parser.print_help(f)
+ helpstr = f.getvalue()
+ original_args = " ".join(sys.argv)
+ (options, remaining_args) = parser.parse_args()
+
+
+ #vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
+ # #
+ # Debug: Change these #
+ # #
+ #^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+ if options.debug:
+ options.log_file = os.path.join("run_dless2.log")
+ if not options.verbose:
+ options.verbose = 1
+ if not options.targets:
+ options.targets = "87"
+ if not options.working_dir:
+ options.working_dir = "DLESS"
+ if not options.starting_dir:
+ options.starting_dir = "DUMMY"
+ #vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
+ # #
+ # Debug: Change these #
+ # #
+ #^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+ #
+ # mandatory options
+ #
+ mandatory_options = ["targets", "starting_dir", "working_dir"]
+ def check_mandatory_options (options, mandatory_options, helpstr):
+ """
+ Check if the specified mandatory options have been defined
+ """
+ missing_options = []
+ for o in mandatory_options:
+ if not getattr(options, o):
+ missing_options.append("--" + o)
+
+ if not len(missing_options):
+ return
+
+ raise Exception("Missing mandatory parameter%s: %s.\n\n%s\n\n" %
+ ("s" if len(missing_options) > 1 else "",
+ ", ".join(missing_options),
+ helpstr))
+ check_mandatory_options (options, mandatory_options, helpstr)
+
+
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# imports
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+from ruffus import *
+from ruffus.ruffus_exceptions import JobSignalledBreak
+
+#from json import dumps
+#from collections import defaultdict
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Constants
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+#from read_project_parameter_file import get_keys
+#parameters = get_keys('PARAMETERS')
+#
+#reference_species = parameters['REFERENCE_SPECIES']
+#species = parameters['SPECIES']
+#working_dir = parameters['WORKING_DIR_ROOT']
+#tree = parameters['TREE']
+#ref_sequences = parameters['REFERENCE_SEQUENCES']
+#working_dir= parameters['WORKING_DIR_ROOT']
+#tba_alignments = parameters['TBA_ALIGNMENTS_DIR']
+#tba_projected_alignments = parameters['TBA_PROJECTED_ALIGNMENTS_DIR']
+#fasta_alignments = parameters['FASTA_ALIGNMENTS_DIR']
+#repeats_dir = parameters['REPEATS_DIR']
+#neutral_mods_dir = parameters['NEUTRAL_MODELS_DIR']
+#indel_hist_dir = parameters['INDEL_HISTORY_DIR']
+#indel_mods_dir = parameters['INDEL_MODELS_DIR']
+#
+#python_code = parameters['PYTHON_CODE_DIR']
+#find_ars = parameters['FIND_ARS']
+#maf_project = parameters['MAF_PROJECT_BIN']
+#phylo_fit = parameters['PHYLOFIT_BINARY']
+#msa_view = parameters['MSA_VIEW_BINARY']
+#indel_history = parameters['INDEL_HISTORY_BINARY']
+#tree_doctor = parameters['TREE_DOCTOR_BINARY']
+#indel_fit = parameters['INDEL_FIT_BINARY']
+#dless = parameters['DLESS_BINARY']
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Functions
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+
+#def get_all_alignment_file_names():
+# alignment_file_names = os.listdir(tba_alignments)
+# return alignment_file_names
+#
+#def get_all_targets():
+# alignment_file_names = get_all_alignment_file_names()
+# targets = []
+# for alignment_file_name in alignment_file_names:
+# targets.append(alignment_file_name.split('.')[0])
+# return targets
+#
+#
+#def find_ARs(target):
+# coor_file = os.path.join(ref_sequences, target + '_' + reference_species)
+# coor = open(coor_file, 'r').readline()
+# species, chr, start, strand, other = coor.split(':',4)
+# RM_out = os.path.join(repeats_dir, target + '_' + reference_species + '.out')
+# out_file = os.path.join(repeats_dir, target + '_' + reference_species + '.ar')
+# os.system('perl ' + find_ars + ' ' + RM_out + ' >' + out_file)
+# return chr, start, strand, out_file
+#
+#def write_gff(target):
+# chr, start, strand, out_file = find_ARs(target)
+# file = open(out_file, 'r')
+# line = file.readline()
+# lines = file.readlines()
+# repeats_file = os.path.join(repeats_dir, target + '_' + reference_species + '.gff')
+# repeats = open(repeats_file, 'w')
+# for line in lines:
+# line = line.strip()
+# beg, end, feature = line.split()
+# b = str(int(beg) + int(start))
+# e = str(int(end) + int(start))
+# entry = '\t'.join([chr, 'RepeatMasker', 'AR', b, e,'.', '.','.',feature])
+# repeats.write(entry + '\n')
+# repeats.close()
+# return repeats_file
+
+
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Logger
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+if __name__ == '__main__':
+ import logging
+ import logging.handlers
+
+ MESSAGE = 15
+ logging.addLevelName(MESSAGE, "MESSAGE")
+
+ def setup_std_logging (logger, log_file, verbose):
+ """
+ set up logging using programme options
+ """
+ class debug_filter(logging.Filter):
+ """
+ Ignore INFO messages
+ """
+ def filter(self, record):
+ return logging.INFO != record.levelno
+
+ class NullHandler(logging.Handler):
+ """
+ for when there is no logging
+ """
+ def emit(self, record):
+ pass
+
+ # We are interested in all messages
+ logger.setLevel(logging.DEBUG)
+ has_handler = False
+
+ # log to file if that is specified
+ if log_file:
+ handler = logging.FileHandler(log_file, delay=False)
+ handler.setFormatter(logging.Formatter("%(asctime)s - %(name)s - %(levelname)6s - %(message)s"))
+ handler.setLevel(MESSAGE)
+ logger.addHandler(handler)
+ has_handler = True
+
+ # log to stderr if verbose
+ if verbose:
+ stderrhandler = logging.StreamHandler(sys.stderr)
+ stderrhandler.setFormatter(logging.Formatter(" %(message)s"))
+ stderrhandler.setLevel(logging.DEBUG)
+ if log_file:
+ stderrhandler.addFilter(debug_filter())
+ logger.addHandler(stderrhandler)
+ has_handler = True
+
+ # no logging
+ if not has_handler:
+ logger.addHandler(NullHandler())
+
+
+ #
+ # set up log
+ #
+ logger = logging.getLogger(module_name)
+ setup_std_logging(logger, options.log_file, options.verbose)
+
+ #
+ # Allow logging across Ruffus pipeline
+ #
+ def get_logger (logger_name, args):
+ return logger
+
+ from ruffus.proxy_logger import *
+ (logger_proxy,
+ logging_mutex) = make_shared_logger_and_proxy (get_logger,
+ module_name,
+ {})
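+ # make_shared_logger_and_proxy() hands back a picklable proxy to the real logger
+ # plus a mutex, so jobs running in separate processes can log safely. A typical
+ # use inside a task (illustrative sketch, not used below) would be:
+ #     with logging_mutex:
+ #         logger_proxy.info("task message")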
+
+ #
+ # log programme parameters
+ #
+ if not options.skip_parameter_logging:
+ programme_name = os.path.split(sys.argv[0])[1]
+ logger.info("%s %s" % (programme_name, original_args))
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Pipeline
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+#
+# convert targets into individual strings
+# and identify files in starting_dir
+#
+options.targets = re.split(", *", options.targets)
+if not len(options.targets):
+ raise Exception ("Please specify the targets as a common separated list for --targets")
+starting_files = [os.path.join(options.starting_dir, t + ".maf") for t in options.targets]
+repeats_files = [os.path.join(options.starting_dir, t + ".repeats") for t in options.targets]
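+# With the debug defaults above (--targets 87 --starting_dir DUMMY), starting_files
+# would be ["DUMMY/87.maf"] and repeats_files ["DUMMY/87.repeats"]; real runs pass
+# their own comma separated targets and directories.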
+
+#
+# regex to split path out and substitute working directory
+#
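+# @follows(mkdir(...)) asks Ruffus to create the working directory before the copy
+# tasks run. The regex captures the directory and the file name separately, so an
+# illustrative input such as "data/chr21.maf" ends up copied to
+# os.path.join(options.working_dir, "chr21.maf").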
+@follows(mkdir(options.working_dir, "test"))
+@transform(starting_files, regex(r"(.+/)([^/]+\.maf)"), os.path.join(options.working_dir, r"\2"))
+def copy_maf_into_working_directory(input_file, output_file):
+ """
+ Make copy in working directory
+ """
+ shutil.copyfile(input_file, output_file)
+
+@follows(mkdir(options.working_dir))
+@transform(repeats_files, regex(r"(.+/)([^/]+\.repeats)"), os.path.join(options.working_dir, r"\2"))
+def copy_repeats_into_working_directory(input_file, output_file):
+ """
+ Make copy in working directory
+ """
+ shutil.copyfile(input_file, output_file)
+
+
+#
+# pipes in output from copy_maf_into_working_directory
+#
+# working_dir/target.maf -> working_dir/target.projected_maf
+#
+@transform(copy_maf_into_working_directory, suffix(".maf"), ".projected_maf")
+def project_maf_alignments(input_file, output_file):
+ #os.system(maf_project + ' ' + input_file + ' ' + reference_species + ' > ' + output_file)
+ open(output_file, "w")
+
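+# With suffix(".projected_maf"), r"\1" stands for the input name minus the suffix,
+# so add_inputs(r"\1.repeats") pairs e.g. "DLESS/87.projected_maf" with
+# "DLESS/87.repeats" (illustrative names), and the output replaces the suffix to
+# give "DLESS/87.neutral_model".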
+@transform(project_maf_alignments, suffix(".projected_maf"), add_inputs(r"\1.repeats"), r".neutral_model")
+@follows(copy_repeats_into_working_directory)
+def generate_neutral_model(input_files, output_file):
+ maf_file, repeats_file = input_files
+ #cmd_str = (phylo_fit + ' --tree ' + tree
+ # + ' --features ' + repeats_file
+ # + ' --do-cats AR --out-root '
+ # + output_file + ' --msa-format MAF '
+ # + maf_file)
+ #os.system(cmd_str)
+ #run_cmd(cmd_str, "generate neutral model")
+ #queue_cmd_prefix = "qrsh -now n -cwd -p -6 -v BASH_ENV=~/.bashrc -q medium_jobs.q"
+ #target_name = os.path.splitext("maf_file")[0]
+ #run_cmd(cmd_str, "generate neutral model", queue_cmd_prefix = queue_cmd_prefix, job_name = target_name)
+ open(output_file, "w")
+
+
+#must convert maf to fasta for indel history program
+@transform(project_maf_alignments, suffix(".projected_maf"), ".fa")
+def convert_maf2fasta(input_file, output_file):
+ #species = (open(species, 'r')).readline()
+ #os.system(msa_view + ' ' + input_file\
+ # + ' --soft-masked --seqs '\
+ # + species + ' --in-format MAF > ' + output_file)
+ open(output_file, "w")
+
+
+@follows(generate_neutral_model)
+@transform(convert_maf2fasta, regex("(.+).fa"), add_inputs(r"\1.neutral_model"), r"\1.indel_history")
+def generate_indel_history(input_files, output_file):
+ fasta_file, neutral_model = input_files
+ #os.system(indel_history + ' ' + fasta_file + ' ' + neutral_model + ' > ' + output_file)
+ open(output_file, "w")
+
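+# Sketch of how the dless indel parameters would be derived. It is only referenced
+# from commented-out code in run_dless() and assumes tree_doctor, indel_fit (see the
+# commented-out PARAMETERS block above) and a tree_file path are defined elsewhere.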
+def parameters_for_dless(indel_history_file, neutral_model):
+ os.system(tree_doctor + ' -t ' + neutral_model + ' > ' + tree_file)
+ cmd = indel_fit + ' ' + indel_history_file + ' ' + tree_file
+ fin,fout=os.popen4(cmd)
+ indel_mod = fout.read()
+ indelmod=[]
+ for n in [2,5,8]:
+ indelmod.append(indel_mod.split()[n].strip(','))
+ return indelmod
+
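+# Note: os.popen4 above is Python 2 only.  An equivalent sketch using the
+# standard subprocess module (illustrative, reusing the same indel_fit and
+# tree_file values) would be:
+#
+#   import subprocess
+#   indel_mod = subprocess.Popen([indel_fit, indel_history_file, tree_file],
+#                                stdout=subprocess.PIPE).communicate()[0]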
+
+@follows(generate_neutral_model)
+@follows(convert_maf2fasta)
+@transform(generate_indel_history,
+ suffix(".indel_history"),
+ add_inputs(r"\1.neutral_model", r"\1.fa"),
+ ".dless.out")
+def run_dless(input_files, output_file):
+ indel_history_file, neutral_model, fasta_file = input_files
+ #indelmod = parameters_for_dless(indel_history_file, neutral_model)
+ #cmd = ' '.join([dless,'-I',','.join(indel_params), '-H', indel_history_file,
+ # alignment, neutral_mod, '>', output_file])
+ #os.system(cmd)
+ open(output_file, "w")
+
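+# Overall task graph built by the decorators above (per target):
+#
+#   copy_maf_into_working_directory     -> project_maf_alignments
+#   project_maf_alignments              -> generate_neutral_model, convert_maf2fasta
+#   copy_repeats_into_working_directory -> generate_neutral_model   (via add_inputs)
+#   generate_neutral_model + convert_maf2fasta -> generate_indel_history
+#   generate_indel_history (+ .neutral_model, .fa) -> run_dless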
+
+
+
+
+
+
+
+
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Main logic
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
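+# Typical invocation (illustrative only: apart from --targets, which appears in
+# the error message above, the flag names are assumptions, not taken from this
+# script's option parsing):
+#
+#   python <this_script> --targets chr21,chr22 --jobs 4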
+if __name__ == '__main__':
+ if options.just_print:
+ pipeline_printout(sys.stdout, options.target_tasks, options.forced_tasks,
+ verbose=options.verbose)
+
+ elif options.flowchart:
+ pipeline_printout_graph ( open(options.flowchart, "w"),
+ os.path.splitext(options.flowchart)[1][1:],
+ options.target_tasks,
+ options.forced_tasks,
+ no_key_legend = not options.key_legend_in_graph,
+ minimal_key_legend = True,
+ user_colour_scheme = {"colour_scheme_index": options.colour_scheme_index},
+ pipeline_name = "dless2")
+ #graph_colour_demo_printout (open(options.flowchart, "w"),
+ # os.path.splitext(options.flowchart)[1][1:])
+ else:
+ pipeline_run(options.target_tasks, options.forced_tasks,
+ multiprocess = options.jobs,
+ logger = stderr_logger,
+ verbose = options.verbose)
+
+
+
+
+
+
+
+
diff --git a/doc/images/gallery/gallery_dless.svg b/doc/images/gallery/gallery_dless.svg
new file mode 100644
index 0000000..c1fcd49
--- /dev/null
+++ b/doc/images/gallery/gallery_dless.svg
@@ -0,0 +1,197 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.0//EN"
+ "http://www.w3.org/TR/2001/REC-SVG-20010904/DTD/svg10.dtd" [
+ <!ATTLIST svg xmlns:xlink CDATA #FIXED "http://www.w3.org/1999/xlink">
+]>
+<!-- Generated by Graphviz version 2.20.2 (Mon Aug 4 08:59:22 UTC 2008)
+ For user: (lg) Leo Goodstadt -->
+<!-- Title: dless2 Pages: 1 -->
+<svg width="834pt" height="480pt"
+ viewBox="0.00 0.00 834.00 480.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 476)">
+<title>dless2</title>
+<polygon style="fill:white;stroke:white;" points="-4,4 -4,-476 830,-476 830,4 -4,4"/>
+<g id="cluster2" class="cluster"><title>clustertasks</title>
+<polygon style="fill:none;stroke:black;" points="8,-16 8,-456 574,-456 574,-16 8,-16"/>
+<text text-anchor="middle" x="291" y="-424.892" style="font-family:Times New Roman;font-size:30.0px;fill:#ff3232;">dless2</text>
+</g>
+<g id="cluster3" class="cluster"><title>clusterkey1</title>
+<polygon style="fill:#f6f4f4;stroke:#f6f4f4;" points="582,-132 582,-464 818,-464 818,-132 582,-132"/>
+<text text-anchor="middle" x="700" y="-432.892" style="font-family:Times New Roman;font-size:30.0px;">Key:</text>
+</g>
+<!-- t1 -->
+<g id="node2" class="node"><title>t1</title>
+<polygon style="fill:#b8cc6e;stroke:#006000;" points="551,-406 289,-406 285,-402 285,-370 547,-370 551,-374 551,-406"/>
+<polyline style="fill:none;stroke:#006000;" points="547,-402 285,-402 "/>
+<polyline style="fill:none;stroke:#006000;" points="547,-402 547,-370 "/>
+<polyline style="fill:none;stroke:#006000;" points="547,-402 551,-406 "/>
+<text text-anchor="middle" x="418" y="-381.392" style="font-family:Times New Roman;font-size:20.0px;fill:#006000;">Make directories [DLESS, test]</text>
+</g>
+<!-- t0 -->
+<g id="node3" class="node"><title>t0</title>
+<polygon style="fill:#b8cc6e;stroke:#006000;" points="566,-332 274,-332 270,-328 270,-296 562,-296 566,-300 566,-332"/>
+<polyline style="fill:none;stroke:#006000;" points="562,-328 270,-328 "/>
+<polyline style="fill:none;stroke:#006000;" points="562,-328 562,-296 "/>
+<polyline style="fill:none;stroke:#006000;" points="562,-328 566,-332 "/>
+<text text-anchor="middle" x="418" y="-307.392" style="font-family:Times New Roman;font-size:20.0px;fill:#006000;">copy_maf_into_working_directory</text>
+</g>
+<!-- t1->t0 -->
+<g id="edge3" class="edge"><title>t1->t0</title>
+<path style="fill:none;stroke:gray;" d="M418,-370C418,-361 418,-352 418,-342"/>
+<polygon style="fill:gray;stroke:gray;" points="421.5,-342 418,-332 414.5,-342 421.5,-342"/>
+</g>
+<!-- t4 -->
+<g id="node5" class="node"><title>t4</title>
+<polygon style="fill:#b8cc6e;stroke:#006000;" points="566,-258 360,-258 356,-254 356,-222 562,-222 566,-226 566,-258"/>
+<polyline style="fill:none;stroke:#006000;" points="562,-254 356,-254 "/>
+<polyline style="fill:none;stroke:#006000;" points="562,-254 562,-222 "/>
+<polyline style="fill:none;stroke:#006000;" points="562,-254 566,-258 "/>
+<text text-anchor="middle" x="461" y="-233.392" style="font-family:Times New Roman;font-size:20.0px;fill:#006000;">project_maf_alignments</text>
+</g>
+<!-- t0->t4 -->
+<g id="edge5" class="edge"><title>t0->t4</title>
+<path style="fill:none;stroke:gray;" d="M429,-296C434,-287 440,-277 445,-267"/>
+<polygon style="fill:gray;stroke:gray;" points="448.203,-268.441 450,-258 442.084,-265.042 448.203,-268.441"/>
+</g>
+<!-- t6 -->
+<g id="node7" class="node"><title>t6</title>
+<polygon style="fill:none;stroke:black;" points="544,-184 382,-184 378,-180 378,-148 540,-148 544,-152 544,-184"/>
+<polyline style="fill:none;stroke:black;" points="540,-180 378,-180 "/>
+<polyline style="fill:none;stroke:black;" points="540,-180 540,-148 "/>
+<polyline style="fill:none;stroke:black;" points="540,-180 544,-184 "/>
+<text text-anchor="middle" x="461" y="-159.392" style="font-family:Times New Roman;font-size:20.0px;">convert_maf2fasta</text>
+</g>
+<!-- t4->t6 -->
+<g id="edge7" class="edge"><title>t4->t6</title>
+<path style="fill:none;stroke:gray;" d="M461,-222C461,-213 461,-204 461,-194"/>
+<polygon style="fill:gray;stroke:gray;" points="464.5,-194 461,-184 457.5,-194 464.5,-194"/>
+</g>
+<!-- t5 -->
+<g id="node12" class="node"><title>t5</title>
+<polygon style="fill:#ebf3ff;stroke:#0044a0;" points="352,-184 146,-184 142,-180 142,-148 348,-148 352,-152 352,-184"/>
+<polyline style="fill:none;stroke:#0044a0;" points="348,-180 142,-180 "/>
+<polyline style="fill:none;stroke:#0044a0;" points="348,-180 348,-148 "/>
+<polyline style="fill:none;stroke:#0044a0;" points="348,-180 352,-184 "/>
+<text text-anchor="middle" x="247" y="-159.392" style="font-family:Times New Roman;font-size:20.0px;fill:#0044a0;">generate_neutral_model</text>
+</g>
+<!-- t4->t5 -->
+<g id="edge11" class="edge"><title>t4->t5</title>
+<path style="fill:none;stroke:gray;" d="M409,-222C379,-211 341,-199 309,-187"/>
+<polygon style="fill:gray;stroke:gray;" points="309.584,-183.521 299,-184 307.573,-190.226 309.584,-183.521"/>
+</g>
+<!-- t7 -->
+<g id="node15" class="node"><title>t7</title>
+<polygon style="fill:#ebf3ff;stroke:#0044a0;" points="450,-118 254,-118 250,-114 250,-82 446,-82 450,-86 450,-118"/>
+<polyline style="fill:none;stroke:#0044a0;" points="446,-114 250,-114 "/>
+<polyline style="fill:none;stroke:#0044a0;" points="446,-114 446,-82 "/>
+<polyline style="fill:none;stroke:#0044a0;" points="446,-114 450,-118 "/>
+<text text-anchor="middle" x="350" y="-93.392" style="font-family:Times New Roman;font-size:20.0px;fill:#0044a0;">generate_indel_history</text>
+</g>
+<!-- t6->t7 -->
+<g id="edge15" class="edge"><title>t6->t7</title>
+<path style="fill:none;stroke:#0044a0;" d="M431,-148C418,-141 403,-131 390,-123"/>
+<polygon style="fill:#0044a0;stroke:#0044a0;" points="391.441,-119.797 381,-118 388.042,-125.916 391.441,-119.797"/>
+</g>
+<!-- t8 -->
+<g id="node18" class="node"><title>t8</title>
+<polygon style="fill:#efa03b;stroke:black;" points="396,-60 308,-60 304,-56 304,-24 392,-24 396,-28 396,-60"/>
+<polyline style="fill:none;stroke:black;" points="392,-56 304,-56 "/>
+<polyline style="fill:none;stroke:black;" points="392,-56 392,-24 "/>
+<polyline style="fill:none;stroke:black;" points="392,-56 396,-60 "/>
+<text text-anchor="middle" x="350" y="-35.392" style="font-family:Times New Roman;font-size:20.0px;">run_dless</text>
+</g>
+<!-- t6->t8 -->
+<g id="edge21" class="edge"><title>t6->t8</title>
+<path style="fill:none;stroke:#0044a0;" d="M466,-148C470,-129 473,-101 459,-82 452,-72 429,-64 406,-56"/>
+<polygon style="fill:#0044a0;stroke:#0044a0;" points="406.584,-52.5212 396,-53 404.573,-59.2259 406.584,-52.5212"/>
+</g>
+<!-- t3 -->
+<g id="node9" class="node"><title>t3</title>
+<polygon style="fill:#b8cc6e;stroke:#006000;" points="252,-332 26,-332 22,-328 22,-296 248,-296 252,-300 252,-332"/>
+<polyline style="fill:none;stroke:#006000;" points="248,-328 22,-328 "/>
+<polyline style="fill:none;stroke:#006000;" points="248,-328 248,-296 "/>
+<polyline style="fill:none;stroke:#006000;" points="248,-328 252,-332 "/>
+<text text-anchor="middle" x="137" y="-307.392" style="font-family:Times New Roman;font-size:20.0px;fill:#006000;">Make directories [DLESS]</text>
+</g>
+<!-- t2 -->
+<g id="node10" class="node"><title>t2</title>
+<polygon style="fill:#ebf3ff;stroke:#0044a0;" points="338,-258 20,-258 16,-254 16,-222 334,-222 338,-226 338,-258"/>
+<polyline style="fill:none;stroke:#0044a0;" points="334,-254 16,-254 "/>
+<polyline style="fill:none;stroke:#0044a0;" points="334,-254 334,-222 "/>
+<polyline style="fill:none;stroke:#0044a0;" points="334,-254 338,-258 "/>
+<text text-anchor="middle" x="177" y="-233.392" style="font-family:Times New Roman;font-size:20.0px;fill:#0044a0;">copy_repeats_into_working_directory</text>
+</g>
+<!-- t3->t2 -->
+<g id="edge9" class="edge"><title>t3->t2</title>
+<path style="fill:none;stroke:gray;" d="M147,-296C152,-287 157,-277 162,-267"/>
+<polygon style="fill:gray;stroke:gray;" points="165.203,-268.441 167,-258 159.084,-265.042 165.203,-268.441"/>
+</g>
+<!-- t2->t5 -->
+<g id="edge13" class="edge"><title>t2->t5</title>
+<path style="fill:none;stroke:#0044a0;" d="M194,-222C203,-212 213,-202 223,-192"/>
+<polygon style="fill:#0044a0;stroke:#0044a0;" points="226.049,-193.831 230,-184 220.781,-189.221 226.049,-193.831"/>
+</g>
+<!-- t5->t7 -->
+<g id="edge17" class="edge"><title>t5->t7</title>
+<path style="fill:none;stroke:#0044a0;" d="M275,-148C287,-141 301,-131 313,-123"/>
+<polygon style="fill:#0044a0;stroke:#0044a0;" points="314.958,-125.916 322,-118 311.559,-119.797 314.958,-125.916"/>
+</g>
+<!-- t5->t8 -->
+<g id="edge23" class="edge"><title>t5->t8</title>
+<path style="fill:none;stroke:#0044a0;" d="M240,-148C234,-129 228,-101 241,-82 248,-72 271,-63 294,-56"/>
+<polygon style="fill:#0044a0;stroke:#0044a0;" points="295.427,-59.2259 304,-53 293.416,-52.5212 295.427,-59.2259"/>
+</g>
+<!-- t7->t8 -->
+<g id="edge19" class="edge"><title>t7->t8</title>
+<path style="fill:none;stroke:#0044a0;" d="M350,-82C350,-78 350,-74 350,-70"/>
+<polygon style="fill:#0044a0;stroke:#0044a0;" points="353.5,-70 350,-60 346.5,-70 353.5,-70"/>
+</g>
+<!-- k1_1 -->
+<g id="node23" class="node"><title>k1_1</title>
+<polygon style="fill:#b8cc6e;stroke:#006000;" points="776,-413.5 628,-413.5 624,-409.5 624,-362.5 772,-362.5 776,-366.5 776,-413.5"/>
+<polyline style="fill:none;stroke:#006000;" points="772,-409.5 624,-409.5 "/>
+<polyline style="fill:none;stroke:#006000;" points="772,-409.5 772,-362.5 "/>
+<polyline style="fill:none;stroke:#006000;" points="772,-409.5 776,-413.5 "/>
+<text text-anchor="middle" x="700" y="-381.392" style="font-family:Times New Roman;font-size:20.0px;fill:#006000;">Up-to-date task</text>
+</g>
+<!-- k2_1 -->
+<g id="node24" class="node"><title>k2_1</title>
+<polygon style="fill:none;stroke:black;" points="810,-339.5 594,-339.5 590,-335.5 590,-288.5 806,-288.5 810,-292.5 810,-339.5"/>
+<polyline style="fill:none;stroke:black;" points="806,-335.5 590,-335.5 "/>
+<polyline style="fill:none;stroke:black;" points="806,-335.5 806,-288.5 "/>
+<polyline style="fill:none;stroke:black;" points="806,-335.5 810,-339.5 "/>
+<text text-anchor="middle" x="700" y="-307.392" style="font-family:Times New Roman;font-size:20.0px;">Explicitly specified task</text>
+</g>
+<!-- k1_1->k2_1 -->
+<g id="edge26" class="edge"><title>k1_1->k2_1</title>
+<path style="fill:none;stroke:gray;" d="M700,-362C700,-358 700,-354 700,-350"/>
+<polygon style="fill:gray;stroke:gray;" points="703.5,-350 700,-340 696.5,-350 703.5,-350"/>
+</g>
+<!-- k3_1 -->
+<g id="node26" class="node"><title>k3_1</title>
+<polygon style="fill:#ebf3ff;stroke:#0044a0;" points="758,-265.5 646,-265.5 642,-261.5 642,-214.5 754,-214.5 758,-218.5 758,-265.5"/>
+<polyline style="fill:none;stroke:#0044a0;" points="754,-261.5 642,-261.5 "/>
+<polyline style="fill:none;stroke:#0044a0;" points="754,-261.5 754,-214.5 "/>
+<polyline style="fill:none;stroke:#0044a0;" points="754,-261.5 758,-265.5 "/>
+<text text-anchor="middle" x="700" y="-233.392" style="font-family:Times New Roman;font-size:20.0px;fill:#0044a0;">Task to run</text>
+</g>
+<!-- k2_1->k3_1 -->
+<g id="edge28" class="edge"><title>k2_1->k3_1</title>
+<path style="fill:none;stroke:#0044a0;" d="M700,-288C700,-284 700,-280 700,-276"/>
+<polygon style="fill:#0044a0;stroke:#0044a0;" points="703.5,-276 700,-266 696.5,-276 703.5,-276"/>
+</g>
+<!-- k4_1 -->
+<g id="node28" class="node"><title>k4_1</title>
+<polygon style="fill:#efa03b;stroke:black;" points="760,-191.5 644,-191.5 640,-187.5 640,-140.5 756,-140.5 760,-144.5 760,-191.5"/>
+<polyline style="fill:none;stroke:black;" points="756,-187.5 640,-187.5 "/>
+<polyline style="fill:none;stroke:black;" points="756,-187.5 756,-140.5 "/>
+<polyline style="fill:none;stroke:black;" points="756,-187.5 760,-191.5 "/>
+<text text-anchor="middle" x="700" y="-159.392" style="font-family:Times New Roman;font-size:20.0px;">Final target</text>
+</g>
+<!-- k3_1->k4_1 -->
+<g id="edge30" class="edge"><title>k3_1->k4_1</title>
+<path style="fill:none;stroke:#0044a0;" d="M700,-214C700,-210 700,-206 700,-202"/>
+<polygon style="fill:#0044a0;stroke:#0044a0;" points="703.5,-202 700,-192 696.5,-202 703.5,-202"/>
+</g>
+</g>
+</svg>
diff --git a/doc/images/gallery/gallery_rna_seq.png b/doc/images/gallery/gallery_rna_seq.png
new file mode 100644
index 0000000..19f16ed
Binary files /dev/null and b/doc/images/gallery/gallery_rna_seq.png differ
diff --git a/doc/images/gallery/gallery_rna_seq.svg b/doc/images/gallery/gallery_rna_seq.svg
new file mode 100644
index 0000000..43a6e5b
--- /dev/null
+++ b/doc/images/gallery/gallery_rna_seq.svg
@@ -0,0 +1,672 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.0//EN"
+ "http://www.w3.org/TR/2001/REC-SVG-20010904/DTD/svg10.dtd" [
+ <!ATTLIST svg xmlns:xlink CDATA #FIXED "http://www.w3.org/1999/xlink">
+]>
+<!-- Generated by Graphviz version 2.20.2 (Mon Aug 4 08:59:22 UTC 2008)
+ For user: Christoffer Nellaker -->
+<!-- Title: tree Pages: 1 -->
+<svg width="347pt" height="576pt"
+ viewBox="0.00 0.00 346.75 576.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<g id="graph0" class="graph" transform="scale(0.358209 0.358209) rotate(0) translate(4 1604)">
+<title>RNASeq Pipeline</title>
+<polygon style="fill:white;stroke:white;" points="-4,4 -4,-1604 964,-1604 964,4 -4,4"/>
+<g id="cluster2" class="cluster"><title>clustertasks</title>
+<polygon style="fill:none;stroke:black;" points="8,-16 8,-1592 952,-1592 952,-16 8,-16"/>
+<text text-anchor="middle" x="480" y="-1561" style="font-family:Times New Roman;font-size:20.00pt;fill:red;"><tspan font-weight = "bold">RNASeq Pipeline:</tspan></text>
+</g>
+<!-- t1 -->
+<g id="node2" class="node"><title>t1</title>
+<polygon style="fill:#90ee90;stroke:green;" points="381,-1542 239,-1542 235,-1538 235,-1506 377,-1506 381,-1510 381,-1542"/>
+<polyline style="fill:none;stroke:green;" points="377,-1538 235,-1538 "/>
+<polyline style="fill:none;stroke:green;" points="377,-1538 377,-1506 "/>
+<polyline style="fill:none;stroke:green;" points="377,-1538 381,-1542 "/>
+<text text-anchor="middle" x="308" y="-1517.5" style="font-family:Times New Roman;font-size:20.00;">prepare_indexes</text>
+</g>
+<!-- t3 -->
+<g id="node3" class="node"><title>t3</title>
+<polygon style="fill:#90ee90;stroke:green;" points="454,-1484 276,-1484 272,-1480 272,-1448 450,-1448 454,-1452 454,-1484"/>
+<polyline style="fill:none;stroke:green;" points="450,-1480 272,-1480 "/>
+<polyline style="fill:none;stroke:green;" points="450,-1480 450,-1448 "/>
+<polyline style="fill:none;stroke:green;" points="450,-1480 454,-1484 "/>
+<text text-anchor="middle" x="363" y="-1459.5" style="font-family:Times New Roman;font-size:20.00;">symbolic_link_make</text>
+</g>
+<!-- t1->t3 -->
+<g id="edge3" class="edge"><title>t1->t3</title>
+<path style="fill:none;stroke:gray;" d="M325,-1506C329,-1501 334,-1496 339,-1491"/>
+<polygon style="fill:gray;stroke:gray;" points="341.404,-1493.55 346,-1484 336.454,-1488.6 341.404,-1493.55"/>
+</g>
+<!-- t2 -->
+<g id="node46" class="node"><title>t2</title>
+<polygon style="fill:#90ee90;stroke:green;" points="610,-814 420,-814 416,-810 416,-778 606,-778 610,-782 610,-814"/>
+<polyline style="fill:none;stroke:green;" points="606,-810 416,-810 "/>
+<polyline style="fill:none;stroke:green;" points="606,-810 606,-778 "/>
+<polyline style="fill:none;stroke:green;" points="606,-810 610,-814 "/>
+<text text-anchor="middle" x="513" y="-789.5" style="font-family:Times New Roman;font-size:20.00;">chromosome_size_list</text>
+</g>
+<!-- t1->t2 -->
+<g id="edge43" class="edge"><title>t1->t2</title>
+<path style="fill:none;stroke:gray;" d="M282,-1506C275,-1500 268,-1492 263,-1484 242,-1447 266,-1426 243,-1390 212,-1340 175,-1356 139,-1310 123,-1287 122,-1279 116,-1252 112,-1229 114,-1222 116,-1200 118,-1190 120,-1187 121,-1178 128,-1144 131,-1136 131,-1102 131,-1102 131,-1102 131,-920 131,-824 169,-882 344,-836 370,-829 398,-823 423,-816"/>
+<polygon style="fill:gray;stroke:gray;" points="423.881,-819.393 433,-814 422.508,-812.529 423.881,-819.393"/>
+</g>
+<!-- t4 -->
+<g id="node6" class="node"><title>t4</title>
+<polygon style="fill:#90ee90;stroke:green;" points="419,-1426 311,-1426 307,-1422 307,-1390 415,-1390 419,-1394 419,-1426"/>
+<polyline style="fill:none;stroke:green;" points="415,-1422 307,-1422 "/>
+<polyline style="fill:none;stroke:green;" points="415,-1422 415,-1390 "/>
+<polyline style="fill:none;stroke:green;" points="415,-1422 419,-1426 "/>
+<text text-anchor="middle" x="363" y="-1401.5" style="font-family:Times New Roman;font-size:20.00;">TopHat_run</text>
+</g>
+<!-- t3->t4 -->
+<g id="edge5" class="edge"><title>t3->t4</title>
+<path style="fill:none;stroke:gray;" d="M363,-1448C363,-1444 363,-1440 363,-1436"/>
+<polygon style="fill:gray;stroke:gray;" points="366.5,-1436 363,-1426 359.5,-1436 366.5,-1436"/>
+</g>
+<!-- t0 -->
+<g id="node5" class="node"><title>t0</title>
+<polygon style="fill:#90ee90;stroke:green;" points="226,-1484 42,-1484 38,-1480 38,-1448 222,-1448 226,-1452 226,-1484"/>
+<polyline style="fill:none;stroke:green;" points="222,-1480 38,-1480 "/>
+<polyline style="fill:none;stroke:green;" points="222,-1480 222,-1448 "/>
+<polyline style="fill:none;stroke:green;" points="222,-1480 226,-1484 "/>
+<text text-anchor="middle" x="132" y="-1459.5" style="font-family:Times New Roman;font-size:20.00;">export_to_fastq_files</text>
+</g>
+<!-- t0->t4 -->
+<g id="edge85" class="edge"><title>t0->t4</title>
+<path style="fill:none;stroke:gray;" d="M204,-1448C234,-1441 268,-1432 297,-1425"/>
+<polygon style="fill:gray;stroke:gray;" points="298.427,-1428.23 307,-1422 296.416,-1421.52 298.427,-1428.23"/>
+</g>
+<!-- t43 -->
+<g id="node50" class="node"><title>t43</title>
+<polygon style="fill:#90ee90;stroke:green;" points="234,-1426 20,-1426 16,-1422 16,-1390 230,-1390 234,-1394 234,-1426"/>
+<polyline style="fill:none;stroke:green;" points="230,-1422 16,-1422 "/>
+<polyline style="fill:none;stroke:green;" points="230,-1422 230,-1390 "/>
+<polyline style="fill:none;stroke:green;" points="230,-1422 234,-1426 "/>
+<text text-anchor="middle" x="125" y="-1401.5" style="font-family:Times New Roman;font-size:20.00;">prepare_fastq_for_velvet</text>
+</g>
+<!-- t0->t43 -->
+<g id="edge47" class="edge"><title>t0->t43</title>
+<path style="fill:none;stroke:gray;" d="M130,-1448C129,-1444 129,-1440 128,-1436"/>
+<polygon style="fill:gray;stroke:gray;" points="131.478,-1435.6 127,-1426 124.512,-1436.3 131.478,-1435.6"/>
+</g>
+<!-- t6 -->
+<g id="node8" class="node"><title>t6</title>
+<polygon style="fill:#90ee90;stroke:green;" points="458,-1368 272,-1368 268,-1364 268,-1332 454,-1332 458,-1336 458,-1368"/>
+<polyline style="fill:none;stroke:green;" points="454,-1364 268,-1364 "/>
+<polyline style="fill:none;stroke:green;" points="454,-1364 454,-1332 "/>
+<polyline style="fill:none;stroke:green;" points="454,-1364 458,-1368 "/>
+<text text-anchor="middle" x="363" y="-1343.5" style="font-family:Times New Roman;font-size:20.00;">tophatSAM_to_BAM</text>
+</g>
+<!-- t4->t6 -->
+<g id="edge7" class="edge"><title>t4->t6</title>
+<path style="fill:none;stroke:gray;" d="M363,-1390C363,-1386 363,-1382 363,-1378"/>
+<polygon style="fill:gray;stroke:gray;" points="366.5,-1378 363,-1368 359.5,-1378 366.5,-1378"/>
+</g>
+<!-- t46 -->
+<g id="node82" class="node"><title>t46</title>
+<polygon style="fill:none;stroke:blue;" points="451,-118 313,-118 309,-114 309,-82 447,-82 451,-86 451,-118"/>
+<polyline style="fill:none;stroke:blue;" points="447,-114 309,-114 "/>
+<polyline style="fill:none;stroke:blue;" points="447,-114 447,-82 "/>
+<polyline style="fill:none;stroke:blue;" points="447,-114 451,-118 "/>
+<text text-anchor="middle" x="380" y="-93.5" style="font-family:Times New Roman;font-size:20.00;fill:blue;">compile_results</text>
+</g>
+<!-- t4->t46 -->
+<g id="edge127" class="edge"><title>t4->t46</title>
+<path style="fill:none;stroke:gray;" d="M419,-1403C565,-1388 943,-1344 943,-1292 943,-1292 943,-1292 943,-216 943,-185 935,-172 848,-140 779,-114 576,-105 461,-101"/>
+<polygon style="fill:gray;stroke:gray;" points="461,-97.5001 451,-101 461,-104.5 461,-97.5001"/>
+</g>
+<!-- t7 -->
+<g id="node10" class="node"><title>t7</title>
+<polygon style="fill:#90ee90;stroke:green;" points="523,-1310 227,-1310 223,-1306 223,-1274 519,-1274 523,-1278 523,-1310"/>
+<polyline style="fill:none;stroke:green;" points="519,-1306 223,-1306 "/>
+<polyline style="fill:none;stroke:green;" points="519,-1306 519,-1274 "/>
+<polyline style="fill:none;stroke:green;" points="519,-1306 523,-1310 "/>
+<text text-anchor="middle" x="373" y="-1285.5" style="font-family:Times New Roman;font-size:20.00;">get_req_qnames_from_Tophat_run</text>
+</g>
+<!-- t6->t7 -->
+<g id="edge9" class="edge"><title>t6->t7</title>
+<path style="fill:none;stroke:gray;" d="M366,-1332C367,-1328 367,-1324 368,-1320"/>
+<polygon style="fill:gray;stroke:gray;" points="371.471,-1320.49 370,-1310 364.607,-1319.12 371.471,-1320.49"/>
+</g>
+<!-- t22 -->
+<g id="node60" class="node"><title>t22</title>
+<polygon style="fill:#90ee90;stroke:green;" points="786,-814 632,-814 628,-810 628,-778 782,-778 786,-782 786,-814"/>
+<polyline style="fill:none;stroke:green;" points="782,-810 628,-810 "/>
+<polyline style="fill:none;stroke:green;" points="782,-810 782,-778 "/>
+<polyline style="fill:none;stroke:green;" points="782,-810 786,-814 "/>
+<text text-anchor="middle" x="707" y="-789.5" style="font-family:Times New Roman;font-size:20.00;">uniques_repairing</text>
+</g>
+<!-- t6->t22 -->
+<g id="edge97" class="edge"><title>t6->t22</title>
+<path style="fill:none;stroke:gray;" d="M279,-1332C258,-1326 235,-1319 214,-1310 171,-1290 148,-1293 125,-1252 114,-1231 120,-1222 125,-1200 128,-1189 133,-1188 136,-1178 148,-1145 151,-1136 151,-1102 151,-1102 151,-1102 151,-920 151,-914 460,-848 618,-815"/>
+<polygon style="fill:gray;stroke:gray;" points="618.881,-818.393 628,-813 617.508,-811.529 618.881,-818.393"/>
+</g>
+<!-- t8 -->
+<g id="node12" class="node"><title>t8</title>
+<polygon style="fill:#90ee90;stroke:green;" points="607.975,-1232.77 373,-1247.92 138.025,-1232.77 138.244,-1208.27 607.756,-1208.27 607.975,-1232.77"/>
+<polygon style="fill:none;stroke:green;" points="612,-1236.52 373,-1251.93 134,-1236.52 134.288,-1204.27 611.712,-1204.27 612,-1236.52"/>
+<text text-anchor="middle" x="373" y="-1219.5" style="font-family:Times New Roman;font-size:20.00;">prepare_raw_files_of_unmapped_reads</text>
+</g>
+<!-- t7->t8 -->
+<g id="edge11" class="edge"><title>t7->t8</title>
+<path style="fill:none;stroke:gray;" d="M373,-1274C373,-1270 373,-1266 373,-1262"/>
+<polygon style="fill:gray;stroke:gray;" points="376.5,-1262 373,-1252 369.5,-1262 376.5,-1262"/>
+</g>
+<!-- t17 -->
+<g id="node23" class="node"><title>t17</title>
+<polygon style="fill:#90ee90;stroke:green;" points="797,-1004 503,-1004 499,-1000 499,-968 793,-968 797,-972 797,-1004"/>
+<polyline style="fill:none;stroke:green;" points="793,-1000 499,-1000 "/>
+<polyline style="fill:none;stroke:green;" points="793,-1000 793,-968 "/>
+<polyline style="fill:none;stroke:green;" points="793,-1000 797,-1004 "/>
+<text text-anchor="middle" x="648" y="-979.5" style="font-family:Times New Roman;font-size:20.00;">get_req_qnames_post_xenoMRNA</text>
+</g>
+<!-- t7->t17 -->
+<g id="edge91" class="edge"><title>t7->t17</title>
+<path style="fill:none;stroke:gray;" d="M523,-1275C571,-1268 613,-1260 621,-1252 650,-1222 631,-1201 631,-1160 631,-1160 631,-1160 631,-1102 631,-1072 637,-1038 642,-1014"/>
+<polygon style="fill:gray;stroke:gray;" points="645.471,-1014.49 644,-1004 638.607,-1013.12 645.471,-1014.49"/>
+</g>
+<!-- t14 -->
+<g id="node17" class="node"><title>t14</title>
+<polygon style="fill:#90ee90;stroke:green;" points="612,-1178 334,-1178 330,-1174 330,-1142 608,-1142 612,-1146 612,-1178"/>
+<polyline style="fill:none;stroke:green;" points="608,-1174 330,-1174 "/>
+<polyline style="fill:none;stroke:green;" points="608,-1174 608,-1142 "/>
+<polyline style="fill:none;stroke:green;" points="608,-1174 612,-1178 "/>
+<text text-anchor="middle" x="471" y="-1153.5" style="font-family:Times New Roman;font-size:20.00;">run_Bowtie_against_xenomRNA</text>
+</g>
+<!-- t8->t14 -->
+<g id="edge87" class="edge"><title>t8->t14</title>
+<path style="fill:none;stroke:gray;" d="M405,-1204C415,-1198 426,-1190 436,-1184"/>
+<polygon style="fill:gray;stroke:gray;" points="438.1,-1186.8 444,-1178 433.9,-1181.2 438.1,-1186.8"/>
+</g>
+<!-- t12 -->
+<g id="node14" class="node"><title>t12</title>
+<polygon style="fill:#90ee90;stroke:green;" points="906,-1310 672,-1310 668,-1306 668,-1274 902,-1274 906,-1278 906,-1310"/>
+<polyline style="fill:none;stroke:green;" points="902,-1306 668,-1306 "/>
+<polyline style="fill:none;stroke:green;" points="902,-1306 902,-1274 "/>
+<polyline style="fill:none;stroke:green;" points="902,-1306 906,-1310 "/>
+<text text-anchor="middle" x="787" y="-1285.5" style="font-family:Times New Roman;font-size:20.00;">prepare_xenoMrna_indexes</text>
+</g>
+<!-- t13 -->
+<g id="node15" class="node"><title>t13</title>
+<polygon style="fill:#90ee90;stroke:green;" points="924,-1244 654,-1244 650,-1240 650,-1208 920,-1208 924,-1212 924,-1244"/>
+<polyline style="fill:none;stroke:green;" points="920,-1240 650,-1240 "/>
+<polyline style="fill:none;stroke:green;" points="920,-1240 920,-1208 "/>
+<polyline style="fill:none;stroke:green;" points="920,-1240 924,-1244 "/>
+<text text-anchor="middle" x="787" y="-1219.5" style="font-family:Times New Roman;font-size:20.00;">xenoMrna_symbolic_link_make</text>
+</g>
+<!-- t12->t13 -->
+<g id="edge13" class="edge"><title>t12->t13</title>
+<path style="fill:none;stroke:gray;" d="M787,-1274C787,-1268 787,-1261 787,-1254"/>
+<polygon style="fill:gray;stroke:gray;" points="790.5,-1254 787,-1244 783.5,-1254 790.5,-1254"/>
+</g>
+<!-- t13->t14 -->
+<g id="edge15" class="edge"><title>t13->t14</title>
+<path style="fill:none;stroke:gray;" d="M701,-1208C659,-1200 611,-1189 568,-1180"/>
+<polygon style="fill:gray;stroke:gray;" points="568.492,-1176.53 558,-1178 567.119,-1183.39 568.492,-1176.53"/>
+</g>
+<!-- t13->t17 -->
+<g id="edge89" class="edge"><title>t13->t17</title>
+<path style="fill:none;stroke:gray;" d="M777,-1208C753,-1166 692,-1063 664,-1013"/>
+<polygon style="fill:gray;stroke:gray;" points="666.916,-1011.04 659,-1004 660.797,-1014.44 666.916,-1011.04"/>
+</g>
+<!-- t15 -->
+<g id="node19" class="node"><title>t15</title>
+<polygon style="fill:#90ee90;stroke:green;" points="600,-1120 372,-1120 368,-1116 368,-1084 596,-1084 600,-1088 600,-1120"/>
+<polyline style="fill:none;stroke:green;" points="596,-1116 368,-1116 "/>
+<polyline style="fill:none;stroke:green;" points="596,-1116 596,-1084 "/>
+<polyline style="fill:none;stroke:green;" points="596,-1116 600,-1120 "/>
+<text text-anchor="middle" x="484" y="-1095.5" style="font-family:Times New Roman;font-size:20.00;">xenoMrna_SAM_to_BAM</text>
+</g>
+<!-- t14->t15 -->
+<g id="edge17" class="edge"><title>t14->t15</title>
+<path style="fill:none;stroke:gray;" d="M475,-1142C476,-1138 477,-1134 478,-1130"/>
+<polygon style="fill:gray;stroke:gray;" points="481.471,-1130.49 480,-1120 474.607,-1129.12 481.471,-1130.49"/>
+</g>
+<!-- t16 -->
+<g id="node21" class="node"><title>t16</title>
+<polygon style="fill:#90ee90;stroke:green;" points="603,-1062 413,-1062 409,-1058 409,-1026 599,-1026 603,-1030 603,-1062"/>
+<polyline style="fill:none;stroke:green;" points="599,-1058 409,-1058 "/>
+<polyline style="fill:none;stroke:green;" points="599,-1058 599,-1026 "/>
+<polyline style="fill:none;stroke:green;" points="599,-1058 603,-1062 "/>
+<text text-anchor="middle" x="506" y="-1037.5" style="font-family:Times New Roman;font-size:20.00;">FilterxenoMrna_SAM</text>
+</g>
+<!-- t15->t16 -->
+<g id="edge19" class="edge"><title>t15->t16</title>
+<path style="fill:none;stroke:gray;" d="M491,-1084C492,-1080 494,-1076 495,-1072"/>
+<polygon style="fill:gray;stroke:gray;" points="498.536,-1072.58 499,-1062 492.036,-1069.98 498.536,-1072.58"/>
+</g>
+<!-- t16->t17 -->
+<g id="edge21" class="edge"><title>t16->t17</title>
+<path style="fill:none;stroke:gray;" d="M550,-1026C564,-1020 580,-1014 595,-1008"/>
+<polygon style="fill:gray;stroke:gray;" points="596.283,-1011.26 604,-1004 593.44,-1004.86 596.283,-1011.26"/>
+</g>
+<!-- t18 -->
+<g id="node25" class="node"><title>t18</title>
+<polygon style="fill:#90ee90;stroke:green;" points="902.126,-926.774 682,-941.92 461.874,-926.774 462.079,-902.266 901.921,-902.266 902.126,-926.774"/>
+<polygon style="fill:none;stroke:green;" points="906.148,-930.506 682,-945.93 457.852,-930.506 458.122,-898.266 905.878,-898.266 906.148,-930.506"/>
+<text text-anchor="middle" x="682" y="-913.5" style="font-family:Times New Roman;font-size:20.00;">prepare_raw_files_post_xenoMRNA</text>
+</g>
+<!-- t17->t18 -->
+<g id="edge23" class="edge"><title>t17->t18</title>
+<path style="fill:none;stroke:gray;" d="M657,-968C659,-964 662,-959 664,-955"/>
+<polygon style="fill:gray;stroke:gray;" points="667.203,-956.441 669,-946 661.084,-953.042 667.203,-956.441"/>
+</g>
+<!-- t21 -->
+<g id="node27" class="node"><title>t21</title>
+<polygon style="fill:#90ee90;stroke:green;" points="254,-814 40,-814 36,-810 36,-778 250,-778 254,-782 254,-814"/>
+<polyline style="fill:none;stroke:green;" points="250,-810 36,-810 "/>
+<polyline style="fill:none;stroke:green;" points="250,-810 250,-778 "/>
+<polyline style="fill:none;stroke:green;" points="250,-810 254,-814 "/>
+<text text-anchor="middle" x="145" y="-789.5" style="font-family:Times New Roman;font-size:20.00;">bowtie_run_multi_onehit</text>
+</g>
+<!-- t18->t21 -->
+<g id="edge25" class="edge"><title>t18->t21</title>
+<path style="fill:none;stroke:gray;" d="M588,-898C487,-875 331,-839 233,-816"/>
+<polygon style="fill:gray;stroke:gray;" points="233.492,-812.529 223,-814 232.119,-819.393 233.492,-812.529"/>
+</g>
+<!-- t20 -->
+<g id="node56" class="node"><title>t20</title>
+<polygon style="fill:#90ee90;stroke:green;" points="884,-748 730,-748 726,-744 726,-712 880,-712 884,-716 884,-748"/>
+<polyline style="fill:none;stroke:green;" points="880,-744 726,-744 "/>
+<polyline style="fill:none;stroke:green;" points="880,-744 880,-712 "/>
+<polyline style="fill:none;stroke:green;" points="880,-744 884,-748 "/>
+<text text-anchor="middle" x="805" y="-723.5" style="font-family:Times New Roman;font-size:20.00;">bowtie_run_multi</text>
+</g>
+<!-- t18->t20 -->
+<g id="edge53" class="edge"><title>t18->t20</title>
+<path style="fill:none;stroke:gray;" d="M692,-898C701,-879 717,-852 737,-836 758,-818 778,-835 795,-814 807,-798 809,-776 809,-758"/>
+<polygon style="fill:gray;stroke:gray;" points="812.478,-757.602 808,-748 805.512,-758.299 812.478,-757.602"/>
+</g>
+<!-- t19 -->
+<g id="node58" class="node"><title>t19</title>
+<polygon style="fill:#90ee90;stroke:green;" points="924,-872 750,-872 746,-868 746,-836 920,-836 924,-840 924,-872"/>
+<polyline style="fill:none;stroke:green;" points="920,-868 746,-868 "/>
+<polyline style="fill:none;stroke:green;" points="920,-868 920,-836 "/>
+<polyline style="fill:none;stroke:green;" points="920,-868 924,-872 "/>
+<text text-anchor="middle" x="835" y="-847.5" style="font-family:Times New Roman;font-size:20.00;">bowtie_run_uniques</text>
+</g>
+<!-- t18->t19 -->
+<g id="edge55" class="edge"><title>t18->t19</title>
+<path style="fill:none;stroke:gray;" d="M733,-898C749,-891 767,-883 783,-876"/>
+<polygon style="fill:gray;stroke:gray;" points="785.015,-878.964 793,-872 782.415,-872.464 785.015,-878.964"/>
+</g>
+<!-- t25 -->
+<g id="node30" class="node"><title>t25</title>
+<polygon style="fill:#90ee90;stroke:green;" points="454.138,-736.774 333,-751.92 211.862,-736.774 211.975,-712.266 454.025,-712.266 454.138,-736.774"/>
+<polygon style="fill:none;stroke:green;" points="458.138,-740.304 333,-755.951 207.862,-740.304 208.01,-708.266 457.99,-708.266 458.138,-740.304"/>
+<text text-anchor="middle" x="333" y="-723.5" style="font-family:Times New Roman;font-size:20.00;">FiltoutDualRepeats</text>
+</g>
+<!-- t21->t25 -->
+<g id="edge93" class="edge"><title>t21->t25</title>
+<path style="fill:none;stroke:gray;" d="M196,-778C218,-770 245,-761 268,-752"/>
+<polygon style="fill:gray;stroke:gray;" points="269.427,-755.226 278,-749 267.416,-748.521 269.427,-755.226"/>
+</g>
+<!-- t21->t46 -->
+<g id="edge121" class="edge"><title>t21->t46</title>
+<path style="fill:none;stroke:gray;" d="M91,-778C80,-772 70,-765 62,-756 34,-720 37,-701 37,-656 37,-656 37,-656 37,-216 37,-192 43,-176 125,-140 155,-126 236,-115 299,-108"/>
+<polygon style="fill:gray;stroke:gray;" points="299.398,-111.478 309,-107 298.701,-104.512 299.398,-111.478"/>
+</g>
+<!-- t24 -->
+<g id="node29" class="node"><title>t24</title>
+<polygon style="fill:#90ee90;stroke:green;" points="398,-814 276,-814 272,-810 272,-778 394,-778 398,-782 398,-814"/>
+<polyline style="fill:none;stroke:green;" points="394,-810 272,-810 "/>
+<polyline style="fill:none;stroke:green;" points="394,-810 394,-778 "/>
+<polyline style="fill:none;stroke:green;" points="394,-810 398,-814 "/>
+<text text-anchor="middle" x="335" y="-789.5" style="font-family:Times New Roman;font-size:20.00;">make_replists</text>
+</g>
+<!-- t24->t25 -->
+<g id="edge27" class="edge"><title>t24->t25</title>
+<path style="fill:none;stroke:gray;" d="M334,-778C334,-774 334,-770 334,-766"/>
+<polygon style="fill:gray;stroke:gray;" points="337.5,-766 334,-756 330.5,-766 337.5,-766"/>
+</g>
+<!-- t26 -->
+<g id="node32" class="node"><title>t26</title>
+<polygon style="fill:#90ee90;stroke:green;" points="308.068,-662.774 194,-677.92 79.932,-662.774 80.0385,-638.266 307.961,-638.266 308.068,-662.774"/>
+<polygon style="fill:none;stroke:green;" points="312.066,-666.278 194,-681.955 75.9337,-666.278 76.0727,-634.266 311.927,-634.266 312.066,-666.278"/>
+<text text-anchor="middle" x="194" y="-649.5" style="font-family:Times New Roman;font-size:20.00;">prepare_raw_files</text>
+</g>
+<!-- t25->t26 -->
+<g id="edge29" class="edge"><title>t25->t26</title>
+<path style="fill:none;stroke:gray;" d="M292,-708C277,-700 259,-691 243,-682"/>
+<polygon style="fill:gray;stroke:gray;" points="244.441,-678.797 234,-677 241.042,-684.916 244.441,-678.797"/>
+</g>
+<!-- t34 -->
+<g id="node64" class="node"><title>t34</title>
+<polygon style="fill:#90ee90;stroke:green;" points="665,-674 565,-674 561,-670 561,-638 661,-638 665,-642 665,-674"/>
+<polyline style="fill:none;stroke:green;" points="661,-670 561,-670 "/>
+<polyline style="fill:none;stroke:green;" points="661,-670 661,-638 "/>
+<polyline style="fill:none;stroke:green;" points="661,-670 665,-674 "/>
+<text text-anchor="middle" x="613" y="-649.5" style="font-family:Times New Roman;font-size:20.00;">fill_islands</text>
+</g>
+<!-- t25->t34 -->
+<g id="edge61" class="edge"><title>t25->t34</title>
+<path style="fill:none;stroke:gray;" d="M415,-708C458,-696 511,-682 551,-672"/>
+<polygon style="fill:gray;stroke:gray;" points="551.881,-675.393 561,-670 550.508,-668.529 551.881,-675.393"/>
+</g>
+<!-- t37 -->
+<g id="node70" class="node"><title>t37</title>
+<polygon style="fill:#90ee90;stroke:green;" points="794,-484 582,-484 578,-480 578,-448 790,-448 794,-452 794,-484"/>
+<polyline style="fill:none;stroke:green;" points="790,-480 578,-480 "/>
+<polyline style="fill:none;stroke:green;" points="790,-480 790,-448 "/>
+<polyline style="fill:none;stroke:green;" points="790,-480 794,-484 "/>
+<text text-anchor="middle" x="686" y="-459.5" style="font-family:Times New Roman;font-size:20.00;">Reallocate_Repeat_Ends</text>
+</g>
+<!-- t25->t37 -->
+<g id="edge107" class="edge"><title>t25->t37</title>
+<path style="fill:none;stroke:gray;" d="M353,-708C393,-666 486,-573 577,-514 592,-504 610,-495 626,-488"/>
+<polygon style="fill:gray;stroke:gray;" points="628.015,-490.964 636,-484 625.415,-484.464 628.015,-490.964"/>
+</g>
+<!-- t25->t46 -->
+<g id="edge119" class="edge"><title>t25->t46</title>
+<path style="fill:none;stroke:gray;" d="M331,-708C326,-643 311,-446 311,-282 311,-282 311,-282 311,-216 311,-181 334,-147 354,-125"/>
+<polygon style="fill:gray;stroke:gray;" points="356.404,-127.546 361,-118 351.454,-122.596 356.404,-127.546"/>
+</g>
+<!-- t27 -->
+<g id="node34" class="node"><title>t27</title>
+<polygon style="fill:#90ee90;stroke:green;" points="247,-608 135,-608 131,-604 131,-572 243,-572 247,-576 247,-608"/>
+<polyline style="fill:none;stroke:green;" points="243,-604 131,-604 "/>
+<polyline style="fill:none;stroke:green;" points="243,-604 243,-572 "/>
+<polyline style="fill:none;stroke:green;" points="243,-604 247,-608 "/>
+<text text-anchor="middle" x="189" y="-583.5" style="font-family:Times New Roman;font-size:20.00;">bowtie_run2</text>
+</g>
+<!-- t26->t27 -->
+<g id="edge31" class="edge"><title>t26->t27</title>
+<path style="fill:none;stroke:gray;" d="M192,-634C192,-629 192,-624 191,-618"/>
+<polygon style="fill:gray;stroke:gray;" points="194.478,-617.602 190,-608 187.512,-618.299 194.478,-617.602"/>
+</g>
+<!-- t28 -->
+<g id="node36" class="node"><title>t28</title>
+<polygon style="fill:#90ee90;stroke:green;" points="244,-550 134,-550 130,-546 130,-514 240,-514 244,-518 244,-550"/>
+<polyline style="fill:none;stroke:green;" points="240,-546 130,-546 "/>
+<polyline style="fill:none;stroke:green;" points="240,-546 240,-514 "/>
+<polyline style="fill:none;stroke:green;" points="240,-546 244,-550 "/>
+<text text-anchor="middle" x="187" y="-525.5" style="font-family:Times New Roman;font-size:20.00;">SAM2BAM</text>
+</g>
+<!-- t27->t28 -->
+<g id="edge33" class="edge"><title>t27->t28</title>
+<path style="fill:none;stroke:gray;" d="M188,-572C188,-568 188,-564 188,-560"/>
+<polygon style="fill:gray;stroke:gray;" points="191.5,-560 188,-550 184.5,-560 191.5,-560"/>
+</g>
+<!-- t29 -->
+<g id="node38" class="node"><title>t29</title>
+<polygon style="fill:#90ee90;stroke:green;" points="259.953,-472.774 185,-487.92 110.047,-472.774 110.117,-448.266 259.883,-448.266 259.953,-472.774"/>
+<polygon style="fill:none;stroke:green;" points="263.963,-476.044 185,-492.001 106.037,-476.044 106.128,-444.266 263.872,-444.266 263.963,-476.044"/>
+<text text-anchor="middle" x="185" y="-459.5" style="font-family:Times New Roman;font-size:20.00;">splitby_chr</text>
+</g>
+<!-- t28->t29 -->
+<g id="edge35" class="edge"><title>t28->t29</title>
+<path style="fill:none;stroke:gray;" d="M186,-514C186,-510 186,-506 186,-502"/>
+<polygon style="fill:gray;stroke:gray;" points="189.5,-502 186,-492 182.5,-502 189.5,-502"/>
+</g>
+<!-- t30 -->
+<g id="node40" class="node"><title>t30</title>
+<polygon style="fill:#90ee90;stroke:green;" points="292,-392 80,-392 76,-388 76,-356 288,-356 292,-360 292,-392"/>
+<polyline style="fill:none;stroke:green;" points="288,-388 76,-388 "/>
+<polyline style="fill:none;stroke:green;" points="288,-388 288,-356 "/>
+<polyline style="fill:none;stroke:green;" points="288,-388 292,-392 "/>
+<text text-anchor="middle" x="184" y="-367.5" style="font-family:Times New Roman;font-size:20.00;">create_by_chr_raw_files</text>
+</g>
+<!-- t29->t30 -->
+<g id="edge37" class="edge"><title>t29->t30</title>
+<path style="fill:none;stroke:gray;" d="M185,-444C184,-431 184,-416 184,-402"/>
+<polygon style="fill:gray;stroke:gray;" points="187.5,-402 184,-392 180.5,-402 187.5,-402"/>
+</g>
+<!-- t31 -->
+<g id="node42" class="node"><title>t31</title>
+<polygon style="fill:#90ee90;stroke:green;" points="270,-300 102,-300 98,-296 98,-264 266,-264 270,-268 270,-300"/>
+<polyline style="fill:none;stroke:green;" points="266,-296 98,-296 "/>
+<polyline style="fill:none;stroke:green;" points="266,-296 266,-264 "/>
+<polyline style="fill:none;stroke:green;" points="266,-296 270,-300 "/>
+<text text-anchor="middle" x="184" y="-275.5" style="font-family:Times New Roman;font-size:20.00;">run_by_chr_bowtie</text>
+</g>
+<!-- t30->t31 -->
+<g id="edge39" class="edge"><title>t30->t31</title>
+<path style="fill:none;stroke:gray;" d="M184,-356C184,-343 184,-325 184,-310"/>
+<polygon style="fill:gray;stroke:gray;" points="187.5,-310 184,-300 180.5,-310 187.5,-310"/>
+</g>
+<!-- t32 -->
+<g id="node44" class="node"><title>t32</title>
+<polygon style="fill:#90ee90;stroke:green;" points="287,-234 85,-234 81,-230 81,-198 283,-198 287,-202 287,-234"/>
+<polyline style="fill:none;stroke:green;" points="283,-230 81,-230 "/>
+<polyline style="fill:none;stroke:green;" points="283,-230 283,-198 "/>
+<polyline style="fill:none;stroke:green;" points="283,-230 287,-234 "/>
+<text text-anchor="middle" x="184" y="-209.5" style="font-family:Times New Roman;font-size:20.00;">by_chr_SAM_to_BAM</text>
+</g>
+<!-- t31->t32 -->
+<g id="edge41" class="edge"><title>t31->t32</title>
+<path style="fill:none;stroke:gray;" d="M184,-264C184,-258 184,-251 184,-244"/>
+<polygon style="fill:gray;stroke:gray;" points="187.5,-244 184,-234 180.5,-244 187.5,-244"/>
+</g>
+<!-- t33 -->
+<g id="node48" class="node"><title>t33</title>
+<polygon style="fill:#90ee90;stroke:green;" points="234,-176 138,-176 134,-172 134,-140 230,-140 234,-144 234,-176"/>
+<polyline style="fill:none;stroke:green;" points="230,-172 134,-172 "/>
+<polyline style="fill:none;stroke:green;" points="230,-172 230,-140 "/>
+<polyline style="fill:none;stroke:green;" points="230,-172 234,-176 "/>
+<text text-anchor="middle" x="184" y="-151.5" style="font-family:Times New Roman;font-size:20.00;">ccheckhits</text>
+</g>
+<!-- t32->t33 -->
+<g id="edge45" class="edge"><title>t32->t33</title>
+<path style="fill:none;stroke:gray;" d="M184,-198C184,-194 184,-190 184,-186"/>
+<polygon style="fill:gray;stroke:gray;" points="187.5,-186 184,-176 180.5,-186 187.5,-186"/>
+</g>
+<!-- t2->t33 -->
+<g id="edge95" class="edge"><title>t2->t33</title>
+<path style="fill:none;stroke:gray;" d="M416,-779C413,-779 410,-778 407,-778 244,-755 57,-820 57,-656 57,-656 57,-656 57,-282 57,-244 48,-226 72,-198 80,-189 102,-181 124,-173"/>
+<polygon style="fill:gray;stroke:gray;" points="125.427,-176.226 134,-170 123.416,-169.521 125.427,-176.226"/>
+</g>
+<!-- t23 -->
+<g id="node62" class="node"><title>t23</title>
+<polygon style="fill:#90ee90;stroke:green;" points="697,-748 533,-748 529,-744 529,-712 693,-712 697,-716 697,-748"/>
+<polyline style="fill:none;stroke:green;" points="693,-744 529,-744 "/>
+<polyline style="fill:none;stroke:green;" points="693,-744 693,-712 "/>
+<polyline style="fill:none;stroke:green;" points="693,-744 697,-748 "/>
+<text text-anchor="middle" x="613" y="-723.5" style="font-family:Times New Roman;font-size:20.00;">create_island_seed</text>
+</g>
+<!-- t2->t23 -->
+<g id="edge99" class="edge"><title>t2->t23</title>
+<path style="fill:none;stroke:gray;" d="M540,-778C551,-771 564,-762 576,-754"/>
+<polygon style="fill:gray;stroke:gray;" points="578.621,-756.459 585,-748 574.738,-750.635 578.621,-756.459"/>
+</g>
+<!-- t38 -->
+<g id="node76" class="node"><title>t38</title>
+<polygon style="fill:#90ee90;stroke:green;" points="333.879,-361.794 485,-334.5 636.121,-361.794 635.98,-405.956 334.02,-405.956 333.879,-361.794"/>
+<polygon style="fill:none;stroke:green;" points="329.868,-358.453 485,-330.435 640.132,-358.453 639.967,-409.956 330.033,-409.956 329.868,-358.453"/>
+<text text-anchor="middle" x="485" y="-367.5" style="font-family:Times New Roman;font-size:20.00;">make_karyotype_file_for_circos</text>
+</g>
+<!-- t2->t38 -->
+<g id="edge113" class="edge"><title>t2->t38</title>
+<path style="fill:none;stroke:gray;" d="M512,-778C507,-716 494,-513 488,-420"/>
+<polygon style="fill:gray;stroke:gray;" points="491.478,-419.602 487,-410 484.512,-420.299 491.478,-419.602"/>
+</g>
+<!-- t33->t46 -->
+<g id="edge117" class="edge"><title>t33->t46</title>
+<path style="fill:none;stroke:gray;" d="M234,-143C257,-136 285,-128 309,-121"/>
+<polygon style="fill:gray;stroke:gray;" points="310.427,-124.226 319,-118 308.416,-117.521 310.427,-124.226"/>
+</g>
+<!-- t44 -->
+<g id="node52" class="node"><title>t44</title>
+<polygon style="fill:#90ee90;stroke:green;" points="147,-1368 49,-1368 45,-1364 45,-1332 143,-1332 147,-1336 147,-1368"/>
+<polyline style="fill:none;stroke:green;" points="143,-1364 45,-1364 "/>
+<polyline style="fill:none;stroke:green;" points="143,-1364 143,-1332 "/>
+<polyline style="fill:none;stroke:green;" points="143,-1364 147,-1368 "/>
+<text text-anchor="middle" x="96" y="-1343.5" style="font-family:Times New Roman;font-size:20.00;">velvet_run</text>
+</g>
+<!-- t43->t44 -->
+<g id="edge49" class="edge"><title>t43->t44</title>
+<path style="fill:none;stroke:gray;" d="M116,-1390C114,-1386 111,-1382 109,-1377"/>
+<polygon style="fill:gray;stroke:gray;" points="112.26,-1375.72 105,-1368 105.863,-1378.56 112.26,-1375.72"/>
+</g>
+<!-- t45 -->
+<g id="node54" class="node"><title>t45</title>
+<polygon style="fill:#90ee90;stroke:green;" points="112,-1178 20,-1178 16,-1174 16,-1142 108,-1142 112,-1146 112,-1178"/>
+<polyline style="fill:none;stroke:green;" points="108,-1174 16,-1174 "/>
+<polyline style="fill:none;stroke:green;" points="108,-1174 108,-1142 "/>
+<polyline style="fill:none;stroke:green;" points="108,-1174 112,-1178 "/>
+<text text-anchor="middle" x="64" y="-1153.5" style="font-family:Times New Roman;font-size:20.00;">oases_run</text>
+</g>
+<!-- t44->t45 -->
+<g id="edge51" class="edge"><title>t44->t45</title>
+<path style="fill:none;stroke:gray;" d="M93,-1332C88,-1298 75,-1227 69,-1188"/>
+<polygon style="fill:gray;stroke:gray;" points="72.3933,-1187.12 67,-1178 65.5292,-1188.49 72.3933,-1187.12"/>
+</g>
+<!-- t45->t46 -->
+<g id="edge79" class="edge"><title>t45->t46</title>
+<path style="fill:none;stroke:gray;" d="M52,-1142C38,-1120 17,-1081 17,-1044 17,-1044 17,-1044 17,-216 17,-178 24,-161 54,-140 74,-126 208,-113 299,-106"/>
+<polygon style="fill:gray;stroke:gray;" points="299.398,-109.478 309,-105 298.701,-102.512 299.398,-109.478"/>
+</g>
+<!-- t20->t34 -->
+<g id="edge103" class="edge"><title>t20->t34</title>
+<path style="fill:none;stroke:gray;" d="M758,-712C731,-701 698,-689 669,-678"/>
+<polygon style="fill:gray;stroke:gray;" points="670.56,-674.863 660,-674 667.717,-681.26 670.56,-674.863"/>
+</g>
+<!-- t20->t46 -->
+<g id="edge123" class="edge"><title>t20->t46</title>
+<path style="fill:none;stroke:gray;" d="M829,-712C858,-687 903,-641 903,-590 903,-590 903,-590 903,-216 903,-171 608,-128 461,-109"/>
+<polygon style="fill:gray;stroke:gray;" points="461.299,-105.512 451,-108 460.602,-112.478 461.299,-105.512"/>
+</g>
+<!-- t19->t22 -->
+<g id="edge57" class="edge"><title>t19->t22</title>
+<path style="fill:none;stroke:gray;" d="M795,-836C783,-830 769,-824 756,-818"/>
+<polygon style="fill:gray;stroke:gray;" points="757.56,-814.863 747,-814 754.717,-821.26 757.56,-814.863"/>
+</g>
+<!-- t19->t46 -->
+<g id="edge125" class="edge"><title>t19->t46</title>
+<path style="fill:none;stroke:gray;" d="M859,-836C885,-814 923,-774 923,-730 923,-730 923,-730 923,-216 923,-158 995,-197 760,-140 706,-126 556,-113 461,-106"/>
+<polygon style="fill:gray;stroke:gray;" points="461.299,-102.512 451,-105 460.602,-109.478 461.299,-102.512"/>
+</g>
+<!-- t22->t23 -->
+<g id="edge59" class="edge"><title>t22->t23</title>
+<path style="fill:none;stroke:gray;" d="M681,-778C671,-771 658,-762 647,-754"/>
+<polygon style="fill:gray;stroke:gray;" points="649.1,-751.2 639,-748 644.9,-756.8 649.1,-751.2"/>
+</g>
+<!-- t23->t34 -->
+<g id="edge101" class="edge"><title>t23->t34</title>
+<path style="fill:none;stroke:gray;" d="M613,-712C613,-703 613,-694 613,-684"/>
+<polygon style="fill:gray;stroke:gray;" points="616.5,-684 613,-674 609.5,-684 616.5,-684"/>
+</g>
+<!-- t35 -->
+<g id="node66" class="node"><title>t35</title>
+<polygon style="fill:#90ee90;stroke:green;" points="864,-608 602,-608 598,-604 598,-572 860,-572 864,-576 864,-608"/>
+<polyline style="fill:none;stroke:green;" points="860,-604 598,-604 "/>
+<polyline style="fill:none;stroke:green;" points="860,-604 860,-572 "/>
+<polyline style="fill:none;stroke:green;" points="860,-604 864,-608 "/>
+<text text-anchor="middle" x="731" y="-583.5" style="font-family:Times New Roman;font-size:20.00;">make_fasta_for_RepeatMasker</text>
+</g>
+<!-- t34->t35 -->
+<g id="edge63" class="edge"><title>t34->t35</title>
+<path style="fill:none;stroke:gray;" d="M645,-638C659,-631 675,-621 689,-613"/>
+<polygon style="fill:gray;stroke:gray;" points="690.958,-615.916 698,-608 687.559,-609.797 690.958,-615.916"/>
+</g>
+<!-- t34->t37 -->
+<g id="edge105" class="edge"><title>t34->t37</title>
+<path style="fill:none;stroke:gray;" d="M602,-638C597,-629 592,-618 589,-608 580,-567 571,-548 593,-514 600,-504 610,-496 620,-489"/>
+<polygon style="fill:gray;stroke:gray;" points="621.958,-491.916 629,-484 618.559,-485.797 621.958,-491.916"/>
+</g>
+<!-- t39 -->
+<g id="node72" class="node"><title>t39</title>
+<polygon style="fill:none;stroke:blue;" points="884,-392 682,-392 678,-388 678,-356 880,-356 884,-360 884,-392"/>
+<polyline style="fill:none;stroke:blue;" points="880,-388 678,-388 "/>
+<polyline style="fill:none;stroke:blue;" points="880,-388 880,-356 "/>
+<polyline style="fill:none;stroke:blue;" points="880,-388 884,-392 "/>
+<text text-anchor="middle" x="781" y="-367.5" style="font-family:Times New Roman;font-size:20.00;fill:blue;">remove_simple_repeats</text>
+</g>
+<!-- t34->t39 -->
+<g id="edge109" class="edge"><title>t34->t39</title>
+<path style="fill:none;stroke:gray;" d="M665,-654C733,-650 847,-639 873,-608 926,-544 849,-445 806,-399"/>
+<polygon style="fill:gray;stroke:gray;" points="808.546,-396.596 799,-392 803.596,-401.546 808.546,-396.596"/>
+</g>
+<!-- t40 -->
+<g id="node74" class="node"><title>t40</title>
+<polygon style="fill:none;stroke:blue;" points="750.217,-288.774 659,-303.92 567.783,-288.774 567.868,-264.266 750.132,-264.266 750.217,-288.774"/>
+<polygon style="fill:none;stroke:blue;" points="754.229,-292.162 659,-307.975 563.771,-292.162 563.882,-260.266 754.118,-260.266 754.229,-292.162"/>
+<text text-anchor="middle" x="659" y="-275.5" style="font-family:Times New Roman;font-size:20.00;fill:blue;">report_islands</text>
+</g>
+<!-- t34->t40 -->
+<g id="edge111" class="edge"><title>t34->t40</title>
+<path style="fill:none;stroke:gray;" d="M597,-638C590,-629 583,-619 579,-608 554,-537 524,-499 569,-440 592,-410 626,-446 649,-418 662,-403 662,-353 661,-318"/>
+<polygon style="fill:gray;stroke:gray;" points="664.5,-318 661,-308 657.5,-318 664.5,-318"/>
+</g>
+<!-- t36 -->
+<g id="node68" class="node"><title>t36</title>
+<polygon style="fill:#90ee90;stroke:green;" points="852,-550 606,-550 602,-546 602,-514 848,-514 852,-518 852,-550"/>
+<polyline style="fill:none;stroke:green;" points="848,-546 602,-546 "/>
+<polyline style="fill:none;stroke:green;" points="848,-546 848,-514 "/>
+<polyline style="fill:none;stroke:green;" points="848,-546 852,-550 "/>
+<text text-anchor="middle" x="727" y="-525.5" style="font-family:Times New Roman;font-size:20.00;">run_RepeatMasker_on_reads</text>
+</g>
+<!-- t35->t36 -->
+<g id="edge65" class="edge"><title>t35->t36</title>
+<path style="fill:none;stroke:gray;" d="M730,-572C730,-568 730,-564 729,-560"/>
+<polygon style="fill:gray;stroke:gray;" points="732.478,-559.602 728,-550 725.512,-560.299 732.478,-559.602"/>
+</g>
+<!-- t36->t37 -->
+<g id="edge67" class="edge"><title>t36->t37</title>
+<path style="fill:none;stroke:gray;" d="M716,-514C712,-508 707,-500 703,-493"/>
+<polygon style="fill:gray;stroke:gray;" points="705.916,-491.042 698,-484 699.797,-494.441 705.916,-491.042"/>
+</g>
+<!-- t37->t39 -->
+<g id="edge69" class="edge"><title>t37->t39</title>
+<path style="fill:none;stroke:gray;" d="M705,-448C719,-434 738,-415 754,-399"/>
+<polygon style="fill:gray;stroke:gray;" points="756.779,-401.219 762,-392 752.169,-395.951 756.779,-401.219"/>
+</g>
+<!-- t37->t38 -->
+<g id="edge73" class="edge"><title>t37->t38</title>
+<path style="fill:none;stroke:gray;" d="M646,-448C626,-438 599,-426 573,-414"/>
+<polygon style="fill:gray;stroke:gray;" points="574.56,-410.863 564,-410 571.717,-417.26 574.56,-410.863"/>
+</g>
+<!-- t39->t40 -->
+<g id="edge71" class="edge"><title>t39->t40</title>
+<path style="fill:none;stroke:blue;" d="M757,-356C739,-343 715,-324 695,-309"/>
+<polygon style="fill:blue;stroke:blue;" points="697.1,-306.2 687,-303 692.9,-311.8 697.1,-306.2"/>
+</g>
+<!-- t41 -->
+<g id="node78" class="node"><title>t41</title>
+<polygon style="fill:none;stroke:blue;" points="585,-234 389,-234 385,-230 385,-198 581,-198 585,-202 585,-234"/>
+<polyline style="fill:none;stroke:blue;" points="581,-230 385,-230 "/>
+<polyline style="fill:none;stroke:blue;" points="581,-230 581,-198 "/>
+<polyline style="fill:none;stroke:blue;" points="581,-230 585,-234 "/>
+<text text-anchor="middle" x="485" y="-209.5" style="font-family:Times New Roman;font-size:20.00;fill:blue;">make_circos_conf_file</text>
+</g>
+<!-- t40->t41 -->
+<g id="edge75" class="edge"><title>t40->t41</title>
+<path style="fill:none;stroke:blue;" d="M602,-260C583,-253 561,-245 542,-238"/>
+<polygon style="fill:blue;stroke:blue;" points="543.56,-234.863 533,-234 540.717,-241.26 543.56,-234.863"/>
+</g>
+<!-- t38->t41 -->
+<g id="edge115" class="edge"><title>t38->t41</title>
+<path style="fill:none;stroke:gray;" d="M485,-330C485,-303 485,-268 485,-244"/>
+<polygon style="fill:gray;stroke:gray;" points="488.5,-244 485,-234 481.5,-244 488.5,-244"/>
+</g>
+<!-- t42 -->
+<g id="node80" class="node"><title>t42</title>
+<polygon style="fill:none;stroke:blue;stroke-dasharray:5,2;" points="531,-176 371,-176 367,-172 367,-140 527,-140 531,-144 531,-176"/>
+<polyline style="fill:none;stroke:blue;stroke-dasharray:5,2;" points="527,-172 367,-172 "/>
+<polyline style="fill:none;stroke:blue;stroke-dasharray:5,2;" points="527,-172 527,-140 "/>
+<polyline style="fill:none;stroke:blue;stroke-dasharray:5,2;" points="527,-172 531,-176 "/>
+<text text-anchor="middle" x="449" y="-151.5" style="font-family:Times New Roman;font-size:20.00;fill:blue;">run_circos_islands</text>
+</g>
+<!-- t41->t42 -->
+<g id="edge77" class="edge"><title>t41->t42</title>
+<path style="fill:none;stroke:blue;" d="M474,-198C472,-194 469,-189 466,-185"/>
+<polygon style="fill:blue;stroke:blue;" points="468.459,-182.379 460,-176 462.635,-186.262 468.459,-182.379"/>
+</g>
+<!-- t42->t46 -->
+<g id="edge81" class="edge"><title>t42->t46</title>
+<path style="fill:none;stroke:blue;" d="M428,-140C422,-135 416,-130 410,-125"/>
+<polygon style="fill:blue;stroke:blue;" points="411.831,-121.951 402,-118 407.221,-127.219 411.831,-121.951"/>
+</g>
+<!-- t47 -->
+<g id="node85" class="node"><title>t47</title>
+<polygon style="fill:#fff68f;stroke:black;" points="456,-60 308,-60 304,-56 304,-24 452,-24 456,-28 456,-60"/>
+<polyline style="fill:none;stroke:black;" points="452,-56 304,-56 "/>
+<polyline style="fill:none;stroke:black;" points="452,-56 452,-24 "/>
+<polyline style="fill:none;stroke:black;" points="452,-56 456,-60 "/>
+<text text-anchor="middle" x="380" y="-35.5" style="font-family:Times New Roman;font-size:20.00;">target_final_task</text>
+</g>
+<!-- t46->t47 -->
+<g id="edge83" class="edge"><title>t46->t47</title>
+<path style="fill:none;stroke:blue;" d="M380,-82C380,-78 380,-74 380,-70"/>
+<polygon style="fill:blue;stroke:blue;" points="383.5,-70 380,-60 376.5,-70 383.5,-70"/>
+</g>
+</g>
+</svg>
diff --git a/doc/images/gallery/gallery_snp_annotation.png b/doc/images/gallery/gallery_snp_annotation.png
new file mode 100644
index 0000000..c0e7ba3
Binary files /dev/null and b/doc/images/gallery/gallery_snp_annotation.png differ
diff --git a/doc/images/gallery/gallery_snp_annotation.svg b/doc/images/gallery/gallery_snp_annotation.svg
new file mode 100644
index 0000000..9a7995c
--- /dev/null
+++ b/doc/images/gallery/gallery_snp_annotation.svg
@@ -0,0 +1,470 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.0//EN"
+ "http://www.w3.org/TR/2001/REC-SVG-20010904/DTD/svg10.dtd" [
+ <!ATTLIST svg xmlns:xlink CDATA #FIXED "http://www.w3.org/1999/xlink">
+]>
+<!-- Generated by Graphviz version 2.20.2 (Mon Aug 4 08:59:22 UTC 2008)
+ For user: Andreas Heger -->
+<!-- Title: tree Pages: 1 -->
+<svg width="864pt" height="420pt"
+ viewBox="0.00 0.00 432.00 203.22" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<g id="graph0" class="graph" transform="scale(0.311688 0.311688) rotate(0) translate(4 648)">
+<title>SNP Annotation Pipeline</title>
+<polygon style="fill:white;stroke:white;" points="-4,4 -4,-648 1382,-648 1382,4 -4,4"/>
+<g id="cluster2" class="cluster"><title>clustertasks</title>
+<polygon style="fill:none;stroke:black;" points="8,-16 8,-628 1194,-628 1194,-16 8,-16"/>
+<text text-anchor="middle" x="601" y="-597" style="font-family:Times New Roman;font-size:20.00pt;fill:red;"><tspan font-weight = "bold">SNP Annotation Pipeline:</tspan></text>
+</g>
+<g id="cluster3" class="cluster"><title>clusterkey</title>
+<polygon style="fill:#e5e5e5;stroke:#e5e5e5;" points="1202,-324 1202,-636 1370,-636 1370,-324 1202,-324"/>
+<text text-anchor="middle" x="1286" y="-605" style="font-family:Times New Roman;font-size:30.00;">Key:</text>
+</g>
+<!-- t8 -->
+<g id="node2" class="node"><title>t8</title>
+<polygon style="fill:#90ee90;stroke:green;" points="804,-486 662,-486 658,-482 658,-450 800,-450 804,-454 804,-486"/>
+<polyline style="fill:none;stroke:green;" points="800,-482 658,-482 "/>
+<polyline style="fill:none;stroke:green;" points="800,-482 800,-450 "/>
+<polyline style="fill:none;stroke:green;" points="800,-482 804,-486 "/>
+<text text-anchor="middle" x="731" y="-461.5" style="font-family:Times New Roman;font-size:20.00;">buildTranscripts</text>
+</g>
+<!-- t9 -->
+<g id="node3" class="node"><title>t9</title>
+<polygon style="fill:#90ee90;stroke:green;" points="812,-376 676,-376 672,-372 672,-340 808,-340 812,-344 812,-376"/>
+<polyline style="fill:none;stroke:green;" points="808,-372 672,-372 "/>
+<polyline style="fill:none;stroke:green;" points="808,-372 808,-340 "/>
+<polyline style="fill:none;stroke:green;" points="808,-372 812,-376 "/>
+<text text-anchor="middle" x="742" y="-351.5" style="font-family:Times New Roman;font-size:20.00;">loadTranscripts</text>
+</g>
+<!-- t8->t9 -->
+<g id="edge3" class="edge"><title>t8->t9</title>
+<path style="fill:none;stroke:gray;" d="M733,-450C735,-433 737,-407 739,-386"/>
+<polygon style="fill:gray;stroke:gray;" points="742.488,-386.299 740,-376 735.522,-385.602 742.488,-386.299"/>
+</g>
+<!-- t27 -->
+<g id="node41" class="node"><title>t27</title>
+<polygon style="fill:#90ee90;stroke:green;" points="654,-376 544,-376 540,-372 540,-340 650,-340 654,-344 654,-376"/>
+<polyline style="fill:none;stroke:green;" points="650,-372 540,-372 "/>
+<polyline style="fill:none;stroke:green;" points="650,-372 650,-340 "/>
+<polyline style="fill:none;stroke:green;" points="650,-372 654,-376 "/>
+<text text-anchor="middle" x="597" y="-351.5" style="font-family:Times New Roman;font-size:20.00;">buildAlleles</text>
+</g>
+<!-- t8->t27 -->
+<g id="edge69" class="edge"><title>t8->t27</title>
+<path style="fill:none;stroke:gray;" d="M709,-450C686,-432 652,-403 627,-382"/>
+<polygon style="fill:gray;stroke:gray;" points="629.1,-379.2 619,-376 624.9,-384.8 629.1,-379.2"/>
+</g>
+<!-- t64 -->
+<g id="node12" class="node"><title>t64</title>
+<polygon style="fill:none;stroke:blue;" points="906,-292 834,-292 830,-288 830,-256 902,-256 906,-260 906,-292"/>
+<polyline style="fill:none;stroke:blue;" points="902,-288 830,-288 "/>
+<polyline style="fill:none;stroke:blue;" points="902,-288 902,-256 "/>
+<polyline style="fill:none;stroke:blue;" points="902,-288 906,-292 "/>
+<text text-anchor="middle" x="868" y="-267.5" style="font-family:Times New Roman;font-size:20.00;fill:blue;">prepare</text>
+</g>
+<!-- t9->t64 -->
+<g id="edge53" class="edge"><title>t9->t64</title>
+<path style="fill:none;stroke:gray;" d="M769,-340C787,-328 812,-311 832,-298"/>
+<polygon style="fill:gray;stroke:gray;" points="834.621,-300.459 841,-292 830.738,-294.635 834.621,-300.459"/>
+</g>
+<!-- t11 -->
+<g id="node5" class="node"><title>t11</title>
+<polygon style="fill:#90ee90;stroke:green;" points="854,-578 632,-578 628,-574 628,-542 850,-542 854,-546 854,-578"/>
+<polyline style="fill:none;stroke:green;" points="850,-574 628,-574 "/>
+<polyline style="fill:none;stroke:green;" points="850,-574 850,-542 "/>
+<polyline style="fill:none;stroke:green;" points="850,-574 854,-578 "/>
+<text text-anchor="middle" x="741" y="-553.5" style="font-family:Times New Roman;font-size:20.00;">loadTranscriptInformation</text>
+</g>
+<!-- t11->t64 -->
+<g id="edge9" class="edge"><title>t11->t64</title>
+<path style="fill:none;stroke:gray;" d="M780,-542C793,-534 805,-524 813,-512 861,-436 810,-396 841,-314 843,-309 845,-305 848,-301"/>
+<polygon style="fill:gray;stroke:gray;" points="851.203,-302.441 853,-292 845.084,-299.042 851.203,-302.441"/>
+</g>
+<!-- t16 -->
+<g id="node15" class="node"><title>t16</title>
+<polygon style="fill:#90ee90;stroke:green;" points="476.809,-455.794 554,-428.5 631.191,-455.794 631.119,-499.956 476.881,-499.956 476.809,-455.794"/>
+<polygon style="fill:none;stroke:green;" points="472.804,-452.967 554,-424.257 635.196,-452.967 635.113,-503.956 472.887,-503.956 472.804,-452.967"/>
+<text text-anchor="middle" x="554" y="-461.5" style="font-family:Times New Roman;font-size:20.00;">buildSelenoList</text>
+</g>
+<!-- t11->t16 -->
+<g id="edge59" class="edge"><title>t11->t16</title>
+<path style="fill:none;stroke:gray;" d="M704,-542C684,-532 660,-520 636,-508"/>
+<polygon style="fill:gray;stroke:gray;" points="637.56,-504.863 627,-504 634.717,-511.26 637.56,-504.863"/>
+</g>
+<!-- t4 -->
+<g id="node6" class="node"><title>t4</title>
+<polygon style="fill:#90ee90;stroke:green;" points="1034,-578 876,-578 872,-574 872,-542 1030,-542 1034,-546 1034,-578"/>
+<polyline style="fill:none;stroke:green;" points="1030,-574 872,-574 "/>
+<polyline style="fill:none;stroke:green;" points="1030,-574 1030,-542 "/>
+<polyline style="fill:none;stroke:green;" points="1030,-574 1034,-578 "/>
+<text text-anchor="middle" x="953" y="-553.5" style="font-family:Times New Roman;font-size:20.00;">buildGeneRegions</text>
+</g>
+<!-- t5 -->
+<g id="node7" class="node"><title>t5</title>
+<polygon style="fill:#90ee90;stroke:green;" points="973,-486 871,-486 867,-482 867,-450 969,-450 973,-454 973,-486"/>
+<polyline style="fill:none;stroke:green;" points="969,-482 867,-482 "/>
+<polyline style="fill:none;stroke:green;" points="969,-482 969,-450 "/>
+<polyline style="fill:none;stroke:green;" points="969,-482 973,-486 "/>
+<text text-anchor="middle" x="920" y="-461.5" style="font-family:Times New Roman;font-size:20.00;">buildGenes</text>
+</g>
+<!-- t4->t5 -->
+<g id="edge5" class="edge"><title>t4->t5</title>
+<path style="fill:none;stroke:gray;" d="M946,-542C942,-529 935,-511 930,-496"/>
+<polygon style="fill:gray;stroke:gray;" points="933.226,-494.573 927,-486 926.521,-496.584 933.226,-494.573"/>
+</g>
+<!-- t7 -->
+<g id="node9" class="node"><title>t7</title>
+<polygon style="fill:#90ee90;stroke:green;" points="982,-376 854,-376 850,-372 850,-340 978,-340 982,-344 982,-376"/>
+<polyline style="fill:none;stroke:green;" points="978,-372 850,-372 "/>
+<polyline style="fill:none;stroke:green;" points="978,-372 978,-340 "/>
+<polyline style="fill:none;stroke:green;" points="978,-372 982,-376 "/>
+<text text-anchor="middle" x="916" y="-351.5" style="font-family:Times New Roman;font-size:20.00;">loadGeneStats</text>
+</g>
+<!-- t5->t7 -->
+<g id="edge7" class="edge"><title>t5->t7</title>
+<path style="fill:none;stroke:gray;" d="M919,-450C918,-433 917,-407 917,-386"/>
+<polygon style="fill:gray;stroke:gray;" points="920.5,-386 917,-376 913.5,-386 920.5,-386"/>
+</g>
+<!-- t7->t64 -->
+<g id="edge55" class="edge"><title>t7->t64</title>
+<path style="fill:none;stroke:gray;" d="M906,-340C899,-329 890,-314 883,-301"/>
+<polygon style="fill:gray;stroke:gray;" points="885.916,-299.042 878,-292 879.797,-302.441 885.916,-299.042"/>
+</g>
+<!-- t6 -->
+<g id="node11" class="node"><title>t6</title>
+<polygon style="fill:#90ee90;stroke:green;" points="1186,-376 1004,-376 1000,-372 1000,-340 1182,-340 1186,-344 1186,-376"/>
+<polyline style="fill:none;stroke:green;" points="1182,-372 1000,-372 "/>
+<polyline style="fill:none;stroke:green;" points="1182,-372 1182,-340 "/>
+<polyline style="fill:none;stroke:green;" points="1182,-372 1186,-376 "/>
+<text text-anchor="middle" x="1093" y="-351.5" style="font-family:Times New Roman;font-size:20.00;">loadGeneInformation</text>
+</g>
+<!-- t6->t64 -->
+<g id="edge57" class="edge"><title>t6->t64</title>
+<path style="fill:none;stroke:gray;" d="M1054,-340C1035,-331 1012,-322 991,-314 966,-304 939,-295 916,-288"/>
+<polygon style="fill:gray;stroke:gray;" points="916.584,-284.521 906,-285 914.573,-291.226 916.584,-284.521"/>
+</g>
+<!-- t68 -->
+<g id="node51" class="node"><title>t68</title>
+<polygon style="fill:#fff68f;stroke:black;" points="587,-60 537,-60 533,-56 533,-24 583,-24 587,-28 587,-60"/>
+<polyline style="fill:none;stroke:black;" points="583,-56 533,-56 "/>
+<polyline style="fill:none;stroke:black;" points="583,-56 583,-24 "/>
+<polyline style="fill:none;stroke:black;" points="583,-56 587,-60 "/>
+<text text-anchor="middle" x="560" y="-35.5" style="font-family:Times New Roman;font-size:20.00;">full</text>
+</g>
+<!-- t64->t68 -->
+<g id="edge51" class="edge"><title>t64->t68</title>
+<path style="fill:none;stroke:blue;" d="M862,-256C852,-228 829,-174 793,-140 738,-86 648,-59 597,-49"/>
+<polygon style="fill:blue;stroke:blue;" points="597.492,-45.5292 587,-47 596.119,-52.3933 597.492,-45.5292"/>
+</g>
+<!-- t15 -->
+<g id="node14" class="node"><title>t15</title>
+<polygon style="fill:#90ee90;stroke:green;" points="610,-578 468,-578 464,-574 464,-542 606,-542 610,-546 610,-578"/>
+<polyline style="fill:none;stroke:green;" points="606,-574 464,-574 "/>
+<polyline style="fill:none;stroke:green;" points="606,-574 606,-542 "/>
+<polyline style="fill:none;stroke:green;" points="606,-574 610,-578 "/>
+<text text-anchor="middle" x="537" y="-553.5" style="font-family:Times New Roman;font-size:20.00;">loadProteinStats</text>
+</g>
+<!-- t15->t16 -->
+<g id="edge11" class="edge"><title>t15->t16</title>
+<path style="fill:none;stroke:gray;" d="M540,-542C541,-534 544,-524 545,-514"/>
+<polygon style="fill:gray;stroke:gray;" points="548.471,-514.492 547,-504 541.607,-513.119 548.471,-514.492"/>
+</g>
+<!-- t25 -->
+<g id="node17" class="node"><title>t25</title>
+<polygon style="fill:#90ee90;stroke:green;" points="522,-376 410,-376 406,-372 406,-340 518,-340 522,-344 522,-376"/>
+<polyline style="fill:none;stroke:green;" points="518,-372 406,-372 "/>
+<polyline style="fill:none;stroke:green;" points="518,-372 518,-340 "/>
+<polyline style="fill:none;stroke:green;" points="518,-372 522,-376 "/>
+<text text-anchor="middle" x="464" y="-351.5" style="font-family:Times New Roman;font-size:20.00;">makeEffects</text>
+</g>
+<!-- t16->t25 -->
+<g id="edge13" class="edge"><title>t16->t25</title>
+<path style="fill:none;stroke:gray;" d="M526,-434C513,-418 498,-399 485,-384"/>
+<polygon style="fill:gray;stroke:gray;" points="487.8,-381.9 479,-376 482.2,-386.1 487.8,-381.9"/>
+</g>
+<!-- t16->t27 -->
+<g id="edge35" class="edge"><title>t16->t27</title>
+<path style="fill:none;stroke:gray;" d="M569,-430C575,-416 581,-399 586,-385"/>
+<polygon style="fill:gray;stroke:gray;" points="589.137,-386.56 590,-376 582.74,-383.717 589.137,-386.56"/>
+</g>
+<!-- t26 -->
+<g id="node19" class="node"><title>t26</title>
+<polygon style="fill:#90ee90;stroke:green;" points="554,-292 450,-292 446,-288 446,-256 550,-256 554,-260 554,-292"/>
+<polyline style="fill:none;stroke:green;" points="550,-288 446,-288 "/>
+<polyline style="fill:none;stroke:green;" points="550,-288 550,-256 "/>
+<polyline style="fill:none;stroke:green;" points="550,-288 554,-292 "/>
+<text text-anchor="middle" x="500" y="-267.5" style="font-family:Times New Roman;font-size:20.00;">loadEffects</text>
+</g>
+<!-- t25->t26 -->
+<g id="edge15" class="edge"><title>t25->t26</title>
+<path style="fill:none;stroke:gray;" d="M472,-340C477,-329 483,-314 488,-302"/>
+<polygon style="fill:gray;stroke:gray;" points="491.536,-302.585 492,-292 485.036,-299.985 491.536,-302.585"/>
+</g>
+<!-- t65 -->
+<g id="node21" class="node"><title>t65</title>
+<polygon style="fill:none;stroke:blue;" points="486,-234 366,-234 362,-230 362,-198 482,-198 486,-202 486,-234"/>
+<polyline style="fill:none;stroke:blue;" points="482,-230 362,-230 "/>
+<polyline style="fill:none;stroke:blue;" points="482,-230 482,-198 "/>
+<polyline style="fill:none;stroke:blue;" points="482,-230 486,-234 "/>
+<text text-anchor="middle" x="424" y="-209.5" style="font-family:Times New Roman;font-size:20.00;fill:blue;">consequences</text>
+</g>
+<!-- t25->t65 -->
+<g id="edge61" class="edge"><title>t25->t65</title>
+<path style="fill:none;stroke:gray;" d="M455,-340C450,-327 442,-308 437,-292 433,-277 430,-259 427,-244"/>
+<polygon style="fill:gray;stroke:gray;" points="430.478,-243.602 426,-234 423.512,-244.299 430.478,-243.602"/>
+</g>
+<!-- t26->t65 -->
+<g id="edge17" class="edge"><title>t26->t65</title>
+<path style="fill:none;stroke:gray;" d="M476,-256C470,-251 463,-246 456,-240"/>
+<polygon style="fill:gray;stroke:gray;" points="458.1,-237.2 448,-234 453.9,-242.8 458.1,-237.2"/>
+</g>
+<!-- t65->t68 -->
+<g id="edge49" class="edge"><title>t65->t68</title>
+<path style="fill:none;stroke:blue;" d="M445,-198C452,-191 460,-183 466,-176 497,-141 526,-96 544,-69"/>
+<polygon style="fill:blue;stroke:blue;" points="547.203,-70.4414 549,-60 541.084,-67.0418 547.203,-70.4414"/>
+</g>
+<!-- t49 -->
+<g id="node23" class="node"><title>t49</title>
+<polygon style="fill:#90ee90;stroke:green;" points="19.9571,-345.794 114,-318.5 208.043,-345.794 207.955,-389.956 20.0448,-389.956 19.9571,-345.794"/>
+<polygon style="fill:none;stroke:green;" points="15.9511,-342.791 114,-314.335 212.049,-342.791 211.947,-393.956 16.0528,-393.956 15.9511,-342.791"/>
+<text text-anchor="middle" x="114" y="-351.5" style="font-family:Times New Roman;font-size:20.00;">buildPolyphenInput</text>
+</g>
+<!-- t50 -->
+<g id="node24" class="node"><title>t50</title>
+<polygon style="fill:#90ee90;stroke:green;" points="214,-292 20,-292 16,-288 16,-256 210,-256 214,-260 214,-292"/>
+<polyline style="fill:none;stroke:green;" points="210,-288 16,-288 "/>
+<polyline style="fill:none;stroke:green;" points="210,-288 210,-256 "/>
+<polyline style="fill:none;stroke:green;" points="210,-288 214,-292 "/>
+<text text-anchor="middle" x="115" y="-267.5" style="font-family:Times New Roman;font-size:20.00;">buildPolyphenFeatures</text>
+</g>
+<!-- t49->t50 -->
+<g id="edge19" class="edge"><title>t49->t50</title>
+<path style="fill:none;stroke:gray;" d="M115,-314C115,-310 115,-306 115,-302"/>
+<polygon style="fill:gray;stroke:gray;" points="118.5,-302 115,-292 111.5,-302 118.5,-302"/>
+</g>
+<!-- t52 -->
+<g id="node30" class="node"><title>t52</title>
+<polygon style="fill:#90ee90;stroke:green;" points="457,-176 301,-176 297,-172 297,-140 453,-140 457,-144 457,-176"/>
+<polyline style="fill:none;stroke:green;" points="453,-172 297,-172 "/>
+<polyline style="fill:none;stroke:green;" points="453,-172 453,-140 "/>
+<polyline style="fill:none;stroke:green;" points="453,-172 457,-176 "/>
+<text text-anchor="middle" x="377" y="-151.5" style="font-family:Times New Roman;font-size:20.00;">loadPolyphenMap</text>
+</g>
+<!-- t49->t52 -->
+<g id="edge25" class="edge"><title>t49->t52</title>
+<path style="fill:none;stroke:gray;" d="M172,-331C192,-320 214,-307 232,-292 249,-278 249,-270 263,-256 290,-229 322,-202 346,-182"/>
+<polygon style="fill:gray;stroke:gray;" points="348.1,-184.8 354,-176 343.9,-179.2 348.1,-184.8"/>
+</g>
+<!-- t57 -->
+<g id="node35" class="node"><title>t57</title>
+<polygon style="fill:none;stroke:blue;" points="271,-234 173,-234 169,-230 169,-198 267,-198 271,-202 271,-234"/>
+<polyline style="fill:none;stroke:blue;" points="267,-230 169,-230 "/>
+<polyline style="fill:none;stroke:blue;" points="267,-230 267,-198 "/>
+<polyline style="fill:none;stroke:blue;" points="267,-230 271,-234 "/>
+<text text-anchor="middle" x="220" y="-209.5" style="font-family:Times New Roman;font-size:20.00;fill:blue;">runPanther</text>
+</g>
+<!-- t49->t57 -->
+<g id="edge63" class="edge"><title>t49->t57</title>
+<path style="fill:none;stroke:gray;" d="M179,-333C196,-323 213,-310 223,-292 231,-278 231,-259 228,-244"/>
+<polygon style="fill:gray;stroke:gray;" points="231.393,-243.119 226,-234 224.529,-244.492 231.393,-243.119"/>
+</g>
+<!-- t51 -->
+<g id="node26" class="node"><title>t51</title>
+<polygon style="fill:#90ee90;stroke:green;" points="149,-234 37,-234 33,-230 33,-198 145,-198 149,-202 149,-234"/>
+<polyline style="fill:none;stroke:green;" points="145,-230 33,-230 "/>
+<polyline style="fill:none;stroke:green;" points="145,-230 145,-198 "/>
+<polyline style="fill:none;stroke:green;" points="145,-230 149,-234 "/>
+<text text-anchor="middle" x="91" y="-209.5" style="font-family:Times New Roman;font-size:20.00;">runPolyphen</text>
+</g>
+<!-- t50->t51 -->
+<g id="edge21" class="edge"><title>t50->t51</title>
+<path style="fill:none;stroke:gray;" d="M108,-256C106,-252 104,-248 102,-243"/>
+<polygon style="fill:gray;stroke:gray;" points="105.26,-241.717 98,-234 98.8631,-244.56 105.26,-241.717"/>
+</g>
+<!-- t53 -->
+<g id="node28" class="node"><title>t53</title>
+<polygon style="fill:#90ee90;stroke:green;" points="151,-176 31,-176 27,-172 27,-140 147,-140 151,-144 151,-176"/>
+<polyline style="fill:none;stroke:green;" points="147,-172 27,-172 "/>
+<polyline style="fill:none;stroke:green;" points="147,-172 147,-140 "/>
+<polyline style="fill:none;stroke:green;" points="147,-172 151,-176 "/>
+<text text-anchor="middle" x="89" y="-151.5" style="font-family:Times New Roman;font-size:20.00;">loadPolyphen</text>
+</g>
+<!-- t51->t53 -->
+<g id="edge23" class="edge"><title>t51->t53</title>
+<path style="fill:none;stroke:gray;" d="M90,-198C90,-194 90,-190 90,-186"/>
+<polygon style="fill:gray;stroke:gray;" points="93.5001,-186 90,-176 86.5001,-186 93.5001,-186"/>
+</g>
+<!-- t67 -->
+<g id="node39" class="node"><title>t67</title>
+<polygon style="fill:none;stroke:blue;" points="336,-118 270,-118 266,-114 266,-82 332,-82 336,-86 336,-118"/>
+<polyline style="fill:none;stroke:blue;" points="332,-114 266,-114 "/>
+<polyline style="fill:none;stroke:blue;" points="332,-114 332,-82 "/>
+<polyline style="fill:none;stroke:blue;" points="332,-114 336,-118 "/>
+<text text-anchor="middle" x="301" y="-93.5" style="font-family:Times New Roman;font-size:20.00;fill:blue;">effects</text>
+</g>
+<!-- t53->t67 -->
+<g id="edge65" class="edge"><title>t53->t67</title>
+<path style="fill:none;stroke:gray;" d="M151,-141C185,-132 226,-121 256,-113"/>
+<polygon style="fill:gray;stroke:gray;" points="257.427,-116.226 266,-110 255.416,-109.521 257.427,-116.226"/>
+</g>
+<!-- t52->t67 -->
+<g id="edge67" class="edge"><title>t52->t67</title>
+<path style="fill:none;stroke:gray;" d="M353,-140C347,-135 340,-130 333,-124"/>
+<polygon style="fill:gray;stroke:gray;" points="335.1,-121.2 325,-118 330.9,-126.8 335.1,-121.2"/>
+</g>
+<!-- t12 -->
+<g id="node32" class="node"><title>t12</title>
+<polygon style="fill:#90ee90;stroke:green;" points="388,-376 234,-376 230,-372 230,-340 384,-340 388,-344 388,-376"/>
+<polyline style="fill:none;stroke:green;" points="384,-372 230,-372 "/>
+<polyline style="fill:none;stroke:green;" points="384,-372 384,-340 "/>
+<polyline style="fill:none;stroke:green;" points="384,-372 388,-376 "/>
+<text text-anchor="middle" x="309" y="-351.5" style="font-family:Times New Roman;font-size:20.00;">buildPeptideFasta</text>
+</g>
+<!-- t56 -->
+<g id="node33" class="node"><title>t56</title>
+<polygon style="fill:#90ee90;stroke:green;" points="408,-292 276,-292 272,-288 272,-256 404,-256 408,-260 408,-292"/>
+<polyline style="fill:none;stroke:green;" points="404,-288 272,-288 "/>
+<polyline style="fill:none;stroke:green;" points="404,-288 404,-256 "/>
+<polyline style="fill:none;stroke:green;" points="404,-288 408,-292 "/>
+<text text-anchor="middle" x="340" y="-267.5" style="font-family:Times New Roman;font-size:20.00;">preparePanther</text>
+</g>
+<!-- t12->t56 -->
+<g id="edge27" class="edge"><title>t12->t56</title>
+<path style="fill:none;stroke:gray;" d="M316,-340C320,-329 325,-314 330,-302"/>
+<polygon style="fill:gray;stroke:gray;" points="333.479,-302.584 333,-292 326.774,-300.573 333.479,-302.584"/>
+</g>
+<!-- t56->t57 -->
+<g id="edge29" class="edge"><title>t56->t57</title>
+<path style="fill:none;stroke:gray;" d="M303,-256C291,-250 278,-244 266,-238"/>
+<polygon style="fill:gray;stroke:gray;" points="267.56,-234.863 257,-234 264.717,-241.26 267.56,-234.863"/>
+</g>
+<!-- t58 -->
+<g id="node37" class="node"><title>t58</title>
+<polygon style="fill:none;stroke:blue;" points="279,-176 173,-176 169,-172 169,-140 275,-140 279,-144 279,-176"/>
+<polyline style="fill:none;stroke:blue;" points="275,-172 169,-172 "/>
+<polyline style="fill:none;stroke:blue;" points="275,-172 275,-140 "/>
+<polyline style="fill:none;stroke:blue;" points="275,-172 279,-176 "/>
+<text text-anchor="middle" x="224" y="-151.5" style="font-family:Times New Roman;font-size:20.00;fill:blue;">loadPanther</text>
+</g>
+<!-- t57->t58 -->
+<g id="edge31" class="edge"><title>t57->t58</title>
+<path style="fill:none;stroke:blue;" d="M221,-198C221,-194 221,-190 222,-186"/>
+<polygon style="fill:blue;stroke:blue;" points="225.488,-186.299 223,-176 218.522,-185.602 225.488,-186.299"/>
+</g>
+<!-- t58->t67 -->
+<g id="edge33" class="edge"><title>t58->t67</title>
+<path style="fill:none;stroke:blue;" d="M248,-140C254,-135 262,-129 269,-124"/>
+<polygon style="fill:blue;stroke:blue;" points="271.1,-126.8 277,-118 266.9,-121.2 271.1,-126.8"/>
+</g>
+<!-- t67->t68 -->
+<g id="edge45" class="edge"><title>t67->t68</title>
+<path style="fill:none;stroke:blue;" d="M336,-92C385,-81 473,-62 523,-50"/>
+<polygon style="fill:blue;stroke:blue;" points="523.881,-53.3933 533,-48 522.508,-46.5292 523.881,-53.3933"/>
+</g>
+<!-- t28 -->
+<g id="node43" class="node"><title>t28</title>
+<polygon style="fill:#90ee90;stroke:green;" points="680,-292 576,-292 572,-288 572,-256 676,-256 680,-260 680,-292"/>
+<polyline style="fill:none;stroke:green;" points="676,-288 572,-288 "/>
+<polyline style="fill:none;stroke:green;" points="676,-288 676,-256 "/>
+<polyline style="fill:none;stroke:green;" points="676,-288 680,-292 "/>
+<text text-anchor="middle" x="626" y="-267.5" style="font-family:Times New Roman;font-size:20.00;">loadAlleles</text>
+</g>
+<!-- t27->t28 -->
+<g id="edge37" class="edge"><title>t27->t28</title>
+<path style="fill:none;stroke:gray;" d="M603,-340C607,-329 612,-314 617,-302"/>
+<polygon style="fill:gray;stroke:gray;" points="620.479,-302.584 620,-292 613.774,-300.573 620.479,-302.584"/>
+</g>
+<!-- t66 -->
+<g id="node49" class="node"><title>t66</title>
+<polygon style="fill:none;stroke:blue;" points="672,-118 608,-118 604,-114 604,-82 668,-82 672,-86 672,-118"/>
+<polyline style="fill:none;stroke:blue;" points="668,-114 604,-114 "/>
+<polyline style="fill:none;stroke:blue;" points="668,-114 668,-82 "/>
+<polyline style="fill:none;stroke:blue;" points="668,-114 672,-118 "/>
+<text text-anchor="middle" x="638" y="-93.5" style="font-family:Times New Roman;font-size:20.00;fill:blue;">alleles</text>
+</g>
+<!-- t27->t66 -->
+<g id="edge75" class="edge"><title>t27->t66</title>
+<path style="fill:none;stroke:gray;" d="M623,-340C636,-332 650,-322 663,-314 724,-276 766,-295 801,-234 823,-196 805,-168 771,-140 746,-119 710,-109 682,-105"/>
+<polygon style="fill:gray;stroke:gray;" points="682.492,-101.529 672,-103 681.119,-108.393 682.492,-101.529"/>
+</g>
+<!-- t29 -->
+<g id="node45" class="node"><title>t29</title>
+<polygon style="fill:#90ee90;stroke:green;" points="792,-234 528,-234 524,-230 524,-198 788,-198 792,-202 792,-234"/>
+<polyline style="fill:none;stroke:green;" points="788,-230 524,-230 "/>
+<polyline style="fill:none;stroke:green;" points="788,-230 788,-198 "/>
+<polyline style="fill:none;stroke:green;" points="788,-230 792,-234 "/>
+<text text-anchor="middle" x="658" y="-209.5" style="font-family:Times New Roman;font-size:20.00;">summarizeAllelesPerTranscript</text>
+</g>
+<!-- t28->t29 -->
+<g id="edge39" class="edge"><title>t28->t29</title>
+<path style="fill:none;stroke:gray;" d="M636,-256C638,-252 641,-247 643,-243"/>
+<polygon style="fill:gray;stroke:gray;" points="646.203,-244.441 648,-234 640.084,-241.042 646.203,-244.441"/>
+</g>
+<!-- t28->t66 -->
+<g id="edge73" class="edge"><title>t28->t66</title>
+<path style="fill:none;stroke:gray;" d="M572,-257C546,-249 519,-239 515,-234 491,-199 491,-173 515,-140 525,-126 563,-115 594,-108"/>
+<polygon style="fill:gray;stroke:gray;" points="594.881,-111.393 604,-106 593.508,-104.529 594.881,-111.393"/>
+</g>
+<!-- t30 -->
+<g id="node47" class="node"><title>t30</title>
+<polygon style="fill:#90ee90;stroke:green;" points="752,-176 528,-176 524,-172 524,-140 748,-140 752,-144 752,-176"/>
+<polyline style="fill:none;stroke:green;" points="748,-172 524,-172 "/>
+<polyline style="fill:none;stroke:green;" points="748,-172 748,-140 "/>
+<polyline style="fill:none;stroke:green;" points="748,-172 752,-176 "/>
+<text text-anchor="middle" x="638" y="-151.5" style="font-family:Times New Roman;font-size:20.00;">summarizeAllelesPerGene</text>
+</g>
+<!-- t29->t30 -->
+<g id="edge41" class="edge"><title>t29->t30</title>
+<path style="fill:none;stroke:gray;" d="M652,-198C651,-194 649,-190 648,-186"/>
+<polygon style="fill:gray;stroke:gray;" points="650.964,-183.985 644,-176 644.464,-186.585 650.964,-183.985"/>
+</g>
+<!-- t29->t66 -->
+<g id="edge71" class="edge"><title>t29->t66</title>
+<path style="fill:none;stroke:gray;" d="M721,-198C740,-191 756,-184 761,-176 771,-162 771,-152 761,-140 752,-126 714,-115 682,-108"/>
+<polygon style="fill:gray;stroke:gray;" points="682.492,-104.529 672,-106 681.119,-111.393 682.492,-104.529"/>
+</g>
+<!-- t30->t66 -->
+<g id="edge43" class="edge"><title>t30->t66</title>
+<path style="fill:none;stroke:gray;" d="M638,-140C638,-136 638,-132 638,-128"/>
+<polygon style="fill:gray;stroke:gray;" points="641.5,-128 638,-118 634.5,-128 641.5,-128"/>
+</g>
+<!-- t66->t68 -->
+<g id="edge47" class="edge"><title>t66->t68</title>
+<path style="fill:none;stroke:blue;" d="M614,-82C608,-77 600,-71 593,-66"/>
+<polygon style="fill:blue;stroke:blue;" points="594.262,-62.6349 584,-60 590.379,-68.4592 594.262,-62.6349"/>
+</g>
+<!-- k1 -->
+<g id="node69" class="node"><title>k1</title>
+<polygon style="fill:#90ee90;stroke:green;" points="1362,-585.5 1214,-585.5 1210,-581.5 1210,-534.5 1358,-534.5 1362,-538.5 1362,-585.5"/>
+<polyline style="fill:none;stroke:green;" points="1358,-581.5 1210,-581.5 "/>
+<polyline style="fill:none;stroke:green;" points="1358,-581.5 1358,-534.5 "/>
+<polyline style="fill:none;stroke:green;" points="1358,-581.5 1362,-585.5 "/>
+<text text-anchor="middle" x="1286" y="-553.5" style="font-family:Times New Roman;font-size:20.00;">Up-to-date task</text>
+</g>
+<!-- k2 -->
+<g id="node70" class="node"><title>k2</title>
+<polygon style="fill:none;stroke:blue;" points="1345,-493.5 1231,-493.5 1227,-489.5 1227,-442.5 1341,-442.5 1345,-446.5 1345,-493.5"/>
+<polyline style="fill:none;stroke:blue;" points="1341,-489.5 1227,-489.5 "/>
+<polyline style="fill:none;stroke:blue;" points="1341,-489.5 1341,-442.5 "/>
+<polyline style="fill:none;stroke:blue;" points="1341,-489.5 1345,-493.5 "/>
+<text text-anchor="middle" x="1286" y="-461.5" style="font-family:Times New Roman;font-size:20.00;fill:blue;">Task to run</text>
+</g>
+<!-- k1->k2 -->
+<g id="edge78" class="edge"><title>k1->k2</title>
+<path style="fill:none;stroke:gray;" d="M1286,-534C1286,-524 1286,-514 1286,-504"/>
+<polygon style="fill:gray;stroke:gray;" points="1289.5,-504 1286,-494 1282.5,-504 1289.5,-504"/>
+</g>
+<!-- k3 -->
+<g id="node72" class="node"><title>k3</title>
+<polygon style="fill:#fff68f;stroke:black;" points="1347,-383.5 1229,-383.5 1225,-379.5 1225,-332.5 1343,-332.5 1347,-336.5 1347,-383.5"/>
+<polyline style="fill:none;stroke:black;" points="1343,-379.5 1225,-379.5 "/>
+<polyline style="fill:none;stroke:black;" points="1343,-379.5 1343,-332.5 "/>
+<polyline style="fill:none;stroke:black;" points="1343,-379.5 1347,-383.5 "/>
+<text text-anchor="middle" x="1286" y="-351.5" style="font-family:Times New Roman;font-size:20.00;">Final target</text>
+</g>
+<!-- k2->k3 -->
+<g id="edge80" class="edge"><title>k2->k3</title>
+<path style="fill:none;stroke:blue;" d="M1286,-442C1286,-428 1286,-410 1286,-394"/>
+<polygon style="fill:blue;stroke:blue;" points="1289.5,-394 1286,-384 1282.5,-394 1289.5,-394"/>
+</g>
+</g>
+</svg>
diff --git a/doc/images/gallery/gallery_snp_annotation_consequences.png b/doc/images/gallery/gallery_snp_annotation_consequences.png
new file mode 100644
index 0000000..8e223bd
Binary files /dev/null and b/doc/images/gallery/gallery_snp_annotation_consequences.png differ
diff --git a/doc/images/gallery/gallery_snp_annotation_consequences.svg b/doc/images/gallery/gallery_snp_annotation_consequences.svg
new file mode 100644
index 0000000..c25b7fa
--- /dev/null
+++ b/doc/images/gallery/gallery_snp_annotation_consequences.svg
@@ -0,0 +1,471 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.0//EN"
+ "http://www.w3.org/TR/2001/REC-SVG-20010904/DTD/svg10.dtd" [
+ <!ATTLIST svg xmlns:xlink CDATA #FIXED "http://www.w3.org/1999/xlink">
+]>
+<!-- Generated by Graphviz version 2.20.2 (Mon Aug 4 08:59:22 UTC 2008)
+ For user: Andreas Heger -->
+<!-- Title: tree Pages: 1 -->
+<svg width="864pt" height="420pt"
+ viewBox="0.00 0.00 432.00 210.08" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<g id="graph0" class="graph" transform="scale(0.311688 0.311688) rotate(0) translate(4 670)">
+<title>SNP Annotation Pipeline (consequences)</title>
+<polygon style="fill:white;stroke:white;" points="-4,4 -4,-670 1382,-670 1382,4 -4,4"/>
+<g id="cluster2" class="cluster"><title>clustertasks</title>
+<polygon style="fill:none;stroke:black;" points="8,-16 8,-639 1194,-639 1194,-16 8,-16"/>
+<text text-anchor="middle" x="601" y="-608" style="font-family:Times New Roman;font-size:20.00pt;fill:red;"><tspan font-weight = "bold">SNP Annotation Pipeline (consequences):</tspan></text>
+</g>
+<g id="cluster3" class="cluster"><title>clusterkey</title>
+<polygon style="fill:#e5e5e5;stroke:#e5e5e5;" points="1202,-324 1202,-658 1370,-658 1370,-324 1202,-324"/>
+<text text-anchor="middle" x="1286" y="-627" style="font-family:Times New Roman;font-size:30.00;">Key:</text>
+</g>
+<!-- t11 -->
+<g id="node2" class="node"><title>t11</title>
+<polygon style="fill:#90ee90;stroke:green;" points="851,-589 629,-589 625,-585 625,-553 847,-553 851,-557 851,-589"/>
+<polyline style="fill:none;stroke:green;" points="847,-585 625,-585 "/>
+<polyline style="fill:none;stroke:green;" points="847,-585 847,-553 "/>
+<polyline style="fill:none;stroke:green;" points="847,-585 851,-589 "/>
+<text text-anchor="middle" x="738" y="-564.5" style="font-family:Times New Roman;font-size:20.00;">loadTranscriptInformation</text>
+</g>
+<!-- t16 -->
+<g id="node4" class="node"><title>t16</title>
+<polygon style="fill:#90ee90;stroke:green;" points="917.809,-455.794 995,-428.5 1072.19,-455.794 1072.12,-499.956 917.881,-499.956 917.809,-455.794"/>
+<polygon style="fill:none;stroke:green;" points="913.804,-452.967 995,-424.257 1076.2,-452.967 1076.11,-503.956 913.887,-503.956 913.804,-452.967"/>
+<text text-anchor="middle" x="995" y="-461.5" style="font-family:Times New Roman;font-size:20.00;">buildSelenoList</text>
+</g>
+<!-- t11->t16 -->
+<g id="edge25" class="edge"><title>t11->t16</title>
+<path style="fill:none;stroke:gray;" d="M783,-553C816,-539 864,-521 905,-504"/>
+<polygon style="fill:gray;stroke:gray;" points="906.283,-507.26 914,-500 903.44,-500.863 906.283,-507.26"/>
+</g>
+<!-- t64 -->
+<g id="node21" class="node"><title>t64</title>
+<polygon style="fill:white;stroke:gray;" points="672,-292 600,-292 596,-288 596,-256 668,-256 672,-260 672,-292"/>
+<polyline style="fill:none;stroke:gray;" points="668,-288 596,-288 "/>
+<polyline style="fill:none;stroke:gray;" points="668,-288 668,-256 "/>
+<polyline style="fill:none;stroke:gray;" points="668,-288 672,-292 "/>
+<text text-anchor="middle" x="634" y="-267.5" style="font-family:Times New Roman;font-size:20.00;fill:gray;">prepare</text>
+</g>
+<!-- t11->t64 -->
+<g id="edge19" class="edge"><title>t11->t64</title>
+<path style="fill:none;stroke:gray;" d="M707,-553C665,-526 592,-472 567,-402 555,-365 548,-348 567,-314 572,-305 579,-299 587,-293"/>
+<polygon style="fill:gray;stroke:gray;" points="588.958,-295.916 596,-288 585.559,-289.797 588.958,-295.916"/>
+</g>
+<!-- t15 -->
+<g id="node3" class="node"><title>t15</title>
+<polygon style="fill:#90ee90;stroke:green;" points="1068,-589 926,-589 922,-585 922,-553 1064,-553 1068,-557 1068,-589"/>
+<polyline style="fill:none;stroke:green;" points="1064,-585 922,-585 "/>
+<polyline style="fill:none;stroke:green;" points="1064,-585 1064,-553 "/>
+<polyline style="fill:none;stroke:green;" points="1064,-585 1068,-589 "/>
+<text text-anchor="middle" x="995" y="-564.5" style="font-family:Times New Roman;font-size:20.00;">loadProteinStats</text>
+</g>
+<!-- t15->t16 -->
+<g id="edge3" class="edge"><title>t15->t16</title>
+<path style="fill:none;stroke:gray;" d="M995,-553C995,-542 995,-528 995,-514"/>
+<polygon style="fill:gray;stroke:gray;" points="998.5,-514 995,-504 991.5,-514 998.5,-514"/>
+</g>
+<!-- t25 -->
+<g id="node6" class="node"><title>t25</title>
+<polygon style="fill:#90ee90;stroke:green;" points="1186,-376 1074,-376 1070,-372 1070,-340 1182,-340 1186,-344 1186,-376"/>
+<polyline style="fill:none;stroke:green;" points="1182,-372 1070,-372 "/>
+<polyline style="fill:none;stroke:green;" points="1182,-372 1182,-340 "/>
+<polyline style="fill:none;stroke:green;" points="1182,-372 1186,-376 "/>
+<text text-anchor="middle" x="1128" y="-351.5" style="font-family:Times New Roman;font-size:20.00;">makeEffects</text>
+</g>
+<!-- t16->t25 -->
+<g id="edge5" class="edge"><title>t16->t25</title>
+<path style="fill:none;stroke:gray;" d="M1032,-437C1053,-420 1079,-399 1098,-383"/>
+<polygon style="fill:gray;stroke:gray;" points="1100.78,-385.219 1106,-376 1096.17,-379.951 1100.78,-385.219"/>
+</g>
+<!-- t27 -->
+<g id="node49" class="node"><title>t27</title>
+<polygon style="fill:white;stroke:gray;" points="1052,-376 942,-376 938,-372 938,-340 1048,-340 1052,-344 1052,-376"/>
+<polyline style="fill:none;stroke:gray;" points="1048,-372 938,-372 "/>
+<polyline style="fill:none;stroke:gray;" points="1048,-372 1048,-340 "/>
+<polyline style="fill:none;stroke:gray;" points="1048,-372 1052,-376 "/>
+<text text-anchor="middle" x="995" y="-351.5" style="font-family:Times New Roman;font-size:20.00;fill:gray;">buildAlleles</text>
+</g>
+<!-- t16->t27 -->
+<g id="edge53" class="edge"><title>t16->t27</title>
+<path style="fill:none;stroke:gray;" d="M995,-424C995,-412 995,-398 995,-386"/>
+<polygon style="fill:gray;stroke:gray;" points="998.5,-386 995,-376 991.5,-386 998.5,-386"/>
+</g>
+<!-- t26 -->
+<g id="node8" class="node"><title>t26</title>
+<polygon style="fill:#90ee90;stroke:green;" points="1186,-292 1082,-292 1078,-288 1078,-256 1182,-256 1186,-260 1186,-292"/>
+<polyline style="fill:none;stroke:green;" points="1182,-288 1078,-288 "/>
+<polyline style="fill:none;stroke:green;" points="1182,-288 1182,-256 "/>
+<polyline style="fill:none;stroke:green;" points="1182,-288 1186,-292 "/>
+<text text-anchor="middle" x="1132" y="-267.5" style="font-family:Times New Roman;font-size:20.00;">loadEffects</text>
+</g>
+<!-- t25->t26 -->
+<g id="edge7" class="edge"><title>t25->t26</title>
+<path style="fill:none;stroke:gray;" d="M1129,-340C1130,-329 1130,-315 1130,-302"/>
+<polygon style="fill:gray;stroke:gray;" points="1133.49,-302.299 1131,-292 1126.52,-301.602 1133.49,-302.299"/>
+</g>
+<!-- t65 -->
+<g id="node10" class="node"><title>t65</title>
+<polygon style="fill:#fff68f;stroke:black;" points="1140,-234 1020,-234 1016,-230 1016,-198 1136,-198 1140,-202 1140,-234"/>
+<polyline style="fill:none;stroke:black;" points="1136,-230 1016,-230 "/>
+<polyline style="fill:none;stroke:black;" points="1136,-230 1136,-198 "/>
+<polyline style="fill:none;stroke:black;" points="1136,-230 1140,-234 "/>
+<text text-anchor="middle" x="1078" y="-209.5" style="font-family:Times New Roman;font-size:20.00;">consequences</text>
+</g>
+<!-- t25->t65 -->
+<g id="edge27" class="edge"><title>t25->t65</title>
+<path style="fill:none;stroke:gray;" d="M1106,-340C1092,-328 1077,-311 1069,-292 1063,-277 1065,-258 1068,-244"/>
+<polygon style="fill:gray;stroke:gray;" points="1071.48,-244.584 1071,-234 1064.77,-242.573 1071.48,-244.584"/>
+</g>
+<!-- t26->t65 -->
+<g id="edge9" class="edge"><title>t26->t65</title>
+<path style="fill:none;stroke:gray;" d="M1115,-256C1111,-252 1106,-247 1102,-242"/>
+<polygon style="fill:gray;stroke:gray;" points="1104.22,-239.221 1095,-234 1098.95,-243.831 1104.22,-239.221"/>
+</g>
+<!-- t68 -->
+<g id="node63" class="node"><title>t68</title>
+<polygon style="fill:white;stroke:gray;" points="765,-60 715,-60 711,-56 711,-24 761,-24 765,-28 765,-60"/>
+<polyline style="fill:none;stroke:gray;" points="761,-56 711,-56 "/>
+<polyline style="fill:none;stroke:gray;" points="761,-56 761,-24 "/>
+<polyline style="fill:none;stroke:gray;" points="761,-56 765,-60 "/>
+<text text-anchor="middle" x="738" y="-35.5" style="font-family:Times New Roman;font-size:20.00;fill:gray;">full</text>
+</g>
+<!-- t65->t68 -->
+<g id="edge71" class="edge"><title>t65->t68</title>
+<path style="fill:none;stroke:gray;" d="M1056,-198C1036,-182 1006,-158 977,-140 931,-110 919,-103 867,-82 837,-70 801,-59 775,-52"/>
+<polygon style="fill:gray;stroke:gray;" points="775.584,-48.5212 765,-49 773.573,-55.2259 775.584,-48.5212"/>
+</g>
+<!-- t8 -->
+<g id="node12" class="node"><title>t8</title>
+<polygon style="fill:white;stroke:gray;" points="809,-486 667,-486 663,-482 663,-450 805,-450 809,-454 809,-486"/>
+<polyline style="fill:none;stroke:gray;" points="805,-482 663,-482 "/>
+<polyline style="fill:none;stroke:gray;" points="805,-482 805,-450 "/>
+<polyline style="fill:none;stroke:gray;" points="805,-482 809,-486 "/>
+<text text-anchor="middle" x="736" y="-461.5" style="font-family:Times New Roman;font-size:20.00;fill:gray;">buildTranscripts</text>
+</g>
+<!-- t9 -->
+<g id="node13" class="node"><title>t9</title>
+<polygon style="fill:white;stroke:gray;" points="716,-376 580,-376 576,-372 576,-340 712,-340 716,-344 716,-376"/>
+<polyline style="fill:none;stroke:gray;" points="712,-372 576,-372 "/>
+<polyline style="fill:none;stroke:gray;" points="712,-372 712,-340 "/>
+<polyline style="fill:none;stroke:gray;" points="712,-372 716,-376 "/>
+<text text-anchor="middle" x="646" y="-351.5" style="font-family:Times New Roman;font-size:20.00;fill:gray;">loadTranscripts</text>
+</g>
+<!-- t8->t9 -->
+<g id="edge11" class="edge"><title>t8->t9</title>
+<path style="fill:none;stroke:gray;" d="M721,-450C707,-432 684,-405 667,-384"/>
+<polygon style="fill:gray;stroke:gray;" points="669.8,-381.9 661,-376 664.2,-386.1 669.8,-381.9"/>
+</g>
+<!-- t8->t27 -->
+<g id="edge51" class="edge"><title>t8->t27</title>
+<path style="fill:none;stroke:gray;" d="M802,-450C840,-439 888,-422 929,-402 941,-397 952,-389 963,-382"/>
+<polygon style="fill:gray;stroke:gray;" points="965.1,-384.8 971,-376 960.9,-379.2 965.1,-384.8"/>
+</g>
+<!-- t9->t64 -->
+<g id="edge17" class="edge"><title>t9->t64</title>
+<path style="fill:none;stroke:gray;" d="M643,-340C641,-329 640,-314 638,-302"/>
+<polygon style="fill:gray;stroke:gray;" points="641.478,-301.602 637,-292 634.512,-302.299 641.478,-301.602"/>
+</g>
+<!-- t4 -->
+<g id="node15" class="node"><title>t4</title>
+<polygon style="fill:white;stroke:gray;" points="553,-589 395,-589 391,-585 391,-553 549,-553 553,-557 553,-589"/>
+<polyline style="fill:none;stroke:gray;" points="549,-585 391,-585 "/>
+<polyline style="fill:none;stroke:gray;" points="549,-585 549,-553 "/>
+<polyline style="fill:none;stroke:gray;" points="549,-585 553,-589 "/>
+<text text-anchor="middle" x="472" y="-564.5" style="font-family:Times New Roman;font-size:20.00;fill:gray;">buildGeneRegions</text>
+</g>
+<!-- t5 -->
+<g id="node16" class="node"><title>t5</title>
+<polygon style="fill:white;stroke:gray;" points="525,-486 423,-486 419,-482 419,-450 521,-450 525,-454 525,-486"/>
+<polyline style="fill:none;stroke:gray;" points="521,-482 419,-482 "/>
+<polyline style="fill:none;stroke:gray;" points="521,-482 521,-450 "/>
+<polyline style="fill:none;stroke:gray;" points="521,-482 525,-486 "/>
+<text text-anchor="middle" x="472" y="-461.5" style="font-family:Times New Roman;font-size:20.00;fill:gray;">buildGenes</text>
+</g>
+<!-- t4->t5 -->
+<g id="edge13" class="edge"><title>t4->t5</title>
+<path style="fill:none;stroke:gray;" d="M472,-553C472,-537 472,-514 472,-496"/>
+<polygon style="fill:gray;stroke:gray;" points="475.5,-496 472,-486 468.5,-496 475.5,-496"/>
+</g>
+<!-- t7 -->
+<g id="node18" class="node"><title>t7</title>
+<polygon style="fill:white;stroke:gray;" points="538,-376 410,-376 406,-372 406,-340 534,-340 538,-344 538,-376"/>
+<polyline style="fill:none;stroke:gray;" points="534,-372 406,-372 "/>
+<polyline style="fill:none;stroke:gray;" points="534,-372 534,-340 "/>
+<polyline style="fill:none;stroke:gray;" points="534,-372 538,-376 "/>
+<text text-anchor="middle" x="472" y="-351.5" style="font-family:Times New Roman;font-size:20.00;fill:gray;">loadGeneStats</text>
+</g>
+<!-- t5->t7 -->
+<g id="edge15" class="edge"><title>t5->t7</title>
+<path style="fill:none;stroke:gray;" d="M472,-450C472,-433 472,-407 472,-386"/>
+<polygon style="fill:gray;stroke:gray;" points="475.5,-386 472,-376 468.5,-386 475.5,-386"/>
+</g>
+<!-- t7->t64 -->
+<g id="edge21" class="edge"><title>t7->t64</title>
+<path style="fill:none;stroke:gray;" d="M504,-340C518,-332 536,-322 552,-314 563,-308 575,-302 587,-297"/>
+<polygon style="fill:gray;stroke:gray;" points="588.958,-299.916 596,-292 585.559,-293.797 588.958,-299.916"/>
+</g>
+<!-- t6 -->
+<g id="node20" class="node"><title>t6</title>
+<polygon style="fill:white;stroke:gray;" points="920,-376 738,-376 734,-372 734,-340 916,-340 920,-344 920,-376"/>
+<polyline style="fill:none;stroke:gray;" points="916,-372 734,-372 "/>
+<polyline style="fill:none;stroke:gray;" points="916,-372 916,-340 "/>
+<polyline style="fill:none;stroke:gray;" points="916,-372 920,-376 "/>
+<text text-anchor="middle" x="827" y="-351.5" style="font-family:Times New Roman;font-size:20.00;fill:gray;">loadGeneInformation</text>
+</g>
+<!-- t6->t64 -->
+<g id="edge23" class="edge"><title>t6->t64</title>
+<path style="fill:none;stroke:gray;" d="M786,-340C755,-326 713,-309 681,-294"/>
+<polygon style="fill:gray;stroke:gray;" points="682.56,-290.863 672,-290 679.717,-297.26 682.56,-290.863"/>
+</g>
+<!-- t64->t68 -->
+<g id="edge69" class="edge"><title>t64->t68</title>
+<path style="fill:none;stroke:gray;" d="M640,-256C650,-230 668,-181 686,-140 697,-115 712,-88 723,-69"/>
+<polygon style="fill:gray;stroke:gray;" points="726.203,-70.4414 728,-60 720.084,-67.0418 726.203,-70.4414"/>
+</g>
+<!-- t49 -->
+<g id="node28" class="node"><title>t49</title>
+<polygon style="fill:white;stroke:gray;" points="19.9571,-345.794 114,-318.5 208.043,-345.794 207.955,-389.956 20.0448,-389.956 19.9571,-345.794"/>
+<polygon style="fill:none;stroke:gray;" points="15.9511,-342.791 114,-314.335 212.049,-342.791 211.947,-393.956 16.0528,-393.956 15.9511,-342.791"/>
+<text text-anchor="middle" x="114" y="-351.5" style="font-family:Times New Roman;font-size:20.00;fill:gray;">buildPolyphenInput</text>
+</g>
+<!-- t50 -->
+<g id="node29" class="node"><title>t50</title>
+<polygon style="fill:white;stroke:gray;" points="299,-292 105,-292 101,-288 101,-256 295,-256 299,-260 299,-292"/>
+<polyline style="fill:none;stroke:gray;" points="295,-288 101,-288 "/>
+<polyline style="fill:none;stroke:gray;" points="295,-288 295,-256 "/>
+<polyline style="fill:none;stroke:gray;" points="295,-288 299,-292 "/>
+<text text-anchor="middle" x="200" y="-267.5" style="font-family:Times New Roman;font-size:20.00;fill:gray;">buildPolyphenFeatures</text>
+</g>
+<!-- t49->t50 -->
+<g id="edge29" class="edge"><title>t49->t50</title>
+<path style="fill:none;stroke:gray;" d="M148,-324C156,-316 165,-307 174,-299"/>
+<polygon style="fill:gray;stroke:gray;" points="176.404,-301.546 181,-292 171.454,-296.596 176.404,-301.546"/>
+</g>
+<!-- t52 -->
+<g id="node35" class="node"><title>t52</title>
+<polygon style="fill:white;stroke:gray;" points="176,-176 20,-176 16,-172 16,-140 172,-140 176,-144 176,-176"/>
+<polyline style="fill:none;stroke:gray;" points="172,-172 16,-172 "/>
+<polyline style="fill:none;stroke:gray;" points="172,-172 172,-140 "/>
+<polyline style="fill:none;stroke:gray;" points="172,-172 176,-176 "/>
+<text text-anchor="middle" x="96" y="-151.5" style="font-family:Times New Roman;font-size:20.00;fill:gray;">loadPolyphenMap</text>
+</g>
+<!-- t49->t52 -->
+<g id="edge35" class="edge"><title>t49->t52</title>
+<path style="fill:none;stroke:gray;" d="M99,-319C96,-310 93,-301 92,-292 87,-256 89,-213 92,-186"/>
+<polygon style="fill:gray;stroke:gray;" points="95.4875,-186.299 93,-176 88.5222,-185.602 95.4875,-186.299"/>
+</g>
+<!-- t57 -->
+<g id="node40" class="node"><title>t57</title>
+<polygon style="fill:white;stroke:gray;" points="442,-234 344,-234 340,-230 340,-198 438,-198 442,-202 442,-234"/>
+<polyline style="fill:none;stroke:gray;" points="438,-230 340,-230 "/>
+<polyline style="fill:none;stroke:gray;" points="438,-230 438,-198 "/>
+<polyline style="fill:none;stroke:gray;" points="438,-230 442,-234 "/>
+<text text-anchor="middle" x="391" y="-209.5" style="font-family:Times New Roman;font-size:20.00;fill:gray;">runPanther</text>
+</g>
+<!-- t49->t57 -->
+<g id="edge39" class="edge"><title>t49->t57</title>
+<path style="fill:none;stroke:gray;" d="M174,-332C189,-325 206,-319 221,-314 259,-301 277,-316 308,-292 323,-280 316,-269 328,-256 334,-250 340,-245 347,-240"/>
+<polygon style="fill:gray;stroke:gray;" points="349.1,-242.8 355,-234 344.9,-237.2 349.1,-242.8"/>
+</g>
+<!-- t51 -->
+<g id="node31" class="node"><title>t51</title>
+<polygon style="fill:white;stroke:gray;" points="286,-234 174,-234 170,-230 170,-198 282,-198 286,-202 286,-234"/>
+<polyline style="fill:none;stroke:gray;" points="282,-230 170,-230 "/>
+<polyline style="fill:none;stroke:gray;" points="282,-230 282,-198 "/>
+<polyline style="fill:none;stroke:gray;" points="282,-230 286,-234 "/>
+<text text-anchor="middle" x="228" y="-209.5" style="font-family:Times New Roman;font-size:20.00;fill:gray;">runPolyphen</text>
+</g>
+<!-- t50->t51 -->
+<g id="edge31" class="edge"><title>t50->t51</title>
+<path style="fill:none;stroke:gray;" d="M209,-256C211,-252 213,-248 215,-243"/>
+<polygon style="fill:gray;stroke:gray;" points="218.137,-244.56 219,-234 211.74,-241.717 218.137,-244.56"/>
+</g>
+<!-- t53 -->
+<g id="node33" class="node"><title>t53</title>
+<polygon style="fill:white;stroke:gray;" points="318,-176 198,-176 194,-172 194,-140 314,-140 318,-144 318,-176"/>
+<polyline style="fill:none;stroke:gray;" points="314,-172 194,-172 "/>
+<polyline style="fill:none;stroke:gray;" points="314,-172 314,-140 "/>
+<polyline style="fill:none;stroke:gray;" points="314,-172 318,-176 "/>
+<text text-anchor="middle" x="256" y="-151.5" style="font-family:Times New Roman;font-size:20.00;fill:gray;">loadPolyphen</text>
+</g>
+<!-- t51->t53 -->
+<g id="edge33" class="edge"><title>t51->t53</title>
+<path style="fill:none;stroke:gray;" d="M237,-198C239,-194 241,-190 243,-185"/>
+<polygon style="fill:gray;stroke:gray;" points="246.137,-186.56 247,-176 239.74,-183.717 246.137,-186.56"/>
+</g>
+<!-- t67 -->
+<g id="node45" class="node"><title>t67</title>
+<polygon style="fill:white;stroke:gray;" points="359,-118 293,-118 289,-114 289,-82 355,-82 359,-86 359,-118"/>
+<polyline style="fill:none;stroke:gray;" points="355,-114 289,-114 "/>
+<polyline style="fill:none;stroke:gray;" points="355,-114 355,-82 "/>
+<polyline style="fill:none;stroke:gray;" points="355,-114 359,-118 "/>
+<text text-anchor="middle" x="324" y="-93.5" style="font-family:Times New Roman;font-size:20.00;fill:gray;">effects</text>
+</g>
+<!-- t53->t67 -->
+<g id="edge45" class="edge"><title>t53->t67</title>
+<path style="fill:none;stroke:gray;" d="M277,-140C283,-135 289,-130 295,-124"/>
+<polygon style="fill:gray;stroke:gray;" points="297.1,-126.8 303,-118 292.9,-121.2 297.1,-126.8"/>
+</g>
+<!-- t52->t67 -->
+<g id="edge47" class="edge"><title>t52->t67</title>
+<path style="fill:none;stroke:gray;" d="M167,-140C204,-131 248,-120 279,-111"/>
+<polygon style="fill:gray;stroke:gray;" points="279.881,-114.393 289,-109 278.508,-107.529 279.881,-114.393"/>
+</g>
+<!-- t12 -->
+<g id="node37" class="node"><title>t12</title>
+<polygon style="fill:white;stroke:gray;" points="388,-376 234,-376 230,-372 230,-340 384,-340 388,-344 388,-376"/>
+<polyline style="fill:none;stroke:gray;" points="384,-372 230,-372 "/>
+<polyline style="fill:none;stroke:gray;" points="384,-372 384,-340 "/>
+<polyline style="fill:none;stroke:gray;" points="384,-372 388,-376 "/>
+<text text-anchor="middle" x="309" y="-351.5" style="font-family:Times New Roman;font-size:20.00;fill:gray;">buildPeptideFasta</text>
+</g>
+<!-- t56 -->
+<g id="node38" class="node"><title>t56</title>
+<polygon style="fill:white;stroke:gray;" points="473,-292 341,-292 337,-288 337,-256 469,-256 473,-260 473,-292"/>
+<polyline style="fill:none;stroke:gray;" points="469,-288 337,-288 "/>
+<polyline style="fill:none;stroke:gray;" points="469,-288 469,-256 "/>
+<polyline style="fill:none;stroke:gray;" points="469,-288 473,-292 "/>
+<text text-anchor="middle" x="405" y="-267.5" style="font-family:Times New Roman;font-size:20.00;fill:gray;">preparePanther</text>
+</g>
+<!-- t12->t56 -->
+<g id="edge37" class="edge"><title>t12->t56</title>
+<path style="fill:none;stroke:gray;" d="M330,-340C343,-328 361,-312 376,-299"/>
+<polygon style="fill:gray;stroke:gray;" points="378.779,-301.219 384,-292 374.169,-295.951 378.779,-301.219"/>
+</g>
+<!-- t56->t57 -->
+<g id="edge41" class="edge"><title>t56->t57</title>
+<path style="fill:none;stroke:gray;" d="M401,-256C400,-252 399,-248 398,-244"/>
+<polygon style="fill:gray;stroke:gray;" points="401.393,-243.119 396,-234 394.529,-244.492 401.393,-243.119"/>
+</g>
+<!-- t58 -->
+<g id="node43" class="node"><title>t58</title>
+<polygon style="fill:white;stroke:gray;" points="446,-176 340,-176 336,-172 336,-140 442,-140 446,-144 446,-176"/>
+<polyline style="fill:none;stroke:gray;" points="442,-172 336,-172 "/>
+<polyline style="fill:none;stroke:gray;" points="442,-172 442,-140 "/>
+<polyline style="fill:none;stroke:gray;" points="442,-172 446,-176 "/>
+<text text-anchor="middle" x="391" y="-151.5" style="font-family:Times New Roman;font-size:20.00;fill:gray;">loadPanther</text>
+</g>
+<!-- t57->t58 -->
+<g id="edge43" class="edge"><title>t57->t58</title>
+<path style="fill:none;stroke:gray;" d="M391,-198C391,-194 391,-190 391,-186"/>
+<polygon style="fill:gray;stroke:gray;" points="394.5,-186 391,-176 387.5,-186 394.5,-186"/>
+</g>
+<!-- t58->t67 -->
+<g id="edge49" class="edge"><title>t58->t67</title>
+<path style="fill:none;stroke:gray;" d="M370,-140C364,-135 359,-130 353,-125"/>
+<polygon style="fill:gray;stroke:gray;" points="354.831,-121.951 345,-118 350.221,-127.219 354.831,-121.951"/>
+</g>
+<!-- t67->t68 -->
+<g id="edge73" class="edge"><title>t67->t68</title>
+<path style="fill:none;stroke:gray;" d="M359,-95C437,-84 621,-58 701,-47"/>
+<polygon style="fill:gray;stroke:gray;" points="701.398,-50.4778 711,-46 700.701,-43.5125 701.398,-50.4778"/>
+</g>
+<!-- t28 -->
+<g id="node52" class="node"><title>t28</title>
+<polygon style="fill:white;stroke:gray;" points="1040,-292 936,-292 932,-288 932,-256 1036,-256 1040,-260 1040,-292"/>
+<polyline style="fill:none;stroke:gray;" points="1036,-288 932,-288 "/>
+<polyline style="fill:none;stroke:gray;" points="1036,-288 1036,-256 "/>
+<polyline style="fill:none;stroke:gray;" points="1036,-288 1040,-292 "/>
+<text text-anchor="middle" x="986" y="-267.5" style="font-family:Times New Roman;font-size:20.00;fill:gray;">loadAlleles</text>
+</g>
+<!-- t27->t28 -->
+<g id="edge55" class="edge"><title>t27->t28</title>
+<path style="fill:none;stroke:gray;" d="M993,-340C991,-329 990,-315 989,-302"/>
+<polygon style="fill:gray;stroke:gray;" points="992.478,-301.602 988,-292 985.512,-302.299 992.478,-301.602"/>
+</g>
+<!-- t66 -->
+<g id="node58" class="node"><title>t66</title>
+<polygon style="fill:white;stroke:gray;" points="858,-118 794,-118 790,-114 790,-82 854,-82 858,-86 858,-118"/>
+<polyline style="fill:none;stroke:gray;" points="854,-114 790,-114 "/>
+<polyline style="fill:none;stroke:gray;" points="854,-114 854,-82 "/>
+<polyline style="fill:none;stroke:gray;" points="854,-114 858,-118 "/>
+<text text-anchor="middle" x="824" y="-93.5" style="font-family:Times New Roman;font-size:20.00;fill:gray;">alleles</text>
+</g>
+<!-- t27->t66 -->
+<g id="edge61" class="edge"><title>t27->t66</title>
+<path style="fill:none;stroke:gray;" d="M972,-340C959,-331 944,-321 929,-314 833,-266 764,-321 701,-234 689,-216 689,-156 701,-140 711,-126 749,-115 780,-108"/>
+<polygon style="fill:gray;stroke:gray;" points="780.881,-111.393 790,-106 779.508,-104.529 780.881,-111.393"/>
+</g>
+<!-- t29 -->
+<g id="node54" class="node"><title>t29</title>
+<polygon style="fill:white;stroke:gray;" points="978,-234 714,-234 710,-230 710,-198 974,-198 978,-202 978,-234"/>
+<polyline style="fill:none;stroke:gray;" points="974,-230 710,-230 "/>
+<polyline style="fill:none;stroke:gray;" points="974,-230 974,-198 "/>
+<polyline style="fill:none;stroke:gray;" points="974,-230 978,-234 "/>
+<text text-anchor="middle" x="844" y="-209.5" style="font-family:Times New Roman;font-size:20.00;fill:gray;">summarizeAllelesPerTranscript</text>
+</g>
+<!-- t28->t29 -->
+<g id="edge57" class="edge"><title>t28->t29</title>
+<path style="fill:none;stroke:gray;" d="M942,-256C928,-250 912,-244 897,-238"/>
+<polygon style="fill:gray;stroke:gray;" points="898.56,-234.863 888,-234 895.717,-241.26 898.56,-234.863"/>
+</g>
+<!-- t28->t66 -->
+<g id="edge63" class="edge"><title>t28->t66</title>
+<path style="fill:none;stroke:gray;" d="M987,-256C987,-249 987,-241 987,-234 988,-190 991,-168 957,-140 932,-119 896,-109 868,-105"/>
+<polygon style="fill:gray;stroke:gray;" points="868.492,-101.529 858,-103 867.119,-108.393 868.492,-101.529"/>
+</g>
+<!-- t30 -->
+<g id="node56" class="node"><title>t30</title>
+<polygon style="fill:white;stroke:gray;" points="938,-176 714,-176 710,-172 710,-140 934,-140 938,-144 938,-176"/>
+<polyline style="fill:none;stroke:gray;" points="934,-172 710,-172 "/>
+<polyline style="fill:none;stroke:gray;" points="934,-172 934,-140 "/>
+<polyline style="fill:none;stroke:gray;" points="934,-172 938,-176 "/>
+<text text-anchor="middle" x="824" y="-151.5" style="font-family:Times New Roman;font-size:20.00;fill:gray;">summarizeAllelesPerGene</text>
+</g>
+<!-- t29->t30 -->
+<g id="edge59" class="edge"><title>t29->t30</title>
+<path style="fill:none;stroke:gray;" d="M838,-198C837,-194 835,-190 834,-186"/>
+<polygon style="fill:gray;stroke:gray;" points="836.964,-183.985 830,-176 830.464,-186.585 836.964,-183.985"/>
+</g>
+<!-- t29->t66 -->
+<g id="edge65" class="edge"><title>t29->t66</title>
+<path style="fill:none;stroke:gray;" d="M907,-198C926,-191 942,-184 947,-176 957,-162 957,-152 947,-140 938,-126 900,-115 868,-108"/>
+<polygon style="fill:gray;stroke:gray;" points="868.492,-104.529 858,-106 867.119,-111.393 868.492,-104.529"/>
+</g>
+<!-- t30->t66 -->
+<g id="edge67" class="edge"><title>t30->t66</title>
+<path style="fill:none;stroke:gray;" d="M824,-140C824,-136 824,-132 824,-128"/>
+<polygon style="fill:gray;stroke:gray;" points="827.5,-128 824,-118 820.5,-128 827.5,-128"/>
+</g>
+<!-- t66->t68 -->
+<g id="edge75" class="edge"><title>t66->t68</title>
+<path style="fill:none;stroke:gray;" d="M797,-82C790,-77 781,-71 773,-66"/>
+<polygon style="fill:gray;stroke:gray;" points="775.1,-63.2 765,-60 770.9,-68.8 775.1,-63.2"/>
+</g>
+<!-- k1 -->
+<g id="node69" class="node"><title>k1</title>
+<polygon style="fill:white;stroke:gray;" points="1347,-608 1229,-608 1225,-604 1225,-534 1343,-534 1347,-538 1347,-608"/>
+<polyline style="fill:none;stroke:gray;" points="1343,-604 1225,-604 "/>
+<polyline style="fill:none;stroke:gray;" points="1343,-604 1343,-534 "/>
+<polyline style="fill:none;stroke:gray;" points="1343,-604 1347,-608 "/>
+<text text-anchor="middle" x="1286" y="-576" style="font-family:Times New Roman;font-size:20.00;fill:gray;">Up-to-date</text>
+<text text-anchor="middle" x="1286" y="-553" style="font-family:Times New Roman;font-size:20.00;fill:gray;">dependence</text>
+</g>
+<!-- k2 -->
+<g id="node70" class="node"><title>k2</title>
+<polygon style="fill:#90ee90;stroke:green;" points="1362,-493.5 1214,-493.5 1210,-489.5 1210,-442.5 1358,-442.5 1362,-446.5 1362,-493.5"/>
+<polyline style="fill:none;stroke:green;" points="1358,-489.5 1210,-489.5 "/>
+<polyline style="fill:none;stroke:green;" points="1358,-489.5 1358,-442.5 "/>
+<polyline style="fill:none;stroke:green;" points="1358,-489.5 1362,-493.5 "/>
+<text text-anchor="middle" x="1286" y="-461.5" style="font-family:Times New Roman;font-size:20.00;">Up-to-date task</text>
+</g>
+<!-- k1->k2 -->
+<g id="edge78" class="edge"><title>k1->k2</title>
+<path style="fill:none;stroke:gray;" d="M1286,-534C1286,-524 1286,-514 1286,-504"/>
+<polygon style="fill:gray;stroke:gray;" points="1289.5,-504 1286,-494 1282.5,-504 1289.5,-504"/>
+</g>
+<!-- k3 -->
+<g id="node72" class="node"><title>k3</title>
+<polygon style="fill:#fff68f;stroke:black;" points="1347,-383.5 1229,-383.5 1225,-379.5 1225,-332.5 1343,-332.5 1347,-336.5 1347,-383.5"/>
+<polyline style="fill:none;stroke:black;" points="1343,-379.5 1225,-379.5 "/>
+<polyline style="fill:none;stroke:black;" points="1343,-379.5 1343,-332.5 "/>
+<polyline style="fill:none;stroke:black;" points="1343,-379.5 1347,-383.5 "/>
+<text text-anchor="middle" x="1286" y="-351.5" style="font-family:Times New Roman;font-size:20.00;">Final target</text>
+</g>
+<!-- k2->k3 -->
+<g id="edge80" class="edge"><title>k2->k3</title>
+<path style="fill:none;stroke:gray;" d="M1286,-442C1286,-428 1286,-410 1286,-394"/>
+<polygon style="fill:gray;stroke:gray;" points="1289.5,-394 1286,-384 1282.5,-394 1289.5,-394"/>
+</g>
+</g>
+</svg>
diff --git a/doc/images/history_html_flowchart.png b/doc/images/history_html_flowchart.png
new file mode 100644
index 0000000..a939228
Binary files /dev/null and b/doc/images/history_html_flowchart.png differ
diff --git a/doc/images/history_html_flowchart.svg b/doc/images/history_html_flowchart.svg
new file mode 100644
index 0000000..268a187
--- /dev/null
+++ b/doc/images/history_html_flowchart.svg
@@ -0,0 +1,269 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"
+ "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
+<!-- Generated by graphviz version 2.36.0 (20140111.2315)
+ -->
+<!-- Title: Pipeline: Pages: 1 -->
+<svg width="673pt" height="633pt"
+ viewBox="0.00 0.00 673.00 633.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 629)">
+<title>Pipeline:</title>
+<polygon fill="white" stroke="none" points="-4,4 -4,-629 669,-629 669,4 -4,4"/>
+<g id="clust1" class="cluster"><title>clustertasks</title>
+<polygon fill="none" stroke="black" points="8,-8 8,-617 519,-617 519,-8 8,-8"/>
+<text text-anchor="middle" x="263.5" y="-589" font-family="Times,serif" font-size="30.00" fill="#ff3232">Pipeline:</text>
+</g>
+<g id="clust2" class="cluster"><title>clusterkey1</title>
+<polygon fill="#f6f4f4" stroke="#f6f4f4" points="527,-66 527,-610 657,-610 657,-66 527,-66"/>
+<text text-anchor="middle" x="592" y="-590" font-family="Times,serif" font-size="20.00">Key:</text>
+</g>
+<!-- t0 -->
+<g id="node1" class="node"><title>t0</title>
+<polygon fill="#b8cc6e" stroke="#006000" points="370.25,-568 215.75,-568 211.75,-564 211.75,-532 366.25,-532 370.25,-536 370.25,-568"/>
+<polyline fill="none" stroke="#006000" points="366.25,-564 211.75,-564 "/>
+<polyline fill="none" stroke="#006000" points="366.25,-564 366.25,-532 "/>
+<polyline fill="none" stroke="#006000" points="366.25,-564 370.25,-568 "/>
+<text text-anchor="middle" x="291" y="-545" font-family="Times,serif" font-size="20.00" fill="#006000">Up_to_date_task1</text>
+</g>
+<!-- t4 -->
+<g id="node2" class="node"><title>t4</title>
+<polygon fill="#b8cc6e" stroke="#006000" points="293.25,-454 82.75,-454 78.75,-450 78.75,-418 289.25,-418 293.25,-422 293.25,-454"/>
+<polyline fill="none" stroke="#006000" points="289.25,-450 78.75,-450 "/>
+<polyline fill="none" stroke="#006000" points="289.25,-450 289.25,-418 "/>
+<polyline fill="none" stroke="#006000" points="289.25,-450 293.25,-454 "/>
+<text text-anchor="middle" x="186" y="-431" font-family="Times,serif" font-size="20.00" fill="#006000">Explicitly_specified_task</text>
+</g>
+<!-- t0->t4 -->
+<g id="edge1" class="edge"><title>t0->t4</title>
+<path fill="none" stroke="gray" d="M275.053,-531.99C257.476,-513.241 229.048,-482.918 209.048,-461.585"/>
+<polygon fill="gray" stroke="gray" points="211.49,-459.072 202.097,-454.17 206.383,-463.86 211.49,-459.072"/>
+</g>
+<!-- t1 -->
+<g id="node8" class="node"><title>t1</title>
+<g id="a_node8"><a xlink:href="http://cnn.com" xlink:title="What is this?<BR/> What <FONT COLOR="red">is</FONT>this???">
+<polygon fill="#b8cc6e" stroke="#006000" points="466,-490 328,-490 328,-486 324,-486 324,-482 328,-482 328,-390 324,-390 324,-386 328,-386 328,-382 466,-382 466,-490"/>
+<polyline fill="none" stroke="#006000" points="328,-486 332,-486 332,-482 328,-482 "/>
+<polyline fill="none" stroke="#006000" points="328,-390 332,-390 332,-386 328,-386 "/>
+<polygon fill="none" stroke="#006000" points="470,-494 324,-494 324,-490 320,-490 320,-486 324,-486 324,-386 320,-386 320,-382 324,-382 324,-378 470,-378 470,-494"/>
+<polyline fill="none" stroke="#006000" points="324,-490 328,-490 328,-486 324,-486 "/>
+<polyline fill="none" stroke="#006000" points="324,-386 328,-386 328,-382 324,-382 "/>
+<polygon fill="none" stroke="#006000" points="474,-498 320,-498 320,-494 316,-494 316,-490 320,-490 320,-382 316,-382 316,-378 320,-378 320,-374 474,-374 474,-498"/>
+<polyline fill="none" stroke="#006000" points="320,-494 324,-494 324,-490 320,-490 "/>
+<polyline fill="none" stroke="#006000" points="320,-382 324,-382 324,-378 320,-378 "/>
+<polygon fill="none" stroke="#006000" points="478,-502 316,-502 316,-498 312,-498 312,-494 316,-494 316,-378 312,-378 312,-374 316,-374 316,-370 478,-370 478,-502"/>
+<polyline fill="none" stroke="#006000" points="316,-498 320,-498 320,-494 316,-494 "/>
+<polyline fill="none" stroke="#006000" points="316,-378 320,-378 320,-374 316,-374 "/>
+<polygon fill="none" stroke="#006000" points="482,-506 312,-506 312,-502 308,-502 308,-498 312,-498 312,-374 308,-374 308,-370 312,-370 312,-366 482,-366 482,-506"/>
+<polyline fill="none" stroke="#006000" points="312,-502 316,-502 316,-498 312,-498 "/>
+<polyline fill="none" stroke="#006000" points="312,-374 316,-374 316,-370 312,-370 "/>
+<text text-anchor="start" x="345.5" y="-441" font-family="Times,serif" font-size="20.00" fill="#006000">What is this?</text>
+<text text-anchor="start" x="336" y="-421" font-family="Times,serif" font-size="20.00" fill="#006000"> What </text>
+<text text-anchor="start" x="390" y="-421" font-family="Times,serif" font-size="20.00" fill="red">is</text>
+<text text-anchor="start" x="403" y="-421" font-family="Times,serif" font-size="20.00" fill="#006000">this???</text>
+</a>
+</g>
+</g>
+<!-- t0->t1 -->
+<g id="edge8" class="edge"><title>t0->t1</title>
+<path fill="none" stroke="gray" d="M307.099,-531.99C312.118,-526.687 318.013,-520.458 324.344,-513.768"/>
+<polygon fill="gray" stroke="gray" points="327.033,-516.019 331.365,-506.351 321.949,-511.208 327.033,-516.019"/>
+</g>
+<!-- t5 -->
+<g id="node3" class="node"><title>t5</title>
+<polygon fill="#ebf3ff" stroke="#0044a0" points="248.25,-340 127.75,-340 123.75,-336 123.75,-304 244.25,-304 248.25,-308 248.25,-340"/>
+<polyline fill="none" stroke="#0044a0" points="244.25,-336 123.75,-336 "/>
+<polyline fill="none" stroke="#0044a0" points="244.25,-336 244.25,-304 "/>
+<polyline fill="none" stroke="#0044a0" points="244.25,-336 248.25,-340 "/>
+<text text-anchor="middle" x="186" y="-317" font-family="Times,serif" font-size="20.00" fill="#0044a0">Task_to_run1</text>
+</g>
+<!-- t4->t5 -->
+<g id="edge2" class="edge"><title>t4->t5</title>
+<path fill="none" stroke="gray" d="M186,-417.99C186,-400.063 186,-371.555 186,-350.442"/>
+<polygon fill="gray" stroke="gray" points="189.5,-350.17 186,-340.17 182.5,-350.171 189.5,-350.17"/>
+</g>
+<!-- t6 -->
+<g id="node4" class="node"><title>t6</title>
+<polygon fill="#ebf3ff" stroke="#0044a0" points="248.25,-267 127.75,-267 123.75,-263 123.75,-231 244.25,-231 248.25,-235 248.25,-267"/>
+<polyline fill="none" stroke="#0044a0" points="244.25,-263 123.75,-263 "/>
+<polyline fill="none" stroke="#0044a0" points="244.25,-263 244.25,-231 "/>
+<polyline fill="none" stroke="#0044a0" points="244.25,-263 248.25,-267 "/>
+<text text-anchor="middle" x="186" y="-244" font-family="Times,serif" font-size="20.00" fill="#0044a0">Task_to_run2</text>
+</g>
+<!-- t5->t6 -->
+<g id="edge3" class="edge"><title>t5->t6</title>
+<path fill="none" stroke="#0044a0" d="M186,-303.813C186,-295.789 186,-286.047 186,-277.069"/>
+<polygon fill="#0044a0" stroke="#0044a0" points="189.5,-277.029 186,-267.029 182.5,-277.029 189.5,-277.029"/>
+</g>
+<!-- t8 -->
+<g id="node5" class="node"><title>t8</title>
+<polygon fill="none" stroke="#0044a0" stroke-dasharray="5,2" points="312.25,-187 29.75,-187 25.75,-183 25.75,-151 308.25,-151 312.25,-155 312.25,-187"/>
+<polyline fill="none" stroke="#0044a0" stroke-dasharray="5,2" points="308.25,-183 25.75,-183 "/>
+<polyline fill="none" stroke="#0044a0" stroke-dasharray="5,2" points="308.25,-183 308.25,-151 "/>
+<polyline fill="none" stroke="#0044a0" stroke-dasharray="5,2" points="308.25,-183 312.25,-187 "/>
+<text text-anchor="middle" x="169" y="-164" font-family="Times,serif" font-size="20.00" fill="#0044a0">Up_to_date_task_forced_to_rerun</text>
+</g>
+<!-- t6->t8 -->
+<g id="edge4" class="edge"><title>t6->t8</title>
+<path fill="none" stroke="#0044a0" d="M182.227,-230.689C180.092,-220.894 177.374,-208.422 174.958,-197.335"/>
+<polygon fill="#0044a0" stroke="#0044a0" points="178.312,-196.288 172.762,-187.262 171.472,-197.778 178.312,-196.288"/>
+</g>
+<!-- t7 -->
+<g id="node6" class="node"><title>t7</title>
+<polygon fill="#ebf3ff" stroke="#0044a0" points="455.25,-187 334.75,-187 330.75,-183 330.75,-151 451.25,-151 455.25,-155 455.25,-187"/>
+<polyline fill="none" stroke="#0044a0" points="451.25,-183 330.75,-183 "/>
+<polyline fill="none" stroke="#0044a0" points="451.25,-183 451.25,-151 "/>
+<polyline fill="none" stroke="#0044a0" points="451.25,-183 455.25,-187 "/>
+<text text-anchor="middle" x="393" y="-164" font-family="Times,serif" font-size="20.00" fill="#0044a0">Task_to_run3</text>
+</g>
+<!-- t6->t7 -->
+<g id="edge5" class="edge"><title>t6->t7</title>
+<path fill="none" stroke="#0044a0" d="M232.616,-230.946C258.658,-221.366 291.675,-209.132 321,-198 327.007,-195.72 333.287,-193.312 339.528,-190.905"/>
+<polygon fill="#0044a0" stroke="#0044a0" points="341.194,-194.014 349.257,-187.141 338.668,-187.485 341.194,-194.014"/>
+</g>
+<!-- t9 -->
+<g id="node7" class="node"><title>t9</title>
+<polygon fill="#efa03b" stroke="black" points="335.5,-114 228.5,-114 224.5,-110 224.5,-78 331.5,-78 335.5,-82 335.5,-114"/>
+<polyline fill="none" stroke="black" points="331.5,-110 224.5,-110 "/>
+<polyline fill="none" stroke="black" points="331.5,-110 331.5,-78 "/>
+<polyline fill="none" stroke="black" points="331.5,-110 335.5,-114 "/>
+<text text-anchor="middle" x="280" y="-91" font-family="Times,serif" font-size="20.00">Final_target</text>
+</g>
+<!-- t8->t9 -->
+<g id="edge6" class="edge"><title>t8->t9</title>
+<path fill="none" stroke="#0044a0" d="M195.587,-150.994C210.307,-141.579 228.791,-129.755 244.707,-119.575"/>
+<polygon fill="#0044a0" stroke="#0044a0" points="246.707,-122.45 253.245,-114.114 242.935,-116.554 246.707,-122.45"/>
+</g>
+<!-- t7->t9 -->
+<g id="edge7" class="edge"><title>t7->t9</title>
+<path fill="none" stroke="#0044a0" d="M365.934,-150.994C350.949,-141.579 332.132,-129.755 315.929,-119.575"/>
+<polygon fill="#0044a0" stroke="#0044a0" points="317.567,-116.47 307.237,-114.114 313.842,-122.397 317.567,-116.47"/>
+</g>
+<!-- t10 -->
+<g id="node11" class="node"><title>t10</title>
+<polygon fill="white" stroke="gray" points="254.25,-52 19.75,-52 15.75,-48 15.75,-16 250.25,-16 254.25,-20 254.25,-52"/>
+<polyline fill="none" stroke="gray" points="250.25,-48 15.75,-48 "/>
+<polyline fill="none" stroke="gray" points="250.25,-48 250.25,-16 "/>
+<polyline fill="none" stroke="gray" points="250.25,-48 254.25,-52 "/>
+<text text-anchor="middle" x="135" y="-29" font-family="Times,serif" font-size="20.00" fill="gray">Downstream_task1_ignored</text>
+</g>
+<!-- t9->t10 -->
+<g id="edge11" class="edge"><title>t9->t10</title>
+<path fill="none" stroke="gray" d="M238.837,-77.9669C222.306,-71.1265 203.083,-63.1724 185.703,-55.9807"/>
+<polygon fill="gray" stroke="gray" points="186.845,-52.6652 176.266,-52.0757 184.168,-59.1333 186.845,-52.6652"/>
+</g>
+<!-- t11 -->
+<g id="node12" class="node"><title>t11</title>
+<polygon fill="white" stroke="gray" points="511.25,-52 276.75,-52 272.75,-48 272.75,-16 507.25,-16 511.25,-20 511.25,-52"/>
+<polyline fill="none" stroke="gray" points="507.25,-48 272.75,-48 "/>
+<polyline fill="none" stroke="gray" points="507.25,-48 507.25,-16 "/>
+<polyline fill="none" stroke="gray" points="507.25,-48 511.25,-52 "/>
+<text text-anchor="middle" x="392" y="-29" font-family="Times,serif" font-size="20.00" fill="gray">Downstream_task2_ignored</text>
+</g>
+<!-- t9->t11 -->
+<g id="edge12" class="edge"><title>t9->t11</title>
+<path fill="none" stroke="gray" d="M311.795,-77.9669C323.973,-71.4432 338.041,-63.9065 350.963,-56.9843"/>
+<polygon fill="gray" stroke="gray" points="352.963,-59.8831 360.05,-52.0757 349.658,-53.7128 352.963,-59.8831"/>
+</g>
+<!-- t2 -->
+<g id="node9" class="node"><title>t2</title>
+<polygon fill="#b8cc6e" stroke="#006000" points="476.25,-340 321.75,-340 317.75,-336 317.75,-304 472.25,-304 476.25,-308 476.25,-340"/>
+<polyline fill="none" stroke="#006000" points="472.25,-336 317.75,-336 "/>
+<polyline fill="none" stroke="#006000" points="472.25,-336 472.25,-304 "/>
+<polyline fill="none" stroke="#006000" points="472.25,-336 476.25,-340 "/>
+<text text-anchor="middle" x="397" y="-317" font-family="Times,serif" font-size="20.00" fill="#006000">Up_to_date_task3</text>
+</g>
+<!-- t1->t2 -->
+<g id="edge9" class="edge"><title>t1->t2</title>
+<path fill="none" stroke="gray" d="M397,-365.982C397,-360.465 397,-355.126 397,-350.196"/>
+<polygon fill="gray" stroke="gray" points="400.5,-350.148 397,-340.148 393.5,-350.148 400.5,-350.148"/>
+</g>
+<!-- t3 -->
+<g id="node10" class="node"><title>t3</title>
+<polygon fill="#efa03b" stroke="#006000" points="501.25,-267 296.75,-267 292.75,-263 292.75,-231 497.25,-231 501.25,-235 501.25,-267"/>
+<polyline fill="none" stroke="#006000" points="497.25,-263 292.75,-263 "/>
+<polyline fill="none" stroke="#006000" points="497.25,-263 497.25,-231 "/>
+<polyline fill="none" stroke="#006000" points="497.25,-263 501.25,-267 "/>
+<text text-anchor="middle" x="397" y="-244" font-family="Times,serif" font-size="20.00" fill="#006000">Up_to_date_final_target</text>
+</g>
+<!-- t2->t3 -->
+<g id="edge10" class="edge"><title>t2->t3</title>
+<path fill="none" stroke="gray" d="M397,-303.813C397,-295.789 397,-286.047 397,-277.069"/>
+<polygon fill="gray" stroke="gray" points="400.5,-277.029 397,-267.029 393.5,-277.029 400.5,-277.029"/>
+</g>
+<!-- k1_1 -->
+<g id="node13" class="node"><title>k1_1</title>
+<polygon fill="white" stroke="gray" points="642.5,-571.5 545.5,-571.5 541.5,-567.5 541.5,-528.5 638.5,-528.5 642.5,-532.5 642.5,-571.5"/>
+<polyline fill="none" stroke="gray" points="638.5,-567.5 541.5,-567.5 "/>
+<polyline fill="none" stroke="gray" points="638.5,-567.5 638.5,-528.5 "/>
+<polyline fill="none" stroke="gray" points="638.5,-567.5 642.5,-571.5 "/>
+<text text-anchor="middle" x="592" y="-546.3" font-family="Times,serif" font-size="14.00" fill="gray">Down stream</text>
+</g>
+<!-- k2_1 -->
+<g id="node14" class="node"><title>k2_1</title>
+<polygon fill="#b8cc6e" stroke="#006000" points="648.25,-457.5 539.75,-457.5 535.75,-453.5 535.75,-414.5 644.25,-414.5 648.25,-418.5 648.25,-457.5"/>
+<polyline fill="none" stroke="#006000" points="644.25,-453.5 535.75,-453.5 "/>
+<polyline fill="none" stroke="#006000" points="644.25,-453.5 644.25,-414.5 "/>
+<polyline fill="none" stroke="#006000" points="644.25,-453.5 648.25,-457.5 "/>
+<text text-anchor="middle" x="592" y="-432.3" font-family="Times,serif" font-size="14.00" fill="#006000">Up-to-date task</text>
+</g>
+<!-- k1_1->k2_1 -->
+<g id="edge13" class="edge"><title>k1_1->k2_1</title>
+<path fill="none" stroke="gray" d="M592,-528.457C592,-511.513 592,-487.212 592,-467.917"/>
+<polygon fill="gray" stroke="gray" points="595.5,-467.792 592,-457.792 588.5,-467.792 595.5,-467.792"/>
+</g>
+<!-- k3_1 -->
+<g id="node15" class="node"><title>k3_1</title>
+<polygon fill="#ebf3ff" stroke="#0044a0" points="637.25,-343.5 550.75,-343.5 546.75,-339.5 546.75,-300.5 633.25,-300.5 637.25,-304.5 637.25,-343.5"/>
+<polyline fill="none" stroke="#0044a0" points="633.25,-339.5 546.75,-339.5 "/>
+<polyline fill="none" stroke="#0044a0" points="633.25,-339.5 633.25,-300.5 "/>
+<polyline fill="none" stroke="#0044a0" points="633.25,-339.5 637.25,-343.5 "/>
+<text text-anchor="middle" x="592" y="-318.3" font-family="Times,serif" font-size="14.00" fill="#0044a0">Task to run</text>
+</g>
+<!-- k2_1->k3_1 -->
+<g id="edge14" class="edge"><title>k2_1->k3_1</title>
+<path fill="none" stroke="gray" d="M592,-414.457C592,-397.513 592,-373.212 592,-353.917"/>
+<polygon fill="gray" stroke="gray" points="595.5,-353.792 592,-343.792 588.5,-353.792 595.5,-353.792"/>
+</g>
+<!-- k4_1 -->
+<g id="node16" class="node"><title>k4_1</title>
+<polygon fill="none" stroke="#0044a0" stroke-dasharray="5,2" points="648.25,-278 539.75,-278 535.75,-274 535.75,-220 644.25,-220 648.25,-224 648.25,-278"/>
+<polyline fill="none" stroke="#0044a0" stroke-dasharray="5,2" points="644.25,-274 535.75,-274 "/>
+<polyline fill="none" stroke="#0044a0" stroke-dasharray="5,2" points="644.25,-274 644.25,-220 "/>
+<polyline fill="none" stroke="#0044a0" stroke-dasharray="5,2" points="644.25,-274 648.25,-278 "/>
+<text text-anchor="middle" x="592" y="-252.8" font-family="Times,serif" font-size="14.00" fill="#0044a0">Up-to-date task</text>
+<text text-anchor="middle" x="592" y="-237.8" font-family="Times,serif" font-size="14.00" fill="#0044a0">forced to rerun</text>
+</g>
+<!-- k3_1->k4_1 -->
+<g id="edge15" class="edge"><title>k3_1->k4_1</title>
+<path fill="none" stroke="#0044a0" d="M592,-300.464C592,-296.656 592,-292.568 592,-288.426"/>
+<polygon fill="#0044a0" stroke="#0044a0" points="595.5,-288.149 592,-278.149 588.5,-288.149 595.5,-288.149"/>
+</g>
+<!-- k5_1 -->
+<g id="node17" class="node"><title>k5_1</title>
+<polygon fill="#efa03b" stroke="#006000" points="637.25,-198 550.75,-198 546.75,-194 546.75,-140 633.25,-140 637.25,-144 637.25,-198"/>
+<polyline fill="none" stroke="#006000" points="633.25,-194 546.75,-194 "/>
+<polyline fill="none" stroke="#006000" points="633.25,-194 633.25,-140 "/>
+<polyline fill="none" stroke="#006000" points="633.25,-194 637.25,-198 "/>
+<text text-anchor="middle" x="592" y="-172.8" font-family="Times,serif" font-size="14.00" fill="#006000">Up-to-date</text>
+<text text-anchor="middle" x="592" y="-157.8" font-family="Times,serif" font-size="14.00" fill="#006000">Final target</text>
+</g>
+<!-- k4_1->k5_1 -->
+<g id="edge16" class="edge"><title>k4_1->k5_1</title>
+<path fill="none" stroke="#0044a0" d="M592,-219.84C592,-216.085 592,-212.184 592,-208.292"/>
+<polygon fill="#0044a0" stroke="#0044a0" points="595.5,-208.118 592,-198.118 588.5,-208.118 595.5,-208.118"/>
+</g>
+<!-- k6_1 -->
+<g id="node18" class="node"><title>k6_1</title>
+<polygon fill="#efa03b" stroke="black" points="637.25,-117.5 550.75,-117.5 546.75,-113.5 546.75,-74.5 633.25,-74.5 637.25,-78.5 637.25,-117.5"/>
+<polyline fill="none" stroke="black" points="633.25,-113.5 546.75,-113.5 "/>
+<polyline fill="none" stroke="black" points="633.25,-113.5 633.25,-74.5 "/>
+<polyline fill="none" stroke="black" points="633.25,-113.5 637.25,-117.5 "/>
+<text text-anchor="middle" x="592" y="-92.3" font-family="Times,serif" font-size="14.00">Final target</text>
+</g>
+<!-- k5_1->k6_1 -->
+<g id="edge17" class="edge"><title>k5_1->k6_1</title>
+<path fill="none" stroke="gray" d="M592,-139.939C592,-135.985 592,-131.903 592,-127.9"/>
+<polygon fill="gray" stroke="gray" points="595.5,-127.631 592,-117.632 588.5,-127.632 595.5,-127.631"/>
+</g>
+</g>
+</svg>
diff --git a/doc/images/jobs_limit.png b/doc/images/jobs_limit.png
new file mode 100644
index 0000000..4caac7d
Binary files /dev/null and b/doc/images/jobs_limit.png differ
diff --git a/doc/images/jobs_limit2.png b/doc/images/jobs_limit2.png
new file mode 100644
index 0000000..9455627
Binary files /dev/null and b/doc/images/jobs_limit2.png differ
diff --git a/doc/images/logo.jpg b/doc/images/logo.jpg
new file mode 100644
index 0000000..4d4b1ab
Binary files /dev/null and b/doc/images/logo.jpg differ
diff --git a/doc/images/manual_dependencies_flowchart.png.py b/doc/images/manual_dependencies_flowchart.png.py
new file mode 100755
index 0000000..6c22b12
--- /dev/null
+++ b/doc/images/manual_dependencies_flowchart.png.py
@@ -0,0 +1,95 @@
+#!/usr/bin/env python
+import sys, os
+
+# add self to search path for testing
+if __name__ == '__main__':
+ exe_path = os.path.split(os.path.abspath(sys.argv[0]))[0]
+ module_name = os.path.split(sys.argv[0])[1]
+ module_name = os.path.splitext(module_name)[0];
+else:
+ module_name = __name__
+
+# Use import path from <<../python_modules>>
+if __name__ == '__main__':
+ sys.path.insert(0, os.path.abspath(os.path.join(exe_path,"../..")))
+
+from ruffus import *
+import json
+
+import time
+def task_helper(infile, outfile):
+ """
+ cat input file content to output file
+ after writing out job parameters
+ """
+ if infile:
+ output_text = "".join(sorted(open(infile).readlines()))
+ else:
+ output_text = "None"
+ output_text += json.dumps(infile) + " -> " + json.dumps(outfile) + "\n"
+ open(outfile, "w").write(output_text)
+
+
+
+#
+# task1
+#
+@files(None, 'a.1')
+def task1(infile, outfile):
+ """
+ First task
+ """
+ task_helper(infile, outfile)
+
+
+
+#
+# task2
+#
+@transform(task1, regex(r'.1'), '.2')
+def task2(infile, outfile):
+ """
+ Second task
+ """
+ task_helper(infile, outfile)
+
+
+
+#
+# task3
+#
+@transform(task2, regex(r'.2'), '.3')
+def task3(infile, outfile):
+ """
+ Third task
+ """
+ task_helper(infile, outfile)
+
+
+
+#
+# task4
+#
+@transform(task3, regex(r'.3'), '.4')
+def task4(infile, outfile):
+ """
+ Fourth task
+ """
+ task_helper(infile, outfile)
+
+for f in range(4):
+ fn = "a.%d" % (f + 1)
+ if os.path.exists(fn):
+ os.unlink(fn)
+pipeline_printout_graph ("manual_dependencies_flowchart1.png", "png", [task4], dpi = 72, size = (2,2))
+pipeline_run([task4])
+pipeline_printout_graph ("manual_dependencies_flowchart2.png", "png", [task4], dpi = 72, size = (2,2))
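+# recreate a.1 and a.3 so that parts of the pipeline are no longer up to date in the next two flowcharts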
+open("a.1", "w")
+open("a.3", "w")
+pipeline_printout_graph ("manual_dependencies_flowchart3.png", "png", [task4], dpi = 72, size = (2,2))
+pipeline_printout_graph ("manual_dependencies_flowchart4.png", "png", [task4], [task1], dpi = 72, size = (2,2))
+for f in range(4):
+ fn = "a.%d" % (f + 1)
+ if os.path.exists(fn):
+ os.unlink(fn)
+
diff --git a/doc/images/manual_dependencies_flowchart1.dot b/doc/images/manual_dependencies_flowchart1.dot
new file mode 100644
index 0000000..d9daaeb
--- /dev/null
+++ b/doc/images/manual_dependencies_flowchart1.dot
@@ -0,0 +1,26 @@
+digraph tree
+{
+size="8,11!";
+splines=true;
+fontsize=50;
+#rankdir="LR";
+ranksep = 0.1;
+sep = 0.001;
+esep = 0.001;
+width = 0.001;
+#rankdir="LR";
+subgraph clusterkey
+{
+fontsize=30;
+label = "Pipeline:";
+edge[minlen=2];
+node[fontsize=20];
+t0[fontcolor=blue, shape=plaintext, label="task1"];
+t1[fontcolor=blue, shape=plaintext, label="task2"];
+t0 -> t1[color=blue];
+t2[fontcolor=blue, shape=plaintext, label="task3"];
+t1 -> t2[color=blue];
+t3[color=orange, fontcolor=orange, shape=tripleoctagon, label="task4"];
+t2 -> t3[color=blue];
+}
+}
diff --git a/doc/images/manual_dependencies_flowchart1.png b/doc/images/manual_dependencies_flowchart1.png
new file mode 100644
index 0000000..126851d
Binary files /dev/null and b/doc/images/manual_dependencies_flowchart1.png differ
diff --git a/doc/images/manual_dependencies_flowchart2.dot b/doc/images/manual_dependencies_flowchart2.dot
new file mode 100644
index 0000000..ddd42f1
--- /dev/null
+++ b/doc/images/manual_dependencies_flowchart2.dot
@@ -0,0 +1,20 @@
+digraph tree
+{
+size="8,11!";
+splines=true;
+fontsize=30;
+ranksep = 0.1;
+subgraph clustertasks
+{
+label = "Pipeline:";
+edge[minlen=2];
+node[fontsize=20];
+t0[style=filled, color=olivedrab, label="task1", shape=octagon, fillcolor=olivedrab, fontcolor=black];
+t1[style=filled, color=olivedrab, label="task2", shape=octagon, fillcolor=olivedrab, fontcolor=black];
+t0 -> t1[color=gray, arrowtype=normal];
+t2[style=filled, color=olivedrab, label="task3", shape=octagon, fillcolor=olivedrab, fontcolor=black];
+t1 -> t2[color=gray, arrowtype=normal];
+t3[color=gray, fontcolor=gray, shape=tripleoctagon, label="task4"];
+t2 -> t3[color=gray, arrowtype=normal];
+}
+}
diff --git a/doc/images/manual_dependencies_flowchart2.png b/doc/images/manual_dependencies_flowchart2.png
new file mode 100644
index 0000000..a1911f1
Binary files /dev/null and b/doc/images/manual_dependencies_flowchart2.png differ
diff --git a/doc/images/manual_dependencies_flowchart3.dot b/doc/images/manual_dependencies_flowchart3.dot
new file mode 100644
index 0000000..efff3d9
--- /dev/null
+++ b/doc/images/manual_dependencies_flowchart3.dot
@@ -0,0 +1,20 @@
+digraph tree
+{
+size="8,11!";
+splines=true;
+fontsize=30;
+ranksep = 0.1;
+subgraph clustertasks
+{
+label = "Pipeline:";
+edge[minlen=2];
+node[fontsize=20];
+t0[style=filled, color=olivedrab, label="task1", shape=octagon, fillcolor=olivedrab, fontcolor=black];
+t1[fontcolor=blue, shape=plaintext, label="task2"];
+t0 -> t1[color=gray, arrowtype=normal];
+t2[color=olivedrab, fontcolor=blue, shape=tripleoctagon, label="task3"];
+t1 -> t2[color=blue];
+t3[color=orange, fontcolor=orange, shape=tripleoctagon, label="task4"];
+t2 -> t3[color=blue];
+}
+}
diff --git a/doc/images/manual_dependencies_flowchart3.png b/doc/images/manual_dependencies_flowchart3.png
new file mode 100644
index 0000000..0015bc5
Binary files /dev/null and b/doc/images/manual_dependencies_flowchart3.png differ
diff --git a/doc/images/manual_dependencies_flowchart4.dot b/doc/images/manual_dependencies_flowchart4.dot
new file mode 100644
index 0000000..a1ff9a4
--- /dev/null
+++ b/doc/images/manual_dependencies_flowchart4.dot
@@ -0,0 +1,30 @@
+digraph tree
+{
+size="8,11!";
+splines=true;
+fontsize=30;
+ranksep = 0.1;
+subgraph clustertasks
+{
+label = "Pipeline:";
+edge[minlen=2];
+node[fontsize=20];
+t0[style=filled, color=olivedrab, label="up-to-date\ntask1", shape=octagon, fillcolor=olivedrab, fontcolor=black];
+t1[fontcolor=blue, shape=plaintext, label="task2"];
+t0 -> t1[color=gray, arrowtype=normal];
+t2[color=olivedrab, fontcolor=blue, shape=tripleoctagon, label="task5"];
+t1 -> t2[color=blue];
+t3[color=orange, fontcolor=orange, shape=tripleoctagon, label="final_task"];
+t2 -> t3[color=blue];
+
+
+t4[style=filled, color=olivedrab, label="up-to-date\ntask3", shape=octagon, fillcolor=olivedrab, fontcolor=black];
+t5[style=filled, color=olivedrab, label="up-to-date\ntask4", shape=octagon, fillcolor=olivedrab, fontcolor=black];
+t4 -> t5[color=gray, arrowtype=normal];
+t6[fontcolor=blue, shape=plaintext, label="task6"];
+t5 -> t6[color=gray, arrowtype=normal];
+t6 -> t2[color=blue];
+
+
+}
+}
diff --git a/doc/images/manual_dependencies_flowchart4.png b/doc/images/manual_dependencies_flowchart4.png
new file mode 100644
index 0000000..9464a64
Binary files /dev/null and b/doc/images/manual_dependencies_flowchart4.png differ
diff --git a/doc/images/manual_dependencies_flowchart_intro.png b/doc/images/manual_dependencies_flowchart_intro.png
new file mode 100644
index 0000000..d95dfe9
Binary files /dev/null and b/doc/images/manual_dependencies_flowchart_intro.png differ
diff --git a/doc/images/manual_dependencies_flowchart_intro.png.py b/doc/images/manual_dependencies_flowchart_intro.png.py
new file mode 100755
index 0000000..739b091
--- /dev/null
+++ b/doc/images/manual_dependencies_flowchart_intro.png.py
@@ -0,0 +1,98 @@
+#!/usr/bin/env python
+import sys, os
+
+# add self to search path for testing
+if __name__ == '__main__':
+ exe_path = os.path.split(os.path.abspath(sys.argv[0]))[0]
+ module_name = os.path.split(sys.argv[0])[1]
+ module_name = os.path.splitext(module_name)[0];
+else:
+ module_name = __name__
+
+# Use import path from <<../python_modules>>
+if __name__ == '__main__':
+ sys.path.insert(0, os.path.abspath(os.path.join(exe_path,"../..")))
+
+from ruffus import *
+import json
+
+
+
+#
+# up_to_date_task1
+#
+@files(None, 'a.1')
+def up_to_date_task1(infile, outfile):
+ open(outfile, "w")
+
+
+
+#
+# up_to_date_task2
+#
+@transform(up_to_date_task1, suffix('.1'), '.2')
+def up_to_date_task2(infile, outfile):
+ open(outfile, "w")
+
+#
+# task3
+#
+@transform(up_to_date_task2, suffix('.2'), '.3')
+def task3(infile, outfile):
+ open(outfile, "w")
+
+#
+# up_to_date_task4
+#
+@files(None, 'a.4')
+def up_to_date_task4(infile, outfile):
+ open(outfile, "w")
+
+#
+# task5
+#
+@transform(up_to_date_task4, suffix('.4'), '.5')
+def task5(infile, outfile):
+ open(outfile, "w")
+
+#
+# task6
+#
+@follows(task3)
+@transform([task5], suffix('.5'), '.6')
+def task6(infile, outfile):
+ open(outfile, "w")
+
+#
+# task7
+#
+@transform(task6, suffix('.6'), '.7')
+def final_task(infile, outfile):
+ open(outfile, "w")
+
+
+
+for f in range(7):
+ fn = "a.%d" % (f + 1)
+ if os.path.exists(fn):
+ os.unlink(fn)
+import time
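+# create the target files with staggered timestamps so that only some tasks appear up to date in the flowchart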
+open("a.3", "w")
+open("a.7", "w")
+open("a.5", "w")
+time.sleep(1)
+open("a.1", "w")
+time.sleep(1)
+open("a.2", "w")
+time.sleep(1)
+open("a.4", "w")
+time.sleep(1)
+open("a.6", "w")
+time.sleep(1)
+pipeline_printout_graph ("manual_dependencies_flowchart_intro.png", "png", [final_task], dpi = 72, size = (2,2))
+#pipeline_printout (sys.stdout, [final_task], verbose = 5)
+for f in range(7):
+ fn = "a.%d" % (f + 1)
+ if os.path.exists(fn):
+ os.unlink(fn)
+
diff --git a/doc/images/manual_dependencies_flowchart_legend.dot b/doc/images/manual_dependencies_flowchart_legend.dot
new file mode 100644
index 0000000..a55bfca
--- /dev/null
+++ b/doc/images/manual_dependencies_flowchart_legend.dot
@@ -0,0 +1,30 @@
+digraph tree
+{
+size="8,11!";
+splines=true;
+fontsize=50;
+#rankdir="LR";
+ranksep = 0.2;
+sep = 0.001;
+esep = 0.001;
+width = 0.001;
+#rankdir="LR";
+subgraph clusterkey
+{
+style=filled;
+fontsize=20;
+color=gray97;
+label = "Key:";
+rft[color=orange, fontcolor=orange, shape=tripleoctagon, fontsize=13, label="Final target"];
+uft[color=gray, fontcolor=gray, shape=tripleoctagon, fontsize=13, label="Up-to-date Final target"];
+run[fontcolor=blue, shape=plaintext, fontsize=13, label="Task to run"];
+ufr[color=olivedrab, fontcolor=blue, shape=tripleoctagon, fontsize=13, label="Up-to-date\nforced to rerun"];
+utd[style=filled, color=olivedrab, label="Up-to-date", shape=octagon, fontsize=13, fillcolor=olivedrab, fontcolor=black];
+run->ufr->rft[color=blue];
+rft->uft[color=gray];
+utd->run[color=gray];
+
+
+}
+
+}
diff --git a/doc/images/manual_dependencies_flowchart_legend.png b/doc/images/manual_dependencies_flowchart_legend.png
new file mode 100644
index 0000000..6625647
Binary files /dev/null and b/doc/images/manual_dependencies_flowchart_legend.png differ
diff --git a/doc/images/manual_exceptions.png b/doc/images/manual_exceptions.png
new file mode 100644
index 0000000..f1dd840
Binary files /dev/null and b/doc/images/manual_exceptions.png differ
diff --git a/doc/images/manual_follows1.jpg b/doc/images/manual_follows1.jpg
new file mode 100644
index 0000000..d886314
Binary files /dev/null and b/doc/images/manual_follows1.jpg differ
diff --git a/doc/images/manual_follows1.png b/doc/images/manual_follows1.png
new file mode 100644
index 0000000..583ad7f
Binary files /dev/null and b/doc/images/manual_follows1.png differ
diff --git a/doc/images/manual_follows1.png.py b/doc/images/manual_follows1.png.py
new file mode 100755
index 0000000..7c71dda
--- /dev/null
+++ b/doc/images/manual_follows1.png.py
@@ -0,0 +1,19 @@
+#!/usr/bin/env python
+from ruffus import *
+import sys
+
+def first_task():
+ print "First task"
+
+@follows(first_task)
+def second_task():
+ print "Second task"
+
+@follows(second_task)
+def final_task():
+ print "Final task"
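+# write the dependency graph of the three tasks to manual_follows1.png, omitting the key legend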
+pipeline_printout_graph ( "manual_follows1.png",
+ "png",
+ [final_task],
+ no_key_legend=True)
+
diff --git a/doc/images/manual_split_merge_example.jpg b/doc/images/manual_split_merge_example.jpg
new file mode 100644
index 0000000..ab9e5bb
Binary files /dev/null and b/doc/images/manual_split_merge_example.jpg differ
diff --git a/doc/images/manual_transform.png b/doc/images/manual_transform.png
new file mode 100644
index 0000000..9d89086
Binary files /dev/null and b/doc/images/manual_transform.png differ
diff --git a/doc/images/manual_transform_complex_outputs.png b/doc/images/manual_transform_complex_outputs.png
new file mode 100644
index 0000000..0b00542
Binary files /dev/null and b/doc/images/manual_transform_complex_outputs.png differ
diff --git a/doc/images/pretty_flowchart.png b/doc/images/pretty_flowchart.png
new file mode 100644
index 0000000..200338a
Binary files /dev/null and b/doc/images/pretty_flowchart.png differ
diff --git a/doc/images/pretty_flowchart.png.py b/doc/images/pretty_flowchart.png.py
new file mode 100755
index 0000000..e19bd4f
--- /dev/null
+++ b/doc/images/pretty_flowchart.png.py
@@ -0,0 +1,46 @@
+#!/usr/bin/env python
+import sys, os
+
+# add self to search path for testing
+if __name__ == '__main__':
+ exe_path = os.path.split(os.path.abspath(sys.argv[0]))[0]
+ module_name = os.path.split(sys.argv[0])[1]
+ module_name = os.path.splitext(module_name)[0];
+else:
+ module_name = __name__
+
+# Use import path from <<../python_modules>>
+if __name__ == '__main__':
+ sys.path.insert(0, os.path.abspath(os.path.join(exe_path,"../..")))
+
+from ruffus import *
+import json
+
+
+def Explicitly_specified_task(infile, outfile):
+ pass
+
+
+@follows(Explicitly_specified_task)
+def Task_to_run(infile, outfile):
+ pass
+
+@follows(Task_to_run)
+def Up_to_date_task_forced_to_rerun(infile, outfile):
+ pass
+
+
+@follows(Up_to_date_task_forced_to_rerun)
+def Final_target(infile, outfile):
+ pass
+
+@follows(Final_target)
+def Downstream_task1_ignored(infile, outfile):
+ pass
+
+@follows(Final_target)
+def Downstream_task2_ignored(infile, outfile):
+ pass
+
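+# draw the flowchart horizontally, without the key legend, from the forced Explicitly_specified_task through to Final_target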
+pipeline_printout_graph ("pretty_flowchart.png", "png", [Final_target], [Explicitly_specified_task], dpi = 72, size = (2,2), draw_vertically = False, no_key_legend = True)
+
diff --git a/doc/images/simple_tutorial_complex_flowchart.dot b/doc/images/simple_tutorial_complex_flowchart.dot
new file mode 100644
index 0000000..1e47b27
--- /dev/null
+++ b/doc/images/simple_tutorial_complex_flowchart.dot
@@ -0,0 +1,67 @@
+digraph tree
+{
+size="8,11!";
+splines=true;
+fontsize=50;
+ranksep = 0.3;
+#rankdir="LR";
+subgraph clustertasks
+{
+label = "Pipeline:";
+node[fontsize=40];
+t0[fontcolor=blue, shape=plaintext, label="task1"];
+t2[color=olivedrab, fontcolor=blue, shape=tripleoctagon, label="task7"];
+t0 -> t2[color=blue];
+t3[color=olivedrab, fontcolor=blue, shape=tripleoctagon, label="task8"];
+t2 -> t3[color=blue];
+t4[fontcolor=blue, shape=plaintext, label="task9"];
+t3 -> t4[color=blue];
+t5[fontcolor=blue, shape=plaintext, label="task10"];
+t4 -> t5[color=blue];
+t6[color=olivedrab, fontcolor=blue, shape=tripleoctagon, label="task11"];
+t5 -> t6[color=blue];
+t1[fontcolor=blue, shape=plaintext, label="task2"];
+t0 -> t1[color=blue];
+t8[fontcolor=blue, shape=plaintext, label="task3"];
+t1 -> t8[color=blue];
+t9[fontcolor=blue, shape=plaintext, label="task4"];
+t8 -> t9[color=blue];
+t10[fontcolor=blue, shape=plaintext, label="task5"];
+t9 -> t10[color=blue];
+t11[fontcolor=blue, shape=plaintext, label="task6"];
+t10 -> t11[color=blue];
+t7[fontcolor=blue, shape=plaintext, label="task12"];
+t11 -> t7[color=blue];
+t6 -> t7[color=blue];
+t13[fontcolor=blue, shape=plaintext, label="task18"];
+t7 -> t13[color=blue];
+t19[fontcolor=blue, shape=plaintext, label="task19"];
+t13 -> t19[color=blue];
+t20[color=olivedrab, fontcolor=blue, shape=tripleoctagon, label="task20"];
+t19 -> t20[color=blue];
+t21[fontcolor=blue, shape=plaintext, label="task21"];
+t20 -> t21[color=blue];
+t8 -> t21[color=blue];
+t22[fontcolor=blue, shape=plaintext, label="task22"];
+t21 -> t22[color=blue];
+t23[fontcolor=blue, shape=plaintext, label="task23"];
+t22 -> t23[color=blue];
+t12[fontcolor=blue, shape=plaintext, label="task13"];
+t7 -> t12[color=blue];
+t14[color=olivedrab, fontcolor=blue, shape=tripleoctagon, label="task14"];
+t12 -> t14[color=blue];
+t15[fontcolor=blue, shape=plaintext, label="task15"];
+t14 -> t15[color=blue];
+t4 -> t15[color=blue];
+t16[fontcolor=blue, shape=plaintext, label="task16"];
+t15 -> t16[color=blue];
+t6 -> t16[color=blue];
+t17[fontcolor=blue, shape=plaintext, label="task17"];
+t16 -> t17[color=blue];
+t18[fontcolor=blue, shape=plaintext, label="task24"];
+t17 -> t18[color=blue];
+t24[color=orange, fontcolor=orange, shape=tripleoctagon, label="task25"];
+t23 -> t24[color=blue];
+t18 -> t24[color=blue];
+}
+}
\ No newline at end of file
diff --git a/doc/images/simple_tutorial_complex_flowchart.png b/doc/images/simple_tutorial_complex_flowchart.png
new file mode 100644
index 0000000..63b4d85
Binary files /dev/null and b/doc/images/simple_tutorial_complex_flowchart.png differ
diff --git a/doc/images/simple_tutorial_complex_flowchart.py b/doc/images/simple_tutorial_complex_flowchart.py
new file mode 100644
index 0000000..0cf8237
--- /dev/null
+++ b/doc/images/simple_tutorial_complex_flowchart.py
@@ -0,0 +1,437 @@
+#!/usr/bin/env python
+"""
+
+ test2.py
+ [--log_file PATH]
+ [--verbose]
+
+"""
+
+################################################################################
+#
+# test2
+#
+#
+# Copyright (c) 7/16/2010 Leo Goodstadt
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+#################################################################################
+
+import sys, os
+
+# add self to search path for testing
+if __name__ == '__main__':
+ exe_path = os.path.split(os.path.abspath(sys.argv[0]))[0]
+ module_name = os.path.split(sys.argv[0])[1]
+ module_name = os.path.splitext(module_name)[0];
+else:
+ module_name = __name__
+
+# Use import path from <<../python_modules>>
+if __name__ == '__main__':
+ sys.path.insert(0, os.path.abspath(os.path.join(exe_path,"../..")))
+
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# options
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+
+if __name__ == '__main__':
+ from optparse import OptionParser
+ import StringIO
+
+ parser = OptionParser(version="%prog 1.0", usage = "\n\n %progs [options]")
+ parser.add_option("-i", "--input_file", dest="input_file",
+ metavar="FILE",
+ type="string",
+ help="Name and path of input file. "
+ "Defaults to reading from STDIN.")
+
+ #
+ # general options: verbosity / logging
+ #
+ parser.add_option("-v", "--verbose", dest = "verbose",
+ action="count", default=0,
+ help="Print more verbose messages for each additional verbose level.")
+ parser.add_option("-L", "--log_file", dest="log_file",
+ metavar="FILE",
+ type="string",
+ help="Name and path of log file")
+ parser.add_option("--skip_parameter_logging", dest="skip_parameter_logging",
+ action="store_true", default=False,
+ help="Do not print program parameters to log.")
+ parser.add_option("--debug", dest="debug",
+ action="count", default=0,
+ help="Set default program parameters in debugging mode.")
+
+
+
+
+
+
+
+ #
+ # pipeline
+ #
+ parser.add_option("-t", "--target_tasks", dest="target_tasks",
+ action="append",
+ default = list(),
+ metavar="JOBNAME",
+ type="string",
+ help="Target task(s) of pipeline.")
+ parser.add_option("-j", "--jobs", dest="jobs",
+ default=1,
+ metavar="N",
+ type="int",
+ help="Allow N jobs (commands) to run simultaneously.")
+ parser.add_option("-n", "--just_print", dest="just_print",
+ action="store_true", default=False,
+ help="Don't actually run any commands; just print the pipeline.")
+ parser.add_option("--flowchart", dest="flowchart",
+ metavar="FILE",
+ type="string",
+ help="Don't actually run any commands; just print the pipeline "
+ "as a flowchart.")
+
+ #
+ # Less common pipeline options
+ #
+ parser.add_option("--key_legend_in_graph", dest="key_legend_in_graph",
+ action="store_true", default=False,
+ help="Print out legend and key for dependency graph.")
+ parser.add_option("--draw_graph_horizontally", dest="draw_horizontally",
+ action="store_true", default=False,
+ help="Draw horizontal dependency graph.")
+ parser.add_option("--forced_tasks", dest="forced_tasks",
+ action="append",
+ default = list(),
+ metavar="JOBNAME",
+ type="string",
+ help="Pipeline task(s) which will be included even if they are up to date.")
+
+ # get help string
+ f =StringIO.StringIO()
+ parser.print_help(f)
+ helpstr = f.getvalue()
+ original_args = " ".join(sys.argv)
+ (options, remaining_args) = parser.parse_args()
+
+
+ #vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
+ # #
+ # Debug: Change these #
+ # #
+ #^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+ if options.debug:
+ options.log_file = os.path.join("test2.log")
+ options.verbose = 5
+ options.log_parameters = True
+ #vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
+ # #
+ # Debug: Change these #
+ # #
+ #^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+ #
+ # mandatory options
+ #
+ mandatory_options = []
+ def check_mandatory_options (options, mandatory_options, helpstr):
+ """
+ Check if specified mandatory options have been defined
+ """
+ missing_options = []
+ for o in mandatory_options:
+ if not getattr(options, o):
+ missing_options.append("--" + o)
+
+ if not len(missing_options):
+ return
+
+ raise Exception("Missing mandatory parameter%s: %s.\n\n%s\n\n" %
+ ("s" if len(missing_options) > 1 else "",
+ ", ".join(missing_options),
+ helpstr))
+ check_mandatory_options (options, mandatory_options, helpstr)
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# imports
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+from ruffus import *
+from ruffus.ruffus_exceptions import JobSignalledBreak
+
+#from json import dumps
+#from collections import defaultdict
+
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Functions
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Logger
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+if __name__ == '__main__':
+ import logging
+ import logging.handlers
+
+ MESSAGE = 15
+ logging.addLevelName(MESSAGE, "MESSAGE")
+
+ def setup_std_logging (logger, log_file, verbose):
+ """
+ set up logging using programme options
+ """
+ class debug_filter(logging.Filter):
+ """
+ Ignore INFO messages
+ """
+ def filter(self, record):
+ return logging.INFO != record.levelno
+
+ class NullHandler(logging.Handler):
+ """
+ for when there is no logging
+ """
+ def emit(self, record):
+ pass
+
+ # We are interesting in all messages
+ logger.setLevel(logging.DEBUG)
+ has_handler = False
+
+ # log to file if that is specified
+ if log_file:
+ handler = logging.FileHandler(log_file, delay=False)
+ handler.setFormatter(logging.Formatter("%(asctime)s - %(name)s - %(levelname)6s - %(message)s"))
+ handler.setLevel(MESSAGE)
+ logger.addHandler(handler)
+ has_handler = True
+
+ # log to stderr if verbose
+ if verbose:
+ stderrhandler = logging.StreamHandler(sys.stderr)
+ stderrhandler.setFormatter(logging.Formatter(" %(message)s"))
+ stderrhandler.setLevel(logging.DEBUG)
+ if log_file:
+ stderrhandler.addFilter(debug_filter())
+ logger.addHandler(stderrhandler)
+ has_handler = True
+
+ # no logging
+ if not has_handler:
+ logger.addHandler(NullHandler())
+
+
+ #
+ # set up log
+ #
+ logger = logging.getLogger(module_name)
+ setup_std_logging(logger, options.log_file, options.verbose)
+
+ #
+ # Allow logging across Ruffus pipeline
+ #
+ def get_logger (logger_name, args):
+ return logger
+
+ from ruffus.proxy_logger import *
+ (logger_proxy,
+ logging_mutex) = make_shared_logger_and_proxy (get_logger,
+ module_name,
+ {})
+
+ #
+ # log programme parameters
+ #
+ if not options.skip_parameter_logging:
+ programme_name = os.path.split(sys.argv[0])[1]
+ logger.info("%s %s" % (programme_name, original_args))
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Pipeline
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
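+# note: task1 is declared to follow task2, and task10 to follow task12, even though those tasks lie downstream;
+# this creates cycles in the dependency graph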
+@follows("task2")
+@files(None, "a.1")
+def task1(input_file, output_file):
+ open(output_file, "w")
+
+@transform(task1, suffix("1"), "2")
+def task2(input_file, output_file):
+ open(output_file, "w")
+
+@transform(task2, suffix("2"), "3")
+def task3(input_file, output_file):
+ open(output_file, "w")
+
+@transform(task3, suffix("3"), "4")
+def task4(input_file, output_file):
+ open(output_file, "w")
+
+@transform(task4, suffix("4"), "5")
+def task5(input_file, output_file):
+ open(output_file, "w")
+
+@transform(task5, suffix("5"), "6")
+def task6(input_file, output_file):
+ open(output_file, "w")
+
+@transform(task1, suffix("1"), "7")
+def task7(input_file, output_file):
+ open(output_file, "w")
+
+@transform(task7, suffix("7"), "8")
+def task8(input_file, output_file):
+ open(output_file, "w")
+
+@transform(task8, suffix("8"), "9")
+def task9(input_file, output_file):
+ open(output_file, "w")
+
+@follows("task5")
+@follows("task12")
+@transform(task9, suffix("9"), "10")
+def task10(input_file, output_file):
+ open(output_file, "w")
+
+@transform(task10, suffix("10"), "11")
+def task11(input_file, output_file):
+ open(output_file, "w")
+
+@follows(task6)
+@transform(task11, suffix("11"), "12")
+def task12(input_file, output_file):
+ open(output_file, "w")
+
+@transform(task12, suffix("12"), "13")
+def task13(input_file, output_file):
+ open(output_file, "w")
+
+@transform(task13, suffix("13"), "14")
+def task14(input_file, output_file):
+ open(output_file, "w")
+
+@follows(task9)
+@transform(task14, suffix("14"), "15")
+def task15(input_file, output_file):
+ open(output_file, "w")
+
+@follows(task11)
+@transform(task15, suffix("15"), "16")
+def task16(input_file, output_file):
+ open(output_file, "w")
+
+@transform(task16, suffix("16"), "17")
+def task17(input_file, output_file):
+ open(output_file, "w")
+
+@transform(task12, suffix("12"), "18")
+def task18(input_file, output_file):
+ open(output_file, "w")
+
+@transform(task18, suffix("18"), "19")
+def task19(input_file, output_file):
+ open(output_file, "w")
+
+@transform(task19, suffix("19"), "20")
+def task20(input_file, output_file):
+ open(output_file, "w")
+
+@follows(task3)
+@transform(task20, suffix("20"), "21")
+def task21(input_file, output_file):
+ open(output_file, "w")
+
+@transform(task21, suffix("21"), "22")
+def task22(input_file, output_file):
+ open(output_file, "w")
+
+@transform(task22, suffix("22"), "23")
+def task23(input_file, output_file):
+ open(output_file, "w")
+
+@transform(task23, suffix("23"), "24")
+def task24(input_file, output_file):
+ open(output_file, "w")
+
+@follows(task17)
+@transform(task24, suffix("24"), "25")
+def task25(input_file, output_file):
+ open(output_file, "w")
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Main logic
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+if __name__ == '__main__':
+# debug code not run if called as a module
+#
+ if options.just_print:
+ pipeline_printout(sys.stdout, options.target_tasks, options.forced_tasks,
+ verbose=options.verbose)
+
+ elif options.flowchart:
+ pipeline_printout_graph ( open(options.flowchart, "w"),
+ os.path.splitext(options.flowchart)[1][1:],
+ options.target_tasks,
+ options.forced_tasks,
+ draw_vertically = not options.draw_horizontally,
+ no_key_legend = not options.key_legend_in_graph,
+ user_colour_scheme = {"colour_scheme_index":0},
+ pipeline_name = "Complicated pipeline:",
+ size = (6,5),
+ dpi = 72,
+ )
+ else:
+ pipeline_run(options.target_tasks, options.forced_tasks,
+ multiprocess = options.jobs,
+ logger = stderr_logger,
+ verbose = options.verbose)
+
+
+
+
+
+
+
+
diff --git a/doc/images/simple_tutorial_complex_flowchart_error.dot b/doc/images/simple_tutorial_complex_flowchart_error.dot
new file mode 100644
index 0000000..dd56238
--- /dev/null
+++ b/doc/images/simple_tutorial_complex_flowchart_error.dot
@@ -0,0 +1,70 @@
+digraph tree
+{
+size="8,11!";
+splines=true;
+fontsize=50;
+ranksep = 0.3;
+subgraph clustertasks
+{
+label = "Pipeline:";
+edge[minlen=2];
+node[fontsize=40];
+t2[fontcolor=blue, shape=plaintext, label="task7"];
+t0 -> t2[color=blue];
+t3[fontcolor=blue, shape=plaintext, label="task8"];
+t2 -> t3[color=blue];
+t4[fontcolor=blue, shape=plaintext, label="task9"];
+t3 -> t4[color=blue];
+t4 -> t5[color=blue];
+t8[fontcolor=blue, shape=plaintext, label="task3"];
+t1 -> t8[color=blue];
+t9[fontcolor=blue, shape=plaintext, label="task4"];
+t8 -> t9[color=blue];
+t9 -> t10[color=blue];
+t13[fontcolor=blue, shape=plaintext, label="task18"];
+t7 -> t13[color=blue];
+t19[fontcolor=blue, shape=plaintext, label="task19"];
+t13 -> t19[color=blue];
+t20[fontcolor=blue, shape=plaintext, label="task20"];
+t19 -> t20[color=blue];
+t21[fontcolor=blue, shape=plaintext, label="task21"];
+t20 -> t21[color=blue];
+t8 -> t21[color=blue];
+t22[fontcolor=blue, shape=plaintext, label="task22"];
+t21 -> t22[color=blue];
+t23[fontcolor=blue, shape=plaintext, label="task23"];
+t22 -> t23[color=blue];
+t12[fontcolor=blue, shape=plaintext, label="task13"];
+t7 -> t12[color=blue];
+t14[fontcolor=blue, shape=plaintext, label="task14"];
+t12 -> t14[color=blue];
+t15[fontcolor=blue, shape=plaintext, label="task15"];
+t14 -> t15[color=blue];
+t4 -> t15[color=blue];
+t16[fontcolor=blue, shape=plaintext, label="task16"];
+t15 -> t16[color=blue];
+t6 -> t16[color=blue];
+t17[fontcolor=blue, shape=plaintext, label="task17"];
+t16 -> t17[color=blue];
+t18[fontcolor=blue, shape=plaintext, label="task24"];
+t17 -> t18[color=blue];
+t24[color=orange, fontcolor=orange, shape=tripleoctagon, label="task25"];
+t23 -> t24[color=blue];
+t18 -> t24[color=blue];
+t1[shape=box, style=filled, fillcolor=red, label="task2"];
+t0 -> t1[color=red ];
+t0[shape=box, style=filled, fillcolor=red, label="task1"];
+t1 -> t0[color=red , constraint=false];
+t5[shape=box, style=filled, fillcolor=red, label="task10"];
+t7 -> t5[color=red , constraint=false];
+t6[shape=box, style=filled, fillcolor=red, label="task11"];
+t5 -> t6[color=red ];
+t10[shape=box, style=filled, fillcolor=red, label="task5"];
+t5 -> t10[color=red ];
+t11[shape=box, style=filled, fillcolor=red, label="task6"];
+t10 -> t11[color=red ];
+t7[shape=box, style=filled, fillcolor=red, label="task12"];
+t11 -> t7[color=red , constraint=false];
+t6 -> t7[color=red ];
+}
+}
diff --git a/doc/images/simple_tutorial_complex_flowchart_error.png b/doc/images/simple_tutorial_complex_flowchart_error.png
new file mode 100644
index 0000000..cb1f604
Binary files /dev/null and b/doc/images/simple_tutorial_complex_flowchart_error.png differ
diff --git a/doc/images/simple_tutorial_complex_flowchart_error_with_key.png b/doc/images/simple_tutorial_complex_flowchart_error_with_key.png
new file mode 100644
index 0000000..7bb254e
Binary files /dev/null and b/doc/images/simple_tutorial_complex_flowchart_error_with_key.png differ
diff --git a/doc/images/simple_tutorial_complex_flowchart_with_key.png b/doc/images/simple_tutorial_complex_flowchart_with_key.png
new file mode 100644
index 0000000..2907e9f
Binary files /dev/null and b/doc/images/simple_tutorial_complex_flowchart_with_key.png differ
diff --git a/doc/images/simple_tutorial_decorator_syntax.png b/doc/images/simple_tutorial_decorator_syntax.png
new file mode 100644
index 0000000..e8614f1
Binary files /dev/null and b/doc/images/simple_tutorial_decorator_syntax.png differ
diff --git a/doc/images/simple_tutorial_files1.png b/doc/images/simple_tutorial_files1.png
new file mode 100644
index 0000000..fc5708e
Binary files /dev/null and b/doc/images/simple_tutorial_files1.png differ
diff --git a/doc/images/simple_tutorial_files2.png b/doc/images/simple_tutorial_files2.png
new file mode 100644
index 0000000..77e5723
Binary files /dev/null and b/doc/images/simple_tutorial_files2.png differ
diff --git a/doc/images/simple_tutorial_files3.png b/doc/images/simple_tutorial_files3.png
new file mode 100644
index 0000000..bee05dc
Binary files /dev/null and b/doc/images/simple_tutorial_files3.png differ
diff --git a/doc/images/simple_tutorial_files4.png b/doc/images/simple_tutorial_files4.png
new file mode 100644
index 0000000..4dc023c
Binary files /dev/null and b/doc/images/simple_tutorial_files4.png differ
diff --git a/doc/images/simple_tutorial_files5.png b/doc/images/simple_tutorial_files5.png
new file mode 100644
index 0000000..731f5fd
Binary files /dev/null and b/doc/images/simple_tutorial_files5.png differ
diff --git a/doc/images/simple_tutorial_flowchart_legend.dot b/doc/images/simple_tutorial_flowchart_legend.dot
new file mode 100644
index 0000000..f9455c4
--- /dev/null
+++ b/doc/images/simple_tutorial_flowchart_legend.dot
@@ -0,0 +1,24 @@
+digraph tree
+{
+size="8,11!";
+splines=true;
+fontsize=50;
+ranksep = 0.3;
+#rankdir="LR";
+subgraph clusterkey
+{
+style=filled;
+fontsize=50;
+color=gray90;
+label = "Key:";
+node[fontsize=40];
+k1[color=orange, fontcolor=orange, shape=tripleoctagon, fontsize=15, label="Final target"];
+k2[shape=box, style=filled, fontsize=15, fillcolor=red, label="Vicious cycle"];
+k3[fontcolor=blue, shape=plaintext, fontsize=15, label="Task to run"];
+k4[color=blue, fontcolor=blue, shape=tripleoctagon, fontsize=15, label="Force pipeline run from this task"];
+k5[color=gray, fontcolor=gray, shape=tripleoctagon, fontsize=15, label="Up-to-date Final target"];
+k6[color=olivedrab, fontcolor=blue, shape=tripleoctagon, fontsize=15, label="Up-to-date task forced to rerun"];
+k7[style=filled, color=olivedrab, label="Up-to-date task", shape=octagon, fontsize=15, fillcolor=olivedrab, fontcolor=black];
+k8[style=filled, color=gray, label="Up-to-date dependence", shape=octagon, fontsize=15, fillcolor=white, fontcolor=gray];
+k1->k2[color=red];k2->k1 [color=red];k2->k3->k4->k5[color=blue];k5->k6->k7->k8[color=gray];}
+}
diff --git a/doc/images/simple_tutorial_flowchart_legend.png b/doc/images/simple_tutorial_flowchart_legend.png
new file mode 100644
index 0000000..ef34474
Binary files /dev/null and b/doc/images/simple_tutorial_flowchart_legend.png differ
diff --git a/doc/images/simple_tutorial_follows.png b/doc/images/simple_tutorial_follows.png
new file mode 100644
index 0000000..626a1ad
Binary files /dev/null and b/doc/images/simple_tutorial_follows.png differ
diff --git a/doc/images/simple_tutorial_hello_world.png b/doc/images/simple_tutorial_hello_world.png
new file mode 100644
index 0000000..78aa28d
Binary files /dev/null and b/doc/images/simple_tutorial_hello_world.png differ
diff --git a/doc/images/simple_tutorial_hello_world_output.png b/doc/images/simple_tutorial_hello_world_output.png
new file mode 100644
index 0000000..c98ec63
Binary files /dev/null and b/doc/images/simple_tutorial_hello_world_output.png differ
diff --git a/doc/images/simple_tutorial_intro_follows.png b/doc/images/simple_tutorial_intro_follows.png
new file mode 100644
index 0000000..dc46350
Binary files /dev/null and b/doc/images/simple_tutorial_intro_follows.png differ
diff --git a/doc/images/simple_tutorial_merge1.png b/doc/images/simple_tutorial_merge1.png
new file mode 100644
index 0000000..fe1b6ab
Binary files /dev/null and b/doc/images/simple_tutorial_merge1.png differ
diff --git a/doc/images/simple_tutorial_merge2.png b/doc/images/simple_tutorial_merge2.png
new file mode 100644
index 0000000..04835b7
Binary files /dev/null and b/doc/images/simple_tutorial_merge2.png differ
diff --git a/doc/images/simple_tutorial_pipeline_printout1.png b/doc/images/simple_tutorial_pipeline_printout1.png
new file mode 100644
index 0000000..73c1cd9
Binary files /dev/null and b/doc/images/simple_tutorial_pipeline_printout1.png differ
diff --git a/doc/images/simple_tutorial_pipeline_printout2.png b/doc/images/simple_tutorial_pipeline_printout2.png
new file mode 100644
index 0000000..bfe04ad
Binary files /dev/null and b/doc/images/simple_tutorial_pipeline_printout2.png differ
diff --git a/doc/images/simple_tutorial_pipeline_printout3.png b/doc/images/simple_tutorial_pipeline_printout3.png
new file mode 100644
index 0000000..96e0343
Binary files /dev/null and b/doc/images/simple_tutorial_pipeline_printout3.png differ
diff --git a/doc/images/simple_tutorial_posttask.png b/doc/images/simple_tutorial_posttask.png
new file mode 100644
index 0000000..c5e75d5
Binary files /dev/null and b/doc/images/simple_tutorial_posttask.png differ
diff --git a/doc/images/simple_tutorial_split.png b/doc/images/simple_tutorial_split.png
new file mode 100644
index 0000000..2f9d8d5
Binary files /dev/null and b/doc/images/simple_tutorial_split.png differ
diff --git a/doc/images/simple_tutorial_stage4_after.png b/doc/images/simple_tutorial_stage4_after.png
new file mode 100644
index 0000000..1095d57
Binary files /dev/null and b/doc/images/simple_tutorial_stage4_after.png differ
diff --git a/doc/images/simple_tutorial_stage4_before.png b/doc/images/simple_tutorial_stage4_before.png
new file mode 100644
index 0000000..1724e30
Binary files /dev/null and b/doc/images/simple_tutorial_stage4_before.png differ
diff --git a/doc/images/simple_tutorial_stage5_after.png b/doc/images/simple_tutorial_stage5_after.png
new file mode 100644
index 0000000..ac66986
Binary files /dev/null and b/doc/images/simple_tutorial_stage5_after.png differ
diff --git a/doc/images/simple_tutorial_stage5_before.png b/doc/images/simple_tutorial_stage5_before.png
new file mode 100644
index 0000000..96d5a4b
Binary files /dev/null and b/doc/images/simple_tutorial_stage5_before.png differ
diff --git a/doc/images/simple_tutorial_stage5_flowchart.png b/doc/images/simple_tutorial_stage5_flowchart.png
new file mode 100644
index 0000000..3714afb
Binary files /dev/null and b/doc/images/simple_tutorial_stage5_flowchart.png differ
diff --git a/doc/images/simple_tutorial_step2_ex1.png b/doc/images/simple_tutorial_step2_ex1.png
new file mode 100644
index 0000000..2620a92
Binary files /dev/null and b/doc/images/simple_tutorial_step2_ex1.png differ
diff --git a/doc/images/simple_tutorial_step2_ex2.png b/doc/images/simple_tutorial_step2_ex2.png
new file mode 100644
index 0000000..df5c741
Binary files /dev/null and b/doc/images/simple_tutorial_step2_ex2.png differ
diff --git a/doc/images/simple_tutorial_step3 copy.png b/doc/images/simple_tutorial_step3 copy.png
new file mode 100644
index 0000000..e4edcfd
Binary files /dev/null and b/doc/images/simple_tutorial_step3 copy.png differ
diff --git a/doc/images/simple_tutorial_step3.jpg b/doc/images/simple_tutorial_step3.jpg
new file mode 100644
index 0000000..eba0b47
Binary files /dev/null and b/doc/images/simple_tutorial_step3.jpg differ
diff --git a/doc/images/simple_tutorial_step4.png b/doc/images/simple_tutorial_step4.png
new file mode 100644
index 0000000..34a264a
Binary files /dev/null and b/doc/images/simple_tutorial_step4.png differ
diff --git a/doc/images/simple_tutorial_step5.png b/doc/images/simple_tutorial_step5.png
new file mode 100644
index 0000000..ce8b80e
Binary files /dev/null and b/doc/images/simple_tutorial_step5.png differ
diff --git a/doc/images/simple_tutorial_step5_sans_key.png b/doc/images/simple_tutorial_step5_sans_key.png
new file mode 100644
index 0000000..f14f775
Binary files /dev/null and b/doc/images/simple_tutorial_step5_sans_key.png differ
diff --git a/doc/images/simple_tutorial_transform.png b/doc/images/simple_tutorial_transform.png
new file mode 100644
index 0000000..6808632
Binary files /dev/null and b/doc/images/simple_tutorial_transform.png differ
diff --git a/doc/images/simple_tutorial_zoo_animals_formatter_example.jpg b/doc/images/simple_tutorial_zoo_animals_formatter_example.jpg
new file mode 100644
index 0000000..cfbfcc4
Binary files /dev/null and b/doc/images/simple_tutorial_zoo_animals_formatter_example.jpg differ
diff --git a/doc/images/src/Backup_of_complex_file_dag.cdr b/doc/images/src/Backup_of_complex_file_dag.cdr
new file mode 100644
index 0000000..af21bfd
Binary files /dev/null and b/doc/images/src/Backup_of_complex_file_dag.cdr differ
diff --git a/doc/images/src/Backup_of_complex_pipeline.cdr b/doc/images/src/Backup_of_complex_pipeline.cdr
new file mode 100644
index 0000000..7155f9e
Binary files /dev/null and b/doc/images/src/Backup_of_complex_pipeline.cdr differ
diff --git a/doc/images/src/complex_conceptual.cdr b/doc/images/src/complex_conceptual.cdr
new file mode 100644
index 0000000..cb5ce85
Binary files /dev/null and b/doc/images/src/complex_conceptual.cdr differ
diff --git a/doc/images/src/complex_file_dag.cdr b/doc/images/src/complex_file_dag.cdr
new file mode 100644
index 0000000..2a27b17
Binary files /dev/null and b/doc/images/src/complex_file_dag.cdr differ
diff --git a/doc/images/src/complex_pipeline.cdr b/doc/images/src/complex_pipeline.cdr
new file mode 100644
index 0000000..c8a5237
Binary files /dev/null and b/doc/images/src/complex_pipeline.cdr differ
diff --git a/doc/images/src/key.cdr b/doc/images/src/key.cdr
new file mode 100644
index 0000000..48123f7
Binary files /dev/null and b/doc/images/src/key.cdr differ
diff --git a/doc/images/theoretical_pipeline_schematic.png b/doc/images/theoretical_pipeline_schematic.png
new file mode 100644
index 0000000..a84cd9d
Binary files /dev/null and b/doc/images/theoretical_pipeline_schematic.png differ
diff --git a/doc/images/transform_1_to_1_example.png b/doc/images/transform_1_to_1_example.png
new file mode 100644
index 0000000..ed794d6
Binary files /dev/null and b/doc/images/transform_1_to_1_example.png differ
diff --git a/doc/images/tutorial_complete.jpg b/doc/images/tutorial_complete.jpg
new file mode 100644
index 0000000..35d5fc9
Binary files /dev/null and b/doc/images/tutorial_complete.jpg differ
diff --git a/doc/images/tutorial_force_from_task1.jpg b/doc/images/tutorial_force_from_task1.jpg
new file mode 100644
index 0000000..3444d56
Binary files /dev/null and b/doc/images/tutorial_force_from_task1.jpg differ
diff --git a/doc/images/tutorial_four_stage_pipeline.jpg b/doc/images/tutorial_four_stage_pipeline.jpg
new file mode 100644
index 0000000..1e6d1e0
Binary files /dev/null and b/doc/images/tutorial_four_stage_pipeline.jpg differ
diff --git a/doc/images/tutorial_key.jpg b/doc/images/tutorial_key.jpg
new file mode 100644
index 0000000..961aaf4
Binary files /dev/null and b/doc/images/tutorial_key.jpg differ
diff --git a/doc/images/tutorial_key.png b/doc/images/tutorial_key.png
new file mode 100644
index 0000000..a65505b
Binary files /dev/null and b/doc/images/tutorial_key.png differ
diff --git a/doc/images/tutorial_maximal_mode.jpg b/doc/images/tutorial_maximal_mode.jpg
new file mode 100644
index 0000000..8216807
Binary files /dev/null and b/doc/images/tutorial_maximal_mode.jpg differ
diff --git a/doc/images/tutorial_minimal_mode.jpg b/doc/images/tutorial_minimal_mode.jpg
new file mode 100644
index 0000000..3d5bdc5
Binary files /dev/null and b/doc/images/tutorial_minimal_mode.jpg differ
diff --git a/doc/images/tutorial_pipeline_key.jpg b/doc/images/tutorial_pipeline_key.jpg
new file mode 100644
index 0000000..961aaf4
Binary files /dev/null and b/doc/images/tutorial_pipeline_key.jpg differ
diff --git a/doc/images/tutorial_ruffus_files.jpg b/doc/images/tutorial_ruffus_files.jpg
new file mode 100644
index 0000000..00afb51
Binary files /dev/null and b/doc/images/tutorial_ruffus_files.jpg differ
diff --git a/doc/images/tutorial_step1_decorator_syntax.png b/doc/images/tutorial_step1_decorator_syntax.png
new file mode 100644
index 0000000..0fc68ab
Binary files /dev/null and b/doc/images/tutorial_step1_decorator_syntax.png differ
diff --git a/doc/images/tutorial_step1_follows.png b/doc/images/tutorial_step1_follows.png
new file mode 100644
index 0000000..3664730
Binary files /dev/null and b/doc/images/tutorial_step1_follows.png differ
diff --git a/doc/images/web_front_page.py b/doc/images/web_front_page.py
new file mode 100644
index 0000000..4eb8d28
--- /dev/null
+++ b/doc/images/web_front_page.py
@@ -0,0 +1,347 @@
+#!/usr/bin/env python
+"""
+
+ test2.py
+ [--log_file PATH]
+ [--verbose]
+
+"""
+
+################################################################################
+#
+# test2
+#
+#
+# Copyright (c) 7/16/2010 Leo Goodstadt
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+#################################################################################
+
+import sys, os
+
+# add self to search path for testing
+if __name__ == '__main__':
+ exe_path = os.path.split(os.path.abspath(sys.argv[0]))[0]
+ module_name = os.path.split(sys.argv[0])[1]
+ module_name = os.path.splitext(module_name)[0];
+else:
+ module_name = __name__
+
+# Use import path from <<../python_modules>>
+if __name__ == '__main__':
+ sys.path.insert(0, os.path.abspath(os.path.join(exe_path,"../..")))
+
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# options
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+
+if __name__ == '__main__':
+ from optparse import OptionParser
+ import StringIO
+
+ parser = OptionParser(version="%prog 1.0", usage = "\n\n %progs [options]")
+ parser.add_option("-i", "--input_file", dest="input_file",
+ metavar="FILE",
+ type="string",
+ help="Name and path of input file. "
+ "Defaults to reading from STDIN.")
+
+ #
+ # general options: verbosity / logging
+ #
+ parser.add_option("-v", "--verbose", dest = "verbose",
+ action="count", default=0,
+ help="Print more verbose messages for each additional verbose level.")
+ parser.add_option("-L", "--log_file", dest="log_file",
+ metavar="FILE",
+ type="string",
+ help="Name and path of log file")
+ parser.add_option("--skip_parameter_logging", dest="skip_parameter_logging",
+ action="store_true", default=False,
+ help="Do not print program parameters to log.")
+ parser.add_option("--debug", dest="debug",
+ action="count", default=0,
+ help="Set default program parameters in debugging mode.")
+
+
+
+
+
+
+
+ #
+ # pipeline
+ #
+ parser.add_option("-t", "--target_tasks", dest="target_tasks",
+ action="append",
+ default = list(),
+ metavar="JOBNAME",
+ type="string",
+ help="Target task(s) of pipeline.")
+ parser.add_option("-j", "--jobs", dest="jobs",
+ default=1,
+ metavar="N",
+ type="int",
+ help="Allow N jobs (commands) to run simultaneously.")
+ parser.add_option("-n", "--just_print", dest="just_print",
+ action="store_true", default=False,
+ help="Don't actually run any commands; just print the pipeline.")
+ parser.add_option("--flowchart", dest="flowchart",
+ metavar="FILE",
+ type="string",
+ help="Don't actually run any commands; just print the pipeline "
+ "as a flowchart.")
+
+ #
+ # Less common pipeline options
+ #
+ parser.add_option("--key_legend_in_graph", dest="key_legend_in_graph",
+ action="store_true", default=False,
+ help="Print out legend and key for dependency graph.")
+ parser.add_option("--draw_graph_horizontally", dest="draw_horizontally",
+ action="store_true", default=False,
+ help="Draw horizontal dependency graph.")
+ parser.add_option("--forced_tasks", dest="forced_tasks",
+ action="append",
+ default = list(),
+ metavar="JOBNAME",
+ type="string",
+ help="Pipeline task(s) which will be included even if they are up to date.")
+
+ # get help string
+ f =StringIO.StringIO()
+ parser.print_help(f)
+ helpstr = f.getvalue()
+ original_args = " ".join(sys.argv)
+ (options, remaining_args) = parser.parse_args()
+
+
+ #vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
+ # #
+ # Debug: Change these #
+ # #
+ #^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+ options.flowchart = "front_page_flowchart.png"
+ options.key_legend_in_graph = True
+ if options.debug:
+ options.log_file = os.path.join("test2.log")
+ options.verbose = 5
+ options.log_parameters = True
+ #vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
+ # #
+ # Debug: Change these #
+ # #
+ #^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+ #
+ # mandatory options
+ #
+ mandatory_options = []
+ def check_mandatory_options (options, mandatory_options, helpstr):
+ """
+ Check if specified mandatory options have been defined
+ """
+ missing_options = []
+ for o in mandatory_options:
+ if not getattr(options, o):
+ missing_options.append("--" + o)
+
+ if not len(missing_options):
+ return
+
+ raise Exception("Missing mandatory parameter%s: %s.\n\n%s\n\n" %
+ ("s" if len(missing_options) > 1 else "",
+ ", ".join(missing_options),
+ helpstr))
+ check_mandatory_options (options, mandatory_options, helpstr)
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# imports
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+from ruffus import *
+from ruffus.ruffus_exceptions import JobSignalledBreak
+
+#from json import dumps
+#from collections import defaultdict
+
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Functions
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Logger
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+if __name__ == '__main__':
+ import logging
+ import logging.handlers
+
+ MESSAGE = 15
+ logging.addLevelName(MESSAGE, "MESSAGE")
+
+ def setup_std_logging (logger, log_file, verbose):
+ """
+ set up logging using programme options
+ """
+ class debug_filter(logging.Filter):
+ """
+ Ignore INFO messages
+ """
+ def filter(self, record):
+ return logging.INFO != record.levelno
+
+ class NullHandler(logging.Handler):
+ """
+ for when there is no logging
+ """
+ def emit(self, record):
+ pass
+
+ # We are interested in all messages
+ logger.setLevel(logging.DEBUG)
+ has_handler = False
+
+ # log to file if that is specified
+ if log_file:
+ handler = logging.FileHandler(log_file, delay=False)
+ handler.setFormatter(logging.Formatter("%(asctime)s - %(name)s - %(levelname)6s - %(message)s"))
+ handler.setLevel(MESSAGE)
+ logger.addHandler(handler)
+ has_handler = True
+
+ # log to stderr if verbose
+ if verbose:
+ stderrhandler = logging.StreamHandler(sys.stderr)
+ stderrhandler.setFormatter(logging.Formatter(" %(message)s"))
+ stderrhandler.setLevel(logging.DEBUG)
+ if log_file:
+ stderrhandler.addFilter(debug_filter())
+ logger.addHandler(stderrhandler)
+ has_handler = True
+
+ # no logging
+ if not has_handler:
+ logger.addHandler(NullHandler())
+
+
+ #
+ # set up log
+ #
+ logger = logging.getLogger(module_name)
+ setup_std_logging(logger, options.log_file, options.verbose)
+
+ #
+ # Allow logging across Ruffus pipeline
+ #
+ def get_logger (logger_name, args):
+ return logger
+
+ from ruffus.proxy_logger import *
+ (logger_proxy,
+ logging_mutex) = make_shared_logger_and_proxy (get_logger,
+ module_name,
+ {})
+
+ #
+ # log programme parameters
+ #
+ if not options.skip_parameter_logging:
+ programme_name = os.path.split(sys.argv[0])[1]
+ logger.info("%s %s" % (programme_name, original_args))
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Pipeline
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+@files(None, "a.1")
+def task1(input_file, output_file):
+ open(output_file, "w")
+
+@transform(task1, suffix("1"), "2")
+def task2(input_file, output_file):
+ open(output_file, "w")
+
+@transform(task2, suffix("2"), "3")
+def task3(input_file, output_file):
+ open(output_file, "w")
+
+@transform(task3, suffix("3"), "4")
+def task4(input_file, output_file):
+ open(output_file, "w")
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Main logic
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+import time
+open("a.2", "w")
+time.sleep(1)
+open("a.1", "w")
+time.sleep(1)
+open("a.3", "w")
+pipeline_printout_graph ( open(options.flowchart, "w"),
+ os.path.splitext(options.flowchart)[1][1:],
+ [task4],
+ no_key_legend = not options.key_legend_in_graph,
+ user_colour_scheme = {"colour_scheme_index":0},
+ pipeline_name = "Pipeline Flowchart:",
+ size = (6,5),
+ dpi = 72,
+ )
+os.unlink("a.1")
+os.unlink("a.2")
+os.unlink("a.3")
+
+#pipeline_run(options.target_tasks, options.forced_tasks,
+# multiprocess = options.jobs,
+# logger = stderr_logger,
+# verbose = options.verbose)
+
+
+
+
+
+
+
+
diff --git a/doc/images/wikimedia_bandedkrait.jpg b/doc/images/wikimedia_bandedkrait.jpg
new file mode 100644
index 0000000..e227f02
Binary files /dev/null and b/doc/images/wikimedia_bandedkrait.jpg differ
diff --git a/doc/images/wikimedia_cyl_ruffus.jpg b/doc/images/wikimedia_cyl_ruffus.jpg
new file mode 100644
index 0000000..d60e3bc
Binary files /dev/null and b/doc/images/wikimedia_cyl_ruffus.jpg differ
diff --git a/doc/implementation_notes.rst b/doc/implementation_notes.rst
new file mode 100644
index 0000000..bd08d03
--- /dev/null
+++ b/doc/implementation_notes.rst
@@ -0,0 +1,442 @@
+##########################################
+Implementation Tips
+##########################################
+
+******************************************************************************
+Release
+******************************************************************************
+
+ * Change ``ruffus_version.py``
+
+ * Rebuild pdf and copy it to ``doc/static_data``::
+
+ cd doc
+ make latexpdf
+ cp _build/latex/ruffus.pdf static_data
+
+ * Rebuild documentation::
+
+ make htmlsync
+
+ * tag git with, for example::
+
+ git tag -a v2.5 -m "Version 2.5"
+
+
+ * Upload to pypi::
+
+ python setup.py sdist --format=gztar upload
+
+ * Upload to repository::
+
+ git push googlecode
+ git push
+
+******************************************************************************
+dbdict.py
+******************************************************************************
+
+ This is an sqlite backed dictionary originally written by Jacob Sondergaard and
+ contributed by Jake Biesinger who added automatic pickling of python objects.
+
+ The pickling code was refactored out by Leo Goodstadt into separate functions as
+ part of the preparation to make Ruffus python3 ready.
+
+ Python originally saved (pickled) objects as 7 bit ASCII strings. Later formats
+ (protocol = -1 is the latest format) use 8 bit strings and are rather more efficient.
+
+ These then need to be saved as BLOBs to sqlite3 rather than normal strings. We
+ can signal this by wrapping the pickled string in an object providing a "buffer interface".
+ This is ``buffer`` in python2.6/2.7 and ``memoryview`` in python3.
+
+ http://bugs.python.org/issue7723 suggests there is no portable python2/3 way to write
+ blobs to Sqlite without these two incompatible wrappers.
+ This would require conditional compilation:
+
+ .. code-block:: python
+
+    if sys.hexversion >= 0x03000000:
+        value = memoryview(pickle.dumps(value, protocol = -1))
+    else:
+        value = buffer(pickle.dumps(value, protocol = -1))
+
+
+ Despite the discussion on the bug report, sqlite3.Binary seems to work.
+ We shall see if this is portable to python3.
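+
+ A self-contained sketch (not the ``dbdict`` code itself) of how ``sqlite3.Binary`` can store a
+ pickled value as a BLOB in the same way under both python2 and python3:
+
+ .. code-block:: python
+
+    import sqlite3
+    import pickle
+
+    # same "data" table layout as the real .ruffus_history.sqlite shown further below
+    connection = sqlite3.connect(":memory:")
+    connection.execute("CREATE TABLE data (key PRIMARY KEY, value)")
+
+    value = {"pipeline_task": "task1", "completed": True}
+    # sqlite3.Binary marks the pickled bytes as a BLOB on python2 and python3
+    blob = sqlite3.Binary(pickle.dumps(value, protocol = -1))
+    connection.execute("INSERT INTO data (key, value) VALUES (?, ?)", ("a.output", blob))
+
+    stored = connection.execute("SELECT value FROM data WHERE key = ?", ("a.output",)).fetchone()[0]
+    assert pickle.loads(bytes(stored)) == value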
+
+******************************************************************************
+how to write new decorators
+******************************************************************************
+
+
+ New placeholder class. E.g. for ``@new_deco``
+
+ .. code-block:: python
+
+ class new_deco(task_decorator):
+ pass
+
+ Add to list of action names and ids:
+
+ .. code-block:: python
+
+ action_names = ["unspecified",
+ ...
+ "task_new_deco",
+
+ action_task_new_deco = 15
+
+ Add function:
+
+ .. code-block:: python
+
+ def task_transform (self, orig_args):
+
+
+ Add documentation to:
+
+ * decorators/NEW_DECORATOR.rst
+ * decorators/decorators.rst
+ * _templates/layout.html
+ * manual
+
+
+
+
+##########################################
+Implementation notes
+##########################################
+
+N.B. Remember to cite Jake Biesinger and see if he is interested to be a co-author if we ever resubmit the drastically changed version...
+He contributed checkpointing, travis and tox etc.
+
+.. _todo.misfeatures:
+
+********************************************************************************************************
+``Ctrl-C`` handling
+********************************************************************************************************
+
+ Pressing ``Ctrl-C`` left dangling processes in Ruffus 2.4 because ``KeyboardInterrupt`` does not play nice with python ``multiprocessing.Pool``
+ See http://stackoverflow.com/questions/1408356/keyboard-interrupts-with-pythons-multiprocessing-pool/1408476#1408476
+
+ http://bryceboe.com/2012/02/14/python-multiprocessing-pool-and-keyboardinterrupt-revisited/ provides a reimplementation of Pool which
+ however only works when you have a fixed number of jobs which should then run in parallel to completion. Ruffus is considerably more
+ complicated because we have a variable number of jobs completing and being submitted into the job queue at any one time. Think
+ of tasks stalling waiting for the dependent tasks to complete and then all the jobs of the task being released onto the queue
+
+ The solution is
+
+ #. Use a ``timeout`` parameter when using ``IMapIterator.next(timeout=None)`` to iterate through ``pool.imap_unordered`` because only timed ``condition`` waits can be interrupted by signals...!!
+ #. This involves rewriting the ``for`` loop manually as a ``while`` loop (see the sketch after this list)
+ #. We use a timeout of ``99999999``, i.e. 3 years, which should be enough for any job to complete...
+ #. Googling after the fact, it looks like the galaxy guys (cool dudes or what) have written similar `code <https://galaxy-dist.readthedocs.org/en/latest/_modules/galaxy/objectstore/s3_multipart_upload.html>`__
+ #. ``next()`` for normal iterators does not take ``timeout`` as an extra parameter so we have to wrap next in a conditional :-(. The galaxy guys do a `shim <http://en.wikipedia.org/wiki/Shim_(computing)>`__ around ``next()`` but that is as much obfuscation as a simple if...
+ #. After jobs are interrupted by a signal, we rethrow with our own exception because we want something that inherits from ``Exception`` unlike ``KeyboardInterrupt``
+ #. When a signal happens, we need to immediately stop ``feed_job_params_to_process_pool()`` from sending more parameters into the job queue (``parameter_q``).
+ We use a proxy to a ``multiprocessing.Event`` (via ``syncmanager.Event()``). When ``death_event`` is set, all further processing stops...
+ #. We also signal that all jobs should finish by putting ``all_tasks_complete()`` into ``parameter_q`` but only ``death_event`` prevents jobs already in the queue from going through
+ #. After signalling, some of the child processes appear to be dead by the time we start cleaning up. ``pool.terminate()`` sometimes tries and fails to
+ re-connect to the ``death_event`` proxy via sockets and throws an exception. We should really figure out a better solution but in the meantime
+ wrapping it in a ``try / except`` allows a clean exit.
+ #. If a vanilla exception is raised without multiprocessing running, we still need to first save the exception in ``job_errors`` (even if it is just one) before
+ cleaning up, because the cleaning up process may lead to further (ignored) exceptions which would overwrite the current exception when we need to rethrow it
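+
+ A stripped-down sketch of the ``while`` loop / ``timeout`` pattern described in the first few points
+ above (illustrative only, not the actual Ruffus job-dispatch code):
+
+ .. code-block:: python
+
+    import multiprocessing
+
+    def run_job(params):
+        # stand-in for running one job
+        return params
+
+    if __name__ == "__main__":
+        pool = multiprocessing.Pool(4)
+        job_iterator = pool.imap_unordered(run_job, range(10))
+        try:
+            while True:
+                try:
+                    # a timed next() can be interrupted by signals; a bare next() cannot
+                    result = job_iterator.next(timeout = 99999999)
+                except StopIteration:
+                    break
+        except KeyboardInterrupt:
+            # stop sending further jobs and clean up
+            pool.terminate()
+            raise
+        else:
+            pool.close()
+            pool.join()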
+
+
+ Exceptions thrown in the middle of a multiprocessing / multithreading job appear to be handled gracefully.
+
+ For drmaa jobs, ``qdel`` may still be necessary.
+
+
+******************************************************************************
+Python3 compatibility
+******************************************************************************
+
+ Required extensive changes especially in unit test code.
+
+ Changes:
+
+ 1. ``sort`` in python3 does not order mixed types, i.e. ``int()``, ``list()`` and ``str()`` are incommensurate
+
+ * In ``task.get_output_files (...)``, sort after conversion to string
+
+ .. code-block:: python
+
+ sorted(self.output_filenames, key = lambda x: str(x))
+
+ * In ``file_name_parameters.py``: ``collate_param_factory (...)``, ``sort`` after conversion to string, then ``groupby`` without string conversion. This is
+ because we can't guarantee that two different objects do not have the same string representation. But ``groupby`` requires that similar things are adjacent...
+
+ In other words, ``groupby`` is a refinement of ``sorted``
+
+ .. code-block:: python
+
+    for output_extra_params, grouped_params in groupby(sorted(io_params_iter, key = get_output_extras_str), key = get_output_extras):
+        pass
+
+ 2. ``print()`` is a function
+
+ .. code-block:: python
+
+ from __future__ import print_function
+
+ 3. ``items()`` only returns a list in python2. Rewrite ``dict.iteritems()`` whenever this might cause a performance bottleneck
+ 4. ``zip`` and ``map`` return iterators. Conditionally import in python2
+
+ .. code-block:: python
+
+    import sys
+    if sys.hexversion < 0x03000000:
+        from future_builtins import zip, map
+
+ 5. ``cPickle->pickle`` and ``StringIO->io`` need to be conditionally imported
+
+ .. code-block:: python
+
+    try:
+        import StringIO as io
+    except ImportError:
+        import io as io
+
+
+ 6. ``map`` code can be changed to list comprehensions. Use ``2to3`` to do heavy lifting
+
+ 7. All normal strings are unicode in python3. Have to use ``bytes`` to support 8-bit char arrays.
+ Normally, this means that ``str`` "just works". However, to provide special handling of
+ both 8-bit and unicode strings in python2, we often need to check for ``isinstance(xxx, basestring)``.
+
+ We need to conditionally define:
+
+ .. code-block:: python
+
+    if sys.hexversion >= 0x03000000:
+        # everything is unicode in python3
+        path_str_type = str
+    else:
+        path_str_type = basestring
+
+    # further down...
+    if isinstance(compiled_regex, path_str_type):
+        pass
+
+
+
+******************************************************************************
+Refactoring: parameter handling
+******************************************************************************
+
+ Though the code is still split in a not very sensible way between ``ruffus_utility.py``, ``file_name_parameters.py`` and ``task.py``,
+ some rationalisation has taken place, and comments added so further refactoring can be made more easily.
+
+ Common code for::
+
+ file_name_parameters.split_ex_param_factory()
+ file_name_parameters.transform_param_factory()
+ file_name_parameters.collate_param_factory()
+
+ has been moved to ``file_name_parameters.py.yield_io_params_per_job()``
+
+
+ Unit tests have been added to ``test_file_name_parameters.py`` and ``test_ruffus_utility.py``.
+
+
+
+
+******************************************************************************
+``formatter``
+******************************************************************************
+ ``get_all_paths_components(paths, regex_str)`` in ``ruffus_utility.py``
+
+ Input file names are first squished into a flat list of files.
+ ``get_all_paths_components()`` returns both the regular expression matches and the break down of the path.
+
+ In case of name clashes, the classes with higher priority override:
+
+ 1) Captures by name
+ 2) Captures by index
+ 3) Path components:
+ 'ext' = extension with dot
+ 'basename' = file name without extension
+ 'path' = path before basename, not ending with slash
+ 'subdir' = list of directories starting with the most nested and ending with the root (if normalised)
+ 'subpath' = list of 'path' with successive directories removed starting with the most nested and ending with the root (if normalised)
+
+ E.g. ``name = '/a/b/c/sample1.bam'``, ``formatter(r"(.*)(?P<id>\d+)\.(.+)")`` returns:
+
+ .. code-block:: python
+
+ 0: '/a/b/c/sample1.bam', // Entire match captured by index
+ 1: '/a/b/c/sample', // captured by index
+ 2: 'bam', // captured by index
+ 'id': '1' // captured by name
+ 'ext': '.bam',
+ 'subdir': ['c', 'b', 'a', '/'],
+ 'subpath': ['/a/b/c', '/a/b', '/a', '/'],
+ 'path': '/a/b/c',
+ 'basename': 'sample1',
+
+
+ The code is in ``ruffus_utility.py``:
+
+ .. code-block:: python
+
+ results = get_all_paths_components(paths, regex_str)
+ string.format(results[2])
+
+
+ All the magic is hidden inside black boxes ``filename_transform`` classes:
+
+ .. code-block:: python
+
+
+ class t_suffix_filename_transform(t_filename_transform):
+ class t_regex_filename_transform(t_filename_transform):
+ class t_format_filename_transform(t_filename_transform):
+
+===================================================
+``formatter()``: ``regex()`` and ``suffix()``
+===================================================
+
+
+ The previous behaviour with ``regex()``, where mismatches fail even if no substitution is made, is retained by the use of ``re.subn()``.
+ This is a corner case but I didn't want user code to break.
+
+ .. code-block:: python
+
+ # filter on ".txt"
+ input_filenames = ["a.wrong", "b.txt"]
+ regex("(.txt)$")
+
+ # fails, no substitution possible
+ r"\1"
+
+ # fails anyway even though the regular expression matches are not referenced...
+ r"output.filename"
+
+
+************************************************************************************************************************************************************
+@product()
+************************************************************************************************************************************************************
+
+ * Use combinatoric generators from itertools and keep that naming scheme
+ * Put all new generators in a ``combinatorics`` submodule namespace to avoid breaking user code. (They can be imported if necessary.)
+ * test code in test/test_combinatorics.py
+ * The ``itertools.product(repeat)`` parameter doesn't make sense for Ruffus and will not be used
+ * Flexible number of pairs of ``task`` / ``glob`` / file names + ``formatter()``
+ * Only ``formatter([OPTIONAL_REGEX])`` provides the necessary flexibility to construct the output, so we won't bother with ``suffix()`` and ``regex()``
+
+ * Similar to ``@transform`` but with extra level of nested-ness
+
+ Retain same code for ``@product`` and ``@transform`` by adding an additional level of indirection:
+ * a generator wrapped around ``get_strings_in_nested_sequence`` to convert nested input parameters either to a single flat list of file names or to nested lists of file names
+
+ .. code-block:: python
+
+ file_name_parameters.input_param_to_file_name_list (input_params)
+ file_name_parameters.list_input_param_to_file_name_list (input_params)
+
+ * ``t_file_names_transform`` class which stores a list of regular expressions, one for each ``formatter()`` object corresponding to a single set of input parameters
+
+ .. code-block:: python
+
+ t_formatter_file_names_transform
+ t_nested_formatter_file_names_transform
+
+ * string substitution functions which will apply a list of ``formatter`` changes
+
+ .. code-block:: python
+
+ ruffus.utility.t_formatter_replace()
+ ruffus.utility.t_nested_formatter_replace()
+
+ * ``ruffus_utility.swap_doubly_nested_order()`` makes the syntax / implementation very orthogonal
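+
+ For orientation, a hypothetical user-level example (file patterns, regular expressions and task
+ names are made up) showing why two levels of ``formatter`` indexing are needed, one for the input
+ set and one for the file within that set:
+
+ .. code-block:: python
+
+    from ruffus import *
+
+    @product(
+            "*.fasta",
+            formatter(r"(?P<SAMPLE>[^/]+)\.fasta$"),
+            "*.idx",
+            formatter(r"(?P<DB>[^/]+)\.idx$"),
+            # first index = which input set, second index = which file within that set
+            "{SAMPLE[0][0]}.vs.{DB[1][0]}.output")
+    def align(input_files, output_file):
+        open(output_file, "w").close()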
+
+************************************************************************************************************************************************************
+``@permutations(...),`` ``@combinations(...),`` ``@combinations_with_replacement(...)``
+************************************************************************************************************************************************************
+
+ Similar to ``@product``, but the extra level of nested-ness is self versus self
+
+ Retain same code for ``@product``
+ * forward to a single ``file_name_parameters.combinatorics_param_factory()``
+ * use ``combinatorics_type`` to dispatch to ``combinatorics.permutations``, ``combinatorics.combinations`` and ``combinatorics.combinations_with_replacement``
+ * use ``list_input_param_to_file_name_list`` from ``file_name_parameters.product_param_factory()``
+
+
+
+************************************************************************************************************************************************************
+drmaa alternatives
+************************************************************************************************************************************************************
+
+ Alternative, non-drmaa polling code at
+
+ https://github.com/bjpop/rubra/blob/master/rubra/cluster_job.py
+
+
+
+************************************************************************************************************************************************************
+Task completion monitoring
+************************************************************************************************************************************************************
+
+===================================================
+ How easy is it to abstract out the database?
+===================================================
+
+ * The database is Jacob Sondergaard's ``dbdict`` which is a nosql / key-value store wrapper around sqlite
+
+ .. code-block:: python
+
+ job_history = dbdict.open(RUFFUS_HISTORY_FILE, picklevalues=True)
+
+ * The key is the output file name, so it is important not to confuse Ruffus by having different tasks generate the same output file!
+ * Is it possible to abstract this so that **jobs** get timestamped as well?
+ * If we should ever want to abstract out ``dbdict``, we need to have a similar key-value store class,
+ and make sure that a single instance of ``dbdict`` is used throughout ``pipeline_run``; this instance is passed up
+ and down the function call chain. ``dbdict`` would then be drop-in replaceable by our custom (e.g. flat-file-based) dbdict alternative.
+
+
+ To peek into the database:
+
+ .. code-block:: bash
+
+ $ sqlite3 .ruffus_history.sqlite
+ sqlite> .tables
+ data
+ sqlite> .schema data
+ CREATE TABLE data (key PRIMARY KEY,value);
+ sqlite> select key from data order by key;
+
+======================================================================================================
+ Can we query the database, get Job history / stats?
+======================================================================================================
+
+ Yes, if we write a function to read and dump the entire database, but this is only useful with timestamps and task names. See below.
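+
+ For example, a rough sketch of such a dump function, reading the checkpoint file directly with
+ ``sqlite3`` (the file name and ``data`` table layout are as shown above; ruffus must be importable
+ so the pickled records can be reconstructed):
+
+ .. code-block:: python
+
+    import sqlite3
+    import pickle
+
+    def dump_job_history(history_file = ".ruffus_history.sqlite"):
+        connection = sqlite3.connect(history_file)
+        for key, value in connection.execute("SELECT key, value FROM data ORDER BY key"):
+            # key is the output file name, value is the pickled per-job record
+            print("%s\t%r" % (key, pickle.loads(bytes(value))))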
+
+======================================================================================================
+ What are the run time performance implications?
+======================================================================================================
+
+ Should be fast: a single db connection is created and used inside ``pipeline_run``, ``pipeline_printout``, ``pipeline_printout_graph``
+
+
+
+===================================================
+ Avoid pauses between tasks
+===================================================
+
+ Saving job completion times in the job history database allows Ruffus to avoid adding an extra 1 second pause between tasks to guard against file systems with low timestamp granularity.
+
+ * If the local file time looks to be in sync with the underlying file system, saved system time is used instead of file timestamps
+
+
+
+
+******************************************************************************************
+``@mkdir(...),``
+******************************************************************************************
+
+ * ``mkdir`` continues to work seamlessly inside ``@follows`` but also as its own decorator ``@mkdir``, due to the original happy orthogonal design
+ * fixed bug in checking so that Ruffus doesn't blow up if non-strings (numbers...) are in the output
+ * note: adding the decorator to a previously undecorated function might have unintended consequences. The undecorated function turns into a zombie.
+ * fixed ugly bug in ``pipeline_printout`` for printing single line output
+ * fixed description and printout indent
+
+
diff --git a/doc/installation.rst b/doc/installation.rst
new file mode 100644
index 0000000..f3ca116
--- /dev/null
+++ b/doc/installation.rst
@@ -0,0 +1,79 @@
+.. include:: global.inc
+.. _Installation:
+
+************************************
+Installation
+************************************
+
+:mod:`Ruffus` is a lightweight python module for building computational pipelines.
+
+
+The easy way
+============
+
+ *Ruffus* is available as an
+ `easy-install <http://peak.telecommunity.com/DevCenter/EasyInstall>`_ -able package
+ on the `Python Package Index <http://pypi.python.org/pypi/ruffus>`_.
+
+ ::
+
+ sudo pip install ruffus --upgrade
+
+ The following may also work for older installations:
+
+ #) Install setuptools::
+
+ wget peak.telecommunity.com/dist/ez_setup.py
+ sudo python ez_setup.py
+
+ #) Install *Ruffus* automatically::
+
+ easy_install -U ruffus
+
+
+The most up-to-date code:
+==============================
+ * `Download the latest sources <https://pypi.python.org/pypi/ruffus>`_ or
+
+ * Check out the latest code from Google using git::
+
+ git clone https://bunbun68@code.google.com/p/ruffus/ .
+
+ * Bleeding edge Ruffus development takes place on github::
+
+ git clone git@github.com:bunbun/ruffus.git .
+
+
+ * To install after downloading, change to the source directory and type::
+
+ python ./setup.py install
+
+
+======================
+Graphical flowcharts
+======================
+
+ **Ruffus** relies on the ``dot`` programme from `Graphviz <http://www.graphviz.org/>`_
+ ("Graph visualisation") to make pretty flowchart representations of your pipelines in multiple
+ graphical formats (e.g. ``png``, ``jpg``). The cross-platform Graphviz package can be
+ `downloaded here <http://www.graphviz.org/Download.php>`_ for Windows,
+ Linux, Macs and Solaris. Some Linux
+ distributions may include prebuilt packages.
+
+ For Fedora, try
+ ::
+
+ yum list 'graphviz*'
+
+ For Ubuntu / Debian, try
+ ::
+
+ sudo apt-get install graphviz
+
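+
+ Once ``dot`` is installed, a flowchart can be produced with *pipeline_printout_graph* (a minimal
+ sketch; the task and file names are illustrative):
+
+ .. code-block:: python
+
+    from ruffus import originate, pipeline_printout_graph
+
+    @originate(["start.txt"])
+    def create_initial_file(output_file):
+        open(output_file, "w").close()
+
+    pipeline_printout_graph(open("flowchart.png", "w"), "png",
+                            [create_initial_file])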
+
+
+
+
+
+
+
diff --git a/doc/make.bat b/doc/make.bat
new file mode 100644
index 0000000..03c59c0
--- /dev/null
+++ b/doc/make.bat
@@ -0,0 +1,112 @@
+@ECHO OFF
+
+REM Command file for Sphinx documentation
+
+set SPHINXBUILD=sphinx-build
+set ALLSPHINXOPTS=-d _build/doctrees %SPHINXOPTS% .
+if NOT "%PAPER%" == "" (
+ set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS%
+)
+
+if "%1" == "" goto help
+
+if "%1" == "help" (
+ :help
+ echo.Please use `make ^<target^>` where ^<target^> is one of
+ echo. html to make standalone HTML files
+ echo. dirhtml to make HTML files named index.html in directories
+ echo. pickle to make pickle files
+ echo. json to make JSON files
+ echo. htmlhelp to make HTML files and a HTML help project
+ echo. qthelp to make HTML files and a qthelp project
+ echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter
+ echo. changes to make an overview over all changed/added/deprecated items
+ echo. linkcheck to check all external links for integrity
+ echo. doctest to run all doctests embedded in the documentation if enabled
+ goto end
+)
+
+if "%1" == "clean" (
+ for /d %%i in (_build\*) do rmdir /q /s %%i
+ del /q /s _build\*
+ goto end
+)
+
+if "%1" == "html" (
+ %SPHINXBUILD% -b html %ALLSPHINXOPTS% _build/html
+ echo.
+ echo.Build finished. The HTML pages are in _build/html.
+ goto end
+)
+
+if "%1" == "dirhtml" (
+ %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% _build/dirhtml
+ echo.
+ echo.Build finished. The HTML pages are in _build/dirhtml.
+ goto end
+)
+
+if "%1" == "pickle" (
+ %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% _build/pickle
+ echo.
+ echo.Build finished; now you can process the pickle files.
+ goto end
+)
+
+if "%1" == "json" (
+ %SPHINXBUILD% -b json %ALLSPHINXOPTS% _build/json
+ echo.
+ echo.Build finished; now you can process the JSON files.
+ goto end
+)
+
+if "%1" == "htmlhelp" (
+ %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% _build/htmlhelp
+ echo.
+ echo.Build finished; now you can run HTML Help Workshop with the ^
+.hhp project file in _build/htmlhelp.
+ goto end
+)
+
+if "%1" == "qthelp" (
+ %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% _build/qthelp
+ echo.
+ echo.Build finished; now you can run "qcollectiongenerator" with the ^
+.qhcp project file in _build/qthelp, like this:
+ echo.^> qcollectiongenerator _build\qthelp\pypeline.qhcp
+ echo.To view the help file:
+ echo.^> assistant -collectionFile _build\qthelp\pypeline.ghc
+ goto end
+)
+
+if "%1" == "latex" (
+ %SPHINXBUILD% -b latex %ALLSPHINXOPTS% _build/latex
+ echo.
+ echo.Build finished; the LaTeX files are in _build/latex.
+ goto end
+)
+
+if "%1" == "changes" (
+ %SPHINXBUILD% -b changes %ALLSPHINXOPTS% _build/changes
+ echo.
+ echo.The overview file is in _build/changes.
+ goto end
+)
+
+if "%1" == "linkcheck" (
+ %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% _build/linkcheck
+ echo.
+ echo.Link check complete; look for any errors in the above output ^
+or in _build/linkcheck/output.txt.
+ goto end
+)
+
+if "%1" == "doctest" (
+ %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% _build/doctest
+ echo.
+ echo.Testing of doctests in the sources finished, look at the ^
+results in _build/doctest/output.txt.
+ goto end
+)
+
+:end
diff --git a/doc/manual_follows1.png b/doc/manual_follows1.png
new file mode 100644
index 0000000..14eedc2
Binary files /dev/null and b/doc/manual_follows1.png differ
diff --git a/doc/pipeline_functions.rst b/doc/pipeline_functions.rst
new file mode 100644
index 0000000..fd267fb
--- /dev/null
+++ b/doc/pipeline_functions.rst
@@ -0,0 +1,689 @@
+.. include:: global.inc
+.. _pipeline_functions:
+
+See :ref:`Decorators <decorators>` for more decorators
+
+.. |pipeline_run| replace:: `pipeline_run`
+.. _pipeline_run: `pipeline_functions.pipeline_run`_
+.. |pipeline_printout| replace:: `pipeline_printout`
+.. _pipeline_printout: `pipeline_functions.pipeline_printout`_
+.. |pipeline_printout_graph| replace:: `pipeline_printout_graph`
+.. _pipeline_printout_graph: `pipeline_functions.pipeline_printout_graph`_
+.. |pipeline_get_task_names| replace:: `pipeline_get_task_names`
+.. _pipeline_get_task_names: `pipeline_functions.pipeline_get_task_names`_
+
+
+.. |pr_target_tasks| replace:: `target_tasks`
+.. _pr_target_tasks: `pipeline_functions.pipeline_run.target_tasks`_
+.. |pr_forcedtorun_tasks| replace:: `forcedtorun_tasks`
+.. _pr_forcedtorun_tasks: `pipeline_functions.pipeline_run.forcedtorun_tasks`_
+.. |pr_multiprocess| replace:: `multiprocess`
+.. _pr_multiprocess: `pipeline_functions.pipeline_run.multiprocess`_
+.. |pr_logger| replace:: `logger`
+.. _pr_logger: `pipeline_functions.pipeline_run.logger`_
+.. |pr_gnu_make| replace:: `gnu_make_maximal_rebuild_mode`
+.. _pr_gnu_make: `pipeline_functions.pipeline_run.gnu_make`_
+.. |pr_verbose| replace:: `verbose`
+.. _pr_verbose: `pipeline_functions.pipeline_run.verbose`_
+.. |pr_runtime_data| replace:: `runtime_data`
+.. _pr_runtime_data: `pipeline_functions.pipeline_run.runtime_data`_
+.. |pr_one_second_per_job| replace:: `one_second_per_job`
+.. _pr_one_second_per_job: `pipeline_functions.pipeline_run.one_second_per_job`_
+.. |pr_touch_files_only| replace:: `touch_files_only`
+.. _pr_touch_files_only: `pipeline_functions.pipeline_run.touch_files_only`_
+
+.. |pr_exceptions_terminate_immediately| replace:: `exceptions_terminate_immediately`
+.. _pr_exceptions_terminate_immediately: `pipeline_functions.pipeline_run.exceptions_terminate_immediately`_
+.. |pr_log_exceptions| replace:: `log_exceptions`
+.. _pr_log_exceptions: `pipeline_functions.pipeline_run.log_exceptions`_
+.. |pr_multithread| replace:: `multithread`
+.. _pr_multithread: `pipeline_functions.pipeline_run.multithread`_
+.. |pr_checksum_level| replace:: `checksum_level`
+.. _pr_checksum_level: `pipeline_functions.pipeline_run.checksum_level`_
+.. |pr_history_file| replace:: `history_file`
+.. _pr_history_file: `pipeline_functions.pipeline_run.history_file`_
+.. |pr_verbose_abbreviated_path| replace:: `verbose_abbreviated_path`
+.. _pr_verbose_abbreviated_path: `pipeline_functions.pipeline_run.verbose_abbreviated_path`_
+
+
+.. |pp_output_stream| replace:: `output_stream`
+.. _pp_output_stream: `pipeline_functions.pipeline_printout.output_stream`_
+.. |pp_target_tasks| replace:: `target_tasks`
+.. _pp_target_tasks: `pipeline_functions.pipeline_printout.target_tasks`_
+.. |pp_forcedtorun_tasks| replace:: `forcedtorun_tasks`
+.. _pp_forcedtorun_tasks: `pipeline_functions.pipeline_printout.forcedtorun_tasks`_
+.. |pp_verbose| replace:: `verbose`
+.. _pp_verbose: `pipeline_functions.pipeline_printout.verbose`_
+.. |pp_indent| replace:: `indent`
+.. _pp_indent: `pipeline_functions.pipeline_printout.indent`_
+.. |pp_wrap_width| replace:: `wrap_width`
+.. _pp_wrap_width: `pipeline_functions.pipeline_printout.wrap_width`_
+.. |pp_gnu_make| replace:: `gnu_make_maximal_rebuild_mode`
+.. _pp_gnu_make: `pipeline_functions.pipeline_printout.gnu_make`_
+.. |pp_runtime_data| replace:: `runtime_data`
+.. _pp_runtime_data: `pipeline_functions.pipeline_printout.runtime_data`_
+.. |pp_checksum_level| replace:: `checksum_level`
+.. _pp_checksum_level: `pipeline_functions.pipeline_printout.checksum_level`_
+.. |pp_history_file| replace:: `history_file`
+.. _pp_history_file: `pipeline_functions.pipeline_printout.history_file`_
+.. |pp_verbose_abbreviated_path| replace:: `verbose_abbreviated_path`
+.. _pp_verbose_abbreviated_path: `pipeline_functions.pipeline_printout.verbose_abbreviated_path`_
+
+
+
+.. |ppg_stream| replace:: `stream`
+.. _ppg_stream: `pipeline_functions.pipeline_printout_graph.stream`_
+.. |ppg_output_format| replace:: `output_format`
+.. _ppg_output_format: `pipeline_functions.pipeline_printout_graph.output_format`_
+.. |ppg_target_tasks| replace:: `target_tasks`
+.. _ppg_target_tasks: `pipeline_functions.pipeline_printout_graph.target_tasks`_
+.. |ppg_forcedtorun_tasks| replace:: `forcedtorun_tasks`
+.. _ppg_forcedtorun_tasks: `pipeline_functions.pipeline_printout_graph.forcedtorun_tasks`_
+.. |ppg_draw_vertically| replace:: `draw_vertically`
+.. _ppg_draw_vertically: `pipeline_functions.pipeline_printout_graph.draw_vertically`_
+.. |ppg_ignore_upstream_of_target| replace:: `ignore_upstream_of_target`
+.. _ppg_ignore_upstream_of_target: `pipeline_functions.pipeline_printout_graph.ignore_upstream_of_target`_
+.. |ppg_skip_uptodate_tasks| replace:: `skip_uptodate_tasks`
+.. _ppg_skip_uptodate_tasks: `pipeline_functions.pipeline_printout_graph.skip_uptodate_tasks`_
+.. |ppg_gnu_make| replace:: `gnu_make_maximal_rebuild_mode`
+.. _ppg_gnu_make: `pipeline_functions.pipeline_printout_graph.gnu_make`_
+.. |ppg_test_all_task_for_update| replace:: `test_all_task_for_update`
+.. _ppg_test_all_task_for_update: `pipeline_functions.pipeline_printout_graph.test_all_task_for_update`_
+.. |ppg_no_key_legend| replace:: `no_key_legend`
+.. _ppg_no_key_legend: `pipeline_functions.pipeline_printout_graph.no_key_legend`_
+.. |ppg_minimal_key_legend| replace:: `minimal_key_legend`
+.. _ppg_minimal_key_legend: `pipeline_functions.pipeline_printout_graph.minimal_key_legend`_
+.. |ppg_pipeline_name| replace:: `pipeline_name`
+.. _ppg_pipeline_name: `pipeline_functions.pipeline_printout_graph.pipeline_name`_
+.. |ppg_user_colour_scheme| replace:: `user_colour_scheme`
+.. _ppg_user_colour_scheme: `pipeline_functions.pipeline_printout_graph.user_colour_scheme`_
+.. |ppg_size| replace:: `size`
+.. _ppg_size: `pipeline_functions.pipeline_printout_graph.size`_
+.. |ppg_dpi| replace:: `dpi`
+.. _ppg_dpi: `pipeline_functions.pipeline_printout_graph.dpi`_
+.. |ppg_runtime_data| replace:: `runtime_data`
+.. _ppg_runtime_data: `pipeline_functions.pipeline_printout_graph.runtime_data`_
+.. |ppg_checksum_level| replace:: `checksum_level`
+.. _ppg_checksum_level: `pipeline_functions.pipeline_printout_graph.checksum_level`_
+.. |ppg_history_file| replace:: `history_file`
+.. _ppg_history_file: `pipeline_functions.pipeline_printout_graph.history_file`_
+
+
+
+
+
+
+
+
+
+
+################################################
+Pipeline functions
+################################################
+
+ There are only four functions for **Ruffus** pipelines:
+
+ * |pipeline_run|_ executes a pipeline
+ * |pipeline_printout|_ prints a list of tasks and jobs which will be run in a pipeline
+ * |pipeline_printout_graph|_ prints a schematic flowchart of pipeline tasks in various graphical formats
+ * |pipeline_get_task_names|_ returns a list of all task names in the pipeline
+
+.. _pipeline_functions.pipeline_run:
+
+.. index::
+ single: pipeline functions; pipeline_run
+ pair: pipeline_run; Run pipeline
+
+**************************************************************************************************************************************************************************************
+*pipeline_run*
+**************************************************************************************************************************************************************************************
+**pipeline_run** ( |pr_target_tasks|_ = [], |pr_forcedtorun_tasks|_ = [], |pr_multiprocess|_ = 1, |pr_logger|_ = stderr_logger, |pr_gnu_make|_ = True, |pr_verbose|_ =1, |pr_runtime_data|_ = None, |pr_one_second_per_job|_ = True, |pr_touch_files_only|_ = False, |pr_exceptions_terminate_immediately|_ = None, |pr_log_exceptions|_ = None, |pr_history_file|_ = None, |pr_checksum_level|_ = None, |pr_multithread|_ = 0, |pr_verbose_abbreviated_path|_ = None)
+
+ **Purpose:**
+
+ Runs all specified pipelined functions if they or any antecedent tasks are
+ incomplete or out-of-date.
+
+ **Example**:
+
+ .. code-block:: python
+
+ #
+ # Run task2 whatever its state, and also task1 and antecedents if they are incomplete
+ # Do not log pipeline progress messages to stderr
+ #
+ pipeline_run([task1, task2], forcedtorun_tasks = [task2], logger = blackhole_logger)
+
+ **Parameters:**
+
+
+
+.. _pipeline_functions.pipeline_run.target_tasks:
+
+ * *target_tasks*
+ Pipeline functions and any necessary antecedents (specified implicitly or with :ref:`@follows <decorators.follows>`)
+ which should be invoked with the appropriate parameters if they are incomplete or out-of-date.
+
+.. _pipeline_functions.pipeline_run.forcedtorun_tasks:
+
+ * *forcedtorun_tasks*
+ Optional. These pipeline functions will be invoked regardless of their state.
+ Any antecedent tasks will also be executed if they are out-of-date or incomplete.
+
+.. _pipeline_functions.pipeline_run.multiprocess:
+
+ * *multiprocess*
+ Optional. The number of processes which should be dedicated to running in parallel independent
+ tasks and jobs within each task. If ``multiprocess`` is set to 1, the pipeline will
+ execute in the main process.
+
+.. _pipeline_functions.pipeline_run.multithread:
+
+ * *multithread*
+ Optional. The number of threads which should be dedicated to running in parallel independent
+ tasks and jobs within each task. Should be used only with drmaa. Otherwise the CPython `global interpreter lock (GIL) <https://wiki.python.org/moin/GlobalInterpreterLock>`__
+ will slow down your pipeline
+
+.. _pipeline_functions.pipeline_run.logger:
+
+ * *logger*
+ For logging messages indicating the progress of the pipeline in terms of tasks and jobs.
+ Defaults to outputting to sys.stderr.
+ Setting ``logger=blackhole_logger`` will prevent any logging output.
+
+.. _pipeline_functions.pipeline_run.gnu_make:
+
+ * *gnu_make_maximal_rebuild_mode*
+ .. warning ::
+ This is a dangerous option. Use rarely and with caution
+
+ Optional parameter governing how **Ruffus** determines which part of the pipeline is
+ out of date and needs to be re-run. If set to ``False``, **ruffus** will work back
+ from the ``target_tasks`` and only execute the pipeline after the first up-to-date
+ tasks that it encounters. For example, if there are four tasks:
+
+ ::
+
+ #
+ # task1 -> task2 -> task3 -> task4 -> task5
+ #
+ target_tasks = [task5]
+
+ If ``task3()`` is up-to-date, then only ``task4()`` and ``task5()`` will be run.
+ This will be the case even if ``task2()`` and ``task1()`` are incomplete.
+
+ This allows you to remove all intermediate results produced by ``task1 -> task3``.
+
+
+
+.. _pipeline_functions.pipeline_run.verbose:
+
+ * *verbose*
+ Optional parameter indicating the verbosity of the messages sent to ``logger``:
+ (Defaults to level 1 if unspecified)
+
+ * level **0** : *nothing*
+ * level **1** : *Out-of-date Task names*
+ * level **2** : *All Tasks (including any task function docstrings)*
+ * level **3** : *Out-of-date Jobs in Out-of-date Tasks, no explanation*
+ * level **4** : *Out-of-date Jobs in Out-of-date Tasks, with explanations and warnings*
+ * level **5** : *All Jobs in Out-of-date Tasks, (include only list of up-to-date tasks)*
+ * level **6** : *All jobs in All Tasks whether out of date or not*
+ * level **10**: *logs messages useful only for debugging ruffus pipeline code*
+
+
+ ``verbose >= 10`` are intended for debugging **Ruffus** by the developers and the details
+ are liable to change from release to release
+
+.. _pipeline_functions.pipeline_run.runtime_data:
+
+ * *runtime_data*
+ Experimental feature for passing data to tasks at run time
+
+.. _pipeline_functions.pipeline_run.one_second_per_job:
+
+ * *one_second_per_job*
+ To work around poor file timestamp resolution for some file systems.
+ Defaults to True if checksum_level is 0, forcing Tasks to take a minimum of 1 second to complete.
+ If your file system has coarse-grained time stamps, you can turn on this delay
+ by setting *one_second_per_job* to ``True``
+
+.. _pipeline_functions.pipeline_run.touch_files_only:
+
+ * *touch_files_only*
+ Create or update output files only to simulate the running of the pipeline.
+ Does not invoke real task functions to run jobs. This is most useful to force a
+ pipeline to acknowledge that a particular part is now up-to-date.
+
+ This will not work properly if the identities of some files are not known beforehand,
+ and depend on run time. In other words, it is not recommended if ``@split`` or custom parameter generators are being used.
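+
+ A minimal sketch (``final_task`` is a hypothetical task function):
+ ::
+
+ # only create / update output files; do not run the real task functions
+ pipeline_run([final_task], touch_files_only = True)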
+
+
+
+.. _pipeline_functions.pipeline_run.exceptions_terminate_immediately:
+
+ * *exceptions_terminate_immediately*
+ Exceptions cause immediate termination of the pipeline.
+
+
+.. _pipeline_functions.pipeline_run.log_exceptions:
+
+ * *log_exceptions*
+ Print exceptions to the logger as soon as they occur.
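+
+ The two options are often combined (a sketch; ``final_task`` is a hypothetical task function):
+ ::
+
+ pipeline_run([final_task],
+ exceptions_terminate_immediately = True,
+ log_exceptions = True)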
+
+
+.. _pipeline_functions.pipeline_run.history_file:
+
+ * *history_file*
+ The database file which stores checksums and file timestamps for input/output files.
+ Defaults to ``.ruffus_history.sqlite`` if unspecified
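+
+ For example, to keep a separate per-project database (the file name here is arbitrary):
+ ::
+
+ pipeline_run([final_task], history_file = "my_project.ruffus_history.sqlite")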
+
+.. _pipeline_functions.pipeline_run.checksum_level:
+
+ * *checksum_level*
+ Several options for checking up-to-dateness are available (see the sketch after this list). The default is level 1.
+
+ * level 0 : Use only file timestamps
+ * level 1 : above, plus timestamp of successful job completion
+ * level 2 : above, plus a checksum of the pipeline function body
+ * level 3 : above, plus a checksum of the pipeline function default arguments and the additional arguments passed in by task decorators
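+
+ A minimal sketch of relying on file timestamps alone (``final_task`` is a hypothetical task function):
+ ::
+
+ # level 0: use only file timestamps, not the job history database
+ pipeline_run([final_task], checksum_level = 0)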
+
+.. _pipeline_functions.pipeline_run.verbose_abbreviated_path:
+
+ * *verbose_abbreviated_path*
+ Whether input and output paths are abbreviated (see the example after this list). Defaults to 2 if unspecified.
+
+ * level 0: The full (expanded, abspath) input or output path
+ * level > 1: The number of subdirectories to include. Abbreviated paths are prefixed with ``[,,,]/``
+ * level < 0: Input / Output parameters are truncated to ``MMM`` letters where ``verbose_abbreviated_path == -MMM``. Subdirectories are first removed to see if this allows the paths to fit in the specified limit. Otherwise abbreviated paths are prefixed by ``<???>``
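+
+ For example, to log only the last two directory levels of each path (``final_task`` is a hypothetical task function):
+ ::
+
+ pipeline_run([final_task], verbose = 3, verbose_abbreviated_path = 2)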
+
+
+
+
+
+
+
+
+.. _pipeline_functions.pipeline_printout:
+
+.. index::
+ single: pipeline functions; pipeline_printout
+ pair: pipeline_printout; Printout simulated run of the pipeline
+
+**********************************************************************************************************************************************************************************************************
+*pipeline_printout*
+**********************************************************************************************************************************************************************************************************
+**pipeline_printout** (|pp_output_stream|_ = sys.stdout, |pp_target_tasks|_ = [], |pp_forcedtorun_tasks|_ = [], |pp_verbose|_ = 1, |pp_indent|_ = 4, |pp_gnu_make|_ = True, |pp_wrap_width|_ = 100, |pp_runtime_data|_ = None, |pp_checksum_level|_ = None, |pp_history_file|_ = None, |pr_verbose_abbreviated_path|_ = None)
+
+ **Purpose:**
+
+ Prints out all the pipelined functions which will be invoked given the specified ``target_tasks``,
+ without actually running the pipeline. Because this is a simulation, some of the job
+ parameters may be incorrect. For example, the results of a :ref:`@split<new_manual.split>`
+ operation are not predetermined and will only be known after the pipelined function
+ splits up the original data. The parameters of all downstream pipelined functions will
+ change depending on this initial operation.
+
+ **Example**:
+ ::
+
+ #
+ # Simulate running task2 whatever its state, and also task1 and antecedents
+ # if they are incomplete
+ # Print out results to STDOUT
+ #
+ pipeline_printout(sys.stdout, [task1, task2], forcedtorun_tasks = [task2], verbose = 1)
+
+ **Parameters:**
+
+.. _pipeline_functions.pipeline_printout.output_stream:
+
+ * *output_stream*
+ Where to printout the results of simulating the running of the pipeline.
+
+.. _pipeline_functions.pipeline_printout.target_tasks:
+
+ * *target_tasks*
+ As in :ref:`pipeline_run<pipeline_functions.pipeline_run>`: Pipeline functions and any necessary antecedents (specified implicitly or with :ref:`@follows <decorators.follows>`)
+ which should be invoked with the appropriate parameters if they are incomplete or out-of-date.
+
+
+.. _pipeline_functions.pipeline_printout.forcedtorun_tasks:
+
+ * *forcedtorun_tasks*
+ As in :ref:`pipeline_run<pipeline_functions.pipeline_run>`: these pipeline functions will be invoked regardless of their state.
+ Any antecedent tasks will also be executed if they are out-of-date or incomplete.
+
+
+.. _pipeline_functions.pipeline_printout.verbose:
+
+ * *verbose*
+ Optional parameter indicating the verbosity of the messages sent to ``output_stream``:
+ (Defaults to level 4 if unspecified)
+
+ * level **0** : *nothing*
+ * level **1** : *Out-of-date Task names*
+ * level **2** : *All Tasks (including any task function docstrings)*
+ * level **3** : *Out-of-date Jobs in Out-of-date Tasks, no explanation*
+ * level **4** : *Out-of-date Jobs in Out-of-date Tasks, with explanations and warnings*
+ * level **5** : *All Jobs in Out-of-date Tasks (include only list of up-to-date tasks)*
+ * level **6** : *All jobs in All Tasks whether out of date or not*
+ * level **10**: *logs messages useful only for debugging ruffus pipeline code*
+
+
+ Verbose levels of 10 or above are intended for debugging **Ruffus** by the developers; the details
+ are liable to change from release to release.
+
+.. _pipeline_functions.pipeline_printout.indent:
+
+ * *indent*
+ Optional parameter governing the indentation when printing out the component job
+ parameters of each task function.
+
+
+.. _pipeline_functions.pipeline_printout.gnu_make:
+
+ * *gnu_make_maximal_rebuild_mode*
+ .. warning ::
+ This is a dangerous option. Use rarely and with caution
+
+ See explanation in :ref:`pipeline_run <pipeline_functions.pipeline_run.gnu_make>`.
+
+.. _pipeline_functions.pipeline_printout.wrap_width:
+
+ * *wrap_width*
+ Optional parameter governing the length of each line before it starts wrapping
+ around.
+
+
+.. _pipeline_functions.pipeline_printout.runtime_data:
+
+ * *runtime_data*
+ Experimental feature for passing data to tasks at run time
+
+
+.. _pipeline_functions.pipeline_printout.history_file:
+
+ * *history_file*
+ The database file which stores checksums and file timestamps for input/output files.
+ Defaults to ``.ruffus_history.sqlite`` if unspecified
+
+.. _pipeline_functions.pipeline_printout.checksum_level:
+
+ * *checksum_level*
+ Several options for checking up-to-dateness are available. The default is level 1.
+
+ * level 0 : Use only file timestamps
+ * level 1 : above, plus timestamp of successful job completion
+ * level 2 : above, plus a checksum of the pipeline function body
+ * level 3 : above, plus a checksum of the pipeline function default arguments and the additional arguments passed in by task decorators
+
+
+.. _pipeline_functions.pipeline_printout.verbose_abbreviated_path:
+
+ * *verbose_abbreviated_path*
+ Whether input and output paths are abbreviated. Defaults to 2 if unspecified
+
+ * level 0: The full (expanded, abspath) input or output path
+ * level > 1: The number of subdirectories to include. Abbreviated paths are prefixed with ``[,,,]/``
+ * level < 0: Input / Output parameters are truncated to ``MMM`` letters where ``verbose_abbreviated_path == -MMM``. Subdirectories are first removed to see if this allows the paths to fit in the specified limit. Otherwise abbreviated paths are prefixed by ``<???>``
+
+
+.. _pipeline_functions.pipeline_printout_graph:
+
+.. index::
+ single: pipeline functions; pipeline_printout_graph
+ pair: pipeline_printout_graph; print flowchart representation of pipeline functions
+
+
+
+
+
+************************************************************************************************************************************************************************************************************************************************************************************
+*pipeline_printout_graph*
+************************************************************************************************************************************************************************************************************************************************************************************
+
+**pipeline_printout_graph** (|ppg_stream|_, |ppg_output_format|_ = None, |ppg_target_tasks|_ = [], |ppg_forcedtorun_tasks|_ = [], |ppg_ignore_upstream_of_target|_ = False, |ppg_skip_uptodate_tasks|_ = False, |ppg_gnu_make|_ = True, |ppg_test_all_task_for_update|_ = True, |ppg_no_key_legend|_ = False, |ppg_minimal_key_legend|_ = True, |ppg_user_colour_scheme|_ = None, |ppg_pipeline_name|_ = "Pipeline", |ppg_size|_ = (11,8), |ppg_dpi|_ = 120, |ppg_runtime_data|_ = None, |ppg_checksum_leve [...]
+
+ **Purpose:**
+
+ Prints out a flowchart of all the pipelined functions which will be invoked given the specified ``target_tasks``,
+ without actually running the pipeline.
+
+ See :ref:`Flowchart colours <new_manual.flowchart_colours>`
+
+ **Example**:
+ ::
+
+ pipeline_printout_graph("flowchart.jpg", "jpg", [task1, task16],
+ forcedtorun_tasks = [task2],
+ no_key_legend = True)
+
+ **Customising appearance:**
+
+ The :ref:`user_colour_scheme <pipeline_functions.pipeline_printout_graph.user_colour_scheme>` parameter can be used to change
+ flowchart colours. This allows the default :ref:`Colour Schemes <new_manual.flowchart_colours>`
+ to be set. An example of customising flowchart appearance is available :ref:`(see code) <new_manual.flowchart_colours.code>` .
+
+
+
+
+ **Parameters:**
+
+.. _pipeline_functions.pipeline_printout_graph.stream:
+
+ * *stream*
+ The file or file-like object to which the flowchart should be printed.
+ If a string is provided, it is assumed that this is the name of the output file
+ which will be opened automatically.
+
+
+.. _pipeline_functions.pipeline_printout_graph.output_format:
+
+ * *output_format*
+ If missing, defaults to the extension of the *stream* file name (i.e. ``jpg`` for ``a.jpg``)
+
+ | If the programme ``dot`` can be found on the execution path, this
+ can be any number of `formats <http://www.graphviz.org/doc/info/output.html>`_
+ supported by `Graphviz <http://www.graphviz.org/>`_, including, for example,
+ ``jpg``, ``png``, ``pdf``, ``svg`` etc.
+ | Otherwise, **Ruffus** will only produce error-free output in the `dot <http://en.wikipedia.org/wiki/DOT_language>`_ format, which
+ is a plain-text graph description language.
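+
+ For example, a sketch of writing the plain-text ``dot`` format, which does not require Graphviz to be installed (``final_task`` is a hypothetical task function):
+ ::
+
+ pipeline_printout_graph("flowchart.dot", "dot", [final_task])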
+
+.. _pipeline_functions.pipeline_printout_graph.target_tasks:
+
+ * *target_tasks*
+ As in :ref:`pipeline_run<pipeline_functions.pipeline_run>`: Pipeline functions and any necessary antecedents (specified implicitly or with :ref:`@follows <decorators.follows>`)
+ which should be invoked with the appropriate parameters if they are incomplete or out-of-date.
+
+
+.. _pipeline_functions.pipeline_printout_graph.forcedtorun_tasks:
+
+ * *forcedtorun_tasks*
+ As in :ref:`pipeline_run<pipeline_functions.pipeline_run>`: these pipeline functions will be invoked regardless of their state.
+ Any antecedent tasks will also be executed if they are out-of-date or incomplete.
+
+.. _pipeline_functions.pipeline_printout_graph.draw_vertically:
+
+ * *draw_vertically*
+ Draw flowchart in vertical orientation
+
+.. _pipeline_functions.pipeline_printout_graph.ignore_upstream_of_target:
+
+ * *ignore_upstream_of_target*
+ Start drawing flowchart from specified target tasks. Do not draw tasks which are
+ downstream (subsequent) to the targets.
+
+.. _pipeline_functions.pipeline_printout_graph.skip_uptodate_tasks:
+
+ * *skip_uptodate_tasks*
+ Do not draw up-to-date / completed tasks in the flowchart unless they
+ lie on the execution path of the pipeline.
+
+.. _pipeline_functions.pipeline_printout_graph.gnu_make:
+
+ * *gnu_make_maximal_rebuild_mode*
+ .. warning ::
+ This is a dangerous option. Use rarely and with caution
+
+ See explanation in :ref:`pipeline_run <pipeline_functions.pipeline_run.gnu_make>`.
+
+.. _pipeline_functions.pipeline_printout_graph.test_all_task_for_update:
+
+ * *test_all_task_for_update*
+ | Indicates whether intermediate tasks should also be checked to see if they are out of date. Normally **Ruffus** will
+ stop checking dependent tasks for completion or whether they are out-of-date once it has
+ discovered the maximal extent of the pipeline which has to be run.
+ | For displaying the flow of the pipeline, this default behaviour is not very informative.
+
+.. _pipeline_functions.pipeline_printout_graph.no_key_legend:
+
+ * *no_key_legend*
+ Do not include key legend explaining the colour scheme of the flowchart.
+
+
+.. _pipeline_functions.pipeline_printout_graph.minimal_key_legend:
+
+ * *minimal_key_legend*
+ Do not include unused task types in key legend.
+
+.. _pipeline_functions.pipeline_printout_graph.user_colour_scheme:
+
+ * *user_colour_scheme*
+ Dictionary specifying colour scheme for flowchart
+
+ See complete :ref:`list of Colour Schemes <new_manual.flowchart_colours>`.
+
+ | Colours can be names e.g. ``"black"`` or quoted hex e.g. ``'"#F6F4F4"'`` (note extra quotes)
+ | Default values will be used unless specified
+
+ .. csv-table::
+ :header: "key", "Subkey", ""
+
+ "
+ - ``'colour_scheme_index'`` ", "| index of default colour scheme,
+ | 0-7, defaults to 0 unless specified", ""
+ "
+ - ``'Final target'``
+ - ``'Explicitly specified task'``
+ - ``'Task to run'``
+ - ``'Down stream'``
+ - ``'Up-to-date Final target'``
+ - ``'Up-to-date task forced to rerun'``
+ - ``'Up-to-date task'``
+ - ``'Vicious cycle'``
+ ","
+ - ``'fillcolor'``
+ - ``'fontcolor'``
+ - ``'color'``
+ - ``'dashed'`` = ``0/1``
+ ", "Colours / attributes for each task type"
+ "
+ - ``'Vicious cycle'``
+ - ``'Task to run'``
+ - ``'Up-to-date'``", "- ``'linecolor'``", "Colours for arrows between tasks"
+ "- ``'Pipeline'``", "- ``'fontcolor'``","Flowchart title colour"
+ "- ``'Key'``", "
+ - ``'fontcolor'``
+ - ``'fillcolor'``", "Legend colours"
+
+ Example:
+
+ Use colour scheme index = 1
+ ::
+
+ pipeline_printout_graph ("flowchart.svg", "svg", [final_task],
+ user_colour_scheme = {
+ "colour_scheme_index" :1,
+ "Pipeline" :{"fontcolor" : '"#FF3232"' },
+ "Key" :{"fontcolor" : "Red",
+ "fillcolor" : '"#F6F4F4"' },
+ "Task to run" :{"linecolor" : '"#0044A0"' },
+ "Final target" :{"fillcolor" : '"#EFA03B"',
+ "fontcolor" : "black",
+ "dashed" : 0 }
+ })
+
+
+
+
+.. _pipeline_functions.pipeline_printout_graph.pipeline_name:
+
+ * *pipeline_name*
+ Specify title for flowchart
+
+.. _pipeline_functions.pipeline_printout_graph.size:
+
+ * *size*
+ Size in inches for flowchart
+
+.. _pipeline_functions.pipeline_printout_graph.dpi:
+
+ * *dpi*
+ Resolution in dots per inch. Ignored for svg output
+
+.. _pipeline_functions.pipeline_printout_graph.runtime_data:
+
+ * *runtime_data*
+ Experimental feature for passing data to tasks at run time
+
+.. _pipeline_functions.pipeline_printout_graph.history_file:
+
+ * *history_file*
+ The database file which stores checksums and file timestamps for input/output files.
+ Defaults to ``.ruffus_history.sqlite`` if unspecified
+
+.. _pipeline_functions.pipeline_printout_graph.checksum_level:
+
+ * *checksum_level*
+ Several options for checking up-to-dateness are available. The default is level 1.
+
+ * level 0 : Use only file timestamps
+ * level 1 : above, plus timestamp of successful job completion
+ * level 2 : above, plus a checksum of the pipeline function body
+ * level 3 : above, plus a checksum of the pipeline function default arguments and the additional arguments passed in by task decorators
+
+
+
+.. _pipeline_functions.pipeline_get_task_names:
+
+.. index::
+ single: pipeline functions; pipeline_get_task_names
+ pair: pipeline_get_task_names; print list of task names without running the pipeline
+
+
+**************************************************************************************************************************************************************************************
+*pipeline_get_task_names*
+**************************************************************************************************************************************************************************************
+**pipeline_get_task_names** ()
+
+ **Purpose:**
+
+ Returns a list of all task names in the pipeline, without running the pipeline or checking whether the tasks are connected correctly.
+
+ **Example**:
+
+ Given:
+
+ .. code-block:: python
+
+ from ruffus import *
+
+ @originate([])
+ def create_data(output_files):
+ pass
+
+ @transform(create_data, suffix(".txt"), ".task1")
+ def task1(input_files, output_files):
+ pass
+
+ @transform(task1, suffix(".task1"), ".task2")
+ def task2(input_files, output_files):
+ pass
+
+ Produces a list of three task names:
+
+ .. code-block:: pycon
+
+ >>> pipeline_get_task_names ()
+ ['create_data', 'task1', 'task2']
diff --git a/doc/propset b/doc/propset
new file mode 100755
index 0000000..df8ac0f
--- /dev/null
+++ b/doc/propset
@@ -0,0 +1,5 @@
+svn propset -R svn:mime-type text/css `find html/ -name .svn -type f -prune -o -name *.css `
+svn propset -R svn:mime-type image/jpeg `find html/ -name .svn -type f -prune -o -name *.jpg `
+svn propset -R svn:mime-type text/javascript `find html/ -name .svn -type f -prune -o -name *.js `
+svn propset -R svn:mime-type image/x-png `find html/ -name .svn -type f -prune -o -name *.png `
+svn propset -R svn:mime-type text/html `find html/ -name .svn -type f -prune -o -name *.html `
diff --git a/doc/proxy_logger.rst b/doc/proxy_logger.rst
new file mode 100644
index 0000000..1e11a28
--- /dev/null
+++ b/doc/proxy_logger.rst
@@ -0,0 +1,24 @@
+.. include:: global.inc
+#####################
+ruffus.proxy_logger
+#####################
+
+.. _proxy-logger:
+
+.. automodule:: ruffus.proxy_logger
+ :undoc-members:
+
+
+===========================
+Proxies for a log:
+===========================
+
+.. autofunction:: make_shared_logger_and_proxy
+
+===========================
+Create a logging object
+===========================
+
+
+.. autofunction:: setup_std_shared_logger
+
diff --git a/doc/regenerate_figures b/doc/regenerate_figures
new file mode 100755
index 0000000..b353b5f
--- /dev/null
+++ b/doc/regenerate_figures
@@ -0,0 +1,6 @@
+images/manual_follows1.png.py
+images/manual_dependencies_flowchart_intro.png.py
+images/manual_dependencies_flowchart.png.py
+images/pretty_flowchart.png.py
+#images/simple_tutorial_complex_flowchart.py
+
diff --git a/doc/static_data/example_scripts/complicated_example.py b/doc/static_data/example_scripts/complicated_example.py
new file mode 100755
index 0000000..3c626a8
--- /dev/null
+++ b/doc/static_data/example_scripts/complicated_example.py
@@ -0,0 +1,527 @@
+#!/usr/bin/env python
+"""
+
+ complicated_example.py
+
+"""
+
+import os, sys
+exe_path = os.path.split(os.path.abspath(sys.argv[0]))[0]
+sys.path.append(os.path.abspath(os.path.join(exe_path,"..", "..")))
+from ruffus import *
+from time import sleep
+import random
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# options
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+from optparse import OptionParser
+parser = OptionParser(version="%prog 1.0")
+parser.add_option("-t", "--target_tasks", dest="target_tasks",
+ action="append",
+ default = ["summarise_all"],
+ metavar="JOBNAME",
+ type="string",
+ help="Target task(s) of pipeline.")
+parser.add_option("-f", "--forced_tasks", dest="forced_tasks",
+ action="append",
+ default = list(),
+ metavar="JOBNAME",
+ type="string",
+ help="Pipeline task(s) which will be included even if they are up to date.")
+parser.add_option("-j", "--jobs", dest="jobs",
+ default=5,
+ metavar="jobs",
+ type="int",
+ help="Specifies the number of jobs (commands) to run simultaneously.")
+
+parser.add_option("-d", "--data_dir", dest="data_dir",
+ default="%s/data_for_complicated_example" % exe_path,
+ metavar="PATH",
+ type="string",
+ help="Directory with starting data [*.fa].")
+parser.add_option("-w", "--working_dir", dest="working_dir",
+ default="/working_dir",
+ metavar="PATH",
+ type="string",
+ help="Working directory.")
+
+
+parser.add_option("-v", "--verbose", dest = "verbose",
+ action="store_true", default=False,
+ help="Do not echo to shell but only print to log.")
+parser.add_option("-D", "--dependency", dest="dependency_file",
+ metavar="FILE",
+ type="string",
+ help="Print a dependency graph of the pipeline that would be executed "
+ "to FILE, but do not execute it.")
+parser.add_option("-F", "--dependency_graph_format", dest="dependency_graph_format",
+ metavar="FORMAT",
+ type="string",
+ default = 'svg',
+ help="format of dependency graph file. Can be 'ps' (PostScript), "+
+ "'svg' 'svgz' (Structured Vector Graphics), " +
+ "'png' 'gif' (bitmap graphics) etc ")
+parser.add_option("-n", "--just_print", dest="just_print",
+ action="store_true", default=False,
+ help="Print a description of the jobs that would be executed, "
+ "but do not execute them.")
+
+
+
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# imports
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+import StringIO
+import re
+import operator
+import sys
+from collections import defaultdict
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Functions
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+#_________________________________________________________________________________________
+#
+# Helper function:
+#
+# split_gene_files
+#
+#_________________________________________________________________________________________
+def split_gene_files ( gene_file_name,
+ job_completion_flag_file_name,
+ split_output_dir):
+ """
+ Helper function to simulate splitting gene files into "chunks" suitable for
+ parallel jobs on a computational cluster
+
+ The number of output files is only known at runtime
+ because the number of "chunks" depends on the size
+ of the starting gene sets
+
+ We simulate this using a random number from 20->50
+ """
+
+ #
+ # make output directory
+ #
+ if not os.path.exists(split_output_dir):
+ os.makedirs(split_output_dir)
+
+ # save number of chunks for later tasks
+ number_of_output_files = int(random.uniform(20, 50))
+
+ for index in range(number_of_output_files):
+ open("%s/%d.fa" % (split_output_dir, index), "w")
+ open(job_completion_flag_file_name, "w")
+
+
+#_________________________________________________________________________________________
+#
+# get_unknown_gene_set_names
+# get_species_names
+#
+#
+# functions for getting unknown gene set names and species names
+#
+#_________________________________________________________________________________________
+import glob, re
+def get_chunked_gene_file_names (dir_name):
+ """
+ Get list of gene file names
+ Helper function for getting unknown gene set names, and species names
+ """
+ regex = re.compile(r".+/(.+).genes.fa")
+ gene_set_names = []
+ for file_name in glob.glob("%s/%s/*.genes.fa" % (d_dir, dir_name)):
+ m = regex.search(file_name)
+ gene_set_names.append(m.group(1))
+ return gene_set_names
+def get_unknown_gene_set_names ():
+ return get_chunked_gene_file_names("unknown_genes")
+def get_species_names ():
+ return get_chunked_gene_file_names("all_genes_in_each_species")
+
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Main logic
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+
+
+
+
+# get help string
+f =StringIO.StringIO()
+parser.print_help(f)
+helpstr = f.getvalue()
+(options, remaining_args) = parser.parse_args()
+
+
+d_dir = options.data_dir
+w_dir = options.working_dir
+
+
+
+
+#_________________________________________________________________________________________
+#
+# Step 1:
+#
+# split_unknown_gene_set
+#
+# data_dir/unknown_genes/XXX.genes.fa
+# ->working_dir/XXX/split_gene_sets.completed
+# ->working_dir/XXX/NNN.fa
+#
+#_________________________________________________________________________________________
+ at follows(mkdir(w_dir))
+ at files_re("%s/unknown_genes/*.genes.fa" % d_dir,
+ r"(.*/)(.*)(\.genes.fa)",
+ r"\1\2\3", # unknown_gene_set file name
+ r"%s/\2/split_gene_sets.completed" % w_dir, # job_completion_flag
+ r"%s/\2" % w_dir) # split_output_dir
+def split_unknown_gene_set( starting_gene_set,
+ job_completion_flag,
+ split_output_dir):
+ """
+ Simulate splitting gene files for unknown gene set into "chunks" suitable for
+ parallel jobs on a computational cluster
+ """
+ split_gene_files ( starting_gene_set,
+ job_completion_flag,
+ split_output_dir)
+ sleep(1)
+
+
+#_________________________________________________________________________________________
+#
+# Step 2:
+#
+# split_per_species_gene_sets
+
+# data_dir/all_genes_in_each_species/YYY.genes.fa
+# ->working_dir/species_YYY/split_gene_sets.completed
+# ->working_dir/species_YYY/MMM.fa
+#
+#_________________________________________________________________________________________
+ at follows(mkdir(w_dir))
+ at files_re("%s/all_genes_in_each_species/*.genes.fa" % d_dir,
+ r"(.*/)(.*)(\.genes.fa)",
+ r"\1\2\3", # all_genes_in_species
+ r"%s/species_\2/split_gene_sets.completed" % w_dir, # job_completion_flag
+ r"%s/species_\2" % w_dir) # split_output_dir
+def split_per_species_gene_sets(all_genes_in_species,
+ job_completion_flag,
+ split_output_dir):
+ """
+ Simulate splitting gene files for each species into "chunks" suitable for
+ parallel jobs on a computational cluster
+ """
+ split_gene_files ( all_genes_in_species,
+ job_completion_flag,
+ split_output_dir)
+ sleep(1)
+
+
+
+#_________________________________________________________________________________________
+#
+# Step 3:
+#
+# all_vs_all_comparisons
+# working_dir/species_YYY/MMM.fa
+# working_dir/XXX/NNN.fa
+# -> compare/x/y.n.m.comparison_res
+# -> compare/x/y.n.m.complete
+#
+#_________________________________________________________________________________________
+#
+# function for generating custom parameters
+#
+def generate_all_vs_all_params ():
+ """
+ Custom function to generate
+ all vs. all file names for the various "chunks"
+ """
+
+ chunk_index_regex = re.compile(r".+/(.+).fa")
+ def parse_index_from_chunk_filename (chunk_filename):
+ match = chunk_index_regex.search(chunk_filename)
+ return int(match.group(1))
+
+ species_names = get_species_names()
+ gene_set_names = get_unknown_gene_set_names()
+ for x in gene_set_names:
+ for y in species_names:
+ y = "species_" + y
+
+ m_files = glob.glob("%s/%s/*.fa" % (w_dir, x))
+ n_files = glob.glob("%s/%s/*.fa" % (w_dir, y))
+
+ #
+ # for each species chunk vs for each unknown chunk
+ #
+ for m_file in m_files:
+ for n_file in n_files:
+ input_files = [m_file, n_file]
+ output_dir = "%s/compare/%s" % (w_dir, x)
+
+ m = parse_index_from_chunk_filename(m_file)
+ n = parse_index_from_chunk_filename(n_file)
+
+ job_completion_flag = output_dir + "/%s.%d.%d.complete" % (y, m, n)
+ result_file = output_dir + "/%s.%d.%d.comparison_res" % (y, m, n)
+ name = "%s -> %d vs %d\n" % (y, m, n)
+ yield input_files, job_completion_flag, output_dir, result_file, name
+
+
+
+ at follows(split_unknown_gene_set, split_per_species_gene_sets)
+ at files(generate_all_vs_all_params)
+def all_vs_all_comparisons(file_chunks,
+ job_completion_flag,
+ output_dir,
+ result_file,
+ name):
+ """
+ Simulate comparison of gene chunks against each other
+ Normally runs in parallel on a computational cluster
+ """
+
+ #
+ # make output directory
+ #
+ try:
+ os.makedirs(output_dir)
+ except OSError:
+ pass
+
+ open(job_completion_flag, "w")
+ open(result_file, "w").write(name)
+
+
+#_________________________________________________________________________________________
+#
+# Step 4:
+#
+# Recombine: alignment results to make gene families
+# compare/x/*.comparison_res
+# -> multiple_alignment/x/x.gene_families
+#
+#_________________________________________________________________________________________
+
+#
+# generate_params_for_making_gene_families
+#
+# function for generating custom parameters
+#
+def generate_params_for_making_gene_families ():
+ """
+ Custom function for combining comparison files into gene families
+ """
+ gene_set_names = get_unknown_gene_set_names()
+ for x in gene_set_names:
+ results_files = glob.glob("%s/compare/%s/*.comparison_res" % (w_dir, x))
+ output_dir = "%s/multiple_alignment/%s" % (w_dir, x)
+ family_file = "%s/gene.families" % output_dir
+ yield results_files, family_file, output_dir
+
+
+ at follows(all_vs_all_comparisons)
+ at files(generate_params_for_making_gene_families)
+def combine_into_gene_familes (results_files, family_file_name, output_dir):
+ """
+ Simulate making gene families by concatenating comparison results :-)
+ """
+ #
+ # make output directory
+ #
+ if not os.path.exists(output_dir):
+ os.makedirs(output_dir)
+
+ family_file = open(family_file_name, "w")
+ for f in results_files:
+ family_file.write(open(f).read())
+ sleep(1)
+
+#_________________________________________________________________________________________
+#
+# Step 5:
+#
+# split_gene_family_for_evolutionary_analysis
+# multiple_alignment/x/x.gene_families
+# -> multiple_alignment/x/NNN.aln
+# -> multiple_alignment/x/split.completed
+#
+#_________________________________________________________________________________________
+ at follows(combine_into_gene_familes)
+ at files_re("%s/multiple_alignment/*/gene.families" % w_dir,
+ r"(.+)/(gene.families)",
+ r"\1/\2",
+ r"\1/split.completed",
+ r"\1")
+def split_gene_family_for_evolutionary_analysis( family_file,
+ job_completion_flag_file, split_output_dir):
+ """
+ Simulate splitting family of genes into "chunks" suitable for
+ parallel jobs on a computational cluster
+ """
+
+ # save number of chunks for later tasks
+ number_of_output_files = int(random.uniform(20, 50))
+
+ for index in range(number_of_output_files):
+ open("%s/%d.aln" % (split_output_dir, index), "w").write("chunk %d" % index)
+ open(job_completion_flag_file, "w")
+ sleep(1)
+
+
+#_________________________________________________________________________________________
+#
+# Step 6:
+#
+# evolution_analysis
+# multiple_alignment/x/NNN.aln
+# -> multiple_alignment/x/NNN.evo_res
+#
+#_________________________________________________________________________________________
+ at follows(split_gene_family_for_evolutionary_analysis)
+ at files_re("%s/multiple_alignment/*/*.aln" % w_dir,
+ r"(.+).aln",
+ r"\1.evo_res")
+def evolution_analysis( family_file, result_file_name):
+ """
+ Simulate evolutionary analysis
+ """
+
+ result_file = open(result_file_name, "w")
+ result_file.write(family_file + "\n")
+ sleep(1)
+
+
+#_________________________________________________________________________________________
+#
+# Step 7:
+#
+# combine_evolution_analysis
+# multiple_alignment/x/NNN.evo_res
+# -> evolutionary_analysis/x.results
+#
+#_________________________________________________________________________________________
+
+#
+# generate_params_for_combining_evolutionary_analyses
+#
+# function for generating custom parameters
+#
+def generate_params_for_combining_evolutionary_analyses ():
+ """
+ Custom function for combining evolutionary analyses per unknown gene set
+ """
+ gene_set_names = get_unknown_gene_set_names()
+ for x in gene_set_names:
+ results_files = glob.glob("%s/multiple_alignment/%s/*.evo_res" % (w_dir, x))
+ combined_file = "%s/evolutionary_analysis/%s.results" % (w_dir, x)
+ yield results_files, combined_file
+
+ at follows(evolution_analysis, mkdir("%s/evolutionary_analysis" % w_dir))
+ at files(generate_params_for_combining_evolutionary_analyses)
+def combine_evolution_analysis (results_files, combined_file_name):
+ """
+ Simulate combining evolutionary analyses
+ """
+ combined_file = open(combined_file_name, "w")
+ for f in results_files:
+ combined_file.write(open(f).read())
+ sleep(1)
+
+
+
+#_________________________________________________________________________________________
+#
+# Step 8:
+#
+# summarise_evolution_analysis
+# evolutionary_analysis/x.results
+# -> evolutionary_analysis/x.summary
+#
+#_________________________________________________________________________________________
+ at follows(combine_evolution_analysis)
+ at files_re("%s/evolutionary_analysis/*.results" % w_dir,
+ r"(.+).results",
+ r"\1.summary")
+def summarise_evolution_analysis( results_file, summary_file_name):
+ """
+ Simulate summary of evolutionary analysis
+ """
+ summary_file = open(summary_file_name, "w")
+ summary_file.write("summary of " + open(results_file).read())
+ sleep(1)
+
+
+#_________________________________________________________________________________________
+#
+# Step 9:
+#
+# summarise_all
+# evolutionary_analysis/x.summary
+# -> all.total_summary
+#
+#_________________________________________________________________________________________
+summary_file_names = ["%s/evolutionary_analysis/%s.summary" % (w_dir, n)
+ for n in get_unknown_gene_set_names()]
+total_summary_file_name = "%s/all.total_summary" % w_dir
+
+ at follows(summarise_evolution_analysis)
+ at files(summary_file_names, total_summary_file_name)
+def summarise_all( summary_files, total_summary_file_name):
+ """
+ Simulate summarising all results
+ """
+ total_summary_file = open(total_summary_file_name, "w")
+ total_summary_file.write("Over all Summary:\n")
+ for f in summary_files:
+ total_summary_file.write(open(f).read())
+ sleep(1)
+
+
+
+
+
+
+
+#888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+#
+# print pipeline or run pipeline
+#
+
+if options.just_print:
+ pipeline_printout(sys.stdout, options.target_tasks, options.forced_tasks, long_winded=True)
+
+elif options.dependency_file:
+ graph_printout ( open(options.dependency_file, "w"),
+ options.dependency_graph_format,
+ options.target_tasks,
+ options.forced_tasks)
+else:
+ pipeline_run(options.target_tasks, options.forced_tasks, multiprocess = options.jobs)
+
diff --git a/doc/static_data/example_scripts/intermediate_example.py b/doc/static_data/example_scripts/intermediate_example.py
new file mode 100755
index 0000000..8128903
--- /dev/null
+++ b/doc/static_data/example_scripts/intermediate_example.py
@@ -0,0 +1,313 @@
+#!/usr/bin/env python
+"""
+
+ intermediate_example.py
+
+ This script takes N pairs of input files
+ (with the suffixes .gene and .gwas)
+ and runs them against M sets of simulation data
+ (with the suffix .simulation)
+ A summary per input file pair is then produced
+
+
+ In pseudo-code:
+
+ STEP_1:
+
+ for n_file in NNN_pairs_of_input_files:
+ for m_file in MMM_simulation_data:
+
+ [n_file.gene,
+ n_file.gwas,
+ m_file.simulation] -> n_file.m_file.simulation_res
+
+
+ STEP_2:
+
+ for n_file in NNN_pairs_of_input_files:
+
+ n_file.*.simulation_res -> n_file.mean
+
+
+
+"""
+
+import os, sys
+exe_path = os.path.split(os.path.abspath(sys.argv[0]))[0]
+from ruffus import *
+from time import sleep
+import random
+from itertools import izip
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# options
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+from optparse import OptionParser
+parser = OptionParser(version="%prog 1.0")
+parser.add_option("-t", "--target_tasks", dest="target_tasks",
+ action="append",
+ default = ["statistical_summary"],
+ metavar="JOBNAME",
+ type="string",
+ help="Target task(s) of pipeline.")
+parser.add_option("-f", "--forced_tasks", dest="forced_tasks",
+ action="append",
+ default = list(),
+ metavar="JOBNAME",
+ type="string",
+ help="Pipeline task(s) which will be included even if they are up to date.")
+parser.add_option("-j", "--jobs", dest="jobs",
+ default=5,
+ metavar="jobs",
+ type="int",
+ help="Specifies the number of jobs (commands) to run simultaneously.")
+
+parser.add_option("-g", "--gene_data_dir", dest="gene_data_dir",
+ default="%s/data_for_intermediate_example/genes" % exe_path,
+ metavar="PATH",
+ type="string",
+ help="Directory with gene data [*.genes / *.gwas].")
+parser.add_option("-s", "--simulation_data_dir", dest="simulation_data_dir",
+ default="%s/data_for_intermediate_example/simulation" % exe_path,
+ metavar="PATH",
+ type="string",
+ help="Directory with simulation data [*.simulation].")
+parser.add_option("-w", "--working_dir", dest="working_dir",
+ default="/working_dir",
+ metavar="PATH",
+ type="string",
+ help="Working directory.")
+
+
+parser.add_option("-v", "--verbose", dest = "verbose",
+ action="store_true", default=False,
+ help="Do not echo to shell but only print to log.")
+parser.add_option("-D", "--dependency", dest="dependency_file",
+ metavar="FILE",
+ type="string",
+ help="Print a dependency graph of the pipeline that would be executed "
+ "to FILE, but do not execute it.")
+parser.add_option("-F", "--dependency_graph_format", dest="dependency_graph_format",
+ metavar="FORMAT",
+ type="string",
+ default = 'svg',
+ help="format of dependency graph file. Can be 'ps' (PostScript), "+
+ "'svg' 'svgz' (Structured Vector Graphics), " +
+ "'png' 'gif' (bitmap graphics) etc ")
+parser.add_option("-n", "--just_print", dest="just_print",
+ action="store_true", default=False,
+ help="Print a description of the jobs that would be executed, "
+ "but do not execute them.")
+
+
+
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# imports
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+import StringIO
+import re
+import operator
+import sys
+from collections import defaultdict
+import glob
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Functions
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+
+#_________________________________________________________________________________________
+#
+# get gene gwas file pairs
+#
+#_________________________________________________________________________________________
+def get_gene_gwas_file_pairs( ):
+ """
+ Helper function to get all *.gene, *.gwas from the directory specified
+ in --gene_data_dir
+
+ Returns
+ file pairs with both .gene and .gwas extensions,
+ corresponding roots (no extension) of each file
+ """
+
+
+ gene_files = glob.glob(os.path.join(options.gene_data_dir, "*.gene"))
+ gwas_files = glob.glob(os.path.join(options.gene_data_dir, "*.gwas"))
+
+ common_roots = set(map(lambda x: os.path.splitext(os.path.split(x)[1])[0], gene_files))
+ common_roots &=set(map(lambda x: os.path.splitext(os.path.split(x)[1])[0], gwas_files))
+ common_roots = list(common_roots)
+
+ p = os.path; g_dir = options.gene_data_dir
+
+ file_pairs = [[p.join(g_dir, x + ".gene"), p.join(g_dir, x + ".gwas")] for x in common_roots]
+
+ return file_pairs, common_roots
+
+#_________________________________________________________________________________________
+#
+# get simulation files
+#
+#_________________________________________________________________________________________
+def get_simulation_files( ):
+ """
+ Helper function to get all *.simulation from the directory specified
+ in --simulation_data_dir
+ Returns
+ file with .simulation extensions,
+ corresponding roots (no extension) of each file
+ """
+ simulation_files = glob.glob(os.path.join(options.simulation_data_dir, "*.simulation"))
+ simulation_roots =map(lambda x: os.path.splitext(os.path.split(x)[1])[0], simulation_files)
+ return simulation_files, simulation_roots
+
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Main logic
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+
+
+
+
+# get help string
+f =StringIO.StringIO()
+parser.print_help(f)
+helpstr = f.getvalue()
+(options, remaining_args) = parser.parse_args()
+
+
+working_dir = options.working_dir
+
+
+
+
+#_________________________________________________________________________________________
+#
+# Step 1:
+#
+# for n_file in NNN_pairs_of_input_files:
+# for m_file in MMM_simulation_data:
+#
+# [n_file.gene,
+# n_file.gwas,
+# m_file.simulation] -> working_dir/n_file.m_file.simulation_res
+#
+#_________________________________________________________________________________________
+def generate_simulation_params ():
+ """
+ Custom function to generate
+ file names for gene/gwas simulation study
+ """
+
+ simulation_files, simulation_file_roots = get_simulation_files()
+ gene_gwas_file_pairs, gene_gwas_file_roots = get_gene_gwas_file_pairs()
+
+ for sim_file, sim_file_root in izip(simulation_files, simulation_file_roots):
+ for (gene, gwas), gene_file_root in izip(gene_gwas_file_pairs, gene_gwas_file_roots):
+
+ result_file = "%s.%s.simulation_res" % (gene_file_root, sim_file_root)
+ result_file_path = os.path.join(working_dir, "simulation_results", result_file)
+
+ yield [gene, gwas, sim_file], result_file_path, gene_file_root, sim_file_root, result_file
+
+#
+# mkdir: makes sure output directories exist before task
+#
+ at follows(mkdir(options.working_dir, os.path.join(working_dir, "simulation_results")))
+ at files(generate_simulation_params)
+def gwas_simulation(input_files, result_file_path, gene_file_root, sim_file_root, result_file):
+ """
+ Dummy calculation of gene gwas vs simulation data
+ Normally runs in parallel on a computational cluster
+ """
+ (gene_file,
+ gwas_file,
+ simulation_data_file) = input_files
+
+ simulation_res_file = open(result_file_path, "w")
+ simulation_res_file.write("%s + %s -> %s\n" % (gene_file_root, sim_file_root, result_file))
+
+
+#_________________________________________________________________________________________
+#
+# Step 2:
+#
+# Statistical summary per gene/gwas file pair
+#
+# for n_file in NNN_pairs_of_input_files:
+# working_dir/simulation_results/n.*.simulation_res
+# -> working_dir/n.mean
+#
+#_________________________________________________________________________________________
+def generate_statistical_summary_params():
+ """
+ Custom function for summarising simulation result files per gene / gwas file pair
+ """
+ gene_gwas_file_pairs, gene_gwas_file_roots = get_gene_gwas_file_pairs()
+
+ for (gene, gwas), gene_file_root in izip(gene_gwas_file_pairs, gene_gwas_file_roots):
+ result_glob_spec = "%s.*.simulation_res" % (gene_file_root)
+ result_files = glob.glob(os.path.join(working_dir, "simulation_results", result_glob_spec))
+ summary_file = os.path.join(working_dir, gene_file_root + ".mean")
+
+ yield result_files, summary_file
+
+
+
+ at follows(gwas_simulation)
+ at files(generate_statistical_summary_params)
+ at posttask(lambda : sys.stdout.write("\nAll finished: hooray!!!\n"))
+def statistical_summary (result_files, summary_file):
+ """
+ Simulate statistical summary
+ """
+
+ summary_file = open(summary_file, "w")
+ for f in result_files:
+ summary_file.write(open(f).read())
+ sleep(1)
+
+
+
+
+
+
+
+
+
+
+#888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+#
+# print pipeline or run pipeline
+#
+
+if options.just_print:
+ pipeline_printout(sys.stdout, options.target_tasks, options.forced_tasks, long_winded=True)
+
+elif options.dependency_file:
+ graph_printout ( open(options.dependency_file, "w"),
+ options.dependency_graph_format,
+ options.target_tasks,
+ options.forced_tasks)
+else:
+ pipeline_run(options.target_tasks, options.forced_tasks, multiprocess = options.jobs)
+
diff --git a/doc/static_data/example_scripts/play_with_colours.py b/doc/static_data/example_scripts/play_with_colours.py
new file mode 100644
index 0000000..5054aa4
--- /dev/null
+++ b/doc/static_data/example_scripts/play_with_colours.py
@@ -0,0 +1,268 @@
+#!/usr/bin/env python
+"""
+
+ play_with_colours.py
+ [--log_file PATH]
+ [--verbose]
+
+"""
+
+################################################################################
+#
+# test
+#
+#
+# Copyright (c) 7/13/2010 Leo Goodstadt
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+#################################################################################
+
+import sys, os
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# options
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+
+from optparse import OptionParser
+import StringIO
+
+parser = OptionParser(version="%play_with_colours 1.0",
+ usage = "\n\n play_with_colours "
+ "--flowchart FILE [options] "
+ "[--colour_scheme_index INT ] "
+ "[--key_legend_in_graph]")
+
+#
+# pipeline
+#
+parser.add_option("--flowchart", dest="flowchart",
+ metavar="FILE",
+ type="string",
+ help="Don't actually run any commands; just print the pipeline "
+ "as a flowchart.")
+parser.add_option("--colour_scheme_index", dest="colour_scheme_index",
+ metavar="INTEGER",
+ type="int",
+ help="Index of colour scheme for flow chart.")
+parser.add_option("--key_legend_in_graph", dest="key_legend_in_graph",
+ action="store_true", default=False,
+ help="Print out legend and key for dependency graph.")
+
+(options, remaining_args) = parser.parse_args()
+if not options.flowchart:
+ raise Exception("Missing mandatory parameter: --flowchart.\n")
+
+
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# imports
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+from ruffus import *
+from ruffus.ruffus_exceptions import JobSignalledBreak
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Pipeline
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+
+#
+# up to date tasks
+#
+ at check_if_uptodate (lambda : (False, ""))
+def Up_to_date_task1(infile, outfile):
+ pass
+
+ at check_if_uptodate (lambda : (False, ""))
+ at follows(Up_to_date_task1)
+def Up_to_date_task2(infile, outfile):
+ pass
+
+ at check_if_uptodate (lambda : (False, ""))
+ at follows(Up_to_date_task2)
+def Up_to_date_task3(infile, outfile):
+ pass
+
+
+ at check_if_uptodate (lambda : (False, ""))
+ at follows(Up_to_date_task3)
+def Up_to_date_final_target(infile, outfile):
+ pass
+
+
+#
+# Explicitly specified
+#
+ at check_if_uptodate (lambda : (False, ""))
+ at follows(Up_to_date_task1)
+def Explicitly_specified_task(infile, outfile):
+ pass
+
+
+
+#
+# Tasks to run
+#
+ at follows(Explicitly_specified_task)
+def Task_to_run1(infile, outfile):
+ pass
+
+
+ at follows(Task_to_run1)
+def Task_to_run2(infile, outfile):
+ pass
+
+ at follows(Task_to_run2)
+def Task_to_run3(infile, outfile):
+ pass
+
+ at check_if_uptodate (lambda : (False, ""))
+ at follows(Task_to_run2)
+def Up_to_date_task_forced_to_rerun(infile, outfile):
+ pass
+
+
+#
+# Final target
+#
+ at follows(Up_to_date_task_forced_to_rerun, Task_to_run3)
+def Final_target(infile, outfile):
+ pass
+
+#
+# Ignored downstream
+#
+ at follows(Final_target)
+def Downstream_task1_ignored(infile, outfile):
+ pass
+
+ at follows(Final_target)
+def Downstream_task2_ignored(infile, outfile):
+ pass
+
+
+
+
+
+
+
+
+
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Main logic
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+from collections import defaultdict
+custom_flow_chart_colour_scheme = defaultdict(dict)
+
+#
+# Base chart on this overall colour scheme index
+#
+custom_flow_chart_colour_scheme["colour_scheme_index"] = options.colour_scheme_index
+
+#
+# Overriding colours
+#
+if options.colour_scheme_index is None:
+ custom_flow_chart_colour_scheme["Vicious cycle"]["linecolor"] = '"#FF3232"'
+ custom_flow_chart_colour_scheme["Pipeline"]["fontcolor"] = '"#FF3232"'
+ custom_flow_chart_colour_scheme["Key"]["fontcolor"] = "black"
+ custom_flow_chart_colour_scheme["Key"]["fillcolor"] = '"#F6F4F4"'
+ custom_flow_chart_colour_scheme["Task to run"]["linecolor"] = '"#0044A0"'
+ custom_flow_chart_colour_scheme["Up-to-date"]["linecolor"] = "gray"
+ custom_flow_chart_colour_scheme["Final target"]["fillcolor"] = '"#EFA03B"'
+ custom_flow_chart_colour_scheme["Final target"]["fontcolor"] = "black"
+ custom_flow_chart_colour_scheme["Final target"]["color"] = "black"
+ custom_flow_chart_colour_scheme["Final target"]["dashed"] = 0
+ custom_flow_chart_colour_scheme["Vicious cycle"]["fillcolor"] = '"#FF3232"'
+ custom_flow_chart_colour_scheme["Vicious cycle"]["fontcolor"] = 'white'
+ custom_flow_chart_colour_scheme["Vicious cycle"]["color"] = "white"
+ custom_flow_chart_colour_scheme["Vicious cycle"]["dashed"] = 0
+ custom_flow_chart_colour_scheme["Up-to-date task"]["fillcolor"] = '"#B8CC6E"'
+ custom_flow_chart_colour_scheme["Up-to-date task"]["fontcolor"] = '"#006000"'
+ custom_flow_chart_colour_scheme["Up-to-date task"]["color"] = '"#006000"'
+ custom_flow_chart_colour_scheme["Up-to-date task"]["dashed"] = 0
+ custom_flow_chart_colour_scheme["Down stream"]["fillcolor"] = "white"
+ custom_flow_chart_colour_scheme["Down stream"]["fontcolor"] = "gray"
+ custom_flow_chart_colour_scheme["Down stream"]["color"] = "gray"
+ custom_flow_chart_colour_scheme["Down stream"]["dashed"] = 0
+ custom_flow_chart_colour_scheme["Explicitly specified task"]["fillcolor"] = "transparent"
+ custom_flow_chart_colour_scheme["Explicitly specified task"]["fontcolor"] = "black"
+ custom_flow_chart_colour_scheme["Explicitly specified task"]["color"] = "black"
+ custom_flow_chart_colour_scheme["Explicitly specified task"]["dashed"] = 0
+ custom_flow_chart_colour_scheme["Task to run"]["fillcolor"] = '"#EBF3FF"'
+ custom_flow_chart_colour_scheme["Task to run"]["fontcolor"] = '"#0044A0"'
+ custom_flow_chart_colour_scheme["Task to run"]["color"] = '"#0044A0"'
+ custom_flow_chart_colour_scheme["Task to run"]["dashed"] = 0
+ custom_flow_chart_colour_scheme["Up-to-date task forced to rerun"]["fillcolor"] = 'transparent'
+ custom_flow_chart_colour_scheme["Up-to-date task forced to rerun"]["fontcolor"] = '"#0044A0"'
+ custom_flow_chart_colour_scheme["Up-to-date task forced to rerun"]["color"] = '"#0044A0"'
+ custom_flow_chart_colour_scheme["Up-to-date task forced to rerun"]["dashed"] = 1
+ custom_flow_chart_colour_scheme["Up-to-date Final target"]["fillcolor"] = '"#EFA03B"'
+ custom_flow_chart_colour_scheme["Up-to-date Final target"]["fontcolor"] = '"#006000"'
+ custom_flow_chart_colour_scheme["Up-to-date Final target"]["color"] = '"#006000"'
+ custom_flow_chart_colour_scheme["Up-to-date Final target"]["dashed"] = 0
+
+if __name__ == '__main__':
+ pipeline_printout_graph (
+
+ open(options.flowchart, "w"),
+ # use flowchart file name extension to decide flowchart format
+ # e.g. svg, jpg etc.
+ os.path.splitext(options.flowchart)[1][1:],
+
+ # final targets
+ [Final_target, Up_to_date_final_target],
+
+ # Explicitly specified tasks
+ [Explicitly_specified_task],
+
+ # Do we want key legend
+ no_key_legend = not options.key_legend_in_graph,
+
+ # Print all the task types whether used or not
+ minimal_key_legend = False,
+
+ user_colour_scheme = custom_flow_chart_colour_scheme,
+ pipeline_name = "Colour schemes")
+
+
+
+
+
+
+
+
diff --git a/doc/static_data/example_scripts/ruffus_template.py b/doc/static_data/example_scripts/ruffus_template.py
new file mode 100644
index 0000000..69a082d
--- /dev/null
+++ b/doc/static_data/example_scripts/ruffus_template.py
@@ -0,0 +1,270 @@
+#!/usr/bin/env python
+"""
+
+ ruffus_template.py
+ [--log_file PATH]
+ [--verbose]
+ [--target_tasks]
+ [--jobs]
+ [--just_print]
+ [--flowchart]
+ [--key_legend_in_graph]
+ [--forced_tasks]
+
+"""
+import sys, os
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# options
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+
+if __name__ == '__main__':
+ from optparse import OptionParser
+ import StringIO
+
+ parser = OptionParser(version="%prog 1.0", usage = "\n\n %progs [options]")
+
+
+
+ #
+ # general options: verbosity / logging
+ #
+ parser.add_option("-v", "--verbose", dest = "verbose",
+ action="count", default=0,
+ help="Print more verbose messages for each additional verbose level.")
+ parser.add_option("-L", "--log_file", dest="log_file",
+ metavar="FILE",
+ type="string",
+ help="Name and path of log file")
+
+
+
+
+ #
+ # pipeline
+ #
+ parser.add_option("-t", "--target_tasks", dest="target_tasks",
+ action="append",
+ default = list(),
+ metavar="JOBNAME",
+ type="string",
+ help="Target task(s) of pipeline.")
+ parser.add_option("-j", "--jobs", dest="jobs",
+ default=1,
+ metavar="N",
+ type="int",
+ help="Allow N jobs (commands) to run simultaneously.")
+ parser.add_option("-n", "--just_print", dest="just_print",
+ action="store_true", default=False,
+ help="Don't actually run any commands; just print the pipeline.")
+ parser.add_option("--flowchart", dest="flowchart",
+ metavar="FILE",
+ type="string",
+ help="Don't actually run any commands; just print the pipeline "
+ "as a flowchart.")
+
+ #
+ # Less common pipeline options
+ #
+ parser.add_option("--key_legend_in_graph", dest="key_legend_in_graph",
+ action="store_true", default=False,
+ help="Print out legend and key for dependency graph.")
+ parser.add_option("--forced_tasks", dest="forced_tasks",
+ action="append",
+ default = list(),
+ metavar="JOBNAME",
+ type="string",
+ help="Pipeline task(s) which will be included even if they are up to date.")
+
+ # get help string
+ f =StringIO.StringIO()
+ parser.print_help(f)
+ helpstr = f.getvalue()
+ (options, remaining_args) = parser.parse_args()
+
+
+ #vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
+ # #
+ # Change this if necessary #
+ # #
+ #^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+ #
+ # Add names of mandatory options,
+ # strings corresponding to the "dest" parameter
+ # in the options defined above
+ #
+ mandatory_options = [ ]
+
+ #vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
+ # #
+ # Change this if necessary #
+ # #
+ #^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+
+ def check_mandatory_options (options, mandatory_options, helpstr):
+ """
+ Check if specified mandatory options have been defined
+ """
+ missing_options = []
+ for o in mandatory_options:
+ if not getattr(options, o):
+ missing_options.append("--" + o)
+
+ if not len(missing_options):
+ return
+
+ raise Exception("Missing mandatory parameter%s: %s.\n\n%s\n\n" %
+ ("s" if len(missing_options) > 1 else "",
+ ", ".join(missing_options),
+ helpstr))
+ check_mandatory_options (options, mandatory_options, helpstr)
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# imports
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+from ruffus import *
+from ruffus.ruffus_exceptions import JobSignalledBreak
+
+#from json import dumps
+#from collections import defaultdict
+
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Functions
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Logger
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+if __name__ == '__main__':
+ import logging
+ import logging.handlers
+
+ MESSAGE = 15
+ logging.addLevelName(MESSAGE, "MESSAGE")
+
+ def setup_std_logging (logger, log_file, verbose):
+ """
+ set up logging using programme options
+ """
+ class debug_filter(logging.Filter):
+ """
+ Ignore INFO messages
+ """
+ def filter(self, record):
+ return logging.INFO != record.levelno
+
+ class NullHandler(logging.Handler):
+ """
+ for when there is no logging
+ """
+ def emit(self, record):
+ pass
+
+ # We are interested in all messages
+ logger.setLevel(logging.DEBUG)
+ has_handler = False
+
+ # log to file if that is specified
+ if log_file:
+ handler = logging.FileHandler(log_file, delay=False)
+ handler.setFormatter(logging.Formatter("%(asctime)s - %(name)s - %(levelname)6s - %(message)s"))
+ handler.setLevel(MESSAGE)
+ logger.addHandler(handler)
+ has_handler = True
+
+ # log to stderr if verbose
+ if verbose:
+ stderrhandler = logging.StreamHandler(sys.stderr)
+ stderrhandler.setFormatter(logging.Formatter(" %(message)s"))
+ stderrhandler.setLevel(logging.DEBUG)
+ if log_file:
+ stderrhandler.addFilter(debug_filter())
+ logger.addHandler(stderrhandler)
+ has_handler = True
+
+ # no logging
+ if not has_handler:
+ logger.addHandler(NullHandler())
+
+
+ #
+ # set up log
+ #
+ logger = logging.getLogger(module_name)
+ setup_std_logging(logger, options.log_file, options.verbose)
+
+ #
+ # Allow logging across Ruffus pipeline
+ #
+ def get_logger (logger_name, args):
+ return logger
+
+ from ruffus.proxy_logger import *
+ (logger_proxy,
+ logging_mutex) = make_shared_logger_and_proxy (get_logger,
+ module_name,
+ {})
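+
+ # Note: inside jobs (which may run in separate processes), the shared
+ # logger is normally used together with its mutex, e.g.:
+ #
+ #     with logging_mutex:
+ #         logger_proxy.info("job completed")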
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Pipeline
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Put pipeline code here
+
+
+
+
+
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Main logic
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+if __name__ == '__main__':
+ if options.just_print:
+ pipeline_printout(sys.stdout, options.target_tasks, options.forced_tasks,
+ verbose=options.verbose)
+
+ elif options.flowchart:
+ pipeline_printout_graph ( open(options.flowchart, "w"),
+ # use flowchart file name extension to decide flowchart format
+ # e.g. svg, jpg etc.
+ os.path.splitext(options.flowchart)[1][1:],
+ options.target_tasks,
+ options.forced_tasks,
+ no_key_legend = not options.key_legend_in_graph)
+ else:
+ pipeline_run(options.target_tasks, options.forced_tasks,
+ multiprocess = options.jobs,
+ logger = stderr_logger,
+ verbose = options.verbose)
+
diff --git a/doc/static_data/example_scripts/simpler.py b/doc/static_data/example_scripts/simpler.py
new file mode 100644
index 0000000..c858130
--- /dev/null
+++ b/doc/static_data/example_scripts/simpler.py
@@ -0,0 +1,260 @@
+#!/usr/bin/env python2.5
+"""
+
+ simpler.py
+
+"""
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# options
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+from optparse import OptionParser
+import sys, os
+import os.path
+import StringIO
+
+# add self to search path for testing
+exe_path = os.path.split(os.path.abspath(sys.argv[0]))[0]
+if __name__ == '__main__':
+ module_name = os.path.split(sys.argv[0])[1]
+ module_name = os.path.splitext(module_name)[0];
+else:
+ module_name = __name__
+
+
+
+
+parser = OptionParser(version="%prog 1.0")
+parser.add_option("-t", "--target_tasks", dest="target_tasks",
+ action="append",
+ default = list(),
+ metavar="JOBNAME",
+ type="string",
+ help="Target task(s) of pipeline.")
+parser.add_option("-f", "--forced_tasks", dest="forced_tasks",
+ action="append",
+ default = list(),
+ metavar="JOBNAME",
+ type="string",
+ help="Pipeline task(s) which will be included even if they are up to date.")
+parser.add_option("-j", "--jobs", dest="jobs",
+ default=5,
+ metavar="jobs",
+ type="int",
+ help="Specifies the number of jobs (commands) to run simultaneously.")
+parser.add_option("-v", "--verbose", dest = "verbose",
+ action="store_true", default=False,
+ help="Do not echo to shell but only print to log.")
+parser.add_option("-d", "--dependency", dest="dependency_file",
+ default="simple.svg",
+ metavar="FILE",
+ type="string",
+ help="Print a dependency graph of the pipeline that would be executed "
+ "to FILE, but do not execute it.")
+parser.add_option("-F", "--dependency_graph_format", dest="dependency_graph_format",
+ metavar="FORMAT",
+ type="string",
+ default = 'svg',
+ help="format of dependency graph file. Can be 'ps' (PostScript), "+
+ "'svg' 'svgz' (Structured Vector Graphics), " +
+ "'png' 'gif' (bitmap graphics) etc ")
+parser.add_option("-n", "--just_print", dest="just_print",
+ action="store_true", default=False,
+ help="Print a description of the jobs that would be executed, "
+ "but do not execute them.")
+parser.add_option("-M", "--minimal_rebuild_mode", dest="minimal_rebuild_mode",
+ action="store_true", default=False,
+ help="Rebuild a minimum of tasks necessary for the target. "
+ "Ignore upstream out of date tasks if intervening tasks are fine.")
+parser.add_option("-K", "--no_key_legend_in_graph", dest="no_key_legend_in_graph",
+ action="store_true", default=False,
+ help="Do not print out legend and key for dependency graph.")
+parser.add_option("-H", "--draw_graph_horizontally", dest="draw_horizontally",
+ action="store_true", default=False,
+ help="Draw horizontal dependency graph.")
+
+parameters = [
+ ]
+
+
+
+
+
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# imports
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+import StringIO
+import re
+import operator
+import sys
+from collections import defaultdict
+
+sys.path.append(os.path.abspath(os.path.join(exe_path,"..", "..")))
+from ruffus import *
+import json
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Functions
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+def create_custom_file_func(params):
+ """
+ creates function which can be used as input to @files_func
+ """
+ def cust_func ():
+ for job_param in params:
+ yield job_param
+ return cust_func
+
+
+def is_job_uptodate (infiles, outfiles, *extra_params):
+ """
+ assumes first two parameters are files, checks if they are up to date
+ """
+ return task.needs_update_check_modify_time (infiles, outfiles, *extra_params)
+
+
+
+def test_post_task_function ():
+ print "Hooray"
+
+import time
+def test_job_io(infiles, outfiles, extra_params):
+ """
+ cat input files content to output files
+ after writing out job parameters
+ """
+ # dump parameters
+ params = (infiles, outfiles) + extra_params
+ sys.stdout.write(' job = %s\n' % json.dumps(params))
+
+
+
+ if isinstance(infiles, str):
+ infiles = [infiles]
+ elif infiles == None:
+ infiles = []
+ if isinstance(outfiles, str):
+ outfiles = [outfiles]
+ output_text = list()
+ for f in infiles:
+ output_text.append(open(f).read())
+ output_text = "".join(sorted(output_text))
+ output_text += json.dumps(infiles) + " -> " + json.dumps(outfiles) + "\n"
+ for f in outfiles:
+ open(f, "w").write(output_text)
+ time.sleep(1)
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Main logic
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+
+
+
+
+# get help string
+f = StringIO.StringIO()
+parser.print_help(f)
+helpstr = f.getvalue()
+(options, remaining_args) = parser.parse_args()
+
+
+
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Tasks
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+
+#
+# task1
+#
+@files(None, 'a.1')
+def task1(infiles, outfiles, *extra_params):
+ """
+ First task
+ """
+ test_job_io(infiles, outfiles, extra_params)
+
+
+
+#
+# task2
+#
+@files_re('*.1', '(.*).1', r'\1.1', r'\1.2')
+@follows(task1)
+def task2(infiles, outfiles, *extra_params):
+ """
+ Second task
+ """
+ test_job_io(infiles, outfiles, extra_params)
+
+
+
+#
+# task3
+#
+@files_re('*.1', '(.*).1', r'\1.2', r'\1.3')
+@follows(task2)
+def task3(infiles, outfiles, *extra_params):
+ """
+ Third task
+ """
+ test_job_io(infiles, outfiles, extra_params)
+
+
+
+#
+# task4
+#
+@files_re('*.1', '(.*).1', r'\1.3', r'\1.4')
+@follows(task3)
+def task4(infiles, outfiles, *extra_params):
+ """
+ Fourth task
+ """
+ test_job_io(infiles, outfiles, extra_params)
+
+
+
+
+if options.just_print:
+ pipeline_printout(sys.stdout, options.target_tasks, options.forced_tasks,
+ long_winded=True,
+ gnu_make_maximal_rebuild_mode = not options.minimal_rebuild_mode)
+
+elif options.dependency_file:
+ pipeline_printout_graph ( open(options.dependency_file, "w"),
+ options.dependency_graph_format,
+ options.target_tasks,
+ options.forced_tasks,
+ draw_vertically = not options.draw_horizontally,
+ gnu_make_maximal_rebuild_mode = not options.minimal_rebuild_mode,
+ no_key_legend = options.no_key_legend_in_graph)
+else:
+ pipeline_run(options.target_tasks, options.forced_tasks, multiprocess = options.jobs,
+ gnu_make_maximal_rebuild_mode = not options.minimal_rebuild_mode)
+
diff --git a/doc/static_data/ruffus.css b/doc/static_data/ruffus.css
new file mode 100644
index 0000000..4734d47
--- /dev/null
+++ b/doc/static_data/ruffus.css
@@ -0,0 +1,327 @@
+/*
+ * Sphinx stylesheet -- default theme
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+
+@import url("basic.css");
+
+/* -- page layout ----------------------------------------------------------- */
+
+body {
+ font-family: sans-serif;
+ font-size: 100%;
+ background-color: #11303d;
+ color: #000;
+ margin: 0;
+ padding: 0;
+}
+
+div.document {
+ background-color: #1c4e63;
+}
+
+div.documentwrapper {
+ float: left;
+ width: 100%;
+}
+
+div.bodywrapper {
+ margin: 0 0 0 230px;
+}
+
+div.body {
+ background-color: #ffffff;
+ color: #000000;
+ padding: 0 20px 30px 20px;
+}
+
+div.footer {
+ color: #ffffff;
+ width: 100%;
+ padding: 9px 0 9px 0;
+ text-align: center;
+ font-size: 75%;
+}
+
+div.footer a {
+ color: #ffffff;
+ text-decoration: underline;
+}
+
+div.related {
+ background-color: #133f52;
+ line-height: 30px;
+ color: #ffffff;
+}
+
+div.related a {
+ font-size: 120%; /* new */
+ color: #c0c0FF; /* new */
+ /*color: #ffffff; /* orig */
+}
+
+div.sphinxsidebar {
+}
+
+div.sphinxsidebar h3 {
+ font-family: 'Trebuchet MS', sans-serif;
+ color: #ffffff;
+ font-size: 1.4em;
+ font-weight: normal;
+ margin: 0;
+ padding: 0;
+}
+
+div.sphinxsidebar h3 a {
+ color: #98dbcc;
+ margin: 0;
+ padding: 0;
+/* margin: 0px 0px 0px 0px;
+ padding: 0px 0px 0px 0px;*/
+}
+
+
+
+div.sphinxsidebar h4 {
+ font-family: 'Trebuchet MS', sans-serif;
+ color: #ffffff;
+ font-size: 1.3em;
+ font-weight: normal;
+ margin: 5px 0 0 0;
+ padding: 0;
+}
+
+div.sphinxsidebar p {
+ color: #ffffff;
+}
+
+div.sphinxsidebar p.topless {
+ margin: 5px 10px 10px 10px;
+}
+
+div.sphinxsidebar ul {
+ font-size: 100%; /* NEW */
+ margin: 10px;
+ padding: 0;
+ color: #ffffff;
+}
+
+div.sphinxsidebar a {
+ color: #98dbcc;
+}
+
+div.sphinxsidebar a em{
+ color: #98dbcc;
+}
+
+div.sphinxsidebar input {
+ border: 1px solid #98dbcc;
+ font-family: sans-serif;
+ font-size: 1em;
+}
+
+/* -- body styles ----------------------------------------------------------- */
+
+a {
+ color: #355f7c;
+ text-decoration: none;
+}
+
+a:hover {
+ text-decoration: underline;
+}
+
+div.body p, div.body dd, div.body li {
+ text-align: justify;
+ line-height: 130%;
+}
+
+div.body h1,
+div.body h2,
+div.body h3,
+div.body h4,
+div.body h5,
+div.body h6 {
+ font-family: 'Trebuchet MS', sans-serif;
+ background-color: #f2f2f2;
+ font-weight: normal;
+ color: #20435c;
+ border-bottom: 1px solid #ccc;
+ margin: 20px -20px 10px -20px;
+ padding: 3px 0 3px 10px;
+}
+
+div.body h1 { margin-top: 0; font-size: 200%; color: #0088FF }
+div.body h2 { font-size: 200%; color: #000000 }
+div.body h3 { font-size: 140%; color: #800080; margin-left: 0px; margin-top: 40px ;font-style:italic;}
+div.body h4 { font-size: 110%; color: #008000; margin-left: 10px; margin-top: 40px;background-color: #F0F0F0;}
+div.body h5 { font-size: 90%; }
+div.body h6 { font-size: 80%; }
+
+div.body h3 cite
+{
+ font-family: monospace;
+ font-weight: normal;
+ color: #0000ff;
+ font-style: normal;
+ font-size: 90%;
+}
+
+div.body h3 em
+{
+ font-weight: normal;
+ color: #000000;
+ font-style: normal;
+ font-size: 80%;
+}
+
+div.body h2 em
+{
+ font-family: monospace;
+ font-weight: normal;
+ color: #800080;
+ font-style: normal;
+ font-size: 100%;
+}
+
+div.body h2 cite
+{
+ font-family: monospace;
+ font-weight: normal;
+ color: #0000ff;
+ font-style: italic;
+ font-size: 80%;
+}
+
+div.body h2 a
+{
+ font-family: monospace;
+ font-weight: normal;
+ color: #0000ff;
+ font-style: italic;
+ font-size: 80%;
+}
+
+div.body h1 em
+{
+ font-family: monospace;
+ font-weight: normal;
+ color: #0088FF;
+ font-size: 100%;
+}
+
+dt em
+{
+ font-family: monospace;
+ color: #0000ff;
+ font-style: italic;
+ font-size: 120%;
+}
+
+
+a.headerlink {
+ color: #c60f0f;
+ font-size: 0.8em;
+ padding: 0 4px 0 4px;
+ text-decoration: none;
+}
+
+a.headerlink:hover {
+ background-color: #c60f0f;
+ color: white;
+}
+
+div.body p, div.body dd, div.body li {
+ text-align: justify;
+ line-height: 130%;
+}
+
+div.admonition p.admonition-title + p {
+ display: inline;
+}
+
+div.note {
+ background-color: #eee;
+ border: 1px solid #ccc;
+}
+
+div.seealso {
+ background-color: #ffc;
+ border: 1px solid #ff6;
+}
+
+div.topic {
+ background-color: #eee;
+}
+
+div.warning {
+ background-color: #ffe4e4;
+ border: 1px solid #f66;
+}
+
+p.admonition-title {
+ display: inline;
+}
+
+p.admonition-title:after {
+ content: ":";
+}
+
+pre {
+ padding: 5px;
+ background-color: #eeffcc;
+ color: #333333;
+ line-height: 120%;
+ border: 1px solid #ac9;
+ border-left: none;
+ border-right: none;
+}
+
+tt {
+ background-color: #ecf0f3;
+ padding: 0 1px 0 1px;
+ font-size: 0.95em;
+}
+
+
+/*
+div.sphinxsidebar h3 {
+ font-family: 'Trebuchet MS', sans-serif;
+ color: #ffffff;
+ font-size: 1.4em;
+ font-weight: normal;
+ margin: 0 0 0 0;
+ padding: 0 0 0 0;
+}
+
+div.sphinxsidebar h3 a{
+ color: #98dbcc;
+ margin: 0px 0px 0px 0px;
+ padding: 0px 0px 0px 0px;
+}
+
+*/
+
+
+
+/*
+
+ Override Pygment style so that I can annotate the code
+
+ The key styles are comments within highlighted sections. I have set these to have a
+ white background with big bold red text, so that it stands out from and does not appear
+ to be part of the surrounding code. This way I don't have to screen shot and highlight
+ manually the code in photoshop, and the embedded bitmapped code doesn't get out of date,...
+ and the code is selectable, and it fits into a sane workflow etc. etc.
+
+ I use :nth-of-type(n) pseudo class (but one which matches everything)
+ so that this is a specialisation of the css .hll class style in pygments.css
+
+ Nasty hack but this way I don't have to modify pygments.css by hand each time, and I don't have
+ to create my own pygments style.
+
+*/
+
+.highlight .nd:nth-of-type(n) { background-color: #ffff88; color: red; font-weight: bold; outline:red dotted thin}/**/
+.highlight .hll:nth-of-type(n) { background-color: #eeffcc; font-weight: bold}
+.highlight .hll .c:nth-of-type(n) { background-color: #ffffff; color: red; font-weight: bold; font-style: normal;font-size:120%} /* Comment */
diff --git a/doc/static_data/ruffus.pdf b/doc/static_data/ruffus.pdf
new file mode 100644
index 0000000..68836b8
Binary files /dev/null and b/doc/static_data/ruffus.pdf differ
diff --git a/doc/task.rst b/doc/task.rst
new file mode 100644
index 0000000..c84bee1
--- /dev/null
+++ b/doc/task.rst
@@ -0,0 +1,138 @@
+.. include:: global.inc
+#####################
+ruffus.Task
+#####################
+
+.. automodule:: ruffus.task
+ :undoc-members:
+ :noindex:
+
+***************************************
+Decorators
+***************************************
+ Basic Task decorators are:
+
+ :ref:`@follows() <decorators.follows>`
+
+ and
+
+ :ref:`@files() <decorators.files>`
+
+ Task decorators include:
+
+ :ref:`@split() <decorators.split>`
+
+ :ref:`@transform() <decorators.transform>`
+
+ :ref:`@merge() <decorators.merge>`
+
+ :ref:`@posttask() <decorators.posttask>`
+
+ More advanced users may require:
+
+ :ref:`@transform() <decorators.transform_ex>`
+
+ :ref:`@collate() <decorators.collate>`
+
+ :ref:`@parallel() <decorators.parallel>`
+
+ :ref:`@check_if_uptodate() <decorators.check_if_uptodate>`
+
+ :ref:`@files_re() <decorators.files_re>`
+
+
+***************************************
+Pipeline functions
+***************************************
+========================
+pipeline_run
+========================
+.. autofunction:: pipeline_run (target_tasks, forcedtorun_tasks=[], multiprocess=1, logger=stderr_logger, gnu_make_maximal_rebuild_mode=True)
+
+========================
+pipeline_printout
+========================
+.. autofunction:: pipeline_printout
+
+========================
+pipeline_printout_graph
+========================
+.. autofunction:: pipeline_printout_graph
+
+
+.. ???
+
+
+***************************************
+Logging
+***************************************
+.. autoclass:: t_black_hole_logger
+.. autoclass:: t_stderr_logger
+
+.. ???
+
+
+***************************************
+Implementation:
+***************************************
+=================================
+Parameter factories:
+=================================
+.. autofunction:: merge_param_factory
+.. autofunction:: collate_param_factory
+.. autofunction:: transform_param_factory
+.. autofunction:: files_param_factory
+.. autofunction:: args_param_factory
+.. autofunction:: split_param_factory
+
+.. ???
+
+
+=================================
+Wrappers around jobs:
+=================================
+.. autofunction:: job_wrapper_generic
+.. autofunction:: job_wrapper_io_files
+.. autofunction:: job_wrapper_mkdir
+
+.. ???
+
+
+
+
+=================================
+Checking if job is up to date:
+=================================
+.. autofunction:: needs_update_check_modify_time
+.. autofunction:: needs_update_check_directory_missing
+
+.. ???
+
+
+***************************************
+Exceptions and Errors
+***************************************
+.. autoclass:: task_FilesArgumentsError
+.. autoclass:: task_FilesreArgumentsError
+.. autoclass:: JobSignalledBreak
+.. autoclass:: MissingInputFileError
+.. autoclass:: PostTaskArgumentError
+.. autoclass:: error_making_directory
+.. autoclass:: error_duplicate_task_name
+.. autoclass:: error_decorator_args
+.. autoclass:: error_task_name_lookup_failed
+.. autoclass:: error_task_decorator_takes_no_args
+.. autoclass:: error_function_is_not_a_task
+.. autoclass:: error_circular_dependencies
+.. autoclass:: error_not_a_directory
+.. autoclass:: error_missing_output
+.. autoclass:: error_job_signalled_interrupt
+
+
+
+.. ???
+
+
+
+
+
diff --git a/doc/todo.rst b/doc/todo.rst
new file mode 100644
index 0000000..678f521
--- /dev/null
+++ b/doc/todo.rst
@@ -0,0 +1,500 @@
+.. include:: global.inc
+
+.. _todo:
+
+##########################################
+Future Changes to Ruffus
+##########################################
+
+ I would appreciate feedback and help on all these issues, and on where next to take *ruffus*.
+
+
+ **Future Changes** are features where we more or less know where we are going and how to get there.
+
+ **Planned Improvements** describes features we would like in Ruffus but where the implementation
+ or syntax has not yet been (fully) worked out.
+
+ If you have suggestions or contributions, please either write to me ( ruffus_lib at llew.org.uk) or
+ send a pull request via the `git site <https://github.com/bunbun/ruffus>`__.
+
+
+.. _todo.inactive_tasks_in_pipeline_printout_graph:
+
+********************************************************************************************************
+Todo: pipeline_printout_graph should print inactive tasks
+********************************************************************************************************
+
+
+.. _todo.dynamic_strings:
+
+********************************************************************************************************
+Todo: Mark input strings as non-file names, and add support for dynamically returned parameters
+********************************************************************************************************
+
+ 1. Use indicator object.
+ 2. What is a good name? ``"output_from()"``, ``"NOT_FILE_NAME"`` :-)
+ 3. They will still participate in suffix, formatter and regex replacement
+
+ Bernie Pope suggests that we should generalise this:
+
+
+ If any object in the input parameters is a (non-list/tuple) class instance, check (getattr) whether it has a ``ruffus_params()`` function.
+ If it does, call it to obtain a list which is substituted in place.
+ If there are strings nested within, these will also take part in Ruffus string substitution.
+ Objects with ``ruffus_params()`` always "decay" to the results of the function call.
+
+ ``output_from`` would be a simple wrapper which returns the internal string via ``ruffus_params()``
+
+ .. code-block:: python
+
+ class output_from (object):
+ def __init__(self, str):
+ self.str = str
+ def ruffus_params(self):
+ return [self.str]
+
+ Returning a list should be like wildcards and should not introduce an unnecessary level of indirection for output parameters, i.e. suffix(".txt") or formatter() / "{basename[0]}" should work.
+
+ Check!
+
+
+.. _todo.extra_parameters:
+
+********************************************************************************************************
+Todo: Allow "extra" parameters to be used in output substitution
+********************************************************************************************************
+
+ Formatter substitution can refer to the original elements in the input and extra parameters (without converting them to strings either). This refers to the original (nested) data structure.
+
+ This will allow normal python datatypes to be handed down and slipstreamed into a pipeline more easily.
+
+ The syntax would use Ruffus (> version 2.4) formatter:
+
+ .. code-block:: python
+ :emphasize-lines: 2,3
+
+ @transform( ..., formatter(), [
+ "{EXTRAS[0][1][3]}", # EXTRAS
+ "[INPUTS[1][2]]"],...) # INPUTS
+ def taskfunc():
+ pass
+
+ ``INPUTS`` and ``EXTRAS`` indicate that we are referring to the input and extra parameters.
+
+ These are the full (nested) parameters in all their original form. In the case of the input parameters, this obviously depends on the decorator, so
+
+ .. code-block:: python
+
+ @transform(["a.text", [1, "b.text"]], formatter(), "{INPUTS[0][0]}")
+ def taskfunc():
+ pass
+
+ would give
+
+ ::
+
+ job #1
+ input == "a.text"
+ output == "a"
+
+ job #2
+ input == [1, "b.text"]
+ output == 1
+
+
+ The entire string must consist of ``INPUTS`` or ``EXTRAS`` followed by one or more levels of square-bracketed indices, i.e. it must match ``"(INPUTS|EXTRAS)(\[\d+\])+"``
+
+ No string conversion takes place.
+
+ For ``INPUTS`` or ``EXTRAS`` which have objects with a ``ruffus_params()`` function (see Todo item above),
+ the original object rather than the result of ``ruffus_params()`` is forwarded.
+
+
+
+.. _todo.pre_post_job:
+
+********************************************************************************************************
+Todo: Extra signalling before and after each task and job
+********************************************************************************************************
+
+ .. code-block:: python
+
+ @prejob(custom_func)
+ @postjob(custom_func)
+ def task():
+ pass
+
+ ``@prejob`` / ``@postjob`` would be run in the child processes.
+
+
+.. _todo.new_decorators:
+
+******************************************************************************
+Todo: ``@split`` / ``@subdivide`` returns the actual output created
+******************************************************************************
+
+ * **overrides** (not replaces) wild cards.
+ * Returns a list, each with output and extra parameters.
+ * Won't include extraneous files which were not created in the pipeline but which just happened to match the wild card
+ * We should have ``ruffus_output_params``, ``ruffus_extra_params`` wrappers for clarity:
+
+ .. code-block:: python
+
+ @split("a.file", "*.txt")
+ def split_into_txt_files(input_file, output_files):
+ output_files = ["a.txt", "b.txt", "c.txt"]
+ for output_file_name in output_files:
+ with open(output_file_name, "w") as oo:
+ pass
+ return [
+ ruffus_output("a.file"),
+ [ruffus_output(["b.file", "c.file"]), ruffus_extras(13, 14)],
+ ]
+
+
+ * Consider yielding?
+
+==========================================================================================
+Checkpointing
+==========================================================================================
+
+ * If checkpoint file is used, the actual files are saved and checked the next time
+ * If no files are generated, no files are checked the next time...
+ * The output files do not have to match the wildcard though we can output a warning message if that happens...
+ This is obviously dangerous because the behavior will change if the pipeline is rerun without using the checkpoint file
+ * What happens if the task function changes?
+
+***************************************
+Todo: New decorators
+***************************************
+
+==============================================================================
+Todo: ``@originate``
+==============================================================================
+
+ Each (serial) invocation returns lists of output parameters until it returns
+ None. (Empty list = ``continue``, None = ``break``).
+
+
+
+==============================================================================
+Todo: ``@recombine``
+==============================================================================
+
+ Like ``@collate`` but automatically regroups jobs which were a result of a previous ``@subdivide`` / ``@split`` (even after intervening ``@transform`` )
+
+ This is the only way job trickling can work without stalling the pipeline: We would know
+ how many jobs were pending for each ``@recombine`` job and which jobs go together.
+
+****************************************************************************************
+Todo: Named parameters in decorators for clarity
+****************************************************************************************
+
+.. _todo.bioinformatics_example:
+
+********************************************************************************************************
+Todo: Bioinformatics example to end all examples
+********************************************************************************************************
+
+ Uses
+ * ``@product``
+ * ``@subdivide``
+ * ``@transform``
+ * ``@collate``
+ * ``@merge``
+
+****************************************************************************************
+Todo: Allow the next task to start before all jobs in the previous task have finished
+****************************************************************************************
+
+ Jake (Biesinger) calls this **Job Trickling**!
+
+ * A single long running job no longer will hold up the entire pipeline
+ * Calculates dependencies dynamically at the job level.
+ * Goal is to have a long running (months) pipeline to which we can keep adding input...
+ * We can choose between prioritising completion of the entire pipeline for some jobs
+ (depth first) or trying to complete as many tasks as possible (breadth first)
+
+==============================================================================
+Converting to per-job rather than per task dependencies
+==============================================================================
+ Some decorators prevent per-job (rather than per-task) dependency calculations, and
+ will cause the pipeline to stall until the dependent tasks are completed (the current situation):
+
+ * Some types of jobs unavoidably depend on an entire previous task completing:
+ * ``add_inputs()``, ``inputs()``
+ * ``@merge``
+ * ``@split`` (implicit ``@merge``)
+ * ``@split``, ``@originate`` produce variable amount of output at runtime and must be completed before the next task can be run.
+ * Should ``yield`` instead of return?
+ * ``@collate`` needs to pattern match all the inputs of a previous task
+ * Replace ``@collate`` with ``@recombine`` which "remembers" and reverses the results of a previous
+ ``@subdivide`` or ``@split``
+ * Jobs need unique job_id tag
+ * Jobs are assigned (nested) grouping id which accompany them down the
+ pipeline after ``@subdivide`` / ``@split`` and are removed after ``@recombine``
+ * Should have a count of jobs so we always know *when* an "input slot" is full
+ * Funny "single file" mode for ``@transform,`` ``@files`` needs to be
+ regularised so it is a syntactic (front end) convenience (oddity!)
+ and not plague the inards of ruffus
+
+
+ Breaking change: to force the entirety of the previous task to complete before the next one, use ``@follows``
+
+==============================================================================
+Implementation
+==============================================================================
+
+ * "Push" model. Completing jobs "check in" their outputs to "input slots" for all the sucessor jobs.
+ * When "input slots" are full for any job, it is put on the dispatch queue to be run.
+ * The priority (depth first or breadth first) can be set here.
+ * ``pipeline_run`` / ``pipeline_printout`` create a task dependency tree structure (from decorator dependencies) (a runtime pipeline object)
+ * Each task in the pipeline object knows which other tasks wait on it.
+ * When output is created by a job, it sends messages to (i.e. function calls) all dependent tasks in the pipeline object with the new output
+ * Sets of output such as from ``@split`` and ``@subdivide`` and ``@originate`` have a
+ terminating condition and/or an associated count (# of output)
+ * Tasks in the pipeline object forward incoming inputs to task input slots (for slots common to all jobs in a
+ task: ``@inputs``, ``@add_inputs``) or to slots in new jobs in the pipeline object
+ * When all slots are full in each job, this triggers putting the job parameters onto the job submission queue
+ * The pipeline object should allow Ruffus to be reentrant?
+
+
+
+##########################################
+Planned Improvements to Ruffus
+##########################################
+
+.. _todo.run_on_cluster:
+
+
+
+ * ``@split`` needs to be able to specify at run time the number of
+ resulting jobs without using wild cards
+ * legacy support for wild cards and file names.
+
+
+********************************************************************************************************
+Planned: Running python code (task functions) transparently on remote cluster nodes
+********************************************************************************************************
+
+ Wait until next release.
+
+ Will bump Ruffus to v.3.0 if we can run python jobs transparently on a cluster!
+
+ abstract out ``task.run_pooled_job_without_exceptions()`` as a function which can be supplied to ``pipeline_run``
+
+ Common "job" interface:
+
+ * marshalled arguments
+ * marshalled function
+ * submission timestamp
+
+ Returns
+ * completion timestamp
+ * returned values
+ * exception
+
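+ A very rough sketch of the light-weight, file-based hand-off described above (nothing here is an existing Ruffus API; all names are illustrative only):
+
+ .. code-block:: python
+
+     import pickle, sys, time
+
+     def write_job_file(job_file_name, func, args, kwargs):
+         """Serialise one job so that a remote worker process can run it."""
+         job = {"func"      : func,   # pickled by reference: must be importable on the remote node
+                "args"      : args,
+                "kwargs"    : kwargs,
+                "submitted" : time.time()}
+         pickle.dump(job, open(job_file_name, "wb"))
+
+     def run_job_file(job_file_name, result_file_name):
+         """What the remote worker would do: run the job and record the outcome."""
+         job = pickle.load(open(job_file_name, "rb"))
+         result = {"completed": None, "returned": None, "exception": None}
+         try:
+             result["returned"] = job["func"](*job["args"], **job["kwargs"])
+         except Exception:
+             result["exception"] = repr(sys.exc_info()[1])
+         result["completed"] = time.time()
+         pickle.dump(result, open(result_file_name, "wb"))
+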
+ #) Full version use libpythongrid?
+ * Christian Widmer <ckwidmer at gmail.com>
+ * Cheng Soon Ong <chengsoon.ong at unimelb.edu.au>
+ * https://code.google.com/p/pythongrid/source/browse/#git%2Fpythongrid
+ * Probably not good to base Ruffus entirely on libpythongrid: we want to minimise dependencies and avoid its sophisticated configuration policies etc.
+ #) Start with light-weight file-based protocol
+ * specify where the scripts should live
+ * use drmaa to start jobs
+ * have an executable ruffus module which knows how to load and deserialise (unmarshall) the function / parameters from disk. This would be what drmaa starts up, given the marshalled data as an argument
+ * time stamp
+ * "heart beat" to check that the job is still running
+ #) Next step: socket-based protocol
+ * use specified master port in ruffus script
+ * start remote processes using drmaa
+ * child receives marshalled data and the address::port in the ruffus script (head node) to initiate hand shake or die
+ * process recycling: run successive jobs on the same remote process for reduced overhead, until exceeds max number of jobs on the same process, min/max time on the same process
+ * resubmit if die (Don't do sophisticated stuff like libpythongrid).
+
+.. _todo.job_trickling:
+
+
+
+.. _todo.custom_parameters:
+
+************************************
+Planned: Custom parameter generator
+************************************
+
+ Request on mailing list
+
+ I've often wished that I could use an arbitrary function to process the input filepath instead of just a regex.
+
+ .. code-block:: python
+
+ def f(inputs, outputs, extra_param1, extra_param2):
+ # do something to generate parameters
+ return new_output_param, new_extra_param1, new_extra_param2
+
+ now f() can be used inside a Ruffus decorator to generate the outputs from inputs, instead of being forced to use a regex for the job.
+
+ Cheers,
+ Bernie.
+
+ Leverages built-in Ruffus functionality.
+ Don't have to write entire parameter generation from scratch.
+
+ * Gets passed an iterator where you can do a for loop to get input parameters / a flattened list of files
+ * Other parameters are forwarded as is
+ * The duty of the function is to ``yield`` input, output, extra parameters (see the sketch below)
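+
+ A hypothetical sketch of such a generator (the decorator or hook that would accept it does not exist yet; the names below are illustrative only):
+
+ .. code-block:: python
+
+     def my_param_generator(input_params, *extra_args):
+         """
+         Receives an iterator over the input parameters of the preceding task;
+         must yield the input, output and any extra parameters for each job
+         """
+         for input_file in input_params:
+             output_file = input_file + ".processed"
+             yield input_file, output_file, extra_args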
+
+
+ Simple to do but how do we prevent this from being a job-trickling barrier?
+
+ Postpone until we have an initial design for job-trickling: Ruffus v.4 ;-(
+
+
+.. _todo.gui:
+
+****************************************************************************
+Planned: Ruffus GUI interface.
+****************************************************************************
+
+ Desktop (PyQT) or web-based solution? I'd love to see an SVG pipeline picture that I could actually interact with.
+
+
+********************************************************************************************************
+Planned: Non-decorator / Function interface to Ruffus
+********************************************************************************************************
+
+
+.. _todo.intermediate_files:
+
+********************************************************************************************************
+Planned: Remove intermediate files
+********************************************************************************************************
+
+ Often large intermediate files are produced in the middle of a pipeline which could be
+ removed. However, their absence would cause the pipeline to appear out of date. What is
+ the best way to solve this?
+
+ In gmake, all intermediate files which are not marked ``.PRECIOUS`` are deleted.
+
+ We do not want to manually mark intermediate files for several reasons:
+ * The syntax would be horrible and clunky
+ * The gmake distinction between ``implicit`` and ``explicit`` rules is not one we
+ would like to impose on Ruffus
+ * Gmake uses statically determined (DAG) dependency trees so it is quite natural and
+ easy to prune intermediate paths
+
+ Our preferred solution should impose little to no semantic load on Ruffus, i.e. it should
+ not make it more complex / difficult to use. There are several alternatives we are
+ considering:
+
+ #) Have an **update** mode in which pipeline_run would ignore missing files and only run tasks with existing, out-of-date files.
+ #) Optionally ignore all out-of-date dependencies beyond a specified point in the pipeline
+ #) Add a decorator to flag sections of the pipeline where intermediate files can be removed
+
+
+ Option (1) is rather unnerving because it makes inadvertent errors difficult to detect.
+
+ Option (2) involves relying on the user of a script to remember the correct chain of dependencies in
+ often complicated pipelines. They would be advised to keep a flowchart to hand. Again,
+ the chances of error are much greater.
+
+ Option (3) springs from the observation by Andreas Heger that parts of a pipeline with
+ disposable intermediate files can usually be encapsulated as an autonomous section.
+ Within this subpipeline, all is well provided that the outputs of the last task are complete
+ and up-to-date with reference to the inputs of the first task. Intermediate files
+ could be removed with impunity.
+
+ The suggestion is that these autonomous subpipelines could be marked out using the Ruffus
+ decorator syntax::
+
+ #
+ # First task in autonomous subpipeline
+ #
+ @files("who.isit", "its.me")
+ def first_task(*args):
+ pass
+
+ #
+ # Several intermediate tasks
+ #
+ @transform(subpipeline_task1, suffix(".me"), ".her")
+ def task2_etc(*args):
+ pass
+
+ #
+ # Final task
+ #
+ @sub_pipeline(subpipeline_task1)
+ @transform(subpipeline_task1, suffix(".her"), ".you")
+ def final_task(*args):
+ pass
+
+ **@sub_pipeline** marks out all tasks between ``first_task`` and ``final_task`` and
+ intermediate files such as ``"its.me"``, ``"its.her"`` can be deleted. The pipeline will
+ only run if ``"its.you"`` is missing or out-of-date compared with ``"who.isit"``.
+
+ Over the next few Ruffus releases we will see if this is a good design, and whether
+ a better keyword can be found than **@sub_pipeline** (candidates include **@shortcut**
+ and **@intermediate**)
+
+
+.. _todo.retry:
+
+********************************************************************************************************
+Planned: @retry_on_error(NUM_OF_RETRIES)
+********************************************************************************************************
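+
+ A possible usage might look like this (hypothetical sketch; the decorator is not yet implemented and ``previous_task`` is an illustrative name):
+
+ .. code-block:: python
+
+     @retry_on_error(3)          # retry each failing job up to 3 times before giving up
+     @transform(previous_task, suffix(".input"), ".output")
+     def unreliable_task(input_file, output_file):
+         pass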
+
+.. _todo.cleanup:
+
+********************************************************************************************************
+Planned: Clean up
+********************************************************************************************************
+
+ The plan is to store the files and directories created via
+ a standard interface.
+
+ The placeholder for this is a function call, ``register_cleanup``.
+
+ Jobs can specify the files they created which need to be
+ deleted, by returning a list of file names from the job function (see the sketch below).
+
+ So::
+
+ raise Exception = Error
+
+ return False = halt pipeline now
+
+ return string / list of strings = cleanup files/directories later
+
+ return anything else = ignored
+
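+ For example, under this proposed convention a job might flag its intermediate file for deferred cleanup (hypothetical sketch; ``previous_task`` is an illustrative name and the return value is not acted upon by current Ruffus)::
+
+     @transform(previous_task, suffix(".input"), ".output")
+     def my_task(input_file, output_file):
+         scratch_file = output_file + ".intermediate"
+         open(scratch_file, "w").write(open(input_file).read())
+         open(output_file, "w").write(open(scratch_file).read())
+         # returning file names (rather than False or raising) marks them for later cleanup
+         return [scratch_file]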
+
+ The cleanup file/directory store interface can be connected to
+ a text file or a database.
+
+ The cleanup function would look like this::
+
+ pipeline_cleanup(cleanup_log("../cleanup.log"), [instance ="october19th" ])
+ pipeline_cleanup(cleanup_msql_db("user", "password", "hash_record_table"))
+
+ The parameters for where and how to store the list of created files could be
+ similarly passed to pipeline_run as an extra parameter::
+
+ pipeline_run(cleanup_log("../cleanup.log"), [instance ="october19th" ])
+ pipeline_run(cleanup_msql_db("user", "password", "hash_record_table"))
+
+ where `cleanup_log` and `cleanup_msql_db` are classes which have functions for
+
+ #) storing file
+ #) retrieving file
+ #) clearing entries
+
+
+ * Files would be deleted in reverse order, and directories after files.
+ * By default, only empty directories would be removed.
+
+ But this could be changed with a ``--forced_remove_dir`` option
+
+ * An ``--remove_empty_parent_directories`` option would be
+ supported by `os.removedirs(path) <http://docs.python.org/library/os.html#os.removedirs>`_.
+
diff --git a/doc/tutorials/new_tutorial/active_if.rst b/doc/tutorials/new_tutorial/active_if.rst
new file mode 100644
index 0000000..ce51051
--- /dev/null
+++ b/doc/tutorials/new_tutorial/active_if.rst
@@ -0,0 +1,149 @@
+.. include:: ../../global.inc
+.. include:: manual_chapter_numbers.inc
+
+.. index::
+ pair: @active_if; Tutorial
+
+.. _new_manual.active_if:
+
+##########################################################################################################################################
+|new_manual.active_if.chapter_num|: Turning parts of the pipeline on and off at runtime with :ref:`@active_if <decorators.active_if>`
+##########################################################################################################################################
+
+
+.. seealso::
+
+ * :ref:`Manual Table of Contents <new_manual.table_of_contents>`
+ * :ref:`@active_if syntax in detail <decorators.active_if>`
+
+
+***************************************
+Overview
+***************************************
+
+ It is sometimes useful to be able to switch on and off parts of a pipeline. For example, a pipeline
+ might have two different code paths depending on the type of data it is being asked to analyse.
+
+ One surprisingly easy way to do this is to use a python ``if`` statement around particular task functions:
+
+ .. code-block:: python
+ :emphasize-lines: 3,5
+
+ from ruffus import *
+
+ run_task1 = True
+
+ @originate(['a.foo', 'b.foo'])
+ def create_files(output_file):
+ open(output_file, "w")
+
+
+ if run_task1:
+ # might not run
+ @transform(create_files, suffix(".foo"), ".bar")
+ def foobar(input_file, output_file):
+ open(output_file, "w")
+
+
+ @transform(foobar, suffix(".bar"), ".result")
+ def wrap_up(input_file, output_file):
+ open(output_file, "w")
+
+
+ pipeline_run()
+
+
+ This simple solution has a number of drawbacks:
+
+ #. The on/off decision is a one-off event that happens when the script is loaded. Ideally, we
+ would like some flexibility, and to postpone the decision until ``pipeline_run()`` is invoked.
+ #. When ``if`` is false, the entire task function becomes invisible, and if there are any
+ downstream tasks, as in the above example, *Ruffus* will complain loudly about
+ missing dependencies.
+
+
+******************************************************************************
+:ref:`@active_if <decorators.active_if>` controls the state of tasks
+******************************************************************************
+
+
+ * Switches tasks on and off at run time depending on its parameters
+ * Evaluated each time ``pipeline_run``, ``pipeline_printout`` or ``pipeline_printout_graph`` is called.
+ * Dormant tasks behave as if they are up to date and have no output.
+
+ The design and initial implementation were contributed by Jacob Biesinger.
+
+ The following example shows its flexibility and syntax:
+
+ .. code-block:: python
+ :emphasize-lines: 20
+
+ from ruffus import *
+ run_if_true_1 = True
+ run_if_true_2 = False
+ run_if_true_3 = True
+
+
+ #
+ # task1
+ #
+ @originate(['a.foo', 'b.foo'])
+ def create_files(outfile):
+ """
+ create_files
+ """
+ open(outfile, "w").write(outfile + "\n")
+
+ #
+ # Only runs if all three run_if_true conditions are met
+ #
+ # @active_if determines if task is active
+ @active_if(run_if_true_1, lambda: run_if_true_2)
+ @active_if(run_if_true_3)
+ @transform(create_files, suffix(".foo"), ".bar")
+ def this_task_might_be_inactive(infile, outfile):
+ open(outfile, "w").write("%s -> %s\n" % (infile, outfile))
+
+
+ # @active_if switches off task because run_if_true_2 == False
+ pipeline_run(verbose = 3)
+
+ # @active_if switches on task because all run_if_true conditions are met
+ run_if_true_2 = True
+ pipeline_run(verbose = 3)
+
+
+ The task starts off inactive:
+
+
+ .. code-block:: pycon
+ :emphasize-lines: 1
+
+ >>> # @active_if switches off task "this_task_might_be_inactive" because run_if_true_2 == False
+ >>> pipeline_run(verbose = 3)
+
+ Task enters queue = create_files
+ create_files
+ Job = [None -> a.foo] Missing file [a.foo]
+ Job = [None -> b.foo] Missing file [b.foo]
+ Job = [None -> a.foo] completed
+ Job = [None -> b.foo] completed
+ Completed Task = create_files
+ Inactive Task = this_task_might_be_inactive
+
+ Now turn on the task:
+
+ .. code-block:: pycon
+ :emphasize-lines: 1
+
+ >>> # @active_if switches on task "this_task_might_be_inactive" because all run_if_true conditions are met
+ >>> run_if_true_2 = True
+ >>> pipeline_run(verbose = 3)
+
+ Task enters queue = this_task_might_be_inactive
+
+ Job = [a.foo -> a.bar] Missing file [a.bar]
+ Job = [b.foo -> b.bar] Missing file [b.bar]
+ Job = [a.foo -> a.bar] completed
+ Job = [b.foo -> b.bar] completed
+ Completed Task = this_task_might_be_inactive
+
diff --git a/doc/tutorials/new_tutorial/check_if_uptodate.rst b/doc/tutorials/new_tutorial/check_if_uptodate.rst
new file mode 100644
index 0000000..2323d29
--- /dev/null
+++ b/doc/tutorials/new_tutorial/check_if_uptodate.rst
@@ -0,0 +1,89 @@
+.. include:: ../../global.inc
+.. include:: manual_chapter_numbers.inc
+
+.. index::
+ pair: check_if_uptodate; Tutorial
+
+.. _new_manual.check_if_uptodate:
+
+########################################################################################################################################################################################################################################################################################################
+|new_manual.check_if_uptodate.chapter_num|: Esoteric: Writing custom functions to decide which jobs are up to date with :ref:`@check_if_uptodate<decorators.check_if_uptodate>`
+########################################################################################################################################################################################################################################################################################################
+
+
+.. seealso::
+
+ * :ref:`Manual Table of Contents <new_manual.table_of_contents>`
+ * :ref:`@check_if_uptodate syntax in detail<decorators.check_if_uptodate>`
+
+
+******************************************************************************
+**@check_if_uptodate** : Manual dependency checking
+******************************************************************************
+ Tasks specified with most decorators, such as
+ * :ref:`@split <decorators.split>`
+ * :ref:`@transform <decorators.transform>`
+ * :ref:`@merge <decorators.merge>`
+ * :ref:`@collate <decorators.collate>`
+ * :ref:`@subdivide <decorators.subdivide>`
+
+ have automatic dependency checking based on file modification times.
+
+ Sometimes, you might want to have more control over whether to run jobs, especially
+ if a task does not rely on or produce files (e.g. with :ref:`@parallel <decorators.parallel>`)
+
+ You can write your own custom function to decide whether to run a job.
+ This takes as many parameters as your task function, and needs to return a
+ tuple for whether an update is required, and why (i.e. ``tuple(bool, str)``)
+
+ This simple example creates the file ``"a.1"`` if it does not exist:
+
+ ::
+
+ from ruffus import *
+ @originate("a.1")
+ def create_if_necessary(output_file):
+ open(output_file, "w")
+
+ pipeline_run([])
+
+
+
+ could be rewritten more laboriously as:
+
+ ::
+
+
+ from ruffus import *
+ import os
+ def check_file_exists(input_file, output_file):
+ if os.path.exists(output_file):
+ return False, "File already exists"
+ return True, "%s is missing" % output_file
+
+ @parallel([[None, "a.1"]])
+ @check_if_uptodate(check_file_exists)
+ def create_if_necessary(input_file, output_file):
+ open(output_file, "w")
+
+ pipeline_run([create_if_necessary])
+
+
+
+ Both produce the same output:
+ ::
+
+ Task = create_if_necessary
+ Job = [null, "a.1"] completed
+
+
+
+
+.. note::
+
+ The function specified by :ref:`@check_if_uptodate <decorators.check_if_uptodate>` can be called
+ more than once for each job.
+
+ See the :ref:`description here <new_manual.dependencies>` of how *Ruffus* decides which tasks to run.
+
+
diff --git a/doc/tutorials/new_tutorial/checkpointing.rst b/doc/tutorials/new_tutorial/checkpointing.rst
new file mode 100644
index 0000000..f512806
--- /dev/null
+++ b/doc/tutorials/new_tutorial/checkpointing.rst
@@ -0,0 +1,400 @@
+.. include:: ../../global.inc
+.. include:: manual_chapter_numbers.inc
+
+.. index::
+ pair: Up to date; Tutorial
+ pair: Task completion; Tutorial
+ pair: Exceptions; Tutorial
+ pair: Interrupted Pipeline; Tutorial
+
+.. _new_manual.checkpointing:
+
+######################################################################################################
+|new_manual.checkpointing.chapter_num|: Checkpointing: Interrupted Pipelines and Exceptions
+######################################################################################################
+
+
+.. seealso::
+
+ * :ref:`Manual Table of Contents <new_manual.table_of_contents>`
+
+.. note::
+
+ Remember to look at the example code:
+
+ * :ref:`new_manual.checkpointing.code`
+
+
+
+***************************************
+Overview
+***************************************
+ .. image:: ../../images/theoretical_pipeline_schematic.png
+ :scale: 50
+
+ Computational pipelines transform your data in stages until the final result is produced.
+
+ By default, *Ruffus* uses file modification times for the **input** and **output** to determine
+ whether each stage of a pipeline is up-to-date or not. But what happens when the task
+ function is interrupted, whether from the command line or by error, half way through writing the output?
+
+ In this case, the half-formed, truncated and corrupt **Output** file will look newer than its **Input** and hence up-to-date.
+
+
+.. index::
+ pair: Tutorial; interrupting tasks
+
+.. _new_manual.interrupting_tasks:
+
+***************************************
+Interrupting tasks
+***************************************
+ Let us try with an example:
+
+ .. code-block:: python
+ :emphasize-lines: 20
+
+ from ruffus import *
+ import sys, time
+
+ # create initial files
+ @originate(['job1.start'])
+ def create_initial_files(output_file):
+ with open(output_file, "w") as oo: pass
+
+
+ #---------------------------------------------------------------
+ #
+ # long task to interrupt
+ #
+ @transform(create_initial_files, suffix(".start"), ".output")
+ def long_task(input_files, output_file):
+ with open(output_file, "w") as ff:
+ ff.write("Unfinished...")
+ # sleep for 2 seconds here so you can interrupt me
+ sys.stderr.write("Job started. Press ^C to interrupt me now...\n")
+ time.sleep(2)
+ ff.write("\nFinished")
+ sys.stderr.write("Job completed.\n")
+
+
+ # Run
+ pipeline_run([long_task])
+
+
+ When this script runs, it pauses in the middle with this message::
+
+ Job started. Press ^C to interrupt me now...
+
+ If you interrupt the script by pressing Control-C at this point, you will see that ``job1.output`` contains only ``Unfinished...``.
+ However, if you rerun the interrupted pipeline, Ruffus ignores the corrupt, incomplete file:
+
+ .. code-block:: pycon
+
+ >>> pipeline_run([long_task])
+ Job started. Press ^C to interrupt me now...
+ Job completed
+
+ And if you had run ``pipeline_printout``:
+
+ .. code-block:: pycon
+ :emphasize-lines: 8
+
+ >>> pipeline_printout(sys.stdout, [long_task], verbose=3)
+ ________________________________________
+ Tasks which will be run:
+
+ Task = long_task
+ Job = [job1.start
+ -> job1.output]
+ # Job needs update: Previous incomplete run leftover: [job1.output]
+
+
+ We can see that *Ruffus* magically knows that the previous run was incomplete, and that ``job1.output`` is detritus that needs to be discarded.
+
+
+.. _new_manual.logging_completed_jobs:
+
+******************************************
+Checkpointing: only log completed jobs
+******************************************
+
+ All is revealed if you were to look in the working directory. *Ruffus* has created a file called ``.ruffus_history.sqlite``.
+ In this `SQLite <https://sqlite.org/>`_ database, *Ruffus* logs only those files which are the result of a completed job,
+ all other files are suspect.
+ This file checkpoint database is a fail-safe, not a substitute for checking file modification times. If the **Input** or **Output** files are
+ modified, the pipeline will rerun.
+
+ By default, *Ruffus* saves only file timestamps to the SQLite database but you can also add a checksum of the pipeline task function body or parameters.
+ This behaviour can be controlled by setting the ``checksum_level`` parameter
+ in ``pipeline_run()``. For example, if you do not want to save any timestamps or checksums:
+
+ .. code-block:: python
+
+ pipeline_run(checksum_level = 0)
+
+ CHECKSUM_FILE_TIMESTAMPS = 0 # only rerun when the file timestamps are out of date (classic mode)
+ CHECKSUM_HISTORY_TIMESTAMPS = 1 # Default: also rerun when the history shows a job as being out of date
+ CHECKSUM_FUNCTIONS = 2 # also rerun when function body has changed
+ CHECKSUM_FUNCTIONS_AND_PARAMS = 3 # also rerun when function parameters or function body change
+
+
+ .. note::
+
+ Checksums are calculated from the `pickled <http://docs.python.org/2/library/pickle.html>`_ string for the function code and parameters.
+ If pickling fails, Ruffus will degrade gracefully to saving just the timestamp in the SQLite database.
+
+.. _new_manual.history_files_cannot_be_shared:
+
+****************************************************************************
+Do not share the same checkpoint file across multiple pipelines!
+****************************************************************************
+
+ The name of the Ruffus python script is not saved in the checkpoint file alongside timestamps and checksums.
+ That means that you can rename your pipeline source code file without having to rerun the pipeline!
+ The tradeoff is that if multiple pipelines are run from the same directory, and save their histories to the
+ same SQLite database file, and if their file names overlap (all of these are bad ideas anyway!), this is
+ bound to be a source of confusion.
+
+ Luckily, the name and path of the checkpoint file can also be changed for each pipeline.
+
+.. _new_manual.changing_history_file_name:
+
+****************************************************************************
+Setting checkpoint file names
+****************************************************************************
+
+ .. warning::
+
+ Some file systems do not appear to support SQLite at all:
+
+ There are reports that SQLite databases have `file locking problems <http://beets.radbox.org/blog/sqlite-nightmare.html>`_ on Lustre.
+
+ The best solution would be to keep the SQLite database on an alternate compatible file system away from the working directory if possible.
+
+============================================================================================================================================================
+environment variable ``DEFAULT_RUFFUS_HISTORY_FILE``
+============================================================================================================================================================
+
+ The name of the checkpoint file is the value of the environment variable ``DEFAULT_RUFFUS_HISTORY_FILE``.
+
+ .. code-block:: bash
+
+     export DEFAULT_RUFFUS_HISTORY_FILE=/some/where/.ruffus_history.sqlite
+
+ This gives considerable flexibility, and allows a system-wide policy to be set so that all Ruffus checkpoint files end up in logical, predictable locations.
+
+ .. note::
+
+ It is your responsibility to make sure that the requisite destination directories for the checkpoint files exist beforehand!
+
+
+ Where this is missing, the checkpoint file defaults to ``.ruffus_history.sqlite`` in your working directory
+
+
+============================================================================================================================================================
+Setting the checkpoint file name manually
+============================================================================================================================================================
+
+ This checkpoint file name can always be overridden as a parameter to Ruffus functions:
+
+ .. code-block:: python
+
+ pipeline_run(history_file = "XXX")
+ pipeline_printout(history_file = "XXX")
+ pipeline_printout_graph(history_file = "XXX")
+
+
+ There is also built-in support in ``Ruffus.cmdline``. So if you use this module, you can simply add to your command line:
+
+ .. code-block:: bash
+
+ # use a custom checkpoint file
+ myscript --checksum_file_name .myscript.ruffus_history.sqlite
+
+ This takes precedence over everything else.
+
+
+
+****************************************************************************
+Useful checkpoint file name policies ``DEFAULT_RUFFUS_HISTORY_FILE``
+****************************************************************************
+
+ If the pipeline script is called ``test/bin/scripts/run.me.py``, then these are the resulting checkpoint file locations:
+
+============================================================================================================================================================
+Example 1: same directory, different name
+============================================================================================================================================================
+ If the environment variable is:
+
+ .. code-block:: bash
+
+ export DEFAULT_RUFFUS_HISTORY_FILE=.{basename}.ruffus_history.sqlite
+
+ Then the job checkpoint database for ``run.me.py`` will be ``.run.me.ruffus_history.sqlite``
+
+ .. code-block:: bash
+
+ /test/bin/scripts/run.me.py
+        /test/bin/scripts/.run.me.ruffus_history.sqlite
+
+============================================================================================================================================================
+Example 2: Different directory, same name
+============================================================================================================================================================
+
+ .. code-block:: bash
+
+ export DEFAULT_RUFFUS_HISTORY_FILE=/common/path/for/job_history/.{basename}.ruffus_history.sqlite
+
+ .. code-block:: bash
+
+ /common/path/for/job_history/.run.me.ruffus_history.sqlite
+
+
+============================================================================================================================================================
+Example 3: Different directory, same name but keep one level of subdirectory to disambiguate
+============================================================================================================================================================
+
+ .. code-block:: bash
+
+ export DEFAULT_RUFFUS_HISTORY_FILE=/common/path/for/job_history/{subdir[0]}/.{basename}.ruffus_history.sqlite
+
+
+ .. code-block:: bash
+
+ /common/path/for/job_history/scripts/.run.me.ruffus_history.sqlite
+
+
+
+============================================================================================================================================================
+Example 4: Nested in a common directory
+============================================================================================================================================================
+
+ .. code-block:: bash
+
+ export DEFAULT_RUFFUS_HISTORY_FILE=/common/path/for/job_history/{path}/.{basename}.ruffus_history.sqlite
+
+ .. code-block:: bash
+
+ /common/path/for/job_history/test/bin/scripts/.run.me.ruffus_history.sqlite
+
+
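+    As a rough, purely illustrative guide (plain Python, not the *Ruffus* substitution machinery itself),
+    the keywords used above map onto the script path approximately as follows for ``/test/bin/scripts/run.me.py``:
+
+    .. code-block:: python
+
+        import os
+
+        script   = "/test/bin/scripts/run.me.py"
+
+        path     = os.path.dirname(script)                         # "/test/bin/scripts"
+        basename = os.path.splitext(os.path.basename(script))[0]   # "run.me"
+        subdir   = path.split(os.sep)[::-1]                        # ["scripts", "bin", "test", ""]
+
+        # Example 3 above would therefore expand to something like:
+        print("/common/path/for/job_history/%s/.%s.ruffus_history.sqlite"
+              % (subdir[0], basename))
+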
+
+
+.. index::
+ pair: Tutorial; Regenerating the checkpoint file
+
+.. _new_manual.regenerating_history_file:
+
+******************************************************************************
+Regenerating the checkpoint file
+******************************************************************************
+
+ Occasionally you may need to re-generate the checkpoint file.
+
+ This could be necessary:
+
+ * because you are upgrading from a previous version of Ruffus without checkpoint file support
+    * on the rare occasions when the SQLite file becomes corrupted and has to be deleted
+ * if you wish to circumvent the file checking of Ruffus after making some manual changes!
+
+ To do this, it is only necessary to call ``pipeline_run`` appropriately:
+
+ .. code-block:: python
+
+ CHECKSUM_REGENERATE = 2
+        pipeline_run(touch_files_only = CHECKSUM_REGENERATE)
+
+
+ Similarly, if you are using ``Ruffus.cmdline``, you can call:
+
+ .. code-block:: bash
+
+ myscript --recreate_database
+
+
+    Note that this regenerates the checkpoint file to reflect the existing *Input* and *Output* files on disk.
+    In other words, the onus is on you to make sure there are no half-formed, corrupt files. On the other hand,
+    the pipeline does not need to have been run successfully beforehand for this to work. Essentially, Ruffus
+    pretends to run the pipeline, logging all the files with consistent file modification times, and stopping
+    at the first tasks which appear out of date or incomplete.
+
+
+.. index::
+ pair: rules; for rerunning jobs
+
+.. _new_manual.skip_up_to_date.rules:
+
+******************************************************************************
+Rules for determining if files are up to date
+******************************************************************************
+    *Ruffus* uses the following simple rules (a minimal sketch follows the list).
+
+    #. The pipeline stage will be rerun if:
+
+       * any of the **Input** files are new (newer than the **Output** files), or
+       * any of the **Output** files are missing.
+
+    #. In addition, it is possible to run jobs which create files from scratch:
+
+       * if no **Input** file names are supplied, the job will only run if any **Output** file is missing.
+
+    #. Finally, if no **Output** file names are supplied, the job will always run.
+
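+    A minimal sketch illustrating these rules (the file names are arbitrary): the ``@originate``
+    task has no **Input** and so reruns only when its **Output** is missing, while the
+    ``@transform`` task reruns when its **Input** is newer than its **Output** or the **Output**
+    is absent:
+
+    .. code-block:: python
+
+        from ruffus import *
+
+        # no Input: runs only if "start.txt" is missing
+        @originate("start.txt")
+        def create_start_file(output_file):
+            with open(output_file, "w") as oo:
+                oo.write("starting data\n")
+
+        # reruns if the Input is newer than the Output, or the Output is missing
+        @transform(create_start_file, suffix(".txt"), ".copy")
+        def copy_file(input_file, output_file):
+            with open(input_file) as ii, open(output_file, "w") as oo:
+                oo.write(ii.read())
+
+        pipeline_run()
+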
+
+
+.. index::
+ pair: Exception; Missing input files
+
+******************************************************************************
+Missing files generate exceptions
+******************************************************************************
+
+    If the *Input* files for a job are missing, the task function will have no way
+ to produce its *output*. In this case, a ``MissingInputFileError`` exception will be raised
+ automatically. For example,
+
+ ::
+
+ task.MissingInputFileError: No way to run job: Input file ['a.1'] does not exist
+ for Job = ["a.1" -> "a.2", "A file"]
+
+.. index::
+ pair: Manual; Timestamp resolution
+
+******************************************************************************
+Caveats: Coarse Timestamp resolution
+******************************************************************************
+
+ Note that modification times have precision to the nearest second under some older file systems
+    (ext2/ext3?). This may also be true for networked file systems.
+
+ *Ruffus* supplements the file system time resolution by independently recording the timestamp at
+ full OS resolution (usually to at least the millisecond) at job completion, when presumably the **Output**
+ files will have been created.
+
+ However, *Ruffus* only does this if the discrepancy between file time and system time is less than a second
+    (due to poor file system timestamp resolution). If there are large mismatches between the two, due, for example,
+    to network time slippage or misconfiguration, *Ruffus* reverts to using the file system time and adds a one second
+ delay between jobs (via ``time.sleep()``) to make sure input and output file stamps are different.
+
+    If you know that your file system has coarse-grained timestamp resolution, you can always revert to this very conservative behaviour,
+    at the price of some annoying one-second pauses, by setting :ref:`pipeline_run(one_second_per_job = True) <pipeline_functions.pipeline_run>`.
+
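+    For example, to opt into this conservative behaviour explicitly:
+
+    .. code-block:: python
+
+        # accept one-second pauses between jobs in exchange for robust up-to-date checks
+        pipeline_run(one_second_per_job = True)
+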
+
+
+.. index::
+ pair: Manual; flag files
+
+******************************************************************************
+Flag files: Checkpointing for the paranoid
+******************************************************************************
+
+    One other way of checkpointing your pipelines is to create an extra "flag" file as an additional
+    **Output** file name. The flag file is only created or updated when everything else in the
+    job has completed successfully and been written to disk. A missing or out-of-date flag file would
+    then be a sign to Ruffus that the task never completed properly in the first place.
+
+    This used to be the best way of performing checkpointing in Ruffus and is still
+    the most bulletproof way of proceeding: even the loss or corruption
+    of the checkpoint file would not affect things greatly.
+
+    Nevertheless, flag files are largely superfluous in modern *Ruffus*.
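+    A hedged sketch of the flag-file pattern described above (the ``.done`` suffix and the task
+    names are illustrative, not a *Ruffus* convention):
+
+    .. code-block:: python
+
+        from ruffus import *
+
+        @originate(["sample1.input", "sample2.input"])
+        def make_inputs(output_file):
+            with open(output_file, "w") as oo:
+                oo.write("some data\n")
+
+        @transform(make_inputs,
+                   suffix(".input"),
+                   # the real result plus an extra "flag" file as a second Output
+                   [".result", ".result.done"])
+        def analyse(input_file, output_files):
+            result_file, flag_file = output_files
+
+            # do the real work first ...
+            with open(input_file) as ii, open(result_file, "w") as oo:
+                oo.write(ii.read().upper())
+
+            # ... and only then touch the flag file to mark successful completion
+            with open(flag_file, "w") as flag:
+                pass
+
+        pipeline_run()
+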
diff --git a/doc/tutorials/new_tutorial/checkpointing_code.rst b/doc/tutorials/new_tutorial/checkpointing_code.rst
new file mode 100644
index 0000000..27632e8
--- /dev/null
+++ b/doc/tutorials/new_tutorial/checkpointing_code.rst
@@ -0,0 +1,23 @@
+.. include:: ../../global.inc
+.. include:: manual_chapter_numbers.inc
+
+.. _new_manual.checkpointing.code:
+
+#################################################################################################################
+|new_manual.checkpointing.chapter_num|: Python Code for Checkpointing: Interrupted Pipelines and Exceptions
+#################################################################################################################
+
+.. seealso::
+
+ * :ref:`Manual Table of Contents <new_manual.table_of_contents>`
+ * :ref:`Back to |new_manual.checkpointing.chapter_num|: Interrupted Pipelines and Exceptions <new_manual.checkpointing>`
+
+
+************************************************************************
+Code for the :ref:`suffix() <decorators.suffix>` example
+************************************************************************
+ .. code-block:: python
+
+ from ruffus import *
+
+
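+    A minimal, hypothetical sketch of a ``suffix()``-based pipeline along the lines the heading
+    suggests (the file names are illustrative):
+
+    .. code-block:: python
+
+        from ruffus import *
+
+        @originate(["job1.input", "job2.input"])
+        def make_input(output_file):
+            with open(output_file, "w") as oo:
+                oo.write("data\n")
+
+        # suffix(".input") matches the end of each Input file name
+        # and ".output" replaces it to build the Output file name
+        @transform(make_input, suffix(".input"), ".output")
+        def run_job(input_file, output_file):
+            with open(input_file) as ii, open(output_file, "w") as oo:
+                oo.write(ii.read())
+
+        pipeline_run()
+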
diff --git a/doc/tutorials/new_tutorial/combinatorics.rst b/doc/tutorials/new_tutorial/combinatorics.rst
new file mode 100644
index 0000000..3e89ca1
--- /dev/null
+++ b/doc/tutorials/new_tutorial/combinatorics.rst
@@ -0,0 +1,442 @@
+.. include:: ../../global.inc
+.. include:: manual_chapter_numbers.inc
+
+.. index::
+ pair: combinatorics; Tutorial
+
+.. _new_manual.combinatorics:
+
+##################################################################################################################################################################################################################################################
+|new_manual.combinatorics.chapter_num|: :ref:`@combinations<decorators.combinations>`, :ref:`@permutations<decorators.permutations>` and all versus all :ref:`@product<decorators.product>`
+##################################################################################################################################################################################################################################################
+
+.. seealso::
+
+ * :ref:`Manual Table of Contents <new_manual.table_of_contents>`
+ * :ref:`@combinations_with_replacement <decorators.combinations_with_replacement>`
+ * :ref:`@combinations <decorators.combinations>`
+ * :ref:`@permutations <decorators.permutations>`
+ * :ref:`@product <decorators.product>`
+ * :ref:`formatter() <decorators.formatter>`
+
+.. note::
+
+ Remember to look at the example code:
+
+ * :ref:`new_manual.combinatorics.code`
+
+
+**************************************
+Overview
+**************************************
+
+ A surprising number of computational problems involve some sort of all versus all calculations.
+    Previously, this would have required all the parameters to be generated on the fly
+    by a custom function supplied to :ref:`@files<decorators.files_on_the_fly>`.
+
+ From version 2.4, *Ruffus* supports :ref:`@combinations_with_replacement <decorators.combinations_with_replacement>`,
+ :ref:`@combinations <decorators.combinations>`, :ref:`@permutations <decorators.permutations>`,
+ :ref:`@product <decorators.product>`.
+
+    These provide, as far as possible, all the functionality of the four combinatoric iterators
+    of the same name in the standard Python `itertools <http://docs.python.org/2/library/itertools.html>`__
+    module.
+
+***************************************************************************
+Generating output with :ref:`formatter()<decorators.formatter>`
+***************************************************************************
+
+ String replacement always takes place via :ref:`formatter()<decorators.formatter>`. Unfortunately,
+ the other *Ruffus* workhorses of :ref:`regex()<decorators.regex>` and :ref:`suffix()<decorators.suffix>`
+ do not have sufficient syntactic flexibility.
+
+    Each combinatoric decorator deals with multiple sets of inputs, whether these are:
+
+ * a self-self comparison (such as :ref:`@combinations_with_replacement <decorators.combinations_with_replacement>`,
+ :ref:`@combinations <decorators.combinations>`, :ref:`@permutations <decorators.permutations>`) or,
+ * a self-other comparison (:ref:`@product <decorators.product>`)
+
+ The replacement strings thus require an extra level of indirection to refer to
+ parsed components.
+
+ #. The first level refers to which *set* of inputs.
+ #. The second level refers to which input file in any particular *set* of inputs.
+
+
+ For example, if the *inputs* are **[A1,A2],[B1,B2],[C1,C2] vs [P1,P2],[Q1,Q2],[R1,R2] vs [X1,X2],[Y1,Y2],[Z1,Z2]**,
+ then ``'{basename[2][0]}'`` is the `basename <http://docs.python.org/2/library/os.path.html#os.path.basename>`__ for
+
+ * the third set of inputs (**X,Y,Z**) and
+ * the first file name string in each **Input** of that set (**X1, Y1, Z1**)
+
+
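+    Purely as a simplified illustration of these two levels of indexing (plain Python, not the
+    *Ruffus* replacement machinery): the first index selects the set, the second selects the file
+    name within that set's **Input**:
+
+    .. code-block:: python
+
+        import os
+
+        # three sets of Input, each Input here a single pair of file names
+        inputs = [["A1.txt", "A2.txt"],     # set 0
+                  ["P1.txt", "P2.txt"],     # set 1
+                  ["X1.txt", "X2.txt"]]     # set 2
+
+        first_file_of_third_set = inputs[2][0]                      # "X1.txt"
+        basename = os.path.splitext(first_file_of_third_set)[0]     # "X1"
+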
+
+.. _new_manual.product:
+
+***************************************************************************
+All vs all comparisons with :ref:`@product <decorators.product>`
+***************************************************************************
+
+ :ref:`@product <decorators.product>` generates the Cartesian **product** between sets of input files,
+ i.e. all vs all comparisons.
+
+ The effect is analogous to a nested for loop.
+
+ :ref:`@product <decorators.product>` can be useful, for example, in bioinformatics for finding
+ the corresponding genes (orthologues) for a set of proteins in multiple species.
+
+ .. code-block:: pycon
+ :emphasize-lines: 2
+
+ >>> from itertools import product
+ >>> # product('ABC', 'XYZ') --> AX AY AZ BX BY BZ CX CY CZ
+ >>> [ "".join(a) for a in product('ABC', 'XYZ')]
+ ['AX', 'AY', 'AZ', 'BX', 'BY', 'BZ', 'CX', 'CY', 'CZ']
+
+
+
+    This example calculates the **@product** of the **A,B**, **P,Q** and **X,Y** files:
+
+ .. code-block:: python
+ :emphasize-lines: 4,17,19,22,25,27,28,29,30,32,34,35,36
+
+ from ruffus import *
+ from ruffus.combinatorics import *
+
+ # Three sets of initial files
+ @originate([ 'a.start', 'b.start'])
+ def create_initial_files_ab(output_file):
+ with open(output_file, "w") as oo: pass
+
+ @originate([ 'p.start', 'q.start'])
+ def create_initial_files_pq(output_file):
+ with open(output_file, "w") as oo: pass
+
+ @originate([ ['x.1_start', 'x.2_start'],
+ ['y.1_start', 'y.2_start'] ])
+ def create_initial_files_xy(output_file):
+ with open(output_file, "w") as oo: pass
+
+ # @product
+ @product( create_initial_files_ab, # Input
+ formatter("(.start)$"), # match input file set # 1
+
+ create_initial_files_pq, # Input
+ formatter("(.start)$"), # match input file set # 2
+
+ create_initial_files_xy, # Input
+ formatter("(.start)$"), # match input file set # 3
+
+ "{path[0][0]}/" # Output Replacement string
+ "{basename[0][0]}_vs_" #
+ "{basename[1][0]}_vs_" #
+ "{basename[2][0]}.product", #
+
+ "{path[0][0]}", # Extra parameter: path for 1st set of files, 1st file name
+
+ ["{basename[0][0]}", # Extra parameter: basename for 1st set of files, 1st file name
+ "{basename[1][0]}", # 2nd
+ "{basename[2][0]}", # 3rd
+ ])
+ def product_task(input_file, output_parameter, shared_path, basenames):
+ print "# basenames = ", " ".join(basenames)
+ print "input_parameter = ", input_file
+ print "output_parameter = ", output_parameter, "\n"
+
+
+ #
+ # Run
+ #
+ pipeline_run(verbose=0)
+
+
+ This results in:
+
+ .. code-block:: pycon
+ :emphasize-lines: 2,6,10,14,18,22,26,30
+
+ >>> pipeline_run(verbose=0)
+
+ # basenames = a p x
+ input_parameter = ('a.start', 'p.start', 'x.start')
+ output_parameter = /home/lg/temp/a_vs_p_vs_x.product
+
+ # basenames = a p y
+ input_parameter = ('a.start', 'p.start', 'y.start')
+ output_parameter = /home/lg/temp/a_vs_p_vs_y.product
+
+ # basenames = a q x
+ input_parameter = ('a.start', 'q.start', 'x.start')
+ output_parameter = /home/lg/temp/a_vs_q_vs_x.product
+
+ # basenames = a q y
+ input_parameter = ('a.start', 'q.start', 'y.start')
+ output_parameter = /home/lg/temp/a_vs_q_vs_y.product
+
+ # basenames = b p x
+ input_parameter = ('b.start', 'p.start', 'x.start')
+ output_parameter = /home/lg/temp/b_vs_p_vs_x.product
+
+ # basenames = b p y
+ input_parameter = ('b.start', 'p.start', 'y.start')
+ output_parameter = /home/lg/temp/b_vs_p_vs_y.product
+
+ # basenames = b q x
+ input_parameter = ('b.start', 'q.start', 'x.start')
+ output_parameter = /home/lg/temp/b_vs_q_vs_x.product
+
+ # basenames = b q y
+ input_parameter = ('b.start', 'q.start', 'y.start')
+ output_parameter = /home/lg/temp/b_vs_q_vs_y.product
+
+
+.. _new_manual.permutations:
+
+******************************************************************************************************************************************************
+Permute all k-tuple orderings of inputs without repeats using :ref:`@permutations <decorators.permutations>`
+******************************************************************************************************************************************************
+
+    Generates the **permutations** for all the elements of a set of **Input** (e.g. **A B C D**):
+
+        * r-length tuples of *input* elements
+        * excluding repeated elements (**A A**)
+        * where the order of the tuples is significant (both **A B** and **B A** are included).
+
+ .. code-block:: pycon
+ :emphasize-lines: 2
+
+ >>> from itertools import permutations
+ >>> # permutations('ABCD', 2) --> AB AC AD BA BC BD CA CB CD DA DB DC
+ >>> [ "".join(a) for a in permutations("ABCD", 2)]
+ ['AB', 'AC', 'AD', 'BA', 'BC', 'BD', 'CA', 'CB', 'CD', 'DA', 'DB', 'DC']
+
+    The following example calculates the **@permutations** of the **A,B,C,D** files:
+
+ .. code-block:: python
+ :emphasize-lines: 13,17,20,25,28-30
+
+ from ruffus import *
+ from ruffus.combinatorics import *
+
+ # initial file pairs
+ @originate([ ['A.1_start', 'A.2_start'],
+ ['B.1_start', 'B.2_start'],
+ ['C.1_start', 'C.2_start'],
+ ['D.1_start', 'D.2_start']])
+ def create_initial_files_ABCD(output_files):
+ for output_file in output_files:
+ with open(output_file, "w") as oo: pass
+
+ # @permutations
+ @permutations(create_initial_files_ABCD, # Input
+ formatter(), # match input files
+
+ # tuple of 2 at a time
+ 2,
+
+ # Output Replacement string
+ "{path[0][0]}/"
+ "{basename[0][1]}_vs_"
+ "{basename[1][1]}.permutations",
+
+ # Extra parameter: path for 1st set of files, 1st file name
+ "{path[0][0]}",
+
+ # Extra parameter
+ ["{basename[0][0]}", # basename for 1st set of files, 1st file name
+ "{basename[1][0]}", # 2nd
+ ])
+ def permutations_task(input_file, output_parameter, shared_path, basenames):
+ print " - ".join(basenames)
+
+
+ #
+ # Run
+ #
+ pipeline_run(verbose=0)
+
+
+ This results in:
+
+ .. code-block:: pycon
+
+ >>> pipeline_run(verbose=0)
+
+ A - B
+ A - C
+ A - D
+ B - A
+ B - C
+ B - D
+ C - A
+ C - B
+ C - D
+ D - A
+ D - B
+ D - C
+
+.. _new_manual.combinations:
+
+********************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
+Select unordered k-tuples within inputs excluding repeated elements using :ref:`@combinations <decorators.combinations>`
+********************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
+
+    Generates the **combinations** for all the elements of a set of **Input** (e.g. **A B C D**):
+
+        * r-length tuples of *input* elements
+        * without repeated elements (**A A**)
+        * where the order of the tuples is irrelevant (either **A B** or **B A**, not both).
+
+ :ref:`@combinations <decorators.combinations>` can be useful, for example, in calculating a transition probability matrix
+ for a set of states. The diagonals are meaningless "self-self" transitions which are excluded.
+
+ .. code-block:: pycon
+ :emphasize-lines: 2
+
+ >>> from itertools import combinations
+ >>> # combinations('ABCD', 3) --> ABC ABD ACD BCD
+ >>> [ "".join(a) for a in combinations("ABCD", 3)]
+ ['ABC', 'ABD', 'ACD', 'BCD']
+
+ This example calculates the **@combinations** of **A,B,C,D** files
+
+ .. code-block:: python
+ :emphasize-lines: 13,17,20,25,28-30
+
+ from ruffus import *
+ from ruffus.combinatorics import *
+
+ # initial file pairs
+ @originate([ ['A.1_start', 'A.2_start'],
+ ['B.1_start', 'B.2_start'],
+ ['C.1_start', 'C.2_start'],
+ ['D.1_start', 'D.2_start']])
+ def create_initial_files_ABCD(output_files):
+ for output_file in output_files:
+ with open(output_file, "w") as oo: pass
+
+ # @combinations
+ @combinations(create_initial_files_ABCD, # Input
+ formatter(), # match input files
+
+ # tuple of 3 at a time
+ 3,
+
+ # Output Replacement string
+ "{path[0][0]}/"
+ "{basename[0][1]}_vs_"
+ "{basename[1][1]}_vs_"
+ "{basename[2][1]}.combinations",
+
+ # Extra parameter: path for 1st set of files, 1st file name
+ "{path[0][0]}",
+
+ # Extra parameter
+ ["{basename[0][0]}", # basename for 1st set of files, 1st file name
+ "{basename[1][0]}", # 2nd
+ "{basename[2][0]}", # 3rd
+ ])
+ def combinations_task(input_file, output_parameter, shared_path, basenames):
+ print " - ".join(basenames)
+
+
+ #
+ # Run
+ #
+ pipeline_run(verbose=0)
+
+
+ This results in:
+
+ .. code-block:: pycon
+
+ >>> pipeline_run(verbose=0)
+ A - B - C
+ A - B - D
+ A - C - D
+ B - C - D
+
+.. _new_manual.combinations_with_replacement:
+
+********************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
+Select unordered k-tuples within inputs *including* repeated elements with :ref:`@combinations_with_replacement <decorators.combinations_with_replacement>`
+********************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
+
+    Generates the **combinations_with_replacement** for all the elements of a set of **Input** (e.g. **A B C D**):
+
+        * r-length tuples of *input* elements
+        * including repeated elements (**A A**)
+        * where the order of the tuples is irrelevant (either **A B** or **B A**, not both).
+
+
+ :ref:`@combinations_with_replacement <decorators.combinations_with_replacement>` can be useful,
+ for example, in bioinformatics for finding evolutionary relationships between genetic elements such as proteins
+    and genes. Self-self comparisons can be used as a baseline for scaling similarity scores.
+
+ .. code-block:: pycon
+ :emphasize-lines: 2
+
+ >>> from itertools import combinations_with_replacement
+ >>> # combinations_with_replacement('ABCD', 2) --> AA AB AC AD BB BC BD CC CD DD
+ >>> [ "".join(a) for a in combinations_with_replacement('ABCD', 2)]
+ ['AA', 'AB', 'AC', 'AD', 'BB', 'BC', 'BD', 'CC', 'CD', 'DD']
+
+ This example calculates the **@combinations_with_replacement** of **A,B,C,D** files
+
+ .. code-block:: python
+ :emphasize-lines: 13,17,20,25,28-30
+
+ from ruffus import *
+ from ruffus.combinatorics import *
+
+ # initial file pairs
+ @originate([ ['A.1_start', 'A.2_start'],
+ ['B.1_start', 'B.2_start'],
+ ['C.1_start', 'C.2_start'],
+ ['D.1_start', 'D.2_start']])
+ def create_initial_files_ABCD(output_files):
+ for output_file in output_files:
+ with open(output_file, "w") as oo: pass
+
+ # @combinations_with_replacement
+ @combinations_with_replacement(create_initial_files_ABCD, # Input
+ formatter(), # match input files
+
+ # tuple of 2 at a time
+ 2,
+
+ # Output Replacement string
+ "{path[0][0]}/"
+ "{basename[0][1]}_vs_"
+ "{basename[1][1]}.combinations_with_replacement",
+
+ # Extra parameter: path for 1st set of files, 1st file name
+ "{path[0][0]}",
+
+ # Extra parameter
+ ["{basename[0][0]}", # basename for 1st set of files, 1st file name
+                    "{basename[1][0]}",  # 2nd
+ ])
+ def combinations_with_replacement_task(input_file, output_parameter, shared_path, basenames):
+ print " - ".join(basenames)
+
+
+ #
+ # Run
+ #
+ pipeline_run(verbose=0)
+
+
+ This results in:
+
+ .. code-block:: pycon
+
+ >>> pipeline_run(verbose=0)
+ A - A
+ A - B
+ A - C
+ A - D
+ B - B
+ B - C
+ B - D
+ C - C
+ C - D
+ D - D
+
diff --git a/doc/tutorials/new_tutorial/combinatorics_code.rst b/doc/tutorials/new_tutorial/combinatorics_code.rst
new file mode 100644
index 0000000..3113601
--- /dev/null
+++ b/doc/tutorials/new_tutorial/combinatorics_code.rst
@@ -0,0 +1,308 @@
+.. include:: ../../global.inc
+.. include:: manual_chapter_numbers.inc
+
+.. _new_manual.combinatorics.code:
+
+############################################################################################################################################################################################################
+|new_manual.combinatorics.chapter_num|: Python Code for :ref:`@combinations<decorators.combinations>`, :ref:`@permutations<decorators.permutations>` and all versus all :ref:`@product<decorators.product>`
+############################################################################################################################################################################################################
+
+.. seealso::
+
+ * :ref:`Manual Table of Contents <new_manual.table_of_contents>`
+ * :ref:`@combinations_with_replacement <decorators.combinations_with_replacement>`
+ * :ref:`@combinations <decorators.combinations>`
+ * :ref:`@permutations <decorators.permutations>`
+ * :ref:`@product <decorators.product>`
+ * Back to |new_manual.combinatorics.chapter_num|: :ref:`Preparing directories for output with @combinatorics() <new_manual.combinatorics>`
+
+***************************************************************************
+Example code for :ref:`@product <decorators.product>`
+***************************************************************************
+
+ .. code-block:: python
+ :emphasize-lines: 4,17,19,22,25,27,28,29,30,32,34,35,36
+
+ from ruffus import *
+ from ruffus.combinatorics import *
+
+ # Three sets of initial files
+ @originate([ 'a.start', 'b.start'])
+ def create_initial_files_ab(output_file):
+ with open(output_file, "w") as oo: pass
+
+ @originate([ 'p.start', 'q.start'])
+ def create_initial_files_pq(output_file):
+ with open(output_file, "w") as oo: pass
+
+ @originate([ ['x.1_start', 'x.2_start'],
+ ['y.1_start', 'y.2_start'] ])
+ def create_initial_files_xy(output_file):
+ with open(output_file, "w") as oo: pass
+
+ # @product
+ @product( create_initial_files_ab, # Input
+ formatter("(.start)$"), # match input file set # 1
+
+ create_initial_files_pq, # Input
+ formatter("(.start)$"), # match input file set # 2
+
+ create_initial_files_xy, # Input
+ formatter("(.start)$"), # match input file set # 3
+
+ "{path[0][0]}/" # Output Replacement string
+ "{basename[0][0]}_vs_" #
+ "{basename[1][0]}_vs_" #
+ "{basename[2][0]}.product", #
+
+ "{path[0][0]}", # Extra parameter: path for 1st set of files, 1st file name
+
+ ["{basename[0][0]}", # Extra parameter: basename for 1st set of files, 1st file name
+ "{basename[1][0]}", # 2nd
+ "{basename[2][0]}", # 3rd
+ ])
+ def product_task(input_file, output_parameter, shared_path, basenames):
+ print "# basenames = ", " ".join(basenames)
+ print "input_parameter = ", input_file
+ print "output_parameter = ", output_parameter, "\n"
+
+
+ #
+ # Run
+ #
+ pipeline_run(verbose=0)
+
+
+ This results in:
+
+ .. code-block:: pycon
+ :emphasize-lines: 2,6,10,14,18,22,26,30
+
+ >>> pipeline_run(verbose=0)
+
+ # basenames = a p x
+ input_parameter = ('a.start', 'p.start', 'x.start')
+ output_parameter = /home/lg/temp/a_vs_p_vs_x.product
+
+ # basenames = a p y
+ input_parameter = ('a.start', 'p.start', 'y.start')
+ output_parameter = /home/lg/temp/a_vs_p_vs_y.product
+
+ # basenames = a q x
+ input_parameter = ('a.start', 'q.start', 'x.start')
+ output_parameter = /home/lg/temp/a_vs_q_vs_x.product
+
+ # basenames = a q y
+ input_parameter = ('a.start', 'q.start', 'y.start')
+ output_parameter = /home/lg/temp/a_vs_q_vs_y.product
+
+ # basenames = b p x
+ input_parameter = ('b.start', 'p.start', 'x.start')
+ output_parameter = /home/lg/temp/b_vs_p_vs_x.product
+
+ # basenames = b p y
+ input_parameter = ('b.start', 'p.start', 'y.start')
+ output_parameter = /home/lg/temp/b_vs_p_vs_y.product
+
+ # basenames = b q x
+ input_parameter = ('b.start', 'q.start', 'x.start')
+ output_parameter = /home/lg/temp/b_vs_q_vs_x.product
+
+ # basenames = b q y
+ input_parameter = ('b.start', 'q.start', 'y.start')
+ output_parameter = /home/lg/temp/b_vs_q_vs_y.product
+
+******************************************************************************************************************************************************
+Example code for :ref:`@permutations <decorators.permutations>`
+******************************************************************************************************************************************************
+
+
+ .. code-block:: python
+ :emphasize-lines: 13,17,20,25,28-30
+
+ from ruffus import *
+ from ruffus.combinatorics import *
+
+ # initial file pairs
+ @originate([ ['A.1_start', 'A.2_start'],
+ ['B.1_start', 'B.2_start'],
+ ['C.1_start', 'C.2_start'],
+ ['D.1_start', 'D.2_start']])
+ def create_initial_files_ABCD(output_files):
+ for output_file in output_files:
+ with open(output_file, "w") as oo: pass
+
+ # @permutations
+ @permutations(create_initial_files_ABCD, # Input
+ formatter(), # match input files
+
+ # tuple of 2 at a time
+ 2,
+
+ # Output Replacement string
+ "{path[0][0]}/"
+ "{basename[0][1]}_vs_"
+ "{basename[1][1]}.permutations",
+
+ # Extra parameter: path for 1st set of files, 1st file name
+ "{path[0][0]}",
+
+ # Extra parameter
+ ["{basename[0][0]}", # basename for 1st set of files, 1st file name
+ "{basename[1][0]}", # 2nd
+ ])
+ def permutations_task(input_file, output_parameter, shared_path, basenames):
+ print " - ".join(basenames)
+
+
+ #
+ # Run
+ #
+ pipeline_run(verbose=0)
+
+
+ This results in:
+
+ .. code-block:: pycon
+
+ >>> pipeline_run(verbose=0)
+
+ A - B
+ A - C
+ A - D
+ B - A
+ B - C
+ B - D
+ C - A
+ C - B
+ C - D
+ D - A
+ D - B
+ D - C
+
+
+********************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
+Example code for :ref:`@combinations <decorators.combinations>`
+********************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
+
+ .. code-block:: python
+ :emphasize-lines: 13,17,20,25,28-30
+
+ from ruffus import *
+ from ruffus.combinatorics import *
+
+ # initial file pairs
+ @originate([ ['A.1_start', 'A.2_start'],
+ ['B.1_start', 'B.2_start'],
+ ['C.1_start', 'C.2_start'],
+ ['D.1_start', 'D.2_start']])
+ def create_initial_files_ABCD(output_files):
+ for output_file in output_files:
+ with open(output_file, "w") as oo: pass
+
+ # @combinations
+ @combinations(create_initial_files_ABCD, # Input
+ formatter(), # match input files
+
+ # tuple of 3 at a time
+ 3,
+
+ # Output Replacement string
+ "{path[0][0]}/"
+ "{basename[0][1]}_vs_"
+ "{basename[1][1]}_vs_"
+ "{basename[2][1]}.combinations",
+
+ # Extra parameter: path for 1st set of files, 1st file name
+ "{path[0][0]}",
+
+ # Extra parameter
+ ["{basename[0][0]}", # basename for 1st set of files, 1st file name
+ "{basename[1][0]}", # 2nd
+ "{basename[2][0]}", # 3rd
+ ])
+ def combinations_task(input_file, output_parameter, shared_path, basenames):
+ print " - ".join(basenames)
+
+
+ #
+ # Run
+ #
+ pipeline_run(verbose=0)
+
+
+ This results in:
+
+ .. code-block:: pycon
+
+ >>> pipeline_run(verbose=0)
+ A - B - C
+ A - B - D
+ A - C - D
+ B - C - D
+
+
+********************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
+Example code for :ref:`@combinations_with_replacement <decorators.combinations_with_replacement>`
+********************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
+
+ .. code-block:: python
+ :emphasize-lines: 13,17,20,25,28-30
+
+ from ruffus import *
+ from ruffus.combinatorics import *
+
+ # initial file pairs
+ @originate([ ['A.1_start', 'A.2_start'],
+ ['B.1_start', 'B.2_start'],
+ ['C.1_start', 'C.2_start'],
+ ['D.1_start', 'D.2_start']])
+ def create_initial_files_ABCD(output_files):
+ for output_file in output_files:
+ with open(output_file, "w") as oo: pass
+
+ # @combinations_with_replacement
+ @combinations_with_replacement(create_initial_files_ABCD, # Input
+ formatter(), # match input files
+
+ # tuple of 2 at a time
+ 2,
+
+ # Output Replacement string
+ "{path[0][0]}/"
+ "{basename[0][1]}_vs_"
+ "{basename[1][1]}.combinations_with_replacement",
+
+ # Extra parameter: path for 1st set of files, 1st file name
+ "{path[0][0]}",
+
+ # Extra parameter
+ ["{basename[0][0]}", # basename for 1st set of files, 1st file name
+                    "{basename[1][0]}",  # 2nd
+ ])
+ def combinations_with_replacement_task(input_file, output_parameter, shared_path, basenames):
+ print " - ".join(basenames)
+
+
+ #
+ # Run
+ #
+ pipeline_run(verbose=0)
+
+
+ This results in:
+
+ .. code-block:: pycon
+
+ >>> pipeline_run(verbose=0)
+ A - A
+ A - B
+ A - C
+ A - D
+ B - B
+ B - C
+ B - D
+ C - C
+ C - D
+ D - D
+
diff --git a/doc/tutorials/new_tutorial/command_line.rst b/doc/tutorials/new_tutorial/command_line.rst
new file mode 100644
index 0000000..e2a6419
--- /dev/null
+++ b/doc/tutorials/new_tutorial/command_line.rst
@@ -0,0 +1,352 @@
+.. include:: ../../global.inc
+.. include:: manual_chapter_numbers.inc
+
+.. index::
+ pair: command line; Tutorial
+
+.. _new_manual.cmdline:
+
+######################################################################################################
+|new_manual.cmdline.chapter_num|: Running *Ruffus* from the command line with ruffus.cmdline
+######################################################################################################
+
+.. seealso::
+
+ * :ref:`Manual table of Contents <new_manual.table_of_contents>`
+
+
+We find that much of our *Ruffus* pipeline code is built on the same template and this is generally
+a good place to start developing a new pipeline.
+
+From version 2.4, *Ruffus* includes an optional ``Ruffus.cmdline`` module that provides
+support for a set of common command line arguments. This makes writing *Ruffus* pipelines much more pleasant.
+
+
+.. _new_manual.cmdline.get_argparse:
+
+.. _new_manual.cmdline.run:
+
+.. _new_manual.cmdline.setup_logging:
+
+************************************************************************************************************
+Template for `argparse <http://docs.python.org/2.7/library/argparse.html>`__
+************************************************************************************************************
+    All you need to do is copy these 6 lines:
+
+
+ .. code-block:: python
+ :emphasize-lines: 5, 13
+
+ import ruffus.cmdline as cmdline
+
+ parser = cmdline.get_argparse(description='WHAT DOES THIS PIPELINE DO?')
+
+ # <<<---- add your own command line options like --input_file here
+ # parser.add_argument("--input_file")
+
+ options = parser.parse_args()
+
+ # standard python logger which can be synchronised across concurrent Ruffus tasks
+ logger, logger_mutex = cmdline.setup_logging (__name__, options.log_file, options.verbose)
+
+ # <<<---- pipelined functions go here
+
+ cmdline.run (options)
+
+    We recommend the standard `argparse <http://docs.python.org/2.7/library/argparse.html>`__ module,
+    but the deprecated `optparse <http://docs.python.org/2.7/library/optparse.html>`__ module works as well (see :ref:`below <code_template.optparse>` for the template).
+
+
+******************************************************
+Command Line Arguments
+******************************************************
+
+ ``Ruffus.cmdline`` by default provides these predefined options:
+
+ .. code-block:: bash
+ :emphasize-lines: 5,12,15,22
+
+ -v, --verbose
+ --version
+ -L, --log_file
+
+ # tasks
+ -T, --target_tasks
+ --forced_tasks
+ -j, --jobs
+ --use_threads
+
+
+ # printout
+ -n, --just_print
+
+ # flow chart
+ --flowchart
+ --key_legend_in_graph
+ --draw_graph_horizontally
+ --flowchart_format
+
+
+ # check sum
+ --touch_files_only
+ --checksum_file_name
+ --recreate_database
+
+
+******************************************************
+1) Logging
+******************************************************
+
+ The script provides for logging both to the command line:
+
+ .. code-block:: bash
+
+ myscript -v
+ myscript --verbose
+
+ and an optional log file:
+
+ .. code-block:: bash
+
+ # keep tabs on yourself
+ myscript --log_file /var/log/secret.logbook
+
+    Logging is disabled if neither ``--verbose`` nor ``--log_file`` is specified on the command line.
+
+ ``Ruffus.cmdline`` automatically allows you to write to a shared log file via a proxy from multiple processes.
+ However, you do need to use ``logging_mutex`` for the log files to be synchronised properly across different jobs:
+
+ .. code-block:: python
+
+ with logging_mutex:
+
+ logger_proxy.info("Look Ma. No hands")
+
+    Logging is set up so that you can write:
+
+
+=================================
+ A) Only to the log file:
+=================================
+
+ .. code-block:: python
+
+ logger.info("A message")
+
+=================================
+ B) Only to the display:
+=================================
+
+ .. code-block:: python
+
+ logger.debug("A message")
+
+
+.. _new_manual.cmdline.MESSAGE:
+
+======================================
+ C) To both simultaneously:
+======================================
+
+ .. code-block:: python
+
+ from ruffus.cmdline import MESSAGE
+
+ logger.log(MESSAGE, "A message")
+
+
+******************************************************
+2) Tracing pipeline progress
+******************************************************
+
+    This is extremely useful for understanding what is happening with your pipeline, which tasks and
+    jobs are up to date, and so on.
+
+ See :ref:`new_manual.pipeline_printout`
+
+    To trace the pipeline, call the script with the following options:
+
+ .. code-block:: bash
+
+ # well-mannered, reserved
+ myscript --just_print
+ myscript -n
+
+ or
+
+ # extremely loquacious
+ myscript --just_print --verbose 5
+ myscript -n -v5
+
+ Increasing levels of verbosity (``--verbose`` to ``--verbose 5``) provide more detailed output
+
+
+
+******************************************************
+3) Printing a flowchart
+******************************************************
+
+ This is the subject of :ref:`new_manual.pipeline_printout_graph`.
+
+ Flowcharts can be specified using the following option:
+
+ .. code-block:: bash
+
+ myscript --flowchart xxxchart.svg
+
+    The extension of the flowchart file indicates what format the flowchart should take,
+    for example, ``svg``, ``jpg``, etc.
+
+    This can be overridden with ``--flowchart_format``.
+
+******************************************************
+4) Running in parallel on multiple processors
+******************************************************
+
+
+    Optionally specify the number of parallel strands of execution and the last *target* task to run.
+    The pipeline will start from any out-of-date tasks which precede the *target* and proceed no further
+    than the *target*.
+
+ .. code-block:: bash
+
+ myscript --jobs 15 --target_tasks "final_task"
+ myscript -j 15
+
+
+
+
+******************************************************************************************************
+5) Setup checkpointing so that *Ruffus* knows which files are out of date
+******************************************************************************************************
+
+    The :ref:`checkpoint file <new_manual.checkpointing>` defaults to the value of the
+    environment variable ``DEFAULT_RUFFUS_HISTORY_FILE``.
+
+ If this is not set, it will default to ``.ruffus_history.sqlite`` in the current working directory.
+
+ Either can be changed on the command line:
+
+ .. code-block:: bash
+
+ myscript --checksum_file_name mychecksum.sqlite
+
+
+============================================================================================================================================================
+Recreating checkpoints
+============================================================================================================================================================
+
+    Creates or updates the checkpoint file so that all existing files in completed jobs appear up to date.
+
+    Ruffus will stop sensibly if the current state is incomplete or inconsistent.
+
+ ::
+
+ myscript --recreate_database
+
+============================================================================================================================================================
+Touch files
+============================================================================================================================================================
+
+ As far as possible, create empty files with the correct timestamp to make the pipeline appear up to date.
+
+ .. code-block:: bash
+
+ myscript --touch_files_only
+
+
+******************************************************************************************************
+6) Skipping specified options
+******************************************************************************************************
+    Note that particular options can be skipped (not added to the command line) if they conflict with your own options, for example:
+
+ .. code-block:: python
+ :emphasize-lines: 3
+
+ # see below for how to use get_argparse
+ parser = cmdline.get_argparse( description='WHAT DOES THIS PIPELINE DO?',
+ # Exclude the following options: --log_file --key_legend_in_graph
+ ignored_args = ["log_file", "key_legend_in_graph"])
+
+
+******************************************************************************************************
+7) Specifying verbosity and abbreviating long paths
+******************************************************************************************************
+
+ The verbosity can be specified on the command line
+
+ .. code-block:: bash
+
+ myscript --verbose 5
+
+ # verbosity of 5 + 1 = 6
+ myscript --verbose 5 --verbose
+
+ # verbosity reset to 2
+ myscript --verbose 5 --verbose --verbose 2
+
+    If the printed paths are too long and need to be abbreviated, or, alternatively, if you want to see the full absolute paths of your input and output parameters,
+ you can specify an extension to the verbosity. See the manual discussion of :ref:`verbose_abbreviated_path <new_manual.pipeline_printout.verbose_abbreviated_path>` for
+ more details. This is specified as ``--verbose VERBOSITY:VERBOSE_ABBREVIATED_PATH``. (No spaces!)
+
+ For example:
+
+ .. code-block:: bash
+ :emphasize-lines: 4,7
+
+ # verbosity of 4
+ myscript.py --verbose 4
+
+ # display three levels of nested directories
+ myscript.py --verbose 4:3
+
+ # restrict input and output parameters to 60 letters
+ myscript.py --verbose 4:-60
+
+
+******************************************************************************************************
+8) Displaying the version
+******************************************************************************************************
+ Note that the version for your script will default to ``"%(prog)s 1.0"`` unless specified:
+
+ .. code-block:: python
+
+ parser = cmdline.get_argparse( description='WHAT DOES THIS PIPELINE DO?',
+ version = "my_programme.py v. 2.23")
+
+
+
+
+
+
+
+.. _code_template.optparse:
+
+************************************************************************************************************
+Template for `optparse <http://docs.python.org/2.7/library/optparse.html>`__
+************************************************************************************************************
+
+    ``optparse`` has been deprecated since Python 2.7.
+
+ .. code-block:: python
+ :emphasize-lines: 8,16
+
+        #
+        #   Using optparse (deprecated since Python 2.7)
+        #
+        from ruffus import *
+        import ruffus.cmdline as cmdline
+
+        parser = cmdline.get_optgparse(version="%prog 1.0", usage = "\n\n    %prog [options]")
+
+ # <<<---- add your own command line options like --input_file here
+ # parser.add_option("-i", "--input_file", dest="input_file", help="Input file")
+
+ (options, remaining_args) = parser.parse_args()
+
+ # logger which can be passed to ruffus tasks
+ logger, logger_mutex = cmdline.setup_logging ("this_program", options.log_file, options.verbose)
+
+ # <<<---- pipelined functions go here
+
+ cmdline.run (options)
+
diff --git a/doc/tutorials/new_tutorial/decorators_compendium.rst b/doc/tutorials/new_tutorial/decorators_compendium.rst
new file mode 100644
index 0000000..f1b2862
--- /dev/null
+++ b/doc/tutorials/new_tutorial/decorators_compendium.rst
@@ -0,0 +1,154 @@
+.. include:: ../../global.inc
+.. include:: manual_chapter_numbers.inc
+
+.. index::
+ pair: decorators_compendium; Tutorial
+
+.. _new_manual.decorators_compendium:
+
+#####################################################################################################################
+|new_manual.decorators_compendium.chapter_num|: Pipeline topologies and a compendium of *Ruffus* decorators
+#####################################################################################################################
+
+.. seealso::
+
+ * :ref:`Manual Table of Contents <new_manual.table_of_contents>`
+ * :ref:`decorators <decorators>`
+
+
+***************************************
+Overview
+***************************************
+
+ Computational pipelines transform your data in stages until the final result is produced.
+
+ You can visualise your pipeline data flowing like water down a system of pipes.
+ *Ruffus* has many ways of joining up your pipes to create different topologies.
+
+ .. note::
+
+ **The best way to design a pipeline is to:**
+
+ * **Write down the file names of the data as it flows across your pipeline.**
+ * **Draw lines between the file names to show how they should be connected together.**
+
+
+******************************************************************************
+:ref:`@transform <decorators.transform>`
+******************************************************************************
+
+
+ So far, our data files have been flowing through our pipelines independently in lockstep.
+
+ .. image:: ../../images/bestiary_transform.png
+ :scale: 50
+
+    If we drew a graph of the data files moving through the pipeline, all of our flowcharts would look something like this.
+
+    The :ref:`@transform <decorators.transform>` decorator connects up your data files in 1 to 1 operations, ensuring that for every **Input**, a corresponding **Output** is
+    generated, ready to go into the next pipeline stage. If we start with three sets of starting data, we would end up with three final sets of results (a minimal sketch follows).
+
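+    A minimal sketch of this 1 to 1 pattern (the file names are illustrative):
+
+    .. code-block:: python
+
+        from ruffus import *
+
+        @originate(["a.start", "b.start", "c.start"])
+        def make_start_files(output_file):
+            open(output_file, "w").close()
+
+        # one Output per Input: "a.start" -> "a.stage1", "b.start" -> "b.stage1", ...
+        @transform(make_start_files, suffix(".start"), ".stage1")
+        def first_stage(input_file, output_file):
+            open(output_file, "w").close()
+
+        pipeline_run()
+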
+******************************************************************************
+A bestiary of *Ruffus* decorators
+******************************************************************************
+
+    Very often, we would like to transform our data in more complex ways; this is where the other *Ruffus* decorators come in.
+
+ .. image:: ../../images/bestiary_decorators.png
+ :scale: 50
+
+******************************************************************************
+:ref:`@originate <decorators.originate>`
+******************************************************************************
+
+ * Introduced in |new_manual.transform_in_parallel.chapter_num| :ref:`More on @transform-ing data and @originate <new_manual.transform_in_parallel>`,
+ :ref:`@originate <decorators.originate>` generates **Output** files from scratch without the benefits of any **Input** files.
+
+******************************************************************************
+:ref:`@merge <decorators.merge>`
+******************************************************************************
+ * A **many to one** operator.
+    * The last decorator at the far right of the figure, :ref:`@merge <decorators.merge>`, merges multiple **Input** into one **Output**.
+
+******************************************************************************
+:ref:`@split <decorators.split>`
+******************************************************************************
+    * A **one to many** operator.
+    * :ref:`@split <decorators.split>` is the evil twin of :ref:`@merge <decorators.merge>`. It takes a single set of **Input** and splits it into multiple smaller pieces.
+    * The best part of :ref:`@split <decorators.split>` is that we don't necessarily have to decide ahead of time *how many* smaller pieces it should produce. If we encounter a larger file,
+      we might need to split it up into more fragments for greater parallelism.
+    * Since :ref:`@split <decorators.split>` is a **one to many** operator, if you pass it **many** inputs (e.g. via :ref:`@transform <decorators.transform>`), it performs an implicit :ref:`@merge <decorators.merge>` step to make one
+      set of **Input** that you can redistribute into a different number of pieces. If you are looking to split *each* **Input** into further smaller fragments, then you
+      need :ref:`@subdivide <decorators.subdivide>`.
+
+******************************************************************************
+:ref:`@subdivide <decorators.subdivide>`
+******************************************************************************
+ * A **many to even more** operator.
+ * It takes each of multiple **Input**, and further subdivides them.
+    * Uses :ref:`suffix() <decorators.suffix>`, :ref:`formatter() <decorators.formatter>` or :ref:`regex() <decorators.regex>` to generate **Output** names from its **Input** files but, like :ref:`@split <decorators.split>`, we don't have to decide ahead of time
+      *how many* smaller pieces each **Input** should be further divided into. For example, one large **Input** file might be subdivided into 7 pieces while the next job
+      might split its **Input** into just 4 pieces.
+
+******************************************************************************
+:ref:`@collate <decorators.collate>`
+******************************************************************************
+ * A **many to fewer** operator.
+    * :ref:`@collate <decorators.collate>` is the opposite twin of ``subdivide``: it takes multiple **Input** and groups or collates them into bundles of **Output** (see the sketch after this list).
+ * :ref:`@collate <decorators.collate>` uses :ref:`formatter() <decorators.formatter>` or :ref:`regex() <decorators.regex>` to generate **Output** names.
+ * All **Input** files which map to the same **Output** are grouped together into one job (one task function call) which
+ produces one **Output**.
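+    A hedged sketch of grouping with :ref:`@collate <decorators.collate>`, using :ref:`regex() <decorators.regex>` to bundle
+    files that share a sample prefix (the names and pattern are illustrative):
+
+    .. code-block:: python
+
+        from ruffus import *
+
+        @originate(["sampleA.1.part", "sampleA.2.part",
+                    "sampleB.1.part", "sampleB.2.part"])
+        def make_parts(output_file):
+            open(output_file, "w").close()
+
+        # all Input files sharing the same "sampleX" prefix are grouped into
+        # a single job which produces one combined Output
+        @collate(make_parts,
+                 regex(r"(sample[^.]+)\.\d+\.part"),
+                 r"\1.combined")
+        def combine_parts(input_files, output_file):
+            with open(output_file, "w") as oo:
+                for input_file in input_files:
+                    with open(input_file) as ii:
+                        oo.write(ii.read())
+
+        pipeline_run()
+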
+
+******************************************************************************
+Combinatorics
+******************************************************************************
+
+ More rarely, we need to generate a set of **Output** based on a combination or permutation or product of the **Input**.
+
+    For example, in bioinformatics, we might need to look for all instances of a set of genes in the genomes of a number of different species.
+ In other words, we need to find the :ref:`@product <decorators.product>` of XXX genes x YYY species.
+
+ *Ruffus* provides decorators modelled on the "Combinatoric generators" in the Standard Python `itertools <http://docs.python.org/2/library/itertools.html>`_ library.
+
+ To use combinatoric decorators, you need to explicitly include them from *Ruffus*:
+
+ .. code-block:: python
+
+
+ import ruffus
+ from ruffus import *
+ from ruffus.combinatorics import *
+
+ .. image:: ../../images/bestiary_combinatorics.png
+ :scale: 50
+
+******************************************************************************
+:ref:`@product <decorators.product>`
+******************************************************************************
+ * Given several sets of **Input**, it generates all versus all **Output**. For example, if there are four sets of **Input** files, :ref:`@product <decorators.product>` will generate ``WWW x XXX x YYY x ZZZ`` **Output**.
+ * Uses :ref:`formatter <decorators.transform>` to generate unique **Output** names from components parsed from *any* parts of *any* specified files in
+ all **Input** sets. In the above example, this allows the generation of ``WWW x XXX x YYY x ZZZ`` unique names.
+
+******************************************************************************
+:ref:`@combinations <decorators.combinations>`
+******************************************************************************
+ * Given one set of **Input**, it generates the combinations of r-length tuples among them.
+ * Uses :ref:`formatter <decorators.transform>` to generate unique **Output** names from components parsed from *any* parts of *any* specified files in all **Input** sets.
+ * For example, given **Input** called ``A``, ``B`` and ``C``, it will generate: ``A-B``, ``A-C``, ``B-C``
+ * The order of **Input** items is ignored so either ``A-B`` or ``B-A`` will be included, not both
+ * Self-vs-self combinations (``A-A``) are excluded.
+
+************************************************************************************************************************************************************
+:ref:`@combinations_with_replacement <decorators.combinations_with_replacement>`
+************************************************************************************************************************************************************
+    * Given one set of **Input**, it generates the combinations of r-length tuples among them but includes self-vs-self combinations.
+ * Uses :ref:`formatter <decorators.transform>` to generate unique **Output** names from components parsed from *any* parts of *any* specified files in all **Input** sets.
+ * For example, given **Input** called ``A``, ``B`` and ``C``, it will generate: ``A-A``, ``A-B``, ``A-C``, ``B-B``, ``B-C``, ``C-C``
+
+******************************************************************************
+:ref:`@permutations <decorators.permutations>`
+******************************************************************************
+ * Given one set of **Input**, it generates the permutations of r-length tuples among them. This excludes self-vs-self combinations but includes all orderings (``A-B`` and ``B-A``).
+ * Uses :ref:`formatter <decorators.transform>` to generate unique **Output** names from components parsed from *any* parts of *any* specified files in all **Input** sets.
+    * For example, given **Input** called ``A``, ``B`` and ``C``, it will generate: ``A-B``, ``A-C``, ``B-A``, ``B-C``, ``C-A``, ``C-B``
+
diff --git a/doc/tutorials/new_tutorial/dependencies.rst b/doc/tutorials/new_tutorial/dependencies.rst
new file mode 100644
index 0000000..66600e4
--- /dev/null
+++ b/doc/tutorials/new_tutorial/dependencies.rst
@@ -0,0 +1,110 @@
+.. include:: ../../global.inc
+.. include:: manual_chapter_numbers.inc
+
+.. index::
+ pair: Checking dependencies; Tutorial
+
+.. _new_manual.dependencies:
+
+##################################################################################
+|new_manual.dependencies.chapter_num|: How dependency is checked
+##################################################################################
+
+.. seealso::
+
+ * :ref:`Manual Table of Contents <new_manual.table_of_contents>`
+
+
+**************************************
+Overview
+**************************************
+
+ How does *Ruffus* decide how to run your pipeline?
+
+ * In which order should pipelined functions be called?
+
+ * Which parts of the pipeline are up-to-date and do not need to be rerun?
+
+
+=============================================
+Running all out-of-date tasks and dependents
+=============================================
+
+ .. image:: ../../images/manual_dependencies_flowchart_intro.png
+ :scale: 50
+
+
+ By default, *Ruffus* will
+
+ * build a flow chart (dependency tree) of pipelined tasks (functions)
+ * start from the most ancestral tasks with the fewest dependencies (``task1`` and ``task4`` in the flowchart above).
+    * walk up the tree to find the first incomplete / out-of-date tasks (i.e. ``task3`` and ``task5``).
+ * start running from there
+
+ All down-stream (dependent) tasks will be re-run anyway, so we don't have to test
+ whether they are up-to-date or not.
+
+ .. _new_manual.dependencies.checking_multiple_times:
+
+ .. note::
+
+        This means that *Ruffus* *may* ask any task whether its jobs are out of date more than once:
+
+ * once when deciding which parts of the pipeline have to be run
+ * once just before executing the task.
+
+ *Ruffus* tries to be clever / efficient, and does the minimal amount of querying.
+
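+    You can preview these decisions without running anything by printing the pipeline.
+    A minimal sketch, assuming a hypothetical ``final_task`` as the target:
+
+    .. code-block:: python
+
+        import sys
+
+        # list which tasks and jobs Ruffus considers incomplete / out of date,
+        # without actually running them
+        pipeline_printout(sys.stdout, [final_task], verbose = 3)
+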
+
+.. _new_manual.dependencies.forced_reruns:
+
+=======================================
+Forced Reruns
+=======================================
+ Even if a pipeline stage appears to be up to date,
+    you can always force the pipeline to rerun from one or more task functions.
+
+ This is particularly useful, for example, if the pipeline data hasn't changed but
+    the analysis or computational code has.
+
+ ::
+
+ pipeline_run(forcedtorun_tasks = [up_to_date_task1])
+
+
+ will run all tasks from ``up_to_date_task1`` to ``final_task``
+
+
+ Both the "target" and the "forced" lists can include as many tasks as you wish. All dependencies
+ are still carried out and out-of-date jobs rerun.
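+
+    For example, a minimal sketch combining both, re-using the task names from this chapter:
+
+    .. code-block:: python
+
+        pipeline_run(
+            # "target" tasks: run the pipeline up to final_task
+            [final_task],
+            # "forced" tasks: rerun from these even if they appear up to date
+            forcedtorun_tasks = [up_to_date_task1, up_to_date_task2])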
+
+.. _new_manual.dependencies.minimal_reruns:
+
+=======================================
+Esoteric option: Minimal Reruns
+=======================================
+
+ In the above example, if we were to delete the results of ``up_to_date_task1``, *Ruffus*
+ would rerun ``up_to_date_task1``, ``up_to_date_task2`` and ``task3``.
+
+ However, you might argue that so long as ``up_to_date_task2`` is up-to-date, and it
+ is the only necessary prerequisite for ``task3``, we should not be concerned about
+ ``up_to_date_task1``.
+
+ This is enabled with:
+
+ .. code-block:: python
+
+ pipeline_run([task6], gnu_make_maximal_rebuild_mode = False)
+
+ This option walks down the dependency tree and proceeds no further when it encounters
+    an up-to-date task (``up_to_date_task2``), whatever the state of the tasks that lie beyond it.
+
+ This rather dangerous option is useful if you don't want to keep all the intermediate
+    files/results from upstream tasks. The pipeline will not run any incomplete
+    tasks which precede an up-to-date result.
+
+ This is seldom what you intend, and you should always check that the appropriate stages
+ of the pipeline are executed in the flowchart output.
+
+
diff --git a/doc/tutorials/new_tutorial/deprecated_files.rst b/doc/tutorials/new_tutorial/deprecated_files.rst
new file mode 100644
index 0000000..740d3b8
--- /dev/null
+++ b/doc/tutorials/new_tutorial/deprecated_files.rst
@@ -0,0 +1,238 @@
+.. include:: ../../global.inc
+.. include:: manual_chapter_numbers.inc
+
+.. index::
+ pair: deprecated @files; Tutorial
+
+.. _new_manual.deprecated_files:
+
+#####################################################################################################################
+|new_manual.deprecated_files.chapter_num|: **@files**: Deprecated syntax
+#####################################################################################################################
+
+.. warning ::
+
+
+ **This is deprecated syntax**
+
+ **which is no longer supported and**
+
+ **should NOT be used in new code.**
+
+.. seealso::
+
+ * :ref:`Manual Table of Contents <new_manual.table_of_contents>`
+ * :ref:`decorators <decorators>`
+ * :ref:`@files <decorators.files>` syntax in detail
+
+
+***************************************
+Overview
+***************************************
+
+
+ | The python functions which do the actual work of each stage or
+ :term:`task` of a *Ruffus* pipeline are written by you.
+ | The role of *Ruffus* is to make sure these functions are called in the right order,
+ with the right parameters, running in parallel using multiprocessing if desired.
+
+ The easiest way to specify parameters to *Ruffus* :term:`task` functions is to use
+ the :ref:`@files <decorators.files>` decorator.
+
+ .. index::
+ pair: @files; Manual
+
+
+***************************************
+**@files**
+***************************************
+
+ Running this code:
+
+ ::
+
+ from ruffus import *
+
+ @files('a.1', ['a.2', 'b.2'], 'A file')
+ def single_job_io_task(infile, outfiles, text):
+ for o in outfiles: open(o, "w")
+
+ # prepare input file
+ open('a.1', "w")
+
+ pipeline_run()
+
+
+ Is equivalent to calling:
+ ::
+
+ single_job_io_task('a.1', ['a.2', 'b.2'], 'A file')
+
+
+ And produces:
+ ::
+
+ >>> pipeline_run()
+ Job = [a.1 -> [a.2, b.2], A file] completed
+ Completed Task = single_job_io_task
+
+ *Ruffus* will automatically check if your task is up to date. The second time :ref:`pipeline_run() <pipeline_functions.pipeline_run>`
+ is called, nothing will happen. But if you update ``a.1``, the task will rerun:
+
+ ::
+
+ >>> open('a.1', "w")
+ >>> pipeline_run()
+ Job = [a.1 -> [a.2, b.2], A file] completed
+ Completed Task = single_job_io_task
+
+ See :ref:`chapter 2 <new_manual.skip_up_to_date.rules>` for a more in-depth discussion of how *Ruffus*
+ decides which parts of the pipeline are complete and up-to-date.
+
+
+.. index::
+ pair: @files; in parallel
+
+.. _new_manual.files.parallel:
+
+******************************************************************************
+Running the same code on different parameters in parallel
+******************************************************************************
+
+ Your pipeline may require the same function to be called multiple times on independent parameters.
+    In that case, you can supply all the parameters to **@files**; each set will be sent to a separate job, and the jobs
+ may run in parallel if necessary. *Ruffus* will check if each separate :term:`job` is up-to-date using
+ the *inputs* and *outputs* (first two) parameters (See the :ref:`new_manual.only_rerun_out_of_date` ).
+
+
+ For example, if a sequence
+    (e.g. a list or tuple) of 5 sets of parameters is passed to **@files**, that indicates
+    there will be 5 separate jobs:
+
+ ::
+
+ from ruffus import *
+ parameters = [
+ [ 'job1.file' ], # 1st job
+        [ 'job2.file', 4 ], # 2nd job
+        [ 'job3.file', [3, 2] ], # 3rd job
+        [ 67, [13, 'job4.file'] ], # 4th job
+        [ 'job5.file' ], # 5th job
+ ]
+ @files(parameters)
+ def task_file(*params):
+ ""
+
+ | *Ruffus* creates as many jobs as there are elements in ``parameters``.
+    | In turn, each of these elements consists of a series of parameters which will be
+ passed to each separate job.
+
+ Thus the above code is equivalent to calling:
+
+ ::
+
+ task_file('job1.file')
+ task_file('job2.file', 4)
+ task_file('job3.file', [3, 2])
+ task_file(67, [13, 'job4.file'])
+ task_file('job5.file')
+
+
+ What ``task_file()`` does with these parameters is up to you!
+
+ The only constraint on the parameters is that *Ruffus* will treat any first
+ parameter of each job as the *inputs* and any second as the *output*. Any
+ strings in the *inputs* or *output* parameters (including those nested in sequences)
+ will be treated as file names.
+
+ Thus, to pick the parameters out of one of the above jobs:
+
+ ::
+
+ task_file(67, [13, 'job4.file'])
+
+ | *inputs* == ``67``
+ | *outputs* == ``[13, 'job4.file']``
+ |
+ | The solitary output filename is ``job4.file``
+
+
+.. index::
+ pair: @files; check if up to date
+
+.. _new_manual.files.is_uptodate:
+.. _new_manual.files.example:
+
+=======================================
+Checking if jobs are up to date
+=======================================
+
+ | Usually we do not want to run all the stages in a pipeline but only where
+ the input data has changed or is no longer up to date.
+ | One easy way to do this is to check the modification times for files produced
+ at each stage of the pipeline.
+
+ | Let us first create our starting files ``a.1`` and ``b.1``
+ | We can then run the following pipeline function to create
+
+ * ``a.2`` from ``a.1`` and
+ * ``b.2`` from ``b.1``
+
+ ::
+
+ # create starting files
+ open("a.1", "w")
+ open("b.1", "w")
+
+
+ from ruffus import *
+ parameters = [
+ [ 'a.1', 'a.2', 'A file'], # 1st job
+ [ 'b.1', 'b.2', 'B file'], # 2nd job
+ ]
+
+ @files(parameters)
+ def parallel_io_task(infile, outfile, text):
+ # copy infile contents to outfile
+ infile_text = open(infile).read()
+            open(outfile, "w").write(infile_text + "\n" + text)
+
+ pipeline_run()
+
+
+
+ This produces the following output:
+ ::
+
+ >>> pipeline_run()
+ Job = [a.1 -> a.2, A file] completed
+ Job = [b.1 -> b.2, B file] completed
+ Completed Task = parallel_io_task
+
+
+ | If you called :ref:`pipeline_run() <pipeline_functions.pipeline_run>` again, nothing would happen because the files are up to date:
+ | ``a.2`` is more recent than ``a.1`` and
+ | ``b.2`` is more recent than ``b.1``
+
+ However, if you subsequently modified ``a.1`` again:
+ ::
+
+ open("a.1", "w")
+ pipeline_run(verbose = 1)
+
+ you would see the following::
+
+        >>> pipeline_run(verbose = 1)
+ Task = parallel_io_task
+ Job = ["a.1" -> "a.2", "A file"] completed
+ Job = ["b.1" -> "b.2", "B file"] unnecessary: already up to date
+ Completed Task = parallel_io_task
+
+ The 2nd job is up to date and will be skipped.
+
+
+
+
+
diff --git a/doc/tutorials/new_tutorial/deprecated_files_re.rst b/doc/tutorials/new_tutorial/deprecated_files_re.rst
new file mode 100644
index 0000000..93782c5
--- /dev/null
+++ b/doc/tutorials/new_tutorial/deprecated_files_re.rst
@@ -0,0 +1,145 @@
+.. include:: ../../global.inc
+.. include:: manual_chapter_numbers.inc
+
+.. index::
+ pair: deprecated @files_re; Tutorial
+
+.. _new_manual.deprecated_files_re:
+
+#####################################################################################################################
+|new_manual.deprecated_files_re.chapter_num|: **@files_re**: Deprecated `syntax using regular expressions`
+#####################################################################################################################
+
+.. warning ::
+
+
+ **This is deprecated syntax**
+
+ **which is no longer supported and**
+
+ **should NOT be used in new code.**
+
+
+.. seealso::
+
+ * :ref:`Manual Table of Contents <new_manual.table_of_contents>`
+ * :ref:`decorators <decorators>`
+ * :ref:`@files_re <decorators.files_re>` syntax in detail
+
+
+***************************************
+Overview
+***************************************
+
+
+
+ **@files_re** combines the functionality of @transform, @collate and @merge in
+ one overloaded decorator.
+
+ This is the reason why its use is discouraged. **@files_re** syntax is far too overloaded
+    and context-dependent to support its many different functions.
+
+ The following documentation is provided to help maintain historical *Ruffus* usage.
+
+=======================================
+Transforming input and output filenames
+=======================================
+
+
+ For example, the following code takes files from
+ the previous pipeline task, and makes new output parameters with the ``.sums`` suffix
+ in place of the ``.chunks`` suffix:
+
+ ::
+
+ @transform(step_4_split_numbers_into_chunks, suffix(".chunks"), ".sums")
+ def step_5_calculate_sum_of_squares (input_file_name, output_file_name):
+ #
+ # calculate sums and sums of squares for all values in the input_file_name
+ # writing to output_file_name
+ ""
+
+    This can be written equivalently using **@files_re**:
+
+ ::
+
+ @files_re(step_4_split_numbers_into_chunks, r".chunks", r".sums")
+ def step_5_calculate_sum_of_squares (input_file_name, output_file_name):
+ ""
+
+.. _new_manual.files_re.combine:
+.. index::
+ pair: combine; Manual
+
+=====================================================
+Collating many *inputs* into a single *output*
+=====================================================
+
+ Similarly, the following code collects **inputs**
+ from the same species in the same directory:
+
+ ::
+
+ @collate('*.animals', # inputs = all *.animal files
+ regex(r'mammals.([^.]+)'), # regular expression
+ r'\1/animals.in_my_zoo', # single output file per species
+ r'\1' ) # species name
+ def capture_mammals(infiles, outfile, species):
+ # summarise all animals of this species
+ ""
+
+    This can be written equivalently using **@files_re** with the :ref:`combine<decorators.combine>` indicator:
+
+ ::
+
+ @files_re('*.animals', # inputs = all *.animal files
+ r'mammals.([^.]+)', # regular expression
+ combine(r'\1/animals.in_my_zoo'), # single output file per species
+ r'\1' ) # species name
+ def capture_mammals(infiles, outfile, species):
+ # summarise all animals of this species
+ ""
+
+
+
+==============================================================================
+Generating *input* and *output* parameters using regular expressions
+==============================================================================
+
+ The following code generates additional
+ *input* prerequisite file names which match the original *input* files.
+
+ We want each job of our ``analyse()`` function to get corresponding pairs
+ of ``xx.chunks`` and ``xx.red_indian`` files when
+
+ ``*.chunks`` are generated by the task function ``split_up_problem()`` and
+ ``*.red_indian`` are generated by the task function ``make_red_indians()``:
+
+ ::
+
+ @follows(make_red_indians)
+ @transform(split_up_problem, # starting set of *inputs*
+ regex(r"(.*).chunks"), # regular expression
+ inputs([r"\g<0>", # xx.chunks
+ r"\1.red_indian"]), # important.file
+ r"\1.results" # xx.results
+ )
+ def analyse(input_filenames, output_file_name):
+ "Do analysis here"
+
+
+ The equivalent code using @files_re looks very similar:
+
+ ::
+
+ @follows(make_red_indians)
+ @files_re( split_up_problem, # starting set of *inputs*
+ r"(.*).chunks", # regular expression
+ [r"\g<0>", # xx.chunks
+                    r"\1.red_indian"], # xx.red_indian
+ r"\1.results") # xx.results
+ def analyse(input_filenames, output_file_name):
+ "Do analysis here"
+
+
diff --git a/doc/tutorials/new_tutorial/exceptions.rst b/doc/tutorials/new_tutorial/exceptions.rst
new file mode 100644
index 0000000..8a3ed92
--- /dev/null
+++ b/doc/tutorials/new_tutorial/exceptions.rst
@@ -0,0 +1,191 @@
+.. include:: ../../global.inc
+.. include:: manual_chapter_numbers.inc
+
+.. index::
+ pair: exceptions; Tutorial
+
+.. _new_manual.exceptions:
+
+###################################################################################################
+|new_manual.exceptions.chapter_num|: Exceptions thrown inside pipelines
+###################################################################################################
+
+**************************************
+Overview
+**************************************
+
+
+ The goal for *Ruffus* is that exceptions should just work *out-of-the-box* without any fuss.
+ This is especially important for exceptions that come from your code which may be raised
+ in a different process. Often multiple parallel operations (jobs or tasks) fail at the
+ same time. *Ruffus* will forward each of these exceptions with the tracebacks so you
+ can jump straight to the offending line.
+
+ This example shows separate exceptions from two jobs running in parallel:
+
+
+ .. code-block:: python
+
+ from ruffus import *
+
+ @originate(["a.start", "b.start", "c.start", "d.start", "e.start"])
+ def throw_exceptions_here(output_file):
+ raise Exception("OOPS")
+
+ pipeline_run(multiprocess = 2)
+
+ .. code-block:: pycon
+ :emphasize-lines: 5, 21
+
+ >>> pipeline_run(multiprocess = 2)
+
+ ruffus.ruffus_exceptions.RethrownJobError:
+
+ Original exceptions:
+
+ Exception #1
+ 'exceptions.Exception(OOPS)' raised in ...
+ Task = def throw_exceptions_here(...):
+ Job = [None -> b.start]
+
+ Traceback (most recent call last):
+ File "/usr/local/lib/python2.7/dist-packages/ruffus/task.py", line 685, in run_pooled_job_without_exceptions
+ return_value = job_wrapper(param, user_defined_work_func, register_cleanup, touch_files_only)
+ File "/usr/local/lib/python2.7/dist-packages/ruffus/task.py", line 549, in job_wrapper_output_files
+ job_wrapper_io_files(param, user_defined_work_func, register_cleanup, touch_files_only, output_files_only = True)
+ File "/usr/local/lib/python2.7/dist-packages/ruffus/task.py", line 504, in job_wrapper_io_files
+ ret_val = user_defined_work_func(*(param[1:]))
+ File "<stdin>", line 3, in throw_exceptions_here
+ Exception: OOPS
+
+
+ Exception #2
+ 'exceptions.Exception(OOPS)' raised in ...
+ Task = def throw_exceptions_here(...):
+ Job = [None -> a.start]
+
+ Traceback (most recent call last):
+ File "/usr/local/lib/python2.7/dist-packages/ruffus/task.py", line 685, in run_pooled_job_without_exceptions
+ return_value = job_wrapper(param, user_defined_work_func, register_cleanup, touch_files_only)
+ File "/usr/local/lib/python2.7/dist-packages/ruffus/task.py", line 549, in job_wrapper_output_files
+ job_wrapper_io_files(param, user_defined_work_func, register_cleanup, touch_files_only, output_files_only = True)
+ File "/usr/local/lib/python2.7/dist-packages/ruffus/task.py", line 504, in job_wrapper_io_files
+ ret_val = user_defined_work_func(*(param[1:]))
+ File "<stdin>", line 3, in throw_exceptions_here
+ Exception: OOPS
+
+
+ .. image:: ../../images/manual_exceptions.png
+
+
+.. _new_manual.exceptions.multiple_errors:
+
+.. index:: signalling, interrupts, break, errors, exceptions, multiple errors
+
+****************************************************************
+Pipelines running in parallel accumulate Exceptions
+****************************************************************
+
+    As shown above, by default *Ruffus* accumulates ``NN`` exceptions before interrupting the pipeline prematurely, where
+    ``NN`` is the specified parallelism for :ref:`pipeline_run(multiprocess = NN) <pipeline_functions.pipeline_run>`.
+
+ This seems a fair tradeoff between being able to gather detailed error information for
+ running jobs, and not wasting too much time for a task that is going to fail anyway.
+
+
+****************************************************************
+Terminate pipeline immediately upon Exceptions
+****************************************************************
+
+
+==============================================================================================================================
+Set :ref:`pipeline_run(exceptions_terminate_immediately = True) <pipeline_functions.pipeline_run>`
+==============================================================================================================================
+
+ To have all exceptions interrupt the pipeline immediately, invoke:
+
+ .. code-block:: python
+
+ pipeline_run(exceptions_terminate_immediately = True)
+
+
+ For example, with this change, only a single exception will be thrown before the pipeline is interrupted:
+
+ .. code-block:: python
+
+ from ruffus import *
+
+ @originate(["a.start", "b.start", "c.start", "d.start", "e.start"])
+ def throw_exceptions_here(output_file):
+ raise Exception("OOPS")
+
+ pipeline_run(multiprocess = 2, exceptions_terminate_immediately = True)
+
+ .. code-block:: pycon
+        :emphasize-lines: 5
+
+        >>> pipeline_run(multiprocess = 2, exceptions_terminate_immediately = True)
+
+ ruffus.ruffus_exceptions.RethrownJobError:
+
+ Original exception:
+
+ Exception #1
+ 'exceptions.Exception(OOPS)' raised in ...
+ Task = def throw_exceptions_here(...):
+ Job = [None -> a.start]
+
+ Traceback (most recent call last):
+ [Tedious traceback snipped out!!!....]
+ Exception: OOPS
+
+
+==============================================================================================================================
+raise ``Ruffus.JobSignalledBreak``
+==============================================================================================================================
+
+ The same can be accomplished on a finer scale by throwing the ``Ruffus.JobSignalledBreak`` Exception. Unlike
+ other exceptions, this causes an immediate halt in pipeline execution. If there are other exceptions in play at that
+ point, they will be rethrown in the main process but no new exceptions will be added.
+
+ .. code-block:: python
+
+        from ruffus import *
+        from ruffus.ruffus_exceptions import JobSignalledBreak
+
+ @originate(["a.start", "b.start", "c.start", "d.start", "e.start"])
+ def throw_exceptions_here(output_file):
+ raise JobSignalledBreak("OOPS")
+
+ pipeline_run(multiprocess = 2)
+
+
+****************************************************************
+Display exceptions as they occur
+****************************************************************
+
+    In the following example, the jobs throw exceptions
+    at staggered two-second intervals. With ``log_exceptions = True``, the
+ exceptions are displayed as they occur even though the pipeline continues running.
+
+    ``logger.error(...)`` will be invoked with the string representation of each exception and its associated stack trace.
+
+    The default logger prints to ``sys.stderr``, but as usual it can be changed to any logger from the ``logging`` module or a compatible object via
+ :ref:`pipeline_run(logger = XXX) <pipeline_functions.pipeline_run>`
+
+
+ .. code-block:: python
+
+        from ruffus import *
+        from ruffus.ruffus_exceptions import JobSignalledBreak
+ import time, os
+
+ @originate(["1.start", "2.start", "3.start", "4.start", "5.start"])
+ def throw_exceptions_here(output_file):
+ delay = int(os.path.splitext(output_file)[0])
+ time.sleep(delay * 2)
+ raise JobSignalledBreak("OOPS")
+
+ pipeline_run(log_exceptions = True, multiprocess = 5)
+
+
+
+
diff --git a/doc/tutorials/new_tutorial/flowchart_colours.rst b/doc/tutorials/new_tutorial/flowchart_colours.rst
new file mode 100644
index 0000000..e03f171
--- /dev/null
+++ b/doc/tutorials/new_tutorial/flowchart_colours.rst
@@ -0,0 +1,61 @@
+.. include:: ../../global.inc
+.. include:: manual_chapter_numbers.inc
+
+.. index::
+ pair: flowchart colours; Tutorial
+
+.. _new_manual.flowchart_colours:
+
+##########################################################################################################################################################################################################################################
+|new_manual.flowchart_colours.chapter_num|: Flow Chart Colours with :ref:`pipeline_printout_graph(...) <pipeline_functions.pipeline_printout_graph>`
+##########################################################################################################################################################################################################################################
+
+.. seealso::
+
+ * :ref:`Manual Table of Contents <new_manual.table_of_contents>`
+ * :ref:`pipeline_printout_graph(...) <pipeline_functions.pipeline_printout_graph>`
+ * :download:`Download code <../../static_data/example_scripts/play_with_colours.py>`
+ * :ref:`Code <new_manual.flowchart_colours.code>` for experimenting with colours
+
+******************
+Flowchart colours
+******************
+
+The appearance of *Ruffus* flowcharts produced by :ref:`pipeline_printout_graph <pipeline_functions.pipeline_printout_graph>`
+can be extensively customised.
+
+This is mainly controlled by the :ref:`user_colour_scheme <pipeline_functions.pipeline_printout_graph.user_colour_scheme>` parameter (note the UK spelling of "colour").
+
+Example:
+
+ Use colour scheme index = 1
+ ::
+
+ pipeline_printout_graph ("flowchart.svg", "svg", [final_task],
+ user_colour_scheme = {
+ "colour_scheme_index" :1,
+ "Pipeline" :{"fontcolor" : '"#FF3232"' },
+ "Key" :{"fontcolor" : "Red",
+ "fillcolor" : '"#F6F4F4"' },
+ "Task to run" :{"linecolor" : '"#0044A0"' },
+ "Final target" :{"fillcolor" : '"#EFA03B"',
+ "fontcolor" : "black",
+ "dashed" : 0 }
+ })
+
+
+There are 8 colour schemes, selected by setting ``"colour_scheme_index"``:
+ ::
+
+ pipeline_printout_graph ("flowchart.svg", "svg", [final_task],
+ user_colour_scheme = {"colour_scheme_index" :6})
+
+
+These colours were chosen after many fierce arguments between the authors and friends, and much
+inspiration from http://kuler.adobe.com/#create/fromacolor. Please
+feel free to submit any additional sets of colours for our consideration.
+
+
+(Click here for image in :download:`svg <../../images/flowchart_colour_schemes.svg>`.)
+
+.. image:: ../../images/flowchart_colour_schemes.png
diff --git a/doc/tutorials/new_tutorial/flowchart_colours_code.rst b/doc/tutorials/new_tutorial/flowchart_colours_code.rst
new file mode 100644
index 0000000..13bb236
--- /dev/null
+++ b/doc/tutorials/new_tutorial/flowchart_colours_code.rst
@@ -0,0 +1,288 @@
+.. include:: ../../global.inc
+.. include:: manual_chapter_numbers.inc
+
+.. index::
+ pair: flowchart colours; Tutorial
+
+.. _new_manual.flowchart_colours.code:
+
+##########################################################################################################################################################################################################################################
+|new_manual.flowchart_colours.chapter_num|: Python code for Flow Chart Colours with :ref:`pipeline_printout_graph(...) <pipeline_functions.pipeline_printout_graph>`
+##########################################################################################################################################################################################################################################
+
+.. seealso::
+
+ * :ref:`Manual Table of Contents <new_manual.table_of_contents>`
+ * :ref:`pipeline_printout_graph(...) <pipeline_functions.pipeline_printout_graph>`
+ * :download:`Download code <../../static_data/example_scripts/play_with_colours.py>`
+ * Back to :ref:`Flowchart colours <new_manual.flowchart_colours>`
+
+ This example shows how flowchart colours can be customised.
+
+
+************************************
+Code
+************************************
+ ::
+
+ #!/usr/bin/env python
+ """
+
+ play_with_colours.py
+ [--log_file PATH]
+ [--verbose]
+
+ """
+
+ ################################################################################
+ #
+ # play_with_colours.py
+ #
+ #
+ # Copyright (c) 7/13/2010 Leo Goodstadt
+ #
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
+ # of this software and associated documentation files (the "Software"), to deal
+ # in the Software without restriction, including without limitation the rights
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ # copies of the Software, and to permit persons to whom the Software is
+ # furnished to do so, subject to the following conditions:
+ #
+ # The above copyright notice and this permission notice shall be included in
+ # all copies or substantial portions of the Software.
+ #
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ # THE SOFTWARE.
+ #################################################################################
+
+ import sys, os
+
+
+ #88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+ # options
+
+
+ #88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+
+ from optparse import OptionParser
+ import StringIO
+
+ parser = OptionParser(version="%play_with_colours 1.0",
+ usage = "\n\n play_with_colours "
+ "--flowchart FILE [options] "
+ "[--colour_scheme_index INT ] "
+ "[--key_legend_in_graph]")
+
+ #
+ # pipeline
+ #
+ parser.add_option("--flowchart", dest="flowchart",
+ metavar="FILE",
+ type="string",
+ help="Don't actually run any commands; just print the pipeline "
+ "as a flowchart.")
+ parser.add_option("--colour_scheme_index", dest="colour_scheme_index",
+ metavar="INTEGER",
+ type="int",
+ help="Index of colour scheme for flow chart.")
+ parser.add_option("--key_legend_in_graph", dest="key_legend_in_graph",
+ action="store_true", default=False,
+ help="Print out legend and key for dependency graph.")
+
+ (options, remaining_args) = parser.parse_args()
+ if not options.flowchart:
+ raise Exception("Missing mandatory parameter: --flowchart.\n")
+
+
+
+
+ #88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+ # imports
+
+
+ #88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+ from ruffus import *
+ from ruffus.ruffus_exceptions import JobSignalledBreak
+
+
+ #88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+ # Pipeline
+
+
+ #88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+
+ #
+ # up to date tasks
+ #
+ @check_if_uptodate (lambda : (False, ""))
+ def Up_to_date_task1(infile, outfile):
+ pass
+
+ @check_if_uptodate (lambda : (False, ""))
+ @follows(Up_to_date_task1)
+ def Up_to_date_task2(infile, outfile):
+ pass
+
+ @check_if_uptodate (lambda : (False, ""))
+ @follows(Up_to_date_task2)
+ def Up_to_date_task3(infile, outfile):
+ pass
+
+
+ @check_if_uptodate (lambda : (False, ""))
+ @follows(Up_to_date_task3)
+ def Up_to_date_final_target(infile, outfile):
+ pass
+
+
+ #
+ # Explicitly specified
+ #
+ @check_if_uptodate (lambda : (False, ""))
+ @follows(Up_to_date_task1)
+ def Explicitly_specified_task(infile, outfile):
+ pass
+
+
+
+ #
+ # Tasks to run
+ #
+ @follows(Explicitly_specified_task)
+ def Task_to_run1(infile, outfile):
+ pass
+
+
+ @follows(Task_to_run1)
+ def Task_to_run2(infile, outfile):
+ pass
+
+ @follows(Task_to_run2)
+ def Task_to_run3(infile, outfile):
+ pass
+
+ @check_if_uptodate (lambda : (False, ""))
+ @follows(Task_to_run2)
+ def Up_to_date_task_forced_to_rerun(infile, outfile):
+ pass
+
+
+ #
+ # Final target
+ #
+ @follows(Up_to_date_task_forced_to_rerun, Task_to_run3)
+ def Final_target(infile, outfile):
+ pass
+
+ #
+ # Ignored downstream
+ #
+ @follows(Final_target)
+ def Downstream_task1_ignored(infile, outfile):
+ pass
+
+ @follows(Final_target)
+ def Downstream_task2_ignored(infile, outfile):
+ pass
+
+
+
+
+
+
+
+
+
+
+
+ #88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+ # Main logic
+
+
+ #88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+ from collections import defaultdict
+ custom_flow_chart_colour_scheme = defaultdict(dict)
+
+ #
+ # Base chart on this overall colour scheme index
+ #
+ custom_flow_chart_colour_scheme["colour_scheme_index"] = options.colour_scheme_index
+
+ #
+ # Overriding colours
+ #
+ if options.colour_scheme_index == None:
+ custom_flow_chart_colour_scheme["Vicious cycle"]["linecolor"] = '"#FF3232"'
+ custom_flow_chart_colour_scheme["Pipeline"]["fontcolor"] = '"#FF3232"'
+ custom_flow_chart_colour_scheme["Key"]["fontcolor"] = "black"
+ custom_flow_chart_colour_scheme["Key"]["fillcolor"] = '"#F6F4F4"'
+ custom_flow_chart_colour_scheme["Task to run"]["linecolor"] = '"#0044A0"'
+ custom_flow_chart_colour_scheme["Up-to-date"]["linecolor"] = "gray"
+ custom_flow_chart_colour_scheme["Final target"]["fillcolor"] = '"#EFA03B"'
+ custom_flow_chart_colour_scheme["Final target"]["fontcolor"] = "black"
+ custom_flow_chart_colour_scheme["Final target"]["color"] = "black"
+ custom_flow_chart_colour_scheme["Final target"]["dashed"] = 0
+ custom_flow_chart_colour_scheme["Vicious cycle"]["fillcolor"] = '"#FF3232"'
+ custom_flow_chart_colour_scheme["Vicious cycle"]["fontcolor"] = 'white'
+ custom_flow_chart_colour_scheme["Vicious cycle"]["color"] = "white"
+ custom_flow_chart_colour_scheme["Vicious cycle"]["dashed"] = 0
+ custom_flow_chart_colour_scheme["Up-to-date task"]["fillcolor"] = '"#B8CC6E"'
+ custom_flow_chart_colour_scheme["Up-to-date task"]["fontcolor"] = '"#006000"'
+ custom_flow_chart_colour_scheme["Up-to-date task"]["color"] = '"#006000"'
+ custom_flow_chart_colour_scheme["Up-to-date task"]["dashed"] = 0
+ custom_flow_chart_colour_scheme["Down stream"]["fillcolor"] = "white"
+ custom_flow_chart_colour_scheme["Down stream"]["fontcolor"] = "gray"
+ custom_flow_chart_colour_scheme["Down stream"]["color"] = "gray"
+ custom_flow_chart_colour_scheme["Down stream"]["dashed"] = 0
+ custom_flow_chart_colour_scheme["Explicitly specified task"]["fillcolor"] = "transparent"
+ custom_flow_chart_colour_scheme["Explicitly specified task"]["fontcolor"] = "black"
+ custom_flow_chart_colour_scheme["Explicitly specified task"]["color"] = "black"
+ custom_flow_chart_colour_scheme["Explicitly specified task"]["dashed"] = 0
+ custom_flow_chart_colour_scheme["Task to run"]["fillcolor"] = '"#EBF3FF"'
+ custom_flow_chart_colour_scheme["Task to run"]["fontcolor"] = '"#0044A0"'
+ custom_flow_chart_colour_scheme["Task to run"]["color"] = '"#0044A0"'
+ custom_flow_chart_colour_scheme["Task to run"]["dashed"] = 0
+ custom_flow_chart_colour_scheme["Up-to-date task forced to rerun"]["fillcolor"] = 'transparent'
+ custom_flow_chart_colour_scheme["Up-to-date task forced to rerun"]["fontcolor"] = '"#0044A0"'
+ custom_flow_chart_colour_scheme["Up-to-date task forced to rerun"]["color"] = '"#0044A0"'
+ custom_flow_chart_colour_scheme["Up-to-date task forced to rerun"]["dashed"] = 1
+ custom_flow_chart_colour_scheme["Up-to-date Final target"]["fillcolor"] = '"#EFA03B"'
+ custom_flow_chart_colour_scheme["Up-to-date Final target"]["fontcolor"] = '"#006000"'
+ custom_flow_chart_colour_scheme["Up-to-date Final target"]["color"] = '"#006000"'
+ custom_flow_chart_colour_scheme["Up-to-date Final target"]["dashed"] = 0
+
+ if __name__ == '__main__':
+ pipeline_printout_graph (
+
+ open(options.flowchart, "w"),
+ # use flowchart file name extension to decide flowchart format
+ # e.g. svg, jpg etc.
+ os.path.splitext(options.flowchart)[1][1:],
+
+ # final targets
+ [Final_target, Up_to_date_final_target],
+
+ # Explicitly specified tasks
+ [Explicitly_specified_task],
+
+ # Do we want key legend
+ no_key_legend = not options.key_legend_in_graph,
+
+ # Print all the task types whether used or not
+ minimal_key_legend = False,
+
+ user_colour_scheme = custom_flow_chart_colour_scheme,
+ pipeline_name = "Colour schemes")
+
diff --git a/doc/tutorials/new_tutorial/inputs.rst b/doc/tutorials/new_tutorial/inputs.rst
new file mode 100644
index 0000000..823f187
--- /dev/null
+++ b/doc/tutorials/new_tutorial/inputs.rst
@@ -0,0 +1,239 @@
+.. include:: ../../global.inc
+.. include:: manual_chapter_numbers.inc
+
+.. index::
+ pair: inputs; Tutorial
+ pair: add_inputs; Tutorial
+ pair: string substiution for inputs; Tutorial
+
+.. _new_manual.inputs:
+
+###########################################################################################################################################################################################################################################################################################
+|new_manual.inputs.chapter_num|: Manipulating task inputs via string substitution using :ref:`inputs() <decorators.inputs>` and :ref:`add_inputs() <decorators.add_inputs>`
+###########################################################################################################################################################################################################################################################################################
+
+
+.. seealso::
+
+ * :ref:`Manual Table of Contents <new_manual.table_of_contents>`
+ * :ref:`inputs() <decorators.inputs>` syntax
+ * :ref:`add_inputs() <decorators.add_inputs>` syntax
+
+.. note::
+
+ Remember to look at the example code:
+
+ * :ref:`new_manual.inputs.code`
+
+***********************
+Overview
+***********************
+
+    The previous chapters have described how *Ruffus* allows the **Output** names for each job
+    to be generated from the **Input** names via string substitution. This is how *Ruffus* can
+ automatically chain multiple tasks in a pipeline together seamlessly.
+
+ Sometimes it is useful to be able to modify the **Input** by string substitution
+ as well. There are two situations where this additional flexibility is needed:
+
+    #. You need to add additional prerequisites or filenames to the **Input** of every single job
+ #. You need to add additional **Input** file names which are some variant of the existing ones.
+
+    Both will be much more obvious with some examples.
+
+
+*******************************************************************************************************************
+Adding additional *input* prerequisites per job with :ref:`add_inputs() <decorators.add_inputs>`
+*******************************************************************************************************************
+
+
+===================================================================
+1. Example: compiling c++ code
+===================================================================
+
+ Let us first compile some c++ (``"*.cpp"``) files using plain :ref:`@transform <decorators.transform>` syntax:
+
+ .. code-block:: python
+
+ # source files exist before our pipeline
+ source_files = ["hasty.cpp", "tasty.cpp", "messy.cpp"]
+ for source_file in source_files:
+ open(source_file, "w")
+
+ from ruffus import *
+
+ @transform(source_files, suffix(".cpp"), ".o")
+ def compile(input_filename, output_file):
+ open(output_file, "w")
+
+ pipeline_run()
+
+
+======================================================================================================================================
+2. Example: Adding a common header file with :ref:`add_inputs() <decorators.add_inputs>`
+======================================================================================================================================
+
+    .. code-block:: python
+        :emphasize-lines: 13
+
+        # source files exist before our pipeline
+        source_files = ["hasty.cpp", "tasty.cpp", "messy.cpp"]
+        for source_file in source_files:
+            open(source_file, "w")
+
+        # common (universal) header exists before our pipeline
+        open("universal.h", "w")
+
+        from ruffus import *
+
+        @transform( source_files, suffix(".cpp"),
+                    # add header to the input of every job
+                    add_inputs("universal.h"),
+                    ".o")
+        def compile(input_filename, output_file):
+            open(output_file, "w")
+
+        pipeline_run()
+
+    Giving:
+
+    .. code-block:: pycon
+
+        >>> pipeline_run()
+        Job = [[hasty.cpp, universal.h] -> hasty.o] completed
+        Job = [[messy.cpp, universal.h] -> messy.o] completed
+        Job = [[tasty.cpp, universal.h] -> tasty.o] completed
+        Completed Task = compile
+
+
+=====================================================================
+3. Example: Additional *Input* can be tasks
+=====================================================================
+
+ We can also add a task name to :ref:`add_inputs() <decorators.add_inputs>`.
+ This chains the **Output**, i.e. run time results, of any previous task as
+ an additional **Input** to every single job in the task.
+
+ .. code-block:: python
+ :emphasize-lines: 1,7,9
+
+ # make header files
+ @transform(source_files, suffix(".cpp"), ".h")
+ def create_matching_headers(input_file, output_file):
+ open(output_file, "w")
+
+ @transform(source_files, suffix(".cpp"),
+ # add header to the input of every job
+ add_inputs("universal.h",
+ # add result of task create_matching_headers to the input of every job
+ create_matching_headers),
+ ".o")
+ def compile(input_filenames, output_file):
+ open(output_file, "w")
+
+ pipeline_run()
+
+
+    Giving:
+
+    .. code-block:: pycon
+
+ >>> pipeline_run()
+ Job = [[hasty.cpp, universal.h, hasty.h, messy.h, tasty.h] -> hasty.o] completed
+ Job = [[messy.cpp, universal.h, hasty.h, messy.h, tasty.h] -> messy.o] completed
+ Job = [[tasty.cpp, universal.h, hasty.h, messy.h, tasty.h] -> tasty.o] completed
+ Completed Task = compile
+
+
+================================================================================================================================================================================================================================================
+4. Example: Add corresponding files using :ref:`add_inputs() <decorators.add_inputs>` with :ref:`formatter <decorators.formatter>` or :ref:`regex <decorators.regex>`
+================================================================================================================================================================================================================================================
+ The previous example created headers corresponding to our source files and added them
+ as the **Input** to the compilation. That is generally not what you want. Instead,
+    what is generally needed is a way to
+
+ 1) Look up the exact corresponding header for the *specific* job, and not add all
+ possible files to all jobs in a task. When compiling ``hasty.cpp``, we just need
+ to add ``hasty.h`` (and ``universal.h``).
+ 2) Add a pre-existing file name (``hasty.h`` already exists. Don't create it via
+ another task.)
+
+    This is a surprisingly common requirement: in bioinformatics, DNA or RNA
+    sequence files sometimes come singly in `*.fastq <http://en.wikipedia.org/wiki/FASTQ_format>`__
+ and sometimes in `matching pairs <http://en.wikipedia.org/wiki/DNA_sequencing_theory#Pairwise_end-sequencing>`__:
+ ``*1.fastq, *2.fastq`` etc. In the latter case, we often need to make sure that both
+ sequence files are being processed in tandem. One way is to take one file name (``*1.fastq``)
+ and look up the other.
+
+ :ref:`add_inputs() <decorators.add_inputs>` uses standard *Ruffus* string substitution
+ via :ref:`formatter <decorators.formatter>` and :ref:`regex <decorators.regex>` to lookup (generate) **Input** file names.
+ (As a rule :ref:`suffix <decorators.suffix>` only substitutes **Output** file names.)
+
+ .. code-block:: python
+ :emphasize-lines: 3,5
+
+ @transform( source_files,
+ formatter(".cpp$"),
+ # corresponding header for each source file
+ add_inputs("{basename[0]}.h",
+ # add header to the input of every job
+ "universal.h"),
+ "{basename[0]}.o")
+ def compile(input_filenames, output_file):
+ open(output_file, "w")
+
+ This script gives the following output
+
+ .. code-block:: pycon
+
+ >>> pipeline_run()
+ Job = [[hasty.cpp, hasty.h, universal.h] -> hasty.o] completed
+ Job = [[messy.cpp, messy.h, universal.h] -> messy.o] completed
+ Job = [[tasty.cpp, tasty.h, universal.h] -> tasty.o] completed
+ Completed Task = compile
+
+
+********************************************************************************
+Replacing all input parameters with :ref:`inputs() <decorators.inputs>`
+********************************************************************************
+
+ The previous examples all *added* to the set of **Input** file names.
+ Sometimes it is necessary to replace all the **Input** parameters altogether.
+
+================================================================================================================================================================================================================================================
+5. Example: Running matching python scripts using :ref:`inputs() <decorators.inputs>`
+================================================================================================================================================================================================================================================
+
+ Here is a contrived example: we wish to find all cython/python files which have been
+ compiled into corresponding c++ source files.
+ Instead of compiling the c++, we shall invoke the corresponding python scripts.
+
+ Given three c++ files and their corresponding python scripts:
+
+ .. code-block:: python
+ :emphasize-lines: 4
+
+ @transform( source_files,
+ formatter(".cpp$"),
+
+ # corresponding python file for each source file
+ inputs("{basename[0]}.py"),
+
+ "{basename[0]}.results")
+ def run_corresponding_python(input_filenames, output_file):
+ open(output_file, "w")
+
+
+    The *Ruffus* code will call each python script corresponding to its c++ counterpart:
+
+ .. code-block:: pycon
+
+ >>> pipeline_run()
+ Job = [hasty.py -> hasty.results] completed
+ Job = [messy.py -> messy.results] completed
+ Job = [tasty.py -> tasty.results] completed
+ Completed Task = run_corresponding_python
+
diff --git a/doc/tutorials/new_tutorial/inputs_code.rst b/doc/tutorials/new_tutorial/inputs_code.rst
new file mode 100644
index 0000000..050ae10
--- /dev/null
+++ b/doc/tutorials/new_tutorial/inputs_code.rst
@@ -0,0 +1,229 @@
+.. include:: ../../global.inc
+.. include:: manual_chapter_numbers.inc
+
+.. _new_manual.inputs.code:
+
+############################################################################################################################################################################################################
+|new_manual.inputs.chapter_num|: Python Code for Manipulating task inputs via string substitution using :ref:`inputs() <decorators.inputs>` and :ref:`add_inputs() <decorators.add_inputs>`
+############################################################################################################################################################################################################
+
+.. seealso::
+
+ * :ref:`Manual Table of Contents <new_manual.table_of_contents>`
+ * :ref:`inputs() <decorators.inputs>` syntax
+ * :ref:`add_inputs() <decorators.add_inputs>` syntax
+ * Back to |new_manual.inputs.chapter_num|: :ref:`Manipulating task inputs via string substitution <new_manual.inputs>`
+
+******************************************************************************************************************************************************
+Example code for adding additional *input* prerequisites per job with :ref:`add_inputs() <decorators.add_inputs>`
+******************************************************************************************************************************************************
+
+.. _new_manual.inputs.example1:
+
+===================================================================
+1. Example: compiling c++ code
+===================================================================
+
+ .. code-block:: python
+
+ # source files exist before our pipeline
+ source_files = ["hasty.cpp", "tasty.cpp", "messy.cpp"]
+ for source_file in source_files:
+ open(source_file, "w")
+
+ from ruffus import *
+
+ @transform(source_files, suffix(".cpp"), ".o")
+ def compile(input_filename, output_file):
+ open(output_file, "w")
+
+ pipeline_run()
+
+
+ Giving:
+
+ .. code-block:: pycon
+
+ >>> pipeline_run()
+ Job = [hasty.cpp -> hasty.o] completed
+ Job = [messy.cpp -> messy.o] completed
+ Job = [tasty.cpp -> tasty.o] completed
+ Completed Task = compile
+
+.. _new_manual.inputs.example2:
+
+======================================================================================================================================
+2. Example: Adding a common header file with :ref:`add_inputs() <decorators.add_inputs>`
+======================================================================================================================================
+
+
+ .. code-block:: python
+ :emphasize-lines: 12
+
+ # source files exist before our pipeline
+ source_files = ["hasty.cpp", "tasty.cpp", "messy.cpp"]
+ for source_file in source_files:
+ open(source_file, "w")
+
+ # common (universal) header exists before our pipeline
+ open("universal.h", "w")
+
+ from ruffus import *
+
+ @transform( source_files, suffix(".cpp"),
+ # add header to the input of every job
+ add_inputs("universal.h"),
+ ".o")
+ def compile(input_filename, output_file):
+ open(output_file, "w")
+
+ pipeline_run()
+
+ Giving:
+
+ .. code-block:: pycon
+
+ >>> pipeline_run()
+ Job = [[hasty.cpp, universal.h] -> hasty.o] completed
+ Job = [[messy.cpp, universal.h] -> messy.o] completed
+ Job = [[tasty.cpp, universal.h] -> tasty.o] completed
+ Completed Task = compile
+
+.. _new_manual.inputs.example3:
+
+=====================================================================
+3. Example: Additional *Input* can be tasks
+=====================================================================
+
+ .. code-block:: python
+ :emphasize-lines: 11,17,19
+
+ # source files exist before our pipeline
+ source_files = ["hasty.cpp", "tasty.cpp", "messy.cpp"]
+ for source_file in source_files:
+ open(source_file, "w")
+
+ # common (universal) header exists before our pipeline
+ open("universal.h", "w")
+
+ from ruffus import *
+
+ # make header files
+ @transform(source_files, suffix(".cpp"), ".h")
+ def create_matching_headers(input_file, output_file):
+ open(output_file, "w")
+
+ @transform(source_files, suffix(".cpp"),
+ # add header to the input of every job
+ add_inputs("universal.h",
+ # add result of task create_matching_headers to the input of every job
+ create_matching_headers),
+ ".o")
+ def compile(input_filename, output_file):
+ open(output_file, "w")
+
+ pipeline_run()
+
+ Giving:
+
+ .. code-block:: pycon
+
+
+ >>> pipeline_run()
+ Job = [hasty.cpp -> hasty.h] completed
+ Job = [messy.cpp -> messy.h] completed
+ Job = [tasty.cpp -> tasty.h] completed
+ Completed Task = create_matching_headers
+ Job = [[hasty.cpp, universal.h, hasty.h, messy.h, tasty.h] -> hasty.o] completed
+ Job = [[messy.cpp, universal.h, hasty.h, messy.h, tasty.h] -> messy.o] completed
+ Job = [[tasty.cpp, universal.h, hasty.h, messy.h, tasty.h] -> tasty.o] completed
+ Completed Task = compile
+
+.. _new_manual.inputs.example4:
+
+================================================================================================================================================================================================================================================
+4. Example: Add corresponding files using :ref:`add_inputs() <decorators.add_inputs>` with :ref:`formatter <decorators.formatter>` or :ref:`regex <decorators.regex>`
+================================================================================================================================================================================================================================================
+
+ .. code-block:: python
+ :emphasize-lines: 11,17,19
+
+ # source files exist before our pipeline
+ source_files = ["hasty.cpp", "tasty.cpp", "messy.cpp"]
+ header_files = ["hasty.h", "tasty.h", "messy.h"]
+ for source_file in source_files + header_files:
+ open(source_file, "w")
+
+ # common (universal) header exists before our pipeline
+ open("universal.h", "w")
+
+ from ruffus import *
+
+ @transform( source_files,
+ formatter(".cpp$"),
+ # corresponding header for each source file
+ add_inputs("{basename[0]}.h",
+ # add header to the input of every job
+ "universal.h"),
+ "{basename[0]}.o")
+ def compile(input_filename, output_file):
+ open(output_file, "w")
+
+ pipeline_run()
+
+ Giving:
+
+ .. code-block:: pycon
+
+ >>> pipeline_run()
+ Job = [[hasty.cpp, hasty.h, universal.h] -> hasty.o] completed
+ Job = [[messy.cpp, messy.h, universal.h] -> messy.o] completed
+ Job = [[tasty.cpp, tasty.h, universal.h] -> tasty.o] completed
+ Completed Task = compile
+
+*********************************************************************************************
+Example code for replacing all input parameters with :ref:`inputs() <decorators.inputs>`
+*********************************************************************************************
+
+.. _new_manual.inputs.example5:
+
+================================================================================================================================================================================================================================================
+5. Example: Running matching python scripts using :ref:`inputs() <decorators.inputs>`
+================================================================================================================================================================================================================================================
+
+ .. code-block:: python
+ :emphasize-lines: 11,17,19
+
+ # source files exist before our pipeline
+ source_files = ["hasty.cpp", "tasty.cpp", "messy.cpp"]
+ python_files = ["hasty.py", "tasty.py", "messy.py"]
+ for source_file in source_files + python_files:
+ open(source_file, "w")
+
+ # common (universal) header exists before our pipeline
+ open("universal.h", "w")
+
+ from ruffus import *
+
+ @transform( source_files,
+ formatter(".cpp$"),
+ # corresponding python file for each source file
+ inputs("{basename[0]}.py"),
+
+ "{basename[0]}.results")
+ def run_corresponding_python(input_filenames, output_file):
+ open(output_file, "w")
+
+
+ pipeline_run()
+
+ Giving:
+
+ .. code-block:: pycon
+
+ >>> pipeline_run()
+ Job = [hasty.py -> hasty.results] completed
+ Job = [messy.py -> messy.results] completed
+ Job = [tasty.py -> tasty.results] completed
+ Completed Task = run_corresponding_python
+
diff --git a/doc/tutorials/new_tutorial/introduction.rst b/doc/tutorials/new_tutorial/introduction.rst
new file mode 100644
index 0000000..d36354a
--- /dev/null
+++ b/doc/tutorials/new_tutorial/introduction.rst
@@ -0,0 +1,399 @@
+.. include:: ../../global.inc
+.. include:: manual_chapter_numbers.inc
+
+.. role:: raw-html(raw)
+ :format: html
+
+:raw-html:`<style> .blue {color:blue} </style>`
+
+:raw-html:`<style> .highlight-red {color:red} </style>`
+
+.. role:: highlight-red
+
+.. role:: blue
+
+
+.. index::
+ pair: overview; Tutorial
+
+.. _new_manual.introduction:
+
+######################################################################################################
+|new_manual.introduction.chapter_num|: An introduction to basic *Ruffus* syntax
+######################################################################################################
+
+.. seealso::
+
+ * :ref:`Manual Table of Contents <new_manual.table_of_contents>`
+
+
+************************************
+Overview
+************************************
+
+ .. image:: ../../images/theoretical_pipeline_schematic.png
+ :scale: 50
+
+ Computational pipelines transform your data in stages until the final result is produced.
+ One easy way to understand pipelines is by imagining your data flowing across a series of
+ pipes until it reaches its final destination. Even quite complicated processes can be
+ broken into simple stages. Of course, it helps to visualise the whole process.
+
+ *Ruffus* is a way of automating the plumbing in your pipeline: You supply the python functions
+ which perform the data transformation, and tell *Ruffus* how these pipeline ``task`` functions
+ are connected up. *Ruffus* will make sure that the right data flows down your pipeline in the
+ right way at the right time.
+
+
+ .. note::
+
+ *Ruffus* refers to each stage of your pipeline as a :term:`task`.
+
+.. _new_manual.introduction.import:
+
+.. index::
+ single: importing ruffus
+
+****************************
+Importing *Ruffus*
+****************************
+
+ The most convenient way to use *Ruffus* is to import the various names directly:
+
+ .. code-block:: python
+
+ from ruffus import *
+
+ This will allow *Ruffus* terms to be used directly in your code. This is also
+ the style we have adopted for this manual.
+
+
+ If any of these clash with names in your code, you can use qualified names instead:
+ ::
+
+ import ruffus
+
+ ruffus.pipeline_printout("...")
+
+ *Ruffus* uses only standard python syntax.
+
+ There is no need to install anything extra or to have your script "preprocessed" to run
+ your pipeline.
+
+****************************************************************************************************************
+*Ruffus* `decorators <https://docs.python.org/2/glossary.html#term-decorator>`__
+****************************************************************************************************************
+
+    To let *Ruffus* know which python functions are part of your pipeline,
+ they need to be tagged or annotated using
+ *Ruffus* `decorators <https://docs.python.org/2/glossary.html#term-decorator>`__ .
+
+ `Decorators <https://docs.python.org/2/glossary.html#term-decorator>`__ have been part of the Python language since version 2.4.
+ Common examples from the standard library include `@staticmethod <https://docs.python.org/2/library/functions.html#staticmethod>`__ and
+ `@classmethod <https://docs.python.org/2/library/functions.html#classmethod>`__.
+
+ `decorators <https://docs.python.org/2/glossary.html#term-decorator>`__ start with an ``@``
+ prefix and take a number of parameters in parentheses, much like a function call.
+
+ `decorators <https://docs.python.org/2/glossary.html#term-decorator>`__ are placed before a normal python function.
+
+ .. image:: ../../images/tutorial_step1_decorator_syntax.png
+
+
+ Multiple decorators can be stacked, as necessary, in any order:
+
+ .. code-block:: python
+
+ @follows(first_task)
+ @follows(another_task)
+ @originate(range(5))
+ def second_task(output_parameter):
+     pass
+
+ *Ruffus* `decorators <https://docs.python.org/2/glossary.html#term-decorator>`__ do not
+ otherwise alter the underlying function: the decorated functions can still be called normally.
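+
+ For example, in the following minimal sketch (the file names ``data.input`` and
+ ``data.output`` are made up purely for illustration), the decorated task function
+ is called by hand, outside of any pipeline run:
+
+ .. code-block:: python
+
+     from ruffus import *
+
+     @transform(["data.input"], suffix(".input"), ".output")
+     def convert(input_file, output_file):
+         # copy the contents of the input file to the output file
+         open(output_file, "w").write(open(input_file).read())
+
+     # create the starting file, then call the task function directly,
+     # just like any other python function
+     open("data.input", "w").write("hello\n")
+     convert("data.input", "data.output")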
+
+***************************************
+Your first *Ruffus* pipeline
+***************************************
+
+==============================================================================
+1. Write down the file names
+==============================================================================
+
+ *Ruffus* is designed for data moving through a computational pipeline as a series of files.
+
+ It is also possible to use *Ruffus* pipelines without using intermediate data files but for your
+ first efforts, it is probably best not to subvert its canonical design.
+
+ The first thing when designing a new *Ruffus* pipeline is to sketch out the set of file names for
+ the pipeline on paper:
+
+ .. image:: ../../images/tutorial_ruffus_files.jpg
+ :scale: 50
+
+ Here we have a number of DNA sequence files (``*.fasta``)
+ #. mapped to a genome (``*.sam``), and
+ #. compressed (``*.bam``) before being
+ #. summarised statistically (``*.statistics``)
+
+ The first striking thing is that all of the files follow the same **consistent naming scheme**.
+
+ .. note::
+
+ :highlight-red:`The most important part of a Ruffus pipeline is to have a consistent naming scheme for your files.`
+
+ This allows you to build sane pipelines.
+
+
+ In this case, the files at each stage share the same file extension, e.g. ``.sam``.
+ This is usually the simplest and most sensible choice. (We shall see in later chapters
+ that *Ruffus* supports more complicated naming patterns so long as they are consistent.)
+
+
+==============================================================================
+2. Write the python functions for each stage
+==============================================================================
+
+ Next, we can sketch out the python functions which do the actual work for the pipeline.
+
+ .. note::
+
+ #. :highlight-red:`These are normal python functions with the important proviso that`
+
+ #. The first parameter contains the **Input** (file names)
+ #. The second parameter contains the **Output** (file names)
+
+ You can otherwise supply as many parameters as required.
+
+ #. :highlight-red:`Each python function should only take a` *Single* **Input** at a time
+
+ All the parallelism in your pipeline should be handled by *Ruffus*. Make sure
+ each function analyses one thing at a time.
+
+
+ *Ruffus* refers to a pipelined function as a :term:`task`.
+
+ The code for our three task functions looks something like this:
+
+ .. code-block:: python
+ :emphasize-lines: 2,4,5
+
+ #
+ # STAGE 1 fasta->sam
+ #
+ def map_dna_sequence(input_file, # 1st parameter is Input
+ output_file): # 2nd parameter is Output
+ """
+ Sketch of real mapping function
+ We can do the mapping ourselves
+ or call some other programme:
+ os.system("stampy %s %s..." % (input_file, output_file))
+ """
+ ii = open(input_file)
+ oo = open(output_file, "w")
+
+ .. code-block:: python
+ :emphasize-lines: 2
+
+ #
+ # STAGE 2 sam->bam
+ #
+ def compress_sam_file(input_file, # Input parameter
+ output_file): # Output parameter
+ """
+ Sketch of real compression function
+ """
+ ii = open(input_file)
+ oo = open(output_file, "w")
+
+ .. code-block:: python
+ :emphasize-lines: 2
+
+ #
+ # STAGE 3 bam->statistics
+ #
+ def summarise_bam_file(input_file, # Input parameter
+ output_file, # Output parameter
+ extra_stats_parameter): # Any number of extra parameters as required
+ """
+ Sketch of real analysis function
+ """
+ ii = open(input_file)
+ oo = open(output_file, "w")
+
+
+ If we were calling our functions manually, without the benefit of *Ruffus*, we would need
+ the following sequence of calls:
+
+ .. code-block:: python
+
+ # STAGE 1
+ map_dna_sequence("a.fasta", "a.sam")
+ map_dna_sequence("b.fasta", "b.sam")
+ map_dna_sequence("c.fasta", "c.sam")
+
+ # STAGE 2
+ compress_sam_file("a.sam", "a.bam")
+ compress_sam_file("b.sam", "b.bam")
+ compress_sam_file("c.sam", "c.bam")
+
+ # STAGE 3
+ summarise_bam_file("a.bam", "a.statistics")
+ summarise_bam_file("b.bam", "b.statistics")
+ summarise_bam_file("c.bam", "c.statistics")
+
+==============================================================================
+3. Link the python functions into a pipeline
+==============================================================================
+
+ *Ruffus* makes exactly the same function calls on your behalf. However, first, we need to
+ tell *Ruffus* what the arguments should be for each of the function calls.
+
+ * The **Input** is easy: This is either the starting file set (``*.fasta``) or whatever is produced
+ by the previous stage.
+
+ * The **Output** file name is the same as the **Input** but with the appropriate extension.
+
+ These are specified using the *Ruffus* :ref:`@transform <decorators.transform>` decorator as follows:
+
+ .. code-block:: python
+ :emphasize-lines: 6-8,17-19,29-31
+
+ from ruffus import *
+
+ starting_files = ["a.fasta", "b.fasta", "c.fasta"]
+
+ #
+ # STAGE 1 fasta->sam
+ #
+ @transform(starting_files, # Input = starting files
+ suffix(".fasta"), # suffix = .fasta
+ ".sam") # Output suffix = .sam
+ def map_dna_sequence(input_file,
+ output_file):
+ ii = open(input_file)
+ oo = open(output_file, "w")
+
+ #
+ # STAGE 2 sam->bam
+ #
+ @transform(map_dna_sequence, # Input = previous stage
+ suffix(".sam"), # suffix = .sam
+ ".bam") # Output suffix = .bam
+ def compress_sam_file(input_file,
+ output_file):
+ ii = open(input_file)
+ oo = open(output_file, "w")
+
+ #
+ # STAGE 3 bam->statistics
+ #
+ @transform(compress_sam_file, # Input = previous stage
+ suffix(".bam"), # suffix = .bam
+ ".statistics", # Output suffix = .statistics
+ "use_linear_model") # Extra statistics parameter
+ def summarise_bam_file(input_file,
+ output_file,
+ extra_stats_parameter):
+ """
+ Sketch of real analysis function
+ """
+ ii = open(input_file)
+ oo = open(output_file, "w")
+
+
+==============================================================================
+4. @transform syntax
+==============================================================================
+
+ #. | The 1st parameter for :ref:`@transform <decorators.transform>` is the **Input**.
+ | This is either the set of starting data or the name of the previous pipeline function.
+ | *Ruffus* *chains* together the stages of a pipeline by linking the **Output** of the previous stage into the **Input** of the next.
+
+ #. | The 2nd parameter is the current :ref:`suffix <decorators.suffix>`
+ | (i.e. our **Input** file extensions of ``".fasta"`` or ``".sam"`` or ``".bam"``)
+
+ #. | The 3rd parameter is what we want our **Output** file name to be after :ref:`suffix <decorators.suffix>` string substitution (e.g. ``.fasta -> .sam``).
+ | This works because we are using a sane naming scheme for our data files.
+
+ #. Other parameters can be passed to ``@transform`` and they will be forwarded to our python
+ pipeline function.
+
+
+ The functions that do the actual work of each stage of the pipeline remain unchanged.
+ The role of *Ruffus* is to make sure each is called in the right order,
+ with the right parameters, running in parallel (using multiprocessing if desired).
+
+
+.. index::
+ pair: pipeline_run; Tutorial
+
+.. _new_manual.pipeline_run:
+
+==============================================================================
+5. Run the pipeline!
+==============================================================================
+
+ .. note ::
+
+ **Key Ruffus Terminology**:
+
+ A :term:`task` is an annotated python function which represents a recipe or stage of your pipeline.
+
+ A :term:`job` is each time your recipe is applied to a piece of data, i.e. each time *Ruffus* calls your function.
+
+ Each **task** or pipeline recipe can thus have many **jobs** each of which can work in parallel on different data.
+
+ Now we can run the pipeline with the *Ruffus* function :ref:`pipeline_run<pipeline_functions.pipeline_run>`:
+
+ .. code-block:: python
+
+ pipeline_run()
+
+
+
+ This produces three sets of results in parallel, as you might expect:
+
+ .. code-block:: pycon
+
+ >>> pipeline_run()
+ Job = [a.fasta -> a.sam] completed
+ Job = [b.fasta -> b.sam] completed
+ Job = [c.fasta -> c.sam] completed
+ Completed Task = map_dna_sequence
+ Job = [a.sam -> a.bam] completed
+ Job = [b.sam -> b.bam] completed
+ Job = [c.sam -> c.bam] completed
+ Completed Task = compress_sam_file
+ Job = [a.bam -> a.statistics, use_linear_model] completed
+ Job = [b.bam -> b.statistics, use_linear_model] completed
+ Job = [c.bam -> c.statistics, use_linear_model] completed
+ Completed Task = summarise_bam_file
+
+
+
+ To work out which functions to call, :ref:`pipeline_run<pipeline_functions.pipeline_run>`
+ finds the **last** :term:`task` function of your pipeline, then
+ works out all the other functions this depends on, working backwards up the chain of
+ dependencies automatically.
+
+ We can specify the end point of the pipeline explicitly:
+
+ ::
+
+ >>> pipeline_run(target_tasks = [summarise_bam_file])
+
+
+ This also allows us to run only part of the pipeline, for example, up to ``compress_sam_file``:
+
+ ::
+
+ >>> pipeline_run(target_tasks = [compress_sam_file])
+
+
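+ If you only want to see what *would* be run, without actually running anything, the same
+ ``target_tasks`` argument can be passed to
+ :ref:`pipeline_printout() <pipeline_functions.pipeline_printout>`, which is covered in detail
+ in a later chapter. A minimal sketch, assuming the pipeline defined above:
+
+ .. code-block:: pycon
+
+     >>> import sys
+     >>> pipeline_printout(sys.stdout, target_tasks = [compress_sam_file], verbose = 3)
+
+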
+.. note::
+
+ The :ref:`example code <new_manual.introduction.code>` can be copied and pasted into a python
+ command shell.
+
diff --git a/doc/tutorials/new_tutorial/introduction_code.rst b/doc/tutorials/new_tutorial/introduction_code.rst
new file mode 100644
index 0000000..7bdd203
--- /dev/null
+++ b/doc/tutorials/new_tutorial/introduction_code.rst
@@ -0,0 +1,94 @@
+.. include:: ../../global.inc
+.. include:: manual_chapter_numbers.inc
+
+.. _new_manual.introduction.code:
+
+##############################################################################################################
+|new_manual.introduction.chapter_num|: Python Code for An introduction to basic Ruffus syntax
+##############################################################################################################
+
+.. seealso::
+
+ * :ref:`Manual Table of Contents <new_manual.table_of_contents>`
+ * :ref:`@transform syntax in detail <decorators.transform>`
+ * Back to |new_manual.introduction.chapter_num|: :ref:`An introduction to basic Ruffus syntax <new_manual.introduction>`
+
+*******************************************
+Your first Ruffus script
+*******************************************
+
+ .. code-block:: python
+
+ #
+ # The starting data files would normally exist beforehand!
+ # We create some empty files for this example
+ #
+ starting_files = ["a.fasta", "b.fasta", "c.fasta"]
+
+ for ff in starting_files:
+ open(ff, "w")
+
+
+ from ruffus import *
+
+ #
+ # STAGE 1 fasta->sam
+ #
+ @transform(starting_files, # Input = starting files
+ suffix(".fasta"), # suffix = .fasta
+ ".sam") # Output suffix = .sam
+ def map_dna_sequence(input_file,
+ output_file):
+ ii = open(input_file)
+ oo = open(output_file, "w")
+
+ #
+ # STAGE 2 sam->bam
+ #
+ @transform(map_dna_sequence, # Input = previous stage
+ suffix(".sam"), # suffix = .sam
+ ".bam") # Output suffix = .bam
+ def compress_sam_file(input_file,
+ output_file):
+ ii = open(input_file)
+ oo = open(output_file, "w")
+
+ #
+ # STAGE 3 bam->statistics
+ #
+ @transform(compress_sam_file, # Input = previous stage
+ suffix(".bam"), # suffix = .bam
+ ".statistics", # Output suffix = .statistics
+ "use_linear_model") # Extra statistics parameter
+ def summarise_bam_file(input_file,
+ output_file,
+ extra_stats_parameter):
+ """
+ Sketch of real analysis function
+ """
+ ii = open(input_file)
+ oo = open(output_file, "w")
+
+ pipeline_run()
+
+
+************************************
+Resulting Output
+************************************
+ ::
+
+ >>> pipeline_run()
+ Job = [a.fasta -> a.sam] completed
+ Job = [b.fasta -> b.sam] completed
+ Job = [c.fasta -> c.sam] completed
+ Completed Task = map_dna_sequence
+ Job = [a.sam -> a.bam] completed
+ Job = [b.sam -> b.bam] completed
+ Job = [c.sam -> c.bam] completed
+ Completed Task = compress_sam_file
+ Job = [a.bam -> a.statistics, use_linear_model] completed
+ Job = [b.bam -> b.statistics, use_linear_model] completed
+ Job = [c.bam -> c.statistics, use_linear_model] completed
+ Completed Task = summarise_bam_file
diff --git a/doc/tutorials/new_tutorial/list_of_ruffus_names.rst b/doc/tutorials/new_tutorial/list_of_ruffus_names.rst
new file mode 100644
index 0000000..aa8b66b
--- /dev/null
+++ b/doc/tutorials/new_tutorial/list_of_ruffus_names.rst
@@ -0,0 +1,77 @@
+.. include:: ../../global.inc
+.. include:: manual_chapter_numbers.inc
+
+.. index::
+ pair: Ruffus names list; Tutorial
+
+.. _new_manual.ruffus_names:
+
+##########################################################################################################################################################################################################################################
+|new_manual.ruffus_names.chapter_num|: Names exported from Ruffus
+##########################################################################################################################################################################################################################################
+
+.. seealso::
+
+ * :ref:`Manual Table of Contents <new_manual.table_of_contents>`
+
+******************
+Ruffus Names
+******************
+
+ This is a list of all the names *Ruffus* makes available:
+
+
+ +---------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------+
+ | Category | Manual |
+ +===============================================================+===============================================================================================================================================================+
+ | **Pipeline functions** | | :ref:`pipeline_printout() <pipeline_functions.pipeline_printout>` (:ref:`Manual <new_manual.pipeline_printout>`) |
+ |                                                               | | :ref:`pipeline_printout_graph() <pipeline_functions.pipeline_printout_graph>` (:ref:`Manual <new_manual.pipeline_printout_graph>`)                          |
+ |                                                               | | :ref:`pipeline_run() <pipeline_functions.pipeline_run>` (:ref:`Manual <new_manual.pipeline_run>`)                                                           |
+ +---------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------+
+ | **Decorators** | |
+ | | | :ref:`@active_if <decorators.active_if>` (:ref:`Manual <new_manual.active_if>`) |
+ | | | :ref:`@check_if_uptodate <decorators.check_if_uptodate>` (:ref:`Manual <new_manual.check_if_uptodate>`) |
+ | | | :ref:`@collate <decorators.collate>` (:ref:`Manual <new_manual.collate>`) |
+ | | | :ref:`@files <decorators.files>` (:ref:`Manual <new_manual.deprecated_files>`) |
+ | | | :ref:`@follows <decorators.follows>` (:ref:`Manual <new_manual.follows>`) |
+ | | | :ref:`@jobs_limit <decorators.jobs_limit>` (:ref:`Manual <new_manual.jobs_limit>`) |
+ | | | :ref:`@merge <decorators.merge>` (:ref:`Manual <new_manual.merge>`) |
+ | | | :ref:`@mkdir <decorators.mkdir>` (:ref:`Manual <new_manual.mkdir>`) |
+ | | | :ref:`@originate <decorators.originate>` (:ref:`Manual <new_manual.originate>`) |
+ | | | :ref:`@parallel <decorators.parallel>` (:ref:`Manual <new_manual.deprecated_parallel>`) |
+ | | | :ref:`@posttask <decorators.posttask>` (:ref:`Manual <new_manual.posttask>`) |
+ | | | :ref:`@split <decorators.split>` (:ref:`Manual <new_manual.split>`) |
+ | | | :ref:`@subdivide <decorators.subdivide>` (:ref:`Manual <new_manual.subdivide>`) |
+ | | | :ref:`@transform <decorators.transform>` (:ref:`Manual <new_manual.transform>`) |
+ | | | :ref:`@files_re <decorators.files_re>` (:ref:`Manual <new_manual.deprecated_files_re>`) |
+ | | |
+ +---------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------+
+ | **Loggers** | |
+ | | | stderr_logger |
+ | | | black_hole_logger |
+ | | |
+ +---------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------+
+ | **Parameter disambiguating Indicators** | | :ref:`suffix <decorators.suffix>` (:ref:`Manual <new_manual.suffix>`) |
+ | | | :ref:`regex <decorators.regex>` (:ref:`Manual <new_manual.regex>`) |
+ | | | :ref:`formatter <decorators.formatter>` (:ref:`Manual <new_manual.formatter>`) |
+ | | | :ref:`inputs <decorators.inputs>` (:ref:`Manual <new_manual.inputs>`) |
+ |                                                               | | :ref:`add_inputs <decorators.add_inputs>` (:ref:`Manual <new_manual.inputs>`)                                                                               |
+ | | | :ref:`touch_file <decorators.touch_file>` (:ref:`Manual <new_manual.posttask.touch_file>`) |
+ | | | :ref:`combine <decorators.combine>` |
+ | | | :ref:`mkdir <decorators.follows.mkdir>` (:ref:`Manual <new_manual.follows.mkdir>`) |
+ | | | :ref:`output_from <decorators.output_from>` (:ref:`Manual <new_manual.output_from>`) |
+ +---------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------+
+ | **Decorators in ruffus.combinatorics** | |
+ | | | :ref:`@combinations <decorators.combinations>` (:ref:`Manual <new_manual.combinations>`) |
+ | | | :ref:`@combinations_with_replacement <decorators.combinations_with_replacement>` (:ref:`Manual <new_manual.combinations_with_replacement>`) |
+ | | | :ref:`@permutations <decorators.permutations>` (:ref:`Manual <new_manual.permutations>`) |
+ | | | :ref:`@product <decorators.product>` (:ref:`Manual <new_manual.product>`) |
+ +---------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------+
+ | **Functions in ruffus.cmdline**                               |                                                                                                                                                               |
+ | | | :ref:`get_argparse <new_manual.cmdline.get_argparse>` |
+ | | | :ref:`setup_logging <new_manual.cmdline.setup_logging>` |
+ | | | :ref:`run <new_manual.cmdline.run>` |
+ | | | :ref:`MESSAGE <new_manual.cmdline.MESSAGE>` |
+ +---------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------+
+
+
diff --git a/doc/tutorials/new_tutorial/logging.rst b/doc/tutorials/new_tutorial/logging.rst
new file mode 100644
index 0000000..4e42e69
--- /dev/null
+++ b/doc/tutorials/new_tutorial/logging.rst
@@ -0,0 +1,221 @@
+.. include:: ../../global.inc
+.. include:: manual_chapter_numbers.inc
+
+.. index::
+ pair: logging; Tutorial
+
+.. _new_manual.logging:
+
+######################################################################################################
+|new_manual.logging.chapter_num|: Logging progress through a pipeline
+######################################################################################################
+
+
+.. seealso::
+
+ * :ref:`Manual Table of Contents <new_manual.table_of_contents>`
+
+.. note::
+
+ Remember to look at the :ref:`example code <new_manual.logging.code>`
+
+*************************
+Overview
+*************************
+
+ There are two parts to logging with **Ruffus**:
+
+ * Logging progress through the pipeline
+
+ This produces the sort of output displayed in this manual:
+
+ ::
+
+ >>> pipeline_run([parallel_io_task])
+ Task = parallel_io_task
+ Job = ["a.1" -> "a.2", "A file"] completed
+ Job = ["b.1" -> "b.2", "B file"] unnecessary: already up to date
+ Completed Task = parallel_io_task
+
+
+ * Logging your own messages from within your pipelined functions.
+
+ Because **Ruffus** may run each task function in a separate process on a separate
+ CPU (multiprocessing), some attention has to be paid to how to send and
+ synchronise your log messages across process boundaries.
+
+
+ We shall deal with these in turn.
+
+
+.. _new_manual.logging.pipeline:
+
+**********************************
+Logging task/job completion
+**********************************
+ By default, *Ruffus* logs each task and each job to
+ `sys.stderr <http://docs.python.org/2/library/sys.html#sys.stderr>`__ as it is completed.
+
+ In other words, the default is :ref:`pipeline_run(logger = stderr_logger) <pipeline_functions.pipeline_run>`.
+
+ If you want to turn off all tracking messages as the pipeline runs, apart from setting ``verbose = 0``, you
+ can also use the aptly named Ruffus ``black_hole_logger``:
+
+ .. code-block:: python
+
+ pipeline_run(logger = black_hole_logger)
+
+.. index::
+ pair: pipeline_run verbosity; Tutorial
+
+=================================
+Controlling logging verbosity
+=================================
+ :ref:`pipeline_run() <pipeline_functions.pipeline_run>` currently has five levels of verbosity, set by the optional ``verbose``
+ parameter which defaults to 1:
+
+ ::
+
+ verbose = 0: nothing
+ verbose = 1: logs completed jobs / tasks
+ verbose = 2: logs up to date jobs in incomplete tasks
+ verbose = 3: logs the reasons for running each job
+ verbose = 4: logs messages useful only for debugging ruffus pipeline code
+
+
+ Settings of ``verbose`` > ``5`` are intended for debugging **Ruffus** by the developers, and the
+ details are liable to change from release to release.
+
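+ For example, assuming a pipeline has already been defined as in the previous chapters,
+ the following minimal sketch shows two common settings:
+
+ .. code-block:: python
+
+     # log completed jobs and the reasons out-of-date jobs need to re-run
+     pipeline_run(verbose = 3)
+
+     # run completely silently
+     pipeline_run(verbose = 0)
+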
+
+.. index::
+ pair: logging with ruffus.cmdline; Tutorial
+
+********************************************************************************
+Use :ref:`ruffus.cmdline <new_manual.cmdline>`
+********************************************************************************
+
+ As always, it is easiest to use :ref:`ruffus.cmdline <new_manual.cmdline>`.
+
+ Set your script to
+
+ * write messages to ``STDERR`` with the ``--verbose`` option and
+ * to a log file with the ``--log_file`` option.
+
+ .. code-block:: python
+    :emphasize-lines: 7,8
+
+     from ruffus import *
+
+     # parse the command line, including the --log_file and --verbose options
+     parser = cmdline.get_argparse(description = "WHAT DOES THIS PIPELINE DO?")
+     options = parser.parse_args()
+
+     # Python logger which can be synchronised across concurrent Ruffus tasks
+     logger, logger_mutex = cmdline.setup_logging (__name__, options.log_file, options.verbose)
+
+     @transform(["job1.input"], suffix(".input"), ".output1")
+     def first_task(input_file, output_file):
+         pass
+
+     pipeline_run(logger = logger)
+
+
+.. index::
+ pair: logging customising; Tutorial
+
+****************************************
+Customising logging
+****************************************
+
+ You can also specify exactly how logging works by providing a `logging <http://docs.python.org/library/logging.html>`_ object
+ to :ref:`pipeline_run() <pipeline_functions.pipeline_run>` .
+ This log object should have ``debug()`` and ``info()`` methods.
+
+ Instead of writing your own, it is usually more convenient to use the python
+ `logging <http://docs.python.org/library/logging.html>`_
+ module which provides logging classes with rich functionality.
+
+ The :ref:`example code <new_manual.logging.code>` sets up a logger writing to a rotating set of log files.
+
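+ If you do want to roll your own, the following minimal sketch shows the interface
+ described above (the class name ``MinimalLogger`` is made up for illustration; any
+ object providing these methods will do, and further standard logging methods can be
+ added in the same way if needed):
+
+ .. code-block:: python
+
+     import sys
+
+     class MinimalLogger(object):
+         """Bare-bones logger exposing the debug() and info() methods used by Ruffus"""
+         def debug(self, message):
+             sys.stderr.write("DEBUG: %s\n" % message)
+         def info(self, message):
+             sys.stderr.write("INFO:  %s\n" % message)
+
+     pipeline_run(logger = MinimalLogger())
+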
+
+.. index::
+ pair: logging your own message; Tutorial
+
+.. _new_manual.logging.per_job:
+
+****************************************
+Log your own messages
+****************************************
+
+ You need to take a little care when logging your custom messages *within* your pipeline.
+
+ * If your Ruffus pipeline may run in parallel, make sure that logging is synchronised.
+ * If your Ruffus pipeline may run across separate processes, send your logging object across process boundaries.
+
+
+ `logging <http://docs.python.org/library/logging.html>`_ objects can not be
+ `pickled <http://docs.python.org/library/pickle.html>`_ and shared naively across
+ processes. Instead, we need to create proxies which forward the logging to a single
+ shared log.
+
+ The :ref:`ruffus.proxy_logger <proxy-logger>` module provides an easy way to share
+ `logging <http://docs.python.org/library/logging.html>`_ objects among
+ jobs. This requires just two simple steps:
+
+
+
+
+.. note::
+
+ * This is a good template for sharing `non-picklable objects <http://docs.python.org/2/library/pickle.html#what-can-be-pickled-and-unpickled>`_
+ across processes.
+
+
+.. _new_manual.sharing_proxy_object:
+
+
+============================================================
+ 1. Set up logging
+============================================================
+
+ Things are easiest if you are using ``ruffus.cmdline``:
+
+ .. code-block:: python
+
+ # standard python logger which can be synchronised across concurrent Ruffus tasks
+ logger, logger_mutex = cmdline.setup_logging (__name__, options.log_file, options.verbose)
+
+
+ Otherwise, manually:
+
+ .. code-block:: python
+
+
+ from ruffus.proxy_logger import *
+ (logger,
+ logging_mutex) = make_shared_logger_and_proxy (setup_std_shared_logger,
+ "my_logger",
+ {"file_name" :"/my/lg.log"})
+
+============================================================
+ 2. Share the proxy
+============================================================
+ Now, pass:
+
+ * ``logger`` (which forwards logging calls across jobs) and
+ * ``logging_mutex`` (which prevents different jobs which are logging simultaneously
+ from being jumbled up)
+
+ to each job:
+
+ .. code-block:: python
+ :emphasize-lines: 4,6,9
+
+ @transform( initial_file,
+ suffix(".input"),
+ ".output1",
+ logger, logging_mutex)  # pass log and synchronisation as parameters
+ def first_task(input_file, output_file,
+ logger, logging_mutex): # pass log and synchronisation as parameters
+ pass
+
+ # synchronise logging
+ with logging_mutex:
+ logger.info("Here we go logging...")
+
diff --git a/doc/tutorials/new_tutorial/logging_code.rst b/doc/tutorials/new_tutorial/logging_code.rst
new file mode 100644
index 0000000..69bd51b
--- /dev/null
+++ b/doc/tutorials/new_tutorial/logging_code.rst
@@ -0,0 +1,55 @@
+.. include:: ../../global.inc
+.. include:: manual_chapter_numbers.inc
+
+.. _new_manual.logging.code:
+
+######################################################################################################
+|new_manual.logging.chapter_num|: Python Code for Logging progress through a pipeline
+######################################################################################################
+
+.. seealso::
+
+ * :ref:`Manual Table of Contents <new_manual.table_of_contents>`
+ * Back to |new_manual.logging.chapter_num|: :ref:`Logging progress through a pipeline <new_manual.logging>`
+
+****************************************
+Rotating set of file logs
+****************************************
+
+ .. code-block:: python
+ :emphasize-lines: 10,14,17,31
+
+ import logging
+ import logging.handlers
+
+ LOG_FILENAME = '/tmp/ruffus.log'
+
+ # Set up a specific logger with our desired output level
+ logger = logging.getLogger('My_Ruffus_logger')
+ logger.setLevel(logging.DEBUG)
+
+ # Rotate a set of 5 log files every 2kb
+ handler = logging.handlers.RotatingFileHandler(
+ LOG_FILENAME, maxBytes=2000, backupCount=5)
+
+ # Add the log message handler to the logger
+ logger.addHandler(handler)
+
+ # Ruffus pipeline
+ from ruffus import *
+
+ # Start with some initial data file of yours...
+ initial_file = "job1.input"
+ open(initial_file, "w")
+
+ @transform( initial_file,
+ suffix(".input"),
+ ".output1"),
+ def first_task(input_file, output_file):
+ "Some detailed description"
+ pass
+
+ # use our custom logging object
+ pipeline_run(logger=logger)
+ print open("/tmp/ruffus.log").read()
+
diff --git a/doc/tutorials/new_tutorial/manual_chapter_numbers.inc b/doc/tutorials/new_tutorial/manual_chapter_numbers.inc
new file mode 100644
index 0000000..fbe692c
--- /dev/null
+++ b/doc/tutorials/new_tutorial/manual_chapter_numbers.inc
@@ -0,0 +1,30 @@
+.. |new_manual.introduction.chapter_num| replace:: **Chapter 1**
+.. |new_manual.transform.chapter_num| replace:: **Chapter 2**
+.. |new_manual.transform_in_parallel.chapter_num| replace:: **Chapter 3**
+.. |new_manual.originate.chapter_num| replace:: **Chapter 4**
+.. |new_manual.pipeline_printout.chapter_num| replace:: **Chapter 5**
+.. |new_manual.cmdline.chapter_num| replace:: **Chapter 6**
+.. |new_manual.pipeline_printout_graph.chapter_num| replace:: **Chapter 7**
+.. |new_manual.output_file_names.chapter_num| replace:: **Chapter 8**
+.. |new_manual.mkdir.chapter_num| replace:: **Chapter 9**
+.. |new_manual.checkpointing.chapter_num| replace:: **Chapter 10**
+.. |new_manual.decorators_compendium.chapter_num| replace:: **Chapter 11**
+.. |new_manual.split.chapter_num| replace:: **Chapter 12**
+.. |new_manual.merge.chapter_num| replace:: **Chapter 13**
+.. |new_manual.multiprocessing.chapter_num| replace:: **Chapter 14**
+.. |new_manual.logging.chapter_num| replace:: **Chapter 15**
+.. |new_manual.subdivide_collate.chapter_num| replace:: **Chapter 16**
+.. |new_manual.combinatorics.chapter_num| replace:: **Chapter 17**
+.. |new_manual.active_if.chapter_num| replace:: **Chapter 18**
+.. |new_manual.posttask.chapter_num| replace:: **Chapter 19**
+.. |new_manual.inputs.chapter_num| replace:: **Chapter 20**
+.. |new_manual.on_the_fly.chapter_num| replace:: **Chapter 21**
+.. |new_manual.parallel.chapter_num| replace:: **Chapter 22**
+.. |new_manual.check_if_uptodate.chapter_num| replace:: **Chapter 23**
+.. |new_manual.flowchart_colours.chapter_num| replace:: **Appendix 1**
+.. |new_manual.dependencies.chapter_num| replace:: **Appendix 2**
+.. |new_manual.exceptions.chapter_num| replace:: **Appendix 3**
+.. |new_manual.ruffus_names.chapter_num| replace:: **Appendix 4**
+.. |new_manual.deprecated_files.chapter_num| replace:: **Appendix 5**
+.. |new_manual.deprecated_files_re.chapter_num| replace:: **Appendix 6**
+
diff --git a/doc/tutorials/new_tutorial/manual_contents.rst b/doc/tutorials/new_tutorial/manual_contents.rst
new file mode 100644
index 0000000..e17dbf2
--- /dev/null
+++ b/doc/tutorials/new_tutorial/manual_contents.rst
@@ -0,0 +1,64 @@
+.. include:: ../../global.inc
+.. include:: manual_chapter_numbers.inc
+
+.. _new_manual.table_of_contents:
+
+####################################################################
+**Ruffus** Manual: List of Chapters and Example code
+####################################################################
+
+ Download as :download:`pdf <../../static_data/ruffus.pdf>`.
+
+ * |new_manual.introduction.chapter_num|: :ref:`An introduction to basic Ruffus syntax <new_manual.introduction>`
+ * |new_manual.transform.chapter_num|: :ref:`Transforming data in a pipeline with @transform <new_manual.transform>`
+ * |new_manual.transform_in_parallel.chapter_num|: :ref:`More on @transform-ing data <new_manual.transform_in_parallel>`
+ * |new_manual.originate.chapter_num|: :ref:`Creating files with @originate <new_manual.originate>`
+ * |new_manual.pipeline_printout.chapter_num|: :ref:`Understanding how your pipeline works with pipeline_printout() <new_manual.pipeline_printout>`
+ * |new_manual.cmdline.chapter_num|: :ref:`Running Ruffus from the command line with ruffus.cmdline <new_manual.cmdline>`
+ * |new_manual.pipeline_printout_graph.chapter_num|: :ref:`Displaying the pipeline visually with pipeline_printout_graph() <new_manual.pipeline_printout_graph>`
+ * |new_manual.output_file_names.chapter_num|: :ref:`Specifying output file names with formatter() and regex() <new_manual.output_file_names>`
+ * |new_manual.mkdir.chapter_num|: :ref:`Preparing directories for output with @mkdir <new_manual.mkdir>`
+ * |new_manual.checkpointing.chapter_num|: :ref:`Checkpointing: Interrupted Pipelines and Exceptions <new_manual.checkpointing>`
+ * |new_manual.decorators_compendium.chapter_num|: :ref:`Pipeline topologies and a compendium of Ruffus decorators <new_manual.decorators_compendium>`
+ * |new_manual.split.chapter_num|: :ref:`Splitting up large tasks / files with @split <new_manual.split>`
+ * |new_manual.merge.chapter_num|: :ref:`@merge multiple input into a single result <new_manual.merge>`
+ * |new_manual.logging.chapter_num|: :ref:`Logging progress through a pipeline <new_manual.logging>`
+ * |new_manual.multiprocessing.chapter_num|: :ref:`Multiprocessing, drmaa and Computation Clusters <new_manual.multiprocessing>`
+ * |new_manual.subdivide_collate.chapter_num|: :ref:`@subdivide tasks to run efficiently and regroup with @collate <new_manual.subdivide_collate>`
+ * |new_manual.combinatorics.chapter_num|: :ref:`@combinations, @permutations and all versus all @product <new_manual.combinatorics>`
+ * |new_manual.active_if.chapter_num|: :ref:`Turning parts of the pipeline on and off at runtime with @active_if <new_manual.active_if>`
+ * |new_manual.inputs.chapter_num|: :ref:`Manipulating task inputs via string substitution with inputs() and add_inputs() <new_manual.inputs>`
+ * |new_manual.posttask.chapter_num|: :ref:`Signal the completion of each stage of our pipeline with @posttask <new_manual.posttask>`
+ * |new_manual.on_the_fly.chapter_num|: :ref:`Esoteric: Generating parameters on the fly with @files <new_manual.on_the_fly>`
+ * |new_manual.parallel.chapter_num|: :ref:`Esoteric: Running jobs in parallel without files using @parallel <new_manual.deprecated_parallel>`
+ * |new_manual.check_if_uptodate.chapter_num|: :ref:`Esoteric: Writing custom functions to decide which jobs are up to date with @check_if_uptodate <new_manual.check_if_uptodate>`
+ * |new_manual.flowchart_colours.chapter_num|: :ref:`Flow Chart Colours with pipeline_printout_graph <new_manual.flowchart_colours>`
+ * |new_manual.dependencies.chapter_num|: :ref:`Under the hood: How dependency works <new_manual.dependencies>`
+ * |new_manual.exceptions.chapter_num|: :ref:`Exceptions thrown inside pipelines <new_manual.exceptions>`
+ * |new_manual.ruffus_names.chapter_num|: :ref:`Names (keywords) exported from Ruffus <new_manual.ruffus_names>`
+ * |new_manual.deprecated_files.chapter_num|: :ref:`Legacy and deprecated syntax @files <new_manual.deprecated_files>`
+ * |new_manual.deprecated_files_re.chapter_num|: :ref:`Legacy and deprecated syntax @files_re <new_manual.deprecated_files_re>`
+
+
+
+**Ruffus** Manual: List of Example Code for Each Chapter:
+
+ * :ref:`new_manual.introduction.code`
+ * :ref:`new_manual.transform.code`
+ * :ref:`new_manual.transform_in_parallel.code`
+ * :ref:`new_manual.originate.code`
+ * :ref:`new_manual.pipeline_printout.code`
+ * :ref:`new_manual.pipeline_printout_graph.code`
+ * :ref:`new_manual.output_file_names.code`
+ * :ref:`new_manual.mkdir.code`
+ * :ref:`new_manual.checkpointing.code`
+ * :ref:`new_manual.split.code`
+ * :ref:`new_manual.merge.code`
+ * :ref:`new_manual.multiprocessing.code`
+ * :ref:`new_manual.logging.code`
+ * :ref:`new_manual.subdivide_collate.code`
+ * :ref:`new_manual.combinatorics.code`
+ * :ref:`new_manual.inputs.code`
+ * :ref:`new_manual.on_the_fly.code`
+
+
diff --git a/doc/tutorials/new_tutorial/merge.rst b/doc/tutorials/new_tutorial/merge.rst
new file mode 100644
index 0000000..4b8466e
--- /dev/null
+++ b/doc/tutorials/new_tutorial/merge.rst
@@ -0,0 +1,140 @@
+.. include:: ../../global.inc
+.. include:: manual_chapter_numbers.inc
+
+.. index::
+ pair: merge; Tutorial
+
+.. _new_manual.merge:
+
+######################################################################################################
+|new_manual.merge.chapter_num|: ``@merge`` multiple input into a single result
+######################################################################################################
+
+
+.. seealso::
+
+ * :ref:`Manual Table of Contents <new_manual.table_of_contents>`
+ * :ref:`@merge <decorators.merge>` syntax
+ * :ref:`Example code for this chapter <new_manual.merge.code>`
+
+
+**************************************************************************************
+Overview of :ref:`@merge <decorators.merge>`
+**************************************************************************************
+
+ The :ref:`previous chapter <new_manual.split>` explained how **Ruffus** allows large
+ jobs to be split into small pieces with :ref:`@split <decorators.split>` and analysed
+ in parallel using for example, our old friend :ref:`@transform <decorators.transform>`.
+
+ Having done this, our next task is to recombine the fragments into a seamless whole.
+
+ This is the role of the :ref:`@merge <decorators.merge>` decorator.
+
+**************************************************************************************
+:ref:`@merge <decorators.merge>` is a many to one operator
+**************************************************************************************
+
+ :ref:`@merge <decorators.merge>` takes multiple *inputs* and produces a single *output*. **Ruffus**
+ is again agnostic as to the sort of data contained within this single *output*. It can be a single
+ (string) file name, an arbitrarily complicated nested structure with numbers, objects etc.
+ Or even a list.
+
+ The main thing is that downstream tasks will interpret this output as a single entity leading to a single
+ job.
+
+ :ref:`@split <decorators.split>` and :ref:`@merge <decorators.merge>` are, in other words, about network topology.
+
+ Because of this, :ref:`@merge <decorators.merge>` is also very useful for summarising the progress
+ of our pipeline. At key selected points, we can gather data from a multitude of disparate *inputs*
+ and :ref:`@merge <decorators.merge>` them into a single set of summaries.
+
+
+
+**************************************************************************************
+Example: Combining partial solutions: Calculating variances
+**************************************************************************************
+
+ In the :ref:`previous chapter <new_manual.split>`, we had almost completed all the pieces of our flowchart:
+
+ .. image:: ../../images/manual_split_merge_example.jpg
+ :scale: 30
+
+ What remains is to take the partial solutions from the different ``.sums`` files
+ and turn these into the variance as follows:
+
+ ::
+
+ variance = (sum_squared - sum * sum / N)/N
+
+ where ``N`` is the number of values
+
+ See the `wikipedia <http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance>`_ entry for a discussion of
+ why this is a very naive approach.
+
+
+
+ To do this, all we have to do is iterate through all the values in ``*.sums``,
+ add up the ``sums`` and ``sum_squared``, and apply the above (naive) formula.
+
+
+ .. code-block:: python
+ :emphasize-lines: 2
+
+ #
+ # @merge files together
+ #
+ @merge(sum_of_squares, "variance.result")
+ def calculate_variance (input_file_names, output_file_name):
+ """
+ Calculate variance naively
+ """
+ #
+ # initialise variables
+ #
+ all_sum_squared = 0.0
+ all_sum = 0.0
+ all_cnt_values = 0.0
+ #
+ # add up all the sum_squared, sum and cnt_values from all the chunks
+ #
+ for input_file_name in input_file_names:
+ sum_squared, sum, cnt_values = map(float, open(input_file_name).readlines())
+ all_sum_squared += sum_squared
+ all_sum += sum
+ all_cnt_values += cnt_values
+ all_mean = all_sum / all_cnt_values
+ variance = (all_sum_squared - all_sum * all_mean)/(all_cnt_values)
+ #
+ # print output
+ #
+ open(output_file_name, "w").write("%s\n" % variance)
+
+
+
+ This results in the following equivalent function call:
+
+ .. code-block:: python
+ :emphasize-lines: 2
+
+
+ calculate_variance (["1.sums", "2.sums", "3.sums",
+ "4.sums", "5.sums", "6.sums",
+ "7.sums", "8.sums", "9.sums, "10.sums"], "variance.result")
+
+ and the following display:
+
+ .. code-block:: pycon
+
+ >>> pipeline_run()
+ Job = [[1.sums, 10.sums, 2.sums, 3.sums, 4.sums, 5.sums, 6.sums, 7.sums, 8.sums, 9.sums] -> variance.result] completed
+ Completed Task = calculate_variance
+
+
+
+ The final result is in ``variance.result``
+
+
+ Have a look at the :ref:`complete example code for this chapter <new_manual.merge.code>`.
+
+
+
diff --git a/doc/tutorials/new_tutorial/merge_code.rst b/doc/tutorials/new_tutorial/merge_code.rst
new file mode 100644
index 0000000..cc5bd8b
--- /dev/null
+++ b/doc/tutorials/new_tutorial/merge_code.rst
@@ -0,0 +1,147 @@
+.. include:: ../../global.inc
+.. include:: manual_chapter_numbers.inc
+
+.. _new_manual.merge.code:
+
+##############################################################################################################
+|new_manual.merge.chapter_num|: Python Code for ``@merge`` multiple input into a single result
+##############################################################################################################
+
+.. seealso::
+
+ * :ref:`Manual Table of Contents <new_manual.table_of_contents>`
+ * :ref:`@merge syntax in detail <decorators.merge>`
+ * Back to |new_manual.merge.chapter_num|: :ref:`@merge multiple input into a single result <new_manual.merge>`
+
+*******************************************
+Splitting large jobs
+*******************************************
+
+ ::
+
+ from ruffus import *
+
+ NUMBER_OF_RANDOMS = 10000
+ CHUNK_SIZE = 1000
+
+
+ import random, os, glob
+
+ #---------------------------------------------------------------
+ #
+ # Create random numbers
+ #
+ @originate("random_numbers.list")
+ def create_random_numbers(output_file_name):
+ f = open(output_file_name, "w")
+ for i in range(NUMBER_OF_RANDOMS):
+ f.write("%g\n" % (random.random() * 100.0))
+
+ #---------------------------------------------------------------
+ #
+ # split initial file
+ #
+ @split(create_random_numbers, "*.chunks")
+ def split_problem (input_file_names, output_files):
+ """
+ splits random numbers file into xxx files of chunk_size each
+ """
+ #
+ # clean up any files from previous runs
+ #
+ for ff in glob.glob("*.chunks"):
+     os.unlink(ff)
+ #
+ #
+ # create new file every chunk_size lines and
+ # copy each line into current file
+ #
+ output_file = None
+ cnt_files = 0
+ for input_file_name in input_file_names:
+ for i, line in enumerate(open(input_file_name)):
+ if i % CHUNK_SIZE == 0:
+ cnt_files += 1
+ output_file = open("%d.chunks" % cnt_files, "w")
+ output_file.write(line)
+
+ #---------------------------------------------------------------
+ #
+ # Calculate sum and sum of squares for each chunk file
+ #
+ @transform(split_problem, suffix(".chunks"), ".sums")
+ def sum_of_squares (input_file_name, output_file_name):
+ output = open(output_file_name, "w")
+ sum_squared, sum = [0.0, 0.0]
+ cnt_values = 0
+ for line in open(input_file_name):
+ cnt_values += 1
+ val = float(line.rstrip())
+ sum_squared += val * val
+ sum += val
+ output.write("%s\n%s\n%d\n" % (repr(sum_squared), repr(sum), cnt_values))
+
+ #---------------------------------------------------------------
+ #
+ # Calculate variance from sums
+ #
+ @merge(sum_of_squares, "variance.result")
+ def calculate_variance (input_file_names, output_file_name):
+ """
+ Calculate variance naively
+ """
+ #
+ # initialise variables
+ #
+ all_sum_squared = 0.0
+ all_sum = 0.0
+ all_cnt_values = 0.0
+ #
+ # add up all the sum_squared, sum and cnt_values from all the chunks
+ #
+ for input_file_name in input_file_names:
+ sum_squared, sum, cnt_values = map(float, open(input_file_name).readlines())
+ all_sum_squared += sum_squared
+ all_sum += sum
+ all_cnt_values += cnt_values
+ all_mean = all_sum / all_cnt_values
+ variance = (all_sum_squared - all_sum * all_mean)/(all_cnt_values)
+ #
+ # print output
+ #
+ open(output_file_name, "w").write("%s\n" % variance)
+
+ #---------------------------------------------------------------
+ #
+ # Run
+ #
+ pipeline_run()
+
+
+
+
+************************************
+Resulting Output
+************************************
+ ::
+
+ >>> pipeline_run()
+ Job = [None -> random_numbers.list] completed
+ Completed Task = create_random_numbers
+ Job = [[random_numbers.list] -> *.chunks] completed
+ Completed Task = split_problem
+ Job = [1.chunks -> 1.sums] completed
+ Job = [10.chunks -> 10.sums] completed
+ Job = [2.chunks -> 2.sums] completed
+ Job = [3.chunks -> 3.sums] completed
+ Job = [4.chunks -> 4.sums] completed
+ Job = [5.chunks -> 5.sums] completed
+ Job = [6.chunks -> 6.sums] completed
+ Job = [7.chunks -> 7.sums] completed
+ Job = [8.chunks -> 8.sums] completed
+ Job = [9.chunks -> 9.sums] completed
+ Completed Task = sum_of_squares
+ Job = [[1.sums, 10.sums, 2.sums, 3.sums, 4.sums, 5.sums, 6.sums, 7.sums, 8.sums, 9.sums] -> variance.result] completed
+ Completed Task = calculate_variance
+
diff --git a/doc/tutorials/new_tutorial/mkdir.rst b/doc/tutorials/new_tutorial/mkdir.rst
new file mode 100644
index 0000000..a7632c6
--- /dev/null
+++ b/doc/tutorials/new_tutorial/mkdir.rst
@@ -0,0 +1,152 @@
+.. include:: ../../global.inc
+.. include:: manual_chapter_numbers.inc
+
+.. index::
+ pair: mkdir; Tutorial
+
+.. _new_manual.mkdir:
+
+######################################################################################################################################################################
+|new_manual.mkdir.chapter_num|: Preparing directories for output with :ref:`@mkdir() <decorators.mkdir>`
+######################################################################################################################################################################
+
+.. seealso::
+
+ * :ref:`Manual Table of Contents <new_manual.table_of_contents>`
+ * :ref:`@follows(mkdir()) syntax in detail <decorators.follows>`
+ * :ref:`@mkdir syntax in detail <decorators.mkdir>`
+
+.. note::
+
+ Remember to look at the example code:
+
+ * :ref:`new_manual.mkdir.code`
+
+
+***************************************
+Overview
+***************************************
+
+ In |new_manual.transform_in_parallel.chapter_num|, we saw that we could use :ref:`@follows(mkdir()) <new_manual.follows.mkdir>` to
+ ensure that output directories exist:
+
+ .. code-block:: python
+ :emphasize-lines: 4
+
+ #
+ # create_new_files() @follows mkdir
+ #
+ @follows(mkdir("output/results/here"))
+ @originate(["output/results/here/a.start_file",
+ "output/results/here/b.start_file"])
+ def create_new_files(output_file_pair):
+ pass
+
+
+ This ensures that the decorated task follows (:ref:`@follows <new_manual.follows.mkdir>`) the
+ making of the specified directory (``mkdir()``).
+
+ Sometimes, however, the **Output** is intended not for any single directory but a group
+ of destinations depending on the parsed contents of **Input** paths.
+
+*********************************************************************************************************************
+Creating directories after string substitution in a zoo...
+*********************************************************************************************************************
+
+ You may remember :ref:`this example <new_manual.output_file_names.formatter.zoo>` from |new_manual.output_file_names.chapter_num|:
+
+ We want to feed the denizens of a zoo. The original file names are spread over several directories and we
+ group their food supply by the *clade* of the animal in the following manner:
+
+ .. image:: ../../images/simple_tutorial_zoo_animals_formatter_example.jpg
+ :scale: 50
+
+ .. code-block:: python
+ :emphasize-lines: 13,14
+
+ # Put different animals in different directories depending on their clade
+ @transform(create_initial_files, # Input
+
+ formatter(".+/(?P<clade>\w+).(?P<tame>\w+).animals"), # Only animals: ignore plants!
+
+ "{subpath[0][1]}/{clade[0]}/{tame[0]}.{subdir[0][0]}.food", # Replacement
+
+ "{subpath[0][1]}/{clade[0]}", # new_directory
+ "{subdir[0][0]}", # animal_name
+ "{tame[0]}") # tameness
+ def feed(input_file, output_file, new_directory, animal_name, tameness):
+ print "%40s -> %90s" % (input_file, output_file)
+ # this blows up
+ # open(output_file, "w")
+
+
+ The example code from |new_manual.output_file_names.chapter_num| is, however, incomplete. If we were to actually create the specified
+ files we would realise that we had forgotten to create the destination directories ``reptiles``, ``mammals`` first!
+
+==============================================================================
+using :ref:`formatter() <decorators.formatter>`
+==============================================================================
+
+ We could of course create directories manually.
+ However, apart from being tedious and error prone, we have already gone to some lengths
+ to parse out the directories for :ref:`@transform <decorators.transform>`.
+ Why don't we use the same logic to make the directories?
+
+ Can you see the parallels between the syntax for :ref:`@mkdir <decorators.mkdir>` and :ref:`@transform <decorators.transform>`?
+
+ .. code-block:: python
+
+ # create directories for each clade
+ @mkdir( create_initial_files, # Input
+
+ formatter(".+/(?P<clade>\w+).(?P<tame>\w+).animals"), # Only animals: ignore plants!
+ "{subpath[0][1]}/{clade[0]}) # new_directory
+
+ # Put animals of each clade in the same directory
+ @transform(create_initial_files, # Input
+
+ formatter(".+/(?P<clade>\w+).(?P<tame>\w+).animals"), # Only animals: ignore plants!
+
+ "{subpath[0][1]}/{clade[0]}/{tame[0]}.{subdir[0][0]}.food", # Replacement
+
+ "{subpath[0][1]}/{clade[0]}", # new_directory
+ "{subdir[0][0]}", # animal_name
+ "{tame[0]}") # tameness
+ def feed(input_file, output_file, new_directory, animal_name, tameness):
+ print "%40s -> %90s" % (input_file, output_file)
+ # this works now
+ open(output_file, "w")
+
+ See the :ref:`example code <new_manual.mkdir.code>`
+
+==============================================================================
+using :ref:`regex() <decorators.regex>`
+==============================================================================
+
+ If you are particularly fond of using regular expression to parse file paths,
+ you could also use :ref:`regex() <decorators.regex>`:
+
+
+ .. code-block:: python
+
+ # create directories for each clade
+ @mkdir( create_initial_files, # Input
+
+ regex(r"(.*?)/?(\w+)/(?P<clade>\w+).(?P<tame>\w+).animals"), # Only animals: ignore plants!
+ r"\1/\g<clade>") # new_directory
+
+ # Put animals of each clade in the same directory
+ @transform(create_initial_files, # Input
+
+ formatter(".+/(?P<clade>\w+).(?P<tame>\w+).animals"), # Only animals: ignore plants!
+
+ "{subpath[0][1]}/{clade[0]}/{tame[0]}.{subdir[0][0]}.food", # Replacement
+
+ "{subpath[0][1]}/{clade[0]}", # new_directory
+ "{subdir[0][0]}", # animal_name
+ "{tame[0]}") # tameness
+ def feed(input_file, output_file, new_directory, animal_name, tameness):
+ print "%40s -> %90s" % (input_file, output_file)
+ # this works now
+ open(output_file, "w")
+
diff --git a/doc/tutorials/new_tutorial/mkdir_code.rst b/doc/tutorials/new_tutorial/mkdir_code.rst
new file mode 100644
index 0000000..91f3661
--- /dev/null
+++ b/doc/tutorials/new_tutorial/mkdir_code.rst
@@ -0,0 +1,113 @@
+.. include:: ../../global.inc
+.. include:: manual_chapter_numbers.inc
+
+.. _new_manual.mkdir.code:
+
+############################################################################################################################################################################################################
+|new_manual.mkdir.chapter_num|: Python Code for Preparing directories for output with :ref:`@mkdir() <decorators.mkdir>`
+############################################################################################################################################################################################################
+
+.. seealso::
+
+ * :ref:`Manual Table of Contents <new_manual.table_of_contents>`
+ * :ref:`mkdir() <decorators.mkdir>` syntax
+ * :ref:`formatter() <decorators.formatter>` syntax
+ * :ref:`regex() <decorators.regex>` syntax
+ * Back to |new_manual.mkdir.chapter_num|: :ref:`Preparing directories for output with @mkdir() <new_manual.mkdir>`
+
+****************************************************************************************************************
+Code for :ref:`formatter() <decorators.formatter>` Zoo example
+****************************************************************************************************************
+
+
+ .. code-block:: python
+
+ from ruffus import *
+
+ # Make directories
+ @mkdir(["tiger", "lion", "dog", "crocodile", "rose"])
+ @originate(
+ # List of animals and plants
+ [ "tiger/mammals.wild.animals",
+ "lion/mammals.wild.animals",
+ "lion/mammals.handreared.animals",
+ "dog/mammals.tame.animals",
+ "dog/mammals.wild.animals",
+ "crocodile/reptiles.wild.animals",
+ "rose/flowering.handreared.plants"])
+ def create_initial_files(output_file):
+ with open(output_file, "w") as oo: pass
+
+
+ # create directories for each clade
+ @mkdir( create_initial_files, # Input
+
+ formatter(".+/(?P<clade>\w+).(?P<tame>\w+).animals"), # Only animals: ignore plants!
+
+ "{subpath[0][1]}/{clade[0]}") # new_directory
+ # Put different animals in different directories depending on their clade
+ @transform(create_initial_files, # Input
+
+ formatter(".+/(?P<clade>\w+).(?P<tame>\w+).animals"), # Only animals: ignore plants!
+
+ "{subpath[0][1]}/{clade[0]}/{tame[0]}.{subdir[0][0]}.food", # Replacement
+
+ "{subpath[0][1]}/{clade[0]}", # new_directory
+ "{subdir[0][0]}", # animal_name
+ "{tame[0]}") # tameness
+ def feed(input_file, output_file, new_directory, animal_name, tameness):
+ print "%40s -> %90s" % (input_file, output_file)
+ # this works now
+ open(output_file, "w")
+
+
+ pipeline_run(verbose=0)
+
+
+****************************************************************************************************************
+Code for :ref:`regex() <decorators.regex>` Zoo example
+****************************************************************************************************************
+
+
+ .. code-block:: python
+
+ from ruffus import *
+
+ # Make directories
+ @mkdir(["tiger", "lion", "dog", "crocodile", "rose"])
+ @originate(
+ # List of animals and plants
+ [ "tiger/mammals.wild.animals",
+ "lion/mammals.wild.animals",
+ "lion/mammals.handreared.animals",
+ "dog/mammals.tame.animals",
+ "dog/mammals.wild.animals",
+ "crocodile/reptiles.wild.animals",
+ "rose/flowering.handreared.plants"])
+ def create_initial_files(output_file):
+ with open(output_file, "w") as oo: pass
+
+
+ # create directories for each clade
+ @mkdir( create_initial_files, # Input
+
+ regex(r"(.*?/?)(\w+)/(?P<clade>\w+).(?P<tame>\w+).animals"), # Only animals: ignore plants!
+ r"\g<clade>") # new_directory
+ # Put different animals in different directories depending on their clade
+ @transform(create_initial_files, # Input
+
+ regex(r"(.*?/?)(\w+)/(?P<clade>\w+).(?P<tame>\w+).animals"), # Only animals: ignore plants!
+
+ r"\1\g<clade>/\g<tame>.\2.food", # Replacement
+
+ r"\1\g<clade>", # new_directory
+ r"\2", # animal_name
+ "\g<tame>") # tameness
+ def feed(input_file, output_file, new_directory, animal_name, tameness):
+ print "%40s -> %90s" % (input_file, output_file)
+ # this works now
+ open(output_file, "w")
+
+
+ pipeline_run(verbose=0)
+
diff --git a/doc/tutorials/new_tutorial/multiprocessing.rst b/doc/tutorials/new_tutorial/multiprocessing.rst
new file mode 100644
index 0000000..aaf77ed
--- /dev/null
+++ b/doc/tutorials/new_tutorial/multiprocessing.rst
@@ -0,0 +1,293 @@
+.. include:: ../../global.inc
+.. include:: manual_chapter_numbers.inc
+
+.. index::
+ pair: multiprocessing; Tutorial
+
+.. _new_manual.multiprocessing:
+
+####################################################################################################################################################
+|new_manual.multiprocessing.chapter_num|: Multiprocessing, ``drmaa`` and Computation Clusters
+####################################################################################################################################################
+
+
+.. seealso::
+
+ * :ref:`Manual Table of Contents <new_manual.table_of_contents>`
+ * :ref:`@jobs_limit <decorators.jobs_limit>` syntax
+ * :ref:`pipeline_run() <pipeline_functions.pipeline_run>` syntax
+ * :ref:`drmaa_wrapper.run_job() <drmaa_wrapper.run_job>` syntax
+
+.. note::
+
+ Remember to look at the example code:
+
+ * :ref:`new_manual.multiprocessing.code`
+
+***********************
+Overview
+***********************
+
+.. index::
+ pair: pipeline_run(multiprocess); Tutorial
+
+=====================
+Multi Processing
+=====================
+
+ *Ruffus* uses python `multiprocessing <http://docs.python.org/library/multiprocessing.html>`_ to run
+ each job in a separate process.
+
+ This means that jobs do *not* necessarily complete in the order of the defined parameters.
+    Task hierarchies are, of course, inviolate: upstream tasks always run before their downstream, dependent tasks.
+
+ Tasks that are independent (i.e. do not precede each other) may be run in parallel as well.
+
+ The number of concurrent jobs can be set in :ref:`pipeline_run<pipeline_functions.pipeline_run>`:
+
+ ::
+
+ pipeline_run([parallel_task], multiprocess = 5)
+
+
+    If ``multiprocess`` is set to 1, then jobs will be run sequentially in a single process.
+
+
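+    For illustration only (the file and task names below are made up), here is a minimal
+    sketch of two tasks that do not depend on each other, so their jobs may be interleaved
+    across the available processes:
+
+    .. code-block:: python
+
+        from ruffus import *
+
+        # neither task is upstream of the other, so their jobs
+        # may run side by side when multiprocess > 1
+        @originate(["a.first", "b.first", "c.first"])
+        def make_first_set(output_file):
+            open(output_file, "w").close()
+
+        @originate(["x.second", "y.second", "z.second"])
+        def make_second_set(output_file):
+            open(output_file, "w").close()
+
+        pipeline_run(multiprocess = 4)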
+
+.. index::
+ pair: data sharing across processes; Tutorial
+
+=====================
+Data sharing
+=====================
+
+ Running jobs in separate processes allows *Ruffus* to make full use of the multiple
+ processors in modern computers. However, some `multiprocessing guidelines <http://docs.python.org/library/multiprocessing.html#multiprocessing-programming>`_
+ should be borne in mind when writing *Ruffus* pipelines. In particular:
+
+ * Try not to pass large amounts of data between jobs, or at least be aware that this has to be marshalled
+ across process boundaries.
+
+    * Only data which can be `pickled <http://docs.python.org/library/pickle.html>`_ can be passed as
+      parameters to *Ruffus* task functions. Happily, that applies to almost any native Python data type.
+      The rare, unpicklable object will cause python to complain (fail) loudly when *Ruffus* pipelines
+      are run (a rough check is sketched below).
+
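+    As a rough check (plain Python, not part of the *Ruffus* API), a parameter can be tested
+    with the same ``pickle`` round trip that ``multiprocessing`` relies on:
+
+    .. code-block:: python
+
+        import pickle
+
+        def is_picklable(obj):
+            # parameters passed to Ruffus task functions must survive this round trip
+            try:
+                pickle.dumps(obj)
+                return True
+            except (pickle.PicklingError, TypeError):
+                return False
+
+        print is_picklable({"sample": "A", "coverage": 30})    # True:  plain data types
+        print is_picklable(lambda x: x + 1)                    # False: lambdas cannot be pickled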
+
+
+.. index::
+ pair: @jobs_limit; Tutorial
+
+.. _new_manual.jobs_limit:
+
+
+********************************************************************************************
+Restricting parallelism with :ref:`@jobs_limit <decorators.jobs_limit>`
+********************************************************************************************
+
+ Calling :ref:`pipeline_run(multiprocess = NNN)<pipeline_functions.pipeline_run>` allows
+ multiple jobs (from multiple independent tasks) to be run in parallel. However, there
+    are some operations that consume so many resources that we might want them to run
+    with reduced concurrency, or with no concurrency at all.
+
+ For example, we might want to download some files via FTP but the server restricts
+ requests from each IP address. Even if the rest of the pipeline is running 100 jobs in
+ parallel, the FTP downloading must be restricted to 2 files at a time. We would really
+ like to keep the pipeline running as is, but let this one operation run either serially,
+ or with little concurrency.
+
+
+ * :ref:`pipeline_run(multiprocess = NNN)<pipeline_functions.pipeline_run>` sets the pipeline-wide concurrency but
+ * :ref:`@jobs_limit(MMM)<decorators.jobs_limit>` sets concurrency at ``MMM`` only for jobs in the decorated task.
+
+ The optional name (e.g. ``@jobs_limit(3, "ftp_download_limit")``) allows the same limit to
+ be shared across multiple tasks. To be pedantic: a limit of ``3`` jobs at a time would be applied
+ across all tasks which have a ``@jobs_limit`` named ``"ftp_download_limit"``.
+
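+    A minimal sketch of a shared, named limit (the file and task names are made up; a fuller
+    version is in the example code):
+
+    .. code-block:: python
+
+        from ruffus import *
+
+        @originate(["a.url", "b.url", "x.url", "y.url"])
+        def make_url_files(output_file):
+            open(output_file, "w").close()
+
+        # both download tasks share the named limit "ftp_download_limit"
+        @jobs_limit(2, "ftp_download_limit")
+        @transform(make_url_files, suffix(".url"), ".mirror1")
+        def download_from_first_mirror(input_file, output_file):
+            open(output_file, "w").close()
+
+        @jobs_limit(2, "ftp_download_limit")
+        @transform(make_url_files, suffix(".url"), ".mirror2")
+        def download_from_second_mirror(input_file, output_file):
+            open(output_file, "w").close()
+
+        # at most 2 download jobs run at any one time, counted across *both*
+        # decorated tasks, even though up to 100 processes are available
+        pipeline_run(multiprocess = 100)
+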
+ The :ref:`example code<new_manual.multiprocessing.code>` uses up to 10 processes across the
+ pipeline, but runs the ``stage1_big`` and ``stage1_small`` tasks 3 at a time (shared across
+ both tasks). ``stage2`` jobs run 5 at a time.
+
+
+
+.. _new_manual.ruffus.drmaa_wrapper.run_job:
+
+********************************************************************************************
+Using ``drmaa`` to dispatch work to Computational Clusters or Grid engines from Ruffus jobs
+********************************************************************************************
+
+ Ruffus has been widely used to manage work on computational clusters or grid engines. Though Ruffus
+ task functions cannot (yet!) run natively and transparently on remote cluster nodes, it is trivial
+ to dispatch work across the cluster.
+
+ From version 2.4 onwards, Ruffus includes an optional helper module which interacts with
+ `python bindings <https://github.com/drmaa-python/drmaa-python>`__ for the widely used `drmaa <http://en.wikipedia.org/wiki/DRMAA>`__
+ Open Grid Forum API specification. This allows jobs to dispatch work to a computational cluster and wait until it completes.
+
+
+    Here are the necessary steps:
+
+==============================================================================
+1) Use a shared drmaa session:
+==============================================================================
+
+ Before your pipeline runs:
+
+ .. code-block:: python
+
+ #
+ # start shared drmaa session for all jobs / tasks in pipeline
+ #
+ import drmaa
+ drmaa_session = drmaa.Session()
+ drmaa_session.initialize()
+
+
+ Cleanup after your pipeline completes:
+
+ .. code-block:: python
+
+ #
+ # pipeline functions go here
+ #
+ if __name__ == '__main__':
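+            # run the pipeline first, e.g. with pipeline_run(...) or cmdline.run(options), then clean up: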
+ drmaa_session.exit()
+
+
+==============================================================================
+2) import ``ruffus.drmaa_wrapper``
+==============================================================================
+
+ * The optional ``ruffus.drmaa_wrapper`` module needs to be imported explicitly:
+
+ .. code-block:: python
+ :emphasize-lines: 1
+
+        # import ruffus.drmaa_wrapper explicitly
+ from ruffus.drmaa_wrapper import run_job, error_drmaa_job
+
+
+==============================================================================
+3) call :ref:`drmaa_wrapper.run_job()<drmaa_wrapper.run_job>`
+==============================================================================
+
+    :ref:`drmaa_wrapper.run_job() <drmaa_wrapper.run_job>` dispatches the work to a cluster node from within a normal Ruffus job and waits for it to complete.
+
+    This is the equivalent of `os.system <http://docs.python.org/2/library/os.html#os.system>`__ or
+    `subprocess.check_output <http://docs.python.org/2/library/subprocess.html#subprocess.check_output>`__ but the command will run remotely, as specified:
+
+ .. code-block:: python
+ :emphasize-lines: 1
+
+ # ruffus.drmaa_wrapper.run_job
+ stdout_res, stderr_res = run_job(cmd_str = "touch " + output_file,
+ job_name = job_name,
+ logger = logger,
+ drmaa_session = drmaa_session,
+ run_locally = options.local_run,
+ job_other_options = job_other_options)
+
+    The complete code is available :ref:`here <using_ruffus.drmaa_wrapper>`.
+
+ * :ref:`drmaa_wrapper.run_job() <drmaa_wrapper.run_job>` is a convenience wrapper around the `python drmaa bindings <https://github.com/drmaa-python/drmaa-python>`__
+ `RunJob <http://drmaa-python.readthedocs.org/en/latest/tutorials.html#waiting-for-a-job>`__ function.
+ It takes care of writing drmaa *job templates* for you.
+ * Each call creates a separate drmaa *job template*.
+
+==================================================================================================
+4) Use multithread: :ref:`pipeline_run(multithread = NNN) <pipeline_functions.pipeline_run>`
+==================================================================================================
+
+ .. warning ::
+
+ :ref:`drmaa_wrapper.run_job()<drmaa_wrapper.run_job>`
+
+ **requires** ``pipeline_run`` :ref:`(multithread = NNN)<pipeline_functions.pipeline_run>`
+
+ **and will not work with** ``pipeline_run`` :ref:`(multiprocess = NNN)<pipeline_functions.pipeline_run>`
+
+
+ Using multithreading rather than multiprocessing
+ * allows the drmaa session to be shared
+ * prevents "processing storms" which lock up the queue submission node when hundreds or thousands of grid engine / cluster commands complete at the same time.
+
+ .. code-block:: python
+
+ pipeline_run (..., multithread = NNN, ...)
+
+ or if you are using ruffus.cmdline:
+
+ .. code-block:: python
+
+ cmdline.run (options, multithread = options.jobs)
+
+
+    Normally, multithreading reduces the amount of parallelism in python because of the python `Global Interpreter Lock (GIL) <http://en.wikipedia.org/wiki/Global_Interpreter_Lock>`__.
+    However, as the workload runs almost entirely on another computer (i.e. a cluster / grid engine node) with its own python interpreter, any cost-benefit calculations of this sort are moot.
+
+==================================================================================================
+5) Develop locally
+==================================================================================================
+
+ :ref:`drmaa_wrapper.run_job() <drmaa_wrapper.run_job>` provides two convenience parameters for developing grid engine pipelines:
+
+ * commands can run locally, i.e. on the local machine rather than on cluster nodes:
+
+ .. code-block:: python
+
+ run_job(cmd_str, run_locally = True)
+
+    * Output files can be `touch <http://en.wikipedia.org/wiki/Touch_(Unix)>`__\ed, i.e. given the appearance of the work having been done, without actually running the commands:
+
+ .. code-block:: python
+
+ run_job(cmd_str, touch_only = True)
+
+
+.. index::
+ pair: pipeline_run touch mode; Tutorial
+ pair: touch mode pipeline_run; Tutorial
+
+.. _new_manual.pipeline_run_touch:
+
+
+********************************************************************************************
+Forcing a pipeline to appear up to date
+********************************************************************************************
+
+    Sometimes, we *know* that a pipeline has run to completion and that everything is up-to-date. However, on the basis
+    of file modification times, Ruffus still insists that parts of the pipeline need to be rerun.
+
+ For example, sometimes a trivial accounting modification needs to be made to a data file.
+ Even though you know that this changes nothing in practice, Ruffus will detect the modification and
+ ask to rerun everything from that point forwards.
+
+ One way to convince Ruffus that everything is fine is to manually `touch <http://en.wikipedia.org/wiki/Touch_(Unix)>`__
+ all subsequent data files one by one in sequence so that the file timestamps follow the appropriate progression.
+
+ You can also ask *Ruffus* to do this automatically for you by running the pipeline in `touch <http://en.wikipedia.org/wiki/Touch_(Unix)>`__
+ mode:
+
+ .. code-block:: python
+
+ pipeline_run( touch_files_only = True)
+
+
+    :ref:`pipeline_run <pipeline_functions.pipeline_run>` will run your pipeline script normally, working backwards from any specified final target, or else from the
+    last task in the pipeline. It works out where it should begin running, i.e. at the first out-of-date data files.
+    From that point onwards, instead of calling your pipeline task functions, each missing or out-of-date file is
+    `touch-ed <http://en.wikipedia.org/wiki/Touch_(Unix)>`__ in turn so that the file modification dates
+    follow on successively.
+
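+    For example, assuming ``final_task`` is the last task you care about (the name is purely illustrative):
+
+    .. code-block:: python
+
+        # work backwards from "final_task", touching rather than re-creating
+        # any missing or out-of-date files
+        pipeline_run([final_task], touch_files_only = True)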
+
+    This turns out to be a useful way to check that your pipeline runs correctly, by creating a series of dummy (empty) files.
+ However, *Ruffus* does not know how to read your mind to know which files to create from :ref:`@split <decorators.split>` or
+ :ref:`@subdivide <decorators.subdivide>` tasks.
+
+
+ Using :ref:`ruffus.cmdline <new_manual.cmdline>` from version 2.4, you can just specify:
+
+ .. code-block:: bash
+
+ your script --touch_files_only [--other_options_of_your_own_etc]
+
diff --git a/doc/tutorials/new_tutorial/multiprocessing_code.rst b/doc/tutorials/new_tutorial/multiprocessing_code.rst
new file mode 100644
index 0000000..6766bd5
--- /dev/null
+++ b/doc/tutorials/new_tutorial/multiprocessing_code.rst
@@ -0,0 +1,183 @@
+.. include:: ../../global.inc
+.. include:: manual_chapter_numbers.inc
+
+.. _new_manual.multiprocessing.code:
+
+################################################################################################################################################################
+|new_manual.multiprocessing.chapter_num|: Python Code for Multiprocessing, ``drmaa`` and Computation Clusters
+################################################################################################################################################################
+
+.. seealso::
+
+ * :ref:`Manual Table of Contents <new_manual.table_of_contents>`
+ * :ref:`@jobs_limit <decorators.jobs_limit>` syntax
+ * :ref:`pipeline_run() <pipeline_functions.pipeline_run>` syntax
+ * :ref:`drmaa_wrapper.run_job() <drmaa_wrapper.run_job>` syntax
+ * Back to |new_manual.multiprocessing.chapter_num|: :ref:`Multiprocessing, drmaa and Computation Clusters <new_manual.multiprocessing>`
+
+************************************************************************************
+:ref:`@jobs_limit <decorators.jobs_limit>`
+************************************************************************************
+
+    * The first 2 tasks share a limit of 3 concurrent jobs between them
+    * The final task is limited to 5 concurrent jobs at a time
+    * The entire pipeline is constrained to a (theoretical) parallelism of 10 jobs at a time
+
+ .. code-block:: python
+ :emphasize-lines: 12,17,22
+
+ from ruffus import *
+ import time
+
+ # make list of 10 files
+ @split(None, "*stage1")
+ def make_files(input_files, output_files):
+ for i in range(10):
+ if i < 5:
+ open("%d.small_stage1" % i, "w")
+ else:
+ open("%d.big_stage1" % i, "w")
+
+ @jobs_limit(3, "ftp_download_limit")
+ @transform(make_files, suffix(".small_stage1"), ".stage2")
+ def stage1_small(input_file, output_file):
+ print "FTP downloading %s ->Start" % input_file
+ time.sleep(2)
+ open(output_file, "w")
+ print "FTP downloading %s ->Finished" % input_file
+
+ @jobs_limit(3, "ftp_download_limit")
+ @transform(make_files, suffix(".big_stage1"), ".stage2")
+ def stage1_big(input_file, output_file):
+ print "FTP downloading %s ->Start" % input_file
+ time.sleep(2)
+ open(output_file, "w")
+ print "FTP downloading %s ->Finished" % input_file
+
+ @jobs_limit(5)
+ @transform([stage1_small, stage1_big], suffix(".stage2"), ".stage3")
+ def stage2(input_file, output_file):
+ print "Processing stage2 %s ->Start" % input_file
+ time.sleep(2)
+ open(output_file, "w")
+ print "Processing stage2 %s ->Finished" % input_file
+
+ pipeline_run(multiprocess = 10, verbose = 0)
+
+
+ Giving:
+
+ .. code-block:: pycon
+ :emphasize-lines: 3,25
+
+ >>> pipeline_run(multiprocess = 10, verbose = 0)
+
+ >>> # 3 jobs at a time, interleaved
+ FTP downloading 5.big_stage1 ->Start
+ FTP downloading 6.big_stage1 ->Start
+ FTP downloading 7.big_stage1 ->Start
+ FTP downloading 5.big_stage1 ->Finished
+ FTP downloading 8.big_stage1 ->Start
+ FTP downloading 6.big_stage1 ->Finished
+ FTP downloading 9.big_stage1 ->Start
+ FTP downloading 7.big_stage1 ->Finished
+ FTP downloading 0.small_stage1 ->Start
+ FTP downloading 8.big_stage1 ->Finished
+ FTP downloading 1.small_stage1 ->Start
+ FTP downloading 9.big_stage1 ->Finished
+ FTP downloading 2.small_stage1 ->Start
+ FTP downloading 0.small_stage1 ->Finished
+ FTP downloading 3.small_stage1 ->Start
+ FTP downloading 1.small_stage1 ->Finished
+ FTP downloading 4.small_stage1 ->Start
+ FTP downloading 2.small_stage1 ->Finished
+ FTP downloading 3.small_stage1 ->Finished
+ FTP downloading 4.small_stage1 ->Finished
+
+ >>> # 5 jobs at a time, interleaved
+ Processing stage2 0.stage2 ->Start
+ Processing stage2 1.stage2 ->Start
+ Processing stage2 2.stage2 ->Start
+ Processing stage2 3.stage2 ->Start
+ Processing stage2 4.stage2 ->Start
+ Processing stage2 0.stage2 ->Finished
+ Processing stage2 5.stage2 ->Start
+ Processing stage2 1.stage2 ->Finished
+ Processing stage2 6.stage2 ->Start
+ Processing stage2 2.stage2 ->Finished
+ Processing stage2 4.stage2 ->Finished
+ Processing stage2 7.stage2 ->Start
+ Processing stage2 8.stage2 ->Start
+ Processing stage2 3.stage2 ->Finished
+ Processing stage2 9.stage2 ->Start
+ Processing stage2 5.stage2 ->Finished
+ Processing stage2 7.stage2 ->Finished
+ Processing stage2 6.stage2 ->Finished
+ Processing stage2 8.stage2 ->Finished
+ Processing stage2 9.stage2 ->Finished
+
+.. _using_ruffus.drmaa_wrapper:
+
+************************************************************************************
+Using ``ruffus.drmaa_wrapper``
+************************************************************************************
+
+ .. code-block:: python
+ :emphasize-lines: 17,31,53
+
+ #!/usr/bin/python
+ job_queue_name = "YOUR_QUEUE_NAME_GOES_HERE"
+ job_other_options = "-P YOUR_PROJECT_NAME_GOES_HERE"
+
+ from ruffus import *
+ from ruffus.drmaa_wrapper import run_job, error_drmaa_job
+
+ parser = cmdline.get_argparse(description='WHAT DOES THIS PIPELINE DO?')
+
+ options = parser.parse_args()
+
+ # logger which can be passed to multiprocessing ruffus tasks
+ logger, logger_mutex = cmdline.setup_logging (__name__, options.log_file, options.verbose)
+
+
+ #
+ # start shared drmaa session for all jobs / tasks in pipeline
+ #
+ import drmaa
+ drmaa_session = drmaa.Session()
+ drmaa_session.initialize()
+
+        @originate(["1.chromosome", "X.chromosome"],
+                   logger, logger_mutex)
+        def create_test_files(output_file, logger, logger_mutex):
+            try:
+                stdout_res, stderr_res = "", ""
+                cmd_str  = "touch " + output_file
+                # name used to identify the job (any descriptive string)
+                job_name = "create_test_files"
+
+                #
+                #   ruffus.drmaa_wrapper.run_job
+                #
+                stdout_res, stderr_res  = run_job(cmd_str           = cmd_str,
+                                                  job_name          = job_name,
+                                                  logger            = logger,
+                                                  drmaa_session     = drmaa_session,
+                                                  run_locally       = options.local_run,
+                                                  job_queue_name    = job_queue_name,
+                                                  job_other_options = job_other_options)
+
+            # relay all the stdout, stderr, drmaa output to diagnose failures
+            except error_drmaa_job as err:
+                raise Exception("\n".join(map(str,
+                                    ["Failed to run:",
+                                     cmd_str,
+                                     err,
+                                     stdout_res,
+                                     stderr_res])))
+
+
+ if __name__ == '__main__':
+ cmdline.run (options, multithread = options.jobs)
+ # cleanup drmaa
+ drmaa_session.exit()
+
+
diff --git a/doc/tutorials/new_tutorial/onthefly.rst b/doc/tutorials/new_tutorial/onthefly.rst
new file mode 100644
index 0000000..221b050
--- /dev/null
+++ b/doc/tutorials/new_tutorial/onthefly.rst
@@ -0,0 +1,192 @@
+.. include:: ../../global.inc
+.. include:: manual_chapter_numbers.inc
+
+.. index::
+ pair: on_the_fly; Tutorial
+
+.. _new_manual.on_the_fly:
+
+####################################################################################################################################################
+|new_manual.on_the_fly.chapter_num|: Esoteric: Generating parameters on the fly with :ref:`@files<decorators.files_on_the_fly>`
+####################################################################################################################################################
+
+
+.. seealso::
+
+ * :ref:`Manual Table of Contents <new_manual.table_of_contents>`
+ * :ref:`@files on-the-fly syntax in detail <decorators.files_on_the_fly>`
+
+.. note::
+
+ Remember to look at the example code:
+
+ * :ref:`new_manual.on_the_fly.code`
+
+
+***********************
+Overview
+***********************
+
+ The different *Ruffus* :ref:`decorators <decorators>` connect up different tasks and
+ generate *Output* (file names) from your *Input* in all sorts of different ways.
+
+ However, sometimes, none of them *quite* do exactly what you need. And it becomes
+ necessary to generate your own *Input* and *Output* parameters on the fly.
+
+    Although this additional flexibility comes at the cost of some extra, less convenient
+    code, you can continue to leverage the rest of *Ruffus* functionality, such as
+    checking whether files are up to date or not.
+
+.. index::
+ pair: @files; Tutorial on-the-fly parameter generation
+
+
+*********************************************************************
+:ref:`@files <decorators.files_on_the_fly>` syntax
+*********************************************************************
+ To generate parameters on the fly, use the :ref:`@files <decorators.files_on_the_fly>`
+ with a :term:`generator` function which yields one list / tuple of parameters per job.
+
+ For example:
+
+ .. code-block:: python
+        :emphasize-lines: 4,17
+
+        from ruffus import *
+        import sys
+
+ # generator function
+ def generate_parameters_on_the_fly():
+ """
+ returns one list of parameters per job
+ """
+ parameters = [
+ ['A.input', 'A.output', (1, 2)], # 1st job
+ ['B.input', 'B.output', (3, 4)], # 2nd job
+ ['C.input', 'C.output', (5, 6)], # 3rd job
+ ]
+ for job_parameters in parameters:
+ yield job_parameters
+
+ # tell ruffus that parameters should be generated on the fly
+ @files(generate_parameters_on_the_fly)
+ def pipeline_task(input, output, extra):
+ open(output, "w").write(open(input).read())
+ sys.stderr.write("%d + %d => %d\n" % (extra[0] , extra[1], extra[0] + extra[1]))
+
+ pipeline_run()
+
+
+ Produces:
+
+ .. code-block:: pycon
+
+        Task = pipeline_task
+ 1 + 2 = 3
+ Job = ["A", 1, 2] completed
+ 3 + 4 = 7
+ Job = ["B", 3, 4] completed
+ 5 + 6 = 11
+ Job = ["C", 5, 6] completed
+
+
+ .. note::
+
+        Be aware that the parameter generating function may be invoked
+        :ref:`more than once<new_manual.dependencies.checking_multiple_times>` (see the sketch below):
+
+        * The first time to check if this part of the pipeline is up-to-date.
+        * The second time when the pipeline task function is run.
+
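+    Because the generating function may be called more than once, it is best kept cheap to
+    run, and it should normally yield the same parameters on each invocation. A minimal
+    sketch (the counter exists only to make the repeated invocation visible):
+
+    .. code-block:: python
+
+        call_count = [0]
+
+        def generate_parameters_on_the_fly():
+            # may be called once for the up-to-date check
+            # and again when the jobs are actually run
+            call_count[0] += 1
+            print "parameter generator called %d time(s)" % call_count[0]
+            for job_parameters in [['A.input', 'A.output', (1, 2)],
+                                   ['B.input', 'B.output', (3, 4)]]:
+                yield job_parameters
+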
+ The resulting custom *inputs*, *outputs* parameters per job are
+ treated normally for the purposes of checking to see if jobs are up-to-date and
+ need to be re-run.
+
+
+**********************************************
+ A Cartesian Product, all vs all example
+**********************************************
+
+ The :ref:`accompanying example<new_manual.on_the_fly.code>` provides a more realistic reason why
+ you would want to generate parameters on the fly. It is a fun piece of code, which generates
+ N x M combinations from two sets of files as the *inputs* of a pipeline stage.
+
+ The *inputs* / *outputs* filenames are generated as a pair of nested for-loops to produce
+ the N (outside loop) x M (inside loop) combinations, with the appropriate parameters
+ for each job ``yield``\ed per iteration of the inner loop. The gist of this is:
+
+ .. code-block:: python
+ :emphasize-lines: 3
+
+ #_________________________________________________________________________________________
+ #
+ # Generator function
+ #
+ # N x M jobs
+ #_________________________________________________________________________________________
+ def generate_simulation_params ():
+ """
+ Custom function to generate
+ file names for gene/gwas simulation study
+ """
+ for sim_file in get_simulation_files():
+ for (gene, gwas) in get_gene_gwas_file_pairs():
+ result_file = "%s.%s.results" % (gene, sim_file)
+ yield (gene, gwas, sim_file), result_file
+
+
+
+ @files(generate_simulation_params)
+ def gwas_simulation(input_files, output_file):
+ "..."
+
+    If ``get_simulation_files()`` produces:
+ ::
+
+ ['a.sim', 'b.sim', 'c.sim']
+
+ and ``get_gene_gwas_file_pairs()`` produces:
+ ::
+
+ [('1.gene', '1.gwas'), ('2.gene', '2.gwas')]
+
+ then we would end up with ``3`` x ``2`` = ``6`` jobs and the following equivalent function calls:
+
+ ::
+
+ gwas_simulation(('1.gene', '1.gwas', 'a.sim'), "1.gene.a.sim.results")
+ gwas_simulation(('2.gene', '2.gwas', 'a.sim'), "2.gene.a.sim.results")
+ gwas_simulation(('1.gene', '1.gwas', 'b.sim'), "1.gene.b.sim.results")
+ gwas_simulation(('2.gene', '2.gwas', 'b.sim'), "2.gene.b.sim.results")
+ gwas_simulation(('1.gene', '1.gwas', 'c.sim'), "1.gene.c.sim.results")
+ gwas_simulation(('2.gene', '2.gwas', 'c.sim'), "2.gene.c.sim.results")
+
+
+ The :ref:`accompanying code<new_manual.on_the_fly.code>` looks slightly more complicated because
+ of some extra bookkeeping.
+
+
+
+ You can compare this approach with the alternative of using :ref:`@product <decorators.product>`:
+
+ .. code-block:: python
+ :emphasize-lines: 3
+
+ #_________________________________________________________________________________________
+ #
+ # N x M jobs
+ #_________________________________________________________________________________________
+ @product( os.path.join(simulation_data_dir, "*.simulation"),
+ formatter(),
+
+ os.path.join(gene_data_dir, "*.gene"),
+ formatter(),
+
+                  # add gwas as an input: looks like *.gene but with a different extension
+                  add_inputs("{path[1][0]}/{basename[1][0]}.gwas"),
+
+                  "{basename[0][0]}.{basename[1][0]}.results")     # output file
+ def gwas_simulation(input_files, output_file):
+ "..."
+
+
+
+
diff --git a/doc/tutorials/new_tutorial/onthefly_code.rst b/doc/tutorials/new_tutorial/onthefly_code.rst
new file mode 100644
index 0000000..2ddc060
--- /dev/null
+++ b/doc/tutorials/new_tutorial/onthefly_code.rst
@@ -0,0 +1,328 @@
+.. include:: ../../global.inc
+.. include:: manual_chapter_numbers.inc
+
+.. _new_manual.on_the_fly.code:
+
+############################################################################################################################################################################################################
+|new_manual.on_the_fly.chapter_num|: Esoteric: Python Code for Generating parameters on the fly with :ref:`@files<decorators.files_on_the_fly>`
+############################################################################################################################################################################################################
+
+.. seealso::
+
+ * :ref:`Manual Table of Contents <new_manual.table_of_contents>`
+ * :ref:`@files on-the-fly syntax in detail <decorators.files_on_the_fly>`
+ * Back to |new_manual.on_the_fly.chapter_num|: :ref:`Generating parameters on the fly <new_manual.on_the_fly>`
+
+************************************
+Introduction
+************************************
+
+    | This script takes N pairs of input files (with the suffixes .gene and .gwas)
+    | and runs them against M sets of simulation data (with the suffix .simulation).
+    | A summary per input file pair is then produced.
+
+
+ In pseudo-code:
+
+ STEP_1:
+
+ ::
+
+ for n_file in NNN_pairs_of_input_files:
+ for m_file in MMM_simulation_data:
+
+ [n_file.gene,
+ n_file.gwas,
+ m_file.simulation] -> n_file.m_file.simulation_res
+
+
+ STEP_2:
+
+ ::
+
+ for n_file in NNN_pairs_of_input_files:
+
+ n_file.*.simulation_res -> n_file.mean
+
+
+ | n = CNT_GENE_GWAS_FILES
+ | m = CNT_SIMULATION_FILES
+
+************************************
+Code
+************************************
+ ::
+
+ from ruffus import *
+ import os
+
+ #88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+ # constants
+
+ #88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+ working_dir = "temp_NxM"
+ simulation_data_dir = os.path.join(working_dir, "simulation")
+ gene_data_dir = os.path.join(working_dir, "gene")
+ CNT_GENE_GWAS_FILES = 2
+ CNT_SIMULATION_FILES = 3
+
+
+
+ #88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+ # imports
+
+ #88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+ import os, sys
+ from itertools import izip
+ import glob
+ #88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+ # Functions
+
+
+ #88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+ #_________________________________________________________________________________________
+ #
+ # get gene gwas file pairs
+ #
+ #_________________________________________________________________________________________
+ def get_gene_gwas_file_pairs( ):
+ """
+        Helper function to get all *.gene, *.gwas from the directory specified
+ in --gene_data_dir
+
+ Returns
+ file pairs with both .gene and .gwas extensions,
+ corresponding roots (no extension) of each file
+ """
+ gene_files = glob.glob(os.path.join(gene_data_dir, "*.gene"))
+ gwas_files = glob.glob(os.path.join(gene_data_dir, "*.gwas"))
+ #
+ common_roots = set(map(lambda x: os.path.splitext(os.path.split(x)[1])[0], gene_files))
+ common_roots &=set(map(lambda x: os.path.splitext(os.path.split(x)[1])[0], gwas_files))
+ common_roots = list(common_roots)
+ #
+ p = os.path; g_dir = gene_data_dir
+ file_pairs = [[p.join(g_dir, x + ".gene"), p.join(g_dir, x + ".gwas")] for x in common_roots]
+ return file_pairs, common_roots
+
+ #_________________________________________________________________________________________
+ #
+ # get simulation files
+ #
+ #_________________________________________________________________________________________
+ def get_simulation_files( ):
+ """
+        Helper function to get all *.simulation from the directory specified
+ in --simulation_data_dir
+ Returns
+ file with .simulation extensions,
+ corresponding roots (no extension) of each file
+ """
+ simulation_files = glob.glob(os.path.join(simulation_data_dir, "*.simulation"))
+ simulation_roots =map(lambda x: os.path.splitext(os.path.split(x)[1])[0], simulation_files)
+ return simulation_files, simulation_roots
+
+
+
+ #88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+ # Main logic
+
+
+ #88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+
+
+
+
+
+
+ #_________________________________________________________________________________________
+ #
+ # setup_simulation_data
+ #
+ #_________________________________________________________________________________________
+
+ #
+ # mkdir: makes sure output directories exist before task
+ #
+ @follows(mkdir(gene_data_dir, simulation_data_dir))
+ def setup_simulation_data ():
+ """
+ create simulation files
+ """
+ for i in range(CNT_GENE_GWAS_FILES):
+ open(os.path.join(gene_data_dir, "%03d.gene" % i), "w")
+ open(os.path.join(gene_data_dir, "%03d.gwas" % i), "w")
+ #
+ # gene files without corresponding gwas and vice versa
+ open(os.path.join(gene_data_dir, "orphan1.gene"), "w")
+ open(os.path.join(gene_data_dir, "orphan2.gwas"), "w")
+ open(os.path.join(gene_data_dir, "orphan3.gwas"), "w")
+ #
+ for i in range(CNT_SIMULATION_FILES):
+ open(os.path.join(simulation_data_dir, "%03d.simulation" % i), "w")
+
+
+
+
+ #_________________________________________________________________________________________
+ #
+ # cleanup_simulation_data
+ #
+ #_________________________________________________________________________________________
+ def try_rmdir (d):
+ if os.path.exists(d):
+ try:
+ os.rmdir(d)
+ except OSError:
+ sys.stderr.write("Warning:\t%s is not empty and will not be removed.\n" % d)
+
+
+
+ def cleanup_simulation_data ():
+ """
+ cleanup files
+ """
+ sys.stderr.write("Cleanup working directory and simulation files.\n")
+ #
+ # cleanup gene and gwas files
+ #
+ for f in glob.glob(os.path.join(gene_data_dir, "*.gene")):
+ os.unlink(f)
+ for f in glob.glob(os.path.join(gene_data_dir, "*.gwas")):
+ os.unlink(f)
+ try_rmdir(gene_data_dir)
+ #
+ # cleanup simulation
+ #
+ for f in glob.glob(os.path.join(simulation_data_dir, "*.simulation")):
+ os.unlink(f)
+ try_rmdir(simulation_data_dir)
+ #
+ # cleanup working_dir
+ #
+ for f in glob.glob(os.path.join(working_dir, "simulation_results", "*.simulation_res")):
+ os.unlink(f)
+ try_rmdir(os.path.join(working_dir, "simulation_results"))
+ #
+ for f in glob.glob(os.path.join(working_dir, "*.mean")):
+ os.unlink(f)
+ try_rmdir(working_dir)
+
+
+ #_________________________________________________________________________________________
+ #
+ # Step 1:
+ #
+ # for n_file in NNN_pairs_of_input_files:
+ # for m_file in MMM_simulation_data:
+ #
+ # [n_file.gene,
+ # n_file.gwas,
+ # m_file.simulation] -> working_dir/n_file.m_file.simulation_res
+ #
+ #_________________________________________________________________________________________
+ def generate_simulation_params ():
+ """
+ Custom function to generate
+ file names for gene/gwas simulation study
+ """
+ simulation_files, simulation_file_roots = get_simulation_files()
+ gene_gwas_file_pairs, gene_gwas_file_roots = get_gene_gwas_file_pairs()
+ #
+ for sim_file, sim_file_root in izip(simulation_files, simulation_file_roots):
+ for (gene, gwas), gene_file_root in izip(gene_gwas_file_pairs, gene_gwas_file_roots):
+ #
+ result_file = "%s.%s.simulation_res" % (gene_file_root, sim_file_root)
+ result_file_path = os.path.join(working_dir, "simulation_results", result_file)
+ #
+ yield [gene, gwas, sim_file], result_file_path, gene_file_root, sim_file_root, result_file
+
+
+
+ #
+ # mkdir: makes sure output directories exist before task
+ #
+ @follows(mkdir(working_dir, os.path.join(working_dir, "simulation_results")))
+ @files(generate_simulation_params)
+ def gwas_simulation(input_files, result_file_path, gene_file_root, sim_file_root, result_file):
+ """
+ Dummy calculation of gene gwas vs simulation data
+ Normally runs in parallel on a computational cluster
+ """
+ (gene_file,
+ gwas_file,
+ simulation_data_file) = input_files
+ #
+ simulation_res_file = open(result_file_path, "w")
+ simulation_res_file.write("%s + %s -> %s\n" % (gene_file_root, sim_file_root, result_file))
+
+
+ #_________________________________________________________________________________________
+ #
+ # Step 2:
+ #
+ # Statistical summary per gene/gwas file pair
+ #
+ # for n_file in NNN_pairs_of_input_files:
+ # working_dir/simulation_results/n.*.simulation_res
+ # -> working_dir/n.mean
+ #
+ #_________________________________________________________________________________________
+
+
+ @collate(gwas_simulation, regex(r"simulation_results/(\d+).\d+.simulation_res"), r"\1.mean")
+ @posttask(lambda : sys.stdout.write("\nOK\n"))
+ def statistical_summary (result_files, summary_file):
+ """
+ Simulate statistical summary
+ """
+ summary_file = open(summary_file, "w")
+ for f in result_files:
+ summary_file.write(open(f).read())
+
+
+
+ pipeline_run([setup_simulation_data], multiprocess = 5, verbose = 2)
+ pipeline_run([statistical_summary], multiprocess = 5, verbose = 2)
+
+        # uncomment to print out the flowchart
+ #
+ # pipeline_printout(sys.stdout, [statistical_summary], verbose=2)
+ # graph_printout ("flowchart.jpg", "jpg", [statistical_summary])
+ #
+
+ cleanup_simulation_data ()
+
+
+
+
+************************************
+Resulting Output
+************************************
+ ::
+
+ >>> pipeline_run([setup_simulation_data], multiprocess = 5, verbose = 2)
+ Make directories [temp_NxM/gene, temp_NxM/simulation] completed
+ Completed Task = setup_simulation_data_mkdir_1
+ Job completed
+ Completed Task = setup_simulation_data
+
+
+ >>> pipeline_run([statistical_summary], multiprocess = 5, verbose = 2)
+ Make directories [temp_NxM, temp_NxM/simulation_results] completed
+ Completed Task = gwas_simulation_mkdir_1
+ Job = [[temp_NxM/gene/001.gene, temp_NxM/gene/001.gwas, temp_NxM/simulation/000.simulation] -> temp_NxM/simulation_results/001.000.simulation_res, 001, 000, 001.000.simulation_res] completed
+ Job = [[temp_NxM/gene/000.gene, temp_NxM/gene/000.gwas, temp_NxM/simulation/000.simulation] -> temp_NxM/simulation_results/000.000.simulation_res, 000, 000, 000.000.simulation_res] completed
+ Job = [[temp_NxM/gene/001.gene, temp_NxM/gene/001.gwas, temp_NxM/simulation/001.simulation] -> temp_NxM/simulation_results/001.001.simulation_res, 001, 001, 001.001.simulation_res] completed
+ Job = [[temp_NxM/gene/000.gene, temp_NxM/gene/000.gwas, temp_NxM/simulation/001.simulation] -> temp_NxM/simulation_results/000.001.simulation_res, 000, 001, 000.001.simulation_res] completed
+ Job = [[temp_NxM/gene/000.gene, temp_NxM/gene/000.gwas, temp_NxM/simulation/002.simulation] -> temp_NxM/simulation_results/000.002.simulation_res, 000, 002, 000.002.simulation_res] completed
+ Job = [[temp_NxM/gene/001.gene, temp_NxM/gene/001.gwas, temp_NxM/simulation/002.simulation] -> temp_NxM/simulation_results/001.002.simulation_res, 001, 002, 001.002.simulation_res] completed
+ Completed Task = gwas_simulation
+ Job = [[temp_NxM/simulation_results/000.000.simulation_res, temp_NxM/simulation_results/000.001.simulation_res, temp_NxM/simulation_results/000.002.simulation_res] -> temp_NxM/000.mean] completed
+ Job = [[temp_NxM/simulation_results/001.000.simulation_res, temp_NxM/simulation_results/001.001.simulation_res, temp_NxM/simulation_results/001.002.simulation_res] -> temp_NxM/001.mean] completed
diff --git a/doc/tutorials/new_tutorial/originate.rst b/doc/tutorials/new_tutorial/originate.rst
new file mode 100644
index 0000000..0c7f0fe
--- /dev/null
+++ b/doc/tutorials/new_tutorial/originate.rst
@@ -0,0 +1,92 @@
+.. include:: ../../global.inc
+.. include:: manual_chapter_numbers.inc
+
+
+.. index::
+ pair: originate; Tutorial
+
+.. _new_manual.originate:
+
+######################################################################################################
+|new_manual.originate.chapter_num|: Creating files with ``@originate``
+######################################################################################################
+
+.. seealso::
+
+ * :ref:`Manual Table of Contents <new_manual.table_of_contents>`
+ * :ref:`@originate syntax in detail <decorators.originate>`
+
+.. note::
+
+ Remember to look at the example code:
+
+ * :ref:`new_manual.originate.code`
+
+********************************************************************************************
+Simplifying our example with :ref:`@originate <decorators.originate>`
+********************************************************************************************
+
+ Our previous pipeline example started off with a set of files which we had to create first.
+
+ This is a common task: pipelines have to start *somewhere*.
+
+ Ideally, though, we would only want to create these starting files if they didn't already exist. In other words, we want a sort of ``@transform`` which makes files from nothing (``None``?).
+
+ This is exactly what :ref:`@originate <decorators.originate>` helps you to do.
+
+ Rewriting our pipeline with :ref:`@originate <decorators.originate>` gives the following three steps:
+
+
+ .. code-block:: python
+ :emphasize-lines: 6
+
+ from ruffus import *
+
+ #---------------------------------------------------------------
+ # create initial files
+ #
+ @originate([ ['job1.a.start', 'job1.b.start'],
+ ['job2.a.start', 'job2.b.start'],
+ ['job3.a.start', 'job3.b.start'] ])
+ def create_initial_file_pairs(output_files):
+ # create both files as necessary
+ for output_file in output_files:
+ with open(output_file, "w") as oo: pass
+
+ #---------------------------------------------------------------
+ # first task
+ @transform(create_initial_file_pairs, suffix(".start"), ".output.1")
+ def first_task(input_files, output_file):
+ with open(output_file, "w"): pass
+
+
+ #---------------------------------------------------------------
+ # second task
+ @transform(first_task, suffix(".output.1"), ".output.2")
+ def second_task(input_files, output_file):
+ with open(output_file, "w"): pass
+
+ #
+ # Run
+ #
+ pipeline_run([second_task])
+
+
+
+ ::
+
+ Job = [None -> [job1.a.start, job1.b.start]] completed
+ Job = [None -> [job2.a.start, job2.b.start]] completed
+ Job = [None -> [job3.a.start, job3.b.start]] completed
+ Completed Task = create_initial_file_pairs
+ Job = [[job1.a.start, job1.b.start] -> job1.a.output.1] completed
+ Job = [[job2.a.start, job2.b.start] -> job2.a.output.1] completed
+ Job = [[job3.a.start, job3.b.start] -> job3.a.output.1] completed
+ Completed Task = first_task
+ Job = [job1.a.output.1 -> job1.a.output.2] completed
+ Job = [job2.a.output.1 -> job2.a.output.2] completed
+ Job = [job3.a.output.1 -> job3.a.output.2] completed
+ Completed Task = second_task
+
+
+
diff --git a/doc/tutorials/new_tutorial/originate_code.rst b/doc/tutorials/new_tutorial/originate_code.rst
new file mode 100644
index 0000000..215e47a
--- /dev/null
+++ b/doc/tutorials/new_tutorial/originate_code.rst
@@ -0,0 +1,71 @@
+.. include:: ../../global.inc
+.. include:: manual_chapter_numbers.inc
+
+.. _new_manual.originate.code:
+
+######################################################################################################
+|new_manual.originate.chapter_num|: Python Code for Creating files with ``@originate``
+######################################################################################################
+
+.. seealso::
+
+ * :ref:`Manual Table of Contents <new_manual.table_of_contents>`
+ * :ref:`@transform syntax in detail <decorators.transform>`
+ * Back to |new_manual.originate.chapter_num|: :ref:`@originate <new_manual.originate>`
+
+**********************************************
+Using ``@originate``
+**********************************************
+ ::
+
+ from ruffus import *
+
+ #---------------------------------------------------------------
+ # create initial files
+ #
+ @originate([ ['job1.a.start', 'job1.b.start'],
+ ['job2.a.start', 'job2.b.start'],
+ ['job3.a.start', 'job3.b.start'] ])
+ def create_initial_file_pairs(output_files):
+ # create both files as necessary
+ for output_file in output_files:
+ with open(output_file, "w") as oo: pass
+
+ #---------------------------------------------------------------
+ # first task
+ @transform(create_initial_file_pairs, suffix(".start"), ".output.1")
+ def first_task(input_files, output_file):
+ with open(output_file, "w"): pass
+
+
+ #---------------------------------------------------------------
+ # second task
+ @transform(first_task, suffix(".output.1"), ".output.2")
+ def second_task(input_files, output_file):
+ with open(output_file, "w"): pass
+
+ #
+ # Run
+ #
+ pipeline_run([second_task])
+
+
+************************************
+Resulting Output
+************************************
+
+ ::
+
+ Job = [None -> [job1.a.start, job1.b.start]] completed
+ Job = [None -> [job2.a.start, job2.b.start]] completed
+ Job = [None -> [job3.a.start, job3.b.start]] completed
+ Completed Task = create_initial_file_pairs
+ Job = [[job1.a.start, job1.b.start] -> job1.a.output.1] completed
+ Job = [[job2.a.start, job2.b.start] -> job2.a.output.1] completed
+ Job = [[job3.a.start, job3.b.start] -> job3.a.output.1] completed
+ Completed Task = first_task
+ Job = [job1.a.output.1 -> job1.a.output.2] completed
+ Job = [job2.a.output.1 -> job2.a.output.2] completed
+ Job = [job3.a.output.1 -> job3.a.output.2] completed
+ Completed Task = second_task
+
diff --git a/doc/tutorials/new_tutorial/output_file_names.rst b/doc/tutorials/new_tutorial/output_file_names.rst
new file mode 100644
index 0000000..5523794
--- /dev/null
+++ b/doc/tutorials/new_tutorial/output_file_names.rst
@@ -0,0 +1,560 @@
+.. include:: ../../global.inc
+.. include:: manual_chapter_numbers.inc
+
+.. index::
+ pair: formatter; Tutorial
+ pair: suffix; Tutorial
+ pair: regex; Tutorial
+ pair: output file names; Tutorial
+
+.. _new_manual.output_file_names:
+
+############################################################################################################################################################################################################
+|new_manual.output_file_names.chapter_num|: Specifying output file names with :ref:`formatter() <decorators.formatter>` and :ref:`regex() <decorators.regex>`
+############################################################################################################################################################################################################
+
+.. seealso::
+
+ * :ref:`Manual Table of Contents <new_manual.table_of_contents>`
+ * :ref:`suffix() <decorators.suffix>` syntax
+ * :ref:`formatter() <decorators.formatter>` syntax
+ * :ref:`regex() <decorators.regex>` syntax
+
+
+.. note::
+
+ Remember to look at the example code:
+
+ * :ref:`new_manual.output_file_names.code`
+
+
+
+***************************************
+Review
+***************************************
+ .. image:: ../../images/theoretical_pipeline_schematic.png
+ :scale: 50
+
+ Computational pipelines transform your data in stages until the final result is produced.
+ The most straightforward way to use Ruffus is to hold the intermediate results after each stage
+ in a series of files with related file names.
+
+ Part of telling Ruffus how these pipeline stages or :term:`task` functions are connected
+    together is to write simple rules for how the file names for each stage follow on from each other.
+ Ruffus helps you to specify these file naming rules.
+
+ .. note::
+
+ **The best way to design a pipeline is to:**
+
+ * **Write down the file names of the data as it flows across your pipeline.**
+ Do these file names follow a *pattern* ?
+ * **Write down the names of functions which transforms the data at each stage of the pipeline.**
+
+
+.. _new_manual.suffix:
+
+**********************************************************************************************************************************************
+A different file name :ref:`suffix() <decorators.suffix>` for each pipeline stage
+**********************************************************************************************************************************************
+
+
+ The easiest and cleanest way to write Ruffus pipelines is to use a different suffix
+ for each stage of your pipeline.
+
+ We used this approach in :ref:`new_manual.introduction` and in :ref:`code <new_manual.transform_in_parallel.code>` from :ref:`new_manual.transform_in_parallel`:
+
+
+ .. code-block:: bash
+ :emphasize-lines: 1
+
+ #Task Name: File suffices
+ _________________________ ______________________
+ create_initial_file_pairs *.start
+ first_task *.output.1
+ second_task *.output.2
+
+
+    There is a long-standing convention of using file suffixes to denote file type: for example, a **"compile"** task might convert **source** files of type ``*.c`` to **object** files of type ``*.o``.
+
+    We can think of Ruffus tasks as comprising:
+ * recipes in ``@transform(...)`` for transforming file names: changing ``.c`` to a ``.o`` (e.g. ``AA.c -> AA.o`` ``BB.c -> BB.o``)
+ * recipes in a task function ``def foo_bar()`` for transforming your data: from **source** ``.c`` to **object** ``.o``
+
+
+ Let us review the Ruffus syntax for doing this:
+
+ .. code-block:: bash
+ :emphasize-lines: 1,2,3
+
+ @transform( create_initial_file_pairs, # Input: Name of previous task(s)
+ suffix(".start"), # Matching suffix
+ ".output.1") # Replacement string
+ def first_task(input_files, output_file):
+ with open(output_file, "w"): pass
+
+
+ #. **Input**:
+
+ The first parameter for ``@transform`` can be a mixture of one or more:
+ * previous tasks (e.g. ``create_initial_file_pairs``)
+ * file names (all python strings are treated as paths)
+ * glob specifications (e.g ``*.c``, ``/my/path/*.foo``)
+
+ Each element provides an input for the task. So if the previous task ``create_initial_file_pairs`` has five outputs, the next ``@transform`` task will accept
+ these as five separate inputs leading to five independent jobs.
+
+ #. :ref:`suffix() <decorators.suffix>`:
+
+ The second parameter ``suffix(".start")`` must match the end of the first string in each input.
+        For example, if ``create_initial_file_pairs`` produces the list ``['job1.a.start', 'job1.b.start']``, then ``suffix(".start")`` is matched against the first string, i.e. ``'job1.a.start'``.
+        If the input is a nested structure, it is iterated through recursively to find the first string.
+
+ .. note::
+
+ Inputs which do not match the suffix are discarded altogether.
+
+ #. **Replacement**:
+
+ The third parameter is the replacement for the suffix.
+        The pair of input strings in the example above produces the following output parameter
+
+ ::
+
+ input_parameters = ['job1.a.start', 'job1.b.start']
+ matching_input = 'job1.a.start'
+ output_parameter = 'job1.a.output.1'
+
+
+ When the pipeline is run, this results in the following equivalent call to ``first_task(...)``:
+
+ .. code-block:: python
+
+            first_task(['job1.a.start', 'job1.b.start'], 'job1.a.output.1')
+
+ The replacement parameter can itself be a list or any arbitrary complicated structure:
+
+ .. code-block:: bash
+ :emphasize-lines: 1,2,3
+
+ @transform(create_initial_file_pairs, # Input
+ suffix(".a.start"), # Matching suffix
+ [".output.a.1", ".output.b.1", 45]) # Replacement list
+ def first_task(input_files, output_parameters):
+ print "input_parameters = ", input_files
+ print "output_parameters = ", output_parameters
+
+ In which case, all the strings are used as replacements, other values are left untouched, and we obtain the following:
+
+ .. code-block:: bash
+ :emphasize-lines: 1,5,9
+
+ # job #1
+ input = ['job1.a.start', 'job1.b.start']
+ output = ['job1.output.a.1', 'job1.output.b.1', 45]
+
+ # job #2
+ input = ['job2.a.start', 'job2.b.start']
+ output = ['job2.output.a.1', 'job2.output.b.1', 45]
+
+ # job #3
+ input = ['job3.a.start', 'job3.b.start']
+ output = ['job3.output.a.1', 'job3.output.b.1', 45]
+
+
+        Note how the task function is called with the value ``45`` *verbatim*, because it is not a string.
+
+
+
+.. _new_manual.formatter:
+
+************************************************************************************************************************************************************
+:ref:`formatter() <decorators.formatter>` manipulates pathnames and regular expressions
+************************************************************************************************************************************************************
+
+ :ref:`suffix() <decorators.suffix>` replacement is the cleanest and easiest way to generate suitable output file names for each stage in a pipeline.
+ Often, however, we require more complicated manipulations to specify our file names.
+ For example,
+
+ * It is common to have to change directories from a *data* directory to a *working* directory as the first step of a pipeline.
+    * Data management can be simplified by separating the files from each pipeline stage into their own directory.
+ * Information may have to be decoded from data file names, e.g. ``"experiment373.IBM.03March2002.txt"``
+
+
+ Though :ref:`formatter() <decorators.formatter>` is much more powerful, the principle and syntax are the same:
+ we take string elements from the **Input** and perform some replacements to generate the **Output** parameters.
+
+
+ :ref:`formatter() <decorators.formatter>`
+
+ * Allows easy manipulation of path subcomponents in the style of `os.path.split() <http://docs.python.org/2/library/os.path.html#os.path.split>`__, and `os.path.basename <http://docs.python.org/2/library/os.path.html#os.path.basename>`__
+ * Uses familiar python `string.format <http://docs.python.org/2/library/string.html#string-formatting>`__ syntax (See `string.format examples <http://docs.python.org/2/library/string.html#format-examples>`__. )
+ * Supports optional regular expression (`re <http://docs.python.org/2/library/re.html#re.MatchObject.group>`__) matches including named captures.
+ * Can refer to any file path (i.e. python string) in each input and is not limited like :ref:`suffix() <decorators.suffix>` to the first string.
+ * Can even refer to individual letters within a match
+
+
+========================
+Path name components
+========================
+
+ :ref:`formatter() <decorators.formatter>` breaks down each input pathname into path name components which can then be recombined in whichever way by the replacement string.
+
+ Given an example string of :
+
+ .. code-block:: python
+
+ input_string = "/directory/to/a/file.name.ext"
+ formatter()
+
+ the path components are:
+
+ * ``basename``: The `base name <http://docs.python.org/2/library/os.path.html#os.path.basename>`__ *excluding* `extension <http://docs.python.org/2/library/os.path.html#os.path.splitext>`__, ``"file.name"``
+ * ``ext`` : The `extension <http://docs.python.org/2/library/os.path.html#os.path.splitext>`__, ``".ext"``
+ * ``path`` : The `dirname <http://docs.python.org/2/library/os.path.html#os.path.dirname>`__, ``"/directory/to/a"``
+ * ``subdir`` : A list of sub-directories in the ``path`` in reverse order, ``["a", "to", "directory", "/"]``
+ * ``subpath`` : A list of descending sub-paths in reverse order, ``["/directory/to/a", "/directory/to", "/directory", "/"]``
+
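+    For orientation only (plain Python, not the *Ruffus* API), ``path``, ``basename`` and
+    ``ext`` correspond to the standard ``os.path`` calls:
+
+    .. code-block:: python
+
+        import os
+
+        input_string  = "/directory/to/a/file.name.ext"
+
+        path          = os.path.dirname(input_string)                     # "/directory/to/a"
+        basename, ext = os.path.splitext(os.path.basename(input_string))  # "file.name", ".ext"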
+
+ The replacement string refers to these components by using python `string.format <http://docs.python.org/2/library/string.html#string-formatting>`__ style curly braces. ``"{NAME}"``
+
+ We refer to an element from the Nth input string by index, for example:
+
+ * ``"{ext[0]}"`` is the extension of the first file name string in **Input**.
+ * ``"{basename[1]}"`` is the basename of the second file name in **Input**.
+ * ``"{basename[1][0:3]}"`` are the first three letters from the basename of the second file name in **Input**.
+
+
+    ``subdir`` and ``subpath`` were designed to help you navigate directory hierarchies with the minimum of fuss.
+    For example, you might want to graft a hierarchical path onto another location:
+    ``"{subpath[0][2]}/from/{subdir[0][0]}/{basename[0]}{ext[0]}"`` neatly replaces just one directory (``"to"``) in the path with another (``"from"``):
+
+ .. code-block:: python
+
+        replacement_string = "{subpath[0][2]}/from/{subdir[0][0]}/{basename[0]}{ext[0]}"
+
+ input_string = "/directory/to/a/file.name.ext"
+ result_string = "/directory/from/a/file.name.ext"
+
+
+.. _new_manual.formatter.regex:
+
+================================================
+Filter and parse using regular expressions
+================================================
+
+ `Regular expression <http://docs.python.org/2/library/re.html#re.MatchObject.group>`__ matches can be used with the similar syntax.
+ Our example string can be parsed using the following regular expression:
+
+ .. code-block:: python
+
+ input_string = "/directory/to/a/file.name.ext"
+        formatter(r"/directory/(.+)/(?P<MYFILENAME>.+)\.ext")
+
+    We capture part of the path using ``(.+)``, and the base name using ``(?P<MYFILENAME>.+)``.
+ These `matching subgroups <http://docs.python.org/2/library/re.html#re.MatchObject.group>`__ can be referred to by index
+ but for greater clarity the second named capture can also be referred to by name, i.e. ``{MYFILENAME}``.
+
+
+ The regular expression components for the first string can thus be referred to as follows:
+
+ * ``{0[0]}`` : The entire match captured by index, ``"/directory/to/a/file.name.ext"``
+ * ``{1[0]}`` : The first match captured by index, ``"to/a"``
+ * ``{2[0]}`` : The second match captured by index, ``"file.name"``
+ * ``{MYFILENAME[0]}`` : The match captured by name, ``"file.name"``
+
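+    As a quick check with plain ``re`` (outside *Ruffus*), these captures can be reproduced directly:
+
+    .. code-block:: python
+
+        import re
+
+        m = re.search(r"/directory/(.+)/(?P<MYFILENAME>.+)\.ext",
+                      "/directory/to/a/file.name.ext")
+
+        print m.group(0)               # "/directory/to/a/file.name.ext"
+        print m.group(1)               # "to/a"
+        print m.group("MYFILENAME")    # "file.name"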
+
+ If each input consists of a list of paths such as ``['job1.a.start', 'job1.b.start', 'job1.c.start']``, we can match each of them separately
+ by using as many regular expressions as necessary. For example:
+
+ .. code-block:: python
+
+ input_string = ['job1.a.start', 'job1.b.start', 'job1.c.start']
+ # Regular expression matches for 1st, 2nd but not 3rd element
+ formatter(".+a.start", "b.start$")
+
+
+ Or if you only wanted regular expression matches for the second file name (string), pad with ``None``:
+
+ .. code-block:: python
+
+ input_string = ['job1.a.start', 'job1.b.start', 'job1.c.start']
+ # Regular expression matches for 2nd but not 1st or 3rd elements
+ formatter(None, "b.start$")
+
+================================================================================================
+Using :ref:`@transform() <decorators.transform>` with :ref:`formatter() <decorators.formatter>`
+================================================================================================
+
+ We can put these together in the following example:
+
+ .. code-block:: python
+ :emphasize-lines: 21,22
+
+ from ruffus import *
+
+ # create initial files
+ @originate([ ['job1.a.start', 'job1.b.start'],
+ ['job2.a.start', 'job2.b.start'],
+ ['job3.a.start', 'job3.c.start'] ])
+ def create_initial_file_pairs(output_files):
+ # create both files as necessary
+ for output_file in output_files:
+ with open(output_file, "w") as oo: pass
+
+
+ #---------------------------------------------------------------
+ #
+ # formatter
+ #
+
+ # first task
+ @transform(create_initial_file_pairs, # Input
+
+ formatter(".+/job(?P<JOBNUMBER>\d+).a.start", # Extract job number
+ ".+/job[123].b.start"), # Match only "b" files
+
+ ["{path[0]}/jobs{JOBNUMBER[0]}.output.a.1", # Replacement list
+ "{path[1]}/jobs{JOBNUMBER[0]}.output.b.1", 45])
+ def first_task(input_files, output_parameters):
+ print "input_parameters = ", input_files
+ print "output_parameters = ", output_parameters
+
+
+ #
+ # Run
+ #
+ pipeline_run(verbose=0)
+
+
+ This produces:
+
+ .. code-block:: pycon
+
+ input_parameters = ['job1.a.start',
+ 'job1.b.start']
+ output_parameters = ['/home/lg/src/temp/jobs1.output.a.1',
+ '/home/lg/src/temp/jobs1.output.b.1', 45]
+
+ input_parameters = ['job2.a.start',
+ 'job2.b.start']
+ output_parameters = ['/home/lg/src/temp/jobs2.output.a.1',
+ '/home/lg/src/temp/jobs2.output.b.1', 45]
+
+
+
+ Notice that ``job3`` has ``'job3.c.start'`` as the second file.
+ This fails to match the regular expression and is discarded.
+
+ .. note::
+
+ Inputs which fail to match the regular expression are simply discarded, not treated as errors.
+
+ :ref:`formatter() <decorators.formatter>` regular expressions are thus very useful in filtering out all
+ files which do not match your specified criteria.
+
+ If some of your task inputs are a mixture of different file types, a simple ``formatter(".txt$")``, for example, will make
+ your code a lot simpler...
+
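+ A minimal sketch of this kind of filtering (the task and file names here are invented for illustration):
+
+ .. code-block:: python
+
+     @transform(previous_task,                        # hypothetical upstream task producing mixed file types
+                formatter(".txt$"),                   # only inputs ending in ".txt" generate jobs
+                "{path[0]}/{basename[0]}.counts")     # hypothetical output file name
+     def count_lines(input_file, output_file):
+         pass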
+
+
+================================================================================================
+String substitution for "extra" arguments
+================================================================================================
+
+ The first two arguments for Ruffus task functions are special because they are the **Input** and **Output**
+ parameters which link different stages of a pipeline.
+
+
+ Python strings in these arguments are names of data files whose modification times indicate whether the pipeline is up to date or not.
+
+ Other arguments to task functions are not passed down the pipeline but consumed.
+ Any python strings they contain do not need to be file names. These extra arguments are very useful
+ for passing data to pipelined tasks, such as shared values, loggers, programme options etc.
+
+ One helpful feature is that strings in these extra arguments are also subject to :ref:`formatter() <decorators.formatter>` string substitution.
+ This means you can leverage the parsing capabilities of Ruffus to decode information about the pipeline data files,
+ such as the directories you are running in and parts of the file names.
+
+ For example, if we wanted to know which files go with which "job number" in the previous example:
+
+
+ .. code-block:: python
+ :emphasize-lines: 21,22
+
+ from ruffus import *
+
+ # create initial files
+ @originate([ ['job1.a.start', 'job1.b.start'],
+ ['job2.a.start', 'job2.b.start'],
+ ['job3.a.start', 'job3.c.start'] ])
+ def create_initial_file_pairs(output_files):
+ for output_file in output_files:
+ with open(output_file, "w") as oo: pass
+
+
+ #---------------------------------------------------------------
+ #
+ # print job number as an extra argument
+ #
+
+ # first task
+ @transform(create_initial_file_pairs, # Input
+
+ formatter(".+/job(?P<JOBNUMBER>\d+).a.start", # Extract job number
+ ".+/job[123].b.start"), # Match only "b" files
+
+ ["{path[0]}/jobs{JOBNUMBER[0]}.output.a.1", # Replacement list
+ "{path[1]}/jobs{JOBNUMBER[0]}.output.b.1"],
+
+ "{JOBNUMBER[0]}")                                     # Extra parameter: the job number
+ def first_task(input_files, output_parameters, job_number):
+ print job_number, ":", input_files
+
+
+ pipeline_run(verbose=0)
+
+ ::
+
+ >>> pipeline_run(verbose=0)
+ 1 : ['job1.a.start', 'job1.b.start']
+ 2 : ['job2.a.start', 'job2.b.start']
+
+
+
+.. _new_manual.output_file_names.formatter.zoo:
+
+================================================================================================
+Changing directories using :ref:`formatter() <decorators.formatter>` in a zoo...
+================================================================================================
+
+ Here is a more fun example. We would like to feed the denizens of a zoo. Unfortunately, the file names for
+ these are spread over several directories. Ideally, we would like their food supply to be grouped more
+ sensibly. And, of course, we only want to feed the animals, not the plants.
+
+ I have colour coded the input and output files for this task to show how we would like to rearrange them:
+
+ .. image:: ../../images/simple_tutorial_zoo_animals_formatter_example.jpg
+ :scale: 50
+
+ .. code-block:: python
+ :emphasize-lines: 7,22,26,27,28
+
+ from ruffus import *
+
+ # Make directories
+ @mkdir(["tiger", "lion", "dog", "crocodile", "rose"])
+
+ @originate(
+ # List of animals and plants
+ [ "tiger/mammals.wild.animals",
+ "lion/mammals.wild.animals",
+ "lion/mammals.handreared.animals",
+ "dog/mammals.tame.animals",
+ "dog/mammals.wild.animals",
+ "crocodile/reptiles.wild.animals",
+ "rose/flowering.handreared.plants"])
+ def create_initial_files(output_file):
+ with open(output_file, "w") as oo: pass
+
+
+ # Put different animals in different directories depending on their clade
+ @transform(create_initial_files, # Input
+
+ formatter(".+/(?P<clade>\w+).(?P<tame>\w+).animals"), # Only animals: ignore plants!
+
+ "{subpath[0][1]}/{clade[0]}/{tame[0]}.{subdir[0][0]}.food", # Replacement
+
+ "{subpath[0][1]}/{clade[0]}", # new_directory
+ "{subdir[0][0]}", # animal_name
+ "{tame[0]}") # tameness
+ def feed(input_file, output_file, new_directory, animal_name, tameness):
+ print "Food for the {tameness:11s} {animal_name:9s} = {output_file:90s} will be placed in {new_directory}".format(**locals())
+
+ pipeline_run(verbose=0)
+
+
+ .. comment **
+
+ We can see that the food for each animal is now grouped by clade in the same directory, which makes a lot more sense...
+
+ Note how we used ``subpath[0][1]`` to strip the innermost directory from the file path (i.e. go up one level) when building the new file name.
+
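+ To make the path components concrete, here is roughly how the substitutions expand for the ``tiger`` input (the absolute working directory is a placeholder in this sketch):
+
+ .. code-block:: python
+
+     # For the input file "tiger/mammals.wild.animals" (absolute path assumed to be
+     # "/my/working/dir/tiger/mammals.wild.animals" for this sketch):
+     #
+     #   {subdir[0][0]}  = "tiger"               innermost directory, used as the animal name
+     #   {subpath[0][1]} = "/my/working/dir"     path with one level of nesting removed
+     #   {clade[0]}      = "mammals"             named regular expression captures
+     #   {tame[0]}       = "wild"
+     #
+     # Output: "/my/working/dir/mammals/wild.tiger.food"   (shown as "./mammals/wild.tiger.food" below)
+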
+ ::
+
+ >>> pipeline_run(verbose=0)
+ Food for the wild crocodile = ./reptiles/wild.crocodile.food will be placed in ./reptiles
+ Food for the tame dog = ./mammals/tame.dog.food will be placed in ./mammals
+ Food for the wild dog = ./mammals/wild.dog.food will be placed in ./mammals
+ Food for the handreared lion = ./mammals/handreared.lion.food will be placed in ./mammals
+ Food for the wild lion = ./mammals/wild.lion.food will be placed in ./mammals
+ Food for the wild tiger = ./mammals/wild.tiger.food will be placed in ./mammals
+
+
+.. _new_manual.regex:
+
+******************************************************************************
+ :ref:`regex() <decorators.regex>` manipulates via regular expressions
+******************************************************************************
+
+
+ If you are a hard core regular expressions fan, you may want to use :ref:`regex() <decorators.regex>` instead of :ref:`suffix() <decorators.suffix>` or :ref:`formatter() <decorators.formatter>`.
+
+ .. note::
+
+ :ref:`regex() <decorators.regex>` uses regular expressions like :ref:`formatter() <decorators.formatter>` but
+
+ * It only matches the first file name in the input. As described above, :ref:`formatter() <decorators.formatter>` can match any one or more of the input filename strings.
+ * It does not understand file paths so you may have to perform your own directory / file name parsing.
+ * String replacement uses syntax borrowed from `re.sub() <http://docs.python.org/2/library/re.html#re.sub>`__, rather than building a result from parsed regular expression (and file path) components
+
+ In general, :ref:`formatter() <decorators.formatter>` is more powerful: it was introduced in version 2.4 as a more user-friendly replacement for :ref:`regex() <decorators.regex>`.
+
+ Let us see how the previous zoo example looks with :ref:`regex() <decorators.regex>`:
+
+
+ :ref:`formatter() <decorators.formatter>` code:
+
+ .. code-block:: python
+ :emphasize-lines: 4,6
+
+ # Put different animals in different directories depending on their clade
+ @transform(create_initial_files, # Input
+
+ formatter(".+/(?P<clade>\w+).(?P<tame>\w+).animals"), # Only animals: ignore plants!
+
+ "{subpath[0][1]}/{clade[0]}/{tame[0]}.{subdir[0][0]}.food", # Replacement
+
+ "{subpath[0][1]}/{clade[0]}", # new_directory
+ "{subdir[0][0]}", # animal_name
+ "{tame[0]}") # tameness
+ def feed(input_file, output_file, new_directory, animal_name, tameness):
+ print "Food for the {tameness:11s} {animal_name:9s} = {output_file:90s} will be placed in {new_directory}".format(**locals())
+
+ :ref:`regex() <decorators.regex>` code:
+
+ .. code-block:: python
+ :emphasize-lines: 4,6
+
+ # Put different animals in different directories depending on their clade
+ @transform(create_initial_files, # Input
+
+ regex(r"(.*?/?)(\w+)/(?P<clade>\w+).(?P<tame>\w+).animals"), # Only animals: ignore plants!
+
+ r"\1/\g<clade>/\g<tame>.\2.food", # Replacement
+
+ r"\1/\g<clade>", # new_directory
+ r"\2", # animal_name
+ "\g<tame>") # tameness
+ def feed(input_file, output_file, new_directory, animal_name, tameness):
+ print "Food for the {tameness:11s} {animal_name:9s} = {output_file:90s} will be placed in {new_directory}".format(**locals())
+
+
+ The regular expression to parse the input file path safely was a bit hairy to write, and it is not
+ clear that it handles all edge conditions (e.g. files in the root directory). Apart from that, if the
+ limitations of :ref:`regex() <decorators.regex>` do not preclude its use, then the two approaches
+ are not so different in practice.
+
+
+
+
diff --git a/doc/tutorials/new_tutorial/output_file_names_code.rst b/doc/tutorials/new_tutorial/output_file_names_code.rst
new file mode 100644
index 0000000..7e70f58
--- /dev/null
+++ b/doc/tutorials/new_tutorial/output_file_names_code.rst
@@ -0,0 +1,248 @@
+.. include:: ../../global.inc
+.. include:: manual_chapter_numbers.inc
+
+.. _new_manual.output_file_names.code:
+
+############################################################################################################################################################################################################
+|new_manual.output_file_names.chapter_num|: Python Code for Specifying output file names with :ref:`formatter() <decorators.formatter>` and :ref:`regex() <decorators.regex>`
+############################################################################################################################################################################################################
+
+.. seealso::
+
+ * :ref:`Manual Table of Contents <new_manual.table_of_contents>`
+ * :ref:`suffix() <decorators.suffix>` syntax
+ * :ref:`formatter() <decorators.formatter>` syntax
+ * :ref:`regex() <decorators.regex>` syntax
+ * Back to |new_manual.output_file_names.chapter_num|: :ref:`Specifying output file names <new_manual.output_file_names>`
+
+************************************************************************
+Example Code for :ref:`suffix() <decorators.suffix>`
+************************************************************************
+ .. code-block:: python
+
+ from ruffus import *
+
+ #---------------------------------------------------------------
+ # create initial files
+ #
+ @originate([ ['job1.a.start', 'job1.b.start'],
+ ['job2.a.start', 'job2.b.start'],
+ ['job3.a.start', 'job3.b.start'] ])
+ def create_initial_file_pairs(output_files):
+ # create both files as necessary
+ for output_file in output_files:
+ with open(output_file, "w") as oo: pass
+
+ #---------------------------------------------------------------
+ #
+ # suffix
+ #
+ @transform(create_initial_file_pairs, # name of previous task(s) (or list of files, or a glob)
+ suffix(".start"), # matching suffix of the "input file"
+ [".output.a.1", 45, ".output.b.1"]) # resulting suffix
+ def first_task(input_files, output_parameters):
+ print " input_parameters = ", input_files
+ print " output_parameters = ", output_parameters
+
+
+ #
+ # Run
+ #
+ pipeline_run([first_task])
+
+
+
+************************************************************************
+Example Code for :ref:`formatter() <decorators.formatter>`
+************************************************************************
+
+ .. code-block:: python
+
+ from ruffus import *
+
+ # create initial files
+ @originate([ ['job1.a.start', 'job1.b.start'],
+ ['job2.a.start', 'job2.b.start'],
+ ['job3.a.start', 'job3.c.start'] ])
+ def create_initial_file_pairs(output_files):
+ # create both files as necessary
+ for output_file in output_files:
+ with open(output_file, "w") as oo: pass
+
+
+ #---------------------------------------------------------------
+ #
+ # formatter
+ #
+
+ # first task
+ @transform(create_initial_file_pairs, # Input
+
+ formatter(".+/job(?P<JOBNUMBER>\d+).a.start", # Extract job number
+ ".+/job[123].b.start"), # Match only "b" files
+
+ ["{path[0]}/jobs{JOBNUMBER[0]}.output.a.1", # Replacement list
+ "{path[1]}/jobs{JOBNUMBER[0]}.output.b.1", 45])
+ def first_task(input_files, output_parameters):
+ print "input_parameters = ", input_files
+ print "output_parameters = ", output_parameters
+
+
+ #
+ # Run
+ #
+ pipeline_run(verbose=0)
+
+
+****************************************************************************************************************
+Example Code for :ref:`formatter() <decorators.formatter>` with replacements in *extra* arguments
+****************************************************************************************************************
+
+
+ .. code-block:: python
+
+ from ruffus import *
+
+ # create initial files
+ @originate([ ['job1.a.start', 'job1.b.start'],
+ ['job2.a.start', 'job2.b.start'],
+ ['job3.a.start', 'job3.c.start'] ])
+ def create_initial_file_pairs(output_files):
+ for output_file in output_files:
+ with open(output_file, "w") as oo: pass
+
+
+ #---------------------------------------------------------------
+ #
+ # print job number as an extra argument
+ #
+
+ # first task
+ @transform(create_initial_file_pairs, # Input
+
+ formatter(".+/job(?P<JOBNUMBER>\d+).a.start", # Extract job number
+ ".+/job[123].b.start"), # Match only "b" files
+
+ ["{path[0]}/jobs{JOBNUMBER[0]}.output.a.1", # Replacement list
+ "{path[1]}/jobs{JOBNUMBER[0]}.output.b.1"],
+
+ "{JOBNUMBER[0]}")                                     # Extra parameter: the job number
+ def first_task(input_files, output_parameters, job_number):
+ print job_number, ":", input_files
+
+
+ pipeline_run(verbose=0)
+
+
+****************************************************************************************************************
+Example Code for :ref:`formatter() <decorators.formatter>` in Zoos
+****************************************************************************************************************
+
+
+ .. code-block:: python
+
+ from ruffus import *
+
+ # Make directories
+ @mkdir(["tiger", "lion", "dog", "crocodile", "rose"])
+
+ @originate(
+ # List of animals and plants
+ [ "tiger/mammals.wild.animals",
+ "lion/mammals.wild.animals",
+ "lion/mammals.handreared.animals",
+ "dog/mammals.tame.animals",
+ "dog/mammals.wild.animals",
+ "crocodile/reptiles.wild.animals",
+ "rose/flowering.handreared.plants"])
+ def create_initial_files(output_file):
+ with open(output_file, "w") as oo: pass
+
+
+ # Put different animals in different directories depending on their clade
+ @transform(create_initial_files, # Input
+
+ formatter(".+/(?P<clade>\w+).(?P<tame>\w+).animals"), # Only animals: ignore plants!
+
+ "{subpath[0][1]}/{clade[0]}/{tame[0]}.{subdir[0][0]}.food", # Replacement
+
+ "{subpath[0][1]}/{clade[0]}", # new_directory
+ "{subdir[0][0]}", # animal_name
+ "{tame[0]}") # tameness
+ def feed(input_file, output_file, new_directory, animal_name, tameness):
+ print "Food for the {tameness:11s} {animal_name:9s} = {output_file:90s} will be placed in {new_directory}".format(**locals())
+
+
+ pipeline_run(verbose=0)
+
+
+ Results in:
+
+ ::
+
+ >>> pipeline_run(verbose=0)
+ Food for the wild crocodile = ./reptiles/wild.crocodile.food will be placed in ./reptiles
+ Food for the tame dog = ./mammals/tame.dog.food will be placed in ./mammals
+ Food for the wild dog = ./mammals/wild.dog.food will be placed in ./mammals
+ Food for the handreared lion = ./mammals/handreared.lion.food will be placed in ./mammals
+ Food for the wild lion = ./mammals/wild.lion.food will be placed in ./mammals
+ Food for the wild tiger = ./mammals/wild.tiger.food will be placed in ./mammals
+
+
+
+****************************************************************************************************************
+Example Code for :ref:`regex() <decorators.regex>` in zoos
+****************************************************************************************************************
+
+
+ .. code-block:: python
+
+ from ruffus import *
+
+ # Make directories
+ @mkdir(["tiger", "lion", "dog", "crocodile", "rose"])
+
+ @originate(
+ # List of animals and plants
+ [ "tiger/mammals.wild.animals",
+ "lion/mammals.wild.animals",
+ "lion/mammals.handreared.animals",
+ "dog/mammals.tame.animals",
+ "dog/mammals.wild.animals",
+ "crocodile/reptiles.wild.animals",
+ "rose/flowering.handreared.plants"])
+ def create_initial_files(output_file):
+ with open(output_file, "w") as oo: pass
+
+
+
+ # Put different animals in different directories depending on their clade
+ @transform(create_initial_files, # Input
+
+ regex(r"(.*?/?)(\w+)/(?P<clade>\w+).(?P<tame>\w+).animals"), # Only animals: ignore plants!
+
+ r"\1/\g<clade>/\g<tame>.\2.food", # Replacement
+
+ r"\1/\g<clade>", # new_directory
+ r"\2", # animal_name
+ "\g<tame>") # tameness
+ def feed(input_file, output_file, new_directory, animal_name, tameness):
+ print "Food for the {tameness:11s} {animal_name:9s} = {output_file:90s} will be placed in {new_directory}".format(**locals())
+
+
+ pipeline_run(verbose=0)
+
+
+ Results in:
+
+ ::
+
+ >>> pipeline_run(verbose=0)
+ Food for the wild crocodile = reptiles/wild.crocodile.food will be placed in reptiles
+ Food for the tame dog = mammals/tame.dog.food will be placed in mammals
+ Food for the wild dog = mammals/wild.dog.food will be placed in mammals
+ Food for the handreared lion = mammals/handreared.lion.food will be placed in mammals
+ Food for the wild lion = mammals/wild.lion.food will be placed in mammals
+ Food for the wild tiger = mammals/wild.tiger.food will be placed in mammals
+
+
diff --git a/doc/tutorials/new_tutorial/parallel.rst b/doc/tutorials/new_tutorial/parallel.rst
new file mode 100644
index 0000000..90a825f
--- /dev/null
+++ b/doc/tutorials/new_tutorial/parallel.rst
@@ -0,0 +1,63 @@
+.. include:: ../../global.inc
+.. include:: manual_chapter_numbers.inc
+
+.. index::
+ pair: @parallel; Tutorial
+
+.. _new_manual.deprecated_parallel:
+
+####################################################################################################################################################
+|new_manual.parallel.chapter_num|: Esoteric: Running jobs in parallel without files using :ref:`@parallel<decorators.parallel>`
+####################################################################################################################################################
+
+
+.. seealso::
+
+ * :ref:`Manual Table of Contents <new_manual.table_of_contents>`
+ * :ref:`@parallel<decorators.parallel>` syntax in detail
+
+
+
+***************************************
+**@parallel**
+***************************************
+
+ **@parallel** supplies parameters for multiple **jobs** exactly like :ref:`@files<new_manual.deprecated_files>` except that:
+
+ #. The first two parameters are not treated as *inputs* and *outputs* parameters,
+ and strings are not assumed to be file names
+ #. Thus, no up-to-date checking is carried out using *inputs* and *outputs* files
+ #. No expansion of |glob|_ patterns or *output* from previous tasks is carried out.
+
+ This syntax is most useful when a pipeline stage does not involve creating or consuming any files, and
+ you wish to forego the conveniences of :ref:`@files<new_manual.deprecated_files>`, :ref:`@transform<new_manual.transform>` etc.
+
+ The following code performs some arithmetic in parallel:
+
+ ::
+
+ import sys
+ from ruffus import *
+ parameters = [
+ ['A', 1, 2], # 1st job
+ ['B', 3, 4], # 2nd job
+ ['C', 5, 6], # 3rd job
+ ]
+ @parallel(parameters)
+ def parallel_task(name, param1, param2):
+ sys.stderr.write(" Parallel task %s: " % name)
+ sys.stderr.write("%d + %d = %d\n" % (param1, param2, param1 + param2))
+
+ pipeline_run([parallel_task])
+
+ produces the following::
+
+ Task = parallel_task
+ Parallel task A: 1 + 2 = 3
+ Job = ["A", 1, 2] completed
+ Parallel task B: 3 + 4 = 7
+ Job = ["B", 3, 4] completed
+ Parallel task C: 5 + 6 = 11
+ Job = ["C", 5, 6] completed
+
+
diff --git a/doc/tutorials/new_tutorial/pipeline_printout.rst b/doc/tutorials/new_tutorial/pipeline_printout.rst
new file mode 100644
index 0000000..02ad07d
--- /dev/null
+++ b/doc/tutorials/new_tutorial/pipeline_printout.rst
@@ -0,0 +1,215 @@
+.. include:: ../../global.inc
+.. include:: manual_chapter_numbers.inc
+
+.. index::
+ pair: pipeline_printout; Tutorial
+
+.. _new_manual.pipeline_printout:
+
+############################################################################################################################################################################################################
+|new_manual.pipeline_printout.chapter_num|: Understanding how your pipeline works with :ref:`pipeline_printout(...) <pipeline_functions.pipeline_printout>`
+############################################################################################################################################################################################################
+
+
+.. seealso::
+
+ * :ref:`Manual Table of Contents <new_manual.table_of_contents>`
+ * :ref:`pipeline_printout(...) <pipeline_functions.pipeline_printout>` syntax
+ * :ref:`Python Code for this chapter <new_manual.pipeline_printout.code>`
+
+
+.. note::
+
+ * **Whether you are learning or developing ruffus pipelines, your best friend is** :ref:`pipeline_printout(...) <pipeline_functions.pipeline_printout>`
+ **This shows the exact parameters and files as they are passed through the pipeline.**
+
+ * **We also** *strongly* **recommend you use the** ``Ruffus.cmdline`` **convenience module which**
+ **will take care of all the command line arguments for you. See** :ref:`new_manual.cmdline`.
+
+
+
+=======================================
+Printing out which jobs will be run
+=======================================
+
+ :ref:`pipeline_printout(...) <pipeline_functions.pipeline_printout>` takes the same parameters as pipeline_run but just prints
+ the tasks which are and are not up-to-date.
+
+ The ``verbose`` parameter controls how much detail is displayed.
+
+ Let us take the pipelined code we previously wrote in
+ |new_manual.transform_in_parallel.chapter_num| :ref:`More on @transform-ing data and @originate <new_manual.transform_in_parallel.code>`
+ but call :ref:`pipeline_printout(...) <pipeline_functions.pipeline_printout>` instead of
+ :ref:`pipeline_run(...) <pipeline_functions.pipeline_run>`.
+ This lists the tasks which will be run in the pipeline:
+
+ ::
+
+ >>> import sys
+ >>> pipeline_printout(sys.stdout, [second_task])
+
+ ________________________________________
+ Tasks which will be run:
+
+ Task = create_initial_file_pairs
+ Task = first_task
+ Task = second_task
+ ________________________________________
+
+
+
+ To see the input and output parameters of each job in the pipeline, try increasing the verbosity from the default (``1``) to ``3``
+ (See :ref:`code <new_manual.pipeline_printout.code>`)
+
+ This is very useful for checking that the input and output parameters have been specified correctly.
+
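+ For example (this call mirrors the one in the example code for this chapter):
+
+ .. code-block:: python
+
+     # show the parameters of each out-of-date job
+     pipeline_printout(sys.stdout, [second_task], verbose = 3)
+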
+=============================================
+Determining which jobs are out-of-date or not
+=============================================
+
+ It is often useful to see which tasks are or are not up-to-date. For example, if we
+ were to run the pipeline in full, and then modify one of the intermediate files, the
+ pipeline would be partially out of date.
+
+
+ Let us start by running the pipeline in full, and then modify ``job1.a.output.1`` so that the second task appears out-of-date:
+
+ .. code-block:: python
+ :emphasize-lines: 3
+
+ pipeline_run([second_task])
+
+ # "touch" job1.a.output.1
+ open("job1.a.output.1", "w").close()
+
+
+ Run :ref:`pipeline_printout(...) <pipeline_functions.pipeline_printout>` with a verbosity of ``5``.
+
+ This will tell you exactly why ``second_task(...)`` needs to be re-run:
+ because ``job1.a.output.1`` has a file modification time *after* ``job1.a.output.2`` (highlighted):
+
+
+ .. code-block:: pycon
+ :emphasize-lines: 9
+
+ >>> pipeline_printout(sys.stdout, [second_task], verbose = 5)
+
+ ________________________________________
+ Tasks which are up-to-date:
+
+ Task = create_initial_file_pairs
+ Task = first_task
+
+ ________________________________________
+
+ ________________________________________
+ Tasks which will be run:
+
+ Task = second_task
+ Job = [job1.a.output.1
+ -> job1.a.output.2]
+ >>> # File modification times shown for out of date files
+ Job needs update:
+ Input files:
+ * 22 Jul 2014 15:29:19.33: job1.a.output.1
+ Output files:
+ * 22 Jul 2014 15:29:07.53: job1.a.output.2
+
+ Job = [job2.a.output.1
+ -> job2.a.output.2]
+ Job = [job3.a.output.1
+ -> job3.a.output.2]
+
+ ________________________________________
+
+
+ N.B. At a verbosity of 5, even jobs which are up-to-date in ``second_task`` are displayed.
+
+
+
+=============================================
+Verbosity levels
+=============================================
+
+ The verbosity levels for :ref:`pipeline_printout(...) <pipeline_functions.pipeline_printout>` and :ref:`pipeline_run(...) <pipeline_functions.pipeline_run>`
+ can be specified from ``verbose = 0`` (print out nothing) to the extreme verbosity of ``verbose = 6``. Verbosity levels above 10 are reserved for the internal
+ debugging of Ruffus.
+
+ * level **0** : *nothing*
+ * level **1** : *Out-of-date Task names*
+ * level **2** : *All Tasks (including any task function docstrings)*
+ * level **3** : *Out-of-date Jobs in Out-of-date Tasks, no explanation*
+ * level **4** : *Out-of-date Jobs in Out-of-date Tasks, with explanations and warnings*
+ * level **5** : *All Jobs in Out-of-date Tasks (up-to-date Tasks are listed by name only)*
+ * level **6** : *All jobs in All Tasks whether out of date or not*
+ * level **10**: *logs messages useful only for debugging ruffus pipeline code*
+
+.. _new_manual.pipeline_printout.verbose_abbreviated_path:
+
+==========================================================================================
+Abbreviating long file paths with ``verbose_abbreviated_path``
+==========================================================================================
+
+ Pipelines often produce interminable lists of deeply nested filenames. It would be nice to be able to abbreviate this
+ to just enough information to follow the progress.
+
+ The ``verbose_abbreviated_path`` parameter specifies that :ref:`pipeline_printout(...) <pipeline_functions.pipeline_printout>` and :ref:`pipeline_run(...) <pipeline_functions.pipeline_run>` only display
+
+ 1) the innermost ``NNN`` levels of nested file paths, or that
+ 2) the message truncated to a specified ``MMM`` characters (to fit onto a line, for example). ``MMM`` is specified by setting ``verbose_abbreviated_path = -MMM``, i.e. negative values.
+
+ Note that the number of characters specified is just the separate lengths of the input and output parameters,
+ not the entire indented line. You may need to specify a smaller limit than you expect (e.g. ``60`` rather than ``80``).
+
+ .. code-block:: python
+
+ pipeline_printout(verbose_abbreviated_path = NNN)
+ pipeline_run(verbose_abbreviated_path = -MMM)
+
+
+
+ ``verbose_abbreviated_path`` defaults to ``2``
+
+
+ For example:
+
+ Given ``["aa/bb/cc/dddd.txt", "aaa/bbbb/cccc/eeed/eeee/ffff/gggg.txt"]``
+
+
+ .. code-block:: python
+ :emphasize-lines: 1,4,8,19
+
+ # Original relative paths
+ "[aa/bb/cc/dddd.txt, aaa/bbbb/cccc/eeed/eeee/ffff/gggg.txt]"
+
+ # Full abspath
+ verbose_abbreviated_path = 0
+ "[/test/ruffus/src/aa/bb/cc/dddd.txt, /test/ruffus/src/aaa/bbbb/cccc/eeed/eeee/ffff/gggg.txt]"
+
+ # Specified level of nested directories
+ verbose_abbreviated_path = 1
+ "[.../dddd.txt, .../gggg.txt]"
+
+ verbose_abbreviated_path = 2
+ "[.../cc/dddd.txt, .../ffff/gggg.txt]"
+
+ verbose_abbreviated_path = 3
+ "[.../bb/cc/dddd.txt, .../eeee/ffff/gggg.txt]"
+
+
+ # Truncated to MMM characters
+ verbose_abbreviated_path = -60
+ "<???> /bb/cc/dddd.txt, aaa/bbbb/cccc/eeed/eeee/ffff/gggg.txt]"
+
+
+=============================================
+Getting a list of all tasks in a pipeline
+=============================================
+
+ If you just want a list of all tasks (Ruffus decorated function names), then you can
+ simply call :ref:`pipeline_get_task_names(...) <pipeline_functions.pipeline_get_task_names>`.
+
+ This doesn't touch any pipeline code or even check to see if the pipeline is connected up properly.
+
+ However, it is sometimes useful to allow users at the command line to choose from a list of
+ possible tasks as a target.
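+
+ A minimal sketch (assuming ``from ruffus import *`` and the pipeline tasks above have already been defined):
+
+ .. code-block:: python
+
+     # just the decorated task function names; nothing is run or checked
+     for task_name in pipeline_get_task_names():
+         print task_name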
diff --git a/doc/tutorials/new_tutorial/pipeline_printout_code.rst b/doc/tutorials/new_tutorial/pipeline_printout_code.rst
new file mode 100644
index 0000000..f8eedf3
--- /dev/null
+++ b/doc/tutorials/new_tutorial/pipeline_printout_code.rst
@@ -0,0 +1,203 @@
+.. include:: ../../global.inc
+.. include:: manual_chapter_numbers.inc
+
+.. _new_manual.pipeline_printout.code:
+
+############################################################################################################################################################################################################
+|new_manual.pipeline_printout.chapter_num|: Python Code for Understanding how your pipeline works with :ref:`pipeline_printout(...) <pipeline_functions.pipeline_printout>`
+############################################################################################################################################################################################################
+
+.. seealso::
+
+ * :ref:`Manual Table of Contents <new_manual.table_of_contents>`
+ * :ref:`pipeline_printout(...) <pipeline_functions.pipeline_printout>` syntax
+ * Back to |new_manual.pipeline_printout.chapter_num|: :ref:`Understanding how your pipeline works <new_manual.pipeline_printout>`
+
+******************************************
+Display the initial state of the pipeline
+******************************************
+ ::
+
+ from ruffus import *
+ import sys
+
+ #---------------------------------------------------------------
+ # create initial files
+ #
+ @originate([ ['job1.a.start', 'job1.b.start'],
+ ['job2.a.start', 'job2.b.start'],
+ ['job3.a.start', 'job3.b.start'] ])
+ def create_initial_file_pairs(output_files):
+ # create both files as necessary
+ for output_file in output_files:
+ with open(output_file, "w") as oo: pass
+
+ #---------------------------------------------------------------
+ # first task
+ @transform(create_initial_file_pairs, suffix(".start"), ".output.1")
+ def first_task(input_files, output_file):
+ with open(output_file, "w"): pass
+
+
+ #---------------------------------------------------------------
+ # second task
+ @transform(first_task, suffix(".output.1"), ".output.2")
+ def second_task(input_files, output_file):
+ with open(output_file, "w"): pass
+
+ pipeline_printout(sys.stdout, [second_task], verbose = 1)
+ pipeline_printout(sys.stdout, [second_task], verbose = 3)
+
+************************************
+Normal Output
+************************************
+ ::
+
+ >>> pipeline_printout(sys.stdout, [second_task], verbose = 1)
+
+ ________________________________________
+ Tasks which will be run:
+
+ Task = create_initial_file_pairs
+ Task = first_task
+ Task = second_task
+
+
+************************************
+High Verbosity Output
+************************************
+
+ ::
+
+ >>> pipeline_printout(sys.stdout, [second_task], verbose = 4)
+
+ ________________________________________
+ Tasks which will be run:
+
+ Task = create_initial_file_pairs
+ Job = [None
+ -> job1.a.start
+ -> job1.b.start]
+ Job needs update: Missing files [job1.a.start, job1.b.start]
+ Job = [None
+ -> job2.a.start
+ -> job2.b.start]
+ Job needs update: Missing files [job2.a.start, job2.b.start]
+ Job = [None
+ -> job3.a.start
+ -> job3.b.start]
+ Job needs update: Missing files [job3.a.start, job3.b.start]
+
+ Task = first_task
+ Job = [[job1.a.start, job1.b.start]
+ -> job1.a.output.1]
+ Job needs update: Missing files [job1.a.start, job1.b.start, job1.a.output.1]
+ Job = [[job2.a.start, job2.b.start]
+ -> job2.a.output.1]
+ Job needs update: Missing files [job2.a.start, job2.b.start, job2.a.output.1]
+ Job = [[job3.a.start, job3.b.start]
+ -> job3.a.output.1]
+ Job needs update: Missing files [job3.a.start, job3.b.start, job3.a.output.1]
+
+ Task = second_task
+ Job = [job1.a.output.1
+ -> job1.a.output.2]
+ Job needs update: Missing files [job1.a.output.1, job1.a.output.2]
+ Job = [job2.a.output.1
+ -> job2.a.output.2]
+ Job needs update: Missing files [job2.a.output.1, job2.a.output.2]
+ Job = [job3.a.output.1
+ -> job3.a.output.2]
+ Job needs update: Missing files [job3.a.output.1, job3.a.output.2]
+
+ ________________________________________
+
+******************************************
+Display the partially up-to-date pipeline
+******************************************
+ Run the pipeline, modify ``job1.a.output.1`` so that the second task is no longer up-to-date,
+ and print out the pipeline state again::
+
+ >>> pipeline_run([second_task], verbose=3)
+ Task enters queue = create_initial_file_pairs
+ Job = [None -> [job1.a.start, job1.b.start]]
+ Job = [None -> [job2.a.start, job2.b.start]]
+ Job = [None -> [job3.a.start, job3.b.start]]
+ Job = [None -> [job1.a.start, job1.b.start]] completed
+ Job = [None -> [job2.a.start, job2.b.start]] completed
+ Job = [None -> [job3.a.start, job3.b.start]] completed
+ Completed Task = create_initial_file_pairs
+ Task enters queue = first_task
+ Job = [[job1.a.start, job1.b.start] -> job1.a.output.1]
+ Job = [[job2.a.start, job2.b.start] -> job2.a.output.1]
+ Job = [[job3.a.start, job3.b.start] -> job3.a.output.1]
+ Job = [[job1.a.start, job1.b.start] -> job1.a.output.1] completed
+ Job = [[job2.a.start, job2.b.start] -> job2.a.output.1] completed
+ Job = [[job3.a.start, job3.b.start] -> job3.a.output.1] completed
+ Completed Task = first_task
+ Task enters queue = second_task
+ Job = [job1.a.output.1 -> job1.a.output.2]
+ Job = [job2.a.output.1 -> job2.a.output.2]
+ Job = [job3.a.output.1 -> job3.a.output.2]
+ Job = [job1.a.output.1 -> job1.a.output.2] completed
+ Job = [job2.a.output.1 -> job2.a.output.2] completed
+ Job = [job3.a.output.1 -> job3.a.output.2] completed
+ Completed Task = second_task
+
+
+ # modify job1.a.output.1
+ >>> open("job1.a.output.1", "w").close()
+
+ At a verbosity of 6, even jobs which are up-to-date will be displayed::
+
+ >>> pipeline_printout(sys.stdout, [second_task], verbose = 6)
+
+ ________________________________________
+ Tasks which are up-to-date:
+
+ Task = create_initial_file_pairs
+ Job = [None
+ -> job1.a.start
+ -> job1.b.start]
+ Job = [None
+ -> job2.a.start
+ -> job2.b.start]
+ Job = [None
+ -> job3.a.start
+ -> job3.b.start]
+
+ Task = first_task
+ Job = [[job1.a.start, job1.b.start]
+ -> job1.a.output.1]
+ Job = [[job2.a.start, job2.b.start]
+ -> job2.a.output.1]
+ Job = [[job3.a.start, job3.b.start]
+ -> job3.a.output.1]
+
+ ________________________________________
+
+
+
+ ________________________________________
+ Tasks which will be run:
+
+ Task = second_task
+ Job = [job1.a.output.1
+ -> job1.a.output.2]
+ Job needs update:
+ Input files:
+ * 22 Jul 2014 15:29:19.33: job1.a.output.1
+ Output files:
+ * 22 Jul 2014 15:29:07.53: job1.a.output.2
+
+ Job = [job2.a.output.1
+ -> job2.a.output.2]
+ Job = [job3.a.output.1
+ -> job3.a.output.2]
+
+ ________________________________________
+
+
+
+ We can now see that there is only one job in ``second_task`` which needs to be re-run,
+ because ``job1.a.output.1`` has been modified after ``job1.a.output.2``.
diff --git a/doc/tutorials/new_tutorial/pipeline_printout_graph.rst b/doc/tutorials/new_tutorial/pipeline_printout_graph.rst
new file mode 100644
index 0000000..5076efe
--- /dev/null
+++ b/doc/tutorials/new_tutorial/pipeline_printout_graph.rst
@@ -0,0 +1,170 @@
+.. include:: ../../global.inc
+.. include:: manual_chapter_numbers.inc
+
+.. index::
+ pair: pipeline_printout_graph; Tutorial
+
+.. _new_manual.pipeline_printout_graph:
+
+############################################################################################################################################################################################################
+|new_manual.pipeline_printout_graph.chapter_num|: Displaying the pipeline visually with :ref:`pipeline_printout_graph(...) <pipeline_functions.pipeline_printout_graph>`
+############################################################################################################################################################################################################
+
+.. seealso::
+
+ * :ref:`Manual Table of Contents <new_manual.table_of_contents>`
+ * :ref:`pipeline_printout_graph(...) <pipeline_functions.pipeline_printout_graph>` syntax
+ * :ref:`@graphviz(...) <decorators.graphviz>` syntax
+
+.. note::
+
+ Remember to look at the example code:
+
+ * :ref:`new_manual.pipeline_printout_graph.code`
+
+=============================================
+Printing out a flowchart of our pipeline
+=============================================
+
+ It is all very well being able to trace the data flow through the pipeline as text.
+ Sometimes, however, we need a bit of eye-candy!
+
+ We can see a flowchart for our fledgling pipeline by executing:
+
+ ::
+
+ pipeline_printout_graph ( 'flowchart.svg',
+ 'svg',
+ [second_task],
+ no_key_legend = False)
+
+ .. image:: ../../images/simple_tutorial_stage5_flowchart.png
+ :scale: 70
+
+
+ Flowcharts can be printed in a large number of formats including ``jpg``, ``svg``, ``png`` and ``pdf``.
+
+
+ .. note::
+
+ Flowcharts rely on the ``dot`` programme from `Graphviz <http://www.graphviz.org/>`__.
+
+ Please make sure this is installed.
+
+ There are 8 standard colour schemes, but you can further customise all the colours to your satisfaction:
+
+
+ .. image:: ../../images/flowchart_colour_schemes.png
+
+ See :ref:`here <new_manual.flowchart_colours>` for example code.
+
+================================================================
+Command line options made easier with ``ruffus.cmdline``
+================================================================
+
+
+ If you are using ``ruffus.cmdline``, then you can easily ask for a flowchart from the command line:
+
+ .. code-block:: bash
+
+ your_script.py --flowchart pipeline_flow_chart.png
+
+
+ The output format is deduced from the extension but can be specified manually:
+
+ .. code-block:: bash
+
+ # specify format. Otherwise, deduced from the extension
+ your_script.py --flowchart pipeline_flow_chart.png --flowchart_format png
+
+ Print the flow chart horizontally or vertically...
+
+ .. code-block:: bash
+
+ # flowchart proceeds from left to right , rather than from top to bottom
+ your_script.py --flowchart pipeline_flow_chart.png --draw_graph_horizontally
+
+ ...with or without a key legend
+
+ .. code-block:: bash
+
+ # Draw key legend
+ your_script.py --flowchart pipeline_flow_chart.png --key_legend_in_graph
+
+
+=============================================
+Horribly complicated pipelines!
+=============================================
+ Flowcharts are especially useful if you have really complicated pipelines, such as
+
+ .. image:: ../../images/simple_tutorial_complex_flowchart.png
+ :scale: 70
+
+
+=============================================
+Circular dependency errors in pipelines!
+=============================================
+ Especially if the pipeline is not set up properly and vicious circular dependencies
+ are present:
+
+ .. image:: ../../images/simple_tutorial_complex_flowchart_error.png
+ :scale: 70
+
+==========================================================================================
+``@graphviz``: Customising the appearance of each task
+==========================================================================================
+
+ The graphic for each task can be further customised as you please by adding
+ `graphviz attributes <http://www.graphviz.org/doc/info/attrs.html>`__ such as the URL, shape, colour
+ directly to that node using the decorator ``@graphviz``.
+
+ For example, we can customise the graphic for ``myTask()`` to look like:
+
+ .. image:: ../../images/history_html_flowchart.png
+ :scale: 30
+
+ by adding the requisite attributes as follows:
+
+
+
+ .. code-block:: python
+
+
+ @graphviz(URL='"http://cnn.com"', fillcolor = '"#FFCCCC"',
+ color = '"#FF0000"', pencolor='"#FF0000"', fontcolor='"#4B6000"',
+ label_suffix = "???", label_prefix = "What is this?<BR/> ",
+ label = "<What <FONT COLOR=\"red\">is</FONT>this>",
+ shape= "component", height = 1.5, peripheries = 5,
+ style="dashed")
+ def Up_to_date_task2(infile, outfile):
+ pass
+
+ # Can use dictionary if you wish...
+ graphviz_params = {"URL":"http://cnn.com", "fontcolor": '"#FF00FF"'}
+ @graphviz(**graphviz_params)
+ def myTask(input,output):
+ pass
+
+ .. **
+
+
+ You can even use HTML formatting in task names, including specifying line wraps (as in the above example),
+ via the ``label`` parameter. However, HTML labels **must** be enclosed in ``<`` and ``>``.
+
+
+ .. code-block:: python
+
+ label = "<Line <BR/> wrapped task_name()>"
+
+ Otherwise, you can also opt to keep the task name and wrap it with a prefix and suffix:
+
+ .. code-block:: python
+
+ label_suffix = "??? ", label_prefix = ": What is this?"
+
+ The ``URL`` attribute allows the generation of clickable svg, and also client / server
+ side image maps usable in web pages.
+ See `Graphviz documentation <http://www.graphviz.org/content/output-formats#dimap>`__
+
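+ For example, a hedged sketch (the output file name is a placeholder; ``myTask`` is the decorated task from above):
+
+ .. code-block:: python
+
+     # tasks decorated with @graphviz(URL = ...) become clickable nodes in the SVG output
+     pipeline_printout_graph("flowchart.svg", "svg", [myTask])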
+
+
diff --git a/doc/tutorials/new_tutorial/pipeline_printout_graph_code.rst b/doc/tutorials/new_tutorial/pipeline_printout_graph_code.rst
new file mode 100644
index 0000000..c836b83
--- /dev/null
+++ b/doc/tutorials/new_tutorial/pipeline_printout_graph_code.rst
@@ -0,0 +1,109 @@
+.. include:: ../../global.inc
+.. include:: manual_chapter_numbers.inc
+
+.. _new_manual.pipeline_printout_graph.code:
+
+############################################################################################################################################################################################################
+|new_manual.pipeline_printout_graph.chapter_num|: Python Code for Displaying the pipeline visually with :ref:`pipeline_printout_graph(...) <pipeline_functions.pipeline_printout_graph>`
+############################################################################################################################################################################################################
+
+.. seealso::
+
+ * :ref:`Manual Table of Contents <new_manual.table_of_contents>`
+ * :ref:`pipeline_printout_graph(...) <pipeline_functions.pipeline_printout_graph>` syntax
+ * Back to |new_manual.pipeline_printout_graph.chapter_num|: :ref:`Displaying the pipeline visually <new_manual.pipeline_printout_graph>`
+
+************************************
+Code
+************************************
+ .. code-block:: python
+ :emphasize-lines: 28, 51
+ :linenos:
+
+ from ruffus import *
+ import sys
+
+ #---------------------------------------------------------------
+ # create initial files
+ #
+ @originate([ ['job1.a.start', 'job1.b.start'],
+ ['job2.a.start', 'job2.b.start'],
+ ['job3.a.start', 'job3.b.start'] ])
+ def create_initial_file_pairs(output_files):
+ # create both files as necessary
+ for output_file in output_files:
+ with open(output_file, "w") as oo: pass
+
+ #---------------------------------------------------------------
+ # first task
+ @transform(create_initial_file_pairs, suffix(".start"), ".output.1")
+ def first_task(input_files, output_file):
+ with open(output_file, "w"): pass
+
+
+ #---------------------------------------------------------------
+ # second task
+ @transform(first_task, suffix(".output.1"), ".output.2")
+ def second_task(input_files, output_file):
+ with open(output_file, "w"): pass
+
+ # Print graph before running pipeline
+
+ #---------------------------------------------------------------
+ #
+ # Show flow chart and tasks before running the pipeline
+ #
+ print "Show flow chart and tasks before running the pipeline"
+ pipeline_printout_graph ( open("simple_tutorial_stage5_before.png", "w"),
+ "png",
+ [second_task],
+ minimal_key_legend=True)
+
+ #---------------------------------------------------------------
+ #
+ # Run
+ #
+ pipeline_run([second_task])
+
+
+ # modify job1.a.output.1
+ open("job1.a.output.1", "w").close()
+
+
+ # Print graph after everything apart from ``job1.a.output.1`` is up to date
+
+ #---------------------------------------------------------------
+ #
+ # Show flow chart and tasks after running the pipeline
+ #
+ print "Show flow chart and tasks after running the pipeline"
+ pipeline_printout_graph ( open("simple_tutorial_stage5_after.png", "w"),
+ "png",
+ [second_task],
+ no_key_legend=True)
+
+
+************************************
+Resulting Flowcharts
+************************************
+ +-------------------------------------------------------------+-----------------------------------------------------------------------+
+ | .. image:: ../../images/simple_tutorial_stage5_before.png | .. image:: ../../images/simple_tutorial_stage5_after.png |
+ | :alt: Before running the pipeline | :alt: After running the pipeline |
+ | :scale: 95 | :scale: 95 |
+ | :align: center | :align: center |
+ | | |
+ | .. centered:: Before | .. centered:: After |
+ | | |
+ +-------------------------------------------------------------+-----------------------------------------------------------------------+
+
+ +-------------------------------------------------------------------------------------------------------------------------------------+
+ | .. image:: ../../images/tutorial_key.png |
+ | :alt: Legend key |
+ | :scale: 50 |
+ | :align: center |
+ | |
+ | .. centered:: Legend |
+ | |
+ +-------------------------------------------------------------------------------------------------------------------------------------+
+
+
diff --git a/doc/tutorials/new_tutorial/posttask.rst b/doc/tutorials/new_tutorial/posttask.rst
new file mode 100644
index 0000000..a468a27
--- /dev/null
+++ b/doc/tutorials/new_tutorial/posttask.rst
@@ -0,0 +1,122 @@
+.. include:: ../../global.inc
+.. include:: manual_chapter_numbers.inc
+
+.. index::
+ pair: posttask; Tutorial
+
+.. _new_manual.posttask:
+
+####################################################################################################################################################
+|new_manual.posttask.chapter_num|: Signal the completion of each stage of our pipeline with :ref:`@posttask <decorators.posttask>`
+####################################################################################################################################################
+
+
+.. seealso::
+
+ * :ref:`Manual Table of Contents <new_manual.table_of_contents>`
+ * :ref:`@posttask <decorators.posttask>` syntax
+
+
+***********************
+Overview
+***********************
+
+
+
+ It is often useful to signal the completion of each task by specifying a specific
+ action to be taken or function to be called. This can range from
+ printing out some message, or `touching <http://en.wikipedia.org/wiki/Touch_(Unix)>`__ some sentinel file,
+ to emailing the author. This is particularly useful if the :term:`task` is a recipe applied to an unspecified number
+ of parameters in parallel in different :term:`job`\ s. If the task is never run, or if it
+ fails, needless to say, no task completion action will happen.
+
+
+ *Ruffus* uses the :ref:`@posttask <decorators.posttask>` decorator for this purpose.
+
+
+=================
+**@posttask**
+=================
+
+ We can signal the completion of each task by specifying
+ one or more function(s) using :ref:`@posttask <decorators.posttask>` ::
+
+ from ruffus import *
+
+ def task_finished():
+ print "hooray"
+
+ @posttask(task_finished)
+ @originate("a.1")
+ def create_if_necessary(output_file):
+ open(output_file, "w")
+
+ pipeline_run([create_if_necessary])
+
+
+ This is such a short function, we might as well write it in-line:
+
+ ::
+
+ @posttask(lambda: sys.stdout.write("hooray\n"))
+ @originate("a.1")
+ def create_if_necessary(output_file):
+ open(output_file, "w")
+
+
+.. note::
+
+ The function(s) provided to :ref:`@posttask <decorators.posttask>` will be called if the pipeline passes
+ through a task, even if none of its jobs are run because they are up-to-date.
+ This happens when an upstream task is out-of-date, and the execution passes through
+ this point in the pipeline. See the example in :ref:`new_manual.dependencies`
+ of this manual.
+
+
+.. index::
+ single: @posttask; touchfile (Manual)
+ single: touchfile ; @posttask (Manual)
+
+
+.. _new_manual.posttask.touch_file:
+
+============================================
+:ref:`touch_file<decorators.touch_file>`
+============================================
+
+ One way to note the completion of a task is to create some sort of
+ "flag" file. Each stage in a traditional ``make`` pipeline would contain a
+ ``touch completed.flag``.
+
+ This is such a useful idiom that *Ruffus* provides the shorthand :ref:`touch_file<decorators.touch_file>`:
+
+ .. code-block:: python
+
+ from ruffus import *
+
+ @posttask(touch_file("task_completed.flag"))
+ @files(None, "a.1")
+ def create_if_necessary(input_file, output_file):
+ open(output_file, "w")
+
+ pipeline_run()
+
+=======================================
+Adding several post task actions
+=======================================
+ You can, of course, add more than one different action to be taken on completion of the
+ task, either by stacking up as many :ref:`@posttask<decorators.posttask>` decorators
+ as necessary, or by including several functions in the same :ref:`@posttask <decorators.posttask>`:
+
+ .. code-block:: python
+
+ from ruffus import *
+
+ @posttask(print_hooray, print_whoppee)
+ @posttask(print_hip_hip, touch_file("sentinel_flag"))
+ @originate("a.1")
+ def create_if_necessary(output_file):
+ open(output_file, "w")
+
+ pipeline_run()
+
diff --git a/doc/tutorials/new_tutorial/split.rst b/doc/tutorials/new_tutorial/split.rst
new file mode 100644
index 0000000..dc866b2
--- /dev/null
+++ b/doc/tutorials/new_tutorial/split.rst
@@ -0,0 +1,233 @@
+.. include:: ../../global.inc
+.. include:: manual_chapter_numbers.inc
+
+.. index::
+ pair: split; Tutorial
+
+.. _new_manual.split:
+
+######################################################################################################
+|new_manual.split.chapter_num|: Splitting up large tasks / files with **@split**
+######################################################################################################
+
+
+.. seealso::
+
+ * :ref:`Manual Table of Contents <new_manual.table_of_contents>`
+ * :ref:`@split <decorators.split>` syntax
+ * :ref:`Example code for this chapter <new_manual.split.code>`
+
+
+**************************************************************************************
+Overview
+**************************************************************************************
+
+ A common requirement in computational pipelines is to split up a large task into
+ small jobs which can be run on different processors, (or sent to a computational
+ cluster). Very often, the number of jobs depends dynamically on the size of the
+ task, and cannot be known beforehand.
+
+ *Ruffus* uses the :ref:`@split <decorators.split>` decorator to indicate that
+ the :term:`task` function will produce an indeterminate number of independent *Outputs* from a single *Input*.
+
+**************************************************************************************
+Example: Calculate variance for a large list of numbers in parallel
+**************************************************************************************
+
+ Suppose we wanted to calculate the `variance <http://en.wikipedia.org/wiki/Variance>`__ of
+ 100,000 numbers. How can we parallelise the calculation so that we can get an answer as
+ speedily as possible?
+
+ We need to
+
+ * break down the problem into manageable chunks
+ * solve these in parallel, possibly on a computational cluster and then
+ * merge the partial solutions back together for a final result.
+
+
+ To complicate things, we usually do not want to hard-code the number of parallel chunks beforehand.
+ The degree of parallelism is often only apparent as we process our data.
+
+ **Ruffus** was designed to solve such problems which are common, for example, in bioinformatics and genomics.
+
+ A flowchart for our variance problem might look like this:
+
+ .. image:: ../../images/manual_split_merge_example.jpg
+ :scale: 30
+
+ (In this toy example, we create our own starting data in ``create_random_numbers()``.)
+
+
+**************************************************************************************
+Output files for :ref:`@split <decorators.split>`
+**************************************************************************************
+
+
+ The *Ruffus* decorator :ref:`@split<decorators.split>` is designed specifically with this run-time flexibility in mind:
+
+
+ .. code-block:: python
+
+ @split(create_random_numbers, "*.chunks")
+ def split_problem (input_file_names, output_files):
+ pass
+
+
+ This will split the incoming ``input_file_names`` into ``NNN`` *outputs*, where ``NNN`` is not predetermined:
+
+ The *output* (second) parameter of :ref:`@split<decorators.split>` often contains a |glob|_ pattern like the ``*.chunks`` above.
+
+ Only **after** the task function has completed, will Ruffus match the **Output** parameter (``*.chunks``)
+ against the files which have been created by ``split_problem()`` (e.g. ``1.chunks``, ``2.chunks``, ``3.chunks``)
+
+**************************************************************************************
+Be careful in specifying **Output** globs
+**************************************************************************************
+
+ Note that it is your responsibility to keep the **Output** specification tight enough so that Ruffus does not
+ pick up extraneous files.
+
+ You can specify multiple |glob|_ patterns to match *all* the files which are the
+ result of the splitting task function. These can even cover different directories,
+ or groups of file names. This is a more extreme example:
+
+ ::
+
+ @split("input.file", ['a*.bits', 'b*.pieces', 'somewhere_else/c*.stuff'])
+ def split_function (input_filename, output_files):
+ "Code to split up 'input.file'"
+
+**************************************************************************************
+Clean up previous pipeline runs
+**************************************************************************************
+
+ Problems arise when the current directory contains results of previous pipeline runs.
+
+ * For example, if the previous analysis involved a large data set, there might be 3 chunks: ``1.chunks``, ``2.chunks``, ``3.chunks``.
+ * In the current analysis, there might be a smaller data set which divides into only 2 chunks, ``1.chunks`` and ``2.chunks``.
+ * Unfortunately, ``3.chunks`` from the previous run is still hanging around and will be included erroneously by the glob ``*.chunks``.
+
+
+ .. warning::
+
+ **Your first duty in** :ref:`@split <decorators.split>` **task functions should be to clean up**
+
+ To help you clean up thoroughly, Ruffus initialises the **Output** parameter to all files which match the specification.
+
+ The first order of business is thus invariably to clean up (i.e. delete with ``os.unlink``) all files in **Output**.
+
+ .. code-block:: python
+ :emphasize-lines: 11
+
+ #---------------------------------------------------------------
+ #
+ # split initial file
+ #
+ @split(create_random_numbers, "*.chunks")
+ def split_problem (input_file_names, output_files):
+ """
+ splits random numbers file into xxx files of chunk_size each
+ """
+ #
+ # clean up any files from previous runs
+ #
+ #for ff in glob.glob("*.chunks"):
+ for ff in output_files:
+ os.unlink(ff)
+
+ (The first time you run the example code, ``*.chunks`` will initialise ``output_files`` to an empty list.)
+
+.. _new_manual.split.one_to_many:
+
+**************************************************************************************
+1 to many
+**************************************************************************************
+
+ :ref:`@split <decorators.split>` is a one to many operator because its
+ outputs are a list of *independent* items.
+
+ If :ref:`@split <decorators.split>` generates 5 files, then this will lead to 5 jobs downstream.
+
+ This means we can just connect our old friend :ref:`@transform <decorators.transform>` to our pipeline
+ and the results of :ref:`@split <decorators.split>` will be analysed in parallel. This code should look
+ familiar:
+
+ .. code-block:: python
+
+ #---------------------------------------------------------------
+ #
+ # Calculate sum and sum of squares for each chunk file
+ #
+ @transform(split_problem, suffix(".chunks"), ".sums")
+ def sum_of_squares (input_file_name, output_file_name):
+ pass
+
+
+ Which results in output like this:
+
+ .. code-block:: pycon
+
+ >>> pipeline_run()
+ Job = [[random_numbers.list] -> *.chunks] completed
+ Completed Task = split_problem
+ Job = [1.chunks -> 1.sums] completed
+ Job = [10.chunks -> 10.sums] completed
+ Job = [2.chunks -> 2.sums] completed
+ Job = [3.chunks -> 3.sums] completed
+ Job = [4.chunks -> 4.sums] completed
+ Job = [5.chunks -> 5.sums] completed
+ Job = [6.chunks -> 6.sums] completed
+ Job = [7.chunks -> 7.sums] completed
+ Job = [8.chunks -> 8.sums] completed
+ Job = [9.chunks -> 9.sums] completed
+ Completed Task = sum_of_squares
+
+ Have a look at the :ref:`Example code for this chapter <new_manual.split.code>`
+
+.. _new_manual.split.nothing_to_many:
+
+**************************************************************************************
+Nothing to many
+**************************************************************************************
+
+
+ Normally we would use :ref:`@originate <new_manual.originate>` to create files from
+ scratch, for example at the beginning of the pipeline.
+
+ However, sometimes it is not possible to determine ahead of time how many files you
+ will be creating. :ref:`@split<decorators.split>` is useful in such cases too:
+
+ .. code-block:: python
+ :emphasize-lines: 6
+
+ from random import randint
+ from ruffus import *
+ import os
+
+ # Create between 2 and 5 files
+ @split(None, "*.start")
+ def create_initial_files(no_input_file, output_files):
+ # cleanup first
+ for oo in output_files:
+ os.unlink(oo)
+ # make new files
+ for ii in range(randint(2,5)):
+ open("%d.start" % ii, "w")
+
+ @transform(create_initial_files, suffix(".start"), ".processed")
+ def process_files(input_file, output_file):
+ open(output_file, "w")
+
+ pipeline_run()
+
+ Giving:
+
+ .. code-block:: pycon
+
+ >>> pipeline_run()
+ Job = [None -> *.start] completed
+ Completed Task = create_initial_files
+ Job = [0.start -> 0.processed] completed
+ Job = [1.start -> 1.processed] completed
+ Completed Task = process_files
+
diff --git a/doc/tutorials/new_tutorial/split_code.rst b/doc/tutorials/new_tutorial/split_code.rst
new file mode 100644
index 0000000..eb4e725
--- /dev/null
+++ b/doc/tutorials/new_tutorial/split_code.rst
@@ -0,0 +1,115 @@
+.. include:: ../../global.inc
+.. include:: manual_chapter_numbers.inc
+
+.. _new_manual.split.code:
+
+##############################################################################################################
+|new_manual.split.chapter_num|: Python Code for Splitting up large tasks / files with **@split**
+##############################################################################################################
+
+.. seealso::
+
+ * :ref:`Manual Table of Contents <new_manual.table_of_contents>`
+ * :ref:`@split syntax in detail <decorators.split>`
+ * Back to |new_manual.split.chapter_num|: :ref:`Splitting up large tasks / files with @split <new_manual.split>`
+
+*******************************************
+Splitting large jobs
+*******************************************
+
+ ::
+
+ from ruffus import *
+
+ NUMBER_OF_RANDOMS = 10000
+ CHUNK_SIZE = 1000
+
+
+ import random, os, glob
+
+ #---------------------------------------------------------------
+ #
+ # Create random numbers
+ #
+ @originate("random_numbers.list")
+ def create_random_numbers(output_file_name):
+ f = open(output_file_name, "w")
+ for i in range(NUMBER_OF_RANDOMS):
+ f.write("%g\n" % (random.random() * 100.0))
+
+ #---------------------------------------------------------------
+ #
+ # split initial file
+ #
+ @split(create_random_numbers, "*.chunks")
+ def split_problem (input_file_names, output_files):
+ """
+ splits random numbers file into xxx files of chunk_size each
+ """
+ #
+ # clean up any files from previous runs
+ #
+ #for ff in glob.glob("*.chunks"):
+ for ff in output_files:
+     os.unlink(ff)
+ #
+ #
+ # create new file every chunk_size lines and
+ # copy each line into current file
+ #
+ output_file = None
+ cnt_files = 0
+ for input_file_name in input_file_names:
+ for i, line in enumerate(open(input_file_name)):
+ if i % CHUNK_SIZE == 0:
+ cnt_files += 1
+ output_file = open("%d.chunks" % cnt_files, "w")
+ output_file.write(line)
+
+ #---------------------------------------------------------------
+ #
+ # Calculate sum and sum of squares for each chunk file
+ #
+ @transform(split_problem, suffix(".chunks"), ".sums")
+ def sum_of_squares (input_file_name, output_file_name):
+ output = open(output_file_name, "w")
+ sum_squared, sum = [0.0, 0.0]
+ cnt_values = 0
+ for line in open(input_file_name):
+ cnt_values += 1
+ val = float(line.rstrip())
+ sum_squared += val * val
+ sum += val
+ output.write("%s\n%s\n%d\n" % (repr(sum_squared), repr(sum), cnt_values))
+
+ #---------------------------------------------------------------
+ #
+ # Run
+ #
+ pipeline_run()
+
+
+
+
+************************************
+Resulting Output
+************************************
+ ::
+
+ >>> pipeline_run()
+ Job = [None -> random_numbers.list] completed
+ Completed Task = create_random_numbers
+ Job = [[random_numbers.list] -> *.chunks] completed
+ Completed Task = split_problem
+ Job = [1.chunks -> 1.sums] completed
+ Job = [10.chunks -> 10.sums] completed
+ Job = [2.chunks -> 2.sums] completed
+ Job = [3.chunks -> 3.sums] completed
+ Job = [4.chunks -> 4.sums] completed
+ Job = [5.chunks -> 5.sums] completed
+ Job = [6.chunks -> 6.sums] completed
+ Job = [7.chunks -> 7.sums] completed
+ Job = [8.chunks -> 8.sums] completed
+ Job = [9.chunks -> 9.sums] completed
+ Completed Task = sum_of_squares
+
diff --git a/doc/tutorials/new_tutorial/subdivide_collate.rst b/doc/tutorials/new_tutorial/subdivide_collate.rst
new file mode 100644
index 0000000..ae913b9
--- /dev/null
+++ b/doc/tutorials/new_tutorial/subdivide_collate.rst
@@ -0,0 +1,234 @@
+.. include:: ../../global.inc
+.. include:: manual_chapter_numbers.inc
+
+.. index::
+ pair: @subdivide; Tutorial
+ pair: @collate; Tutorial
+
+.. _new_manual.subdivide_collate:
+
+############################################################################################################################################################################
+|new_manual.subdivide_collate.chapter_num|: :ref:`@subdivide <decorators.subdivide>` tasks to run efficiently and regroup with :ref:`@collate <decorators.collate>`
+############################################################################################################################################################################
+
+
+.. seealso::
+
+ * :ref:`Manual Table of Contents <new_manual.table_of_contents>`
+ * :ref:`@subdivide <decorators.subdivide>` syntax
+ * :ref:`@collate <decorators.collate>` syntax
+
+
+***********************
+Overview
+***********************
+
+ In |new_manual.split.chapter_num| and |new_manual.merge.chapter_num|, we saw how a large
+ task can be :ref:`@split <new_manual.split>` into small jobs to be analysed efficiently
+ in parallel. Ruffus can then :ref:`@merge <new_manual.merge>` these back together
+ to give a single, unified result.
+
+ This assumes that your pipeline is processing one item at a time. Usually, however, we
+ will have, for example, 10 large pieces of data in play, each of which has to be
+ subdivided into smaller pieces for analysis before being put back together.
+
+ This is the role of :ref:`@subdivide <decorators.subdivide>` and :ref:`@collate <decorators.collate>`.
+
+ Like :ref:`@split <decorators.split>`, the number of output files
+ :ref:`@subdivide <decorators.subdivide>` produces for *each* **Input** is not predetermined.
+
+ On the other hand, these output files should be named in such a way that they can
+ later be grouped back together using :ref:`@collate <decorators.collate>`.
+
+ This will be clearer with some worked examples.
+
+
+.. _new_manual.subdivide:
+
+*********************************************************************
+:ref:`@subdivide <decorators.subdivide>` in parallel
+*********************************************************************
+
+ Let us start from 3 files with varying numbers of lines. We wish to process these files two
+ lines at a time, but we do not know ahead of time how long each file is:
+
+ .. code-block:: python
+ :emphasize-lines: 3,5
+
+ from ruffus import *
+ import os, random, sys
+
+ # Create files with a random number of lines
+ @originate(["a.start",
+ "b.start",
+ "c.start"])
+ def create_test_files(output_file):
+ cnt_lines = random.randint(1,3) * 2
+ with open(output_file, "w") as oo:
+ for ii in range(cnt_lines):
+ oo.write("data item = %d\n" % ii)
+ print " %s has %d lines" % (output_file, cnt_lines)
+
+
+ #
+ # subdivide the input files into NNN fragment files of 2 lines each
+ #
+ @subdivide( create_test_files,
+ formatter(),
+ "{path[0]}/{basename[0]}.*.fragment",
+ "{path[0]}/{basename[0]}")
+ def subdivide_files(input_file, output_files, output_file_name_stem):
+ #
+ # cleanup any previous results
+ #
+ for oo in output_files:
+ os.unlink(oo)
+ #
+ # Output files contain two lines each
+ # (new output files every even line)
+ #
+ cnt_output_files = 0
+ for ii, line in enumerate(open(input_file)):
+ if ii % 2 == 0:
+ cnt_output_files += 1
+ output_file_name = "%s.%d.fragment" % (output_file_name_stem, cnt_output_files)
+ output_file = open(output_file_name, "w")
+ print " Subdivide %s -> %s" % (input_file, output_file_name)
+ output_file.write(line)
+
+
+ #
+ # Analyse each fragment independently
+ #
+ @transform(subdivide_files, suffix(".fragment"), ".analysed")
+ def analyse_fragments(input_file, output_file):
+ print " Analysing %s -> %s" % (input_file, output_file)
+ with open(output_file, "w") as oo:
+ for line in open(input_file):
+ oo.write("analysed " + line)
+
+
+ This produces the following output:
+
+ .. code-block:: pycon
+ :emphasize-lines: 8,20,36
+
+ >>> pipeline_run(verbose = 1)
+ a.start has 2 lines
+ Job = [None -> a.start] completed
+ b.start has 6 lines
+ Job = [None -> b.start] completed
+ c.start has 6 lines
+ Job = [None -> c.start] completed
+ Completed Task = create_test_files
+
+ Subdivide a.start -> /home/lg/temp/a.1.fragment
+ Job = [a.start -> a.*.fragment, a] completed
+
+ Subdivide b.start -> /home/lg/temp/b.1.fragment
+ Subdivide b.start -> /home/lg/temp/b.2.fragment
+ Subdivide b.start -> /home/lg/temp/b.3.fragment
+ Job = [b.start -> b.*.fragment, b] completed
+
+ Subdivide c.start -> /home/lg/temp/c.1.fragment
+ Subdivide c.start -> /home/lg/temp/c.2.fragment
+ Subdivide c.start -> /home/lg/temp/c.3.fragment
+ Job = [c.start -> c.*.fragment, c] completed
+
+ Completed Task = subdivide_files
+
+ Analysing /home/lg/temp/a.1.fragment -> /home/lg/temp/a.1.analysed
+ Job = [a.1.fragment -> a.1.analysed] completed
+ Analysing /home/lg/temp/b.1.fragment -> /home/lg/temp/b.1.analysed
+ Job = [b.1.fragment -> b.1.analysed] completed
+
+ [ ...SEE EXAMPLE CODE FOR MORE LINES ...]
+
+ Completed Task = analyse_fragments
+
+
+ ``a.start`` has two lines and results in a single ``.fragment`` file,
+ while there are 3 ``b.*.fragment`` files because ``b.start`` has 6 lines.
+ Whatever their origin, all of the fragment files are treated equally
+ in ``analyse_fragments()`` and processed (in parallel) in the same way.
+
+
+
+.. _new_manual.collate:
+
+*********************************************************************
+Grouping using :ref:`@collate <decorators.collate>`
+*********************************************************************
+
+ All that is left in our example is to reassemble the analysed fragments back together into
+ 3 sets of results corresponding to the original 3 pieces of starting data.
+
+ This is straightforward by eye: the file names all have the same pattern: ``[abc].*.analysed``:
+
+ ::
+
+ a.1.analysed -> a.final_result
+ b.1.analysed -> b.final_result
+ b.2.analysed -> ..
+ b.3.analysed -> ..
+ c.1.analysed -> c.final_result
+ c.2.analysed -> ..
+
+ :ref:`@collate <decorators.collate>` does something similar:
+
+ #. Specify a string substitution e.g. ``c.??.analysed -> c.final_result`` and
+ #. Ask *ruffus* to group together any **Input** (e.g. ``c.1.analysed``, ``c.2.analysed``)
+ that will result in the same **Output** (e.g. ``c.final_result``)
+
+
+ .. code-block:: python
+ :emphasize-lines: 3,5
+
+ #
+ # ``XXX.??.analysed -> XXX.final_result``
+ # Group results using original names
+ #
+ @collate( analyse_fragments,
+
+ # split file name into [abc].NUMBER.analysed
+ formatter("/(?P<NAME>[abc]+)\.\d+\.analysed$"),
+
+ "{path[0]}/{NAME[0]}.final_result")
+ def recombine_analyses(input_file_names, output_file):
+ with open(output_file, "w") as oo:
+ for input_file in input_file_names:
+ print " Recombine %s -> %s" % (input_file, output_file)
+ for line in open(input_file):
+ oo.write(line)
+
+ This produces the following output:
+
+ .. code-block:: pycon
+ :emphasize-lines: 11
+
+ Recombine /home/lg/temp/a.1.analysed -> /home/lg/temp/a.final_result
+ Job = [[a.1.analysed] -> a.final_result] completed
+ Recombine /home/lg/temp/b.1.analysed -> /home/lg/temp/b.final_result
+ Recombine /home/lg/temp/b.2.analysed -> /home/lg/temp/b.final_result
+ Recombine /home/lg/temp/b.3.analysed -> /home/lg/temp/b.final_result
+ Job = [[b.1.analysed, b.2.analysed, b.3.analysed] -> b.final_result] completed
+ Recombine /home/lg/temp/c.1.analysed -> /home/lg/temp/c.final_result
+ Recombine /home/lg/temp/c.2.analysed -> /home/lg/temp/c.final_result
+ Recombine /home/lg/temp/c.3.analysed -> /home/lg/temp/c.final_result
+ Job = [[c.1.analysed, c.2.analysed, c.3.analysed] -> c.final_result] completed
+ Completed Task = recombine_analyses
+
+
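+ The grouping key comes entirely from the named capture group in the ``formatter()``
+ regular expression. As a plain Python illustration (using the standard ``re`` module,
+ not the Ruffus machinery), the same pattern extracts ``b`` from ``/home/lg/temp/b.2.analysed``,
+ so all of the ``b.*.analysed`` files end up in the same group:
+
+ .. code-block:: python
+
+     import re
+
+     # the same regular expression as in formatter() above
+     pattern = re.compile(r"/(?P<NAME>[abc]+)\.\d+\.analysed$")
+
+     for file_name in ["/home/lg/temp/b.2.analysed", "/home/lg/temp/c.1.analysed"]:
+         name = pattern.search(file_name).group("NAME")
+         # every file sharing the same NAME maps to the same final result
+         print("%s -> %s.final_result" % (file_name, name))
+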
+ .. warning::
+
+ * **Input** file names are not grouped together in any guaranteed order.
+
+   For example, the fragment files may not be sent to ``recombine_analyses(input_file_names, ...)``
+   in alphabetical or any other useful order.
+
+   You may want to sort **Input** before concatenation (see the sketch below).
+
+ * All **Input** are grouped together only if they share both the same **Output** *and* **Extra**
+   parameters. If any string substitution is specified in any of the other **Extra** parameters
+   to :ref:`@collate <decorators.collate>`, it must give the same answer for every **Input**
+   in the same group.
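+
+ As a minimal sketch of the sort mentioned in the warning above (reusing the
+ ``recombine_analyses()`` example; the call to ``sorted()`` is the only new part), sorting the
+ grouped **Input** file names makes the order of concatenation reproducible:
+
+ .. code-block:: python
+
+     @collate( analyse_fragments,
+               formatter("/(?P<NAME>[abc]+)\.\d+\.analysed$"),
+               "{path[0]}/{NAME[0]}.final_result")
+     def recombine_analyses(input_file_names, output_file):
+         with open(output_file, "w") as oo:
+             # sort so that b.1.analysed, b.2.analysed, b.3.analysed are
+             # concatenated in a predictable order, whatever order Ruffus
+             # grouped them in
+             for input_file in sorted(input_file_names):
+                 for line in open(input_file):
+                     oo.write(line)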
diff --git a/doc/tutorials/new_tutorial/subdivide_collate_code.rst b/doc/tutorials/new_tutorial/subdivide_collate_code.rst
new file mode 100644
index 0000000..3160181
--- /dev/null
+++ b/doc/tutorials/new_tutorial/subdivide_collate_code.rst
@@ -0,0 +1,155 @@
+.. include:: ../../global.inc
+.. include:: manual_chapter_numbers.inc
+
+.. _new_manual.subdivide_collate.code:
+
+#############################################################################################################################################################################################################################################################################################################################
+|new_manual.subdivide_collate.chapter_num|: Python Code for :ref:`@subdivide <decorators.subdivide>` tasks to run efficiently and regroup with :ref:`@collate <decorators.collate>`
+#############################################################################################################################################################################################################################################################################################################################
+
+.. seealso::
+
+ * :ref:`Manual Table of Contents <new_manual.table_of_contents>`
+ * :ref:`@jobs_limit <decorators.jobs_limit>` syntax
+ * :ref:`pipeline_run() <pipeline_functions.pipeline_run>` syntax
+ * :ref:`drmaa_wrapper.run_job() <drmaa_wrapper.run_job>` syntax
+ * Back to |new_manual.subdivide_collate.chapter_num|: :ref:`@subdivide tasks to run efficiently and regroup with @collate <new_manual.subdivide_collate>`
+
+*****************************************************************************************************************************
+:ref:`@subdivide <decorators.subdivide>` and regroup with :ref:`@collate <decorators.collate>` example
+*****************************************************************************************************************************
+
+ .. code-block:: python
+ :emphasize-lines: 17
+
+ from ruffus import *
+ import os, random, sys
+
+ # Create files with a random number of lines
+ @originate(["a.start",
+ "b.start",
+ "c.start"])
+ def create_test_files(output_file):
+ cnt_lines = random.randint(1,3) * 2
+ with open(output_file, "w") as oo:
+ for ii in range(cnt_lines):
+ oo.write("data item = %d\n" % ii)
+ print " %s has %d lines" % (output_file, cnt_lines)
+
+
+ #
+ # subdivide the input files into NNN fragment files of 2 lines each
+ #
+ @subdivide( create_test_files,
+ formatter(),
+ "{path[0]}/{basename[0]}.*.fragment",
+ "{path[0]}/{basename[0]}")
+ def subdivide_files(input_file, output_files, output_file_name_stem):
+ #
+ # cleanup any previous results
+ #
+ for oo in output_files:
+ os.unlink(oo)
+ #
+ # Output files contain two lines each
+ # (new output files every even line)
+ #
+ cnt_output_files = 0
+ for ii, line in enumerate(open(input_file)):
+ if ii % 2 == 0:
+ cnt_output_files += 1
+ output_file_name = "%s.%d.fragment" % (output_file_name_stem, cnt_output_files)
+ output_file = open(output_file_name, "w")
+ print " Subdivide %s -> %s" % (input_file, output_file_name)
+ output_file.write(line)
+
+
+ #
+ # Analyse each fragment independently
+ #
+ @transform(subdivide_files, suffix(".fragment"), ".analysed")
+ def analyse_fragments(input_file, output_file):
+ print " Analysing %s -> %s" % (input_file, output_file)
+ with open(output_file, "w") as oo:
+ for line in open(input_file):
+ oo.write("analysed " + line)
+
+
+ #
+ # Group results using original names
+ #
+ @collate( analyse_fragments,
+
+ # split file name into [abc].NUMBER.analysed
+ formatter("/(?P<NAME>[abc]+)\.\d+\.analysed$"),
+
+ "{path[0]}/{NAME[0]}.final_result")
+ def recombine_analyses(input_file_names, output_file):
+ with open(output_file, "w") as oo:
+ for input_file in input_file_names:
+ print " Recombine %s -> %s" % (input_file, output_file)
+ for line in open(input_file):
+ oo.write(line)
+
+
+
+
+ #pipeline_printout(sys.stdout, verbose = 3)
+
+
+ pipeline_run(verbose = 1)
+
+ This results in:
+
+ .. code-block:: pycon
+
+ >>> pipeline_run(verbose = 1)
+
+ a.start has 2 lines
+ Job = [None -> a.start] completed
+ b.start has 6 lines
+ Job = [None -> b.start] completed
+ c.start has 6 lines
+ Job = [None -> c.start] completed
+ Completed Task = create_test_files
+
+ Subdivide a.start -> /home/lg/temp/a.1.fragment
+ Job = [a.start -> a.*.fragment, a] completed
+ Subdivide b.start -> /home/lg/temp/b.1.fragment
+ Subdivide b.start -> /home/lg/temp/b.2.fragment
+ Subdivide b.start -> /home/lg/temp/b.3.fragment
+ Job = [b.start -> b.*.fragment, b] completed
+ Subdivide c.start -> /home/lg/temp/c.1.fragment
+ Subdivide c.start -> /home/lg/temp/c.2.fragment
+ Subdivide c.start -> /home/lg/temp/c.3.fragment
+ Job = [c.start -> c.*.fragment, c] completed
+ Completed Task = subdivide_files
+
+ Analysing /home/lg/temp/a.1.fragment -> /home/lg/temp/a.1.analysed
+ Job = [a.1.fragment -> a.1.analysed] completed
+ Analysing /home/lg/temp/b.1.fragment -> /home/lg/temp/b.1.analysed
+ Job = [b.1.fragment -> b.1.analysed] completed
+ Analysing /home/lg/temp/b.2.fragment -> /home/lg/temp/b.2.analysed
+ Job = [b.2.fragment -> b.2.analysed] completed
+ Analysing /home/lg/temp/b.3.fragment -> /home/lg/temp/b.3.analysed
+ Job = [b.3.fragment -> b.3.analysed] completed
+ Analysing /home/lg/temp/c.1.fragment -> /home/lg/temp/c.1.analysed
+ Job = [c.1.fragment -> c.1.analysed] completed
+ Analysing /home/lg/temp/c.2.fragment -> /home/lg/temp/c.2.analysed
+ Job = [c.2.fragment -> c.2.analysed] completed
+ Analysing /home/lg/temp/c.3.fragment -> /home/lg/temp/c.3.analysed
+ Job = [c.3.fragment -> c.3.analysed] completed
+ Completed Task = analyse_fragments
+
+ Recombine /home/lg/temp/a.1.analysed -> /home/lg/temp/a.final_result
+ Job = [[a.1.analysed] -> a.final_result] completed
+ Recombine /home/lg/temp/b.1.analysed -> /home/lg/temp/b.final_result
+ Recombine /home/lg/temp/b.2.analysed -> /home/lg/temp/b.final_result
+ Recombine /home/lg/temp/b.3.analysed -> /home/lg/temp/b.final_result
+ Job = [[b.1.analysed, b.2.analysed, b.3.analysed] -> b.final_result] completed
+ Recombine /home/lg/temp/c.1.analysed -> /home/lg/temp/c.final_result
+ Recombine /home/lg/temp/c.2.analysed -> /home/lg/temp/c.final_result
+ Recombine /home/lg/temp/c.3.analysed -> /home/lg/temp/c.final_result
+ Job = [[c.1.analysed, c.2.analysed, c.3.analysed] -> c.final_result] completed
+ Completed Task = recombine_analyses
+
diff --git a/doc/tutorials/new_tutorial/transform.rst b/doc/tutorials/new_tutorial/transform.rst
new file mode 100644
index 0000000..9e743c0
--- /dev/null
+++ b/doc/tutorials/new_tutorial/transform.rst
@@ -0,0 +1,194 @@
+.. include:: ../../global.inc
+.. include:: manual_chapter_numbers.inc
+
+.. index::
+ pair: transform; Tutorial
+
+.. _new_manual.transform:
+
+############################################################################################################################################################################################################
+|new_manual.transform.chapter_num|: Transforming data in a pipeline with :ref:`@transform <decorators.transform>`
+############################################################################################################################################################################################################
+
+
+.. seealso::
+
+ * :ref:`Manual Table of Contents <new_manual.table_of_contents>`
+ * :ref:`@transform <decorators.transform>` syntax
+
+.. note::
+
+ Remember to look at the example code:
+
+ * :ref:`new_manual.transform.code`
+
+
+***************************************
+Review
+***************************************
+ .. image:: ../../images/theoretical_pipeline_schematic.png
+ :scale: 50
+
+ Computational pipelines transform your data in stages until the final result is produced.
+ Ruffus automates the plumbing in your pipeline. You supply the python functions which perform the data transformation,
+ and tell Ruffus how these pipeline stages or :term:`task` functions are connected together.
+
+ .. note::
+
+ **The best way to design a pipeline is to:**
+
+ * **write down the file names of the data as it flows across your pipeline**
+ * **write down the names of functions which transform the data at each stage of the pipeline.**
+
+
+
+************************************
+Task functions as recipes
+************************************
+
+ Each :term:`task` function of the pipeline is a recipe or
+ `rule <http://www.gnu.org/software/make/manual/make.html#Rule-Introduction>`_
+ which can be applied repeatedly to our data.
+
+ For example, one can have
+
+ * a ``compile()`` *task* which will compile any number of source code files, or
+ * a ``count_lines()`` *task* which will count the number of lines in any file or
+ * an ``align_dna()`` *task* which will align the DNA of many chromosomes.
+
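+ For instance, a minimal sketch of such a ``count_lines()`` recipe (the ``.txt`` input files and
+ the ``.linecount`` suffix are made up for illustration) shows one function being applied, job by
+ job, to every input file:
+
+ .. code-block:: python
+
+     from ruffus import *
+
+     # one rule, applied once per input file
+     @transform(["a.txt", "b.txt", "c.txt"],   # Input  = hypothetical text files
+                suffix(".txt"),                # suffix = .txt
+                ".linecount")                  # Output suffix = .linecount
+     def count_lines(input_file, output_file):
+         cnt_lines = sum(1 for line in open(input_file))
+         with open(output_file, "w") as oo:
+             oo.write("%d\n" % cnt_lines)
+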
+
+.. index::
+ pair: one to one @transform; Tutorial
+
+******************************************************************************
+:ref:`@transform <decorators.transform>` is a 1 to 1 operation
+******************************************************************************
+
+
+ ``@transform`` is a 1:1 operation because for each input, it generates one output.
+
+ .. image:: ../../images/transform_1_to_1_example.png
+ :scale: 50
+
+
+ This is obvious when you count the number of jobs at each step. In our example pipeline, there are always
+ three jobs moving through in step at each stage (:term:`task`).
+
+ Each **Input** or **Output** is not, however, limited to a single file name. Each job can accept, for example,
+ a pair of files as its **Input**, or generate more than one file, a dictionary, or numbers as its **Output**.
+
+ When a job outputs a pair of files, this does not generate two jobs downstream. It just means that the next
+ :term:`task` in the pipeline will receive a list or tuple of files as its input parameter.
+
+ .. note::
+
+ The different sorts of decorators in Ruffus determine the *topology* of your pipeline,
+ i.e. how the jobs from different tasks are linked together seamlessly.
+
+ :ref:`@transform <decorators.transform>` always generates one **Output** for one **Input**.
+
+ In the later parts of the tutorial, we will encounter more decorators which can *split up*, or *join together* or *group* inputs.
+
+ In other words, using other decorators **Input** and **Output** can have **many to one**, **many to many** etc. relationships.
+
+=======================================
+A pair of files as the **Input**
+=======================================
+
+ Let us rewrite our previous example so that the **Input** of the first task
+ are `matching pairs <http://en.wikipedia.org/wiki/DNA_sequencing_theory#Pairwise_end-sequencing>`__
+ of DNA sequence files, processed in tandem.
+
+
+ .. code-block:: python
+ :emphasize-lines: 6-8,17-19,29-31
+
+ from ruffus import *
+
+ starting_files = [("a.1.fastq", "a.2.fastq"),
+                   ("b.1.fastq", "b.2.fastq"),
+                   ("c.1.fastq", "c.2.fastq")]
+
+ #
+ # STAGE 1 fasta->sam
+ #
+ @transform(starting_files, # Input = starting files
+ suffix(".1.fastq"), # suffix = .1.fastq
+ ".sam") # Output suffix = .sam
+ def map_dna_sequence(input_files,
+ output_file):
+ # remember there are two input files now
+ ii1 = open(input_files[0])
+ ii2 = open(input_files[1])
+ oo = open(output_file, "w")
+
+
+ The only changes are to the first task:
+
+ .. code-block:: pycon
+
+ >>> pipeline_run()
+ Job = [[a.1.fastq, a.2.fastq] -> a.sam] completed
+ Job = [[b.1.fastq, b.2.fastq] -> b.sam] completed
+ Job = [[c.1.fastq, c.2.fastq] -> c.sam] completed
+ Completed Task = map_dna_sequence
+
+
+ :ref:`suffix <decorators.suffix>` always matches only the first file name in each **Input**.
+
+.. index::
+ pair: input / output parameters; Tutorial
+
+************************************
+**Input** and **Output** parameters
+************************************
+
+ **Ruffus** chains together different tasks by taking the **Output** from one job
+ and plugging it automatically as the **Input** of the next.
+
+ The first two parameters of each job are the **Input** and **Output** parameters respectively.
+
+ In the above example, we have:
+
+ .. code-block:: pycon
+
+ >>> pipeline_run()
+ Job = [a.bam -> a.statistics, use_linear_model] completed
+ Job = [b.bam -> b.statistics, use_linear_model] completed
+ Job = [c.bam -> c.statistics, use_linear_model] completed
+ Completed Task = summarise_bam_file
+
+
+
+ .. table:: Parameters for ``summarise_bam_file()``
+
+ ================ ==================== =============================================
+ **Inputs** **Outputs** **Extra**
+ ================ ==================== =============================================
+ ``"a.bam"`` ``"a.statistics"`` ``"use_linear_model"``
+ ``"b.bam"`` ``"b.statistics"`` ``"use_linear_model"``
+ ``"c.bam"`` ``"c.statistics"`` ``"use_linear_model"``
+ ================ ==================== =============================================
+
+
+
+ **Extra** parameters are for the consumption of ``summarise_bam_file()`` and will not be passed to the next task.
+
+ Ruffus was designed for pipelines which save intermediate data in files. This is not
+ compulsory but saving your data in files at each step provides many advantages:
+
+ #. Ruffus can use file system time stamps to check if your pipeline is up to date
+ #. Your data is persistent across runs
+ #. This is a good way to pass large amounts of data across processes and computational nodes
+
+ Nevertheless, *all* the :term:`task` parameters can include anything which suits your workflow, from lists of files, to numbers,
+ sets or tuples. *Ruffus* imposes few constraints on what *you*
+ would like to send to each stage of your pipeline.
+
+
+ *Ruffus* does, however, assume that any strings in the **Input** and **Output** parameters are file names
+ required by and produced by that job. As we shall see, the modification times of these files
+ indicate whether that part of the pipeline is up to date or needs to be rerun.
+
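+ As a minimal sketch (reusing the ``summarise_bam_file()`` example above, with a hypothetical
+ options dictionary), only the strings in the **Input** and **Output** parameters are treated as
+ file names; the **Extra** dictionary is handed to each job untouched and is not passed on to the
+ next task:
+
+ .. code-block:: python
+
+     from ruffus import *
+
+     @transform(["a.bam", "b.bam"],                     # Input  = file names
+                suffix(".bam"),
+                ".statistics",                          # Output = file names
+                {"model": "linear", "iterations": 5})   # Extra  = any python object
+     def summarise_bam_file(input_file, output_file, options):
+         # the dictionary arrives exactly as written above
+         with open(output_file, "w") as oo:
+             oo.write("summarised with the %s model\n" % options["model"])
+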
+
+
diff --git a/doc/tutorials/new_tutorial/transform_code.rst b/doc/tutorials/new_tutorial/transform_code.rst
new file mode 100644
index 0000000..2f2457d
--- /dev/null
+++ b/doc/tutorials/new_tutorial/transform_code.rst
@@ -0,0 +1,99 @@
+.. include:: ../../global.inc
+.. include:: manual_chapter_numbers.inc
+
+.. _new_manual.transform.code:
+
+##############################################################################################################
+ |new_manual.transform.chapter_num|: Python Code for Transforming data in a pipeline with ``@transform``
+##############################################################################################################
+
+.. seealso::
+
+ * :ref:`Manual Table of Contents <new_manual.table_of_contents>`
+ * :ref:`@transform syntax in detail <decorators.transform>`
+ * Back to |new_manual.transform.chapter_num|: :ref:`Transforming data in a pipeline with @transform <new_manual.transform>`
+
+*******************************************
+Your first Ruffus script
+*******************************************
+
+ .. code-block:: python
+
+ #
+ # The starting data files would normally exist beforehand!
+ # We create some empty files for this example
+ #
+ starting_files = [("a.1.fastq", "a.2.fastq"),
+ ("b.1.fastq", "b.2.fastq"),
+ ("c.1.fastq", "c.2.fastq")]
+
+
+ for ff_pair in starting_files:
+ open(ff_pair[0], "w")
+ open(ff_pair[1], "w")
+
+
+ from ruffus import *
+
+ #
+ # STAGE 1 fasta->sam
+ #
+ @transform(starting_files, # Input = starting files
+ suffix(".1.fastq"), # suffix = .1.fastq
+ ".sam") # Output suffix = .sam
+ def map_dna_sequence(input_files,
+ output_file):
+ # remember there are two input files now
+ ii1 = open(input_files[0])
+ ii2 = open(input_files[1])
+ oo = open(output_file, "w")
+
+ #
+ # STAGE 2 sam->bam
+ #
+ @transform(map_dna_sequence, # Input = previous stage
+ suffix(".sam"), # suffix = .sam
+ ".bam") # Output suffix = .bam
+ def compress_sam_file(input_file,
+ output_file):
+ ii = open(input_file)
+ oo = open(output_file, "w")
+
+ #
+ # STAGE 3 bam->statistics
+ #
+ @transform(compress_sam_file, # Input = previous stage
+ suffix(".bam"), # suffix = .bam
+ ".statistics", # Output suffix = .statistics
+ "use_linear_model") # Extra statistics parameter
+ def summarise_bam_file(input_file,
+ output_file,
+ extra_stats_parameter):
+ """
+ Sketch of real analysis function
+ """
+ ii = open(input_file)
+ oo = open(output_file, "w")
+
+ pipeline_run()
+
+
+************************************
+Resulting Output
+************************************
+ ::
+
+ >>> pipeline_run()
+ Job = [[a.1.fastq, a.2.fastq] -> a.sam] completed
+ Job = [[b.1.fastq, b.2.fastq] -> b.sam] completed
+ Job = [[c.1.fastq, c.2.fastq] -> c.sam] completed
+ Completed Task = map_dna_sequence
+ Job = [a.sam -> a.bam] completed
+ Job = [b.sam -> b.bam] completed
+ Job = [c.sam -> c.bam] completed
+ Completed Task = compress_sam_file
+ Job = [a.bam -> a.statistics, use_linear_model] completed
+ Job = [b.bam -> b.statistics, use_linear_model] completed
+ Job = [c.bam -> c.statistics, use_linear_model] completed
+ Completed Task = summarise_bam_file
+
diff --git a/doc/tutorials/new_tutorial/transform_in_parallel.rst b/doc/tutorials/new_tutorial/transform_in_parallel.rst
new file mode 100644
index 0000000..57d0337
--- /dev/null
+++ b/doc/tutorials/new_tutorial/transform_in_parallel.rst
@@ -0,0 +1,394 @@
+.. include:: ../../global.inc
+.. include:: manual_chapter_numbers.inc
+
+.. index::
+ pair: transforming in parallel; Tutorial
+
+.. _new_manual.transform_in_parallel:
+
+######################################################################################################
+|new_manual.transform_in_parallel.chapter_num|: More on ``@transform``-ing data
+######################################################################################################
+
+
+.. seealso::
+
+ * :ref:`Manual Table of Contents <new_manual.table_of_contents>`
+ * :ref:`@transform <decorators.transform>` syntax
+
+.. note::
+
+ Remember to look at the example code:
+
+ * :ref:`new_manual.transform_in_parallel.code`
+
+***************************************
+Review
+***************************************
+ .. image:: ../../images/theoretical_pipeline_schematic.png
+ :scale: 50
+
+ Computational pipelines transform your data in stages until the final result is produced.
+ *Ruffus* automates the plumbing in your pipeline. You supply the python functions which perform the data transformation,
+ and tell *Ruffus* how these pipeline stages or :term:`task` functions are connected together.
+
+ .. note::
+
+ **The best way to design a pipeline is to:**
+
+ * **write down the file names of the data as it flows across your pipeline**
+ * **write down the names of functions which transform the data at each stage of the pipeline.**
+
+
+ :ref:`new_manual.introduction` described the bare bones of a simple *Ruffus* pipeline.
+
+ Using the *Ruffus* :ref:`@transform <decorators.transform>` decorator, we were able to
+ specify the data files moving through our pipeline so that our specified task functions
+ could be invoked.
+
+ This may seem like a lot of effort and complication for something so simple: a couple of
+ python function calls we could have invoked ourselves.
+ However, by letting *Ruffus* manage your pipeline parameters, you get the following features
+ for free:
+
+ #. Only out-of-date parts of the pipeline will be re-run
+ #. Multiple jobs can be run in parallel (on different processors if possible)
+ #. Pipeline stages can be chained together automatically. This means you can apply your
+ pipeline just as easily to 1000 files as to 3.
+
+
+
+************************************
+Running pipelines in parallel
+************************************
+ Even though three sets of files have been specified for our initial pipeline and can be
+ processed completely independently, by default *Ruffus* runs them serially, one after another.
+
+ To ask *Ruffus* to run them in parallel, all you have to do is to add a ``multiprocess`` parameter to ``pipeline_run``:
+
+ ::
+
+ >>> pipeline_run(multiprocess = 5)
+
+ In this case, we are telling *Ruffus* to run a maximum of 5 jobs at the same time. Since we only have
+ three sets of data, that is as much parallelism as we are going to get...
+
+
+
+.. _new_manual.only_rerun_out_of_date:
+
+**************************************************
+Up-to-date jobs are not re-run unnecessarily
+**************************************************
+
+ A job will be run only if the output file timestamps are out of date.
+ If you ran our example code a second time, nothing would happen because all the work is already complete.
+
+ We can check the details by asking *Ruffus* for more ``verbose`` output
+
+ ::
+
+ >>> pipeline_run(verbose = 4)
+ Task = map_dna_sequence
+ All jobs up to date
+ Task = compress_sam_file
+ All jobs up to date
+ Task = summarise_bam_file
+ All jobs up to date
+
+
+ Nothing happens because:
+ * ``a.sam`` was created later than ``a.1.fastq`` and ``a.2.fastq``, and
+ * ``a.bam`` was created later than ``a.sam`` and
+ * ``a.statistics`` was created later than ``a.bam``.
+
+ and so on...
+
+
+ Let us see what happens if we recreate the file ``a.1.fastq`` so that one of the original data files appears to be out of date:
+ ::
+
+ open("a.1.fastq", "w")
+ pipeline_run(multiprocess = 5)
+
+
+ The up-to-date jobs are cleverly ignored and only the out-of-date files are reprocessed.
+
+ .. code-block:: pycon
+ :emphasize-lines: 3,4,7,8,11,12
+
+ >>> open("a.1.fastq", "w")
+ >>> pipeline_run(verbose=2)
+ Job = [[b.1.fastq, b.2.fastq] -> b.sam] # unnecessary: already up to date
+ Job = [[c.1.fastq, c.2.fastq] -> c.sam] # unnecessary: already up to date
+ Job = [[a.1.fastq, a.2.fastq] -> a.sam] completed
+ Completed Task = map_dna_sequence
+ Job = [b.sam -> b.bam] # unnecessary: already up to date
+ Job = [c.sam -> c.bam] # unnecessary: already up to date
+ Job = [a.sam -> a.bam] completed
+ Completed Task = compress_sam_file
+ Job = [b.bam -> b.statistics, use_linear_model] # unnecessary: already up to date
+ Job = [c.bam -> c.statistics, use_linear_model] # unnecessary: already up to date
+ Job = [a.bam -> a.statistics, use_linear_model] completed
+ Completed Task = summarise_bam_file
+
+
+
+
+.. index::
+ pair: output_from; referring to functions before they are defined
+ pair: output_from; defining tasks out of order
+
+.. _new_manual.output_from:
+
+***************************************
+Defining pipeline tasks out of order
+***************************************
+
+ The examples so far assume that all your pipelined tasks are defined in order
+ (``first_task`` before ``second_task``). This is usually the most sensible way to arrange your code.
+
+ If you wish to refer to tasks which are not yet defined, you can do so by quoting the function name as a string and wrapping
+ it with the :ref:`indicator class <decorators.indicator_objects>` :ref:`output_from(...) <decorators.output_from>` so that *Ruffus*
+ knows this is a :term:`task` name, not a file name:
+
+ .. code-block:: python
+ :emphasize-lines: 5
+
+ #---------------------------------------------------------------
+ #
+ # second task
+ #
+ # task name string wrapped in output_from(...)
+ @transform(output_from("first_task"), suffix(".output.1"), ".output2")
+ def second_task(input_files, output_file):
+ with open(output_file, "w"): pass
+
+
+ #---------------------------------------------------------------
+ #
+ # first task
+ #
+ @transform(first_task_params, suffix(".start"),
+ [".output.1",
+ ".output.extra.1"],
+ "some_extra.string.for_example", 14)
+ def first_task(input_files, output_file_pair,
+ extra_parameter_str, extra_parameter_num):
+ for output_file in output_file_pair:
+ with open(output_file, "w"):
+ pass
+
+
+ #---------------------------------------------------------------
+ #
+ # Run
+ #
+ pipeline_run([second_task])
+
+ You can also refer to tasks (functions) in other modules, in which case the fully
+ qualified name must be used:
+
+ ::
+
+ @transform(output_from("other_module.first_task"), suffix(".output.1"), ".output2")
+ def second_task(input_files, output_file):
+ pass
+
+
+
+.. index::
+ pair: @transform; multiple dependencies
+
+.. _new_manual.transform.multiple_dependencies:
+
+***************************************
+Multiple dependencies
+***************************************
+
+ Each task can depend on more than one antecedent simply by chaining to a list in :ref:`@transform <decorators.transform>`:
+
+ .. code-block:: python
+ :emphasize-lines: 2
+
+ #
+ # third_task depends on both first_task() and second_task()
+ #
+ @transform([first_task, second_task], suffix(".output.1"), ".output2")
+ def third_task(input_files, output_file):
+ with open(output_file, "w"): pass
+
+
+ ``third_task()`` depends on and follows both ``first_task()`` and ``second_task()``. However, these latter two tasks are independent of each other
+ and can, and will, run in parallel. This can be shown clearly in our example if we add a little randomness to the run time of each job:
+
+ .. code-block:: python
+
+ time.sleep(random.random())
+
+ The execution of ``first_task()`` and ``second_task()`` jobs will be interleaved and they finish in no particular order:
+
+ .. code-block:: pycon
+
+ >>> pipeline_run([third_task], multiprocess = 6)
+ Job = [[job3.a.start, job3.b.start] -> [job3.a.output.1, job3.a.output.extra.1], some_extra.string.for_example, 14] completed
+ Job = [[job6.a.start, job6.b.start] -> [job6.a.output.1, job6.a.output.extra.1], some_extra.string.for_example, 14] completed
+ Job = [[job1.a.start, job1.b.start] -> [job1.a.output.1, job1.a.output.extra.1], some_extra.string.for_example, 14] completed
+ Job = [[job4.a.start, job4.b.start] -> [job4.a.output.1, job4.a.output.extra.1], some_extra.string.for_example, 14] completed
+ Job = [[job5.a.start, job5.b.start] -> [job5.a.output.1, job5.a.output.extra.1], some_extra.string.for_example, 14] completed
+ Completed Task = second_task
+ Job = [[job2.a.start, job2.b.start] -> [job2.a.output.1, job2.a.output.extra.1], some_extra.string.for_example, 14] completed
+
+
+ .. note::
+
+ See the :ref:`example code <new_manual.transform.multiple_dependencies.code>`
+
+
+.. index::
+ pair: @follows; imposing order with
+
+.. _new_manual.follows:
+
+***************************************
+:ref:`@follows <decorators.follows>`
+***************************************
+
+ If, for some extrinsic reason, one otherwise independent task has to precede another, this can be specified explicitly using :ref:`@follows <decorators.follows>`:
+
+
+ .. code-block:: python
+ :emphasize-lines: 2
+
+ #
+ # @follows specifies a preceding task
+ #
+ @follows("first_task")
+ @transform(second_task_params, suffix(".start"),
+ [".output.1",
+ ".output.extra.1"],
+ "some_extra.string.for_example", 14)
+ def second_task(input_files, output_file_pair,
+ extra_parameter_str, extra_parameter_num):
+
+
+ :ref:`@follows <decorators.follows>` specifies either a preceding task (e.g. ``first_task``), or if
+ it has not yet been defined, the name (as a string) of a task function (e.g. ``"first_task"``).
+
+ With the addition of :ref:`@follows <decorators.follows>`, all the jobs
+ of ``second_task()`` start *after* those from ``first_task()`` have finished:
+
+ .. code-block:: pycon
+
+
+ >>> pipeline_run([third_task], multiprocess = 6)
+ Job = [[job2.a.start, job2.b.start] -> [job2.a.output.1, job2.a.output.extra.1], some_extra.string.for_example, 14] completed
+ Job = [[job3.a.start, job3.b.start] -> [job3.a.output.1, job3.a.output.extra.1], some_extra.string.for_example, 14] completed
+ Job = [[job1.a.start, job1.b.start] -> [job1.a.output.1, job1.a.output.extra.1], some_extra.string.for_example, 14] completed
+ Completed Task = first_task
+ Job = [[job4.a.start, job4.b.start] -> [job4.a.output.1, job4.a.output.extra.1], some_extra.string.for_example, 14] completed
+ Job = [[job6.a.start, job6.b.start] -> [job6.a.output.1, job6.a.output.extra.1], some_extra.string.for_example, 14] completed
+ Job = [[job5.a.start, job5.b.start] -> [job5.a.output.1, job5.a.output.extra.1], some_extra.string.for_example, 14] completed
+ Completed Task = second_task
+
+
+
+.. index::
+ single: @follows; mkdir (Manual)
+ single: mkdir; @follows (Manual)
+
+.. _new_manual.follows.mkdir:
+
+************************************************************************************************************************************************************
+Making directories automatically with :ref:`@follows <decorators.follows>` and :ref:`mkdir <decorators.mkdir>`
+************************************************************************************************************************************************************
+
+ :ref:`@follows <decorators.follows>` is also useful for making sure one or more destination directories
+ exist before a task is run.
+
+ *Ruffus* provides special syntax to support this, using the special
+ :ref:`mkdir <decorators.mkdir>` indicator class. For example:
+
+ .. code-block:: python
+ :emphasize-lines: 2
+
+ #
+ # @follows specifies both a preceding task and a directory name
+ #
+ @follows("first_task", mkdir("output/results/here"))
+ @transform(second_task_params, suffix(".start"),
+ [".output.1",
+ ".output.extra.1"],
+ "some_extra.string.for_example", 14)
+ def second_task(input_files, output_file_pair,
+ extra_parameter_str, extra_parameter_num):
+
+ Before ``second_task()`` is run, the ``output/results/here`` directory will be created if necessary.
+
+
+.. index::
+ pair: inputs parameters; globs
+ pair: globs in input parameters; Tutorial
+
+.. _new_manual.globs_as_input:
+
+
+******************************************************************************
+Globs in the **Input** parameter
+******************************************************************************
+
+ * As a syntactic convenience, *Ruffus* also allows you to specify a |glob|_ pattern (e.g. ``*.txt``) in the
+   **Input** parameter.
+ * |glob|_ patterns are automatically expanded so that all matching file names become the **Input**.
+ * Any string within **Input** which contains any of the characters ``*?[]`` will be treated as a |glob|_ pattern.
+
+ The first function in our initial *Ruffus* pipeline example could have been written as:
+
+ .. code-block:: python
+ :emphasize-lines: 4
+
+ #
+ # STAGE 1 fasta->sam
+ #
+ @transform("*.fasta", # Input = glob
+ suffix(".fasta"), # suffix = .fasta
+ ".sam") # Output suffix = .sam
+ def map_dna_sequence(input_file,
+ output_file):
+ ""
+
+
+.. index::
+ pair: Mixing tasks, globs and file names; Tutorial
+
+
+******************************************************************************
+Mixing Tasks and Globs in the **Input** parameter
+******************************************************************************
+
+ |glob|_ patterns, references to tasks and file name strings
+ can be mixed freely in (nested) python lists and tuples in the **Input** parameter.
+
+ For example, a task function can chain to the **Output** from multiple upstream tasks:
+
+ .. code-block:: python
+
+ @transform([task1, task2,      # Input = multiple tasks
+             "aa*.fasta",       #         + all files matching glob
+             "zz.fasta"],       #         + a single file name
+            suffix(".fasta"),   # suffix = .fasta
+            ".sam")             # Output suffix = .sam
+ def map_dna_sequence(input_file,
+                      output_file):
+     ""
+
+ In all cases, *Ruffus* tries to do the right thing, and to make the simple or
+ obvious case require the simplest, least onerous syntax.
+
+ If sometimes *Ruffus* does not behave the way you expect, please write to the authors:
+ it may be a bug!
+
+ :ref:`new_manual.pipeline_printout` and
+ :ref:`new_manual.cmdline` will show you how
+ to make sure that your intentions are reflected in *Ruffus* code.
+
diff --git a/doc/tutorials/new_tutorial/transform_in_parallel_code.rst b/doc/tutorials/new_tutorial/transform_in_parallel_code.rst
new file mode 100644
index 0000000..3160e2e
--- /dev/null
+++ b/doc/tutorials/new_tutorial/transform_in_parallel_code.rst
@@ -0,0 +1,366 @@
+.. include:: ../../global.inc
+.. include:: manual_chapter_numbers.inc
+
+.. _new_manual.transform_in_parallel.code:
+
+######################################################################################################
+|new_manual.transform_in_parallel.chapter_num|: Python Code for More on ``@transform``-ing data
+######################################################################################################
+
+.. seealso::
+
+
+ * :ref:`Manual Table of Contents <new_manual.table_of_contents>`
+ * :ref:`@transform syntax in detail <decorators.transform>`
+ * Back to |new_manual.transform_in_parallel.chapter_num|: :ref:`More on @transform-ing data and @originate <new_manual.transform_in_parallel>`
+
+*******************************************
+Producing several items / files per job
+*******************************************
+
+ ::
+
+ from ruffus import *
+
+ #---------------------------------------------------------------
+ # Create pairs of input files
+ #
+ first_task_params = [
+ ['job1.a.start', 'job1.b.start'],
+ ['job2.a.start', 'job2.b.start'],
+ ['job3.a.start', 'job3.b.start'],
+ ]
+
+ for input_file_pairs in first_task_params:
+ for input_file in input_file_pairs:
+ open(input_file, "w")
+
+
+ #---------------------------------------------------------------
+ #
+ # first task
+ #
+ @transform(first_task_params, suffix(".start"),
+ [".output.1",
+ ".output.extra.1"],
+ "some_extra.string.for_example", 14)
+ def first_task(input_files, output_file_pair,
+ extra_parameter_str, extra_parameter_num):
+ for output_file in output_file_pair:
+ with open(output_file, "w"):
+ pass
+
+
+ #---------------------------------------------------------------
+ #
+ # second task
+ #
+ @transform(first_task, suffix(".output.1"), ".output2")
+ def second_task(input_files, output_file):
+ with open(output_file, "w"): pass
+
+
+ #---------------------------------------------------------------
+ #
+ # Run
+ #
+ pipeline_run([second_task])
+
+=============================
+Resulting Output
+=============================
+
+ ::
+
+ >>> pipeline_run([second_task])
+ Job = [[job1.a.start, job1.b.start] -> [job1.a.output.1, job1.a.output.extra.1], some_extra.string.for_example, 14] completed
+ Job = [[job2.a.start, job2.b.start] -> [job2.a.output.1, job2.a.output.extra.1], some_extra.string.for_example, 14] completed
+ Job = [[job3.a.start, job3.b.start] -> [job3.a.output.1, job3.a.output.extra.1], some_extra.string.for_example, 14] completed
+ Completed Task = first_task
+ Job = [[job1.a.output.1, job1.a.output.extra.1] -> job1.a.output2] completed
+ Job = [[job2.a.output.1, job2.a.output.extra.1] -> job2.a.output2] completed
+ Job = [[job3.a.output.1, job3.a.output.extra.1] -> job3.a.output2] completed
+ Completed Task = second_task
+
+
+
+*******************************************
+ Defining task functions out of order
+*******************************************
+
+ .. code-block:: python
+ :emphasize-lines: 22
+
+ from ruffus import *
+
+ #---------------------------------------------------------------
+ # Create pairs of input files
+ #
+ first_task_params = [
+ ['job1.a.start', 'job1.b.start'],
+ ['job2.a.start', 'job2.b.start'],
+ ['job3.a.start', 'job3.b.start'],
+ ]
+
+ for input_file_pairs in first_task_params:
+ for input_file in input_file_pairs:
+ open(input_file, "w")
+
+
+
+ #---------------------------------------------------------------
+ #
+ # second task defined first
+ #
+ # task name string wrapped in output_from(...)
+ @transform(output_from("first_task"), suffix(".output.1"), ".output2")
+ def second_task(input_files, output_file):
+ with open(output_file, "w"): pass
+
+
+ #---------------------------------------------------------------
+ #
+ # first task
+ #
+ @transform(first_task_params, suffix(".start"),
+ [".output.1",
+ ".output.extra.1"],
+ "some_extra.string.for_example", 14)
+ def first_task(input_files, output_file_pair,
+ extra_parameter_str, extra_parameter_num):
+ for output_file in output_file_pair:
+ with open(output_file, "w"):
+ pass
+
+
+ #---------------------------------------------------------------
+ #
+ # Run
+ #
+ pipeline_run([second_task])
+
+
+=============================
+Resulting Output
+=============================
+
+ .. code-block:: pycon
+
+ >>> pipeline_run([second_task])
+ Job = [[job1.a.start, job1.b.start] -> [job1.a.output.1, job1.a.output.extra.1], some_extra.string.for_example, 14] completed
+ Job = [[job2.a.start, job2.b.start] -> [job2.a.output.1, job2.a.output.extra.1], some_extra.string.for_example, 14] completed
+ Job = [[job3.a.start, job3.b.start] -> [job3.a.output.1, job3.a.output.extra.1], some_extra.string.for_example, 14] completed
+ Completed Task = first_task
+ Job = [[job1.a.output.1, job1.a.output.extra.1] -> job1.a.output2] completed
+ Job = [[job2.a.output.1, job2.a.output.extra.1] -> job2.a.output2] completed
+ Job = [[job3.a.output.1, job3.a.output.extra.1] -> job3.a.output2] completed
+ Completed Task = second_task
+
+.. _new_manual.transform.multiple_dependencies.code:
+
+*******************************************
+Multiple dependencies
+*******************************************
+
+ .. code-block:: python
+ :emphasize-lines: 58
+
+ from ruffus import *
+ import time
+ import random
+
+ #---------------------------------------------------------------
+ # Create pairs of input files
+ #
+ first_task_params = [
+ ['job1.a.start', 'job1.b.start'],
+ ['job2.a.start', 'job2.b.start'],
+ ['job3.a.start', 'job3.b.start'],
+ ]
+ second_task_params = [
+ ['job4.a.start', 'job4.b.start'],
+ ['job5.a.start', 'job5.b.start'],
+ ['job6.a.start', 'job6.b.start'],
+ ]
+
+ for input_file_pairs in first_task_params + second_task_params:
+ for input_file in input_file_pairs:
+ open(input_file, "w")
+
+
+
+ #---------------------------------------------------------------
+ #
+ # first task
+ #
+ @transform(first_task_params, suffix(".start"),
+ [".output.1",
+ ".output.extra.1"],
+ "some_extra.string.for_example", 14)
+ def first_task(input_files, output_file_pair,
+ extra_parameter_str, extra_parameter_num):
+ for output_file in output_file_pair:
+ with open(output_file, "w"):
+ pass
+ time.sleep(random.random())
+
+
+
+ #---------------------------------------------------------------
+ #
+ # second task
+ #
+ @transform(second_task_params, suffix(".start"),
+ [".output.1",
+ ".output.extra.1"],
+ "some_extra.string.for_example", 14)
+ def second_task(input_files, output_file_pair,
+ extra_parameter_str, extra_parameter_num):
+ for output_file in output_file_pair:
+ with open(output_file, "w"):
+ pass
+ time.sleep(random.random())
+
+
+ #---------------------------------------------------------------
+ #
+ # third task
+ #
+ # depends on both first_task() and second_task()
+ @transform([first_task, second_task], suffix(".output.1"), ".output2")
+ def third_task(input_files, output_file):
+ with open(output_file, "w"): pass
+
+
+ #---------------------------------------------------------------
+ #
+ # Run
+ #
+ pipeline_run([third_task], multiprocess = 6)
+
+=============================
+Resulting Output
+=============================
+
+ .. code-block:: pycon
+
+ >>> pipeline_run([third_task], multiprocess = 6)
+ Job = [[job3.a.start, job3.b.start] -> [job3.a.output.1, job3.a.output.extra.1], some_extra.string.for_example, 14] completed
+ Job = [[job6.a.start, job6.b.start] -> [job6.a.output.1, job6.a.output.extra.1], some_extra.string.for_example, 14] completed
+ Job = [[job1.a.start, job1.b.start] -> [job1.a.output.1, job1.a.output.extra.1], some_extra.string.for_example, 14] completed
+ Job = [[job4.a.start, job4.b.start] -> [job4.a.output.1, job4.a.output.extra.1], some_extra.string.for_example, 14] completed
+ Job = [[job5.a.start, job5.b.start] -> [job5.a.output.1, job5.a.output.extra.1], some_extra.string.for_example, 14] completed
+ Completed Task = second_task
+ Job = [[job2.a.start, job2.b.start] -> [job2.a.output.1, job2.a.output.extra.1], some_extra.string.for_example, 14] completed
+ Completed Task = first_task
+ Job = [[job1.a.output.1, job1.a.output.extra.1] -> job1.a.output2] completed
+ Job = [[job2.a.output.1, job2.a.output.extra.1] -> job2.a.output2] completed
+ Job = [[job3.a.output.1, job3.a.output.extra.1] -> job3.a.output2] completed
+ Job = [[job4.a.output.1, job4.a.output.extra.1] -> job4.a.output2] completed
+ Job = [[job5.a.output.1, job5.a.output.extra.1] -> job5.a.output2] completed
+ Job = [[job6.a.output.1, job6.a.output.extra.1] -> job6.a.output2] completed
+ Completed Task = third_task
+
+
+*******************************************
+Multiple dependencies after @follows
+*******************************************
+
+ .. code-block:: python
+ :emphasize-lines: 31
+
+ from ruffus import *
+ import time
+ import random
+
+ #---------------------------------------------------------------
+ # Create pairs of input files
+ #
+ first_task_params = [
+ ['job1.a.start', 'job1.b.start'],
+ ['job2.a.start', 'job2.b.start'],
+ ['job3.a.start', 'job3.b.start'],
+ ]
+ second_task_params = [
+ ['job4.a.start', 'job4.b.start'],
+ ['job5.a.start', 'job5.b.start'],
+ ['job6.a.start', 'job6.b.start'],
+ ]
+
+ for input_file_pairs in first_task_params + second_task_params:
+ for input_file in input_file_pairs:
+ open(input_file, "w")
+
+
+
+ #---------------------------------------------------------------
+ #
+ # first task
+ #
+ @transform(first_task_params, suffix(".start"),
+ [".output.1",
+ ".output.extra.1"],
+ "some_extra.string.for_example", 14)
+ def first_task(input_files, output_file_pair,
+ extra_parameter_str, extra_parameter_num):
+ for output_file in output_file_pair:
+ with open(output_file, "w"):
+ pass
+ time.sleep(random.random())
+
+
+
+ #---------------------------------------------------------------
+ #
+ # second task
+ #
+ @follows("first_task")
+ @transform(second_task_params, suffix(".start"),
+ [".output.1",
+ ".output.extra.1"],
+ "some_extra.string.for_example", 14)
+ def second_task(input_files, output_file_pair,
+ extra_parameter_str, extra_parameter_num):
+ for output_file in output_file_pair:
+ with open(output_file, "w"):
+ pass
+ time.sleep(random.random())
+
+
+ #---------------------------------------------------------------
+ #
+ # third task
+ #
+ # depends on both first_task() and second_task()
+ @transform([first_task, second_task], suffix(".output.1"), ".output2")
+ def third_task(input_files, output_file):
+ with open(output_file, "w"): pass
+
+
+ #---------------------------------------------------------------
+ #
+ # Run
+ #
+ pipeline_run([third_task], multiprocess = 6)
+
+=======================================================================================
+Resulting Output: ``first_task`` completes before ``second_task``
+=======================================================================================
+
+Note that, because of the explicit ``@follows("first_task")`` dependency, all of the ``first_task`` jobs complete before any ``second_task`` job starts:
+
+ .. code-block:: pycon
+
+ >>> pipeline_run([third_task], multiprocess = 6)
+ Job = [[job2.a.start, job2.b.start] -> [job2.a.output.1, job2.a.output.extra.1], some_extra.string.for_example, 14] completed
+ Job = [[job3.a.start, job3.b.start] -> [job3.a.output.1, job3.a.output.extra.1], some_extra.string.for_example, 14] completed
+ Job = [[job1.a.start, job1.b.start] -> [job1.a.output.1, job1.a.output.extra.1], some_extra.string.for_example, 14] completed
+ Completed Task = first_task
+ Job = [[job4.a.start, job4.b.start] -> [job4.a.output.1, job4.a.output.extra.1], some_extra.string.for_example, 14] completed
+ Job = [[job6.a.start, job6.b.start] -> [job6.a.output.1, job6.a.output.extra.1], some_extra.string.for_example, 14] completed
+ Job = [[job5.a.start, job5.b.start] -> [job5.a.output.1, job5.a.output.extra.1], some_extra.string.for_example, 14] completed
+ Completed Task = second_task
+ Job = [[job1.a.output.1, job1.a.output.extra.1] -> job1.a.output2] completed
+ Job = [[job2.a.output.1, job2.a.output.extra.1] -> job2.a.output2] completed
+ Job = [[job3.a.output.1, job3.a.output.extra.1] -> job3.a.output2] completed
+ Job = [[job4.a.output.1, job4.a.output.extra.1] -> job4.a.output2] completed
+ Job = [[job5.a.output.1, job5.a.output.extra.1] -> job5.a.output2] completed
+ Job = [[job6.a.output.1, job6.a.output.extra.1] -> job6.a.output2] completed
diff --git a/doc/why_ruffus.rst b/doc/why_ruffus.rst
new file mode 100644
index 0000000..3e26396
--- /dev/null
+++ b/doc/why_ruffus.rst
@@ -0,0 +1,37 @@
+.. Design:
+
+.. include:: global.inc
+
+.. index::
+ pair: Ruffus; Etymology
+ pair: Ruffus; Name origins
+
+.. _design.why_ruffus:
+
+###############################
+Why *Ruffus*?
+###############################
+
+**Cylindrophis ruffus** is the name of the
+`red-tailed pipe snake <http://en.wikipedia.org/wiki/Cylindrophis_ruffus>`_ (bad python-y pun)
+which can be found in `Hong Kong <http://www.discoverhongkong.com/eng/index.html>`_ where the original author comes from.
+
+
+*Ruffus* is a shy creature, and pretends to be a cobra or a `banded krait <http://en.wikipedia.org/wiki/File:Bandedkrait.jpg>`__ by putting up its red tail and ducking its
+head in its coils when startled.
+
++------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+| .. image:: images/wikimedia_cyl_ruffus.jpg | .. image:: images/wikimedia_bandedkrait.jpg |
+| | :scale: 77 |
++------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+| * Not venomous | * Deadly poisonous |
+| * `Mostly Harmless <http://en.wikipedia.org/wiki/Mostly_Harmless>`_ | * `Seriously unfriendly <http://en.wikipedia.org/wiki/List_of_races_and_species_in_The_Hitchhiker's_Guide_to_the_Galaxy#Ravenous_Bugblatter_Beast_of_Traal>`_ |
++------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+
+
+Be careful not to step on one when running down country park lanes at full speed
+in Hong Kong: this snake is a `rare breed <http://www.hkras.org/eng/info/hkspp.htm>`_!
+
+*Ruffus* does most of its work at night and sleeps during the day: typical of many (but alas not all) python programmers!
+
+The original `red-tailed pipe snake <http://upload.wikimedia.org/wikipedia/commons/a/a1/Cyl_ruffus_061212_2025_tdp.jpg>`__ and `banded krait <http://en.wikipedia.org/wiki/File:AB_054_Banded_Krait.JPG>`__ images are from wikimedia.
diff --git a/ruffus/__init__.py b/ruffus/__init__.py
new file mode 100644
index 0000000..301ee73
--- /dev/null
+++ b/ruffus/__init__.py
@@ -0,0 +1,38 @@
+#!/usr/bin/env python
+################################################################################
+#
+# __init__.py
+#
+#
+# Copyright (c) 10/9/2009 Leo Goodstadt
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+#################################################################################
+#from graph import *
+#from print_dependencies import *
+from .task import pipeline_printout, pipeline_printout_graph, pipeline_run, pipeline_get_task_names, register_cleanup, check_if_uptodate, active_if, split, transform, merge, collate, files, files_re, follows, parallel, stderr_logger, black_hole_logger, suffix, regex, inputs, add_inputs, touch_file, combine, mkdir, output_from, posttask, JobSignalledBreak, runtime_parameter, jobs_limit, formatter, subdivide, originate, graphviz
+from .graph import graph_colour_demo_printout
+from .file_name_parameters import needs_update_check_modify_time
+from . import cmdline
+from . import combinatorics
+#output_dependency_tree_in_dot_format, output_dependency_tree_key_in_dot_format
+from . import ruffus_version
+__version__=ruffus_version.__version
+
+
diff --git a/ruffus/adjacent_pairs_iterate.py b/ruffus/adjacent_pairs_iterate.py
new file mode 100644
index 0000000..c5423d2
--- /dev/null
+++ b/ruffus/adjacent_pairs_iterate.py
@@ -0,0 +1,68 @@
+#!/usr/bin/env python
+from __future__ import print_function
+import sys
+if sys.hexversion < 0x03000000:
+ from future_builtins import zip
+################################################################################
+#
+# adjacent_pairs_iterate.py
+#
+#
+# Copyright (c) 2007 Leo Goodstadt
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+#################################################################################
+
+
+
+def adjacent_pairs_iterate (array, reverse = False):
+ """
+ yields pairs of adjacent items (successive positions) in the array
+ """
+ if len(array) < 2:
+ return
+
+ # get iterators to successive positions
+ if reverse:
+ curr_iter = reversed(array)
+ next_iter = reversed(array)
+ else:
+ curr_iter = iter(array)
+ next_iter = iter(array)
+ next(next_iter)
+ for i, j in zip(curr_iter, next_iter):
+ yield i, j
+
+
+def unit_test():
+ numbers = list(range(10))
+ print("Forward")
+ for i, j in adjacent_pairs_iterate(numbers):
+ print(i, j)
+ print("Reversed")
+ for i, j in adjacent_pairs_iterate(numbers, reverse=True):
+ print(i, j)
+ print(numbers)
+
+if __name__ == '__main__':
+ unit_test()
+
+
+
+
diff --git a/ruffus/cmdline.py b/ruffus/cmdline.py
new file mode 100644
index 0000000..5b8937b
--- /dev/null
+++ b/ruffus/cmdline.py
@@ -0,0 +1,827 @@
+#!/usr/bin/env python
+################################################################################
+#
+#
+# cmdline.py
+#
+# Copyright (c) 10/9/2009 Leo Goodstadt
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+#################################################################################
+"""
+
+********************************************
+:mod:`ruffus.cmdline` -- Overview
+********************************************
+
+.. moduleauthor:: Leo Goodstadt <ruffus at llew.org.uk>
+
+
+ #
+ # Using argparse (new in python v 2.7)
+ #
+ from ruffus import *
+
+ parser = cmdline.get_argparse( description='WHAT DOES THIS PIPELINE DO?')
+
+ parser.add_argument("--input_file")
+
+ options = parser.parse_args()
+
+ # logger which can be passed to ruffus tasks
+ logger, logger_mutex = cmdline.setup_logging (__name__, options.log_file, options.verbose)
+
+ #_____________________________________________________________________________________
+
+ # pipelined functions go here
+
+ #_____________________________________________________________________________________
+
+ cmdline.run (options)
+
+
+
+ #
+ # Using optparse (deprecated since python v 2.7)
+ #
+ from ruffus import *
+
+ parser = cmdline.get_optparse(version="%prog 1.0", usage = "\n\n %prog [options]")
+
+ parser.add_option("-c", "--custom", dest="custom", action="count")
+
+
+ (options, remaining_args) = parser.parse_args()
+
+ # logger which can be passed to ruffus tasks
+ logger, logger_mutex = cmdline.setup_logging ("this_program", options.log_file, options.verbose)
+
+ #_____________________________________________________________________________________
+
+ # pipelined functions go here
+
+ #_____________________________________________________________________________________
+
+ cmdline.run (options)
+
+
+"""
+
+
+#_________________________________________________________________________________________
+
+# get_argparse
+#_________________________________________________________________________________________
+def get_argparse (*args, **args_dict):
+ """
+ Set up argparse
+ to allow for ruffus specific options:
+
+ --verbose
+ --version
+ --log_file
+
+ -t, --target_tasks
+ -j, --jobs
+ -n, --just_print
+ --flowchart
+ --touch_files_only
+ --recreate_database
+ --checksum_file_name
+ --key_legend_in_graph
+ --draw_graph_horizontally
+ --flowchart_format
+ --forced_tasks
+
+ Optionally specify ignored_args = ["verbose", "recreate_database",...]
+ listing names which will not be added as valid options on the command line
+
+ Optionally specify version = "%(prog)s version 1.234"
+
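+ For example (a minimal sketch; the description text and the ignored option names
+ are illustrative only):
+
+     parser = cmdline.get_argparse(description  = "WHAT DOES THIS PIPELINE DO?",
+                                   version      = "%(prog)s 1.0",
+                                   ignored_args = ["use_threads", "recreate_database"])
+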
+ """
+ import argparse
+
+ # version and ignored_args are for append_to_argparse
+ orig_args_dict = dict(args_dict)
+ if "version" in args_dict:
+ del args_dict["version"]
+ if "ignored_args" in args_dict:
+ del args_dict["ignored_args"]
+
+ parser = argparse.ArgumentParser(*args, **args_dict)
+
+
+ return append_to_argparse(parser, **orig_args_dict)
+
+#_________________________________________________________________________________________
+
+# append_to_argparse
+#_________________________________________________________________________________________
+def append_to_argparse (parser, **args_dict):
+ """
+ Common options:
+
+ --verbose
+ --version
+ --log_file
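+
+ e.g. (a minimal sketch, starting from any pre-existing argparse parser):
+
+     import argparse
+     parser = argparse.ArgumentParser(description = "WHAT DOES THIS PIPELINE DO?")
+     parser = append_to_argparse(parser, ignored_args = ["log_file"])
+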
+ """
+
+ if "version" in args_dict:
+ prog_version = args_dict["version"]
+ else:
+ prog_version = "%(prog)s 1.0"
+
+ #
+ # ignored_args contains a list of options which will *not* be added
+ #
+ if "ignored_args" in args_dict:
+ if isinstance(args_dict["ignored_args"], str):
+ ignored_args = set([args_dict["ignored_args"]])
+ else:
+ try:
+ ignored_args = set(args_dict["ignored_args"])
+ except:
+ raise Exception("Error: expected ignored_args = ['list_of', 'option_names']")
+ else:
+ ignored_args = set()
+
+
+
+ common_options = parser.add_argument_group('Common options')
+ if "verbose" not in ignored_args:
+ common_options.add_argument('--verbose', "-v", const="+", default=[], nargs='?',
+ action="append",
+ help="Print more verbose messages for each additional verbose level.")
+ if "version" not in ignored_args:
+ common_options.add_argument('--version', action='version', version=prog_version)
+ if "log_file" not in ignored_args:
+ common_options.add_argument("-L", "--log_file", metavar="FILE", type=str,
+ help="Name and path of log file")
+
+
+ #
+ # pipeline
+ #
+ pipeline_options = parser.add_argument_group('pipeline arguments')
+ if "target_tasks" not in ignored_args:
+ pipeline_options.add_argument("-T", "--target_tasks", action="append",
+ metavar="JOBNAME", type=str,
+ help="Target task(s) of pipeline.", default = [])
+ if "jobs" not in ignored_args:
+ pipeline_options.add_argument("-j", "--jobs", default=1, metavar="N", type=int,
+ help="Allow N jobs (commands) to run simultaneously.")
+ if "use_threads" not in ignored_args:
+ pipeline_options.add_argument("--use_threads", action="store_true",
+ help="Use multiple threads rather than processes. Needs --jobs N with N > 1")
+ if "just_print" not in ignored_args:
+ pipeline_options.add_argument("-n", "--just_print", action="store_true",
+ help="Don't actually run any commands; just print the pipeline.")
+ if "touch_files_only" not in ignored_args:
+ pipeline_options.add_argument("--touch_files_only", action="store_true",
+ help="Don't actually run the pipeline; just 'touch' the output for each task to make them appear up to date.")
+ if "recreate_database" not in ignored_args:
+ pipeline_options.add_argument("--recreate_database", action="store_true",
+ help="Don't actually run the pipeline; just recreate the checksum database.")
+ if "checksum_file_name" not in ignored_args:
+ pipeline_options.add_argument("--checksum_file_name", dest = "history_file", metavar="FILE", type=str,
+ help="Path of the checksum file.")
+ if "flowchart" not in ignored_args:
+ pipeline_options.add_argument("--flowchart", metavar="FILE", type=str,
+ help="Don't run any commands; just print pipeline as a flowchart.")
+
+ #
+ # Less common pipeline options
+ #
+ if "key_legend_in_graph" not in ignored_args:
+ pipeline_options.add_argument("--key_legend_in_graph", action="store_true",
+ help="Print out legend and key for dependency graph.")
+ if "draw_graph_horizontally" not in ignored_args:
+ pipeline_options.add_argument("--draw_graph_horizontally", action="store_true", dest= "draw_horizontally",
+ help="Draw horizontal dependency graph.")
+ if "flowchart_format" not in ignored_args:
+ pipeline_options.add_argument("--flowchart_format", metavar="FORMAT",
+ type=str, choices = ["svg", "svgz", "png", "jpg", "pdf", "dot"],
+ # "eps", "jpeg", "gif", "plain", "ps", "wbmp", "canon",
+ # "cmap", "cmapx", "cmapx_np", "fig", "gd", "gd2",
+ # "gv", "imap", "imap_np", "ismap", "jpe", "plain-ext",
+ # "ps2", "tk", "vml", "vmlz", "vrml", "x11", "xdot", "xlib"
+ default = None,
+ help="format of dependency graph file. Can be 'pdf', " +
+ "'svg', 'svgz' (Structured Vector Graphics), 'pdf', " +
+ "'png' 'jpg' (bitmap graphics) etc ")
+ if "forced_tasks" not in ignored_args:
+ pipeline_options.add_argument("--forced_tasks", action="append",
+ metavar="JOBNAME", type=str,
+ help="Task(s) which will be included even if they are up to date.", default = [])
+
+
+
+ return parser
+
+
+#_________________________________________________________________________________________
+
+# Hacky extension to *deprecated!!* optparse to support variable number of arguments
+# for --verbose
+#_________________________________________________________________________________________
+def vararg_callback(option, opt_str, value, parser):
+ #
+ # get current value
+ #
+ if hasattr(parser.values, option.dest):
+ value = getattr(parser.values, option.dest)
+ else:
+ value = []
+ if not len(parser.rargs):
+ value.append("+")
+ else:
+ arg = parser.rargs[0]
+ # stop on --foo like options
+ if arg[:1] == "-":
+ value.append("+")
+ else:
+ value.append(arg)
+ del parser.rargs[:1]
+ setattr(parser.values, option.dest, value)
+
+
+#_________________________________________________________________________________________
+
+# optparse is deprecated!
+#_________________________________________________________________________________________
+def get_optparse (*args, **args_dict):
+ """
+ Set up OptionParser from optparse
+ to allow for ruffus specific options:
+
+ --verbose
+ --version
+ --log_file
+
+ -t, --target_tasks
+ -j, --jobs
+ -n, --just_print
+ --flowchart
+ --touch_files_only
+ --recreate_database
+ --checksum_file_name
+ --key_legend_in_graph
+ --draw_graph_horizontally
+ --flowchart_format
+ --forced_tasks
+
+ Optionally specify ignored_args = ["verbose", "recreate_database",...]
+ listing names which will not be added as valid options on the command line
+
+ N.B. optparse is deprecated since python version 2.7.
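+
+ e.g. (a minimal sketch):
+
+     parser = cmdline.get_optparse(version = "%prog 1.0",
+                                   ignored_args = ["touch_files_only"])
+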
+ """
+ from optparse import OptionParser
+
+ # ignored_args are for append_to_optparse
+ orig_args_dict = dict(args_dict)
+ if "ignored_args" in args_dict:
+ del args_dict["ignored_args"]
+
+ parser = OptionParser(*args, **args_dict)
+
+ return append_to_optparse(parser, **orig_args_dict)
+
+#_________________________________________________________________________________________
+
+# optparse is deprecated!
+#_________________________________________________________________________________________
+def append_to_optparse (parser, **args_dict):
+ """
+ Set up OptionParser from optparse
+ to allow for ruffus specific options:
+
+ --verbose
+ --version
+ --log_file
+
+ -t, --target_tasks
+ -j, --jobs
+ -n, --just_print
+ --flowchart
+ --touch_files_only
+ --recreate_database
+ --checksum_file_name
+ --key_legend_in_graph
+ --draw_graph_horizontally
+ --flowchart_format
+ --forced_tasks
+
+ Optionally specify ignored_args = ["verbose", "recreate_database",...]
+ listing names which will not be added as valid options on the command line
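+
+ e.g. (a minimal sketch, starting from any pre-existing optparse OptionParser):
+
+     parser = append_to_optparse(parser, ignored_args = ["key_legend_in_graph"])
+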
+ """
+
+ #
+ # ignored_args contains a list of options which will *not* be added
+ #
+ if "ignored_args" in args_dict:
+ if isinstance(args_dict["ignored_args"], str):
+ ignored_args = set([args_dict["ignored_args"]])
+ else:
+ try:
+ ignored_args = set(args_dict["ignored_args"])
+ except:
+ raise Exception("Error: expected ignored_args = ['list_of', 'option_names']")
+ else:
+ ignored_args = set()
+
+ #
+ # general options: verbosity / logging
+ #
+ if "verbose" not in ignored_args:
+ parser.add_option("-v", "--verbose", dest = "verbose",
+ action="callback", default=[],
+ #---------------------------------------------------------------
+ # hack to get around unreasonable discrimination against
+ # --long_options=with_equals in optparse::_process_long_opt()
+ # when using a callback
+ type = int,
+ nargs=0,
+ #---------------------------------------------------------------
+ callback=vararg_callback,
+ help="Print more verbose messages for each additional verbose level.")
+ if "log_file" not in ignored_args:
+ parser.add_option("-L", "--log_file", dest="log_file",
+ metavar="FILE",
+ type="string",
+ help="Name and path of log file")
+ #
+ # pipeline
+ #
+ if "target_tasks" not in ignored_args:
+ parser.add_option("-t", "--target_tasks", dest="target_tasks",
+ action="append",
+ default = list(),
+ metavar="JOBNAME",
+ type="string",
+ help="Target task(s) of pipeline.")
+ if "jobs" not in ignored_args:
+ parser.add_option("-j", "--jobs", dest="jobs",
+ default=1,
+ metavar="N",
+ type="int",
+ help="Allow N jobs (commands) to run simultaneously.")
+ if "use_threads" not in ignored_args:
+ parser.add_option("--use_threads", dest="use_threads",
+ action="store_true", default=False,
+ help="Use multiple threads rather than processes. Needs --jobs N with N > 1")
+ if "just_print" not in ignored_args:
+ parser.add_option("-n", "--just_print", dest="just_print",
+ action="store_true", default=False,
+ help="Don't actually run any commands; just print the pipeline.")
+ if "touch_files_only" not in ignored_args:
+ parser.add_option("--touch_files_only", dest="touch_files_only",
+ action="store_true", default=False,
+ help="Don't actually run the pipeline; just 'touch' the output for each task to make them appear up to date.")
+ if "recreate_database" not in ignored_args:
+ parser.add_option("--recreate_database", dest="recreate_database",
+ action="store_true", default=False,
+ help="Don't actually run the pipeline; just recreate the checksum database.")
+ if "checksum_file_name" not in ignored_args:
+ parser.add_option("--checksum_file_name", dest="history_file",
+ metavar="FILE",
+ type="string",
+ help="Path of the checksum file.")
+ if "flowchart" not in ignored_args:
+ parser.add_option("--flowchart", dest="flowchart",
+ metavar="FILE",
+ type="string",
+ help="Don't actually run any commands; just print the pipeline "
+ "as a flowchart.")
+
+ #
+ # Less common pipeline options
+ #
+ if "key_legend_in_graph" not in ignored_args:
+ parser.add_option("--key_legend_in_graph", dest="key_legend_in_graph",
+ action="store_true", default=False,
+ help="Print out legend and key for dependency graph.")
+ if "flowchart_format" not in ignored_args:
+ parser.add_option("--draw_graph_horizontally", dest="draw_horizontally",
+ action="store_true", default=False,
+ help="Draw horizontal dependency graph.")
+ if "flowchart_format" not in ignored_args:
+ parser.add_option("--flowchart_format", dest="flowchart_format",
+ metavar="FORMAT",
+ type="string",
+ default = 'svg',
+ help="format of dependency graph file. Can be 'ps' (PostScript), "+
+ "'svg' 'svgz' (Structured Vector Graphics), " +
+ "'png' 'gif' (bitmap graphics) etc ")
+ if "forced_tasks" not in ignored_args:
+ parser.add_option("--forced_tasks", dest="forced_tasks",
+ action="append",
+ default = list(),
+ metavar="JOBNAME",
+ type="string",
+ help="Pipeline task(s) which will be included even if they are up to date.")
+
+
+ return parser
+
+
+
+
+import logging
+import logging.handlers
+MESSAGE = 15
+logging.addLevelName(MESSAGE, "MESSAGE")
+from . import task
+from . import proxy_logger
+from .ruffus_utility import CHECKSUM_REGENERATE
+import sys
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Logger
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+#
+# Allow logging across Ruffus pipeline
+#
+def setup_logging (module_name, log_file_name, verbose):
+ return proxy_logger.make_shared_logger_and_proxy (setup_logging_factory, module_name, [log_file_name, verbose])
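+
+# A minimal usage sketch (assumption: the returned proxy logger and mutex are used
+# inside ruffus tasks with the usual "with logger_mutex:" pattern; the log file
+# name is illustrative only):
+#
+#   logger, logger_mutex = setup_logging(__name__, "my_pipeline.log", 1)
+#   with logger_mutex:
+#       logger.info("pipeline started")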
+
+
+def setup_logging_factory (logger_name, args):
+ log_file_name, verbose = args
+ """
+ This function is a simple wrapper around the python
+ `logging <http://docs.python.org/library/logging.html>`_ module.
+
+ This *logger_factory* example creates logging objects which can
+ then be managed by proxy via ``ruffus.proxy_logger.make_shared_logger_and_proxy()``
+
+ This can be:
+
+ * a `disk log file <http://docs.python.org/library/logging.html#filehandler>`_
+ * a automatically backed-up `(rotating) log <http://docs.python.org/library/logging.html#rotatingfilehandler>`_.
+ * any log specified in a `configuration file <http://docs.python.org/library/logging.html#configuration-file-format>`_
+
+ These are specified in the ``args`` parameter forwarded by ``make_shared_logger_and_proxy()``
+ (``setup_logging()`` above forwards the two-element list ``[log_file_name, verbose]``).
+
+ :param logger_name: name of log
+ :param args: the parameters forwarded from ``make_shared_logger_and_proxy()``
+
+ Valid entries include:
+
+ .. describe:: "level"
+
+ Sets the `threshold <http://docs.python.org/library/logging.html#logging.Handler.setLevel>`_ for the logger.
+
+ .. describe:: "config_file"
+
+ The logging object is configured from this `configuration file <http://docs.python.org/library/logging.html#configuration-file-format>`_.
+
+ .. describe:: "file_name"
+
+ Sets disk log file name.
+
+ .. describe:: "rotating"
+
+ Chooses a `(rotating) log <http://docs.python.org/library/logging.html#rotatingfilehandler>`_.
+
+ .. describe:: "maxBytes"
+
+ Allows the log file to roll over at a predetermined size
+
+ .. describe:: "backupCount"
+
+ If backupCount is non-zero, the system will save old log files by appending the extensions ``.1``, ``.2``, ``.3`` etc., to the filename.
+
+ .. describe:: "delay"
+
+ Defer file creation until the log is written to.
+
+ .. describe:: "formatter"
+
+ `Converts <http://docs.python.org/library/logging.html#formatter-objects>`_ the message to a logged entry string.
+ For example,
+ ::
+
+ "%(asctime)s - %(name)s - %(levelname)6s - %(message)s"
+
+
+
+ """
+
+ #
+ # Log file name with logger level
+ #
+ new_logger = logging.getLogger(logger_name)
+
+ class debug_filter(logging.Filter):
+ """
+ Ignore INFO messages
+ """
+ def filter(self, record):
+ return logging.INFO != record.levelno
+
+ class NullHandler(logging.Handler):
+ """
+ for when there is no logging
+ """
+ def emit(self, record):
+ pass
+
+ # We are interested in all messages
+ new_logger.setLevel(logging.DEBUG)
+ has_handler = False
+
+ # log to file if that is specified
+ if log_file_name:
+ handler = logging.FileHandler(log_file_name, delay=False)
+ class stripped_down_formatter(logging.Formatter):
+ def format(self, record):
+ prefix = ""
+ if not hasattr(self, "first_used"):
+ self.first_used = True
+ prefix = "\n" + self.formatTime(record, "%Y-%m-%d")
+ prefix += " %(name)s\n" % record.__dict__
+ if record.levelname in ("INFO", "MESSAGE", "DEBUG"):
+ self._fmt = " %(asctime)s - %(message)s"
+ else:
+ self._fmt = " %(asctime)s - %(levelname)-7s - %(message)s"
+ return prefix + logging.Formatter.format(self, record)
+ handler.setFormatter(stripped_down_formatter("%(asctime)s - %(name)s - %(levelname)6s - %(message)s", "%H:%M:%S"))
+ handler.setLevel(MESSAGE)
+ new_logger.addHandler(handler)
+ has_handler = True
+
+ # log to stderr if verbose
+ if verbose:
+ stderrhandler = logging.StreamHandler(sys.stderr)
+ stderrhandler.setFormatter(logging.Formatter(" %(message)s"))
+ stderrhandler.setLevel(logging.DEBUG)
+ if log_file_name:
+ stderrhandler.addFilter(debug_filter())
+ new_logger.addHandler(stderrhandler)
+ has_handler = True
+
+ # no logging
+ if not has_handler:
+ new_logger.addHandler(NullHandler())
+
+ #
+ # This log object will be wrapped in proxy
+ #
+ return new_logger
+
+
+
+
+#
+# valid arguments to each function which are not exposed as command line options
+#
+extra_pipeline_printout_graph_options = [
+ "ignore_upstream_of_target" ,
+ "skip_uptodate_tasks" ,
+ "gnu_make_maximal_rebuild_mode" ,
+ "test_all_task_for_update" ,
+ "minimal_key_legend" ,
+ "user_colour_scheme" ,
+ "pipeline_name" ,
+ "size" ,
+ "dpi" ,
+ "history_file" ,
+ "checksum_level" ,
+ "runtime_data" ,
+ ]
+extra_pipeline_printout_options = [
+ "indent" ,
+ "gnu_make_maximal_rebuild_mode" ,
+ "checksum_level" ,
+ "history_file" ,
+ "wrap_width" ,
+ "runtime_data"]
+
+
+extra_pipeline_run_options = [
+ "gnu_make_maximal_rebuild_mode" ,
+ "runtime_data" ,
+ "one_second_per_job" ,
+ # exposed directly in command line
+ #"touch_files_only" ,
+ "history_file" ,
+ "logger" ,
+ "exceptions_terminate_immediately" ,
+ "log_exceptions" ,
+ "checksum_level" ,
+ "multithread"]
+
+
+#_________________________________________________________________________________________
+
+# get_extra_options_appropriate_for_command
+
+#_________________________________________________________________________________________
+def get_extra_options_appropriate_for_command (appropriate_option_names, extra_options):
+ """
+ Get extra options which are appropriate for
+ pipeline_printout
+ pipeline_printout_graph
+ pipeline_run
+ """
+
+ appropriate_options = dict()
+ for option_name in appropriate_option_names:
+ if option_name in extra_options:
+ appropriate_options[option_name] = extra_options[option_name]
+ return appropriate_options
+
+
+
+#_________________________________________________________________________________________
+
+# handle_verbose
+
+#_________________________________________________________________________________________
+def handle_verbose (options):
+ """
+ raw options.verbose is a list of specifiers
+ '+' : i.e. --verbose. This just increases the current verbosity value by 1
+ '\d+' : e.g. --verbose 6. This (re)sets the verbosity value
+ '\d+:\d+' : e.g. --verbose 7:-5 The second number is the verbose_abbreviated_path
+ Set
+ options.verbosity
+ options.verbose_abbreviated_path
+
+ Since the latter cannot be disabled via ignored_args (it never appears as a
+ command line option), we do the next best thing
+ by not overriding whatever the user sets
+
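+ Worked examples of how the raw list of specifiers is resolved:
+
+     ["+", "+"]   ->  options.verbose = 2
+     ["3"]        ->  options.verbose = 3
+     ["7:-5"]     ->  options.verbose = 7, options.verbose_abbreviated_path = -5
+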
+ """
+ #
+ # verbosity specified manually or deliberately disabled: use that
+ #
+ if options.verbose == None or isinstance(options.verbose, int):
+ # verbose_abbreviated_path defaults to None unless set explicitly by the user
+ # in which case we shall prudently not override it!
+ # verbose_abbreviated_path of None will be set to the default at
+ # pipeline_run() / pipeline_printout()
+ if not hasattr(options, "verbose_abbreviated_path"):
+ setattr(options, "verbose_abbreviated_path", None)
+ return options
+ #
+ # The user is having a laugh by passing in a string!
+ # wrap in list
+ if isinstance(options.verbose, str):
+ options.verbose = [options.verbose]
+ #
+ #
+ curr_verbosity = 0
+ curr_verbose_abbreviated_path = None
+ import re
+ match_regex = re.compile(r"(\+)|(\d+)(?::(\-?\d+))?")
+ #
+ # Each verbosity specifier can be
+ # '+' : i.e. --verbose. This just increases the current verbosity value by 1
+ # '\d+' : e.g. --verbose 6. This (re)sets the verbosity value
+ # '\d+:\d+' : e.g. --verbose 7:-5 The second number is the verbose_abbreviated_path
+ #
+ for vv in options.verbose:
+ mm = match_regex.match(vv)
+ if not mm:
+ raise Exception("error: verbosity argument is specified as --verbose INT or --verbose INT:INT. invalid value '%s'" % vv)
+ if mm.group(1):
+ curr_verbosity += 1
+ else:
+ curr_verbosity = int(mm.group(2))
+ if mm.group(3):
+ curr_verbose_abbreviated_path = int(mm.group(3))
+ #
+ # set verbose_abbreviated_path unless set explicitly by the user
+ # in which case we shall prudently not override it!
+ if not hasattr(options, "verbose_abbreviated_path"):
+ setattr(options, "verbose_abbreviated_path", curr_verbose_abbreviated_path)
+ options.verbose = curr_verbosity
+ #
+ return options
+
+
+#_________________________________________________________________________________________
+
+# run
+
+#_________________________________________________________________________________________
+def run (options, **extra_options):
+ """
+ Take action depending on options
+ extra_options are passed (as appropriate) to the underlying functions
+ Returns True if the pipeline was run via pipeline_run(), and False if it was
+ only printed (--just_print) or printed as a flowchart (--flowchart)
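+
+ e.g. (a minimal sketch; extra keyword arguments such as the logger below are
+ forwarded to pipeline_run() / pipeline_printout() as appropriate):
+
+     cmdline.run(options, logger = logger)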
+ """
+
+ #
+ # be very defensive: these option names are used below. Make sure they already
+ # exist in ``options`` , even if they have a value of None
+ #
+ for attr_name in [ "just_print",
+ "verbose",
+ "flowchart",
+ "flowchart_format",
+ "target_tasks",
+ "forced_tasks",
+ "draw_horizontally",
+ "key_legend_in_graph",
+ "use_threads",
+ "jobs",
+ "recreate_database",
+ "touch_files_only",
+ "history_file" ]:
+ if not hasattr(options, attr_name):
+ setattr(options, attr_name, None)
+
+
+ #
+ # handle verbosity specification
+ #
+ # the special attribute verbose_abbreviated_path is set in this function
+ options=handle_verbose (options)
+
+ #
+ # touch files or not
+ #
+ if options.recreate_database:
+ touch_files_only = CHECKSUM_REGENERATE
+ elif options.touch_files_only:
+ touch_files_only = True
+ else:
+ touch_files_only = False
+
+ if options.just_print:
+ appropriate_options = get_extra_options_appropriate_for_command (extra_pipeline_printout_options, extra_options)
+ task.pipeline_printout(sys.stdout, options.target_tasks, options.forced_tasks,
+ history_file = options.history_file,
+ verbose_abbreviated_path=options.verbose_abbreviated_path,
+ verbose=options.verbose, **appropriate_options)
+ return False
+
+ elif options.flowchart:
+ appropriate_options = get_extra_options_appropriate_for_command (extra_pipeline_printout_graph_options, extra_options)
+ task.pipeline_printout_graph ( open(options.flowchart, "w"),
+ options.flowchart_format,
+ options.target_tasks,
+ options.forced_tasks,
+ history_file = options.history_file,
+ draw_vertically = not options.draw_horizontally,
+ no_key_legend = not options.key_legend_in_graph,
+ **appropriate_options)
+ return False
+ else:
+
+ #
+ # turn on multithread if --use_threads specified and --jobs > 1
+ # ignore if manually specified
+ #
+ if ( options.use_threads
+ # ignore if manual override
+ and not "multithread" in extra_options
+ and options.jobs and options.jobs > 1):
+ multithread = options.jobs
+ elif "multithread" in extra_options:
+ multithread = extra_options["multithread"]
+ del extra_options["multithread"]
+ else:
+ multithread = None
+
+
+ if not "logger" in extra_options:
+ extra_options["logger"] = None
+ if extra_options["logger"] == False:
+ extra_options["logger"] = task.black_hole_logger
+ elif extra_options["logger"] == None:
+ extra_options["logger"] = task.stderr_logger
+ appropriate_options = get_extra_options_appropriate_for_command (extra_pipeline_run_options, extra_options)
+ task.pipeline_run( options.target_tasks,
+ options.forced_tasks,
+ multiprocess = options.jobs,
+ multithread = multithread,
+ verbose = options.verbose,
+ touch_files_only= touch_files_only,
+ history_file = options.history_file,
+ verbose_abbreviated_path=options.verbose_abbreviated_path,
+ **appropriate_options)
+ return True
+
+
diff --git a/ruffus/combinatorics.py b/ruffus/combinatorics.py
new file mode 100644
index 0000000..4365f0d
--- /dev/null
+++ b/ruffus/combinatorics.py
@@ -0,0 +1,82 @@
+#!/usr/bin/env python
+################################################################################
+#
+#
+# combinatorics.py
+#
+# Copyright (c) 2013 Leo Goodstadt
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+#################################################################################
+"""
+
+********************************************
+:mod:`ruffus.combinatorics` -- Overview
+********************************************
+
+.. moduleauthor:: Leo Goodstadt <ruffus at llew.org.uk>
+
+
+ #
+ # @product
+ #
+ from ruffus import *
+
+ @product(
+ "/a_directory/dataset1.*.bam",
+ formatter( ".*/dataset1\.(?P<ID>\d+).bam" ),
+ "/a_different/directory/dataset2.*.bam",
+ formatter(".species", ".*/dataset2\.(?P<ID>\d+).bam" ),
+ "{path[0][0]}/{base_name[0][0]}.{base_name[0][0]}.out",
+ "{path[0][0]}", # extra: path for 1st input, 1st file
+ "{path[1][0]}", # extra: path for 2nd input, 1st file
+ "{basename[0][1]}", # extra: file name for 1st input, 2nd file
+ "{ID[1][1]}", # extra: regular expression named capture group for 2nd input, 2nd file
+ )
+ def task1( infiles, outfile,
+ input_1__path,
+ input_2__path,
+ input_1__2nd_file_name,
+ input_2__2nd_file_match
+ ):
+ print(infiles)
+ print(outfile)
+ print(input_1__path)
+ print(input_2__path)
+ print(input_1__2nd_file_name)
+ print(input_2__2nd_file_match)
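+
+ #
+ # @permutations, @combinations and @combinations_with_replacement follow the
+ # same pattern. A hypothetical sketch (the argument order is assumed to be
+ # input, formatter, tuple size, output):
+ #
+ @permutations("*.bam", formatter("\.bam$"), 2,
+               "{path[0][0]}/{basename[0][0]}_vs_{basename[1][0]}.out")
+ def compare_pair(input_files, output_file):
+     pass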
+"""
+
+from .task import task_decorator
+
+
+#
+# Combinatoric generators:
+#
+class product(task_decorator):
+ pass
+
+class permutations(task_decorator):
+ pass
+
+class combinations(task_decorator):
+ pass
+
+class combinations_with_replacement(task_decorator):
+ pass
diff --git a/ruffus/dbdict.py b/ruffus/dbdict.py
new file mode 100644
index 0000000..7c25119
--- /dev/null
+++ b/ruffus/dbdict.py
@@ -0,0 +1,459 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+
+'''
+A dictionary-like object with SQLite backend
+============================================
+
+Python dictionaries are very efficient objects for fast data access. But when
+data is too large to fit in memory, you want to keep data on disk but available
+for fast random access.
+
+Here's a dictionary-like object which uses a SQLite database backend for random
+access to the dictionary's key-value pairs.
+
+Use it like a standard dictionary, except that you give it a name
+(eg.'tempdict'):
+
+ import dbdict
+ d = dbdict.open('tempdict')
+ d['foo'] = 'bar'
+ # At this point, the key value pair foo and bar is written to disk.
+ d['John'] = 'doh!'
+ d['pi'] = 3.999
+ d['pi'] = 3.14159 # replaces the previous version of pi
+ d['pi'] += 1
+ d.close() # close the database file
+
+You can access your dictionary later on:
+
+ d = dbdict.open('tempdict')
+ del d['foo']
+
+ if 'John' in d:
+ print('John is in there!')
+ print(d.items())
+
+For efficient inserting/updating a list of key-value pairs, use the update()
+method:
+
+ d.update([('f1', 'test'), ('f2', 'example')])
+ d.update({'f1':'test', 'f2':'example'})
+ d.update(f1='test', f2='example')
+
+Use the get() method to most efficiently get a number of items as specified by a
+list of keys:
+
+ d.get(['f1', 'f2'])
+
+Use remove() method to most efficiently remove a number of items as specified
+by a list of keys:
+
+ d.remove(['f1', 'f2'])
+
+There is also an alternative (fully equivalent) way to instantiate a dbdict
+object by the call:
+
+ from dbdict import dbdict
+ d = dbdict('tempdict')
+
+Make a memory-based (i.e. not file based) SQLite database by the call:
+
+ dbdict(':memory:')
+
+Other special functionality as compared to dict:
+
+ d.clear() Clear all items (and free up unused disk space)
+ d.reindex() Delete and recreate the key index
+ d.vacuum() Free up unused disk space
+ d.con Access to the underlying SQLite connection (for advanced use)
+
+Some things to note:
+
+ - You can't directly store Python objects. Only numbers, strings and binary
+ data. Objects need to be serialized first in order to be stored. Use e.g.
+ pickle, json (or simplejson) or yaml for that purpose.
+
+ - Explicit database connection closing using the close() method is not
+ required. Changes are written on key-value assignment to the dictionary.
+ The file stays open until the object is destroyed or the close() method is
+ called.
+
+
+ Original code by Jacob Sondergaard
+ hg clone ssh://hg@bitbucket.org/nephics/dbdict
+
+ Modified to pickle automatically
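+
+ A minimal sketch of the pickling extension added in this modified version
+ (picklevalues is the extra keyword argument; the stored object is illustrative only):
+
+     d = dbdict.open('tempdict', picklevalues=True)
+     d['config'] = {'threads': 4, 'chromosomes': ['chr1', 'chr2']}
+     d.close()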
+'''
+
+__version__ = '1.3.1'
+
+import sqlite3
+try:
+ # MutableMapping is new in Python 2.6+
+ from collections import MutableMapping
+except ImportError:
+ # DictMixin will be (or is?) deprecated in the Python 3.x series
+ from UserDict import DictMixin as MutableMapping
+from os import path
+try:
+ import cPickle as pickle
+except ImportError:
+ import pickle
+import itertools
+import sys
+
+class DbDict(MutableMapping):
+ ''' DbDict, a dictionary-like object with SQLite back-end '''
+
+ def __init__(self, filename, picklevalues=False):
+ self.picklevalues = picklevalues
+ if filename == ':memory:' or not path.isfile(filename):
+ self.con = sqlite3.connect(filename)
+ self._create_table()
+ else:
+ self.con = sqlite3.connect(filename)
+
+
+ #_____________________________________________________________________________________
+
+
+ # Add automatic pickling and unpickling
+
+ def pickle_loads (self, value):
+ """
+ pickle.loads if specified
+ """
+ if self.picklevalues:
+ value = pickle.loads(bytes(value))
+ return value
+ def pickle_dumps (self, value):
+ """
+ pickle.dumps if specified
+ """
+ if self.picklevalues:
+ #
+ # Protocol = 0 generates ASCII 7 bit strings and is less efficient
+ # Protocol = -1 uses the highest (binary) pickle protocol, which needs to be
+ # handled as a blob by sqlite3.
+ #
+ # Unfortunately, sqlite3 only understands memoryview objects in python3 and
+ # buffer objects in python2
+ #
+ # http://bugs.python.org/issue7723 suggests there is no portable
+ # python2/3 way to write blobs to Sqlite
+ #
+ # However, sqlite3.Binary seems to do the trick
+ #
+ # Otherwise, to use protocol -1, we need to use the following code:
+ #
+ #if sys.hexversion >= 0x03000000:
+ # value = memoryview(pickle.dumps(value, protocol = -1))
+ #else:
+ # value = buffer(pickle.dumps(value, protocol = -1))
+ #
+ value = sqlite3.Binary(pickle.dumps(value, protocol = -1))
+ return value
+ #_____________________________________________________________________________________
+
+ def _create_table(self):
+ '''Creates an SQLite table 'data' with the columns 'key' and 'value'
+ where column 'key' is the table's primary key.
+
+ Note: SQLite automatically creates a unique index for the 'key' column.
+ The index may get fragmented with lots of insertions/updates/deletions,
+ therefore it is recommended to use reindex() when searches become
+ gradually slower.
+ '''
+ self.con.execute('create table data (key PRIMARY KEY,value)')
+ self.con.commit()
+
+ def __getitem__(self, key):
+ '''Return value for specified key'''
+ row = self.con.execute('select value from data where key=?',
+ (key, )).fetchone()
+ if not row:
+ raise KeyError(key)
+ return self.pickle_loads(row[0])
+
+ def __setitem__(self, key, value):
+ '''Set value at specified key'''
+ value = self.pickle_dumps(value)
+ self.con.execute('insert or replace into data (key, value) '
+ 'values (?,?)', (key, value))
+ self.con.commit()
+
+ def __delitem__(self, key):
+ '''Delete item (key-value pair) at specified key'''
+ if key in self:
+ self.con.execute('delete from data where key=?',(key, ))
+ self.con.commit()
+ else:
+ raise KeyError
+
+ def __iter__(self):
+ '''Return iterator over keys'''
+ return self._iterquery(self.con.execute('select key from data'),
+ single_value=True)
+
+ def __len__(self):
+ '''Return the number of stored items'''
+ cursor = self.con.execute('select count() from data')
+ return cursor.fetchone()[0]
+
+ @staticmethod
+ def _iterquery(cursor, single_value=False):
+ '''Return iterator over query result with pre-fetching of items in
+ set sizes determined by SQLite backend'''
+ rows = True
+ while rows:
+ rows = cursor.fetchmany()
+ for row in rows:
+ if single_value:
+ yield row[0]
+ else:
+ yield row
+
+ def iterkeys(self):
+ '''Return iterator of all keys in the database'''
+ return self.__iter__()
+
+ def itervalues(self):
+ '''Return iterator of all values in the database'''
+ it = self._iterquery(self.con.execute('select value from data'),
+ single_value=True)
+ return iter(self.pickle_loads(x) for x in it)
+
+ def iteritems(self):
+ '''Return iterator of all key-value pairs in the database'''
+ it = self._iterquery(self.con.execute('select key, value from data'))
+ return iter( (x[0], self.pickle_loads(x[1])) for x in it)
+
+ def keys(self):
+ '''Return all keys in the database'''
+ return [row[0]
+ for row in self.con.execute('select key from data').fetchall()]
+
+ def items(self):
+ '''Return all key-value pairs in the database'''
+ values = self.con.execute('select key, value from data').fetchall()
+ return [(x[0], self.pickle_loads(x[1])) for x in values]
+
+ def clear(self):
+ '''Clear the database of all key-value pairs, and free up unused
+ disk space.
+ '''
+ self.con.execute('drop table data')
+ self.vacuum()
+ self._create_table()
+
+ def _update(self, items):
+ '''Perform the SQL query of updating items (list of key-value pairs)'''
+ items = [(k, self.pickle_dumps(v)) for k,v in items]
+ self.con.executemany('insert or replace into data (key, value)'
+ ' values (?, ?)', items)
+ self.con.commit()
+
+ def update(self, items=None, **kwds):
+ '''Updates key-value pairs in the database.
+
+ Items (key-value pairs) may be given by keyword assignments or via the
+ parameter 'items' as a dict, list/tuple or other iterable of (key, value) pairs.
+ '''
+ if isinstance(items, dict):
+ self._update(list(items.items()))
+ elif isinstance(items, list) or isinstance(items, tuple):
+ self._update(items)
+ elif items:
+ # probably a generator
+ try:
+ self._update(list(items))
+ except TypeError:
+ raise ValueError('Could not interpret value of parameter `items` as a dict, list/tuple or iterator.')
+
+ if kwds:
+ self._update(list(kwds.items()))
+
+ def popitem(self):
+ '''Pop a key-value pair from the database. Returns the next key-value
+ pair which is then removed from the database.'''
+ res = self.con.execute('select key, value from data').fetchone()
+ if res:
+ key, value = res
+ else:
+ raise StopIteration
+ del self[key]
+ value = self.pickle_loads(value)
+ return key, value
+
+ def close(self):
+ '''Close database connection'''
+ self.con.close()
+
+ def vacuum(self):
+ '''Free unused disk space from the database file.
+
+ The operation has no effect if database is in memory.
+
+ Note: The operation can take some time to run (around a half second per
+ megabyte on the Linux box where SQLite is developed) and it can use up
+ to twice as much temporary disk space as the original file while it is
+ running.
+ '''
+ self.con.execute('vacuum')
+ self.con.commit()
+
+ def get(self, keys):
+ '''Get item(s) for the specified key or list of keys.
+
+ Items will be returned only for those keys that are defined. The
+ function will pass silently (i.e. not raise an error) if one or more of
+ the keys is not defined.'''
+ try:
+ keys = tuple(keys)
+ except TypeError:
+ # probably a single key (ie not an iterable)
+ keys = (keys,)
+ values = self.con.execute('select key, value from data where key in '
+ '%s' % (keys,)).fetchall()
+ return [(k, self.pickle_loads(v)) for k,v in values]
+
+ def remove(self, keys):
+ '''Removes item(s) for the specified key or list of keys.
+
+ The function will pass silently (i.e. not raise an error) if one or more
+ of the keys is not defined.'''
+ try:
+ keys = tuple(keys)
+ except TypeError:
+ # probably a single key (ie not an iterable)
+ keys = (keys,)
+ self.con.execute('delete from data where key in %s' % (keys,))
+ self.con.commit()
+
+ def reindex(self):
+ '''Delete and recreate key index.
+
+ Use this function if key lookup time becomes slower. This may happen as
+ the index will become fragmented with lots of
+ insertions/updates/deletions.'''
+ self.con.execute('reindex sqlite_autoindex_data_1')
+ self.con.commit()
+
+def dbdict(filename, picklevalues=False):
+ '''Open a persistent dictionary for reading and writing.
+
+ The filename parameter is the base filename for the underlying
+ database. If filename is ':memory:' the database is created in
+ memory.
+
+ See the module's __doc__ string for an overview of the interface.
+ '''
+ return DbDict(filename, picklevalues)
+
+def open(filename, picklevalues=False):
+ '''Open a persistent dictionary for reading and writing.
+
+ The filename parameter is the base filename for the underlying
+ database. If filename is ':memory:' the database is created in
+ memory.
+
+ See the module's __doc__ string for an overview of the interface.
+ '''
+ return DbDict(filename, picklevalues)
+
+if __name__ == '__main__':
+
+ # Perform some tests
+
+ d = open(':memory:')
+
+ d[1] = 'test'
+ assert d[1] == 'test'
+
+ d[1] += '1'
+ assert d[1] == 'test1'
+
+ try:
+ assert d[2], 'Lookup did not fail on non-existent key'
+ except KeyError:
+ pass
+
+ # test len
+ assert len(d) == 1, 'Failed to count number of items'
+
+ # test clear
+ d.clear()
+ assert len(d) == 0, 'Database not cleared as expected'
+
+ # test with list of items as (key, value) pairs
+ range10 = list(range(10))
+ items = [(i, i) for i in range10]
+ d.update(items)
+ assert list(d.items()) == items, 'Failed to update using list'
+ d.clear()
+
+ # test with tuple of items as (key, value) pairs
+ d.update(tuple(items))
+ assert list(d.items()) == items, 'Failed to update using tuple'
+ d.clear()
+
+ # test with dict
+ d.update(dict(items))
+ assert list(d.items()) == items
+ d.clear()
+
+ # test with generator
+ d.update((i, i) for i in range10)
+ assert list(d.items()) == items, 'Failed to update using generator'
+
+ # check the std. dict methods
+ assert list(d.keys()) == range10
+ assert list(d.values()) == range10
+ assert list(d.items()) == items
+ #assert list(d.iterkeys()) == range10
+ #assert list(d.itervalues()) == range10
+ #assert list(d.iteritems()) == items
+
+ # test get
+ assert d.get(list(range(8,12))) == items[-2:]
+
+ # test remove
+ d.remove(list(range(8,10)))
+ assert len(d.get(list(range(8,10)))) == 0, 'Items not removed successfully'
+
+ d.clear()
+
+ # test with key,value pairs as parameters
+ d.update(foo=1, bar=2)
+ assert list(d.items()) == [('foo', 1), ('bar', 2)], \
+ 'keyword assignment not successful'
+
+ # test popitem
+ while True:
+ try:
+ value = d.popitem()
+ assert value in [('foo', 1), ('bar', 2)], \
+ 'Popitem not in expected result set'
+ except StopIteration:
+ break
+
+ # test setdefault
+ d.setdefault(10, 10)
+ assert d[10] == 10, 'Failed to set default value'
+
+ # test reindex call (no assert)
+ d.reindex()
+
+ # test vacuum call (no assert, and call has no effect on an in memory db)
+ d.vacuum()
+
+ # test close call (checked by writing to the closed database below)
+ d.close()
+
+ # try writing to a closed database
+ try:
+ d[1] = 1
+ raise AssertionError('Database not closed')
+ except sqlite3.ProgrammingError:
+ pass
diff --git a/ruffus/drmaa_wrapper.py b/ruffus/drmaa_wrapper.py
new file mode 100644
index 0000000..007fd34
--- /dev/null
+++ b/ruffus/drmaa_wrapper.py
@@ -0,0 +1,462 @@
+#!/usr/bin/env python
+################################################################################
+#
+#
+# drmaa_wrapper.py
+#
+# Copyright (C) 2013 Leo Goodstadt
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+#################################################################################
+"""
+
+********************************************
+:mod:`ruffus.drmaa_wrapper` -- Overview
+********************************************
+
+.. moduleauthor:: Leo Goodstadt <ruffus at llew.org.uk>
+
+
+ #
+ # Using drmaa
+ #
+ from ruffus import *
+ import drmaa_wrapper
+
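+ # a minimal sketch (my_drmaa_session is a hypothetical, already initialised
+ # session object from the python "drmaa" package):
+
+ drmaa_wrapper.run_job_using_drmaa("echo hello",
+                                   job_name = "test_job",
+                                   drmaa_session = my_drmaa_session)
+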
+"""
+
+import sys, os
+import stat
+#
+# tempfile for drmaa scripts
+#
+import tempfile
+import datetime
+import subprocess
+import time
+
+if sys.hexversion >= 0x03000000:
+ # everything is unicode in python3
+ path_str_type = str
+else:
+ path_str_type = basestring
+
+
+#_________________________________________________________________________________________
+
+# error_drmaa_job
+
+#_________________________________________________________________________________________
+class error_drmaa_job(Exception):
+ """
+ All exceptions thrown in this module
+ """
+ def __init__(self, *errmsg):
+ Exception.__init__(self, *errmsg)
+
+
+
+#_________________________________________________________________________________________
+
+# read_stdout_stderr_from_files
+
+#_________________________________________________________________________________________
+def read_stdout_stderr_from_files( stdout_path, stderr_path, logger = None, cmd_str = "", tries=5):
+ '''
+ Reads the contents of two specified paths and returns the strings
+
+ Thanks to the paranoia approach contributed by Andreas Heger:
+
+ Retry just in case file system hasn't committed.
+
+ Logs error if files are missing: No big deal?
+
+ Cleans up files afterwards
+
+ Returns tuple of stdout and stderr.
+
+ '''
+ #
+ # delay up to 10 seconds until files are ready
+ #
+ for xxx in range(tries):
+ if os.path.exists( stdout_path ) and os.path.exists( stderr_path ):
+ break
+ time.sleep(2)
+
+ try:
+ stdout = open( stdout_path, "r" ).readlines()
+ except IOError:
+ exceptionType, exceptionValue, exceptionTraceback = sys.exc_info()
+ msg = str(exceptionValue)
+ if logger:
+ logger.warn( "could not open stdout: %s for \n%s" % (msg, cmd_str))
+ stdout = []
+
+ try:
+ stderr = open( stderr_path, "r" ).readlines()
+ except IOError:
+ exceptionType, exceptionValue, exceptionTraceback = sys.exc_info()
+ msg = str(exceptionValue)
+ if logger:
+ logger.warn( "could not open stderr: %s for \n%s" % (msg, cmd_str))
+ stderr = []
+
+ #
+ # cleanup ignoring errors
+ #
+ try:
+ os.unlink( stdout_path )
+ os.unlink( stderr_path )
+ except OSError:
+ pass
+
+ return stdout, stderr
+
+
+#_________________________________________________________________________________________
+
+# setup_drmaa_job
+
+#_________________________________________________________________________________________
+def setup_drmaa_job( drmaa_session, job_name, job_environment, working_directory, job_other_options):
+
+ job_template = drmaa_session.createJobTemplate()
+
+ if not working_directory:
+ job_template.workingDirectory = os.getcwd()
+ else:
+ job_template.workingDirectory = working_directory
+ if job_environment:
+ # dictionary e.g. { 'BASH_ENV' : '~/.bashrc' }
+ job_template.jobEnvironment = job_environment
+ job_template.args = []
+ if job_name:
+ job_template.jobName = job_name
+ else:
+ # nameless jobs sometimes break drmaa implementations...
+ job_template.jobName = "ruffus_job_" + "_".join(map(str, datetime.datetime.now().timetuple()[0:6]))
+
+ #
+ # optional job parameters
+ #
+ job_template.nativeSpecification = job_other_options
+
+ # separate stdout and stderr
+ job_template.joinFiles=False
+
+ return job_template
+
+#_________________________________________________________________________________________
+
+# write_job_script_to_temp_file
+
+#_________________________________________________________________________________________
+def write_job_script_to_temp_file( cmd_str, job_script_directory, job_name, job_other_options, job_environment, working_directory):
+ '''
+ returns (job_script_path, stdout_path, stderr_path)
+
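+        The script written to disk is built from a "#!/bin/sh" line, one commented
+        "#name=value" line per supplied parameter, and finally cmd_str itself.
+        It looks roughly like this (contents illustrative):
+
+            #!/bin/sh
+            #job_name=example_job
+            #job_other_options=-q short.q
+            my_command --option
+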
+ '''
+ import sys
+ time_stmp_str = "_".join(map(str, datetime.datetime.now().timetuple()[0:6]))
+ # create script directory if necessary
+ # Ignore errors rather than test for existence to avoid race conditions
+ try:
+ os.makedirs(job_script_directory)
+    except OSError:
+ pass
+    # text mode ('w+') so that the command string can be written under python3 as well as python2
+    tmpfile = tempfile.NamedTemporaryFile(mode='w+', prefix='drmaa_script_' + time_stmp_str + "__", dir = job_script_directory, delete = False)
+
+ #
+ # hopefully #!/bin/sh is universally portable among unix-like operating systems
+ #
+ tmpfile.write( "#!/bin/sh\n" )
+ #
+ # log parameters as suggested by Bernie Pope
+ #
+ for title, parameter in ( ("job_name", job_name, ),
+ ("job_other_options", job_other_options,),
+ ("job_environment", job_environment, ),
+ ("working_directory", working_directory), ):
+ if parameter:
+ tmpfile.write( "#%s=%s\n" % (title, parameter))
+
+ tmpfile.write( cmd_str + "\n" )
+ tmpfile.close()
+
+ job_script_path = os.path.abspath( tmpfile.name )
+ stdout_path = job_script_path + ".stdout"
+ stderr_path = job_script_path + ".stderr"
+
+ os.chmod( job_script_path, stat.S_IRWXG | stat.S_IRWXU )
+
+ return (job_script_path, stdout_path, stderr_path)
+
+
+
+#_________________________________________________________________________________________
+
+# run_job_using_drmaa
+
+#_________________________________________________________________________________________
+def run_job_using_drmaa (cmd_str, job_name = None, job_other_options = "", job_script_directory = None, job_environment = None, working_directory = None, retain_job_scripts = False, logger = None, drmaa_session = None, verbose = False):
+
+ """
+    Runs the specified command remotely using drmaa.
+    A drmaa_session must be supplied by the caller.
+ """
+ import drmaa
+
+ #
+    #   use the specified session: a module-shared session is not created here
+    #
+    if drmaa_session is None:
+ raise error_drmaa_job( "Please specify a drmaa_session in run_job()")
+
+ #
+ # make job template
+ #
+ job_template = setup_drmaa_job( drmaa_session, job_name, job_environment, working_directory, job_other_options)
+
+ #
+ # make job script
+ #
+ if not job_script_directory:
+ job_script_directory = os.getcwd()
+ job_script_path, stdout_path, stderr_path = write_job_script_to_temp_file( cmd_str, job_script_directory, job_name, job_other_options, job_environment, working_directory)
+ job_template.remoteCommand = job_script_path
+ # drmaa paths specified as [hostname]:file_path.
+ # See http://www.ogf.org/Public_Comment_Docs/Documents/2007-12/ggf-drmaa-idl-binding-v1%2000%20RC7.pdf
+ job_template.outputPath = ":" + stdout_path
+ job_template.errorPath = ":" + stderr_path
+
+
+ #
+ # Run job and wait
+ #
+ jobid = drmaa_session.runJob(job_template)
+ if logger:
+ logger.debug( "job has been submitted with jobid %s" % str(jobid ))
+
+ try:
+ job_info = drmaa_session.wait(jobid, drmaa.Session.TIMEOUT_WAIT_FOREVER)
+ except Exception:
+ exceptionType, exceptionValue, exceptionTraceback = sys.exc_info()
+ msg = str(exceptionValue)
+ # ignore message 24 in PBS
+        # code 24: drmaa: Job finished but resource usage information and/or termination status could not be provided.
+        if not msg.startswith("code 24"): raise
+        if logger:
+            logger.warn( "Warning %s\n"
+                         "The original command was:\n%s\n"
+                         "jobid=%s\n" % (msg, cmd_str, jobid) )
+ job_info = None
+
+
+ #
+ # Read output
+ #
+ stdout, stderr = read_stdout_stderr_from_files( stdout_path, stderr_path, logger, cmd_str)
+
+
+ job_info_str = ("The original command was: >> %s <<\n"
+ "The jobid was: %s\n"
+ "The job script name was: %s\n" %
+ (cmd_str,
+ jobid,
+ job_script_path))
+ if stderr:
+ job_info_str += "The stderr was: \n%s\n\n" % ("".join( stderr))
+ if stdout:
+ job_info_str += "The stdout was: \n%s\n\n" % ("".join( stdout))
+
+ #
+ # Throw if failed
+ #
+ if job_info:
+ job_info_str += "Resources used: %s " % (job_info.resourceUsage)
+ if job_info.hasExited:
+ if job_info.exitStatus:
+                raise error_drmaa_job( "The drmaa command exited with a non-zero exit status of %i:\n%s"
+ % (job_info.exitStatus, job_info_str))
+ #
+ # Decorate normal exit with some resource usage information
+ #
+ elif verbose:
+ def nice_mem_str(num):
+ """
+ Format memory sizes
+ http://stackoverflow.com/questions/1094841/reusable-library-to-get-human-readable-version-of-file-size
+ """
+ num = float(num)
+ for x in ['bytes','KB','MB','GB']:
+ if num < 1024.0:
+ return "%3.1f%s" % (num, x)
+ num /= 1024.0
+ return "%3.1f%s" % (num, 'TB')
+
+ try:
+ resource_usage_str = []
+ if 'maxvmem' in job_info.resourceUsage:
+ if 'mem' in job_info.resourceUsage:
+ resource_usage_str.append("Mem=%s(%s)" % (nice_mem_str(job_info.resourceUsage['maxvmem']), job_info.resourceUsage['mem']))
+ else:
+ resource_usage_str.append("Mem=%s" % nice_mem_str(job_info.resourceUsage['maxvmem']))
+ if 'ru_wallclock' in job_info.resourceUsage:
+ resource_usage_str.append("CPU wallclock= %.2gs" % float(job_info.resourceUsage['ru_wallclock']))
+ if len(resource_usage_str):
+ logger.info("Drmaa command used %s in running %s" % (", ".join(resource_usage_str), cmd_str))
+ else:
+ logger.info("Drmaa command successfully ran %s" % cmd_str)
+            except Exception:
+ logger.info("Drmaa command used %s in running %s" % (job_info.resourceUsage, cmd_str))
+ elif job_info.wasAborted:
+        raise error_drmaa_job( "The drmaa command was never run but used %s:\n%s"
+ % (job_info.resourceUsage, job_info_str))
+ elif job_info.hasSignal:
+ raise error_drmaa_job( "The drmaa command was terminated by signal %i:\n%s"
+ % (job_info.terminatingSignal, job_info_str))
+
+ #
+ # clean up job template
+ #
+ drmaa_session.deleteJobTemplate(job_template)
+
+ #
+ # Cleanup job script unless retain_job_scripts is set
+ #
+ if retain_job_scripts:
+ # job scripts have the jobid as an extension
+ os.rename(job_script_path, job_script_path + ".%s" % jobid )
+ else:
+ try:
+ os.unlink( job_script_path )
+ except OSError:
+ if logger:
+ logger.warn( "Temporary job script wrapper '%s' missing (and ignored) at clean-up" % job_script_path )
+
+ return stdout, stderr
+
+
+#_________________________________________________________________________________________
+
+# run_job_locally
+
+#_________________________________________________________________________________________
+def run_job_locally (cmd_str, logger = None):
+ """
+ Runs specified command locally instead of drmaa
+ """
+    process = subprocess.Popen( cmd_str,
+                                cwd = os.getcwd(),
+                                shell = True,
+                                stdin = subprocess.PIPE,
+                                stdout = subprocess.PIPE,
+                                stderr = subprocess.PIPE,
+                                # decode stdout / stderr to str under python3 as well as python2
+                                universal_newlines = True )
+
+ # process.stdin.close()
+ stdout, stderr = process.communicate()
+
+ if process.returncode != 0:
+        raise error_drmaa_job( "The locally run command exited abnormally (return code %i):\n"
+                                "The original command was:\n%s\n"
+                                "The stderr was: \n%s\n\n"
+                                "The stdout was: \n%s\n\n" %
+                                    (process.returncode, cmd_str, "".join( stderr), "".join( stdout)) )
+
+ return stdout.splitlines(True), stderr.splitlines(True)
+
+
+#_________________________________________________________________________________________
+
+# touch_output_files
+
+#_________________________________________________________________________________________
+def touch_output_files (cmd_str, output_files, logger = None):
+ """
+ Touches output files instead of actually running the command string
+ """
+
+ if not output_files or not len(output_files):
+ if logger:
+            logger.debug("No output files to 'touch' for command:\n%s" % cmd_str)
+ return
+
+    # make sure output_files is a list
+ ltypes=(list, tuple)
+ if not isinstance(output_files, ltypes):
+ output_files = [output_files]
+ else:
+ output_files = list(output_files)
+
+ #
+ # flatten list of file names
+ # from http://rightfootin.blogspot.co.uk/2006/09/more-on-python-flatten.html
+ #
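+    #   e.g. (illustrative)  ["a.out", ["b.out", ["c.out"]]]  ->  ["a.out", "b.out", "c.out"]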
+ i = 0
+ while i < len(output_files):
+ while isinstance(output_files[i], ltypes):
+ if not output_files[i]:
+ output_files.pop(i)
+ i -= 1
+ break
+ else:
+ output_files[i:i + 1] = output_files[i]
+ i += 1
+
+
+ for f in output_files:
+ # ignore non strings
+ if not isinstance (f, path_str_type):
+ continue
+
+ # create file
+ if not os.path.exists(f):
+            # the with block closes the new (empty) file immediately
+ with open(f, 'w') as p: pass
+
+ # touch existing file
+ else:
+ os.utime(f, None)
+
+
+
+
+#_________________________________________________________________________________________
+
+# run_job
+
+#_________________________________________________________________________________________
+def run_job(cmd_str, job_name = None, job_other_options = None, job_script_directory = None,
+ job_environment = None, working_directory = None, logger = None,
+ drmaa_session = None, retain_job_scripts = False,
+ run_locally = False, output_files = None, touch_only = False, verbose = False):
+ """
+    Runs the specified command using drmaa, or locally, or in simulation only (just touching the output files)
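+
+    A minimal illustrative sketch (commands and file names are made-up, and my_session
+    is assumed to be an already initialised drmaa session):
+
+        # submit to the cluster via drmaa
+        stdout, stderr = run_job("echo hello", drmaa_session = my_session)
+
+        # run on the local machine instead
+        stdout, stderr = run_job("echo hello", run_locally = True)
+
+        # simulation only: just touch the expected output files
+        run_job("echo hello", touch_only = True, output_files = ["example.output"])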
+ """
+
+ if touch_only:
+ touch_output_files (cmd_str, output_files, logger)
+ return "","",
+
+ if run_locally:
+ return run_job_locally (cmd_str, logger)
+
+
+ return run_job_using_drmaa (cmd_str, job_name, job_other_options, job_script_directory, job_environment, working_directory, retain_job_scripts, logger, drmaa_session, verbose)
diff --git a/ruffus/file_name_parameters.py b/ruffus/file_name_parameters.py
new file mode 100644
index 0000000..6902997
--- /dev/null
+++ b/ruffus/file_name_parameters.py
@@ -0,0 +1,1399 @@
+#!/usr/bin/env python
+from __future__ import print_function
+################################################################################
+#
+# file_name_parameters
+#
+#
+# Copyright (c) 10/9/2009 Leo Goodstadt
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+#################################################################################
+
+
+"""
+
+********************************************
+:mod:`file_name_parameters` -- Overview
+********************************************
+
+
+.. moduleauthor:: Leo Goodstadt <ruffus at llew.org.uk>
+
+ Handles file names for ruffus
+
+
+"""
+
+
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# imports
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+import os,copy
+import re,sys
+import glob
+from operator import itemgetter
+from itertools import groupby
+import itertools
+#from itertools import product
+#from itertools import permutations
+#from itertools import combinations
+#from itertools import combinations_with_replacement
+
+from collections import defaultdict
+from time import strftime, gmtime
+if __name__ == '__main__':
+ import sys
+ sys.path.insert(0,".")
+if sys.hexversion >= 0x03000000:
+ # everything is unicode in python3
+ path_str_type = str
+else:
+ path_str_type = basestring
+
+from .ruffus_exceptions import *
+#from file_name_parameters import *
+from .ruffus_utility import *
+
+from . import dbdict
+
+class t_extra_inputs:
+ (ADD_TO_INPUTS, REPLACE_INPUTS, KEEP_INPUTS) = list(range(3))
+
+class t_combinatorics_type:
+ ( COMBINATORICS_PRODUCT, COMBINATORICS_PERMUTATIONS,
+ COMBINATORICS_COMBINATIONS, COMBINATORICS_COMBINATIONS_WITH_REPLACEMENT) = list(range(4))
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Functions
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+import re
+
+#_________________________________________________________________________________________
+
+# get_readable_path_str
+
+#_________________________________________________________________________________________
+def get_readable_path_str(original_path, max_len):
+ """
+ Truncates path to max_len characters if necessary
+    If the result is a path within a nested directory, removes the partially
+        truncated leading directory name
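+
+    e.g. (illustrative):
+        get_readable_path_str("/a/very/deeply/nested/directory/file.txt", 20)
+        # -> "[...]/file.txt"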
+ """
+ if len(original_path) < max_len:
+ return original_path
+ truncated_name = original_path[-(max_len - 5):]
+ if "/" not in truncated_name:
+ return "[...]" + truncated_name
+ return "[...]" + re.sub("^[^/]+", "", truncated_name)
+
+
+
+#_________________________________________________________________________________________
+
+# epoch_seconds_to_str
+
+#_________________________________________________________________________________________
+def epoch_seconds_to_str (epoch_seconds):
+ """
+    Converts seconds since the epoch into a readable date / time string,
+        with the seconds given to two decimal places
+ """
+ # returns 24 char long 25 May 2011 23:37:40.12
+ time_str = strftime("%d %b %Y %H:%M:%S", gmtime(epoch_seconds))
+ #
+ fraction_of_second_as_str = ("%.2f" % (epoch_seconds - int(epoch_seconds)))[1:]
+ # or fraction = ("%.2f" % (divmod(epoch_seconds, 1)[1]))[1:]
+ return (time_str + fraction_of_second_as_str)
+
+
+err_msg_no_regex_match = ("No jobs were run because no file names matched. "
+ "Please make sure that the regular expression is correctly specified.")
+err_msg_empty_files_parameter= ("@files() was empty, i.e. no files were specified. "
+ "Please make sure this is by design.")
+
+
+#_________________________________________________________________________________________
+
+# t_file_names_transform
+
+#_________________________________________________________________________________________
+class t_file_names_transform(object):
+ """
+ Does the work for generating output / "extra input" / "extra" filenames
+ input
+ - a set of file names (derived from tasks, globs, hard coded file names)
+ - a specification (e.g. a new suffix, a regular expression substitution pattern)
+ output
+ - a new file name
+
+ N.B. Is this level of abstraction adequate?
+ 1) On one hand, this is a simple extension of the current working design
+        2) On the other, we throw away the nested structure of the tasks / globs
+           as well as the nested structure of the outputs.
+ """
+ def substitute (self, starting_file_names, pattern):
+ pass
+
+    # overridden only in t_suffix_file_names_transform
+ # only suffix() behaves differently for output and extra files...
+ def substitute_output_files (self, starting_file_names, pattern):
+ return self.substitute (starting_file_names, pattern)
+
+
+class t_suffix_file_names_transform(t_file_names_transform):
+ """
+ Does the work for generating output / "extra input" / "extra" filenames
+ replacing a specified suffix
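+
+    e.g. (illustrative) with suffix(".txt") and an output pattern of ".summary",
+        "data.txt" would become "data.summary"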
+ """
+ def __init__ (self, enclosing_task, suffix_object, error_type, descriptor_string):
+ self.matching_regex = compile_suffix(enclosing_task, suffix_object, error_type, descriptor_string)
+ self.matching_regex_str = suffix_object.args[0]
+
+ def substitute (self, starting_file_names, pattern):
+ return regex_replace(starting_file_names[0], self.matching_regex_str, self.matching_regex, pattern)
+
+ def substitute_output_files (self, starting_file_names, pattern):
+ return regex_replace(starting_file_names[0], self.matching_regex_str, self.matching_regex, pattern, SUFFIX_SUBSTITUTE)
+
+
+class t_regex_file_names_transform(t_file_names_transform):
+ """
+ Does the work for generating output / "extra input" / "extra" filenames
+ replacing a specified regular expression
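+
+    e.g. (illustrative) with regex(r"(.+)\.txt$") and an output pattern of r"\1.summary",
+        "data.txt" would become "data.summary"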
+ """
+ def __init__ (self, enclosing_task, regex_object, error_type, descriptor_string):
+ self.matching_regex = compile_regex(enclosing_task, regex_object, error_type, descriptor_string)
+ self.matching_regex_str = regex_object.args[0]
+
+ def substitute (self, starting_file_names, pattern):
+ return regex_replace(starting_file_names[0], self.matching_regex_str, self.matching_regex, pattern)
+
+
+
+class t_formatter_file_names_transform(t_file_names_transform):
+ """
+ Does the work for generating output / "extra input" / "extra" filenames
+    using formatter() style string substitution
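+
+    e.g. (illustrative) with formatter() and an output pattern of
+        "{path[0]}/{basename[0]}.summary", "/tmp/data.txt" would become "/tmp/data.summary"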
+ """
+ def __init__ (self, enclosing_task, format_object, error_type, descriptor_string):
+ self.matching_regexes = []
+ self.matching_regex_strs = []
+ if len(format_object.args):
+ self.matching_regexes = compile_formatter(enclosing_task, format_object, error_type, descriptor_string)
+ self.matching_regex_strs = list(format_object.args)
+
+ def substitute (self, starting_file_names, pattern):
+ # note: uses all file names
+ return formatter_replace (starting_file_names, self.matching_regex_strs, self.matching_regexes, pattern)
+
+
+class t_nested_formatter_file_names_transform(t_file_names_transform):
+ """
+ Does the work for generating output / "extra input" / "extra" filenames
+    applying a whole series of regular expressions to a whole series of inputs
+ """
+ def __init__ (self, enclosing_task, format_objects, error_type, descriptor_string):
+ self.list_matching_regex = []
+ self.list_matching_regex_str= []
+
+ for format_object in format_objects:
+ if len(format_object.args):
+ self.list_matching_regex.append(compile_formatter(enclosing_task, format_object, error_type, descriptor_string))
+ self.list_matching_regex_str.append(list(format_object.args))
+ else:
+ self.list_matching_regex.append([])
+ self.list_matching_regex_str.append([])
+
+ def substitute (self, starting_file_names, pattern):
+ # note: uses all file names
+ return nested_formatter_replace (starting_file_names, self.list_matching_regex_str, self.list_matching_regex, pattern)
+
+
+#_________________________________________________________________________________________
+
+# t_params_tasks_globs_run_time_data
+
+#_________________________________________________________________________________________
+class t_params_tasks_globs_run_time_data(object):
+ """
+ After parameters are parsed into tasks, globs, runtime data
+ """
+ def __init__ (self, params, tasks, globs, runtime_data_names):
+ self.params = params
+ self.tasks = tasks
+ self.globs = globs
+ self.runtime_data_names = runtime_data_names
+
+ def __str__ (self):
+ return str(self.params)
+
+ def param_iter (self):
+ for p in self.params:
+ yield t_params_tasks_globs_run_time_data(p, self.tasks, self.globs,
+ self.runtime_data_names)
+
+
+ def unexpanded_globs (self):
+ """
+ do not expand globs
+ """
+ return t_params_tasks_globs_run_time_data(self.params, self.tasks, [],
+ self.runtime_data_names)
+
+
+ def single_file_to_list (self):
+ """
+        if the parameter is a simple string, wrap it in a list unless it is a glob
+ Useful for simple @transform cases
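+
+        e.g. (illustrative) "a.txt" becomes ["a.txt"] (and True is returned),
+            but "*.txt" is left untouched (and False is returned)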
+ """
+ if isinstance(self.params, path_str_type) and not is_glob(self.params):
+ self.params = [self.params]
+ return True
+ return False
+
+
+ def file_names_transformed (self, filenames, file_names_transform):
+ """
+ return clone with the filenames / globs transformed by the supplied transform object
+ """
+ output_glob = file_names_transform.substitute(filenames, self.globs)
+ output_param = file_names_transform.substitute(filenames, self.params)
+ return t_params_tasks_globs_run_time_data(output_param, self.tasks, output_glob,
+ self.runtime_data_names)
+
+ #
+ # deprecated
+ #
+ def regex_replaced (self, filename, regex, regex_or_suffix = REGEX_SUBSTITUTE):
+ output_glob = regex_replace(filename, regex, self.globs, regex_or_suffix)
+ output_param = regex_replace(filename, regex, self.params, regex_or_suffix)
+ return t_params_tasks_globs_run_time_data(output_param, self.tasks, output_glob,
+ self.runtime_data_names)
+
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# needs_update_func
+
+# functions which are called to see if a job needs to be updated
+#
+# Each task is a series of parallel jobs
+# each of which has the following pseudo-code
+#
+# for param in param_generator_func():
+# if needs_update_func(*param):
+# job_wrapper(*param)
+#
+# N.B. param_generator_func yields iterators of *sequences*
+# if you are generating single parameters, turn them into lists:
+#
+# for a in alist:
+# yield (a,)
+#
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+#_________________________________________________________________________________________
+
+# needs_update_check_directory_missing
+
+# N.B. throws exception if this is an ordinary file, not a directory
+
+
+#_________________________________________________________________________________________
+def needs_update_check_directory_missing (*params, **kwargs):
+ """
+ Called per directory:
+ Does it exist?
+        Is it an ordinary file rather than a directory? (throws an exception)
+ """
+ if len(params) == 1:
+ dirs = params[0]
+ elif len(params) == 2:
+ dirs = params[1]
+ else:
+ raise Exception("Wrong number of arguments in mkdir check %s" % (params,))
+
+ missing_directories = []
+ for d in get_strings_in_nested_sequence(dirs):
+ #print >>sys.stderr, "check directory missing %d " % os.path.exists(d) # DEBUG
+ if not os.path.exists(d):
+ missing_directories.append(d)
+ continue
+ #return True, "Directory [%s] is missing" % d
+ if not os.path.isdir(d):
+ raise error_not_a_directory("%s already exists but as a file, not a directory" % d )
+
+ if len(missing_directories):
+ if len(missing_directories) > 1:
+ return True, "Directories [%s] are missing" % (", ".join(missing_directories))
+ else:
+ return True, "Directories [%s] is missing" % (missing_directories[0])
+ return False, "All directories exist"
+
+#_________________________________________________________________________________________
+
+# check_input_files_exist
+
+#_________________________________________________________________________________________
+def check_input_files_exist (*params):
+ """
+    If inputs are missing then there is no way a job can run successfully.
+    Must throw an exception.
+    This extra function is a hack to make sure input files exist right before
+        the job is called, both for better error messages and to stop things from blowing
+        up inside the task function
+ """
+ if len(params):
+ input_files = params[0]
+ for f in get_strings_in_nested_sequence(input_files):
+ if not os.path.exists(f):
+ if os.path.lexists(f):
+ raise MissingInputFileError("No way to run job: "+
+ "Input file '%s' is a broken symbolic link." % f)
+ else:
+ raise MissingInputFileError("No way to run job: "+
+ "Input file '%s' does not exist" % f)
+
+
+#_________________________________________________________________________________________
+
+# needs_update_check_exist
+
+#_________________________________________________________________________________________
+def needs_update_check_exist (*params, **kwargs):
+ """
+ Given input and output files, see if all exist
+ Each can be
+
+ #. string: assumed to be a filename "file1"
+ #. any other type
+ #. arbitrary nested sequence of (1) and (2)
+
+ """
+ if "verbose_abbreviated_path" in kwargs:
+ verbose_abbreviated_path = kwargs["verbose_abbreviated_path"]
+ else:
+ verbose_abbreviated_path = -55
+
+ # missing output means build
+ if len(params) < 2:
+ return True, "i/o files not specified"
+
+
+ i, o = params[0:2]
+ i = get_strings_in_nested_sequence(i)
+ o = get_strings_in_nested_sequence(o)
+
+ #
+ # build: missing output file
+ #
+ if len(o) == 0:
+ return True, "Missing output file"
+
+ # missing input / output file means always build
+ missing_files = []
+ for io in (i, o):
+ for p in io:
+ if not os.path.exists(p):
+ missing_files.append(p)
+ if len(missing_files):
+ return True, "Missing file%s\n%s" % ("s" if len(missing_files) > 1 else "",
+ shorten_filenames_encoder (missing_files,
+ verbose_abbreviated_path))
+
+ #
+ # missing input -> build only if output absent
+ #
+ if len(i) == 0:
+ return False, "Missing input files"
+
+
+ return False, "Up to date"
+
+
+
+#_________________________________________________________________________________________
+
+# needs_update_check_modify_time
+
+#_________________________________________________________________________________________
+def needs_update_check_modify_time (*params, **kwargs):
+ """
+ Given input and output files, see if all exist and whether output files are later than input files
+ Each can be
+
+ #. string: assumed to be a filename "file1"
+ #. any other type
+ #. arbitrary nested sequence of (1) and (2)
+
+ """
+ # conditions for rerunning a job:
+ # 1. forced to rerun entire taskset
+ # 2. 1+ Output files don't exist
+ # 3. 1+ of input files is newer than 1+ output files -- ruffus does this level right now...
+ # 4. internal completion time for that file is out of date # incomplete runs will be rerun automatically
+ # 5. checksum of code that ran the file is out of date # changes to function body result in rerun
+ # 6. checksum of the args that ran the file are out of date # appropriate config file changes result in rerun
+ try:
+ task = kwargs['task']
+ except KeyError:
+ # allow the task not to be specified and fall back to classic
+ # file timestamp behavior (either this or fix all the test cases,
+ # which often don't have proper tasks)
+ class Namespace:
+ pass
+ task = Namespace()
+ task.checksum_level = CHECKSUM_FILE_TIMESTAMPS
+
+ if "verbose_abbreviated_path" in kwargs:
+ verbose_abbreviated_path = kwargs["verbose_abbreviated_path"]
+ else:
+ verbose_abbreviated_path = -55
+
+ try:
+ job_history = kwargs['job_history']
+ except KeyError:
+ # allow job_history not to be specified and reopen dbdict file redundantly...
+ # Either this or fix all the test cases
+ #job_history = dbdict.open(RUFFUS_HISTORY_FILE, picklevalues=True)
+ print("Oops: Should only appear in test code", file=sys.stderr)
+ job_history = open_job_history (None)
+
+
+ # missing output means build
+ if len(params) < 2:
+ return True, ""
+
+ i, o = params[0:2]
+ i = get_strings_in_nested_sequence(i)
+ o = get_strings_in_nested_sequence(o)
+
+ #
+ # build: missing output file
+ #
+ if len(o) == 0:
+ return True, "Missing output file"
+
+ # missing input / output file means always build
+ missing_files = []
+ for io in (i, o):
+ for p in io:
+ if not os.path.exists(p):
+ missing_files.append(p)
+ if len(missing_files):
+ return True, "Missing file%s\n %s" % ("s" if len(missing_files) > 1 else "",
+ shorten_filenames_encoder (missing_files,
+ verbose_abbreviated_path))
+
+ # existing files, but from previous interrupted runs
+ if task.checksum_level >= CHECKSUM_HISTORY_TIMESTAMPS:
+ incomplete_files = []
+ func_changed_files = []
+ param_changed_files = []
+ #for io in (i, o):
+ # for p in io:
+ # if p not in job_history:
+ # incomplete_files.append(p)
+ for p in o:
+ if os.path.relpath(p) not in job_history:
+ incomplete_files.append(p)
+ if len(incomplete_files):
+ return True, "Previous incomplete run leftover%s:\n %s" % ("s" if len(incomplete_files) > 1 else "",
+ shorten_filenames_encoder (incomplete_files,
+ verbose_abbreviated_path))
+ # check if function that generated our output file has changed
+ for o_f_n in o:
+ rel_o_f_n = os.path.relpath(o_f_n)
+ old_chksum = job_history[rel_o_f_n]
+ new_chksum = JobHistoryChecksum(rel_o_f_n, None, params[2:], task)
+ if task.checksum_level >= CHECKSUM_FUNCTIONS_AND_PARAMS and \
+ new_chksum.chksum_params != old_chksum.chksum_params:
+ param_changed_files.append(rel_o_f_n)
+ elif task.checksum_level >= CHECKSUM_FUNCTIONS and \
+ new_chksum.chksum_func != old_chksum.chksum_func:
+ func_changed_files.append(rel_o_f_n)
+
+ if len(func_changed_files):
+ return True, "Pipeline function has changed:\n %s" % (shorten_filenames_encoder (func_changed_files,
+ verbose_abbreviated_path))
+ if len(param_changed_files):
+ return True, "Pipeline parameters have changed:\n %s" % (shorten_filenames_encoder (param_changed_files,
+ verbose_abbreviated_path))
+
+ #
+ # missing input -> build only if output absent or function is out of date
+ #
+ if len(i) == 0:
+ return False, "Missing input files"
+
+
+ #
+ # get sorted modified times for all input and output files
+ #
+ filename_to_times = [[], []]
+ file_times = [[], []]
+
+
+
+ #_____________________________________________________________________________________
+
+ # pretty_io_with_date_times
+
+ #_____________________________________________________________________________________
+ def pretty_io_with_date_times (filename_to_times):
+
+ # sort
+ for io in range(2) :
+ filename_to_times[io].sort()
+
+
+ #
+ # add asterisk for all files which are causing this job to be out of date
+ #
+ file_name_to_asterisk = dict()
+ oldest_output_mtime = filename_to_times[1][0][0]
+ for mtime, file_name in filename_to_times[0]:
+ file_name_to_asterisk[file_name] = "*" if mtime >= oldest_output_mtime else " "
+        newest_input_mtime = filename_to_times[0][-1][0]
+        for mtime, file_name in filename_to_times[1]:
+            file_name_to_asterisk[file_name] = "*" if mtime <= newest_input_mtime else " "
+
+
+
+ #
+ # try to fit in 100 - 15 = 85 char lines
+ # date time ~ 25 characters so limit file name to 55 characters
+ #
+ msg = "\n"
+ category_names = "Input", "Output"
+ for io in range(2):
+ msg += " %s files:\n" % category_names[io]
+ for mtime, file_name in filename_to_times[io]:
+ file_datetime_str = epoch_seconds_to_str(mtime)
+ msg += (" " + # indent
+ file_name_to_asterisk[file_name] + " " + # asterisked out of date files
+ file_datetime_str + ": " + # date time of file
+ shorten_filenames_encoder (file_name,
+ verbose_abbreviated_path) + "\n") # file name truncated to 55
+ return msg
+
+
+ #
+ # Ignore output file if it is found in the list of input files
+ # By definition they have the same timestamp,
+ # and the job will otherwise appear to be out of date
+ #
+ # Symbolic links followed
+ real_input_file_names = set()
+ for input_file_name in i:
+ rel_input_file_name = os.path.relpath(input_file_name)
+ real_input_file_names.add(os.path.realpath(input_file_name))
+ file_timestamp = os.path.getmtime(input_file_name)
+ if task.checksum_level >= CHECKSUM_HISTORY_TIMESTAMPS and rel_input_file_name in job_history:
+ old_chksum = job_history[rel_input_file_name]
+ mtime = max(file_timestamp, old_chksum.mtime)
+ else:
+ mtime = file_timestamp
+ filename_to_times[0].append((mtime, input_file_name))
+ file_times[0].append(mtime)
+
+
+ # for output files, we need to check modification time *in addition* to
+ # function and argument checksums...
+ for output_file_name in o:
+ rel_output_file_name = os.path.relpath(output_file_name)
+ real_file_name = os.path.realpath(output_file_name)
+ file_timestamp = os.path.getmtime(output_file_name)
+ if task.checksum_level >= CHECKSUM_HISTORY_TIMESTAMPS:
+ old_chksum = job_history[rel_output_file_name]
+            if old_chksum.mtime > file_timestamp and old_chksum.mtime - file_timestamp > 1.1:
+                # the history / checksum time is much later than the file system time:
+                # trust the file system time
+                mtime = file_timestamp
+            else:
+                # otherwise prefer the checksum time: if both are within a second of each
+                # other, the checksum time probably has the higher resolution
+                mtime = old_chksum.mtime
+ else:
+ mtime = file_timestamp
+ if real_file_name not in real_input_file_names:
+ file_times[1].append(mtime)
+ filename_to_times[1].append((mtime, output_file_name))
+
+
+ #
+ # Debug: Force print modified file names and times
+ #
+ #if len(file_times[0]) and len (file_times[1]):
+ # print >>sys.stderr, pretty_io_with_date_times(filename_to_times), file_times, (max(file_times[0]) >= min(file_times[1]))
+ #else:
+ # print >>sys.stderr, i, o
+
+ #
+ # update if any input file >= (more recent) output file
+ #
+ if len(file_times[0]) and len (file_times[1]) and max(file_times[0]) >= min(file_times[1]):
+ return True, pretty_io_with_date_times(filename_to_times)
+ return False, "Up to date"
+
+
+#_________________________________________________________________________________________
+#
+# is_file_re_combining
+#
+#_________________________________________________________________________________________
+def is_file_re_combining (old_args):
+ """
+ Helper function for @files_re
+ check if parameters wrapped in combine
+ """
+ combining_all_jobs = False
+ orig_args = []
+ for arg in old_args:
+ if isinstance(arg, combine):
+ combining_all_jobs = True
+ if len(arg.args) == 1:
+ orig_args.append(arg.args[0])
+ else:
+ orig_args.append(arg[0].args)
+ else:
+ orig_args.append(arg)
+ return combining_all_jobs, orig_args
+
+
+#_________________________________________________________________________________________
+
+# file_names_from_tasks_globs
+
+#_________________________________________________________________________________________
+def file_names_from_tasks_globs(files_task_globs,
+ runtime_data, do_not_expand_single_job_tasks = False):
+ """
+ Replaces glob specifications and tasks with actual files / task output
+ """
+
+ #
+ # N.B. get_output_files() should never have the flattened flag == True
+ # do that later in get_strings_in_nested_sequence
+ #
+
+    # special handling for chaining tasks which conceptually have a single job
+ # i.e. @merge and @files/@parallel with single job parameters
+ if files_task_globs.params.__class__.__name__ == '_task' and do_not_expand_single_job_tasks:
+ return files_task_globs.params.get_output_files(True, runtime_data)
+
+
+ task_or_glob_to_files = dict()
+
+ # look up globs and tasks
+ for g in files_task_globs.globs:
+ # check whether still is glob pattern after transform
+ # {} are particularly suspicious...
+ if is_glob(g):
+ task_or_glob_to_files[g] = sorted(glob.glob(g))
+ for t in files_task_globs.tasks:
+ of = t.get_output_files(False, runtime_data)
+ task_or_glob_to_files[t] = of
+ for n in files_task_globs.runtime_data_names:
+ data_name = n.args[0]
+ if data_name in runtime_data:
+ task_or_glob_to_files[n] = runtime_data[data_name]
+ else:
+            raise error_missing_runtime_parameter("The inputs of this task depend on " +
+ "the runtime parameter " +
+ "'%s' which is missing " % data_name)
+
+
+
+ return expand_nested_tasks_or_globs(files_task_globs.params, task_or_glob_to_files)
+
+
+
+
+
+
+
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# param_factories
+
+# makes python generators which yield parameters for
+#
+# A) needs_update_func
+# B) job_wrapper
+
+# Each task is a series of parallel jobs
+# each of which has the following pseudo-code
+#
+# for param in param_generator_func():
+# if needs_update_func(*param):
+# act_func(*param)
+#
+# Test Usage:
+#
+#
+# param_func = xxx_factory(tasks, globs, orig_input_params, ...)
+#
+# for params in param_func():
+# i, o = params[0:1]
+# print " input_params = " , i
+# print "output = " , o
+#
+#
+#
+#
+#
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+#_________________________________________________________________________________________
+
+# touch_file_factory
+
+#_________________________________________________________________________________________
+def touch_file_factory (orig_args, register_cleanup):
+ """
+ Creates function, which when called, will touch files
+ """
+ file_names = orig_args
+ # accepts unicode
+ if isinstance (orig_args, path_str_type):
+ file_names = [orig_args]
+ else:
+        # make a copy so that when the original is modified, we don't get confused!
+ file_names = list(orig_args)
+
+ def do_touch_file ():
+ for f in file_names:
+ if not os.path.exists(f):
+ with open(f, 'w') as ff:
+ pass
+ else:
+ os.utime(f, None)
+ register_cleanup(f, "touch")
+ return do_touch_file
+
+
+#_________________________________________________________________________________________
+
+#   args_param_factory
+
+# orig_args = ["input", "output", 1, 2, ...]
+# orig_args = [
+# ["input0", "output0", 1, 2, ...] # job 1
+# [["input1a", "input1b"], "output1", 1, 2, ...] # job 2
+# ["input2", ["output2a", "output2b"], 1, 2, ...] # job 3
+# ["input3", "output3", 1, 2, ...] # job 4
+# ]
+#
+#_________________________________________________________________________________________
+def args_param_factory (orig_args):
+ """
+ Factory for functions which
+ yield tuples of inputs, outputs / extras
+
+    .. Note::
+
+        1. Each job requires input/output file names
+        2. Input/output file names can be a string or an arbitrarily nested sequence
+        3. Non-string types are ignored
+        4. Either the input or the output file names must contain at least one string
+
+ """
+ def iterator(runtime_data):
+ for job_param in orig_args:
+ yield job_param, job_param
+ return iterator
+
+#_________________________________________________________________________________________
+
+# file_param_factory
+
+# orig_args = ["input", "output", 1, 2, ...]
+# orig_args = [
+# ["input0", "output0", 1, 2, ...] # job 1
+# [["input1a", "input1b"], "output1", 1, 2, ...] # job 2
+# ["input2", ["output2a", "output2b"], 1, 2, ...] # job 3
+# ["input3", "output3", 1, 2, ...] # job 4
+# ]
+#
+#_________________________________________________________________________________________
+def files_param_factory (input_files_task_globs, flatten_input,
+ do_not_expand_single_job_tasks, output_extras):
+ """
+ Factory for functions which
+ yield tuples of inputs, outputs / extras
+
+    .. Note::
+
+        1. Each job requires input/output file names
+        2. Input/output file names can be a string or an arbitrarily nested sequence
+        3. Non-string types are ignored
+        4. Either the input or the output file names must contain at least one string
+
+ """
+ def iterator(runtime_data):
+ # substitute inputs
+ #input_params = file_names_from_tasks_globs(input_files_task_globs, runtime_data, False)
+
+ if input_files_task_globs.params == []:
+ if "ruffus_WARNING" not in runtime_data:
+ runtime_data["ruffus_WARNING"] = defaultdict(set)
+ runtime_data["ruffus_WARNING"][iterator].add(err_msg_empty_files_parameter)
+ return
+
+ for input_spec, output_extra_param in zip(input_files_task_globs.param_iter(), output_extras):
+ input_param = file_names_from_tasks_globs(input_spec, runtime_data, do_not_expand_single_job_tasks)
+ if flatten_input:
+ yield_param = (get_strings_in_nested_sequence(input_param),) + output_extra_param
+ else:
+ yield_param = (input_param, ) + output_extra_param
+ yield yield_param, yield_param
+ return iterator
+
+def files_custom_generator_param_factory (generator):
+ """
+ Factory for @files taking custom generators
+ wraps so that the generator swallows the extra runtime_data argument
+
+ """
+ def iterator(runtime_data):
+ for params in generator():
+ yield params, params
+ return iterator
+
+#_________________________________________________________________________________________
+
+# split_param_factory
+
+#_________________________________________________________________________________________
+def split_param_factory (input_files_task_globs, output_files_task_globs, *extra_params):
+ """
+ Factory for task_split
+ """
+ def iterator(runtime_data):
+ # flattened = False
+ # do_not_expand_single_job_tasks = True
+
+ #
+        #   substitute tasks / globs at runtime. No glob substitution for logging
+ #
+ input_param = file_names_from_tasks_globs(input_files_task_globs, runtime_data, True)
+ output_param = file_names_from_tasks_globs(output_files_task_globs, runtime_data)
+ output_param_logging = file_names_from_tasks_globs(output_files_task_globs.unexpanded_globs(), runtime_data)
+
+ yield (input_param, output_param) + extra_params, (input_param, output_param_logging) + extra_params
+
+
+
+ return iterator
+
+
+
+#_________________________________________________________________________________________
+
+# input_param_to_file_name_list
+
+#_________________________________________________________________________________________
+def input_param_to_file_name_list (input_params):
+ """
+ Common function for
+ collate_param_factory
+ transform_param_factory
+ subdivide_param_factory
+ Creates adapter object
+ Converts (on the fly) collection / iterator of input params
+ ==> generator of flat list of strings (file_names)
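+
+    e.g. (illustrative) the input param ("a.txt", ["b.txt", 3]) is yielded as
+        ("a.txt", ["b.txt", 3]), ["a.txt", "b.txt"]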
+ """
+ for per_job_input_param in input_params:
+ flattened_list_of_file_names = get_strings_in_nested_sequence(per_job_input_param)
+ yield per_job_input_param, flattened_list_of_file_names
+
+
+#_________________________________________________________________________________________
+
+#   list_input_param_to_file_name_list
+
+#_________________________________________________________________________________________
+def list_input_param_to_file_name_list (input_params):
+ """
+ Common function for
+ product_param_factory
+ Creates adapter object
+ Converts (on the fly) collection / iterator of nested (input params)
+ ==> generator of flat list of strings (file_names)
+ """
+ for per_job_input_param_list in input_params:
+ list_of_flattened_list_of_file_names = [ get_strings_in_nested_sequence(ii) for ii in per_job_input_param_list]
+ yield per_job_input_param_list, list_of_flattened_list_of_file_names
+
+
+#_________________________________________________________________________________________
+
+# yield_io_params_per_job
+
+#_________________________________________________________________________________________
+def yield_io_params_per_job (input_params,
+ file_names_transform,
+ extra_input_files_task_globs,
+ replace_inputs,
+ output_pattern,
+ extra_specs,
+ runtime_data,
+ iterator,
+ expand_globs_in_output = False):
+ """
+ Helper function for
+ transform_param_factory and
+ collate_param_factory and
+ subdivide_param_factory
+
+
+ *********************************************************
+ * *
+ * Bad (non-orthogonal) design here. Needs refactoring *
+ * *
+ *********************************************************
+
+ subdivide_param_factory requires globs patterns to be expanded
+
+ yield (function call parameters, display parameters)
+
+ all others
+
+ yield function call parameters
+
+
+ This means that
+
+ all but @subdivide have
+
+ for y in yield_io_params_per_job (...):
+ yield y, y
+
+ subdivide_param_factory has:
+
+ return yield_io_params_per_job
+
+ We would make everything more orthogonal but the current code makes collate easier to write...
+
+ collate_param_factory
+
+ for output_extra_params, grouped_params in groupby(sorted(io_params_iter, key = get_output_extras), key = get_output_extras):
+
+
+
+
+ """
+ #
+ # Add extra warning if no regular expressions match:
+ # This is a common class of frustrating errors
+ #
+ no_regular_expression_matches = True
+
+ for orig_input_param, filenames in input_params:
+
+
+ try:
+
+ #
+ # Should run job even if there are no file names, so long as there are input parameters...??
+ #
+ # if not orig_input_param:
+ if not filenames:
+ continue
+
+ #
+ # extra input has a mixture of input and output parameter behaviours:
+ # 1) If it contains tasks, the files from these are passed through unchanged
+            #   2) If it contains strings which look like file names / paths,
+ # these are transformed using regular expression, file component substitution etc.
+ # just like output params
+ #
+ # So we do (2) first, ignoring tasks, then (1)
+            if extra_input_files_task_globs is not None:
+
+ extra_inputs = extra_input_files_task_globs.file_names_transformed (filenames, file_names_transform)
+
+ #
+ # add or replace existing input parameters
+ #
+ if replace_inputs == t_extra_inputs.REPLACE_INPUTS:
+ input_param = file_names_from_tasks_globs(extra_inputs, runtime_data)
+ elif replace_inputs == t_extra_inputs.ADD_TO_INPUTS:
+ input_param = (orig_input_param,) + file_names_from_tasks_globs(extra_inputs, runtime_data)
+ else:
+ input_param = orig_input_param
+
+
+
+ # extras
+ extra_params = tuple( file_names_transform.substitute(filenames, p) for p in extra_specs)
+
+
+ if expand_globs_in_output:
+ #
+ # do regex substitution to complete glob pattern
+ # before glob matching
+ #
+ output_pattern_transformed = output_pattern.file_names_transformed (filenames, file_names_transform)
+ output_param = file_names_from_tasks_globs(output_pattern_transformed, runtime_data)
+ output_param_logging= file_names_from_tasks_globs(output_pattern_transformed.unexpanded_globs(), runtime_data)
+ yield ( (input_param, output_param ) + extra_params,
+ (input_param, output_param_logging ) + extra_params)
+ else:
+
+ # output
+ output_param = file_names_transform.substitute_output_files(filenames, output_pattern)
+ yield (input_param, output_param) + extra_params
+
+ no_regular_expression_matches = False
+
+ # match failures are ignored
+ except error_input_file_does_not_match:
+            if runtime_data is not None:
+                if "MATCH_FAILURE" not in runtime_data:
+                    runtime_data["MATCH_FAILURE"] = []
+ runtime_data["MATCH_FAILURE"].append(str(sys.exc_info()[1]).replace("\n", "").strip())
+ continue
+
+ # all other exceptions including malformed regexes are raised
+ except Exception:
+ #print sys.exc_info()
+ raise
+
+
+ #
+ # Add extra warning if no regular expressions match:
+ # This is a common class of frustrating errors
+ #
+    if no_regular_expression_matches:
+        if runtime_data is not None:
+ if "ruffus_WARNING" not in runtime_data:
+ runtime_data["ruffus_WARNING"] = defaultdict(set)
+ runtime_data["ruffus_WARNING"][iterator].add(err_msg_no_regex_match)
+
+
+
+#_________________________________________________________________________________________
+
+# subdivide_param_factory
+
+#_________________________________________________________________________________________
+def subdivide_param_factory (input_files_task_globs,
+ flatten_input,
+ file_names_transform,
+ extra_input_files_task_globs,
+ replace_inputs,
+ output_files_task_globs,
+ *extra_specs):
+ """
+ Factory for task_split (advanced form)
+ """
+ def iterator(runtime_data):
+
+ #
+ # Convert input file names, globs, and tasks -> a list of (nested) file names
+ # Each element of the list corresponds to the input parameters of a single job
+ #
+ input_params = file_names_from_tasks_globs(input_files_task_globs, runtime_data)
+
+ if flatten_input:
+ input_params = get_strings_in_nested_sequence(input_params)
+
+ if not len(input_params):
+ return []
+
+ return yield_io_params_per_job (input_param_to_file_name_list(sorted(input_params, key = lambda x: str(x))),
+ file_names_transform,
+ extra_input_files_task_globs,
+ replace_inputs,
+ output_files_task_globs,
+ extra_specs,
+ runtime_data,
+ iterator,
+ True)
+
+ return iterator
+
+
+
+
+#_________________________________________________________________________________________
+
+# combinatorics_param_factory
+
+#_________________________________________________________________________________________
+def combinatorics_param_factory(input_files_task_globs,
+ flatten_input,
+ combinatorics_type,
+ k_tuple,
+ file_names_transform,
+ extra_input_files_task_globs,
+ replace_inputs,
+ output_pattern,
+ *extra_specs):
+ """
+ Factory for task_combinations_with_replacement, task_combinations, task_permutations
+ """
+ def iterator(runtime_data):
+
+ #
+ # Convert input file names, globs, and tasks -> a list of (nested) file names
+ # Each element of the list corresponds to the input parameters of a single job
+ #
+ input_params = file_names_from_tasks_globs(input_files_task_globs, runtime_data)
+
+ if not len(input_params):
+ return
+
+ if flatten_input:
+ input_params = get_strings_in_nested_sequence(input_params)
+
+ if combinatorics_type == t_combinatorics_type.COMBINATORICS_PERMUTATIONS:
+ combinatoric_iter = itertools.permutations(input_params, k_tuple)
+ elif combinatorics_type == t_combinatorics_type.COMBINATORICS_COMBINATIONS:
+ combinatoric_iter = itertools.combinations(input_params, k_tuple)
+ elif combinatorics_type == t_combinatorics_type.COMBINATORICS_COMBINATIONS_WITH_REPLACEMENT:
+ combinatoric_iter = itertools.combinations_with_replacement(input_params, k_tuple)
+ else:
+ raise Exception("Unknown combinatorics type %d" % combinatorics_type)
+
+ for y in yield_io_params_per_job (list_input_param_to_file_name_list(combinatoric_iter),
+ file_names_transform,
+ extra_input_files_task_globs,
+ replace_inputs,
+ output_pattern,
+ extra_specs,
+ runtime_data,
+ iterator):
+ yield y, y
+
+ return iterator
+
+
+#_________________________________________________________________________________________
+
+# product_param_factory
+
+#_________________________________________________________________________________________
+def product_param_factory ( list_input_files_task_globs,
+ flatten_input,
+ file_names_transform,
+ extra_input_files_task_globs,
+ replace_inputs,
+ output_pattern,
+ *extra_specs):
+ """
+ Factory for task_product
+ """
+ def iterator(runtime_data):
+
+ #
+ # Convert input file names, globs, and tasks -> a list of (nested) file names
+ # Each element of the list corresponds to the input parameters of a single job
+ #
+ input_params_list = [ file_names_from_tasks_globs(ftg, runtime_data) for ftg in list_input_files_task_globs]
+
+ #
+ # ignore if empty list in any of all versus all
+ #
+ if not len(input_params_list):
+ return
+
+ for input_params in input_params_list:
+ if not len(input_params):
+ return
+
+ if flatten_input:
+ input_params_list = [get_strings_in_nested_sequence(ii) for ii in input_params_list]
+
+ for y in yield_io_params_per_job (list_input_param_to_file_name_list(itertools.product(*input_params_list)),
+ file_names_transform,
+ extra_input_files_task_globs,
+ replace_inputs,
+ output_pattern,
+ extra_specs,
+ runtime_data,
+ iterator):
+ yield y, y
+
+ return iterator
+
+
+
+#_________________________________________________________________________________________
+
+# transform_param_factory
+
+#_________________________________________________________________________________________
+def transform_param_factory (input_files_task_globs,
+ flatten_input,
+ file_names_transform,
+ extra_input_files_task_globs,
+ replace_inputs,
+ output_pattern,
+ *extra_specs):
+ """
+ Factory for task_transform
+ """
+ def iterator(runtime_data):
+
+ #
+ # Convert input file names, globs, and tasks -> a list of (nested) file names
+ # Each element of the list corresponds to the input parameters of a single job
+ #
+ input_params = file_names_from_tasks_globs(input_files_task_globs, runtime_data)
+
+ if flatten_input:
+ input_params = get_strings_in_nested_sequence(input_params)
+
+ if not len(input_params):
+ return
+
+
+ for y in yield_io_params_per_job (input_param_to_file_name_list(sorted(input_params, key = lambda x: str(x))),
+ file_names_transform,
+ extra_input_files_task_globs,
+ replace_inputs,
+ output_pattern,
+ extra_specs,
+ runtime_data,
+ iterator):
+ yield y, y
+
+ return iterator
+
+
+#_________________________________________________________________________________________
+
+# collate_param_factory
+
+#_________________________________________________________________________________________
+def collate_param_factory (input_files_task_globs,
+ flatten_input,
+ file_names_transform,
+ extra_input_files_task_globs,
+ replace_inputs,
+ output_pattern,
+ *extra_specs):
+ """
+ Factory for task_collate
+
+    Looks exactly like @transform except that all inputs which lead to the same output / extras are grouped together into a single job
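+
+    e.g. (illustrative) with regex(r".+\.(\d+)\.txt$") and an output pattern of r"\1.summary",
+        "a.1.txt" and "b.1.txt" both map to "1.summary" and so are combined into a single job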
+ """
+ #
+ def iterator(runtime_data):
+
+ #
+ # Convert input file names, globs, and tasks -> a list of (nested) file names
+ # Each element of the list corresponds to the input parameters of a single job
+ #
+ input_params = file_names_from_tasks_globs(input_files_task_globs, runtime_data)
+
+ if flatten_input:
+ input_params = get_strings_in_nested_sequence(input_params)
+
+ if not len(input_params):
+ return
+
+ io_params_iter = yield_io_params_per_job( input_param_to_file_name_list(sorted(input_params, key = lambda x: str(x))),
+ file_names_transform,
+ extra_input_files_task_globs,
+ replace_inputs,
+ output_pattern,
+ extra_specs,
+ runtime_data,
+ iterator)
+
+ #
+ # group job params if their output/extra params are identical
+ #
+        #   sort on the string representation of the output / extra params, then group on
+        #   the params themselves: identical items must be adjacent, and sorting on the
+        #   string representation guarantees that
+ get_output_extras = lambda x: x[1:]
+ get_output_extras_str = lambda x: str(x[1:])
+ for output_extra_params, grouped_params in groupby(sorted(io_params_iter, key = get_output_extras_str), key = get_output_extras):
+ #
+ # yield the different input params grouped into a tuple, followed by all the common params
+ # i.e. (input1, input2, input3), common_output, common_extra1, common_extra2...
+ #
+
+ # Use group by to avoid successive duplicate input_param (remember we have sorted)
+ # This works even with unhashable items!
+
+ params = (tuple(input_param for input_param, ignore in
+ groupby(g[0] for g in grouped_params)),) + output_extra_params
+
+
+ # the same params twice, once for use, once for display, identical in this case
+ yield params, params
+
+ return iterator
+
+
+#_________________________________________________________________________________________
+
+# merge_param_factory
+
+#_________________________________________________________________________________________
+def merge_param_factory (input_files_task_globs,
+ output_param,
+ *extra_params):
+ """
+ Factory for task_merge
+ """
+ #
+ def iterator(runtime_data):
+ # flattened = False
+ # do_not_expand_single_job_tasks = True
+ input_param = file_names_from_tasks_globs(input_files_task_globs, runtime_data, True)
+ yield_param = (input_param, output_param) + extra_params
+ yield yield_param, yield_param
+
+ return iterator
+
+
+#_________________________________________________________________________________________
+
+# originate_param_factory
+
+#_________________________________________________________________________________________
+def originate_param_factory (list_output_files_task_globs, extras):
+ """
+ Factory for task_originate
+ """
+ #
+ def iterator(runtime_data):
+ for output_files_task_globs in list_output_files_task_globs:
+ output_param = file_names_from_tasks_globs(output_files_task_globs, runtime_data)
+ output_param_logging = file_names_from_tasks_globs(output_files_task_globs.unexpanded_globs(), runtime_data)
+ yield (None, output_param) + tuple(extras), (None, output_param_logging) + tuple(extras)
+
+ return iterator
+
diff --git a/ruffus/graph.py b/ruffus/graph.py
new file mode 100644
index 0000000..1f61e63
--- /dev/null
+++ b/ruffus/graph.py
@@ -0,0 +1,1151 @@
+#!/usr/bin/env python
+from __future__ import print_function
+################################################################################
+#
+# graph.py
+#
+#
+# Copyright (c) 10/9/2009 Leo Goodstadt
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+#################################################################################
+"""
+ graph.py
+
+    provides support for directed acyclic graphs (DAGs)
+    with topological_sort
+
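+    e.g. (illustrative):
+
+        a = node("task_a")
+        b = node("task_b")
+        a.add_child(b)      # adds an edge task_a -> task_b
+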
+"""
+import sys, re, os
+
+# use simplejson in place of json for python < 2.6
+try:
+ import json
+except ImportError:
+ import simplejson
+ json = simplejson
+
+from collections import defaultdict
+from itertools import chain
+from .print_dependencies import *
+import tempfile
+import subprocess
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# class node
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+class graph_error(Exception):
+ def __init__(self, message):
+ self.message = message
+ def __str__ (self):
+ return self.message
+
+class error_duplicate_node_name(graph_error):
+ pass
+
+class node (object):
+ """
+ node
+        designed for directed acyclic graphs but can hold anything
+ contains lists of nodes and
+ dictionary to look up node name from node
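+
+        Example (an illustrative sketch; "a" and "b" are hypothetical task names):
+            a = node("a")
+            b = node("b")
+            a.add_child(b)                      # adds the edge a -> b
+            node.lookup_node_from_name("a")     # returns a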
+ """
+
+ _all_nodes = list()
+ _name_to_node = dict()
+ _global_node_index = 0
+
+ one_to_one = 0
+ many_to_many = 1
+ one_to_many = 2
+ many_to_one = 3
+
+ @staticmethod
+ def get_leaves ():
+ for n in node._all_nodes:
+ if len(n._inward) == 0:
+ yield n
+
+ @staticmethod
+ def get_roots ():
+ for n in node._all_nodes:
+ if len(n._outward) == 0:
+ yield n
+
+
+ @staticmethod
+ def count_nodes ():
+        return len(node._all_nodes)
+
+ @staticmethod
+ def dump_tree_as_str ():
+ """
+ dumps entire tree
+ """
+ return ("%d nodes " % node.count_nodes()) + "\n" + \
+ "\n".join([x.fullstr() for x in node._all_nodes])
+
+
+ @staticmethod
+ def lookup_node_from_name (name):
+ return node._name_to_node[name]
+
+ @staticmethod
+ def is_node (name):
+ return name in node._name_to_node
+
+
+ #_____________________________________________________________________________________
+
+ # init
+
+ #_____________________________________________________________________________________
+ def __init__ (self, name, **args):
+ """
+ each node has
+ _name
+ _inward : lists of incoming edges
+ _outward: lists of outgoing edges
+ """
+ #
+ # make sure node name is unique
+ #
+ if name in node._name_to_node:
+ raise error_duplicate_node_name("[%s] has already been added" % name)
+
+ self.__dict__.update(args)
+ self._inward = list()
+ self._outward= list()
+ self.args = args
+ self._name = name
+ self._signal = False
+ self._node_index = node._global_node_index
+ node._global_node_index += 1
+
+ #
+ # for looking up node for name
+ #
+ node._all_nodes.append(self)
+ node._name_to_node[name] = self
+
+ #_____________________________________________________________________________________
+
+ # add_child
+
+ #_____________________________________________________________________________________
+ def add_child(self, child, no_duplicates = True):
+ """
+ connect edges
+ """
+ # do not add duplicates
+ if no_duplicates and child in self._outward:
+ return child
+
+ self._outward.append(child)
+ child._inward.append(self)
+ return child
+
+ #_____________________________________________________________________________________
+
+ # inward/outward
+
+ #_____________________________________________________________________________________
+ def outward (self):
+ """
+ just in case we need to return inward when we mean outward!
+ (for reversed graphs)
+ """
+ return self._outward
+
+ def inward (self):
+ """
+        just in case we need to return outward when we mean inward!
+ (for reversed graphs)
+ """
+ return self._inward
+
+
+ #_____________________________________________________________________________________
+
+ # fullstr
+
+ #_____________________________________________________________________________________
+ def fullstr(self):
+ """
+ Full dump. Normally edges are not printed out
+ Everything is indented except name
+ """
+ self_desc = list()
+ for k,v in sorted(iter(self.__dict__.items()), key = lambda x_v: (0,x_v[0],x_v[1]) if x_v[0] == "_name" else (1,x_v[0],x_v[1])):
+ indent = " " if k != "_name" else ""
+ if k in ("_inward", "_outward"):
+ v = ",".join([x._name for x in v])
+ self_desc.append(indent + str(k) + "=" + str(v))
+ else:
+ self_desc.append(indent + str(k) + "=" + str(v))
+ return "\n".join(self_desc)
+
+ #_____________________________________________________________________________________
+
+ # __str__
+
+ #_____________________________________________________________________________________
+ def __str__ (self):
+ """
+ Dump.
+ Print everything except lists of edges
+ Everything is indented except name
+ """
+ self_desc = list()
+ for k,v in sorted(self.__dict__.items(), reverse=True):
+ indent = " " if k != "_name" else ""
+ if k[0] == '_':
+ continue
+ else:
+ self_desc.append(indent + str(k) + "=" + str(v))
+ return "\n".join(self_desc)
+
+
+
+ #_____________________________________________________________________________________
+
+ # signal
+ #
+ #_____________________________________________________________________________________
+ def signal (self, extra_data_for_signal = None):
+ """
+ Signals whether depth first search ends without this node
+ """
+ return self._signal
+
+
+
+
+
+
+#_____________________________________________________________________________________
+
+# node_to_json
+#
+#
+#_____________________________________________________________________________________
+class node_to_json(json.JSONEncoder):
+ """
+ output node using json
+ """
+ def default(self, obj):
+ print(str(obj))
+ if isinstance(obj, node):
+ return obj._name, {
+ "index": obj._node_index,
+ "signal": obj._signal,
+ "inward": [n._name for n in obj._inward],
+ "outward": [n._name for n in obj._outward],
+ }
+ return json.JSONEncoder.default(self, obj)
+
+
+
+
+
+
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# topological_sort_visitor
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+
+class topological_sort_visitor (object):
+ """
+ topological sort
+ used with DFS to find all nodes in topologically sorted order
+        Also finds all DAG-breaking cycles
+
+ """
+
+ IGNORE_NODE_SIGNAL = 0
+ NOTE_NODE_SIGNAL = 1
+ END_ON_SIGNAL = 2
+
+ #_____________________________________________________________________________________
+
+ # init
+
+ #_____________________________________________________________________________________
+ def __init__ (self, forced_dfs_nodes,
+ node_termination = END_ON_SIGNAL,
+ extra_data_for_signal = None):
+ """
+ list of saved results
+ """
+ self._forced_dfs_nodes = set(forced_dfs_nodes)
+ self._node_termination = node_termination
+
+ self._start_nodes = set()
+ self._back_edges = set()
+ self._back_nodes = set()
+ self._signalling_nodes = set()
+
+ # keep order for tree traversal later
+ self._examined_edges = list()
+ # keep order for topological sorted results
+ self._finished_nodes = list()
+
+ self._extra_data_for_signal = extra_data_for_signal
+
+
+ def combine_with (self, other):
+ """
+ combine the results of two visitors
+ (add other to self)
+ """
+ self._back_edges .update(other._back_edges)
+ self._back_nodes .update(other._back_nodes)
+ extra_finished_nodes = set(other._finished_nodes) - set(self._finished_nodes)
+ self._finished_nodes .extend(extra_finished_nodes)
+
+
+
+ #_____________________________________________________________________________________
+
+ # __str__
+
+ #_____________________________________________________________________________________
+ def __str__ (self):
+ """
+ for diagnostics
+ """
+ signalling_str = get_nodes_str ("Signalling", self._signalling_nodes)
+ finished_str = get_nodes_str ("Finished", self._finished_nodes)
+ forced_str = get_nodes_str ("Forced to run", self._forced_dfs_nodes)
+ start_str = get_nodes_str ("Start", self._start_nodes)
+ back_edges_str = get_edges_str ("back", self._back_edges)
+        return (""
+                    + finished_str
+                    + start_str
+                    + back_edges_str
+                    + signalling_str
+                    + forced_str
+                )
+
+ #_____________________________________________________________________________________
+
+ # not_dag
+
+ #_____________________________________________________________________________________
+ def not_dag (self):
+ """
+ back edges add circularity
+ """
+ return len(self._back_edges)
+
+ #_____________________________________________________________________________________
+
+ # dag_violating_edges
+
+ #_____________________________________________________________________________________
+ def dag_violating_edges (self):
+ """
+ back edges add circularity
+ """
+ return self._back_edges
+
+
+
+ #_____________________________________________________________________________________
+
+ # dag_violating_nodes
+
+ #_____________________________________________________________________________________
+ def dag_violating_nodes (self):
+ """
+        all nodes involved in cycles
+ """
+ return self._back_nodes
+
+ #_____________________________________________________________________________________
+
+ # identify_dag_violating_nodes_and_edges
+ #
+ #_____________________________________________________________________________________
+ def identify_dag_violating_nodes_and_edges (self):
+ """
+ find all nodes and edges in any cycles
+
+ All dag violating cycles are defined by the back edge identified in DFS.
+ All paths which go the other way: start at the to_node and end up at the from_node
+ are therefore also part of the cycle
+
+ """
+ if not len(self._back_edges):
+ return
+ cnt_examined_edges = len(self._examined_edges)
+
+ # add this to _back_edges at the end
+ cycle_edges = set()
+
+ #
+ # each cycle
+ # starts from the to_node of each back_edge and
+ # ends with the from_node of each back_edge
+ #
+ for cycle_to_node, cycle_from_node in self._back_edges:
+ start_search_from = 0
+ while 1:
+ #
+ # find start of cycle
+ for i, (f,t,n) in enumerate(self._examined_edges[start_search_from:]):
+ if f == cycle_from_node:
+ break
+
+ # no more cycles for this cycle_from_node/cycle_to_node pair
+ else:
+ break
+
+
+ #
+ # cycle end might be within the same pair
+ # if so, don't search the current (not the next) edge for the cycle end
+ #
+ # Otherwise incrementing search position avoids infinite loop
+ #
+ start_search_from = cycle_start = start_search_from + i
+ if self._examined_edges[cycle_start][1] != cycle_to_node:
+ start_search_from += 1
+
+ for i, (f,t,n) in enumerate(self._examined_edges[start_search_from:]):
+
+ #
+ # found end of cycle
+ #
+ if t == cycle_to_node:
+ cycle_end = start_search_from + i + 1
+ #
+ # ignore backtracked nodes which will not be part of the cycle
+ # we are essentially doing tree traversal here
+ #
+ backtracked_nodes = set()
+ for f,t,n in self._examined_edges[cycle_start:cycle_end]:
+ if t == None:
+ backtracked_nodes.add(n)
+ for f,t,n in self._examined_edges[cycle_start:cycle_end]:
+ if f == None or f in backtracked_nodes or t in backtracked_nodes:
+ continue
+ cycle_edges.add((f,t))
+ self._back_nodes.add(f)
+ self._back_nodes.add(t)
+ start_search_from = cycle_end
+ break
+
+ # if cycle_from_node comes around again, this is not a cycle
+ if cycle_from_node == f:
+ if not i:
+ i += 1
+ start_search_from = start_search_from + i
+ break
+
+
+ continue
+
+ # no more cycles for this cycle_from_node/cycle_to_node pair
+ else:
+ break
+
+ self._back_edges.update(cycle_edges)
+
+
+ #_____________________________________________________________________________________
+
+ # not_dag
+
+ #_____________________________________________________________________________________
+ def topological_sorted (self):
+ """
+ _finished_nodes
+ """
+ return self._finished_nodes
+
+
+
+
+ #_____________________________________________________________________________________
+
+ # terminate_before
+
+ #_____________________________________________________________________________________
+ def terminate_before(self, node):
+ """
+ Allow node to terminate this path in DFS without including itself
+ (see terminate_at)
+
+        If the node is in _forced_dfs_nodes, that overrides what the node wants
+ """
+
+ #
+        # If _node_termination = IGNORE_NODE_SIGNAL
+ # always go through whole tree
+ #
+ if self._node_termination == self.IGNORE_NODE_SIGNAL:
+ return False
+
+ #
+        # If _node_termination = NOTE_NODE_SIGNAL
+ # always go through whole tree but remember
+ # which nodes want to terminate
+ #
+ # Note that _forced_dfs_nodes is ignored
+ #
+ if self._node_termination == self.NOTE_NODE_SIGNAL:
+ if node.signal(self._extra_data_for_signal):
+ self._signalling_nodes.add(node)
+ return False
+
+ #
+ # _forced_dfs_nodes always overrides node preferences
+ # but let us save what the node says anyway for posterity
+ #
+ if node in self._forced_dfs_nodes:
+ ## Commented out code lets us save self_terminating_nodes even when
+ ## they have been overridden by _forced_dfs_nodes
+ #if node.signal():
+ # self._signalling_nodes.add(node)
+ return False
+
+ #
+ # OK. Go by what the node wants then
+ #
+ if node.signal(self._extra_data_for_signal):
+ self._signalling_nodes.add(node)
+ return True
+ return False
+
+
+
+
+ #_____________________________________________________________________________________
+
+ # call_backs
+
+ #_____________________________________________________________________________________
+ def discover_vertex(self, node):
+ pass
+ def start_vertex(self, node):
+ self._start_nodes.add(node)
+ def finish_vertex(self, node):
+ """
+ Save
+ 1) topologically sorted nodes
+ 2) as "None" (back) edges which allows _examined_edges to be traversed
+ like a tree
+
+ """
+ self._examined_edges.append((None, None, node))
+ self._finished_nodes.append(node)
+
+ def examine_edge(self, node_from, node_to):
+ """
+        Save edges as we encounter them so we can look for loops
+
+ """
+ self._examined_edges.append((node_from, node_to, None))
+ def back_edge(self, node_from, node_to):
+ self._back_edges.add((node_from, node_to))
+ def tree_edge(self, node_from, node_to):
+ pass
+ def forward_or_cross_edge(self, node_from, node_to):
+ pass
+ def terminate_at (self, node):
+ """
+ Terminate this line of DFS but include myself
+ """
+ return False
+
+#
+#_________________________________________________________________________________________
+
+# debug_print_visitor
+
+#_________________________________________________________________________________________
+class debug_print_visitor (object):
+ """
+ log progress through DFS: for debugging
+
+ """
+ def terminate_before(self, node):
+ return False
+ def terminate_at (self, node):
+ return False
+ def start_vertex(self, node):
+ print("s start vertex %s" % (node._name))
+ def finish_vertex(self, node):
+ print(" v finish vertex %s" % (node._name))
+ def discover_vertex(self, node):
+ print(" | discover vertex %s" % (node._name))
+ def examine_edge(self, node_from, node_to):
+ print(" -- examine edge %s -> %s" % (node_from._name, node_to._name))
+ def back_edge(self, node_from, node_to):
+ print(" back edge %s -> %s" % (node_from._name, node_to._name))
+ def tree_edge(self, node_from, node_to):
+ print(" - tree edge %s -> %s" % (node_from._name, node_to._name))
+ def forward_or_cross_edge(self, node_from, node_to):
+ print(" - forward/cross edge %s -> %s" % (node_from._name, node_to._name))
+
+
+
+
+
+
+
+
+
+
+
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Functions
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+
+#_________________________________________________________________________________________
+
+# depth first search
+
+#_________________________________________________________________________________________
+#
+#
+#
+WHITE = 0 # virgin
+GRAY = 1 # processing
+BLACK = 2 # finished
+
+def depth_first_visit(u, visitor, colours, outedges_func):
+ """
+ depth_first_visit
+ unused callbacks are commented out
+ """
+ # start processing this node: so gray
+ colours[u] = GRAY
+
+ stack = list()
+
+ #
+ # unused callback
+ #
+ #visitor.discover_vertex(u)
+
+ curr_edges = outedges_func(u)
+
+
+ if visitor.terminate_before(u):
+ colours[u] = BLACK
+ return
+ # If this vertex terminates the search, we push empty range
+ if visitor.terminate_at(u):
+ stack.append((u, curr_edges, len(curr_edges)))
+ else:
+ stack.append((u, curr_edges, 0))
+
+
+ while len(stack):
+ u, curr_edges, curr_edge_pos = stack.pop()
+ while curr_edge_pos < len(curr_edges):
+ v = curr_edges[curr_edge_pos]
+ visitor.examine_edge(u, v)
+ v_colour = colours[v]
+
+ if visitor.terminate_before(v):
+ colours[v] = BLACK
+ curr_edge_pos += 1
+ continue
+
+ if v_colour == WHITE:
+ #
+ # unused callback
+ #
+ #visitor.tree_edge(u, v)
+ curr_edge_pos += 1
+ stack.append((u, curr_edges, curr_edge_pos))
+ u = v
+ colours[u] = GRAY
+ #
+ # unused callback
+ #
+ #visitor.discover_vertex(u)
+ curr_edges = outedges_func(u)
+ curr_edge_pos = 0
+
+
+ if visitor.terminate_at(u):
+ break
+ elif v_colour == GRAY:
+ visitor.back_edge(u, v)
+ curr_edge_pos += 1
+ else:
+ #
+ # unused callback
+ #
+ #visitor.forward_or_cross_edge(u, v)
+ curr_edge_pos += 1
+ colours[u] = BLACK
+ visitor.finish_vertex(u)
+
+
+def depth_first_search(starting_nodes, visitor, outedges_func = node.outward):
+ """
+ depth_first_search
+    go through all starting points and run depth_first_visit on each of them
+ if they haven't been seen before
+ """
+ colours = defaultdict(int) # defaults to WHITE
+ if len(starting_nodes):
+ for start in starting_nodes:
+ if colours[start] == WHITE:
+ visitor.start_vertex(start)
+ depth_first_visit(start, visitor, colours, outedges_func)
+ else:
+
+ #
+ # go through all nodes, maintaining order
+ #
+ for start in node._all_nodes:
+ if colours[start] == WHITE:
+ visitor.start_vertex(start)
+ depth_first_visit(start, visitor, colours, outedges_func)
+
+
+
+#_________________________________________________________________________________________
+
+# topologically_sorted_nodes
+
+
+
+#_________________________________________________________________________________________
+def topologically_sorted_nodes( to_leaves,
+ force_start_from = [],
+ gather_all_non_signalled = True,
+ test_all_signals = False,
+ extra_data_for_signal = None,
+ checksum_level = None):
+ """
+ Get all nodes which are children of to_leaves
+ in topological sorted order
+
+ Defaults to including all nodes which are non-signalled
+ i.e. includes the *last* non-signalling node on each branch
+
+
+ Otherwise stops at each branch just before signalling node
+ i.e. includes the last non-signalling *run* on each branch
+
+
+ force_start_from
+ Optionally specify all the child nodes which *have* to
+ be included in the list at least
+ This will override any node signals
+
+ force_start_from = True to get the whole tree irrespective of signalling
+
+
+ Rewritten to minimise calls to node.signal()
+
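+    For example (an illustrative sketch; ``final_task`` is a hypothetical node)::
+
+        # return the whole tree below final_task, irrespective of signalling
+        whole_tree, _, _, _ = topologically_sorted_nodes([final_task],
+                                                         force_start_from = True)
+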
+ """
+
+ #
+    # go through entire tree, looking for signalling nodes,
+ # usually for debugging or for printing
+ #
+ if test_all_signals:
+ v = topological_sort_visitor([],
+ topological_sort_visitor.NOTE_NODE_SIGNAL,
+ extra_data_for_signal)
+ depth_first_search(to_leaves, v, node.outward)
+ signalling_nodes = v._signalling_nodes
+ else:
+ signalling_nodes = set()
+
+ if gather_all_non_signalled:
+ #
+ # get whole tree, ignoring signalling
+ #
+ v = topological_sort_visitor([],
+ topological_sort_visitor.IGNORE_NODE_SIGNAL,
+ None)
+ depth_first_search(to_leaves, v, node.outward)
+
+ #
+ # not dag: no further processing
+ #
+ if v.not_dag():
+ v.identify_dag_violating_nodes_and_edges ()
+ return (v.topological_sorted(), v._signalling_nodes, v.dag_violating_edges(),
+ v.dag_violating_nodes())
+
+
+ #
+ # return entire tree
+ #
+ if force_start_from == True:
+ return (v.topological_sorted(), v._signalling_nodes, v.dag_violating_edges(),
+ v.dag_violating_nodes())
+
+
+
+ #
+ # If we include these nodes anyway,
+ # why bother to check if they do not signal?
+ # Expensive signal checking should be minimised
+ #
+ nodes_to_include = set()
+ for n in force_start_from:
+ if n in nodes_to_include:
+ continue
+ nodes_to_include.add(n)
+ nodes_to_include.update(get_parent_nodes([n]))
+
+
+ reversed_nodes = v.topological_sorted()
+ for n in reversed_nodes:
+ if n in nodes_to_include:
+ continue
+
+ if not n.signal(extra_data_for_signal):
+ nodes_to_include.add(n)
+ nodes_to_include.update(get_parent_nodes([n]))
+ else:
+ signalling_nodes.add(n)
+ #sys.stderr.write(json.dumps(n, cls=node_to_json, sort_keys=1) + "\n")
+
+
+ return ([n for n in v.topological_sorted() if n in nodes_to_include],
+ signalling_nodes,
+ [],[])
+
+ else:
+
+ if force_start_from == True:
+ #
+ # get whole tree, ignoring signalling
+ #
+ v = topological_sort_visitor([],
+ topological_sort_visitor.IGNORE_NODE_SIGNAL,
+ extra_data_for_signal)
+ else:
+ #
+ # End at each branch without including signalling node
+ # but ignore signalling for forced_nodes_and_dependencies
+ #
+
+ # Get all parents of forced nodes if necessary
+ forced_nodes_and_dependencies = []
+ if len(force_start_from):
+ forced_nodes_and_dependencies = get_parent_nodes(force_start_from)
+
+ v = topological_sort_visitor( forced_nodes_and_dependencies,
+ topological_sort_visitor.END_ON_SIGNAL,
+ extra_data_for_signal)
+
+
+ #
+ # Forward graph iteration
+ #
+ depth_first_search(to_leaves, v, node.outward)
+
+ if v.not_dag():
+ v.identify_dag_violating_nodes_and_edges ()
+
+ signalling_nodes.update(v._signalling_nodes)
+ return (v.topological_sorted(), signalling_nodes, v.dag_violating_edges(), v.dag_violating_nodes())
+
+
+#
+def debug_print_nodes(to_leaves):
+ v = debug_print_visitor()
+ depth_first_search(to_leaves, v, node.outward)
+
+
+
+#_________________________________________________________________________________________
+
+# graph_printout
+
+#_________________________________________________________________________________________
+def graph_colour_demo_printout (stream,
+ output_format,
+ size = '11,8',
+ dpi = '120'):
+ """
+ Demo of the different colour schemes
+ """
+
+ if output_format == 'dot':
+ write_colour_scheme_demo_in_dot_format(stream)
+ return
+
+ # print to dot file
+ #temp_dot_file = tempfile.NamedTemporaryFile(suffix='.dot', delete=False)
+ fh, temp_dot_file_name = tempfile.mkstemp(suffix='.dot')
+ temp_dot_file = os.fdopen(fh, "w")
+
+ write_colour_scheme_demo_in_dot_format(temp_dot_file)
+ temp_dot_file.close()
+
+ print_dpi = ("-Gdpi='%s'" % dpi) if output_format != "svg" else ""
+ run_dot = os.popen("dot -Gsize='%s' %s -T%s < %s" % (size, print_dpi, output_format, temp_dot_file_name))
+
+ #
+    # weird bug fix for firefox and svg
+ #
+ result_str = run_dot.read()
+ err = run_dot.close()
+ if err:
+ raise RuntimeError("dot failed to run with exit code %d" % err)
+ if output_format == "svg":
+ result_str = result_str.replace("0.12", "0.0px")
+ stream.write(result_str)
+#_________________________________________________________________________________________
+
+# graph_printout_in_dot_format
+
+#_________________________________________________________________________________________
+def graph_printout_in_dot_format ( stream,
+ to_leaves,
+ force_start_from = [],
+ draw_vertically = True,
+ ignore_upstream_of_target = False,
+ skip_signalling_nodes = False,
+ gather_all_non_signalled = True,
+ test_all_signals = True,
+ no_key_legend = False,
+ minimal_key_legend = True,
+ user_colour_scheme = None,
+ pipeline_name = "Pipeline:",
+ extra_data_for_signal = None):
+ """
+ print out pipeline dependencies in dot formatting
+ """
+
+ (topological_sorted,
+ signalling_nodes,
+ dag_violating_edges,
+ dag_violating_nodes) = topologically_sorted_nodes(to_leaves, force_start_from,
+ gather_all_non_signalled,
+ test_all_signals,
+ extra_data_for_signal)
+
+ #
+ # N.B. For graph:
+ # upstream = parent
+ # dependents/downstream
+ # = children
+ #
+ #
+ nodes_to_display = get_reachable_nodes(to_leaves, not ignore_upstream_of_target)
+
+ #
+ # print out dependencies in dot format
+ #
+ write_flowchart_in_dot_format(topological_sorted,
+ signalling_nodes,
+ dag_violating_edges,
+ dag_violating_nodes,
+ stream,
+ to_leaves,
+ force_start_from,
+ nodes_to_display,
+ draw_vertically,
+ skip_signalling_nodes,
+ no_key_legend,
+ minimal_key_legend,
+ user_colour_scheme,
+ pipeline_name)
+
+#_________________________________________________________________________________________
+
+# graph_printout
+
+#_________________________________________________________________________________________
+def graph_printout (stream,
+ output_format,
+ to_leaves,
+ force_start_from = [],
+ draw_vertically = True,
+ ignore_upstream_of_target = False,
+ skip_signalling_nodes = False,
+ gather_all_non_signalled = True,
+ test_all_signals = True,
+ no_key_legend = False,
+ minimal_key_legend = True,
+ user_colour_scheme = None,
+ pipeline_name = "Pipeline:",
+ size = (11,8),
+ dpi = 120,
+ extra_data_for_signal = None):
+ """
+    print out pipeline dependencies in a variety of formats, using the program "dot"
+    as an intermediary
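+
+    For example (an illustrative sketch; "flowchart.png" and final_task_node are hypothetical)::
+
+        with open("flowchart.png", "wb") as out:
+            graph_printout(out, "png", to_leaves = [final_task_node])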
+ """
+
+ if output_format == 'dot':
+ graph_printout_in_dot_format ( stream,
+ to_leaves,
+ force_start_from,
+ draw_vertically,
+ ignore_upstream_of_target,
+ skip_signalling_nodes,
+ gather_all_non_signalled,
+ test_all_signals,
+ no_key_legend,
+ minimal_key_legend,
+ user_colour_scheme,
+ pipeline_name,
+ extra_data_for_signal)
+ return
+
+ # print to dot file
+ #temp_dot_file = tempfile.NamedTemporaryFile(suffix='.dot', delete=False)
+ fh, temp_dot_file_name = tempfile.mkstemp(suffix='.dot')
+ temp_dot_file = os.fdopen(fh, "w")
+
+ graph_printout_in_dot_format ( temp_dot_file,
+ to_leaves,
+ force_start_from,
+ draw_vertically,
+ ignore_upstream_of_target,
+ skip_signalling_nodes,
+ gather_all_non_signalled,
+ test_all_signals,
+ no_key_legend,
+ minimal_key_legend,
+ user_colour_scheme,
+ pipeline_name,
+ extra_data_for_signal)
+ temp_dot_file.close()
+
+ if isinstance(size, tuple):
+ print_size = "(%d,%d)" % size
+ elif isinstance(size, (str)):
+ print_size = size
+ else:
+ raise Exception("Flowchart print size [%s] should be specified as a tuple of X,Y in inches" % str(size))
+
+ #
+ # N.B. Resolution doesn't seem to play nice with SVG and is ignored
+ #
+ print_dpi = ("-Gdpi='%s'" % dpi) if output_format != "svg" else ""
+ cmd = "dot -Gsize='%s' %s -T%s < %s" % (print_size, print_dpi, output_format, temp_dot_file_name)
+
+
+ proc = subprocess.Popen(cmd, shell = True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+ result_str, error_str = proc.communicate()
+ retcode = proc.returncode
+ if retcode:
+ raise subprocess.CalledProcessError(retcode, cmd + "\n" + "\n".join([result_str, error_str]))
+
+
+
+ #run_dot = os.popen(cmd)
+ #result_str = run_dot.read()
+ #err = run_dot.close()
+ #
+ #if err:
+ # raise RuntimeError("dot failed to run with exit code %d" % err)
+
+
+ #
+    # weird workaround for bug / bad interaction between firefox and svg:
+ # Font sizes have "px" appended.
+ #
+
+
+ if output_format == "svg":
+ result_str = result_str.replace("0.12", "0.0px")
+ stream.write(result_str)
+
+
+
+#_________________________________________________________________________________________
+
+# get_parent_nodes
+
+#_________________________________________________________________________________________
+def get_parent_nodes (nodes):
+ """
+ Get all parent nodes by DFS in the inward direction,
+ Ignores signals
+ """
+ parent_visitor = topological_sort_visitor([],
+ topological_sort_visitor.IGNORE_NODE_SIGNAL)
+ depth_first_search(nodes, parent_visitor, node.inward)
+ return parent_visitor.topological_sorted()
+
+
+#_________________________________________________________________________________________
+
+# get_reachable_nodes
+
+#_________________________________________________________________________________________
+def get_reachable_nodes(nodes, parents_as_well = True):
+ """
+ Get all nodes which are parents and children of nodes
+ recursing through the entire tree
+
+ 1) specify parents_as_well = False
+ to only get children and not parents of nodes
+ """
+
+ # look for parents of nodes and start there instead
+ if parents_as_well:
+ nodes = get_parent_nodes (nodes)
+
+ child_visitor = topological_sort_visitor([],
+ topological_sort_visitor.IGNORE_NODE_SIGNAL)
+ depth_first_search(nodes, child_visitor, node.outward)
+ return child_visitor.topological_sorted()
+
+
+#_________________________________________________________________________________________
+
+# Helper functions to dump edges and nodes
+
+#_________________________________________________________________________________________
+def get_edges_str (name, edges):
+ """
+ helper function to dump edges as a list of names
+ """
+ edges_str = " %d %s edges\n" % (len(edges), name)
+ edges_str += " " + ", ".join([x_y[0]._name + "->" + x_y[1]._name for x_y in edges]) + "\n"
+ return edges_str
+def get_nodes_str (name, nodes):
+ """
+ helper function to dump nodes as a list of names
+ """
+ nodes_str = " %s nodes = %d\n" % (name, len(nodes))
+ nodes_str += " " + ", ".join([x._name for x in nodes]) + "\n"
+ return nodes_str
+
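+
+#_________________________________________________________________________________________
+
+#   __main__ demo
+#
+#   A minimal usage sketch (illustrative only, task names are hypothetical): shows how
+#   the node class and topologically_sorted_nodes() above fit together.
+
+#_________________________________________________________________________________________
+if __name__ == "__main__":
+    # three task chain: "compress" depends on "align", which depends on "fastq"
+    fastq    = node("fastq")
+    align    = node("align")
+    compress = node("compress")
+
+    # edges point from a task to the tasks it depends on
+    align.add_child(fastq)
+    compress.add_child(align)
+
+    # walk from the final task towards its dependencies and
+    # print the tasks in topologically sorted (dependency first) order
+    (sorted_nodes, signalling_nodes,
+     dag_violating_edges, dag_violating_nodes) = topologically_sorted_nodes([compress])
+    print([n._name for n in sorted_nodes])      # ['fastq', 'align', 'compress']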
+
diff --git a/ruffus/print_dependencies.py b/ruffus/print_dependencies.py
new file mode 100644
index 0000000..895f051
--- /dev/null
+++ b/ruffus/print_dependencies.py
@@ -0,0 +1,623 @@
+#!/usr/bin/env python
+################################################################################
+#
+# print_dependencies.py
+#
+#
+# Copyright (c) 10/9/2009 Leo Goodstadt
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+#################################################################################
+"""
+ print_dependencies.py
+
+ provides support for dependency trees
+
+"""
+
+#
+# Number of pre-canned colour schemes
+#
+CNT_COLOUR_SCHEMES = 8
+
+
+import types
+from .adjacent_pairs_iterate import adjacent_pairs_iterate
+from collections import defaultdict
+def _get_name (node):
+ """
+ Get name for node
+ use display_name or _name
+ """
+ if hasattr(node, "display_name"):
+ return node.display_name
+ elif hasattr(node, "_name"):
+ return node._name
+ else:
+        raise Exception("Unknown node type [%s] has neither _name nor display_name" % str(node))
+
+#_________________________________________________________________________________________
+
+# Helper functions for dot format
+
+#_________________________________________________________________________________________
+def attributes_to_str (attributes, name):
+ """
+ helper function for dot format
+ turns dictionary into a=b, c=d...
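+
+    e.g. (an illustrative sketch)::
+
+        attributes_to_str({"color" : "black"}, "my_task")
+        # returns '[color=black, label="my_task"];' followed by a newline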
+ """
+
+ # remove ugly __main__. qualifier
+ name = name.replace("__main__.", "")
+
+ # if a label is specified, that overrides the node name
+ if "label" not in attributes:
+ attributes["label"] = name
+
+ # remove any quotes
+ if attributes["label"][0] == '<':
+ attributes["label"] = attributes["label"][1:-1]
+ html_label = True
+ else:
+ html_label = False
+ if attributes["label"][0] == '"':
+ attributes["label"] = attributes["label"][1:-1]
+
+ # add suffix / prefix
+ if "label_prefix" in attributes:
+ attributes["label"] = attributes["label_prefix"] + attributes["label"]
+ del attributes["label_prefix"]
+ if "label_suffix" in attributes:
+ attributes["label"] = attributes["label"] + attributes["label_suffix"]
+ del attributes["label_suffix"]
+
+ # restore quotes
+ if html_label:
+ attributes["label"] = '<' + attributes["label"] + '>'
+ else:
+ attributes["label"] = '"' + attributes["label"] + '"'
+
+ # support for html labels
+ #if "<" in name and ">" in name:
+ # attributes["label"] = '<' + name + '>'
+ #else:
+ # attributes["label"] = '"' + name + '"'
+
+ return "[" + ", ".join ("%s=%s" % (k,v) for k,v in sorted(attributes.items())) + "];\n"
+
+
+
+#_________________________________________________________________________________________
+#
+# get_arrow_str_for_legend_key
+#_________________________________________________________________________________________
+def get_arrow_str_for_legend_key (from_task_type, to_task_type, n1, n2, colour_scheme):
+ """
+ Get dot format for arrows inside legend key
+ """
+ if "Vicious cycle" in (from_task_type, to_task_type):
+ return ("%s -> %s[color=%s, arrowtype=normal];\n" % (n1, n2, colour_scheme["Vicious cycle"]["linecolor"]) +
+ "%s -> %s[color=%s, arrowtype=normal];\n" % (n2, n1, colour_scheme["Vicious cycle"]["linecolor"]))
+ if from_task_type in ("Final target", "Task to run",
+ "Up-to-date task forced to rerun",
+ "Explicitly specified task"):
+ return "%s -> %s[color=%s, arrowtype=normal];\n" % (n1, n2, colour_scheme["Task to run"]["linecolor"])
+ elif from_task_type in ("Up-to-date task", "Down stream","Up-to-date Final target"):
+ return "%s -> %s[color=%s, arrowtype=normal];\n" % (n1, n2, colour_scheme["Up-to-date"]["linecolor"])
+ #
+ # shouldn't be here!!
+ #
+ else:
+ return "%s -> %s[color=%s, arrowtype=normal];\n" % (n1, n2, colour_scheme["Up-to-date"]["linecolor"])
+
+
+#_________________________________________________________________________________________
+#
+# get_default_colour_scheme
+#_________________________________________________________________________________________
+def get_default_colour_scheme(default_colour_scheme_index = 0):
+ """
+ A selection of default colour schemes "inspired" by entries in
+ http://kuler.adobe.com/#create/fromacolor
+ """
+
+ if default_colour_scheme_index ==0:
+ bluey_outline = '"#0044A0"'
+ bluey = '"#EBF3FF"'
+ greeny_outline = '"#006000"'
+ greeny = '"#B8CC6E"'
+ orangey = '"#EFA03B"'
+ orangey_outline= greeny_outline
+ ruddy = '"#FF3232"'
+ elif default_colour_scheme_index ==1:
+ bluey_outline = '"#000DDF"'
+ bluey = 'transparent'
+ greeny_outline = '"#4B8C2E"'
+ greeny = '"#9ED983"'
+ orangey = '"#D98100"'
+ orangey_outline= '"#D9D911"'
+ ruddy = '"#D93611"'
+ elif default_colour_scheme_index ==2:
+ bluey_outline = '"#4A64A5"'
+ bluey = 'transparent'
+ greeny_outline = '"#4A92A5"'
+ greeny = '"#99D1C1"'
+ orangey = '"#D2C24A"'
+ orangey_outline= greeny_outline
+ ruddy = '"#A54A64"'
+ elif default_colour_scheme_index ==3:
+ bluey_outline = '"#BFB5FF"'
+ bluey = 'transparent'
+ greeny_outline = '"#7D8A2E"'
+ greeny = '"#C9D787"'
+ orangey = '"#FFF1DC"'
+ orangey_outline= greeny_outline
+ ruddy = '"#FF3E68"'
+ elif default_colour_scheme_index ==4:
+ bluey_outline = '"#004460"'
+ bluey = 'transparent'
+ greeny_outline = '"#4B6000"'
+ greeny = '"#B8CC6E"'
+ orangey = '"#FFF0A3"'
+ orangey_outline= greeny_outline
+ ruddy = '"#F54F29"'
+ elif default_colour_scheme_index ==5:
+ bluey_outline = '"#1122FF"'
+ bluey = '"#AABBFF"'
+ greeny_outline = '"#007700"'
+ greeny = '"#44FF44"'
+ orangey = '"#EFA03B"'
+ orangey_outline= '"#FFCC3B"'
+ ruddy = '"#FF0000"'
+ elif default_colour_scheme_index ==6:
+ bluey_outline = '"#0044A0"'
+ bluey = '"#EBF3FF"'
+ greeny_outline = 'black'
+ greeny = '"#6cb924"'
+ orangey = '"#ece116"'
+ orangey_outline= greeny_outline
+ ruddy = '"#FF3232"'
+ else:
+ bluey_outline = '"#87BAE4"'
+ bluey = 'transparent'
+ greeny_outline = '"#87B379"'
+ greeny = '"#D3FAE3"'
+ orangey = '"#FDBA40"'
+ orangey_outline= greeny_outline
+ ruddy = '"#b9495e"'
+ default_colour_scheme = defaultdict(dict)
+ default_colour_scheme["Vicious cycle"]["linecolor"] = ruddy
+ default_colour_scheme["Pipeline"]["fontcolor"] = ruddy
+ default_colour_scheme["Key"]["fontcolor"] = "black"
+ default_colour_scheme["Key"]["fillcolor"] = '"#F6F4F4"'
+ default_colour_scheme["Task to run"]["linecolor"] = bluey_outline
+ default_colour_scheme["Up-to-date"]["linecolor"] = "gray"
+ default_colour_scheme["Final target"]["fillcolor"] = orangey
+ default_colour_scheme["Final target"]["fontcolor"] = "black"
+ default_colour_scheme["Final target"]["color"] = "black"
+ default_colour_scheme["Final target"]["dashed"] = 0
+ default_colour_scheme["Vicious cycle"]["fillcolor"] = ruddy
+ default_colour_scheme["Vicious cycle"]["fontcolor"] = 'white'
+ default_colour_scheme["Vicious cycle"]["color"] = "white"
+ default_colour_scheme["Vicious cycle"]["dashed"] = 0
+ default_colour_scheme["Up-to-date task"]["fillcolor"] = greeny
+ default_colour_scheme["Up-to-date task"]["fontcolor"] = greeny_outline
+ default_colour_scheme["Up-to-date task"]["color"] = greeny_outline
+ default_colour_scheme["Up-to-date task"]["dashed"] = 0
+ default_colour_scheme["Down stream"]["fillcolor"] = "white"
+ default_colour_scheme["Down stream"]["fontcolor"] = "gray"
+ default_colour_scheme["Down stream"]["color"] = "gray"
+ default_colour_scheme["Down stream"]["dashed"] = 0
+ default_colour_scheme["Explicitly specified task"]["fillcolor"] = "transparent"
+ default_colour_scheme["Explicitly specified task"]["fontcolor"] = "black"
+ default_colour_scheme["Explicitly specified task"]["color"] = "black"
+ default_colour_scheme["Explicitly specified task"]["dashed"] = 0
+ default_colour_scheme["Task to run"]["fillcolor"] = bluey
+ default_colour_scheme["Task to run"]["fontcolor"] = bluey_outline
+ default_colour_scheme["Task to run"]["color"] = bluey_outline
+ default_colour_scheme["Task to run"]["dashed"] = 0
+ default_colour_scheme["Up-to-date task forced to rerun"]["fillcolor"] = 'transparent'
+ default_colour_scheme["Up-to-date task forced to rerun"]["fontcolor"] = bluey_outline
+ default_colour_scheme["Up-to-date task forced to rerun"]["color"] = bluey_outline
+ default_colour_scheme["Up-to-date task forced to rerun"]["dashed"] = 1
+ default_colour_scheme["Up-to-date Final target"]["fillcolor"] = orangey
+ default_colour_scheme["Up-to-date Final target"]["fontcolor"] = orangey_outline
+ default_colour_scheme["Up-to-date Final target"]["color"] = orangey_outline
+ default_colour_scheme["Up-to-date Final target"]["dashed"] = 0
+
+
+
+ if default_colour_scheme_index ==6:
+ default_colour_scheme["Vicious cycle"]["fontcolor"] = 'black'
+ default_colour_scheme["Task to run"]["fillcolor"] = '"#5f52ee"'
+ default_colour_scheme["Task to run"]["fontcolor"] = "lightgrey"
+ default_colour_scheme["Up-to-date Final target"]["fontcolor"] = '"#EFA03B"'
+
+ return default_colour_scheme
+
+
+#_________________________________________________________________________________________
+#
+# get_dot_format_for_task_type
+#_________________________________________________________________________________________
+def get_dot_format_for_task_type (task_type, attributes, colour_scheme, used_formats):
+ """
+ Look up appropriate colour and style for each type of task
+ """
+ used_formats.add(task_type)
+ for color_type in ("fontcolor", "fillcolor", "color"):
+ attributes[color_type] = colour_scheme[task_type][color_type]
+ attributes["style"]="filled"
+ if colour_scheme[task_type]["dashed"]:
+ attributes["style"]="dashed"
+
+
+#_________________________________________________________________________________________
+#
+# write_legend_key
+#_________________________________________________________________________________________
+def write_legend_key (stream, used_task_types, minimal_key_legend, colour_scheme, key_name = "Key:", subgraph_index = 1):
+ """
+ Write legend/key to dependency tree graph
+ """
+ if not len(used_task_types):
+ return
+
+
+ stream.write( 'subgraph clusterkey%d\n' % subgraph_index)
+ stream.write( '{\n')
+
+ stream.write( 'rank="min";\n')
+ stream.write( 'style=filled;\n')
+ stream.write( 'fontsize=20;\n')
+ stream.write( 'color=%s;\n' % (colour_scheme["Key"]["fillcolor"]))
+ stream.write( 'label = "%s";\n' % key_name)
+ stream.write( 'fontcolor = %s;' % (colour_scheme["Key"]["fontcolor"]))
+ stream.write( 'node[margin="0.2,0.2", fontsize="14"];\n')
+
+
+ #
+ # Only include used task types
+ #
+ all_task_types = [
+ "Vicious cycle" ,
+ "Down stream" ,
+ "Up-to-date task" ,
+ "Explicitly specified task" ,
+ "Task to run" ,
+ "Up-to-date task forced to rerun" ,
+ "Up-to-date Final target" ,
+ "Final target" ,]
+ if not minimal_key_legend:
+ used_task_types |= set(all_task_types)
+ wrapped_task_types = [
+ "Vicious cycle" ,
+ "Down stream" ,
+ "Up-to-date task" ,
+ "Explicitly specified task" ,
+ "Task to run" ,
+ "Up-to-date task\\nforced to rerun" ,
+ "Up-to-date\\nFinal target" ,
+ "Final target" ,]
+ wrapped_task_types = dict(zip(all_task_types, wrapped_task_types))
+
+
+ def outputkey (key, task_type, stream):
+ ignore_used_task_types = set()
+ attributes = dict()
+ attributes["shape"]="box3d"
+ #attributes["shape"] = "rect"
+ get_dot_format_for_task_type (task_type, attributes, colour_scheme, ignore_used_task_types)
+ #attributes["fontsize"] = '15'
+ stream.write(key + attributes_to_str(attributes, wrapped_task_types[task_type]))
+
+
+ sorted_used_task_types = []
+ for t in all_task_types:
+ if t in used_task_types:
+ sorted_used_task_types.append(t)
+
+ # print first key type
+ outputkey("k1_%d" % subgraph_index, sorted_used_task_types[0], stream)
+
+ for i, (from_task_type, to_task_type) in enumerate(adjacent_pairs_iterate(sorted_used_task_types)):
+ from_key = 'k%d_%d' % (i + 1, subgraph_index)
+ to_key = 'k%d_%d' % (i + 2, subgraph_index)
+ # write key
+ outputkey(to_key, to_task_type, stream)
+ # connection between keys
+ stream.write(get_arrow_str_for_legend_key (from_task_type, to_task_type, from_key, to_key, colour_scheme))
+
+
+ stream.write("}\n")
+
+
+#_________________________________________________________________________________________
+
+# write_colour_scheme_demo_in_dot_format
+
+#_________________________________________________________________________________________
+def write_colour_scheme_demo_in_dot_format(stream):
+ """
+ Write all the colour schemes in different colours
+ """
+ stream.write( 'digraph "Colour schemes"\n{\n')
+ stream.write( 'size="8,11";\n')
+ stream.write( 'splines=true;\n')
+ stream.write( 'fontsize="30";\n')
+ stream.write( 'ranksep = 0.3;\n')
+ stream.write( 'node[fontsize="20"];\n')
+ for colour_scheme_index in range(CNT_COLOUR_SCHEMES):
+ colour_scheme = get_default_colour_scheme(colour_scheme_index)
+ write_legend_key (stream, set("test"), False, colour_scheme, "Colour Scheme %d" % colour_scheme_index, colour_scheme_index)
+ stream.write("}\n")
+
+
+#_________________________________________________________________________________________
+
+# write_flowchart_in_dot_format
+
+#_________________________________________________________________________________________
+def write_flowchart_in_dot_format( jobs_to_run,
+ up_to_date_jobs,
+ dag_violating_edges,
+ dag_violating_nodes,
+ stream,
+ target_jobs,
+ forced_to_run_jobs = [],
+ all_jobs = None,
+ vertical = True,
+ skip_uptodate_tasks = False,
+ no_key_legend = False,
+ minimal_key_legend = True,
+ user_colour_scheme = None,
+ pipeline_name = "Pipeline:"):
+ """
+ jobs_to_run = pipeline jobs which are not up to date or have dependencies
+ which are not up to date
+
+ vertical = print flowchart vertically
+ minimal_key_legend = only print used task types in key legend
+ user_colour_scheme = dictionary for overriding default colours
+ Colours can be names e.g. "black" or quoted hex e.g. '"#F6F4F4"'
+ Default values will be used unless specified
+
+ key = "colour_scheme_index": index of default colour scheme
+ key = "Final target"
+ "Explicitly specified task"
+ "Task to run"
+ "Down stream"
+ "Up-to-date Final target"
+ "Up-to-date task forced to rerun"
+ "Up-to-date task"
+ "Vicious cycle"
+ Specifies colours for each task type
+
+ Subkey = "fillcolor"
+ "fontcolor"
+ "color"
+ "dashed" = 0/1
+
+
+ key = "Vicious cycle"
+ "Task to run"
+ "Up-to-date"
+
+ Subkey = "linecolor"
+ Specifying colours for arrows between tasks
+
+ key = "Pipeline"
+
+ Subkey = "fontcolor"
+ Specifying flowchart title colour
+
+ key = "Key"
+
+ Subkey = "fontcolor"
+ "fillcolor"
+ Specifies legend colours
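+
+    For example (an illustrative sketch; the values are hypothetical overrides, not defaults)::
+
+        user_colour_scheme = {
+                                "colour_scheme_index" : 1,
+                                "Pipeline"            : {"fontcolor" : '"#FF3232"'},
+                                "Task to run"         : {"fillcolor" : '"#EBF3FF"'},
+                             }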
+ """
+ if user_colour_scheme == None:
+ colour_scheme = get_default_colour_scheme()
+ else:
+ if "colour_scheme_index" in user_colour_scheme:
+ colour_scheme_index = user_colour_scheme["colour_scheme_index"]
+ else:
+ colour_scheme_index = 0
+ colour_scheme = get_default_colour_scheme(colour_scheme_index)
+ for k, v in user_colour_scheme.items():
+ if k not in colour_scheme:
+ continue
+ if isinstance(v, dict):
+ colour_scheme[k].update(v)
+ else:
+ colour_scheme[k] = v
+
+ up_to_date_jobs = set(up_to_date_jobs)
+
+ #
+ # cases where child points back to ancestor
+ #
+ dag_violating_dependencies = set(dag_violating_edges)
+
+
+
+ stream.write( 'digraph "%s"\n{\n' % pipeline_name)
+ stream.write( 'size="8,11";\n')
+ stream.write( 'splines=true;\n')
+ stream.write( 'fontsize="30";\n')
+ stream.write( 'ranksep = 0.3;\n')
+ stream.write( 'node[fontsize="20"];\n')
+ stream.write( 'graph[clusterrank="local"];\n')
+
+ if not vertical:
+ stream.write( 'rankdir="LR";\n')
+ stream.write( 'subgraph clustertasks\n'
+ "{\n")
+ stream.write( 'rank="min";\n')
+ stream.write( 'fontcolor = %s;\n' % colour_scheme["Pipeline"]["fontcolor"])
+ stream.write( 'label = "%s";\n' % pipeline_name)
+ #if vertical:
+ # stream.write( 'edge[minlen=2];\n')
+ delayed_task_strings = list()
+ vicious_cycle_task_strings = list()
+
+ #
+ # all jobs should be specified
+ # this is a bad fall-back
+ # because there is no guarantee that we are printing what we want to print
+ if all_jobs == None:
+        all_jobs = node._all_nodes
+
+
+ used_task_types = set()
+
+ #
+    # duplicates definitions in graph.py: bad practice
+ #
+ one_to_one = 0
+ many_to_many = 1
+ one_to_many = 2
+ many_to_one = 3
+
+ for n in all_jobs:
+ attributes = dict()
+ attributes["shape"]="box3d"
+ #attributes["shape"] = "rect"
+ if hasattr(n, "single_multi_io"):
+ if n.single_multi_io == one_to_many:
+ attributes["shape"] = "house"
+ attributes["peripheries"] = 2
+ elif n.single_multi_io == many_to_one:
+ attributes["shape"] = "invhouse"
+ attributes["height"] = 1.1
+ attributes["peripheries"] = 2
+
+ #
+ # circularity violating DAG: highlight in red
+ #
+ if n in dag_violating_nodes:
+ get_dot_format_for_task_type ("Vicious cycle", attributes, colour_scheme, used_task_types)
+ vicious_cycle_task_strings.append('t%d' % n._node_index + attributes_to_str(attributes, _get_name(n)))
+ #
+ # these jobs will be run
+ #
+ elif n in jobs_to_run:
+
+ #
+ # up to date but forced to run: outlined in blue
+ #
+ if n in forced_to_run_jobs:
+ get_dot_format_for_task_type ("Explicitly specified task", attributes, colour_scheme, used_task_types)
+
+ #
+ # final target: outlined in orange
+ #
+ elif n in target_jobs:
+ get_dot_format_for_task_type("Final target", attributes, colour_scheme, used_task_types)
+
+ #
+ # up to date dependency but forced to run: outlined in green
+ #
+ elif n in up_to_date_jobs:
+ get_dot_format_for_task_type ("Up-to-date task forced to rerun" , attributes, colour_scheme, used_task_types)
+
+ else:
+ get_dot_format_for_task_type ("Task to run", attributes, colour_scheme, used_task_types)
+
+ #
+ # graphviz attributes override other definitions
+ # presume the user knows what she is doing!
+ #
+ if(hasattr(n,'graphviz_attributes')):
+ for k in n.graphviz_attributes:
+ attributes[k]=n.graphviz_attributes[k]
+ stream.write('t%d' % n._node_index + attributes_to_str(attributes, _get_name(n)))
+
+ else:
+ #
+ # these jobs are up to date and will not be re-run
+ #
+
+ if not skip_uptodate_tasks:
+ if n in target_jobs:
+ get_dot_format_for_task_type ("Up-to-date Final target" , attributes, colour_scheme, used_task_types)
+
+ elif n in up_to_date_jobs:
+ get_dot_format_for_task_type ("Up-to-date task" , attributes, colour_scheme, used_task_types)
+
+ #
+ # these jobs will be ignored: gray with gray dependencies
+ #
+ else:
+ get_dot_format_for_task_type ("Down stream" , attributes, colour_scheme, used_task_types)
+ delayed_task_strings.append('t%d' % n._node_index + attributes_to_str(attributes, _get_name(n)))
+ for o in n.outward():
+ delayed_task_strings.append('t%d -> t%d[color=%s, arrowtype=normal];\n' % (o._node_index, n._node_index, colour_scheme["Up-to-date"]["linecolor"]))
+ continue
+
+ #
+ # graphviz attributes override other definitions
+ # presume the user knows what she is doing!
+ #
+ if(hasattr(n,'graphviz_attributes')):
+ for k in n.graphviz_attributes:
+ attributes[k]=n.graphviz_attributes[k]
+ stream.write('t%d' % n._node_index + attributes_to_str(attributes, _get_name(n)))
+
+ #
+ # write edges
+ #
+ unconstrained = False
+ for o in sorted(n.outward(), reverse=True, key = lambda x: x._node_index):
+ #
+ # circularity violating DAG: highlight in red: should never be a constraint
+ # in drawing the graph
+ #
+ if (n, o) in dag_violating_dependencies:
+ constraint_str = ", constraint=false" if o._node_index > n._node_index else ""
+ vicious_cycle_task_strings.append('t%d -> t%d[color=%s %s];\n' % (o._node_index, n._node_index, colour_scheme["Vicious cycle"]["linecolor"], constraint_str))
+ continue
+ elif not o in jobs_to_run or not n in jobs_to_run:
+ if not skip_uptodate_tasks:
+ edge_str = 't%d -> t%d[color=%s, arrowtype=normal];\n' % (o._node_index, n._node_index, colour_scheme["Up-to-date"]["linecolor"])
+ if unconstrained:
+ delayed_task_strings.append(edge_str)
+ else:
+ stream.write(edge_str)
+ else:
+ stream.write('t%d -> t%d[color=%s];\n' % (o._node_index, n._node_index, colour_scheme["Task to run"]["linecolor"]))
+ unconstrained = True
+
+ for l in delayed_task_strings:
+ stream.write(l)
+
+ #
+    # write vicious cycle edges at the end so they do not constrain the graph layout
+ #
+ for l in vicious_cycle_task_strings:
+ stream.write(l)
+ stream.write( '}\n')
+
+
+ if not no_key_legend:
+ write_legend_key (stream, used_task_types, minimal_key_legend, colour_scheme)
+ stream.write("}\n")
+
+
diff --git a/ruffus/proxy_logger.py b/ruffus/proxy_logger.py
new file mode 100644
index 0000000..317beed
--- /dev/null
+++ b/ruffus/proxy_logger.py
@@ -0,0 +1,397 @@
+#!/usr/bin/env python
+################################################################################
+#
+# proxy_logger.py
+#
+#
+# Copyright (c) 10/9/2009 Leo Goodstadt
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+#################################################################################
+"""
+****************************************************************************
+Create proxy for logging for use with multiprocessing
+****************************************************************************
+
+These can be safely sent (marshalled) across process boundaries
+
+
+===========
+Example 1
+===========
+
+ Set up logger from config file::
+
+ from proxy_logger import *
+ args={}
+ args["config_file"] = "/my/config/file"
+
+ (logger_proxy,
+ logging_mutex) = make_shared_logger_and_proxy (setup_std_shared_logger,
+ "my_logger", args)
+
+
+===========
+Example 2
+===========
+
+ Log to file ``"/my/lg.log"`` in the specified format (Time / Log name / Event type / Message).
+
+ Delay file creation until first log.
+
+ Only log ``Debug`` messages
+
+ Other alternatives for the logging threshold (``args["level"]``) include
+
+ * ``logging.DEBUG``
+ * ``logging.INFO``
+ * ``logging.WARNING``
+ * ``logging.ERROR``
+ * ``logging.CRITICAL``
+
+ ::
+
+ from proxy_logger import *
+ args={}
+ args["file_name"] = "/my/lg.log"
+ args["formatter"] = "%(asctime)s - %(name)s - %(levelname)6s - %(message)s"
+ args["delay"] = True
+ args["level"] = logging.DEBUG
+
+ (logger_proxy,
+ logging_mutex) = make_shared_logger_and_proxy (setup_std_shared_logger,
+ "my_logger", args)
+
+===========
+Example 3
+===========
+
+ Rotate log files every 20 Kb, with up to 10 backups.
+ ::
+
+ from proxy_logger import *
+ args={}
+ args["file_name"] = "/my/lg.log"
+ args["rotating"] = True
+ args["maxBytes"]=20000
+ args["backupCount"]=10
+ (logger_proxy,
+ logging_mutex) = make_shared_logger_and_proxy (setup_std_shared_logger,
+ "my_logger", args)
+
+
+
+==============
+To use:
+==============
+
+ ::
+
+ (logger_proxy,
+ logging_mutex) = make_shared_logger_and_proxy (setup_std_shared_logger,
+ "my_logger", args)
+
+ with logging_mutex:
+ my_log.debug('This is a debug message')
+ my_log.info('This is an info message')
+ my_log.warning('This is a warning message')
+ my_log.error('This is an error message')
+ my_log.critical('This is a critical error message')
+ my_log.log(logging.DEBUG, 'This is a debug message')
+
+ Note that the logging function ``exception()`` is not included because python
+ stack trace information is not well-marshalled
+ (`pickle <http://docs.python.org/library/pickle.html>`_\ d) across processes.
+
+"""
+
+
+
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# imports
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+import sys,os
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Shared logging
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+import multiprocessing
+import multiprocessing.managers
+
+
+import logging
+import logging.config
+import logging.handlers
+
+
+
+#
+# setup_logger
+#
+def setup_std_shared_logger(logger_name, args):
+ """
+    This function is a simple wrapper around the python
+ `logging <http://docs.python.org/library/logging.html>`_ module.
+
+ This *logger_factory* example creates logging objects which can
+ then be managed by proxy via ``ruffus.proxy_logger.make_shared_logger_and_proxy()``
+
+ This can be:
+
+ * a `disk log file <http://docs.python.org/library/logging.html#filehandler>`_
+    * an automatically backed-up `(rotating) log <http://docs.python.org/library/logging.html#rotatingfilehandler>`_.
+ * any log specified in a `configuration file <http://docs.python.org/library/logging.html#configuration-file-format>`_
+
+ These are specified in the ``args`` dictionary forwarded by ``make_shared_logger_and_proxy()``
+
+ :param logger_name: name of log
+ :param args: a dictionary of parameters forwarded from ``make_shared_logger_and_proxy()``
+
+ Valid entries include:
+
+ .. describe:: "level"
+
+ Sets the `threshold <http://docs.python.org/library/logging.html#logging.Handler.setLevel>`_ for the logger.
+
+ .. describe:: "config_file"
+
+ The logging object is configured from this `configuration file <http://docs.python.org/library/logging.html#configuration-file-format>`_.
+
+ .. describe:: "file_name"
+
+ Sets disk log file name.
+
+ .. describe:: "rotating"
+
+ Chooses a `(rotating) log <http://docs.python.org/library/logging.html#rotatingfilehandler>`_.
+
+ .. describe:: "maxBytes"
+
+ Allows the file to rollover at a predetermined size
+
+ .. describe:: "backupCount"
+
+ If backupCount is non-zero, the system will save old log files by appending the extensions ``.1``, ``.2``, ``.3`` etc., to the filename.
+
+ .. describe:: "delay"
+
+ Defer file creation until the log is written to.
+
+ .. describe:: "formatter"
+
+ `Converts <http://docs.python.org/library/logging.html#formatter-objects>`_ the message to a logged entry string.
+ For example,
+ ::
+
+ "%(asctime)s - %(name)s - %(levelname)6s - %(message)s"
+
+
+
+ """
+
+ #
+ # Log file name with logger level
+ #
+ new_logger = logging.getLogger(logger_name)
+ if "level" in args:
+ new_logger.setLevel(args["level"])
+
+ if "config_file" in args:
+ logging.config.fileConfig(args["config_file"])
+
+ else:
+ if "file_name" not in args:
+ raise Exception("Missing file name for log. Remember to set 'file_name'")
+ log_file_name = args["file_name"]
+
+ if "rotating" in args:
+ rotating_args = {}
+ # override default
+ rotating_args["maxBytes"]=args.get("maxBytes", 100000)
+ rotating_args["backupCount"]=args.get("backupCount", 5)
+ handler = logging.handlers.RotatingFileHandler( log_file_name, **rotating_args)
+ else:
+ defer_logging = "delay" in args
+ handler = logging.FileHandler( log_file_name, delay=defer_logging)
+
+ # %(name)s
+ # %(levelno)s
+ # %(levelname)s
+ # %(pathname)s
+ # %(filename)s
+ # %(module)s
+ # %(funcName)s
+ # %(lineno)d
+ # %(created)f
+ # %(relativeCreated)d
+ # %(asctime)s
+ # %(msecs)d
+ # %(thread)d
+ # %(threadName)s
+ # %(process)d
+ # %(message)s
+ #
+ # E.g.: "%(asctime)s - %(name)s - %(levelname)6s - %(message)s"
+ #
+ if "formatter" in args:
+ my_formatter = logging.Formatter(args["formatter"])
+ handler.setFormatter(my_formatter)
+
+ new_logger.addHandler(handler)
+
+ #
+ # This log object will be wrapped in proxy
+ #
+ return new_logger
+
+
+#
+# Proxy object for logging
+# Logging messages will be marshalled (forwarded) to the process where the
+# shared log lives
+#
+class LoggerProxy(multiprocessing.managers.BaseProxy):
+ def debug(self, *args, **kwargs):
+ return self._callmethod('debug', args, kwargs)
+ def log(self, *args, **kwargs):
+ return self._callmethod('log', args, kwargs)
+ def info(self, *args, **kwargs):
+ return self._callmethod('info', args, kwargs)
+ def warning(self, *args, **kwargs):
+ return self._callmethod('warning', args, kwargs)
+ def error(self, *args, **kwargs):
+ return self._callmethod('error', args, kwargs)
+ def critical(self, *args, **kwargs):
+ return self._callmethod('critical', args, kwargs)
+ def __str__ (self):
+ return "<LoggerProxy>"
+
+#
+# Register the logger factory (as "setup_logger") so that calls return LoggerProxy objects
+#
+# We use SyncManager as a base class so we can get a lock proxy for synchronising
+# logging later on
+#
+class LoggingManager(multiprocessing.managers.SyncManager):
+ """
+ Logging manager sets up its own process and will create the real Log object there
+ We refer to this (real) log via proxies
+ """
+ pass
+
+
+
+
+def make_shared_logger_and_proxy (logger_factory, logger_name, args):
+ """
+ Make a `logging <http://docs.python.org/library/logging.html>`_ object
+ called "\ ``logger_name``\ " by calling ``logger_factory``\ (``args``\ )
+
+ This function will return a proxy to the shared logger which can be copied to jobs
+ in other processes, as well as a mutex which can be used to prevent simultaneous logging
+ from happening.
+
+ :param logger_factory: function which creates and returns an object with the
+ `logging <http://docs.python.org/library/logging.html>`_ interface.
+ ``setup_std_shared_logger()`` is one example of a logger factory.
+ :param logger_name: name of log
+ :param args: parameters passed (as a single argument) to ``logger_factory``
+ :returns: a proxy to the shared logger which can be copied to jobs in other processes
+ :returns: a mutex which can be used to prevent simultaneous logging from happening
+
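+ Example (an illustrative sketch; the log file name is hypothetical)
+ ::
+
+ args = {"file_name": "/tmp/lg.log"}
+ (logger_proxy,
+ logging_mutex) = make_shared_logger_and_proxy (setup_std_shared_logger,
+ "my_logger", args)
+ with logging_mutex:
+ logger_proxy.info("This message is forwarded to the shared logger")
+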
+ """
+ #
+ # make shared log and proxy
+ #
+ manager = LoggingManager()
+ manager.register( 'setup_logger',
+ logger_factory,
+ proxytype=LoggerProxy,
+ exposed = ( 'critical', 'log',
+ 'info', 'debug', 'warning', 'error'))
+ manager.start()
+ logger_proxy = manager.setup_logger(logger_name, args)
+
+ #
+ # make sure we are not logging at the same time in different processes
+ #
+ logging_mutex = manager.Lock()
+
+ return logger_proxy, logging_mutex
+
+
+
+import unittest, os,sys
+from .proxy_logger import *
+import traceback
+
+
+class Test_Logging(unittest.TestCase):
+
+
+
+ def test_rotating_log(self):
+ """
+ test rotating via proxy
+ """
+ open("/tmp/lg.log", "w").close()
+ args={}
+ args["file_name"] = "/tmp/lg.log"
+ args["rotating"] = True
+ args["maxBytes"]=20000
+ args["backupCount"]=10
+ #args["level"]= logging.INFO
+ (my_log,
+ logging_mutex) = make_shared_logger_and_proxy (setup_std_shared_logger,
+ "my_logger", args)
+ with logging_mutex:
+ my_log.debug('This is a debug message')
+ my_log.info('This is an info message')
+ my_log.warning('This is a warning message')
+ my_log.error('This is an error message')
+ my_log.critical('This is a critical error message')
+ my_log.log(logging.ERROR, 'This is a debug message')
+ self.assert_(open("/tmp/lg.log").read() == \
+"""This is a warning message
+This is an error message
+This is a critical error message
+This is a debug message
+""")
+
+
+#
+# debug code not run if called as a module
+#
+if __name__ == '__main__':
+ if sys.argv.count("--debug"):
+ sys.argv.remove("--debug")
+ unittest.main()
+
diff --git a/ruffus/ruffus_exceptions.py b/ruffus/ruffus_exceptions.py
new file mode 100644
index 0000000..f49fe2c
--- /dev/null
+++ b/ruffus/ruffus_exceptions.py
@@ -0,0 +1,346 @@
+#!/usr/bin/env python
+"""
+
+ ruffus_exceptions.py
+
+"""
+
+################################################################################
+#
+# ruffus_exceptions.py
+#
+#
+# Copyright (c) 10/9/2009 Leo Goodstadt
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+#################################################################################
+
+import sys, os
+from collections import defaultdict
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Exceptions
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+#if __name__ != '__main__':
+# import task
+
+class error_task(Exception):
+ def __init__(self, *errmsg):
+ Exception.__init__(self, *errmsg)
+
+ # list of associated tasks
+ self.tasks = set()
+
+ # error message
+ self.main_msg = ""
+
+ def get_main_msg(self):
+ """
+ Make main message with lists of task names
+ Prefix with new lines for added emphasis
+ """
+ # turn task names into 'def xxx(...):' format
+ task_names = "\n".join(t.get_task_name(True) for t in self.tasks)
+ if len(self.main_msg):
+ return "\n\n" + self.main_msg + " for\n\n%s\n" % task_names
+ else:
+ return "\n\n%s\n" % task_names
+
+
+ def __str__(self):
+ #indent
+ msg = self.get_main_msg() + " ".join(map(str, self.args))
+ return " " + msg.replace("\n", "\n ")
+
+ def specify_task (self, task, main_msg):
+ self.tasks.add(task)
+ self.main_msg = main_msg
+ return self
+
+class error_task_contruction(error_task):
+ """
+ Exceptions raised when constructing pipeline tasks
+ """
+
+ def __init__(self, task, main_msg, *errmsg):
+ error_task.__init__(self, *errmsg)
+ self.specify_task (task, main_msg)
+
+class RethrownJobError(error_task):
+ """
+ Wrap up one or more exceptions rethrown across process boundaries
+
+ See multiprocessing.managers.Server.handle_request/serve_client for an analogous function
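+
+ Each wrapped entry is a tuple of
+ (task_name, job_name, exception_name, exception_value, exception_stack),
+ e.g. (illustrative values)
+ ::
+
+ RethrownJobError([("task1", "[a.1 -> a.2]",
+ "exceptions.ZeroDivisionError", "(1)",
+ "Traceback (most recent call last): ...")])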
+ """
+ def __init__(self, job_exceptions=[]):
+ error_task.__init__(self)
+ self.args = list(job_exceptions)
+
+ def __len__(self):
+ return len(self.args)
+
+ def append(self, job_exception):
+ self.args = self.args + (job_exception, )
+
+ def task_to_func_name (self, task_name):
+ if "mkdir " in task_name:
+ return task_name
+
+ return "def %s(...):" % task_name.replace("__main__.", "")
+
+
+ def get_nth_exception_str (self, nn = -1):
+ if nn == -1:
+ nn = len(self.args) - 1
+ task_name, job_name, exception_name, exception_value, exception_stack = self.args[nn]
+ message = "\nException #%d\n" % (nn + 1)
+ message += " '%s%s' raised in ...\n" % (exception_name, exception_value)
+ if task_name:
+ message += " Task = %s\n %s\n\n" % (self.task_to_func_name(task_name), job_name)
+ message += "%s\n" % (exception_stack, )
+ return message.replace("\n", "\n ")
+
+ def __str__(self):
+ message = ["\nOriginal exception%s:\n" % ("s" if len(self.args) > 1 else "")]
+ for ii in range(len(self.args)):
+ message.append(self.get_nth_exception_str (ii))
+ #
+ # For each exception:
+ # turn original exception stack message into an indented string
+ #
+ return (self.get_main_msg()).replace("\n", "\n ") + "".join(message)
+
+class error_input_file_does_not_match(error_task):
+ pass
+class fatal_error_input_file_does_not_match(error_task):
+ pass
+
+class task_FilesArgumentsError(error_task):
+ pass
+class task_FilesreArgumentsError(error_task):
+ pass
+class MissingInputFileError(error_task):
+ pass
+class JobSignalledBreak(error_task):
+ pass
+class PostTaskArgumentError(error_task):
+ pass
+
+class JobsLimitArgumentError(error_task):
+ pass
+
+class error_task_get_output(error_task_contruction):
+ pass
+class error_task_transform_inputs_multiple_args(error_task_contruction):
+ pass
+class error_task_transform(error_task_contruction):
+ pass
+class error_task_product(error_task_contruction):
+ pass
+class error_task_mkdir(error_task_contruction):
+ pass
+class error_task_permutations(error_task_contruction):
+ pass
+class error_task_combinations(error_task_contruction):
+ pass
+class error_task_combinations_with_replacement(error_task_contruction):
+ pass
+class error_task_merge(error_task_contruction):
+ pass
+class error_task_subdivide(error_task_contruction):
+ pass
+class error_task_originate(error_task_contruction):
+ pass
+class error_task_collate(error_task_contruction):
+ pass
+class error_task_collate_inputs_multiple_args(error_task_contruction):
+ pass
+class error_task_split(error_task_contruction):
+ pass
+class error_task_files_re(error_task_contruction):
+ pass
+class error_task_files(error_task_contruction):
+ pass
+class error_task_parallel(error_task_contruction):
+ pass
+class error_making_directory(error_task):
+ pass
+class error_duplicate_task_name(error_task):
+ pass
+class error_decorator_args(error_task):
+ pass
+class error_task_name_lookup_failed(error_task):
+ pass
+class error_task_decorator_takes_no_args(error_task):
+ pass
+class error_function_is_not_a_task(error_task):
+ pass
+class error_circular_dependencies(error_task):
+ pass
+class error_not_a_directory(error_task):
+ pass
+class error_missing_output(error_task):
+ pass
+class error_job_signalled_interrupt(error_task):
+ pass
+class error_node_not_task(error_task):
+ pass
+class error_missing_runtime_parameter(error_task):
+ pass
+class error_unescaped_regular_expression_forms(error_task):
+ pass
+class error_checksum_level(error_task):
+ pass
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Testing
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+if __name__ == '__main__':
+ import unittest
+
+ #
+ # minimal task object to test exceptions
+ #
+ class task:
+ class _task (object):
+ """
+ dummy task
+ """
+ action_mkdir = 1
+ def __init__(self, _name, _action_type = 0):
+ self._action_type = _action_type
+ self._name = _name
+
+
+ class Test_exceptions(unittest.TestCase):
+
+ # self.assertEqual(self.seq, range(10))
+ # self.assert_(element in self.seq)
+ # self.assertRaises(ValueError, random.sample, self.seq, 20)
+
+
+
+ def test_error_task(self):
+ """
+ test
+ """
+ fake_task1 = task._task("task1")
+ fake_task2 = task._task("task2")
+ fake_mkdir_task3 = task._task("task3", task._task.action_mkdir)
+ fake_mkdir_task4 = task._task("task4", task._task.action_mkdir)
+ e = error_task()
+ e.specify_task (fake_task1 , "Some message 0")
+ e.specify_task (fake_task2 , "Some message 1")
+ e.specify_task (fake_mkdir_task3, "Some message 2")
+ e.specify_task (fake_mkdir_task4, "Some message 3")
+ self.assertEqual(str(e),
+"""
+
+ Some message 3 for
+
+ 'def task1(...):'
+ 'def task2(...):'
+ task3
+ task4
+ """)
+
+ def test_RethrownJobError(self):
+ """
+ test
+ """
+ #job_name, exception_name, exception_value, exception_stack
+ exception_data = [
+ [
+ "task1",
+ "[[temp_branching_dir/a.2, a.1] -> temp_branching_dir/a.3]",
+ "ruffus.task.MissingInputFileError",
+ "(instance value)",
+ "Traceback (most recent call last):\n File \"what.file.py\", line 333, in some_func\n somecode(sfasf)\n"
+ ],
+ [
+ "task1",
+ "[None -> [temp_branching_dir/a.1, temp_branching_dir/b.1, temp_branching_dir/c.1]]",
+ "exceptions.ZeroDivisionError:",
+ "(1)",
+ "Traceback (most recent call last):\n File \"anotherfile.py\", line 345, in other_func\n badcode(rotten)\n"
+ ]
+
+ ]
+ e = RethrownJobError(exception_data)
+ fake_task1 = task._task("task1")
+ fake_task2 = task._task("task2")
+ fake_mkdir_task3 = task._task("task3", task._task.action_mkdir)
+ fake_mkdir_task4 = task._task("task4", task._task.action_mkdir)
+ e.specify_task (fake_task1 , "Exceptions running jobs")
+ e.specify_task (fake_task2 , "Exceptions running jobs")
+ e.specify_task (fake_mkdir_task3, "Exceptions running jobs")
+ e.specify_task (fake_mkdir_task4, "Exceptions running jobs")
+ self.assertEqual(str(e),
+"""
+
+ Exceptions running jobs for
+
+ 'def task1(...):'
+ 'def task2(...):'
+ task3
+ task4
+
+ Original exceptions:
+
+ Exception #1
+ ruffus.task.MissingInputFileError(instance value):
+ for task1.[[temp_branching_dir/a.2, a.1] -> temp_branching_dir/a.3]
+
+ Traceback (most recent call last):
+ File "what.file.py", line 333, in some_func
+ somecode(sfasf)
+
+
+ Exception #2
+ exceptions.ZeroDivisionError:(1):
+ for task1.[None -> [temp_branching_dir/a.1, temp_branching_dir/b.1, temp_branching_dir/c.1]]
+
+ Traceback (most recent call last):
+ File "anotherfile.py", line 345, in other_func
+ badcode(rotten)
+
+ """)
+
+
+
+#
+# debug code not run if called as a module
+#
+if __name__ == '__main__':
+ if sys.argv.count("--debug"):
+ sys.argv.remove("--debug")
+ unittest.main()
+
+
+
+
diff --git a/ruffus/ruffus_utility.py b/ruffus/ruffus_utility.py
new file mode 100644
index 0000000..6285cf9
--- /dev/null
+++ b/ruffus/ruffus_utility.py
@@ -0,0 +1,1434 @@
+#!/usr/bin/env python
+from __future__ import print_function
+import sys
+if sys.hexversion < 0x03000000:
+ from future_builtins import zip
+################################################################################
+#
+# ruffus_utility.py
+#
+#
+# Copyright (c) 10/9/2009 Leo Goodstadt
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+#################################################################################
+
+
+"""
+
+********************************************
+:mod:`ruffus_utility` -- Overview
+********************************************
+
+
+.. moduleauthor:: Leo Goodstadt <ruffus at llew.org.uk>
+
+ Common utility functions
+
+
+"""
+
+
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# imports
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+import os,copy
+import re
+import types
+import glob
+from functools import reduce
+if __name__ == '__main__':
+ import sys
+ sys.path.insert(0,".")
+from .ruffus_exceptions import *
+#import task
+import collections
+from collections import defaultdict
+import multiprocessing.managers
+import hashlib
+import marshal
+try:
+ import cPickle as pickle
+except:
+ import pickle as pickle
+import operator
+from . import dbdict
+from itertools import chain
+if sys.hexversion >= 0x03000000:
+ # everything is unicode in python3
+ path_str_type = str
+else:
+ path_str_type = basestring
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Constants
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+#
+# file to store history out to
+#
+RUFFUS_HISTORY_FILE = '.ruffus_history.sqlite'
+# If DEFAULT_RUFFUS_HISTORY_FILE is specified in the environment variables, use that instead
+if "DEFAULT_RUFFUS_HISTORY_FILE" in os.environ:
+ RUFFUS_HISTORY_FILE = os.environ["DEFAULT_RUFFUS_HISTORY_FILE"]
+
+
+CHECKSUM_FILE_TIMESTAMPS = 0 # only rerun when the file timestamps are out of date (classic mode)
+CHECKSUM_HISTORY_TIMESTAMPS = 1 # also rerun when the history shows a job as being out of date
+CHECKSUM_FUNCTIONS = 2 # also rerun when function body has changed
+CHECKSUM_FUNCTIONS_AND_PARAMS = 3 # also rerun when function parameters or function body change
+
+CHECKSUM_REGENERATE = 2 # regenerate checksums
+
+#_________________________________________________________________________________________
+#
+# get_default_checksum_level
+#
+#_________________________________________________________________________________________
+def get_default_checksum_level ():
+ """
+ Use the checksum level from the environmental variable DEFAULT_RUFFUS_CHECKSUM_LEVEL
+ Otherwise default to CHECKSUM_HISTORY_TIMESTAMPS
+ """
+
+ #
+ # environmental variable not set
+ #
+ if "DEFAULT_RUFFUS_CHECKSUM_LEVEL" not in os.environ:
+ return CHECKSUM_HISTORY_TIMESTAMPS
+
+
+
+ #
+ # lookup value from list of CHECKSUM_XXX constants
+ #
+ checksum_level = None
+ env_checksum_level = os.environ["DEFAULT_RUFFUS_CHECKSUM_LEVEL"]
+ if len(env_checksum_level) == 1 and env_checksum_level in "0123":
+ checksum_level = int(env_checksum_level)
+ else:
+ global_var = globals()
+ for key in global_var:
+ if key.startswith('CHECKSUM') and key == env_checksum_level:
+ checksum_level = global_var[key]
+
+ #
+ # check environmental variable is valid string
+ #
+ if checksum_level == None:
+ raise error_checksum_level(("The environmental value "
+ "DEFAULT_RUFFUS_CHECKSUM_LEVEL should be: [0-3 | "
+ "CHECKSUM_FILE_TIMESTAMPS | "
+ "CHECKSUM_HISTORY_TIMESTAMPS | "
+ "CHECKSUM_FUNCTIONS | "
+ "CHECKSUM_FUNCTIONS_AND_PARAMS] (rather than '%s') ")
+ % (env_checksum_level,))
+
+ return checksum_level
+
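+# For example (illustrative), either of these environment settings selects CHECKSUM_FUNCTIONS:
+#
+# export DEFAULT_RUFFUS_CHECKSUM_LEVEL=2
+# export DEFAULT_RUFFUS_CHECKSUM_LEVEL=CHECKSUM_FUNCTIONS
+#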
+
+#_________________________________________________________________________________________
+
+# open_job_history
+
+#_________________________________________________________________________________________
+def get_default_history_file_name ():
+ history_file = RUFFUS_HISTORY_FILE
+ #
+ # try path expansion using the main script name
+ #
+ try:
+ import __main__ as main
+ path_parts = path_decomposition (os.path.abspath(main.__file__))
+ history_file = history_file.format(**path_parts)
+ except Exception:
+ pass
+ return history_file
+
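+# For example (an illustrative sketch): if DEFAULT_RUFFUS_HISTORY_FILE were set to
+# "{subpath[0]}/.{basename}.ruffus_history.sqlite"
+# and the main pipeline script were the hypothetical /home/user/pipeline.py,
+# the history file name would expand to /home/user/.pipeline.ruffus_history.sqlite
+#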
+def open_job_history (history_file):
+ """
+ Given a history file name, opens the corresponding SQLite db file and returns the handle
+ """
+ if not history_file:
+ history_file = get_default_history_file_name ()
+
+ return dbdict.open(history_file, picklevalues=True)
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Functions
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+class JobHistoryChecksum:
+ """Class to remember exactly how an output file was created and when."""
+
+ def __str__(self):
+ from time import strftime, gmtime
+ if hasattr(self, "params"):
+ return str([self.outfile,
+ strftime("%d %b %Y %H:%M:%S", gmtime(self.mtime)),
+ self.params,
+ self.task_name
+ ])
+ else:
+ return strftime("%d %b %Y %H:%M:%S", gmtime(self.mtime))
+
+ def __init__(self, outfile, mtime, params, task):
+ # filename and modification time
+ self.outfile = outfile
+ self.mtime = mtime
+
+ # Uncomment next two lines to debug:
+ #self.params = params
+ #self.task_name = task._name
+
+ # checksum exact params used to generate this output file
+ self.chksum_params = hashlib.md5(pickle.dumps(params)).hexdigest()
+ # checksum the function bytecode as well as the function context
+ # Don't use func_code alone-- changing the line number of the function,
+ # what global variables are available, etc would all change the checksum
+ if sys.hexversion >= 0x03000000:
+ code = task.user_defined_work_func.__code__
+ func_defaults = task.user_defined_work_func.__defaults__
+ else:
+ code = task.user_defined_work_func.func_code
+ func_defaults = task.user_defined_work_func.func_defaults
+ func_code = marshal.dumps(code.co_code)
+
+
+ #
+ # pickle code very defensively, but hopefully without breaking Jake Biesinger's pipelines!
+ #
+ attributes_to_pickle = [func_defaults,
+ code.co_argcount,
+ code.co_consts,
+ code.co_names,
+ code.co_nlocals,
+ code.co_varnames]
+
+ pickle_results = []
+ for aa in attributes_to_pickle:
+ # Can't cpickle nested functions: typically blows up with func_code.co_consts
+ try:
+ pickle_results.append(pickle.dumps(aa))
+ continue
+ except:
+ pass
+ # Marshal seems to be less sensitive: try that
+ try:
+ pickle_results.append(marshal.dumps(aa))
+ continue
+ except:
+ pass
+ # Just make a string out of the attribute
+ try:
+ pickle_results.append(str(aa))
+ continue
+ except:
+ pass
+ # OK give up, do nothing: On your head it is
+
+
+ func_extras = reduce(operator.add, pickle_results)
+ self.chksum_func = hashlib.md5(func_code + func_extras).hexdigest()
+
+
+
+#_________________________________________________________________________________________
+#
+# parameter_list_as_string
+#
+#_________________________________________________________________________________________
+def parameter_list_as_string (parameters):
+ """
+ Turn a list of parameters into a string for display
+
+ E.g. ["a", 1] -> "'a', 1"
+
+ """
+ if parameters == None:
+ return ""
+ elif not isinstance(parameters, list):
+ raise Exception("Unexpected parameter list %s" % (parameters,))
+ else:
+ return str(parameters)[1:-1]
+
+#_________________________________________________________________________________________
+#
+# path_decomposition
+#
+#_________________________________________________________________________________________
+def path_decomposition (orig_path):
+ """
+ returns a dictionary identifying the components of a file path:
+ This has the following keys
+ basename: (any) base (file) name of the path not including the extension. No slash included
+ ext: (any) extension of the path including the "."
+ path: the directory part of the path. No trailing slash
+ subpath: a list of descending subpaths created by removing subdirectory names one at a time
+ subdir: a list of subdirectory names from the most nested to the root
+ For example
+ apath = "/a/b/c/d/filename.txt"
+ { 'basename': 'filename',
+ 'ext': '.txt',
+ 'path': '/a/b/c/d',
+ 'subpath': ['/a/b/c/d', '/a/b/c', '/a/b', '/a', '/'],
+ 'subdir': ['d', 'c', 'b', 'a', '/']
+ }
+ "{subpath[2]}/changed/{subdir[0]}".format(**res) = '/a/b/changed/d'
+ "{subpath[3]}/changed/{subdir[1]}".format(**res) = '/a/changed/c'
+ """
+ def recursive_split (a_path):
+ """
+ split the path into its subdirectories recursively
+ """
+ if not len(a_path):
+ return [[],[]]
+ if a_path == "/" or a_path == "//":
+ return [ [a_path] , [a_path]]
+ sub_path_part, sub_dir_part = os.path.split(a_path)
+ if sub_dir_part:
+ sub_path_parts, sub_dir_parts = recursive_split (sub_path_part)
+ return [ [a_path] + sub_path_parts,
+ [sub_dir_part] + sub_dir_parts]
+ else:
+ return [ [] , ["/"]]
+ #
+ if not len(orig_path):
+ return {'path': [], 'basename': '', 'ext': '', 'subpath': [], 'subdir': []}
+
+ # stop normpath from being too clever and removing initial ./ and terminal slash, turning paths into filenames
+ if orig_path in [ "./", "/."]:
+ a_path = orig_path
+ else:
+ a_path = os.path.normpath(orig_path)
+ if orig_path[0:2] == "./" and a_path[0:2] != "./":
+ a_path = "./" + a_path
+
+ if orig_path[-1] == "/" and a_path[-1:] != "/":
+ a_path += "/"
+
+ path_part, file_part = os.path.split(a_path)
+ file_part, ext_part = os.path.splitext(file_part)
+ subpaths, subdirs = recursive_split (path_part)
+ return {'basename': file_part,
+ 'ext': ext_part,
+ 'subpath': subpaths,
+ 'subdir': subdirs,
+ 'path': path_part}
+
+
+#_________________________________________________________________________________________
+#
+# get_nth_nested_level_of_path
+#
+#_________________________________________________________________________________________
+def get_nth_nested_level_of_path (orig_path, n_levels):
+ """
+ Return path with up to N levels of subdirectories
+ 0 = full path
+ N = 1 : basename
+ N = 2 : basename + one subdirectory
+
+ For example
+ 0 /test/this/now/or/not.txt
+ 1 not.txt
+ 2 or/not.txt
+ 3 now/or/not.txt
+ 4 this/now/or/not.txt
+ 5 test/this/now/or/not.txt
+ 6 /test/this/now/or/not.txt
+ 7 /test/this/now/or/not.txt
+ """
+ if not n_levels or n_levels < 0:
+ return orig_path
+ res = path_decomposition(orig_path)
+ basename = res["basename"] + res["ext"]
+ shortened_path = os.path.join(*(list(reversed(res["subdir"][0:(n_levels - 1)]))+[basename]))
+ if len(shortened_path) < len(orig_path):
+ return ".../" + shortened_path
+ return orig_path
+
+
+
+#_________________________________________________________________________________________
+#
+# swap_nesting_order
+#
+#_________________________________________________________________________________________
+def swap_nesting_order (orig_coll):
+ """
+ Reverse nested order so that coll[3]['a'] becomes coll['a'][3]
+ """
+ new_dict = defaultdict(dict)
+ new_list = []
+ for ii, ii_item in enumerate(orig_coll):
+ for jj, value in ii_item.items():
+ if isinstance(jj, int):
+ # resize
+ new_list += [{}]*(jj + 1 - len(new_list))
+ new_list[jj][ii] = value
+ else:
+ new_dict[jj][ii] = value
+ return new_list, new_dict
+
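+# For example (illustrative):
+#
+# swap_nesting_order([{0: "a", "name": "x"}, {"name": "y"}])
+# # -> ([{0: 'a'}], {'name': {0: 'x', 1: 'y'}}) (the second value is a defaultdict)
+#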
+#_________________________________________________________________________________________
+#
+# swap_doubly_nested_order
+#
+#_________________________________________________________________________________________
+def swap_doubly_nested_order (orig_coll):
+ """
+ Reverse nested order so that coll[2][3]['a'] becomes coll['a'][2][3]
+ """
+ new_dict = dict()
+ new_list = []
+ for ii, ii_item in enumerate(orig_coll):
+ for jj, jj_item in enumerate(ii_item):
+ for kk, value in jj_item.items():
+ if isinstance(kk, int):
+ # resize
+ new_list += [{}]*(kk + 1 - len(new_list))
+ if ii not in new_list[kk]:
+ new_list[kk][ii] = dict()
+ new_list[kk][ii][jj] = value
+ else:
+ if kk not in new_dict:
+ new_dict[kk] = dict()
+ if ii not in new_dict[kk]:
+ new_dict[kk][ii] = dict()
+ new_dict[kk][ii][jj] = value
+
+ return new_list, new_dict
+
+
+#_________________________________________________________________________________________
+#
+# regex_match_str
+#
+#_________________________________________________________________________________________
+def regex_match_str(test_str, compiled_regex):
+ """
+ Returns result of regular expression match in a dictionary
+ combining both named and unnamed captures
+ """
+ if compiled_regex:
+ if isinstance(compiled_regex, path_str_type):
+ compiled_regex = re.compile(compiled_regex)
+ mm = compiled_regex.search(test_str)
+ # Match failed
+ if mm == None:
+ return False
+ else:
+ # No capture
+ if mm.lastindex == None:
+ return {0: mm.group(0)}
+ # Combined named and unnamed captures
+ else:
+ # no dictionary comprehensions in python 2.6 :-(
+ #matchdicts.append({i : mm.group(i) for i in (range(mm.lastindex) + mm.groupdict().keys())})
+ return dict((i, mm.group(i)) for i in (chain( iter(range(mm.lastindex + 1)),
+ iter(mm.groupdict().keys()))))
+
+ else:
+ return None
+
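+# For example (illustrative):
+#
+# regex_match_str("sample1.bam", r"(\w+)\.(\w+)")
+# # -> {0: 'sample1.bam', 1: 'sample1', 2: 'bam'}
+#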
+
+#_________________________________________________________________________________________
+#
+# path_decomposition_regex_match
+#
+#_________________________________________________________________________________________
+def path_decomposition_regex_match (test_str, compiled_regex):
+ """
+ Returns a dictionary identifying the components of a file path.
+
+ This includes both the components of a path:
+ basename: (any) base (file) name of the path not including the extension. No slash included
+ ext: (any) extension of the path including the "."
+ path: the directory part of the path
+ subpath: a list of descending subpaths created by removing subdirectory names one at a time
+ subdir: a list of subdirectory names from the most nested to the root
+ and regular expression matches
+ The keys are the index or name of the capturing group.
+
+
+ If compiled_regex is not specified, return the path decomposition only
+
+ If compiled_regex is specified and the regular expression does not match,
+ the entire match fails
+
+ For example
+
+ path_decomposition_regex_match("/a/b/c/sample1.bam", r"(.*)(?P<id>\d+)\..+")
+
+ {
+ 0: '/a/b/c/sample1.bam', // captured by index
+ 1: '/a/b/c/sample', // captured by index
+ 'id': '1' // captured by name
+ 'ext': '.bam',
+ 'subdir': ['c', 'b', 'a', '/'],
+ 'subpath': ['/a/b/c', '/a/b', '/a', '/'],
+ 'path': '/a/b/c',
+ 'basename': 'sample1',
+ },
+
+ path_decomposition_regex_match("dbsnp15.vcf", r"(.*)(?P<id>\d+)\..+")
+ {
+ 0: 'dbsnp15.vcf', // captured by index
+ 1: 'dbsnp1', // captured by index
+ 'id': '5' // captured by name
+ 'ext': '.vcf',
+ 'subdir': [],
+ 'subpath': [],
+ 'path': '',
+ 'basename': 'dbsnp15',
+ },
+
+
+ // fail
+ path_decomposition_regex_match("/test.txt", r"(.*)(?P<id>\d+)\..+")
+ {}
+
+ // path components only
+ path_decomposition_regex_match("/test.txt", None)
+ {
+ 'ext': '.txt',
+ 'subdir': ['/']
+ 'subpath': ['/'],
+ 'path': '/',
+ 'basename': 'test',
+ }
+
+ """
+ pp = path_decomposition(test_str)
+
+ # regular expression not specified
+ # just path
+ if compiled_regex == None:
+ return pp
+
+ rr = regex_match_str(test_str, compiled_regex)
+
+ # regular expression match failed
+ # nothing
+ if rr == False:
+ return {}
+
+ #
+ # regular expression matches override file decomposition values in
+ # case of clashes between predefined keys such as "basename" and
+ # regular expression named capture groups
+ #
+ pp.update(rr)
+ return pp
+
+
+#_________________________________________________________________________________________
+#
+# check_compiled_regexes
+#
+#_________________________________________________________________________________________
+def check_compiled_regexes (compiled_regexes, expected_num):
+ """
+ check compiled_regexes are of the right type and number
+ """
+ if compiled_regexes == None:
+ return [None] * expected_num
+
+ if not isinstance(compiled_regexes, list):
+ raise Exception("Expecting a list of regular expression strings, compiled regular expressions or None")
+
+ # pad compiled_regexes with None
+ if len(compiled_regexes) < expected_num:
+ compiled_regexes.extend([None] * (expected_num - len(compiled_regexes)))
+
+ # Compile any strings into regular expressions here as a fallback.
+ # Ideally strings are compiled earlier, where more context is available
+ # for nicer error messages.
+ compiled_regexes = [re.compile(rr) if isinstance(rr, path_str_type) else rr for rr in compiled_regexes]
+
+ # check types
+ regex_types = type(re.compile("")), type(None)
+ for rr in compiled_regexes:
+ if not isinstance(rr, regex_types):
+ raise Exception("Unexpected type %s ('%s') specified in regular expression list. Expecting string or compiled regular expression" % (type(rr), rr))
+
+ return compiled_regexes
+
+
+#_________________________________________________________________________________________
+#
+# get_all_paths_components
+#
+#_________________________________________________________________________________________
+def get_all_paths_components(paths, compiled_regexes):
+ """
+ For each path in a list, returns a dictionary combining path decomposition
+ and any regular expression captures (see path_decomposition_regex_match)
+ """
+ #
+ # merge regular expression matches and path decomposition
+ #
+ compiled_regexes = check_compiled_regexes (compiled_regexes, len(paths))
+ return [path_decomposition_regex_match (pp, rr) for (pp, rr) in zip(paths, compiled_regexes)]
+
+
+
+#_________________________________________________________________________________________
+#
+# apply_func_to_sequence
+#
+#_________________________________________________________________________________________
+def apply_func_to_sequence(seq, func, tuple_of_conforming_types = (path_str_type,), tuple_of_sequences_types = (list, tuple,set)):
+ """
+ Recurses into list/tuple/set sequences to apply func to conforming types
+ Non-conforming types are left alone
+ """
+ if isinstance(seq, tuple_of_conforming_types):
+ return func(seq)
+ elif isinstance(seq, tuple_of_sequences_types):
+ return type(seq)(apply_func_to_sequence(pp, func, tuple_of_conforming_types, tuple_of_sequences_types) for pp in seq)
+ else:
+ return seq
+
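+# For example (illustrative):
+#
+# apply_func_to_sequence(["a.txt", ("b.txt", 3)], str.upper)
+# # -> ['A.TXT', ('B.TXT', 3)]
+#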
+
+#_________________________________________________________________________________________
+#
+# t_regex_replace
+#
+#_________________________________________________________________________________________
+class t_regex_replace(object):
+ def __init__ (self, filename, regex_str, compiled_regex, regex_or_suffix):
+ self.regex_or_suffix = regex_or_suffix
+ self.compiled_regex = compiled_regex
+ self.regex_str = regex_str
+ self.filename = filename
+ def __call__(self, p):
+ #
+ # check if substitution pattern is mis-specified
+ #
+ if "\1" in p or "\2" in p:
+ raise error_unescaped_regular_expression_forms("['%s'] " % (p.replace("\1", r"\1").replace("\2", r"\2")) +
+ "The special regular expression characters "
+ r"\1 and \2 need to be 'escaped' in python. "
+ r"The easiest option is to use python 'raw' strings "
+ r"e.g. r'\1_in_a string\2'. See http://docs.python.org/library/re.html.")
+ #
+ # For suffix(), replaces the suffix part by adding leading r"\1" to the substitution pattern
+ #
+ # If r"\1" is specified, then we presume you know what you are doing...
+ #
+ if self.regex_or_suffix == SUFFIX_SUBSTITUTE:
+ if r"\1" not in p and r"\g<1>" not in p:
+ match_p = r"\g<1>" + p
+ else:
+ match_p = p
+
+ # throw exception if doesn't match regular expression at all
+ (res_str, cnt_replacements) = self.compiled_regex.subn(match_p, self.filename)
+ if cnt_replacements == 0:
+ raise error_input_file_does_not_match("File '%s' does not match suffix('%s') and pattern '%s'" % (self.filename, self.regex_str, p))
+ return res_str
+
+ #
+ # Normal substitution
+ #
+ # For suffix(), complete replacement by the specified pattern text
+ # only substitute if r"\1" or r"\g<1>" is specified
+ #
+ #
+ err_str = ""
+ try:
+ (res_str, cnt_replacements) = self.compiled_regex.subn(p, self.filename)
+ if cnt_replacements > 0:
+ return res_str
+ except re.error:
+ exceptionType, exceptionValue, exceptionTraceback = sys.exc_info()
+ err_str = str(exceptionValue)
+ raise fatal_error_input_file_does_not_match("File '%s' does not match regex('%s') and pattern '%s':\n\t%s\n" % (self.filename, self.regex_str, p, err_str))
+ except IndexError:
+ exceptionType, exceptionValue, exceptionTraceback = sys.exc_info()
+ err_str = str(exceptionValue)
+ raise fatal_error_input_file_does_not_match("File '%s' does not match regex('%s') and pattern '%s':\n\t%s\n" % (self.filename, self.regex_str, p, err_str))
+
+ #except (re.error, IndexError):
+ #err_str = str(sys.exc_info()[1]),
+
+ raise error_input_file_does_not_match("File '%s' does not match regex('%s') and pattern '%s'\n%s\n" % (self.filename, self.regex_str, p, err_str))
+
+#_________________________________________________________________________________________
+#
+# t_formatter_replace
+#
+#_________________________________________________________________________________________
+class t_formatter_replace(object):
+ def __init__ (self, filenames, regex_strings, compiled_regexes = None):
+ self.filenames = filenames
+ # get the full absolute, normalised paths
+ filenames = [os.path.abspath(f) for f in filenames]
+ self.path_regex_components = get_all_paths_components(filenames, compiled_regexes)
+ self.display_regex_strings = parameter_list_as_string(regex_strings)
+
+
+
+ def __call__(self, p):
+ # swapped nesting order makes the syntax easier to explain:
+ # The first level of indirection is always the path component
+ # So basename[0] is the file name for the first file
+ # This looks better than the normal 0[basename]
+
+ # some contortions because format decodes {0} as an offset into a list and not a lookup into a dict...
+ dl, dd = swap_nesting_order(self.path_regex_components)
+ try:
+ return p.format(*dl, **dd)
+ except (KeyError, IndexError):
+ raise error_input_file_does_not_match("Field '%s' in ('%s') using formatter(%s) fails to match Files '%s'."
+ % ( str(sys.exc_info()[1]),
+ p,
+ self.display_regex_strings,
+ self.filenames))
+
+
+#_________________________________________________________________________________________
+#
+# t_nested_formatter_replace
+#
+#_________________________________________________________________________________________
+class t_nested_formatter_replace(object):
+ """
+ Like t_formatter_replace but with one additional level of nesting
+ I.e. everything is a list comprehension!
+ For combinatorics @decorators
+ """
+ def __init__ (self, filenames, regex_strings, compiled_regexes):
+ # make sure that we have the same level of nestedness for regular expressions and file names etc.
+ if len(filenames) != len(regex_strings) or len(filenames) != len(compiled_regexes):
+ raise Exception("Logic Error.")
+ self.filenames = filenames
+ # get the full absolute, normalised paths
+ filenames = [[os.path.abspath(f) for f in filegroups] for filegroups in filenames]
+ self.path_regex_components = [get_all_paths_components(f, r) for (f,r) in zip(filenames, compiled_regexes)]
+ self.display_regex_strs = [parameter_list_as_string(ss) for ss in regex_strings]
+
+
+ def __call__(self, p):
+ # swapped nesting order makes the syntax easier to explain:
+ # The first level of indirection is always the path component
+ # So basename[0] is the file name for the first file
+ # This looks better than the normal 0[basename]
+
+ # some contortions because format decodes {0} as an offset into a list and not a lookup into a dict...
+ dl, dd = swap_doubly_nested_order(self.path_regex_components)
+ try:
+ return p.format(*dl, **dd)
+ except (KeyError, IndexError):
+ formatter_str = ", ".join("formatter(%s)" % ss for ss in self.display_regex_strs)
+ raise error_input_file_does_not_match("Unmatched field %s in ('%s') using %s fails to match Files '%s'"
+ "."
+ % ( str(sys.exc_info()[1]),
+ p,
+ formatter_str,
+ self.filenames))
+
+
+#_________________________________________________________________________________________
+#
+# regex_replace
+#
+#_________________________________________________________________________________________
+
+#
+# Perform normal regular expression substitution
+#
+REGEX_SUBSTITUTE = 0
+#
+# An extra peculiar mode to help suffix along:
+# Suffix regular expressions have an implicit capture for everything up to the specified
+# suffix text
+
+#
+# By default, replaces the suffix part by adding leading r"\1" to the substitution pattern
+# If r"\1" is already specified in the pattern, then we presume you know what
+# you are doing, and will let you get along with it
+#
+SUFFIX_SUBSTITUTE = 1
+
+#
+# REGEX_SUBSTITUTE is used for suffix() matches in 'extra' arguments (additional to output)
+# which are strings
+#
+# Complete replacement happens. If you wish to retain the prefix text
+# before the suffix, you can do so by adding r"\1"
+#
+
+def regex_replace(filename, regex_str, compiled_regex, substitution_patterns, regex_or_suffix = REGEX_SUBSTITUTE):
+ return apply_func_to_sequence(substitution_patterns, t_regex_replace(filename, regex_str, compiled_regex, regex_or_suffix))
+
+def formatter_replace (filenames, regex_str, compiled_regex, substitution_patterns):
+ return apply_func_to_sequence(substitution_patterns, t_formatter_replace(filenames, regex_str, compiled_regex))
+
+def nested_formatter_replace (filenames, regex_strings, compiled_regexes, substitution_patterns):
+ return apply_func_to_sequence(substitution_patterns, t_nested_formatter_replace(filenames, regex_strings, compiled_regexes))
+
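+# For example (illustrative; the file names are hypothetical):
+#
+# regex_replace("sample.fastq", r"(.+)\.fastq$",
+# re.compile(r"(.+)\.fastq$"), r"\1.bam")
+# # -> 'sample.bam'
+#
+# formatter_replace(["/data/sample1.bam"], None, None,
+# "{path[0]}/{basename[0]}.bai")
+# # -> '/data/sample1.bai'
+#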
+
+#_________________________________________________________________________________________
+
+# non_str_sequence
+
+#_________________________________________________________________________________________
+def non_str_sequence (arg):
+ """
+ Whether arg is a sequence (list, tuple or set).
+ Strings and dicts are treated as singletons, not as sequences
+
+ """
+ #will only dive into list and set, everything else is not regarded as a sequence
+ #loss of flexibility but more conservative
+ #if (isinstance(arg, (basestring, dict, multiprocessing.managers.DictProxy))):
+ if (not isinstance(arg, (list, tuple, set))):
+ return False
+ try:
+ test = iter(arg)
+ return True
+ except TypeError:
+ return False
+
+#_________________________________________________________________________________________
+
+# get_strings_in_nested_sequence_aux
+
+# helper function for next function
+
+#_________________________________________________________________________________________
+def get_strings_in_nested_sequence_aux(p, l = None):
+ """
+ Unravels an arbitrarily nested sequence and returns a flat list of strings
+ """
+ if l == None:
+ l = []
+ if isinstance(p, path_str_type):
+ l.append(p)
+ elif non_str_sequence (p):
+ for pp in p:
+ get_strings_in_nested_sequence_aux(pp, l)
+ return l
+
+
+#_________________________________________________________________________________________
+
+# get_strings_in_nested_sequence
+
+#_________________________________________________________________________________________
+def get_strings_in_nested_sequence (p, first_only = False):
+ """
+ Traverses a nested sequence and returns all strings flattened into a list.
+ If first_only is set, only the first string in each top-level element is returned.
+ """
+ if p == None:
+ return []
+
+ #
+ # string is returned as list of single string
+ #
+ if isinstance(p, path_str_type):
+ return [p]
+
+ #
+ # Get all strings flattened into list
+ #
+ if not first_only:
+ return get_strings_in_nested_sequence_aux(p)
+
+
+ #
+ # Get all first string in each element
+ #
+ elif non_str_sequence (p):
+ filenames = []
+ for pp in p:
+ l = get_strings_in_nested_sequence_aux(pp)
+ if len(l):
+ filenames.append(l[0])
+ return filenames
+
+ return []
+
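+# For example (illustrative):
+#
+# get_strings_in_nested_sequence([["a.txt", "b.txt"], "c.txt"])
+# # -> ['a.txt', 'b.txt', 'c.txt']
+# get_strings_in_nested_sequence([["a.txt", "b.txt"], "c.txt"], first_only = True)
+# # -> ['a.txt', 'c.txt']
+#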
+#_________________________________________________________________________________________
+
+# get_first_string_in_nested_sequence
+
+#_________________________________________________________________________________________
+def get_first_string_in_nested_sequence (p):
+ if p == None:
+ return None
+
+ #
+ # string is returned as list of single string
+ #
+ if isinstance(p, path_str_type):
+ return p
+
+ #
+ # Get all first string in each element
+ #
+ elif non_str_sequence (p):
+ filenames = []
+ for pp in p:
+ l = get_strings_in_nested_sequence_aux(pp)
+ if len(l):
+ return l[0]
+
+ return None
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Encoders: turn objects and filenames into a more presentable format
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+def ignore_unknown_encoder(obj):
+ if non_str_sequence (obj):
+ return "[%s]" % ", ".join(map(ignore_unknown_encoder, obj))
+ try:
+ s= str(obj)
+ if " object" in s and s[0] == '<' and s[-1] == '>':
+ pos = s.find(" object")
+ s = "<" + s[1:pos].replace("__main__.", "") + ">"
+ return s.replace('"', "'")
+ except:
+ return "<%s>" % str(obj.__class__).replace('"', "'")
+
+#_________________________________________________________________________________________
+#
+# shorten_filenames_encoder
+#________________________________________________________________________________________
+def shorten_filenames_encoder (obj, n_levels = 2):
+ """
+ Convert a set of parameters into a string
+ Paths with > N levels of nestedness are truncated
+ """
+
+ #
+ # a negative n_levels is interpreted as the desired maximum length of the encoded string
+ #
+ if n_levels < 0:
+ desired_len = - n_levels
+ prev_encoded_len = 0
+ #
+ # try more and more nestedness while the result still fits inside the desired length
+ # stop when the path is no longer truncated (no "..." remains)
+ #
+ for nestedness in range(1, 20):
+ res = shorten_filenames_encoder (obj, nestedness)
+ if len(res) > desired_len or "..." not in res:
+ break
+ prev_encoded_len = len(res)
+ desired_len = max(4, desired_len - 5)
+ offset = len(res) - desired_len
+ if offset < 0:
+ return res
+ return "<???> " + res[offset:]
+
+
+
+ #
+ # Recurse into lists and tuples
+ #
+ if non_str_sequence (obj):
+ return "[%s]" % ", ".join(map(shorten_filenames_encoder, obj, [n_levels] * len(obj)))
+
+ #
+ # Only shorten strings
+ #
+ if not isinstance(obj, path_str_type):
+ return ignore_unknown_encoder(obj)
+
+
+ #
+ # level = 0 means return full absolute path
+ #
+ if not n_levels:
+ return ignore_unknown_encoder(os.path.abspath(obj))
+
+ #
+ # Shorten both relative and absolute (full) paths
+ #
+
+ # if within bounds, return that
+ if obj[1:].count('/') < n_levels:
+ return ignore_unknown_encoder(obj)
+
+ # use relative path if that has <= nested level
+ rel_path = os.path.relpath(obj)
+ if rel_path.count('/') <= n_levels:
+ #print >>sys.stderr, "relative path only one nested level"
+ return ignore_unknown_encoder(rel_path)
+
+ # get last N nested levels
+ #print >>sys.stderr, "full path last N nested level"
+ return ignore_unknown_encoder(get_nth_nested_level_of_path (obj, n_levels))
+
+
+
+
+
+#_________________________________________________________________________________________
+#
+# get_tasks_filename_globs_in_nested_sequence
+#
+#________________________________________________________________________________________
+glob_letters = set('*[]?')
+def is_glob(s):
+ """Check whether 's' contains ANY of glob chars"""
+ return len(glob_letters.intersection(s)) > 0
+
+
+
+#_________________________________________________________________________________________
+#
+# get_nested_tasks_or_globs
+#
+#________________________________________________________________________________________
+def get_nested_tasks_or_globs(p, treat_strings_as_tasks = False, runtime_data_names=None, tasks=None, globs = None):
+ """
+ Get any tasks or globs which are within the parameter
+ Tasks are returned as functions or function names
+ """
+ #
+ # create storage if this is not a recursive call
+ #
+ if globs == None:
+ runtime_data_names, tasks, globs = set(), set(), set()
+
+ #
+ # task function
+ #
+ if (isinstance(p, collections.Callable)):
+ #if (type(p) == types.FunctionType):
+ tasks.add(p)
+ elif isinstance(p, runtime_parameter):
+ runtime_data_names.add(p)
+
+ #
+ # output_from treats all arguments as tasks or task names
+ #
+ elif isinstance(p, output_from):
+ for pp in p.args:
+ get_nested_tasks_or_globs(pp, True, runtime_data_names, tasks, globs)
+
+ elif isinstance(p, path_str_type):
+ if treat_strings_as_tasks:
+ tasks.add(p)
+ elif is_glob(p):
+ globs.add(p)
+
+ elif non_str_sequence (p):
+ for pp in p:
+ get_nested_tasks_or_globs(pp, treat_strings_as_tasks, runtime_data_names, tasks, globs)
+ return tasks, globs, runtime_data_names
+
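+# For example (illustrative; "step_1" is a hypothetical task name):
+#
+# tasks, globs, runtime_names = get_nested_tasks_or_globs(
+# ["a.txt", "*.bam", output_from("step_1")])
+# # tasks == set(["step_1"]), globs == set(["*.bam"]), runtime_names == set()
+#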
+#_________________________________________________________________________________________
+#
+# replace_func_names_with_tasks
+#
+#________________________________________________________________________________________
+def replace_func_names_with_tasks(p, func_or_name_to_task, treat_strings_as_tasks = False):
+ """
+ Replaces task functions or task names (strings) with the tasks they represent
+ func_or_name_to_task is a dictionary mapping functions and task names to tasks
+
+ """
+ #
+ # Task functions are replaced by the tasks they represent
+ #
+ if isinstance(p, collections.Callable):
+ #if type(p) == types.FunctionType:
+ return func_or_name_to_task[p]
+
+ #
+ # output_from treats all arguments as tasks or task names
+ #
+ if isinstance(p, output_from):
+ if len(p.args) == 1:
+ return replace_func_names_with_tasks(p.args[0], func_or_name_to_task, True)
+ else:
+ return [replace_func_names_with_tasks(pp, func_or_name_to_task, True) for pp in p.args]
+
+ #
+ # strings become tasks if treat_strings_as_tasks
+ #
+ if isinstance(p, path_str_type):
+ if treat_strings_as_tasks:
+ return func_or_name_to_task[p]
+ return p
+
+ #
+ # No conversions within dictionaries
+ #
+ if isinstance(p, dict):
+ return p
+
+ #
+ # Other sequences are recursed down
+ #
+ elif non_str_sequence(p):
+ l = list()
+ for pp in p:
+
+ #
+ # To be intuitive:
+ # arguments wrapped by output_from are always treated "in-line"
+ # e.g. 1, output_from("a") => 1, task_a
+ # e.g. 1, output_from("a", 2) => 1, task_a, 2
+ #
+ if isinstance(pp, output_from):
+ if len(pp.args) > 1:
+ l.extend(tuple(replace_func_names_with_tasks(pp, func_or_name_to_task, True)))
+ elif len(pp.args) == 1:
+ l.append(replace_func_names_with_tasks(pp.args[0], func_or_name_to_task, True))
+ # else len(pp.args) == 0 !! do nothing
+
+ else:
+ l.append(replace_func_names_with_tasks(pp, func_or_name_to_task, treat_strings_as_tasks))
+ return type(p)(l)
+
+ #
+ # No expansions of non-string/non-sequences
+ #
+ else:
+ return p
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# compiling regular expressions
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+#_________________________________________________________________________________________
+
+# suffix
+
+#_________________________________________________________________________________________
+class suffix(object):
+ def __init__ (self, *args):
+ self.args = args
+
+#_________________________________________________________________________________________
+
+# regex
+
+#_________________________________________________________________________________________
+class regex(object):
+ def __init__ (self, *args):
+ self.args = args
+
+#_________________________________________________________________________________________
+
+# formatter
+
+#_________________________________________________________________________________________
+class formatter(object):
+ def __init__ (self, *args):
+ self.args = args
+
+#_________________________________________________________________________________________
+
+# wrap_exception_as_string
+
+#_________________________________________________________________________________________
+def wrap_exception_as_string ():
+ """
+ return exception as string to be rethrown
+ """
+ exceptionType, exceptionValue, exceptionTraceback = sys.exc_info()
+ msg = "%s.%s" % (exceptionType.__module__, exceptionType.__name__)
+ exception_value = str(exceptionValue)
+ if len(exception_value):
+ return msg + ": (%s)" % exception_value
+ return msg
+
+
+#_________________________________________________________________________________________
+
+# compile_formatter
+
+#_________________________________________________________________________________________
+def compile_formatter(enclosing_task, formatter_obj, error_object, descriptor_string):
+ """
+ Given list of [string|None]
+ Return compiled regular expressions.
+ """
+
+ compiled_regexes = []
+ for ss in formatter_obj.args:
+ # ignore None
+ if ss is None:
+ compiled_regexes.append(None)
+ continue
+
+ formatter_args = str(formatter_obj.args)[1:-1]
+ # regular expression should be strings
+ if not isinstance(ss, path_str_type):
+ raise error_object(enclosing_task, ("{descriptor_string}: "
+ "formatter({formatter_args}) is malformed\n"
+ "formatter(...) should only be used to wrap "
+ 'regular expression strings or None (not "{ss}")')
+ .format(descriptor_string = descriptor_string,
+ formatter_args = formatter_args,
+ ss = ss)
+ )
+
+ try:
+ compiled_regexes.append(re.compile(ss))
+ except:
+ raise error_object(enclosing_task, ("{descriptor_string}: "
+ "in formatter({formatter_args}) \n"
+ 'regular expression "{ss}" is malformed\n'
+ "[{except_str}]")
+ .format(descriptor_string = descriptor_string,
+ formatter_args = formatter_args,
+ ss = ss,
+ except_str = wrap_exception_as_string())
+ )
+ return compiled_regexes
+
+
+
+#_________________________________________________________________________________________
+
+# compile_regex
+
+#_________________________________________________________________________________________
+def compile_regex(enclosing_task, regex, error_object, descriptor_string, regex_object_name = "regex"):
+ """
+ throw error unless regular expression compiles
+ """
+ if not len(regex.args) or len(regex.args) > 1 or not isinstance(regex.args[0], path_str_type):
+ regex_str = str(regex.args)
+ if len(regex.args) > 1:
+ regex_str = regex_str[1:-1]
+ elif len(regex.args) == 0:
+ regex_str = ''
+ else:
+ regex_str = regex_str
+ raise error_object(enclosing_task, ("{descriptor_string}: "
+ "{regex_object_name}({regex_str}) is malformed\n"
+ "{regex_object_name}(...) should only be used to wrap a single regular expression string")
+ .format(descriptor_string = descriptor_string,
+ regex_str = regex_str,
+ regex_object_name = regex_object_name)
+ )
+ try:
+ matching_regex = re.compile(regex.args[0])
+ return matching_regex
+ except:
+ raise error_object(enclosing_task, ("{descriptor_string}: "
+ "regular expression {regex_object_name}('{regex_str}') is malformed\n"
+ "[{except_str}]")
+ .format(descriptor_string = descriptor_string,
+ regex_object_name = regex_object_name,
+ regex_str = regex.args[0],
+ except_str = wrap_exception_as_string())
+ )
+
+#_________________________________________________________________________________________
+
+# compile_suffix
+
+#_________________________________________________________________________________________
+def compile_suffix(enclosing_task, regex, error_object, descriptor_string):
+ """
+ throw error unless regular expression compiles
+ """
+ if not len(regex.args):
+ raise error_object(enclosing_task, "%s: " % descriptor_string +
+ "suffix() is malformed.\n" +
+ "suffix(...) should be used to wrap a string matching the suffixes of file names")
+ if len(regex.args) > 1 or not isinstance(regex.args[0], path_str_type):
+ raise error_object(enclosing_task, "%s: " % descriptor_string +
+ "suffix('%s') is malformed.\n" % (regex.args,) +
+ "suffix(...) should only be used to wrap a single string matching the suffixes of file names")
+ try:
+ matching_regex = re.compile("(.*)" + re.escape(regex.args[0]) + "$")
+ return matching_regex
+ except:
+ raise error_object(enclosing_task, "%s: " % descriptor_string +
+ "suffix('%s') is somehow malformed\n" % regex.args[0] +
+ "[%s]" % wrap_exception_as_string())
+
+#_________________________________________________________________________________________
+
+# check_parallel_parameters
+
+#_________________________________________________________________________________________
+def check_parallel_parameters (enclosing_task, params, error_object):
+ """
+ Helper function for @parallel
+ Checks the format of the job parameters
+ """
+ if not len(params):
+ raise Exception("@parallel parameters is empty.")
+
+ for job_param in params:
+ if isinstance(job_param, path_str_type):
+ message = ("Wrong syntax for @parallel.\n"
+ "@parallel(%s)\n" % ignore_unknown_encoder(params) +
+ "If you are supplying parameters for a task "
+ "running as a single job, "
+ "either don't put enclosing brackets at all (with each parameter "
+ "separated by commas) or enclose all parameters as a nested list of "
+ "lists, e.g. [['input', 'output' ...]]. "
+ )
+ raise error_object(enclosing_task, message)
+
+
+
+#_________________________________________________________________________________________
+
+# check_files_io_parameters
+
+#_________________________________________________________________________________________
+def check_files_io_parameters (enclosing_task, params, error_object):
+ """
+ Helper function for @files
+ Checks format of parameters and
+ whether there are input and output files specified for each job
+ """
+ #if not len(params):
+ # raise Exception("@files I/O parameters is empty.")
+
+ try:
+ for job_param in params:
+ if isinstance(job_param, path_str_type):
+ raise TypeError
+
+ if len(job_param) < 1:
+ raise error_object(enclosing_task, "Missing input files for job " +
+ ignore_unknown_encoder(job_param))
+ if len(job_param) < 2:
+ raise error_object(enclosing_task, "Missing output files for job " +
+ ignore_unknown_encoder(job_param))
+ #if len(get_strings_in_nested_sequence(job_param[0:2])) == 0:
+ # raise error_object(enclosing_task, "Input or output file parameters should "
+ # "contain at least one or more file names strings. "
+ # "Consider using @parallel if you are not using files. " +
+ # ignore_unknown_encoder(job_param))
+ except TypeError:
+ #
+ # job_param was not a list
+ #
+ message = ("Wrong syntax for @files.\n at files(%s)\n" % ignore_unknown_encoder(params) +
+ "If you are supplying parameters for a task "
+ "running as a single job, "
+ "either don't put enclosing brackets at all (with each parameter "
+ "separated by commas) or enclose all parameters as a nested list of "
+ "lists, e.g. [['input', 'output' ...]]. "
+ )
+ raise error_object(enclosing_task, message)
+
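+# Editor's note, not part of the upstream source: a minimal sketch (hypothetical
+# file names) of the nested-list format that check_files_io_parameters() accepts:
+#
+#     @files([["job1.input", "job1.output"],
+#             ["job2.input", "job2.output", "extra_parameter"]])
+#     def run_jobs(input_file, output_file, *extras):
+#         pass
+#
+# A bare string, or a flat list of strings covering several jobs, falls into the
+# TypeError branch above and raises the "Wrong syntax for @files" error.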
+#_________________________________________________________________________________________
+#
+# expand_nested_tasks_or_globs
+#
+#________________________________________________________________________________________
+def expand_nested_tasks_or_globs(p, tasksglobs_to_filenames):
+ """
+ Expand globs and tasks "in-line", unless they are the top level, in which case turn
+ it into a list
+
+ N.B. Globs are only expanded if they are in tasksglobs_to_filenames
+ This function is called for @split descriptors which leave output globs untouched
+ for clarity. Thanks to Noah Spies for spotting this!
+ """
+
+ #
+ # Expand globs or tasks as a list only if they are top level
+ #
+ if ( (isinstance(p, path_str_type) and is_glob(p) and p in tasksglobs_to_filenames) or
+ p.__class__.__name__ == '_task' or
+ isinstance(p, runtime_parameter) ):
+ return tasksglobs_to_filenames[p]
+
+ #
+ # No expansions of strings and dictionaries
+ #
+ if isinstance(p, (path_str_type, dict)):
+ return p
+
+ #
+ # Other sequences are recursed down
+ #
+ elif non_str_sequence(p):
+ l = list()
+ for pp in p:
+ if (isinstance(pp, path_str_type) and pp in tasksglobs_to_filenames):
+ l.extend(tasksglobs_to_filenames[pp])
+ elif pp.__class__.__name__ == '_task' or isinstance(pp, runtime_parameter):
+ files = tasksglobs_to_filenames[pp]
+ # task may have produced a single output: in which case append
+ if non_str_sequence(files):
+ l.extend(files)
+ else:
+ l.append(files)
+ else:
+ l.append(expand_nested_tasks_or_globs(pp, tasksglobs_to_filenames))
+ return type(p)(l)
+
+ #
+ # No expansions of non-string/non-sequences
+ #
+ else:
+ return p
+
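+# Editor's note, not part of the upstream source: an illustrative sketch with a
+# hypothetical, pre-computed lookup table:
+#
+#     tasksglobs_to_filenames = {"*.csv": ["x.csv", "y.csv"]}
+#     expand_nested_tasks_or_globs(["a.txt", "*.csv"], tasksglobs_to_filenames)
+#     # -> ["a.txt", "x.csv", "y.csv"]
+#
+# A glob which is *not* a key of tasksglobs_to_filenames (e.g. an output glob of
+# @split) is passed through unchanged, as described in the docstring above.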
+
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# special markers used by @files_re
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+class combine(object):
+ def __init__ (self, *args):
+ self.args = args
+
+class output_from(object):
+ def __init__ (self, *args):
+ self.args = args
+
+class runtime_parameter(object):
+ def __init__ (self, *args):
+ if len(args) != 1 or not isinstance(args[0], path_str_type):
+ raise Exception("runtime_parameter takes the name of the run time parameter as a single string")
+ self.args = args
+
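+# Editor's note, not part of the upstream source: these marker classes are plain
+# containers for their arguments. For example, a hypothetical
+#
+#     runtime_parameter("gene_list")
+#
+# simply stores ("gene_list",) in .args; the real value is looked up at run time
+# (see the tasksglobs_to_filenames handling in expand_nested_tasks_or_globs()
+# above), and output_from("task_name") similarly marks a string to be treated as
+# a task name rather than a file name.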
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Unit Testing code in test/test_ruffus_utility.py
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
diff --git a/ruffus/ruffus_version.py b/ruffus/ruffus_version.py
new file mode 100755
index 0000000..18f2197
--- /dev/null
+++ b/ruffus/ruffus_version.py
@@ -0,0 +1,27 @@
+#!/usr/bin/env python
+################################################################################
+#
+# version.py
+#
+#
+# Copyright (c) 10/9/2009 Leo Goodstadt
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+#################################################################################
+__version='2.5'
diff --git a/ruffus/task.py b/ruffus/task.py
new file mode 100644
index 0000000..09ac09c
--- /dev/null
+++ b/ruffus/task.py
@@ -0,0 +1,4146 @@
+#!/usr/bin/env python
+from __future__ import print_function
+import sys
+#import signal
+if sys.hexversion < 0x03000000:
+ from future_builtins import zip, map
+################################################################################
+#
+#
+# task.py
+#
+# Copyright (c) 10/9/2009 Leo Goodstadt
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+#################################################################################
+"""
+
+********************************************
+:mod:`ruffus.task` -- Overview
+********************************************
+
+.. moduleauthor:: Leo Goodstadt <ruffus@llew.org.uk>
+
+Initial implementation of @active_if by Jacob Biesinger
+
+============================
+Decorator syntax:
+============================
+
+ Pipelined tasks are created by "decorating" a function with the following syntax::
+
+ def func_a():
+ pass
+
+ @follows(func_a)
+ def func_b ():
+ pass
+
+
+ Each task is a single function which is applied one or more times to a list of parameters
+ (typically input files to produce a list of output files).
+
+ Each of these is a separate, independent job (sharing the same code) which can be
+ run in parallel.
+
+
+============================
+Running the pipeline
+============================
+ To run the pipeline::
+
+ pipeline_run(target_tasks, forcedtorun_tasks = [], multiprocess = 1,
+ logger = stderr_logger,
+ gnu_make_maximal_rebuild_mode = True,
+ cleanup_log = "../cleanup.log")
+
+ pipeline_cleanup(cleanup_log = "../cleanup.log")
+
+
+
+
+
+
+"""
+
+
+
+import os,sys,copy, multiprocessing
+#from collections import namedtuple
+import collections
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# imports
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+import logging
+import re
+from collections import defaultdict
+from multiprocessing import Pool
+from multiprocessing.pool import ThreadPool
+import traceback
+import types
+if sys.hexversion >= 0x03000000:
+ # reduce is no longer a builtin in python3
+ from functools import reduce
+
+
+import textwrap
+import time
+from multiprocessing.managers import SyncManager
+from collections import namedtuple
+from contextlib import contextmanager
+try:
+ import cPickle as pickle
+except:
+ import pickle as pickle
+from . import dbdict
+
+
+if __name__ == '__main__':
+ import sys
+ sys.path.insert(0,".")
+
+from .graph import *
+from .print_dependencies import *
+from .ruffus_exceptions import *
+from .ruffus_utility import *
+from .file_name_parameters import *
+
+if sys.hexversion >= 0x03000000:
+ # everything is unicode in python3
+ path_str_type = str
+else:
+ path_str_type = basestring
+
+
+#
+# use simplejson in place of json for python < 2.6
+#
+try:
+ import json
+except ImportError:
+ import simplejson
+ json = simplejson
+dumps = json.dumps
+
+if sys.hexversion >= 0x03000000:
+ import queue as queue
+else:
+ import Queue as queue
+
+class Ruffus_Keyboard_Interrupt_Exception (Exception):
+ pass
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+#
+# light weight logging objects
+#
+#
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+class t_black_hole_logger:
+ """
+ Does nothing!
+ """
+ def info (self, message, *args, **kwargs):
+ pass
+ def debug (self, message, *args, **kwargs):
+ pass
+ def warning (self, message, *args, **kwargs):
+ pass
+ def error (self, message, *args, **kwargs):
+ pass
+
+
+class t_stderr_logger:
+ """
+ Everything to stderr
+ """
+ def __init__ (self):
+ self.unique_prefix = ""
+ def add_unique_prefix (self):
+ import random
+ random.seed()
+ self.unique_prefix= str(random.randint(0,1000)) + " "
+ def info (self, message):
+ sys.stderr.write(self.unique_prefix + message + "\n")
+ def warning (self, message):
+ sys.stderr.write("\n\n" + self.unique_prefix + "WARNING:\n " + message + "\n\n")
+ def error (self, message):
+ sys.stderr.write("\n\n" + self.unique_prefix + "ERROR:\n " + message + "\n\n")
+ def debug (self, message):
+ sys.stderr.write(self.unique_prefix + message + "\n")
+
+class t_stream_logger:
+ """
+ Everything to the given stream
+ """
+ def __init__ (self, stream):
+ self.stream = stream
+ def info (self, message):
+ self.stream.write(message + "\n")
+ def warning (self, message):
+ self.stream.write("\n\nWARNING:\n " + message + "\n\n")
+ def error (self, message):
+ self.stream.write("\n\nERROR:\n " + message + "\n\n")
+ def debug (self, message):
+ self.stream.write(message + "\n")
+
+black_hole_logger = t_black_hole_logger()
+stderr_logger = t_stderr_logger()
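+# Editor's note, not part of the upstream source: these logger shims only need
+# info/warning/error/debug methods, so any of them can be passed as the logger
+# argument of pipeline_run(). A minimal sketch:
+#
+#     >>> import io
+#     >>> buffer = io.StringIO()
+#     >>> t_stream_logger(buffer).info("hello")
+#     >>> buffer.getvalue()
+#     'hello\n'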
+
+class t_verbose_logger:
+ def __init__ (self, verbose, verbose_abbreviated_path, logger, runtime_data):
+ self.verbose = verbose
+ self.logger = logger
+ self.runtime_data = runtime_data
+ self.verbose_abbreviated_path = verbose_abbreviated_path
+
+#_________________________________________________________________________________________
+#
+# logging helper function
+#
+#________________________________________________________________________________________
+def log_at_level (logger, message_level, verbose_level, msg):
+ """
+ writes msg to the log if message_level <= verbose_level
+ Returns True if anything was written, in case we want to drop down and output at a lower log level
+ """
+ if message_level <= verbose_level:
+ logger.info(msg)
+ return True
+ return False
+
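+# Editor's note, not part of the upstream source: a message is emitted only when
+# its level does not exceed the requested verbosity, e.g.
+#
+#     log_at_level(stderr_logger, 3, 5, "written, because 3 <= 5")      # returns True
+#     log_at_level(stderr_logger, 6, 5, "dropped, because 6 >  5")      # returns False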
+
+
+
+
+
+
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+
+# queue management objects
+
+# inserted into queue like job parameters to control multi-processing queue
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# fake parameters to signal in queue
+class all_tasks_complete:
+ pass
+
+class waiting_for_more_tasks_to_complete:
+ pass
+
+
+#
+# synchronisation data
+#
+#SyncManager()
+#syncmanager.start()
+
+#
+# do nothing semaphore
+#
+@contextmanager
+def do_nothing_semaphore():
+ yield
+
+
+
+# EXTRA pipeline_run DEBUGGING
+EXTRA_PIPELINERUN_DEBUGGING = False
+
+
+
+
+
+
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# task_decorator
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+class task_decorator(object):
+ """
+ Adds task to the "pipeline_task" attribute of this function but
+ otherwise leaves function untouched
+ """
+ def __init__(self, *decoratorArgs, **decoratorNamedArgs):
+ """
+ saves decorator arguments
+ """
+ self.args = decoratorArgs
+ self.named_args = decoratorNamedArgs
+
+ def __call__(self, func):
+ """
+ calls func in task with the same name as the class
+ """
+ # add task as attribute of this function
+ if not hasattr(func, "pipeline_task"):
+ func.pipeline_task = _task.create_task(func)
+
+
+ # call the method called
+ # "task.task_decorator"
+ # where "task_decorator" is the name of this class
+ decorator_function_name = "task_" + self.__class__.__name__
+ task_decorator_function = getattr(func.pipeline_task, decorator_function_name)
+ task_decorator_function(self.args, **self.named_args)
+
+
+ #
+ # don't change the function so we can call it unaltered
+ #
+ return func
+
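+# Editor's note, not part of the upstream source: each decorator class below is
+# dispatched by name. For a hypothetical
+#
+#     @follows("make_directories")
+#     def build_index():
+#         pass
+#
+# __call__ above attaches build_index.pipeline_task (a _task node) and then calls
+# build_index.pipeline_task.task_follows(("make_directories",)), i.e.
+# "task_" + the decorator class name, passing the decorator arguments through.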
+
+#
+# Basic decorators
+#
+class follows(task_decorator):
+ pass
+
+class files(task_decorator):
+ pass
+
+
+
+
+
+
+#
+# Core
+#
+class split(task_decorator):
+ pass
+
+class transform(task_decorator):
+ pass
+
+class subdivide(task_decorator):
+ pass
+
+class originate(task_decorator):
+ pass
+
+class merge(task_decorator):
+ pass
+
+class posttask(task_decorator):
+ pass
+
+class jobs_limit(task_decorator):
+ pass
+
+
+#
+# Advanced
+#
+class collate(task_decorator):
+ pass
+
+class active_if(task_decorator):
+ pass
+
+#
+# Esoteric
+#
+class check_if_uptodate(task_decorator):
+ pass
+
+class parallel(task_decorator):
+ pass
+
+class graphviz(task_decorator):
+ pass
+
+#
+# Obsolete
+#
+class files_re(task_decorator):
+ pass
+
+
+
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# indicator objects
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+#_________________________________________________________________________________________
+
+# mkdir
+
+#_________________________________________________________________________________________
+class mkdir(task_decorator):
+ #def __init__ (self, *args):
+ # self.args = args
+ pass
+
+#_________________________________________________________________________________________
+
+# touch_file
+
+#_________________________________________________________________________________________
+class touch_file(object):
+ def __init__ (self, *args):
+ self.args = args
+
+
+
+#_________________________________________________________________________________________
+
+# inputs
+
+#_________________________________________________________________________________________
+class inputs(object):
+ def __init__ (self, *args):
+ self.args = args
+
+#_________________________________________________________________________________________
+
+# add_inputs
+
+#_________________________________________________________________________________________
+class add_inputs(object):
+ def __init__ (self, *args):
+ self.args = args
+
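+# Editor's note, not part of the upstream source: inputs(...) *replaces* the input
+# parameters derived from the upstream task, whereas add_inputs(...) *appends* to
+# them. A hypothetical sketch (file names invented for illustration):
+#
+#     @transform(["a.bam"], regex(r"(.+)\.bam$"), add_inputs(r"\1.bai"), r"\1.stats")
+#     def calculate_stats(input_files, output_file):
+#         pass    # input_files is roughly ("a.bam", "a.bai")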
+#8888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# job descriptors
+
+# given parameters, returns strings describing job
+# First returned parameter is the whole job description as a single string
+# Second returned parameter is a list of strings for input, output and extra parameters
+# intended to be reformatted with indentation
+# main use in error logging
+
+#8888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+def generic_job_descriptor (param, verbose_abbreviated_path, runtime_data):
+ if param in ([], None):
+ m = "Job"
+ else:
+ m = "Job = %s" % ignore_unknown_encoder(param)
+
+ return m, [m]
+
+def io_files_job_descriptor (param, verbose_abbreviated_path, runtime_data):
+ extra_param = ", " + shorten_filenames_encoder(param[2:], verbose_abbreviated_path)[1:-1] if len(param) > 2 else ""
+ out_param = shorten_filenames_encoder(param[1], verbose_abbreviated_path) if len(param) > 1 else "??"
+ in_param = shorten_filenames_encoder(param[0], verbose_abbreviated_path) if len(param) > 0 else "??"
+
+ return ("Job = [%s -> %s%s]" % (in_param, out_param, extra_param),
+ ["Job = [%s" % in_param, "-> " + out_param + extra_param + "]"])
+
+
+def io_files_one_to_many_job_descriptor (param, verbose_abbreviated_path, runtime_data):
+
+ extra_param = ", " + shorten_filenames_encoder(param[2:], verbose_abbreviated_path)[1:-1] if len(param) > 2 else ""
+ out_param = shorten_filenames_encoder(param[1], verbose_abbreviated_path) if len(param) > 1 else "??"
+ in_param = shorten_filenames_encoder(param[0], verbose_abbreviated_path) if len(param) > 0 else "??"
+
+ # start with input parameter
+ ret_params = ["Job = [%s" % in_param]
+
+ # add output parameter to list,
+ # processing one by one if multiple output parameters
+ if len(param) > 1:
+ if isinstance(param[1], (list, tuple)):
+ ret_params.extend("-> " + shorten_filenames_encoder(p, verbose_abbreviated_path) for p in param[1])
+ else:
+ ret_params.append("-> " + out_param)
+
+ # add extra
+ if len(param) > 2 :
+ ret_params.append(" , " + shorten_filenames_encoder(param[2:], verbose_abbreviated_path)[1:-1])
+
+ # add closing bracket
+ ret_params[-1] +="]"
+
+ return ("Job = [%s -> %s%s]" % (in_param, out_param, extra_param), ret_params)
+
+
+def mkdir_job_descriptor (param, verbose_abbreviated_path, runtime_data):
+ # input, output and parameters
+ if len(param) == 1:
+ m = "Make directories %s" % (shorten_filenames_encoder(param[0], verbose_abbreviated_path))
+ elif len(param) == 2:
+ m = "Make directories %s" % (shorten_filenames_encoder(param[1], verbose_abbreviated_path))
+ else:
+ return [], []
+ return m, [m]
+
+
+#8888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# job wrappers
+# registers files/directories for cleanup
+
+#8888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+#_________________________________________________________________________________________
+
+# generic job wrapper
+
+#_________________________________________________________________________________________
+def job_wrapper_generic(param, user_defined_work_func, register_cleanup, touch_files_only):
+ """
+ run func
+ """
+ assert(user_defined_work_func)
+ return user_defined_work_func(*param)
+
+#_________________________________________________________________________________________
+
+# job wrapper for all that deal with i/o files
+
+#_________________________________________________________________________________________
+def job_wrapper_io_files(param, user_defined_work_func, register_cleanup, touch_files_only, output_files_only = False):
+ """
+ run func on any i/o if not up to date
+ """
+ assert(user_defined_work_func)
+
+ i,o = param[0:2]
+
+ if touch_files_only == 0:
+ # @originate only uses output files
+ if output_files_only:
+ ret_val = user_defined_work_func(*(param[1:]))
+ # all other decorators
+ else:
+ try:
+ ret_val = user_defined_work_func(*param)
+ # EXTRA pipeline_run DEBUGGING
+ if EXTRA_PIPELINERUN_DEBUGGING:
+ sys.stderr.write("w" * 36 + "[[ task() done ]]" + "w" * 27 + "\n")
+ except KeyboardInterrupt as e:
+ # Reraise KeyboardInterrupt as a normal Exception
+ # EXTRA pipeline_run DEBUGGING
+ if EXTRA_PIPELINERUN_DEBUGGING:
+ sys.stderr.write("E" * 36 + "[[ KeyboardInterrupt from task() ]]" + "E" * 9 + "\n")
+ raise Ruffus_Keyboard_Interrupt_Exception("KeyboardInterrupt")
+ except:
+ #sys.stderr.write("?? %s ??" % (tuple(param),))
+ raise
+ elif touch_files_only == 1:
+ #job_history = dbdict.open(RUFFUS_HISTORY_FILE, picklevalues=True)
+
+ #
+ # touch files only
+ #
+ for f in get_strings_in_nested_sequence(o):
+ #
+ # race condition still possible...
+ #
+ with open(f, 'a') as ff:
+ os.utime(f, None)
+ #if not os.path.exists(f):
+ # open(f, 'w')
+ # mtime = os.path.getmtime(f)
+ #else:
+ # os.utime(f, None)
+ # mtime = os.path.getmtime(f)
+
+
+ #chksum = JobHistoryChecksum(f, mtime, param[2:], user_defined_work_func.pipeline_task)
+ #job_history[f] = chksum # update file times and job details in history
+
+
+
+ #
+ # register strings in output file for cleanup
+ #
+ for f in get_strings_in_nested_sequence(o):
+ register_cleanup(f, "file")
+
+
+#_________________________________________________________________________________________
+
+# job wrapper for all that only deals with output files
+
+#_________________________________________________________________________________________
+def job_wrapper_output_files(param, user_defined_work_func, register_cleanup, touch_files_only):
+ """
+ run func on any output file if not up to date
+ """
+ job_wrapper_io_files(param, user_defined_work_func, register_cleanup, touch_files_only, output_files_only = True)
+
+
+#_________________________________________________________________________________________
+
+# job wrapper for mkdir
+
+#_________________________________________________________________________________________
+def job_wrapper_mkdir(param, user_defined_work_func, register_cleanup, touch_files_only):
+ """
+ make directories if they do not already exist
+ """
+ #
+ # Just in case, swallow file exist errors because some other makedirs might be subpath
+ # of this directory
+ # Should not be necessary because of "sorted" in task_mkdir
+ #
+ #
+ if len(param) == 1:
+ dirs = param[0]
+
+ # if there are two parameters, they are i/o, and the directories to be created are the output
+ elif len(param) == 2:
+ dirs = param[1]
+ else:
+ raise Exception("Wrong number of arguments in mkdir check %s" % (param,))
+
+ # get all file names in flat list
+ dirs = get_strings_in_nested_sequence (dirs)
+
+ for d in dirs:
+ try:
+ os.makedirs(d) # Please email the authors if an uncaught exception is raised here
+ register_cleanup(d, "makedirs")
+ except:
+ #
+ # ignore exception if
+ # exception == OSError + "File exists" or // Linux
+ # exception == WindowsError + "file already exists" // Windows
+ # Are other exceptions raised by other OS?
+ #
+ #
+ exceptionType, exceptionValue, exceptionTraceback = sys.exc_info()
+ # exceptionType == OSError and
+ if "File exists" in str(exceptionValue):
+ continue
+ # exceptionType == WindowsError and
+ elif "file already exists" in str(exceptionValue):
+ continue
+ raise
+
+ # changed for compatibility with python 3.x
+ #except OSError, e:
+ # if "File exists" not in e:
+ # raise
+
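+# Editor's note, not part of the upstream source: on Python >= 3.2 the try/except
+# above is roughly equivalent to
+#
+#     os.makedirs(d, exist_ok = True)
+#
+# but exist_ok does not exist on the Python 2.x interpreters this module still
+# supports, hence the string matching on the exception message.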
+
+JOB_ERROR = 0
+JOB_SIGNALLED_BREAK = 1
+JOB_UP_TO_DATE = 2
+JOB_COMPLETED = 3
+
+#_________________________________________________________________________________________
+
+# t_job_result
+# Previously a collections.namedtuple (introduced in python 2.6)
+# Now using implementation from running
+# t_job_result = namedtuple('t_job_result', 'task_name state job_name return_value exception', verbose =1)
+# for compatibility with python 2.5
+
+#_________________________________________________________________________________________
+t_job_result = namedtuple('t_job_result', 'task_name state job_name return_value exception params', verbose =0)
+#class t_job_result(tuple):
+# 't_job_result(task_name, state, job_name, return_value, exception, params)'
+#
+# __slots__ = ()
+#
+# fields = ('task_name', 'state', 'job_name', 'return_value', 'exception', 'params')
+#
+# def __new__(cls, task_name, state, job_name, return_value, exception, params):
+# return tuple.__new__(cls, (task_name, state, job_name, return_value, exception, params))
+#
+# @classmethod
+# def make(cls, iterable, new=tuple.__new__, len=len):
+# 'Make a new t_job_result object from a sequence or iterable'
+# result = new(cls, iterable)
+# if len(result) != 6:
+# raise TypeError('Expected 6 arguments, got %d' % len(result))
+# return result
+#
+# def __repr__(self):
+# return 't_job_result(task_name=%r, state=%r, job_name=%r, return_value=%r, exception=%r, params=%r)' % self
+#
+# def asdict(t):
+# 'Return a new dict which maps field names to their values'
+# return {'task_name': t[0], 'state': t[1], 'job_name': t[2], 'return_value': t[3], 'exception': t[4], 'params':t[5]}
+#
+# def replace(self, **kwds):
+# 'Return a new t_job_result object replacing specified fields with new values'
+# result = self.make(list(map(kwds.pop, ('task_name', 'state', 'job_name', 'return_value', 'exception', 'params'), self)))
+# if kwds:
+# raise ValueError('Got unexpected field names: %r' % list(kwds.keys()))
+# return result
+#
+# def __getnewargs__(self):
+# return tuple(self)
+#
+# task_name = property(itemgetter(0))
+# state = property(itemgetter(1))
+# job_name = property(itemgetter(2))
+# return_value= property(itemgetter(3))
+# exception = property(itemgetter(4))
+# params = property(itemgetter(5))
+
+
+
+#_________________________________________________________________________________________
+
+# multiprocess_callback
+#
+#_________________________________________________________________________________________
+def run_pooled_job_without_exceptions (process_parameters):
+ """
+ handles running jobs in parallel
+ Make sure exceptions are caught here:
+ Otherwise, these will kill the thread/process
+ return any exceptions which will be rethrown at the other end:
+ See RethrownJobError / run_all_jobs_in_task
+ """
+ #signal.signal(signal.SIGINT, signal.SIG_IGN)
+ (param, task_name, job_name, job_wrapper, user_defined_work_func,
+ job_limit_semaphore, death_event, touch_files_only) = process_parameters
+
+ ##job_history = dbdict.open(RUFFUS_HISTORY_FILE, picklevalues=True)
+ ##outfile = param[1] if len(param) > 1 else None # mkdir has no output
+ ##if not isinstance(outfile, list):
+ ## outfile = [outfile]
+ ##for o in outfile:
+ ## job_history.pop(o, None) # remove outfile from history if it exists
+
+ if job_limit_semaphore == None:
+ job_limit_semaphore = do_nothing_semaphore()
+
+ try:
+ with job_limit_semaphore:
+ # EXTRA pipeline_run DEBUGGING
+ if EXTRA_PIPELINERUN_DEBUGGING:
+ sys.stderr.write(">" * 36 + "[[ job_wrapper ]]" + ">" * 27 + "\n")
+ return_value = job_wrapper(param, user_defined_work_func, register_cleanup, touch_files_only)
+
+ #
+ # ensure one second between jobs
+ #
+ #if one_second_per_job:
+ # time.sleep(1.01)
+ # EXTRA pipeline_run DEBUGGING
+ if EXTRA_PIPELINERUN_DEBUGGING:
+ sys.stderr.write("<" * 36 + "[[ job_wrapper done ]]" + "<" * 22 + "\n")
+ return t_job_result(task_name, JOB_COMPLETED, job_name, return_value, None, param)
+ except KeyboardInterrupt as e:
+ # Reraise KeyboardInterrupt as a normal Exception. Should never be necessary here
+ # EXTRA pipeline_run DEBUGGING
+ if EXTRA_PIPELINERUN_DEBUGGING:
+ sys.stderr.write("E" * 36 + "[[ KeyboardInterrupt ]]" + "E" * 21 + "\n")
+ death_event.set()
+ raise Ruffus_Keyboard_Interrupt_Exception("KeyboardInterrupt")
+ except:
+ # EXTRA pipeline_run DEBUGGING
+ if EXTRA_PIPELINERUN_DEBUGGING:
+ sys.stderr.write("E" * 36 + "[[ Other Interrupt ]]" + "E" * 23 + "\n")
+ # Wrap up one or more exceptions rethrown across process boundaries
+ #
+ # See multiprocessor.Server.handle_request/serve_client for an analogous function
+ exceptionType, exceptionValue, exceptionTraceback = sys.exc_info()
+ exception_stack = traceback.format_exc()
+ exception_name = exceptionType.__module__ + '.' + exceptionType.__name__
+ exception_value = str(exceptionValue)
+ if len(exception_value):
+ exception_value = "(%s)" % exception_value
+
+ if exceptionType == Ruffus_Keyboard_Interrupt_Exception:
+ death_event.set()
+ job_state = JOB_SIGNALLED_BREAK
+ elif exceptionType == JobSignalledBreak:
+ job_state = JOB_SIGNALLED_BREAK
+ else:
+ job_state = JOB_ERROR
+ return t_job_result(task_name, job_state, job_name, None,
+ [task_name,
+ job_name,
+ exception_name,
+ exception_value,
+ exception_stack], param)
+
+
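+# Editor's note, not part of the upstream source: run_pooled_job_without_exceptions()
+# never lets an exception escape; the caller inspects the returned t_job_result
+# instead. A hypothetical sketch:
+#
+#     result = run_pooled_job_without_exceptions(process_parameters)
+#     if result.state == JOB_COMPLETED:
+#         pass            # result.return_value holds whatever the job returned
+#     elif result.state in (JOB_ERROR, JOB_SIGNALLED_BREAK):
+#         pass            # result.exception holds the marshalled exception details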
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Helper function
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+
+#_________________________________________________________________________________________
+
+# register_cleanup
+
+# to do
+
+#_________________________________________________________________________________________
+def register_cleanup (file_name, operation):
+ pass
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# _task
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+class _task (node):
+ """
+ pipeline task
+ """
+
+ action_names = ["unspecified",
+ "task",
+ "task_files_re",
+ "task_split",
+ "task_merge",
+ "task_transform",
+ "task_collate",
+ "task_files_func",
+ "task_files",
+ "task_mkdir",
+ "task_parallel",
+ "task_active_if",
+ "task_product",
+ "task_permutations",
+ "task_combinations",
+ "task_combinations_with_replacement",
+ "task_subdivide",
+ "task_originate",
+ "task_graphviz",
+ ]
+ action_unspecified = 0
+ action_task = 1
+ action_task_files_re = 2
+ action_task_split = 3
+ action_task_merge = 4
+ action_task_transform = 5
+ action_task_collate = 6
+ action_task_files_func = 7
+ action_task_files = 8
+ action_mkdir = 9
+ action_parallel = 10
+ action_active_if = 11
+ action_task_product = 12
+ action_task_permutations = 13
+ action_task_combinations = 14
+ action_task_combinations_with_replacement = 15
+ action_task_subdivide = 16
+ action_task_originate = 17
+ action_task_graphviz = 18
+
+
+
+ multiple_jobs_outputs = 0
+ single_job_single_output = 1
+ job_single_matches_parent= 2
+
+ job_limit_semaphores = {}
+
+
+
+ #_________________________________________________________________________________________
+
+ # create_task / __init__
+
+ #_________________________________________________________________________________________
+ @staticmethod
+ def create_task(func):
+ """
+ Create task if the name has not been previously specified
+ Note that the task function may not have been created yet.
+ This allows us to create tasks and dependencies out of order
+ """
+ func_name = func.__name__
+ module_name = str(func.__module__)
+ task_name = module_name + "." + func_name
+
+ # Link to existing dependency if task name has previously been specified
+ if node.is_node(task_name):
+ t = node.lookup_node_from_name(task_name)
+ if t.user_defined_work_func != None:
+ raise error_duplicate_task_name("Same task name %s specified multiple times in the same module" % task_name)
+ # otherwise create new
+ else:
+ t = _task(module_name, func_name)
+
+ t.set_action_type (_task.action_task)
+ t.user_defined_work_func = func
+ assert(t._name == task_name)
+ # convert description into one line
+ if func.__doc__:
+ t._description = re.sub("\n\s+", " ", func.__doc__).strip()
+ else:
+ t._description = ""
+
+ return t
+
+ #_________________________________________________________________________________________
+
+ # get_action_name
+
+ #_________________________________________________________________________________________
+ def get_action_name (self):
+ return _task.action_names[self._action_type]
+
+ #_________________________________________________________________________________________
+
+ # __init__
+
+ #_________________________________________________________________________________________
+ def __init__ (self, module_name, func_name):
+ """
+ Does nothing because this might just be a dependency.
+ If it does not get initialised by a real task
+ (a task is depending on an unknown function/task),
+ throw an exception when running the pipeline
+
+ """
+ self._module_name = module_name
+ self._func_name = func_name
+
+ node.__init__ (self, module_name + "." + func_name)
+ self._action_type = _task.action_unspecified
+
+ # Each task has its own checksum level
+ # At the moment this is really so multiple pipelines in the same script can have
+ # different checksum levels
+ # Though set by pipeline_xxxx functions, have initial valid value so unit tests work :-|
+ self.checksum_level = CHECKSUM_FILE_TIMESTAMPS
+ self.param_generator_func = None
+ self.needs_update_func = None
+ self.job_wrapper = job_wrapper_generic
+
+ #
+ self.job_descriptor = generic_job_descriptor
+
+ # jobs which produce a single output.
+ # special handling for task.get_output_files for dependency chaining
+ self._single_job_single_output = self.multiple_jobs_outputs
+ self.single_multi_io = self.many_to_many
+
+ # function which is decorated and does the actual work
+ self.user_defined_work_func = None
+
+ # functions which will be called when task completes
+ self.posttask_functions = []
+
+ # give makedir automatically made parent tasks unique names
+ self.cnt_task_mkdir = 0
+
+ # whether only task function itself knows what output it will produce
+ # i.e. output is a glob or something similar
+ self.indeterminate_output = 0
+
+ # cache output file names here
+ self.output_filenames = None
+
+ self.semaphore_name = module_name + "." + func_name
+
+ # do not test for whether task is active
+ self.active_if_checks = None
+
+ # extra flag for outputfiles
+ self.is_active = True
+
+
+
+ #_________________________________________________________________________________________
+
+ # init_for_pipeline
+
+ #_________________________________________________________________________________________
+ def init_for_pipeline (self):
+ """
+ Initialize variables for pipeline run / printout
+
+ **********
+ BEWARE
+ **********
+
+ Because state is stored, ruffus is *not* reentrant.
+
+ **********
+ BEWARE
+ **********
+ """
+
+ # cache output file names here
+ self.output_filenames = None
+
+
+ #_________________________________________________________________________________________
+
+ # set_action_type
+
+ #_________________________________________________________________________________________
+ def set_action_type (self, new_action_type):
+ """
+ Save how this task
+ 1) tests whether it is up-to-date and
+ 2) handles input/output files
+
+ Checks that the task has not been defined with conflicting actions
+
+ """
+ if self._action_type not in (_task.action_unspecified, _task.action_task):
+ old_action = _task.action_names[self._action_type]
+ new_action = _task.action_names[new_action_type]
+ actions = " and ".join(list(set((old_action, new_action))))
+ task_name = "def %s(...)" % self._name.replace("__main__.", "")
+ raise error_decorator_args((" %s\n has duplicate task specifications: (%s)\n") %
+ (task_name, actions))
+ self._action_type = new_action_type
+ self._action_type_desc = _task.action_names[new_action_type]
+
+
+
+ #_________________________________________________________________________________________
+
+ # get_job_name
+
+ #_________________________________________________________________________________________
+ def get_job_name(self, descriptive_param, verbose_abbreviated_path, runtime_data):
+ """
+ Use job descriptor to return short name for job, including any parameters
+
+ runtime_data is not (yet) used but may be used to add context in future
+ """
+ return self.job_descriptor(descriptive_param, verbose_abbreviated_path, runtime_data)[0]
+
+
+ #_________________________________________________________________________________________
+
+ # get_task_name
+
+ #_________________________________________________________________________________________
+ def get_task_name(self, in_func_format = False):
+ """
+ Returns name of task function, removing __main__ namespace if necessary
+
+ if in_func_format is true, will return def task_func(...):
+
+ """
+
+ task_name = self._name.replace("__main__.", "")
+ if self._action_type != _task.action_mkdir and in_func_format:
+ return "def %s(...):" % task_name
+ else:
+ return task_name
+
+
+
+ #_________________________________________________________________________________________
+
+ # update_active_state
+
+ #_________________________________________________________________________________________
+ def update_active_state (self):
+ #
+ # If has an @active_if decorator, check if the task needs to be run
+ # @active_if parameters may be call back functions or booleans
+ #
+ if (self.active_if_checks != None and
+ any( not arg() if isinstance(arg, collections.Callable) else not arg
+ for arg in self.active_if_checks)):
+ # flip is active to false.
+ # ( get_output_files() will return empty if inactive )
+ # Remember each iteration of pipeline_printout pipeline_run will have
+ # another bite at changing this value
+ self.is_active = False
+ else:
+ # flip is active to True so that downstream dependencies will be correct
+ # ( get_output_files() will return empty if inactive )
+ # Remember each iteration of pipeline_printout pipeline_run will have
+ # another bite at changing this value
+ self.is_active = True
+
+
+
+ #_________________________________________________________________________________________
+
+ # printout
+
+ # This code will look so much better once we have job level dependencies
+ # pipeline_run has dependencies percolating up/down. Don't want to
+ # recreate all the logic here
+
+ #_________________________________________________________________________________________
+ def printout (self, runtime_data, force_rerun, job_history, task_is_out_of_date, verbose=1, verbose_abbreviated_path = 2, indent = 4):
+ """
+ Print out all jobs for this task
+
+ verbose =
+ level 1 : logs Out-of-date Task names
+ level 2 : logs All Tasks (including any task function docstrings)
+ level 3 : logs Out-of-date Jobs in Out-of-date Tasks, no explanation
+ level 4 : logs Out-of-date Jobs in Out-of-date Tasks, saying why they are out of date (include only list of up-to-date tasks)
+ level 5 : All Jobs in Out-of-date Tasks (include only list of up-to-date tasks)
+ level 6 : All jobs in All Tasks whether out of date or not
+
+ """
+
+ def get_job_names (param, indent_str):
+ job_names = self.job_descriptor(param, verbose_abbreviated_path, runtime_data)[1]
+ if len(job_names) > 1:
+ job_names = ([indent_str + job_names[0]] +
+ [indent_str + " " + jn for jn in job_names[1:]])
+ else:
+ job_names = ([indent_str + job_names[0]])
+ return job_names
+
+
+
+ if not verbose:
+ return []
+
+ indent_str = ' ' * indent
+
+ messages = []
+
+ # LOGGER: level 1 : logs Out-of-date Tasks (names and warnings)
+ messages.append("Task = " + self.get_task_name() + (" >>Forced to rerun<<" if force_rerun else ""))
+ if verbose ==1:
+ return messages
+
+ # LOGGER: level 2 : logs All Tasks (including any task function docstrings)
+ if verbose >= 2 and len(self._description):
+ messages.append(indent_str + '"' + self._description + '"')
+
+ #
+ # single job state
+ #
+ if verbose >= 10:
+ if self._single_job_single_output == self.single_job_single_output:
+ messages.append(" Single job single output")
+ elif self._single_job_single_output == self.multiple_jobs_outputs:
+ messages.append(" Multiple jobs Multiple outputs")
+ else:
+ messages.append(" Single jobs status depends on %s" % self._single_job_single_output._name)
+
+
+ # LOGGER: No job if less than 2
+ if verbose <= 2 :
+ return messages
+
+ # increase indent for jobs up to date status
+ indent_str += " " * 3
+
+ #
+ # If has an @active_if decorator, check if the task needs to be run
+ # @active_if parameters may be call back functions or booleans
+ #
+ if not self.is_active:
+ # LOGGER
+ if verbose <= 3:
+ return messages
+ messages.append(indent_str + "Task is inactive")
+ # add spacer line
+ messages.append("")
+ return messages
+
+ #
+ # No parameters: just call task function
+ #
+ if self.param_generator_func == None:
+ # LOGGER
+ if verbose <= 3:
+ return messages
+
+ #
+ # needs update func = None: always needs update
+ #
+ if not self.needs_update_func:
+ messages.append(indent_str + "Task needs update: No function to check if up-to-date or not")
+ return messages
+
+ if self.needs_update_func == needs_update_check_modify_time:
+ needs_update, msg = self.needs_update_func (task=self, job_history = job_history, verbose_abbreviated_path = verbose_abbreviated_path)
+ else:
+ needs_update, msg = self.needs_update_func ()
+
+ if needs_update:
+ messages.append(indent_str + "Task needs update: %s" % msg)
+ #
+ # Get rid of up-to-date messages:
+ # Superfluous for parts of the pipeline which are up-to-date and
+ # Misleading for parts of the pipeline which require updating:
+ # tasks might have to run based on dependencies anyway
+ #
+ #else:
+ # if task_is_out_of_date:
+ # messages.append(indent_str + "Task appears up-to-date but will rerun after its dependencies")
+ # else:
+ # messages.append(indent_str + "Task up-to-date")
+
+ else:
+ runtime_data["MATCH_FAILURE"] = []
+ #
+ # return messages description per job if verbose > 5 else whether up to date or not
+ #
+ cnt_jobs = 0
+ for param, descriptive_param in self.param_generator_func(runtime_data):
+ cnt_jobs += 1
+
+ #
+ # needs update func = None: always needs update
+ #
+ if not self.needs_update_func:
+ if verbose >= 5:
+ messages.extend(get_job_names (descriptive_param, indent_str))
+ messages.append(indent_str + " Jobs needs update: No function to check if up-to-date or not")
+ continue
+
+ if self.needs_update_func == needs_update_check_modify_time:
+ needs_update, msg = self.needs_update_func (*param, task=self, job_history = job_history, verbose_abbreviated_path = verbose_abbreviated_path)
+ else:
+ needs_update, msg = self.needs_update_func (*param)
+
+ if needs_update:
+ messages.extend(get_job_names (descriptive_param, indent_str))
+ if verbose >= 4:
+ per_job_messages = [(indent_str + s) for s in (" Job needs update: %s" % msg).split("\n")]
+ messages.extend(per_job_messages)
+ else:
+ messages.append(indent_str + " Job needs update")
+
+
+ # up to date: log anyway if verbose
+ else:
+ # LOGGER
+ if (task_is_out_of_date and verbose >= 5) or verbose >= 6:
+ messages.extend(get_job_names (descriptive_param, indent_str))
+ #
+ # Get rid of up-to-date messages:
+ # Superfluous for parts of the pipeline which are up-to-date and
+ # Misleading for parts of the pipeline which require updating:
+ # tasks might have to run based on dependencies anyway
+ #
+ #if not task_is_out_of_date:
+ # messages.append(indent_str + " Job up-to-date")
+
+
+ if cnt_jobs == 0:
+ messages.append(indent_str + "!!! No jobs for this task. "
+ "Are you sure there is not a error in your "
+ "code / regular expression?")
+ # LOGGER
+ if verbose >= 4 or (verbose and cnt_jobs == 0):
+ if runtime_data and "MATCH_FAILURE" in runtime_data:
+ for s in runtime_data["MATCH_FAILURE"]:
+ messages.append(indent_str + "Job Warning: File match failure: " + s)
+ runtime_data["MATCH_FAILURE"] = []
+ messages.append("")
+ return messages
+
+
+
+
+ #_____________________________________________________________________________________
+
+ # signal
+ #
+ # returns whether up to date
+ #
+ #_____________________________________________________________________________________
+ def signal (self, verbose_logger_job_history):
+ """
+ If up to date: signal = true
+ If true, depth first search will not pass through this node
+ """
+ if not verbose_logger_job_history:
+ raise Exception("verbose_logger_job_history is None")
+
+ verbose_logger = verbose_logger_job_history[0]
+ job_history = verbose_logger_job_history[1]
+
+ try:
+ logger = verbose_logger.logger
+ verbose = verbose_logger.verbose
+ runtime_data = verbose_logger.runtime_data
+ verbose_abbreviated_path = verbose_logger.verbose_abbreviated_path
+
+ log_at_level (logger, 10, verbose,
+ " Task = " + self.get_task_name())
+
+ #
+ # If job is inactive, always consider it up-to-date
+ #
+ if (self.active_if_checks != None and
+ any( not arg() if isinstance(arg, collections.Callable) else not arg
+ for arg in self.active_if_checks)):
+ log_at_level (logger, 10, verbose,
+ " Inactive task: treat as Up to date")
+ #print 'signaling that the inactive task is up to date'
+ return True
+
+ #
+ # Always needs update if no way to check if up to date
+ #
+ if self.needs_update_func == None:
+ log_at_level (logger, 10, verbose,
+ " No update function: treat as out of date")
+ return False
+
+ #
+ # if no parameters, just return the results of needs update
+ #
+ if self.param_generator_func == None:
+ if self.needs_update_func:
+ if self.needs_update_func == needs_update_check_modify_time:
+ needs_update, msg = self.needs_update_func (task=self, job_history = job_history, verbose_abbreviated_path = verbose_abbreviated_path)
+ else:
+ needs_update, msg = self.needs_update_func ()
+ log_at_level (logger, 10, verbose,
+ " Needs update = %s" % needs_update)
+ return not needs_update
+ else:
+ return True
+ else:
+ #
+ # return not up to date if ANY jobs needs update
+ #
+ for param, descriptive_param in self.param_generator_func(runtime_data):
+ if self.needs_update_func == needs_update_check_modify_time:
+ needs_update, msg = self.needs_update_func (*param, task=self, job_history = job_history, verbose_abbreviated_path = verbose_abbreviated_path)
+ else:
+ needs_update, msg = self.needs_update_func (*param)
+ if needs_update:
+ log_at_level (logger, 10, verbose, " Needing update:\n %s" % self.get_job_name(descriptive_param, verbose_abbreviated_path, runtime_data))
+ return False
+
+ #
+ # Percolate warnings from parameter factories
+ #
+ if (verbose >= 1 and "ruffus_WARNING" in runtime_data and
+ self.param_generator_func in runtime_data["ruffus_WARNING"]):
+ for msg in runtime_data["ruffus_WARNING"][self.param_generator_func]:
+ logger.warning(" 'In Task %s' %s " % (self.get_task_name(True), msg))
+
+ log_at_level (logger, 10, verbose, " All jobs up to date")
+
+
+
+
+ return True
+
+ #
+ # removed for compatibility with python 3.x
+ #
+ # rethrow exception after adding task name
+ #except error_task, inst:
+ # inst.specify_task(self, "Exceptions in dependency checking")
+ # raise
+
+ except:
+ exceptionType, exceptionValue, exceptionTraceback = sys.exc_info()
+
+ #
+ # rethrow exception after adding task name
+ #
+ if exceptionType == error_task:
+ exceptionValue.specify_task(self, "Exceptions in dependency checking")
+ raise
+
+ exception_stack = traceback.format_exc()
+ exception_name = exceptionType.__module__ + '.' + exceptionType.__name__
+ exception_value = str(exceptionValue)
+ if len(exception_value):
+ exception_value = "(%s)" % exception_value
+ errt = RethrownJobError([(self._name,
+ "",
+ exception_name,
+ exception_value,
+ exception_stack)])
+ errt.specify_task(self, "Exceptions generating parameters")
+ raise errt
+
+
+
+ #_____________________________________________________________________________________
+
+ # get_output_files
+ #
+ #
+ #_____________________________________________________________________________________
+ def get_output_files (self, do_not_expand_single_job_tasks, runtime_data):
+ """
+ Cache output files
+
+ If flattened is True, returns file as a list of strings,
+ flattening any nested structures and discarding non string names
+ Normally returns a list with one item for each job or a just a list of file names.
+ For "single_job_single_output" i.e. @merge and @files with single jobs,
+ returns the output of a single job (i.e. can be a string)
+ """
+
+ #
+ # N.B. active_if_checks is called once per task
+ # in make_job_parameter_generator() for consistency
+ #
+ # self.is_active can be set using self.active_if_checks in that function,
+ # and therefore can be changed BETWEEN invocations of pipeline_run
+ #
+ # self.is_active is not used anywhere else
+ #
+ if (not self.is_active):
+ return []
+
+ #
+ # This looks like the wrong place to flatten
+ #
+ flattened = False
+ if self.output_filenames == None:
+
+ self.output_filenames = []
+
+ # skip tasks which don't have parameters
+ if self.param_generator_func != None:
+
+ cnt_jobs = 0
+ for param, descriptive_param in self.param_generator_func(runtime_data):
+ cnt_jobs += 1
+ # skip tasks which don't have output parameters
+ if len(param) >= 2:
+ # make sure each @split or @subdivide or @originate returns a list of jobs
+ # i.e. each @split or @subdivide or @originate is always a ->many operation
+ # even if len(many) can be 1 (or zero)
+ if self.indeterminate_output and not non_str_sequence(param[1]):
+ self.output_filenames.append([param[1]])
+ else:
+ self.output_filenames.append(param[1])
+
+
+ if self._single_job_single_output == self.single_job_single_output:
+ if cnt_jobs > 1:
+ raise error_task_get_output(self,
+ "Task which is supposed to produce a single output "
+ "somehow has more than one job.")
+
+ #
+ # The output of @split should be treated as multiple jobs
+ #
+ # The output of @split is always a list of lists:
+ # 1) There is a list of @split jobs
+ # A) For advanced (regex) @split
+ # this is a many -> many more operation
+ # So len(list) == many (i.e. the number of jobs
+ # B) For normal @split
+ # this is a 1 -> many operation
+ # So len(list) = 1
+ #
+ # 2) The output of each @split job is a list
+ # The items in this list of lists are each a job in subsequent tasks
+ #
+ #
+ # So we need to concatenate these separate lists into a single list of output
+ #
+ # For example:
+ # @split(["a.1", "b.1"], regex(r"(.)\.1"), r"\1.*.2")
+ # def example(input, output):
+ # # JOB 1
+ # # a.1 -> a.i.2
+ # # -> a.j.2
+ #
+ # # JOB 2
+ # # b.1 -> b.i.2
+ # # -> b.j.2
+ #
+ # output_filenames = [ [a.i.2, a.j.2], [b.i.2, b.j.2] ]
+ #
+ # we want [ a.i.2, a.j.2, b.i.2, b.j.2 ]
+ #
+ # This also works for simple @split
+ #
+ # @split("a.1", r"a.*.2")
+ # def example(input, output):
+ # # only job
+ # # a.1 -> a.i.2
+ # # -> a.j.2
+ #
+ # output_filenames = [ [a.i.2, a.j.2] ]
+ #
+ # we want [ a.i.2, a.j.2 ]
+ #
+ if len(self.output_filenames) and self.indeterminate_output:
+ self.output_filenames = reduce(lambda x,y: x + y, self.output_filenames)
+
+
+ if flattened:
+ # if single file name, return that
+ # accepts unicode
+ if (do_not_expand_single_job_tasks and
+ len(self.output_filenames) and
+ isinstance(self.output_filenames[0], path_str_type)):
+ return self.output_filenames
+ # if it is flattened, might as well sort it
+ return sorted(get_strings_in_nested_sequence(self.output_filenames))
+
+ else:
+ # special handling for jobs which have a single task,
+ if (do_not_expand_single_job_tasks and
+ self._single_job_single_output and
+ len(self.output_filenames) ):
+ return self.output_filenames[0]
+
+ #
+ # sort by jobs so it is just a weeny little bit more deterministic
+ #
+ return sorted(self.output_filenames, key = lambda x: str(x))
+
+
+
+ #_____________________________________________________________________________________
+
+ # completed
+ #
+ # All logging logic moved to caller site
+ #_____________________________________________________________________________________
+ def completed (self):
+ """
+ called even when all jobs are up to date
+ """
+ if not self.is_active:
+ self.output_filenames = None
+ return
+
+ for f in self.posttask_functions:
+ f()
+
+ #
+ # indeterminate output. Check actual output again if some other task's job function depends on it
+ # used for @split
+ #
+ if self.indeterminate_output:
+ self.output_filenames = None
+
+
+
+
+
+
+
+
+
+ #_________________________________________________________________________________________
+
+ # handle_tasks_globs_in_inputs
+
+ #_________________________________________________________________________________________
+ def handle_tasks_globs_in_inputs(self, input_params):
+ """
+ Helper function for tasks which
+ 1) Notes globs and tasks
+ 2) Replaces task names and functions with actual tasks
+ 3) Adds task dependencies automatically via task_follows
+ """
+ #
+ # get list of function/function names and globs
+ #
+ function_or_func_names, globs, runtime_data_names = get_nested_tasks_or_globs(input_params)
+
+ #
+ # replace function / function names with tasks
+ #
+ tasks = self.task_follows(function_or_func_names)
+ functions_to_tasks = dict(zip(function_or_func_names, tasks))
+ input_params = replace_func_names_with_tasks(input_params, functions_to_tasks)
+
+ return t_params_tasks_globs_run_time_data(input_params, tasks, globs, runtime_data_names)
+
+
+
+
+
+ #8888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+ # task handlers
+
+ # sets
+ # 1) action_type
+ # 2) param_generator_func
+ # 3) needs_update_func
+ # 4) job wrapper
+
+
+ #8888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+ #_________________________________________________________________________________________
+
+ # do_task_subdivide
+
+ #_________________________________________________________________________________________
+ def do_task_subdivide (self, orig_args, decorator_name, error_type):
+ """
+ @subdivide and @split are synonyms
+ Common code here
+ """
+
+ if len(orig_args) < 3:
+ raise error_type(self, "Too few arguments for %s" % decorator_name)
+
+
+
+
+ #
+ # replace function / function names with tasks
+ #
+ input_files_task_globs = self.handle_tasks_globs_in_inputs(orig_args[0])
+
+ # allows split to take a single file or task
+ input_files_task_globs.single_file_to_list()
+
+ # how to transform input to output file name
+ file_names_transform = self.choose_file_names_transform (orig_args[1], error_type, decorator_name)
+
+ orig_args = orig_args[2:]
+
+ # inputs can also be defined by pattern match
+ extra_inputs, replace_inputs, output_pattern, extra_params = self.get_extra_inputs_outputs_extra (orig_args, error_type, decorator_name)
+
+ #
+ # output globs will be replaced with files. But there should not be tasks here!
+ #
+ output_files_task_globs = self.handle_tasks_globs_in_inputs(output_pattern)
+ if len(output_files_task_globs.tasks):
+ raise error_type(self, ("%s cannot output to another task. "
+ "Do not include tasks in output parameters.") % decorator_name)
+
+
+
+ self.param_generator_func = subdivide_param_factory ( input_files_task_globs,
+ False, # flatten input
+ file_names_transform,
+ extra_inputs,
+ replace_inputs,
+ output_files_task_globs,
+ *extra_params)
+ self.needs_update_func = self.needs_update_func or needs_update_check_modify_time
+ self.job_wrapper = job_wrapper_io_files
+ #self.job_descriptor = io_files_job_descriptor # (orig_args[0], output_runtime_data_names)
+ self.job_descriptor = io_files_one_to_many_job_descriptor
+
+ # output is a glob
+ self.indeterminate_output = 2
+ self.single_multi_io = self.many_to_many
+
+ #_________________________________________________________________________________________
+
+ # task_split
+
+ #_________________________________________________________________________________________
+ def do_task_simple_split (self, orig_args, decorator_name, error_type):
+
+ #check enough arguments
+ if len(orig_args) < 2:
+ raise error_type(self, "Too few arguments for %s" % decorator_name)
+
+ #
+ # replace function / function names with tasks
+ #
+ input_files_task_globs = self.handle_tasks_globs_in_inputs(orig_args[0])
+
+ #
+ # replace output globs with files
+ #
+ output_files_task_globs = self.handle_tasks_globs_in_inputs(orig_args[1])
+ if len(output_files_task_globs.tasks):
+ raise error_type(self, ("%s cannot output to another task. "
+ "Do not include tasks in output parameters.") % decorator_name)
+
+ extra_params = orig_args[2:]
+ self.param_generator_func = split_param_factory (input_files_task_globs, output_files_task_globs, *extra_params)
+
+
+ self.needs_update_func = self.needs_update_func or needs_update_check_modify_time
+ self.job_wrapper = job_wrapper_io_files
+ #self.job_descriptor = io_files_job_descriptor# (orig_args[1], output_runtime_data_names)
+ self.job_descriptor = io_files_one_to_many_job_descriptor
+
+ # output is a glob
+ self.indeterminate_output = 1
+ self.single_multi_io = self.one_to_many
+
+
+
+ #_________________________________________________________________________________________
+
+ # task_split
+
+ #_________________________________________________________________________________________
+ def task_split (self, orig_args):
+ """
+ Splits a single set of input files into multiple output file names,
+ where the number of output files may not be known beforehand.
+ """
+ decorator_name = "@split"
+ error_type = error_task_split
+ self.set_action_type (_task.action_task_split)
+
+ #
+ # This is actually @subdivide
+ #
+ if isinstance(orig_args[1], regex):
+ self.do_task_subdivide(orig_args, decorator_name, error_type)
+
+ #
+ # This is actually @split
+ #
+ else:
+ self.do_task_simple_split(orig_args, decorator_name, error_type)
+
+
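+ # Editor's note, not part of the upstream source: the dispatch above means a
+ # hypothetical
+ #
+ #     @split(["a.1", "b.1"], regex(r"(.)\.1"), r"\1.*.2")
+ #
+ # is routed to do_task_subdivide (an advanced, many -> many more split), whereas
+ #
+ #     @split("a.1", "a.*.2")
+ #
+ # goes through do_task_simple_split (a plain one -> many split).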
+
+ #_________________________________________________________________________________________
+
+ # task_originate
+
+ #_________________________________________________________________________________________
+ def task_originate (self, orig_args):
+ """
+ Splits out multiple output file names,
+ where the number of output files may or may not be known beforehand.
+ This is a synonym for @split(None,...)
+ """
+ decorator_name = "@originate"
+ error_type = error_task_originate
+ self.set_action_type (_task.action_task_originate)
+
+ if len(orig_args) < 1:
+ raise error_type(self, "%s takes a single argument" % decorator_name)
+
+ output_params = orig_args[0]
+
+ # make sure output_params is a list.
+ # Each of these will be called as an output
+ if not non_str_sequence (output_params):
+ output_params = [output_params]
+
+ #
+ # output globs will be replaced with files. But there should not be tasks here!
+ #
+ list_output_files_task_globs = [self.handle_tasks_globs_in_inputs(oo) for oo in output_params]
+ for oftg in list_output_files_task_globs:
+ if len(oftg.tasks):
+ raise error_type(self, ("%s cannot output to another task. "
+ "Do not include tasks in output parameters.") % decorator_name)
+
+ self.param_generator_func = originate_param_factory (list_output_files_task_globs, orig_args[1:])
+ self.needs_update_func = self.needs_update_func or needs_update_check_modify_time
+ self.job_wrapper = job_wrapper_output_files
+ self.job_descriptor = io_files_one_to_many_job_descriptor
+
+ # output is not a glob
+ self.indeterminate_output = 0
+ self.single_multi_io = self.many_to_many
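+
+    # Hedged usage sketch of the @originate arguments parsed above
+    # (illustrative file names only):
+    #
+    #   @originate(["a.start", "b.start"], "optional_extra_parameter")
+    #   def create_start_files(output_file, extra):
+    #       open(output_file, "w").close()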
+
+
+
+
+
+
+
+ #_________________________________________________________________________________________
+
+ # task_subdivide
+
+ #_________________________________________________________________________________________
+ def task_subdivide (self, orig_args):
+ """
+        Subdivides each set of input files into multiple sets of output files,
+ where the number of output files may not be known beforehand.
+ """
+ decorator_name = "@subdivide"
+ error_type = error_task_subdivide
+ self.set_action_type (_task.action_task_subdivide)
+ self.do_task_subdivide(orig_args, decorator_name, error_type)
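+
+    # Hedged usage sketch of @subdivide (illustrative names; the output pattern
+    # is a glob because the number of pieces is only known at run time):
+    #
+    #   @subdivide(create_start_files, formatter(), "{path[0]}/{basename[0]}.*.piece")
+    #   def subdivide_files(input_file, output_files):
+    #       ...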
+
+ #_________________________________________________________________________________________
+
+ # get_extra_inputs
+
+ #_________________________________________________________________________________________
+ def get_extra_inputs_outputs_extra (self, orig_args, error_type, decorator_name):
+ """
+ shared code for subdivide, transform, product etc for parsing orig_args into
+ add_inputs/inputs, output, extra
+ """
+
+ #
+ # inputs can also be defined by pattern match
+ #
+ if isinstance(orig_args[0], inputs):
+ if len(orig_args) < 2:
+ raise error_type(self, "Too few arguments for %s" % decorator_name)
+ if len(orig_args[0].args) != 1:
+ raise error_task_transform_inputs_multiple_args(self,
+ "inputs(...) expects only a single argument. "
+ "This can be, for example, a file name, "
+ "a regular expression pattern, or any "
+ "nested structure. If the intention was to "
+ "specify a tuple as the input parameter, "
+ "please wrap the elements of the tuple "
+ "in brackets in the decorator\n\n"
+ "%s(..., inputs(...), ...)\n" % (decorator_name))
+ replace_inputs = t_extra_inputs.REPLACE_INPUTS
+ extra_inputs = self.handle_tasks_globs_in_inputs(orig_args[0].args[0])
+ output_pattern = orig_args[1]
+ extra_params = orig_args[2:]
+ elif isinstance(orig_args[0], add_inputs):
+ if len(orig_args) < 2:
+ raise error_type(self, "Too few arguments for %s" % decorator_name)
+ replace_inputs = t_extra_inputs.ADD_TO_INPUTS
+ extra_inputs = self.handle_tasks_globs_in_inputs(orig_args[0].args)
+ output_pattern = orig_args[1]
+ extra_params = orig_args[2:]
+ else:
+ replace_inputs = t_extra_inputs.KEEP_INPUTS
+ extra_inputs = None
+ output_pattern = orig_args[0]
+ extra_params = orig_args[1:]
+
+ return extra_inputs, replace_inputs, output_pattern, extra_params
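+
+    # The three branches above correspond to these decorator spellings
+    # (hedged sketch, hypothetical file names):
+    #
+    #   @transform(prev, suffix(".bam"), ".counts")                              # KEEP_INPUTS
+    #   @transform(prev, suffix(".bam"), add_inputs("reference.fa"), ".counts")  # ADD_TO_INPUTS
+    #   @transform(prev, regex(r"(.+)\.bam"), inputs(r"\1.sam"), r"\1.counts")   # REPLACE_INPUTS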
+
+ #_________________________________________________________________________________________
+
+ # choose_file_names_transform
+
+ #_________________________________________________________________________________________
+ def choose_file_names_transform (self, file_name_transform_tag, error_type, decorator_name, valid_tags = (regex, suffix, formatter)):
+ """
+ shared code for subdivide, transform, product etc for choosing method for transform input file to output files
+ """
+        valid_tag_names = []
+ # regular expression match
+ if (regex in valid_tags):
+ valid_tag_names.append("regex()")
+ if isinstance(file_name_transform_tag, regex):
+ return t_regex_file_names_transform(self, file_name_transform_tag, error_type, decorator_name)
+
+ # simulate end of string (suffix) match
+ if (suffix in valid_tags):
+ valid_tag_names.append("suffix()")
+ if isinstance(file_name_transform_tag, suffix):
+ return t_suffix_file_names_transform(self, file_name_transform_tag, error_type, decorator_name)
+
+ # new style string.format()
+ if (formatter in valid_tags):
+ valid_tag_names.append("formatter()")
+ if isinstance(file_name_transform_tag, formatter):
+ return t_formatter_file_names_transform(self, file_name_transform_tag, error_type, decorator_name)
+
+ raise error_type(self, "%s expects one of %s as the second argument" % (decorator_name, ", ".join(valid_tag_names)))
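+
+    # The second decorator argument selects one of three file-name transform
+    # strategies (sketch, illustrative patterns only):
+    #
+    #   suffix(".txt")                    -> t_suffix_file_names_transform
+    #   regex(r"(.+)\.txt$")              -> t_regex_file_names_transform
+    #   formatter(r"(?P<NAME>.+)\.txt$")  -> t_formatter_file_names_transform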
+
+
+ #_________________________________________________________________________________________
+
+ # task_product
+
+ #_________________________________________________________________________________________
+ def task_product(self, orig_args):
+ """
+ all versus all
+ """
+ decorator_name = "@product"
+ error_type = error_task_product
+ if len(orig_args) < 3:
+ raise error_type(self, "Too few arguments for %s" % decorator_name)
+
+ #
+ # get all pairs of tasks / globs and formatter()
+ #
+ list_input_files_task_globs = []
+ list_formatter = []
+ while len(orig_args) >= 3:
+ if isinstance(orig_args[1], formatter):
+ list_input_files_task_globs .append(orig_args[0])
+ list_formatter .append(orig_args[1])
+ orig_args = orig_args[2:]
+ else:
+ break
+
+ if not len(list_formatter):
+ raise error_task_product(self, "@product expects formatter() as the second argument")
+
+
+ self.set_action_type (_task.action_task_product)
+
+ #
+ # replace function / function names with tasks
+ #
+ list_input_files_task_globs = [self.handle_tasks_globs_in_inputs(ii) for ii in list_input_files_task_globs]
+
+
+ # list of new style string.format()
+ file_names_transform = t_nested_formatter_file_names_transform(self, list_formatter, error_task_product, decorator_name)
+
+
+ #
+ # inputs can also be defined by pattern match
+ #
+ extra_inputs, replace_inputs, output_pattern, extra_params = self.get_extra_inputs_outputs_extra (orig_args, error_type, decorator_name)
+
+ self.param_generator_func = product_param_factory ( list_input_files_task_globs,
+ False, # flatten input
+ file_names_transform,
+ extra_inputs,
+ replace_inputs,
+ output_pattern,
+ *extra_params)
+ self.needs_update_func = self.needs_update_func or needs_update_check_modify_time
+ self.job_wrapper = job_wrapper_io_files
+ self.job_descriptor = io_files_job_descriptor
+ self.single_multi_io = self.many_to_many
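+
+    # Hedged sketch of the alternating (input, formatter()) pairs consumed by
+    # the while loop above (hypothetical globs and patterns):
+    #
+    #   @product("*.design",  formatter(".*/(?P<DESIGN>.+)\.design$"),
+    #            "*.sample",  formatter(".*/(?P<SAMPLE>.+)\.sample$"),
+    #            "{path[0][0]}/{DESIGN[0][0]}_vs_{SAMPLE[1][0]}.result")
+    #   def compare_all_vs_all(input_files, output_file):
+    #       ...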
+
+
+ #_________________________________________________________________________________________
+
+ # task_combinatorics
+
+ #_________________________________________________________________________________________
+ def task_combinatorics (self, orig_args, combinatorics_type, decorator_name, error_type):
+ """
+ Common code for task_permutations, task_combinations_with_replacement, task_combinations
+ """
+
+ if len(orig_args) < 4:
+ raise error_type(self, "Too few arguments for %s" % decorator_name)
+
+
+ if not isinstance(orig_args[1], formatter):
+            raise error_type(self, "%s expects formatter() as the second argument" % decorator_name)
+
+ #
+ # replace function / function names with tasks
+ #
+ input_files_task_globs = self.handle_tasks_globs_in_inputs(orig_args[0])
+
+        # the number of elements in each tuple must be an integer:
+        # check before it is used to build the nested formatter below
+        if not isinstance(orig_args[2], int):
+            raise error_type(self, "%s expects an integer number as the third argument specifying the number of elements in each tuple." % decorator_name)
+
+        k_tuple = orig_args[2]
+
+        # how to transform input to output file name: len(k-tuples) of (identical) formatters
+        file_names_transform = t_nested_formatter_file_names_transform(self, [orig_args[1]] * k_tuple, error_type, decorator_name)
+
+
+        self.set_action_type (_task.action_task_permutations)
+
+
+ orig_args = orig_args[3:]
+
+
+ #
+ # inputs can also be defined by pattern match
+ #
+ extra_inputs, replace_inputs, output_pattern, extra_params = self.get_extra_inputs_outputs_extra (orig_args, error_type, decorator_name)
+
+ self.param_generator_func = combinatorics_param_factory ( input_files_task_globs,
+ False, # flatten input
+ combinatorics_type,
+ k_tuple,
+ file_names_transform,
+ extra_inputs,
+ replace_inputs,
+ output_pattern,
+ *extra_params)
+ self.needs_update_func = self.needs_update_func or needs_update_check_modify_time
+ self.job_wrapper = job_wrapper_io_files
+ self.job_descriptor = io_files_job_descriptor
+ self.single_multi_io = self.many_to_many
+
+ #_________________________________________________________________________________________
+
+ # task_permutations
+
+ #_________________________________________________________________________________________
+ def task_permutations(self, orig_args):
+ """
+ k-permutations of n
+
+ k-length tuples, all possible orderings, no self vs self
+ """
+ decorator_name = "@permutations"
+ error_type = error_task_permutations
+ combinatorics_type = t_combinatorics_type.COMBINATORICS_PERMUTATIONS
+ self.task_combinatorics (orig_args, combinatorics_type, decorator_name, error_type)
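+
+    # Hedged sketch of the argument layout checked in task_combinatorics
+    # (input, formatter(), k, output, extras...); names are illustrative:
+    #
+    #   @permutations("*.fasta", formatter(".*/(?P<ID>.+)\.fasta$"), 2,
+    #                 "{path[0][0]}/{ID[0][0]}_vs_{ID[1][0]}.comparison")
+    #   def compare_pairs(input_files, output_file):
+    #       ...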
+
+
+ #_________________________________________________________________________________________
+
+ # task_combinations
+
+ #_________________________________________________________________________________________
+ def task_combinations(self, orig_args):
+ """
+ k-length tuples
+ Single (sorted) ordering, i.e. AB is the same as BA,
+ No repeats. No AA, BB
+
+ E.g.
+ combinations("ABCD", 3) = ['ABC', 'ABD', 'ACD', 'BCD']
+ combinations("ABCD", 2) = ['AB', 'AC', 'AD', 'BC', 'BD', 'CD']
+ """
+ decorator_name = "@combinations"
+ error_type = error_task_combinations
+ combinatorics_type = t_combinatorics_type.COMBINATORICS_COMBINATIONS
+ self.task_combinatorics (orig_args, combinatorics_type, decorator_name, error_type)
+
+
+ #_________________________________________________________________________________________
+
+ # task_combinations_with_replacement
+
+ #_________________________________________________________________________________________
+ def task_combinations_with_replacement(self, orig_args):
+ """
+ k-length tuples
+ Single (sorted) ordering, i.e. AB is the same as BA,
+ Repeats. AA, BB, AAC etc.
+
+ E.g.
+ combinations_with_replacement("ABCD", 3) = ['AAA', 'AAB', 'AAC', 'AAD',
+ 'ABB', 'ABC', 'ABD',
+ 'ACC', 'ACD',
+ 'ADD',
+ 'BBB', 'BBC', 'BBD',
+ 'BCC', 'BCD',
+ 'BDD',
+ 'CCC', 'CCD',
+ 'CDD',
+ 'DDD']
+ combinations_with_replacement("ABCD", 2) = ['AA', 'AB', 'AC', 'AD',
+ 'BB', 'BC', 'BD',
+ 'CC', 'CD',
+ 'DD']
+
+ """
+ decorator_name = "@combinations_with_replacement"
+ error_type = error_task_combinations_with_replacement
+ combinatorics_type = t_combinatorics_type.COMBINATORICS_COMBINATIONS_WITH_REPLACEMENT
+ self.task_combinatorics (orig_args, combinatorics_type, decorator_name, error_type)
+
+
+
+
+ #_________________________________________________________________________________________
+
+ # task_transform
+
+ #_________________________________________________________________________________________
+ def task_transform (self, orig_args):
+ """
+        Transforms each set of input files into the corresponding output files (one job per set of inputs).
+ """
+ decorator_name = "@transform"
+ error_type = error_task_transform
+ if len(orig_args) < 3:
+ raise error_type(self, "Too few arguments for %s" % decorator_name)
+
+
+ self.set_action_type (_task.action_task_transform)
+
+ #
+ # replace function / function names with tasks
+ #
+ input_files_task_globs = self.handle_tasks_globs_in_inputs(orig_args[0])
+
+
+ #_________________________________________________________________________________
+ #
+ # single_job_single_output is bad policy. Can we remove it?
+ # What does this actually mean in Ruffus semantics?
+ #
+ #
+ # allows transform to take a single file or task
+ if input_files_task_globs.single_file_to_list():
+ self._single_job_single_output = self.single_job_single_output
+
+ #
+ # whether transform generates a list of jobs or not will depend on the parent task
+ #
+ elif isinstance(input_files_task_globs.params, _task):
+ self._single_job_single_output = input_files_task_globs.params
+
+ #_________________________________________________________________________________
+
+ # how to transform input to output file name
+ file_names_transform = self.choose_file_names_transform (orig_args[1], error_task_transform, decorator_name)
+
+ orig_args = orig_args[2:]
+
+
+ #
+ # inputs can also be defined by pattern match
+ #
+ extra_inputs, replace_inputs, output_pattern, extra_params = self.get_extra_inputs_outputs_extra (orig_args, error_type, decorator_name)
+
+ self.param_generator_func = transform_param_factory ( input_files_task_globs,
+ False, # flatten input
+ file_names_transform,
+ extra_inputs,
+ replace_inputs,
+ output_pattern,
+ *extra_params)
+ self.needs_update_func = self.needs_update_func or needs_update_check_modify_time
+ self.job_wrapper = job_wrapper_io_files
+ self.job_descriptor = io_files_job_descriptor
+ self.single_multi_io = self.many_to_many
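+
+    # Minimal hedged sketch of the @transform call parsed above
+    # (hypothetical task and file names):
+    #
+    #   @transform(previous_task, suffix(".sam"), ".bam")
+    #   def sam_to_bam(input_file, output_file):
+    #       ...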
+
+ #_________________________________________________________________________________________
+
+ # task_collate
+
+ #_________________________________________________________________________________________
+ def task_collate (self, orig_args):
+ """
+        Groups (collates) input files which share the same pattern substitution
+        and merges each group into its output.
+ """
+ decorator_name = "@collate"
+ error_type = error_task_collate
+ if len(orig_args) < 3:
+ raise error_type(self, "Too few arguments for %s" % decorator_name)
+
+ self.set_action_type (_task.action_task_collate)
+
+ #
+ # replace function / function names with tasks
+ #
+ input_files_task_globs = self.handle_tasks_globs_in_inputs(orig_args[0])
+
+
+ # how to transform input to output file name
+ file_names_transform = self.choose_file_names_transform (orig_args[1], error_task_collate, decorator_name, (regex, formatter))
+
+ orig_args = orig_args[2:]
+
+ #
+ # inputs also defined by pattern match
+ #
+ extra_inputs, replace_inputs, output_pattern, extra_params = self.get_extra_inputs_outputs_extra (orig_args, error_type, decorator_name)
+
+ self.single_multi_io = self.many_to_many
+
+ self.param_generator_func = collate_param_factory ( input_files_task_globs,
+ False, # flatten input
+ file_names_transform,
+ extra_inputs,
+ replace_inputs,
+ output_pattern,
+ *extra_params)
+ self.needs_update_func = self.needs_update_func or needs_update_check_modify_time
+ self.job_wrapper = job_wrapper_io_files
+ self.job_descriptor = io_files_job_descriptor
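+
+    # Hedged sketch of @collate, which runs one job per group of inputs sharing
+    # the same regex substitution (illustrative pattern):
+    #
+    #   @collate("*.fastq", regex(r"(.+)\.[12]\.fastq$"), r"\1.merged.fastq")
+    #   def merge_read_pairs(input_files, output_file):
+    #       ...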
+
+
+
+ #_________________________________________________________________________________________
+
+ # task_merge
+
+ #_________________________________________________________________________________________
+ def task_merge (self, orig_args):
+ """
+ Merges multiple input files into a single output.
+ """
+ #
+ # check enough arguments
+ #
+ if len(orig_args) < 2:
+ raise error_task_merge(self, "Too few arguments for @merge")
+
+ self.set_action_type (_task.action_task_merge)
+
+ #
+ # replace function / function names with tasks
+ #
+ input_files_task_globs = self.handle_tasks_globs_in_inputs(orig_args[0])
+
+ extra_params = orig_args[1:]
+ self.param_generator_func = merge_param_factory (input_files_task_globs,
+ *extra_params)
+
+
+# self._single_job_single_output = self.multiple_jobs_outputs
+ self._single_job_single_output = self.single_job_single_output
+ self.single_multi_io = self.many_to_one
+
+ self.needs_update_func = self.needs_update_func or needs_update_check_modify_time
+ self.job_wrapper = job_wrapper_io_files
+ self.job_descriptor = io_files_job_descriptor
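+
+    # Hedged sketch of the @merge arguments parsed above (hypothetical names):
+    #
+    #   @merge(previous_task, "summary.txt")
+    #   def summarise(input_files, output_file):
+    #       ...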
+
+ #_________________________________________________________________________________________
+
+ # task_parallel
+
+ #_________________________________________________________________________________________
+ def task_parallel (self, orig_args):
+ """
+        Calls the user function in parallel, either once for each set of
+        parameters in a supplied list, or with parameters generated by a
+        custom function
+ """
+ self.set_action_type (_task.action_parallel)
+
+ # unmodified from __init__
+ #
+ # self.needs_update_func = None
+ # self.job_wrapper = job_wrapper_generic
+ # self.job_descriptor = io_files_job_descriptor
+
+ if len(orig_args) == 0:
+ raise error_task_parallel(self, "Too few arguments for @parallel")
+
+ # Use parameters generated by a custom function
+ if len(orig_args) == 1 and isinstance(orig_args[0], collections.Callable):
+ #if len(orig_args) == 1 and type(orig_args[0]) == types.FunctionType:
+ self.param_generator_func = args_param_factory(orig_args[0]())
+
+ # list of params
+ else:
+ if len(orig_args) > 1:
+ # single jobs
+ params = copy.copy([orig_args])
+ self._single_job_single_output = self.single_job_single_output
+ else:
+ # multiple jobs with input/output parameters etc.
+ params = copy.copy(orig_args[0])
+ check_parallel_parameters (self, params, error_task_parallel)
+
+ self.param_generator_func = args_param_factory (params)
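+
+    # Hedged sketch of the two @parallel forms handled above (made-up parameters):
+    #
+    #   @parallel([["job1.param1", "job1.param2"], ["job2.param1", "job2.param2"]])
+    #   def run_job(param1, param2):
+    #       ...
+    #
+    #   @parallel(generate_parameters_on_the_fly)   # any callable returning params
+    #   def run_other_job(param1, param2):
+    #       ...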
+
+
+
+ #_________________________________________________________________________________________
+
+ # task_files
+
+ #_________________________________________________________________________________________
+ def task_files (self, orig_args):
+ """
+        Calls the user function in parallel, either once for each set of
+        parameters in a supplied list, or with parameters generated by a
+        custom function
+
+        In the parameter list,
+        the first two items of each set of parameters must
+        be input/output files, lists of files, or None
+ """
+
+ self.needs_update_func = self.needs_update_func or needs_update_check_modify_time
+ self.job_wrapper = job_wrapper_io_files
+ self.job_descriptor = io_files_job_descriptor
+
+ if len(orig_args) == 0:
+ raise error_task_files(self, "Too few arguments for @files")
+
+ # Use parameters generated by a custom function
+ if len(orig_args) == 1 and isinstance(orig_args[0], collections.Callable):
+ #if len(orig_args) == 1 and type(orig_args[0]) == types.FunctionType:
+
+ self.set_action_type (_task.action_task_files_func)
+ self.param_generator_func = files_custom_generator_param_factory(orig_args[0])
+
+ # assume
+ self.single_multi_io = self.many_to_many
+
+ # Use parameters in supplied list
+ else:
+ self.set_action_type (_task.action_task_files)
+
+ if len(orig_args) > 1:
+
+ # single jobs
+ # This is true even if the previous task has multiple output
+ # These will all be joined together at the hip (like @merge)
+ # If you want different behavior, use @transform
+ params = copy.copy([orig_args])
+ self._single_job_single_output = self.single_job_single_output
+ self.single_multi_io = self.one_to_one
+
+
+ else:
+
+ # multiple jobs with input/output parameters etc.
+ params = copy.copy(orig_args[0])
+ self._single_job_single_output = self.multiple_jobs_outputs
+ self.single_multi_io = self.many_to_many
+
+ check_files_io_parameters (self, params, error_task_files)
+
+ #
+ # get list of function/function names and globs for all job params
+ #
+
+ #
+ # replace function / function names with tasks
+ #
+ input_patterns = [j[0] for j in params]
+ input_files_task_globs = self.handle_tasks_globs_in_inputs(input_patterns)
+
+
+ #
+ # extra params
+ #
+ output_extra_params = [tuple(j[1:]) for j in params]
+
+ self.param_generator_func = files_param_factory (input_files_task_globs,
+ False, # flatten input
+ True, # do_not_expand_single_job_tasks
+ output_extra_params)
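+
+    # Hedged sketch of the list form of @files parsed above (illustrative names):
+    #
+    #   @files([["a.input", "a.output", "extra1"],
+    #           ["b.input", "b.output", "extra2"]])
+    #   def process(input_file, output_file, extra):
+    #       ...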
+
+
+
+ #_________________________________________________________________________________________
+
+ # task_files_re
+
+ #_________________________________________________________________________________________
+ def task_files_re (self, old_args):
+ """
+ calls user function in parallel
+ with input_files, output_files, parameters
+        These need to be generated on the fly by
+ getting all file names in the supplied list/glob pattern
+ There are two variations:
+
+ 1) inputfiles = all files in glob which match the regular expression
+ outputfile = generated from the replacement string
+
+ 2) inputfiles = all files in glob which match the regular expression and
+ generated from the "from" replacement string
+ outputfiles = all files in glob which match the regular expression and
+ generated from the "to" replacement string
+ """
+ #
+ # check enough arguments
+ #
+ if len(old_args) < 3:
+ raise error_task_files_re(self, "Too few arguments for @files_re")
+
+ self.set_action_type (_task.action_task_files_re)
+
+ # check if parameters wrapped in combine
+ combining_all_jobs, orig_args = is_file_re_combining(old_args)
+
+ #
+ # replace function / function names with tasks
+ #
+ input_files_task_globs = self.handle_tasks_globs_in_inputs(orig_args[0])
+
+ file_names_transform = t_regex_file_names_transform(self, regex(orig_args[1]), error_task_files_re, "@files_re")
+
+
+ # if the input file term is missing, just use the original
+ if len(orig_args) == 3:
+ extra_input_files_task_globs = None
+ output_and_extras = [orig_args[2]]
+ else:
+ extra_input_files_task_globs = self.handle_tasks_globs_in_inputs(orig_args[2])
+ output_and_extras = orig_args[3:]
+
+
+ if combining_all_jobs:
+ self.single_multi_io = self.many_to_many
+ self.param_generator_func = collate_param_factory (input_files_task_globs,
+ False, # flatten
+ file_names_transform,
+ extra_input_files_task_globs,
+ t_extra_inputs.REPLACE_INPUTS,
+ *output_and_extras)
+ else:
+
+ self.single_multi_io = self.many_to_many
+ self.param_generator_func = transform_param_factory (input_files_task_globs,
+ False, # flatten
+ file_names_transform,
+ extra_input_files_task_globs,
+ t_extra_inputs.REPLACE_INPUTS,
+ *output_and_extras)
+
+
+ self.needs_update_func = self.needs_update_func or needs_update_check_modify_time
+ self.job_wrapper = job_wrapper_io_files
+ self.job_descriptor = io_files_job_descriptor
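+
+    # Hedged sketch of the legacy @files_re form handled above
+    # (illustrative glob and pattern):
+    #
+    #   @files_re("*.gz", r"(.+)\.gz$", r"\1.decompressed")
+    #   def decompress(input_file, output_file):
+    #       ...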
+
+
+
+ #_________________________________________________________________________________________
+
+ # task_mkdir
+
+ # only called within task_follows
+
+ #_________________________________________________________________________________________
+ def task_mkdir (self, orig_args):
+ self.cnt_task_mkdir += 1
+ # give unique name to this instance of mkdir
+ unique_name = r"(mkdir %d) before " % self.cnt_task_mkdir + self._name
+ new_node = _task(self._module_name, unique_name)
+ self.add_child(new_node)
+ new_node.do_task_mkdir(orig_args)
+ new_node.display_name = new_node._description
+
+
+ def do_task_mkdir (self, orig_args):
+ """
+        list of directory names or a single argument which is a list of directory names
+ Creates directory if missing
+ """
+ decorator_name = "mkdir"
+ error_type = error_task_mkdir
+
+ # jump through hoops
+ self.set_action_type (_task.action_mkdir)
+ self.needs_update_func = self.needs_update_func or needs_update_check_directory_missing
+ # don't shorten in description: full path
+ self._description = "Make directories %s" % (shorten_filenames_encoder(orig_args, 0))
+ self.job_wrapper = job_wrapper_mkdir
+ self.job_descriptor = mkdir_job_descriptor
+
+ # doesn't have a real function
+ # use job_wrapper just so it is not None
+ self.user_defined_work_func = self.job_wrapper
+
+
+ #
+ # @transform like behaviour with regex / suffix or formatter
+ #
+ if len(orig_args) > 1 and isinstance(orig_args[1], (formatter, suffix, regex)):
+ self.single_multi_io = self.many_to_many
+
+ if len(orig_args) < 3:
+ raise error_type(self, "Too few arguments for %s" % decorator_name)
+
+ #
+ # replace function / function names with tasks
+ #
+ input_files_task_globs = self.handle_tasks_globs_in_inputs(orig_args[0])
+
+
+ # how to transform input to output file name
+ file_names_transform = self.choose_file_names_transform (orig_args[1], error_task_transform, decorator_name)
+
+ orig_args = orig_args[2:]
+
+ #
+ # inputs can also be defined by pattern match
+ #
+ extra_inputs, replace_inputs, output_pattern, extra_params = self.get_extra_inputs_outputs_extra (orig_args, error_type, decorator_name)
+
+ if len(extra_params):
+ raise error_type(self, "Too many arguments for %s" % decorator_name)
+
+
+ self.param_generator_func = transform_param_factory ( input_files_task_globs,
+ False, # flatten input
+ file_names_transform,
+ extra_inputs,
+ replace_inputs,
+ output_pattern,
+ *extra_params)
+
+ #
+ # simple behaviour: just make directories in list of strings
+ #
+ # the mkdir decorator accepts one string, multiple strings or a list of strings
+ else:
+ self.single_multi_io = self.one_to_one
+
+ #
+ #
+ #
+ # if a single argument collection of parameters, keep that as is
+ if len(orig_args) == 0:
+ mkdir_params = []
+ elif len(orig_args) > 1:
+ mkdir_params = orig_args
+ # len(orig_args) == 1: unpack orig_args[0]
+ elif non_str_sequence (orig_args[0]):
+ mkdir_params = orig_args[0]
+ # single string or other non collection types
+ else:
+ mkdir_params = orig_args
+
+ # all directories created in one job to reduce race conditions
+ # so we are converting [a,b,c] into [ [(a, b,c)] ]
+ # where orig_args = (a,b,c)
+            #   i.e. one job whose solitary argument is a tuple/list of directory names
+ self.param_generator_func = args_param_factory([[sorted(mkdir_params)]])
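+
+    # Hedged sketch of the two mkdir forms handled above
+    # (hypothetical directory names):
+    #
+    #   @follows(mkdir("results", "results/logs"))               # plain list of strings
+    #   def first_task():
+    #       ...
+    #
+    #   @mkdir(previous_task, formatter(), "{path[0]}/output")   # @transform-like form
+    #   def second_task(input_file, output_file):
+    #       ...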
+
+
+
+
+
+
+
+
+ #8888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+ # Other task handlers
+
+
+
+ #8888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+
+
+
+
+
+ #_________________________________________________________________________________________
+
+ # task_follows
+
+ #_________________________________________________________________________________________
+ def task_follows (self, args):
+ """
+ Saved decorator arguments should be:
+ (string/task,...)
+ """
+ new_tasks = []
+ for arg in args:
+ #
+ # specified by string: unicode or otherwise
+ #
+ if isinstance(arg, path_str_type):
+ # string looks up to defined task, use that
+ if node.is_node(arg):
+ arg = node.lookup_node_from_name(arg)
+ # string looks up to defined task in main module, use that
+ elif node.is_node("__main__." + arg):
+ arg = node.lookup_node_from_name("__main__." + arg)
+
+ #
+ # string does not look up to defined task: defer
+ #
+ else:
+ # no module: use same module as current task
+ names = arg.rsplit(".", 2)
+ if len(names) == 1:
+ arg = _task(self._module_name, arg)
+ else:
+ arg = _task(*names)
+
+ #
+ # add dependency
+                #   duplicate dependencies are ignored automatically
+ #
+ self.add_child(arg)
+ new_tasks.append(arg)
+
+
+ #
+ # for mkdir, automatically generate task with unique name
+ #
+ elif isinstance(arg, mkdir):
+ self.cnt_task_mkdir += 1
+ # give unique name to this instance of mkdir
+ unique_name = r"(mkdir %d) before " % self.cnt_task_mkdir + self._name
+ new_node = _task(self._module_name, unique_name)
+ self.add_child(new_node)
+ new_node.do_task_mkdir(arg.args)
+ new_node.display_name = new_node._description
+ new_tasks.append(new_node)
+
+
+
+
+ #
+ # Is this a function?
+ # Turn this function into a task
+ # (add task as attribute of this function)
+ # Add self as dependent
+ else:
+ #if type(arg) != types.FunctionType:
+ if not isinstance(arg, collections.Callable):
+
+ raise error_decorator_args("Dependencies must be functions or function names in " +
+ "@task_follows %s:\n[%s]" %
+ (self._name, str(arg)))
+
+ # add task as attribute of this function
+ if not hasattr(arg, "pipeline_task"):
+ arg.pipeline_task = _task.create_task(arg)
+ self.add_child(arg.pipeline_task)
+ new_tasks.append(arg.pipeline_task)
+
+ return new_tasks
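+
+    # Hedged sketch of the argument kinds accepted above (hypothetical names):
+    #
+    #   @follows(another_task_function, "some_module.task_by_name", mkdir("output_dir"))
+    #   def downstream_task():
+    #       ...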
+
+
+
+ #_________________________________________________________________________________________
+
+ # task_check_if_uptodate
+
+ #_________________________________________________________________________________________
+ def task_check_if_uptodate (self, args):
+ """
+ Saved decorator arguments should be:
+ a function which takes the appropriate number of arguments for each job
+ """
+ if len(args) != 1 or not isinstance(args[0], collections.Callable):
+ #if len(args) != 1 or type(args[0]) != types.FunctionType:
+ raise error_decorator_args("Expecting a single function in " +
+ "@task_check_if_uptodate %s:\n[%s]" %
+ (self._name, str(args)))
+ self.needs_update_func = args[0]
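+
+    # Hedged sketch of the single-callable form expected above: the callable is
+    # installed as needs_update_func and returns (needs_update, message), as in
+    # the needs_update_func calls elsewhere in this module (names illustrative):
+    #
+    #   def output_missing(input_file, output_file):
+    #       return not os.path.exists(output_file), "%s is missing" % output_file
+    #
+    #   @check_if_uptodate(output_missing)
+    #   def make_output(input_file, output_file):
+    #       ...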
+
+
+
+ #_________________________________________________________________________________________
+
+ # task_posttask
+
+ #_________________________________________________________________________________________
+ def task_posttask(self, args):
+ """
+ Saved decorator arguments should be:
+ one or more functions which will be called if the task completes
+ """
+ for arg in args:
+ if isinstance(arg, touch_file):
+ self.posttask_functions.append(touch_file_factory (arg.args, register_cleanup))
+ elif isinstance(arg, collections.Callable):
+ #elif type(arg) == types.FunctionType:
+ self.posttask_functions.append(arg)
+ else:
+ raise PostTaskArgumentError("Expecting simple functions or touch_file in " +
+ "@posttask(...)\n Task = %s" %
+ (self._name))
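+
+    # Hedged sketch of the accepted @posttask arguments (hypothetical names):
+    #
+    #   @posttask(touch_file("stage1.completed"), notify_by_email)
+    #   def stage1(input_file, output_file):
+    #       ...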
+
+ #_________________________________________________________________________________________
+
+ # task_jobs_limit
+
+ #_________________________________________________________________________________________
+ def task_jobs_limit(self, args):
+ """
+ Limit the number of concurrent jobs
+ """
+ maximum_jobs, name = (args + (None,))[0:2]
+ try:
+ maximum_jobs_num = int(maximum_jobs)
+ assert(maximum_jobs_num >= 1)
+ except:
+ limit_name = ", " + name if name else ""
+            raise JobsLimitArgumentError(('In @jobs_limit(%s%s), the limit '
+                                          'must be an integer number greater than or '
+                                          'equal to 1') %
+                                         (maximum_jobs, limit_name))
+ if name != None:
+ self.semaphore_name = name
+ if self.semaphore_name in self.job_limit_semaphores:
+ curr_maximum_jobs = self.job_limit_semaphores[self.semaphore_name]
+ if curr_maximum_jobs != maximum_jobs_num:
+                raise JobsLimitArgumentError(('@jobs_limit(%d, "%s") cannot be ' +
+                                              're-defined with a different limit of %d') %
+                                             (curr_maximum_jobs, self.semaphore_name,
+                                              maximum_jobs_num))
+ else:
+ #
+ # save semaphore and limit
+ #
+ self.job_limit_semaphores[self.semaphore_name] = maximum_jobs_num
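+
+    # Hedged sketch of @jobs_limit usage (hypothetical limit and semaphore name):
+    #
+    #   # at most 4 concurrent jobs across all tasks sharing "shared_database"
+    #   @jobs_limit(4, "shared_database")
+    #   def write_to_db(input_file, output_file):
+    #       ...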
+
+
+ #_________________________________________________________________________________________
+
+ # task_active_if
+
+ #_________________________________________________________________________________________
+ def task_active_if (self, active_if_checks):
+ """
+        If any of active_if_checks is False or returns False, then the task is
+        marked as "inactive" and its outputs are removed.
+ """
+ #print 'job is active:', active_checks, [
+ # arg() if isinstance(arg, collections.Callable) else arg
+ # for arg in active_checks]
+ if self.active_if_checks == None:
+ self.active_if_checks = []
+ self.active_if_checks.extend(active_if_checks)
+ #print(self.active_if_checks)
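+
+    # Hedged sketch of @active_if usage (hypothetical flag and callable):
+    #
+    #   run_quality_control = True
+    #
+    #   @active_if(run_quality_control, lambda: time.localtime().tm_hour < 20)
+    #   def quality_control(input_file, output_file):
+    #       ...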
+
+
+
+ #_________________________________________________________________________________________
+
+ # task_graphviz
+
+ #_________________________________________________________________________________________
+ def task_graphviz(self, *unnamed_args, **named_args):
+ self.graphviz_attributes=named_args
+
+
+class task_encoder(json.JSONEncoder):
+ def default(self, obj):
+ if isinstance(obj, set):
+ return list(obj)
+ if isinstance(obj, defaultdict):
+ return dict(obj)
+ if isinstance(obj, _task):
+ return obj._name #, _task.action_names[obj.action_task], obj._description]
+ return json.JSONEncoder.default(self, obj)
+
+
+
+
+
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Functions
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+#_________________________________________________________________________________________
+
+# link_task_names_to_functions
+
+#_________________________________________________________________________________________
+def link_task_names_to_functions ():
+ """
+ Make sure all tasks in dependency list are linked to real functions
+ Call this before running anything else
+ """
+
+ for n in node._all_nodes:
+ if n.user_defined_work_func == None:
+ dependent_display_task_name = n._inward[0].get_task_name()
+ if n._module_name in sys.modules:
+ module = sys.modules[n._module_name]
+ if hasattr(module, n._func_name):
+ n.user_defined_work_func = getattr(module, n._func_name)
+ else:
+ raise error_decorator_args(("Module '%s' has no function '%s' in " +
+ "\n at task_follows('%s')\ndef %s...") %
+ (n._module_name, n._func_name, n.get_task_name(), dependent_display_task_name))
+ else:
+                raise error_decorator_args(("Module '%s' not found in " +
+                                            "\n at task_follows('%s')\ndef %s...") %
+                                                (n._module_name, n.get_task_name(), dependent_display_task_name))
+
+
+ #
+        #   the single-job status of some tasks mirrors the parent task's state,
+        #   and the parent task is not known until now
+ #
+ if isinstance(n._single_job_single_output, _task):
+ n._single_job_single_output = n._single_job_single_output._single_job_single_output
+
+#_________________________________________________________________________________________
+
+# update_checksum_level_on_tasks
+
+#_________________________________________________________________________________________
+def update_checksum_level_on_tasks (checksum_level):
+ """Reset the checksum level for all tasks"""
+ for n in node._all_nodes:
+ n.checksum_level = checksum_level
+
+
+#_________________________________________________________________________________________
+
+# update_active_states_for_all_tasks
+
+#_________________________________________________________________________________________
+def update_active_states_for_all_tasks ():
+ """
+
+ @active_if decorated tasks can change their active state every time
+ pipeline_run / pipeline_printout / pipeline_printout_graph is called
+
+ update_active_states_for_all_tasks ()
+
+ """
+ for n in node._all_nodes:
+ n.update_active_state()
+
+#_________________________________________________________________________________________
+
+# task_names_to_tasks
+
+#_________________________________________________________________________________________
+def task_names_to_tasks (task_description, task_names):
+ """
+ Given a list of task names, look up the corresponding tasks
+ Will just pass through if the task_name is already a task
+ """
+
+ #
+ # In case we are given a single item instead of a list
+ # accepts unicode
+ #
+ if isinstance(task_names, path_str_type) or isinstance(task_names, collections.Callable):
+ #if isinstance(task_names, basestring) or type(task_names) == types.FunctionType:
+ task_names = [task_names]
+
+ task_nodes = []
+ for task_name in task_names:
+
+ # Is this already a function, don't do mapping if already is task
+ if isinstance(task_name, collections.Callable):
+ #if type(task_name) == types.FunctionType:
+ if hasattr(task_name, "pipeline_task"):
+ task_nodes.append(task_name.pipeline_task)
+ continue
+ else:
+ # blow up for unwrapped function
+ raise error_function_is_not_a_task(("Function def %s(...): is not a pipelined task in ruffus." %
+ task_name.__name__) +
+ " To include this, this function needs to have a ruffus "+
+ "decoration like '@parallel', '@files', or named as a dependent "+
+ "of some other Ruffus task function via '@follows'.")
+
+ # assumes is some kind of string
+ if not node.is_node(task_name):
+ if node.is_node("__main__." + task_name):
+ task_nodes.append(node.lookup_node_from_name("__main__." + task_name))
+ else:
+ raise error_node_not_task("%s task '%s' is not a pipelined task in Ruffus. Have you mis-spelt the function name?" % (
+ task_description, task_name))
+ else:
+ task_nodes.append(node.lookup_node_from_name(task_name))
+ return task_nodes
+
+
+
+
+#_________________________________________________________________________________________
+
+# pipeline_printout_in_dot_format
+
+#_________________________________________________________________________________________
+def pipeline_printout_graph (stream,
+ output_format = None,
+ target_tasks = [],
+ forcedtorun_tasks = [],
+ draw_vertically = True,
+ ignore_upstream_of_target = False,
+ skip_uptodate_tasks = False,
+ gnu_make_maximal_rebuild_mode = True,
+ test_all_task_for_update = True,
+ no_key_legend = False,
+ minimal_key_legend = True,
+ user_colour_scheme = None,
+ pipeline_name = "Pipeline:",
+ size = (11,8),
+ dpi = 120,
+ runtime_data = None,
+ checksum_level = None,
+ history_file = None):
+ # Remember to add further extra parameters here to "extra_pipeline_printout_graph_options" inside cmdline.py
+ # This will forward extra parameters from the command line to pipeline_printout_graph
+ """
+ print out pipeline dependencies in various formats
+
+ :param stream: where to print to
+ :type stream: file-like object with ``write()`` function
+ :param output_format: ["dot", "jpg", "svg", "ps", "png"]. All but the first depends on the `dot <http://www.graphviz.org>`_ program.
+ :param target_tasks: targets task functions which will be run if they are out-of-date.
+ :param forcedtorun_tasks: task functions which will be run whether or not they are out-of-date.
+ :param draw_vertically: Top to bottom instead of left to right.
+ :param ignore_upstream_of_target: Don't draw upstream tasks of targets.
+ :param skip_uptodate_tasks: Don't draw up-to-date tasks if possible.
+ :param gnu_make_maximal_rebuild_mode: Defaults to re-running *all* out-of-date tasks. Runs minimal
+                                           set to build targets if set to ``False``. Use with caution.
+ :param test_all_task_for_update: Ask all task functions if they are up-to-date.
+ :param no_key_legend: Don't draw key/legend for graph.
+ :param minimal_key_legend: Only add entries to the legend for task types which appear
+ :param user_colour_scheme: Dictionary specifying colour scheme for flowchart
+ :param pipeline_name: Pipeline Title
+ :param size: tuple of x and y dimensions
+ :param dpi: print resolution
+ :param runtime_data: Experimental feature for passing data to tasks at run time
+ :param history_file: The database file which stores checksums and file timestamps for input/output files.
+ :param checksum_level: Several options for checking up-to-dateness are available: Default is level 1.
+ level 0 : Use only file timestamps
+ level 1 : above, plus timestamp of successful job completion
+ level 2 : above, plus a checksum of the pipeline function body
+ level 3 : above, plus a checksum of the pipeline function default arguments and the additional arguments passed in by task decorators
+ """
+
+ # EXTRA pipeline_run DEBUGGING
+ global EXTRA_PIPELINERUN_DEBUGGING
+ EXTRA_PIPELINERUN_DEBUGGING = False
+
+ if checksum_level is None:
+ checksum_level = get_default_checksum_level()
+
+ link_task_names_to_functions ()
+ update_checksum_level_on_tasks (checksum_level)
+
+ #
+ # @active_if decorated tasks can change their active state every time
+ # pipeline_run / pipeline_printout / pipeline_printout_graph is called
+ #
+ update_active_states_for_all_tasks ()
+
+ #
+ # run time data
+ #
+ if runtime_data == None:
+ runtime_data = {}
+ if not isinstance(runtime_data, dict):
+ raise Exception("pipeline_run parameter runtime_data should be a dictionary of "
+                        "values passed to jobs at run time.")
+
+ #
+ # If we aren't using checksums, and history file hasn't been specified,
+ # we might be a bit surprised to find Ruffus writing to a sqlite db anyway.
+    # Let us just use an in-memory db which will be thrown away
+ # Of course, if history_file is specified, we presume you know what you are doing
+ #
+ if checksum_level == CHECKSUM_FILE_TIMESTAMPS and history_file == None:
+ history_file = ':memory:'
+
+ #
+ # load previous job history if it exists, otherwise create an empty history
+ #
+ job_history = open_job_history (history_file)
+
+ #
+ # target jobs
+ #
+ if target_tasks == None:
+ target_tasks = []
+ if forcedtorun_tasks == None:
+ forcedtorun_tasks = []
+ target_tasks = task_names_to_tasks ("Target", target_tasks)
+ forcedtorun_tasks = task_names_to_tasks ("Forced to run", forcedtorun_tasks)
+
+
+ (topological_sorted, ignore_param1, ignore_param2,
+ ignore_param3) = topologically_sorted_nodes(target_tasks, forcedtorun_tasks,
+ gnu_make_maximal_rebuild_mode,
+ extra_data_for_signal = [t_verbose_logger(0, 0, None, runtime_data), job_history])
+ if not len(target_tasks):
+ target_tasks = topological_sorted[-1:]
+
+
+
+ # open file if (unicode?) string
+ if isinstance(stream, path_str_type):
+ stream = open(stream, "w")
+
+ # derive format automatically from name
+ if output_format == None:
+ output_format = os.path.splitext(stream.name)[1].lstrip(".")
+
+
+
+ graph_printout ( stream,
+ output_format,
+ target_tasks,
+ forcedtorun_tasks,
+ draw_vertically,
+ ignore_upstream_of_target,
+ skip_uptodate_tasks,
+ gnu_make_maximal_rebuild_mode,
+ test_all_task_for_update,
+ no_key_legend,
+ minimal_key_legend,
+ user_colour_scheme,
+ pipeline_name,
+ size,
+ dpi,
+ extra_data_for_signal = [t_verbose_logger(0, 0, None, runtime_data), job_history])
+
+
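+
+# Hedged usage sketch for the function above (hypothetical file name and task):
+#
+#   pipeline_printout_graph("flowchart.svg", "svg",
+#                           [final_task],
+#                           no_key_legend = True)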
+#_________________________________________________________________________________________
+
+# get_completed_task_strings
+
+#_________________________________________________________________________________________
+def get_completed_task_strings (incomplete_tasks, all_tasks, forcedtorun_tasks, verbose, verbose_abbreviated_path, indent, runtime_data, job_history):
+ """
+    Return printable strings listing the tasks which are already up-to-date
+ """
+ completed_task_strings = []
+ if len(all_tasks) > len(incomplete_tasks):
+ completed_task_strings.append("")
+ completed_task_strings.append("_" * 40)
+ completed_task_strings.append("Tasks which are up-to-date:")
+ completed_task_strings.append("")
+ completed_task_strings.append("")
+ set_of_incomplete_tasks = set(incomplete_tasks)
+
+ for t in all_tasks:
+ # Only print Up to date tasks
+ if t in set_of_incomplete_tasks:
+ continue
+ # LOGGER
+ completed_task_strings.extend(t.printout(runtime_data, t in forcedtorun_tasks, job_history, False, verbose, verbose_abbreviated_path, indent))
+
+ completed_task_strings.append("_" * 40)
+ completed_task_strings.append("")
+ completed_task_strings.append("")
+
+ return completed_task_strings
+
+#_________________________________________________________________________________________
+
+# pipeline_printout
+
+#_________________________________________________________________________________________
+def pipeline_printout( output_stream = None,
+ target_tasks = [],
+ forcedtorun_tasks = [],
+ # verbose defaults to 4 if None
+ verbose = None,
+ indent = 4,
+ gnu_make_maximal_rebuild_mode = True,
+ wrap_width = 100,
+ runtime_data = None,
+ checksum_level = None,
+ history_file = None,
+ verbose_abbreviated_path = None):
+ # Remember to add further extra parameters here to "extra_pipeline_printout_options" inside cmdline.py
+ # This will forward extra parameters from the command line to pipeline_printout
+ """
+    Prints out the parts of the pipeline which will be run
+
+ Because the parameters of some jobs depend on the results of previous tasks, this function
+    produces only the current snap-shot of task jobs. In particular, tasks which generate a
+    variable number of inputs for following tasks will not produce the full range of jobs.
+
+ ::
+ verbose = 0 : Nothing
+ verbose = 1 : Out-of-date Task names
+ verbose = 2 : All Tasks (including any task function docstrings)
+ verbose = 3 : Out-of-date Jobs in Out-of-date Tasks, no explanation
+ verbose = 4 : Out-of-date Jobs in Out-of-date Tasks, with explanations and warnings
+ verbose = 5 : All Jobs in Out-of-date Tasks, (include only list of up-to-date tasks)
+ verbose = 6 : All jobs in All Tasks whether out of date or not
+
+ :param output_stream: where to print to
+ :type output_stream: file-like object with ``write()`` function
+ :param target_tasks: targets task functions which will be run if they are out-of-date
+ :param forcedtorun_tasks: task functions which will be run whether or not they are out-of-date
+ :param verbose: level 0 : nothing
+ level 1 : Out-of-date Task names
+ level 2 : All Tasks (including any task function docstrings)
+ level 3 : Out-of-date Jobs in Out-of-date Tasks, no explanation
+ level 4 : Out-of-date Jobs in Out-of-date Tasks, with explanations and warnings
+ level 5 : All Jobs in Out-of-date Tasks, (include only list of up-to-date tasks)
+ level 6 : All jobs in All Tasks whether out of date or not
+ level 10: logs messages useful only for debugging ruffus pipeline code
+ :param indent: How much indentation for pretty format.
+ :param gnu_make_maximal_rebuild_mode: Defaults to re-running *all* out-of-date tasks. Runs minimal
+                                           set to build targets if set to ``False``. Use with caution.
+ :param wrap_width: The maximum length of each line
+ :param runtime_data: Experimental feature for passing data to tasks at run time
+ :param checksum_level: Several options for checking up-to-dateness are available: Default is level 1.
+ level 0 : Use only file timestamps
+ level 1 : As above, plus timestamp of successful job completion
+ level 2 : As above, plus a checksum of the pipeline function body
+ level 3 : As above, plus a checksum of the pipeline function default arguments and the additional arguments passed in by task decorators
+ :param history_file: The database file which stores checksums and file timestamps for input/output files.
+ :param verbose_abbreviated_path: whether input and output paths are abbreviated.
+ level 0: The full (expanded, abspath) input or output path
+ level > 1: The number of subdirectories to include. Abbreviated paths are prefixed with ``[,,,]/``
+                                       level < 0: Input / Output parameters are truncated to ``MMM`` letters where ``verbose_abbreviated_path == -MMM``. Subdirectories are first removed to see if this allows the paths to fit in the specified limit. Otherwise abbreviated paths are prefixed by ``<???>``
+ """
+ # do nothing!
+ if verbose == 0:
+ return
+
+ #
+ # default values
+ #
+ if verbose_abbreviated_path == None:
+ verbose_abbreviated_path = 2
+ if verbose == None:
+ verbose = 4
+
+ # EXTRA pipeline_run DEBUGGING
+ global EXTRA_PIPELINERUN_DEBUGGING
+ EXTRA_PIPELINERUN_DEBUGGING = False
+
+ if output_stream == None:
+ import sys
+ output_stream = sys.stdout
+
+
+ if not hasattr(output_stream, "write"):
+ raise Exception("The first parameter to pipeline_printout needs to be an output file, e.g. sys.stdout and not %s" % str(output_stream))
+
+ if runtime_data == None:
+ runtime_data = {}
+ if not isinstance(runtime_data, dict):
+ raise Exception("pipeline_run parameter runtime_data should be a dictionary of "
+                        "values passed to jobs at run time.")
+
+ if checksum_level is None:
+ checksum_level = get_default_checksum_level()
+
+ link_task_names_to_functions ()
+ update_checksum_level_on_tasks(checksum_level)
+
+ #
+ # @active_if decorated tasks can change their active state every time
+ # pipeline_run / pipeline_printout / pipeline_printout_graph is called
+ #
+ update_active_states_for_all_tasks ()
+
+ #
+ # target jobs
+ #
+ target_tasks = task_names_to_tasks ("Target", target_tasks)
+ forcedtorun_tasks = task_names_to_tasks ("Forced to run", forcedtorun_tasks)
+
+ logging_strm = t_verbose_logger(verbose, verbose_abbreviated_path, t_stream_logger(output_stream), runtime_data)
+
+ #
+ # If we aren't using checksums, and history file hasn't been specified,
+ # we might be a bit surprised to find Ruffus writing to a sqlite db anyway.
+    # Let us just use an in-memory db which will be thrown away
+ # Of course, if history_file is specified, we presume you know what you are doing
+ #
+ if checksum_level == CHECKSUM_FILE_TIMESTAMPS and history_file == None:
+ history_file = ':memory:'
+
+ #
+ # load previous job history if it exists, otherwise create an empty history
+ #
+ job_history = open_job_history (history_file)
+
+ (incomplete_tasks,
+ self_terminated_nodes,
+ dag_violating_edges,
+ dag_violating_nodes) = topologically_sorted_nodes(target_tasks, forcedtorun_tasks,
+ gnu_make_maximal_rebuild_mode,
+ extra_data_for_signal = [t_verbose_logger(0, 0, None, runtime_data), job_history])
+
+
+ #
+ # raise error if DAG violating nodes
+ #
+ if len(dag_violating_nodes):
+ dag_violating_tasks = ", ".join(t._name for t in dag_violating_nodes)
+
+ e = error_circular_dependencies("Circular dependencies found in the "
+ "pipeline involving one or more of (%s)" %
+ (dag_violating_tasks))
+ raise e
+
+ wrap_indent = " " * (indent + 11)
+
+ #
+ # Get updated nodes as all_nodes - nodes_to_run
+ #
+ # LOGGER level 6 : All jobs in All Tasks whether out of date or not
+ if verbose == 2 or verbose >= 5:
+ (all_tasks, ignore_param1, ignore_param2,
+ ignore_param3) = topologically_sorted_nodes(target_tasks, True,
+ gnu_make_maximal_rebuild_mode,
+ extra_data_for_signal = [t_verbose_logger(0, 0, None, runtime_data), job_history])
+ for m in get_completed_task_strings (incomplete_tasks, all_tasks, forcedtorun_tasks, verbose, verbose_abbreviated_path, indent, runtime_data, job_history):
+ output_stream.write(textwrap.fill(m, subsequent_indent = wrap_indent, width = wrap_width) + "\n")
+
+ output_stream.write("\n" + "_" * 40 + "\nTasks which will be run:\n\n")
+ for t in incomplete_tasks:
+ # LOGGER
+ messages = t.printout(runtime_data, t in forcedtorun_tasks, job_history, True, verbose, verbose_abbreviated_path, indent)
+ for m in messages:
+ output_stream.write(textwrap.fill(m, subsequent_indent = wrap_indent, width = wrap_width) + "\n")
+
+ if verbose:
+ # LOGGER
+ output_stream.write("_" * 40 + "\n")
+
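+
+# Hedged usage sketch for the function above (assumes a defined final_task):
+#
+#   import sys
+#   pipeline_printout(sys.stdout, [final_task], verbose = 3)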
+
+#_________________________________________________________________________________________
+
+# get_semaphore
+
+#_________________________________________________________________________________________
+def get_semaphore (t, job_limit_semaphores, syncmanager):
+ """
+ return semaphore to limit the number of concurrent jobs
+ """
+ #
+ # Is this task limited in the number of jobs?
+ #
+ if t.semaphore_name not in t.job_limit_semaphores:
+ return None
+
+
+ #
+ # create semaphore if not yet created
+ #
+ if t.semaphore_name not in job_limit_semaphores:
+ maximum_jobs_num = t.job_limit_semaphores[t.semaphore_name]
+ job_limit_semaphores[t.semaphore_name] = syncmanager.BoundedSemaphore(maximum_jobs_num)
+ return job_limit_semaphores[t.semaphore_name]
+
+#_________________________________________________________________________________________
+#
+# Parameter generator for all jobs / tasks
+#
+#________________________________________________________________________________________
+def make_job_parameter_generator (incomplete_tasks, task_parents, logger, forcedtorun_tasks,
+ task_with_completed_job_q, runtime_data, verbose,
+ verbose_abbreviated_path,
+ syncmanager,
+ death_event,
+ touch_files_only, job_history):
+
+ inprogress_tasks = set()
+ job_limit_semaphores = dict()
+
+ def parameter_generator():
+ count_remaining_jobs = defaultdict(int)
+ log_at_level (logger, 10, verbose, " job_parameter_generator BEGIN")
+ while len(incomplete_tasks):
+ cnt_jobs_created_for_all_tasks = 0
+ cnt_tasks_processed = 0
+
+ #
+ # get rid of all completed tasks first
+ # Completion is signalled from pipeline_run
+ #
+ while True:
+ try:
+ item = task_with_completed_job_q.get_nowait()
+ job_completed_task, job_completed_task_name, job_completed_name = item
+
+
+ if not job_completed_task in incomplete_tasks:
+ raise Exception("Last job %s for %s. Missing from incomplete tasks in make_job_parameter_generator" % (job_completed_name, job_completed_task_name))
+ count_remaining_jobs[job_completed_task] = count_remaining_jobs[job_completed_task] - 1
+ #
+ # This is bad: something has gone very wrong
+ #
+                        if count_remaining_jobs[job_completed_task] < 0:
+ raise Exception("job %s for %s causes job count < 0." % (job_completed_name, job_completed_task_name))
+
+ #
+ # This Task completed
+ #
+ if count_remaining_jobs[job_completed_task] == 0:
+ log_at_level (logger, 10, verbose, " Last job for %s. Retired from incomplete tasks in pipeline_run " % job_completed_task._name)
+ incomplete_tasks.remove(job_completed_task)
+ job_completed_task.completed ()
+ # LOGGER: Out-of-date Tasks
+ log_at_level (logger, 1, verbose, "Completed Task = " + job_completed_task.get_task_name())
+
+ except queue.Empty:
+ break
+
+ for t in list(incomplete_tasks):
+ #
+                #   wrap in exception handler so that we know which task the
+                #   exception came from
+ #
+ try:
+ log_at_level (logger, 10, verbose, " job_parameter_generator consider task = %s" % t._name)
+
+ # ignore tasks in progress
+ if t in inprogress_tasks:
+ continue
+ log_at_level (logger, 10, verbose, " job_parameter_generator task %s not in progress" % t._name)
+
+ # ignore tasks with incomplete dependencies
+ incomplete_parent = False
+ for parent in task_parents[t]:
+ if parent in incomplete_tasks:
+ incomplete_parent = True
+ break
+ if incomplete_parent:
+ continue
+
+ log_at_level (logger, 10, verbose, " job_parameter_generator start task %s (parents completed)" % t._name)
+ force_rerun = t in forcedtorun_tasks
+ #
+ # Only log active task
+ #
+ if t.is_active:
+ # LOGGER: Out-of-date Tasks
+ log_at_level (logger, 1, verbose, "Task enters queue = " + t.get_task_name() + (": Forced to rerun" if force_rerun else ""))
+ # LOGGER: logs All Tasks (including any task function docstrings)
+ # indent string
+ if len(t._description):
+ log_at_level (logger, 2, verbose, " " + t._description)
+ inprogress_tasks.add(t)
+ cnt_tasks_processed += 1
+
+
+ #
+ # Use output parameters actually generated by running task
+ #
+ t.output_filenames = []
+
+
+
+ #
+ # If no parameters: just call task function (empty list)
+ #
+ #if (t.active_if_checks != None):
+ # t.is_active = all(arg() if isinstance(arg, collections.Callable) else arg
+ # for arg in t.active_if_checks)
+ if not t.is_active:
+ parameters = []
+
+
+
+ #
+ # If no parameters: just call task function (empty list)
+ #
+ elif t.param_generator_func == None:
+ parameters = ([[], []],)
+ else:
+ parameters = t.param_generator_func(runtime_data)
+
+ #
+ # iterate through parameters
+ #
+ cnt_jobs_created = 0
+ for param, descriptive_param in parameters:
+
+ #
+ # save output even if uptodate
+ #
+ if len(param) >= 2:
+ t.output_filenames.append(param[1])
+
+ job_name = t.get_job_name(descriptive_param, verbose_abbreviated_path, runtime_data)
+
+ #
+ # don't run if up to date unless force to run
+ #
+ if force_rerun:
+ # LOGGER: Out-of-date Jobs in Out-of-date Tasks
+ log_at_level (logger, 3, verbose, " force task %s to rerun " % job_name)
+ else:
+ if not t.needs_update_func:
+ # LOGGER: Out-of-date Jobs in Out-of-date Tasks
+ log_at_level (logger, 3, verbose, " %s no function to check if up-to-date " % job_name)
+ else:
+ # extra clunky hack to also pass task info--
+ # makes sure that there haven't been code or arg changes
+ if t.needs_update_func == needs_update_check_modify_time:
+ needs_update, msg = t.needs_update_func (*param, task=t, job_history = job_history, verbose_abbreviated_path = verbose_abbreviated_path)
+ else:
+ needs_update, msg = t.needs_update_func (*param)
+
+ if not needs_update:
+ # LOGGER: All Jobs in Out-of-date Tasks
+ log_at_level (logger, 5, verbose, " %s unnecessary: already up to date " % job_name)
+ continue
+ else:
+ # LOGGER: Out-of-date Jobs in Out-of-date Tasks: Why out of date
+ if not log_at_level (logger, 4, verbose, " %s %s " % (job_name, msg)):
+
+ # LOGGER: Out-of-date Jobs in Out-of-date Tasks: No explanation
+ log_at_level (logger, 3, verbose, " %s" % (job_name))
+
+ #
+ # Clunky hack to make sure input files exists right before
+ # job is called for better error messages
+ #
+ if t.needs_update_func == needs_update_check_modify_time:
+ check_input_files_exist (*param)
+
+ # pause for one second before first job of each tasks
+ # @originate tasks do not need to pause, because they depend on nothing!
+ if cnt_jobs_created == 0 and touch_files_only < 2:
+ if "ONE_SECOND_PER_JOB" in runtime_data and runtime_data["ONE_SECOND_PER_JOB"] and t._action_type != _task.action_task_originate:
+ log_at_level (logger, 10, verbose, " 1 second PAUSE in job_parameter_generator\n\n\n")
+ time.sleep(1.01)
+ else:
+ time.sleep(0.1)
+
+
+ count_remaining_jobs[t] += 1
+ cnt_jobs_created += 1
+ cnt_jobs_created_for_all_tasks += 1
+ yield (param,
+ t._name,
+ job_name,
+ t.job_wrapper,
+ t.user_defined_work_func,
+ get_semaphore (t, job_limit_semaphores, syncmanager),
+ death_event,
+ touch_files_only)
+
+ # if no job came from this task, this task is complete
+ # we need to retire it here instead of normal completion at end of job tasks
+ # precisely because it created no jobs
+ if cnt_jobs_created == 0:
+ incomplete_tasks.remove(t)
+ t.completed ()
+ if not t.is_active:
+ log_at_level (logger, 2, verbose, "Inactive Task = " + t.get_task_name())
+ else:
+ log_at_level (logger, 2, verbose, "Uptodate Task = " + t.get_task_name())
+ # LOGGER: logs All Tasks (including any task function docstrings)
+ log_at_level (logger, 10, verbose, " No jobs created for %s. Retired in parameter_generator " % t._name)
+
+ #
+ # Add extra warning if no regular expressions match:
+ # This is a common class of frustrating errors
+ #
+ if (verbose >= 1 and "ruffus_WARNING" in runtime_data and
+ t.param_generator_func in runtime_data["ruffus_WARNING"]):
+ for msg in runtime_data["ruffus_WARNING"][t.param_generator_func]:
+ logger.warning(" 'In Task def %s(...):' %s " % (t.get_task_name(), msg))
+
+
+ #
+ # GeneratorExit is thrown when this generator does not complete.
+ # I.e. there is a break in the pipeline_run loop.
+ # This happens where there are exceptions signalled from within a job
+ #
+ # This is not really an exception, more a way to exit the generator loop
+            #   asynchronously so that cleanups can happen (e.g. the "with" statement
+ # or finally.)
+ #
+ # We could write except Exception: below which will catch everything but
+ # KeyboardInterrupt and StopIteration and GeneratorExit in python 2.6
+ #
+ # However, in python 2.5, GeneratorExit inherits from Exception. So
+ # we explicitly catch and rethrow GeneratorExit.
+ except GeneratorExit:
+ raise
+ except:
+ exceptionType, exceptionValue, exceptionTraceback = sys.exc_info()
+ exception_stack = traceback.format_exc()
+ exception_name = exceptionType.__module__ + '.' + exceptionType.__name__
+ exception_value = str(exceptionValue)
+ if len(exception_value):
+ exception_value = "(%s)" % exception_value
+ errt = RethrownJobError([(t._name,
+ "",
+ exception_name,
+ exception_value,
+ exception_stack)])
+ errt.specify_task(t, "Exceptions generating parameters")
+ raise errt
+
+
+
+        # extra tests in case final tasks do not result in jobs
+ if len(incomplete_tasks) and (not cnt_tasks_processed or cnt_jobs_created_for_all_tasks):
+ log_at_level (logger, 10, verbose, " incomplete tasks = " +
+ ",".join([t._name for t in incomplete_tasks] ))
+ yield waiting_for_more_tasks_to_complete()
+
+ yield all_tasks_complete()
+ # This function is done
+ log_at_level (logger, 10, verbose, " job_parameter_generator END")
+
+ return parameter_generator
+
+
+
+#_________________________________________________________________________________________
+#
+# feed_job_params_to_process_pool
+#
+#
+#________________________________________________________________________________________
+def feed_job_params_to_process_pool_factory (parameter_q, death_event, logger, verbose):
+ """
+ Process pool gets its parameters from this generator
+ Use factory function to save parameter_queue
+ """
+ def feed_job_params_to_process_pool ():
+ log_at_level (logger, 10, verbose, " Send param to Pooled Process START")
+ while 1:
+ log_at_level (logger, 10, verbose, " Get next parameter size = %d" %
+ parameter_q.qsize())
+ if not parameter_q.qsize():
+ time.sleep(0.1)
+ param = parameter_q.get()
+ log_at_level (logger, 10, verbose, " Get next parameter done")
+
+ # all tasks done
+ if isinstance(param, all_tasks_complete):
+ break
+
+ if death_event.is_set():
+ death_event.clear()
+ break
+
+ log_at_level (logger, 10, verbose, " Send param to Pooled Process=>" + str(param[0]))
+ yield param
+
+ log_at_level (logger, 10, verbose, " Send param to Pooled Process END")
+
+ # return generator
+ return feed_job_params_to_process_pool
+
+#_________________________________________________________________________________________
+#
+# fill_queue_with_job_parameters
+#
+#________________________________________________________________________________________
+def fill_queue_with_job_parameters (job_parameters, parameter_q, POOL_SIZE, logger, verbose):
+ """
+    Ensures the queue is filled with more parameter sets than there are job slots (POOL_SIZE)
+ """
+ log_at_level (logger, 10, verbose, " fill_queue_with_job_parameters START")
+ for param in job_parameters:
+
+ # stop if no more jobs available
+ if isinstance(param, waiting_for_more_tasks_to_complete):
+ log_at_level (logger, 10, verbose, " fill_queue_with_job_parameters WAITING for task to complete")
+ break
+
+ if not isinstance(param, all_tasks_complete):
+ log_at_level (logger, 10, verbose, " fill_queue_with_job_parameters=>" + str(param[0]))
+
+ # put into queue
+ parameter_q.put(param)
+
+        # queue size needs to be at least 2 so that the parameter queue never consists of a single
+        #   waiting_for_more_tasks_to_complete entry, which would cause
+        #   a loop and everything to hang!
+ if parameter_q.qsize() > POOL_SIZE + 1:
+ break
+ log_at_level (logger, 10, verbose, " fill_queue_with_job_parameters END")
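+
+    # Worked illustration (values assumed for the sake of example): with POOL_SIZE = 4,
+    # the loop above stops topping up the queue once parameter_q.qsize() exceeds
+    # POOL_SIZE + 1, i.e. when 6 parameter sets are queued, which keeps every pool
+    # slot busy without generating the whole pipeline's job list up front.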
+
+
+
+
+#_________________________________________________________________________________________
+
+# pipeline_get_task_names
+
+#_________________________________________________________________________________________
+def pipeline_get_task_names ():
+ """
+ Get all task names in a pipeline
+    Note that this does not check whether the pipeline is wired up properly
+ """
+
+ # EXTRA pipeline_run DEBUGGING
+ global EXTRA_PIPELINERUN_DEBUGGING
+ EXTRA_PIPELINERUN_DEBUGGING = False
+
+ #
+ # Make sure all tasks in dependency list are linked to real functions
+ #
+ link_task_names_to_functions ()
+
+ #
+ # Return task names for all nodes willy nilly
+ #
+
+
+ return [n.get_task_name() for n in node._all_nodes]
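+
+    # Illustrative only: for a pipeline containing two (hypothetical) task functions
+    # task1 and task2, this returns their task names, e.g. something like
+    # ["task1", "task2"] (exact qualification of the names depends on how the
+    # tasks were registered).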
+
+
+#
+# How the job queue works:
+#
+# Main loop
+# iterates pool.map using feed_job_params_to_process_pool()
+# (calls parameter_q.get() until all_tasks_complete)
+#
+#       if errors but want to finish tasks already in pipeline:
+# parameter_q.put(all_tasks_complete())
+# keep going
+# else:
+#
+# loops through jobs until no more jobs in non-dependent tasks
+# separate loop in generator so that list of incomplete_tasks does not
+# get updated half way through
+# causing race conditions
+#
+# parameter_q.put(param)
+# until waiting_for_more_tasks_to_complete
+# until queue is full (check *after*)
+#
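+#   A condensed sketch of that handshake (a simplified view, not the exact call graph):
+#
+#       job_parameter_generator --> parameter_q --> pooled worker processes / threads
+#                    ^                                           |
+#                    |                                           v
+#                    +--------- task_with_completed_job_q <------+
+#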
+#_________________________________________________________________________________________
+
+# pipeline_run
+
+#_________________________________________________________________________________________
+def pipeline_run(target_tasks = [],
+ forcedtorun_tasks = [],
+ multiprocess = 1,
+ logger = stderr_logger,
+ gnu_make_maximal_rebuild_mode = True,
+ #verbose defaults to 1 if None
+ verbose = None,
+ runtime_data = None,
+ one_second_per_job = None,
+ touch_files_only = False,
+ exceptions_terminate_immediately = False,
+ log_exceptions = False,
+ checksum_level = None,
+ multithread = 0,
+ history_file = None,
+ # defaults to 2 if None
+ verbose_abbreviated_path = None):
+ # Remember to add further extra parameters here to "extra_pipeline_run_options" inside cmdline.py
+ # This will forward extra parameters from the command line to pipeline_run
+ """
+ Run pipelines.
+
+ :param target_tasks: targets task functions which will be run if they are out-of-date
+ :param forcedtorun_tasks: task functions which will be run whether or not they are out-of-date
+ :param multiprocess: The number of concurrent jobs running on different processes.
+    :param multithread: The number of concurrent jobs running as different threads. If > 1, ruffus will use multithreading *instead of* multiprocessing (and ignore the multiprocess parameter). Using multithreading is particularly useful for managing high performance clusters, which otherwise are prone to "processor storms" when large numbers of cores finish jobs at the same time. (Thanks Andreas Heger)
+ :param logger: Where progress will be logged. Defaults to stderr output.
+ :type logger: `logging <http://docs.python.org/library/logging.html>`_ objects
+ :param verbose: level 0 : nothing
+ level 1 : Out-of-date Task names
+ level 2 : All Tasks (including any task function docstrings)
+ level 3 : Out-of-date Jobs in Out-of-date Tasks, no explanation
+ level 4 : Out-of-date Jobs in Out-of-date Tasks, with explanations and warnings
+ level 5 : All Jobs in Out-of-date Tasks, (include only list of up-to-date tasks)
+ level 6 : All jobs in All Tasks whether out of date or not
+ level 10: logs messages useful only for debugging ruffus pipeline code
+ :param touch_files_only: Create or update input/output files only to simulate running the pipeline. Do not run jobs. If set to CHECKSUM_REGENERATE, will regenerate the checksum history file to reflect the existing i/o files on disk.
+ :param exceptions_terminate_immediately: Exceptions cause immediate termination
+ rather than waiting for N jobs to finish where N = multiprocess
+ :param log_exceptions: Print exceptions to the logger as soon as they occur.
+ :param checksum_level: Several options for checking up-to-dateness are available: Default is level 1.
+ level 0 : Use only file timestamps
+ level 1 : above, plus timestamp of successful job completion
+ level 2 : above, plus a checksum of the pipeline function body
+ level 3 : above, plus a checksum of the pipeline function default arguments and the additional arguments passed in by task decorators
+    :param one_second_per_job: To work around poor file timestamp resolution for some file systems. Defaults to True if checksum_level is 0, forcing Tasks to take a minimum of 1 second to complete.
+ :param runtime_data: Experimental feature for passing data to tasks at run time
+    :param gnu_make_maximal_rebuild_mode: Defaults to re-running *all* out-of-date tasks. Runs the minimal
+                                          set needed to build the targets if set to ``False``. Use with caution.
+ :param history_file: The database file which stores checksums and file timestamps for input/output files.
+ :param verbose_abbreviated_path: whether input and output paths are abbreviated.
+ level 0: The full (expanded, abspath) input or output path
+ level > 1: The number of subdirectories to include. Abbreviated paths are prefixed with ``[,,,]/``
+                   level < 0: Input / Output parameters are truncated to ``MMM`` letters where ``verbose_abbreviated_path == -MMM``. Subdirectories are first removed to see if this allows the paths to fit in the specified limit. Otherwise abbreviated paths are prefixed by ``<???>``
+ """
+
+ #
+ # default values
+ #
+ if touch_files_only == False:
+ touch_files_only = 0
+ elif touch_files_only == True:
+ touch_files_only = 1
+ else:
+ touch_files_only = 2
+ # we are not running anything so do it as quickly as possible
+ one_second_per_job = False
+ if verbose == None:
+ verbose = 1
+ if verbose_abbreviated_path == None:
+ verbose_abbreviated_path = 2
+
+ # EXTRA pipeline_run DEBUGGING
+ global EXTRA_PIPELINERUN_DEBUGGING
+ if verbose >= 10:
+ EXTRA_PIPELINERUN_DEBUGGING = True
+ else:
+ EXTRA_PIPELINERUN_DEBUGGING = False
+
+
+ syncmanager = multiprocessing.Manager()
+
+ if runtime_data == None:
+ runtime_data = {}
+ if not isinstance(runtime_data, dict):
+ raise Exception("pipeline_run parameter runtime_data should be a dictionary of "
+                        "values passed to jobs at run time.")
+
+
+ #
+ # whether using multiprocessing or multithreading
+ #
+ if multithread:
+ pool = ThreadPool(multithread)
+ parallelism = multithread
+ elif multiprocess > 1:
+ pool = Pool(multiprocess)
+ parallelism = multiprocess
+ else:
+ parallelism = 1
+ pool = None
+
+ if checksum_level is None:
+ checksum_level = get_default_checksum_level()
+
+ #
+ # Supplement mtime with system clock if using CHECKSUM_HISTORY_TIMESTAMPS
+ # we don't need to default to adding 1 second delays between jobs
+ #
+ if one_second_per_job == None:
+ if checksum_level == CHECKSUM_FILE_TIMESTAMPS:
+            log_at_level (logger, 10, verbose, "   Checksums rely on FILE TIMESTAMPS only and we don't know the system file time resolution: Pause 1 second...")
+ runtime_data["ONE_SECOND_PER_JOB"] = True
+ else:
+            log_at_level (logger, 10, verbose, "   Checksums use calculated time as well: No 1 second pause...")
+ runtime_data["ONE_SECOND_PER_JOB"] = False
+ else:
+ log_at_level (logger, 10, verbose, " One second per job specified to be %s" % one_second_per_job)
+ runtime_data["ONE_SECOND_PER_JOB"] = one_second_per_job
+
+
+ if verbose == 0:
+ logger = black_hole_logger
+ elif verbose >= 11:
+ # debugging aid: See t_stderr_logger
+ # Each invocation of add_unique_prefix adds a unique prefix to all subsequent output
+ # So that individual runs of pipeline run are tagged
+ if hasattr(logger, "add_unique_prefix"):
+ logger.add_unique_prefix()
+
+
+ if touch_files_only and verbose >= 1:
+ logger.info("Touch output files instead of remaking them.")
+
+ link_task_names_to_functions ()
+ update_checksum_level_on_tasks (checksum_level)
+
+ #
+ # If we aren't using checksums, and history file hasn't been specified,
+ # we might be a bit surprised to find Ruffus writing to a sqlite db anyway.
+    #   Let us just use an in-memory db which will be thrown away
+ # Of course, if history_file is specified, we presume you know what you are doing
+ #
+ if checksum_level == CHECKSUM_FILE_TIMESTAMPS and history_file == None:
+ history_file = ':memory:'
+
+ job_history = open_job_history (history_file)
+
+
+
+
+ #
+ # @active_if decorated tasks can change their active state every time
+ # pipeline_run / pipeline_printout / pipeline_printout_graph is called
+ #
+ update_active_states_for_all_tasks ()
+
+
+ #
+ # target jobs
+ #
+ target_tasks = task_names_to_tasks ("Target", target_tasks)
+ forcedtorun_tasks = task_names_to_tasks ("Forced to run", forcedtorun_tasks)
+
+
+ #
+ # To update the checksum file, we force all tasks to rerun but then don't actually call the task function...
+ #
+    # So starting with target_tasks and forcedtorun_tasks, we harvest all upstream dependencies willy-nilly
+ # and assign the results to forcedtorun_tasks
+ #
+ if touch_files_only == 2:
+ (forcedtorun_tasks, ignore_param1, ignore_param2,
+ ignore_param3) = topologically_sorted_nodes(target_tasks + forcedtorun_tasks, True,
+ gnu_make_maximal_rebuild_mode,
+ extra_data_for_signal = [t_verbose_logger(0, 0, None, runtime_data), job_history])
+
+
+
+ #
+ # If verbose >=10, for debugging:
+ # Prints which tasks trigger the pipeline rerun...
+    #           i.e. starting from the farthest task, prints out all the up-to-date tasks, and the first out-of-date task
+ #
+ (incomplete_tasks,
+ self_terminated_nodes,
+ dag_violating_edges,
+ dag_violating_nodes) = topologically_sorted_nodes( target_tasks, forcedtorun_tasks,
+ gnu_make_maximal_rebuild_mode,
+ extra_data_for_signal = [t_verbose_logger(verbose, verbose_abbreviated_path, logger, runtime_data), job_history])
+
+ if len(dag_violating_nodes):
+ dag_violating_tasks = ", ".join(t._name for t in dag_violating_nodes)
+
+ e = error_circular_dependencies("Circular dependencies found in the "
+ "pipeline involving one or more of (%s)" %
+ (dag_violating_tasks))
+ raise e
+
+
+
+ #
+ # get dependencies. Only include tasks which will be run
+ #
+ set_of_incomplete_tasks = set(incomplete_tasks)
+ task_parents = defaultdict(set)
+ for t in set_of_incomplete_tasks:
+ task_parents[t] = set()
+ for parent in t._outward:
+ if parent in set_of_incomplete_tasks:
+ task_parents[t].add(parent)
+
+
+ #
+ # Print Complete tasks
+ #
+ # LOGGER level 5 : All jobs in All Tasks whether out of date or not
+ if verbose == 2 or verbose >= 5:
+ (all_tasks, ignore_param1, ignore_param2,
+ ignore_param3) = topologically_sorted_nodes(target_tasks, True,
+ gnu_make_maximal_rebuild_mode,
+ extra_data_for_signal = [t_verbose_logger(0, 0, None, runtime_data), job_history])
+ # indent hardcoded to 4
+ for m in get_completed_task_strings (incomplete_tasks, all_tasks, forcedtorun_tasks, verbose, verbose_abbreviated_path, 4, runtime_data, job_history):
+ logger.info(m)
+
+
+ #print json.dumps(task_parents.items(), indent=4, cls=task_encoder)
+ logger.info("")
+ logger.info("_" * 40)
+ logger.info("Tasks which will be run:")
+ logger.info("")
+ logger.info("")
+
+
+
+ # prepare tasks for pipeline run:
+ #
+ # clear task outputs
+ # task.output_filenames = None
+ #
+ # **********
+ # BEWARE
+ # **********
+ #
+ # Because state is stored, ruffus is *not* reentrant.
+ #
+ # **********
+ # BEWARE
+ # **********
+ for t in incomplete_tasks:
+ t.init_for_pipeline()
+
+
+ #
+ # prime queue with initial set of job parameters
+ #
+ death_event = syncmanager.Event()
+ #death_event = None
+ parameter_q = queue.Queue()
+ task_with_completed_job_q = queue.Queue()
+ parameter_generator = make_job_parameter_generator (incomplete_tasks, task_parents,
+ logger, forcedtorun_tasks,
+ task_with_completed_job_q,
+ runtime_data, verbose,
+ verbose_abbreviated_path,
+ syncmanager,
+ death_event,
+ touch_files_only, job_history)
+ job_parameters = parameter_generator()
+ fill_queue_with_job_parameters(job_parameters, parameter_q, parallelism, logger, verbose)
+
+ #
+ # N.B.
+    #   Handling keyboard interrupts (Ctrl-C) may require special care:
+    #       see http://stackoverflow.com/questions/1408356/keyboard-interrupts-with-pythons-multiprocessing-pool
+ #
+ # When waiting for a condition in threading.Condition.wait(), KeyboardInterrupt is never sent
+ # unless a timeout is specified
+ #
+ #
+ #
+ # #
+ # # whether using multiprocessing
+ # #
+ # pool = Pool(parallelism) if multiprocess > 1 else None
+ # if pool:
+ # pool_func = pool.imap_unordered
+ # job_iterator_timeout = []
+ # else:
+ # pool_func = imap
+ # job_iterator_timeout = [999999999999]
+ #
+ #
+ # ....
+ #
+ #
+ # it = pool_func(run_pooled_job_without_exceptions, feed_job_params_to_process_pool())
+ # while 1:
+ # try:
+ # job_result = it.next(*job_iterator_timeout)
+ #
+ # ...
+ #
+ # except StopIteration:
+ # break
+
+
+
+
+ if pool:
+ pool_func = pool.imap_unordered
+ else:
+ pool_func = map
+
+
+
+ feed_job_params_to_process_pool = feed_job_params_to_process_pool_factory (parameter_q, death_event, logger, verbose)
+
+ #
+ # for each result from job
+ #
+ job_errors = RethrownJobError()
+ tasks_with_errors = set()
+
+
+ #
+ # job_result.job_name / job_result.return_value
+ # Reserved for returning result from job...
+ # How?
+ #
+ # Rewrite for loop so we can call iter.next() with a timeout
+ try:
+
+ #for job_result in pool_func(run_pooled_job_without_exceptions, feed_job_params_to_process_pool()):
+ ii = iter(pool_func(run_pooled_job_without_exceptions, feed_job_params_to_process_pool()))
+ while 1:
+ # Use a timeout of 3 years per job..., so that the condition we are waiting for in the thread
+ # can be interrupted by signals... In other words, so that Ctrl-C works
+ # Yucky part is that timeout is an extra parameter to IMapIterator.next(timeout=None)
+            #   but next() for normal iterators does not take any extra parameters.
+ if pool:
+ job_result = ii.next(timeout=99999999)
+ else:
+ job_result = next(ii)
+ # run next task
+ log_at_level (logger, 11, verbose, "r" * 80 + "\n")
+ t = node.lookup_node_from_name(job_result.task_name)
+
+ # remove failed jobs from history-- their output is bogus now!
+ if job_result.state in (JOB_ERROR, JOB_SIGNALLED_BREAK):
+
+ if len(job_result.params) > 1: # some jobs have no outputs
+ output_file_name = job_result.params[1]
+ if not isinstance(output_file_name, list): # some have multiple outputs from one job
+ output_file_name = [output_file_name]
+ #
+                    #   N.B. output parameters are not necessarily all strings
+ #
+ for o_f_n in get_strings_in_nested_sequence(output_file_name):
+ #
+ # use paths relative to working directory
+ #
+ o_f_n = os.path.relpath(o_f_n)
+ job_history.pop(o_f_n, None) # remove outfile from history if it exists
+
+ # only save poolsize number of errors
+ if job_result.state == JOB_ERROR:
+ log_at_level (logger, 10, verbose, " Exception caught for %s" % job_result.job_name)
+ job_errors.append(job_result.exception)
+ tasks_with_errors.add(t)
+
+ #
+ # print to logger immediately
+ #
+ if log_exceptions:
+ log_at_level (logger, 10, verbose, " Log Exception")
+ logger.error(job_errors.get_nth_exception_str())
+
+ #
+ # break if too many errors
+ #
+ if len(job_errors) >= parallelism or exceptions_terminate_immediately:
+ log_at_level (logger, 10, verbose, " Break loop %s %s %s " % (exceptions_terminate_immediately, len(job_errors), parallelism) )
+ parameter_q.put(all_tasks_complete())
+ break
+
+
+ # break immediately if the user says stop
+ elif job_result.state == JOB_SIGNALLED_BREAK:
+ job_errors.append(job_result.exception)
+ job_errors.specify_task(t, "Exceptions running jobs")
+ log_at_level (logger, 10, verbose, " Break loop JOB_SIGNALLED_BREAK %s %s " % (len(job_errors), parallelism) )
+ parameter_q.put(all_tasks_complete())
+ break
+
+ else:
+ if job_result.state == JOB_UP_TO_DATE:
+ # LOGGER: All Jobs in Out-of-date Tasks
+ log_at_level (logger, 5, verbose, " %s unnecessary: already up to date" % job_result.job_name)
+ else:
+ # LOGGER: Out-of-date Jobs in Out-of-date Tasks
+ log_at_level (logger, 3, verbose, " %s completed" % job_result.job_name)
+ # save this task name and the job (input and output files)
+ # alternatively, we could just save the output file and its
+ # completion time, or on the other end of the spectrum,
+ # we could save a checksum of the function that generated
+ # this file, something akin to:
+ # chksum = md5.md5(marshal.dumps(t.user_defined_work_func.func_code.co_code))
+ # we could even checksum the arguments to the function that
+ # generated this file:
+ # chksum2 = md5.md5(marshal.dumps(t.user_defined_work_func.func_defaults) +
+ # marshal.dumps(t.args))
+
+ if len(job_result.params) > 1: # some jobs have no outputs
+ output_file_name = job_result.params[1]
+ if not isinstance(output_file_name, list): # some have multiple outputs from one job
+ output_file_name = [output_file_name]
+ #
+                        #   N.B. output parameters are not necessarily all strings
+ # and not all files have been successfully created,
+ # even though the task apparently completed properly!
+ # Remember to expand globs
+ #
+ for possible_glob_str in get_strings_in_nested_sequence(output_file_name):
+ for o_f_n in glob.glob(possible_glob_str):
+ #
+ # use paths relative to working directory
+ #
+ o_f_n = os.path.relpath(o_f_n)
+ try:
+ log_at_level (logger, 10, verbose, " Job History for : " + o_f_n)
+ mtime = os.path.getmtime(o_f_n)
+ #
+ # use probably higher resolution time.time() over mtime
+ # which might have 1 or 2s resolutions, unless there is
+ # clock skew and the filesystem time > system time
+ # (e.g. for networks)
+ #
+ epoch_seconds = time.time()
+ # Aargh. go back to insert one second between jobs
+ if epoch_seconds < mtime:
+ if one_second_per_job == None and not runtime_data["ONE_SECOND_PER_JOB"]:
+ log_at_level (logger, 10, verbose, " Switch to one second per job")
+ runtime_data["ONE_SECOND_PER_JOB"] = True
+ elif epoch_seconds - mtime < 1.1:
+ mtime = epoch_seconds
+ chksum = JobHistoryChecksum(o_f_n, mtime, job_result.params[2:], t)
+ job_history[o_f_n] = chksum
+ except:
+ pass
+
+ ##for output_file_name in t.output_filenames:
+ ## # could use current time instead...
+ ## if not isinstance(output_file_name, list):
+ ## output_file_name = [output_file_name]
+ ## for o_f_n in output_file_name:
+ ## mtime = os.path.getmtime(o_f_n)
+ ## chksum = JobHistoryChecksum(o_f_n, mtime, job_result.params[2:], t)
+ ## job_history[o_f_n] = chksum
+
+
+ log_at_level (logger, 10, verbose, " signal completed task after checksumming...")
+ #
+ # signal completed task after checksumming
+ #
+ task_with_completed_job_q.put((t, job_result.task_name, job_result.job_name))
+
+
+ # make sure queue is still full after each job is retired
+            # do this after updating which jobs are incomplete
+ log_at_level (logger, 10, verbose, " job errors?")
+ if len(job_errors):
+ #parameter_q.clear()
+ #if len(job_errors) == 1 and not parameter_q._closed:
+ log_at_level (logger, 10, verbose, " all tasks completed...")
+ parameter_q.put(all_tasks_complete())
+ else:
+ log_at_level (logger, 10, verbose, " Fill queue with more parameter...")
+ fill_queue_with_job_parameters(job_parameters, parameter_q, parallelism, logger, verbose)
+    # The equivalent of the normal end of a for loop
+ except StopIteration as e:
+ #print ("END iteration normally", file=sys.stderr)
+ pass
+ except:
+ exception_name, exception_value, exception_Traceback = sys.exc_info()
+ exception_stack = traceback.format_exc()
+ # save exception to rethrow later
+ job_errors.append((None, None, exception_name, exception_value, exception_stack))
+ log_at_level (logger, 10, verbose, " Exception caught %s" % (exception_value))
+ log_at_level (logger, 10, verbose, " Exception caught %s" % (exception_name))
+ log_at_level (logger, 10, verbose, " Exception caught %s" % (exception_stack))
+ log_at_level (logger, 10, verbose, " Get next parameter size = %d" %
+ parameter_q.qsize())
+ log_at_level (logger, 10, verbose, " Task with completed jobs size = %d" %
+ task_with_completed_job_q.qsize())
+ parameter_q.put(all_tasks_complete())
+ try:
+ death_event.clear()
+ except:
+ pass
+
+ if pool:
+ log_at_level (logger, 10, verbose, " pool.close")
+ pool.close()
+ log_at_level (logger, 10, verbose, " pool.terminate")
+ try:
+ pool.terminate()
+ except:
+ pass
+ log_at_level (logger, 10, verbose, " pool.terminated")
+ raise job_errors
+
+
+ #log_at_level (logger, 10, verbose, " syncmanager.shutdown")
+ #syncmanager.shutdown()
+
+
+ if pool:
+ log_at_level (logger, 10, verbose, " pool.close")
+ #pool.join()
+ pool.close()
+ log_at_level (logger, 10, verbose, " pool.terminate")
+ # an exception may be thrown after a signal is caught (Ctrl-C)
+ # when the EventProxy(s) for death_event might be left hanging
+ try:
+ pool.terminate()
+ except:
+ pass
+ log_at_level (logger, 10, verbose, " pool.terminated")
+
+ # Switch back off EXTRA pipeline_run DEBUGGING
+ EXTRA_PIPELINERUN_DEBUGGING = False
+
+ if len(job_errors):
+ raise job_errors
+
+
+
+# use high resolution timestamps where available
+# default in python 2.5 and greater
+# N.B. File modify times / stat values have 1 second precision for many file systems
+# and may not be accurate to boot, especially over the network.
+os.stat_float_times(True)
+
+
+if __name__ == '__main__':
+ import unittest
+
+
+
+
+ #
+ # debug parameter ignored if called as a module
+ #
+ if sys.argv.count("--debug"):
+ sys.argv.remove("--debug")
+ unittest.main()
diff --git a/ruffus/test/auto_generated_pipeline_examples/parallel.py b/ruffus/test/auto_generated_pipeline_examples/parallel.py
new file mode 100644
index 0000000..b0f37e6
--- /dev/null
+++ b/ruffus/test/auto_generated_pipeline_examples/parallel.py
@@ -0,0 +1,228 @@
+#!/usr/bin/env python2.5
+"""
+
+ test_tasks.py
+
+"""
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# options
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+from optparse import OptionParser
+import sys, os
+import os.path
+import StringIO
+
+# add self to search path for testing
+exe_path = os.path.split(os.path.abspath(sys.argv[0]))[0]
+if __name__ == '__main__':
+ module_name = os.path.split(sys.argv[0])[1]
+ module_name = os.path.splitext(module_name)[0];
+else:
+ module_name = __name__
+
+# graph, task etc are one directory down
+if __name__ == '__main__':
+ sys.path.append("/net/cpp-group/Leo/inprogress/pipeline/installation/src/ruffus")
+
+
+
+parser = OptionParser(version="%prog 1.0")
+parser.add_option("-t", "--target_tasks", dest="target_tasks",
+ action="append",
+ default = list(),
+ metavar="JOBNAME",
+ type="string",
+ help="Target task(s) of pipeline.")
+parser.add_option("-f", "--forced_tasks", dest="forced_tasks",
+ action="append",
+ default = list(),
+ metavar="JOBNAME",
+ type="string",
+ help="Pipeline task(s) which will be included even if they are up to date.")
+parser.add_option("-j", "--jobs", dest="jobs",
+ default=5,
+ metavar="jobs",
+ type="int",
+ help="Specifies the number of jobs (commands) to run simultaneously.")
+parser.add_option("-v", "--verbose", dest = "verbose",
+ action="count", default=0,
+ help="Print more verbose messages for each additional verbose level.")
+parser.add_option("-d", "--dependency", dest="dependency_file",
+ default="simple.svg",
+ metavar="FILE",
+ type="string",
+ help="Print a dependency graph of the pipeline that would be executed "
+ "to FILE, but do not execute it.")
+parser.add_option("-F", "--dependency_graph_format", dest="dependency_graph_format",
+ metavar="FORMAT",
+ type="string",
+ default = 'svg',
+ help="format of dependency graph file. Can be 'ps' (PostScript), "+
+ "'svg' 'svgz' (Structured Vector Graphics), " +
+ "'png' 'gif' (bitmap graphics) etc ")
+parser.add_option("-n", "--just_print", dest="just_print",
+ action="store_true", default=False,
+ help="Print a description of the jobs that would be executed, "
+ "but do not execute them.")
+
+parameters = [
+ ]
+
+
+
+
+
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# imports
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+import StringIO
+import re
+import operator
+import sys
+from collections import defaultdict
+
+from graph import *
+from task import *
+import task
+from print_dependencies import *
+# use simplejson in place of json for python < 2.6
+try:
+ import json
+except ImportError:
+ import simplejson
+ json = simplejson
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Functions
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+def create_custom_file_func(params):
+ """
+ creates function which can be used as input to @files_func
+ """
+ def cust_func ():
+ for job_param in params:
+ yield job_param
+ return cust_func
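+
+# Illustrative sketch only: given job parameters such as
+#     params = [["a.in", "a.out"], ["b.in", "b.out"]]
+# create_custom_file_func(params) returns a generator function that yields one
+# such parameter list per job when the pipeline machinery calls it.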
+
+
+def is_job_uptodate (infiles, outfiles, *extra_params):
+ """
+ assumes first two parameters are files, checks if they are up to date
+ """
+ return task.needs_update_check_modify_time (infiles, outfiles, *extra_params)
+
+
+
+def test_post_task_function ():
+ print "Hooray"
+
+import time
+def test_job_io(infiles, outfiles, extra_params):
+ """
+ cat input files content to output files
+ after writing out job parameters
+ """
+ # dump parameters
+ params = (infiles, outfiles) + extra_params
+ sys.stdout.write(' job = %s\n' % json.dumps(params))
+
+
+
+ if isinstance(infiles, str):
+ infiles = [infiles]
+ elif infiles == None:
+ infiles = []
+ if isinstance(outfiles, str):
+ outfiles = [outfiles]
+ output_text = list()
+ for f in infiles:
+ output_text.append(open(f).read())
+ output_text = "".join(sorted(output_text))
+ output_text += json.dumps(infiles) + " -> " + json.dumps(outfiles) + "\n"
+ for f in outfiles:
+ open(f, "w").write(output_text)
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Main logic
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+
+
+
+
+# get help string
+f =StringIO.StringIO()
+parser.print_help(f)
+helpstr = f.getvalue()
+(options, remaining_args) = parser.parse_args()
+
+
+
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Tasks
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+
+parameters = [
+ ['A', 1, 2], # 1st job
+ ['B', 3, 4], # 2nd job
+ ['C', 5, 6], # 3rd job
+ ]
+
+#
+# first task
+#
+def first_task():
+ print >>sys.stderr, "First task"
+
+@follows(first_task)
+@parallel(parameters)
+def parallel_task(name, param1, param2):
+ sys.stderr.write(" Parallel task %s: " % name)
+ sys.stderr.write("%d + %d = %d\n" % (param1, param2, param1 + param2))
+
+
+ pipeline_run([parallel_task], multiprocess = 2)
+
+pipeline_run([parallel_task])
+
+
+
+
+
+if options.just_print:
+ pipeline_printout(sys.stdout, options.target_tasks, options.forced_tasks, long_winded=True)
+
+elif options.dependency_file:
+ graph_printout ( open(options.dependency_file, "w"),
+ options.dependency_graph_format,
+ options.target_tasks,
+ options.forced_tasks)
+else:
+ pipeline_run(options.target_tasks, options.forced_tasks, multiprocess = options.jobs)
+
diff --git a/ruffus/test/auto_generated_pipeline_examples/simple.py b/ruffus/test/auto_generated_pipeline_examples/simple.py
new file mode 100644
index 0000000..1a71286
--- /dev/null
+++ b/ruffus/test/auto_generated_pipeline_examples/simple.py
@@ -0,0 +1,253 @@
+#!/usr/bin/env python2.5
+"""
+
+ test_tasks.py
+
+"""
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# options
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+from optparse import OptionParser
+import sys, os
+import os.path
+import StringIO
+
+# add self to search path for testing
+exe_path = os.path.split(os.path.abspath(sys.argv[0]))[0]
+if __name__ == '__main__':
+ module_name = os.path.split(sys.argv[0])[1]
+ module_name = os.path.splitext(module_name)[0];
+else:
+ module_name = __name__
+
+# graph, task etc are one directory down
+if __name__ == '__main__':
+ sys.path.append("/net/cpp-group/Leo/inprogress/pipeline/installation/src/ruffus")
+
+
+
+parser = OptionParser(version="%prog 1.0")
+parser.add_option("-t", "--target_tasks", dest="target_tasks",
+ action="append",
+ default = list(),
+ metavar="JOBNAME",
+ type="string",
+ help="Target task(s) of pipeline.")
+parser.add_option("-f", "--forced_tasks", dest="forced_tasks",
+ action="append",
+ default = list(),
+ metavar="JOBNAME",
+ type="string",
+ help="Pipeline task(s) which will be included even if they are up to date.")
+parser.add_option("-j", "--jobs", dest="jobs",
+ default=5,
+ metavar="jobs",
+ type="int",
+ help="Specifies the number of jobs (commands) to run simultaneously.")
+parser.add_option("-v", "--verbose", dest = "verbose",
+ action="count", default=0,
+ help="Print more verbose messages for each additional verbose level.")
+parser.add_option("-d", "--dependency", dest="dependency_file",
+ default="simple.svg",
+ metavar="FILE",
+ type="string",
+ help="Print a dependency graph of the pipeline that would be executed "
+ "to FILE, but do not execute it.")
+parser.add_option("-F", "--dependency_graph_format", dest="dependency_graph_format",
+ metavar="FORMAT",
+ type="string",
+ default = 'svg',
+ help="format of dependency graph file. Can be 'ps' (PostScript), "+
+ "'svg' 'svgz' (Structured Vector Graphics), " +
+ "'png' 'gif' (bitmap graphics) etc ")
+parser.add_option("-n", "--just_print", dest="just_print",
+ action="store_true", default=False,
+ help="Print a description of the jobs that would be executed, "
+ "but do not execute them.")
+
+parameters = [
+ ]
+
+
+
+
+
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# imports
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+import StringIO
+import re
+import operator
+import sys
+from collections import defaultdict
+
+from graph import *
+from task import *
+import task
+from print_dependencies import *
+# use simplejson in place of json for python < 2.6
+try:
+ import json
+except ImportError:
+ import simplejson
+ json = simplejson
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Functions
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+def create_custom_file_func(params):
+ """
+ creates function which can be used as input to @files_func
+ """
+ def cust_func ():
+ for job_param in params:
+ yield job_param
+ return cust_func
+
+
+def is_job_uptodate (infiles, outfiles, *extra_params):
+ """
+ assumes first two parameters are files, checks if they are up to date
+ """
+ return task.needs_update_check_modify_time (infiles, outfiles, *extra_params)
+
+
+
+def test_post_task_function ():
+ print "Hooray"
+
+import time
+def test_job_io(infiles, outfiles, extra_params):
+ """
+ cat input files content to output files
+ after writing out job parameters
+ """
+ # dump parameters
+ params = (infiles, outfiles) + extra_params
+ sys.stdout.write(' job = %s\n' % json.dumps(params))
+
+
+
+ if isinstance(infiles, str):
+ infiles = [infiles]
+ elif infiles == None:
+ infiles = []
+ if isinstance(outfiles, str):
+ outfiles = [outfiles]
+ output_text = list()
+ for f in infiles:
+ output_text.append(open(f).read())
+ output_text = "".join(sorted(output_text))
+ output_text += json.dumps(infiles) + " -> " + json.dumps(outfiles) + "\n"
+ for f in outfiles:
+ open(f, "w").write(output_text)
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Main logic
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+
+
+
+
+# get help string
+f =StringIO.StringIO()
+parser.print_help(f)
+helpstr = f.getvalue()
+(options, remaining_args) = parser.parse_args()
+
+
+
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Tasks
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+
+#
+# task1
+#
+@files(None, 'a.1')
+def task1(infiles, outfiles, *extra_params):
+ """
+ First task
+ """
+ test_job_io(infiles, outfiles, extra_params)
+
+
+
+#
+# task2
+#
+@files_re('*.1', '(.*).1', r'\1.2')
+@follows(task1)
+def task2(infiles, outfiles, *extra_params):
+ """
+ Second task
+ """
+ test_job_io(infiles, outfiles, extra_params)
+
+
+
+#
+# task3
+#
+@files_re('*.1', '(.*).1', r'\1.3')
+@follows(task2)
+def task3(infiles, outfiles, *extra_params):
+ """
+ Third task
+ """
+ test_job_io(infiles, outfiles, extra_params)
+
+
+
+#
+# task4
+#
+@files_re('*.1', '(.*).1', r'\1.4')
+@follows(task3)
+def task4(infiles, outfiles, *extra_params):
+ """
+ Fourth task
+ """
+ test_job_io(infiles, outfiles, extra_params)
+
+
+
+
+if options.just_print:
+ pipeline_printout(sys.stdout, options.target_tasks, options.forced_tasks, long_winded=True)
+
+elif options.dependency_file:
+ graph_printout ( open(options.dependency_file, "w"),
+ options.dependency_graph_format,
+ options.target_tasks,
+ options.forced_tasks)
+else:
+ pipeline_run(options.target_tasks, options.forced_tasks, multiprocess = options.jobs)
+
diff --git a/ruffus/test/auto_generated_pipeline_examples/simpler.py b/ruffus/test/auto_generated_pipeline_examples/simpler.py
new file mode 100644
index 0000000..3420fcf
--- /dev/null
+++ b/ruffus/test/auto_generated_pipeline_examples/simpler.py
@@ -0,0 +1,269 @@
+#!/usr/bin/env python2.5
+"""
+
+ test_tasks.py
+
+"""
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# options
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+from optparse import OptionParser
+import sys, os
+import os.path
+import StringIO
+
+# add self to search path for testing
+exe_path = os.path.split(os.path.abspath(sys.argv[0]))[0]
+if __name__ == '__main__':
+ module_name = os.path.split(sys.argv[0])[1]
+ module_name = os.path.splitext(module_name)[0];
+else:
+ module_name = __name__
+
+# graph, task etc are one directory down
+if __name__ == '__main__':
+ sys.path.append("/net/cpp-group/Leo/inprogress/pipeline/installation/src/ruffus")
+
+
+
+parser = OptionParser(version="%prog 1.0")
+parser.add_option("-t", "--target_tasks", dest="target_tasks",
+ action="append",
+ default = list(),
+ metavar="JOBNAME",
+ type="string",
+ help="Target task(s) of pipeline.")
+parser.add_option("-f", "--forced_tasks", dest="forced_tasks",
+ action="append",
+ default = list(),
+ metavar="JOBNAME",
+ type="string",
+ help="Pipeline task(s) which will be included even if they are up to date.")
+parser.add_option("-j", "--jobs", dest="jobs",
+ default=5,
+ metavar="jobs",
+ type="int",
+ help="Specifies the number of jobs (commands) to run simultaneously.")
+parser.add_option("-v", "--verbose", dest = "verbose",
+ action="count", default=0,
+ help="Print more verbose messages for each additional verbose level.")
+parser.add_option("-d", "--dependency", dest="dependency_file",
+ default="simple.svg",
+ metavar="FILE",
+ type="string",
+ help="Print a dependency graph of the pipeline that would be executed "
+ "to FILE, but do not execute it.")
+parser.add_option("-F", "--dependency_graph_format", dest="dependency_graph_format",
+ metavar="FORMAT",
+ type="string",
+ default = 'svg',
+ help="format of dependency graph file. Can be 'ps' (PostScript), "+
+ "'svg' 'svgz' (Structured Vector Graphics), " +
+ "'png' 'gif' (bitmap graphics) etc ")
+parser.add_option("-n", "--just_print", dest="just_print",
+ action="store_true", default=False,
+ help="Print a description of the jobs that would be executed, "
+ "but do not execute them.")
+parser.add_option("-M", "--minimal_rebuild_mode", dest="minimal_rebuild_mode",
+ action="store_true", default=False,
+ help="Rebuild a minimum of tasks necessary for the target. "
+ "Ignore upstream out of date tasks if intervening tasks are fine.")
+parser.add_option("-K", "--no_key_legend_in_graph", dest="no_key_legend_in_graph",
+ action="store_true", default=False,
+ help="Do not print out legend and key for dependency graph.")
+parser.add_option("-H", "--draw_graph_horizontally", dest="draw_horizontally",
+ action="store_true", default=False,
+ help="Draw horizontal dependency graph.")
+
+parameters = [
+ ]
+
+
+
+
+
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# imports
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+import StringIO
+import re
+import operator
+import sys
+from collections import defaultdict
+
+from graph import *
+from task import *
+import task
+from print_dependencies import *
+# use simplejson in place of json for python < 2.6
+try:
+ import json
+except ImportError:
+ import simplejson
+ json = simplejson
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Functions
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+def create_custom_file_func(params):
+ """
+ creates function which can be used as input to @files_func
+ """
+ def cust_func ():
+ for job_param in params:
+ yield job_param
+ return cust_func
+
+
+def is_job_uptodate (infiles, outfiles, *extra_params):
+ """
+ assumes first two parameters are files, checks if they are up to date
+ """
+ return task.needs_update_check_modify_time (infiles, outfiles, *extra_params)
+
+
+
+def test_post_task_function ():
+ print "Hooray"
+
+import time
+def test_job_io(infiles, outfiles, extra_params):
+ """
+ cat input files content to output files
+ after writing out job parameters
+ """
+ # dump parameters
+ params = (infiles, outfiles) + extra_params
+ sys.stdout.write(' job = %s\n' % json.dumps(params))
+
+
+
+ if isinstance(infiles, str):
+ infiles = [infiles]
+ elif infiles == None:
+ infiles = []
+ if isinstance(outfiles, str):
+ outfiles = [outfiles]
+ output_text = list()
+ for f in infiles:
+ output_text.append(open(f).read())
+ output_text = "".join(sorted(output_text))
+ output_text += json.dumps(infiles) + " -> " + json.dumps(outfiles) + "\n"
+ for f in outfiles:
+ open(f, "w").write(output_text)
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Main logic
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+
+
+
+
+# get help string
+f =StringIO.StringIO()
+parser.print_help(f)
+helpstr = f.getvalue()
+(options, remaining_args) = parser.parse_args()
+
+
+
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Tasks
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+
+#
+# task1
+#
+@files(None, 'a.1')
+def task1(infiles, outfiles, *extra_params):
+ """
+ First task
+ """
+ test_job_io(infiles, outfiles, extra_params)
+
+
+
+#
+# task2
+#
+@files_re('*.1', '(.*).1', r'\1.1', r'\1.2')
+@follows(task1)
+def task2(infiles, outfiles, *extra_params):
+ """
+ Second task
+ """
+ test_job_io(infiles, outfiles, extra_params)
+
+
+
+#
+# task3
+#
+@files_re('*.1', '(.*).1', r'\1.2', r'\1.3')
+@follows(task2)
+def task3(infiles, outfiles, *extra_params):
+ """
+ Third task
+ """
+ test_job_io(infiles, outfiles, extra_params)
+
+
+
+#
+# task4
+#
+@files_re('*.1', '(.*).1', r'\1.3', r'\1.4')
+@follows(task3)
+def task4(infiles, outfiles, *extra_params):
+ """
+ Fourth task
+ """
+ test_job_io(infiles, outfiles, extra_params)
+
+
+
+
+if options.just_print:
+ pipeline_printout(sys.stdout, options.target_tasks, options.forced_tasks,
+ long_winded=True,
+ gnu_make_maximal_rebuild_mode = not options.minimal_rebuild_mode)
+
+elif options.dependency_file:
+ pipeline_printout_graph ( open(options.dependency_file, "w"),
+ options.dependency_graph_format,
+ options.target_tasks,
+ options.forced_tasks,
+ draw_vertically = not options.draw_horizontally,
+ gnu_make_maximal_rebuild_mode = not options.minimal_rebuild_mode,
+ no_key_legend = options.no_key_legend_in_graph)
+else:
+ pipeline_run(options.target_tasks, options.forced_tasks, multiprocess = options.jobs,
+ gnu_make_maximal_rebuild_mode = not options.minimal_rebuild_mode)
+
diff --git a/ruffus/test/complicated_example.py b/ruffus/test/complicated_example.py
new file mode 100755
index 0000000..15bf56e
--- /dev/null
+++ b/ruffus/test/complicated_example.py
@@ -0,0 +1,531 @@
+#!/usr/bin/env python
+from __future__ import print_function
+"""
+
+ complicated_example.py
+
+"""
+
+import os, sys
+exe_path = os.path.split(os.path.abspath(sys.argv[0]))[0]
+sys.path.append(os.path.abspath(os.path.join(exe_path,"..", "..")))
+from ruffus import *
+from time import sleep
+import random
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# options
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+from optparse import OptionParser
+parser = OptionParser(version="%prog 1.0")
+parser.add_option("-t", "--target_tasks", dest="target_tasks",
+ action="append",
+ default = ["summarise_all"],
+ metavar="JOBNAME",
+ type="string",
+ help="Target task(s) of pipeline.")
+parser.add_option("-f", "--forced_tasks", dest="forced_tasks",
+ action="append",
+ default = list(),
+ metavar="JOBNAME",
+ type="string",
+ help="Pipeline task(s) which will be included even if they are up to date.")
+parser.add_option("-j", "--jobs", dest="jobs",
+ default=5,
+ metavar="jobs",
+ type="int",
+ help="Specifies the number of jobs (commands) to run simultaneously.")
+
+parser.add_option("-d", "--data_dir", dest="data_dir",
+ default="%s/data_for_complicated_example" % exe_path,
+ metavar="PATH",
+ type="string",
+ help="Directory with starting data [*.fa].")
+parser.add_option("-w", "--working_dir", dest="working_dir",
+ default="/working_dir",
+ metavar="PATH",
+ type="string",
+ help="Working directory.")
+
+
+parser.add_option("-v", "--verbose", dest = "verbose",
+ action="count", default=0,
+ help="Print more verbose messages for each additional verbose level.")
+parser.add_option("-D", "--dependency", dest="dependency_file",
+ metavar="FILE",
+ type="string",
+ help="Print a dependency graph of the pipeline that would be executed "
+ "to FILE, but do not execute it.")
+parser.add_option("-F", "--dependency_graph_format", dest="dependency_graph_format",
+ metavar="FORMAT",
+ type="string",
+ default = 'svg',
+ help="format of dependency graph file. Can be 'ps' (PostScript), "+
+ "'svg' 'svgz' (Structured Vector Graphics), " +
+ "'png' 'gif' (bitmap graphics) etc ")
+parser.add_option("-n", "--just_print", dest="just_print",
+ action="store_true", default=False,
+ help="Print a description of the jobs that would be executed, "
+ "but do not execute them.")
+
+
+
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# imports
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+try:
+ import StringIO as io
+except:
+ import io as io
+import re
+import operator
+import sys
+from collections import defaultdict
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Functions
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+#_________________________________________________________________________________________
+#
+# Helper function:
+#
+# split_gene_files
+#
+#_________________________________________________________________________________________
+def split_gene_files ( gene_file_name,
+ job_completion_flag_file_name,
+ split_output_dir):
+ """
+ Helper function to simulate splitting gene files into "chunks" suitable for
+ parallel jobs on a computational cluster
+
+ The number of output files is only known at runtime
+    because the number of "chunks" depends on the size
+    of the starting gene sets
+
+ We simulate this using a random number from 20->50
+ """
+
+ #
+ # make output directory
+ #
+ if not os.path.exists(split_output_dir):
+ os.makedirs(split_output_dir)
+
+ # save number of chunks for later tasks
+ number_of_output_files = int(random.uniform(20, 50))
+
+ for index in range(number_of_output_files):
+ open("%s/%d.fa" % (split_output_dir, index), "w")
+ open(job_completion_flag_file_name, "w")
+
+
+#_________________________________________________________________________________________
+#
+# get_unknown_gene_set_names
+# get_species_names
+#
+#
+# functions for getting unknown gene set names and species names
+#
+#_________________________________________________________________________________________
+import glob, re
+def get_chunked_gene_file_names (dir_name):
+ """
+ Get list of gene file names
+ Helper function for getting unknown gene set names, and species names
+ """
+ regex = re.compile(r".+/(.+).genes.fa")
+ gene_set_names = []
+ for file_name in glob.glob("%s/%s/*.genes.fa" % (d_dir, dir_name)):
+ m = regex.search(file_name)
+ gene_set_names.append(m.group(1))
+ return gene_set_names
+def get_unknown_gene_set_names ():
+ return get_chunked_gene_file_names("unknown_genes")
+def get_species_names ():
+ return get_chunked_gene_file_names("all_genes_in_each_species")
+
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Main logic
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+
+
+
+
+# get help string
+f =io.StringIO()
+parser.print_help(f)
+helpstr = f.getvalue()
+(options, remaining_args) = parser.parse_args()
+
+
+d_dir = options.data_dir
+w_dir = options.working_dir
+
+
+
+
+#_________________________________________________________________________________________
+#
+# Step 1:
+#
+# split_unknown_gene_set
+#
+# data_dir/unknown_genes/XXX.genes.fa
+# ->working_dir/XXX/split_gene_sets.completed
+# ->working_dir/XXX/NNN.fa
+#
+#_________________________________________________________________________________________
+@follows(mkdir(w_dir))
+@files_re("%s/unknown_genes/*.genes.fa" % d_dir,
+ r"(.*/)(.*)(\.genes.fa)",
+ r"\1\2\3", # unknown_gene_set file name
+ r"%s/\2/split_gene_sets.completed" % w_dir, # job_completion_flag
+ r"%s/\2" % w_dir) # split_output_dir
+def split_unknown_gene_set( starting_gene_set,
+ job_completion_flag,
+ split_output_dir):
+ """
+ Simulate splitting gene files for unknown gene set into "chunks" suitable for
+ parallel jobs on a computational cluster
+ """
+ split_gene_files ( starting_gene_set,
+ job_completion_flag,
+ split_output_dir)
+
+
+#_________________________________________________________________________________________
+#
+# Step 2:
+#
+# split_per_species_gene_sets
+
+# data_dir/all_genes_in_each_species/YYY.genes.fa
+# ->working_dir/species_YYY/split_gene_sets.completed
+# ->working_dir/species_YYY/MMM.fa
+#
+#_________________________________________________________________________________________
+@follows(mkdir(w_dir))
+@files_re("%s/all_genes_in_each_species/*.genes.fa" % d_dir,
+ r"(.*/)(.*)(\.genes.fa)",
+ r"\1\2\3", # all_genes_in_species
+ r"%s/species_\2/split_gene_sets.completed" % w_dir, # job_completion_flag
+ r"%s/species_\2" % w_dir) # split_output_dir
+def split_per_species_gene_sets(all_genes_in_species,
+ job_completion_flag,
+ split_output_dir):
+ """
+ Simulate splitting gene files for each species into "chunks" suitable for
+ parallel jobs on a computational cluster
+ """
+ split_gene_files ( all_genes_in_species,
+ job_completion_flag,
+ split_output_dir)
+
+
+
+#_________________________________________________________________________________________
+#
+# Step 3:
+#
+# all_vs_all_comparisons
+# working_dir/species_YYY/MMM.fa
+# working_dir/XXX/NNN.fa
+# -> compare/x/y.n.m.comparison_res
+# -> compare/x/y.n.m.complete
+#
+#_________________________________________________________________________________________
+#
+# function for generating custom parameters
+#
+def generate_all_vs_all_params ():
+ """
+ Custom function to generate
+ all vs. all file names for the various "chunks"
+ """
+
+ chunk_index_regex = re.compile(r".+/(.+).fa")
+ def parse_index_from_chunk_filename (chunk_filename):
+ match = chunk_index_regex.search(chunk_filename)
+ return int(match.group(1))
+
+ species_names = get_species_names()
+ gene_set_names = get_unknown_gene_set_names()
+ for x in gene_set_names:
+ for y in species_names:
+ y = "species_" + y
+
+ m_files = glob.glob("%s/%s/*.fa" % (w_dir, x))
+ n_files = glob.glob("%s/%s/*.fa" % (w_dir, y))
+
+ #
+ # for each species chunk vs for each unknown chunk
+ #
+ for m_file in m_files:
+ for n_file in n_files:
+ input_files = [m_file, n_file]
+ output_dir = "%s/compare/%s" % (w_dir, x)
+
+ m = parse_index_from_chunk_filename(m_file)
+ n = parse_index_from_chunk_filename(n_file)
+
+ job_completion_flag = output_dir + "/%s.%d.%d.complete" % (y, m, n)
+ result_file = output_dir + "/%s.%d.%d.comparison_res" % (y, m, n)
+ name = "%s -> %d vs %d\n" % (y, m, n)
+ yield input_files, job_completion_flag, output_dir, result_file, name
+
+
+
+@follows(split_unknown_gene_set, split_per_species_gene_sets)
+@files(generate_all_vs_all_params)
+def all_vs_all_comparisons(file_chunks,
+ job_completion_flag,
+ output_dir,
+ result_file,
+ name):
+ """
+ Simulate comparison of gene chunks against each other
+ Normally runs in parallel on a computational cluster
+ """
+
+ #
+ # make output directory
+ #
+ try:
+ os.makedirs(output_dir)
+ except OSError:
+ pass
+
+ open(job_completion_flag, "w")
+ open(result_file, "w").write(name)
+
+
+#_________________________________________________________________________________________
+#
+# Step 4:
+#
+# Recombine: alignment results to make gene families
+# compare/x/*.comparison_res
+# -> multiple_alignment/x/x.gene_families
+#
+#_________________________________________________________________________________________
+
+#
+# generate_params_for_making_gene_families
+#
+# function for generating custom parameters
+#
+def generate_params_for_making_gene_families ():
+ """
+    Custom function for combining comparison files into gene families
+ """
+ gene_set_names = get_unknown_gene_set_names()
+ for x in gene_set_names:
+ results_files = glob.glob("%s/compare/%s/*.comparison_res" % (w_dir, x))
+ output_dir = "%s/multiple_alignment/%s" % (w_dir, x)
+ family_file = "%s/gene.families" % output_dir
+ yield results_files, family_file, output_dir
+
+
+@follows(all_vs_all_comparisons)
+@files(generate_params_for_making_gene_families)
+def combine_into_gene_familes (results_files, family_file_name, output_dir):
+ """
+ Simulate making gene families by concatenating comparison results :-)
+ """
+ #
+ # make output directory
+ #
+ if not os.path.exists(output_dir):
+ os.makedirs(output_dir)
+
+ family_file = open(family_file_name, "w")
+ for f in results_files:
+ family_file.write(open(f).read())
+
+#_________________________________________________________________________________________
+#
+# Step 5:
+#
+# split_gene_family_for_evolutionary_analysis
+# multiple_alignment/x/x.gene_families
+# -> multiple_alignment/x/NNN.aln
+# -> multiple_alignment/x/split.completed
+#
+#_________________________________________________________________________________________
+@follows(combine_into_gene_familes)
+@files_re("%s/multiple_alignment/*/gene.families" % w_dir,
+ r"(.+)/(gene.families)",
+ r"\1/\2",
+ r"\1/split.completed",
+ r"\1")
+def split_gene_family_for_evolutionary_analysis( family_file,
+ job_completion_flag_file, split_output_dir):
+ """
+ Simulate splitting family of genes into "chunks" suitable for
+ parallel jobs on a computational cluster
+ """
+
+ # choose a random number of output "chunks" for this gene family
+ number_of_output_files = int(random.uniform(20, 50))
+
+ for index in range(number_of_output_files):
+ open("%s/%d.aln" % (split_output_dir, index), "w").write("chunk %d" % index)
+ open(job_completion_flag_file, "w")
+
+
+#_________________________________________________________________________________________
+#
+# Step 6:
+#
+# evolution_analysis
+# multiple_alignment/x/NNN.aln
+# -> multiple_alignment/x/NNN.evo_res
+#
+#_________________________________________________________________________________________
+@follows(split_gene_family_for_evolutionary_analysis)
+@files_re("%s/multiple_alignment/*/*.aln" % w_dir,
+ r"(.+).aln",
+ r"\1.evo_res")
+def evolution_analysis( family_file, result_file_name):
+ """
+ Simulate evolutionary analysis
+ """
+
+ result_file = open(result_file_name, "w")
+ result_file.write(family_file + "\n")
+
+
+#_________________________________________________________________________________________
+#
+# Step 7:
+#
+# combine_evolution_analysis
+# multiple_alignment/x/NNN.evo_res
+# -> evolutionary_analysis/x.results
+#
+#_________________________________________________________________________________________
+
+#
+# generate_params_for_combining_evolutionary_analyses
+#
+# function for generating custom parameters
+#
+def generate_params_for_combining_evolutionary_analyses ():
+ """
+ Custom function to combine evolutionary analyses per unknown gene set
+ """
+ gene_set_names = get_unknown_gene_set_names()
+ for x in gene_set_names:
+ results_files = glob.glob("%s/multiple_alignment/%s/*.evo_res" % (w_dir, x))
+ combined_file = "%s/evolutionary_analysis/%s.results" % (w_dir, x)
+ yield results_files, combined_file
+
+@follows(evolution_analysis, mkdir("%s/evolutionary_analysis" % w_dir))
+@files(generate_params_for_combining_evolutionary_analyses)
+def combine_evolution_analysis (results_files, combined_file_name):
+ """
+ Simulate combining evolutionary analyses
+ """
+ combined_file = open(combined_file_name, "w")
+ for f in results_files:
+ combined_file.write(open(f).read())
+
+
+
+#_________________________________________________________________________________________
+#
+# Step 8:
+#
+# summarise_evolution_analysis
+# evolutionary_analysis/x.results
+# -> evolutionary_analysis/x.summary
+#
+#_________________________________________________________________________________________
+@follows(combine_evolution_analysis)
+@files_re("%s/evolutionary_analysis/*.results" % w_dir,
+ r"(.+).results",
+ r"\1.summary")
+def summarise_evolution_analysis( results_file, summary_file_name):
+ """
+ Simulate summary of evolutionary analysis
+ """
+ summary_file = open(summary_file_name, "w")
+ summary_file.write("summary of " + open(results_file).read())
+
+
+#_________________________________________________________________________________________
+#
+# Step 9:
+#
+# summarise_all
+# evolutionary_analysis/x.summary
+# -> all.total_summary
+#
+#_________________________________________________________________________________________
+summary_file_names = ["%s/evolutionary_analysis/%s.summary" % (w_dir, n)
+ for n in get_unknown_gene_set_names()]
+total_summary_file_name = "%s/all.total_summary" % w_dir
+
+@follows(summarise_evolution_analysis)
+@files(summary_file_names, total_summary_file_name)
+def summarise_all( summary_files, total_summary_file_name):
+ """
+ Simulate summarising all results
+ """
+ total_summary_file = open(total_summary_file_name, "w")
+ total_summary_file.write("Over all Summary:\n")
+ for f in summary_files:
+ total_summary_file.write(open(f).read())
+
+
+
+
+
+
+
+#888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+#
+# print pipeline or run pipeline
+#
+#
+
+# Necessary to protect the "entry point" of the program under windows.
+# see: http://docs.python.org/library/multiprocessing.html#multiprocessing-programming
+#
+if __name__ == '__main__':
+ try:
+ if options.just_print:
+ pipeline_printout(sys.stdout, options.target_tasks, options.forced_tasks, verbose=1)
+
+ elif options.dependency_file:
+ graph_printout ( open(options.dependency_file, "w"),
+ options.dependency_graph_format,
+ options.target_tasks,
+ options.forced_tasks)
+ else:
+ pipeline_run(options.target_tasks, options.forced_tasks, multiprocess = options.jobs)
+ except Exception as e:
+ print(e.args)
+
diff --git a/ruffus/test/create_test_script_from_dependency_tree.py b/ruffus/test/create_test_script_from_dependency_tree.py
new file mode 100755
index 0000000..96712fa
--- /dev/null
+++ b/ruffus/test/create_test_script_from_dependency_tree.py
@@ -0,0 +1,332 @@
+#!/usr/bin/env python
+from __future__ import print_function
+"""
+
+ create_test_script_from_dependency_tree.py
+
+"""
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# options
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+from optparse import OptionParser
+import sys, os
+import os.path
+try:
+ import StringIO as io
+except:
+ import io as io
+
+# add self to search path for testing
+exe_path = os.path.split(os.path.abspath(sys.argv[0]))[0]
+if __name__ == '__main__':
+ module_name = os.path.split(sys.argv[0])[1]
+ module_name = os.path.splitext(module_name)[0];
+else:
+ module_name = __name__
+
+# graph, task etc are one directory down
+if __name__ == '__main__':
+ sys.path.append(os.path.abspath(os.path.join(exe_path,"..", "..")))
+from ruffus import *
+
+
+
+parser = OptionParser(version="%prog 1.0")
+parser.add_option("-i", "--input_dot_file", dest="dot_file",
+ metavar="FILE",
+ default = os.path.join(exe_path, "dependency_data", "simple.dag"),
+ type="string",
+ help="name and path of tree file in modified DOT format used to generate "
+ "test script dependencies.")
+parser.add_option("-o", "--output_file", dest="output_file",
+ metavar="FILE",
+ default = os.path.join(exe_path, "pipelines", "simple.py"),
+ type="string",
+ help="name and path of output python test script.")
+parser.add_option("-v", "--verbose", dest = "verbose",
+ action="count", default=0,
+ help="Print more verbose messages for each additional verbose level.")
+parser.add_option( "-J", "--jumble_task_order", dest = "jumble_task_order",
+ action="store_true", default=False,
+ help="Do not define task functions in order of dependency.")
+
+parameters = [
+ ]
+
+mandatory_parameters = ["dot_file"]
+
+
+
+
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# imports
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+import re
+import operator
+import sys
+from collections import defaultdict
+
+import random
+
+# use simplejson in place of json for python < 2.6
+try:
+ import json
+except ImportError:
+ import simplejson
+ json = simplejson
+
+dumps = json.dumps
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Functions
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+
+#_________________________________________________________________________________________
+
+# adjacent_pairs_iterate
+
+#_________________________________________________________________________________________
+from adjacent_pairs_iterate import adjacent_pairs_iterate
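+# adjacent_pairs_iterate walks a sequence in overlapping pairs,
+# e.g. ["a", "b", "c"] -> ("a", "b"), ("b", "c"); it is used below to turn each
+# "a -> b -> c" chain from the DOT file into individual dependency edges.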
+
+
+
+
+
+#_________________________________________________________________________________________
+
+# task_dependencies_from_dotfile
+
+#_________________________________________________________________________________________
+def task_dependencies_from_dotfile(stream):
+ """
+ Read the programme tasks specified in a (modified) DOT-format file
+ """
+
+ decorator_regex = re.compile(r"([a-z_]+) *(\(.*)")
+ attributes_regex = re.compile(r"\[.*\]")
+
+ which_task_follows = defaultdict(list)
+ task_decorators = defaultdict(list)
+ task_descriptions = dict()
+
+ #
+ # remember node
+ #
+
+ all_tasks = dict()
+ io_tasks = set()
+ for linenum, line in enumerate(stream):
+ # remove leading and trailing spaces
+ line = line.strip()
+
+ if "digraph" in line:
+ continue;
+ if not len(line):
+ continue
+
+ #
+ # decorators
+ #
+ if line[0:2] == '#@':
+ fields = line[2:].split('::', 2)
+ if len(fields) != 3:
+ raise Exception("Unexpected task specification on line# %d\n(%s)" %
+ (linenum, line))
+ task_name, decorators, description = fields
+ if decorators[0] != '@':
+ raise Exception("Task decorator missing starting ampersand '@' on line# %d\n(%s)" %
+ (linenum, line))
+ for d in decorators[1:].split("@"):
+ m = decorator_regex.match(d)
+ if not m:
+ raise Exception("Task decorator (%s) missing parentheses on line# %d\n(%s)" %
+ (d, linenum, line))
+ task_decorators[task_name].append((m.group(1), m.group(2)))
+ if m.group(1)[0:5] == "files" or m.group(1) == "parallel":
+ io_tasks.add(task_name)
+
+ task_descriptions[task_name] = description
+ continue
+
+ #
+ # other comments
+ #
+ if line[0] in '#{}/':
+ continue;
+ line = line.strip(';')
+ line = attributes_regex.sub("", line)
+
+ #
+ # ignore assignments
+ #
+ if "=" in line:
+ continue;
+ nodes = [x.strip() for x in line.split('->')]
+ for name1, name2 in adjacent_pairs_iterate(nodes):
+ which_task_follows[name2].append(name1)
+ all_tasks[name1] = 1
+ all_tasks[name2] = 1
+
+ for task in task_decorators:
+ if task not in all_tasks:
+ raise Exception("Decorated task %s not in dependencies")
+
+
+ # order tasks by precedence the dumb way: keep accumulating upstream
+ # weights into downstream tasks until the ordering is stable
+ disordered = True
+ while disordered:
+ disordered = False
+ for to_task, from_tasks in which_task_follows.items():
+ for f in from_tasks:
+ if all_tasks[to_task] <= all_tasks[f]:
+ all_tasks[to_task] += all_tasks[f]
+ disordered = True
+
+ sorted_task_names = list(sorted(list(all_tasks.keys()), key=lambda x:all_tasks[x]))
+ return which_task_follows, sorted_task_names, task_decorators, io_tasks, task_descriptions
+
+
+
+
+#_________________________________________________________________________________________
+
+# generate_program_task_file
+
+#_________________________________________________________________________________________
+def generate_program_task_file(stream, task_dependencies, task_names,
+ task_decorators, io_tasks, task_descriptions):
+
+ print("task_decorators = ", dumps(task_decorators, indent = 4), file=sys.stderr)
+ print("task_names = ", dumps(task_names), file=sys.stderr)
+ print("task_dependencies = ", dumps(task_dependencies), file=sys.stderr)
+ print("io_tasks = ", dumps(list(io_tasks)), file=sys.stderr)
+
+
+ if options.jumble_task_order:
+ random.shuffle(task_names)
+ defined_tasks = set()
+
+ #
+ # iterate through tasks
+ #
+ for task_name in task_names:
+ defined_tasks.add(task_name)
+ stream.write("\n#\n# %s\n#\n" % task_name)
+ #
+ # write task decorators
+ #
+ if task_name in task_decorators:
+ for decorator, decorator_parameters in task_decorators[task_name]:
+ stream.write("@" + decorator + decorator_parameters + "\n")
+
+ #
+ # write task dependencies
+ #
+ if task_name in task_dependencies:
+ params = ", ".join(t if t in defined_tasks else '"%s"' % t
+ for t in task_dependencies[task_name])
+ stream.write("@follows(%s)\n" % params)
+
+
+ #
+ # Function body
+ #
+ #if task_name in io_tasks:
+ if 1:
+ stream.write("def %s(infiles, outfiles, *extra_params):\n" % task_name)
+ stream.write(' """\n')
+ description = task_descriptions[task_name]
+ description = description.replace("\n", " \n")
+ stream.write(' %s\n' % description)
+ stream.write(' """\n')
+
+ stream.write(" test_job_io(infiles, outfiles, extra_params)\n")
+ #else:
+ # stream.write("def %s(*params):\n" % task_name)
+
+
+ stream.write("\n\n")
+
+ stream.write(
+ """
+#
+# Necessary to protect the "entry point" of the program under windows.
+# see: http://docs.python.org/library/multiprocessing.html#multiprocessing-programming
+#
+if __name__ == '__main__':
+ try:
+ if options.just_print:
+ pipeline_printout(sys.stdout, options.target_tasks, options.forced_tasks,
+ long_winded=True,
+ gnu_make_maximal_rebuild_mode = not options.minimal_rebuild_mode)
+
+ elif options.dependency_file:
+ pipeline_printout_graph ( open(options.dependency_file, "w"),
+ options.dependency_graph_format,
+ options.target_tasks,
+ options.forced_tasks,
+ draw_vertically = not options.draw_horizontally,
+ gnu_make_maximal_rebuild_mode = not options.minimal_rebuild_mode,
+ no_key_legend = options.no_key_legend_in_graph)
+ else:
+ pipeline_run(options.target_tasks, options.forced_tasks, multiprocess = options.jobs,
+ gnu_make_maximal_rebuild_mode = not options.minimal_rebuild_mode)
+ except Exception as e:
+ print(e.args)
+ \n""")
+
+
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Main logic
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+
+
+
+# get help string
+f = io.StringIO()
+parser.print_help(f)
+helpstr = f.getvalue()
+(options, remaining_args) = parser.parse_args()
+# mandatory options
+for parameter in mandatory_parameters:
+ if options.__dict__[parameter] is None:
+ die_error("Please specify a file in --%s.\n\n" % parameter + helpstr)
+
+
+
+(task_dependencies, task_names,
+ task_decorators, io_tasks,
+ task_descriptions) = task_dependencies_from_dotfile(open(options.dot_file))
+
+output_file = open(options.output_file, "w")
+
+#
+# print template for python file output
+#
+output_file.write(open(os.path.join(exe_path,"test_script.py_template")).read().
+ replace("PYPER_PATH",
+ os.path.abspath(os.path.join(exe_path, "..", ".."))))
+generate_program_task_file(output_file, task_dependencies, task_names,
+ task_decorators, io_tasks, task_descriptions)
diff --git a/ruffus/test/draw_specified_dependency_tree.py b/ruffus/test/draw_specified_dependency_tree.py
new file mode 100755
index 0000000..009216b
--- /dev/null
+++ b/ruffus/test/draw_specified_dependency_tree.py
@@ -0,0 +1,208 @@
+#!/usr/bin/env python
+from __future__ import print_function
+"""
+
+ draw_specified_dependency_tree.py
+
+"""
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# options
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+from optparse import OptionParser
+import sys, os
+import os.path
+try:
+ import StringIO as io
+except:
+ import io as io
+
+# add self to search path for testing
+exe_path = os.path.split(os.path.abspath(sys.argv[0]))[0]
+if __name__ == '__main__':
+ module_name = os.path.split(sys.argv[0])[1]
+ module_name = os.path.splitext(module_name)[0];
+else:
+ module_name = __name__
+
+
+
+parser = OptionParser(version="%prog 1.0")
+parser.add_option("-d", "--dot_file", dest="dot_file",
+ metavar="FILE",
+ default = os.path.join(exe_path, "test_data/dag.dependency"),
+ type="string",
+ help="name and path of tree file in DOT format")
+parser.add_option("-u", "--uptodate_job_names", dest="uptodate_job_names",
+ action="append",
+ metavar="JOBNAME",
+ default = list(),
+ type="string",
+ help="nodes to terminate on.")
+parser.add_option("-t", "--target_job_names", dest="target_job_names",
+ action="append",
+ default = list(),
+ metavar="JOBNAME",
+ type="string",
+ help="nodes to start on.")
+parser.add_option("-f", "--forced_job_names", dest="forced_job_names",
+ action="append",
+ default = list(),
+ metavar="JOBNAME",
+ type="string",
+ help="nodes to start on.")
+parser.add_option("-v", "--verbose", dest = "verbose",
+ action="count", default=0,
+ help="Print more verbose messages for each additional verbose level.")
+parser.add_option("-z", "--horizontal_graph", dest="horizontal_graph",
+ action="store_true", default=False,
+ help="Draw dependency graph horizontally")
+parser.add_option("--skip_upstream", dest="skip_upstream",
+ action="store_true", default=False,
+ help="Only draw from targets")
+parser.add_option("--skip_up_to_date", dest="skip_up_to_date",
+ action="store_true", default=False,
+ help="Only draw tasks which need to be rerun")
+
+parameters = [
+ "uptodate_job_names"
+ ]
+
+mandatory_parameters = ["dot_file"]
+
+
+
+
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# imports
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+import re
+import operator
+import sys
+from collections import defaultdict
+
+sys.path.append(os.path.abspath(os.path.join(exe_path,"..", "..")))
+from ruffus import *
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Functions
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+
+#_________________________________________________________________________________________
+
+# make_tree_from_dotfile
+
+#_________________________________________________________________________________________
+from adjacent_pairs_iterate import adjacent_pairs_iterate
+
+def make_tree_from_dotfile (stream):
+
+ attributes = re.compile(r"\[.+\]")
+
+ #
+ # remember node
+ #
+
+
+ for linenum, line in enumerate(stream):
+ line = line.strip()
+ if "digraph" in line:
+ continue;
+ if not len(line) or line[0] in '#{}/':
+ continue;
+ line = line.strip(';')
+ line = attributes.sub("", line)
+ if "=" in line:
+ continue;
+ nodes = [x.strip() for x in line.split('->')]
+ for name1, name2 in adjacent_pairs_iterate(nodes):
+ if not node.is_node(name1):
+ node(name1)
+ if not node.is_node(name2):
+ node(name2)
+ node.lookup_node_from_name(name2).add_child(node.lookup_node_from_name(name1))
+
+ #
+ # task hack
+ node.lookup_node_from_name(name2)._action = 1
+ node.lookup_node_from_name(name1)._action = 1
+
+
+
+def die_error(Msg):
+ """
+ Standard way of dying after a fatal error
+ """
+ print_error (Msg)
+ sys.exit()
+
+def print_error (Msg):
+ """
+ Standard way of printing error
+ """
+ sys.stderr.write("\nError:\n" + wrap_text(Msg, "\t", "") + "\n")
+
+
+
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Main logic
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+
+
+
+if __name__ == '__main__':
+
+ # get help string
+ f = io.StringIO()
+ parser.print_help(f)
+ helpstr = f.getvalue()
+ (options, remaining_args) = parser.parse_args()
+ # mandatory options
+ for parameter in mandatory_parameters:
+ if options.__dict__[parameter] is None:
+ die_error("Please specify a file in --%s.\n\n" % parameter + helpstr)
+
+
+
+
+
+
+ make_tree_from_dotfile(open(options.dot_file))
+
+ #
+ # set up_to_date jobs
+ #
+ uptodate_jobs = task_names_to_tasks ("Up to date", options.uptodate_job_names)
+ for n in uptodate_jobs:
+ n._signal = True
+
+ graph_printout_in_dot_format (sys.stdout,
+ options.target_job_names,
+ options.forced_job_names,
+ not options.horizontal_graph,
+ options.skip_upstream,
+ options.skip_up_to_date)
+
+
+
diff --git a/ruffus/test/five_second.py b/ruffus/test/five_second.py
new file mode 100755
index 0000000..eed4f98
--- /dev/null
+++ b/ruffus/test/five_second.py
@@ -0,0 +1,17 @@
+#!/usr/bin/env python
+from __future__ import print_function
+from time import sleep
+import random
+import sys
+
+try:
+ for i in range(50):
+ sleep(1)
+ #if random.randint(0, 5) == 4:
+ # print("Throw", file=sys.stderr)
+ # raise Exception("WWWWW")
+ print(i, end=' ', file=sys.stderr)
+ sleep(1)
+ print("Done", file=sys.stderr)
+except:
+ print("Exception!!!!!!!!!!!!!!!!!!!!!", file=sys.stderr)
+ pass
diff --git a/ruffus/test/play_with_colours.py b/ruffus/test/play_with_colours.py
new file mode 100755
index 0000000..30c7b3c
--- /dev/null
+++ b/ruffus/test/play_with_colours.py
@@ -0,0 +1,282 @@
+#!/usr/bin/env python
+from __future__ import print_function
+"""
+
+ play_with_colours.py
+ [--log_file PATH]
+ [--verbose]
+
+"""
+
+################################################################################
+#
+# test
+#
+#
+# Copyright (c) 7/13/2010 Leo Goodstadt
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+#################################################################################
+
+import sys, os
+
+# add self to search path for testing
+exe_path = os.path.split(os.path.abspath(sys.argv[0]))[0]
+sys.path.insert(0,os.path.abspath(os.path.join(exe_path,"..", "..")))
+if __name__ == '__main__':
+ module_name = os.path.split(sys.argv[0])[1]
+ module_name = os.path.splitext(module_name)[0];
+else:
+ module_name = __name__
+
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# options
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+
+from optparse import OptionParser
+try:
+ import StringIO as io
+except:
+ import io as io
+
+parser = OptionParser(version="%play_with_colours 1.0",
+ usage = "\n\n play_with_colours "
+ "--flowchart FILE [options] "
+ "[--colour_scheme_index INT ] "
+ "[--key_legend_in_graph]")
+
+#
+# pipeline
+#
+parser.add_option("--flowchart", dest="flowchart",
+ metavar="FILE",
+ type="string",
+ help="Don't actually run any commands; just print the pipeline "
+ "as a flowchart.")
+parser.add_option("--colour_scheme_index", dest="colour_scheme_index",
+ metavar="INTEGER",
+ type="int",
+ help="Index of colour scheme for flow chart.")
+parser.add_option("--key_legend_in_graph", dest="key_legend_in_graph",
+ action="store_true", default=False,
+ help="Print out legend and key for dependency graph.")
+
+(options, remaining_args) = parser.parse_args()
+if not options.flowchart:
+ raise Exception("Missing mandatory parameter: --flowchart.\n")
+
+
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# imports
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+from ruffus import *
+from ruffus.ruffus_exceptions import JobSignalledBreak
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Pipeline
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+
+#
+# up to date tasks
+#
+@check_if_uptodate (lambda : (False, ""))
+def Up_to_date_task1(infile, outfile):
+ pass
+
+@check_if_uptodate (lambda : (False, ""))
+@follows(Up_to_date_task1)
+def Up_to_date_task2(infile, outfile):
+ pass
+
+@check_if_uptodate (lambda : (False, ""))
+@follows(Up_to_date_task2)
+def Up_to_date_task3(infile, outfile):
+ pass
+
+
+@check_if_uptodate (lambda : (False, ""))
+@follows(Up_to_date_task3)
+def Up_to_date_final_target(infile, outfile):
+ pass
+
+
+#
+# Explicitly specified
+#
+@check_if_uptodate (lambda : (False, ""))
+@follows(Up_to_date_task1)
+def Explicitly_specified_task(infile, outfile):
+ pass
+
+
+
+#
+# Tasks to run
+#
+@follows(Explicitly_specified_task)
+def Task_to_run1(infile, outfile):
+ pass
+
+
+@follows(Task_to_run1)
+def Task_to_run2(infile, outfile):
+ pass
+
+@follows(Task_to_run2)
+def Task_to_run3(infile, outfile):
+ pass
+
+@check_if_uptodate (lambda : (False, ""))
+@follows(Task_to_run2)
+def Up_to_date_task_forced_to_rerun(infile, outfile):
+ pass
+
+
+#
+# Final target
+#
+@follows(Up_to_date_task_forced_to_rerun, Task_to_run3)
+def Final_target(infile, outfile):
+ pass
+
+#
+# Ignored downstream
+#
+@follows(Final_target)
+def Downstream_task1_ignored(infile, outfile):
+ pass
+
+@follows(Final_target)
+def Downstream_task2_ignored(infile, outfile):
+ pass
+
+
+
+
+
+
+
+
+
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Main logic
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+from collections import defaultdict
+custom_flow_chart_colour_scheme = defaultdict(dict)
+
+#
+# Base chart on this overall colour scheme index
+#
+custom_flow_chart_colour_scheme["colour_scheme_index"] = options.colour_scheme_index
+
+#
+# Overriding colours
+#
+if options.colour_scheme_index is None:
+ custom_flow_chart_colour_scheme["Vicious cycle"]["linecolor"] = '"#FF3232"'
+ custom_flow_chart_colour_scheme["Pipeline"]["fontcolor"] = '"#FF3232"'
+ custom_flow_chart_colour_scheme["Key"]["fontcolor"] = "black"
+ custom_flow_chart_colour_scheme["Key"]["fillcolor"] = '"#F6F4F4"'
+ custom_flow_chart_colour_scheme["Task to run"]["linecolor"] = '"#0044A0"'
+ custom_flow_chart_colour_scheme["Up-to-date"]["linecolor"] = "gray"
+ custom_flow_chart_colour_scheme["Final target"]["fillcolor"] = '"#EFA03B"'
+ custom_flow_chart_colour_scheme["Final target"]["fontcolor"] = "black"
+ custom_flow_chart_colour_scheme["Final target"]["color"] = "black"
+ custom_flow_chart_colour_scheme["Final target"]["dashed"] = 0
+ custom_flow_chart_colour_scheme["Vicious cycle"]["fillcolor"] = '"#FF3232"'
+ custom_flow_chart_colour_scheme["Vicious cycle"]["fontcolor"] = 'white'
+ custom_flow_chart_colour_scheme["Vicious cycle"]["color"] = "white"
+ custom_flow_chart_colour_scheme["Vicious cycle"]["dashed"] = 0
+ custom_flow_chart_colour_scheme["Up-to-date task"]["fillcolor"] = '"#B8CC6E"'
+ custom_flow_chart_colour_scheme["Up-to-date task"]["fontcolor"] = '"#006000"'
+ custom_flow_chart_colour_scheme["Up-to-date task"]["color"] = '"#006000"'
+ custom_flow_chart_colour_scheme["Up-to-date task"]["dashed"] = 0
+ custom_flow_chart_colour_scheme["Down stream"]["fillcolor"] = "white"
+ custom_flow_chart_colour_scheme["Down stream"]["fontcolor"] = "gray"
+ custom_flow_chart_colour_scheme["Down stream"]["color"] = "gray"
+ custom_flow_chart_colour_scheme["Down stream"]["dashed"] = 0
+ custom_flow_chart_colour_scheme["Explicitly specified task"]["fillcolor"] = "transparent"
+ custom_flow_chart_colour_scheme["Explicitly specified task"]["fontcolor"] = "black"
+ custom_flow_chart_colour_scheme["Explicitly specified task"]["color"] = "black"
+ custom_flow_chart_colour_scheme["Explicitly specified task"]["dashed"] = 0
+ custom_flow_chart_colour_scheme["Task to run"]["fillcolor"] = '"#EBF3FF"'
+ custom_flow_chart_colour_scheme["Task to run"]["fontcolor"] = '"#0044A0"'
+ custom_flow_chart_colour_scheme["Task to run"]["color"] = '"#0044A0"'
+ custom_flow_chart_colour_scheme["Task to run"]["dashed"] = 0
+ custom_flow_chart_colour_scheme["Up-to-date task forced to rerun"]["fillcolor"] = 'transparent'
+ custom_flow_chart_colour_scheme["Up-to-date task forced to rerun"]["fontcolor"] = '"#0044A0"'
+ custom_flow_chart_colour_scheme["Up-to-date task forced to rerun"]["color"] = '"#0044A0"'
+ custom_flow_chart_colour_scheme["Up-to-date task forced to rerun"]["dashed"] = 1
+ custom_flow_chart_colour_scheme["Up-to-date Final target"]["fillcolor"] = '"#EFA03B"'
+ custom_flow_chart_colour_scheme["Up-to-date Final target"]["fontcolor"] = '"#006000"'
+ custom_flow_chart_colour_scheme["Up-to-date Final target"]["color"] = '"#006000"'
+ custom_flow_chart_colour_scheme["Up-to-date Final target"]["dashed"] = 0
+
+if __name__ == '__main__':
+ pipeline_printout_graph (
+
+ open(options.flowchart, "w"),
+ # use flowchart file name extension to decide flowchart format
+ # e.g. svg, jpg etc.
+ os.path.splitext(options.flowchart)[1][1:],
+
+ # final targets
+ [Final_target, Up_to_date_final_target],
+
+ # Explicitly specified tasks
+ [Explicitly_specified_task],
+
+ # Do we want key legend
+ no_key_legend = not options.key_legend_in_graph,
+
+ # Print all the task types whether used or not
+ minimal_key_legend = False,
+
+ user_colour_scheme = custom_flow_chart_colour_scheme,
+ pipeline_name = "Colour schemes")
+
+
+
+
+
+
+
+
diff --git a/ruffus/test/qrsh_workaround.py b/ruffus/test/qrsh_workaround.py
new file mode 100755
index 0000000..4660626
--- /dev/null
+++ b/ruffus/test/qrsh_workaround.py
@@ -0,0 +1,50 @@
+#!/usr/bin/env python
+from __future__ import print_function
+import os, sys
+# add self to search path for testing
+exe_path = os.path.split(os.path.abspath(sys.argv[0]))[0]
+sys.path.insert(0,os.path.abspath(os.path.join(exe_path,"..", "..")))
+if __name__ == '__main__':
+ module_name = os.path.split(sys.argv[0])[1]
+ module_name = os.path.splitext(module_name)[0];
+else:
+ module_name = __name__
+from ruffus import *
+from subprocess import Popen, PIPE
+parameters = [
+ ['A', 1, 2], # 1st job
+ ['B', 2, 4], # 2nd job
+ ['C', 3, 6], # 3rd job
+ ['C', 4, 6], # 4th job
+ ['C', 5, 6], # 5th job
+ ['C', 6, 6], # 6th job
+ ['C', 7, 6], # 7th job
+ ['C', 8, 6], # 8th job
+ ['C', 9, 6], # 9th job
+ ['C', 10, 6], # 10th job
+ ]
+import time
+
+@parallel(parameters)
+@follows(mkdir("qrsh_workaround"))
+@posttask(lambda: os.system("rm -rf qrsh_workaround"))
+def parallel_task(name, param1, param2):
+ sys.stderr.write(" Parallel task %s: " % name)
+ sys.stderr.write("%d + %d = %d\n" % (param1, param2, param1 + param2))
+ cmds = ["qrsh",
+ "-now", "n",
+ "-cwd",
+ "-p", "-20",
+ "-N", "job%d" % (param1),
+ "-q", "short_jobs.q",
+ "ls qrsh_workaround > qrsh_workaround/output.%d" % param1]
+ p = Popen(cmds, stdin = PIPE)
+ p.stdin.close()
+ sts = os.waitpid(p.pid, 0)
+
+#
+# Necessary to protect the "entry point" of the program under windows.
+# see: http://docs.python.org/library/multiprocessing.html#multiprocessing-programming
+#
+if __name__ == '__main__':
+ pipeline_run([parallel_task], multiprocess = 5)
diff --git a/ruffus/test/simpler.py b/ruffus/test/simpler.py
new file mode 100644
index 0000000..16cee2e
--- /dev/null
+++ b/ruffus/test/simpler.py
@@ -0,0 +1,234 @@
+#!/usr/bin/env python
+from __future__ import print_function
+"""
+
+ simpler.py
+
+"""
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# options
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+from optparse import OptionParser
+import sys, os
+import os.path
+try:
+ import StringIO as io
+except:
+ import io as io
+
+# add self to search path for testing
+exe_path = os.path.split(os.path.abspath(sys.argv[0]))[0]
+sys.path.insert(0,os.path.abspath(os.path.join(exe_path,"..", "..")))
+if __name__ == '__main__':
+ module_name = os.path.split(sys.argv[0])[1]
+ module_name = os.path.splitext(module_name)[0];
+else:
+ module_name = __name__
+
+
+
+
+parser = OptionParser(version="%prog 1.0")
+parser.add_option("-t", "--target_tasks", dest="target_tasks",
+ action="append",
+ default = list(),
+ metavar="JOBNAME",
+ type="string",
+ help="Target task(s) of pipeline.")
+parser.add_option("-f", "--forced_tasks", dest="forced_tasks",
+ action="append",
+ default = list(),
+ metavar="JOBNAME",
+ type="string",
+ help="Pipeline task(s) which will be included even if they are up to date.")
+parser.add_option("-j", "--jobs", dest="jobs",
+ default=1,
+ metavar="jobs",
+ type="int",
+ help="Specifies the number of jobs (commands) to run simultaneously.")
+parser.add_option("-v", "--verbose", dest = "verbose",
+ action="count", default=0,
+ help="Print more verbose messages for each additional verbose level.")
+parser.add_option("-d", "--dependency", dest="dependency_file",
+ metavar="FILE",
+ type="string",
+ help="Print a dependency graph of the pipeline that would be executed "
+ "to FILE, but do not execute it.")
+parser.add_option("-F", "--dependency_graph_format", dest="dependency_graph_format",
+ metavar="FORMAT",
+ type="string",
+ default = 'svg',
+ help="format of dependency graph file. Can be 'ps' (PostScript), "+
+ "'svg' 'svgz' (Structured Vector Graphics), " +
+ "'png' 'gif' (bitmap graphics) etc ")
+parser.add_option("-n", "--just_print", dest="just_print",
+ action="store_true", default=False,
+ help="Print a description of the jobs that would be executed, "
+ "but do not execute them.")
+parser.add_option("-M", "--minimal_rebuild_mode", dest="minimal_rebuild_mode",
+ action="store_true", default=False,
+ help="Rebuild a minimum of tasks necessary for the target. "
+ "Ignore upstream out of date tasks if intervening tasks are fine.")
+parser.add_option("-K", "--no_key_legend_in_graph", dest="no_key_legend_in_graph",
+ action="store_true", default=False,
+ help="Do not print out legend and key for dependency graph.")
+parser.add_option("-H", "--draw_graph_horizontally", dest="draw_horizontally",
+ action="store_true", default=False,
+ help="Draw horizontal dependency graph.")
+
+parameters = [
+ ]
+
+
+
+
+
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# imports
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+import re
+import operator
+import sys,os
+from collections import defaultdict
+
+sys.path.append(os.path.abspath(os.path.join(exe_path,"..", "..")))
+from ruffus import *
+
+# use simplejson in place of json for python < 2.6
+try:
+ import json
+except ImportError:
+ import simplejson
+ json = simplejson
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Functions
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Main logic
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+
+
+
+
+# get help string
+f = io.StringIO()
+parser.print_help(f)
+helpstr = f.getvalue()
+(options, remaining_args) = parser.parse_args()
+
+import time
+def sleep_a_while ():
+ time.sleep(0.1)
+
+
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Tasks
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+
+#
+# task1
+#
+@files(None, 'a.1')
+def task1(infile, outfile):
+ """
+ First task
+ """
+ output_text = ""
+ output_text += json.dumps(infile) + " -> " + json.dumps(outfile) + "\n"
+ open(outfile, "w").write(output_text)
+
+
+
+#
+# task2
+#
+@transform(task1, suffix(".1"), ".2")
+def task2(infile, outfile):
+ """
+ Second task
+ """
+ output_text = open(infile).read() if infile else ""
+ output_text += json.dumps(infile) + " -> " + json.dumps(outfile) + "\n"
+ open(outfile, "w").write(output_text)
+
+
+
+#
+# task3
+#
+@transform(task2, suffix(".2"), ".3")
+def task3(infile, outfile):
+ """
+ Third task
+ """
+ output_text = open(infile).read() if infile else ""
+ output_text += json.dumps(infile) + " -> " + json.dumps(outfile) + "\n"
+ open(outfile, "w").write(output_text)
+
+
+
+#
+# task4
+#
+@transform(task3, suffix(".3"), ".4")
+def task4(infile, outfile):
+ """
+ Fourth task
+ """
+ output_text = open(infile).read() if infile else ""
+ output_text += json.dumps(infile) + " -> " + json.dumps(outfile) + "\n"
+ open(outfile, "w").write(output_text)
+
+#
+# Necessary to protect the "entry point" of the program under windows.
+# see: http://docs.python.org/library/multiprocessing.html#multiprocessing-programming
+#
+if __name__ == '__main__':
+ try:
+ if options.just_print:
+ pipeline_printout(sys.stdout, options.target_tasks, options.forced_tasks,
+ gnu_make_maximal_rebuild_mode = not options.minimal_rebuild_mode,
+ verbose = options.verbose)
+
+ elif options.dependency_file:
+ pipeline_printout_graph ( open(options.dependency_file, "w"),
+ options.dependency_graph_format,
+ options.target_tasks,
+ options.forced_tasks,
+ draw_vertically = not options.draw_horizontally,
+ gnu_make_maximal_rebuild_mode = not options.minimal_rebuild_mode,
+ no_key_legend = options.no_key_legend_in_graph)
+ else:
+ pipeline_run(options.target_tasks, options.forced_tasks, multiprocess = options.jobs,
+ gnu_make_maximal_rebuild_mode = not options.minimal_rebuild_mode,
+ verbose = options.verbose)
+ except Exception as e:
+ print(e.args)
diff --git a/ruffus/test/simpler_at_runtime.py b/ruffus/test/simpler_at_runtime.py
new file mode 100755
index 0000000..9c79bdf
--- /dev/null
+++ b/ruffus/test/simpler_at_runtime.py
@@ -0,0 +1,253 @@
+#!/usr/bin/env python
+from __future__ import print_function
+"""
+
+ simpler_at_runtime.py
+
+"""
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# options
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+from optparse import OptionParser
+import sys, os
+import os.path
+try:
+ import StringIO as io
+except:
+ import io as io
+
+# add self to search path for testing
+exe_path = os.path.split(os.path.abspath(sys.argv[0]))[0]
+sys.path.insert(0,os.path.abspath(os.path.join(exe_path,"..", "..")))
+if __name__ == '__main__':
+ module_name = os.path.split(sys.argv[0])[1]
+ module_name = os.path.splitext(module_name)[0];
+else:
+ module_name = __name__
+
+
+
+
+parser = OptionParser(version="%prog 1.0")
+parser.add_option("-t", "--target_tasks", dest="target_tasks",
+ action="append",
+ default = list(),
+ metavar="JOBNAME",
+ type="string",
+ help="Target task(s) of pipeline.")
+parser.add_option("-R", "--runtime_files", dest="runtime_files",
+ action="append",
+ default = ["a.3"],
+ metavar="JOBNAME",
+ type="string",
+ help="Target task(s) of pipeline.")
+parser.add_option("-f", "--forced_tasks", dest="forced_tasks",
+ action="append",
+ default = list(),
+ metavar="JOBNAME",
+ type="string",
+ help="Pipeline task(s) which will be included even if they are up to date.")
+parser.add_option("-j", "--jobs", dest="jobs",
+ default=1,
+ metavar="jobs",
+ type="int",
+ help="Specifies the number of jobs (commands) to run simultaneously.")
+parser.add_option("-v", "--verbose", dest = "verbose",
+ action="count", default=0,
+ help="Print more verbose messages for each additional verbose level.")
+parser.add_option("-d", "--debug", dest = "debug",
+ action="count", default=0,
+ help="Cleanup afterwards.")
+parser.add_option("--dependency", dest="dependency_file",
+ metavar="FILE",
+ type="string",
+ help="Print a dependency graph of the pipeline that would be executed "
+ "to FILE, but do not execute it.")
+parser.add_option("-F", "--dependency_graph_format", dest="dependency_graph_format",
+ metavar="FORMAT",
+ type="string",
+ default = 'svg',
+ help="format of dependency graph file. Can be 'ps' (PostScript), "+
+ "'svg' 'svgz' (Structured Vector Graphics), " +
+ "'png' 'gif' (bitmap graphics) etc ")
+parser.add_option("-n", "--just_print", dest="just_print",
+ action="store_true", default=False,
+ help="Print a description of the jobs that would be executed, "
+ "but do not execute them.")
+parser.add_option("-M", "--minimal_rebuild_mode", dest="minimal_rebuild_mode",
+ action="store_true", default=False,
+ help="Rebuild a minimum of tasks necessary for the target. "
+ "Ignore upstream out of date tasks if intervening tasks are fine.")
+parser.add_option("-K", "--no_key_legend_in_graph", dest="no_key_legend_in_graph",
+ action="store_true", default=False,
+ help="Do not print out legend and key for dependency graph.")
+parser.add_option("-H", "--draw_graph_horizontally", dest="draw_horizontally",
+ action="store_true", default=False,
+ help="Draw horizontal dependency graph.")
+
+parameters = [
+ ]
+
+
+
+
+
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# imports
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+import re
+import operator
+import sys,os
+from collections import defaultdict
+
+sys.path.append(os.path.abspath(os.path.join(exe_path,"..", "..")))
+from ruffus import *
+
+# use simplejson in place of json for python < 2.6
+try:
+ import json
+except ImportError:
+ import simplejson
+ json = simplejson
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Functions
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Main logic
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+
+
+
+
+# get help string
+f = io.StringIO()
+parser.print_help(f)
+helpstr = f.getvalue()
+(options, remaining_args) = parser.parse_args()
+
+
+
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Tasks
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+
+#
+# task1
+#
+@originate(['a.1'] + options.runtime_files)
+def task1(outfile):
+ """
+ First task
+ """
+ output_text = ""
+ output_text += " -> " + json.dumps(outfile) + "\n"
+ open(outfile, "w").write(output_text)
+
+
+
+#
+# task2
+#
+@transform(task1, suffix(".1"), ".2")
+def task2(infile, outfile):
+ """
+ Second task
+ """
+ output_text = open(infile).read() if infile else ""
+ output_text += json.dumps(infile) + " -> " + json.dumps(outfile) + "\n"
+ open(outfile, "w").write(output_text)
+
+
+
+#
+# task3
+#
+@transform(task2, suffix(".2"), ".3")
+def task3(infile, outfile):
+ """
+ Third task
+ """
+ output_text = open(infile).read() if infile else ""
+ output_text += json.dumps(infile) + " -> " + json.dumps(outfile) + "\n"
+ open(outfile, "w").write(output_text)
+
+
+
+#
+# task4
+#
+@follows(task3)
+@transform(runtime_parameter("a"), suffix(".3"), ".4")
+def task4(infile, outfile):
+ """
+ Fourth task
+ """
+ output_text = open(infile).read() if infile else ""
+ output_text += json.dumps(infile) + " -> " + json.dumps(outfile) + "\n"
+ open(outfile, "w").write(output_text)
+
+#
+# Necessary to protect the "entry point" of the program under windows.
+# see: http://docs.python.org/library/multiprocessing.html#multiprocessing-programming
+#
+if __name__ == '__main__':
+ if options.just_print:
+ pipeline_printout(sys.stdout, options.target_tasks, options.forced_tasks,
+ gnu_make_maximal_rebuild_mode = not options.minimal_rebuild_mode,
+ verbose = options.verbose, runtime_data = {"a": options.runtime_files})
+
+ elif options.dependency_file:
+ pipeline_printout_graph ( open(options.dependency_file, "w"),
+ options.dependency_graph_format,
+ options.target_tasks,
+ options.forced_tasks,
+ draw_vertically = not options.draw_horizontally,
+ gnu_make_maximal_rebuild_mode = not options.minimal_rebuild_mode,
+ no_key_legend = options.no_key_legend_in_graph)
+ elif options.debug:
+ import os
+ for f in ["a.1", "a.2","a.3","a.4"]:
+ if os.path.exists(f):
+ os.unlink(f)
+ pipeline_run(options.target_tasks, options.forced_tasks, multiprocess = options.jobs,
+ gnu_make_maximal_rebuild_mode = not options.minimal_rebuild_mode,
+ verbose = options.verbose, runtime_data = {"a": options.runtime_files})
+ for f in ["a.1", "a.2","a.3","a.4"]:
+ if os.path.exists(f):
+ os.unlink(f)
+ else:
+ raise Exception("%s is missing" % f)
+ print("OK")
+ else:
+ pipeline_run(options.target_tasks, options.forced_tasks, multiprocess = options.jobs,
+ gnu_make_maximal_rebuild_mode = not options.minimal_rebuild_mode,
+ verbose = options.verbose, runtime_data = {"a": options.runtime_files})
diff --git a/ruffus/test/simpler_with_shared_logging.py b/ruffus/test/simpler_with_shared_logging.py
new file mode 100755
index 0000000..0ef6545
--- /dev/null
+++ b/ruffus/test/simpler_with_shared_logging.py
@@ -0,0 +1,322 @@
+#!/usr/bin/env python
+from __future__ import print_function
+"""
+
+ simpler_with_shared_logging.py
+
+"""
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# options
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+from optparse import OptionParser
+import sys, os
+import os.path
+try:
+ import StringIO as io
+except:
+ import io as io
+
+# add self to search path for testing
+exe_path = os.path.split(os.path.abspath(sys.argv[0]))[0]
+sys.path.insert(0,os.path.abspath(os.path.join(exe_path,"..", "..")))
+if __name__ == '__main__':
+ module_name = os.path.split(sys.argv[0])[1]
+ module_name = os.path.splitext(module_name)[0];
+else:
+ module_name = __name__
+
+
+
+
+parser = OptionParser(version="%prog 1.0")
+parser.add_option("-t", "--target_tasks", dest="target_tasks",
+ action="append",
+ default = list(),
+ metavar="JOBNAME",
+ type="string",
+ help="Target task(s) of pipeline.")
+parser.add_option("-f", "--forced_tasks", dest="forced_tasks",
+ action="append",
+ default = list(),
+ metavar="JOBNAME",
+ type="string",
+ help="Pipeline task(s) which will be included even if they are up to date.")
+parser.add_option("-j", "--jobs", dest="jobs",
+ default=1,
+ metavar="jobs",
+ type="int",
+ help="Specifies the number of jobs (commands) to run simultaneously.")
+parser.add_option("-v", "--verbose", dest = "verbose",
+ action="count", default=0,
+ help="Print more verbose messages for each additional verbose level.")
+parser.add_option("-d", "--dependency", dest="dependency_file",
+ #default="simple.svg",
+ metavar="FILE",
+ type="string",
+ help="Print a dependency graph of the pipeline that would be executed "
+ "to FILE, but do not execute it.")
+parser.add_option("-F", "--dependency_graph_format", dest="dependency_graph_format",
+ metavar="FORMAT",
+ type="string",
+ default = 'svg',
+ help="format of dependency graph file. Can be 'ps' (PostScript), "+
+ "'svg' 'svgz' (Structured Vector Graphics), " +
+ "'png' 'gif' (bitmap graphics) etc ")
+parser.add_option("-n", "--just_print", dest="just_print",
+ action="store_true", default=False,
+ help="Print a description of the jobs that would be executed, "
+ "but do not execute them.")
+parser.add_option("-M", "--minimal_rebuild_mode", dest="minimal_rebuild_mode",
+ action="store_true", default=False,
+ help="Rebuild a minimum of tasks necessary for the target. "
+ "Ignore upstream out of date tasks if intervening tasks are fine.")
+parser.add_option("-K", "--no_key_legend_in_graph", dest="no_key_legend_in_graph",
+ action="store_true", default=False,
+ help="Do not print out legend and key for dependency graph.")
+parser.add_option("-H", "--draw_graph_horizontally", dest="draw_horizontally",
+ action="store_true", default=False,
+ help="Draw horizontal dependency graph.")
+
+parser.add_option("-L", "--log_file_name", dest="log_file_name",
+ default="/tmp/simple.log",
+ metavar="FILE",
+ type="string",
+ help="log file.")
+parameters = [
+ ]
+
+
+
+
+
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# imports
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+import re
+import operator
+import sys,os
+from collections import defaultdict
+
+sys.path.append(os.path.abspath(os.path.join(exe_path,"..", "..")))
+from ruffus import *
+
+# use simplejson in place of json for python < 2.6
+try:
+ import json
+except ImportError:
+ import simplejson
+ json = simplejson
+
+
+from ruffus.proxy_logger import *
+import logging
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Shared logging
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+
+
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Functions
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+def create_custom_file_func(params):
+ """
+ creates a generator function which can be used as input to @files
+ """
+ def cust_func ():
+ for job_param in params:
+ yield job_param
+ return cust_func
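+
+# Illustrative (hypothetical) usage: pass the resulting generator to @files so
+# that each parameter list becomes one job, e.g.
+# @files(create_custom_file_func([["in.1", "out.1"], ["in.2", "out.2"]]))
+# def my_task(infiles, outfiles, *extra_params):
+# test_job_io(infiles, outfiles, extra_params)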
+
+def is_job_uptodate (infiles, outfiles, *extra_params):
+ """
+ assumes first two parameters are files, checks if they are up to date
+ """
+ return task.needs_update_check_modify_time (infiles, outfiles, *extra_params)
+
+def test_post_task_function ():
+ print("Hooray")
+
+import time
+import multiprocessing
+def test_job_io(infiles, outfiles, extra_params):
+ """
+ cat input files content to output files
+ after writing out job parameters
+ """
+
+ # dump parameters
+ params = (infiles, outfiles)# + extra_params[0:-3]
+
+ logger_proxy, logging_mutex = extra_params
+ with logging_mutex:
+ logger_proxy.debug("job = %s, process name = %s" %
+ (json.dumps(params),
+ multiprocessing.current_process().name))
+
+
+ sys.stdout.write(' job = %s\n' % json.dumps(params))
+
+ if isinstance(infiles, str):
+ infiles = [infiles]
+ elif infiles == None:
+ infiles = []
+ if isinstance(outfiles, str):
+ outfiles = [outfiles]
+ output_text = list()
+ for f in infiles:
+ output_text.append(open(f).read())
+ output_text = "".join(sorted(output_text))
+ output_text += json.dumps(infiles) + " -> " + json.dumps(outfiles) + "\n"
+ for f in outfiles:
+ open(f, "w").write(output_text)
+
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Main logic
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+
+
+
+if __name__ == '__main__':
+
+ # get help string
+ f = io.StringIO()
+ parser.print_help(f)
+ helpstr = f.getvalue()
+
+ #
+ # Get options
+ #
+ (options, remaining_args) = parser.parse_args()
+
+ args={}
+ args["file_name"] = options.log_file_name
+ args["level"] = logging.DEBUG
+ args["rotating"] = True
+ args["maxBytes"]=20000
+ args["backupCount"]=10
+ args["formatter"]="%(asctime)s - %(name)s - %(levelname)6s - %(message)s"
+
+ (logger_proxy,
+ logging_mutex) = make_shared_logger_and_proxy (setup_std_shared_logger,
+ "my_logger", args)
+
+
+
+
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Tasks
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+
+
+
+
+#
+# task1
+#
+@files(None, os.path.abspath('a.1'), logger_proxy, logging_mutex)
+def task1(infiles, outfiles, *extra_params):
+ """
+ First task
+ """
+ test_job_io(infiles, outfiles, extra_params)
+
+
+
+#
+# task2
+#
+@files_re('*.1', '(.*).1', r'\1.1', r'\1.2', logger_proxy, logging_mutex)
+@follows(task1)
+def task2(infiles, outfiles, *extra_params):
+ """
+ Second task
+ """
+ test_job_io(infiles, outfiles, extra_params)
+
+
+
+#
+# task3
+#
+@files_re('*.1', '(.*).1', r'\1.2', r'\1.3', logger_proxy, logging_mutex)
+@follows(task2)
+def task3(infiles, outfiles, *extra_params):
+ """
+ Third task
+ """
+ test_job_io(infiles, outfiles, extra_params)
+
+
+
+#
+# task4
+#
+@files_re('*.1', '(.*).1', r'\1.3', r'\1.4', logger_proxy, logging_mutex)
+@follows(task3)
+def task4(infiles, outfiles, *extra_params):
+ """
+ Fourth task
+ """
+ test_job_io(infiles, outfiles, extra_params)
+
+#
+# Necessary to protect the "entry point" of the program under windows.
+# see: http://docs.python.org/library/multiprocessing.html#multiprocessing-programming
+#
+if __name__ == '__main__':
+ try:
+ if options.just_print:
+ pipeline_printout(sys.stdout, options.target_tasks, options.forced_tasks,
+ verbose = options.verbose,
+ gnu_make_maximal_rebuild_mode = not options.minimal_rebuild_mode)
+
+ elif options.dependency_file:
+ pipeline_printout_graph ( open(options.dependency_file, "w"),
+ options.dependency_graph_format,
+ options.target_tasks,
+ options.forced_tasks,
+ draw_vertically = not options.draw_horizontally,
+ gnu_make_maximal_rebuild_mode = not options.minimal_rebuild_mode,
+ no_key_legend = options.no_key_legend_in_graph)
+ else:
+ pipeline_run(options.target_tasks, options.forced_tasks, multiprocess = options.jobs,
+ gnu_make_maximal_rebuild_mode = not options.minimal_rebuild_mode,
+ verbose = options.verbose,
+ logger = logger_proxy)
+ except Exception as e:
+ print(e.args)
diff --git a/ruffus/test/test_N_x_M_and_collate.py b/ruffus/test/test_N_x_M_and_collate.py
new file mode 100755
index 0000000..3f3b3ab
--- /dev/null
+++ b/ruffus/test/test_N_x_M_and_collate.py
@@ -0,0 +1,425 @@
+#!/usr/bin/env python
+from __future__ import print_function
+import sys
+if sys.hexversion < 0x03000000:
+ from future_builtins import zip, map
+"""
+
+ test_N_x_M_and_collate.py
+
+
+ This script takes N pairs of input files
+ (with the suffixes .gene and .gwas)
+ and runs them against M sets of simulation data
+ (with the suffix .simulation)
+ A summary per input file pair is then produced
+
+
+ In pseudo-code:
+
+ STEP_1:
+
+ for n_file in NNN_pairs_of_input_files:
+ for m_file in MMM_simulation_data:
+
+ [n_file.gene,
+ n_file.gwas,
+ m_file.simulation] -> n_file.m_file.simulation_res
+
+
+ STEP_2:
+
+ for n_file in NNN_pairs_of_input_files:
+
+ n_file.*.simulation_res -> n_file.mean
+
+
+ n = CNT_GENE_GWAS_FILES
+ m = CNT_SIMULATION_FILES
+
+
+
+"""
+
+CNT_GENE_GWAS_FILES = 2
+CNT_SIMULATION_FILES = 3
+
+
+
+import os, sys
+exe_path = os.path.split(os.path.abspath(sys.argv[0]))[0]
+sys.path.insert(0, os.path.abspath(os.path.join(exe_path,"..", "..")))
+
+
+
+from ruffus import *
+import random
+
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# options
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+from optparse import OptionParser
+parser = OptionParser(version="%prog 1.0")
+parser.add_option("-D", "--debug", dest = "debug",
+ action="store_true", default=False,
+ help="Run as unit test with default values.")
+parser.add_option("-k", "--keep", dest = "keep",
+ action="store_true", default=False,
+ help="Do not cleanup after unit test runs.")
+parser.add_option("-t", "--target_tasks", dest="target_tasks",
+ action="append",
+ default = ["statistical_summary"],
+ metavar="JOBNAME",
+ type="string",
+ help="Target task(s) of pipeline.")
+parser.add_option("-f", "--forced_tasks", dest="forced_tasks",
+ action="append",
+ default = list(),
+ metavar="JOBNAME",
+ type="string",
+ help="Pipeline task(s) which will be included even if they are up to date.")
+parser.add_option("-j", "--jobs", dest="jobs",
+ default=5,
+ metavar="jobs",
+ type="int",
+ help="Specifies the number of jobs (commands) to run simultaneously.")
+
+parser.add_option("-g", "--gene_data_dir", dest="gene_data_dir",
+ default="%s/temp_gene_data_for_intermediate_example" % exe_path,
+ metavar="PATH",
+ type="string",
+ help="Directory with gene data [*.genes / *.gwas].")
+parser.add_option("-s", "--simulation_data_dir", dest="simulation_data_dir",
+ default="%s/temp_simulation_data_for_intermediate_example" % exe_path,
+ metavar="PATH",
+ type="string",
+ help="Directory with simulation data [*.simulation].")
+parser.add_option("-w", "--working_dir", dest="working_dir",
+ default="%s/working_dir_for_intermediate_example" % exe_path,
+ metavar="PATH",
+ type="string",
+ help="Working directory.")
+
+
+parser.add_option("-v", "--verbose", dest = "verbose",
+ action="count", default=0,
+ help="Print more verbose messages for each additional verbose level.")
+parser.add_option("-d", "--dependency", dest="dependency_file",
+ metavar="FILE",
+ type="string",
+ help="Print a dependency graph of the pipeline that would be executed "
+ "to FILE, but do not execute it.")
+parser.add_option("-F", "--dependency_graph_format", dest="dependency_graph_format",
+ metavar="FORMAT",
+ type="string",
+ default = 'svg',
+ help="format of dependency graph file. Can be 'ps' (PostScript), "+
+ "'svg' 'svgz' (Structured Vector Graphics), " +
+ "'png' 'gif' (bitmap graphics) etc ")
+parser.add_option("-n", "--just_print", dest="just_print",
+ action="store_true", default=False,
+ help="Print a description of the jobs that would be executed, "
+ "but do not execute them.")
+
+
+
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# imports
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+try:
+ import StringIO as io
+except:
+ import io as io
+
+import re
+import operator
+import sys
+from collections import defaultdict
+import glob
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Functions
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+#_________________________________________________________________________________________
+#
+# get gene gwas file pairs
+#
+#_________________________________________________________________________________________
+def get_gene_gwas_file_pairs( ):
+ """
+ Helper function to get all *.gene, *.gwas from the directory specified
+ in --gene_data_dir
+
+ Returns
+ file pairs with both .gene and .gwas extensions,
+ corresponding roots (no extension) of each file
+ """
+
+
+ gene_files = glob.glob(os.path.join(options.gene_data_dir, "*.gene"))
+ gwas_files = glob.glob(os.path.join(options.gene_data_dir, "*.gwas"))
+
+ common_roots = set([os.path.splitext(os.path.split(x)[1])[0] for x in gene_files])
+ common_roots &=set([os.path.splitext(os.path.split(x)[1])[0] for x in gwas_files])
+ common_roots = list(common_roots)
+
+ p = os.path; g_dir = options.gene_data_dir
+
+ file_pairs = [[p.join(g_dir, x + ".gene"), p.join(g_dir, x + ".gwas")] for x in common_roots]
+
+ return file_pairs, common_roots
+
+#_________________________________________________________________________________________
+#
+# get simulation files
+#
+#_________________________________________________________________________________________
+def get_simulation_files( ):
+ """
+ Helper function to get all *.simulation from the directory specified
+ in --simulation_data_dir
+ Returns
+ file with .simulation extensions,
+ corresponding roots (no extension) of each file
+ """
+ simulation_files = glob.glob(os.path.join(options.simulation_data_dir, "*.simulation"))
+ simulation_roots =[os.path.splitext(os.path.split(x)[1])[0] for x in simulation_files]
+ return simulation_files, simulation_roots
+
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Main logic
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+
+
+
+
+# get help string
+f =io.StringIO()
+parser.print_help(f)
+helpstr = f.getvalue()
+(options, remaining_args) = parser.parse_args()
+
+
+working_dir = options.working_dir
+
+
+
+
+#_________________________________________________________________________________________
+#
+# setup_simulation_data
+#
+#_________________________________________________________________________________________
+
+#
+# mkdir: makes sure output directories exist before task
+#
+@follows(mkdir(options.gene_data_dir, options.simulation_data_dir))
+def setup_simulation_data ():
+ """
+ create simulation files
+ """
+ for i in range(CNT_GENE_GWAS_FILES):
+ open(os.path.join(options.gene_data_dir, "%03d.gene" % i), "w")
+ open(os.path.join(options.gene_data_dir, "%03d.gwas" % i), "w")
+
+ # gene files without corresponding gwas and vice versa
+ open(os.path.join(options.gene_data_dir, "orphan1.gene"), "w")
+ open(os.path.join(options.gene_data_dir, "orphan2.gwas"), "w")
+ open(os.path.join(options.gene_data_dir, "orphan3.gwas"), "w")
+
+ for i in range(CNT_SIMULATION_FILES):
+ open(os.path.join(options.simulation_data_dir, "%03d.simulation" % i), "w")
+
+
+
+
+#_________________________________________________________________________________________
+#
+# cleanup_simulation_data
+#
+#_________________________________________________________________________________________
+def try_rmdir (d):
+ if os.path.exists(d):
+ try:
+ os.rmdir(d)
+ except OSError:
+ sys.stderr.write("Warning:\t%s is not empty and will not be removed.\n" % d)
+
+def cleanup_simulation_data ():
+ """
+ cleanup files
+ """
+ if options.verbose:
+ sys.stderr.write("Cleanup working directory and simulation files.\n")
+
+ #
+ # cleanup gene and gwas files
+ #
+ for f in glob.glob(os.path.join(options.gene_data_dir, "*.gene")):
+ os.unlink(f)
+ for f in glob.glob(os.path.join(options.gene_data_dir, "*.gwas")):
+ os.unlink(f)
+ try_rmdir(options.gene_data_dir)
+
+ #
+ # cleanup simulation
+ #
+ for f in glob.glob(os.path.join(options.simulation_data_dir, "*.simulation")):
+ os.unlink(f)
+ try_rmdir(options.simulation_data_dir)
+
+
+ #
+ # cleanup working_dir
+ #
+ for f in glob.glob(os.path.join(working_dir, "simulation_results", "*.simulation_res")):
+ os.unlink(f)
+ try_rmdir(os.path.join(working_dir, "simulation_results"))
+
+ for f in glob.glob(os.path.join(working_dir, "*.mean")):
+ os.unlink(f)
+ try_rmdir(working_dir)
+
+
+#_________________________________________________________________________________________
+#
+# Step 1:
+#
+# for n_file in NNN_pairs_of_input_files:
+# for m_file in MMM_simulation_data:
+#
+# [n_file.gene,
+# n_file.gwas,
+# m_file.simulation] -> working_dir/n_file.m_file.simulation_res
+#
+#_________________________________________________________________________________________
+def generate_simulation_params ():
+ """
+ Custom function to generate
+ file names for gene/gwas simulation study
+ """
+
+ simulation_files, simulation_file_roots = get_simulation_files()
+ gene_gwas_file_pairs, gene_gwas_file_roots = get_gene_gwas_file_pairs()
+
+ for sim_file, sim_file_root in zip(simulation_files, simulation_file_roots):
+ for (gene, gwas), gene_file_root in zip(gene_gwas_file_pairs, gene_gwas_file_roots):
+
+ result_file = "%s.%s.simulation_res" % (gene_file_root, sim_file_root)
+ result_file_path = os.path.join(working_dir, "simulation_results", result_file)
+
+ yield [gene, gwas, sim_file], result_file_path, gene_file_root, sim_file_root, result_file
+
+#
+# mkdir: makes sure output directories exist before task
+#
+@follows(mkdir(options.working_dir, os.path.join(working_dir, "simulation_results")))
+@files(generate_simulation_params)
+def gwas_simulation(input_files, result_file_path, gene_file_root, sim_file_root, result_file):
+ """
+ Dummy calculation of gene gwas vs simulation data
+ Normally runs in parallel on a computational cluster
+ """
+ (gene_file,
+ gwas_file,
+ simulation_data_file) = input_files
+
+ simulation_res_file = open(result_file_path, "w")
+ simulation_res_file.write("%s + %s -> %s\n" % (gene_file_root, sim_file_root, result_file))
+
+
+#_________________________________________________________________________________________
+#
+# Step 2:
+#
+# Statistical summary per gene/gwas file pair
+#
+# for n_file in NNN_pairs_of_input_files:
+# working_dir/simulation_results/n.*.simulation_res
+# -> working_dir/n.mean
+#
+#_________________________________________________________________________________________
+
+
+@collate(gwas_simulation, regex(r"simulation_results/(\d+).\d+.simulation_res"), r"\1.mean")
+@posttask(lambda : sys.stdout.write("\nOK\n"))
+def statistical_summary (result_files, summary_file):
+ """
+ Simulate statistical summary
+ """
+
+ summary_file = open(summary_file, "w")
+ for f in result_files:
+ summary_file.write(open(f).read())
+
+
+
+
+
+
+
+
+
+#888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+#
+# print pipeline or run pipeline
+#
+
+#
+# Necessary to protect the "entry point" of the program under windows.
+# see: http://docs.python.org/library/multiprocessing.html#multiprocessing-programming
+#
+if __name__ == '__main__':
+ try:
+ if options.debug:
+ if not len(options.target_tasks):
+ options.target_tasks.append([statistical_summary])
+ pipeline_run([setup_simulation_data], [setup_simulation_data], multiprocess = options.jobs, verbose = 0)
+ else:
+ if (not len(get_gene_gwas_file_pairs( )[0]) or
+ not len (get_simulation_files( )[0])):
+ print("Warning!!\n\n\tNo *.gene / *.gwas or *.simulation: Run --debug to create simulation files first\n\n")
+ sys.exit(1)
+
+
+ if options.just_print:
+ pipeline_printout(sys.stdout, options.target_tasks, options.forced_tasks, verbose=options.verbose)
+
+ elif options.dependency_file:
+ graph_printout ( open(options.dependency_file, "w"),
+ options.dependency_graph_format,
+ options.target_tasks,
+ options.forced_tasks)
+ else:
+ pipeline_run(options.target_tasks, options.forced_tasks, multiprocess = options.jobs, verbose = options.verbose)
+
+
+ if options.debug and not options.keep:
+ cleanup_simulation_data ()
+
+ except Exception as e:
+ print(e.args)
+ raise
+
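
The example above drives @files() with a custom generator so that job
parameters are computed only when the pipeline runs. A minimal sketch of that
pattern, not taken from the upstream sources; the task and output names below
are invented for illustration:

    from ruffus import *

    def make_params():
        # yield one (input, output, extra...) parameter list per job;
        # a None input means the job re-runs only if its output is missing
        for i in range(3):
            yield None, "sketch_%d.result" % i, i

    @files(make_params)
    def run_job(input_file, output_file, job_index):
        open(output_file, "w").write("job %d\n" % job_index)

    if __name__ == "__main__":
        pipeline_run([run_job])
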
diff --git a/ruffus/test/test_active_if.py b/ruffus/test/test_active_if.py
new file mode 100755
index 0000000..d8435b9
--- /dev/null
+++ b/ruffus/test/test_active_if.py
@@ -0,0 +1,218 @@
+#!/usr/bin/env python
+from __future__ import print_function
+"""
+
+ test_active_if.py
+
+"""
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# options
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+from optparse import OptionParser
+import sys, os
+import os.path
+try:
+ import StringIO as io
+except:
+ import io as io
+
+exe_path = os.path.split(os.path.abspath(sys.argv[0]))[0]
+# add self to search path for testing
+sys.path.insert(0,os.path.abspath(os.path.join(exe_path,"..", "..")))
+if __name__ == '__main__':
+ module_name = os.path.split(sys.argv[0])[1]
+ module_name = os.path.splitext(module_name)[0];
+else:
+ module_name = __name__
+
+
+
+
+from ruffus import *
+
+parser = cmdline.get_argparse( description='Test @active_if')
+
+
+options = parser.parse_args()
+
+# optional logger which can be passed to ruffus tasks
+logger, logger_mutex = cmdline.setup_logging (__name__, options.log_file, options.verbose)
+
+
+
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# imports
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+import re
+import operator
+import sys,os
+from collections import defaultdict
+
+
+import json
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Functions
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Main logic
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+
+
+
+
+
+def helper (infiles, outfiles):
+ if not isinstance(infiles, (tuple, list)):
+ infiles = [infiles]
+ if not isinstance(outfiles, list):
+ outfiles = [outfiles]
+
+ output_text = ""
+ preamble_len = 0
+ for infile in infiles:
+ if infile:
+ for line in open(infile):
+ output_text += line
+ preamble_len = max(preamble_len, len(line) - len(line.lstrip()))
+
+ preamble = " " * (preamble_len + 4) if len(output_text) else ""
+
+ for outfile in outfiles:
+ file_output_text = preamble + json.dumps(infile) + " -> " + json.dumps(outfile) + "\n"
+ open(outfile, "w").write(output_text + file_output_text)
+
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Tasks
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+pipeline_active_if = True
+#
+# task1
+#
+ at follows(mkdir("test_active_if"))
+ at originate(['test_active_if/a.1', 'test_active_if/b.1'], "an extra_parameter")
+def task1(outfile, extra):
+ """
+ First task
+ """
+ sys.stderr.write("originate works with outfile '%s'" % outfile + " and " + extra + "\n")
+ helper (None, outfile)
+
+
+
+#
+# task2
+#
+ at transform(task1, suffix(".1"), ".2")
+def task2(infile, outfile):
+ """
+ Second task
+ """
+ helper (infile, outfile)
+
+
+#
+# task3
+#
+@active_if(lambda:pipeline_active_if)
+@transform(task1, suffix(".1"), ".3")
+def task3(infile, outfile):
+ """
+ Third task
+ """
+ helper (infile, outfile)
+
+
+
+#
+# task4
+#
+@collate([task2, task3], regex(r"(.+)\.[23]"), r"\1.4")
+def task4(infiles, outfile):
+ """
+ Fourth task
+ """
+ helper (infiles, outfile)
+
+#
+# task4
+#
+ at merge(task4, "test_active_if/summary.5")
+def task5(infiles, outfile):
+ """
+ Fifth task
+ """
+ helper (infiles, outfile)
+
+
+expected_active_text = """null -> "test_active_if/a.1"
+ "test_active_if/a.1" -> "test_active_if/a.2"
+null -> "test_active_if/a.1"
+ "test_active_if/a.1" -> "test_active_if/a.3"
+ "test_active_if/a.3" -> "test_active_if/a.4"
+null -> "test_active_if/b.1"
+ "test_active_if/b.1" -> "test_active_if/b.2"
+null -> "test_active_if/b.1"
+ "test_active_if/b.1" -> "test_active_if/b.3"
+ "test_active_if/b.3" -> "test_active_if/b.4"
+ "test_active_if/b.4" -> "test_active_if/summary.5"
+"""
+
+expected_inactive_text = """null -> "test_active_if/a.1"
+ "test_active_if/a.1" -> "test_active_if/a.2"
+ "test_active_if/a.2" -> "test_active_if/a.4"
+null -> "test_active_if/b.1"
+ "test_active_if/b.1" -> "test_active_if/b.2"
+ "test_active_if/b.2" -> "test_active_if/b.4"
+ "test_active_if/b.4" -> "test_active_if/summary.5"
+"""
+
+#
+# Necessary to protect the "entry point" of the program under windows.
+# see: http://docs.python.org/library/multiprocessing.html#multiprocessing-programming
+#
+if __name__ == '__main__':
+
+ # active run
+ cmdline.run (options)
+ active_text = open("test_active_if/summary.5").read()
+ if active_text != expected_active_text:
+ raise Exception("Error:\n\tExpected\n%s\nInstead\n%s\n" % (active_text, expected_active_text))
+ os.system("rm -rf test_active_if")
+
+
+ # inactive run
+ pipeline_active_if = False
+ cmdline.run (options)
+ inactive_text = open("test_active_if/summary.5").read()
+ if inactive_text != expected_inactive_text:
+ raise Exception("Error:\n\tExpected\n%s\nInstead\n%s\n" % (inactive_text, expected_inactive_text))
+ os.system("rm -rf test_active_if")
+
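
test_active_if.py toggles task3 through @active_if with a callable that is
evaluated each time the pipeline runs, so the same script can produce either
of the two expected outputs above. A minimal sketch of that idiom, not taken
from the upstream sources (the module switch and file names are invented):

    from ruffus import *

    run_optional = True   # hypothetical module-level switch

    @originate(["sketch_a.txt"])
    def make_input(output_file):
        open(output_file, "w").write("data\n")

    @active_if(lambda: run_optional)   # task is skipped entirely when False
    @transform(make_input, suffix(".txt"), ".optional")
    def optional_step(input_file, output_file):
        open(output_file, "w").write(open(input_file).read())

    if __name__ == "__main__":
        pipeline_run([optional_step])
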
diff --git a/ruffus/test/test_branching_dependencies.py b/ruffus/test/test_branching_dependencies.py
new file mode 100755
index 0000000..44a2bcd
--- /dev/null
+++ b/ruffus/test/test_branching_dependencies.py
@@ -0,0 +1,452 @@
+#!/usr/bin/env python
+from __future__ import print_function
+"""
+
+ branching.py
+
+ test branching dependencies
+
+"""
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# options
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+from optparse import OptionParser
+import sys, os
+import os.path
+try:
+ import StringIO as io
+except:
+ import io as io
+
+
+import re
+
+# add self to search path for testing
+exe_path = os.path.split(os.path.abspath(sys.argv[0]))[0]
+sys.path.insert(0,os.path.abspath(os.path.join(exe_path,"..", "..")))
+if __name__ == '__main__':
+ module_name = os.path.split(sys.argv[0])[1]
+ module_name = os.path.splitext(module_name)[0];
+else:
+ module_name = __name__
+
+
+
+
+parser = OptionParser(version="%prog 1.0")
+parser.add_option("-D", "--debug", dest="debug",
+ action="store_true", default=False,
+ help="Make sure output is correct and clean up.")
+parser.add_option("-t", "--target_tasks", dest="target_tasks",
+ action="append",
+ default = list(),
+ metavar="JOBNAME",
+ type="string",
+ help="Target task(s) of pipeline.")
+parser.add_option("-f", "--forced_tasks", dest="forced_tasks",
+ action="append",
+ default = list(),
+ metavar="JOBNAME",
+ type="string",
+ help="Pipeline task(s) which will be included even if they are up to date.")
+parser.add_option("-j", "--jobs", dest="jobs",
+ default=1,
+ metavar="jobs",
+ type="int",
+ help="Specifies the number of jobs (commands) to run simultaneously.")
+parser.add_option("-v", "--verbose", dest = "verbose",
+ action="count", default=0,
+ help="Do not echo to shell but only print to log.")
+parser.add_option("--touch_files_only", dest = "touch_files_only",
+ action="store_true", default=False,
+ help="Do not run pipeline. Only touch.")
+parser.add_option("-d", "--dependency", dest="dependency_file",
+ #default="simple.svg",
+ metavar="FILE",
+ type="string",
+ help="Print a dependency graph of the pipeline that would be executed "
+ "to FILE, but do not execute it.")
+parser.add_option("-F", "--dependency_graph_format", dest="dependency_graph_format",
+ metavar="FORMAT",
+ type="string",
+ default = 'svg',
+ help="format of dependency graph file. Can be 'ps' (PostScript), "+
+ "'svg' 'svgz' (Structured Vector Graphics), " +
+ "'png' 'gif' (bitmap graphics) etc ")
+parser.add_option("-n", "--just_print", dest="just_print",
+ action="store_true", default=False,
+ help="Print a description of the jobs that would be executed, "
+ "but do not execute them.")
+parser.add_option("-M", "--minimal_rebuild_mode", dest="minimal_rebuild_mode",
+ action="store_true", default=False,
+ help="Rebuild a minimum of tasks necessary for the target. "
+ "Ignore upstream out of date tasks if intervening tasks are fine.")
+parser.add_option("-K", "--no_key_legend_in_graph", dest="no_key_legend_in_graph",
+ action="store_true", default=False,
+ help="Do not print out legend and key for dependency graph.")
+parser.add_option("-H", "--draw_graph_horizontally", dest="draw_horizontally",
+ action="store_true", default=False,
+ help="Draw horizontal dependency graph.")
+
+parameters = [
+ ]
+
+
+
+
+
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# imports
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+import time
+import re
+import operator
+import sys,os
+from collections import defaultdict
+import random
+
+sys.path.append(os.path.abspath(os.path.join(exe_path,"..", "..")))
+from ruffus import *
+import ruffus
+
+# use simplejson in place of json for python < 2.6
+try:
+ import json
+except ImportError:
+ import simplejson
+ json = simplejson
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Functions
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+def test_job_io(infiles, outfiles, extra_params):
+ """
+ cat input files content to output files
+ after writing out job parameters
+ """
+ # dump parameters
+ params = (infiles, outfiles) + extra_params
+
+ if isinstance(infiles, str):
+ infiles = [infiles]
+ elif infiles == None:
+ infiles = []
+ if isinstance(outfiles, str):
+ outfiles = [outfiles]
+ output_text = list()
+ for f in infiles:
+ output_text.append(open(f).read())
+ output_text = "".join(sorted(output_text))
+ output_text += json.dumps(infiles) + " -> " + json.dumps(outfiles) + "\n"
+ for f in outfiles:
+ open(f, "w").write(output_text)
+
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Main logic
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+
+
+
+
+# get help string
+f =io.StringIO()
+parser.print_help(f)
+helpstr = f.getvalue()
+(options, remaining_args) = parser.parse_args()
+
+
+
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Tasks
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+#
+# 1 -> 2 -> 3 ->
+# -> 4 ->
+# 5 -> 6
+#
+
+tempdir = "temp_branching_dir/"
+#
+# task1
+#
+@originate([tempdir + d for d in ('a.1', 'b.1', 'c.1')])
+@follows(mkdir(tempdir))
+@posttask(lambda: open(tempdir + "task.done", "a").write("Task 1 Done\n"))
+def task1(outfile, *extra_params):
+ """
+ First task
+ """
+ open(tempdir + "jobs.start", "a").write('job = %s\n' % json.dumps([None, outfile]))
+ test_job_io(None, outfile, extra_params)
+ open(tempdir + "jobs.finish", "a").write('job = %s\n' % json.dumps([None, outfile]))
+
+
+#
+# task2
+#
+@posttask(lambda: open(tempdir + "task.done", "a").write("Task 2 Done\n"))
+@transform(task1, suffix(".1"), ".2")
+def task2(infiles, outfiles, *extra_params):
+ """
+ Second task
+ """
+ open(tempdir + "jobs.start", "a").write('job = %s\n' % json.dumps([infiles, outfiles]))
+ test_job_io(infiles, outfiles, extra_params)
+ open(tempdir + "jobs.finish", "a").write('job = %s\n' % json.dumps([infiles, outfiles]))
+
+
+
+#
+# task3
+#
+@transform(task2, regex('(.*).2'), inputs([r"\1.2", tempdir + "a.1"]), r'\1.3')
+@posttask(lambda: open(tempdir + "task.done", "a").write("Task 3 Done\n"))
+def task3(infiles, outfiles, *extra_params):
+ """
+ Third task
+ """
+ open(tempdir + "jobs.start", "a").write('job = %s\n' % json.dumps([infiles, outfiles]))
+ test_job_io(infiles, outfiles, extra_params)
+ open(tempdir + "jobs.finish", "a").write('job = %s\n' % json.dumps([infiles, outfiles]))
+
+
+
+#
+# task4
+#
+@jobs_limit(1)
+@transform(tempdir + "*.1", suffix(".1"), ".4")
+@follows(task1)
+@posttask(lambda: open(tempdir + "task.done", "a").write("Task 4 Done\n"))
+def task4(infiles, outfiles, *extra_params):
+ """
+ Fourth task is extra slow
+ """
+ open(tempdir + "jobs.start", "a").write('job = %s\n' % json.dumps([infiles, outfiles]))
+ time.sleep(0.1)
+ test_job_io(infiles, outfiles, extra_params)
+ open(tempdir + "jobs.finish", "a").write('job = %s\n' % json.dumps([infiles, outfiles]))
+
+#
+# task5
+#
+@files(None, tempdir + 'a.5')
+@follows(mkdir(tempdir))
+@posttask(lambda: open(tempdir + "task.done", "a").write("Task 5 Done\n"))
+def task5(infiles, outfiles, *extra_params):
+ """
+ Fifth task is extra slow
+ """
+ open(tempdir + "jobs.start", "a").write('job = %s\n' % json.dumps([infiles, outfiles]))
+ time.sleep(1)
+ test_job_io(infiles, outfiles, extra_params)
+ open(tempdir + "jobs.finish", "a").write('job = %s\n' % json.dumps([infiles, outfiles]))
+
+#
+# task6
+#
+#@files([[[tempdir + d for d in 'a.3', 'b.3', 'c.3', 'a.4', 'b.4', 'c.4', 'a.5'], tempdir + 'final.6']])
+@merge([task3, task4, task5], tempdir + "final.6")
+@follows(task3, task4, task5, )
+@posttask(lambda: open(tempdir + "task.done", "a").write("Task 6 Done\n"))
+def task6(infiles, outfiles, *extra_params):
+ """
+ final task
+ """
+ open(tempdir + "jobs.start", "a").write('job = %s\n' % json.dumps([infiles, outfiles]))
+ test_job_io(infiles, outfiles, extra_params)
+ open(tempdir + "jobs.finish", "a").write('job = %s\n' % json.dumps([infiles, outfiles]))
+
+
+
+
+
+def check_job_order_correct(filename):
+ """
+ 1 -> 2 -> 3 ->
+ -> 4 ->
+ 5 -> 6
+ """
+
+ precedence_rules = [[1, 2],
+ [2, 3],
+ [1, 4],
+ [5, 6],
+ [3, 6],
+ [4, 6]]
+
+ index_re = re.compile(r'.*\.([0-9])["\]\n]*$')
+ job_indices = defaultdict(list)
+ for linenum, l in enumerate(open(filename)):
+ m = index_re.search(l)
+ if not m:
+ raise "Non-matching line in [%s]" % filename
+ job_indices[int(m.group(1))].append(linenum)
+
+ for job_index in job_indices:
+ job_indices[job_index].sort()
+
+ for before, after in precedence_rules:
+ if before not in job_indices or after not in job_indices:
+ continue
+ if job_indices[before][-1] >= job_indices[after][0]:
+ raise Exception("Precedence violated for job %d [line %d] and job %d [line %d] of [%s]"
+ % ( before, job_indices[before][-1],
+ after, job_indices[after][0],
+ filename))
+
+
+
+def check_final_output_correct(after_touch_files = False):
+ """
+ check if the final output in final.6 is as expected
+ """
+ expected_output = \
+""" ["DIR/a.1"] -> ["DIR/a.2"]
+ ["DIR/a.1"] -> ["DIR/a.4"]
+ ["DIR/a.2", "DIR/a.1"] -> ["DIR/a.3"]
+ ["DIR/a.3", "DIR/b.3", "DIR/c.3", "DIR/a.4", "DIR/b.4", "DIR/c.4", "DIR/a.5"] -> ["DIR/final.6"]
+ ["DIR/b.1"] -> ["DIR/b.2"]
+ ["DIR/b.1"] -> ["DIR/b.4"]
+ ["DIR/b.2", "DIR/a.1"] -> ["DIR/b.3"]
+ ["DIR/c.1"] -> ["DIR/c.2"]
+ ["DIR/c.1"] -> ["DIR/c.4"]
+ ["DIR/c.2", "DIR/a.1"] -> ["DIR/c.3"]
+ [] -> ["DIR/a.1"]
+ [] -> ["DIR/a.1"]
+ [] -> ["DIR/a.1"]
+ [] -> ["DIR/a.1"]
+ [] -> ["DIR/a.1"]
+ [] -> ["DIR/a.5"]
+ [] -> ["DIR/b.1"]
+ [] -> ["DIR/b.1"]
+ [] -> ["DIR/c.1"]
+ [] -> ["DIR/c.1"]"""
+
+
+ expected_output = expected_output.replace(" ", "").replace("DIR/", tempdir).split("\n")
+ orig_expected_output = expected_output
+ if after_touch_files:
+ expected_output.pop(-3)
+ final_6_contents = sorted([l.rstrip() for l in open(tempdir + "final.6", "r").readlines()])
+ if final_6_contents != expected_output:
+ print("Actual:", file=sys.stderr)
+ for ll in final_6_contents:
+ print(ll, file=sys.stderr)
+ print("_" * 80, file=sys.stderr)
+ print("Expected:", file=sys.stderr)
+ for ll in orig_expected_output:
+ print(ll, file=sys.stderr)
+ print("_" * 80, file=sys.stderr)
+ for i, (l1, l2) in enumerate(zip(final_6_contents, expected_output)):
+ if l1 != l2:
+ sys.stderr.write("%d\nActual:\n >%s<\nExpected:\n >%s<\n" % (i, l1, l2))
+ raise Exception ("Final.6 output is not as expected\n")
+
+
+#
+# Necessary to protect the "entry point" of the program under windows.
+# see: http://docs.python.org/library/multiprocessing.html#multiprocessing-programming
+#
+if __name__ == '__main__':
+ print("Python version %s" % sys.version, file=sys.stderr)
+ print("Ruffus version %s" % ruffus.__version__, file=sys.stderr)
+ if options.just_print:
+ pipeline_printout(sys.stdout, options.target_tasks, options.forced_tasks,
+ verbose=options.verbose)
+
+ elif options.dependency_file:
+ pipeline_printout_graph ( open(options.dependency_file, "w"),
+ options.dependency_graph_format,
+ options.target_tasks,
+ options.forced_tasks,
+ draw_vertically = not options.draw_horizontally,
+ no_key_legend = options.no_key_legend_in_graph)
+
+ elif options.debug:
+ import os
+ os.system("rm -rf %s" % tempdir)
+ pipeline_run(options.target_tasks, options.forced_tasks, multiprocess = options.jobs,
+ logger = stderr_logger if options.verbose else black_hole_logger,
+ verbose = options.verbose)
+
+
+ check_final_output_correct()
+ check_job_order_correct(tempdir + "jobs.start")
+ check_job_order_correct(tempdir + "jobs.finish")
+
+
+ #
+ # check that touch_files_only works: re-run the pipeline, leaving an empty file
+ # where b.1 would be
+ #
+ if options.touch_files_only:
+ #
+ # remove these because the precedence for the two runs must not be mixed together
+ #
+ os.unlink(os.path.join(tempdir, "jobs.start") )
+ os.unlink(os.path.join(tempdir, "jobs.finish") )
+
+ #
+ # remove b.1 and touch
+ #
+ if options.verbose:
+ print("\n\nNow just delete b.1 for task2...\n")
+ os.unlink(os.path.join(tempdir, "b.1"))
+ pipeline_run([task2], options.forced_tasks, multiprocess = options.jobs,
+ logger = stderr_logger if options.verbose else black_hole_logger,
+ gnu_make_maximal_rebuild_mode = not options.minimal_rebuild_mode,
+ verbose = options.verbose,
+ touch_files_only = options.touch_files_only)
+
+
+ #
+ # Now wait for the empty b.1 to show up in the output
+ #
+ if options.verbose:
+ print("\n\nRun normally...\n")
+ pipeline_run(options.target_tasks, options.forced_tasks, multiprocess = options.jobs,
+ logger = stderr_logger if options.verbose else black_hole_logger,
+ gnu_make_maximal_rebuild_mode = not options.minimal_rebuild_mode,
+ verbose = options.verbose)
+ check_final_output_correct(options.touch_files_only)
+ check_job_order_correct(tempdir + "jobs.start")
+ check_job_order_correct(tempdir + "jobs.finish")
+
+
+
+ print("OK")
+ import shutil
+ shutil.rmtree(tempdir)
+ else:
+ pipeline_run(options.target_tasks, options.forced_tasks, multiprocess = options.jobs,
+ logger = stderr_logger if options.verbose else black_hole_logger,
+ gnu_make_maximal_rebuild_mode = not options.minimal_rebuild_mode,
+ verbose = options.verbose, touch_files_only = options.touch_files_only)
+ print("OK")
diff --git a/ruffus/test/test_cmdline.py b/ruffus/test/test_cmdline.py
new file mode 100755
index 0000000..67d35ea
--- /dev/null
+++ b/ruffus/test/test_cmdline.py
@@ -0,0 +1,276 @@
+#!/usr/bin/env python
+from __future__ import print_function
+"""
+
+ test_cmdline.py
+
+
+
+"""
+
+
+import unittest
+import os, re
+import sys
+import shutil
+try:
+ from StringIO import StringIO
+except:
+ from io import StringIO
+import time
+
+exe_path = os.path.split(os.path.abspath(sys.argv[0]))[0]
+sys.path.insert(0, os.path.abspath(os.path.join(exe_path,"..", "..")))
+
+from ruffus.cmdline import handle_verbose
+import ruffus.cmdline as cmdline
+
+# mock for command line options
+class t_options(object):
+ def __str__(self):
+ return str(self.__dict__)
+ def __repr__(self):
+ return str(self.__dict__)
+
+
+class Test_cmdline(unittest.TestCase):
+ #def setUp(self):
+ #if sys.hexversion < 0x03000000:
+ # self.assertRaisesRegex = self.assertRaisesRegexp
+
+
+ #___________________________________________________________________________
+ #
+ # test_something()
+ #___________________________________________________________________________
+ #def test_something(self):
+ # s = StringIO()
+ # cleanup_tmpdir()
+ # pipeline_printout(s, [test_regex_task], verbose=5, wrap_width = 10000)
+ # self.assertTrue(re.search('Missing files\n\s+\[tmp_test_regex_error_messages/a_name.tmp1, tmp_test_regex_error_messages/a_name.tmp2', s.getvalue()))
+ # self.assertIn("Warning: File match failure: File 'tmp_test_regex_error_messages/a_name.tmp1' does not match regex", s.getvalue())
+ # self.assertRaisesRegex(fatal_error_input_file_does_not_match,
+ # "File '.*?' does not match regex\('.*?'\) and pattern '.*?':\n.*unknown group name",
+ # pipeline_printout,
+ # s, [test_regex_misspelt_capture_error_task],
+ # verbose = 3)
+
+
+ #___________________________________________________________________________
+ #
+ # cleanup
+ #___________________________________________________________________________
+ #def tearDown(self):
+ # pass
+ # shutil.rmtree(workdir)
+ #
+ #
+
+ #_____________________________________________________________________________________
+ #
+ # test_verbose
+ #_____________________________________________________________________________________
+ def test_verbose (self):
+ """
+ --verbose on its own increases the verbosity by one
+ --verbose NNN (re)sets the verbosity to NNN whatever the previous state
+ """
+
+ # options.verbose defined by user to be None
+ options = t_options()
+ setattr(options, "verbose", None)
+ handle_verbose(options)
+ self.assertTrue(options.verbose==None)
+ self.assertTrue(options.verbose_abbreviated_path == None)
+
+ # options.verbose defined by user to be 0
+ options = t_options()
+ setattr(options, "verbose", 0)
+ handle_verbose(options)
+ self.assertTrue(options.verbose==0)
+ self.assertTrue(options.verbose_abbreviated_path == None)
+
+ # options.verbose defined by user to be "6"
+ options = t_options()
+ setattr(options, "verbose", "6")
+ handle_verbose(options)
+ self.assertTrue(options.verbose==6)
+ self.assertTrue(options.verbose_abbreviated_path==None)
+
+ # options.verbose defined by user to be 6
+ options = t_options()
+ setattr(options, "verbose", 6)
+ handle_verbose(options)
+ self.assertTrue(options.verbose==6)
+ self.assertTrue(options.verbose_abbreviated_path==None)
+
+
+ # options.verbose defined by user to "+"
+ options = t_options()
+ setattr(options, "verbose", "+")
+ handle_verbose(options)
+ self.assertTrue(options.verbose==1)
+ self.assertTrue(options.verbose_abbreviated_path==None)
+
+ # --verbose not set
+ options = t_options()
+ setattr(options, "verbose", [])
+ handle_verbose(options)
+ self.assertTrue(options.verbose==0)
+ self.assertTrue(options.verbose_abbreviated_path==None)
+
+ # --verbose
+ options = t_options()
+ setattr(options, "verbose", ["+"])
+ handle_verbose(options)
+ self.assertTrue(options.verbose==1)
+ self.assertTrue(options.verbose_abbreviated_path==None)
+
+ # --verbose --verbose 5 --verbose
+ options = t_options()
+ setattr(options, "verbose", ["+", "5", "+"])
+ handle_verbose(options)
+ self.assertTrue(options.verbose==6)
+ self.assertTrue(options.verbose_abbreviated_path==None)
+
+
+ # --verbose --verbose 5 --verbose --verbose 4
+ # last value overrides the 5
+ options = t_options()
+ setattr(options, "verbose", ["+", "5", "+", "4"])
+ handle_verbose(options)
+ self.assertTrue(options.verbose==4)
+ self.assertTrue(options.verbose_abbreviated_path==None)
+
+
+ #_____________________________________________________________________________________
+ #
+ # test_verbose_abbreviated_path
+ #_____________________________________________________________________________________
+ def test_verbose_abbreviated_path (self):
+ """
+ --verbose NNN:MMM sets the verbose_abbreviated_path to MMM
+ """
+
+ #
+ # do not override users' verbose_abbreviated_path
+ #
+ options = t_options()
+ # take verbose_abbreviated_path
+ setattr(options, "verbose", ["+", "5", "+", "4:3"])
+ handle_verbose(options)
+ self.assertTrue(options.verbose==4)
+ self.assertTrue(options.verbose_abbreviated_path==3)
+ # do not override users' verbose_abbreviated_path
+ setattr(options, "verbose", ["+", "5", "+", "7:5"])
+ handle_verbose(options)
+ self.assertTrue(options.verbose==7)
+ self.assertTrue(options.verbose_abbreviated_path==3)
+
+
+ options = t_options()
+ # take verbose_abbreviated_path
+ setattr(options, "verbose", ["+", "5:3", "+", "7:5", "+"])
+ handle_verbose(options)
+ self.assertTrue(options.verbose==8)
+ self.assertTrue(options.verbose_abbreviated_path==5)
+
+ #_____________________________________________________________________________________
+ #
+ # test_argparse
+ #_____________________________________________________________________________________
+ def test_argparse(self):
+ """
+ Same as above but setting up options using ruffus.cmdline.get_argparse
+ --verbose on its own increases the verbosity by one
+ --verbose NNN (re)sets the verbosity to NNN whatever the previous state
+ --verbose NNN:MMM sets the verbose_abbreviated_path to MMM
+ """
+
+ parser = cmdline.get_argparse(description='WHAT DOES THIS PIPELINE DO?')
+
+ import sys
+
+ sys.argv = ["test", "--verbose", "--verbose=2"]
+ options = parser.parse_args()
+ handle_verbose(options)
+ self.assertTrue(options.verbose==2)
+ self.assertTrue(options.verbose_abbreviated_path==None)
+
+ sys.argv = ["test", "--verbose", "--verbose=3", "--verbose"]
+ options = parser.parse_args()
+ handle_verbose(options)
+ self.assertTrue(options.verbose==4)
+ self.assertTrue(options.verbose_abbreviated_path==None)
+
+ sys.argv = ["test", "--verbose", "--verbose=5:3", "--verbose"]
+ options = parser.parse_args()
+ handle_verbose(options)
+ self.assertTrue(options.verbose==6)
+ self.assertTrue(options.verbose_abbreviated_path==3)
+
+ sys.argv = ["test", "--verbose", "--verbose=5:3", "--verbose", "--verbose=7", "--verbose"]
+ options = parser.parse_args()
+ handle_verbose(options)
+ self.assertTrue(options.verbose==8)
+ self.assertTrue(options.verbose_abbreviated_path==3)
+
+ sys.argv = ["test", "--verbose", "--verbose=5:3", "--verbose", "--verbose=7:5", "--verbose"]
+ options = parser.parse_args()
+ handle_verbose(options)
+ self.assertTrue(options.verbose==8)
+ self.assertTrue(options.verbose_abbreviated_path==5)
+
+
+ #_____________________________________________________________________________________
+ #
+ # test_optparse
+ #_____________________________________________________________________________________
+ def test_optparse(self):
+ """
+ Same as above but setting up options using ruffus.cmdline.get_optparse
+ --verbose on its own increases the verbosity by one
+ --verbose NNN (re)sets the verbosity to NNN whatever the previous state
+ --verbose NNN:MMM sets the verbose_abbreviated_path to MMM
+ """
+ parser = cmdline.get_optparse(usage='WHAT DOES THIS PIPELINE DO?')
+
+ sys.argv = ["test", "--verbose", "--verbose=2"]
+ (options, remaining_args) = parser.parse_args()
+ handle_verbose(options)
+ self.assertTrue(options.verbose==2)
+ self.assertTrue(options.verbose_abbreviated_path==None)
+
+ sys.argv = ["test", "--verbose", "--verbose=3", "--verbose"]
+ (options, remaining_args) = parser.parse_args()
+ handle_verbose(options)
+ self.assertTrue(options.verbose==4)
+ self.assertTrue(options.verbose_abbreviated_path==None)
+
+ sys.argv = ["test", "--verbose", "--verbose=5:3", "--verbose"]
+ (options, remaining_args) = parser.parse_args()
+ handle_verbose(options)
+ self.assertTrue(options.verbose==6)
+ self.assertTrue(options.verbose_abbreviated_path==3)
+
+ sys.argv = ["test", "--verbose", "--verbose=5:3", "--verbose", "--verbose=7", "--verbose"]
+ (options, remaining_args) = parser.parse_args()
+ handle_verbose(options)
+ self.assertTrue(options.verbose==8)
+ self.assertTrue(options.verbose_abbreviated_path==3)
+
+ sys.argv = ["test", "--verbose", "--verbose=5:3", "--verbose", "--verbose=7:5", "--verbose"]
+ (options, remaining_args) = parser.parse_args()
+ handle_verbose(options)
+ self.assertTrue(options.verbose==8)
+ self.assertTrue(options.verbose_abbreviated_path==5)
+
+
+
+#
+# Necessary to protect the "entry point" of the program under windows.
+# see: http://docs.python.org/library/multiprocessing.html#multiprocessing-programming
+#
+if __name__ == '__main__':
+ #pipeline_printout(sys.stdout, [test_product_task], verbose = 3)
+ unittest.main()
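
test_cmdline.py exercises the ruffus.cmdline helpers in isolation. In a real
pipeline script they are typically wired up as in test_active_if.py above; a
minimal sketch, not taken from the upstream sources (the task and file names
are invented):

    import ruffus.cmdline as cmdline
    from ruffus import *

    parser = cmdline.get_argparse(description="Sketch pipeline")
    options = parser.parse_args()
    logger, logger_mutex = cmdline.setup_logging(__name__,
                                                 options.log_file,
                                                 options.verbose)

    @originate(["sketch.start"])
    def first_task(output_file):
        open(output_file, "w").close()

    if __name__ == "__main__":
        # cmdline.run() chooses between running, printing or graphing the
        # pipeline according to the standard options added by get_argparse()
        cmdline.run(options)
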
diff --git a/ruffus/test/test_collate.py b/ruffus/test/test_collate.py
new file mode 100755
index 0000000..b7bd445
--- /dev/null
+++ b/ruffus/test/test_collate.py
@@ -0,0 +1,257 @@
+#!/usr/bin/env python
+from __future__ import print_function
+"""
+
+ test_collate.py
+
+ test branching dependencies
+
+"""
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# options
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+from optparse import OptionParser
+import sys, os
+import os.path
+try:
+ import StringIO as io
+except:
+ import io as io
+import re
+
+# add self to search path for testing
+exe_path = os.path.split(os.path.abspath(sys.argv[0]))[0]
+sys.path.insert(0,os.path.abspath(os.path.join(exe_path,"..", "..")))
+if __name__ == '__main__':
+ module_name = os.path.split(sys.argv[0])[1]
+ module_name = os.path.splitext(module_name)[0];
+else:
+ module_name = __name__
+
+
+
+
+parser = OptionParser(version="%prog 1.0")
+parser.add_option("-D", "--debug", dest="debug",
+ action="store_true", default=False,
+ help="Make sure output is correct and clean up.")
+parser.add_option("-t", "--target_tasks", dest="target_tasks",
+ action="append",
+ default = list(),
+ metavar="JOBNAME",
+ type="string",
+ help="Target task(s) of pipeline.")
+parser.add_option("-f", "--forced_tasks", dest="forced_tasks",
+ action="append",
+ default = list(),
+ metavar="JOBNAME",
+ type="string",
+ help="Pipeline task(s) which will be included even if they are up to date.")
+parser.add_option("-j", "--jobs", dest="jobs",
+ default=1,
+ metavar="jobs",
+ type="int",
+ help="Specifies the number of jobs (commands) to run simultaneously.")
+parser.add_option("-v", "--verbose", dest = "verbose",
+ action="count", default=0,
+ help="Do not echo to shell but only print to log.")
+parser.add_option("-d", "--dependency", dest="dependency_file",
+ #default="simple.svg",
+ metavar="FILE",
+ type="string",
+ help="Print a dependency graph of the pipeline that would be executed "
+ "to FILE, but do not execute it.")
+parser.add_option("-F", "--dependency_graph_format", dest="dependency_graph_format",
+ metavar="FORMAT",
+ type="string",
+ default = 'svg',
+ help="format of dependency graph file. Can be 'ps' (PostScript), "+
+ "'svg' 'svgz' (Structured Vector Graphics), " +
+ "'png' 'gif' (bitmap graphics) etc ")
+parser.add_option("-n", "--just_print", dest="just_print",
+ action="store_true", default=False,
+ help="Print a description of the jobs that would be executed, "
+ "but do not execute them.")
+parser.add_option("-M", "--minimal_rebuild_mode", dest="minimal_rebuild_mode",
+ action="store_true", default=False,
+ help="Rebuild a minimum of tasks necessary for the target. "
+ "Ignore upstream out of date tasks if intervening tasks are fine.")
+parser.add_option("-K", "--no_key_legend_in_graph", dest="no_key_legend_in_graph",
+ action="store_true", default=False,
+ help="Do not print out legend and key for dependency graph.")
+parser.add_option("-H", "--draw_graph_horizontally", dest="draw_horizontally",
+ action="store_true", default=False,
+ help="Draw horizontal dependency graph.")
+
+parameters = [
+ ]
+
+
+
+
+
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# imports
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+import re
+import operator
+import sys,os
+from collections import defaultdict
+import random
+
+sys.path.append(os.path.abspath(os.path.join(exe_path,"..", "..")))
+from ruffus import *
+
+# use simplejson in place of json for python < 2.6
+try:
+ import json
+except ImportError:
+ import simplejson
+ json = simplejson
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Main logic
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+
+
+
+
+# get help string
+f =io.StringIO()
+parser.print_help(f)
+helpstr = f.getvalue()
+(options, remaining_args) = parser.parse_args()
+
+species_list = defaultdict(list)
+species_list["mammals"].append("cow" )
+species_list["mammals"].append("horse" )
+species_list["mammals"].append("sheep" )
+species_list["reptiles"].append("snake" )
+species_list["reptiles"].append("lizard" )
+species_list["reptiles"].append("crocodile" )
+species_list["fish" ].append("pufferfish")
+
+
+tempdir = "temp_filesre_combine/"
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Tasks
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+#
+# task1
+#
+@follows(mkdir(tempdir, tempdir + "test"))
+@posttask(lambda: open(tempdir + "task.done", "a").write("Task 1 Done\n"))
+@split(None, tempdir + '*.animal')
+def prepare_files (no_inputs, outputs):
+ # cleanup previous
+ for f in outputs:
+ os.unlink(f)
+
+ for grouping in species_list:
+ for species_name in species_list[grouping]:
+ filename = tempdir + "%s.%s.animal" % (species_name, grouping)
+ open(filename, "w").write(species_name + "\n")
+
+
+#
+# task2
+#
+@collate(prepare_files, regex(r'(.*/).*\.(.*)\.animal'), r'\1\2.results')
+@posttask(lambda: open(tempdir + "task.done", "a").write("Task 2 Done\n"))
+def summarise_by_grouping(infiles, outfile):
+ """
+ Summarise by each species group, e.g. mammals, reptiles, fish
+ """
+ open(tempdir + "jobs.start", "a").write('job = %s\n' % json.dumps([infiles, outfile]))
+ o = open(outfile, "w")
+ for i in infiles:
+ o.write(open(i).read())
+ open(tempdir + "jobs.finish", "a").write('job = %s\n' % json.dumps([infiles, outfile]))
+
+
+
+
+
+
+
+def check_species_correct():
+ """
+ #cow.mammals.animal
+ #horse.mammals.animal
+ #sheep.mammals.animal
+ # -> mammals.results
+ #
+ #snake.reptiles.animal
+ #lizard.reptiles.animal
+ #crocodile.reptiles.animal
+ # -> reptiles.results
+ #
+ #pufferfish.fish.animal
+ # -> fish.results
+ """
+ for grouping in species_list:
+ assert(open(tempdir + grouping + ".results").read() ==
+ "".join(s + "\n" for s in sorted(species_list[grouping])))
+
+
+
+
+
+#
+# Necessary to protect the "entry point" of the program under windows.
+# see: http://docs.python.org/library/multiprocessing.html#multiprocessing-programming
+#
+if __name__ == '__main__':
+ if options.just_print:
+ pipeline_printout(sys.stdout, options.target_tasks, options.forced_tasks,
+ long_winded=True,
+ gnu_make_maximal_rebuild_mode = not options.minimal_rebuild_mode)
+
+ elif options.dependency_file:
+ pipeline_printout_graph ( open(options.dependency_file, "w"),
+ options.dependency_graph_format,
+ options.target_tasks,
+ options.forced_tasks,
+ draw_vertically = not options.draw_horizontally,
+ gnu_make_maximal_rebuild_mode = not options.minimal_rebuild_mode,
+ no_key_legend = options.no_key_legend_in_graph)
+ elif options.debug:
+ import os
+ os.system("rm -rf %s" % tempdir)
+ pipeline_run(options.target_tasks, options.forced_tasks, multiprocess = options.jobs,
+ logger = stderr_logger if options.verbose else black_hole_logger,
+ gnu_make_maximal_rebuild_mode = not options.minimal_rebuild_mode,
+ verbose = options.verbose > 1)
+
+
+ check_species_correct()
+ os.system("rm -rf %s" % tempdir)
+ print("OK")
+ else:
+ pipeline_run(options.target_tasks, options.forced_tasks, multiprocess = options.jobs,
+ logger = stderr_logger if options.verbose else black_hole_logger,
+ gnu_make_maximal_rebuild_mode = not options.minimal_rebuild_mode,
+ verbose = options.verbose > 1)
+
+
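
test_collate.py groups the *.animal files into one .results file per grouping
because @collate sends every input that maps to the same regex replacement
string to a single job. A minimal sketch of that grouping idiom, not taken
from the upstream sources (file names are invented):

    from ruffus import *

    @originate(["sketch.cow.mammals.animal", "sketch.snake.reptiles.animal"])
    def make_animals(output_file):
        open(output_file, "w").write(output_file + "\n")

    # inputs sharing the second captured group end up in the same output
    @collate(make_animals, regex(r"(.*)\.(.+)\.animal$"), r"\2.results")
    def summarise(input_files, output_file):
        with open(output_file, "w") as out:
            for name in sorted(input_files):
                out.write(open(name).read())

    if __name__ == "__main__":
        pipeline_run([summarise])
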
diff --git a/ruffus/test/test_combinatorics.py b/ruffus/test/test_combinatorics.py
new file mode 100755
index 0000000..a4b5d04
--- /dev/null
+++ b/ruffus/test/test_combinatorics.py
@@ -0,0 +1,571 @@
+#!/usr/bin/env python
+from __future__ import print_function
+"""
+
+ test_combinatorics.py
+
+ test product, combine, permute, combine_with_replacement
+
+"""
+
+
+import unittest
+import os
+import sys
+import shutil
+try:
+ from StringIO import StringIO
+except:
+ from io import StringIO
+import time
+import re
+
+exe_path = os.path.split(os.path.abspath(sys.argv[0]))[0]
+sys.path.insert(0, os.path.abspath(os.path.join(exe_path,"..", "..")))
+from ruffus import *
+from ruffus import (pipeline_run, pipeline_printout, suffix, transform, split,
+ merge, dbdict, follows)
+from ruffus.combinatorics import *
+from ruffus.ruffus_exceptions import RethrownJobError
+from ruffus.ruffus_utility import (RUFFUS_HISTORY_FILE,
+ CHECKSUM_FILE_TIMESTAMPS)
+
+workdir = 'tmp_test_combinatorics'
+#sub-1s resolution in system?
+one_second_per_job = None
+
+
+def touch (filename):
+ with open(filename, "w"):
+ pass
+
+
+#___________________________________________________________________________
+#
+# generate_initial_files1
+#___________________________________________________________________________
+ at originate([workdir + "/" + prefix + "_name.tmp1" for prefix in "abcd"])
+def generate_initial_files1(out_name):
+ with open(out_name, 'w') as outfile:
+ pass
+
+#___________________________________________________________________________
+#
+# generate_initial_files1
+#___________________________________________________________________________
+ at originate([workdir + "/e_name.tmp1", workdir + "/f_name.tmp1"])
+def generate_initial_files2(out_name):
+ with open(out_name, 'w') as outfile:
+ pass
+
+#___________________________________________________________________________
+#
+# generate_initial_files1
+#___________________________________________________________________________
+ at originate([workdir + "/g_name.tmp1", workdir + "/h_name.tmp1"])
+def generate_initial_files3(out_name):
+ with open(out_name, 'w') as outfile:
+ pass
+
+#___________________________________________________________________________
+#
+# test_product_task
+#___________________________________________________________________________
+@follows(generate_initial_files1)
+@product(
+ [workdir + "/" + prefix + "_name.tmp1" for prefix in "abcd"],
+ formatter(".*/(?P<FILE_PART>.+).tmp1$" ),
+ generate_initial_files2,
+ formatter(),
+ generate_initial_files3,
+ formatter(r"tmp1$" ),
+ "{path[0][0]}/{FILE_PART[0][0]}.{basename[1][0]}.{basename[2][0]}.tmp2",
+ "{basename[0][0][0]}{basename[1][0][0]}{basename[2][0][0]}", # extra: prefices only (abcd etc)
+ "{subpath[0][0][0]}", # extra: path for 2nd input, 1st file
+ "{subdir[0][0][0]}")
+def test_product_task( infiles, outfile,
+ prefices,
+ subpath,
+ subdir):
+ with open(outfile, "w") as p:
+ p.write(prefices + ",")
+
+
+#___________________________________________________________________________
+#
+# test_product_merged_task
+#___________________________________________________________________________
+@merge(test_product_task, workdir + "/merged.results")
+def test_product_merged_task( infiles, outfile):
+ with open(outfile, "w") as p:
+ for infile in sorted(infiles):
+ with open(infile) as ii:
+ p.write(ii.read())
+
+#___________________________________________________________________________
+#
+# test_product_misspelt_capture_error_task
+#___________________________________________________________________________
+@product(
+ generate_initial_files1,
+ formatter(".*/(?P<FILE_PART>.+).tmp1$" ),
+ "{path[0][0]}/{FILEPART[0][0]}.tmp2")
+def test_product_misspelt_capture_error_task( infiles, outfile):
+ """
+ FILE_PART misspelt as FILEPART
+ """
+ with open(outfile, "w") as p: pass
+
+
+#___________________________________________________________________________
+#
+# test_product_out_of_range_formatter_ref_error_task
+#___________________________________________________________________________
+@product(
+ generate_initial_files1,
+ formatter(".*/(?P<FILE_PART>.+).tmp1$" ),
+ "{path[2][0]}/{basename[0][0]}.tmp2",
+ "{FILE_PART[0][0]}")
+def test_product_out_of_range_formatter_ref_error_task( infiles, outfile, ignored_filter):
+ """
+ {path[2][0]} when len(path) == 1
+ """
+ with open(outfile, "w") as p: pass
+
+#___________________________________________________________________________
+#
+# test_product_formatter_ref_index_error_task
+#___________________________________________________________________________
+@product(
+ generate_initial_files1,
+ formatter(".*/(?P<FILE_PART>.+).tmp1$" ),
+ "{path[0][0][1000]}/{basename[0][0]}.tmp2",
+ "{FILE_PART[0][0]}")
+def test_product_formatter_ref_index_error_task( infiles, outfile, ignored_filter):
+ """
+ {path[0][0][1000]} when the length of the path string path[0][0] is < 1000
+ """
+ with open(outfile, "w") as p: pass
+
+#___________________________________________________________________________
+#
+# test_combinations2_task
+#___________________________________________________________________________
+@combinations(
+ generate_initial_files1,
+ formatter(".*/(?P<FILE_PART>.+).tmp1$" ),
+ 2,
+ "{path[0][0]}/{FILE_PART[0][0]}.{basename[1][0]}.tmp2",
+ "{basename[0][0][0]}{basename[1][0][0]}", # extra: prefices
+ "{subpath[0][0][0]}", # extra: path for 2nd input, 1st file
+ "{subdir[0][0][0]}")
+def test_combinations2_task( infiles, outfile,
+ prefices,
+ subpath,
+ subdir):
+ """
+ Test combinations with k-tuple = 2
+ """
+ with open(outfile, "w") as outf:
+ outf.write(prefices + ",")
+
+
+@merge(test_combinations2_task, workdir + "/merged.results")
+def test_combinations2_merged_task( infiles, outfile):
+ with open(outfile, "w") as p:
+ for infile in sorted(infiles):
+ with open(infile) as ii:
+ p.write(ii.read())
+
+#___________________________________________________________________________
+#
+# test_combinations3_task
+#___________________________________________________________________________
+@combinations(
+ generate_initial_files1,
+ formatter(".*/(?P<FILE_PART>.+).tmp1$" ),
+ 3,
+ "{path[0][0]}/{FILE_PART[0][0]}.{basename[1][0]}.{basename[2][0]}.tmp2",
+ "{basename[0][0][0]}{basename[1][0][0]}{basename[2][0][0]}", # extra: prefices
+ "{subpath[0][0][0]}", # extra: path for 2nd input, 1st file
+ "{subdir[0][0][0]}")
+def test_combinations3_task( infiles, outfile,
+ prefices,
+ subpath,
+ subdir):
+ """
+ Test combinations with k-tuple = 3
+ """
+ with open(outfile, "w") as outf:
+ outf.write(prefices + ",")
+
+@merge(test_combinations3_task, workdir + "/merged.results")
+def test_combinations3_merged_task( infiles, outfile):
+ with open(outfile, "w") as p:
+ for infile in sorted(infiles):
+ with open(infile) as ii:
+ p.write(ii.read())
+
+
+#___________________________________________________________________________
+#
+# test_permutations2_task
+#___________________________________________________________________________
+@permutations(
+ generate_initial_files1,
+ formatter(".*/(?P<FILE_PART>.+).tmp1$" ),
+ 2,
+ "{path[0][0]}/{FILE_PART[0][0]}.{basename[1][0]}.tmp2",
+ "{basename[0][0][0]}{basename[1][0][0]}", # extra: prefices
+ "{subpath[0][0][0]}", # extra: path for 2nd input, 1st file
+ "{subdir[0][0][0]}")
+def test_permutations2_task( infiles, outfile,
+ prefices,
+ subpath,
+ subdir):
+ """
+ Test permutations with k-tuple = 2
+ """
+ with open(outfile, "w") as outf:
+ outf.write(prefices + ",")
+
+@merge(test_permutations2_task, workdir + "/merged.results")
+def test_permutations2_merged_task( infiles, outfile):
+ with open(outfile, "w") as p:
+ for infile in sorted(infiles):
+ with open(infile) as ii:
+ p.write(ii.read())
+
+
+#___________________________________________________________________________
+#
+# test_permutations3_task
+#___________________________________________________________________________
+@permutations(
+ generate_initial_files1,
+ formatter(".*/(?P<FILE_PART>.+).tmp1$" ),
+ 3,
+ "{path[0][0]}/{FILE_PART[0][0]}.{basename[1][0]}.{basename[2][0]}.tmp2",
+ "{basename[0][0][0]}{basename[1][0][0]}{basename[2][0][0]}", # extra: prefices
+ "{subpath[0][0][0]}", # extra: path for 2nd input, 1st file
+ "{subdir[0][0][0]}")
+def test_permutations3_task( infiles, outfile,
+ prefices,
+ subpath,
+ subdir):
+ """
+ Test permutations with k-tuple = 3
+ """
+ with open(outfile, "w") as outf:
+ outf.write(prefices + ",")
+
+@merge(test_permutations3_task, workdir + "/merged.results")
+def test_permutations3_merged_task( infiles, outfile):
+ with open(outfile, "w") as p:
+ for infile in sorted(infiles):
+ with open(infile) as ii:
+ p.write(ii.read())
+
+
+
+#___________________________________________________________________________
+#
+# test_combinations_with_replacement2_task
+#___________________________________________________________________________
+@combinations_with_replacement(
+ generate_initial_files1,
+ formatter(".*/(?P<FILE_PART>.+).tmp1$" ),
+ 2,
+ "{path[0][0]}/{FILE_PART[0][0]}.{basename[1][0]}.tmp2",
+ "{basename[0][0][0]}{basename[1][0][0]}", # extra: prefices
+ "{subpath[0][0][0]}", # extra: path for 2nd input, 1st file
+ "{subdir[0][0][0]}")
+def test_combinations_with_replacement2_task( infiles, outfile,
+ prefices,
+ subpath,
+ subdir):
+ """
+ Test combinations_with_replacement with k-tuple = 2
+ """
+ with open(outfile, "w") as outf:
+ outf.write(prefices + ",")
+
+@merge(test_combinations_with_replacement2_task, workdir + "/merged.results")
+def test_combinations_with_replacement2_merged_task( infiles, outfile):
+ with open(outfile, "w") as p:
+ for infile in sorted(infiles):
+ with open(infile) as ii:
+ p.write(ii.read())
+
+
+#___________________________________________________________________________
+#
+# test_combinations_with_replacement3_task
+#___________________________________________________________________________
+@combinations_with_replacement(
+ generate_initial_files1,
+ formatter(".*/(?P<FILE_PART>.+).tmp1$" ),
+ 3,
+ "{path[0][0]}/{FILE_PART[0][0]}.{basename[1][0]}.{basename[2][0]}.tmp2",
+ "{basename[0][0][0]}{basename[1][0][0]}{basename[2][0][0]}", # extra: prefices
+ "{subpath[0][0][0]}", # extra: path for 2nd input, 1st file
+ "{subdir[0][0][0]}")
+def test_combinations_with_replacement3_task( infiles, outfile,
+ prefices,
+ subpath,
+ subdir):
+ """
+ Test combinations_with_replacement with k-tuple = 3
+ """
+ with open(outfile, "w") as outf:
+ outf.write(prefices + ",")
+
+@merge(test_combinations_with_replacement3_task, workdir + "/merged.results")
+def test_combinations_with_replacement3_merged_task( infiles, outfile):
+ with open(outfile, "w") as p:
+ for infile in sorted(infiles):
+ with open(infile) as ii:
+ p.write(ii.read())
+
+
+
+def cleanup_tmpdir():
+ os.system('rm -f %s %s' % (os.path.join(workdir, '*'), RUFFUS_HISTORY_FILE))
+
+
+class TestCombinatorics(unittest.TestCase):
+ def setUp(self):
+ try:
+ os.mkdir(workdir)
+ except OSError:
+ pass
+
+ #___________________________________________________________________________
+ #
+ # test product() pipeline_printout and pipeline_run
+ #___________________________________________________________________________
+ def test_product_printout(self):
+ """Input file exists, output doesn't exist"""
+ cleanup_tmpdir()
+ s = StringIO()
+ pipeline_printout(s, [test_product_merged_task], verbose=5, wrap_width = 10000)
+ self.assertTrue(re.search('Job needs update: Missing files\n\s+'
+ '\[.*tmp_test_combinatorics/a_name.tmp1, '
+ '.*tmp_test_combinatorics/e_name.tmp1, '
+ '.*tmp_test_combinatorics/h_name.tmp1, '
+ '.*tmp_test_combinatorics/a_name.e_name.h_name.tmp2\]', s.getvalue()))
+
+ def test_product_run(self):
+ """Run product"""
+ # output is up to date, but function body changed (e.g., source different)
+ cleanup_tmpdir()
+ pipeline_run([test_product_merged_task], verbose=0, multiprocess = 100, one_second_per_job = one_second_per_job)
+ with open(workdir + "/merged.results") as oo:
+ self.assertEqual(oo.read(),
+ "aeg,aeh,afg,afh,beg,beh,bfg,bfh,ceg,ceh,cfg,cfh,deg,deh,dfg,dfh,")
+
+ #___________________________________________________________________________
+ #
+ # test product() pipeline_printout diagnostic error messsages
+ #
+ # require verbose >= 3 or an empty jobs list
+ #___________________________________________________________________________
+ def test_product_misspelt_capture_error(self):
+ """Misspelt named capture group
+ Requires verbose >= 3 or an empty jobs list
+ """
+ cleanup_tmpdir()
+
+ s = StringIO()
+ pipeline_printout(s, [test_product_misspelt_capture_error_task], verbose=3, wrap_width = 10000)
+ self.assertIn("Warning: File match failure: Unmatched field 'FILEPART'", s.getvalue())
+
+
+ def test_product_out_of_range_formatter_ref_error(self):
+ """
+ {path[2][0]} when len(path) == 1
+ Requires verbose >= 3 or an empty jobs list
+ """
+ cleanup_tmpdir()
+
+ s = StringIO()
+ pipeline_printout(s, [test_product_out_of_range_formatter_ref_error_task], verbose=3, wrap_width = 10000)
+ self.assertIn("Warning: File match failure: Unmatched field 2", s.getvalue())
+
+ def test_product_formatter_ref_index_error(self):
+ """
+ {path[0][0][1000]} when the path string is shorter than 1000 characters, i.e. len(path[0][0]) < 1000
+ Requires verbose >= 3 or an empty jobs list
+ """
+ cleanup_tmpdir()
+
+ s = StringIO()
+ pipeline_printout(s, [test_product_formatter_ref_index_error_task], verbose=3, wrap_width = 10000)
+ self.assertIn("Warning: File match failure: Unmatched field string index out of range", s.getvalue())
+ #print s.getvalue()
+
+
+ #___________________________________________________________________________
+ #
+ # test combinations() pipeline_printout and pipeline_run
+ #___________________________________________________________________________
+ def test_combinations2_printout(self):
+ """Input file exists, output doesn't exist"""
+ cleanup_tmpdir()
+
+ s = StringIO()
+ pipeline_printout(s, [test_combinations2_merged_task], verbose=5, wrap_width = 10000)
+ self.assertTrue(re.search('Job needs update: Missing files\n\s+'
+ '\[.*tmp_test_combinatorics/a_name.tmp1, '
+ '.*tmp_test_combinatorics/b_name.tmp1, '
+ '.*tmp_test_combinatorics/a_name.b_name.tmp2\]', s.getvalue()))
+
+
+ def test_combinations2_run(self):
+ """Run product"""
+ # output is up to date, but function body changed (e.g., source different)
+ cleanup_tmpdir()
+ pipeline_run([test_combinations2_merged_task], verbose=0, multiprocess = 100, one_second_per_job = one_second_per_job)
+ with open(workdir + "/merged.results") as oo:
+ self.assertEqual(oo.read(),
+ 'ab,ac,ad,bc,bd,cd,')
+
+ #___________________________________________________________________________
+ #
+ # test combinations() pipeline_printout and pipeline_run
+ #___________________________________________________________________________
+ def test_combinations3_printout(self):
+ """Input file exists, output doesn't exist"""
+ cleanup_tmpdir()
+
+ s = StringIO()
+ pipeline_printout(s, [test_combinations3_merged_task], verbose=5, wrap_width = 10000)
+ self.assertTrue(re.search(
+ '\[.*tmp_test_combinatorics/a_name.tmp1, '
+ '.*tmp_test_combinatorics/b_name.tmp1, '
+ '.*tmp_test_combinatorics/c_name.tmp1, '
+ '.*tmp_test_combinatorics/a_name.b_name.c_name.tmp2\]', s.getvalue()))
+
+ def test_combinations3_run(self):
+ """Run product"""
+ # output is up to date, but function body changed (e.g., source different)
+ cleanup_tmpdir()
+ pipeline_run([test_combinations3_merged_task], verbose=0, multiprocess = 100, one_second_per_job = one_second_per_job)
+ with open(workdir + "/merged.results") as oo:
+ self.assertEqual(oo.read(),
+ "abc,abd,acd,bcd,")
+
+
+ #___________________________________________________________________________
+ #
+ # test permutations() pipeline_printout and pipeline_run
+ #___________________________________________________________________________
+ def test_permutations2_printout(self):
+ """Input file exists, output doesn't exist"""
+ cleanup_tmpdir()
+
+ s = StringIO()
+ pipeline_printout(s, [test_permutations2_merged_task], verbose=5, wrap_width = 10000)
+ self.assertTrue(re.search('\[.*tmp_test_combinatorics/a_name.tmp1, '
+ '.*tmp_test_combinatorics/b_name.tmp1, '
+ '.*tmp_test_combinatorics/a_name.b_name.tmp2\]', s.getvalue()))
+
+ def test_permutations2_run(self):
+ """Run product"""
+ # output is up to date, but function body changed (e.g., source different)
+ cleanup_tmpdir()
+ pipeline_run([test_permutations2_merged_task], verbose=0, multiprocess = 100, one_second_per_job = one_second_per_job)
+ with open(workdir + "/merged.results") as oo:
+ self.assertEqual(oo.read(),
+ "ab,ac,ad,ba,bc,bd,ca,cb,cd,da,db,dc,")
+
+ #___________________________________________________________________________
+ #
+ # test permutations() pipeline_printout and pipeline_run
+ #___________________________________________________________________________
+ def test_permutations3_printout(self):
+ """Input file exists, output doesn't exist"""
+ cleanup_tmpdir()
+
+ s = StringIO()
+ pipeline_printout(s, [test_permutations3_merged_task], verbose=5, wrap_width = 10000)
+ self.assertTrue(re.search('\[.*tmp_test_combinatorics/a_name.tmp1, '
+ '.*tmp_test_combinatorics/b_name.tmp1, '
+ '.*tmp_test_combinatorics/c_name.tmp1, '
+ '.*tmp_test_combinatorics/a_name.b_name.c_name.tmp2\]', s.getvalue()))
+
+ def test_permutations3_run(self):
+ """Run product"""
+ # output is up to date, but function body changed (e.g., source different)
+ cleanup_tmpdir()
+ pipeline_run([test_permutations3_merged_task], verbose=0, multiprocess = 100, one_second_per_job = one_second_per_job)
+ with open(workdir + "/merged.results") as oo:
+ self.assertEqual(oo.read(),
+ 'abc,abd,acb,acd,adb,adc,bac,bad,bca,bcd,bda,bdc,cab,cad,cba,cbd,cda,cdb,dab,dac,dba,dbc,dca,dcb,')
+
+
+ #___________________________________________________________________________
+ #
+ # test combinations_with_replacement() pipeline_printout and pipeline_run
+ #___________________________________________________________________________
+ def test_combinations_with_replacement2_printout(self):
+ """Input file exists, output doesn't exist"""
+ cleanup_tmpdir()
+
+ s = StringIO()
+ pipeline_printout(s, [test_combinations_with_replacement2_merged_task], verbose=5, wrap_width = 10000)
+ self.assertTrue(re.search('\[.*tmp_test_combinatorics/a_name.tmp1, '
+ '.*tmp_test_combinatorics/b_name.tmp1, '
+ '.*tmp_test_combinatorics/a_name.b_name.tmp2\]', s.getvalue()))
+
+ def test_combinations_with_replacement2_run(self):
+ """Run product"""
+ # output is up to date, but function body changed (e.g., source different)
+ cleanup_tmpdir()
+ pipeline_run([test_combinations_with_replacement2_merged_task], verbose=0, multiprocess = 100, one_second_per_job = one_second_per_job)
+ with open(workdir + "/merged.results") as oo:
+ self.assertEqual(oo.read(),
+ "aa,ab,ac,ad,bb,bc,bd,cc,cd,dd,")
+
+ #___________________________________________________________________________
+ #
+ # test combinations_with_replacement() pipeline_printout and pipeline_run
+ #___________________________________________________________________________
+ def test_combinations_with_replacement3_printout(self):
+ """Input file exists, output doesn't exist"""
+ cleanup_tmpdir()
+
+ s = StringIO()
+ pipeline_printout(s, [test_combinations_with_replacement3_merged_task], verbose=5, wrap_width = 10000)
+ self.assertTrue(re.search('\[.*tmp_test_combinatorics/a_name.tmp1, '
+ '.*tmp_test_combinatorics/b_name.tmp1, '
+ '.*tmp_test_combinatorics/c_name.tmp1, '
+ '.*tmp_test_combinatorics/a_name.b_name.c_name.tmp2\]', s.getvalue()))
+
+ def test_combinations_with_replacement3_run(self):
+ """Run product"""
+ # output is up to date, but function body changed (e.g., source different)
+ cleanup_tmpdir()
+ pipeline_run([test_combinations_with_replacement3_merged_task], verbose=0, multiprocess = 100, one_second_per_job = one_second_per_job)
+ with open(workdir + "/merged.results") as oo:
+ self.assertEqual(oo.read(),
+ 'aaa,aab,aac,aad,abb,abc,abd,acc,acd,add,bbb,bbc,bbd,bcc,bcd,bdd,ccc,ccd,cdd,ddd,')
+
+
+ #___________________________________________________________________________
+ #
+ # cleanup
+ #___________________________________________________________________________
+ def tearDown(self):
+ shutil.rmtree(workdir)
+
+
+
+#
+# Necessary to protect the "entry point" of the program under windows.
+# see: http://docs.python.org/library/multiprocessing.html#multiprocessing-programming
+#
+if __name__ == '__main__':
+ #pipeline_printout(sys.stdout, [test_product_task], verbose = 5)
+ unittest.main()
diff --git a/ruffus/test/test_ctrl_c_exceptions.py b/ruffus/test/test_ctrl_c_exceptions.py
new file mode 100755
index 0000000..fb419ee
--- /dev/null
+++ b/ruffus/test/test_ctrl_c_exceptions.py
@@ -0,0 +1,94 @@
+#!/usr/bin/env python
+from __future__ import print_function
+"""
+
+ test_ctrl_c_exceptions.py
+
+ Interactive test: press Ctrl-C while the pipeline jobs are running to check
+ how the resulting interrupt is handled
+
+"""
+
+
+import unittest
+import os
+import sys
+import shutil
+try:
+ from StringIO import StringIO
+except ImportError:
+ from io import StringIO
+import time
+import re
+import subprocess
+
+exe_path = os.path.split(os.path.abspath(sys.argv[0]))[0]
+sys.path.insert(0, os.path.abspath(os.path.join(exe_path,"..", "..")))
+from ruffus import *
+from ruffus import (pipeline_run, pipeline_printout, suffix, transform, split,
+ merge, dbdict, follows)
+#from ruffus.combinatorics import *
+from ruffus.ruffus_exceptions import RethrownJobError
+from ruffus.drmaa_wrapper import run_job
+from ruffus.ruffus_utility import (RUFFUS_HISTORY_FILE,
+ CHECKSUM_FILE_TIMESTAMPS,
+ get_default_history_file_name)
+
+workdir = 'tmp_test_job_history_with_exceptions'
+#sub-1s resolution in system?
+one_second_per_job = None
+throw_exception = False
+#___________________________________________________________________________
+#
+# generate_initial_files1
+#___________________________________________________________________________
+@mkdir(workdir)
+@originate([workdir + "/" + prefix + "_name.tmp1" for prefix in "abcdefghijk"])
+def generate_initial_files1(on):
+ with open(on, 'w') as outfile:
+ pass
+
+
+#___________________________________________________________________________
+#
+# test_task2
+#___________________________________________________________________________
+@transform(generate_initial_files1,
+ suffix(".tmp1"), ".tmp2")
+def test_task2( infile, outfile):
+ print ("%s start to run " % infile)
+ run_job("./five_second.py", run_locally = True)
+ print ("%s wake up " % infile)
+ with open(outfile, "w") as p:
+ pass
+
+#___________________________________________________________________________
+#
+# test_task3
+#___________________________________________________________________________
+@transform(test_task2, suffix(".tmp2"), ".tmp3")
+def test_task3( infile, outfile):
+ print ("%s start to run " % infile)
+ #subprocess.check_call("./five_second.py")
+ run_job("./five_second.py", run_locally = True)
+ print ("%s wake up " % infile)
+ with open(outfile, "w") as p:
+ pass
+
+
+def cleanup_tmpdir():
+ os.system('rm -f %s %s' % (os.path.join(workdir, '*'), RUFFUS_HISTORY_FILE))
+
+
+def do_main ():
+ print("Press Ctrl-C Now!!", file=sys.stdout)
+ sys.stdout.flush()
+ time.sleep(2)
+ print("Start....", file=sys.stdout)
+ sys.stdout.flush()
+ pipeline_run(verbose = 11,
+ multiprocess = 5)
+ print("too late!!", file=sys.stdout)
+ sys.stdout.flush()
+ cleanup_tmpdir()
+
+if __name__ == '__main__':
+ do_main()
diff --git a/ruffus/test/test_drmaa.py b/ruffus/test/test_drmaa.py
new file mode 100644
index 0000000..368e4b0
--- /dev/null
+++ b/ruffus/test/test_drmaa.py
@@ -0,0 +1,9 @@
+from ruffus import *
+from ruffus.drmaa_wrapper import *
+import drmaa
+drmaa_session = drmaa.Session()
+drmaa_session.initialize()
+logger, logger_mutex = cmdline.setup_logging ("me", "test.log", 1)
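+# NB: the queue name and project option below are site specific and will need to
+# be changed to match the local grid engine / DRMAA configuration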
+job_queue_name = "short.qb"
+job_other_options='-P mott-flint.prjb'
+run_job_using_drmaa ("ls", job_queue_name, None, "test_job_name", job_other_options, logger, drmaa_session)
diff --git a/ruffus/test/test_empty_files_decorator.py b/ruffus/test/test_empty_files_decorator.py
new file mode 100755
index 0000000..900e0c6
--- /dev/null
+++ b/ruffus/test/test_empty_files_decorator.py
@@ -0,0 +1,214 @@
+#!/usr/bin/env python
+from __future__ import print_function
+"""
+
+ test_empty_files_decorator.py
+
+ Make sure that a task whose @files() parameter list is empty produces a warning
+
+"""
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# options
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+from optparse import OptionParser
+import sys, os
+import os.path
+try:
+ import StringIO as io
+except ImportError:
+ import io as io
+import re
+
+# add self to search path for testing
+exe_path = os.path.split(os.path.abspath(sys.argv[0]))[0]
+sys.path.insert(0,os.path.abspath(os.path.join(exe_path,"..", "..")))
+if __name__ == '__main__':
+ module_name = os.path.split(sys.argv[0])[1]
+ module_name = os.path.splitext(module_name)[0]
+else:
+ module_name = __name__
+
+
+
+
+parser = OptionParser(version="%prog 1.0")
+parser.add_option("-D", "--debug", dest="debug",
+ action="store_true", default=False,
+ help="Make sure output is correct and clean up.")
+parser.add_option("-t", "--target_tasks", dest="target_tasks",
+ action="append",
+ default = list(),
+ metavar="JOBNAME",
+ type="string",
+ help="Target task(s) of pipeline.")
+parser.add_option("-f", "--forced_tasks", dest="forced_tasks",
+ action="append",
+ default = list(),
+ metavar="JOBNAME",
+ type="string",
+ help="Pipeline task(s) which will be included even if they are up to date.")
+parser.add_option("-j", "--jobs", dest="jobs",
+ default=1,
+ metavar="jobs",
+ type="int",
+ help="Specifies the number of jobs (commands) to run simultaneously.")
+parser.add_option("-v", "--verbose", dest = "verbose",
+ action="count", default=0,
+ help="Do not echo to shell but only print to log.")
+parser.add_option("-d", "--dependency", dest="dependency_file",
+ #default="simple.svg",
+ metavar="FILE",
+ type="string",
+ help="Print a dependency graph of the pipeline that would be executed "
+ "to FILE, but do not execute it.")
+parser.add_option("-F", "--dependency_graph_format", dest="dependency_graph_format",
+ metavar="FORMAT",
+ type="string",
+ default = 'svg',
+ help="format of dependency graph file. Can be 'ps' (PostScript), "+
+ "'svg' 'svgz' (Structured Vector Graphics), " +
+ "'png' 'gif' (bitmap graphics) etc ")
+parser.add_option("-n", "--just_print", dest="just_print",
+ action="store_true", default=False,
+ help="Print a description of the jobs that would be executed, "
+ "but do not execute them.")
+parser.add_option("-M", "--minimal_rebuild_mode", dest="minimal_rebuild_mode",
+ action="store_true", default=False,
+ help="Rebuild a minimum of tasks necessary for the target. "
+ "Ignore upstream out of date tasks if intervening tasks are fine.")
+parser.add_option("-K", "--no_key_legend_in_graph", dest="no_key_legend_in_graph",
+ action="store_true", default=False,
+ help="Do not print out legend and key for dependency graph.")
+parser.add_option("-H", "--draw_graph_horizontally", dest="draw_horizontally",
+ action="store_true", default=False,
+ help="Draw horizontal dependency graph.")
+
+parameters = [
+ ]
+
+
+
+
+
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# imports
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+import re
+import operator
+import sys,os
+from collections import defaultdict
+import random
+
+sys.path.append(os.path.abspath(os.path.join(exe_path,"..", "..")))
+from ruffus import *
+
+# use simplejson in place of json for python < 2.6
+try:
+ import json
+except ImportError:
+ import simplejson
+ json = simplejson
+
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Main logic
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+
+
+
+
+# get help string
+f = io.StringIO()
+parser.print_help(f)
+helpstr = f.getvalue()
+(options, remaining_args) = parser.parse_args()
+
+
+
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Tasks
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
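+# Deliberately empty parameter list: @files() should warn that it was given no jobs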
+a = []
+
+#
+# task1
+#
+@files(a)
+def task1():
+ """
+ First task
+ """
+ print("Task1", file=sys.stderr)
+
+
+import unittest
+
+class Test_task(unittest.TestCase):
+
+
+ def test_task (self):
+ class t_save_to_str_logger:
+ """
+ Everything to stderr
+ """
+ def __init__ (self):
+ self.info_str = ""
+ self.warning_str = ""
+ self.debug_str = ""
+ def info (self, message):
+ self.info_str += message
+ def warning (self, message):
+ self.warning_str += message
+ def debug (self, message):
+ self.debug_str += message
+
+ save_to_str_logger = t_save_to_str_logger()
+ pipeline_run([task1], options.forced_tasks, multiprocess = options.jobs,
+ logger = save_to_str_logger,
+ verbose = 1)
+ self.assertTrue("@files() was empty" in save_to_str_logger.warning_str)
+ print("\n Warning printed out correctly", file=sys.stderr)
+
+#
+# Necessary to protect the "entry point" of the program under windows.
+# see: http://docs.python.org/library/multiprocessing.html#multiprocessing-programming
+#
+if __name__ == '__main__':
+ if options.just_print:
+ pipeline_printout(sys.stdout, options.target_tasks, options.forced_tasks,
+ long_winded=True,
+ gnu_make_maximal_rebuild_mode = not options.minimal_rebuild_mode)
+
+ elif options.dependency_file:
+ pipeline_printout_graph ( open(options.dependency_file, "w"),
+ options.dependency_graph_format,
+ options.target_tasks,
+ options.forced_tasks,
+ draw_vertically = not options.draw_horizontally,
+ gnu_make_maximal_rebuild_mode = not options.minimal_rebuild_mode,
+ no_key_legend = options.no_key_legend_in_graph)
+ else:
+ unittest.main()
+
diff --git a/ruffus/test/test_exceptions.py b/ruffus/test/test_exceptions.py
new file mode 100755
index 0000000..a995cbf
--- /dev/null
+++ b/ruffus/test/test_exceptions.py
@@ -0,0 +1,59 @@
+#!/usr/bin/env python
+from __future__ import print_function
+"""
+
+ test_exceptions.py
+
+ use :
+ --debug to test automatically
+ -j N / --jobs N to specify multitasking
+ -v to see the jobs in action
+ -n / --just_print to see what jobs would run
+
+"""
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# options
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+import sys, os
+
+# add self to search path for testing
+exe_path = os.path.split(os.path.abspath(sys.argv[0]))[0]
+sys.path.insert(0,os.path.abspath(os.path.join(exe_path,"..", "..")))
+
+
+
+from ruffus import *
+
+parser = cmdline.get_argparse( description='Test exceptions?')
+options = parser.parse_args()
+
+# optional logger which can be passed to ruffus tasks
+logger, logger_mutex = cmdline.setup_logging (__name__, options.log_file, options.verbose)
+
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Tasks
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+@parallel([['A', 1], ['B',3], ['C',3], ['D',4], ['E',4], ['F',4]])
+def parallel_task(name, param1):
+ if options.verbose:
+ sys.stderr.write(" Parallel task %s: \n\n" % name)
+ #raise task.JobSignalledBreak("Oops! I did it again!")
+ raise Exception("new")
+
+try:
+ cmdline.run (options, logger = logger, log_exceptions = True)
+except:
+ pass
+
+
diff --git a/ruffus/test/test_file_name_parameters.py b/ruffus/test/test_file_name_parameters.py
new file mode 100755
index 0000000..b61201b
--- /dev/null
+++ b/ruffus/test/test_file_name_parameters.py
@@ -0,0 +1,1518 @@
+#!/usr/bin/env python
+from __future__ import print_function
+################################################################################
+#
+# test_file_name_parameters
+#
+#
+# Copyright (c) 11/9/2009 Leo Goodstadt
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+#################################################################################
+"""
+
+ test_file_name_parameters.py
+
+"""
+
+
+import sys, os
+# add self to search path for testing
+exe_path = os.path.split(os.path.abspath(sys.argv[0]))[0]
+sys.path.insert(0,os.path.abspath(os.path.join(exe_path,"..", "..")))
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Functions
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Unit Tests
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+import sys
+import ruffus
+from ruffus import *
+from ruffus.file_name_parameters import *
+#print ruffus.__version__
+history_file = ':memory:'
+history_file = None
+
+# use simplejson in place of json for python < 2.6
+try:
+ import json
+except ImportError:
+ import simplejson
+ json = simplejson
+
+
+dumps = json.dumps
+
+exe_path = os.path.join(os.path.split(os.path.abspath(sys.argv[0]))[0], "..")
+test_path = os.path.normpath(os.path.join(exe_path, "test", "file_name_parameters"))
+
+
+
+
+def touch (filename):
+ with open(filename, "w"):
+ pass
+
+
+
+
+#=========================================================================================
+
+# args_param_factory
+
+#=========================================================================================
+import unittest, time
+import inspect
+def lineno():
+ """Returns the current line number in our program."""
+ return inspect.currentframe().f_back.f_lineno
+
+
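+# The parameter factories yield each job's parameters twice per job (as a pair),
+# so the expected values are doubled into the same shape for comparison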
+def double_parameters(params):
+ return [(p,p) for p in params]
+
+
+from random import randint
+class Test_args_param_factory(unittest.TestCase):
+
+ # self.assertEqual(self.seq, range(10))
+ # self.assertTrue(element in self.seq)
+ # self.assertRaises(ValueError, random.sample, self.seq, 20)
+
+ def forwarded_function (self, params):
+ """
+ This extra function is to simulate the forwarding from the decorator to
+ the task creation function
+ """
+ it = args_param_factory(params)
+ return list(it(None))
+
+ def test_single_job_per_task(self):
+ """
+ test convenience form for single job per task
+ """
+ self.assertEqual(self.forwarded_function([["file.input", "file.output", "other", 1]]),
+ double_parameters([['file.input', 'file.output', 'other', 1]]))
+
+ def test_multiple_jobs_per_task(self):
+ """
+ test normal form for multiple job per task
+ """
+ params = [
+ ["file0input", "file0.output", "other", 1],
+ ["file1input", "file1.output", "other", 1],
+ ["file2input", "file2.output", "other", 1],
+ ]
+ self.assertEqual(self.forwarded_function(params),
+ double_parameters(params))
+
+ def test_nested_multiple_jobs_per_task(self):
+ """
+ test normal form for multiple job per task
+ """
+ params = [
+ [[["file0input"]], "file0.output", "other", 1],
+ ["file1input", "file1.output", "other", 1],
+ ["file2input", "file2.output", "other", 1],
+ ]
+ self.assertEqual(self.forwarded_function(params),
+ double_parameters(params))
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+#=========================================================================================
+
+# files_re_param_factory
+
+#=========================================================================================
+
+def recursive_replace(p, from_s, to_s):
+ """
+ recursively replaces file name specifications using regular expressions
+ Non-strings are left alone
+ """
+ if isinstance(p, str):
+ return p.replace(from_s, to_s)
+ elif non_str_sequence (p):
+ return type(p)(recursive_replace(pp, from_s, to_s) for pp in p)
+ else:
+ return p
+
+
+def list_generator_factory (list):
+ def list_generator (ignored_args):
+ for i in list:
+ yield i, i
+ return list_generator
+
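+# Fake upstream tasks: t1-t4 are given canned parameter generators built from the
+# lists below, while t5 deliberately has no generator; the factories under test
+# pick up their output via task.output_from()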
+l1 = [["input1", "output1.test"], [3, "output2.test"], ["input3", "output3.test"], [3, ["output4.test", "output.ignored"]], [], [4, 5]]
+l2 = [["output4.test", "output.ignored"]]
+l3 = []
+l4 = [[1, (2,"output5.test")]]
+t1 = task._task("module", "func1"); t1.param_generator_func = list_generator_factory(l1)
+t2 = task._task("module", "func2"); t2.param_generator_func = list_generator_factory(l2)
+t2._single_job_single_output = t2.single_job_single_output
+t3 = task._task("module", "func3"); t3.param_generator_func = list_generator_factory(l3)
+t4 = task._task("module", "func4"); t4.param_generator_func = list_generator_factory(l4)
+t4._single_job_single_output = t4.single_job_single_output
+t5 = task._task("module", "func5"); t5.param_generator_func = None
+
+next_task_id = 1
+class Test_files_re_param_factory(unittest.TestCase):
+ def setUp(self):
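+ # f0.output is touched before the *.test files and f1/f2.output after them,
+ # so only the f0 job should look out of date in the up-to-date checks below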
+ if not os.path.exists(test_path):
+ os.makedirs(test_path)
+ touch("%s/f%d.output" % (test_path, 0))
+ for i in range(3):
+ touch("%s/f%d.test" % (test_path, i))
+ time.sleep(0.1)
+ touch("%s/f%d.output" % (test_path, 1))
+ touch("%s/f%d.output" % (test_path, 2))
+ self.tasks = [t1, t2, t3, t4, t5]
+
+
+
+ def tearDown(self):
+ for i in range(3):
+ os.unlink("%s/f%d.test" % (test_path, i))
+ for i in range(3):
+ os.unlink("%s/f%d.output" % (test_path, i))
+ os.removedirs(test_path)
+ pass
+
+
+ # self.assertEqual(self.seq, range(10))
+ # self.assertTrue(element in self.seq)
+ # self.assertRaises(ValueError, random.sample, self.seq, 20)
+
+ #def get_param_iterator (self, *old_args):
+ def get_param_iterator (self, *orig_args):
+ #
+ # replace function / function names with tasks
+ #
+ # fake virgin task
+ # use global incrementing index to avoid name clashes
+ #fake_task = task._task("module", "func_fake%d" % randint(1, 1000000))
+ global next_task_id
+ next_task_id += 1
+ fake_task = task._task("module", "func_fake%d" % next_task_id)
+ fake_task.task_files_re(orig_args)
+ return fake_task.param_generator_func, fake_task
+
+ def files_re (self, *old_args):
+ """
+ This extra function is to simulate the forwarding from the decorator to
+ the task creation function
+ """
+ return list(p1 for (p1, ps) in self.get_param_iterator(*old_args)[0](None))
+ #return list(self.get_param_iterator (*old_args)(None))
+
+ def check_input_files_exist(self, *old_args):
+ """
+ This extra function is to simulate the forwarding from the decorator to
+ the task creation function
+ """
+ it = self.get_param_iterator(*old_args)[0]
+ for param, param2 in it(None):
+ check_input_files_exist (*param)
+ return True
+
+ def needs_update_check_modify_time(self, *old_args):
+ """
+ This extra function is to simulate the forwarding from the decorator to
+ the task creation function
+ """
+ it, task = self.get_param_iterator(*old_args)
+ #print >> sys.stderr, [p for (p, param2) in it(None)], "??"
+ return [needs_update_check_modify_time (*p, task=task,
+ job_history = open_job_history(history_file)) for (p, param2) in it(None)]
+
+
+ def test_combine(self):
+ """
+ test combining operator
+ """
+ paths = self.files_re(test_path + "/*", r"(.*).test$", combine(r"\1.input"), r"\1.output")
+ self.assertEqual(recursive_replace(paths, test_path, "DIR"),
+ [
+ (('DIR/f0.input',), 'DIR/f0.output'),
+ (('DIR/f1.input',), 'DIR/f1.output'),
+ (('DIR/f2.input',), 'DIR/f2.output'),
+ ]
+ )
+ paths = self.files_re(test_path + "/*", "(.*).test$", combine(r"\1.input"), r"combined.output")
+ self.assertEqual(recursive_replace(paths, test_path, "DIR"),
+ [(('DIR/f0.input',
+ 'DIR/f1.input',
+ 'DIR/f2.input'), 'combined.output')])
+
+
+ def test_glob(self):
+ """
+ test globbed form
+ """
+ #
+ # simple 1 input, 1 output
+ #
+ paths = self.files_re(test_path + "/*", "(.*).test$", r"\1.input", r"\1.output")
+ self.assertEqual(recursive_replace(paths, test_path, "DIR"),
+ [('DIR/f0.input', 'DIR/f0.output'),
+ ('DIR/f1.input', 'DIR/f1.output'),
+ ('DIR/f2.input', 'DIR/f2.output')])
+ self.assertTrue(self.check_input_files_exist(test_path + "/*", "(.*).test$",
+ r"\1.test", r"\1.output"))
+
+
+ #
+ # nested forms
+ #
+ paths = self.files_re(test_path + "/*", "(.*).test$", [r"\1.input",2,["something", r"\1"]], r"\1.output", r"\1.extra", 5)
+ self.assertEqual(recursive_replace(paths, test_path, "DIR"),
+ [(['DIR/f0.input', 2, ['something', 'DIR/f0']], 'DIR/f0.output', 'DIR/f0.extra', 5),
+ (['DIR/f1.input', 2, ['something', 'DIR/f1']], 'DIR/f1.output', 'DIR/f1.extra', 5),
+ (['DIR/f2.input', 2, ['something', 'DIR/f2']], 'DIR/f2.output', 'DIR/f2.extra', 5)])
+
+ #
+ # only output
+ #
+ paths = self.files_re(test_path + "/*", ".*/(.*).test$", r"\1.output")
+ self.assertEqual(recursive_replace(paths, test_path, "DIR"),
+ [('DIR/f0.test', 'f0.output'),
+ ('DIR/f1.test', 'f1.output'),
+ ('DIR/f2.test', 'f2.output')])
+
+ def test_globbed_up_to_date(self):
+ """
+ test up-to-date checking with globbed form
+ """
+ #
+ # check simple is up to date
+ #
+ self.assertEqual([res[0] for res in self.needs_update_check_modify_time(test_path + "/*",
+ "(.*).test$", r"\1.output")], [True, False, False])
+ #
+ # check complex is up to date
+ #
+ self.assertEqual([res[0] for res in self.needs_update_check_modify_time(test_path + "/*",
+ "(.*).test$", [1,2,[[r"\1.output",
+ r"\1.output"]]])], [True, False, False])
+
+ def test_filelist(self):
+ """
+ test file list form
+ """
+ file_list = ["DIR/f0.test", "DIR/f1.test", "DIR/f2.test"]
+ #
+ # simple 1 input, 1 output
+ #
+ paths = self.files_re(file_list, r"(.*).test$", r"\1.input", r"\1.output")
+ self.assertEqual(recursive_replace(paths, test_path, "DIR"),
+ [('DIR/f0.input', 'DIR/f0.output'),
+ ('DIR/f1.input', 'DIR/f1.output'),
+ ('DIR/f2.input', 'DIR/f2.output')])
+
+ #
+ # nested forms
+ #
+ paths = self.files_re(file_list, "(.*).test$", [r"\1.input",2,["something", r"\1"]], r"\1.output", r"\1.extra", 5)
+ self.assertEqual(recursive_replace(paths, test_path, "DIR"),
+ [(['DIR/f0.input', 2, ['something', 'DIR/f0']], 'DIR/f0.output', 'DIR/f0.extra', 5),
+ (['DIR/f1.input', 2, ['something', 'DIR/f1']], 'DIR/f1.output', 'DIR/f1.extra', 5),
+ (['DIR/f2.input', 2, ['something', 'DIR/f2']], 'DIR/f2.output', 'DIR/f2.extra', 5)])
+
+ #
+ # only output
+ #
+ paths = self.files_re(file_list, ".*/(.*).test$", r"\1.output")
+ self.assertEqual(recursive_replace(paths, test_path, "DIR"),
+ [('DIR/f0.test', 'f0.output'),
+ ('DIR/f1.test', 'f1.output'),
+ ('DIR/f2.test', 'f2.output')])
+
+
+ def test_tasks(self):
+ """
+ test whether tasks can be used to specify dependencies
+ """
+
+ paths = self.files_re(task.output_from("module.func1",
+ "module.func2",
+ "module.func3",
+ "module.func4",
+ "module.func5"), r"(.test)", r"\1.yes")
+ self.assertEqual(paths,
+ [
+ ((2, 'output5.test'), 'output5.test.yes'),
+ (['output4.test', 'output.ignored'], 'output4.test.yes'),
+ ('output1.test', 'output1.test.yes'),
+ ('output2.test', 'output2.test.yes'),
+ ('output3.test', 'output3.test.yes'),
+ ])
+
+
+ paths = self.files_re(task.output_from("module.func2"), r"(.ignored)", r"\1.yes")
+ self.assertEqual(paths,
+ [('output.ignored', 'output.ignored.yes')])
+ paths = self.files_re([task.output_from("module.func2")], r"(.ignored)", r"\1.yes")
+ self.assertEqual(paths,
+ [('output.ignored', 'output.ignored.yes')])
+
+
+
+
+
+
+
+
+
+
+
+
+
+#=========================================================================================
+
+# split_param_factory
+
+#=========================================================================================
+
+class Test_split_param_factory(unittest.TestCase):
+ def setUp(self):
+ if not os.path.exists(test_path):
+ os.makedirs(test_path)
+ touch("%s/f%d.output" % (test_path, 0))
+ for i in range(3):
+ touch("%s/f%d.test" % (test_path, i))
+ time.sleep(0.1)
+ touch("%s/f%d.output" % (test_path, 1))
+ touch("%s/f%d.output" % (test_path, 2))
+
+ self.tasks = [t1, t2, t3, t4, t5]
+
+
+ def tearDown(self):
+ for i in range(3):
+ os.unlink("%s/f%d.test" % (test_path, i))
+ for i in range(3):
+ os.unlink("%s/f%d.output" % (test_path, i))
+ os.removedirs(test_path)
+ pass
+
+
+
+
+ #_____________________________________________________________________________
+
+ # wrappers
+
+ #_____________________________________________________________________________
+ def get_param_iterator (self, *orig_args):
+ #
+ # replace function / function names with tasks
+ #
+ # fake virgin task
+ fake_task = task._task("module", "func_fake%d" % randint(1, 1000000))
+ fake_task.task_split(orig_args)
+ return fake_task.param_generator_func
+
+
+ def do_task_split (self, *old_args):
+ """
+ This extra function is to simulate the forwarding from the decorator to
+ the task creation function
+ """
+ # extra dereference because we are only interested in the first (only) job
+ #return list(self.get_param_iterator (*old_args)(None))[0]
+ return list(p1 for (p1, ps) in self.get_param_iterator (*old_args)(None))[0]
+
+
+
+ def test_glob(self):
+ """
+ test globbed form
+ """
+ #
+ # simple 1 input, 1 output
+ #
+ paths = self.do_task_split(test_path + "/*", [exe_path + "/a*.py", exe_path + "/r*.py"])
+ self.assertEqual(recursive_replace(recursive_replace(paths, test_path, "DIR"), exe_path, "DIR"),
+ ( ['DIR/f0.output',
+ 'DIR/f0.test',
+ 'DIR/f1.output',
+ 'DIR/f1.test',
+ 'DIR/f2.output',
+ 'DIR/f2.test',
+ ],
+ ['DIR/adjacent_pairs_iterate.py',
+ 'DIR/ruffus_exceptions.py',
+ 'DIR/ruffus_utility.py',
+ 'DIR/ruffus_version.py'
+ ] ))
+ def test_tasks(self):
+ """
+ test whether tasks can be used to specify dependencies
+ """
+
+ paths = self.do_task_split([task.output_from("module.func1", # input params
+ "module.func2",
+ "module.func3",
+ "module.func4",
+ "module.func5"),
+ test_path + "/*"],
+ [exe_path + "/a*.py", # output params
+ exe_path + "/r*.py",
+ "extra.file"],
+ 6) # extra params
+ paths = recursive_replace(recursive_replace(paths, test_path, "DIR"), exe_path, "DIR")
+ self.assertEqual(paths,
+ ([
+ 5,
+ ['output4.test', 'output.ignored'],
+ 'output1.test',
+ 'output2.test',
+ 'output3.test',
+ 'output.ignored',
+ (2, 'output5.test'),
+ 'DIR/f0.output',
+ 'DIR/f0.test',
+ 'DIR/f1.output',
+ 'DIR/f1.test',
+ 'DIR/f2.output',
+ 'DIR/f2.test'],
+ ['DIR/adjacent_pairs_iterate.py',
+ 'DIR/ruffus_exceptions.py',
+ 'DIR/ruffus_utility.py',
+ 'DIR/ruffus_version.py',
+ 'extra.file'],
+ 6))
+
+
+ # single job output consisting of a single file
+ paths = self.do_task_split(task.output_from("module.func2"), exe_path + "/a*.py")
+ paths = recursive_replace(recursive_replace(paths, test_path, "DIR"), exe_path, "DIR_E")
+ self.assertEqual(paths, ('output.ignored', ['DIR_E/adjacent_pairs_iterate.py']))
+
+ paths = self.do_task_split([task.output_from("module.func2")], exe_path + "/a*.py")
+ paths = recursive_replace(recursive_replace(paths, test_path, "DIR"), exe_path, "DIR_E")
+ self.assertEqual(paths, (['output.ignored'], ['DIR_E/adjacent_pairs_iterate.py']))
+
+ # single job output consisting of a list
+ paths = self.do_task_split(task.output_from("module.func4"), exe_path + "/a*.py")
+ paths = recursive_replace(recursive_replace(paths, test_path, "DIR"), exe_path, "DIR_E")
+ self.assertEqual(paths, ((2, 'output5.test'), ['DIR_E/adjacent_pairs_iterate.py']) )
+
+ paths = self.do_task_split([task.output_from("module.func4")], exe_path + "/a*.py")
+ paths = recursive_replace(recursive_replace(paths, test_path, "DIR"), exe_path, "DIR_E")
+ self.assertEqual(paths, ([(2, 'output5.test')], ['DIR_E/adjacent_pairs_iterate.py']))
+
+#=========================================================================================
+
+# merge_param_factory
+
+#=========================================================================================
+
+class Test_merge_param_factory(unittest.TestCase):
+ def setUp(self):
+ if not os.path.exists(test_path):
+ os.makedirs(test_path)
+ touch("%s/f%d.output" % (test_path, 0))
+ for i in range(3):
+ touch("%s/f%d.test" % (test_path, i))
+ time.sleep(0.1)
+ touch("%s/f%d.output" % (test_path, 1))
+ touch("%s/f%d.output" % (test_path, 2))
+
+ self.tasks = [t1, t2, t3, t4, t5]
+
+
+ def tearDown(self):
+ for i in range(3):
+ os.unlink("%s/f%d.test" % (test_path, i))
+ for i in range(3):
+ os.unlink("%s/f%d.output" % (test_path, i))
+ os.removedirs(test_path)
+ pass
+
+
+
+
+ #_____________________________________________________________________________
+
+ # wrappers
+
+ #_____________________________________________________________________________
+ def get_param_iterator (self, *orig_args):
+ #
+ # replace function / function names with tasks
+ #
+ # fake virgin task
+ fake_task = task._task("module", "func_fake%d" % randint(1, 1000000))
+ fake_task.task_merge(orig_args)
+ return fake_task.param_generator_func
+
+ def do_task_merge (self, *old_args):
+ """
+ This extra function is to simulate the forwarding from the decorator to
+ the task creation function
+ """
+ # extra dereference because we are only interested in the first (only) job
+ return list(p1 for (p1, ps) in self.get_param_iterator (*old_args)(None))[0]
+
+
+
+ def test_glob(self):
+ """
+ test globbed form
+ """
+ #
+ # simple 1 input, 1 output
+ #
+ paths = self.do_task_merge(test_path + "/*",
+ ["test1", # output params
+ "test2",
+ "extra.file"])
+ paths = recursive_replace(paths, test_path, "DIR")
+ self.assertEqual(paths,
+ ( ['DIR/f0.output',
+ 'DIR/f0.test',
+ 'DIR/f1.output',
+ 'DIR/f1.test',
+ 'DIR/f2.output',
+ 'DIR/f2.test',
+ ],
+ ["test1",
+ "test2",
+ "extra.file"]
+ ))
+ def test_tasks(self):
+ """
+ test whether tasks can be used to specify dependencies
+ """
+
+ paths = self.do_task_merge([task.output_from("module.func1", # input params
+ "module.func2",
+ "module.func3",
+ "module.func4",
+ "module.func5"),
+ test_path + "/*"],
+ ["test1", # output params
+ "test2",
+ "extra.file"],
+ 6) # extra params
+ paths = recursive_replace(paths, test_path, "DIR")
+ self.assertEqual(paths,
+ ([
+ 5,
+ ['output4.test', 'output.ignored'],
+ 'output1.test',
+ 'output2.test',
+ 'output3.test',
+ 'output.ignored',
+ (2, 'output5.test'),
+ 'DIR/f0.output',
+ 'DIR/f0.test',
+ 'DIR/f1.output',
+ 'DIR/f1.test',
+ 'DIR/f2.output',
+ 'DIR/f2.test'],
+ ['test1', # output params
+ 'test2',
+ 'extra.file'],
+ 6))
+ paths = self.do_task_merge(task.output_from("module.func2"), "output", "extra")
+ paths = self.do_task_merge(task.output_from("module.func1", "module.func2"), "output", "extra")
+
+ # single job output consisting of a single file
+ paths = self.do_task_merge(task.output_from("module.func2"), "output")
+ paths = recursive_replace(recursive_replace(paths, test_path, "DIR"), exe_path, "DIR_E")
+ self.assertEqual(paths, ('output.ignored', 'output'))
+
+ paths = self.do_task_merge([task.output_from("module.func2")], "output")
+ paths = recursive_replace(recursive_replace(paths, test_path, "DIR"), exe_path, "DIR_E")
+ self.assertEqual(paths, (['output.ignored'], 'output'))
+
+ # single job output consisting of a list
+ paths = self.do_task_merge(task.output_from("module.func4"), "output")
+ paths = recursive_replace(recursive_replace(paths, test_path, "DIR"), exe_path, "DIR_E")
+ self.assertEqual(paths, ((2, 'output5.test'), 'output'))
+
+ paths = self.do_task_merge([task.output_from("module.func4")], "output")
+ paths = recursive_replace(recursive_replace(paths, test_path, "DIR"), exe_path, "DIR_E")
+ self.assertEqual(paths, ([(2, 'output5.test')], 'output'))
+
+#=========================================================================================
+
+# transform_param_factory
+
+#=========================================================================================
+
+class Test_transform_param_factory(unittest.TestCase):
+ def setUp(self):
+ if not os.path.exists(test_path):
+ os.makedirs(test_path)
+ touch("%s/f%d.output" % (test_path, 0))
+ for i in range(3):
+ touch("%s/f%d.test" % (test_path, i))
+ time.sleep(0.1)
+ touch("%s/f%d.output" % (test_path, 1))
+ touch("%s/f%d.output" % (test_path, 2))
+
+ self.tasks = [t1, t2, t3, t4, t5]
+ self.maxDiff = None
+
+
+ def tearDown(self):
+ for i in range(3):
+ os.unlink("%s/f%d.test" % (test_path, i))
+ for i in range(3):
+ os.unlink("%s/f%d.output" % (test_path, i))
+ os.removedirs(test_path)
+ pass
+
+
+
+
+ #_____________________________________________________________________________
+
+ # wrappers
+
+ #_____________________________________________________________________________
+ def get_param_iterator (self, *orig_args):
+ #
+ # replace function / function names with tasks
+ #
+ # fake virgin task
+ fake_task = task._task("module", "func_fake%d" % randint(1, 1000000))
+ fake_task.task_transform(orig_args)
+ return fake_task.param_generator_func
+
+
+ def do_task_transform (self, *old_args):
+ """
+ This extra function is to simulate the forwarding from the decorator to
+ the task creation function
+ """
+ # extra dereference because we are only interested in the first (only) job
+ return list(p1 for (p1, ps) in self.get_param_iterator (*old_args)(None))
+
+
+ def test_simple(self):
+ """
+ test simple_form
+ """
+ #
+ # simple 1 input, 1 output
+ #
+ paths = self.do_task_transform("a.test", task.regex("a(.+)"), r"b\1")
+
+ self.assertEqual(paths,
+ [('a.test', 'b.test')] )
+
+
+ def test_suffix(self):
+ """
+ test suffix transform with globs
+ """
+ #
+ # simple 1 input, 1 output
+ #
+ paths = self.do_task_transform(test_path + "/*.test", task.suffix(".test"),
+ [".output1", ".output2"], ".output3")
+
+ paths = recursive_replace(paths, test_path, "DIR")
+ self.assertEqual(paths,
+ [
+ ('DIR/f0.test', ['DIR/f0.output1', 'DIR/f0.output2'], ".output3"),
+ ('DIR/f1.test', ['DIR/f1.output1', 'DIR/f1.output2'], ".output3"),
+ ('DIR/f2.test', ['DIR/f2.output1', 'DIR/f2.output2'], ".output3"),
+ ])
+ def test_formatter(self):
+ """
+ test formatter transform with globs
+ """
+ #
+ # simple 1 input, 1 output
+ #
+ paths = self.do_task_transform(test_path + "/*.test",
+ task.formatter("/(?P<name>\w+).test$"),
+ ["{path[0]}/{name[0]}.output1{ext[0]}", "{path[0]}/{name[0]}.output2"], "{path[0]}/{name[0]}.output3")
+ #["{0[path][0]}"], ".txt")
+
+ paths = recursive_replace(paths, test_path, "DIR")
+ self.assertEqual(paths,
+ [
+ ('DIR/f0.test', ['DIR/f0.output1.test', 'DIR/f0.output2'], "DIR/f0.output3"),
+ ('DIR/f1.test', ['DIR/f1.output1.test', 'DIR/f1.output2'], "DIR/f1.output3"),
+ ('DIR/f2.test', ['DIR/f2.output1.test', 'DIR/f2.output2'], "DIR/f2.output3"),
+ ])
+ def test_regex(self):
+ """
+ test regex transform with globs
+ """
+ #
+ # simple 1 input, 1 output
+ #
+ paths = self.do_task_transform(test_path + "/*.test", task.regex(r"(.*)\.test"),
+ [r"\1.output1", r"\1.output2"], r"\1.output3")
+
+ paths = recursive_replace(paths, test_path, "DIR")
+ self.assertEqual(paths,
+ [
+ ('DIR/f0.test', ['DIR/f0.output1', 'DIR/f0.output2'], "DIR/f0.output3"),
+ ('DIR/f1.test', ['DIR/f1.output1', 'DIR/f1.output2'], "DIR/f1.output3"),
+ ('DIR/f2.test', ['DIR/f2.output1', 'DIR/f2.output2'], "DIR/f2.output3"),
+ ])
+
+ def test_inputs(self):
+ """
+ test transform with inputs in both regex and suffix forms
+ """
+ #
+ # simple 1 input, 1 output
+ #
+ #
+ paths = self.do_task_transform(test_path + "/*.test", task.regex(r"(.*)\.test"),
+ task.inputs(r"\1.testwhat"),
+ [r"\1.output1", r"\1.output2"])
+
+ paths = recursive_replace(paths, test_path, "DIR")
+ self.assertEqual(paths,
+ [
+ ('DIR/f0.testwhat', ['DIR/f0.output1', 'DIR/f0.output2']),
+ ('DIR/f1.testwhat', ['DIR/f1.output1', 'DIR/f1.output2']),
+ ('DIR/f2.testwhat', ['DIR/f2.output1', 'DIR/f2.output2']),
+ ])
+ paths = self.do_task_transform(test_path + "/*.test", task.suffix(".test"),
+ task.inputs(r"a.testwhat"),
+ [".output1", ".output2"], ".output3")
+
+ paths = recursive_replace(paths, test_path, "DIR")
+ self.assertEqual(paths,
+ [
+ ('a.testwhat', ['DIR/f0.output1', 'DIR/f0.output2'], '.output3'),
+ ('a.testwhat', ['DIR/f1.output1', 'DIR/f1.output2'], '.output3'),
+ ('a.testwhat', ['DIR/f2.output1', 'DIR/f2.output2'], '.output3')])
+ #
+ # add inputs
+ #
+ #
+ paths = self.do_task_transform(test_path + "/*.test", task.regex(r"(.*)\.test"),
+ task.add_inputs(r"\1.testwhat"),
+ [r"\1.output1", r"\1.output2"])
+
+ paths = recursive_replace(paths, test_path, "DIR")
+ self.assertEqual(paths,
+ [
+ (('DIR/f0.test','DIR/f0.testwhat'), ['DIR/f0.output1', 'DIR/f0.output2']),
+ (('DIR/f1.test','DIR/f1.testwhat'), ['DIR/f1.output1', 'DIR/f1.output2']),
+ (('DIR/f2.test','DIR/f2.testwhat'), ['DIR/f2.output1', 'DIR/f2.output2']),
+ ])
+ paths = self.do_task_transform(test_path + "/*.test", task.suffix(".test"),
+ task.add_inputs(r"a.testwhat"),
+ [".output1", ".output2"], ".output3")
+
+ paths = recursive_replace(paths, test_path, "DIR")
+ self.assertEqual(paths,
+ [
+ (('DIR/f0.test','a.testwhat'), ['DIR/f0.output1', 'DIR/f0.output2'], '.output3'),
+ (('DIR/f1.test','a.testwhat'), ['DIR/f1.output1', 'DIR/f1.output2'], '.output3'),
+ (('DIR/f2.test','a.testwhat'), ['DIR/f2.output1', 'DIR/f2.output2'], '.output3')])
+
+ def test_tasks(self):
+ """
+ test whether tasks can be used to specify dependencies
+ """
+
+ paths = self.do_task_transform([task.output_from( "module.func1", # input params
+ "module.func2",
+ "module.func3",
+ "module.func4",
+ "module.func5"),
+ test_path + "/*.test"],
+ task.regex(r"(.*)\.test"),
+ [r"\1.output1", r"\1.output2"], r"\1.output3")
+ paths = recursive_replace(paths, test_path, "DIR")
+ self.assertEqual(paths,
+ [
+ ((2, 'output5.test'), ['output5.output1', 'output5.output2'], 'output5.output3'),
+ ('DIR/f0.test', ['DIR/f0.output1', 'DIR/f0.output2'], 'DIR/f0.output3'),
+ ('DIR/f1.test', ['DIR/f1.output1', 'DIR/f1.output2'], 'DIR/f1.output3'),
+ ('DIR/f2.test', ['DIR/f2.output1', 'DIR/f2.output2'], 'DIR/f2.output3'),
+ (['output4.test', 'output.ignored'], ['output4.output1', 'output4.output2'], 'output4.output3'),
+ ('output1.test', ['output1.output1', 'output1.output2'], 'output1.output3'),
+ ('output2.test', ['output2.output1', 'output2.output2'], 'output2.output3'),
+ ('output3.test', ['output3.output1', 'output3.output2'], 'output3.output3'),
+ ])
+
+
+ # single job output consisting of a single file
+ paths = self.do_task_transform(task.output_from("module.func2"), task.regex(r"(.*)\..*"), r"\1.output")
+ paths = recursive_replace(recursive_replace(paths, test_path, "DIR"), exe_path, "DIR_E")
+ self.assertEqual(paths, [('output.ignored', 'output.output')])
+
+
+
+ # Same output if task specified as part of a list of tasks
+ paths = self.do_task_transform([task.output_from("module.func2")], task.regex(r"(.*)\..*"), "output")
+ paths = recursive_replace(recursive_replace(paths, test_path, "DIR"), exe_path, "DIR_E")
+ self.assertEqual(paths, [('output.ignored', 'output')])
+
+ # single job output consisting of a list
+ paths = self.do_task_transform(task.output_from("module.func4"), task.regex(r"(.*)\..*"), "output")
+ paths = recursive_replace(recursive_replace(paths, test_path, "DIR"), exe_path, "DIR_E")
+ self.assertEqual(paths, [((2, 'output5.test'), 'output')] )
+
+ # Same output if task specified as part of a list of tasks
+ paths = self.do_task_transform([task.output_from("module.func4")], task.regex(r"(.*)\..*"), "output")
+ paths = recursive_replace(recursive_replace(paths, test_path, "DIR"), exe_path, "DIR_E")
+ self.assertEqual(paths, [((2, 'output5.test'), 'output')] )
+
+#
+#
+#=========================================================================================
+
+# collate_param_factory
+
+#=========================================================================================
+
+class Test_collate_param_factory(unittest.TestCase):
+ def setUp(self):
+ if not os.path.exists(test_path):
+ os.makedirs(test_path)
+ touch("%s/f%d.output" % (test_path, 0))
+ touch("%s/e%d.output" % (test_path, 0))
+ for i in range(3):
+ touch("%s/f%d.test" % (test_path, i))
+ for i in range(3):
+ touch("%s/e%d.test" % (test_path, i))
+ time.sleep(0.1)
+ touch("%s/f%d.output" % (test_path, 1))
+ touch("%s/f%d.output" % (test_path, 2))
+ touch("%s/e%d.output" % (test_path, 1))
+ touch("%s/e%d.output" % (test_path, 2))
+
+ self.tasks = [t1, t2, t3, t4, t5]
+
+
+ def tearDown(self):
+ for i in range(3):
+ os.unlink("%s/f%d.test" % (test_path, i))
+ os.unlink("%s/e%d.test" % (test_path, i))
+ for i in range(3):
+ os.unlink("%s/f%d.output" % (test_path, i))
+ os.unlink("%s/e%d.output" % (test_path, i))
+ os.removedirs(test_path)
+ pass
+
+
+
+
+ #_____________________________________________________________________________
+
+ # wrappers
+
+ #_____________________________________________________________________________
+ def get_param_iterator (self, *orig_args):
+ #
+ # replace function / function names with tasks
+ #
+ # fake virgin task
+ fake_task = task._task("module", "func_fake%d" % randint(1, 1000000))
+ fake_task.task_collate(orig_args)
+ return fake_task.param_generator_func
+
+
+ def do_task_collate (self, *old_args):
+ """
+ This extra function is to simulate the forwarding from the decorator to
+ the task creation function
+ """
+ # extra dereference because we are only interested in the first (only) job
+ return list(p1 for (p1, ps) in self.get_param_iterator (*old_args)(None))
+
+
+ def test_regex(self):
+ """
+ test regex collate with globs
+ """
+ paths = self.do_task_collate(test_path + "/*", task.regex(r"(.*).test$"), r"\1.output")
+ self.assertEqual(recursive_replace(paths, test_path, "DIR"),
+ [
+ (('DIR/e0.test',), 'DIR/e0.output'),
+ (('DIR/e1.test',), 'DIR/e1.output'),
+ (('DIR/e2.test',), 'DIR/e2.output'),
+ (('DIR/f0.test',), 'DIR/f0.output'),
+ (('DIR/f1.test',), 'DIR/f1.output'),
+ (('DIR/f2.test',), 'DIR/f2.output'),
+ ]
+ )
+ paths = self.do_task_collate(test_path + "/*", task.regex("(.*).test$"), task.inputs(r"\1.input2"), r"combined.output")
+ self.assertEqual(recursive_replace(paths, test_path, "DIR"),
+ [((
+ 'DIR/e0.input2',
+ 'DIR/e1.input2',
+ 'DIR/e2.input2',
+ 'DIR/f0.input2',
+ 'DIR/f1.input2',
+ 'DIR/f2.input2',
+ ), 'combined.output')])
+
+ #
+ # simple 1 input, 1 output
+ #
+ paths = self.do_task_collate(test_path + "/*.test", task.regex(r"(.*/[ef]).*\.test"),
+ [r"\1.output1", r"\1.output2"], r"\1.extra")
+
+ paths = recursive_replace(paths, test_path, "DIR")
+ self.assertEqual(paths,
+ [
+ (
+ ('DIR/e0.test', 'DIR/e1.test', 'DIR/e2.test'), # input
+ ['DIR/e.output1', 'DIR/e.output2'], # output
+ 'DIR/e.extra' # extra
+ ),
+ (
+ ('DIR/f0.test', 'DIR/f1.test', 'DIR/f2.test'), # input
+ ['DIR/f.output1', 'DIR/f.output2'], # output
+ 'DIR/f.extra' # extra
+ )
+ ] )
+ def test_inputs(self):
+ """
+ test collate with task.inputs
+ """
+ #
+ # collating using inputs
+ #
+ paths = self.do_task_collate(test_path + "/*.test", task.regex(r"(.*/[ef])(.).*\.test"),
+ task.inputs(r"\1\2.whoopee"), [r"\1.output1", r"\1.output2"], r"\1.extra")
+
+ paths = recursive_replace(paths, test_path, "DIR")
+ self.assertEqual(paths,
+ [
+ (
+ ('DIR/e0.whoopee', 'DIR/e1.whoopee', 'DIR/e2.whoopee'), # input
+ ['DIR/e.output1', 'DIR/e.output2'], # output
+ 'DIR/e.extra' # extra
+ ),
+ (
+ ('DIR/f0.whoopee', 'DIR/f1.whoopee', 'DIR/f2.whoopee'), # input
+ ['DIR/f.output1', 'DIR/f.output2'], # output
+ 'DIR/f.extra' # extra
+ )
+ ])
+ #
+ # collating using inputs where some files do not match regex
+ #
+ paths = self.do_task_collate(test_path + "/*.test", task.regex(r"(.*/f)[a-z0-9]+\.test"),
+ task.inputs(r"\1.whoopee"), [r"\1.output1", r"\1.output2"], r"\1.extra")
+
+ paths = recursive_replace(paths, test_path, "DIR")
+ self.assertEqual(paths,
+ [(('DIR/f.whoopee',), ['DIR/f.output1', 'DIR/f.output2'], 'DIR/f.extra')])
+
+
+ #
+ # collating using inputs where multiple copies of the same input names are removed
+ #
+ paths = self.do_task_collate(test_path + "/*.test", task.regex(r"(.*/[ef])[a-z0-9]+\.test"),
+ task.inputs(r"\1.whoopee"), [r"\1.output1", r"\1.output2"], r"\1.extra")
+
+ paths = recursive_replace(paths, test_path, "DIR")
+ self.assertEqual(paths,
+ [
+ (
+ ('DIR/e.whoopee',), # input
+ ['DIR/e.output1', 'DIR/e.output2'], # output
+ 'DIR/e.extra' # extra
+ ),
+ (
+ ('DIR/f.whoopee',), # input
+ ['DIR/f.output1', 'DIR/f.output2'], # output
+ 'DIR/f.extra' # extra
+ )
+ ] )
+
+ #
+ # test python set object. Note that set is constructed with the results of the substitution
+ #
+ paths = self.do_task_collate(test_path + "/*.test", task.regex(r"(.*/[ef])[a-z0-9]+\.test"),
+ task.inputs(r"\1.whoopee"), set([r"\1.output1", r"\1.output2", test_path + "/e.output2"]), r"\1.extra")
+
+ paths = recursive_replace(paths, test_path, "DIR")
+ self.assertEqual(paths,
+ [
+ (
+ ('DIR/e.whoopee',), # input
+ set(['DIR/e.output1', 'DIR/e.output2']), # output
+ 'DIR/e.extra' # extra
+ ),
+ (
+ ('DIR/f.whoopee',), # input
+ set(['DIR/f.output1', 'DIR/f.output2', 'DIR/e.output2']), # output
+ 'DIR/f.extra' # extra
+ )
+ ])
+
+ def test_tasks(self):
+ """
+ test whether tasks can be used to specify dependencies
+ """
+
+ paths = self.do_task_collate([task.output_from( "module.func1", # input params
+ "module.func2",
+ "module.func3",
+ "module.func4",
+ "module.func5"),
+ test_path + "/*.test"],
+ task.regex(r"(.*[oef])[a-z0-9]+\.test"),
+ [r"\1.output1", r"\1.output2"], r"\1.extra")
+ paths = recursive_replace(paths, test_path, "DIR")
+ self.assertEqual(paths,
+ [
+ (('DIR/e0.test', 'DIR/e1.test', 'DIR/e2.test'), ['DIR/e.output1', 'DIR/e.output2'], 'DIR/e.extra'),
+ (('DIR/f0.test', 'DIR/f1.test', 'DIR/f2.test'), ['DIR/f.output1', 'DIR/f.output2'], 'DIR/f.extra'),
+ (((2, 'output5.test'), ['output4.test', 'output.ignored'], 'output1.test', 'output2.test', 'output3.test'), ['o.output1', 'o.output2'], 'o.extra')
+ ])
+
+ paths = self.do_task_collate([task.output_from( "module.func1", # input params
+ "module.func2",
+ "module.func3",
+ "module.func4",
+ "module.func5"),
+ test_path + "/*.test"],
+ task.regex(r"(.*[oef])[a-z0-9]+\.test"),
+ task.inputs(r"\1.whoopee"),
+ [r"\1.output1", r"\1.output2"], r"\1.extra")
+ paths = recursive_replace(paths, test_path, "DIR")
+ self.assertEqual(paths,
+ [
+ (
+ ("DIR/e.whoopee",), # input
+ ["DIR/e.output1", "DIR/e.output2"], # output
+ "DIR/e.extra" # extra
+ ),
+ (
+ ("DIR/f.whoopee",), # input
+ ["DIR/f.output1", "DIR/f.output2"], # output
+ "DIR/f.extra" # extra
+ ),
+ (
+ ("o.whoopee",), # input
+ ["o.output1", "o.output2"], # output
+ "o.extra" # extra
+ )
+ ] )
+
+
+ # single job output consisting of a single file
+ paths = self.do_task_collate(task.output_from("module.func2"), task.regex(r"(.*)\..*"), r"\1.output")
+ paths = recursive_replace(recursive_replace(paths, test_path, "DIR"), exe_path, "DIR_E")
+ #print dumps(paths, indent = 4)
+ self.assertEqual(paths, [(('output.ignored',), 'output.output')])
+
+
+ # Same output if task specified as part of a list of tasks
+ paths = self.do_task_collate([task.output_from("module.func2")], task.regex(r"(.*)\..*"), "output")
+ paths = recursive_replace(recursive_replace(paths, test_path, "DIR"), exe_path, "DIR_E")
+ self.assertEqual(paths, [(('output.ignored',), 'output')])
+
+ #
+ # single job output consisting of a list
+ #
+ paths = self.do_task_collate(task.output_from("module.func4"), task.regex(r"(.*)\..*"), "output")
+ paths = recursive_replace(recursive_replace(paths, test_path, "DIR"), exe_path, "DIR_E")
+ self.assertEqual(paths, [(((2, 'output5.test'),), 'output')])
+
+
+ # Same output if task specified as part of a list of tasks
+ paths = self.do_task_collate([task.output_from("module.func4")], task.regex(r"(.*)\..*"), "output")
+ paths = recursive_replace(recursive_replace(paths, test_path, "DIR"), exe_path, "DIR_E")
+ self.assertEqual(paths, [(((2, 'output5.test'),), 'output')] )
+
+#=========================================================================================
+
+# files_param_factory
+
+#=========================================================================================
+
+class Test_files_param_factory(unittest.TestCase):
+ def setUp(self):
+ if not os.path.exists(test_path):
+ os.makedirs(test_path)
+ touch("%s/f%d.output" % (test_path, 0))
+ for i in range(3):
+ touch("%s/f%d.test" % (test_path, i))
+ time.sleep(0.1)
+ touch("%s/f%d.output" % (test_path, 1))
+ touch("%s/f%d.output" % (test_path, 2))
+
+ self.tasks = [t1, t2, t3, t4, t5]
+
+
+ def tearDown(self):
+ for i in range(3):
+ os.unlink("%s/f%d.test" % (test_path, i))
+ for i in range(3):
+ os.unlink("%s/f%d.output" % (test_path, i))
+ os.removedirs(test_path)
+ pass
+
+
+ def get_param_iterator (self, *orig_args):
+ #
+ # replace function / function names with tasks
+ #
+ # fake virgin task
+ fake_task = task._task("module", "func_fake%d" % randint(1, 1000000))
+ fake_task.task_files(orig_args)
+ return fake_task.param_generator_func
+
+ def files (self, *old_args):
+ """
+ This extra function is to simulate the forwarding from the decorator to
+ the task creation function
+ """
+ return list(p1 for (p1, ps) in self.get_param_iterator (*old_args)(None))
+
+ def _test_simple(self):
+ """
+ test simple_form
+ """
+ #
+ # simple 1 input, 1 output
+ #
+ paths = self.files("a.test", "b.test")
+ self.assertEqual(paths,
+ [('a.test', 'b.test')] )
+ def test_glob(self):
+ """
+ test globbed form
+ """
+ #
+ # Replacement of globs in first parameter
+ #
+ paths = self.files(test_path + "/*", "a.test", "b.test")
+ paths = recursive_replace(recursive_replace(paths, test_path, "DIR"), exe_path, "DIR_E")
+ self.assertEqual(paths,
+ [
+ (
+ [
+ "DIR/f0.output",
+ "DIR/f0.test",
+ "DIR/f1.output",
+ "DIR/f1.test",
+ "DIR/f2.output",
+ "DIR/f2.test"
+ ],
+ "a.test",
+ "b.test"
+ )
+ ])
+ #
+ # Replacement of globs in first parameter in-place
+ #
+ paths = self.files([test_path + "/*", "robbie.test"], "a.test", "b.test")
+ paths = recursive_replace(recursive_replace(paths, test_path, "DIR"), exe_path, "DIR_E")
+ self.assertEqual(paths,
+ [
+ (
+ [
+ "DIR/f0.output",
+ "DIR/f0.test",
+ "DIR/f1.output",
+ "DIR/f1.test",
+ "DIR/f2.output",
+ "DIR/f2.test",
+ "robbie.test"
+ ],
+ "a.test",
+ "b.test"
+ )
+ ])
+ #
+ # No Replacement of globs in other parameter of multi-job task
+ #
+ paths = self.files([[[test_path + "/*", "robbie.test"], "a.test", "b.test"], ["a.test", ["b.test", 2], "a.*"]])
+ paths = recursive_replace(recursive_replace(paths, test_path, "DIR"), exe_path, "DIR_E")
+ self.assertEqual(paths,
+ [
+ (
+ [
+ "DIR/f0.output",
+ "DIR/f0.test",
+ "DIR/f1.output",
+ "DIR/f1.test",
+ "DIR/f2.output",
+ "DIR/f2.test",
+ "robbie.test"
+ ],
+ "a.test",
+ "b.test"
+ ),
+ ("a.test", ["b.test", 2], "a.*")
+ ])
+
+
+ def _test_filelist(self):
+ """
+ test file list form
+ """
+ # simple list
+ file_list = ["DIR/f0.test", "DIR/f1.test", "DIR/f2.test"]
+ paths = self.files(*file_list)
+ self.assertEqual(paths, [
+ ("DIR/f0.test", "DIR/f1.test", "DIR/f2.test"),
+ ])
+
+ # complex list
+ file_list = [[["DIR/e0.test", set([5, "DIR/e1.test"]), "DIR/e2.test"], ["DIR/f0.test", "DIR/f1.test", "DIR/f2.test"]]]
+ paths = self.files(*file_list)
+ self.assertEqual(paths, [
+ ("DIR/e0.test", set([5, "DIR/e1.test"]), "DIR/e2.test"),
+ ("DIR/f0.test", "DIR/f1.test", "DIR/f2.test"),
+ ])
+
+ # bad list: missing list enclosure
+ file_list = [["DIR/f0.test", "DIR/f1.test", "DIR/f2.test"]]
+ self.assertRaises(error_task_files, self.files, *file_list)
+
+ # bad list: missing io
+ file_list = [[[1,2], ["DIR/e0.test", [5, "DIR/e1.test"], "DIR/e2.test"], ["DIR/f0.test", "DIR/f1.test", "DIR/f2.test"]]]
+ self.assertRaises(error_task_files, self.files, *file_list)
+
+ def _test_tasks(self):
+ """
+ test if can use tasks to specify dependencies
+ """
+ #
+ # substitution of tasks
+ #
+ paths = self.files(task.output_from( "module.func1",
+ "module.func2",
+ "module.func3",
+ "module.func4",
+ "module.func5"), "rob.test", "b.test")
+ self.assertEqual(paths, [([5, ['output4.test', 'output.ignored'], 'output1.test', 'output2.test', 'output3.test',
+ 'output.ignored', (2, 'output5.test')],
+ 'rob.test',
+ 'b.test')])
+
+ #
+ # nested in place substitution of tasks
+ #
+ paths = self.files([task.output_from("module.func1",
+ "module.func2",
+ "module.func3",
+ "module.func4",
+ "module.func5"),
+ "robbie.test"], "a.test", "b.test")
+ self.assertEqual(paths,
+ [([5, ['output4.test', 'output.ignored'], 'output1.test', 'output2.test', 'output3.test', 'output.ignored', (2, 'output5.test'), 'robbie.test'], 'a.test', 'b.test')] )
+
+ # single job output consisting of a single file
+ paths = self.files(task.output_from("module.func2"), "output", "extra")
+ self.assertEqual(paths, [('output.ignored', 'output', 'extra')])
+
+
+ # Different output if task specified as part of a list of tasks
+ paths = self.files([task.output_from("module.func2"), task.output_from("module.func2")], "output", "extra")
+ self.assertEqual(paths, [(['output.ignored','output.ignored'], 'output', 'extra')])
+
+ # single job output consisting of a list
+ paths = self.files(task.output_from("module.func4"), "output", "extra")
+ self.assertEqual(paths, [((2, 'output5.test'), 'output', 'extra')])
+
+ # Same output if task specified as part of a list of tasks
+ paths = self.files([task.output_from("module.func4"), task.output_from("module.func2")], "output", "extra")
+ self.assertEqual(paths, [([(2, 'output5.test'), 'output.ignored'], 'output', 'extra')])
+
+
+#
+#=========================================================================================
+
+# product_param_factory
+
+#=========================================================================================
+
+class Test_product_param_factory(unittest.TestCase):
+ def setUp(self):
+ if not os.path.exists(test_path):
+ os.makedirs(test_path)
+ touch("%s/a.test1" % (test_path))
+ touch("%s/b.test1" % (test_path))
+ touch("%s/c.test2" % (test_path))
+ touch("%s/d.test2" % (test_path))
+ touch("%s/a.testwhat1" % (test_path))
+ touch("%s/b.testwhat1" % (test_path))
+ touch("%s/c.testwhat2" % (test_path))
+ touch("%s/d.testwhat2" % (test_path))
+ time.sleep(0.1)
+ touch("%s/a.b.output" % (test_path))
+ touch("%s/a.c.output" % (test_path))
+ touch("%s/b.c.output" % (test_path))
+ touch("%s/b.d.output" % (test_path))
+
+ self.tasks = [t1, t2, t3, t4, t5]
+ self.maxDiff = None
+
+
+ def tearDown(self):
+ os.unlink("%s/a.test1" % (test_path))
+ os.unlink("%s/b.test1" % (test_path))
+ os.unlink("%s/c.test2" % (test_path))
+ os.unlink("%s/d.test2" % (test_path))
+ os.unlink("%s/a.testwhat1" % (test_path))
+ os.unlink("%s/b.testwhat1" % (test_path))
+ os.unlink("%s/c.testwhat2" % (test_path))
+ os.unlink("%s/d.testwhat2" % (test_path))
+ os.unlink("%s/a.b.output" % (test_path))
+ os.unlink("%s/a.c.output" % (test_path))
+ os.unlink("%s/b.c.output" % (test_path))
+ os.unlink("%s/b.d.output" % (test_path))
+
+
+ os.removedirs(test_path)
+ pass
+
+
+
+
+ #_____________________________________________________________________________
+
+ # wrappers
+
+ #_____________________________________________________________________________
+ def get_param_iterator (self, *orig_args):
+ #
+ # replace function / function names with tasks
+ #
+ # fake virgin task
+ fake_task = task._task("module", "func_fake%d" % randint(1, 1000000))
+ fake_task.task_product(orig_args)
+ return fake_task.param_generator_func
+
+
+ def do_task_product (self, *old_args):
+ """
+ This extra function is to simulate the forwarding from the decorator to
+ the task creation function
+ """
+ # extra dereference because we are only interested in the first (only) job
+ return list(p1 for (p1, ps) in self.get_param_iterator (*old_args)(None))
+
+
+ def test_simple(self):
+ """
+ test simple_form
+ """
+ #
+ # simple 1 input, 1 output
+ #
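+        # Note on the formatter() replacement strings used below (descriptive
+        # comment only; semantics inferred from the results asserted further
+        # down): each substitution is indexed first by input set and then by
+        # file within that set, so "{ID[0][0]}" is the named group ID captured
+        # from the first file of the first input set, while "{1[1][0]}" is
+        # unnamed regex group 1 captured from the first file of the second
+        # input set.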
+ paths = self.do_task_product([test_path + "/a.test1", test_path + "/b.test1"], task.formatter("(?P<ID>\w+)\.(.+)"),
+ [test_path + "/c.test2", test_path + "/d.test2", test_path + "/e.ignore"], task.formatter("(?P<ID>\w+)\.(test2)"),
+ r"{path[0][0]}/{ID[0][0]}.{1[1][0]}.output")
+
+ self.assertEqual(recursive_replace(paths, test_path, "DIR"),
+ [
+ (('DIR/a.test1','DIR/c.test2'),'DIR/a.c.output'),
+ (('DIR/a.test1','DIR/d.test2'),'DIR/a.d.output'),
+ (('DIR/b.test1','DIR/c.test2'),'DIR/b.c.output'),
+ (('DIR/b.test1','DIR/d.test2'),'DIR/b.d.output')
+ ]
+ )
+
+ def test_inputs(self):
+ """
+ test transform with inputs in both regex and suffix forms
+ """
+ #
+ # (replace) inputs
+ #
+ #
+ paths = self.do_task_product([test_path + "/a.test1", test_path + "/b.test1"], task.formatter("(?P<ID>\w+)\.(.+)"),
+ [test_path + "/c.test2", test_path + "/d.test2", test_path + "/e.ignore"], task.formatter("(?P<ID>\w+)\.(test2)"),
+ task.inputs(("{path[0][0]}/{basename[0][0]}.testwhat1", "{path[1][0]}/{basename[1][0]}.testwhat2") ),
+ r"{path[0][0]}/{ID[0][0]}.{1[1][0]}.output")
+ paths = recursive_replace(paths, test_path, "DIR")
+ self.assertEqual(paths,
+ [
+ (('DIR/a.testwhat1','DIR/c.testwhat2'),'DIR/a.c.output'),
+ (('DIR/a.testwhat1','DIR/d.testwhat2'),'DIR/a.d.output'),
+ (('DIR/b.testwhat1','DIR/c.testwhat2'),'DIR/b.c.output'),
+ (('DIR/b.testwhat1','DIR/d.testwhat2'),'DIR/b.d.output')
+ ]
+ )
+ #
+ # add inputs
+ #
+ #
+ paths = self.do_task_product([test_path + "/a.test1", test_path + "/b.test1"], task.formatter("(?P<ID>\w+)\.(.+)"),
+ [test_path + "/c.test2", test_path + "/d.test2", test_path + "/e.ignore"], task.formatter("(?P<ID>\w+)\.(test2)"),
+ task.add_inputs("{path[0][0]}/{basename[0][0]}.testwhat1", "{path[1][0]}/{basename[1][0]}.testwhat2", ),
+ r"{path[0][0]}/{ID[0][0]}.{1[1][0]}.output")
+
+ paths = recursive_replace(paths, test_path, "DIR")
+ self.assertEqual(paths,
+ [
+ ((('DIR/a.test1','DIR/c.test2'), 'DIR/a.testwhat1','DIR/c.testwhat2'),'DIR/a.c.output'),
+ ((('DIR/a.test1','DIR/d.test2'), 'DIR/a.testwhat1','DIR/d.testwhat2'),'DIR/a.d.output'),
+ ((('DIR/b.test1','DIR/c.test2'), 'DIR/b.testwhat1','DIR/c.testwhat2'),'DIR/b.c.output'),
+ ((('DIR/b.test1','DIR/d.test2'), 'DIR/b.testwhat1','DIR/d.testwhat2'),'DIR/b.d.output')
+ ]
+ )
+
+
+
+#
+# debug parameter ignored if called as a module
+#
+if sys.argv.count("--debug"):
+ sys.argv.remove("--debug")
+#sys.argv.append("Test_files_param_factory")
+#sys.argv.append("Test_merge_param_factory")
+#sys.argv.append("Test_transform_param_factory")
+#sys.argv.append("Test_files_param_factory")
+unittest.main()
diff --git a/ruffus/test/test_files_decorator.py b/ruffus/test/test_files_decorator.py
new file mode 100755
index 0000000..980f069
--- /dev/null
+++ b/ruffus/test/test_files_decorator.py
@@ -0,0 +1,277 @@
+#!/usr/bin/env python
+from __future__ import print_function
+"""
+
+ test_files_decorator.py
+
+    test that @files, when given single files, forwards its arguments as single
+    files rather than as lists
+
+"""
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# options
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+from optparse import OptionParser
+import sys, os
+import os.path
+try:
+ import StringIO as io
+except:
+ import io as io
+
+import re,time
+
+# add self to search path for testing
+exe_path = os.path.split(os.path.abspath(sys.argv[0]))[0]
+sys.path.insert(0,os.path.abspath(os.path.join(exe_path,"..", "..")))
+if __name__ == '__main__':
+ module_name = os.path.split(sys.argv[0])[1]
+ module_name = os.path.splitext(module_name)[0];
+else:
+ module_name = __name__
+
+
+
+import ruffus
+print("\tRuffus Version = ", ruffus.__version__)
+parser = OptionParser(version="%%prog v1.0, ruffus v%s" % ruffus.ruffus_version.__version)
+parser.add_option("-t", "--target_tasks", dest="target_tasks",
+ action="append",
+ default = list(),
+ metavar="JOBNAME",
+ type="string",
+ help="Target task(s) of pipeline.")
+parser.add_option("-f", "--forced_tasks", dest="forced_tasks",
+ action="append",
+ default = list(),
+ metavar="JOBNAME",
+ type="string",
+ help="Pipeline task(s) which will be included even if they are up to date.")
+parser.add_option("-j", "--jobs", dest="jobs",
+ default=1,
+ metavar="jobs",
+ type="int",
+ help="Specifies the number of jobs (commands) to run simultaneously.")
+parser.add_option("-v", "--verbose", dest = "verbose",
+ action="count", default=0,
+ help="Print more verbose messages for each additional verbose level.")
+parser.add_option("-d", "--dependency", dest="dependency_file",
+ #default="simple.svg",
+ metavar="FILE",
+ type="string",
+ help="Print a dependency graph of the pipeline that would be executed "
+ "to FILE, but do not execute it.")
+parser.add_option("-F", "--dependency_graph_format", dest="dependency_graph_format",
+ metavar="FORMAT",
+ type="string",
+ default = 'svg',
+ help="format of dependency graph file. Can be 'ps' (PostScript), "+
+ "'svg' 'svgz' (Structured Vector Graphics), " +
+ "'png' 'gif' (bitmap graphics) etc ")
+parser.add_option("-n", "--just_print", dest="just_print",
+ action="store_true", default=False,
+ help="Print a description of the jobs that would be executed, "
+ "but do not execute them.")
+parser.add_option("-K", "--no_key_legend_in_graph", dest="no_key_legend_in_graph",
+ action="store_true", default=False,
+ help="Do not print out legend and key for dependency graph.")
+parser.add_option("-H", "--draw_graph_horizontally", dest="draw_horizontally",
+ action="store_true", default=False,
+ help="Draw horizontal dependency graph.")
+
+parameters = [
+ ]
+
+
+
+
+
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# imports
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+import re
+import operator
+import sys,os
+from collections import defaultdict
+import random
+
+sys.path.append(os.path.abspath(os.path.join(exe_path,"..", "..")))
+from ruffus import *
+
+# use simplejson in place of json for python < 2.6
+try:
+ import json
+except ImportError:
+ import simplejson
+ json = simplejson
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Main logic
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+
+
+
+
+# get help string
+f =io.StringIO()
+parser.print_help(f)
+helpstr = f.getvalue()
+(options, remaining_args) = parser.parse_args()
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Functions
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+def test_job_io(infiles, outfiles, extra_params):
+ """
+ cat input files content to output files
+ after writing out job parameters
+ """
+ # dump parameters
+ params = (infiles, outfiles) + extra_params
+
+ if isinstance(infiles, str):
+ infile_names = [infiles]
+    elif infiles is None:
+ infile_names = []
+ else:
+ infile_names = infiles
+
+ if isinstance(outfiles, str):
+ outfile_names = [outfiles]
+ else:
+ outfile_names = outfiles
+
+
+ output_text = list()
+ for f in infile_names:
+ with open(f) as ii:
+ output_text.append(ii.read())
+ output_text = "".join(sorted(output_text))
+ output_text += json.dumps(infiles) + " -> " + json.dumps(outfiles) + "\n"
+ for f in outfile_names:
+ with open(f, "w") as oo:
+ oo.write(output_text)
+
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Tasks
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+tempdir = "tempdir/"
+@follows(mkdir(tempdir))
+#
+# task1
+#
+@files(None, tempdir + 'a.1')
+def task1(infiles, outfiles, *extra_params):
+ """
+ First task
+ """
+ test_job_io(infiles, outfiles, extra_params)
+
+
+
+#
+# task2
+#
+@transform(task1, regex(r".*"), tempdir + 'b.1')
+def task2(infiles, outfiles, *extra_params):
+ """
+ Second task
+ """
+ test_job_io(infiles, outfiles, extra_params)
+ assert(infiles == tempdir + "a.1")
+
+
+
+#
+# task3
+#
+@files(task2, tempdir + 'c.1')
+def task3(infiles, outfiles, *extra_params):
+ """
+    Third task
+ """
+ test_job_io(infiles, outfiles, extra_params)
+ assert(infiles == tempdir + "b.1")
+
+#
+# task4
+#
+@follows(task3)
+@files([[None, tempdir + 'd.1'], [None, tempdir + 'e.1']])
+def task4(infiles, outfiles, *extra_params):
+ """
+    Fourth task
+ """
+ test_job_io(infiles, outfiles, extra_params)
+
+#
+# task5
+#
+@files(task4, tempdir + "f.1")
+def task5(infiles, outfiles, *extra_params):
+ """
+    Fifth task
+ """
+ test_job_io(infiles, outfiles, extra_params)
+ assert(infiles == [tempdir + "d.1", tempdir + "e.1"])
+
+import unittest
+
+class Test_task(unittest.TestCase):
+
+ def tearDown (self):
+ """
+ """
+ import glob
+ for f in glob.glob(tempdir + "*"):
+ os.unlink(f)
+ os.rmdir(tempdir)
+
+
+ def test_task (self):
+ pipeline_run([task5], options.forced_tasks, multiprocess = options.jobs,
+ verbose = options.verbose)
+
+
+if __name__ == '__main__':
+ if options.just_print:
+ pipeline_printout(sys.stdout, options.target_tasks, options.forced_tasks,
+ verbose = options.verbose)
+
+ elif options.dependency_file:
+ with open(options.dependency_file, "w") as graph_file:
+ pipeline_printout_graph (graph_file,
+ options.dependency_graph_format,
+ options.target_tasks,
+ options.forced_tasks,
+ draw_vertically = not options.draw_horizontally,
+ no_key_legend = options.no_key_legend_in_graph)
+ else:
+ sys.argv= sys.argv[0:1]
+ unittest.main()
+
diff --git a/ruffus/test/test_files_post_merge.py b/ruffus/test/test_files_post_merge.py
new file mode 100755
index 0000000..4a9903c
--- /dev/null
+++ b/ruffus/test/test_files_post_merge.py
@@ -0,0 +1,299 @@
+#!/usr/bin/env python
+from __future__ import print_function
+"""
+
+ test_files_post_merge.py
+
+    regression test for a bug where a @files task following @merge received its input wrapped in an extra pair of parentheses
+
+ use :
+ --debug to test automatically
+ --start_again the first time you run the file
+ --jobs_per_task N to simulate tasks with N numbers of files per task
+
+    -j N / --jobs N to specify multitasking
+ -v to see the jobs in action
+ -n / --just_print to see what jobs would run
+
+"""
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# options
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+from optparse import OptionParser
+import sys, os
+import os.path
+try:
+ import StringIO as io
+except:
+ import io as io
+
+import re,time
+
+# add self to search path for testing
+exe_path = os.path.split(os.path.abspath(sys.argv[0]))[0]
+sys.path.insert(0,os.path.abspath(os.path.join(exe_path,"..", "..")))
+if __name__ == '__main__':
+ module_name = os.path.split(sys.argv[0])[1]
+ module_name = os.path.splitext(module_name)[0];
+else:
+ module_name = __name__
+
+
+
+import ruffus
+parser = OptionParser(version="%%prog v1.0, ruffus v%s" % ruffus.ruffus_version.__version)
+parser.add_option("-D", "--debug", dest="debug",
+ action="store_true", default=False,
+ help="Make sure output is correct and clean up.")
+parser.add_option("-s", "--start_again", dest="start_again",
+ action="store_true", default=False,
+ help="Make a new 'original.fa' file to simulate having to restart "
+ "pipeline from scratch.")
+parser.add_option("--jobs_per_task", dest="jobs_per_task",
+ default=3,
+ metavar="N",
+ type="int",
+ help="Simulates tasks with N numbers of files per task.")
+
+
+parser.add_option("-t", "--target_tasks", dest="target_tasks",
+ action="append",
+ default = list(),
+ metavar="JOBNAME",
+ type="string",
+ help="Target task(s) of pipeline.")
+parser.add_option("-f", "--forced_tasks", dest="forced_tasks",
+ action="append",
+ default = list(),
+ metavar="JOBNAME",
+ type="string",
+ help="Pipeline task(s) which will be included even if they are up to date.")
+parser.add_option("-j", "--jobs", dest="jobs",
+ default=1,
+ metavar="jobs",
+ type="int",
+ help="Specifies the number of jobs (commands) to run simultaneously.")
+parser.add_option("-v", "--verbose", dest = "verbose",
+ action="count", default=0,
+ help="Print more verbose messages for each additional verbose level.")
+parser.add_option("-d", "--dependency", dest="dependency_file",
+ #default="simple.svg",
+ metavar="FILE",
+ type="string",
+ help="Print a dependency graph of the pipeline that would be executed "
+ "to FILE, but do not execute it.")
+parser.add_option("-F", "--dependency_graph_format", dest="dependency_graph_format",
+ metavar="FORMAT",
+ type="string",
+ default = 'svg',
+ help="format of dependency graph file. Can be 'ps' (PostScript), "+
+ "'svg' 'svgz' (Structured Vector Graphics), " +
+ "'png' 'gif' (bitmap graphics) etc ")
+parser.add_option("-n", "--just_print", dest="just_print",
+ action="store_true", default=False,
+ help="Print a description of the jobs that would be executed, "
+ "but do not execute them.")
+parser.add_option("-M", "--minimal_rebuild_mode", dest="minimal_rebuild_mode",
+ action="store_true", default=False,
+ help="Rebuild a minimum of tasks necessary for the target. "
+ "Ignore upstream out of date tasks if intervening tasks are fine.")
+parser.add_option("-K", "--no_key_legend_in_graph", dest="no_key_legend_in_graph",
+ action="store_true", default=False,
+ help="Do not print out legend and key for dependency graph.")
+parser.add_option("-H", "--draw_graph_horizontally", dest="draw_horizontally",
+ action="store_true", default=False,
+ help="Draw horizontal dependency graph.")
+
+parameters = [
+ ]
+
+
+
+
+
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# imports
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+
+import re
+import operator
+import sys,os
+from collections import defaultdict
+import random
+
+sys.path.append(os.path.abspath(os.path.join(exe_path,"..", "..")))
+from ruffus import *
+
+# use simplejson in place of json for python < 2.6
+try:
+ import json
+except ImportError:
+ import simplejson
+ json = simplejson
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Main logic
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+
+
+
+
+# get help string
+f =io.StringIO()
+parser.print_help(f)
+helpstr = f.getvalue()
+(options, remaining_args) = parser.parse_args()
+
+
+tempdir = "temp_filesre_split_and_combine/"
+
+
+
+if options.verbose:
+ verbose_output = sys.stderr
+else:
+ verbose_output =open("/dev/null", "w")
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Tasks
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+#
+# split_fasta_file
+#
+@posttask(lambda: verbose_output.write("Split into %d files\n" % options.jobs_per_task))
+@split(tempdir + "original.fa", [tempdir + "files.split.success", tempdir + "files.split.*.fa"])
+def split_fasta_file (input_file, outputs):
+
+ #
+ # remove previous fasta files
+ #
+ success_flag = outputs[0]
+ output_file_names = outputs[1:]
+ for f in output_file_names:
+ os.unlink(f)
+
+ #
+ # create as many files as we are simulating in jobs_per_task
+ #
+ for i in range(options.jobs_per_task):
+ open(tempdir + "files.split.%03d.fa" % i, "w")
+
+ open(success_flag, "w")
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+#
+# align_sequences
+#
+@posttask(lambda: verbose_output.write("Sequences aligned\n"))
+@transform(split_fasta_file, suffix(".fa"), ".aln")                    # fa -> aln
+def align_sequences (input_file, output_filename):
+ open(output_filename, "w").write("%s\n" % output_filename)
+
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+#
+# percentage_identity
+#
+@posttask(lambda: verbose_output.write("%Identity calculated\n"))
+@transform(align_sequences,                 # find all results from align_sequences
+ suffix(".aln"), # replace suffix with:
+ [r".pcid", # .pcid suffix for the result
+ r".pcid_success"]) # .pcid_success to indicate job completed
+def percentage_identity (input_file, output_files):
+ (output_filename, success_flag_filename) = output_files
+ open(output_filename, "w").write("%s\n" % output_filename)
+ open(success_flag_filename, "w")
+
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+#
+# combine_results
+#
+@posttask(lambda: verbose_output.write("Results recombined\n"))
+@merge(percentage_identity, tempdir + "all.combine_results")
+def combine_results (input_files, output_files):
+ """
+ Combine all
+ """
+ (output_filename) = output_files
+ out = open(output_filename, "w")
+ for inp, flag in input_files:
+ out.write(open(inp).read())
+
+
+
+@files(combine_results, os.path.join(tempdir, "check_all_is.well"))
+def post_merge_check (input_filename, output_filename):
+ """
+ check that merge sends just one file, not a list to me
+ """
+ open(output_filename, "w").write(open(input_filename).read())
+
+@files(post_merge_check, os.path.join(tempdir, "check_all_is.weller"))
+def post_post_merge_check (input_filename, output_filename):
+ """
+ check that @files forwards a single file on when given a single file
+ """
+ open(output_filename, "w").write(open(input_filename).read())
+
+def start_pipeline_afresh ():
+ """
+ Recreate directory and starting file
+ """
+ print("Start again", file=verbose_output)
+ import os
+ os.system("rm -rf %s" % tempdir)
+ os.makedirs(tempdir)
+ open(tempdir + "original.fa", "w").close()
+
+if __name__ == '__main__':
+ if options.start_again:
+ start_pipeline_afresh()
+ if options.just_print:
+ pipeline_printout(sys.stdout, options.target_tasks, options.forced_tasks,
+ verbose = options.verbose,
+ gnu_make_maximal_rebuild_mode = not options.minimal_rebuild_mode)
+
+ elif options.dependency_file:
+ pipeline_printout_graph ( open(options.dependency_file, "w"),
+ options.dependency_graph_format,
+ options.target_tasks,
+ options.forced_tasks,
+ draw_vertically = not options.draw_horizontally,
+ gnu_make_maximal_rebuild_mode = not options.minimal_rebuild_mode,
+ no_key_legend = options.no_key_legend_in_graph)
+ elif options.debug:
+ start_pipeline_afresh()
+ pipeline_run(options.target_tasks, options.forced_tasks, multiprocess = options.jobs,
+ logger = stderr_logger if options.verbose else black_hole_logger,
+ gnu_make_maximal_rebuild_mode = not options.minimal_rebuild_mode,
+ verbose = options.verbose)
+ os.system("rm -rf %s" % tempdir)
+ print("OK")
+ else:
+ pipeline_run(options.target_tasks, options.forced_tasks, multiprocess = options.jobs,
+ logger = stderr_logger if options.verbose else black_hole_logger,
+ gnu_make_maximal_rebuild_mode = not options.minimal_rebuild_mode,
+ verbose = options.verbose)
+
diff --git a/ruffus/test/test_filesre_combine.py b/ruffus/test/test_filesre_combine.py
new file mode 100755
index 0000000..a0e15fb
--- /dev/null
+++ b/ruffus/test/test_filesre_combine.py
@@ -0,0 +1,242 @@
+#!/usr/bin/env python
+from __future__ import print_function
+"""
+
+    test_filesre_combine.py
+
+    test @files_re with combine() to group files by category
+
+"""
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# options
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+from optparse import OptionParser
+import sys, os
+import os.path
+try:
+ import StringIO as io
+except:
+ import io as io
+
+import re
+
+# add self to search path for testing
+exe_path = os.path.split(os.path.abspath(sys.argv[0]))[0]
+sys.path.insert(0,os.path.abspath(os.path.join(exe_path,"..", "..")))
+if __name__ == '__main__':
+ module_name = os.path.split(sys.argv[0])[1]
+ module_name = os.path.splitext(module_name)[0];
+else:
+ module_name = __name__
+
+
+
+
+parser = OptionParser(version="%prog 1.0")
+parser.add_option("-D", "--debug", dest="debug",
+ action="store_true", default=False,
+ help="Make sure output is correct and clean up.")
+parser.add_option("-t", "--target_tasks", dest="target_tasks",
+ action="append",
+ default = list(),
+ metavar="JOBNAME",
+ type="string",
+ help="Target task(s) of pipeline.")
+parser.add_option("-f", "--forced_tasks", dest="forced_tasks",
+ action="append",
+ default = list(),
+ metavar="JOBNAME",
+ type="string",
+ help="Pipeline task(s) which will be included even if they are up to date.")
+parser.add_option("-j", "--jobs", dest="jobs",
+ default=1,
+ metavar="jobs",
+ type="int",
+ help="Specifies the number of jobs (commands) to run simultaneously.")
+parser.add_option("-v", "--verbose", dest = "verbose",
+ action="count", default=0,
+                  help="Print more verbose messages for each additional verbose level.")
+parser.add_option("-F", "--flowchart", dest="flowchart",
+ #default="simple.svg",
+ metavar="FILE",
+ type="string",
+ help="Print a flowchart of the pipeline that would be executed "
+ "to FILE, but do not execute it.")
+parser.add_option("--flowchart_format", dest="flowchart_format",
+ metavar="FORMAT",
+ type="string",
+ default = 'svg',
+ help="format of flowchart file. Can be 'ps' (PostScript), "+
+ "'svg' 'svgz' (Structured Vector Graphics), " +
+ "'png' 'gif' (bitmap graphics) etc ")
+parser.add_option("-n", "--just_print", dest="just_print",
+ action="store_true", default=False,
+ help="Print a description of the jobs that would be executed, "
+ "but do not execute them.")
+parser.add_option("-K", "--no_key_legend_in_graph", dest="no_key_legend_in_graph",
+ action="store_true", default=False,
+ help="Do not print out legend and key for dependency graph.")
+parser.add_option("-H", "--draw_graph_horizontally", dest="draw_horizontally",
+ action="store_true", default=False,
+ help="Draw horizontal dependency graph.")
+
+parameters = [
+ ]
+
+
+
+
+
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# imports
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+import re
+import operator
+import sys,os
+from collections import defaultdict
+import random
+
+sys.path.append(os.path.abspath(os.path.join(exe_path,"..", "..")))
+from ruffus import *
+
+# use simplejson in place of json for python < 2.6
+try:
+ import json
+except ImportError:
+ import simplejson
+ json = simplejson
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Main logic
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+
+
+
+
+# get help string
+f =io.StringIO()
+parser.print_help(f)
+helpstr = f.getvalue()
+(options, remaining_args) = parser.parse_args()
+
+species_list = defaultdict(list)
+species_list["mammals"].append("cow" )
+species_list["mammals"].append("horse" )
+species_list["mammals"].append("sheep" )
+species_list["reptiles"].append("snake" )
+species_list["reptiles"].append("lizard" )
+species_list["reptiles"].append("crocodile" )
+species_list["fish" ].append("pufferfish")
+
+
+tempdir = "temp_filesre_combine/"
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Tasks
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+#
+# task1
+#
+@follows(mkdir(tempdir, tempdir + "test"))
+@posttask(lambda: open(tempdir + "task.done", "a").write("Task 1 Done\n"))
+def prepare_files ():
+ for grouping in species_list.keys():
+ for species_name in species_list[grouping]:
+ filename = tempdir + "%s.%s.animal" % (species_name, grouping)
+ open(filename, "w").write(species_name + "\n")
+
+
+#
+# task2
+#
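+# Descriptive note on the @files_re / combine() call below: the regex captures
+# the directory (\1), the species name (\2) and the grouping (\3) from file
+# names of the form "<dir><species>.<grouping>.animal". Because the output
+# pattern r'\1\3.results' depends only on the directory and the grouping,
+# combine() collates all species files of one grouping (e.g. all mammals)
+# into a single job that writes one "<grouping>.results" file.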
+@files_re(tempdir + '*.animal', r'(.*/)(.*)\.(.*)\.animal', combine(r'\1\2.\3.animal'), r'\1\3.results')
+@follows(prepare_files)
+@posttask(lambda: open(tempdir + "task.done", "a").write("Task 2 Done\n"))
+def summarise_by_grouping(infiles, outfile):
+ """
+ Summarise by each species group, e.g. mammals, reptiles, fish
+ """
+ open(tempdir + "jobs.start", "a").write('job = %s\n' % json.dumps([infiles, outfile]))
+ o = open(outfile, "w")
+ for i in infiles:
+ o.write(open(i).read())
+ open(tempdir + "jobs.finish", "a").write('job = %s\n' % json.dumps([infiles, outfile]))
+
+
+
+
+
+
+
+def check_species_correct():
+ """
+ #cow.mammals.animal
+ #horse.mammals.animal
+ #sheep.mammals.animal
+ # -> mammals.results
+ #
+ #snake.reptiles.animal
+ #lizard.reptiles.animal
+ #crocodile.reptiles.animal
+ # -> reptiles.results
+ #
+ #pufferfish.fish.animal
+ # -> fish.results
+ """
+ for grouping in species_list:
+ assert(open(tempdir + grouping + ".results").read() ==
+ "".join(s + "\n" for s in sorted(species_list[grouping])))
+
+
+
+
+
+#
+# Necessary to protect the "entry point" of the program under windows.
+# see: http://docs.python.org/library/multiprocessing.html#multiprocessing-programming
+#
+if __name__ == '__main__':
+ if options.just_print:
+ pipeline_printout(sys.stdout, options.target_tasks, options.forced_tasks,
+ verbose=options.verbose)
+
+ elif options.flowchart:
+ pipeline_printout_graph ( open(options.flowchart, "w"),
+ options.flowchart_format,
+ options.target_tasks,
+ options.forced_tasks,
+ draw_vertically = not options.draw_horizontally,
+ no_key_legend = options.no_key_legend_in_graph)
+ elif options.debug:
+ import os
+ os.system("rm -rf %s" % tempdir)
+ pipeline_run(options.target_tasks, options.forced_tasks, multiprocess = options.jobs,
+ verbose = options.verbose)
+
+
+ check_species_correct()
+ os.system("rm -rf %s" % tempdir)
+ print("OK")
+ else:
+ pipeline_run(options.target_tasks, options.forced_tasks, multiprocess = options.jobs,
+ verbose = options.verbose)
diff --git a/ruffus/test/test_filesre_split_and_combine.py b/ruffus/test/test_filesre_split_and_combine.py
new file mode 100755
index 0000000..97121e6
--- /dev/null
+++ b/ruffus/test/test_filesre_split_and_combine.py
@@ -0,0 +1,296 @@
+#!/usr/bin/env python
+from __future__ import print_function
+"""
+
+    test_filesre_split_and_combine.py
+
+    test splitting a file into many pieces and recombining the results with @files_re and combine()
+
+ use :
+ --debug to test automatically
+ --start_again the first time you run the file
+ --jobs_per_task N to simulate tasks with N numbers of files per task
+
+    -j N / --jobs N to specify multitasking
+ -v to see the jobs in action
+ -n / --just_print to see what jobs would run
+
+"""
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# options
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+from optparse import OptionParser
+import sys, os
+import os.path
+try:
+ import StringIO as io
+except:
+ import io as io
+import re,time
+
+# add self to search path for testing
+exe_path = os.path.split(os.path.abspath(sys.argv[0]))[0]
+sys.path.insert(0,os.path.abspath(os.path.join(exe_path,"..", "..")))
+if __name__ == '__main__':
+ module_name = os.path.split(sys.argv[0])[1]
+ module_name = os.path.splitext(module_name)[0];
+else:
+ module_name = __name__
+
+
+
+import ruffus
+parser = OptionParser(version="%%prog v1.0, ruffus v%s" % ruffus.ruffus_version.__version)
+parser.add_option("-D", "--debug", dest="debug",
+ action="store_true", default=False,
+ help="Make sure output is correct and clean up.")
+parser.add_option("-s", "--start_again", dest="start_again",
+ action="store_true", default=False,
+ help="Make a new 'original.fa' file to simulate having to restart "
+ "pipeline from scratch.")
+parser.add_option("--jobs_per_task", dest="jobs_per_task",
+ default=50,
+ metavar="N",
+ type="int",
+ help="Simulates tasks with N numbers of files per task.")
+
+
+parser.add_option("-t", "--target_tasks", dest="target_tasks",
+ action="append",
+ default = list(),
+ metavar="JOBNAME",
+ type="string",
+ help="Target task(s) of pipeline.")
+parser.add_option("-f", "--forced_tasks", dest="forced_tasks",
+ action="append",
+ default = list(),
+ metavar="JOBNAME",
+ type="string",
+ help="Pipeline task(s) which will be included even if they are up to date.")
+parser.add_option("-j", "--jobs", dest="jobs",
+ default=1,
+ metavar="jobs",
+ type="int",
+ help="Specifies the number of jobs (commands) to run simultaneously.")
+parser.add_option("-v", "--verbose", dest = "verbose",
+ action="count", default=0,
+ help="Print more verbose messages for each additional verbose level.")
+parser.add_option("-d", "--dependency", dest="dependency_file",
+ #default="simple.svg",
+ metavar="FILE",
+ type="string",
+ help="Print a dependency graph of the pipeline that would be executed "
+ "to FILE, but do not execute it.")
+parser.add_option("-F", "--dependency_graph_format", dest="dependency_graph_format",
+ metavar="FORMAT",
+ type="string",
+ default = 'svg',
+ help="format of dependency graph file. Can be 'ps' (PostScript), "+
+ "'svg' 'svgz' (Structured Vector Graphics), " +
+ "'png' 'gif' (bitmap graphics) etc ")
+parser.add_option("-n", "--just_print", dest="just_print",
+ action="store_true", default=False,
+ help="Print a description of the jobs that would be executed, "
+ "but do not execute them.")
+parser.add_option("-M", "--minimal_rebuild_mode", dest="minimal_rebuild_mode",
+ action="store_true", default=False,
+ help="Rebuild a minimum of tasks necessary for the target. "
+ "Ignore upstream out of date tasks if intervening tasks are fine.")
+parser.add_option("-K", "--no_key_legend_in_graph", dest="no_key_legend_in_graph",
+ action="store_true", default=False,
+ help="Do not print out legend and key for dependency graph.")
+parser.add_option("-H", "--draw_graph_horizontally", dest="draw_horizontally",
+ action="store_true", default=False,
+ help="Draw horizontal dependency graph.")
+
+parameters = [
+ ]
+
+
+
+
+
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# imports
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+import re
+import operator
+import sys,os
+from collections import defaultdict
+import random
+
+sys.path.append(os.path.abspath(os.path.join(exe_path,"..", "..")))
+from ruffus import *
+
+# use simplejson in place of json for python < 2.6
+try:
+ import json
+except ImportError:
+ import simplejson
+ json = simplejson
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Main logic
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+
+
+
+
+# get help string
+f =io.StringIO()
+parser.print_help(f)
+helpstr = f.getvalue()
+(options, remaining_args) = parser.parse_args()
+
+
+tempdir = "temp_filesre_split_and_combine/"
+
+def sleep_a_while ():
+ time.sleep(0.1)
+
+
+if options.verbose:
+ verbose_output = sys.stderr
+else:
+ verbose_output =open("/dev/null", "w")
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Tasks
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+#
+# split_fasta_file
+#
+@posttask(sleep_a_while)
+@posttask(lambda: verbose_output.write("Split into %d files\n" % options.jobs_per_task))
+@files(tempdir + "original.fa", tempdir + "files.split.success")
+def split_fasta_file (input_file, success_flag):
+ #
+ # remove existing fasta files
+ #
+ import glob
+ filenames = sorted(glob.glob(tempdir + "files.split.*.fa"))
+ for f in filenames:
+ os.unlink(f)
+
+
+ import random
+ random.seed()
+ for i in range(options.jobs_per_task):
+ open(tempdir + "files.split.%03d.fa" % i, "w")
+
+ open(success_flag, "w")
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+#
+# align_sequences
+#
+@posttask(sleep_a_while)
+@posttask(lambda: verbose_output.write("Sequences aligned\n"))
+@follows(split_fasta_file)
+@files_re(tempdir + "files.split.*.fa",       # find all .fa files
+ ".fa$", ".aln") # fa -> aln
+def align_sequences (input_file, output_filename):
+ open(output_filename, "w").write("%s\n" % output_filename)
+
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+#
+# percentage_identity
+#
+@posttask(sleep_a_while)
+@posttask(lambda: verbose_output.write("%Identity calculated\n"))
+@files_re(align_sequences,       # find all results from align_sequences
+ r"(.*\.)(.+).aln$", # match file name root and substitute
+ r'\g<0>', # the original file
+ [r"\1\2.pcid", # .pcid suffix for the result
+ r"\1\2.pcid_success"], # .pcid_success to indicate job completed
+ r"\2") # extra parameter to remember the file index
+def percentage_identity (input_file, output_files, split_index):
+ (output_filename, success_flag_filename) = output_files
+ open(output_filename, "w").write("%s\n" % split_index)
+ open(success_flag_filename, "w")
+
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+#
+# combine_results
+#
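+# Descriptive note: combine(r".*.pcid$") below collates every result pair
+# produced by percentage_identity into a single job, so combine_results is
+# called once with the full list of (pcid, pcid_success) pairs and writes one
+# combined results file plus a success flag.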
+@posttask(lambda: verbose_output.write("Results recombined\n"))
+@posttask(sleep_a_while)
+@files_re(percentage_identity, combine(r".*.pcid$"),
+ [tempdir + "all.combine_results",
+ tempdir + "all.combine_results_success"])
+def combine_results (input_files, output_files):
+ """
+ Combine all
+ """
+ (output_filename, success_flag_filename) = output_files
+ out = open(output_filename, "w")
+ for inp, flag in input_files:
+ out.write(open(inp).read())
+ open(success_flag_filename, "w")
+
+
+
+def start_pipeline_afresh ():
+ """
+ Recreate directory and starting file
+ """
+ print("Start again", file=verbose_output)
+ import os
+ os.system("rm -rf %s" % tempdir)
+ os.makedirs(tempdir)
+ open(tempdir + "original.fa", "w").close()
+ sleep_a_while ()
+
+if __name__ == '__main__':
+ if options.start_again:
+ start_pipeline_afresh()
+ if options.just_print:
+ pipeline_printout(sys.stdout, options.target_tasks, options.forced_tasks,
+ verbose = options.verbose,
+ gnu_make_maximal_rebuild_mode = not options.minimal_rebuild_mode)
+
+ elif options.dependency_file:
+ pipeline_printout_graph ( open(options.dependency_file, "w"),
+ options.dependency_graph_format,
+ options.target_tasks,
+ options.forced_tasks,
+ draw_vertically = not options.draw_horizontally,
+ gnu_make_maximal_rebuild_mode = not options.minimal_rebuild_mode,
+ no_key_legend = options.no_key_legend_in_graph)
+ elif options.debug:
+ start_pipeline_afresh()
+ pipeline_run(options.target_tasks, options.forced_tasks, multiprocess = options.jobs,
+ logger = stderr_logger if options.verbose else black_hole_logger,
+ gnu_make_maximal_rebuild_mode = not options.minimal_rebuild_mode,
+ verbose = options.verbose)
+ os.system("rm -rf %s" % tempdir)
+ print("OK")
+ else:
+ pipeline_run(options.target_tasks, options.forced_tasks, multiprocess = options.jobs,
+ logger = stderr_logger if options.verbose else black_hole_logger,
+ gnu_make_maximal_rebuild_mode = not options.minimal_rebuild_mode,
+ verbose = options.verbose)
+
diff --git a/ruffus/test/test_follows_mkdir.py b/ruffus/test/test_follows_mkdir.py
new file mode 100755
index 0000000..a7d7739
--- /dev/null
+++ b/ruffus/test/test_follows_mkdir.py
@@ -0,0 +1,203 @@
+#!/usr/bin/env python
+from __future__ import print_function
+"""
+
+ test_follows_mkdir.py
+
+ test make directory dependencies
+
+ use :
+ -j N / --jobs N to specify multitasking
+ -v to see the jobs in action
+ -n / --just_print to see what jobs would run
+
+"""
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# options
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+from optparse import OptionParser
+import sys, os
+import os.path
+try:
+ import StringIO as io
+except:
+ import io as io
+import re,time
+
+# add self to search path for testing
+exe_path = os.path.split(os.path.abspath(sys.argv[0]))[0]
+sys.path.insert(0,os.path.abspath(os.path.join(exe_path,"..", "..")))
+if __name__ == '__main__':
+ module_name = os.path.split(sys.argv[0])[1]
+ module_name = os.path.splitext(module_name)[0];
+else:
+ module_name = __name__
+
+
+
+import ruffus
+parser = OptionParser(version="%%prog v1.0, ruffus v%s" % ruffus.ruffus_version.__version)
+parser.add_option("-t", "--target_tasks", dest="target_tasks",
+ action="append",
+ default = list(),
+ metavar="JOBNAME",
+ type="string",
+ help="Target task(s) of pipeline.")
+parser.add_option("-f", "--forced_tasks", dest="forced_tasks",
+ action="append",
+ default = list(),
+ metavar="JOBNAME",
+ type="string",
+ help="Pipeline task(s) which will be included even if they are up to date.")
+parser.add_option("-j", "--jobs", dest="jobs",
+ default=1,
+ metavar="jobs",
+ type="int",
+ help="Specifies the number of jobs (commands) to run simultaneously.")
+parser.add_option("-v", "--verbose", dest = "verbose",
+ action="count", default=0,
+ help="Print more verbose messages for each additional verbose level.")
+parser.add_option("-d", "--dependency", dest="dependency_file",
+ #default="simple.svg",
+ metavar="FILE",
+ type="string",
+ help="Print a dependency graph of the pipeline that would be executed "
+ "to FILE, but do not execute it.")
+parser.add_option("-F", "--dependency_graph_format", dest="dependency_graph_format",
+ metavar="FORMAT",
+ type="string",
+ default = 'svg',
+ help="format of dependency graph file. Can be 'ps' (PostScript), "+
+ "'svg' 'svgz' (Structured Vector Graphics), " +
+ "'png' 'gif' (bitmap graphics) etc ")
+parser.add_option("-n", "--just_print", dest="just_print",
+ action="store_true", default=False,
+ help="Print a description of the jobs that would be executed, "
+ "but do not execute them.")
+parser.add_option("-M", "--minimal_rebuild_mode", dest="minimal_rebuild_mode",
+ action="store_true", default=False,
+ help="Rebuild a minimum of tasks necessary for the target. "
+ "Ignore upstream out of date tasks if intervening tasks are fine.")
+parser.add_option("-K", "--no_key_legend_in_graph", dest="no_key_legend_in_graph",
+ action="store_true", default=False,
+ help="Do not print out legend and key for dependency graph.")
+parser.add_option("-H", "--draw_graph_horizontally", dest="draw_horizontally",
+ action="store_true", default=False,
+ help="Draw horizontal dependency graph.")
+
+parameters = [
+ ]
+
+
+
+
+
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# imports
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+
+import re
+import operator
+import sys,os
+from collections import defaultdict
+import random
+
+sys.path.append(os.path.abspath(os.path.join(exe_path,"..", "..")))
+from ruffus import *
+
+# use simplejson in place of json for python < 2.6
+try:
+ import json
+except ImportError:
+ import simplejson
+ json = simplejson
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Main logic
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+
+
+
+
+# get help string
+f =io.StringIO()
+parser.print_help(f)
+helpstr = f.getvalue()
+(options, remaining_args) = parser.parse_args()
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Tasks
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
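+# The single task below exercises the different forms accepted by
+# @follows(mkdir(...)): a list of directory names, several mkdir() calls in
+# one decorator, several names in one mkdir() call, and a directory ('e')
+# that is named twice. Test_task_mkdir then checks that directories a-e all
+# exist after the pipeline runs.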
+directories = [os.path.abspath('a'), 'b']
+@follows(mkdir(directories), mkdir('c'), mkdir('d', 'e'), mkdir('e'))
+def task_which_makes_directories ():
+ pass
+
+
+import unittest
+
+class Test_task_mkdir(unittest.TestCase):
+
+ def setUp (self):
+ """
+ """
+ pass
+
+ def tearDown (self):
+ """
+ delete directories
+ """
+ for d in 'abcde':
+ fullpath = os.path.join(exe_path, d)
+ os.rmdir(fullpath)
+
+
+ def test_mkdir (self):
+ pipeline_run(options.target_tasks, options.forced_tasks, multiprocess = options.jobs,
+ logger = stderr_logger if options.verbose else black_hole_logger,
+ gnu_make_maximal_rebuild_mode = not options.minimal_rebuild_mode,
+ verbose = options.verbose)
+
+ for d in 'abcde':
+ fullpath = os.path.join(exe_path, d)
+ self.assertTrue(os.path.exists(fullpath))
+
+
+if __name__ == '__main__':
+ if options.just_print:
+ pipeline_printout(sys.stdout, options.target_tasks, options.forced_tasks,
+ verbose = options.verbose,
+ gnu_make_maximal_rebuild_mode = not options.minimal_rebuild_mode)
+
+ elif options.dependency_file:
+ pipeline_printout_graph ( open(options.dependency_file, "w"),
+ options.dependency_graph_format,
+ options.target_tasks,
+ options.forced_tasks,
+ draw_vertically = not options.draw_horizontally,
+ gnu_make_maximal_rebuild_mode = not options.minimal_rebuild_mode,
+ no_key_legend = options.no_key_legend_in_graph)
+ else:
+ sys.argv= sys.argv[0:1]
+ unittest.main()
+
diff --git a/ruffus/test/test_graphviz.py b/ruffus/test/test_graphviz.py
new file mode 100755
index 0000000..34cdc37
--- /dev/null
+++ b/ruffus/test/test_graphviz.py
@@ -0,0 +1,229 @@
+#!/usr/bin/env python
+from __future__ import print_function
+"""
+
+    test_graphviz.py
+
+    test that @graphviz annotations appear in the dot output of pipeline_printout_graph
+
+"""
+import unittest
+
+################################################################################
+#
+# test
+#
+#
+# Copyright (c) 7/13/2010 Leo Goodstadt
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+#################################################################################
+
+import sys, os
+
+# add self to search path for testing
+exe_path = os.path.split(os.path.abspath(sys.argv[0]))[0]
+sys.path.insert(0,os.path.abspath(os.path.join(exe_path,"..", "..")))
+if __name__ == '__main__':
+ module_name = os.path.split(sys.argv[0])[1]
+ module_name = os.path.splitext(module_name)[0];
+else:
+ module_name = __name__
+
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# options
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+
+from optparse import OptionParser
+try:
+ import StringIO as io
+except:
+ import io as io
+
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# imports
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+from ruffus import *
+from ruffus.ruffus_exceptions import JobSignalledBreak
+
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Pipeline
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+
+#
+# up to date tasks
+#
+@check_if_uptodate (lambda : (False, ""))
+def Up_to_date_task1(infile, outfile):
+ pass
+
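+# Descriptive note: the @graphviz decorator below passes node attributes
+# (URL, colours, a label with prefix/suffix, shape, height, peripheries and
+# style) through to the dot representation of this task; test_graphviz_dot
+# later asserts that these attributes appear verbatim in the output of
+# pipeline_printout_graph(..., "dot", ...).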
+@graphviz(URL='"http://cnn.com"', fillcolor = '"#FFCCCC"',
+ color = '"#FF0000"', pencolor='"#FF0000"', fontcolor='"#4B6000"',
+ label_suffix = "???", label_prefix = "What is this?<BR/> ",
+ label = "<What <FONT COLOR=\"red\">is</FONT>this>",
+ shape= "component", height = 1.5, peripheries = 5,
+ style="dashed")
+@check_if_uptodate (lambda : (False, ""))
+@follows(Up_to_date_task1)
+def Up_to_date_task2(infile, outfile):
+ pass
+
+@check_if_uptodate (lambda : (False, ""))
+@follows(Up_to_date_task2)
+def Up_to_date_task3(infile, outfile):
+ pass
+
+
+@check_if_uptodate (lambda : (False, ""))
+@follows(Up_to_date_task3)
+def Up_to_date_final_target(infile, outfile):
+ pass
+
+
+#
+# Explicitly specified
+#
+@check_if_uptodate (lambda : (False, ""))
+@follows(Up_to_date_task1)
+def Explicitly_specified_task(infile, outfile):
+ pass
+
+
+
+#
+# Tasks to run
+#
+@follows(Explicitly_specified_task)
+def Task_to_run1(infile, outfile):
+ pass
+
+
+@follows(Task_to_run1)
+def Task_to_run2(infile, outfile):
+ pass
+
+@follows(Task_to_run2)
+def Task_to_run3(infile, outfile):
+ pass
+
+@check_if_uptodate (lambda : (False, ""))
+@follows(Task_to_run2)
+def Up_to_date_task_forced_to_rerun(infile, outfile):
+ pass
+
+
+#
+# Final target
+#
+@follows(Up_to_date_task_forced_to_rerun, Task_to_run3)
+def Final_target(infile, outfile):
+ pass
+
+#
+# Ignored downstream
+#
+@follows(Final_target)
+def Downstream_task1_ignored(infile, outfile):
+ pass
+
+@follows(Final_target)
+def Downstream_task2_ignored(infile, outfile):
+ pass
+
+
+
+
+
+
+
+
+
+try:
+ from StringIO import StringIO
+except:
+ from io import StringIO
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Main logic
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+
+
+
+
+
+
+
+
+class Test_graphviz(unittest.TestCase):
+    #___________________________________________________________________________
+    #
+    #   test that @graphviz annotations appear in pipeline_printout_graph output
+    #___________________________________________________________________________
+ def test_graphviz_dot(self):
+ """Make sure annotations from graphviz appear in dot
+ """
+
+ s = StringIO()
+ pipeline_printout_graph (
+ s,
+ # use flowchart file name extension to decide flowchart format
+ # e.g. svg, jpg etc.
+ "dot",
+ [Final_target, Up_to_date_final_target])
+ self.assertTrue('[URL="http://cnn.com", color="#FF0000", fillcolor="#FFCCCC", fontcolor="#4B6000", height=1.5, label=<What is this?<BR/> What <FONT COLOR="red">is</FONT>this???>, pencolor="#FF0000", peripheries=5, shape=component, style=dashed]' in s.getvalue())
+
+
+
+
+#
+# Necessary to protect the "entry point" of the program under windows.
+# see: http://docs.python.org/library/multiprocessing.html#multiprocessing-programming
+#
+if __name__ == '__main__':
+ #pipeline_printout(sys.stdout, [test_product_task], verbose = 5)
+ #pipeline_printout_graph( "test.png", "png", [Final_target, Up_to_date_final_target])
+ #pipeline_printout_graph( "test.dot", "dot", [Final_target, Up_to_date_final_target])
+ unittest.main()
+
+
diff --git a/ruffus/test/test_inputs_with_multiple_args_raising_exception.py b/ruffus/test/test_inputs_with_multiple_args_raising_exception.py
new file mode 100755
index 0000000..c1be929
--- /dev/null
+++ b/ruffus/test/test_inputs_with_multiple_args_raising_exception.py
@@ -0,0 +1,196 @@
+#!/usr/bin/env python
+from __future__ import print_function
+"""
+
+ test_inputs_with_multiple_args_raising_exception.py
+
+ inputs with multiple arguments should raise an exception
+
+"""
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# options
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+from optparse import OptionParser
+import sys, os
+import os.path
+try:
+ import StringIO as io
+except:
+ import io as io
+import re,time
+
+# add self to search path for testing
+exe_path = os.path.split(os.path.abspath(sys.argv[0]))[0]
+sys.path.insert(0,os.path.abspath(os.path.join(exe_path,"..", "..")))
+if __name__ == '__main__':
+ module_name = os.path.split(sys.argv[0])[1]
+ module_name = os.path.splitext(module_name)[0];
+else:
+ module_name = __name__
+
+
+
+import ruffus
+print("\tRuffus Version = ", ruffus.__version__)
+parser = OptionParser(version="%%prog v1.0, ruffus v%s" % ruffus.ruffus_version.__version)
+parser.add_option("-t", "--target_tasks", dest="target_tasks",
+ action="append",
+ default = list(),
+ metavar="JOBNAME",
+ type="string",
+ help="Target task(s) of pipeline.")
+parser.add_option("-f", "--forced_tasks", dest="forced_tasks",
+ action="append",
+ default = list(),
+ metavar="JOBNAME",
+ type="string",
+ help="Pipeline task(s) which will be included even if they are up to date.")
+parser.add_option("-j", "--jobs", dest="jobs",
+ default=1,
+ metavar="jobs",
+ type="int",
+ help="Specifies the number of jobs (commands) to run simultaneously.")
+parser.add_option("-v", "--verbose", dest = "verbose",
+ action="count", default=0,
+ help="Print more verbose messages for each additional verbose level.")
+parser.add_option("-d", "--dependency", dest="dependency_file",
+ #default="simple.svg",
+ metavar="FILE",
+ type="string",
+ help="Print a dependency graph of the pipeline that would be executed "
+ "to FILE, but do not execute it.")
+parser.add_option("-F", "--dependency_graph_format", dest="dependency_graph_format",
+ metavar="FORMAT",
+ type="string",
+ default = 'svg',
+ help="format of dependency graph file. Can be 'ps' (PostScript), "+
+ "'svg' 'svgz' (Structured Vector Graphics), " +
+ "'png' 'gif' (bitmap graphics) etc ")
+parser.add_option("-n", "--just_print", dest="just_print",
+ action="store_true", default=False,
+ help="Print a description of the jobs that would be executed, "
+ "but do not execute them.")
+parser.add_option("-M", "--minimal_rebuild_mode", dest="minimal_rebuild_mode",
+ action="store_true", default=False,
+ help="Rebuild a minimum of tasks necessary for the target. "
+ "Ignore upstream out of date tasks if intervening tasks are fine.")
+parser.add_option("-K", "--no_key_legend_in_graph", dest="no_key_legend_in_graph",
+ action="store_true", default=False,
+ help="Do not print out legend and key for dependency graph.")
+parser.add_option("-H", "--draw_graph_horizontally", dest="draw_horizontally",
+ action="store_true", default=False,
+ help="Draw horizontal dependency graph.")
+
+parameters = [
+ ]
+
+
+
+
+
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# imports
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+import re
+import operator
+import sys,os
+from collections import defaultdict
+import random
+
+sys.path.append(os.path.abspath(os.path.join(exe_path,"..", "..")))
+from ruffus import *
+
+# use simplejson in place of json for python < 2.6
+try:
+ import json
+except ImportError:
+ import simplejson
+ json = simplejson
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Main logic
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+
+
+
+
+# get help string
+f =io.StringIO()
+parser.print_help(f)
+helpstr = f.getvalue()
+(options, remaining_args) = parser.parse_args()
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Tasks
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+try:
+ @transform(None, regex("b"), inputs("a", "b"), "task_1.output")
+ def task_1 (i, o):
+ for f in o:
+ open(f, 'w')
+except ruffus.ruffus_exceptions.error_task_transform_inputs_multiple_args:
+ print("\tExpected exception thrown")
+ sys.exit(0)
+
+raise Exception("Inputs(...) with multiple arguments should have thrown an exception")
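+
+# For reference, a minimal sketch of the accepted form (an assumption based on the
+# exception name above: inputs() takes a single argument, which may itself be a list):
+#
+#   @transform(None, regex("b"), inputs(["a", "b"]), "task_1.output")
+#   def task_1_ok(i, o):
+#       pass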
+
+import unittest
+
+class Test_task_mkdir(unittest.TestCase):
+
+ def setUp (self):
+ """
+ """
+ pass
+
+ def tearDown (self):
+ """
+ """
+ pass
+
+
+ def test_no_re_match (self):
+ pipeline_run([task_1], options.forced_tasks, multiprocess = options.jobs,
+ logger = stderr_logger if options.verbose else black_hole_logger,
+ gnu_make_maximal_rebuild_mode = not options.minimal_rebuild_mode,
+ verbose = options.verbose)
+
+
+if __name__ == '__main__':
+ if options.just_print:
+ pipeline_printout(sys.stdout, options.target_tasks, options.forced_tasks,
+ verbose = options.verbose,
+ gnu_make_maximal_rebuild_mode = not options.minimal_rebuild_mode)
+
+ elif options.dependency_file:
+ pipeline_printout_graph ( open(options.dependency_file, "w"),
+ options.dependency_graph_format,
+ options.target_tasks,
+ options.forced_tasks,
+ draw_vertically = not options.draw_horizontally,
+ gnu_make_maximal_rebuild_mode = not options.minimal_rebuild_mode,
+ no_key_legend = options.no_key_legend_in_graph)
+ else:
+ sys.argv= sys.argv[0:1]
+ unittest.main()
+
diff --git a/ruffus/test/test_job_completion_checksums.py b/ruffus/test/test_job_completion_checksums.py
new file mode 100755
index 0000000..caea11d
--- /dev/null
+++ b/ruffus/test/test_job_completion_checksums.py
@@ -0,0 +1,362 @@
+#!/usr/bin/env python
+from __future__ import print_function
+"""
+
+ test_job_completion_checksums.py
+
+ test several cases where the dbdict should be updated
+
+"""
+
+
+import unittest
+import os
+import sys
+import shutil
+try:
+ from StringIO import StringIO
+except:
+ from io import StringIO
+import time
+
+import sys, re
+
+
+exe_path = os.path.split(os.path.abspath(sys.argv[0]))[0]
+sys.path.insert(0, os.path.abspath(os.path.join(exe_path,"..", "..")))
+from ruffus import (pipeline_run, pipeline_printout, suffix, transform, split,
+ merge, dbdict)
+from ruffus.task import get_default_history_file_name
+from ruffus.ruffus_utility import (RUFFUS_HISTORY_FILE,
+ CHECKSUM_FILE_TIMESTAMPS,
+ CHECKSUM_HISTORY_TIMESTAMPS,
+ CHECKSUM_FUNCTIONS,
+ CHECKSUM_FUNCTIONS_AND_PARAMS)
+from ruffus.ruffus_exceptions import RethrownJobError
+
+possible_chksms = list(range(CHECKSUM_FUNCTIONS_AND_PARAMS + 1))
+workdir = 'tmp_test_job_completion/'
+input_file = os.path.join(workdir, 'input.txt')
+transform1_out = input_file.replace('.txt', '.output')
+split1_outputs = [ os.path.join(workdir, 'split.out1.txt'),
+ os.path.join(workdir, 'split.out2.txt')]
+merge2_output = os.path.join(workdir, 'merged.out')
+
+runtime_data = []
+
+@transform(input_file, suffix('.txt'), '.output', runtime_data)
+def transform1(in_name, out_name, how_many):
+ with open(out_name, 'w') as outfile:
+ with open(in_name) as ii:
+ outfile.write(ii.read())
+
+@transform(input_file, suffix('.txt'), '.output', runtime_data)
+def transform_raise_error(in_name, out_name, how_many):
+ # raise an error unless runtime_data has 'okay' in it
+ with open(out_name, 'w') as outfile:
+ with open(in_name) as ii:
+ outfile.write(ii.read())
+ if 'okay' not in runtime_data:
+ raise RuntimeError("'okay' wasn't in runtime_data!")
+
+@split(input_file, split1_outputs)
+def split1(in_name, out_names):
+ for n in out_names:
+ with open(n, 'w') as outfile:
+ with open(in_name) as ii:
+ outfile.write(ii.read() + '\n')
+
+@merge(split1, merge2_output)
+def merge2(in_names, out_name):
+ with open(out_name, 'w') as outfile:
+ for n in in_names:
+ with open(n) as ii:
+ outfile.write(ii.read() + '\n')
+
+
+#CHECKSUM_FILE_TIMESTAMPS = 0 # only rerun when the file timestamps are out of date (classic mode)
+#CHECKSUM_HISTORY_TIMESTAMPS = 1 # also rerun when the history shows a job as being out of date
+#CHECKSUM_FUNCTIONS = 2 # also rerun when function body has changed
+#CHECKSUM_FUNCTIONS_AND_PARAMS = 3 # also rerun when function parameters have changed
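+
+# Minimal usage sketch of these levels (illustrative only; the constants are imported
+# from ruffus.ruffus_utility above):
+#
+#   pipeline_run([transform1], checksum_level = CHECKSUM_HISTORY_TIMESTAMPS)
+#   pipeline_printout(sys.stdout, [transform1], verbose = 6,
+#                     checksum_level = CHECKSUM_FUNCTIONS_AND_PARAMS)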
+
+
+def cleanup_tmpdir():
+ os.system('rm -f %s %s' % (os.path.join(workdir, '*'), get_default_history_file_name()))
+
+
+class TestJobCompletion(unittest.TestCase):
+ def setUp(self):
+ try:
+ os.mkdir(workdir)
+ except OSError:
+ pass
+
+ def test_output_doesnt_exist(self):
+ """Input file exists, output doesn't exist"""
+ # output doesn't exist-- should run for all levels
+ # create a new input file
+ cleanup_tmpdir()
+ with open(input_file, 'w') as outfile:
+ outfile.write('testme')
+
+ for chksm in possible_chksms:
+ s = StringIO()
+ pipeline_printout(s, [transform1], verbose=6, checksum_level=chksm)
+ self.assertTrue(re.search(r'Job needs update: Missing file\n\s+\[tmp_test_job_completion/input.output\]'
+ , s.getvalue()))
+
+
+
+ def test_output_out_of_date(self):
+ """Input file exists, output out of date"""
+ # output exists but is out of date-- should run for all levels
+ cleanup_tmpdir()
+ with open(transform1_out, 'w') as outfile:
+ outfile.write('testme')
+ time.sleep(0.1)
+ with open(input_file, 'w') as outfile:
+ outfile.write('testme')
+
+ for chksm in possible_chksms:
+ s = StringIO()
+ pipeline_printout(s, [transform1], verbose=6, checksum_level=chksm)
+ self.assertIn('Job needs update:', s.getvalue())
+ if chksm == CHECKSUM_FILE_TIMESTAMPS:
+ self.assertIn('Input files:', s.getvalue())
+ self.assertIn('Output files:', s.getvalue())
+ else:
+ self.assertIn('Previous incomplete run leftover', s.getvalue())
+
+ def test_output_timestamp_okay(self):
+ """Input file exists, output timestamp up to date"""
+ # output exists and timestamp is up to date-- not run for lvl 0, run for all others
+ cleanup_tmpdir()
+ with open(input_file, 'w') as outfile:
+ outfile.write('testme')
+ time.sleep(0.1)
+ with open(transform1_out, 'w') as outfile:
+ outfile.write('testme')
+
+ for chksm in possible_chksms:
+ s = StringIO()
+ pipeline_printout(s, [transform1], verbose=6, checksum_level=chksm)
+ if chksm == CHECKSUM_FILE_TIMESTAMPS:
+ #self.assertIn('Job up-to-date', s.getvalue())
+ pass
+ else:
+ self.assertIn('Job needs update:', s.getvalue())
+ self.assertIn('Previous incomplete run leftover',
+ s.getvalue())
+
+ def test_output_up_to_date(self):
+ """Input file exists, output up to date"""
+ # output is up to date-- not run for any levels
+ cleanup_tmpdir()
+ with open(input_file, 'w') as outfile:
+ outfile.write('testme')
+ pipeline_run([transform1], verbose=0, checksum_level=CHECKSUM_HISTORY_TIMESTAMPS)
+
+ for chksm in possible_chksms:
+ s = StringIO()
+ pipeline_printout(s, [transform1], verbose=6, checksum_level=chksm)
+ #self.assertIn('Job up-to-date', s.getvalue())
+ pass
+
+ def test_output_up_to_date_func_changed(self):
+ """Input file exists, output up to date, function body changed"""
+ # output is up to date, but function body changed (e.g., source different)
+ cleanup_tmpdir()
+ with open(input_file, 'w') as outfile:
+ outfile.write('testme')
+ pipeline_run([transform1], verbose=0, checksum_level=CHECKSUM_HISTORY_TIMESTAMPS)
+ if sys.hexversion >= 0x03000000:
+ transform1.__code__ = split1.__code__ # simulate source change
+ else:
+ transform1.func_code = split1.func_code # simulate source change
+
+ for chksm in possible_chksms:
+ s = StringIO()
+ pipeline_printout(s, [transform1], verbose=6, checksum_level=chksm)
+ if chksm >= CHECKSUM_FUNCTIONS:
+ self.assertIn('Job needs update:', s.getvalue())
+ self.assertIn('Pipeline function has changed',
+ s.getvalue())
+ else:
+ #self.assertIn('Job up-to-date', s.getvalue())
+ pass
+
+ def test_output_up_to_date_func_changed(self):
+ """Input file exists, output up to date, function body changed"""
+ # output is up to date, but function body changed (e.g., source different)
+ cleanup_tmpdir()
+ with open(input_file, 'w') as outfile:
+ outfile.write('testme')
+ pipeline_run([transform1], verbose=0, checksum_level=CHECKSUM_HISTORY_TIMESTAMPS)
+ # simulate source change
+ if sys.hexversion >= 0x03000000:
+ split1.__code__, transform1.__code__ = transform1.__code__, split1.__code__
+ else:
+ split1.func_code, transform1.func_code = transform1.func_code, split1.func_code
+
+ for chksm in possible_chksms:
+ s = StringIO()
+ pipeline_printout(s, [transform1], verbose=6, checksum_level=chksm)
+ if chksm >= CHECKSUM_FUNCTIONS:
+ self.assertIn('Job needs update:', s.getvalue())
+ self.assertIn('Pipeline function has changed',
+ s.getvalue())
+ else:
+ #self.assertIn('Job up-to-date', s.getvalue())
+ pass
+ # clean up our function-changing mess!
+ if sys.hexversion >= 0x03000000:
+ split1.__code__, transform1.__code__ = transform1.__code__, split1.__code__
+ else:
+ split1.func_code, transform1.func_code = transform1.func_code, split1.func_code
+
+
+ def test_output_up_to_date_param_changed(self):
+ """Input file exists, output up to date, parameter to function changed"""
+ # output is up to date, but a parameter passed to the function has changed
+ cleanup_tmpdir()
+ with open(input_file, 'w') as outfile:
+ outfile.write('testme')
+ pipeline_run([transform1], verbose=0, checksum_level=CHECKSUM_HISTORY_TIMESTAMPS)
+ runtime_data.append('different') # simulate change to config file
+
+ for chksm in possible_chksms:
+ s = StringIO()
+ pipeline_printout(s, [transform1], verbose=6, checksum_level=chksm)
+ if chksm >= CHECKSUM_FUNCTIONS_AND_PARAMS:
+ self.assertIn('Job needs update:', s.getvalue())
+ self.assertIn('Pipeline parameters have changed',
+ s.getvalue())
+ else:
+ #self.assertIn('Job up-to-date', s.getvalue())
+ pass
+
+ def test_raises_error(self):
+ """run a function that fails but creates output, then check what should run"""
+ # the task writes its output and then raises, leaving an incomplete run behind
+ cleanup_tmpdir()
+ with open(input_file, 'w') as outfile:
+ outfile.write('testme')
+ time.sleep(.5)
+ del runtime_data[:]
+ with self.assertRaises(RethrownJobError): # poo. Shouldn't this be RuntimeError?
+ pipeline_run([transform_raise_error], verbose=0, checksum_level=CHECKSUM_HISTORY_TIMESTAMPS) # generates output then fails
+
+ for chksm in possible_chksms:
+ s = StringIO()
+ pipeline_printout(s, [transform_raise_error], verbose=6, checksum_level=chksm)
+ if chksm >= CHECKSUM_HISTORY_TIMESTAMPS:
+ self.assertIn('Job needs update:', s.getvalue())
+ self.assertIn('Previous incomplete run leftover',
+ s.getvalue())
+ else:
+ #self.assertIn('Job up-to-date', s.getvalue())
+ pass
+
+
+ def test_split_output(self):
+ """test multiple-output checksums"""
+ # outputs out of date
+ cleanup_tmpdir()
+ with open(input_file, 'w') as outfile:
+ outfile.write('testme')
+ pipeline_run([split1], verbose=0, checksum_level=CHECKSUM_HISTORY_TIMESTAMPS)
+ time.sleep(.5)
+ with open(input_file, 'w') as outfile:
+ outfile.write('testme')
+
+ for chksm in possible_chksms:
+ s = StringIO()
+ pipeline_printout(s, [split1], verbose=6, checksum_level=chksm)
+ self.assertIn('Job needs update:', s.getvalue())
+
+ # all outputs incorrectly generated
+ cleanup_tmpdir()
+ with open(input_file, 'w') as outfile:
+ outfile.write('testme')
+ time.sleep(.5)
+ for f in split1_outputs:
+ with open(f, 'w') as outfile:
+ outfile.write('testme')
+ for chksm in possible_chksms:
+ s = StringIO()
+ pipeline_printout(s, [split1], verbose=6, checksum_level=chksm)
+ if chksm >= CHECKSUM_HISTORY_TIMESTAMPS:
+ self.assertIn('Job needs update:', s.getvalue())
+ self.assertIn('Previous incomplete run leftover',
+ s.getvalue())
+ else:
+ #self.assertIn('Job up-to-date', s.getvalue())
+ pass
+
+ # one output incorrectly generated
+ cleanup_tmpdir()
+ with open(input_file, 'w') as outfile:
+ outfile.write('testme')
+ pipeline_run([split1], verbose=0, checksum_level=CHECKSUM_HISTORY_TIMESTAMPS)
+ job_history = dbdict.open(get_default_history_file_name(), picklevalues=True)
+ del job_history[os.path.relpath(split1_outputs[0])]
+
+ for chksm in possible_chksms:
+ s = StringIO()
+ pipeline_printout(s, [split1], verbose=6, checksum_level=chksm)
+ if chksm >= CHECKSUM_HISTORY_TIMESTAMPS:
+ self.assertIn('Job needs update:', s.getvalue())
+ self.assertIn('Previous incomplete run leftover',
+ s.getvalue())
+ else:
+ #self.assertIn('Job up-to-date', s.getvalue())
+ pass
+
+ def test_merge_output(self):
+ """test multiple-input checksums"""
+ # one output incorrectly generated
+ cleanup_tmpdir()
+ with open(input_file, 'w') as outfile:
+ outfile.write('testme')
+ pipeline_run([split1], verbose=0, checksum_level=CHECKSUM_HISTORY_TIMESTAMPS)
+ job_history = dbdict.open(get_default_history_file_name(), picklevalues=True)
+ del job_history[os.path.relpath(split1_outputs[0])]
+
+ for chksm in possible_chksms:
+ s = StringIO()
+ pipeline_printout(s, [merge2], verbose=6, checksum_level=chksm)
+ if chksm >= CHECKSUM_HISTORY_TIMESTAMPS:
+ self.assertIn('Job needs update:', s.getvalue())
+ self.assertIn('Previous incomplete run leftover', s.getvalue())
+ else:
+ #self.assertIn('Job up-to-date', s.getvalue())
+ pass
+
+ # make sure the jobs run fine
+ cleanup_tmpdir()
+ with open(input_file, 'w') as outfile:
+ outfile.write('testme')
+ pipeline_run([merge2], verbose=0, checksum_level=CHECKSUM_HISTORY_TIMESTAMPS)
+ for chksm in possible_chksms:
+ s = StringIO()
+ pipeline_printout(s, [merge2], verbose=6, checksum_level=chksm)
+ #self.assertIn('Job up-to-date', s.getvalue())
+ self.assertNotIn('Job needs update:', s.getvalue())
+ self.assertNotIn('Previous incomplete run leftover', s.getvalue())
+
+
+ def tearDown(self):
+ shutil.rmtree(workdir)
+
+#if __name__ == '__main__':
+# try:
+# os.mkdir(workdir)
+# except OSError:
+# pass
+# #os.system('rm %s/*' % workdir)
+# #open(input_file, 'w').close()
+# s = StringIO()
+# pipeline_run([transform1], checksum_level=CHECKSUM_HISTORY_TIMESTAMPS)
+# pipeline_printout(s, [transform1], verbose=6, checksum_level=0)
+# print s.getvalue()
+# #open(transform1_out) # raise an exception if test fails
diff --git a/ruffus/test/test_job_history_with_exceptions.py b/ruffus/test/test_job_history_with_exceptions.py
new file mode 100755
index 0000000..81329d3
--- /dev/null
+++ b/ruffus/test/test_job_history_with_exceptions.py
@@ -0,0 +1,235 @@
+#!/usr/bin/env python
+from __future__ import print_function
+"""
+
+ test_job_history_with_exceptions.py
+
+ Make sure that when an exception is thrown only the current and following tasks fail
+
+"""
+
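+# (in this pipeline, the generate_initial_files* tasks feed test_task2 -> test_task3
+# -> test_task4; test_task3 raises for alternate jobs, so test_task2 should remain
+# up to date while test_task3 and test_task4 are rerun)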
+
+import unittest
+import os
+import sys
+import shutil
+try:
+ from StringIO import StringIO
+except:
+ from io import StringIO
+import time
+import re
+
+exe_path = os.path.split(os.path.abspath(sys.argv[0]))[0]
+sys.path.insert(0, os.path.abspath(os.path.join(exe_path,"..", "..")))
+from ruffus import *
+from ruffus import (pipeline_run, pipeline_printout, suffix, transform, split,
+ merge, dbdict, follows)
+#from ruffus.combinatorics import *
+from ruffus.ruffus_exceptions import RethrownJobError
+from ruffus.ruffus_utility import (RUFFUS_HISTORY_FILE,
+ CHECKSUM_FILE_TIMESTAMPS,
+ get_default_history_file_name)
+
+workdir = 'tmp_test_job_history_with_exceptions'
+#sub-1s resolution in system?
+one_second_per_job = None
+throw_exception = False
+#___________________________________________________________________________
+#
+# generate_initial_files1
+#___________________________________________________________________________
+@originate([workdir + "/" + prefix + "_name.tmp1" for prefix in "abcd"])
+def generate_initial_files1(on):
+ with open(on, 'w') as outfile:
+ pass
+
+#___________________________________________________________________________
+#
+# generate_initial_files2
+#___________________________________________________________________________
+@originate([workdir + "/e_name.tmp1", workdir + "/f_name.tmp1"])
+def generate_initial_files2(on):
+ with open(on, 'w') as outfile:
+ pass
+
+#___________________________________________________________________________
+#
+# generate_initial_files3
+#___________________________________________________________________________
+@originate([workdir + "/g_name.tmp1", workdir + "/h_name.tmp1"])
+def generate_initial_files3(on):
+ with open(on, 'w') as outfile:
+ pass
+
+#___________________________________________________________________________
+#
+# generate_initial_files4
+#___________________________________________________________________________
+@originate(workdir + "/i_name.tmp1")
+def generate_initial_files4(on):
+ with open(on, 'w') as outfile:
+ pass
+
+#___________________________________________________________________________
+#
+# test_task2
+#___________________________________________________________________________
+@collate([generate_initial_files1, generate_initial_files2, generate_initial_files3,
+ generate_initial_files4],
+ formatter(),
+ "{path[0]}/all.tmp2")
+#@transform([generate_initial_files1, generate_initial_files2, generate_initial_files3,
+# generate_initial_files4],
+# formatter( ),
+# "{path[0]}/{basename[0]}.tmp2")
+def test_task2( infiles, outfile):
+ with open(outfile, "w") as p:
+ pass
+ #print >>sys.stderr, "8" * 80, "\n", " task2 :%s %s " % (infiles, outfile)
+
+#___________________________________________________________________________
+#
+# test_task3
+#___________________________________________________________________________
+@transform(test_task2, suffix(".tmp2"), ".tmp3")
+def test_task3( infile, outfile):
+ global throw_exception
+ if throw_exception != None:
+ throw_exception = not throw_exception
+ if throw_exception:
+ #print >>sys.stderr, "Throw exception for ", infile, outfile
+ raise Exception("oops")
+ else:
+ #print >>sys.stderr, "No throw exception for ", infile, outfile
+ pass
+ with open(outfile, "w") as p: pass
+ #print >>sys.stderr, "8" * 80, "\n", " task3 :%s %s " % (infile, outfile)
+
+#___________________________________________________________________________
+#
+# test_task4
+#___________________________________________________________________________
+@transform(test_task3, suffix(".tmp3"), ".tmp4")
+def test_task4( infile, outfile):
+ with open(outfile, "w") as p: pass
+ #print >>sys.stderr, "8" * 80, "\n", " task4 :%s %s " % (infile, outfile)
+
+
+
+
+def cleanup_tmpdir():
+ os.system('rm -f %s %s' % (os.path.join(workdir, '*'), RUFFUS_HISTORY_FILE))
+
+
+VERBOSITY = 5
+VERBOSITY = 11
+
+class Test_job_history_with_exceptions(unittest.TestCase):
+ def setUp(self):
+ try:
+ os.mkdir(workdir)
+ except OSError:
+ pass
+
+ #___________________________________________________________________________
+ #
+ # test pipeline_printout and pipeline_run with job history
+ #___________________________________________________________________________
+ def test_job_history_with_exceptions(self):
+ cleanup_tmpdir()
+ s = StringIO()
+ pipeline_printout(s, [test_task4], verbose=VERBOSITY, wrap_width = 10000)
+ #print s.getvalue()
+
+ def test_job_history_with_exceptions_run(self):
+ """Run"""
+ for i in range(1):
+ cleanup_tmpdir()
+ try:
+ pipeline_run([test_task4], verbose = 0,
+ #multithread = 2,
+ one_second_per_job = one_second_per_job)
+ except:
+ pass
+ s = StringIO()
+ pipeline_printout(s, [test_task4], verbose=VERBOSITY, wrap_width = 10000)
+ #
+ # task 2 should be up to date because the exception was thrown in task 3
+ #
+ pipeline_printout_str = s.getvalue()
+ correct_order = not re.search('Tasks which will be run:.*\n(.*\n)*Task = test_task2', pipeline_printout_str)
+ if not correct_order:
+ print(pipeline_printout_str)
+ self.assertTrue(correct_order)
+ sys.stderr.write(".")
+ print()
+
+
+
+ def test_recreate_job_history(self):
+ """Run"""
+ global throw_exception
+ throw_exception = None
+ cleanup_tmpdir()
+
+ #
+ # print "Initial run without creating sqlite file"
+ #
+ pipeline_run([test_task4], verbose = 0,
+ checksum_level = CHECKSUM_FILE_TIMESTAMPS,
+ multithread = 10,
+ one_second_per_job = one_second_per_job)
+
+ #
+ # print "printout without sqlite"
+ #
+ s = StringIO()
+ pipeline_printout(s, [test_task4], checksum_level = CHECKSUM_FILE_TIMESTAMPS)
+ self.assertTrue(not re.search('Tasks which will be run:.*\n(.*\n)*Task = ', s.getvalue()))
+ #
+ # print "printout expecting sqlite file"
+ #
+ s = StringIO()
+ pipeline_printout(s, [test_task4])
+ self.assertTrue(re.search('Tasks which will be run:.*\n(.*\n)*Task = ', s.getvalue()))
+ #
+ # print "Regenerate sqlite file"
+ #
+ pipeline_run([test_task4],
+ checksum_level = CHECKSUM_FILE_TIMESTAMPS,
+ history_file = ruffus_utility.get_default_history_file_name (),
+ multithread = 1,
+ verbose = 0,
+ touch_files_only = 2,
+ one_second_per_job = one_second_per_job)
+ #
+ # print "printout expecting sqlite file"
+ #
+ s = StringIO()
+ pipeline_printout(s, [test_task4], verbose = VERBOSITY)
+ succeed = not re.search('Tasks which will be run:.*\n(.*\n)*Task = ', s.getvalue())
+ if not succeed:
+ print(s.getvalue(), file=sys.stderr)
+ self.assertTrue(succeed)
+
+ throw_exception = False
+
+
+ #___________________________________________________________________________
+ #
+ # cleanup
+ #___________________________________________________________________________
+ def tearDown(self):
+ shutil.rmtree(workdir)
+ pass
+
+
+
+#
+# Necessary to protect the "entry point" of the program under windows.
+# see: http://docs.python.org/library/multiprocessing.html#multiprocessing-programming
+#
+if __name__ == '__main__':
+ #pipeline_printout(sys.stdout, [test_product_task], verbose = VERBOSITY)
+ unittest.main()
diff --git a/ruffus/test/test_mkdir.py b/ruffus/test/test_mkdir.py
new file mode 100755
index 0000000..b58d3bf
--- /dev/null
+++ b/ruffus/test/test_mkdir.py
@@ -0,0 +1,115 @@
+#!/usr/bin/env python
+from __future__ import print_function
+"""
+
+ test_mkdir.py
+
+ test the mkdir() decorator, on its own and combined with formatter()
+
+"""
+
+
+import unittest
+import os
+import sys
+import shutil
+try:
+ from StringIO import StringIO
+except:
+ from io import StringIO
+import time
+
+exe_path = os.path.split(os.path.abspath(sys.argv[0]))[0]
+sys.path.insert(0, os.path.abspath(os.path.join(exe_path,"..", "..")))
+from ruffus import *
+from ruffus import (pipeline_run, pipeline_printout, suffix, transform, split,
+ merge, dbdict, follows)
+from ruffus.ruffus_exceptions import RethrownJobError
+from ruffus.ruffus_utility import (RUFFUS_HISTORY_FILE,
+ CHECKSUM_FILE_TIMESTAMPS)
+
+workdir = 'tmp_test_mkdir'
+#sub-1s resolution in system?
+#___________________________________________________________________________
+#
+# generate_initial_files1
+#___________________________________________________________________________
+@split(1, [workdir + "/" + prefix + "_name.tmp1" for prefix in "abcd"])
+def generate_initial_files1(in_name, out_names):
+ for on in out_names:
+ with open(on, 'w') as outfile:
+ pass
+
+#___________________________________________________________________________
+#
+# test_product_task
+#___________________________________________________________________________
+@mkdir(workdir + "/test1")
+@mkdir(workdir + "/test2")
+@mkdir(generate_initial_files1, formatter(),
+ ["{path[0]}/{basename[0]}.dir", 3, "{path[0]}/{basename[0]}.dir2"])
+@transform( generate_initial_files1,
+ formatter(),
+ "{path[0]}/{basename[0]}.dir/{basename[0]}.tmp2")
+def test_transform( infiles, outfile):
+ with open(outfile, "w") as p: pass
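+
+# The @mkdir decorators above run before the task itself: they create workdir/test1,
+# workdir/test2 and one "<basename>.dir" directory per input file, so each job can
+# write "{path[0]}/{basename[0]}.dir/{basename[0]}.tmp2". A minimal stand-alone
+# sketch of the same idea (hypothetical file names):
+#
+#   @mkdir("my_output_dir")
+#   @transform(["a.txt"], regex(r"(.*)\.txt"), r"my_output_dir/\1.out")
+#   def copy_to_dir(infile, outfile):
+#       open(outfile, "w").close()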
+
+
+@mkdir(workdir + "/test3")
+@mkdir(generate_initial_files1, formatter(),
+ "{path[0]}/{basename[0]}.dir2")
+def test_transform2():
+ print("Loose cannon!", file=sys.stderr)
+
+
+
+def cleanup_tmpdir():
+ os.system('rm -f %s %s' % (os.path.join(workdir, '*'), RUFFUS_HISTORY_FILE))
+
+
+class Testmkdir(unittest.TestCase):
+ def setUp(self):
+ try:
+ os.mkdir(workdir)
+ except OSError:
+ pass
+
+ #___________________________________________________________________________
+ #
+ # test mkdir() pipeline_printout and pipeline_run
+ #___________________________________________________________________________
+ def test_mkdir_printout(self):
+ """Input file exists, output doesn't exist"""
+ cleanup_tmpdir()
+
+ s = StringIO()
+ pipeline_printout(s, [test_transform, test_transform2], verbose=5, wrap_width = 10000)
+ #self.assertIn('Job needs update: Missing files '
+ # '[tmp_test_mkdir/a_name.tmp1, '
+ # 'tmp_test_mkdir/e_name.tmp1, '
+ # 'tmp_test_mkdir/h_name.tmp1, '
+ # 'tmp_test_mkdir/a_name.e_name.h_name.tmp2]', s.getvalue())
+
+ def test_mkdir_run(self):
+ """Run mkdir"""
+ # simply check that the directories are created and the pipeline runs to completion
+ cleanup_tmpdir()
+ pipeline_run([test_transform, test_transform2], verbose=0, multiprocess = 2)
+
+
+
+ #___________________________________________________________________________
+ #
+ # cleanup
+ #___________________________________________________________________________
+ def tearDown(self):
+ shutil.rmtree(workdir)
+
+
+
+#
+# Necessary to protect the "entry point" of the program under windows.
+# see: http://docs.python.org/library/multiprocessing.html#multiprocessing-programming
+#
+if __name__ == '__main__':
+ unittest.main()
diff --git a/ruffus/test/test_pausing.py b/ruffus/test/test_pausing.py
new file mode 100755
index 0000000..250888f
--- /dev/null
+++ b/ruffus/test/test_pausing.py
@@ -0,0 +1,386 @@
+#!/usr/bin/env python
+from __future__ import print_function
+"""
+
+ test_pausing.py
+
+ test that pauses between jobs keep input and output file timestamps in the correct order
+
+"""
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# options
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+from optparse import OptionParser
+import sys, os
+import os.path
+try:
+ import StringIO as io
+except:
+ import io as io
+
+import re
+
+# add self to search path for testing
+exe_path = os.path.split(os.path.abspath(sys.argv[0]))[0]
+sys.path.insert(0,os.path.abspath(os.path.join(exe_path,"..", "..")))
+if __name__ == '__main__':
+ module_name = os.path.split(sys.argv[0])[1]
+ module_name = os.path.splitext(module_name)[0];
+else:
+ module_name = __name__
+
+
+
+
+parser = OptionParser(version="%prog 1.0")
+parser.add_option("-D", "--debug", dest="debug",
+ action="store_true", default=False,
+ help="Make sure output is correct and clean up.")
+parser.add_option("-t", "--target_tasks", dest="target_tasks",
+ action="append",
+ default = list(),
+ metavar="JOBNAME",
+ type="string",
+ help="Target task(s) of pipeline.")
+parser.add_option("-f", "--forced_tasks", dest="forced_tasks",
+ action="append",
+ default = list(),
+ metavar="JOBNAME",
+ type="string",
+ help="Pipeline task(s) which will be included even if they are up to date.")
+parser.add_option("-j", "--jobs", dest="jobs",
+ default=1,
+ metavar="jobs",
+ type="int",
+ help="Specifies the number of jobs (commands) to run simultaneously.")
+parser.add_option("-v", "--verbose", dest = "verbose",
+ action="count", default=0,
+ help="Print more verbose messages for each additional verbose level.")
+parser.add_option("-d", "--dependency", dest="dependency_file",
+ #default="simple.svg",
+ metavar="FILE",
+ type="string",
+ help="Print a dependency graph of the pipeline that would be executed "
+ "to FILE, but do not execute it.")
+parser.add_option("-F", "--dependency_graph_format", dest="dependency_graph_format",
+ metavar="FORMAT",
+ type="string",
+ default = 'svg',
+ help="format of dependency graph file. Can be 'ps' (PostScript), "+
+ "'svg' 'svgz' (Scalable Vector Graphics), " +
+ "'png' 'gif' (bitmap graphics) etc ")
+parser.add_option("-n", "--just_print", dest="just_print",
+ action="store_true", default=False,
+ help="Print a description of the jobs that would be executed, "
+ "but do not execute them.")
+parser.add_option("-M", "--minimal_rebuild_mode", dest="minimal_rebuild_mode",
+ action="store_true", default=False,
+ help="Rebuild a minimum of tasks necessary for the target. "
+ "Ignore upstream out of date tasks if intervening tasks are fine.")
+parser.add_option("-K", "--no_key_legend_in_graph", dest="no_key_legend_in_graph",
+ action="store_true", default=False,
+ help="Do not print out legend and key for dependency graph.")
+parser.add_option("-H", "--draw_graph_horizontally", dest="draw_horizontally",
+ action="store_true", default=False,
+ help="Draw horizontal dependency graph.")
+
+parameters = [
+ ]
+
+
+
+
+
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# imports
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+import re
+import operator
+import sys,os
+from collections import defaultdict
+import random
+
+sys.path.append(os.path.abspath(os.path.join(exe_path,"..", "..")))
+from ruffus import *
+
+# use simplejson in place of json for python < 2.6
+try:
+ import json
+except ImportError:
+ import simplejson
+ json = simplejson
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Functions
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+import time
+def test_job_io(infiles, outfiles, extra_params):
+ """
+ cat input file contents to output files
+ after writing out the job parameters
+ """
+ # dump parameters
+ params = (infiles, outfiles) + extra_params
+
+ if isinstance(infiles, str):
+ infiles = [infiles]
+ elif infiles == None:
+ infiles = []
+ if isinstance(outfiles, str):
+ outfiles = [outfiles]
+ output_text = list()
+ for f in infiles:
+ output_text.append(open(f).read())
+ output_text = "".join(sorted(output_text))
+ output_text += json.dumps(infiles) + " -> " + json.dumps(outfiles) + "\n"
+ for f in outfiles:
+ with open(f, "w") as ff:
+ ff.write(output_text)
+
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Main logic
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+
+
+
+
+# get help string
+f =io.StringIO()
+parser.print_help(f)
+helpstr = f.getvalue()
+(options, remaining_args) = parser.parse_args()
+
+
+
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Tasks
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+#
+# 1 -> 2 -> 3 ->
+# -> 4 ->
+# 5 -> 6
+#
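+# (in the decorators below, task2 and task4 depend on task1 via @follows/@transform,
+#  task3 transforms task2's output, and task6 merges tasks 3, 4 and 5 into final.6)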
+
+tempdir = "test_pausing_dir/"
+#
+# task1
+#
+@files(None, [tempdir + d for d in ('a.1', 'b.1', 'c.1')])
+@follows(mkdir(tempdir))
+@posttask(lambda: open(tempdir + "task.done", "a").write("Task 1 Done\n"))
+def task1(infiles, outfiles, *extra_params):
+ """
+ First task
+ """
+ open(tempdir + "jobs.start", "a").write('job = %s\n' % json.dumps([infiles, outfiles]))
+ test_job_io(infiles, outfiles, extra_params)
+ open(tempdir + "jobs.finish", "a").write('job = %s\n' % json.dumps([infiles, outfiles]))
+
+
+#
+# task2
+#
+@posttask(lambda: open(tempdir + "task.done", "a").write("Task 2 Done\n"))
+@follows(task1)
+@transform(tempdir + "*.1", suffix(".1"), ".2")
+def task2(infiles, outfiles, *extra_params):
+ """
+ Second task
+ """
+ open(tempdir + "jobs.start", "a").write('job = %s\n' % json.dumps([infiles, outfiles]))
+ test_job_io(infiles, outfiles, extra_params)
+ open(tempdir + "jobs.finish", "a").write('job = %s\n' % json.dumps([infiles, outfiles]))
+
+
+
+#
+# task3
+#
+@transform(task2, regex('(.*).2'), inputs([r"\1.2", tempdir + "a.1"]), r'\1.3')
+@posttask(lambda: open(tempdir + "task.done", "a").write("Task 3 Done\n"))
+def task3(infiles, outfiles, *extra_params):
+ """
+ Third task
+ """
+ open(tempdir + "jobs.start", "a").write('job = %s\n' % json.dumps([infiles, outfiles]))
+ test_job_io(infiles, outfiles, extra_params)
+ open(tempdir + "jobs.finish", "a").write('job = %s\n' % json.dumps([infiles, outfiles]))
+
+
+
+#
+# task4
+#
+@transform(tempdir + "*.1", suffix(".1"), ".4")
+@follows(task1)
+@posttask(lambda: open(tempdir + "task.done", "a").write("Task 4 Done\n"))
+def task4(infiles, outfiles, *extra_params):
+ """
+ Fourth task
+ """
+ open(tempdir + "jobs.start", "a").write('job = %s\n' % json.dumps([infiles, outfiles]))
+ test_job_io(infiles, outfiles, extra_params)
+ open(tempdir + "jobs.finish", "a").write('job = %s\n' % json.dumps([infiles, outfiles]))
+
+#
+# task5
+#
+@files(None, tempdir + 'a.5')
+@follows(mkdir(tempdir))
+@posttask(lambda: open(tempdir + "task.done", "a").write("Task 5 Done\n"))
+def task5(infiles, outfiles, *extra_params):
+ """
+ Fifth task is extra slow
+ """
+ open(tempdir + "jobs.start", "a").write('job = %s\n' % json.dumps([infiles, outfiles]))
+ test_job_io(infiles, outfiles, extra_params)
+ open(tempdir + "jobs.finish", "a").write('job = %s\n' % json.dumps([infiles, outfiles]))
+
+#
+# task6
+#
+#@files([[[tempdir + d for d in 'a.3', 'b.3', 'c.3', 'a.4', 'b.4', 'c.4', 'a.5'], tempdir + 'final.6']])
+@merge([task3, task4, task5], tempdir + "final.6")
+@follows(task3, task4, task5, )
+@posttask(lambda: open(tempdir + "task.done", "a").write("Task 6 Done\n"))
+def task6(infiles, outfiles, *extra_params):
+ """
+ final task
+ """
+ open(tempdir + "jobs.start", "a").write('job = %s\n' % json.dumps([infiles, outfiles]))
+ test_job_io(infiles, outfiles, extra_params)
+ open(tempdir + "jobs.finish", "a").write('job = %s\n' % json.dumps([infiles, outfiles]))
+
+
+
+
+
+def check_job_order_correct(filename):
+ """
+ 1 -> 2 -> 3 ->
+ -> 4 ->
+ 5 -> 6
+ """
+
+ precedence_rules = [[1, 2],
+ [2, 3],
+ [1, 4],
+ [5, 6],
+ [3, 6],
+ [4, 6]]
+
+ index_re = re.compile(r'.*\.([0-9])["\]\n]*$')
+ job_indices = defaultdict(list)
+ for linenum, l in enumerate(open(filename)):
+ m = index_re.search(l)
+ if not m:
+ raise Exception("Non-matching line in [%s]" % filename)
+ job_indices[int(m.group(1))].append(linenum)
+
+ for job_index in job_indices:
+ job_indices[job_index].sort()
+
+ for before, after in precedence_rules:
+ if job_indices[before][-1] >= job_indices[after][0]:
+ raise Exception("Precedence violated for job %d [line %d] and job %d [line %d] of [%s]" % (before, job_indices[before][-1], after, job_indices[after][0], filename))
+
+
+
+def check_final_output_correct():
+ """
+ check if the final output in final.6 is as expected
+ """
+ expected_output = \
+""" ["DIR/a.1"] -> ["DIR/a.2"]
+ ["DIR/a.1"] -> ["DIR/a.4"]
+ ["DIR/a.2", "DIR/a.1"] -> ["DIR/a.3"]
+ ["DIR/a.3", "DIR/b.3", "DIR/c.3", "DIR/a.4", "DIR/b.4", "DIR/c.4", "DIR/a.5"] -> ["DIR/final.6"]
+ ["DIR/b.1"] -> ["DIR/b.2"]
+ ["DIR/b.1"] -> ["DIR/b.4"]
+ ["DIR/b.2", "DIR/a.1"] -> ["DIR/b.3"]
+ ["DIR/c.1"] -> ["DIR/c.2"]
+ ["DIR/c.1"] -> ["DIR/c.4"]
+ ["DIR/c.2", "DIR/a.1"] -> ["DIR/c.3"]
+ [] -> ["DIR/a.1", "DIR/b.1", "DIR/c.1"]
+ [] -> ["DIR/a.1", "DIR/b.1", "DIR/c.1"]
+ [] -> ["DIR/a.1", "DIR/b.1", "DIR/c.1"]
+ [] -> ["DIR/a.1", "DIR/b.1", "DIR/c.1"]
+ [] -> ["DIR/a.1", "DIR/b.1", "DIR/c.1"]
+ [] -> ["DIR/a.1", "DIR/b.1", "DIR/c.1"]
+ [] -> ["DIR/a.1", "DIR/b.1", "DIR/c.1"]
+ [] -> ["DIR/a.1", "DIR/b.1", "DIR/c.1"]
+ [] -> ["DIR/a.1", "DIR/b.1", "DIR/c.1"]
+ [] -> ["DIR/a.5"]"""
+ expected_output = expected_output.replace(" ", "").replace("DIR/", tempdir).split("\n")
+ final_6_contents = sorted([l.rstrip() for l in open(tempdir + "final.6", "r").readlines()])
+ if final_6_contents != expected_output:
+ for i, (l1, l2) in enumerate(zip(final_6_contents, expected_output)):
+ if l1 != l2:
+ sys.stderr.write("%d\n >%s<\n >%s<\n" % (i, l1, l2))
+ raise Exception ("Final.6 output is not as expected\n")
+
+
+#
+# Necessary to protect the "entry point" of the program under windows.
+# see: http://docs.python.org/library/multiprocessing.html#multiprocessing-programming
+#
+if __name__ == '__main__':
+ if options.just_print:
+ pipeline_printout(sys.stdout, options.target_tasks, options.forced_tasks,
+ long_winded=True,
+ gnu_make_maximal_rebuild_mode = not options.minimal_rebuild_mode)
+
+ elif options.dependency_file:
+ pipeline_printout_graph ( open(options.dependency_file, "w"),
+ options.dependency_graph_format,
+ options.target_tasks,
+ options.forced_tasks,
+ draw_vertically = not options.draw_horizontally,
+ gnu_make_maximal_rebuild_mode = not options.minimal_rebuild_mode,
+ no_key_legend = options.no_key_legend_in_graph)
+ elif options.debug:
+ import os
+ os.system("rm -rf %s" % tempdir)
+ pipeline_run(options.target_tasks, options.forced_tasks, multiprocess = options.jobs,
+ logger = stderr_logger if options.verbose else black_hole_logger,
+ gnu_make_maximal_rebuild_mode = not options.minimal_rebuild_mode,
+ verbose = options.verbose)
+
+
+ check_final_output_correct()
+ check_job_order_correct(tempdir + "jobs.start")
+ check_job_order_correct(tempdir + "jobs.finish")
+ os.system("rm -rf %s" % tempdir)
+ print("OK")
+ else:
+ pipeline_run(options.target_tasks, options.forced_tasks, multiprocess = options.jobs,
+ logger = stderr_logger if options.verbose else black_hole_logger,
+ gnu_make_maximal_rebuild_mode = not options.minimal_rebuild_mode,
+ verbose = options.verbose)
diff --git a/ruffus/test/test_regex_error_messages.py b/ruffus/test/test_regex_error_messages.py
new file mode 100755
index 0000000..796aa7c
--- /dev/null
+++ b/ruffus/test/test_regex_error_messages.py
@@ -0,0 +1,444 @@
+#!/usr/bin/env python
+from __future__ import print_function
+"""
+
+ test_regex_error_messages.py
+
+ test the error messages produced when regex() and suffix() patterns fail to match
+
+ Includes code from python.unittest with the following copyright notice:
+
+
+ Copyright (c) 1999-2003 Steve Purcell
+ Copyright (c) 2003-2010 Python Software Foundation
+ This module is free software, and you may redistribute it and/or modify
+ it under the same terms as Python itself, so long as this copyright message
+ and disclaimer are retained in their original form.
+
+ IN NO EVENT SHALL THE AUTHOR BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT,
+ SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OF
+ THIS CODE, EVEN IF THE AUTHOR HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ DAMAGE.
+
+ THE AUTHOR SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ PARTICULAR PURPOSE. THE CODE PROVIDED HEREUNDER IS ON AN "AS IS" BASIS,
+ AND THERE IS NO OBLIGATION WHATSOEVER TO PROVIDE MAINTENANCE,
+ SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+
+"""
+
+
+import unittest
+import os, re
+import sys
+import shutil
+try:
+ from StringIO import StringIO
+except:
+ from io import StringIO
+import time
+
+exe_path = os.path.split(os.path.abspath(sys.argv[0]))[0]
+sys.path.insert(0, os.path.abspath(os.path.join(exe_path,"..", "..")))
+from ruffus import *
+from ruffus import (pipeline_run, pipeline_printout, suffix, transform, split,
+ merge, dbdict, follows)
+from ruffus.ruffus_exceptions import *
+from ruffus.ruffus_utility import (RUFFUS_HISTORY_FILE)
+
+workdir = 'tmp_test_regex_error_messages'
+#sub-1s resolution in system?
+one_second_per_job = None
+parallelism = 2
+#___________________________________________________________________________
+#
+# generate_initial_files1
+#___________________________________________________________________________
+@originate([workdir + "/" + prefix + "_name.tmp1" for prefix in "abcdefghi"])
+def generate_initial_files1(out_name):
+ with open(out_name, 'w') as outfile:
+ pass
+
+#___________________________________________________________________________
+#
+# test_regex_task
+#___________________________________________________________________________
+@transform(
+ generate_initial_files1,
+ regex("(.*)/(?P<PREFIX>[abcd])(_name)(.tmp1)"),
+ r"\1/\g<PREFIX>\3.tmp2",# output file
+ r"\2", # extra: prefix = \2
+ r"\g<PREFIX>", # extra: prefix = \2
+ r"\4") # extra: extension
+def test_regex_task(infiles, outfile,
+ prefix1,
+ prefix2,
+ extension):
+ with open(outfile, "w") as p:
+ pass
+
+ if prefix1 != prefix2:
+ raise Exception("Expecting %s == %s" % (prefix1, prefix2))
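+
+# Worked example of the regex() above: for the input file
+# "tmp_test_regex_error_messages/a_name.tmp1" the captures are
+#   \1 = "tmp_test_regex_error_messages", PREFIX = "a", \3 = "_name", \4 = ".tmp1"
+# so the output is "tmp_test_regex_error_messages/a_name.tmp2" and the three extra
+# parameters are "a", "a" and ".tmp1".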
+
+
+#___________________________________________________________________________
+#
+# test_regex_unmatched_task
+#___________________________________________________________________________
+@transform(
+ generate_initial_files1,
+ regex("(.*)/(?P<PREFIX>[abcd])(_name)(.xxx)"),
+ r"\1/\g<PREFIXA>\3.tmp2",# output file
+ r"\2", # extra: prefix = \2
+ r"\g<PREFIX>", # extra: prefix = \2
+ r"\4") # extra: extension
+def test_regex_unmatched_task(infiles, outfile,
+ prefix1,
+ prefix2,
+ extension):
+ raise Exception("Should blow up first")
+
+
+#___________________________________________________________________________
+#
+# test_suffix_task
+#___________________________________________________________________________
+@transform(
+ generate_initial_files1,
+ suffix(".tmp1"),
+ r".tmp2", # output file
+ r"\1") # extra: basename
+def test_suffix_task(infile, outfile,
+ basename):
+ with open (outfile, "w") as f: pass
+
+
+#___________________________________________________________________________
+#
+# test_suffix_unmatched_task
+#___________________________________________________________________________
+@transform(
+ generate_initial_files1,
+ suffix(".tmp1"),
+ r".tmp2", # output file
+ r"\2") # extra: unknown
+def test_suffix_unmatched_task(infiles, outfile, unknown):
+ raise Exception("Should blow up first")
+
+
+#___________________________________________________________________________
+#
+# test_suffix_unmatched_task2
+#___________________________________________________________________________
+@transform(
+ generate_initial_files1,
+ suffix(".tmp2"),
+ r".tmp2") # output file
+def test_suffix_unmatched_task2(infiles, outfile):
+ raise Exception("Should blow up first")
+
+
+
+#___________________________________________________________________________
+#
+# test_regex_misspelt_capture_error_task
+#___________________________________________________________________________
+@transform(
+ generate_initial_files1,
+ regex("(.*)/(?P<PREFIX>[abcd])(_name)(.tmp)"),
+ r"\1/\g<PREFIXA>\3.tmp2",# output file
+ r"\2", # extra: prefix = \2
+ r"\g<PREFIX>", # extra: prefix = \2
+ r"\4") # extra: extension
+def test_regex_misspelt_capture_error_task(infiles, outfile,
+ prefix1,
+ prefix2,
+ extension):
+ raise Exception("Should blow up first")
+
+
+#___________________________________________________________________________
+#
+# test_regex_misspelt_capture2_error_task
+#___________________________________________________________________________
+@transform(
+ generate_initial_files1,
+ regex("(.*)/(?P<PREFIX>[abcd])(_name)(.tmp)"),
+ r"\1/\g<PREFIX>\3.tmp2",# output file
+ r"\2", # extra: prefix = \2
+ r"\g<PREFIXA>", # extra: prefix = \2
+ r"\4") # extra: extension
+def test_regex_misspelt_capture2_error_task(infiles, outfile,
+ prefix1,
+ prefix2,
+ extension):
+ raise Exception("Should blow up first")
+
+
+#___________________________________________________________________________
+#
+# test_regex_out_of_range_regex_reference_error_task
+#___________________________________________________________________________
+@transform(
+ generate_initial_files1,
+ regex("(.*)/(?P<PREFIX>[abcd])(_name)(.tmp)"),
+ r"\1/\g<PREFIX>\5.tmp2",# output file
+ r"\2", # extra: prefix = \2
+ r"\g<PREFIX>", # extra: prefix = \2
+ r"\4") # extra: extension
+def test_regex_out_of_range_regex_reference_error_task(infiles, outfile,
+ prefix1,
+ prefix2,
+ extension):
+ raise Exception("Should blow up first")
+
+
+
+
+
+def cleanup_tmpdir():
+ os.system('rm -f %s %s' % (os.path.join(workdir, '*'), RUFFUS_HISTORY_FILE))
+
+class _AssertRaisesContext_27(object):
+ """A context manager used to implement TestCase.assertRaises* methods.
+ Taken from python unittest2.7
+ """
+
+ def __init__(self, expected, test_case, expected_regexp=None):
+ self.expected = expected
+ self.failureException = test_case.failureException
+ self.expected_regexp = expected_regexp
+
+
+ def __enter__(self):
+ return self
+
+ def __exit__(self, exc_type, exc_value, tb):
+ if exc_type is None:
+ try:
+ exc_name = self.expected.__name__
+ except AttributeError:
+ exc_name = str(self.expected)
+ raise self.failureException(
+ "{0} not raised".format(exc_name))
+ if not issubclass(exc_type, self.expected):
+ # let unexpected exceptions pass through
+ return False
+ self.exception = exc_value # store for later retrieval
+ if self.expected_regexp is None:
+ return True
+
+ expected_regexp = self.expected_regexp
+ if isinstance(expected_regexp, basestring):
+ expected_regexp = re.compile(expected_regexp)
+ if not expected_regexp.search(str(exc_value)):
+ raise self.failureException('"%s" does not match "%s"' %
+ (expected_regexp.pattern, str(exc_value)))
+ return True
+
+
+class Test_regex_error_messages(unittest.TestCase):
+ def setUp(self):
+ try:
+ os.mkdir(workdir)
+ except OSError:
+ pass
+ if sys.hexversion < 0x03000000:
+ self.assertRaisesRegex = self.assertRaisesRegexp27
+
+ if sys.hexversion < 0x02070000:
+ self.assertIn = self.my_assertIn
+
+ def my_assertIn (self, test_string, full_string):
+ self.assertTrue(test_string in full_string)
+
+
+ #
+ def assertRaisesRegexp27(self, expected_exception, expected_regexp,
+ callable_obj=None, *args, **kwargs):
+ """Asserts that the message in a raised exception matches a regexp.
+
+ Args:
+ expected_exception: Exception class expected to be raised.
+ expected_regexp: Regexp (re pattern object or string) expected
+ to be found in error message.
+ callable_obj: Function to be called.
+ args: Extra args.
+ kwargs: Extra kwargs.
+ """
+ context = _AssertRaisesContext_27(expected_exception, self, expected_regexp)
+ if callable_obj is None:
+ return context
+ with context:
+ callable_obj(*args, **kwargs)
+
+
+
+ #___________________________________________________________________________
+ #
+ # test regex() pipeline_printout and pipeline_run
+ #___________________________________________________________________________
+ def test_regex_printout(self):
+ cleanup_tmpdir()
+
+ s = StringIO()
+ pipeline_printout(s, [test_regex_task], verbose=5, wrap_width = 10000)
+ self.assertTrue(re.search('Missing files\n\s+\[tmp_test_regex_error_messages/a_name.tmp1, tmp_test_regex_error_messages/a_name.tmp2', s.getvalue()))
+
+
+ def test_regex_run(self):
+ """Run transform(...,regex()...)"""
+ # simply check that the pipeline runs to completion
+ cleanup_tmpdir()
+ pipeline_run([test_regex_task], verbose=0, multiprocess = parallelism, one_second_per_job = one_second_per_job)
+
+
+ #___________________________________________________________________________
+ #
+ # test regex() pipeline_printout and pipeline_run
+ #___________________________________________________________________________
+ def test_regex_unmatched_printout(self):
+ cleanup_tmpdir()
+ s = StringIO()
+ pipeline_printout(s, [test_regex_unmatched_task], verbose=5, wrap_width = 10000)
+ self.assertIn("Warning: File match failure: File 'tmp_test_regex_error_messages/a_name.tmp1' does not match regex", s.getvalue())
+
+ def test_regex_unmatched_run(self):
+ """Run transform(...,regex()...)"""
+ # the regex matches no input files, so this should run with nothing to do
+ cleanup_tmpdir()
+ pipeline_run([test_regex_unmatched_task], verbose=0, multiprocess = parallelism, one_second_per_job = one_second_per_job)
+
+
+ #___________________________________________________________________________
+ #
+ # test suffix() pipeline_printout and pipeline_run
+ #___________________________________________________________________________
+ def test_suffix_printout(self):
+ cleanup_tmpdir()
+
+ s = StringIO()
+ pipeline_printout(s, [test_suffix_task], verbose=5, wrap_width = 10000)
+ self.assertTrue(re.search('Missing files\n\s+\[tmp_test_regex_error_messages/a_name.tmp1, tmp_test_regex_error_messages/a_name.tmp2', s.getvalue()))
+
+ def test_suffix_run(self):
+ """Run transform(...,suffix()...)"""
+ # simply check that the pipeline runs to completion
+ cleanup_tmpdir()
+ pipeline_run([test_suffix_task], verbose=0, multiprocess = parallelism, one_second_per_job = one_second_per_job)
+
+
+ #___________________________________________________________________________
+ #
+ # test suffix() pipeline_printout and pipeline_run
+ #___________________________________________________________________________
+ def test_suffix_unmatched(self):
+ cleanup_tmpdir()
+ s = StringIO()
+ self.assertRaisesRegex(fatal_error_input_file_does_not_match,
+ "File '.*?' does not match regex\('.*?'\) and pattern '.*?':\n.*invalid group reference",
+ pipeline_printout,
+ s, [test_suffix_unmatched_task],
+ verbose = 3)
+ self.assertRaisesRegex(RethrownJobError,
+ "File '.*?' does not match regex\('.*?'\) and pattern '.*?':\n.*invalid group reference",
+ pipeline_run,
+ [test_suffix_unmatched_task], verbose = 0, multiprocess = parallelism)
+
+
+ #___________________________________________________________________________
+ #
+ # test suffix() pipeline_printout and pipeline_run
+ #___________________________________________________________________________
+ def test_suffix_unmatched_printout2(self):
+ cleanup_tmpdir()
+ s = StringIO()
+ pipeline_printout(s, [test_suffix_unmatched_task2], verbose=5, wrap_width = 10000)
+ self.assertIn("Warning: File match failure: File 'tmp_test_regex_error_messages/a_name.tmp1' does not match suffix", s.getvalue())
+
+ def test_suffix_unmatched_run2(self):
+ """Run transform(...,suffix()...)"""
+ # the suffix matches no input files, so this should run with nothing to do
+ cleanup_tmpdir()
+ pipeline_run([test_suffix_unmatched_task2], verbose=0, multiprocess = parallelism, one_second_per_job = one_second_per_job)
+
+
+
+ #___________________________________________________________________________
+ #
+ # test regex() errors: func pipeline_printout
+ #___________________________________________________________________________
+ def test_regex_misspelt_capture_error(self):
+ cleanup_tmpdir()
+ s = StringIO()
+ self.assertRaisesRegex(fatal_error_input_file_does_not_match,
+ "File '.*?' does not match regex\('.*?'\) and pattern '.*?':\n.*unknown group name",
+ pipeline_printout,
+ s, [test_regex_misspelt_capture_error_task],
+ verbose = 3)
+ self.assertRaisesRegex(RethrownJobError,
+ "File '.*?' does not match regex\('.*?'\) and pattern '.*?':\n.*unknown group name",
+ pipeline_run,
+ [test_regex_misspelt_capture_error_task], verbose = 0)
+
+ #___________________________________________________________________________
+ #
+ # test regex() errors: func pipeline_printout
+ #___________________________________________________________________________
+ def test_regex_misspelt_capture2_error(self):
+ cleanup_tmpdir()
+ s = StringIO()
+ self.assertRaisesRegex(fatal_error_input_file_does_not_match,
+ "File '.*?' does not match regex\('.*?'\) and pattern '.*?':\n.*unknown group name",
+ pipeline_printout,
+ s, [test_regex_misspelt_capture2_error_task],
+ verbose = 3)
+ self.assertRaisesRegex(RethrownJobError,
+ "File '.*?' does not match regex\('.*?'\) and pattern '.*?':\n.*unknown group name",
+ pipeline_run,
+ [test_regex_misspelt_capture2_error_task], verbose = 0, multiprocess = parallelism)
+
+
+ #___________________________________________________________________________
+ #
+ # test regex() errors: func pipeline_printout
+ #___________________________________________________________________________
+ def test_regex_out_of_range_regex_reference_error_printout(self):
+ cleanup_tmpdir()
+ s = StringIO()
+ self.assertRaisesRegex(fatal_error_input_file_does_not_match,
+ "File '.*?' does not match regex\('.*?'\) and pattern '.*?':\n.*invalid group reference",
+ pipeline_printout,
+ s, [test_regex_out_of_range_regex_reference_error_task],
+ verbose = 3)
+ self.assertRaisesRegex(RethrownJobError,
+ "File '.*?' does not match regex\('.*?'\) and pattern '.*?':\n.*invalid group reference",
+ pipeline_run,
+ [test_regex_out_of_range_regex_reference_error_task], verbose = 0, multiprocess = parallelism)
+
+
+ #___________________________________________________________________________
+ #
+ # cleanup
+ #___________________________________________________________________________
+ def tearDown(self):
+ pass
+ shutil.rmtree(workdir)
+
+
+
+#
+# Necessary to protect the "entry point" of the program under windows.
+# see: http://docs.python.org/library/multiprocessing.html#multiprocessing-programming
+#
+if __name__ == '__main__':
+ #pipeline_printout(sys.stdout, [test_product_task], verbose = 3)
+ parallelism = 1
+ suite = unittest.TestLoader().loadTestsFromTestCase(Test_regex_error_messages)
+ unittest.TextTestRunner(verbosity=2).run(suite)
+ parallelism = 2
+ suite = unittest.TestLoader().loadTestsFromTestCase(Test_regex_error_messages)
+ unittest.TextTestRunner(verbosity=2).run(suite)
+ #unittest.main()
+
diff --git a/ruffus/test/test_ruffus_utility.py b/ruffus/test/test_ruffus_utility.py
new file mode 100755
index 0000000..eac816f
--- /dev/null
+++ b/ruffus/test/test_ruffus_utility.py
@@ -0,0 +1,965 @@
+#!/usr/bin/env python
+from __future__ import print_function
+################################################################################
+#
+# test_ruffus_utility.py
+#
+#
+# Copyright (c) 2009 Leo Goodstadt
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+#################################################################################
+
+"""
+ test_ruffus_utility.py
+"""
+
+# use simplejson in place of json for python < 2.6
+try:
+ import json
+except ImportError:
+ import simplejson
+ json = simplejson
+import unittest, os,sys
+if __name__ != '__main__':
+ raise Exception ("This is not a callable module [%s]" % __name__)
+
+
+exe_path = os.path.split(os.path.abspath(sys.argv[0]))[0]
+sys.path.insert(0, os.path.abspath(os.path.join(exe_path,"..", "..")))
+from ruffus import *
+from ruffus.ruffus_utility import *
+
+
+os.chdir(exe_path)
+
+import unittest, time
+
+#_________________________________________________________________________________________
+
+# get_nested_tasks_or_globs
+
+#_________________________________________________________________________________________
+class Test_get_nested_tasks_or_globs(unittest.TestCase):
+ def setUp(self):
+ exe_path = os.path.split(os.path.abspath(sys.argv[0]))[0]
+ os.chdir(exe_path)
+
+ # self.assertEqual(self.seq, range(10))
+ # self.assertTrue(element in self.seq)
+ # self.assertRaises(ValueError, random.sample, self.seq, 20)
+
+ def check_equal (self, a,b):
+ self.assertEqual(get_nested_tasks_or_globs(a), b)
+
+ def test_get_nested_tasks_or_globs(self):
+
+ #
+ # test strings
+ #
+ self.check_equal("test", (set(), set(), set()))
+ self.check_equal([("test1",), "test2", 3], (set(), set(), set()))
+
+ #
+ # test missing
+ #
+ self.check_equal((1,3, [5]), (set(), set(), set()))
+ self.check_equal(None, (set(), set(), set()))
+
+ #
+ # test glob
+ #
+ self.check_equal([("test1.*",), "test?2", 3], (set(), set(['test1.*', 'test?2']), set()))
+
+ #
+ # test glob and string
+ #
+ self.check_equal([("test*1",), (("test3",),),"test2", 3], (set(), set(['test*1']), set()))
+
+ #
+ # test task function
+ #
+ self.check_equal(is_glob, (set([is_glob]), set([]), set()))
+ self.check_equal([is_glob, [1, "this", ["that*", 5]], [(get_strings_in_nested_sequence,)]], (
+ set([is_glob, get_strings_in_nested_sequence]), set(["that*"]), set()))
+ #
+ # test wrapper
+ #
+ self.check_equal(output_from(is_glob, ["what", 7], 5), (set([is_glob, "what"]), set([]), set()))
+
+#_________________________________________________________________________________________
+
+# replace_func_names_with_tasks
+
+#_________________________________________________________________________________________
+class Test_replace_func_names_with_tasks(unittest.TestCase):
+ def setUp(self):
+ exe_path = os.path.split(os.path.abspath(sys.argv[0]))[0]
+ os.chdir(exe_path)
+
+ # self.assertEqual(self.seq, range(10))
+ # self.assertTrue(element in self.seq)
+ # self.assertRaises(ValueError, random.sample, self.seq, 20)
+
+ def check_equal (self, a,b, d):
+ self.assertEqual(replace_func_names_with_tasks(a, d), b)
+
+ def test_replace_func_names_with_tasks(self):
+ func_or_name_to_task = {is_glob: "FF is_glob", "what" : "FF what", get_strings_in_nested_sequence: "FF get_strings_in_nested_sequence"}
+
+
+ #
+ # test strings
+ #
+ self.check_equal("test", "test", func_or_name_to_task)
+ self.check_equal( [("test1",), "test2", 3],
+ [("test1",), "test2", 3],
+ func_or_name_to_task)
+
+ #
+ # test missing
+ #
+ self.check_equal((1,3, [5]), (1,3, [5]), func_or_name_to_task)
+ self.check_equal(None, None, func_or_name_to_task)
+
+
+
+ #
+ # test task function
+ #
+ self.check_equal(is_glob, "FF is_glob", func_or_name_to_task)
+ self.check_equal([is_glob, [1, "this", ["that*", 5]], [(get_strings_in_nested_sequence,)]],
+ ["FF is_glob", [1, "this", ["that*", 5]], [("FF get_strings_in_nested_sequence",)]],
+ func_or_name_to_task)
+ #
+ # test wrapper
+ #
+ self.check_equal(output_from(is_glob, ["what", 7], 5),
+ ["FF is_glob", ["FF what", 7], 5],
+ func_or_name_to_task)
+ self.check_equal(output_from(is_glob),
+ "FF is_glob",
+ func_or_name_to_task)
+
+ self.check_equal([1, output_from(is_glob, ["what", 7], 5)],
+ [1, "FF is_glob", ["FF what", 7], 5],
+ func_or_name_to_task)
+
+ self.check_equal([1, output_from(is_glob), ["what", 7], 5],
+ [1, "FF is_glob", ["what", 7], 5],
+ func_or_name_to_task)
+
+
+
+#_________________________________________________________________________________________
+
+# non_str_sequence
+
+#_________________________________________________________________________________________
+class Test_non_str_sequence(unittest.TestCase):
+
+ def test_non_str_sequence (self):
+ """
+ non_str_sequence()
+ """
+ test_str1 = "asfas"
+ class inherited_str (str):
+ #
+ # use __new__ instead of __init__ because str is immutable
+ #
+ def __new__( cls, a):
+ obj = super( inherited_str, cls).__new__( inherited_str, a )
+ return obj
+
+ test_str2 = inherited_str("test")
+ class inherited_list (list):
+ def __init__ (self, *param):
+ list.__init__(self, *param)
+ test_str3 = list(test_str1)
+ test_str4 = inherited_list(test_str2)
+ self.assertTrue(not non_str_sequence(test_str1))
+ self.assertTrue(not non_str_sequence(test_str2))
+ self.assertTrue(non_str_sequence(test_str3))
+ self.assertTrue(non_str_sequence(test_str4))
+
+#_________________________________________________________________________________________
+
+# get_strings_in_nested_sequence
+
+#_________________________________________________________________________________________
+class Test_get_strings_in_nested_sequence(unittest.TestCase):
+
+ def test_get_strings_in_nested_sequence (self):
+ """
+ get_strings_in_nested_sequence()
+ """
+ class inherited_str (str):
+ #
+ # use __new__ instead of __init__ because str is immutable
+ #
+ def __new__( cls, a):
+ obj = super( inherited_str, cls).__new__( inherited_str, a )
+ return obj
+
+ class inherited_list (list):
+ def __init__ (self, *param):
+ list.__init__(self, *param)
+
+ self.assertEqual(get_strings_in_nested_sequence("one"), ["one"])
+ self.assertEqual(get_strings_in_nested_sequence(["one", "two"]), ["one", "two"])
+ self.assertEqual(get_strings_in_nested_sequence(["one", 1, "two"]), ["one", "two"])
+ self.assertEqual(get_strings_in_nested_sequence(["one", [1, ["two"]]]), ["one", "two"])
+ self.assertEqual(get_strings_in_nested_sequence([inherited_str("one"), [1, ["two"]]]), [inherited_str("one"), "two"])
+ self.assertEqual(get_strings_in_nested_sequence(inherited_list([inherited_str("one"), [1, ["two"]]])),
+ inherited_list([inherited_str("one"), "two"]))
+
+
+#_________________________________________________________________________________________
+
+# get_first_strings_in_nested_sequence
+
+#_________________________________________________________________________________________
+class Test_get_first_strings_in_nested_sequence(unittest.TestCase):
+
+ def test_get_first_strings_in_nested_sequence (self):
+ """
+ get_first_strings_in_nested_sequence()
+ """
+ class inherited_str (str):
+ #
+ # use __new__ instead of __init__ because str is immutable
+ #
+ def __new__( cls, a):
+ obj = super( inherited_str, cls).__new__( inherited_str, a )
+ return obj
+
+ class inherited_list (list):
+ def __init__ (self, *param):
+ list.__init__(self, *param)
+
+ self.assertEqual(get_strings_in_nested_sequence("one", True), ["one"])
+ self.assertEqual(get_strings_in_nested_sequence(["one", "two"], True), ["one", "two"])
+ self.assertEqual(get_strings_in_nested_sequence(["one", 1, "two"], True), ["one", "two"])
+ self.assertEqual(get_strings_in_nested_sequence(["one", [1, ["two"]]], True), ["one", "two"])
+ self.assertEqual(get_strings_in_nested_sequence([inherited_str("one"), [1, ["two"]]], True), [inherited_str("one"), "two"])
+ self.assertEqual(get_strings_in_nested_sequence(inherited_list([inherited_str("one"), [1, ["two"]]]), True),
+ inherited_list([inherited_str("one"), "two"]))
+ self.assertEqual(get_strings_in_nested_sequence(["one", [1, ["two"], "three"]], True), ["one", "two"])
+ d = {"four" :4}
+ self.assertEqual(get_strings_in_nested_sequence(["one", [1, [d, "two"], "three"]], True), ["one", "two"])
+ self.assertEqual(get_strings_in_nested_sequence(None, True), [])
+ self.assertEqual(get_strings_in_nested_sequence([], True), [])
+ self.assertEqual(get_strings_in_nested_sequence([1,2,3, d], True), [])
+
+
+#_________________________________________________________________________________________
+
+# Test_compile_regex
+
+#_________________________________________________________________________________________
+class Test_compile_regex (unittest.TestCase):
+ def test_compile_regex (self):
+ compile_regex("Dummy Task", regex(".*"), Exception, "test1")
+
+ # bad regex
+ self.assertRaises(Exception, compile_regex, "Dummy Task", regex(".*)"), Exception, "test1")
+ try:
+ compile_regex("Dummy Task", regex(".*)"), Exception, "test1")
+ except Exception as e:
+ self.assertEqual(e.args, ('Dummy Task', "test1: regular expression regex('.*)') is malformed\n[sre_constants.error: (unbalanced parenthesis)]"))
+
+ # bad number of items regex
+ self.assertRaises(Exception, compile_regex, "Dummy Task", regex(".*", "o"), Exception, "test1")
+ try:
+ compile_regex("Dummy Task", regex(".*", "o"), Exception, "test1")
+ except Exception as e:
+ self.assertEqual(e.args, ('Dummy Task', "test1: regex('.*', 'o') is malformed\nregex(...) should only be used to wrap a single regular expression string"))
+
+ # 0 number of items regex
+ self.assertRaises(Exception, compile_regex, "Dummy Task", regex(), Exception, "test1")
+ try:
+ compile_regex("Dummy Task", regex(), Exception, "test1")
+ except Exception as e:
+ self.assertEqual(e.args, ('Dummy Task', 'test1: regex() is malformed\nregex(...) should only be used to wrap a single regular expression string'))
+
+ # bad number of items suffix
+ self.assertRaises(Exception, compile_suffix, "Dummy Task", suffix(".*", "o"), Exception, "test1")
+ try:
+ compile_suffix("Dummy Task", suffix(".*", "o"), Exception, "test1")
+ except Exception as e:
+ self.assertEqual(e.args, ('Dummy Task', "test1: suffix('('.*', 'o')') is malformed.\nsuffix(...) should only be used to wrap a single string matching the suffices of file names"))
+
+ # 0 number of items suffix
+ self.assertRaises(Exception, compile_suffix, "Dummy Task", suffix(), Exception, "test1")
+ try:
+ compile_suffix("Dummy Task", suffix(), Exception, "test1")
+ except Exception as e:
+ self.assertEqual(e.args, ('Dummy Task', 'test1: suffix() is malformed.\nsuffix(...) should be used to wrap a string matching the suffices of file names'))
+
+#_________________________________________________________________________________________
+
+# Test_check_files_io_parameters
+
+#_________________________________________________________________________________________
+class Test_check_files_io_parameters (unittest.TestCase):
+ def test_check_files_io_parameters(self):
+
+
+ class t_fake_task(object):
+ def __init__ (self):
+ self._action_type = None
+ self._name = "fake task"
+ fake_task = t_fake_task()
+
+ single_job_params = [["input", "output"]]
+ multiple_job_params = [["input1", "output1"], ["input2", "output2"]]
+
+ check_files_io_parameters (fake_task, single_job_params, error_task_files)
+ check_files_io_parameters (fake_task, multiple_job_params, error_task_files)
+
+
+ #Bad format
+ bad_single_job_params = ["input", "output"]
+ self.assertRaises(error_task_files, check_files_io_parameters, fake_task, bad_single_job_params, error_task_files)
+
+ #Missing output files for job
+ bad_multiple_job_params = [["input1", "output1"], ["input2"]]
+ self.assertRaises(error_task_files, check_files_io_parameters, fake_task, bad_multiple_job_params, error_task_files)
+
+ #Missing input files for job
+ bad_multiple_job_params = [["input1", "output1"], []]
+ self.assertRaises(error_task_files, check_files_io_parameters, fake_task, bad_multiple_job_params, error_task_files)
+
+ #Input or output file parameters should contain at least one or more file names strings
+ #bad_multiple_job_params = [[1, 2]]
+ #self.assertRaises(error_task_files, check_files_io_parameters, fake_task, bad_multiple_job_params, error_task_files)
+
+#_________________________________________________________________________________________
+
+# Test_get_first_string_in_nested_sequence
+
+#_________________________________________________________________________________________
+class Test_get_first_string_in_nested_sequence (unittest.TestCase):
+ def test_get_first_string_in_nested_sequence(self):
+
+ self.assertEqual(get_first_string_in_nested_sequence("a") , "a")
+ self.assertEqual(get_first_string_in_nested_sequence(None) , None)
+ self.assertEqual(get_first_string_in_nested_sequence(1) , None)
+ self.assertEqual(get_first_string_in_nested_sequence((1,2)) , None)
+ self.assertEqual(get_first_string_in_nested_sequence((1,2, "a")) , "a")
+ self.assertEqual(get_first_string_in_nested_sequence((1,2, "a")) , "a")
+ self.assertEqual(get_first_string_in_nested_sequence((1,[2,"b"], "a")) , "b")
+ self.assertEqual(get_first_string_in_nested_sequence((1,set([2,"b"]), "a")) , "b")
+
+#_________________________________________________________________________________________
+
+# Test_check_parallel_parameters
+
+#_________________________________________________________________________________________
+class Test_check_parallel_parameters (unittest.TestCase):
+ def test_check_parallel_parameters(self):
+
+
+ class t_fake_task(object):
+ def __init__ (self):
+ self._action_type = None
+ self._name = "fake task"
+ fake_task = t_fake_task()
+
+ single_job_params = [["input", "output"]]
+ multiple_job_params = [["input1", "output1"], ["input2", "output2"]]
+
+ check_parallel_parameters (fake_task, single_job_params, error_task_files)
+ check_parallel_parameters (fake_task, multiple_job_params, error_task_files)
+
+
+ #Bad format
+ bad_single_job_params = ["input", "output"]
+ self.assertRaises(error_task_parallel, check_parallel_parameters, fake_task, bad_single_job_params, error_task_parallel)
+
+#_________________________________________________________________________________________
+
+# expand_nested_tasks_or_globs
+
+#_________________________________________________________________________________________
+class Test_expand_nested_tasks_or_globs(unittest.TestCase):
+ def setUp(self):
+ exe_path = os.path.split(os.path.abspath(sys.argv[0]))[0]
+ os.chdir(exe_path)
+ t1 = task._task("module", "func1")
+ t2 = task._task("module", "func2")
+ t3 = task._task("module", "func3")
+ self.tasks = [t1, t2, t3]
+
+ # self.assertEqual(self.seq, range(10))
+ # self.assertTrue(element in self.seq)
+ # self.assertRaises(ValueError, random.sample, self.seq, 20)
+
+ def check_equal (self, a,b):
+
+ tasks, globs, runtime_data_names = get_nested_tasks_or_globs(a)
+ func_or_name_to_task = dict(list(zip((non_str_sequence, get_strings_in_nested_sequence, "what"), self.tasks)))
+
+ task_or_glob_to_files = dict()
+ #for f in func_or_name_to_task:
+ # print f, task_or_glob_to_files[func_or_name_to_task[f]]
+
+ task_or_glob_to_files[self.tasks[0] ] = ["t1a", "t1b"] # non_str_sequence
+ task_or_glob_to_files[self.tasks[1] ] = ["t2"] # get_strings_in_nested_sequence
+ task_or_glob_to_files[self.tasks[2] ] = ["t3"] # "what"
+ task_or_glob_to_files["that*" ] = ["that1", "that2"]
+ task_or_glob_to_files["test*1" ] = ["test11","test21"]
+ task_or_glob_to_files["test1.*"] = ["test1.1", "test1.2"]
+ task_or_glob_to_files["test?2" ] = ["test12"]
+
+
+ param_a = replace_func_names_with_tasks(a, func_or_name_to_task)
+ self.assertEqual(expand_nested_tasks_or_globs(param_a, task_or_glob_to_files), b)
+
+ def test_expand_nested_tasks_or_globs(self):
+
+ #
+ # test strings
+ #
+ self.check_equal("test", "test")
+ self.check_equal([("test1",), "test2", 3], [("test1",), "test2", 3])
+
+ #
+ # test missing
+ #
+ self.check_equal(None, None)
+
+ #
+ # test glob
+ #
+ self.check_equal([("test1.*",), "test?2", 3],
+ [("test1.1","test1.2"), "test12", 3])
+ self.check_equal(["test1.*", "test?2", 3],
+ ["test1.1","test1.2", "test12", 3])
+
+ #
+ # test glob and string
+ #
+ self.check_equal([("test*1",), (("test3",),),"test2", 3],
+ [("test11","test21"), (("test3",),),"test2", 3])
+
+ #
+ # test task function
+ #
+ self.check_equal(non_str_sequence, ["t1a", "t1b"])
+ self.check_equal(get_strings_in_nested_sequence, ["t2"])
+ self.check_equal([get_strings_in_nested_sequence, non_str_sequence], ["t2", "t1a", "t1b"])
+ self.check_equal([non_str_sequence, [1, "this", ["that*", 5]], [(get_strings_in_nested_sequence,)]],
+ ['t1a', 't1b', [1, 'this', ['that1', 'that2', 5]], [('t2',)]])
+ #
+ # test wrapper
+ #
+ self.check_equal(output_from(non_str_sequence, ["what", 7], 5),
+ ['t1a', 't1b', ['t3', 7], 5])
+#
+#
+#_________________________________________________________________________________________
+
+# Test_regex_replace
+
+#_________________________________________________________________________________________
+class Test_regex_replace (unittest.TestCase):
+ def helper (self, data, result):
+ regex_str = "([a-z]+)\.([a-z]+)\.([a-z]+)\.([a-z]+)"
+ try_result = regex_replace("aaa.bbb.ccc.aaa",
+ regex_str,
+ re.compile(regex_str),
+ data)
+ self.assertEqual(try_result , result)
+
+ def test_regex_replace(self):
+ self.helper(r"\3.\2.\1", "ccc.bbb.aaa")
+ self.helper(None, None)
+ self.helper(1, 1)
+ self.helper([r"\3.\2.\1", 1], ["ccc.bbb.aaa", 1])
+ # note set is constructed with substituted results!
+ self.helper([r"\3.\2.\1", 1, (set([r"\1\2", r"\4\2", "aaabbb"]), "whatever", {1:2, 3:4})],
+ ['ccc.bbb.aaa', 1, (set(['aaabbb']), 'whatever', {1: 2, 3: 4})])
+
+
+
+
+#_________________________________________________________________________________________
+
+# Test_path_decomposition
+
+#_________________________________________________________________________________________
+class Test_path_decomposition (unittest.TestCase):
+ def helper (self, test_path, expected_result):
+ try_result = path_decomposition(test_path)
+ self.assertEqual(try_result , expected_result)
+
+ def test_path_decomposition(self):
+ # normal path
+ self.helper("/a/b/c/d/filename.txt",
+ { 'basename': 'filename',
+ 'ext': '.txt',
+ 'subpath': ['/a/b/c/d', '/a/b/c', '/a/b', '/a', '/'],
+ 'subdir': ['d', 'c', 'b', 'a', '/'],
+ 'path': '/a/b/c/d'
+ })
+ # double slash
+ self.helper("//a/filename.txt",
+ { 'basename': 'filename',
+ 'ext': '.txt',
+ 'subpath': ['//a', '//'],
+ 'path': '//a',
+ 'subdir': ['a', '//']
+ })
+ # test no path
+ self.helper("filename.txt",
+ { 'basename': 'filename',
+ 'ext': '.txt',
+ 'subpath': [],
+ 'path': '',
+ 'subdir': []
+ })
+ # root
+ self.helper("/filename.txt",
+ { 'basename': 'filename',
+ 'ext': '.txt',
+ 'path': '/',
+ 'subpath': ['/'],
+ 'subdir': ['/']
+ })
+ # unrooted
+ self.helper("a/b/filename.txt",
+ { 'basename': 'filename',
+ 'ext': '.txt',
+ 'subpath': ['a/b', 'a'],
+ 'path': 'a/b',
+ 'subdir': ['b', 'a']
+ })
+ # glob
+ self.helper("/a/b/*.txt",
+ { 'basename': '*',
+ 'ext': '.txt',
+ 'path': '/a/b',
+ 'subpath': ['/a/b', '/a', '/'],
+ 'subdir': ['b', 'a', '/']
+ })
+ # no basename
+ # extension becomes basename
+ self.helper("/a/b/.txt",
+ { 'basename': '.txt',
+ 'ext': '',
+ 'path': '/a/b',
+ 'subpath': ['/a/b', '/a', '/'],
+ 'subdir': ['b', 'a', '/']
+ })
+ # no ext
+ self.helper("/a/b/filename",
+ { 'basename': 'filename',
+ 'ext': '',
+ 'path': '/a/b',
+ 'subpath': ['/a/b', '/a', '/'],
+ 'subdir': ['b', 'a', '/']
+ })
+ # empty ext
+ self.helper("/a/b/filename.",
+ { 'basename': 'filename',
+ 'ext': '.',
+ 'path': '/a/b',
+ 'subpath': ['/a/b', '/a', '/'],
+ 'subdir': ['b', 'a', '/']
+ })
+ # only path
+ self.helper("/a/b/",
+ { 'basename': '',
+ 'ext': '',
+ 'path': '/a/b',
+ 'subpath': ['/a/b', '/a', '/'],
+ 'subdir': ['b', 'a', '/']
+ })
+
+class Test_apply_func_to_sequence (unittest.TestCase):
+ def helper (self, test_seq, func, tuple_of_conforming_types, expected_result):
+ try_result = apply_func_to_sequence(test_seq, func, tuple_of_conforming_types)
+ self.assertEqual(try_result, expected_result)
+
+ def test_apply_func_to_sequence(self):
+
+ self.helper([
+ ["saf", "sdfasf",1],
+ 2,
+ set([2,"odd"]),
+ {1:2},
+ [
+ ["sadf",3]
+ ]
+ ],
+ len, (str,),
+ [
+ [3, 6, 1],
+ 2,
+ set([2, 3]),
+ {1: 2},
+ [
+ [4, 3]
+ ]
+ ])
+
+#_________________________________________________________________________________________
+
+# Test_parameter_list_as_string
+
+#_________________________________________________________________________________________
+class Test_parameter_list_as_string (unittest.TestCase):
+
+ def test_conversion(self):
+
+ self.assertEqual(parameter_list_as_string([1,2,3]),
+ '1, 2, 3')
+ self.assertEqual(parameter_list_as_string([1,"2",3]),
+ "1, '2', 3")
+ self.assertEqual(parameter_list_as_string([1,None,3]),
+ '1, None, 3')
+ self.assertEqual(parameter_list_as_string([1,[2,3],3]),
+ '1, [2, 3], 3')
+ self.assertEqual(parameter_list_as_string([1,[],3]),
+ '1, [], 3')
+ self.assertEqual( parameter_list_as_string(None),
+ '')
+ self.assertRaises(TypeError, parameter_list_as_string)
+
+#
+#_________________________________________________________________________________________
+
+# Test_regex_match_str
+
+#_________________________________________________________________________________________
+class Test_regex_match_str (unittest.TestCase):
+
+ def test_matches(self):
+
+ # first string: regex with named and unnamed captures; second string: regex with no captures
+ test_str_list = ["aaa.bbb.ccc", "ddd.eee.fff"]
+ compiled_regexes = ["aaa.(b+).(?P<CCC>c+)", "ddd.eee.fff"]
+ results = [{0: 'aaa.bbb.ccc', 1: 'bbb', 2: 'ccc', 'CCC': 'ccc'}, {0: 'ddd.eee.fff'}]
+ for ss, rr, result in zip(test_str_list, compiled_regexes, results):
+ self.assertEqual(regex_match_str(ss, rr), result)
+
+ # first string: regex with named and unnamed captures; second string: regex with unnamed captures
+ compiled_regexes = ["aaa.(b+).(?P<CCC>c+)", ".+(f)"]
+ results = [{0: 'aaa.bbb.ccc', 1: 'bbb', 2: 'ccc', 'CCC': 'ccc'}, {0: 'ddd.eee.fff', 1: 'f'}]
+ for ss, rr, result in zip(test_str_list, compiled_regexes, results):
+ self.assertEqual(regex_match_str(ss, rr), result)
+
+ # first string: regex with named and unnamed captures; second string: regex with no captures
+ compiled_regexes = ["aaa.(b+).(?P<CCC>c+)", ".+"]
+ results = [{0: 'aaa.bbb.ccc', 1: 'bbb', 2: 'ccc', 'CCC': 'ccc'}, {0: 'ddd.eee.fff'}]
+ for ss, rr, result in zip(test_str_list, compiled_regexes, results):
+ self.assertEqual(regex_match_str(ss, rr), result)
+
+ # first string: regex with named and unnamed captures; second string: None
+ compiled_regexes = ["aaa.(b+).(?P<CCC>c+)", None]
+ results = [{0: 'aaa.bbb.ccc', 1: 'bbb', 2: 'ccc', 'CCC': 'ccc'}, None]
+ for ss, rr, result in zip(test_str_list, compiled_regexes, results):
+ self.assertEqual(regex_match_str(ss, rr), result)
+
+ # empty list of regexes: zip() yields nothing, so the loop body never runs
+ compiled_regexes = []
+ results = [None, None]
+ for ss, rr, result in zip(test_str_list, compiled_regexes, results):
+ self.assertEqual(regex_match_str(ss, rr), result)
+
+ # first string: regex with named and unnamed captures; second string: regex fails to match
+ compiled_regexes = ["aaa.(b+).(?P<CCC>c+)", "PP"]
+ results = [{0: 'aaa.bbb.ccc', 1: 'bbb', 2: 'ccc', 'CCC': 'ccc'}, False]
+ for ss, rr, result in zip(test_str_list, compiled_regexes, results):
+ self.assertEqual(regex_match_str(ss, rr), result)
+
+
+ # first string: regex with named and unnamed captures; second parameter is a number, not a string
+ self.assertRaises(Exception, regex_match_str, test_str_list[0], 6)
+
+
+
+#_________________________________________________________________________________________
+
+# Test_path_decomposition
+
+#_________________________________________________________________________________________
+class Test_get_all_paths_components (unittest.TestCase):
+ def helper (self, test_paths, regex_str, expected_result):
+ try_result = get_all_paths_components(test_paths, regex_str)
+ self.assertEqual(try_result, expected_result)
+
+ def test_get_all_paths_components(self):
+ # no regex
+ self.helper(["/a/b/c/sample1.bam"], None,
+ [
+ {'basename': 'sample1',
+ 'ext': '.bam',
+ 'subpath': ['/a/b/c', '/a/b', '/a', '/'],
+ 'path': '/a/b/c',
+ 'subdir': ['c', 'b', 'a', '/']
+ }
+ ])
+
+ # regex
+ self.helper(["/a/b/c/sample1.bam"], [r"(.*)(?P<id>\d+)\..+"],
+ [
+ {
+ 0: '/a/b/c/sample1.bam',
+ 1: '/a/b/c/sample',
+ 2: '1',
+ 'id': '1',
+ 'basename': 'sample1',
+ 'ext': '.bam',
+ 'subpath': ['/a/b/c', '/a/b', '/a', '/'],
+ 'path': '/a/b/c',
+ 'subdir': ['c', 'b', 'a', '/']
+ }
+ ])
+ # nameclash
+ # "basename" overridden by named regular expression capture group
+ self.helper(["/a/b/c/sample1.bam"], [r"(.*)(?P<basename>\d+)\..+"],
+ [
+ {
+ 0: '/a/b/c/sample1.bam',
+ 1: '/a/b/c/sample',
+ 2: '1',
+ 'basename': '1',
+ 'ext': '.bam',
+ 'subpath': ['/a/b/c', '/a/b', '/a', '/'],
+ 'path': '/a/b/c',
+ 'subdir': ['c', 'b', 'a', '/']
+ }
+ ])
+
+ # empty path
+ self.helper([""],[r"(.*)(?P<basename>\d+)\..+"], [{}])
+ self.helper([""],[], [{'path': [], 'basename': '', 'ext': '', 'subdir': []}])
+ # not matching regular expression
+ self.helper(["/a/b/c/nonumber.txt"],[r"(.*)(?P<id>\d+)\..+"], [{}])
+ # multiple paths
+ self.helper(["/a/b/c/sample1.bam",
+ "dbsnp15.vcf",
+ "/test.txt"] ,
+ [
+ r"(.*)(?P<id>\d+)\..+",
+ r"(.*)(?P<id>\d+)\..+",
+ r"(.*)(?P<id>\d+)\..+"],
+ [ {
+ 0: '/a/b/c/sample1.bam', # captured by index
+ 1: '/a/b/c/sample', # captured by index
+ 2: '1', # captured by index
+ 'id': '1', # captured by name
+ 'ext': '.bam',
+ 'subdir': ['c', 'b', 'a', '/'],
+ 'subpath': ['/a/b/c', '/a/b', '/a', '/'],
+ 'path': '/a/b/c',
+ 'basename': 'sample1',
+ },
+ {
+ 0: 'dbsnp15.vcf', # captured by index
+ 1: 'dbsnp1', # captured by index
+ 2: '5', # captured by index
+ 'id': '5', # captured by name
+ 'ext': '.vcf',
+ 'subdir': [],
+ 'subpath': [],
+ 'path': '',
+ 'basename': 'dbsnp15',
+ },
+
+ # no regular expression match
+ {}
+ ])
+
+
+
+ #_________________________________________________________________________________________
+
+ # Test_path_decomposition
+
+ #_________________________________________________________________________________________
+ class Test_get_all_paths_components (unittest.TestCase):
+ def helper (self, test_paths, regex_str, expected_result):
+ try_result = get_all_paths_components(test_paths, regex_str)
+ self.assertEqual(try_result, expected_result)
+
+ def test_get_all_paths_components(self):
+ # no regex
+ self.helper(["/a/b/c/sample1.bam"], None,
+ [
+ {'basename': 'sample1',
+ 'ext': '.bam',
+ 'subpath': ['/a/b/c', '/a/b', '/a', '/'],
+ 'path': '/a/b/c',
+ 'subdir': ['c', 'b', 'a', '/']
+ }
+ ])
+
+ # regex
+ self.helper(["/a/b/c/sample1.bam"],
+ [r"(.*)(?P<id>\d+)\..+"],
+ [
+ {
+ 0: '/a/b/c/sample1.bam',
+ 1: '/a/b/c/sample',
+ 2: '1',
+ 'id': '1',
+ 'basename': 'sample1',
+ 'ext': '.bam',
+ 'subpath': ['/a/b/c', '/a/b', '/a', '/'],
+ 'path': '/a/b/c',
+ 'subdir': ['c', 'b', 'a', '/']
+ }
+ ])
+ # nameclash
+ # "basename" overridden by named regular expression capture group
+ self.helper(["/a/b/c/sample1.bam"],[r"(.*)(?P<basename>\d+)\..+"],
+ [
+ {
+ 0: '/a/b/c/sample1.bam',
+ 1: '/a/b/c/sample',
+ 'basename': '1',
+ 'ext': '.bam',
+ 'subpath': ['/a/b/c', '/a/b', '/a', '/'],
+ 'path': '/a/b/c',
+ 'subdir': ['c', 'b', 'a', '/']
+ }
+ ])
+
+ # empty path
+ self.helper([""],[r"(.*)(?P<basename>\d+)\..+"], [{}])
+ # not matching regular expression
+ self.helper(["/a/b/c/nonumber.txt"], [r"(.*)(?P<id>\d+)\..+"], [{}])
+ # multiple paths
+ self.helper(["/a/b/c/sample1.bam",
+ "dbsnp15.vcf",
+ "/test.txt"] , [r"(.*)(?P<id>\d+)\..+"],
+ [ {
+ 0: '/a/b/c/sample1.bam', # captured by index
+ 1: '/a/b/c/sample', # captured by index
+ 2: '1', # captured by index
+ 'id': '1', # captured by name
+ 'ext': '.bam',
+ 'subdir': ['c', 'b', 'a', '/'],
+ 'subpath': ['/a/b/c', '/a/b', '/a', '/'],
+ 'path': '/a/b/c',
+ 'basename': 'sample1',
+ },
+ {
+ 0: 'dbsnp15.vcf', # captured by index
+ 1: 'dbsnp1', # captured by index
+ 2: '5', # captured by index
+ 'id': '5', # captured by name
+ 'ext': '.vcf',
+ 'subdir': [],
+ 'subpath': [],
+ 'path': '',
+ 'basename': 'dbsnp15',
+ },
+
+ # no regular expression match
+ # everything fails!
+ {
+ }
+ ])
+
+
+#
+#_________________________________________________________________________________________
+
+# Test_swap_nesting_order
+
+#_________________________________________________________________________________________
+class Test_swap_nesting_order (unittest.TestCase):
+
+ def test_swap_nesting_order(self):
+ orig_data = [
+ {'a':1, 'b':2},
+ {'a':3, 'b': 4, 'c':5}
+ ]
+
+ self.assertEqual(swap_nesting_order(orig_data),
+ ([],
+ {'a': {0: 1, 1: 3},
+ 'c': {1: 5},
+ 'b': {0: 2, 1: 4}})
+ )
+ orig_data = [
+ [ {'a':1, 'b':2},
+ {'a':3, 'b': 4, 'c':5} ],
+ [ {'a':6, 'b':7},
+ {'a':8, 'b': 9, 'd':10} ]
+ ]
+
+
+ self.assertEqual(swap_doubly_nested_order(orig_data),
+ ([],
+ { 'a': {0: {0: 1, 1: 3}, 1: {0: 6, 1: 8}},
+ 'c': {0: {1: 5}},
+ 'b': {0: {0: 2, 1: 4}, 1: {0: 7, 1: 9}},
+ 'd': {1: {1: 10}}
+ }))
+
+
+#
+#_________________________________________________________________________________________
+
+# Test_shorten_filenames_encoder
+
+#_________________________________________________________________________________________
+class Test_shorten_filenames_encoder (unittest.TestCase):
+
+ def test_shorten_filenames_encoder(self):
+ relative_path = os.path.abspath("../test1/something.py")
+ absolute_path = "/a/long/path/to/oss/ruffus/ruffus/test/something.py"
+
+ #
+ # test relative path
+ #
+ self.assertEqual(shorten_filenames_encoder(relative_path, 4),
+ '../test1/something.py')
+
+ # list of paths
+ self.assertEqual(shorten_filenames_encoder([[relative_path, relative_path]] * 2 + [6], 4),
+ '[[../test1/something.py, ../test1/something.py], [../test1/something.py, ../test1/something.py], 6]')
+
+ #
+ # test full path
+ #
+ self.assertEqual(shorten_filenames_encoder(absolute_path, 4),
+ '.../ruffus/ruffus/test/something.py')
+
+ # list of paths
+ self.assertEqual(shorten_filenames_encoder([[absolute_path, absolute_path]] * 2 + [6], 4),
+ '[[.../ruffus/ruffus/test/something.py, .../ruffus/ruffus/test/something.py], '
+ '[.../ruffus/ruffus/test/something.py, .../ruffus/ruffus/test/something.py], 6]')
+
+#
+#
+# debug parameter ignored if called as a module
+#
+if sys.argv.count("--debug"):
+ sys.argv.remove("--debug")
+#sys.argv.append("Test_regex_replace")
+unittest.main()
+
+
+
diff --git a/ruffus/test/test_softlink_uptodate.py b/ruffus/test/test_softlink_uptodate.py
new file mode 100755
index 0000000..dc43ef3
--- /dev/null
+++ b/ruffus/test/test_softlink_uptodate.py
@@ -0,0 +1,156 @@
+#!/usr/bin/env python
+from __future__ import print_function
+"""
+
+ test_softlink_uptodate.py
+
+ use :
+ --debug to test automatically
+ -j N / --jobs N to specify multitasking
+ -v to see the jobs in action
+ -n / --just_print to see what jobs would run
+
+"""
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# options
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+import sys, os
+
+# add self to search path for testing
+exe_path = os.path.split(os.path.abspath(sys.argv[0]))[0]
+sys.path.insert(0,os.path.abspath(os.path.join(exe_path,"..", "..")))
+
+
+
+from ruffus import *
+import ruffus.dbdict as dbdict
+
+parser = cmdline.get_argparse( description='Test soft link up to date?', version = "%(prog)s v.2.23")
+options = parser.parse_args()
+
+# optional logger which can be passed to ruffus tasks
+logger, logger_mutex = cmdline.setup_logging (__name__, options.log_file, options.verbose)
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Tasks
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+
+import multiprocessing.managers
+
+
+# list of executed tasks
+manager = multiprocessing.managers.SyncManager()
+manager.start()
+executed_tasks_proxy = manager.dict()
+mutex_proxy = manager.Lock()
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Tasks
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+#
+# First task
+#
+@originate(["a.1", "b.1"], executed_tasks_proxy, mutex_proxy)
+def start_task(output_file_name, executed_tasks_proxy, mutex_proxy):
+ with open(output_file_name, "w") as f:
+ pass
+ with mutex_proxy:
+ executed_tasks_proxy["start_task"] = 1
+
+#
+# Forwards the file names unchanged, so it is always as up to date as its input files...
+#
+@transform(start_task, suffix(".1"), ".1", executed_tasks_proxy, mutex_proxy)
+def same_file_name_task(input_file_name, output_file_name, executed_tasks_proxy, mutex_proxy):
+ with mutex_proxy:
+ executed_tasks_proxy["same_file_name_task"] = executed_tasks_proxy.get("same_file_name_task", 0) + 1
+
+#
+# Creates soft links to the file names, so it is up to date as long as the links are not missing
+#
+@transform(start_task, suffix(".1"), ".linked.1", executed_tasks_proxy, mutex_proxy)
+def linked_file_name_task(input_file_name, output_file_name, executed_tasks_proxy, mutex_proxy):
+ os.symlink(input_file_name, output_file_name)
+ with mutex_proxy:
+ executed_tasks_proxy["linked_file_name_task"] = executed_tasks_proxy.get("linked_file_name_task", 0) + 1
+
+
+#
+# Final task linking everything
+#
+@transform([linked_file_name_task, same_file_name_task], suffix(".1"), ".3", executed_tasks_proxy, mutex_proxy)
+def final_task (input_file_name, output_file_name, executed_tasks_proxy, mutex_proxy):
+ with open(output_file_name, "w") as f:
+ pass
+ with mutex_proxy:
+ executed_tasks_proxy["final_task"] = executed_tasks_proxy.get("final_task", 0) + 1
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Run pipeline
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+#
+# Run task 1 only
+#
+logger.debug("Run start_task only")
+options.target_tasks = ["start_task"]
+cmdline.run (options, logger = logger, log_exceptions = True)
+logger.debug("")
+
+
+#
+# Run task 3 only
+#
+logger.debug("Run final_task: linked_file_name_task should run as well")
+options.target_tasks = []
+cmdline.run (options, logger = logger, log_exceptions = True)
+logger.debug("")
+
+
+#
+# Run task 3 again:
+#
+# All jobs should be up to date
+#
+logger.debug("Run final_task again: All jobs should be up to date")
+cmdline.run (options, logger = logger, log_exceptions = True)
+logger.debug("")
+
+
+#
+# cleanup
+#
+for f in ["a.1", "b.1", "a.linked.1", "b.linked.1", "a.3", "b.3", "a.linked.3", "b.linked.3"]:
+ if os.path.lexists(f):
+ os.unlink(f)
+
+
+
+
+#
+# Make sure right number of jobs / tasks ran
+#
+for task_name, jobs_count in ({'start_task': 1, 'final_task': 4, 'linked_file_name_task': 2}).items():
+ if task_name not in executed_tasks_proxy:
+ raise Exception("Error: %s did not run!!" % task_name)
+ if executed_tasks_proxy[task_name] != jobs_count:
+ raise Exception("Error: %s did not have %d jobs!!" % (task_name, jobs_count))
+if "same_file_name_task" in executed_tasks_proxy:
+ raise Exception("Error: %s should not have run!!" % "same_file_name_task")
+
+print("Succeeded")
diff --git a/ruffus/test/test_split_and_combine.py b/ruffus/test/test_split_and_combine.py
new file mode 100755
index 0000000..7396ea6
--- /dev/null
+++ b/ruffus/test/test_split_and_combine.py
@@ -0,0 +1,278 @@
+#!/usr/bin/env python
+from __future__ import print_function
+"""
+
+ test_split_and_combine.py
+
+ test splitting a file into multiple pieces and recombining the results
+
+ use :
+ --debug to test automatically
+ --start_again the first time you run the file
+ --jobs_per_task N to simulate tasks with N files per task
+
+ -j N / --jobs N to specify multitasking
+ -v to see the jobs in action
+ -n / --just_print to see what jobs would run
+
+"""
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# options
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+from optparse import OptionParser
+import sys, os
+import os.path
+try:
+ import StringIO as io
+except ImportError:
+ import io as io
+
+import re,time
+
+# add self to search path for testing
+exe_path = os.path.split(os.path.abspath(sys.argv[0]))[0]
+sys.path.insert(0,os.path.abspath(os.path.join(exe_path,"..", "..")))
+if __name__ == '__main__':
+ module_name = os.path.split(sys.argv[0])[1]
+ module_name = os.path.splitext(module_name)[0]
+else:
+ module_name = __name__
+
+
+
+import ruffus
+parser = OptionParser(version="%%prog v1.0, ruffus v%s" % ruffus.ruffus_version.__version)
+parser.add_option("-D", "--debug", dest="debug",
+ action="store_true", default=False,
+ help="Make sure output is correct and clean up.")
+parser.add_option("-s", "--start_again", dest="start_again",
+ action="store_true", default=False,
+ help="Make a new 'original.fa' file to simulate having to restart "
+ "pipeline from scratch.")
+parser.add_option("--jobs_per_task", dest="jobs_per_task",
+ default=50,
+ metavar="N",
+ type="int",
+ help="Simulates tasks with N numbers of files per task.")
+
+
+parser.add_option("-t", "--target_tasks", dest="target_tasks",
+ action="append",
+ default = list(),
+ metavar="JOBNAME",
+ type="string",
+ help="Target task(s) of pipeline.")
+parser.add_option("-f", "--forced_tasks", dest="forced_tasks",
+ action="append",
+ default = list(),
+ metavar="JOBNAME",
+ type="string",
+ help="Pipeline task(s) which will be included even if they are up to date.")
+parser.add_option("-j", "--jobs", dest="jobs",
+ default=1,
+ metavar="jobs",
+ type="int",
+ help="Specifies the number of jobs (commands) to run simultaneously.")
+parser.add_option("-v", "--verbose", dest = "verbose",
+ action="count", default=0,
+ help="Print more verbose messages for each additional verbose level.")
+parser.add_option("-d", "--dependency", dest="dependency_file",
+ #default="simple.svg",
+ metavar="FILE",
+ type="string",
+ help="Print a dependency graph of the pipeline that would be executed "
+ "to FILE, but do not execute it.")
+parser.add_option("-F", "--dependency_graph_format", dest="dependency_graph_format",
+ metavar="FORMAT",
+ type="string",
+ default = 'svg',
+ help="format of dependency graph file. Can be 'ps' (PostScript), "+
+ "'svg' 'svgz' (Structured Vector Graphics), " +
+ "'png' 'gif' (bitmap graphics) etc ")
+parser.add_option("-n", "--just_print", dest="just_print",
+ action="store_true", default=False,
+ help="Print a description of the jobs that would be executed, "
+ "but do not execute them.")
+parser.add_option("-K", "--no_key_legend_in_graph", dest="no_key_legend_in_graph",
+ action="store_true", default=False,
+ help="Do not print out legend and key for dependency graph.")
+parser.add_option("-H", "--draw_graph_horizontally", dest="draw_horizontally",
+ action="store_true", default=False,
+ help="Draw horizontal dependency graph.")
+
+parameters = [
+ ]
+
+
+
+
+
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# imports
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+import re
+import operator
+import sys,os
+from collections import defaultdict
+import random
+
+sys.path.append(os.path.abspath(os.path.join(exe_path,"..", "..")))
+from ruffus import *
+
+# use simplejson in place of json for python < 2.6
+try:
+ import json
+except ImportError:
+ import simplejson
+ json = simplejson
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Main logic
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+
+
+
+
+# get help string
+f = io.StringIO()
+parser.print_help(f)
+helpstr = f.getvalue()
+(options, remaining_args) = parser.parse_args()
+
+
+tempdir = "temp_filesre_split_and_combine/"
+
+
+
+if options.verbose:
+ verbose_output = sys.stderr
+else:
+ verbose_output = open("/dev/null", "w")
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Tasks
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+#
+# split_fasta_file
+#
+@posttask(lambda: verbose_output.write("Split into %d files\n" % options.jobs_per_task))
+@split(tempdir + "original.fa", [tempdir + "files.split.success", tempdir + "files.split.*.fa"])
+def split_fasta_file (input_file, outputs):
+
+ #
+ # remove previous fasta files
+ #
+ success_flag = outputs[0]
+ output_file_names = outputs[1:]
+ for f in output_file_names:
+ os.unlink(f)
+
+ #
+ # create as many files as we are simulating in jobs_per_task
+ #
+ for i in range(options.jobs_per_task):
+ open(tempdir + "files.split.%03d.fa" % i, "w")
+
+ open(success_flag, "w")
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+#
+# align_sequences
+#
+@posttask(lambda: verbose_output.write("Sequences aligned\n"))
+@transform(split_fasta_file, suffix(".fa"), ".aln") # fa -> aln
+def align_sequences (input_file, output_filename):
+ open(output_filename, "w").write("%s\n" % output_filename)
+
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+#
+# percentage_identity
+#
+@posttask(lambda: verbose_output.write("%Identity calculated\n"))
+@transform(align_sequences, # find all results from align_sequences
+ suffix(".aln"), # replace suffix with:
+ [r".pcid", # .pcid suffix for the result
+ r".pcid_success"]) # .pcid_success to indicate job completed
+def percentage_identity (input_file, output_files):
+ (output_filename, success_flag_filename) = output_files
+ open(output_filename, "w").write("%s\n" % output_filename)
+ open(success_flag_filename, "w")
+
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+#
+# combine_results
+#
+@posttask(lambda: verbose_output.write("Results recombined\n"))
+@merge(percentage_identity, [tempdir + "all.combine_results",
+ tempdir + "all.combine_results_success"])
+def combine_results (input_files, output_files):
+ """
+ Combine all
+ """
+ (output_filename, success_flag_filename) = output_files
+ out = open(output_filename, "w")
+ for inp, flag in input_files:
+ out.write(open(inp).read())
+ open(success_flag_filename, "w")
+
+
+
+def start_pipeline_afresh ():
+ """
+ Recreate directory and starting file
+ """
+ print("Start again", file=verbose_output)
+ import os
+ os.system("rm -rf %s" % tempdir)
+ os.makedirs(tempdir)
+ open(tempdir + "original.fa", "w").close()
+
+if __name__ == '__main__':
+ if options.start_again:
+ start_pipeline_afresh()
+ if options.just_print:
+ pipeline_printout(sys.stdout, options.target_tasks, options.forced_tasks,
+ verbose = options.verbose)
+
+ elif options.dependency_file:
+ pipeline_printout_graph ( open(options.dependency_file, "w"),
+ options.dependency_graph_format,
+ options.target_tasks,
+ options.forced_tasks,
+ draw_vertically = not options.draw_horizontally,
+ no_key_legend = options.no_key_legend_in_graph)
+ elif options.debug:
+ start_pipeline_afresh()
+ pipeline_run(options.target_tasks, options.forced_tasks, multiprocess = options.jobs,
+ logger = stderr_logger if options.verbose else black_hole_logger,
+ verbose = options.verbose)
+ os.system("rm -rf %s" % tempdir)
+ print("OK")
+ else:
+ pipeline_run(options.target_tasks, options.forced_tasks, multiprocess = options.jobs,
+ logger = stderr_logger if options.verbose else black_hole_logger,
+ verbose = options.verbose)
+
diff --git a/ruffus/test/test_split_regex_and_collate.py b/ruffus/test/test_split_regex_and_collate.py
new file mode 100755
index 0000000..737371e
--- /dev/null
+++ b/ruffus/test/test_split_regex_and_collate.py
@@ -0,0 +1,218 @@
+#!/usr/bin/env python
+from __future__ import print_function
+"""
+
+ test_split_regex_and_collate.py
+
+"""
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# options
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+from optparse import OptionParser
+import sys, os
+import os.path
+try:
+ from StringIO import StringIO
+except ImportError:
+ from io import StringIO
+import re,time
+import operator
+from collections import defaultdict
+import random
+import shutil
+
+exe_path = os.path.split(os.path.abspath(sys.argv[0]))[0]
+sys.path.insert(0, os.path.abspath(os.path.join(exe_path,"..", "..")))
+from ruffus import *
+from ruffus import (pipeline_run, pipeline_printout, suffix, transform, split,
+ merge, dbdict, follows)
+from ruffus.combinatorics import *
+from ruffus.ruffus_exceptions import RethrownJobError
+from ruffus.ruffus_utility import (RUFFUS_HISTORY_FILE,
+ CHECKSUM_FILE_TIMESTAMPS)
+
+import unittest
+
+JOBS_PER_TASK = 5
+
+
+
+
+
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# imports
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# use simplejson in place of json for python < 2.6
+try:
+ import json
+except ImportError:
+ import simplejson
+ json = simplejson
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Main logic
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+tempdir = "temp_filesre_split_and_combine"
+
+#
+# Three starting files
+#
+original_files = [tempdir + "/original_%d.fa" % d for d in range(3)]
+
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Tasks
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+@mkdir(tempdir)
+@originate(original_files)
+def generate_initial_files(out_name):
+ with open(out_name, 'w') as outfile:
+ pass
+
+
+#
+# split_fasta_file
+#
+
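+# (Illustration, derived from the regex and output patterns below: for the input
+#  "temp_filesre_split_and_combine/original_0.fa" a single job receives
+#   outputs        = ["temp_filesre_split_and_combine/files.split.0.success",
+#                     plus any files matching "temp_filesre_split_and_combine/files.split.0.*.fa"]
+#   original_index = "0")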
+@posttask(lambda: sys.stderr.write("\tSplit into %d files each\n" % JOBS_PER_TASK))
+@subdivide(generate_initial_files,
+ regex(r".*\/original_(\d+).fa"), # match original files
+ [tempdir + r"/files.split.\1.success", # flag file for each original file
+ tempdir + r"/files.split.\1.*.fa"], # glob pattern
+ r"\1") # index of original file
+def split_fasta_file (input_file, outputs, original_index):
+
+ #
+ # remove previous fasta files
+ #
+ success_flag = outputs[0]
+ output_file_names = outputs[1:]
+ for f in output_file_names:
+ os.unlink(f)
+
+ #
+ # create as many files as we are simulating in JOBS_PER_TASK
+ #
+ for i in range(JOBS_PER_TASK):
+ with open(tempdir + "/files.split.%s.%03d.fa" % (original_index, i), "w") as oo:
+ pass
+
+ with open(success_flag, "w") as oo:
+ pass
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+#
+# align_sequences
+#
+@posttask(lambda: sys.stderr.write("\tSequences aligned\n"))
+@transform(split_fasta_file, suffix(".fa"), ".aln") # fa -> aln
+def align_sequences (input_file, output_filename):
+ with open(output_filename, "w") as oo:
+ oo.write("%s\n" % output_filename)
+
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+#
+# percentage_identity
+#
+@posttask(lambda: sys.stderr.write("\t%Identity calculated\n"))
+@transform(align_sequences, # find all results from align_sequences
+ suffix(".aln"), # replace suffix with:
+ [r".pcid", # .pcid suffix for the result
+ r".pcid_success"]) # .pcid_success to indicate job completed
+def percentage_identity (input_file, output_files):
+ (output_filename, success_flag_filename) = output_files
+ with open(output_filename, "w") as oo:
+ oo.write("%s\n" % output_filename)
+ with open(success_flag_filename, "w") as oo:
+ pass
+
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+#
+# combine_results
+#
+@posttask(lambda: sys.stderr.write("\tResults recombined\n"))
+@collate(percentage_identity, regex(r".*files.split\.(\d+)\.\d+.pcid"),
+ [tempdir + r"/\1.all.combine_results",
+ tempdir + r"/\1.all.combine_results_success"])
+def combine_results (input_files, output_files):
+ """
+ Combine all
+ """
+ (output_filename, success_flag_filename) = output_files
+ with open(output_filename, "w") as out:
+ for inp, flag in input_files:
+ with open(inp) as ii:
+ out.write(ii.read())
+ with open(success_flag_filename, "w") as oo:
+ pass
+
+
+
+
+class Test_split_regex_and_collate(unittest.TestCase):
+ def setUp(self):
+ import os
+ try:
+ shutil.rmtree(tempdir)
+ except:
+ pass
+ os.makedirs(tempdir)
+ for f in original_files:
+ with open(f, "w") as p: pass
+
+ def cleanup_tmpdir(self):
+ os.system('rm -f %s %s' % (os.path.join(tempdir, '*'), RUFFUS_HISTORY_FILE))
+
+ #___________________________________________________________________________
+ #
+ # test product() pipeline_printout and pipeline_run
+ #___________________________________________________________________________
+ def test_collate(self):
+ self.cleanup_tmpdir()
+
+ s = StringIO()
+ pipeline_printout(s, [combine_results], verbose=5, wrap_width = 10000)
+ self.assertTrue('Job needs update: Missing files\n' in s.getvalue())
+ #print s.getvalue()
+
+ pipeline_run([combine_results], verbose=0)
+
+ #___________________________________________________________________________
+ #
+ # cleanup
+ #___________________________________________________________________________
+ def tearDown(self):
+ shutil.rmtree(tempdir)
+
+#
+# Necessary to protect the "entry point" of the program under windows.
+# see: http://docs.python.org/library/multiprocessing.html#multiprocessing-programming
+#
+if __name__ == '__main__':
+# pipeline_printout(sys.stdout, [combine_results], verbose = 5)
+ unittest.main()
+
diff --git a/ruffus/test/test_task_file_dependencies.py b/ruffus/test/test_task_file_dependencies.py
new file mode 100755
index 0000000..5874399
--- /dev/null
+++ b/ruffus/test/test_task_file_dependencies.py
@@ -0,0 +1,149 @@
+#!/usr/bin/env python
+from __future__ import print_function
+################################################################################
+#
+# test_task_file_dependencies.py
+#
+#
+# Copyright (c) 2009 Leo Goodstadt
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+#################################################################################
+"""
+ test_task_file_dependencies.py
+"""
+
+# use simplejson in place of json for python < 2.6
+try:
+ import json
+except ImportError:
+ import simplejson
+ json = simplejson
+import unittest, os,sys
+if __name__ != '__main__':
+ raise Exception ("This is not a callable module [%s]" % __name__)
+
+
+exe_path = os.path.split(os.path.abspath(sys.argv[0]))[0]
+sys.path.insert(0, os.path.abspath(os.path.join(exe_path,"..", "..")))
+from ruffus import *
+from ruffus.ruffus_utility import open_job_history
+
+history_file = ':memory:'
+history_file = None
+
+class Test_needs_update_check_modify_time(unittest.TestCase):
+
+ def setUp (self):
+ """
+ Create a list of files separated in time so we can do dependency checking
+ """
+ import tempfile,time
+ self.files = list()
+ for i in range(6):
+ #test_file =tempfile.NamedTemporaryFile(delete=False, prefix='testing_tmp')
+ #self.files.append (test_file.name)
+ #test_file.close()
+
+ fh, temp_file_name = tempfile.mkstemp(suffix='.dot')
+ self.files.append (temp_file_name)
+ os.fdopen(fh, "w").close()
+ time.sleep(0.1)
+
+ def tearDown (self):
+ """
+ delete files
+ """
+ for f in self.files:
+ os.unlink(f)
+
+
+ def test_up_to_date (self):
+ #
+ # lists of files
+ #
+ self.assertTrue(not task.needs_update_check_modify_time (self.files[0:2],
+ self.files[2:6],
+ job_history = open_job_history(history_file))[0])
+ self.assertTrue( task.needs_update_check_modify_time (self.files[2:6],
+ self.files[0:2],
+ job_history = open_job_history(history_file))[0])
+ #
+ # singletons and lists of files
+ #
+ self.assertTrue(not task.needs_update_check_modify_time (self.files[0],
+ self.files[2:6],
+ job_history = open_job_history(history_file))[0])
+ self.assertTrue( task.needs_update_check_modify_time (self.files[2:6],
+ self.files[0],
+ job_history = open_job_history(history_file))[0])
+ #
+ # singletons
+ #
+ self.assertTrue( task.needs_update_check_modify_time (self.files[3],
+ self.files[0],
+ job_history = open_job_history(history_file))[0])
+ # self -self = no update
+ self.assertTrue(not task.needs_update_check_modify_time (self.files[0],
+ self.files[0],
+ job_history = open_job_history(history_file))[0])
+
+ #
+ # missing files means need update
+ #
+ self.assertTrue( task.needs_update_check_modify_time (self.files[0:2] +
+ ["uncreated"],
+ self.files[3:6],
+ job_history = open_job_history(history_file))[0])
+ self.assertTrue( task.needs_update_check_modify_time (self.files[0:2],
+ self.files[3:6] +
+ ["uncreated"],
+ job_history = open_job_history(history_file))[0])
+ #
+ # None means need update
+ #
+ self.assertTrue( task.needs_update_check_modify_time (self.files[0:2],
+ None,
+ job_history = open_job_history(history_file))[0])
+ #
+ # None input means need update only if do not exist
+ #
+ self.assertTrue( not task.needs_update_check_modify_time (None,
+ self.files[3:6],
+ job_history = open_job_history(history_file))[0])
+
+
+ #
+ # None + missing file means need update
+ #
+ self.assertTrue( task.needs_update_check_modify_time (self.files[0:2] +
+ ["uncreated"],
+ None,
+ job_history = open_job_history(history_file))[0])
+ self.assertTrue( task.needs_update_check_modify_time (None,
+ self.files[3:6] +
+ ["uncreated"],
+ job_history = open_job_history(history_file))[0])
+
+
+
+
+
+unittest.main()
+
diff --git a/ruffus/test/test_task_misc.py b/ruffus/test/test_task_misc.py
new file mode 100755
index 0000000..c607630
--- /dev/null
+++ b/ruffus/test/test_task_misc.py
@@ -0,0 +1,61 @@
+#!/usr/bin/env python
+from __future__ import print_function
+"""
+ test_task_misc.py
+"""
+
+# use simplejson in place of json for python < 2.6
+try:
+ import json
+except ImportError:
+ import simplejson
+ json = simplejson
+import unittest, os,sys
+if __name__ != '__main__':
+ raise Exception ("This is not a callable module [%s]" % __name__)
+
+
+exe_path = os.path.split(os.path.abspath(sys.argv[0]))[0]
+sys.path.insert(0, os.path.abspath(os.path.join(exe_path,"..", "..")))
+from ruffus import *
+
+
+
+
+class Test_needs_update_check_directory_missing(unittest.TestCase):
+
+ def setUp (self):
+ """
+ Create temp directory and temp file
+ """
+ import tempfile
+
+ #test_file =tempfile.NamedTemporaryFile(delete=False)
+ #self.tempfile = test_file.name
+ #test_file.close()
+ fh, self.tempfile = tempfile.mkstemp(suffix='.dot')
+ os.fdopen(fh, "w").close()
+ self.directory = tempfile.mkdtemp(prefix='testing_tmp')
+
+ def tearDown (self):
+ """
+ delete files
+ """
+ os.unlink(self.tempfile)
+ os.removedirs(self.directory)
+
+ def test_up_to_date (self):
+ #
+ # lists of files
+ #
+
+ self.assertTrue(not task.needs_update_check_directory_missing ([self.directory])[0])
+ self.assertTrue( task.needs_update_check_directory_missing (["missing directory"])[0])
+ self.assertRaises(task.error_not_a_directory,
+ task.needs_update_check_directory_missing, [self.tempfile])
+
+
+
+
+unittest.main()
+
diff --git a/ruffus/test/test_transform_add_inputs.py b/ruffus/test/test_transform_add_inputs.py
new file mode 100755
index 0000000..0fd84fd
--- /dev/null
+++ b/ruffus/test/test_transform_add_inputs.py
@@ -0,0 +1,232 @@
+#!/usr/bin/env python
+from __future__ import print_function
+"""
+
+ test_transform_add_inputs.py
+
+ test adding extra inputs to each job with add_inputs()
+
+"""
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# options
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+from optparse import OptionParser
+import sys, os
+import os.path
+try:
+ import StringIO as io
+except ImportError:
+ import io as io
+import re,time
+
+# add self to search path for testing
+exe_path = os.path.split(os.path.abspath(sys.argv[0]))[0]
+sys.path.insert(0,os.path.abspath(os.path.join(exe_path,"..", "..")))
+if __name__ == '__main__':
+ module_name = os.path.split(sys.argv[0])[1]
+ module_name = os.path.splitext(module_name)[0]
+else:
+ module_name = __name__
+
+
+
+import ruffus
+print("\tRuffus Version = ", ruffus.__version__)
+parser = OptionParser(version="%%prog v1.0, ruffus v%s" % ruffus.ruffus_version.__version)
+parser.add_option("-t", "--target_tasks", dest="target_tasks",
+ action="append",
+ default = list(),
+ metavar="JOBNAME",
+ type="string",
+ help="Target task(s) of pipeline.")
+parser.add_option("-f", "--forced_tasks", dest="forced_tasks",
+ action="append",
+ default = list(),
+ metavar="JOBNAME",
+ type="string",
+ help="Pipeline task(s) which will be included even if they are up to date.")
+parser.add_option("-j", "--jobs", dest="jobs",
+ default=1,
+ metavar="jobs",
+ type="int",
+ help="Specifies the number of jobs (commands) to run simultaneously.")
+parser.add_option("-v", "--verbose", dest = "verbose",
+ action="count", default=0,
+ help="Print more verbose messages for each additional verbose level.")
+parser.add_option("-d", "--dependency", dest="dependency_file",
+ #default="simple.svg",
+ metavar="FILE",
+ type="string",
+ help="Print a dependency graph of the pipeline that would be executed "
+ "to FILE, but do not execute it.")
+parser.add_option("-F", "--dependency_graph_format", dest="dependency_graph_format",
+ metavar="FORMAT",
+ type="string",
+ default = 'svg',
+ help="format of dependency graph file. Can be 'ps' (PostScript), "+
+ "'svg' 'svgz' (Structured Vector Graphics), " +
+ "'png' 'gif' (bitmap graphics) etc ")
+parser.add_option("-n", "--just_print", dest="just_print",
+ action="store_true", default=False,
+ help="Print a description of the jobs that would be executed, "
+ "but do not execute them.")
+parser.add_option("-M", "--minimal_rebuild_mode", dest="minimal_rebuild_mode",
+ action="store_true", default=False,
+ help="Rebuild a minimum of tasks necessary for the target. "
+ "Ignore upstream out of date tasks if intervening tasks are fine.")
+parser.add_option("-K", "--no_key_legend_in_graph", dest="no_key_legend_in_graph",
+ action="store_true", default=False,
+ help="Do not print out legend and key for dependency graph.")
+parser.add_option("-H", "--draw_graph_horizontally", dest="draw_horizontally",
+ action="store_true", default=False,
+ help="Draw horizontal dependency graph.")
+
+parameters = [
+ ]
+
+
+
+
+
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# imports
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+import re
+import operator
+import sys,os
+from collections import defaultdict
+import random
+
+sys.path.append(os.path.abspath(os.path.join(exe_path,"..", "..")))
+from ruffus import *
+
+# use simplejson in place of json for python < 2.6
+try:
+ import json
+except ImportError:
+ import simplejson
+ json = simplejson
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Main logic
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+
+
+
+
+# get help string
+f =io.StringIO()
+parser.print_help(f)
+helpstr = f.getvalue()
+(options, remaining_args) = parser.parse_args()
+
+def touch (filename):
+ with open(filename, "w"):
+ pass
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Tasks
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+tempdir = "tempdir/"
+@follows(mkdir(tempdir))
+@files([[None, tempdir+ "a.1"], [None, tempdir+ "b.1"]])
+def task1(i, o):
+ touch(o)
+
+
+@follows(mkdir(tempdir))
+@files([[None, tempdir+ "c.1"], [None, tempdir+ "d.1"]])
+def task2(i, o):
+ touch(o)
+
+
+@transform(task1, regex(r"(.*)"), add_inputs(task2, "test_transform_inputs.*"), r"\1.output")
+def task3_add_inputs(i, o):
+ names = ",".join(sorted(i))
+ with open(o, "w") as oo:
+ oo.write(names)
+
+@merge((task3_add_inputs), tempdir + "final.output")
+def task4(i, o):
+ with open(o, "w") as o_file:
+ for f in sorted(i):
+ with open(f) as ii:
+ o_file.write(f +":" + ii.read() + ";")
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+import unittest
+
+class Test_task(unittest.TestCase):
+
+ def tearDown (self):
+ """
+ """
+ import glob
+ for f in glob.glob(tempdir + "*"):
+ os.unlink(f)
+ os.rmdir(tempdir)
+
+
+ def test_task (self):
+ pipeline_run([task4], options.forced_tasks, multiprocess = options.jobs,
+ verbose = options.verbose)
+
+ correct_output = "tempdir/a.1.output:tempdir/a.1,tempdir/c.1,tempdir/d.1,test_transform_inputs.py;tempdir/b.1.output:tempdir/b.1,tempdir/c.1,tempdir/d.1,test_transform_inputs.py;"
+ with open(tempdir + "final.output") as real_output:
+ real_output_str = real_output.read()
+ self.assertTrue(correct_output == real_output_str)
+
+
+if __name__ == '__main__':
+ if options.just_print:
+ pipeline_printout(sys.stdout, options.target_tasks, options.forced_tasks,
+ verbose = options.verbose,
+ gnu_make_maximal_rebuild_mode = not options.minimal_rebuild_mode)
+
+ elif options.dependency_file:
+ pipeline_printout_graph ( open(options.dependency_file, "w"),
+ options.dependency_graph_format,
+ options.target_tasks,
+ options.forced_tasks,
+ draw_vertically = not options.draw_horizontally,
+ gnu_make_maximal_rebuild_mode = not options.minimal_rebuild_mode,
+ no_key_legend = options.no_key_legend_in_graph)
+ else:
+ sys.argv= sys.argv[0:1]
+ unittest.main()
+
diff --git a/ruffus/test/test_transform_inputs.py b/ruffus/test/test_transform_inputs.py
new file mode 100644
index 0000000..ac2cbaa
--- /dev/null
+++ b/ruffus/test/test_transform_inputs.py
@@ -0,0 +1,217 @@
+#!/usr/bin/env python
+from __future__ import print_function
+"""
+
+ test_transform_inputs.py
+
+ test @transform with the inputs(...) parameter replacement
+
+"""
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# options
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+from optparse import OptionParser
+import sys, os
+import os.path
+try:
+ import StringIO as io
+except ImportError:
+ import io as io
+import re,time
+
+# add self to search path for testing
+exe_path = os.path.split(os.path.abspath(sys.argv[0]))[0]
+sys.path.insert(0,os.path.abspath(os.path.join(exe_path,"..", "..")))
+if __name__ == '__main__':
+ module_name = os.path.split(sys.argv[0])[1]
+ module_name = os.path.splitext(module_name)[0];
+else:
+ module_name = __name__
+
+
+
+import ruffus
+print("\tRuffus Version = ", ruffus.__version__)
+parser = OptionParser(version="%%prog v1.0, ruffus v%s" % ruffus.ruffus_version.__version)
+parser.add_option("-t", "--target_tasks", dest="target_tasks",
+ action="append",
+ default = list(),
+ metavar="JOBNAME",
+ type="string",
+ help="Target task(s) of pipeline.")
+parser.add_option("-f", "--forced_tasks", dest="forced_tasks",
+ action="append",
+ default = list(),
+ metavar="JOBNAME",
+ type="string",
+ help="Pipeline task(s) which will be included even if they are up to date.")
+parser.add_option("-j", "--jobs", dest="jobs",
+ default=1,
+ metavar="jobs",
+ type="int",
+ help="Specifies the number of jobs (commands) to run simultaneously.")
+parser.add_option("-v", "--verbose", dest = "verbose",
+ action="count", default=0,
+ help="Print more verbose messages for each additional verbose level.")
+parser.add_option("-d", "--dependency", dest="dependency_file",
+ #default="simple.svg",
+ metavar="FILE",
+ type="string",
+ help="Print a dependency graph of the pipeline that would be executed "
+ "to FILE, but do not execute it.")
+parser.add_option("-F", "--dependency_graph_format", dest="dependency_graph_format",
+ metavar="FORMAT",
+ type="string",
+ default = 'svg',
+ help="format of dependency graph file. Can be 'ps' (PostScript), "+
+ "'svg' 'svgz' (Structured Vector Graphics), " +
+ "'png' 'gif' (bitmap graphics) etc ")
+parser.add_option("-n", "--just_print", dest="just_print",
+ action="store_true", default=False,
+ help="Print a description of the jobs that would be executed, "
+ "but do not execute them.")
+parser.add_option("-M", "--minimal_rebuild_mode", dest="minimal_rebuild_mode",
+ action="store_true", default=False,
+ help="Rebuild a minimum of tasks necessary for the target. "
+ "Ignore upstream out of date tasks if intervening tasks are fine.")
+parser.add_option("-K", "--no_key_legend_in_graph", dest="no_key_legend_in_graph",
+ action="store_true", default=False,
+ help="Do not print out legend and key for dependency graph.")
+parser.add_option("-H", "--draw_graph_horizontally", dest="draw_horizontally",
+ action="store_true", default=False,
+ help="Draw horizontal dependency graph.")
+
+parameters = [
+ ]
+
+
+
+
+
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# imports
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+import re
+import operator
+import sys,os
+from collections import defaultdict
+import random
+
+sys.path.append(os.path.abspath(os.path.join(exe_path,"..", "..")))
+from ruffus import *
+
+# use simplejson in place of json for python < 2.6
+try:
+ import json
+except ImportError:
+ import simplejson
+ json = simplejson
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Main logic
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+
+def touch (filename):
+ with open(filename, "w"):
+ pass
+
+
+
+# get help string
+f =io.StringIO()
+parser.print_help(f)
+helpstr = f.getvalue()
+(options, remaining_args) = parser.parse_args()
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Tasks
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+tempdir = "tempdir/"
+@follows(mkdir(tempdir))
+@files([[None, tempdir+ "a.1"], [None, tempdir+ "b.1"]])
+def task1(i, o):
+ touch(o)
+
+
+@follows(mkdir(tempdir))
+@files([[None, tempdir+ "c.1"], [None, tempdir+ "d.1"]])
+def task2(i, o):
+ touch(o)
+
+
+@transform(task1, regex(r"(.*)"), inputs(((r"\1"), task2, "test_transform_inputs.*")), r"\1.output")
+def task3(i, o):
+ names = ",".join(sorted(i))
+ # o is a single output file name: write the sorted input names to it
+ with open(o, "w") as ff:
+ ff.write(names)
+
+@merge((task3), tempdir + "final.output")
+def task4(i, o):
+ with open(o, "w") as o_file:
+ for f in sorted(i):
+ with open(f) as ff:
+ o_file.write(f +":" + ff.read() + ";")
+
+import unittest
+
+class Test_task(unittest.TestCase):
+
+ def tearDown (self):
+ """
+ """
+ import glob
+ for f in glob.glob(tempdir + "*"):
+ os.unlink(f)
+ os.rmdir(tempdir)
+
+
+ def test_task (self):
+ pipeline_run([task4], options.forced_tasks, multiprocess = options.jobs,
+ verbose = options.verbose)
+
+ correct_output = "tempdir/a.1.output:tempdir/a.1,tempdir/c.1,tempdir/d.1,test_transform_inputs.py;tempdir/b.1.output:tempdir/b.1,tempdir/c.1,tempdir/d.1,test_transform_inputs.py;"
+ with open(tempdir + "final.output") as ff:
+ real_output = ff.read()
+ self.assertTrue(correct_output == real_output)
+
+
+if __name__ == '__main__':
+ if options.just_print:
+ pipeline_printout(sys.stdout, options.target_tasks, options.forced_tasks,
+ verbose = options.verbose,
+ gnu_make_maximal_rebuild_mode = not options.minimal_rebuild_mode)
+
+ elif options.dependency_file:
+ with open(options.dependency_file, "w") as graph_printout_file:
+ pipeline_printout_graph (graph_printout_file,
+ options.dependency_graph_format,
+ options.target_tasks,
+ options.forced_tasks,
+ draw_vertically = not options.draw_horizontally,
+ gnu_make_maximal_rebuild_mode = not options.minimal_rebuild_mode,
+ no_key_legend = options.no_key_legend_in_graph)
+ else:
+ sys.argv= sys.argv[0:1]
+ unittest.main()
+
diff --git a/ruffus/test/test_transform_with_no_re_matches.py b/ruffus/test/test_transform_with_no_re_matches.py
new file mode 100755
index 0000000..c0ac59b
--- /dev/null
+++ b/ruffus/test/test_transform_with_no_re_matches.py
@@ -0,0 +1,220 @@
+#!/usr/bin/env python
+from __future__ import print_function
+"""
+
+ test_transform_with_no_re_matches.py
+
+ test messages with no regular expression matches
+
+"""
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# options
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+from optparse import OptionParser
+import sys, os
+import os.path
+try:
+ import StringIO as io
+except ImportError:
+ import io as io
+
+import re,time
+
+# add self to search path for testing
+exe_path = os.path.split(os.path.abspath(sys.argv[0]))[0]
+sys.path.insert(0,os.path.abspath(os.path.join(exe_path,"..", "..")))
+if __name__ == '__main__':
+ module_name = os.path.split(sys.argv[0])[1]
+ module_name = os.path.splitext(module_name)[0];
+else:
+ module_name = __name__
+
+
+
+import ruffus
+print("\tRuffus Version = ", ruffus.__version__)
+parser = OptionParser(version="%%prog v1.0, ruffus v%s" % ruffus.ruffus_version.__version)
+parser.add_option("-t", "--target_tasks", dest="target_tasks",
+ action="append",
+ default = list(),
+ metavar="JOBNAME",
+ type="string",
+ help="Target task(s) of pipeline.")
+parser.add_option("-f", "--forced_tasks", dest="forced_tasks",
+ action="append",
+ default = list(),
+ metavar="JOBNAME",
+ type="string",
+ help="Pipeline task(s) which will be included even if they are up to date.")
+parser.add_option("-j", "--jobs", dest="jobs",
+ default=1,
+ metavar="jobs",
+ type="int",
+ help="Specifies the number of jobs (commands) to run simultaneously.")
+parser.add_option("-v", "--verbose", dest = "verbose",
+ action="count", default=0,
+ help="Print more verbose messages for each additional verbose level.")
+parser.add_option("-d", "--dependency", dest="dependency_file",
+ #default="simple.svg",
+ metavar="FILE",
+ type="string",
+ help="Print a dependency graph of the pipeline that would be executed "
+ "to FILE, but do not execute it.")
+parser.add_option("-F", "--dependency_graph_format", dest="dependency_graph_format",
+ metavar="FORMAT",
+ type="string",
+ default = 'svg',
+ help="format of dependency graph file. Can be 'ps' (PostScript), "+
+ "'svg' 'svgz' (Structured Vector Graphics), " +
+ "'png' 'gif' (bitmap graphics) etc ")
+parser.add_option("-n", "--just_print", dest="just_print",
+ action="store_true", default=False,
+ help="Print a description of the jobs that would be executed, "
+ "but do not execute them.")
+parser.add_option("-M", "--minimal_rebuild_mode", dest="minimal_rebuild_mode",
+ action="store_true", default=False,
+ help="Rebuild a minimum of tasks necessary for the target. "
+ "Ignore upstream out of date tasks if intervening tasks are fine.")
+parser.add_option("-K", "--no_key_legend_in_graph", dest="no_key_legend_in_graph",
+ action="store_true", default=False,
+ help="Do not print out legend and key for dependency graph.")
+parser.add_option("-H", "--draw_graph_horizontally", dest="draw_horizontally",
+ action="store_true", default=False,
+ help="Draw horizontal dependency graph.")
+
+parameters = [
+ ]
+
+
+
+
+
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# imports
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+import re
+import operator
+import sys,os
+from collections import defaultdict
+import random
+
+sys.path.append(os.path.abspath(os.path.join(exe_path,"..", "..")))
+from ruffus import *
+
+# use simplejson in place of json for python < 2.6
+try:
+ import json
+except ImportError:
+ import simplejson
+ json = simplejson
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Main logic
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+
+
+
+
+# get help string
+f =io.StringIO()
+parser.print_help(f)
+helpstr = f.getvalue()
+(options, remaining_args) = parser.parse_args()
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Tasks
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+@files(None, "a")
+def task_1 (i, o):
+ # o is the single output file name
+ with open(o, 'w') as oo:
+ pass
+
+@transform(task_1, regex("b"), "task_2.output")
+def task_2 (i, o):
+ # the regex never matches, so this job should never run
+ with open(o, 'w') as oo:
+ pass
+
+import unittest
+
+class Test_task_mkdir(unittest.TestCase):
+
+ def setUp (self):
+ """
+ """
+ pass
+
+ def tearDown (self):
+ """
+ """
+ for d in ['a']:
+ fullpath = os.path.join(exe_path, d)
+ os.unlink(fullpath)
+
+
+ def test_no_re_match (self):
+ class t_save_to_str_logger:
+ """
+ Everything to stderr
+ """
+ def __init__ (self):
+ self.info_str = ""
+ self.warning_str = ""
+ self.debug_str = ""
+ def info (self, message):
+ self.info_str += message
+ def warning (self, message):
+ self.warning_str += message
+ def debug (self, message):
+ self.debug_str += message
+
+ save_to_str_logger = t_save_to_str_logger()
+ pipeline_run([task_2], options.forced_tasks, multiprocess = options.jobs,
+ logger = save_to_str_logger,
+ gnu_make_maximal_rebuild_mode = not options.minimal_rebuild_mode,
+ verbose = 1)
+
+ self.assertTrue("no files names matched" in save_to_str_logger.warning_str)
+ print("\n Warning printed out correctly", file=sys.stderr)
+
+
+if __name__ == '__main__':
+ if options.just_print:
+ pipeline_printout(sys.stdout, options.target_tasks, options.forced_tasks,
+ verbose = options.verbose,
+ gnu_make_maximal_rebuild_mode = not options.minimal_rebuild_mode)
+
+ elif options.dependency_file:
+ with open(options.dependency_file, "w") as graph_file:
+ pipeline_printout_graph ( graph_file,
+ options.dependency_graph_format,
+ options.target_tasks,
+ options.forced_tasks,
+ draw_vertically = not options.draw_horizontally,
+ gnu_make_maximal_rebuild_mode = not options.minimal_rebuild_mode,
+ no_key_legend = options.no_key_legend_in_graph)
+ else:
+ sys.argv= sys.argv[0:1]
+ unittest.main()
+
diff --git a/ruffus/test/test_tutorial7.py b/ruffus/test/test_tutorial7.py
new file mode 100755
index 0000000..e42362d
--- /dev/null
+++ b/ruffus/test/test_tutorial7.py
@@ -0,0 +1,124 @@
+#!/usr/bin/env python
+from __future__ import print_function
+# make sure using local ruffus
+import sys, os
+exe_path = os.path.split(os.path.abspath(sys.argv[0]))[0]
+sys.path.insert(0, os.path.abspath(os.path.join(exe_path,"..", "..")))
+
+
+
+NUMBER_OF_RANDOMS = 10000
+CHUNK_SIZE = 1000
+working_dir = "temp_tutorial7/"
+
+
+
+import time, sys, os
+from ruffus import *
+
+import random
+import glob
+
+
+#---------------------------------------------------------------
+#
+# Create random numbers
+#
+@follows(mkdir(working_dir))
+@files(None, working_dir + "random_numbers.list")
+def create_random_numbers(input_file_name, output_file_name):
+ f = open(output_file_name, "w")
+ for i in range(NUMBER_OF_RANDOMS):
+ f.write("%g\n" % (random.random() * 100.0))
+
+#---------------------------------------------------------------
+#
+# Split initial file
+#
+@follows(create_random_numbers)
+@split(working_dir + "random_numbers.list", working_dir + "*.chunks")
+def step_4_split_numbers_into_chunks (input_file_name, output_files):
+ """
+ Splits random numbers file into XXX files of CHUNK_SIZE each
+ """
+ #
+ # clean up files from previous runs
+ #
+ for f in glob.glob(working_dir + "*.chunks"):
+ os.unlink(f)
+ #
+ # create new file every CHUNK_SIZE lines and
+ # copy each line into current file
+ #
+ output_file = None
+ cnt_files = 0
+ for i, line in enumerate(open(input_file_name)):
+ if i % CHUNK_SIZE == 0:
+ cnt_files += 1
+ output_file = open(working_dir + "%d.chunks" % cnt_files, "w")
+ output_file.write(line)
+
+#---------------------------------------------------------------
+#
+# Calculate sum and sum of squares for each chunk file
+#
+@transform(step_4_split_numbers_into_chunks, suffix(".chunks"), ".sums")
+def step_5_calculate_sum_of_squares (input_file_name, output_file_name):
+ output = open(output_file_name, "w")
+ sum_squared, sum = [0.0, 0.0]
+ cnt_values = 0
+ for line in open(input_file_name):
+ cnt_values += 1
+ val = float(line.rstrip())
+ sum_squared += val * val
+ sum += val
+ output.write("%s\n%s\n%d\n" % (repr(sum_squared), repr(sum), cnt_values))
+
+
+def print_hooray_again():
+ print("hooray again")
+
+def print_whoppee_again():
+ print("whoppee again")
+
+
+#---------------------------------------------------------------
+#
+# Calculate sum and sum of squares for each chunk
+#
+@posttask(lambda: sys.stdout.write("hooray\n"))
+@posttask(print_hooray_again, print_whoppee_again, touch_file(os.path.join(working_dir, "done")))
+@merge(step_5_calculate_sum_of_squares, os.path.join(working_dir, "variance.result"))
+def step_6_calculate_variance (input_file_names, output_file_name):
+ """
+ Calculate variance naively
+ """
+ output = open(output_file_name, "w")
+ #
+ # initialise variables
+ #
+ all_sum_squared = 0.0
+ all_sum = 0.0
+ all_cnt_values = 0.0
+ #
+ # added up all the sum_squared, and sum and cnt_values from all the chunks
+ #
+ for input_file_name in input_file_names:
+ sum_squared, sum, cnt_values = list(map(float, open(input_file_name).readlines()))
+ all_sum_squared += sum_squared
+ all_sum += sum
+ all_cnt_values += cnt_values
+ all_mean = all_sum / all_cnt_values
+ variance = (all_sum_squared - all_sum * all_mean)/(all_cnt_values)
+ #
+ # print output
+ #
+ print(variance, file=output)
+
+#---------------------------------------------------------------
+#
+# Run
+#
+pipeline_run([step_6_calculate_variance], verbose = 1)
+import shutil
+shutil.rmtree(working_dir)
diff --git a/ruffus/test/test_unicode_filenames.py b/ruffus/test/test_unicode_filenames.py
new file mode 100755
index 0000000..29778c5
--- /dev/null
+++ b/ruffus/test/test_unicode_filenames.py
@@ -0,0 +1,217 @@
+#!/usr/bin/env python
+from __future__ import print_function
+"""
+
+ test_unicode_filenames.py
+
+ test making directories and files with unicode file names
+
+ use :
+ -j N / --jobs N to specify multitasking
+ -v to see the jobs in action
+ -n / --just_print to see what jobs would run
+
+"""
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# options
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+from optparse import OptionParser
+import sys, os
+import os.path
+try:
+ import StringIO as io
+except ImportError:
+ import io as io
+import re,time
+
+# add self to search path for testing
+exe_path = os.path.split(os.path.abspath(sys.argv[0]))[0]
+sys.path.insert(0,os.path.abspath(os.path.join(exe_path,"..", "..")))
+if __name__ == '__main__':
+ module_name = os.path.split(sys.argv[0])[1]
+ module_name = os.path.splitext(module_name)[0];
+else:
+ module_name = __name__
+
+
+
+import ruffus
+print(ruffus.__version__)
+parser = OptionParser(version="%%prog v1.0, ruffus v%s" % ruffus.ruffus_version.__version)
+parser.add_option("-t", "--target_tasks", dest="target_tasks",
+ action="append",
+ default = list(),
+ metavar="JOBNAME",
+ type="string",
+ help="Target task(s) of pipeline.")
+parser.add_option("-f", "--forced_tasks", dest="forced_tasks",
+ action="append",
+ default = list(),
+ metavar="JOBNAME",
+ type="string",
+ help="Pipeline task(s) which will be included even if they are up to date.")
+parser.add_option("-j", "--jobs", dest="jobs",
+ default=1,
+ metavar="jobs",
+ type="int",
+ help="Specifies the number of jobs (commands) to run simultaneously.")
+parser.add_option("-v", "--verbose", dest = "verbose",
+ action="count", default=0,
+ help="Print more verbose messages for each additional verbose level.")
+parser.add_option("-d", "--dependency", dest="dependency_file",
+ #default="simple.svg",
+ metavar="FILE",
+ type="string",
+ help="Print a dependency graph of the pipeline that would be executed "
+ "to FILE, but do not execute it.")
+parser.add_option("-F", "--dependency_graph_format", dest="dependency_graph_format",
+ metavar="FORMAT",
+ type="string",
+ default = 'svg',
+ help="format of dependency graph file. Can be 'ps' (PostScript), "+
+ "'svg' 'svgz' (Structured Vector Graphics), " +
+ "'png' 'gif' (bitmap graphics) etc ")
+parser.add_option("-n", "--just_print", dest="just_print",
+ action="store_true", default=False,
+ help="Print a description of the jobs that would be executed, "
+ "but do not execute them.")
+parser.add_option("-M", "--minimal_rebuild_mode", dest="minimal_rebuild_mode",
+ action="store_true", default=False,
+ help="Rebuild a minimum of tasks necessary for the target. "
+ "Ignore upstream out of date tasks if intervening tasks are fine.")
+parser.add_option("-K", "--no_key_legend_in_graph", dest="no_key_legend_in_graph",
+ action="store_true", default=False,
+ help="Do not print out legend and key for dependency graph.")
+parser.add_option("-H", "--draw_graph_horizontally", dest="draw_horizontally",
+ action="store_true", default=False,
+ help="Draw horizontal dependency graph.")
+
+parameters = [
+ ]
+
+
+
+
+
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# imports
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+import re
+import operator
+import sys,os
+from collections import defaultdict
+import random
+
+sys.path.append(os.path.abspath(os.path.join(exe_path,"..", "..")))
+from ruffus import *
+
+# use simplejson in place of json for python < 2.6
+try:
+ import json
+except ImportError:
+ import simplejson
+ json = simplejson
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Main logic
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+
+
+
+
+# get help string
+f =io.StringIO()
+parser.print_help(f)
+helpstr = f.getvalue()
+(options, remaining_args) = parser.parse_args()
+
+def touch (filename):
+ with open(filename, "w"):
+ pass
+
+if sys.hexversion >= 0x03000000:
+ unicode = str
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+
+# Tasks
+
+
+#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
+directories = [os.path.abspath(unicode("a")), unicode("b")]
+@follows(mkdir(directories), mkdir(unicode("c")), mkdir(unicode("d"), unicode("e")), mkdir(unicode("e")))
+@posttask(touch_file(unicode("f")))
+def task_which_makes_directories ():
+ pass
+
+@files(None, ["g", "h"])
+def task_which_makes_files (i, o):
+ for f in o:
+ touch(f)
+
+import unittest
+
+class Test_task_mkdir(unittest.TestCase):
+
+ def setUp (self):
+ """
+ """
+ pass
+
+ def tearDown (self):
+ """
+ delete directories
+ """
+ for d in 'abcde':
+ fullpath = os.path.join(exe_path, d)
+ os.rmdir(fullpath)
+ for d in 'fgh':
+ fullpath = os.path.join(exe_path, d)
+ os.unlink(fullpath)
+
+
+ def test_mkdir (self):
+ pipeline_run(options.target_tasks, options.forced_tasks, multiprocess = options.jobs,
+ logger = stderr_logger if options.verbose else black_hole_logger,
+ gnu_make_maximal_rebuild_mode = not options.minimal_rebuild_mode,
+ verbose = options.verbose)
+
+ for d in 'abcdefgh':
+ fullpath = os.path.join(exe_path, d)
+ self.assertTrue(os.path.exists(fullpath))
+
+
+if __name__ == '__main__':
+ if options.just_print:
+ pipeline_printout(sys.stdout, options.target_tasks, options.forced_tasks,
+ verbose = options.verbose,
+ gnu_make_maximal_rebuild_mode = not options.minimal_rebuild_mode)
+
+ elif options.dependency_file:
+ pipeline_printout_graph ( open(options.dependency_file, "w"),
+ options.dependency_graph_format,
+ options.target_tasks,
+ options.forced_tasks,
+ draw_vertically = not options.draw_horizontally,
+ gnu_make_maximal_rebuild_mode = not options.minimal_rebuild_mode,
+ no_key_legend = options.no_key_legend_in_graph)
+ else:
+ sys.argv= sys.argv[0:1]
+ unittest.main()
+
diff --git a/ruffus/test/test_verbosity.py b/ruffus/test/test_verbosity.py
new file mode 100755
index 0000000..b52e21a
--- /dev/null
+++ b/ruffus/test/test_verbosity.py
@@ -0,0 +1,137 @@
+#!/usr/bin/env python
+from __future__ import print_function
+"""
+
+ test_verbosity.py
+
+"""
+
+
+import unittest
+import os
+import sys
+import shutil
+try:
+ from StringIO import StringIO
+except ImportError:
+ from io import StringIO
+
+import time
+import re
+
+exe_path = os.path.split(os.path.abspath(sys.argv[0]))[0]
+sys.path.insert(0, os.path.abspath(os.path.join(exe_path,"..", "..")))
+from ruffus import *
+from ruffus import (pipeline_run, pipeline_printout, suffix, transform, split,
+ merge, dbdict, follows)
+from ruffus.ruffus_exceptions import RethrownJobError
+from ruffus.ruffus_utility import (RUFFUS_HISTORY_FILE,
+ CHECKSUM_FILE_TIMESTAMPS)
+
+
+
+parser = cmdline.get_argparse(description='WHAT DOES THIS PIPELINE DO?')
+options = parser.parse_args()
+logger, logger_mutex = cmdline.setup_logging (__name__, options.log_file, options.verbose)
+
+
+#---------------------------------------------------------------
+# create initial files
+#
+@originate([ ['/data/scratch/lg/what/one/two/three/job1.a.start', 'job1.b.start'],
+ ['/data/scratch/lg/what/one/two/three/job2.a.start', 'job2.b.start'],
+ ['/data/scratch/lg/what/one/two/three/job3.a.start', 'job3.b.start'] ])
+def create_initial_file_pairs(output_files):
+ # create both files as necessary
+ for output_file in output_files:
+ with open(output_file, "w") as oo: pass
+
+#---------------------------------------------------------------
+# first task
+@transform(create_initial_file_pairs, suffix(".start"), ".output.1")
+def first_task(input_files, output_file):
+ with open(output_file, "w"): pass
+
+
+#---------------------------------------------------------------
+# second task
+@transform(first_task, suffix(".output.1"), ".output.2")
+def second_task(input_files, output_file):
+ with open(output_file, "w"): pass
+
+class Test_verbosity(unittest.TestCase):
+ #___________________________________________________________________________
+ #
+ # test_printout_abbreviated_path1
+ #___________________________________________________________________________
+ def test_printout_abbreviated_path1(self):
+ """Input file exists, output doesn't exist"""
+ s = StringIO()
+ pipeline_printout(s, [second_task], verbose = 5, verbose_abbreviated_path = 1)
+ self.assertTrue(re.search('Job needs update: Missing files\n\s+'
+ '\[\.\.\./job2\.a\.start, job2\.b\.start, \.\.\./job2.a.output.1\]', s.getvalue()))
+
+ #___________________________________________________________________________
+ #
+ # test_printout_abbreviated_path2
+ #___________________________________________________________________________
+ def test_printout_abbreviated_path2(self):
+ """Input file exists, output doesn't exist"""
+ s = StringIO()
+ pipeline_printout(s, [second_task], verbose = 5, verbose_abbreviated_path = 2)
+ self.assertTrue('[.../three/job1.a.start, job1.b.start, .../three/job1.a.output.1]' in s.getvalue())
+
+
+ #___________________________________________________________________________
+ #
+ # test_printout_abbreviated_path2
+ #___________________________________________________________________________
+ def test_printout_abbreviated_path3(self):
+ """Input file exists, output doesn't exist"""
+ s = StringIO()
+ pipeline_printout(s, [second_task], verbose = 5, verbose_abbreviated_path = 3)
+ self.assertTrue('[.../two/three/job1.a.start, job1.b.start, .../two/three/job1.a.output.1]' in s.getvalue())
+
+ #___________________________________________________________________________
+ #
+ # test_printout_abbreviated_path9
+ #___________________________________________________________________________
+ def test_printout_abbreviated_path9(self):
+ """Input file exists, output doesn't exist"""
+ s = StringIO()
+ pipeline_printout(s, [second_task], verbose = 5, verbose_abbreviated_path = 9)
+ ret = s.getvalue()
+ self.assertTrue('[/data/scratch/lg/what/one/two/three/job2.a.start, job2.b.start,' in ret)
+
+
+ #___________________________________________________________________________
+ #
+ # test_printout_abbreviated_path0
+ #___________________________________________________________________________
+ def test_printout_abbreviated_path0(self):
+ """Input file exists, output doesn't exist"""
+ s = StringIO()
+ pipeline_printout(s, [second_task], verbose = 5, verbose_abbreviated_path = 0)
+ ret = s.getvalue()
+ self.assertTrue('[[/data/scratch/lg/what/one/two/three/job2.a.start,' in ret)
+ self.assertTrue('/ruffus/test/job2.b.start]' in ret)
+
+
+ #___________________________________________________________________________
+ #
+ # test_printout_abbreviated_path_minus_60
+ #___________________________________________________________________________
+ def test_printout_abbreviated_path_minus_60(self):
+ """Input file exists, output doesn't exist"""
+ s = StringIO()
+ pipeline_printout(s, [second_task], verbose = 5, verbose_abbreviated_path = -60)
+ ret = s.getvalue()
+ self.assertTrue('[<???> ratch/lg/what/one/two/three/job2.a.start, job2.b.start]' in ret)
+
+#
+# Necessary to protect the "entry point" of the program under windows.
+# see: http://docs.python.org/library/multiprocessing.html#multiprocessing-programming
+#
+if __name__ == '__main__':
+ unittest.main()
+
diff --git a/setup.py b/setup.py
new file mode 100755
index 0000000..46e10c9
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,202 @@
+#!/usr/bin/env python
+from distutils.core import setup
+#from setuptools import find_packages
+
+import sys, os
+if not sys.version_info[0:2] >= (2,6):
+ sys.stderr.write("Requires Python 2.6 or later\n")
+ sys.exit(1)
+
+# quickly import the latest version of ruffus
+sys.path.insert(0, os.path.abspath("."))
+import ruffus.ruffus_version
+sys.path.pop(0)
+
+
+module_dependencies = []
+#module_dependencies = ['multiprocessing>=2.6', 'simplejson']
+
+
+setup(
+ name='ruffus',
+ version=ruffus.ruffus_version.__version, #major.minor[.patch[.sub]]
+ description='Light-weight Python Computational Pipeline Management',
+ maintainer="Leo Goodstadt",
+ maintainer_email="ruffus_lib at llew.org.uk",
+ author='Leo Goodstadt',
+ author_email='ruffus@llew.org.uk',
+ long_description=\
+"""
+***************************************
+Overview
+***************************************
+
+
+ The Ruffus module is a lightweight way to add support
+ for running computational pipelines.
+
+ Computational pipelines are often conceptually quite simple, especially
+ if we break the process down into simple stages, or separate **tasks**.
+
+ Each stage or **task** in a computational pipeline is represented by a python function.
+ Each python function can be called in parallel to run multiple **jobs**.
+
+ Ruffus was originally designed for use in bioinformatics to analyse multiple genome
+ data sets.
+
+***************************************
+Documentation
+***************************************
+
+ Ruffus documentation can be found `here <http://www.ruffus.org.uk>`__ ,
+ with `download notes <http://www.ruffus.org.uk/installation.html>`__ ,
+ a `tutorial <http://www.ruffus.org.uk/tutorials/new_tutorial/introduction.html>`__ and
+ an `in-depth manual <http://www.ruffus.org.uk/tutorials/new_tutorial/manual_contents.html>`__ .
+
+
+***************************************
+Background
+***************************************
+
+ The purpose of a pipeline is to determine automatically which parts of a multi-stage
+ process need to be run, and in what order, to reach an objective ("target").
+
+ Computational pipelines, especially for analysing large scientific datasets, are
+ in widespread use.
+ However, even a conceptually simple series of steps can be difficult to set up and
+ maintain.
+
+***************************************
+Design
+***************************************
+ The ruffus module has the following design goals:
+
+ * Lightweight
+ * Scalable / Flexible / Powerful
+ * Standard Python
+ * Unintrusive
+ * As simple as possible
+
+***************************************
+Features
+***************************************
+
+ Automatic support for
+
+ * Managing dependencies
+ * Parallel jobs, including dispatching work to computational clusters
+ * Re-starting from arbitrary points, especially after errors (checkpointing)
+ * Display of the pipeline as a flowchart
+ * Managing complex pipeline topologies
+
+
+***************************************
+A Simple example
+***************************************
+
+ Use the **@follows(...)** python decorator before the function definitions::
+
+ from ruffus import *
+ import sys
+
+ def first_task():
+ print "First task"
+
+ @follows(first_task)
+ def second_task():
+ print "Second task"
+
+ @follows(second_task)
+ def final_task():
+ print "Final task"
+
+
+
+
+ The ``@follows`` decorator indicates that the ``first_task`` function precedes ``second_task`` in
+ the pipeline.
+
+ The canonical Ruffus decorator is ``@transform``, which **transforms** data flowing down a
+ computational pipeline from one stage to the next.
+
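+ For example (a minimal sketch; the task and file names below are purely
+ illustrative), ``@transform`` can turn every ``*.input`` file produced by one
+ task into a corresponding ``*.output`` file in the next::
+
+    from ruffus import *
+
+    @originate(["job1.input", "job2.input"])
+    def create_inputs(output_file):
+        # each starting file is created by a separate job
+        open(output_file, "w").close()
+
+    @transform(create_inputs, suffix(".input"), ".output")
+    def process(input_file, output_file):
+        # one job per ".input" file, named by swapping the suffix
+        open(output_file, "w").close()
+
+    pipeline_run()
+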
+********
+Usage
+********
+
+ Each stage or **task** in a computational pipeline is represented by a python function.
+ Each python function can be called in parallel to run multiple **jobs**.
+
+ 1. Import module::
+
+ import ruffus
+
+
+ 2. Annotate functions with python decorators
+
+ 3. Print the dependency graph if necessary
+
+ - For a graphical flowchart in ``jpg``, ``svg``, ``dot``, ``png``, ``ps``, ``gif`` formats::
+
+ pipeline_printout_graph ("flowchart.svg")
+
+ This requires ``dot`` to be installed
+
+ - For a text printout of all jobs ::
+
+ pipeline_printout(sys.stdout)
+
+
+ 4. Run the pipeline (see the note on parallel jobs below)::
+
+ pipeline_run()
+
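+ As a brief illustration (the job count of 4 is arbitrary), independent jobs can be
+ run in parallel by passing the ``multiprocess`` argument to ``pipeline_run``::
+
+    pipeline_run(multiprocess = 4)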
+
+""",
+ url='http://www.ruffus.org.uk',
+ download_url = "https://pypi.python.org/pypi/ruffus",
+
+ install_requires = module_dependencies, #['multiprocessing>=1.0', 'json' ], #, 'python>=2.5'],
+ setup_requires = module_dependencies, #['multiprocessing>=1.0', 'json'], #, 'python>=2.5'],
+
+
+ classifiers=[
+ 'Intended Audience :: End Users/Desktop',
+ 'Development Status :: 5 - Production/Stable',
+ 'Intended Audience :: Developers',
+ 'Intended Audience :: Science/Research',
+ 'Intended Audience :: Information Technology',
+ 'License :: OSI Approved :: MIT License',
+ 'Programming Language :: Python',
+ 'Topic :: Scientific/Engineering',
+ 'Topic :: Scientific/Engineering :: Bio-Informatics',
+ 'Topic :: System :: Distributed Computing',
+ 'Topic :: Software Development :: Build Tools',
+ 'Topic :: Software Development :: Libraries',
+ 'Environment :: Console',
+ ],
+ license = "MIT",
+ keywords = "make task pipeline parallel bioinformatics science",
+
+
+ #packages = find_packages('src'), # include all packages under src
+ #package_dir = {'':'src'}, #packages are under src
+ packages=['ruffus'],
+ package_dir={'ruffus': 'ruffus'},
+ include_package_data = True, # include everything in source control
+ #package_data = {
+ # # If any package contains *.txt files, include them:
+ # '': ['*.TXT'], \
+ #}
+
+
+ )
+
+#
+# http://pypi.python.org/pypi
+# http://docs.python.org/distutils/packageindex.html
+#
+#
+#
+# python setup.py register
+# python setup.py sdist --format=gztar upload
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/python-ruffus.git